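This Python pipeline creates an XGBoost classification model and exports it in PMML format. The teradataml API fetches the training data from Teradata Vantage, and scikit-learn-compatible APIs (XGBoost wrapped in a sklearn2pmml pipeline) train and export the model. Therefore, this pipeline must specify the connection, the predictor columns, and the target variable.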
"""iris_db_xgb_model.py: create an XGBoost classification model for iris.

Training rows are read from the ``iris_train`` table in Teradata Vantage,
an ``XGBClassifier`` is fitted inside a ``PMMLPipeline``, and the fitted
pipeline is written to ``iris_db_xgb_model.pmml``.

* The generated model file is in PMML format.
* To score with this model, the user needs to insert/upload the PMML model
  into a Vantage table.

Connection settings may be overridden with the environment variables
``TD_HOST``, ``TD_USERNAME`` and ``TD_PASSWORD``; when a variable is unset,
the sample placeholder value from the original script is used.
"""
import os
import time

import pandas as pd
from sklearn import datasets
from sklearn2pmml import sklearn2pmml
from sklearn2pmml.pipeline import PMMLPipeline

from teradataml import *

# These two stay below the wildcard import (their original position) so the
# names they bind can never be shadowed by whatever teradataml exports.
from sklearn import svm
from xgboost import XGBClassifier

# teradataml display option; presumably echoes the generated analytic
# queries -- confirm against the teradataml options documentation.
display.print_sqlmr_query = True

# NOTE(security): the fallbacks below are the sample placeholders only.
# Provide real connection settings through the environment rather than
# committing them to source control.
host = os.environ.get("TD_HOST", "server123@mydomain.com")
uid = os.environ.get("TD_USERNAME", "alice")
passwd = os.environ.get("TD_PASSWORD", "alice")
con = create_context(host=host, username=uid, password=passwd)

# Pull the training table from Vantage and materialise it locally as a
# pandas DataFrame so it can be handed to the scikit-learn style estimator.
train_df = DataFrame.from_query("select * from iris_train")
train_pd = train_df.to_pandas()

# Predictor columns and the target column.
X = train_pd[['sepal_length', 'sepal_width', 'petal_length', 'petal_width']]
y = train_pd[['species']]

# Single-step pipeline: the classifier is wrapped in a PMMLPipeline so that
# sklearn2pmml can export it.
pipeline = PMMLPipeline([
    ("classifier", XGBClassifier()),
])

# y is a one-column frame; ravel() flattens its values to the 1-D array
# passed as the label vector.
pipeline.fit(X, y.values.ravel())

# Write the fitted pipeline out as a PMML document.
sklearn2pmml(pipeline, "iris_db_xgb_model.pmml", with_repr=True)