This Python script creates a Random Forest model and exports it in PMML format. It uses teradataml to fetch the training data from Teradata Vantage and scikit-learn to train the model. Therefore, the script must specify the connection details, the predictor columns, and the target variable.
""" iris_db_rf_model.py: Creates Random Forest model ******************** * Generated model file is in PMML format. * To score this model , user needs insert/upload PMML model into Vantage table ******************** """ import pandas as pd from sklearn2pmml.pipeline import PMMLPipeline from sklearn2pmml import sklearn2pmml from sklearn.ensemble import RandomForestClassifier import os import time from teradataml import * display.print_sqlmr_query = True passwd = "alice" uid = "alice" host = "server123@mydomain.com" con = create_context(host=host, username=uid, password=passwd) con train_df = DataFrame.from_query("select * from iris_train") traid_pd1 = train_df.to_pandas() traid_pd1 type(traid_pd1) X = traid_pd1[['sepal_length','sepal_width','petal_length', 'petal_width' ]] y=traid_pd1[['species']] pipeline = PMMLPipeline([ ("classifier", RandomForestClassifier()) ]) pipeline.fit(X, y.values.ravel()) sklearn2pmml(pipeline, "iris_db_rf_model.pmml", with_repr = True)