Python Example - Teradata Vantage

Teradata Vantage™ - Bring Your Own Model User Guide

Deployment

VantageCloud

VantageCore

Edition

Enterprise

IntelliFlex

Lake

VMware

Product

Teradata Vantage

Release Number

5.0

Published

October 2023

Language

English (United States)

Last Update

2024-04-06

dita:mapPath

fee1607120608274.ditamap

dita:ditavalPath

ayr1485454803741.ditaval

dita:id

fee1607120608274

ModelTable: boston_trans_normcont_model.py

"""                                                                                                                   
boston_trans_normcont_model.py: Creates Random Forest Regression model using minMax trasformation
********************
*  Generated model file is in PMML format.
*  To score this model , user needs insert/upload PMML model into Vantage table
********************
"""
import pyodbc
import pandas as pd
from sklearn2pmml.pipeline import PMMLPipeline
from sklearn2pmml import sklearn2pmml
from sklearn.ensemble import RandomForestRegressor
from sklearn_pandas import DataFrameMapper
from sklearn.preprocessing import MinMaxScaler
import os
import time
from teradataml import *
 
display.print_sqlmr_query = True
passwd = "alice"
uid = "alice"
 
con = create_context(host="server123@mydomain.com", username=uid, password=passwd)
contrain_df = DataFrame.from_query("select * from boston_train")
train_pd = train_df.to_pandas()
test_df = DataFrame.from_query("select * from boston_test")
test_pd = test_df.to_pandas()
train_pd = train_pd.apply(pd.to_numeric, errors='ignore')
test_pd = test_pd.apply(pd.to_numeric, errors='ignore')
train_pd
type(train_pd)
features = train_pd.columns.drop('medv')
target = 'medv'
 
pipeline = PMMLPipeline([
("mapping", DataFrameMapper([
(['crim', 'zn', 'indus', 'chas', 'nox', 'rm', 'age', 'dis', 'rad', 'tax', 'ptratio', 'black', 'lstat'], MinMaxScaler())
])),
("rfc", RandomForestRegressor(n_estimators = 100, random_state = 0))
])
 
pipeline.fit(train_pd[features], train_pd[target])
sklearn2pmml(pipeline, "../sql/boston_trans_normcont_py_model.pmml", with_repr = True)
pipeline.fit(train_pd[features], train_pd[target])
sklearn2pmml(pipeline, "../sql/boston_trans_normcont_py_model.pmml", with_repr = True)
 
#predict using test data
#........................
from pypmml import Model
model = Model.load('../sql/boston_trans_normcont_py_model.pmml')
predictions = model.predict(test_pd[features])
#print(predictions)
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
print(predictions)

Query that Calls PMMLPredict Function

SELECT * FROM mldb.PMMLPredict (
  ON boston_test
  ON (SELECT * FROM pmml_models 
      WHERE model_id='boston_trans_normcont_py_model') DIMENSION
  USING
    Accumulate ('id')
) AS td;

Query Output

 *** Query completed. 101 rows found. 3 columns returned.
 *** Total elapsed time was 2 seconds.
 
         id prediction                json_report
----------- ------------------------- -------------------------------------
         40 27.476999999999993        {"predicted_medv":27.476999999999993}
        265 34.07200000000001         {"predicted_medv":34.07200000000001}
         80 21.135000000000005        {"predicted_medv":21.135000000000005}
        305 8.758                     {"predicted_medv":8.758}
        345 29.806999999999995        {"predicted_medv":29.806999999999995}
        120 15.331999999999994        {"predicted_medv":15.331999999999994}
        385 9.281000000000002         {"predicted_medv":9.281000000000002}
        160 33.23100000000002         {"predicted_medv":33.23100000000002}
        200 32.365000000000016        {"predicted_medv":32.365000000000016}
        425 20.94199999999999         {"predicted_medv":20.94199999999999}
        240 26.675                    {"predicted_medv":26.675}
        465 20.026000000000014        {"predicted_medv":20.026000000000014}
        505 26.566999999999997        {"predicted_medv":26.566999999999997}
        ...