ModelTable: boston_trans_normcont_model.py
"""
boston_trans_normcont_model.py: Creates Random Forest Regression model using minMax trasformation
********************
* Generated model file is in PMML format.
* To score this model , user needs insert/upload PMML model into Vantage table
********************
"""
import pyodbc
import pandas as pd
from sklearn2pmml.pipeline import PMMLPipeline
from sklearn2pmml import sklearn2pmml
from sklearn.ensemble import RandomForestRegressor
from sklearn_pandas import DataFrameMapper
from sklearn.preprocessing import MinMaxScaler
import os
import time
from teradataml import *
display.print_sqlmr_query = True
passwd = "alice"
uid = "alice"
con = create_context(host="server123@mydomain.com", username=uid, password=passwd)
contrain_df = DataFrame.from_query("select * from boston_train")
train_pd = train_df.to_pandas()
test_df = DataFrame.from_query("select * from boston_test")
test_pd = test_df.to_pandas()
train_pd = train_pd.apply(pd.to_numeric, errors='ignore')
test_pd = test_pd.apply(pd.to_numeric, errors='ignore')
train_pd
type(train_pd)
features = train_pd.columns.drop('medv')
target = 'medv'
pipeline = PMMLPipeline([
("mapping", DataFrameMapper([
(['crim', 'zn', 'indus', 'chas', 'nox', 'rm', 'age', 'dis', 'rad', 'tax', 'ptratio', 'black', 'lstat'], MinMaxScaler())
])),
("rfc", RandomForestRegressor(n_estimators = 100, random_state = 0))
])
pipeline.fit(train_pd[features], train_pd[target])
sklearn2pmml(pipeline, "../sql/boston_trans_normcont_py_model.pmml", with_repr = True)
pipeline.fit(train_pd[features], train_pd[target])
sklearn2pmml(pipeline, "../sql/boston_trans_normcont_py_model.pmml", with_repr = True)
#predict using test data
#........................
from pypmml import Model
model = Model.load('../sql/boston_trans_normcont_py_model.pmml')
predictions = model.predict(test_pd[features])
#print(predictions)
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
print(predictions)
Query that Calls PMMLPredict Function
SELECT * FROM mldb.PMMLPredict (
ON boston_test
ON (SELECT * FROM pmml_models
WHERE model_id='boston_trans_normcont_py_model') DIMENSION
USING
Accumulate ('id')
) AS td;
Query Output
*** Query completed. 101 rows found. 3 columns returned.
*** Total elapsed time was 2 seconds.
id prediction json_report
----------- ------------------------- -------------------------------------
40 27.476999999999993 {"predicted_medv":27.476999999999993}
265 34.07200000000001 {"predicted_medv":34.07200000000001}
80 21.135000000000005 {"predicted_medv":21.135000000000005}
305 8.758 {"predicted_medv":8.758}
345 29.806999999999995 {"predicted_medv":29.806999999999995}
120 15.331999999999994 {"predicted_medv":15.331999999999994}
385 9.281000000000002 {"predicted_medv":9.281000000000002}
160 33.23100000000002 {"predicted_medv":33.23100000000002}
200 32.365000000000016 {"predicted_medv":32.365000000000016}
425 20.94199999999999 {"predicted_medv":20.94199999999999}
240 26.675 {"predicted_medv":26.675}
465 20.026000000000014 {"predicted_medv":20.026000000000014}
505 26.566999999999997 {"predicted_medv":26.566999999999997}
...