Random Forest Model | BYOM | Teradata Vantage - 3.0 - Random Forest - Teradata Vantage

Teradata Vantage™ - Bring Your Own Model User Guide

Product
Teradata Vantage
Release Number
3.0
Published
May 2022
Last Update
2022-06-02
Content Type
User Guide
Publication ID
B700-1111-051K
Language
English (United States)
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
import os
import time
from teradataml import *
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType, StringTensorType, Int64TensorType
import numpy
 
 
# teradataml display option; by its name it makes teradataml print the
# SQL-MR queries it generates -- confirm against the teradataml docs.
display.print_sqlmr_query = True
# Example connection values from the guide; substitute your own.
passwd = "alice"
uid = "alice"
host="server123@mydomain.com"

# Connect to the database with the values above.
con = create_context(host=host, username=uid, password=passwd)
con

# Pull the training and test tables into local pandas DataFrames.
train_df = DataFrame.from_query("select * from boston_train")
train_pd = train_df.to_pandas()
test_df = DataFrame.from_query("select * from boston_test")
test_pd = test_df.to_pandas()


def _to_numeric_or_keep(column):
    """Return *column* converted by ``pd.to_numeric``, or unchanged on failure.

    Explicit replacement for ``pd.to_numeric(column, errors='ignore')``,
    which pandas deprecated in 2.2: a column that cannot be parsed as
    numeric is handed back as-is instead of raising.
    """
    try:
        return pd.to_numeric(column)
    except (ValueError, TypeError):
        return column


# Coerce every column that can be parsed as numeric; leave the rest alone.
train_pd = train_pd.apply(_to_numeric_or_keep)
test_pd = test_pd.apply(_to_numeric_or_keep)
train_pd
type(train_pd)
# Every column except the target 'medv' is a model input.
features = train_pd.columns.drop('medv')
target = 'medv'

# Fit a 100-tree random forest regressor on the training frame.
rfc = RandomForestRegressor(n_estimators = 100)
rfc.fit(train_pd[features], train_pd[target])
 
def convert_dataframe_schema(df, drop=None):
    """Describe the columns of *df* as a list of ``(name, tensor_type)`` pairs.

    Each kept column is mapped by its dtype: ``int64`` becomes
    ``Int64TensorType([None, 1])``, ``float64`` becomes
    ``FloatTensorType([None, 1])`` and anything else becomes
    ``StringTensorType([None, 1])``.

    Args:
        df: DataFrame whose ``columns`` and ``dtypes`` are inspected.
        drop: Optional container of column names to leave out; ``None``
            keeps every column.

    Returns:
        A list of ``(column_name, tensor_type)`` tuples in column order.
    """
    # An empty tuple excludes nothing, mirroring the "drop is None" case.
    excluded = () if drop is None else drop

    def tensor_for(dtype):
        # Same dtype comparisons, in the same order, as a guard-clause chain.
        if dtype == 'int64':
            return Int64TensorType([None, 1])
        if dtype == 'float64':
            return FloatTensorType([None, 1])
        return StringTensorType([None, 1])

    return [
        (column, tensor_for(dtype))
        for column, dtype in zip(df.columns, df.dtypes)
        if column not in excluded
    ]
 
# Per-column schema of the feature frame. It is computed here but not
# passed to the converter below, which uses the single-tensor signature.
initial_inputs = convert_dataframe_schema(train_pd[features])
# One float tensor input named 'numfeat' with one column per feature.
initial_type = [('numfeat', FloatTensorType([None, len(features)]))]

try:
    model_onnx = convert_sklearn(rfc, 'pipeline_test', initial_types=initial_type)
except Exception as e:
    print(e)
    # Re-raise: without a converted model there is nothing to write, and
    # carrying on would only fail later with a NameError on model_onnx
    # after the output file had already been opened for writing.
    raise

# Serialize the converted model to an ONNX file.
with open("boston_db_rf_reg_model.onnx", "wb") as f:
    f.write(model_onnx.SerializeToString())