"""Train a random-forest regressor on ``boston_train`` and export it to ONNX.

The script connects to a Teradata system through teradataml, pulls the
``boston_train`` and ``boston_test`` tables into pandas, fits a
``RandomForestRegressor`` that predicts ``medv`` from all other columns, and
writes the converted model to ``boston_db_rf_reg_model.onnx``.
"""
import os
import time

import numpy
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import (
    FloatTensorType,
    Int64TensorType,
    StringTensorType,
)
from teradataml import *

display.print_sqlmr_query = True

# NOTE(review): credentials are hard-coded in source; consider loading them
# from the environment or a secrets store before sharing this script.
passwd = "alice"
uid = "alice"
host = "server123@mydomain.com"

con = create_context(host=host, username=uid, password=passwd)

train_df = DataFrame.from_query("select * from boston_train")
train_pd = train_df.to_pandas()

test_df = DataFrame.from_query("select * from boston_test")
test_pd = test_df.to_pandas()

# Coerce every column that parses as numeric; columns that do not parse are
# left as they are.
# NOTE(review): errors='ignore' is deprecated in recent pandas releases --
# confirm against the pandas version in use.
train_pd = train_pd.apply(pd.to_numeric, errors='ignore')
test_pd = test_pd.apply(pd.to_numeric, errors='ignore')

# Every column except the target is used as a feature.
features = train_pd.columns.drop('medv')
target = 'medv'

rfc = RandomForestRegressor(n_estimators=100)
rfc.fit(train_pd[features], train_pd[target])


def convert_dataframe_schema(df, drop=None):
    """Describe each column of *df* as a ``(name, tensor type)`` pair.

    Args:
        df: pandas DataFrame whose ``columns`` and ``dtypes`` are inspected.
        drop: optional container of column names to leave out of the result.

    Returns:
        A list of ``(column_name, tensor_type)`` tuples in column order.
        ``int64`` columns map to ``Int64TensorType``, ``float64`` columns to
        ``FloatTensorType`` and every other dtype to ``StringTensorType``;
        each tensor type is created with shape ``[None, 1]``.
    """
    inputs = []
    for name, dtype in zip(df.columns, df.dtypes):
        if drop is not None and name in drop:
            continue
        if dtype == 'int64':
            tensor_type = Int64TensorType([None, 1])
        elif dtype == 'float64':
            tensor_type = FloatTensorType([None, 1])
        else:
            tensor_type = StringTensorType([None, 1])
        inputs.append((name, tensor_type))
    return inputs


# Per-column schema; computed here but not passed to convert_sklearn below,
# which receives the single-tensor ``initial_type`` instead.
initial_inputs = convert_dataframe_schema(train_pd[features])

# One float input named 'numfeat' with one slot per feature column.
initial_type = [('numfeat', FloatTensorType([None, len(features)]))]

try:
    model_onnx = convert_sklearn(rfc, 'pipeline_test', initial_types=initial_type)
except Exception as e:
    # Report the failure, then re-raise: continuing would leave ``model_onnx``
    # undefined, so the write below would create an empty file and fail with
    # an unrelated NameError.
    print(e)
    raise

with open("boston_db_rf_reg_model.onnx", "wb") as f:
    f.write(model_onnx.SerializeToString())