from sklearn import datasets, tree
import pandas as pd
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType
from sklearn.pipeline import Pipeline
import os
import time
from teradataml import *
# Train a decision-tree classifier on the `iris_train` table and export the
# fitted pipeline to an ONNX file.

# Ask teradataml to print the SQL-MR queries it generates (debugging aid).
display.print_sqlmr_query = True

# NOTE(review): credentials are hard-coded in source. Move them to a secret
# store / environment variables before sharing or committing this script.
passwd = "alice"
uid = "alice"
host = "server123@mydomain.com"
con = create_context(host=host, username=uid, password=passwd)

# Pull the training table into a local pandas DataFrame.
train_df = DataFrame.from_query("select * from iris_train")
traid_pd = train_df.to_pandas()

# Features are every column except the label; the label is `species`.
X = traid_pd.drop('species', axis=1)
y = traid_pd[['species']]

pipeline = Pipeline([
    ("classifier", tree.DecisionTreeClassifier()),
])
# `y` is a single-column frame; ravel() flattens it to the 1-D label array
# that fit() is given here.
pipeline.fit(X, y.values.ravel())

# Convert into ONNX format
# The declared input width is taken from the training matrix so the exported
# signature always matches the number of features the model was fitted on.
initial_type = [('float_input', FloatTensorType([None, X.shape[1]]))]
onx = convert_sklearn(pipeline, initial_types=initial_type)
with open("iris_db_dt_model.onnx", "wb") as f:
    f.write(onx.SerializeToString())