ONNXPredict using Logistic Regression model.¶
Setup¶
In [ ]:
# Import required libraries.
import tempfile
import getpass
from teradataml import DataFrame, load_example_data, create_context, \
db_drop_table, remove_context, save_byom, retrieve_byom, delete_byom, list_byom
from teradataml.options.configure import configure
In [ ]:
# Create the connection.
# Host, username and password are all read interactively with getpass, so none
# of them is hard-coded in the notebook or echoed while being typed.
host = getpass.getpass("Host: ")
username = getpass.getpass("Username: ")
password = getpass.getpass("Password: ")
con = create_context(host=host, username=username, password=password)
Load example data and use sample() to split the input data into training and testing datasets.¶
In [ ]:
# Load example data.
load_example_data("byom", "iris_input")
In [ ]:
iris_input = DataFrame("iris_input")
In [ ]:
# Create 2 samples of input data - sample 1 will have 80% of total rows and sample 2 will have 20% of total rows.
iris_sample = iris_input.sample(frac=[0.8, 0.2])
iris_sample
Out[ ]:
id | sepal_length | sepal_width | petal_length | petal_width | species | sampleid |
---|---|---|---|---|---|---|
17 | 5.4 | 3.9 | 1.3 | 0.4 | 1 | 1 |
38 | 4.9 | 3.6 | 1.4 | 0.1 | 1 | 1 |
78 | 6.7 | 3.0 | 5.0 | 1.7 | 2 | 1 |
122 | 5.6 | 2.8 | 4.9 | 2.0 | 3 | 2 |
59 | 6.6 | 2.9 | 4.6 | 1.3 | 2 | 1 |
40 | 5.1 | 3.4 | 1.5 | 0.2 | 1 | 1 |
80 | 5.7 | 2.6 | 3.5 | 1.0 | 2 | 1 |
120 | 6.0 | 2.2 | 5.0 | 1.5 | 3 | 1 |
19 | 5.7 | 3.8 | 1.7 | 0.3 | 1 | 1 |
61 | 5.0 | 2.0 | 3.5 | 1.0 | 2 | 2 |
In [ ]:
# Create train dataset from sample 1 by filtering on "sampleid" and drop "sampleid" column as it is not required for training model.
# Compare against the integer 1 (the sample ids shown above are numeric) rather
# than the string "1", so the filter does not rely on an implicit
# string-to-number conversion in the database.
iris_train = iris_sample[iris_sample.sampleid == 1].drop("sampleid", axis=1)
iris_train
Out[ ]:
id | sepal_length | sepal_width | petal_length | petal_width | species |
---|---|---|---|---|---|
17 | 5.4 | 3.9 | 1.3 | 0.4 | 1 |
76 | 6.6 | 3.0 | 4.4 | 1.4 | 2 |
116 | 6.4 | 3.2 | 5.3 | 2.3 | 3 |
19 | 5.7 | 3.8 | 1.7 | 0.3 | 1 |
99 | 5.1 | 2.5 | 3.0 | 1.1 | 2 |
40 | 5.1 | 3.4 | 1.5 | 0.2 | 1 |
80 | 5.7 | 2.6 | 3.5 | 1.0 | 2 |
120 | 6.0 | 2.2 | 5.0 | 1.5 | 3 |
59 | 6.6 | 2.9 | 4.6 | 1.3 | 2 |
78 | 6.7 | 3.0 | 5.0 | 1.7 | 2 |
In [ ]:
# Create test dataset from sample 2 by filtering on "sampleid" and drop "sampleid" column as it is not required for scoring.
# Compare against the integer 2 (the sample ids shown above are numeric) rather
# than the string "2", so the filter does not rely on an implicit
# string-to-number conversion in the database.
iris_test = iris_sample[iris_sample.sampleid == 2].drop("sampleid", axis=1)
iris_test
Out[ ]:
id | sepal_length | sepal_width | petal_length | petal_width | species |
---|---|---|---|---|---|
119 | 7.7 | 2.6 | 6.9 | 2.3 | 3 |
108 | 7.3 | 2.9 | 6.3 | 1.8 | 3 |
24 | 5.1 | 3.3 | 1.7 | 0.5 | 1 |
59 | 6.6 | 2.9 | 4.6 | 1.3 | 2 |
91 | 5.5 | 2.6 | 4.4 | 1.2 | 2 |
120 | 6.0 | 2.2 | 5.0 | 1.5 | 3 |
57 | 6.3 | 3.3 | 4.7 | 1.6 | 2 |
55 | 6.5 | 2.8 | 4.6 | 1.5 | 2 |
97 | 5.7 | 2.9 | 4.2 | 1.3 | 2 |
78 | 6.7 | 3.0 | 5.0 | 1.7 | 2 |
Prepare dataset for creating a Logistic Regression model.¶
In [ ]:
# Convert teradataml dataframe to pandas dataframe.
# features : Names of the columns used as model inputs (every column of
#            train_pd except 'species').
# target   : Name of the column that holds the class label.
train_pd = iris_train.to_pandas()
features = train_pd.columns.drop('species')
target = 'species'
Train Model.¶
In [ ]:
# Import required libraries.
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
In [ ]:
# Build the Logistic Regression pipeline: standardize the inputs first,
# then fit the classifier on the scaled values.
log_reg_pipe_obj = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("LogReg", LogisticRegression()),
    ]
)
In [ ]:
log_reg_pipe_obj.fit(train_pd[features], train_pd[target])
Out[ ]:
Pipeline(steps=[('scaler', StandardScaler()), ('LogReg', LogisticRegression())])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
Pipeline(steps=[('scaler', StandardScaler()), ('LogReg', LogisticRegression())])
StandardScaler()
LogisticRegression()
Save the model in ONNX format.¶
In [ ]:
# Import required libraries.
from skl2onnx import to_onnx
In [ ]:
# Create temporary filepath to save model.
# Keep a reference to temp_dir: the directory (and the model file written into
# it) is removed when temp_dir.cleanup() is called or the object is finalized.
temp_dir = tempfile.TemporaryDirectory()
model_file_path = f"{temp_dir.name}/iris_db_log_reg_model.onnx"
In [ ]:
# Convert the fitted pipeline to ONNX.
# The sample input is selected with the same `features` columns the pipeline
# was fitted on (instead of a positional slice), so the ONNX input signature
# always matches the model's inputs. It is cast to float32 so that the input
# type inferred from the sample is single-precision float.
onx = to_onnx(log_reg_pipe_obj, train_pd[features].astype(np.float32))
In [ ]:
# Write the serialized ONNX model to the temporary file in binary mode.
with open(model_file_path, "wb") as model_file:
    model_file.write(onx.SerializeToString())
Save the model in Vantage.¶
In [ ]:
# Save the ONNX model in Vantage.
save_byom("onnx_log_reg_iris", model_file_path, "byom_models")
Created the model table 'byom_models' as it does not exist. Model is saved.
In [ ]:
# List the ONNX models in Vantage.
list_byom("byom_models")
                                     model
model_id
onnx_log_reg_iris  b'8081208736B6C326F...'
Retrieve the model from Vantage.¶
In [ ]:
# Retrieve the model from table "byom_models", using the model id 'onnx_log_reg_iris'.
modeldata = retrieve_byom("onnx_log_reg_iris", "byom_models")
In [ ]:
# Set the database in which the BYOM functions are installed; the value is
# prompted for at run time instead of being hard-coded.
configure.byom_install_location = getpass.getpass("byom_install_location: ")
Score the model.¶
In [ ]:
# Import required libraries
from teradataml import ONNXPredict
In [ ]:
# Score the test dataset with ONNXPredict() using the ONNX model retrieved from Vantage.
# The accumulated input columns are copied to the output next to the
# requested model output field.
predict_output = ONNXPredict(
    modeldata=modeldata,
    newdata=iris_test,
    accumulate=["id", "sepal_length", "petal_length"],
    overwrite_cached_models="*",
    model_output_fields="output_label",
)
In [ ]:
# Print the query.
print(predict_output.show_query())
SELECT * FROM "mldb".ONNXPredict( ON "MLDB"."ml__select__1663138010926557" AS InputTable PARTITION BY ANY ON (select model_id,model from "MLDB"."ml__filter__1663142281856859") AS ModelTable DIMENSION USING Accumulate('id','sepal_length','petal_length') ModelOutputFields('output_label') ) as sqlmr
In [ ]:
# Print the result.
predict_output.result
Out[ ]:
id | sepal_length | petal_length | output_label |
---|---|---|---|
137 | 6.3 | 5.6 | [2] |
9 | 4.4 | 1.4 | [1] |
104 | 6.3 | 5.6 | [2] |
78 | 6.7 | 5.0 | [2] |
148 | 6.5 | 5.2 | [2] |
135 | 6.1 | 5.6 | [2] |
150 | 5.9 | 5.1 | [2] |
94 | 5.0 | 3.3 | [1] |
76 | 6.6 | 4.4 | [1] |
139 | 6.0 | 4.8 | [2] |
Cleanup.¶
In [ ]:
# Delete the model from table "byom_models", using the model id 'onnx_log_reg_iris'.
delete_byom("onnx_log_reg_iris", "byom_models")
Model is deleted.
In [ ]:
# Drop the table that stored the models.
db_drop_table("byom_models")
Out[ ]:
True
In [ ]:
# Drop the input data table.
db_drop_table("iris_input")
Out[ ]:
True
In [ ]:
# Remove the temporary directory that holds the exported ONNX file; it was
# created with tempfile.TemporaryDirectory() and is otherwise left on disk
# until the object is finalized.
temp_dir.cleanup()
# Close the connection to Vantage. Kept as the last expression so the cell
# still displays its return value.
remove_context()
Out[ ]:
True