ONNXPredict using XGBoost Classifier model.¶
Setup¶
In [ ]:
# Import required libraries.
import tempfile
import getpass
from teradataml import DataFrame, load_example_data, create_context, \
db_drop_table, remove_context, save_byom, retrieve_byom, delete_byom, list_byom
from teradataml.options.configure import configure
In [ ]:
# Create the connection.
# Every value is read with getpass so that nothing typed (host and username
# included) is echoed into the notebook output.
host = getpass.getpass("Host: ")
username = getpass.getpass("Username: ")
password = getpass.getpass("Password: ")
# Open the teradataml context; the returned object is kept in `con`.
con = create_context(host=host, username=username, password=password)
Load example data and use sample() to split the input data into training and testing datasets.¶
In [ ]:
# Load example data.
# Loads the "iris_input" table of the "byom" example data set.
load_example_data("byom", "iris_input")
WARNING: Skipped loading table iris_input since it already exists in the database.
In [ ]:
# Create a teradataml DataFrame on the "iris_input" table.
iris_input = DataFrame("iris_input")
In [ ]:
# Create 2 samples of input data - sample 1 will have 80% of total rows and sample 2 will have 20% of total rows.
# The result carries a "sampleid" column (1 or 2) identifying the sample each row belongs to.
iris_sample = iris_input.sample(frac=[0.8, 0.2])
# Bare expression: displays the sampled DataFrame in the notebook.
iris_sample
Out[ ]:
| id | sepal_length | sepal_width | petal_length | petal_width | species | sampleid |
|---|---|---|---|---|---|---|
| 17 | 5.4 | 3.9 | 1.3 | 0.4 | 1 | 2 |
| 38 | 4.9 | 3.6 | 1.4 | 0.1 | 1 | 2 |
| 78 | 6.7 | 3.0 | 5.0 | 1.7 | 2 | 1 |
| 122 | 5.6 | 2.8 | 4.9 | 2.0 | 3 | 1 |
| 59 | 6.6 | 2.9 | 4.6 | 1.3 | 2 | 1 |
| 40 | 5.1 | 3.4 | 1.5 | 0.2 | 1 | 2 |
| 80 | 5.7 | 2.6 | 3.5 | 1.0 | 2 | 1 |
| 120 | 6.0 | 2.2 | 5.0 | 1.5 | 3 | 1 |
| 19 | 5.7 | 3.8 | 1.7 | 0.3 | 1 | 2 |
| 61 | 5.0 | 2.0 | 3.5 | 1.0 | 2 | 1 |
In [ ]:
# Create train dataset from sample 1 by filtering on "sampleid" and drop "sampleid" column as it is not required for training model.
# "sampleid" holds the integer sample number, so compare it with the integer 1
# instead of the string "1"; this avoids relying on an implicit
# character-to-number conversion in the generated filter.
iris_train = iris_sample[iris_sample.sampleid == 1].drop("sampleid", axis=1)
# Bare expression: displays the training DataFrame in the notebook.
iris_train
Out[ ]:
| id | sepal_length | sepal_width | petal_length | petal_width | species |
|---|---|---|---|---|---|
| 139 | 6.0 | 3.0 | 4.8 | 1.8 | 3 |
| 38 | 4.9 | 3.6 | 1.4 | 0.1 | 1 |
| 78 | 6.7 | 3.0 | 5.0 | 1.7 | 2 |
| 122 | 5.6 | 2.8 | 4.9 | 2.0 | 3 |
| 99 | 5.1 | 2.5 | 3.0 | 1.1 | 2 |
| 40 | 5.1 | 3.4 | 1.5 | 0.2 | 1 |
| 120 | 6.0 | 2.2 | 5.0 | 1.5 | 3 |
| 55 | 6.5 | 2.8 | 4.6 | 1.5 | 2 |
| 59 | 6.6 | 2.9 | 4.6 | 1.3 | 2 |
| 61 | 5.0 | 2.0 | 3.5 | 1.0 | 2 |
In [ ]:
# Create test dataset from sample 2 by filtering on "sampleid" and drop "sampleid" column as it is not required for scoring.
# "sampleid" holds the integer sample number, so compare it with the integer 2
# instead of the string "2"; this avoids relying on an implicit
# character-to-number conversion in the generated filter.
iris_test = iris_sample[iris_sample.sampleid == 2].drop("sampleid", axis=1)
# Bare expression: displays the test DataFrame in the notebook.
iris_test
Out[ ]:
| id | sepal_length | sepal_width | petal_length | petal_width | species |
|---|---|---|---|---|---|
| 83 | 5.8 | 2.7 | 3.9 | 1.2 | 2 |
| 24 | 5.1 | 3.3 | 1.7 | 0.5 | 1 |
| 1 | 5.1 | 3.5 | 1.4 | 0.2 | 1 |
| 59 | 6.6 | 2.9 | 4.6 | 1.3 | 2 |
| 85 | 5.4 | 3.0 | 4.5 | 1.5 | 2 |
| 118 | 7.7 | 3.8 | 6.7 | 2.2 | 3 |
| 55 | 6.5 | 2.8 | 4.6 | 1.5 | 2 |
| 95 | 5.6 | 2.7 | 4.2 | 1.3 | 2 |
| 97 | 5.7 | 2.9 | 4.2 | 1.3 | 2 |
| 26 | 5.0 | 3.0 | 1.6 | 0.2 | 1 |
Prepare dataset for creating an XGBoost Classifier model.¶
In [ ]:
# Pull the training rows into a local pandas DataFrame.
train_pd = iris_train.to_pandas()
# target   : label of the column holding the class to predict.
# features : labels of every remaining column, used as model inputs.
target = 'species'
features = train_pd.columns.drop(target)
Train Model.¶
In [ ]:
# Import required libraries.
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier
In [ ]:
# Build the model as a two-step pipeline: scale the features, then classify
# them with XGBoost. n_estimators=5 keeps the example model small.
xgb_pipe_obj = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("xgbc", XGBClassifier(n_estimators=5)),
    ]
)
In [ ]:
# Fit the pipeline on the feature columns, using the 'species' column as labels.
xgb_pipe_obj.fit(train_pd[features], train_pd[target])
Save the model in ONNX format.¶
In [ ]:
# Import required libraries.
from skl2onnx import to_onnx, convert_sklearn, update_registered_converter
from skl2onnx.common.shape_calculator import calculate_linear_classifier_output_shapes
from onnxmltools.convert.xgboost.operator_converters.XGBoost import convert_xgboost
In [ ]:
# Create temporary filepath to save model.
# Keep `temp_dir` referenced: the directory (and the model file inside it) is
# removed once the TemporaryDirectory object is cleaned up or finalized.
temp_dir = tempfile.TemporaryDirectory()
model_file_path = f"{temp_dir.name}/iris_db_xgbc_model.onnx"
In [ ]:
# Register onnxmltools' XGBoost converter for XGBClassifier so that skl2onnx
# knows how to convert the classifier step of the pipeline.
update_registered_converter(
    XGBClassifier,
    'XGBoostXGBClassifier',
    calculate_linear_classifier_output_shapes,
    convert_xgboost,
    options={
        'nocl': [True, False],
        'zipmap': [True],
    },
)
In [ ]:
# Convert XGBoost model to ONNX.
# The sample input defines the ONNX input signature. Select it with the same
# `features` labels the pipeline was fitted on instead of a positional slice,
# which silently depends on the column order of train_pd.
onx = to_onnx(
    xgb_pipe_obj,
    train_pd[features].astype(np.float32),
    target_opset={'': 12, 'ai.onnx.ml': 2},
)
In [ ]:
# Save the model in ONNX format.
# Binary mode: the serialized model is written as raw bytes. The write must
# be indented under the `with` so the file is closed when the block exits.
with open(model_file_path, "wb") as f:
    f.write(onx.SerializeToString())
Save the model in Vantage.¶
In [ ]:
# Save the ONNX model in Vantage.
# Stores the local model file under model id "onnx_xgb_iris" in table "byom_models".
save_byom("onnx_xgb_iris", model_file_path, "byom_models")
Created the model table 'byom_models' as it does not exist. Model is saved.
In [ ]:
# List the ONNX models in Vantage.
# Shows the models stored in table "byom_models".
list_byom("byom_models")
model model_id onnx_xgb_iris b'8071208736B6C326F...'
Retrieve the model from Vantage.¶
In [ ]:
# Retrieve the model from table "byom_models", using the model id 'onnx_xgb_iris'.
# The result is passed to ONNXPredict() as its `modeldata` argument.
modeldata = retrieve_byom("onnx_xgb_iris", "byom_models")
In [ ]:
# Tell teradataml where the BYOM functions are installed. The value is prompted
# for rather than hard-coded.
# NOTE(review): presumably the database name that hosts ONNXPredict - confirm.
configure.byom_install_location = getpass.getpass("byom_install_location: ")
Score the model.¶
In [ ]:
# Import required libraries
from teradataml import ONNXPredict
In [ ]:
# Score the test rows with ONNXPredict(), using the ONNX model retrieved from
# Vantage. The accumulated columns are carried through to the result next to
# the requested model output field.
predict_output = ONNXPredict(
    modeldata=modeldata,
    newdata=iris_test,
    accumulate=["id", "petal_length", "petal_width"],
    overwrite_cached_models='*',
    model_output_fields="output_label",
)
In [ ]:
# Print the query.
# show_query() returns the SQL text generated for the ONNXPredict() call.
print(predict_output.show_query())
SELECT * FROM "mldb".ONNXPredict(
ON "MLDB"."ml__select__1666869336346812" AS InputTable
PARTITION BY ANY
ON (select model_id,model from "MLDB"."ml__filter__1666869978076131") AS ModelTable
DIMENSION
USING
Accumulate('id','petal_length','petal_width')
ModelOutputFields('output_label')
OverwriteCachedModel('*')
) as sqlmr
In [ ]:
# Print the predict_output.
# Bare expression: displays the scored rows (accumulated columns plus "output_label").
predict_output.result
Out[ ]:
| id | petal_length | petal_width | output_label |
|---|---|---|---|
| 30 | 1.6 | 0.2 | [1] |
| 118 | 6.7 | 2.2 | [3] |
| 150 | 5.1 | 1.8 | [3] |
| 13 | 1.4 | 0.1 | [1] |
| 138 | 5.5 | 1.8 | [3] |
| 116 | 5.3 | 2.3 | [3] |
| 133 | 5.6 | 2.2 | [3] |
| 24 | 1.7 | 0.5 | [1] |
| 123 | 6.7 | 2.0 | [3] |
| 40 | 1.5 | 0.2 | [1] |
Cleanup.¶
In [ ]:
# Delete the model from table "byom_models", using the model id 'onnx_xgb_iris'.
# This is done before the tables themselves are dropped in the following cells.
delete_byom("onnx_xgb_iris", "byom_models")
Model is deleted.
In [ ]:
# Drop the model table.
db_drop_table("byom_models")
Out[ ]:
True
In [ ]:
# Drop the input data table.
db_drop_table("iris_input")
Out[ ]:
True
In [ ]:
# Remove the temporary directory that holds the local ONNX file. It is no
# longer needed once the model has been saved to Vantage, and cleaning it up
# explicitly avoids leaving it to interpreter finalization.
temp_dir.cleanup()
# Close the connection to Vantage (kept as the last expression so its return
# value is still displayed by the notebook).
remove_context()
Out[ ]:
True