PMMLPredict() with Support Vector Machine model using Regression¶
Setup.¶
In [1]:
# Import required libraries
import getpass
import tempfile
from teradataml import PMMLPredict, DataFrame, load_example_data, create_context, \
db_drop_table, remove_context, save_byom, delete_byom, retrieve_byom, list_byom
from teradataml.options.configure import configure
In [2]:
# Create the connection.
# Credentials are prompted interactively via getpass so nothing sensitive is
# hardcoded in the notebook. Prompt order: host, username, password.
host = getpass.getpass("Host: ")
username = getpass.getpass("Username: ")
password = getpass.getpass("Password: ")
# create_context() also sets the global teradataml context; the returned
# connection object is not referenced again in this notebook.
con = create_context(host=host, username=username, password=password)
Host: ········ Username: ········ Password: ········
Load Example Data.¶
In [3]:
# Load the example "boston" housing table (shipped with teradataml) into Vantage.
# If the table already exists, loading is skipped with a warning.
load_example_data("decisionforest", "boston")
WARNING: Skipped loading table boston since it already exists in the database.
In [4]:
# Create a teradataml DataFrame that references the "boston" table in Vantage
# (data stays in-database; no rows are pulled to the client here).
boston_input = DataFrame("boston")
In [5]:
# Create 2 samples of input data - sample 1 will have 80% of total rows and sample 2 will have 20% of total rows.
# sample() adds a "sampleid" column identifying which sample each row belongs to.
# NOTE(review): the split is random and unseeded, so train/test membership
# differs across notebook runs.
boston_sample = boston_input.sample(frac=[0.8, 0.2])
In [6]:
# Training set: the rows assigned to sample 1, with the bookkeeping
# "sampleid" column removed since it is not a model feature.
train_rows = boston_sample[boston_sample.sampleid == "1"]
boston_train = train_rows.drop("sampleid", axis = 1)
boston_train
Out[6]:
id | crim | zn | indus | chas | nox | rm | age | dis | rad | tax | ptratio | black | lstat | medv |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
448 | 9.92485 | 0 | 18.1 | 0 | 0.74 | 6.251 | 96 | 2.198 | 24 | 666 | 20.2 | 388.52 | 16.44 | 12.6 |
223 | 0.62356 | 0 | 6.2 | 1 | 0.507 | 6.879 | 77 | 3.2721 | 8 | 307 | 17.4 | 390.39 | 9.93 | 27.5 |
488 | 4.83567 | 0 | 18.1 | 0 | 0.583 | 5.905 | 53 | 3.1523 | 24 | 666 | 20.2 | 388.22 | 11.45 | 20.6 |
305 | 0.05515 | 33 | 2.18 | 0 | 0.472 | 7.236 | 41 | 4.022 | 7 | 222 | 18.4 | 393.68 | 6.93 | 36.1 |
427 | 12.2472 | 0 | 18.1 | 0 | 0.584 | 5.837 | 59 | 1.9976 | 24 | 666 | 20.2 | 24.65 | 15.69 | 10.2 |
469 | 15.5757 | 0 | 18.1 | 0 | 0.58 | 5.926 | 71 | 2.9084 | 24 | 666 | 20.2 | 368.74 | 18.13 | 19.1 |
61 | 0.14932 | 25 | 5.13 | 0 | 0.453 | 5.741 | 66 | 7.2254 | 8 | 284 | 19.7 | 395.11 | 13.15 | 18.7 |
326 | 0.19186 | 0 | 7.38 | 0 | 0.493 | 6.431 | 14 | 5.4159 | 5 | 287 | 19.6 | 393.68 | 5.08 | 24.6 |
101 | 0.14866 | 0 | 8.56 | 0 | 0.52 | 6.727 | 79 | 2.7778 | 5 | 384 | 20.9 | 394.76 | 9.42 | 27.5 |
40 | 0.02763 | 75 | 2.95 | 0 | 0.428 | 6.595 | 21 | 5.4011 | 3 | 252 | 18.3 | 395.63 | 4.32 | 30.8 |
In [7]:
# Scoring (test) set: the rows assigned to sample 2, again dropping the
# bookkeeping "sampleid" column, which is not needed for scoring.
test_rows = boston_sample[boston_sample.sampleid == "2"]
boston_test = test_rows.drop("sampleid", axis = 1)
boston_test
Out[7]:
id | crim | zn | indus | chas | nox | rm | age | dis | rad | tax | ptratio | black | lstat | medv |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
160 | 1.42502 | 0 | 19.58 | 0 | 0.871 | 6.51 | 100 | 1.7659 | 5 | 403 | 14.7 | 364.31 | 7.39 | 23.3 |
57 | 0.02055 | 85 | 0.74 | 0 | 0.41 | 6.383 | 35 | 9.1876 | 2 | 313 | 17.3 | 396.9 | 5.77 | 24.7 |
118 | 0.15098 | 0 | 10.01 | 0 | 0.547 | 6.021 | 82 | 2.7474 | 6 | 432 | 17.8 | 394.51 | 10.3 | 19.2 |
282 | 0.03705 | 20 | 3.33 | 0 | 0.4429 | 6.968 | 37 | 5.2447 | 5 | 216 | 14.9 | 392.23 | 4.59 | 35.4 |
299 | 0.06466 | 70 | 2.24 | 0 | 0.4 | 6.345 | 20 | 7.8278 | 5 | 358 | 14.8 | 368.24 | 4.97 | 22.5 |
467 | 3.77498 | 0 | 18.1 | 0 | 0.655 | 5.952 | 84 | 2.8715 | 24 | 666 | 20.2 | 22.01 | 17.15 | 19.0 |
221 | 0.35809 | 0 | 6.2 | 1 | 0.507 | 6.951 | 88 | 2.8617 | 8 | 307 | 17.4 | 391.7 | 9.71 | 26.7 |
116 | 0.17134 | 0 | 10.01 | 0 | 0.547 | 5.928 | 88 | 2.4631 | 6 | 432 | 17.8 | 344.91 | 15.76 | 18.3 |
34 | 1.15172 | 0 | 8.14 | 0 | 0.538 | 5.701 | 95 | 3.7872 | 4 | 307 | 21.0 | 358.77 | 18.35 | 13.1 |
162 | 1.46336 | 0 | 19.58 | 0 | 0.605 | 7.489 | 90 | 1.9709 | 5 | 403 | 14.7 | 374.43 | 1.73 | 50.0 |
Train SVM Regression Model¶
In [8]:
# Import required libraries.
import numpy as np
from nyoka import skl_to_pmml
from sklearn.pipeline import Pipeline
from sklearn.svm import SVR
In [9]:
# Materialize the training data client-side and name the column roles.
# target   : name of the column to be predicted.
# features : every remaining column, used as model inputs.
target = 'medv'
train_pd = boston_train.to_pandas()
features = train_pd.columns.drop(target)
In [10]:
# Build a single-step scikit-learn pipeline wrapping a linear-kernel
# support vector regressor. (nyoka's skl_to_pmml expects a Pipeline.)
svr_step = ("svr", SVR(kernel='linear'))
regr = Pipeline(steps=[svr_step])
In [11]:
# Fit the SVR pipeline on the training predictors and target column.
regr.fit(train_pd[features], train_pd[target])
Out[11]:
Pipeline(steps=[('svr', SVR(kernel='linear'))])
Save the model in PMML format.¶
In [12]:
# Create a temporary directory to hold the exported PMML file.
# Keep the temp_dir reference alive: the directory (and the model file in it)
# is deleted when this object is garbage-collected or temp_dir.cleanup() runs.
temp_dir = tempfile.TemporaryDirectory()
model_file_path = f"{temp_dir.name}/boston_db_svr_model.pmml"
In [13]:
# Export the fitted sklearn pipeline to a PMML file using nyoka.
skl_to_pmml(regr, features, target, model_file_path)
Save the model in Vantage.¶
In [14]:
# Save the PMML Model in Vantage.
# Stores the file under model id 'pmml_svr_boston' in table "byom_models";
# the table is created automatically if it does not already exist.
save_byom("pmml_svr_boston", model_file_path, "byom_models")
Created the model table 'byom_models' as it does not exist. Model is saved.
List the models from Vantage.¶
In [15]:
# List the PMML Models in Vantage.
# Shows every model id stored in the "byom_models" table, to confirm the save.
list_byom("byom_models")
model model_id pmml_svr_boston b'3C3F786D6C20766572...'
Retrieve the model from Vantage.¶
In [16]:
# Retrieve the model from table "byom_models", using the model id 'pmml_svr_boston'.
# Returns a teradataml DataFrame wrapping the stored model, suitable for
# passing to PMMLPredict() below.
modeldata = retrieve_byom("pmml_svr_boston", "byom_models")
Set "configure.byom_install_location" to the database where BYOM functions are installed.¶
In [17]:
# The BYOM functions live in a site-specific database; prompt for its name
# rather than hardcoding an environment-specific value.
configure.byom_install_location = getpass.getpass("byom_install_location: ")
byom_install_location: ········
Score the model.¶
In [18]:
# Score the held-out rows in-database with PMMLPredict() and the stored model.
# accumulate carries the listed input columns through to the output table;
# overwrite_cached_models='*' forces any cached copy of the model to be
# replaced with the freshly retrieved one.
result = PMMLPredict(modeldata=modeldata,
                     newdata=boston_test,
                     accumulate=['id', 'rm', 'lstat', 'ptratio'],
                     overwrite_cached_models='*')
In [19]:
# Print the query.
# Shows the SQL that teradataml generated for the PMMLPredict() call above.
print(result.show_query())
SELECT * FROM "mldb".PMMLPredict( ON "MLDB"."ml__select__1645517679699217" AS InputTable PARTITION BY ANY ON (select model_id,model from "MLDB"."ml__filter__1645521215241325") AS ModelTable DIMENSION USING Accumulate('id','rm','lstat','ptratio') OverwriteCachedModel('*') ) as sqlmr
In [20]:
# Print the result.
# Each output row carries the accumulated input columns plus the model's
# prediction and a JSON report of the predicted target value.
result.result
Out[20]:
id | rm | lstat | ptratio | prediction | json_report |
---|---|---|---|---|---|
467 | 5.952 | 17.15 | 20.2 | 11.312941660878282 | {"predicted_medv":11.312941660878282} |
322 | 6.376 | 6.87 | 19.6 | 24.24203555205095 | {"predicted_medv":24.24203555205095} |
301 | 6.871 | 6.07 | 14.8 | 29.780764973146873 | {"predicted_medv":29.780764973146873} |
406 | 5.683 | 22.98 | 20.2 | 9.163877780148328 | {"predicted_medv":9.163877780148328} |
278 | 6.826 | 4.16 | 17.6 | 32.04837204324504 | {"predicted_medv":32.04837204324504} |
160 | 6.51 | 7.39 | 14.7 | 26.214879231825037 | {"predicted_medv":26.214879231825037} |
465 | 6.209 | 13.22 | 20.2 | 19.157576835196963 | {"predicted_medv":19.157576835196963} |
177 | 6.02 | 10.11 | 16.6 | 24.57108833740422 | {"predicted_medv":24.57108833740422} |
343 | 6.54 | 8.65 | 15.9 | 24.04016286752376 | {"predicted_medv":24.04016286752376} |
282 | 6.968 | 4.59 | 14.9 | 33.103048511903765 | {"predicted_medv":33.103048511903765} |
Cleanup.¶
In [21]:
# Delete the model from table "byom_models", using the model id 'pmml_svr_boston'.
delete_byom("pmml_svr_boston", "byom_models")
Model is deleted.
In [22]:
# Drop models table.
db_drop_table("byom_models")
Out[22]:
True
In [24]:
# Drop input data tables.
db_drop_table("boston")
Out[24]:
True
In [25]:
# One must run remove_context() to close the connection and garbage collect internally generated objects.
remove_context()
Out[25]:
True
In [ ]: