PMMLPredict using Nueral Networks MLP Classifier model.¶
Setup¶
In [1]:
# Import required libraries
import tempfile
import getpass
from teradataml import PMMLPredict, DataFrame, load_example_data, create_context, \
db_drop_table, remove_context, save_byom, retrieve_byom, delete_byom, list_byom
from teradataml.options.configure import configure
In [2]:
# Create the connection.
host = getpass.getpass("Host: ")
username = getpass.getpass("Username: ")
password = getpass.getpass("Password: ")
con = create_context(host=host, username=username, password=password)
Host: ········ Username: ········ Password: ········
Load example data and use sample() for splitting input data into testing and training dataset.¶
In [3]:
# Load the example data.
load_example_data("byom", "iris_input")
WARNING: Skipped loading table iris_input since it already exists in the database.
In [4]:
# Create teradataml DataFrames
iris_input = DataFrame("iris_input")
In [5]:
# Create 2 samples of input data - sample 1 will have 80% of total rows and sample 2 will have 20% of total rows.
iris_sample = iris_input.sample(frac=[0.8, 0.2])
iris_sample
Out[5]:
id | sepal_length | sepal_width | petal_length | petal_width | species | sampleid |
---|---|---|---|---|---|---|
78 | 6.7 | 3.0 | 5.0 | 1.7 | 2 | 1 |
141 | 6.7 | 3.1 | 5.6 | 2.4 | 3 | 1 |
17 | 5.4 | 3.9 | 1.3 | 0.4 | 1 | 1 |
40 | 5.1 | 3.4 | 1.5 | 0.2 | 1 | 2 |
120 | 6.0 | 2.2 | 5.0 | 1.5 | 3 | 1 |
122 | 5.6 | 2.8 | 4.9 | 2.0 | 3 | 1 |
19 | 5.7 | 3.8 | 1.7 | 0.3 | 1 | 1 |
59 | 6.6 | 2.9 | 4.6 | 1.3 | 2 | 1 |
80 | 5.7 | 2.6 | 3.5 | 1.0 | 2 | 1 |
101 | 6.3 | 3.3 | 6.0 | 2.5 | 3 | 1 |
In [6]:
# Create train dataset from sample 1 by filtering on "sampleid" and drop "sampleid" column as it is not required for training model.
iris_train = iris_sample[iris_sample.sampleid == "1"].drop("sampleid", axis = 1)
iris_train
Out[6]:
id | sepal_length | sepal_width | petal_length | petal_width | species |
---|---|---|---|---|---|
116 | 6.4 | 3.2 | 5.3 | 2.3 | 3 |
17 | 5.4 | 3.9 | 1.3 | 0.4 | 1 |
139 | 6.0 | 3.0 | 4.8 | 1.8 | 3 |
40 | 5.1 | 3.4 | 1.5 | 0.2 | 1 |
120 | 6.0 | 2.2 | 5.0 | 1.5 | 3 |
122 | 5.6 | 2.8 | 4.9 | 2.0 | 3 |
19 | 5.7 | 3.8 | 1.7 | 0.3 | 1 |
59 | 6.6 | 2.9 | 4.6 | 1.3 | 2 |
80 | 5.7 | 2.6 | 3.5 | 1.0 | 2 |
101 | 6.3 | 3.3 | 6.0 | 2.5 | 3 |
In [7]:
# Create test dataset from sample 2 by filtering on "sampleid" and drop "sampleid" column as it is not required for scoring.
iris_test = iris_sample[iris_sample.sampleid == "2"].drop("sampleid", axis = 1)
iris_test
Out[7]:
id | sepal_length | sepal_width | petal_length | petal_width | species |
---|---|---|---|---|---|
5 | 5.0 | 3.6 | 1.4 | 0.2 | 1 |
118 | 7.7 | 3.8 | 6.7 | 2.2 | 3 |
95 | 5.6 | 2.7 | 4.2 | 1.3 | 2 |
36 | 5.0 | 3.2 | 1.2 | 0.2 | 1 |
30 | 4.7 | 3.2 | 1.6 | 0.2 | 1 |
51 | 7.0 | 3.2 | 4.7 | 1.4 | 2 |
66 | 6.7 | 3.1 | 4.4 | 1.4 | 2 |
142 | 6.9 | 3.1 | 5.1 | 2.3 | 3 |
15 | 5.8 | 4.0 | 1.2 | 0.2 | 1 |
80 | 5.7 | 2.6 | 3.5 | 1.0 | 2 |
Train the MLP Model.¶
In [8]:
# Import required libraries.
import numpy as np
from nyoka import skl_to_pmml
from sklearn.pipeline import Pipeline
from sklearn_pandas import DataFrameMapper
from sklearn.impute import SimpleImputer
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
In [9]:
# Convert teradataml dataframe to pandas dataframe.
# features : Training data.
# target : Training targets.
traid_pd = iris_train.to_pandas()
features = traid_pd.columns.drop('species')
target = 'species'
In [10]:
# Generate the Multi-layer Perceptron classifier model.
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
MLP_pipe_obj = Pipeline([
("mapping", DataFrameMapper([
(['sepal_length', 'sepal_width'], StandardScaler()) ,
(['petal_length', 'petal_width'], imputer)
])),
("mlp", MLPClassifier(random_state=7, max_iter=200))
])
In [11]:
MLP_pipe_obj.fit(traid_pd[features], traid_pd[target])
C:\Users\pg255042\Anaconda3\envs\teraml\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:617: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (200) reached and the optimization hasn't converged yet. % self.max_iter, ConvergenceWarning)
Out[11]:
Pipeline(steps=[('mapping', DataFrameMapper(drop_cols=[], features=[(['sepal_length', 'sepal_width'], StandardScaler()), (['petal_length', 'petal_width'], SimpleImputer())])), ('mlp', MLPClassifier(random_state=7))])
Save the model in PMML format.¶
In [12]:
temp_dir = tempfile.TemporaryDirectory()
model_file_path = f"{temp_dir.name}/iris_db_MLP_model.pmml"
In [13]:
skl_to_pmml(MLP_pipe_obj, features, target, model_file_path)
Save the model in Vantage.¶
In [14]:
# Save the PMML Model in Vantage.
save_byom("pmml_MLP_iris", model_file_path, "byom_models")
Created the model table 'byom_models' as it does not exist. Model is saved.
List the models from Vantage¶
In [15]:
# List the PMML Models in Vantage.
list_byom("byom_models")
model model_id pmml_MLP_iris b'3C3F786D6C20766572...'
Retrieve the model from Vantage.¶
In [16]:
# Retrieve the model from table "byom_models", using the model id 'pmml_MLP_iris'.
modeldata = retrieve_byom("pmml_MLP_iris", "byom_models")
In [17]:
configure.byom_install_location = getpass.getpass("byom_install_location: ")
byom_install_location: ········
Score the model.¶
In [18]:
# Perform prediction using PMMLPredict() and the PMML model stored in Vantage.
result = PMMLPredict(
modeldata = modeldata,
newdata = iris_test,
accumulate = ['id', 'sepal_length', 'petal_length'],
overwrite_cached_models = '*',
)
In [19]:
# Print the query.
print(result.show_query())
SELECT * FROM "mldb".PMMLPredict( ON "MLDB"."ml__select__1644917801303812" AS InputTable PARTITION BY ANY ON (select model_id,model from "MLDB"."ml__filter__1644917506408693") AS ModelTable DIMENSION USING Accumulate('id','sepal_length','petal_length') OverwriteCachedModel('*') ) as sqlmr
In [20]:
# Print the result.
result.result
Out[20]:
id | sepal_length | petal_length | prediction | json_report |
---|---|---|---|---|
135 | 6.1 | 5.6 | 2 | {"probability_1":0.0028729239874679523,"predicted_species":2,"probability_2":0.7423866117249545,"probability_3":0.25474046428757763} |
30 | 4.7 | 1.6 | 1 | {"probability_1":0.9752530343409634,"predicted_species":1,"probability_2":0.02018545712712987,"probability_3":0.004561508531906737} |
91 | 5.5 | 4.4 | 2 | {"probability_1":0.016532572212896994,"predicted_species":2,"probability_2":0.8223711670195064,"probability_3":0.16109626076759653} |
93 | 5.8 | 4.0 | 2 | {"probability_1":0.012263607385504423,"predicted_species":2,"probability_2":0.7630946810271045,"probability_3":0.22464171158739107} |
58 | 4.9 | 3.3 | 2 | {"probability_1":0.12780047884946583,"predicted_species":2,"probability_2":0.7844663235360635,"probability_3":0.08773319761447054} |
11 | 5.4 | 1.5 | 1 | {"probability_1":0.9794910162772901,"predicted_species":1,"probability_2":0.013188707349543775,"probability_3":0.0073202763731660715} |
49 | 5.3 | 1.5 | 1 | {"probability_1":0.9848004762482929,"predicted_species":1,"probability_2":0.009852620035280505,"probability_3":0.00534690371642658} |
106 | 7.6 | 6.6 | 3 | {"probability_1":2.719129314523456E-4,"predicted_species":3,"probability_2":0.14966921107492842,"probability_3":0.8500588759936192} |
108 | 7.3 | 6.3 | 3 | {"probability_1":5.731525145961445E-4,"predicted_species":3,"probability_2":0.2686439360005586,"probability_3":0.7307829114848453} |
97 | 5.7 | 4.2 | 2 | {"probability_1":0.02599453802753972,"predicted_species":2,"probability_2":0.7005718844339104,"probability_3":0.2734335775385499} |
Cleanup.¶
In [21]:
# Delete the model from table "byom_models", using the model id 'pmml_MLP_iris'.
delete_byom("pmml_MLP_iris", "byom_models")
Model is deleted.
In [22]:
# Drop models table.
db_drop_table("byom_models")
Out[22]:
True
In [23]:
# Drop input data tables.
db_drop_table("iris_input")
Out[23]:
True
In [24]:
# One must run remove_context() to close the connection and garbage collect internally generated objects.
remove_context()
Out[24]:
True