Scoring with Random Forest | BYOM | Teradata Package for R - Scoring with Random Forest

Scoring with Random Forest | BYOM | Teradata Package for R - Scoring with Random Forest - Teradata Package for R

Teradata® Package for R User Guide

Product

Teradata Package for R

Release Number

17.00

Published

July 2021

Language

English (United States)

Last Update

2023-08-08

dita:mapPath

yih1585763700215.ditamap

dita:ditavalPath

ayr1485454803741.ditaval

dita:id

B700-4005

Product Category

Teradata Vantage

Load libraries and Boston dataset.

> library(pmml)
> library(MASS)
> library(randomForest)
> dataBox <- xform_wrap(Boston)

Get the current context (connection).

# Get the current context/connection.
> con <- td_get_context()$connection

Normalize all numeric variables of the loaded Boston dataset to lie between 0 and 1.

# Normalize all numeric variables of the loaded boston dataset to lie
# between 0 and 1.
> dataBox <- xform_min_max(dataBox)

Include only derived fields for modeling last element derived_medv index.

# Include only derived fields for modeling
# last element derived_medv index
> last <- length(names(dataBox$data))

Create dataset without original predictors 1-13 and last derived_medv to train the model.

# Create dataset without original predictors 1-13 and last derived_medv
# to train the model.
> dataset <- dataBox$data[-c(1:13, eval(last))]
> names(dataset)

Generate random forest model and save the model to a pmml file.

# Generate random forest model and save it to a pmml file.
> fit <- randomForest(medv ~ ., data=na.omit(dataset), ntree=50)
> tpmml <- pmml(fit, transforms = dataBox)

Save pmml file in the tdplyr installation directory.

# pmml file will be saved in the tdplyr installation directory.
> save_pmml(tpmml,"boston_trans_normcont_model.pmml")

Create a "pmml_models" table on Vantage and insert pmml file in the BLOB column of the table.

# User should create a 'pmml_models' table on Vantage and insert pmml file in the
# BLOB column of the table.
# Create following table on vantage.
> crt_tbl <- "CREATE SET TABLE pmml_models(model_id VARCHAR(40), model BLOB)
              PRIMARY INDEX (model_id);"
> DBI::dbExecute(con, crt_tbl)

Create a file load_pmml_model.txt that has a model_id and a model file name.

# Create a file load_pmml_model.txt that has a model_id and a model file name
# entry such as:
# boston_trans_normcont_model|boston_trans_normcont_model.pmml
# This file and the pmml models to be loaded should be in the same directory.

Load model.

Load model with BTEQ.

# Loading model with BTEQ.
# .import vartext file load_pmml_model.txt
# .repeat *
# USING (c1 VARCHAR(40), c2 BLOB AS DEFERRED BY NAME) INSERT INTO pmml_models(:c1, :c2);

Load model with TDStudio.

# Loading model with TDStudio. The '?' will open a GUI to browse the PMML file
# that needs to be updated.
# insert into pmml_models values ('iris_db_rf_model', ?);

Load sample test dataset.

# Load the sample test dataset.
> loadExampleData("pmmlpredict_example", "boston")

Create object of class "tbl_teradata" on the sample test dataset.

# Create object of class "tbl_teradata" on this dataset.
> boston <- tbl(con, "boston")

Create a sample test data to score.

# Create a sample test data to score. 
> boston_test <- td_sample(df=boston, n=0.3)

Select the pmml file to be used for scoring.

# select the pmml file to be used for scoring.
> modeldata <- tbl(con, "pmml_models") %>% filter(model_id=='boston_trans_normcont_model')

Perform scoring.

# Perform scoring.
> result <- td_pmml_predict_sqle(modeldata = modeldata, newdata = boston_test, accumulate = c("id"))

> result
############ result ############
 
# Source:   SQL [?? x 1]
# Database: Teradata
# Groups: 
      id prediction         json_report                             
   <int> <chr>              <chr>                                   
 1   425 13.842523809523811 "{\"Predicted_medv\":13.842523809523811}"
 2    40 30.47793333333333  "{\"Predicted_medv\":30.47793333333333}"
 3    80 20.82613333333333  "{\"Predicted_medv\":20.82613333333333}"
 4   345 30.578633333333332 "{\"Predicted_medv\":30.578633333333332}"
 5   265 35.1652            "{\"Predicted_medv\":35.1652}"          
 6   305 34.18370000000001  "{\"Predicted_medv\":34.18370000000001}"
 7   320 20.92856666666667  "{\"Predicted_medv\":20.92856666666667}"
 8   375 18.00184666666667  "{\"Predicted_medv\":18.00184666666667}"
 9     5 34.78486666666667  "{\"Predicted_medv\":34.78486666666667}"
10    20 18.954966666666667 "{\"Predicted_medv\":18.954966666666667}"
# … with more rows