- Load libraries and Boston dataset.
> library(pmml) > library(MASS) > library(randomForest) > dataBox <- xform_wrap(Boston)
- Get the current context (connection).
# Get the current context/connection. > con <- td_get_context()$connection
- Normalize all numeric variables of the loaded Boston dataset to lie between 0 and 1.
# Normalize all numeric variables of the loaded boston dataset to lie # between 0 and 1. > dataBox <- xform_min_max(dataBox)
- Include only derived fields for modeling: get the index of the last element, derived_medv.
# Include only derived fields for modeling # last element derived_medv index > last <- length(names(dataBox$data))
- Create dataset without original predictors 1-13 and last derived_medv to train the model.
# Create dataset without original predictors 1-13 and last derived_medv # to train the model. > dataset <- dataBox$data[-c(1:13, eval(last))] > names(dataset)
- Generate a random forest model and convert it, together with the transformations, to a PMML object.
# Generate random forest model and save it to a pmml file. > fit <- randomForest(medv ~ ., data=na.omit(dataset), ntree=50) > tpmml <- pmml(fit, transforms = dataBox)
- Save pmml file in the tdplyr installation directory.
# pmml file will be saved in the tdplyr installation directory. > save_pmml(tpmml,"boston_trans_normcont_model.pmml")
- Create a "pmml_models" table on Vantage and insert pmml file in the BLOB column of the table.
# User should create a 'pmml_models' table on Vantage and insert pmml file in the # BLOB column of the table. # Create following table on vantage. > crt_tbl <- "CREATE SET TABLE pmml_models(model_id VARCHAR(40), model BLOB) PRIMARY INDEX (model_id);" > DBI::dbExecute(con, crt_tbl)
- Create a file load_pmml_model.txt that has a model_id and a model file name.
# Create a file load_pmml_model.txt that has a model_id and a model file name # entry such as: # boston_trans_normcont_model|boston_trans_normcont_model.pmml # This file and the pmml models to be loaded should be in the same directory.
- Load model.
- Load model with BTEQ.
# Loading model with BTEQ. # .import vartext file load_pmml_model.txt # .repeat * # USING (c1 VARCHAR(40), c2 BLOB AS DEFERRED BY NAME) INSERT INTO pmml_models(:c1, :c2);
- Load model with TDStudio.
# Loading model with TDStudio. The '?' will open a GUI to browse the PMML file # that needs to be uploaded. # insert into pmml_models values ('boston_trans_normcont_model', ?);
- Load model with BTEQ.
- Load sample test dataset.
# Load the sample test dataset. > loadExampleData("pmmlpredict_example", "boston")
- Create object of class "tbl_teradata" on the sample test dataset.
# Create object of class "tbl_teradata" on this dataset. > boston <- tbl(con, "boston")
- Create a sample of the test data to score.
# Create a sample test data to score. > boston_test <- td_sample(df=boston, n=0.3)
- Select the pmml file to be used for scoring.
# select the pmml file to be used for scoring. > modeldata <- tbl(con, "pmml_models") %>% filter(model_id=='boston_trans_normcont_model')
- Perform scoring.
# Perform scoring. > result <- td_pmml_predict_sqle(modeldata = modeldata, newdata = boston_test, accumulate = c("id")) > result ############ result ############ # Source: SQL [?? x 1] # Database: Teradata # Groups: id prediction json_report <int> <chr> <chr> 1 425 13.842523809523811 "{\"Predicted_medv\":13.842523809523811}" 2 40 30.47793333333333 "{\"Predicted_medv\":30.47793333333333}" 3 80 20.82613333333333 "{\"Predicted_medv\":20.82613333333333}" 4 345 30.578633333333332 "{\"Predicted_medv\":30.578633333333332}" 5 265 35.1652 "{\"Predicted_medv\":35.1652}" 6 305 34.18370000000001 "{\"Predicted_medv\":34.18370000000001}" 7 320 20.92856666666667 "{\"Predicted_medv\":20.92856666666667}" 8 375 18.00184666666667 "{\"Predicted_medv\":18.00184666666667}" 9 5 34.78486666666667 "{\"Predicted_medv\":34.78486666666667}" 10 20 18.954966666666667 "{\"Predicted_medv\":18.954966666666667}" # … with more rows