This example performs scoring with an XGBoost model. It assumes that the user is connected to a database other than the one where byom is installed, and it runs a query using an XGBoost model with no prediction values.
Set the global option in order to point to the database ("mldb" in this example) where byom is installed.
- Load the libraries.
> library(pmml) > library(xgboost)
- Convert tbl_teradata to "data.frame".
# Convert tbl_teradata to "data.frame".
> iris_train_df <- as.data.frame(iris_train)
- Generate multinomial model using iris data.
# Multinomial model using iris data.
> xgb_model <- xgboost(data = as.matrix(iris_train_df[, 2:5]),
                       label = as.numeric(iris_train_df[, 6])-1,
                       max_depth = 2,
                       eta = 1,
                       nthread = 2,
                       nrounds = 2,
                       objective = "multi:softprob",
                       num_class = 3
                       )
> xgb_model
- Convert the generated model to PMML format.
- Save the tree information in an external file.
# Convert the generated model to "PMML" format.
# Save the tree information in an external file.
> xgb.dump(xgb_model, "xgb_model.dumped.trees")
- Convert the external file to PMML.
# Convert to PMML.
> train_data_colnames <- colnames(as.matrix(iris_train_df[, 2:5]))
> xgb_model_pmml <- pmml(xgb_model,
                         input_feature_names = train_data_colnames,
                         output_label_name = "species",
                         output_categories = c(1, 2, 3),
                         xgb_dump_file = "xgb_model.dumped.trees"
                         )
- Save the PMML file in the tdplyr installation directory.
# The pmml file will be saved in the tdplyr installation directory.
> save_pmml(xgb_model_pmml, "xgb_model_tdplyr.pmml")
- Save the tree information in an external file.
- Create a "pmml_models" table on Vantage and insert the pmml file in the BLOB column of the table.
# User should create a 'pmml_models' table on Vantage and insert the pmml file in the
# BLOB column of the table.
# Create the following table on Vantage.
> crt_tbl <- "CREATE SET TABLE pmml_models(model_id VARCHAR(40), model BLOB) PRIMARY INDEX (model_id);"
> DBI::dbExecute(con, sql(crt_tbl))
- Create a file load_pmml_model.txt that has a model_id and a model file name.
# Create a file load_pmml_model.txt that has a model_id and a model file name
# entry such as:
# xgb_model_tdplyr|xgb_model_tdplyr.pmml
#
# This file and the pmml models to be loaded should be in the same directory.
- Load model.
- Load model with BTEQ.
# Loading model with BTEQ.
# .import vartext file load_pmml_model.txt
# .repeat *
# USING (c1 VARCHAR(40), c2 BLOB AS DEFERRED BY NAME) INSERT INTO pmml_models(:c1, :c2);
- Load model with TDStudio.
# Loading model with TDStudio. The '?' will open a GUI to browse the PMML file
# that needs to be uploaded.
# insert into pmml_models values ('xgb_model_tdplyr', ?);
- Load model with BTEQ.
- Set global option to point to the database where byom is installed.
# Set the global option in order to point to the database
# (mldb in this case) where byom is installed.
> options(byom.install.location="mldb")
- Load sample test dataset.
# Load the sample test dataset.
> loadExampleData("pmmlpredict_example", "iris_train", "iris_test")
- Create object of class "tbl_teradata" on the sample test dataset.
# Create object of class "tbl_teradata" on this dataset.
> iris_train <- tbl(con, "iris_train")
> iris_test <- tbl(con, "iris_test")
- Select the pmml file to be used for scoring.
# Select the pmml file to be used for scoring.
> modeldata <- tbl(con, "pmml_models") %>% filter(model_id=='iris_db_xgb_model')
- Perform scoring.
# Perform scoring.
# Run a query with an XGBoost model with no prediction
# values. It also uses the "overwrite.cached.models" argument.
> ml_name <- "iris_db_xgb_model"
> pmml_predict_out <- td_pmml_predict(modeldata = modeldata,
                                      newdata = iris_test,
                                      accumulate = "id",
                                      overwrite.cached.models = ml_name)