Description
The XGBoostPredict function applies the model output by the
XGBoost (td_xgboost_mle) function to a new data set, outputting
predicted labels for each data point.
Usage
td_xgboost_predict_mle (
object = NULL,
object.order.column = NULL,
newdata = NULL,
newdata.partition.column = "ANY",
newdata.order.column = NULL,
id.column = NULL,
terms = NULL,
iter.num = NULL,
num.boosted.trees = NULL,
attribute.name.column = NULL,
attribute.value.column = NULL,
output.response.probdist = FALSE,
output.responses = NULL,
newdata.sequence.column = NULL,
object.sequence.column = NULL
)
## S3 method for class 'td_xgboost_mle'
predict(
object = NULL,
object.order.column = NULL,
newdata = NULL,
newdata.partition.column = "ANY",
newdata.order.column = NULL,
id.column = NULL,
terms = NULL,
iter.num = NULL,
num.boosted.trees = NULL,
attribute.name.column = NULL,
attribute.value.column = NULL,
output.response.probdist = FALSE,
output.responses = NULL,
newdata.sequence.column = NULL,
object.sequence.column = NULL
)
Arguments
object |
Required Argument. |
object.order.column |
Required Argument. |
newdata |
Required Argument. |
newdata.partition.column |
Optional Argument. |
newdata.order.column |
Optional Argument. |
id.column |
Optional Argument. |
terms |
Optional Argument. |
iter.num |
Optional Argument. |
num.boosted.trees |
Optional Argument. |
attribute.name.column |
Optional Argument. |
attribute.value.column |
Optional Argument. |
output.response.probdist |
Optional Argument. |
output.responses |
Optional Argument.
Default Value: NULL |
newdata.sequence.column |
Optional Argument. |
object.sequence.column |
Optional Argument. |
Value
The function returns an object of class "td_xgboost_predict_mle", which is
a named list containing an object of class "tbl_teradata".
The named list member can be referenced directly with the "$" operator
using the name: result.
Examples
# Obtain the connection object from the current context.
con <- td_get_context()$connection

# Load the example tables used to train the models in the examples below.
loadExampleData(
  "xgboost_example",
  "housing_train_binary",
  "iris_train",
  "sparse_iris_train",
  "sparse_iris_attribute"
)

# Load the example tables that the predictions are run against.
loadExampleData(
  "xgboostpredict_example",
  "housing_test_binary",
  "iris_test",
  "sparse_iris_test"
)
# Example 1: Binary Classification
# Reference the housing train and test tables as "tbl_teradata" objects.
housing_train_binary <- tbl(con, "housing_train_binary")
housing_test_binary <- tbl(con, "housing_test_binary")

# Train a binomial classification model that predicts "homestyle".
td_xgboost_out1 <- td_xgboost_mle(
  data = housing_train_binary,
  id.column = "sn",
  formula = (homestyle ~ driveway + recroom + fullbase + gashw + airco +
    prefarea + price + lotsize + bedrooms + bathrms + stories + garagepl),
  prediction.type = "classification",
  loss.function = "binomial",
  num.boosted.trees = 2,
  iter.num = 10,
  max.depth = 10,
  min.node.size = 1,
  reg.lambda = 1,
  shrinkage.factor = 0.1
)

# Apply the trained model to the test table.
td_xgboost_predict_out1 <- td_xgboost_predict_mle(
  object = td_xgboost_out1,
  object.order.column = c("tree_id", "iter", "class_num"),
  newdata = housing_test_binary,
  id.column = "sn",
  terms = "homestyle",
  num.boosted.trees = 1
)

# The same prediction, dispatched through the S3 predict() method.
predict_out <- predict(
  td_xgboost_out1,
  object.order.column = c("tree_id", "iter", "class_num"),
  newdata = housing_test_binary,
  id.column = "sn",
  terms = "homestyle",
  num.boosted.trees = 1
)
# Example 2: Multiple-Class Classification
# Reference the iris train and test tables.
iris_train <- tbl(con, "iris_train")
iris_test <- tbl(con, "iris_test")

# Train a softmax (multi-class) model that predicts "species".
# Each predictor is listed once, and the response appears only on the
# left-hand side of the formula.
# NOTE(review): assumes iris_train has a "sepal_width" column - confirm
# against the example table definition.
td_xgboost_out2 <- td_xgboost_mle(
  data = iris_train,
  id.column = "id",
  formula = (species ~ sepal_length + sepal_width +
    petal_length + petal_width),
  num.boosted.trees = 2,
  loss.function = "softmax",
  reg.lambda = 1,
  shrinkage.factor = 0.1,
  iter.num = 10,
  min.node.size = 1,
  max.depth = 10
)

# Apply the trained model to the test table.
td_xgboost_predict_out2 <- td_xgboost_predict_mle(
  newdata = iris_test,
  object = td_xgboost_out2,
  object.order.column = c("tree_id", "iter", "class_num"),
  id.column = "id",
  terms = "species",
  num.boosted.trees = 2
)
# Example 3: Sparse Input Format. "response.column" argument is specified
# instead of "formula".
# Reference the sparse iris train, attribute and test tables as
# "tbl_teradata" objects.
sparse_iris_train <- tbl(con, "sparse_iris_train")
sparse_iris_attribute <- tbl(con, "sparse_iris_attribute")
sparse_iris_test <- tbl(con, "sparse_iris_test")

# Train the model from attribute/value pairs rather than from a formula.
td_xgboost_out3 <- td_xgboost_mle(
  data = sparse_iris_train,
  attribute.table = sparse_iris_attribute,
  id.column = "id",
  attribute.name.column = "attribute",
  attribute.value.column = "value_col",
  response.column = "species",
  loss.function = "SOFTMAX",
  num.boosted.trees = 2,
  iter.num = 10,
  max.depth = 10,
  min.node.size = 1,
  reg.lambda = 1,
  shrinkage.factor = 0.1,
  column.subsampling = 1.0,
  variance = 0,
  seed = 1
)

# Apply the trained model to the sparse test table, naming the same
# attribute name/value columns that were used for training.
td_xgboost_predict_out3 <- td_xgboost_predict_mle(
  object = td_xgboost_out3,
  object.order.column = c("tree_id", "iter", "class_num"),
  newdata = sparse_iris_test,
  id.column = "id",
  attribute.name.column = "attribute",
  attribute.value.column = "value_col",
  terms = "species",
  num.boosted.trees = 2
)