Description
The AdaBoost function takes a training data set and a single decision
tree and uses adaptive boosting to produce a strong classifying model
that can be input to the function AdaBoostPredict
(td_adaboost_predict_mle).
Usage
td_adaboost_mle (
attribute.data = NULL,
attribute.name.columns = NULL,
attribute.value.column = NULL,
categorical.attribute.data = NULL,
response.data = NULL,
id.columns = NULL,
response.column = NULL,
iter.num = 20,
num.splits = 10,
approx.splits = TRUE,
split.measure = "gini",
max.depth = 3,
min.node.size = 100,
output.response.probdist = FALSE,
categorical.encoding = "graycode",
attribute.data.sequence.column = NULL,
response.data.sequence.column = NULL,
categorical.attribute.data.sequence.column = NULL
)
Arguments
attribute.data |
Required Argument. |
attribute.name.columns |
Required Argument. |
attribute.value.column |
Required Argument. |
categorical.attribute.data |
Optional Argument. |
response.data |
Required Argument. |
id.columns |
Required Argument. |
response.column |
Required Argument. |
iter.num |
Optional Argument. |
num.splits |
Optional Argument. |
approx.splits |
Optional Argument. |
split.measure |
Optional Argument. |
max.depth |
Optional Argument. |
min.node.size |
Optional Argument. |
output.response.probdist |
Optional Argument. |
categorical.encoding |
Optional Argument. |
attribute.data.sequence.column |
Optional Argument. |
response.data.sequence.column |
Optional Argument. |
categorical.attribute.data.sequence.column |
Optional Argument. |
Value
Function returns an object of class "td_adaboost_mle" which is a
named list containing objects of class "tbl_teradata".
Named list members can be referenced directly with the "$" operator
using the following names:
model.table
output
Examples
# Obtain the connection from the current context.
con <- td_get_context()$connection

# Load the data needed to run the examples.
loadExampleData(
  "adaboost_example",
  "housing_train", "housing_cat", "housing_train_response",
  "iris_attribute_train", "iris_response_train"
)

# Create one object of class "tbl_teradata" per loaded table.
housing_train <- tbl(con, "housing_train")
housing_cat <- tbl(con, "housing_cat")
housing_train_response <- tbl(con, "housing_train_response")
iris_attribute_train <- tbl(con, "iris_attribute_train")
iris_response_train <- tbl(con, "iris_response_train")
# Example 1 - Build a model from home sales data. The model predicts home
#             style when input to the td_adaboost_predict_mle() function.
#
# Input description:
#   housing_train (attribute.data, after unpivoting):
#     tbl_teradata containing real estate sales data. There are six
#     numerical predictors and six categorical predictors. The response
#     variable is 'homestyle'.
#   housing_cat (categorical.attribute.data):
#     tbl_teradata that lists all the categorical predictors.
#   housing_train_response (response.data):
#     tbl_teradata that lists the responses for each instance in
#     'attribute.data' as specified by 'id.columns'.

# Unpivot the predictor columns; the result is passed as 'attribute.data'.
td_unpivot_out <- td_unpivot_mle(
  data = housing_train,
  unpivot = c(
    "price", "lotsize", "bedrooms", "bathrms",
    "stories", "driveway", "recroom", "fullbase",
    "gashw", "airco", "garagepl", "prefarea"
  ),
  accumulate = "sn"
)

td_adaboost_out1 <- td_adaboost_mle(
  attribute.data = td_unpivot_out$result,
  attribute.name.columns = "attribute",
  attribute.value.column = "value_col",
  categorical.attribute.data = housing_cat,
  response.data = housing_train_response,
  id.columns = "sn",
  response.column = "response",
  iter.num = 20,
  num.splits = 10,
  max.depth = 3,
  min.node.size = 100
)
# Example 2 - Build a model from the iris flower dataset. The model predicts
#             the species when input to td_adaboost_predict_mle().
#
# Input description:
#   iris_attribute_train (attribute.data):
#     tbl_teradata containing the iris flower dataset in the sparse format.
#   iris_response_train (response.data):
#     tbl_teradata specifying the response variable for each instance.
td_adaboost_out2 <- td_adaboost_mle(
  attribute.data = iris_attribute_train,
  attribute.name.columns = "attribute",
  attribute.value.column = "attrvalue",
  response.data = iris_response_train,
  id.columns = "pid",
  response.column = "response",
  iter.num = 5,
  num.splits = 10,
  approx.splits = FALSE,
  max.depth = 3,
  min.node.size = 5,
  output.response.probdist = TRUE
)