Description
The DecisionTree function creates a single decision tree
in a distributed fashion, either weighted or unweighted.
The model tbl_teradata that this function outputs can be
input to the function DecisionTreePredict (td_decision_tree_predict_sqle)
or DecisionTreePredict (td_decision_tree_predict_mle).
Usage
td_decision_tree_mle (
data = NULL,
attribute.name.columns = NULL,
attribute.value.column = NULL,
id.columns = NULL,
attribute.table = NULL,
response.table = NULL,
response.column = NULL,
categorical.attribute.table = NULL,
splits.table = NULL,
split.value = NULL,
num.splits = 10,
approx.splits = TRUE,
nodesize = 100,
max.depth = 30,
weighted = FALSE,
weight.column = NULL,
split.measure = "gini",
output.response.probdist = FALSE,
response.probdist.type = "Laplace",
categorical.encoding = "graycode",
attribute.table.sequence.column = NULL,
data.sequence.column = NULL,
categorical.attribute.table.sequence.column = NULL,
response.table.sequence.column = NULL,
splits.table.sequence.column = NULL
)
Arguments
data |
Optional Argument. Required if you omit the "attribute.table" and
"response.table" arguments. |
attribute.name.columns |
Required Argument. |
attribute.value.column |
Required Argument. |
id.columns |
Required Argument. |
attribute.table |
Optional Argument. Required if you omit the "data" argument. |
response.table |
Optional Argument. Required if you omit the "data" argument. |
response.column |
Required Argument. |
categorical.attribute.table |
Optional Argument. |
splits.table |
Optional Argument. |
split.value |
Optional Argument. |
num.splits |
Optional Argument. |
approx.splits |
Optional Argument. |
nodesize |
Optional Argument. |
max.depth |
Optional Argument. |
weighted |
Optional Argument. |
weight.column |
Optional Argument. |
split.measure |
Optional Argument. |
output.response.probdist |
Optional Argument. |
response.probdist.type |
Optional Argument. |
categorical.encoding |
Optional Argument. |
attribute.table.sequence.column |
Optional Argument. |
data.sequence.column |
Optional Argument. |
categorical.attribute.table.sequence.column |
Optional Argument. |
response.table.sequence.column |
Optional Argument. |
splits.table.sequence.column |
Optional Argument. |
Value
The function returns an object of class "td_decision_tree_mle", which is a named list containing Teradata tbl objects. Named list members can be referenced directly with the "$" operator using the following names:
- model.table
- intermediate.splits.table
- final.response.tableto
- output
Examples
# Fetch the connection held by the current context.
con <- td_get_context()$connection

# Load the example tables used by the examples below.
loadExampleData(
  "decision_tree_example",
  "iris_attribute_train",
  "iris_response_train",
  "iris_altinput"
)

# Wrap each loaded table in an object of class "tbl_teradata".
iris_attribute_train <- tbl(con, "iris_attribute_train")
iris_response_train <- tbl(con, "iris_response_train")
iris_altinput <- tbl(con, "iris_altinput")

# Example 1 - Build a decision tree from separate attribute and
# response tables ("attribute.table" and "response.table").
td_decision_tree_out1 <- td_decision_tree_mle(
  attribute.name.columns = c("attribute"),
  attribute.value.column = "attrvalue",
  id.columns = c("pid"),
  attribute.table = iris_attribute_train,
  response.table = iris_response_train,
  response.column = "response",
  num.splits = 3,
  approx.splits = FALSE,
  nodesize = 10,
  max.depth = 10,
  split.measure = "gini"
)

# Example 2 - Build a decision tree from a single input passed through
# the "data" argument instead of attribute/response tables.
td_decision_tree_out2 <- td_decision_tree_mle(
  data = iris_altinput,
  attribute.name.columns = c("attribute"),
  attribute.value.column = "attrvalue",
  id.columns = c("pid"),
  response.column = "response",
  num.splits = 3,
  approx.splits = FALSE,
  nodesize = 10,
  max.depth = 10,
  split.measure = "gini"
)