Example: TD_DecisionForest Classification
Input table
encoded | ROW_I | attribute_1 | attribute_2 | attribute_3 | ... | attribute_49 | sample_id |
---|---|---|---|---|---|---|---|
0 | 99 | -0.0664 | -0.0999 | -0.0949 | ... | -0.0942 | 2 |
0 | 101 | -0.0603 | -0.0938 | -0.0900 | ... | -0.0935 | 2 |
1 | 114 | 0.0000 | 0.0001 | 0.0001 | ... | 0.0001 | 2 |
1 | 115 | 0.0001 | 0.0001 | 0.0001 | ... | 0.0001 | 2 |
... | ... | ... | ... | ... | ... | ... | ... |
TD_DecisionForest SQL Call for Classification
-- Train a classification decision forest on DT_Input and store the resulting
-- model rows in the volatile table DecisionForestOutput.
--   ResponseColumn : 'encoded' is the class label to predict.
--   InputColumns   : '[2:12]' selects predictors by column position
--                    (presumably zero-based, i.e. starting at attribute_1 in
--                    the sample layout -- confirm against the input table).
--   TreeType       : requests a classification model.
-- ON COMMIT PRESERVE ROWS keeps the rows after the creating transaction commits.
CREATE VOLATILE TABLE DecisionForestOutput AS (
    SELECT *
    FROM TD_DecisionForest (
        ON DT_Input AS InputTable PARTITION BY ANY
        USING
        ResponseColumn('encoded')
        InputColumns('[2:12]')
        TreeType('CLASSIFICATION')
    ) AS dt
) WITH DATA
ON COMMIT PRESERVE ROWS;
TD_DecisionForest Output for Classification
-- Inspect the trained model rows stored in DecisionForestOutput.
SELECT *
FROM DecisionForestOutput;
task_index | tree_num | tree_order | tree |
---|---|---|---|
24 | 0 | 0 | {"id_":1,"size_":75,"maxDepth_":5,"label_":"1","responseCounts_":{"1":75},"nodeType_":"CLASSIFICATION_LEAF"} |
35 | 0 | 0 | {"id_":1,"size_":82,"maxDepth_":5,"label_":"0","responseCounts_":{"0":82},"nodeType_":"CLASSIFICATION_LEAF"} |
Example: TD_DecisionForest Regression
The following is a sample of housing data taken from the Boston housing dataset.
CRIM | ZN | INDUS | CHAS | NOX | RM | AGE | DIS | RAD | TAX | PTRATIO | B | LSTAT | medv |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0.05188 | 0.0 | 4.49 | 0.0 | 0.449 | 6.015 | 45.1 | 4.4272 | 3.0 | 247 | 18.5 | 396.99 | 12.86 | 22.5 |
0.30347 | 0.0 | 7.38 | 0.0 | 0.493 | 6.312 | 28.9 | 5.4159 | 5.0 | 287 | 19.6 | 396.90 | 6.15 | 23.0 |
0.6147 | 0.0 | 6.20 | 0.0 | 0.507 | 6.618 | 80.8 | 3.2721 | 8.0 | 307 | 17.4 | 396.90 | 7.6 | 30.1 |
0.04527 | 0.0 | 11.93 | 0.0 | 0.537 | 6.120 | 76.7 | 2.2875 | 1.0 | 273 | 21.0 | 396.90 | 9.08 | 20.6 |
0.12816 | 12.5 | 6.07 | 0.0 | 0.409 | 5.885 | 33 | 6.4890 | 4.0 | 345 | 18.9 | 396.90 | 8.79 | 20.9 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
TD_DecisionForest SQL Call for Regression
-- Fit a regression decision forest on housing_sample with medv as the response.
-- InputColumns('[0:12]') selects the predictors by column position; the seeds
-- (Seed, MtrySeed) are fixed to 1 in this example.
SELECT *
FROM TD_DecisionForest (
    ON housing_sample AS inputtable PARTITION BY ANY
    USING
    ResponseColumn('medv')
    InputColumns('[0:12]')
    MaxDepth(12)
    MinNodeSize(1)
    NumTrees(4)
    ModelType('REGRESSION')
    Seed(1)
    Mtry(3)
    MtrySeed(1)
) AS dt;
TD_DecisionForest Output for Regression
task_index | tree_num | regression_tree |
---|---|---|
0 | 0 | {"id_":1,"sum_":201.700000,"sumSq_":6781.890000,"size_":6,"maxDepth_":12,"nodeType_":"REGRESSION_NODE","split_":{"splitValue_":7.091500,"attr_":"rm","type_":"REGRESSION_NUMERIC_SPLIT","score_":32984.915253,"scoreImprove_":32984.915253,"leftNodeSize_":5,"rightNodeSize_":1},"leftChild_":{"id_":2,"sum_":167.000000,"sumSq_":5577.800000,"size_":5,"maxDepth_":11,"value_":33.400000,"nodeType_":"REGRESSION_LEAF"},"rightChild_":{"id_":3,"sum_":34.700000,"sumSq_":1204.090000,"size_":1,"maxDepth_":11,"value_":34.700000,"nodeType_":"REGRESSION_LEAF"}} | |
2 | 0 | {"id_":1,"sum_":208.800000,"sumSq_":4905.980000,"size_":9,"maxDepth_":12,"nodeType_":"REGRESSION_NODE","split_":{"splitValue_":6.465000,"attr_":"rm","type_":"REGRESSION_NUMERIC_SPLIT","score_":37076.368050,"scoreImprove_":37076.368050,"leftNodeSize_":8,"rightNodeSize_":1},"leftChild_":{"id_":2,"sum_":178.700000,"sumSq_":3999.970000,"size_":8,"maxDepth_":11,"value_":22.337500,"nodeType_":"REGRESSION_LEAF"},"rightChild_":{"id_":3,"sum_":30.100000,"sumSq_":906.010000,"size_":1,"maxDepth_":11,"value_":30.100000,"nodeType_":"REGRESSION_LEAF"}} | |
3 | 0 | {"id_":1,"sum_":93.600000,"sumSq_":2194.560000,"size_":4,"maxDepth_":12,"nodeType_":"REGRESSION_NODE","split_":{"splitValue_":7.060000,"attr_":"lstat","type_":"REGRESSION_NUMERIC_SPLIT","score_":6272.052528,"scoreImprove_":6272.052528,"leftNodeSize_":3,"rightNodeSize_":1},"leftChild_":{"id_":2,"sum_":72.000000,"sumSq_":1728.000000,"size_":3,"maxDepth_":11,"value_":24.000000,"nodeType_":"REGRESSION_LEAF"},"rightChild_":{"id_":3,"sum_":21.600000,"sumSq_":466.560000,"size_":1,"maxDepth_":11,"value_":21.600000,"nodeType_":"REGRESSION_LEAF"}} |