XGBoostPredict for Classification - Analytics Database

Database Analytic Functions

Deployment
VantageCloud
VantageCore
Edition
Enterprise
IntelliFlex
VMware
Product
Analytics Database
Release Number
17.20
Published
June 2022
Language
English (United States)
Last Update
2024-04-06
dita:mapPath
gjn1627595495337.ditamap
dita:ditavalPath
ayr1485454803741.ditaval
dita:id
jmh1512506877710
Product Category
Teradata Vantageā„¢

This example shows how to use XGBoost for classification: first use TD_XGBoost to train a classification model, and then use TD_XGBoostPredict to test the model.

The following sectoins show the input table, SQL query, and output model table when training a classification model using TD_XGBoost.

Input Table: xgboostTrain

  id rent lotsize bedrooms bathrms driveway homestyle
0 1 3200 900 2 1 0 1
1 1 4000 1400 3 2 1 2
2 1 2000 750 1 1 0 1
3 1 10000 3800 4 2 1 3
4 1 7500 2750 3 2 0 2
5 1 15000 2750 5 3 1 3

SQL Function Call: TD_XGBoost

create table xgboost_classification_model AS(
    SELECT * FROM TD_XGBoost (
        ON xgboostTrain AS inputtable partition by ANY
        USING
        ResponseColumn('homestyle')
        InputColumns('rent','lotsize','bedrooms','bathrms','driveway')
        MaxDepth(3)
        NumBoostedTrees(1)
        ModelType('classification')
        Seed(1)
        IterNum(2)
    ) AS dt
)with data;

Output Model Tble: xgboost_classification_model

  task_index tree_num iter class_num tree_order classification_tree
0 0 1 1 2 0 {"id_":1,"sum_":0.000000,"sumSq_":1.333333,"size_":6,"maxDepth_":3,"nodeType_":"REGRESSION_NODE","split_":{"splitValue_":8750.000000,"attr_":"rent","type_":"REGRESSION_NUMERIC_SPLIT","score_":1.333333,"scoreImprove_":1.333333,"leftNodeSize_":4,"rightNodeSize_":2},"leftChild_":{"id_":2,"sum_":-1.333333,"sumSq_":0.444444,"size_":4,"maxDepth_":2,"nodeType_":"REGRESSION_NODE","split_":{"splitValue_":5750.000000,"attr_":"rent","type_":"REGRESSION_NUMERIC_SPLIT","score_":0.000000,"scoreImprove_":0.000000,"leftNodeSize_":3,"rightNodeSize_":1},"leftChild_":{"id_":4,"sum_":-1.000000,"sumSq_":0.333333,"size_":3,"maxDepth_":1,"nodeType_":"REGRESSION_NODE","split_":{"splitValue_":3600.000000,"attr_":"rent","type_":"REGRESSION_NUMERIC_SPLIT","score_":0.000000,"scoreImprove_":0.000000,"leftNodeSize_":2,"rightNodeSize_":1},"leftChild_":{"id_":8,"sum_":-0.666667,"sumSq_":0.222222,"size_":2,"maxDepth_":0,"value_":-0.333333,"nodeType_":"REGRESSION_LEAF","prediction_":-0.000000},"rightChild_":{"id_":9,"sum_":-0.333333,"sumSq_":0.111111,"size_":1,"maxDepth_":0,"value_":-0.333333,"nodeType_":"REGRESSION_LEAF","prediction_":-0.000000}},"rightChild_":{"id_":5,"sum_":-0.333333,"sumSq_":0.111111,"size_":1,"maxDepth_":0,"value_":-0.333333,"nodeType_":"REGRESSION_LEAF","prediction_":-0.000000}},"rightChild_":{"id_":3,"sum_":1.333333,"sumSq_":0.888889,"size_":2,"maxDepth_":0,"value_":0.666667,"nodeType_":"REGRESSION_LEAF","prediction_":0.000001}}
1 0 1 2 1 0 {"id_":1,"sum_":0.000000,"sumSq_":1.333332,"size_":6,"maxDepth_":3,"nodeType_":"REGRESSION_NODE","split_":{"splitValue_":3600.000000,"attr_":"rent","type_":"REGRESSION_NUMERIC_SPLIT","score_":0.333333,"scoreImprove_":0.333333,"leftNodeSize_":2,"rightNodeSize_":4},"leftChild_":{"id_":2,"sum_":-0.666666,"sumSq_":0.222222,"size_":2,"maxDepth_":0,"value_":-0.333333,"nodeType_":"REGRESSION_LEAF","prediction_":-0.000000},"rightChild_":{"id_":3,"sum_":0.666666,"sumSq_":1.111110,"size_":4,"maxDepth_":2,"nodeType_":"REGRESSION_NODE","split_":{"splitValue_":8750.000000,"attr_":"rent","type_":"REGRESSION_NUMERIC_SPLIT","score_":0.999999,"scoreImprove_":0.999999,"leftNodeSize_":2,"rightNodeSize_":2},"leftChild_":{"id_":6,"sum_":1.333333,"sumSq_":0.888888,"size_":2,"maxDepth_":0,"value_":0.666666,"nodeType_":"REGRESSION_LEAF","prediction_":0.000001},"rightChild_":{"id_":7,"sum_":-0.666666,"sumSq_":0.222222,"size_":2,"maxDepth_":0,"value_":-0.333333,"nodeType_":"REGRESSION_LEAF","prediction_":-0.000000}}}
2 0 1 2 2 0 {"id_":1,"sum_":-0.000000,"sumSq_":1.333332,"size_":6,"maxDepth_":3,"nodeType_":"REGRESSION_NODE","split_":{"splitValue_":8750.000000,"attr_":"rent","type_":"REGRESSION_NUMERIC_SPLIT","score_":1.333332,"scoreImprove_":1.333332,"leftNodeSize_":4,"rightNodeSize_":2},"leftChild_":{"id_":2,"sum_":-1.333333,"sumSq_":0.444444,"size_":4,"maxDepth_":2,"nodeType_":"REGRESSION_NODE","split_":{"splitValue_":3600.000000,"attr_":"rent","type_":"REGRESSION_NUMERIC_SPLIT","score_":0.000000,"scoreImprove_":0.000000,"leftNodeSize_":2,"rightNodeSize_":2},"leftChild_":{"id_":4,"sum_":-0.666666,"sumSq_":0.222222,"size_":2,"maxDepth_":0,"value_":-0.333333,"nodeType_":"REGRESSION_LEAF","prediction_":-0.000000},"rightChild_":{"id_":5,"sum_":-0.666666,"sumSq_":0.222222,"size_":2,"maxDepth_":0,"value_":-0.333333,"nodeType_":"REGRESSION_LEAF","prediction_":-0.000000}},"rightChild_":{"id_":3,"sum_":1.333333,"sumSq_":0.888888,"size_":2,"maxDepth_":0,"value_":0.666666,"nodeType_":"REGRESSION_LEAF","prediction_":0.000001}}
3 0 -1 -1 -1 -1 {"lossType":"SOFTMAX","numBoostedTrees":1,"iterNum":2,"classMapping":{"1":0,"2":1,"3":2}}
4 0 1 2 0 0 {"id_":1,"sum_":-0.000000,"sumSq_":1.333332,"size_":6,"maxDepth_":3,"nodeType_":"REGRESSION_NODE","split_":{"splitValue_":3600.000000,"attr_":"rent","type_":"REGRESSION_NUMERIC_SPLIT","score_":1.333332,"scoreImprove_":1.333332,"leftNodeSize_":2,"rightNodeSize_":4},"leftChild_":{"id_":2,"sum_":1.333333,"sumSq_":0.888888,"size_":2,"maxDepth_":0,"value_":0.666666,"nodeType_":"REGRESSION_LEAF","prediction_":0.000001},"rightChild_":{"id_":3,"sum_":-1.333333,"sumSq_":0.444444,"size_":4,"maxDepth_":2,"nodeType_":"REGRESSION_NODE","split_":{"splitValue_":8750.000000,"attr_":"rent","type_":"REGRESSION_NUMERIC_SPLIT","score_":0.000000,"scoreImprove_":0.000000,"leftNodeSize_":2,"rightNodeSize_":2},"leftChild_":{"id_":6,"sum_":-0.666666,"sumSq_":0.222222,"size_":2,"maxDepth_":0,"value_":-0.333333,"nodeType_":"REGRESSION_LEAF","prediction_":-0.000000},"rightChild_":{"id_":7,"sum_":-0.666666,"sumSq_":0.222222,"size_":2,"maxDepth_":0,"value_":-0.333333,"nodeType_":"REGRESSION_LEAF","prediction_":-0.000000}}}
5 0 1 1 1 0 {"id_":1,"sum_":0.000000,"sumSq_":1.333333,"size_":6,"maxDepth_":3,"nodeType_":"REGRESSION_NODE","split_":{"splitValue_":3600.000000,"attr_":"rent","type_":"REGRESSION_NUMERIC_SPLIT","score_":0.333333,"scoreImprove_":0.333333,"leftNodeSize_":2,"rightNodeSize_":4},"leftChild_":{"id_":2,"sum_":-0.666667,"sumSq_":0.222222,"size_":2,"maxDepth_":0,"value_":-0.333333,"nodeType_":"REGRESSION_LEAF","prediction_":-0.000000},"rightChild_":{"id_":3,"sum_":0.666667,"sumSq_":1.111111,"size_":4,"maxDepth_":2,"nodeType_":"REGRESSION_NODE","split_":{"splitValue_":8750.000000,"attr_":"rent","type_":"REGRESSION_NUMERIC_SPLIT","score_":1.000000,"scoreImprove_":1.000000,"leftNodeSize_":2,"rightNodeSize_":2},"leftChild_":{"id_":6,"sum_":1.333333,"sumSq_":0.888889,"size_":2,"maxDepth_":0,"value_":0.666667,"nodeType_":"REGRESSION_LEAF","prediction_":0.000001},"rightChild_":{"id_":7,"sum_":-0.666667,"sumSq_":0.222222,"size_":2,"maxDepth_":0,"value_":-0.333333,"nodeType_":"REGRESSION_LEAF","prediction_":-0.000000}}}
6 0 1 1 0 0 {"id_":1,"sum_":0.000000,"sumSq_":1.333333,"size_":6,"maxDepth_":3,"nodeType_":"REGRESSION_NODE","split_":{"splitValue_":3600.000000,"attr_":"rent","type_":"REGRESSION_NUMERIC_SPLIT","score_":1.333333,"scoreImprove_":1.333333,"leftNodeSize_":2,"rightNodeSize_":4},"leftChild_":{"id_":2,"sum_":1.333333,"sumSq_":0.888889,"size_":2,"maxDepth_":0,"value_":0.666667,"nodeType_":"REGRESSION_LEAF","prediction_":0.000001},"rightChild_":{"id_":3,"sum_":-1.333333,"sumSq_":0.444444,"size_":4,"maxDepth_":2,"nodeType_":"REGRESSION_NODE","split_":{"splitValue_":12500.000000,"attr_":"rent","type_":"REGRESSION_NUMERIC_SPLIT","score_":0.000000,"scoreImprove_":0.000000,"leftNodeSize_":3,"rightNodeSize_":1},"leftChild_":{"id_":6,"sum_":-1.000000,"sumSq_":0.333333,"size_":3,"maxDepth_":1,"nodeType_":"REGRESSION_NODE","split_":{"splitValue_":8750.000000,"attr_":"rent","type_":"REGRESSION_NUMERIC_SPLIT","score_":0.000000,"scoreImprove_":0.000000,"leftNodeSize_":2,"rightNodeSize_":1},"leftChild_":{"id_":12,"sum_":-0.666667,"sumSq_":0.222222,"size_":2,"maxDepth_":0,"value_":-0.333333,"nodeType_":"REGRESSION_LEAF","prediction_":-0.000000},"rightChild_":{"id_":13,"sum_":-0.333333,"sumSq_":0.111111,"size_":1,"maxDepth_":0,"value_":-0.333333,"nodeType_":"REGRESSION_LEAF","prediction_":-0.000000}},"rightChild_":{"id_":7,"sum_":-0.333333,"sumSq_":0.111111,"size_":1,"maxDepth_":0,"value_":-0.333333,"nodeType_":"REGRESSION_LEAF","prediction_":-0.000000}}}

The following sections show the input table, SQL query and output table when testing the classification model using TD_XGBoostPredict.

Input Table: xgboostTest

  id rent lotsize bedrooms bathrooms driveway
0 3 1250 550 1 1 1
1 4 5500 3550 3 1 0
2 1 12500 4000 3 2 1
3 2 18000 5550 8 6 0

SQL Function Call: TD_XGBoostPredict Query

SELECT * FROM TD_XGBoostPredict(
    ON test_data AS inputtable partition by ANY
    ON xgboost_classification_model AS modeltable dimension
    USING
      IdColumn('id')
      ModelType('Classification')
      OutputProb('t')
      Responses('1','2','3')
) AS dt;

Output Table

  id Prediction Prob_1 Prob_2 Prob_3
0 3 1 0.33333377777792600 0.3333331111110370 0.3333331111110370
1 4 2 0.3333331111110370 0.33333377777792600 0.3333331111110370
2 1 3 0.3333331111110370 0.3333331111110370 0.33333377777792600
3 2 3 0.3333331111110370 0.3333331111110370 0.33333377777792600