TD_GLM Examples | GLM | Teradata Vantage - Examples: How to Use TD_GLM - Teradata Vantage

Teradata® VantageCloud Lake

Deployment
VantageCloud
Edition
Lake
Product
Teradata Vantage
Published
January 2023
Language
English (United States)
Last Update
2024-04-03
dita:mapPath
phg1621910019905.ditamap
dita:ditavalPath
pny1626732985837.ditaval
dita:id
phg1621910019905

Input Table

Example: TD_GLM Using Credit Data Set

The following credit data set is used in this example:

ID A1 A2 A7 A10 A13 A14 A0_b A0_a A3_Y A3_u A4_p A4_g A5_k A5_cc A5_d A5_c A5_aa A5_m A5_q A5_w A5_e A5_ff A5_j A5_x A5_i A6_v A6_h A6_bb A6_z A6_ff A6_j A8_t A8_f A9_t A9_f A11_t A11_f A12_g A12_s Outcome
61 0.218228 2.17724 0.142986 1.25309 0.238997 -0.130455 1 0 0 1 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 1 0 0 1 1 0 1
297 -0.453325 -0.0247979 -0.404925 -0.77231 0.886307 -0.225685 0 1 0 1 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 1 0 1 1 0 1 0 0
631 -1.13175 -0.77234 -0.199458 -0.547266 -0.761392-0.643699 -0.225817 0 1 0 1 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 1 0 1 0 1 0 0
122 -0.657541 1.20223 0.00600835 0.802998 -0.54366 -0.120063 1 0 0 1 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 1 0 1 0 1 0 1 0 1
... ... ... ... ... ... ... ... ... ... ... .. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...

TD_GLM Call for Credit Data

CREATE VOLATILE TABLE td_glm_output_credit_ex AS (
    SELECT * FROM td_glm (
        ON credit_ex_merged
        USING
        InputColumns('a1', 'a2', 'a7', 'a10', 'a13', 'a14', 'a0_b', 'a0_a', 'a3_y', 'a3_u', 'a4_p', 'a4_g', 'a5_k', 'a5_cc', 'a5_d', 'a5_c', 'a5_aa', 'a5_m', 'a5_q', 'a5_w', 'a5_e', 'a5_ff', 'a5_j', 'a5_x', 'a5_i', 'a6_v', 'a6_h', 'a6_bb', 'a6_z', 'a6_ff', 'a6_j', 'a8_t', 'a8_f', 'a9_t', 'a9_f', 'a11_t', 'a11_f', 'a12_g', 'a12_s')
        ResponseColumn('Outcome')
        Family('Binomial')
        BatchSize(10)
        MaxIterNum(300)
        RegularizationLambda(0.02)
        Alpha(0.15)
        IterNumNoChange(50)
        Tolerance(0.001)
        Intercept('true')
        LearningRate('optimal')
        InitialEta(0.001)
        Momentum(0.0)
        LocalSGDIterations(0)
    ) AS dt
) WITH DATA
ON COMMIT PRESERVE ROWS
;

TD_GLM Output for Credit Data

Attribute Predictor Estimate Value
-13 LocaleSGD Iterations 0  
-12 Nesterov   FALSE
-11 Momentum 0  
-10 Learning Rate (Final) 0.287682  
-9 Learning Rate (Initial) 0.001  
-8 Number of Iterations 156 CONVERGED
-7 Alpha 0.15 Elasticnet
-6 Regularization 0.02 ENABLED
-5 BIC 151.787  
-4 AIC 80.4189  
-3 Number of Observations 44  
-2 Loglik -0.209461  
-1 Loss Function   LOG
0 (Intercept) 0.146566  
1 A1 0.732289  
2 A2 0  
3 A7 0.717899  
4 A10 0.682358  
5 A13 0.822302  
6 A14 0.176791  
7 A0_b 0.172178  
8 A0_a -0.12165  
9 A3_y -0.285135  
10 A3_u 0.335663  
11 A4_p -0.285135  
12 A4_g 0.335663  
13 A5_k 0.0358046  
14 A5_cc 0  
15 A5_d 0.0480538  
16 A5_c 0.430725  
17 A5_aa 0  
18 A5_m -0.332389  
19 A5_q 0.524153  
20 A5_w -0.599829  
21 A5_e 0.0257252  
22 A5_ff -0.359011  
23 A5_j -0.119432  
24 A5_x 0.0494795  
25 A5_i 0  
26 A6_v -0.387493  
27 A6_h 0.0415697  
28 A6_bb 0.0604108  
29 A6_z 0  
30 A6_ff -0.372217  
31 A6_j 0.538139  
32 A8_t 0.9259  
33 A8-f -0.875372  
34 A9_t 0  
35 A9_f 0  
36 A11_t 0.197957  
37 A11_f -0.146928  
38 A12_g -0.221414  
39 A12_s 0.272506  

Example: TD_GLM Using Housing Data

This example takes raw housing data, and does the following:
  1. Uses TD_ScaleFit to standardize the data.
  2. Uses TD_ScaleTransform to transform the data.
  3. Uses TD_GLM to get a model.

Raw Housing Data

ID MecInc HouseAge AveRoom AveBedrms Population AveOccup Latitude Longitude MedHouseVal
2833 1.3527 30 2.24754 0.742574 169 1.67327 35.39 -119.02 0.6
5328 2.7679 23 3.03868 1.06446 2031 1.63658 34.04 -118.45 2.775
5300 1.583 19 3.14815 1.04548 3751 2.4373 34.07 -118.45 3.5
12433 1.7344 24 3.2984 1.05856 4042 4.4663 33.51 -116.01 0.664
... ... ... ... ... ... ... ... ... ...

TD_ScaleFit Call for Housing Data

SELECT * FROM TD_ScaleFit(
    ON cal_housing_ex_raw AS InputTable
    OUT VOLATILE TABLE OutputTable(scaleFitOut_cal_ex)
    USING
    TargetColumns('medinc', 'houseage', 'averooms', 'avebedrms', 'population', 'aveoccup', 'latitude', 'longitude')
    ScaleMethod('STD')
) AS dt2;

TD_ScaleFit Output for Housing Data

TD_STATTYPE_SCLFIT MedInc HouseAge AveRooms AveBedrms Population AveOccup Latitude Longitude
min 1.0472 7 2.24752 0.742574 47 1.63658 32.64 -123.35
max 10.7721 52 8.89305 2.56522 4145 5.45536 40.99 -116.01
sum 261.401 2134 349.412 74.0563 91566 200.592 2459.39 -8256.05
count 69 69 69 69 69 69 69 69
null 0 0 0 0 0 0 0 0
avg 3.78842 30.9275 1.07328 1.07328 1327.04 2.90714 2.90714 -119.653
variance 5.40242 142.803 1.55253 0.0458083 861633 0.618171 0.618171 4.73921
std 2.30741 11.8631 1.23694 0.212472 921.491 0.780521 0.780521 2.16114
ustd 2.32431 11.95 1.246 0.214029 928.242 0.786239 0.786239 2.17697
multiplier 1 1 1 1 1 1 1 1
intercept 0 0 0 0 0 0 0 0
location 3.78842 30.9275 5.06394 1.07328 1327.04 2.90714 2.90714 -119.653
scale 2.30741 11.8631 1.23694 0.212472 921.491 0.780521 0.780521 2.16114
globalscale_false nan nan nan nan nan nan nan nan
ScaleMethodNumberMapping: [0:mean, 1:sum, 2:ustd, 3:std, 4:range, 5:midrange,6:maxabs, 7:rescale] 3 3 3 3 3 3 3 3
missvalue_keep nan nan nan nan nan nan nan nan

TD_ScaleTransform Call for Housing Data

CREATE MULTISET TABLE cal_housing_ex_scaled AS (
    SELECT * FROM TD_ScaleTransform(
        ON cal_housing_ex_raw AS InputTable
        ON scaleFitOut_cal_ex AS FitTable DIMENSION
        USING
        accumulate('id', 'MedHouseVal')
    ) AS dt1
) WITH data;

TD_ScaleTransform Output for Housing Data

ID MedHouseVal MedInc HouseAge AveRooms AveBedrms Population AveOccup Latitude Longitude
244 1.117 -0.605796 1.10194 -0.160367 0.426688 1.02221 1.04102 0.946201 -1.18785
670 1.922 -0.00308521 0.427582 -0.129699 -0.530672 -0.761856 -0.21256 0.906345 -1.16008
686 1.578 -0.152084 -0.0781865 -0.625426 -0.513581 -0.685892 -0.533101 1.01705 -1.14157
1754 1.651 -0.0263147 0.596172 0.454207 -0.0272726 0.0683203 -0.0827654 1.01705 -1.23412
... ... ... ... ... ... ... ... ... ...

TD_GLM Call for Housing Data

CREATE VOLATILE TABLE td_glm_cal_ex AS (
    SELECT * from TD_GLM (
        ON cal_housing_ex_scaled
        USING
        InputColumns('medinc', 'houseage', 'averooms', 'avebedrms', 'population', 'aveoccup', 'latitude', 'longitude')
        ResponseColumn('MedHouseVal')
        Family('Gaussian')
        BatchSize(10)
        MaxIterNum(300)
        RegularizationLambda(0.02)
        Alpha(0.15)
        IterNumNoChange(50)
        Intercept('true')
        LearningRate('invtime')
        InitialEta(0.05)
        Momentum(0)
        Nesterov('false')
        LocalSGDIterations(0)
    ) AS dt
) WITH DATA
ON COMMIT PRESERVE ROWS;

TD_GLM Output for Housing Data

Attribute Predictor Estimate Value
-13 LocalSGD Iterations 0  
-12 Nesterov    
-11 Momentum 0  
-10 Learning Rate (Final) 0.0133974  
-9 Learning Rate (Initial) 0.05  
-8 Number of Iterations 194 CONVERGED
-7 Alpha 0.15 Elasticnet
-6 Regularization 0.02 ENABLE
-5 BIC -67.6236  
-4 AIC -87.7305  
-3 Number of Observations 69  
-2 MSE 0.216033  
-1 Loss Function   SQUARED_ERROR
0 (Intercept) 2.07174  
1 MedInc 0.782883  
2 HouseAge 0.231914  
3 AveRooms 0.0619822  
4 AveBedrms -0.113656  
5 Population 0.211336  
6 AveOccup -0.388201  
7 Latitude -0.195511  
8 Longitude -0.193884