TD_TargetEncodingTransform Examples - Analytics Database

Database Analytic Functions

Deployment
VantageCloud
VantageCore
Edition
Enterprise
IntelliFlex
VMware
Product
Analytics Database
Release Number
17.20
Published
June 2022
Language
English (United States)
Last Update
2024-04-06
dita:mapPath
gjn1627595495337.ditamap
dita:ditavalPath
ayr1485454803741.ditaval
dita:id
jmh1512506877710
Product Category
Teradata Vantageā„¢

TD_TargetEncodingTransform Example

InputTable: The following table contains a subset of titantic dataset.

CREATE TABLE titanic_train (
   passenger INTEGER,
   survived INTEGER,
   pclass INTEGER,
   name VARCHAR(90) CHARACTER SET LATIN NOT CASESPECIFIC,
   gender VARCHAR(10) CHARACTER SET LATIN NOT CASESPECIFIC,
   age INTEGER,
   sibsp INTEGER,
   parch INTEGER,
   ticket VARCHAR(20) CHARACTER SET LATIN NOT CASESPECIFIC,
   fare FLOAT,
   cabin VARCHAR(20) CHARACTER SET LATIN NOT CASESPECIFIC,
   embarked VARCHAR(10) CHARACTER SET LATIN NOT CASESPECIFIC)
PRIMARY INDEX ( passenger );
 
INSERT INTO titanic_train (2, 1, 1, 'Cumings; Mrs. John Bradley (Florence Briggs Thayer)', 'female', 38, 1, 0, 'PC 17599', 71.2833, 'C85', 'C');
INSERT INTO titanic_train (4, 1, 1, 'Futrelle; Mrs. Jacques Heath (Lily May Peel)', 'female', 35, 1, 0, '113803', 53.1, 'C123', 'S');
INSERT INTO titanic_train (7, 0, 1, 'McCarthy; Mr. Timothy J', 'male', 54, 0, 0, '17463', 51.8625, 'E46', 'S');
INSERT INTO titanic_train (10, 1, 2, 'Nasser; Mrs. Nicholas (Adele Achem)', 'female', 14, 1, 0, '237736', 30.0708, '', 'C');
INSERT INTO titanic_train (16, 1, 2, 'Hewlett; Mrs. (Mary D Kingcome) ', 'female', 55, 0, 0, '248706', 16, '', 'S');
INSERT INTO titanic_train (21, 0, 2, 'Fynney; Mr. Joseph J', 'male', 35, 0, 0, '239865', 26, '', 'S');
INSERT INTO titanic_train (40, 1, 3, 'Nicola-Yarred; Miss. Jamila', 'female', 14, 1, 0, '2651', 11.2417, '', 'C');
INSERT INTO titanic_train (61, 0, 3, 'Sirayanian; Mr. Orsen', 'male', 22, 0, 0, '2669', 7.2292, '', 'C');
INSERT INTO titanic_train (80, 1, 3, 'Dowdell; Miss. Elizabeth', 'female', 30, 0, 0, '364516', 12.475, '', 'S
SELECT * FROM titanic_train ORDER BY 1;

Input table:

passenger survived pclass name gender age sibsp parch ticket fare cabin embarked
2 1 1 Cumings; Mrs John Bradley (Florence Briggs Thayer) female 38 1 0 PC 17599 71.2833 CBS C
4 1 1 Futrell; Mrs. Jacques Heath (Lily May Peel) female 35 1 0 113803 53.1 C123 S
7 0 1 McCarthy; Mr. Timothy J male 54 0 0 117463 51.8625 E46 S
10 1 2 Nasser; Mrs. Nicholas (Adele Achem) female 14 1 0 237736 30.0707   C
16 1 2 Hewlett; Mrs. (Mary D Kingcome) female 55 0 0 248706 16   S
21 0 2 Fynney; Mr. Joseph J male 35 0 0 239865 26   S
40 1 3 Nicola-Yarred; Miss. Jamila female 14 1 0 2651 11.2417   C
61 0 3 Sirayanian; Mr. Orsen male 22 0 0 2669 7.2292   C
80 1 3 Dowdell; Miss. Elizabeth female 30 0 0 364516 12.475   S

TD_TargetEncodingTransform Query with EncoderMethod CBM_BETA

SELECT * FROM TD_TargetEncodingFit (
   ON titanic_train AS InputTable
   ON categoryTable AS CategoryTable DIMENSION
   OUT PERMANENT TABLE OutputTable(betaEncFitTbl)    
   USING    
   EncoderMethod('CBM_BETA')
   TargetColumns('gender','embarked')
   ResponseColumn ('survived')
   DefaultValues(-1, -2)
) AS dt;

CBM_BETA FitTable:

TD_ColumnName_TEFIT TD_Category_TEFIT TD_Alpha_TEFIT TD_Beta_TEFIT TD_DefaultCategory_TEFIT TD_CategoryCount_TEFIT TD_ColumnIndex_TEFIT TD_EncoderMethod_TEFIT_CBM_BETA gender embarked
embarked C 3.666667 1.333333 -2 2 1 ? ? ?
embarked S 3.666667 2.333333 -2 2 1 ? ? ?
gender female 6.666667 0.333333 -1 2 0 ? ? ?
gender male 0.666667 3.333333 -1 2 0 ? ? ?

Transform query:

SELECT * FROM TD_TargetEncodingTransform (
   ON titanic_train AS InputTable
   ON betaEncFitTbl AS FitTable DIMENSION
   USING
   Accumulate('passenger')
) AS dt ORDER BY passenger;

Transform output:

gender embarked passenger
0.952381 0.733333 2
0.952381 0.611111 4
0.166667 0.611111 7
0.952381 0.733333 10
0.952381 0.611111 16
0.166667 0.611111 21
0.952381 0.733333 40
0.166667 0.733333 61
0.952381 0.611111 80

TD_TargetEncodingFit Query with EncoderMethod CBM_DIRICHLET

SELECT * FROM TD_TargetEncodingFit (
   ON titanic_train AS InputTable
   ON categoryTable AS CategoryTable DIMENSION
   OUT PERMANENT TABLE OutputTable(dirichletEncFitTbl)    
   USING    
   EncoderMethod('CBM_DIRICHLET')
   TargetColumns('gender','embarked')
   ResponseColumn ('pclass')
   DefaultValues(-1, -2)
   NumDistinctResponses(3)
) AS dt;

CBM_DIRICHLET FitTable:

TD_ColumnName_TEFIT TD_Category_TEFIT TD_Alpha1_TEFIT_gender TD_Alpha2_TEFIT_gender TD_Alpha3_TEFIT_gender TD_Alpha1_TEFIT_embarked TD_Alpha2_TEFIT_embarked TD_Alpha3_TEFIT_embarked TD_DefaultCategory_TEFIT TD_CategoryCount_TEFIT TD_ColumnIndex_TEFIT TD_EncoderMethod_TEFIT_CBM_DIRICHLET TD_NumResponses_TEFIT_3
embarked C ? ? ? 1.333333 1.333333 2.333333 -2 2 1 ? ?
embarked S ? ? ? 2.333333 2.333333 1.333333 -2 2 1 ? ?
gender female 2.333333 2.333333 2.333333 ? ? ? -1 2 0 ? ?
gender male 1.333333 1.333333 1.333333 ? ? ? -1 2 0 ? ?

Transform query:

SELECT * FROM TD_TargetEncodingTransform (
   ON titanic_train AS InputTable
   ON dirichletEncFitTbl AS FitTable DIMENSION
   USING
   Accumulate('passenger')
) AS dt ORDER BY passenger;

Transform output:

gender_1 gender_2 gender_3 embarked_1 embarked_2 embarked_3 passenger
0.333333 0.333333 0.333333 0.266667 0.266667 0.466667 2
0.333333 0.333333 0.333333 0.388889 0.388889 0.222222 4
0.333333 0.333333 0.333333 0.388889 0.388889 0.222222 7
0.333333 0.333333 0.333333 0.266667 0.266667 0.466667 10
0.333333 0.333333 0.333333 0.388889 0.388889 0.222222 16
0.333333 0.333333 0.333333 0.388889 0.388889 0.222222 21
0.333333 0.333333 0.333333 0.266667 0.266667 0.466667 40
0.333333 0.333333 0.333333 0.266667 0.266667 0.466667 61
0.333333 0.333333 0.333333 0.388889 0.388889 0.222222 80

TD_TargetEncodingTransform Query with EncoderMethod CBM_GAUSSIAN_INVERSE_GAMMA

SELECT * FROM TD_TargetEncodingFit (
   ON titanic_train AS InputTable
   ON categoryTable AS CategoryTable DIMENSION
   OUT PERMANENT TABLE OutputTable(GIGEncFitTbl)    
   USING    
   EncoderMethod('CBM_GAUSSIAN_INVERSE_GAMMA')
   TargetColumns('gender','embarked')
   ResponseColumn ('age')
   DefaultValues(-1, -2)
) AS dt;

Output:

TD_ColumnName_TEFIT TD_Category_TEFIT TD_U0_TEFIT TD_V0_TEFIT TD_Alpha0_TEFIT TD_Beta0_Count TD_DefaultCategory_TEFIT TD_CategoryCount_TEFIT TD_Columnindex_TEFIT TD_EncoderMehod_TEFIT_CBM_GAUSSIAN_INVERSE_GAMMA gender embarked
embarked C 17.600000 5.000000 4.000000 258.600000 -2 2 1 ? ? ?
embarked S 34.833333 6.000000 4.500000 798.366667 -2 2 1 ? ? ?
gender female 26.571429 7.000000 5.000000 534.857143 -1 2 0 ? ? ?
gender male 27.750000 4.000000 3.500000 643.875000 -1 2 0 ? ? ?

Transform query:

SELECT * FROM TD_TargetEncodingTransform (
   ON titanic_train AS InputTable
   ON GIGEncFitTbl AS FitTable DIMENSION
   USING
   Accumulate('passenger')
) AS dt ORDER BY passenger;

Transform output:

gender embarked passenger
26.57143 17.6 2
26.57143 34.83333 4
27.75 34.83333 7
26.57143 17.6 10
26.57143 34.83333 16
27.75 34.83333 21
26.57143 17.6 40
27.75 17.6 61
26.57143 34.83333 80