TD_OneHotEncodingTransform Example | OneHotEncodingTransform - Example: How to Use TD_OneHotEncodingTransform - Analytics Database

Database Analytic Functions

Deployment
VantageCloud
VantageCore
Edition
Enterprise
IntelliFlex
VMware
Product
Analytics Database
Release Number
17.20
Published
June 2022
Language
English (United States)
Last Update
2024-04-06
dita:mapPath
gjn1627595495337.ditamap
dita:ditavalPath
ayr1485454803741.ditaval
dita:id
jmh1512506877710
Product Category
Teradata Vantageā„¢
Multiple column support for TD_OrdinalEncoding, TD_OneHotEncoding, and TD_Histogram is available in release 17.20.03.07 and later. If you are using an older version, the TargetColumn argument accepts only one column.

Dense InputTable: onehot_titanic_dataset

Passenger_id Survived Pclass Name Age Gender City Cabin
5 1 D Mr. John Doe 27 male Del a
4 0 B Mrs. Jacques Heath 25 female Hyd c
6 1 E Mr. Ben Tennison 22 male Hyd b
3 1 C Mrs. Laina 26 female Pune b
1 0 A Mr. Owen Harris 22 male Pune a
2 1 B Mrs. John Bradley 38 female Hyd a

FitTable: onehot_titanic_fit_output

CREATE TABLE onehot_titanic_fit_output as (
SELECT * FROM TD_OneHotEncodingFit(
ON onehot_titanic_dataset AS INPUTTABLE
USING
TargetColumn('Gender','Cabin','City')
OtherColumnName('other')
IsInputDense('true')
CategoryCounts(2,3,3)
Approach('Auto')
) AS dt 
)with data;

TD_OneHotEncodingTransform Output Table: FitTable

Gender Gender_0 Gender_1 Gender_other Cabin Cabin_0 Cabin_1 Cabin_2 Cabin_other City City_0 City_1 City_2 City_other
  female male                      
          a b c            
                    Del Hyd Pune  

TD_OneHotEncodingTransform SQL Call: Dense Input

SELECT * FROM TD_OneHotEncodingTransform (
ON onehot_titanic_dataset AS InputTable
ON onehot_titanic_fit_output AS FitTable Dimension
USING
IsInputDense('True')
) AS dt ORDER BY 1;

TD_OneHotEncodingTransform Output Table: Dense Input

Passenger_id Survived Pclass Name Age Gender_0 Gender_1 Gender_other City_0 City_1 City_2 City_other Cabin_0 Cabin_1 Cabin_2 Cabin_other
1 0 A Mr. Owen Harris 22 0 1 0 0 0 1 0 1 0 0 0
2 1 B Mrs. John Bradley 38 1 0 0 0 1 0 0 1 0 0 0
3 1 C Mrs. Laina 26 1 0 0 0 0 1 0 0 1 0 0
4 0 B Mrs. Jacques Heath 25 1 0 0 0 1 0 0 0 0 1 0
5 1 D Mr. John Doe 27 0 1 0 1 0 0 0 1 0 0 0
6 1 E Mr. Ben Tennison 22 0 1 0 0 1 0 0 0 1 0 0

Sparse InputTable: onehot_sparse_input

DROP TABLE onehot_sparse_input;
CREATE TABLE onehot_sparse_input (id INTEGER, attribute_column VARCHAR(20), value_column VARCHAR(20));
INSERT INTO onehot_sparse_input VALUES (1, 'Survived', 0);
INSERT INTO onehot_sparse_input VALUES (2, 'Survived', 1);
INSERT INTO onehot_sparse_input VALUES (3, 'Survived', 1);
INSERT INTO onehot_sparse_input VALUES (1, 'Pclass', 'A');
INSERT INTO onehot_sparse_input VALUES (2, 'Pclass', 'B');
INSERT INTO onehot_sparse_input VALUES (3, 'Pclass', 'C');
INSERT INTO onehot_sparse_input VALUES (1, 'Name', 'Mr. Owen Harris');
INSERT INTO onehot_sparse_input VALUES (2, 'Name', 'Mrs. John Bradley');
INSERT INTO onehot_sparse_input VALUES (3, 'Name', 'Mrs. Laina');
INSERT INTO onehot_sparse_input VALUES (1, 'Age', 22);
INSERT INTO onehot_sparse_input VALUES (2, 'Age', 38);
INSERT INTO onehot_sparse_input VALUES (3, 'Age', 26);
INSERT INTO onehot_sparse_input VALUES (1, 'Gender', 'male');
INSERT INTO onehot_sparse_input VALUES (2, 'Gender', 'female');
INSERT INTO onehot_sparse_input VALUES (3, 'Gender', 'female');
INSERT INTO onehot_sparse_input VALUES (1, 'City', 'Del');
INSERT INTO onehot_sparse_input VALUES (2, 'City', 'Hyd');
INSERT INTO onehot_sparse_input VALUES (3, 'City', 'Pune');
INSERT INTO onehot_sparse_input VALUES (1, 'Cabin', 'a');
INSERT INTO onehot_sparse_input VALUES (2, 'Cabin', 'a');
INSERT INTO onehot_sparse_input VALUES (3, 'Cabin', 'b');

Input data: onehot_sparse_input

id attribute_column value_column
3 Survived 1
3 Pclass C
3 Name Mrs. Laina
3 Age 26
3 Gender female
3 City Pune
3 Cabin b
1 Survived 0
1 Pclass A
1 Name Mr. Owen Harris
1 Age 22
1 Gender male
1 City Del
1 Cabin a
2 Survived 1
2 Pclass B
2 Name Mrs. John Bradley
2 Age 38
2 Gender female
2 City Hyd
2 Cabin a

FitTable: onehot_sparse_fit

CREATE TABLE onehot_sparse_fit AS (
SELECT * FROM TD_OneHotEncodingFit (
 ON onehot_sparse_input AS InputTable PARTITION BY attribute_column
 USING
 IsInputDense ('false')
 TargetAttributes ('Gender','Cabin','City')
 AttributeColumn ('attribute_column')
 ValueColumn ('value_column')
) AS dt ) WITH DATA;

TD_OneHotEncodingTransform FitTable Output Table: onehot_sparse_fit

attribute_column value_column TD_VALUE_TYPE_OHEFIT
City Pune 0
Gender male 0
Cabin a 0
City Del 0
Gender female 0
Cabin b 0
City Hyd 0

TD_OneHotEncodingTransform SQL Call: Sparse Input

SELECT * FROM TD_OneHotEncodingTransform (
ON onehot_sparse_input AS InputTable PARTITION BY attribute_column
ON onehot_sparse_fit AS FitTable PARTITION BY attribute_column
USING
IsInputDense ('false')
) AS dt ORDER BY 1,2;

TD_OneHotEncodingTransform Output Table: Sparse Input

id attribute_column value_column
1 Age 22
1 Cabin_a 1
1 Cabin_b 0
1 Cabin_other 0
1 City_Del 1
1 City_Hyd 0
1 City_other 0
1 City_Pune 0
1 Name Mr. Owen Harris
1 Pclass A
1 Gender_female 0
1 Gender_male 1
1 Gender_other 0
1 Survived 0
2 Age 38
2 Cabin_a 1
2 Cabin_b 0
2 Cabin_other 0
2 City_Del 0
2 City_Hyd 1
2 City_other 0
2 City_Pune 0
2 Name Mrs. John Bradley
2 Pclass B
2 Gender_female 1
2 Gender_male 0
2 Gender_other 0
2 Survived 1
3 Age 26
3 Cabin_a 0
3 Cabin_b 1
3 Cabin_other 0
3 City_Del 0
3 City_Hyd 0
3 City_other 0
3 City_Pune 1
3 Name Mrs. Laina
3 Pclass C
3 Gender_female 1
3 Gender_male 0
3 Gender_other 0
3 Survived 1