TD_OneHotEncodingFit Example | OneHotEncodingFit - Example: How to Use TD_OneHotEncodingFit - Analytics Database

Database Analytic Functions

Deployment
VantageCloud
VantageCore
Edition
Enterprise
IntelliFlex
VMware
Product
Analytics Database
Release Number
17.20
Published
June 2022
Language
English (United States)
Last Update
2024-04-06
dita:mapPath
gjn1627595495337.ditamap
dita:ditavalPath
ayr1485454803741.ditaval
dita:id
jmh1512506877710
Product Category
Teradata Vantageā„¢
Multiple column support for TD_OrdinalEncoding, TD_OneHotEncoding, and TD_Histogram is available in release 17.20.03.07 and later. If you are using an older version, the TargetColumn argument accepts only one column.

TD_OneHotEncodingFit Input

Every complete example in this document is available in a zip file that you can download. The zip file includes a SQL script file that creates the input tables for the examples. If you are reading this document on https://docs.teradata.com/, you can download the zip file from the attachment in the left sidebar.

TD_OneHotEncodingFit Dense InputTable: onehot_titanic_dataset

Input Data:

Passenger_id Survived Pclass Name Age Gender City Cabin
5 1 D Mr. John Doe 27 male Del a
4 0 B Mrs. Jacques Heath 25 female Hyd c
6 1 E Mr. Ben Tennison 22 male Hyd b
3 1 C Mrs. Laina 26 female Pune b
1 0 A Mr. Owen Harris 22 male Pune a
2 1 B Mrs. John Bradley 38 female Hyd a
DROP TABLE onehot_titanic_dataset;
CREATE SET TABLE onehot_titanic_dataset(
 Passenger_id INTEGER,
 Survived INTEGER,
 Pclass VARCHAR(5) CHARACTER SET LATIN NOT CASESPECIFIC,
 Name VARCHAR(20) CHARACTER SET LATIN NOT CASESPECIFIC,
 Age INTEGER,
 Gender VARCHAR(10) CHARACTER SET LATIN NOT CASESPECIFIC,
 City VARCHAR(5) CHARACTER SET LATIN NOT CASESPECIFIC,
 Cabin VARCHAR(5) CHARACTER SET LATIN NOT CASESPECIFIC
);
-- Dense Input set.
INSERT INTO onehot_titanic_dataset VALUES (1, 0, 'A', 'Mr. Owen Harris', 22, 'male','Pune','a');
INSERT INTO onehot_titanic_dataset VALUES (2, 1, 'B', 'Mrs. John Bradley', 38, 'female','Hyd','a');
INSERT INTO onehot_titanic_dataset VALUES (3, 1, 'C', 'Mrs. Laina', 26, 'female','Pune','b');
INSERT INTO onehot_titanic_dataset VALUES (4, 0, 'B', 'Mrs. Jacques Heath', 25, 'female','Hyd','c');
INSERT INTO onehot_titanic_dataset VALUES (5, 1, 'D', 'Mr. John Doe', 27, 'male','Del','a');
INSERT INTO onehot_titanic_dataset VALUES (6, 1, 'E', 'Mr. Ben Tennision', 22, 'male','Hyd','b');

TD_OneHotEncodingFit SQL Call: Dense Input [Auto Approach]

SELECT * FROM TD_OneHotEncodingFit(
ON onehot_titanic_dataset AS INPUTTABLE
USING
TargetColumn('Gender','Cabin','City')
OtherColumnName('other')
IsInputDense('true')
CategoryCounts(2,3,3)
Approach('Auto')
) AS dt;

TD_OneHotEncodingFit Output Table: Dense Input [Auto Approach]

Gender Gender_0 Gender_1 Gender_other Cabin Cabin_0 Cabin_1 Cabin_2 Cabin_other City City_0 City_1 City_2 City_other
                    Del Hyd Pune  
  female male                      
          a b c            

TD_OneHotEncodingFit CategoryTable: categoryTable

CREATE TABLE categoryTable (column_name VARCHAR(20) CHARACTER SET Latin NOT CASESPECIFIC,
 category VARCHAR(20) CHARACTER SET Latin NOT CASESPECIFIC);
 
INSERT INTO categoryTable values('Gender','Male');
INSERT INTO categoryTable values('Gender','Female');
INSERT INTO categoryTable values('Cabin','a');
INSERT INTO categoryTable values('Cabin','b');
INSERT INTO categoryTable values('Cabin','c');
INSERT INTO categoryTable values('City','Del');
INSERT INTO categoryTable values('City','Hyd');

TD_OneHotEncodingFit SQL Call: Dense Input [List Approach]

SELECT * FROM TD_OneHotEncodingFit(
ON onehot_titanic_dataset AS INPUTTABLE
ON categoryTable AS categoryTable Dimension
USING
TargetColumn('Gender','Cabin','City')
CategoryCounts(2,3,2)
TargetColumnNames ('column_name')
CategoriesColumn ('category')
OtherColumnName('other')
IsInputDense('true')
Approach('List')
) AS dt ;

TD_OneHotEncodingFit Output Table: Dense Input [List Approach]

Gender Gender_0 Gender_1 Gender_other Cabin Cabin_0 Cabin_1 Cabin_2 Cabin_other City City_0 City_1 City_2 City_other
                    Del Hyd    
  female male                      
          a b c            

TD_OneHotEncodingFit Sparse InputTable: onehot_sparse_input

DROP TABLE onehot_sparse_input;
 
CREATE TABLE onehot_sparse_input (id INTEGER, attribute_column VARCHAR(20), value_column VARCHAR(20));
 
INSERT INTO onehot_sparse_input VALUES (1, 'Survived', 0);
INSERT INTO onehot_sparse_input VALUES (2, 'Survived', 1);
INSERT INTO onehot_sparse_input VALUES (3, 'Survived', 1);
INSERT INTO onehot_sparse_input VALUES (1, 'Pclass', 'A');
INSERT INTO onehot_sparse_input VALUES (2, 'Pclass', 'B');
INSERT INTO onehot_sparse_input VALUES (3, 'Pclass', 'C');
INSERT INTO onehot_sparse_input VALUES (1, 'Name', 'Mr. Owen Harris');
INSERT INTO onehot_sparse_input VALUES (2, 'Name', 'Mrs. John Bradley');
INSERT INTO onehot_sparse_input VALUES (3, 'Name', 'Mrs. Laina');
INSERT INTO onehot_sparse_input VALUES (1, 'Age', 22);
INSERT INTO onehot_sparse_input VALUES (2, 'Age', 38);
INSERT INTO onehot_sparse_input VALUES (3, 'Age', 26);
INSERT INTO onehot_sparse_input VALUES (1, 'Gender', 'male');
INSERT INTO onehot_sparse_input VALUES (2, 'Gender', 'female');
INSERT INTO onehot_sparse_input VALUES (3, 'Gender', 'female');
INSERT INTO onehot_sparse_input VALUES (1, 'City', 'Del');
INSERT INTO onehot_sparse_input VALUES (2, 'City', 'Hyd');
INSERT INTO onehot_sparse_input VALUES (3, 'City', 'Pune');
INSERT INTO onehot_sparse_input VALUES (1, 'Cabin', 'a');
INSERT INTO onehot_sparse_input VALUES (2, 'Cabin', 'a');
INSERT INTO onehot_sparse_input VALUES (3, 'Cabin', 'b');

TD_OneHotEncodingFit SQL Call: Sparse Input

SELECT * FROM TD_OneHotEncodingFit(
ON onehot_sparse_input AS InputTable PARTITION BY attribute_column
USING
IsInputDense ('false')
TargetAttributes ('Gender','Cabin','City')
AttributeColumn ('attribute_column')
ValueColumn ('value_column')
) AS dt ORDER BY 1,2;

TD_OneHotEncodingFit Output Table: Sparse Input

attribute_column value_column TD_VALUE_TYPE_OHEFIT
Cabin a 0
Cabin v 0
City Del 0
City Hyd 0
City Pune 0
Gender female 0
Gender male 0