Multiple column support for TD_OrdinalEncoding, TD_OneHotEncoding, and TD_Histogram is available in release 17.20.03.07 and later. If you are using an older version, the TargetColumn argument accepts only one column.
TD_OneHotEncodingFit Input
Every complete example in this document is available in a zip file that you can download. The zip file includes a SQL script file that creates the input tables for the examples. If you are reading this document on https://docs.teradata.com/, you can download the zip file from the attachment in the left sidebar.
DROP TABLE onehot_titanic_dataset;
CREATE SET TABLE onehot_titanic_dataset(
Passenger_id INTEGER,
Survived INTEGER,
Pclass VARCHAR(5) CHARACTER SET LATIN NOT CASESPECIFIC,
Name VARCHAR(20) CHARACTER SET LATIN NOT CASESPECIFIC,
Age INTEGER,
Gender VARCHAR(10) CHARACTER SET LATIN NOT CASESPECIFIC,
City VARCHAR(5) CHARACTER SET LATIN NOT CASESPECIFIC,
Cabin VARCHAR(5) CHARACTER SET LATIN NOT CASESPECIFIC
);
-- Dense Input set.
INSERT INTO onehot_titanic_dataset VALUES (1, 0, 'A', 'Mr. Owen Harris', 22, 'male','Pune','a');
INSERT INTO onehot_titanic_dataset VALUES (2, 1, 'B', 'Mrs. John Bradley', 38, 'female','Hyd','a');
INSERT INTO onehot_titanic_dataset VALUES (3, 1, 'C', 'Mrs. Laina', 26, 'female','Pune','b');
INSERT INTO onehot_titanic_dataset VALUES (4, 0, 'B', 'Mrs. Jacques Heath', 25, 'female','Hyd','c');
INSERT INTO onehot_titanic_dataset VALUES (5, 1, 'D', 'Mr. John Doe', 27, 'male','Del','a');
INSERT INTO onehot_titanic_dataset VALUES (6, 1, 'E', 'Mr. Ben Tennision', 22, 'male','Hyd','b');
SELECT * FROM TD_OneHotEncodingFit(
ON onehot_titanic_dataset AS INPUTTABLE
USING
TargetColumn('Gender','Cabin','City')
OtherColumnName('other')
IsInputDense('true')
CategoryCounts(2,3,3)
Approach('Auto')
) AS dt;
CREATE TABLE categoryTable (column_name VARCHAR(20) CHARACTER SET Latin NOT CASESPECIFIC,
category VARCHAR(20) CHARACTER SET Latin NOT CASESPECIFIC);
INSERT INTO categoryTable values('Gender','Male');
INSERT INTO categoryTable values('Gender','Female');
INSERT INTO categoryTable values('Cabin','a');
INSERT INTO categoryTable values('Cabin','b');
INSERT INTO categoryTable values('Cabin','c');
INSERT INTO categoryTable values('City','Del');
INSERT INTO categoryTable values('City','Hyd');
SELECT * FROM TD_OneHotEncodingFit(
ON onehot_titanic_dataset AS INPUTTABLE
ON categoryTable AS categoryTable Dimension
USING
TargetColumn('Gender','Cabin','City')
CategoryCounts(2,3,2)
TargetColumnNames ('column_name')
CategoriesColumn ('category')
OtherColumnName('other')
IsInputDense('true')
Approach('List')
) AS dt ;
DROP TABLE onehot_sparse_input;
CREATE TABLE onehot_sparse_input (id INTEGER, attribute_column VARCHAR(20), value_column VARCHAR(20));
INSERT INTO onehot_sparse_input VALUES (1, 'Survived', 0);
INSERT INTO onehot_sparse_input VALUES (2, 'Survived', 1);
INSERT INTO onehot_sparse_input VALUES (3, 'Survived', 1);
INSERT INTO onehot_sparse_input VALUES (1, 'Pclass', 'A');
INSERT INTO onehot_sparse_input VALUES (2, 'Pclass', 'B');
INSERT INTO onehot_sparse_input VALUES (3, 'Pclass', 'C');
INSERT INTO onehot_sparse_input VALUES (1, 'Name', 'Mr. Owen Harris');
INSERT INTO onehot_sparse_input VALUES (2, 'Name', 'Mrs. John Bradley');
INSERT INTO onehot_sparse_input VALUES (3, 'Name', 'Mrs. Laina');
INSERT INTO onehot_sparse_input VALUES (1, 'Age', 22);
INSERT INTO onehot_sparse_input VALUES (2, 'Age', 38);
INSERT INTO onehot_sparse_input VALUES (3, 'Age', 26);
INSERT INTO onehot_sparse_input VALUES (1, 'Gender', 'male');
INSERT INTO onehot_sparse_input VALUES (2, 'Gender', 'female');
INSERT INTO onehot_sparse_input VALUES (3, 'Gender', 'female');
INSERT INTO onehot_sparse_input VALUES (1, 'City', 'Del');
INSERT INTO onehot_sparse_input VALUES (2, 'City', 'Hyd');
INSERT INTO onehot_sparse_input VALUES (3, 'City', 'Pune');
INSERT INTO onehot_sparse_input VALUES (1, 'Cabin', 'a');
INSERT INTO onehot_sparse_input VALUES (2, 'Cabin', 'a');
INSERT INTO onehot_sparse_input VALUES (3, 'Cabin', 'b');
TD_OneHotEncodingFit SQL Call: Sparse Input
SELECT * FROM TD_OneHotEncodingFit(
ON onehot_sparse_input AS InputTable PARTITION BY attribute_column
USING
IsInputDense ('false')
TargetAttributes ('Gender','Cabin','City')
AttributeColumn ('attribute_column')
ValueColumn ('value_column')
) AS dt ORDER BY 1,2;