Multiple column support for TD_OrdinalEncoding, TD_OneHotEncoding, and TD_Histogram is available in release 17.20.03.07 and later. If you are using an older version, the TargetColumn argument accepts only one column.
Dense InputTable: onehot_titanic_dataset
Passenger_id |
Survived |
Pclass |
Name |
Age |
Gender |
City |
Cabin |
5 |
1 |
D |
Mr. John Doe |
27 |
male |
Del |
a |
4 |
0 |
B |
Mrs. Jacques Heath |
25 |
female |
Hyd |
c |
6 |
1 |
E |
Mr. Ben Tennison |
22 |
male |
Hyd |
b |
3 |
1 |
C |
Mrs. Laina |
26 |
female |
Pune |
b |
1 |
0 |
A |
Mr. Owen Harris |
22 |
male |
Pune |
a |
2 |
1 |
B |
Mrs. John Bradley |
38 |
female |
Hyd |
a |
FitTable: onehot_titanic_fit_output
CREATE TABLE onehot_titanic_fit_output as (
SELECT * FROM TD_OneHotEncodingFit(
ON onehot_titanic_dataset AS INPUTTABLE
USING
TargetColumn('Gender','Cabin','City')
OtherColumnName('other')
IsInputDense('true')
CategoryCounts(2,3,3)
Approach('Auto')
) AS dt
)with data;
TD_OneHotEncodingTransform Output Table: FitTable
Gender |
Gender_0 |
Gender_1 |
Gender_other |
Cabin |
Cabin_0 |
Cabin_1 |
Cabin_2 |
Cabin_other |
City |
City_0 |
City_1 |
City_2 |
City_other |
|
female |
male |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
a |
b |
c |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Del |
Hyd |
Pune |
|
TD_OneHotEncodingTransform SQL Call: Dense Input
SELECT * FROM TD_OneHotEncodingTransform (
ON onehot_titanic_dataset AS InputTable
ON onehot_titanic_fit_output AS FitTable Dimension
USING
IsInputDense('True')
) AS dt ORDER BY 1;
TD_OneHotEncodingTransform Output Table: Dense Input
Passenger_id |
Survived |
Pclass |
Name |
Age |
Gender_0 |
Gender_1 |
Gender_other |
City_0 |
City_1 |
City_2 |
City_other |
Cabin_0 |
Cabin_1 |
Cabin_2 |
Cabin_other |
1 |
0 |
A |
Mr. Owen Harris |
22 |
0 |
1 |
0 |
0 |
0 |
1 |
0 |
1 |
0 |
0 |
0 |
2 |
1 |
B |
Mrs. John Bradley |
38 |
1 |
0 |
0 |
0 |
1 |
0 |
0 |
1 |
0 |
0 |
0 |
3 |
1 |
C |
Mrs. Laina |
26 |
1 |
0 |
0 |
0 |
0 |
1 |
0 |
0 |
1 |
0 |
0 |
4 |
0 |
B |
Mrs. Jacques Heath |
25 |
1 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
0 |
1 |
0 |
5 |
1 |
D |
Mr. John Doe |
27 |
0 |
1 |
0 |
1 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
6 |
1 |
E |
Mr. Ben Tennison |
22 |
0 |
1 |
0 |
0 |
1 |
0 |
0 |
0 |
1 |
0 |
0 |
Sparse InputTable: onehot_sparse_input
DROP TABLE onehot_sparse_input;
CREATE TABLE onehot_sparse_input (id INTEGER, attribute_column VARCHAR(20), value_column VARCHAR(20));
INSERT INTO onehot_sparse_input VALUES (1, 'Survived', 0);
INSERT INTO onehot_sparse_input VALUES (2, 'Survived', 1);
INSERT INTO onehot_sparse_input VALUES (3, 'Survived', 1);
INSERT INTO onehot_sparse_input VALUES (1, 'Pclass', 'A');
INSERT INTO onehot_sparse_input VALUES (2, 'Pclass', 'B');
INSERT INTO onehot_sparse_input VALUES (3, 'Pclass', 'C');
INSERT INTO onehot_sparse_input VALUES (1, 'Name', 'Mr. Owen Harris');
INSERT INTO onehot_sparse_input VALUES (2, 'Name', 'Mrs. John Bradley');
INSERT INTO onehot_sparse_input VALUES (3, 'Name', 'Mrs. Laina');
INSERT INTO onehot_sparse_input VALUES (1, 'Age', 22);
INSERT INTO onehot_sparse_input VALUES (2, 'Age', 38);
INSERT INTO onehot_sparse_input VALUES (3, 'Age', 26);
INSERT INTO onehot_sparse_input VALUES (1, 'Gender', 'male');
INSERT INTO onehot_sparse_input VALUES (2, 'Gender', 'female');
INSERT INTO onehot_sparse_input VALUES (3, 'Gender', 'female');
INSERT INTO onehot_sparse_input VALUES (1, 'City', 'Del');
INSERT INTO onehot_sparse_input VALUES (2, 'City', 'Hyd');
INSERT INTO onehot_sparse_input VALUES (3, 'City', 'Pune');
INSERT INTO onehot_sparse_input VALUES (1, 'Cabin', 'a');
INSERT INTO onehot_sparse_input VALUES (2, 'Cabin', 'a');
INSERT INTO onehot_sparse_input VALUES (3, 'Cabin', 'b');
Input data: onehot_sparse_input
id |
attribute_column |
value_column |
3 |
Survived |
1 |
3 |
Pclass |
C |
3 |
Name |
Mrs. Laina |
3 |
Age |
26 |
3 |
Gender |
female |
3 |
City |
Pune |
3 |
Cabin |
b |
1 |
Survived |
0 |
1 |
Pclass |
A |
1 |
Name |
Mr. Owen Harris |
1 |
Age |
22 |
1 |
Gender |
male |
1 |
City |
Del |
1 |
Cabin |
a |
2 |
Survived |
1 |
2 |
Pclass |
B |
2 |
Name |
Mrs. John Bradley |
2 |
Age |
38 |
2 |
Gender |
female |
2 |
City |
Hyd |
2 |
Cabin |
a |
FitTable: onehot_sparse_fit
CREATE TABLE onehot_sparse_fit AS (
SELECT * FROM TD_OneHotEncodingFit (
ON onehot_sparse_input AS InputTable PARTITION BY attribute_column
USING
IsInputDense ('false')
TargetAttributes ('Gender','Cabin','City')
AttributeColumn ('attribute_column')
ValueColumn ('value_column')
) AS dt ) WITH DATA;
TD_OneHotEncodingTransform FitTable Output Table: onehot_sparse_fit
attribute_column |
value_column |
TD_VALUE_TYPE_OHEFIT |
City |
Pune |
0 |
Gender |
male |
0 |
Cabin |
a |
0 |
City |
Del |
0 |
Gender |
female |
0 |
Cabin |
b |
0 |
City |
Hyd |
0 |
TD_OneHotEncodingTransform SQL Call: Sparse Input
SELECT * FROM TD_OneHotEncodingTransform (
ON onehot_sparse_input AS InputTable PARTITION BY attribute_column
ON onehot_sparse_fit AS FitTable PARTITION BY attribute_column
USING
IsInputDense ('false')
) AS dt ORDER BY 1,2;
TD_OneHotEncodingTransform Output Table: Sparse Input
id |
attribute_column |
value_column |
1 |
Age |
22 |
1 |
Cabin_a |
1 |
1 |
Cabin_b |
0 |
1 |
Cabin_other |
0 |
1 |
City_Del |
1 |
1 |
City_Hyd |
0 |
1 |
City_other |
0 |
1 |
City_Pune |
0 |
1 |
Name |
Mr. Owen Harris |
1 |
Pclass |
A |
1 |
Gender_female |
0 |
1 |
Gender_male |
1 |
1 |
Gender_other |
0 |
1 |
Survived |
0 |
2 |
Age |
38 |
2 |
Cabin_a |
1 |
2 |
Cabin_b |
0 |
2 |
Cabin_other |
0 |
2 |
City_Del |
0 |
2 |
City_Hyd |
1 |
2 |
City_other |
0 |
2 |
City_Pune |
0 |
2 |
Name |
Mrs. John Bradley |
2 |
Pclass |
B |
2 |
Gender_female |
1 |
2 |
Gender_male |
0 |
2 |
Gender_other |
0 |
2 |
Survived |
1 |
3 |
Age |
26 |
3 |
Cabin_a |
0 |
3 |
Cabin_b |
1 |
3 |
Cabin_other |
0 |
3 |
City_Del |
0 |
3 |
City_Hyd |
0 |
3 |
City_other |
0 |
3 |
City_Pune |
1 |
3 |
Name |
Mrs. Laina |
3 |
Pclass |
C |
3 |
Gender_female |
1 |
3 |
Gender_male |
0 |
3 |
Gender_other |
0 |
3 |
Survived |
1 |