Used known iris dataset, where species values are changed to 1, 2, and 3.
Use the following code to create the table and insert the rows used in this example:
CREATE MULTISET TABLE iris_enc_sample ,FALLBACK , NO BEFORE JOURNAL, NO AFTER JOURNAL, CHECKSUM = DEFAULT, DEFAULT MERGEBLOCKRATIO, MAP = TD_MAP1 ( id INTEGER, sepal_length FLOAT, sepal_width FLOAT, petal_length FLOAT, petal_width FLOAT, species INTEGER) PRIMARY INDEX ( id ); INSERT INTO iris_enc_sample (id,sepal_length,sepal_width,petal_length,petal_width,species) VALUES (78,6.0,2.9,4.5,1.5,2); INSERT INTO iris_enc_sample (id,sepal_length,sepal_width,petal_length,petal_width,species) VALUES (141,6.9,3.1,5.1,2.3,3); INSERT INTO iris_enc_sample (id,sepal_length,sepal_width,petal_length,petal_width,species) VALUES (97,6.2,2.9,4.3,1.3,2); INSERT INTO iris_enc_sample (id,sepal_length,sepal_width,petal_length,petal_width,species) VALUES (13,4.3,3.0,1.1,0.1,1); INSERT INTO iris_enc_sample (id,sepal_length,sepal_width,petal_length,petal_width,species) VALUES (112,6.8,3.0,5.5,2.1,3); INSERT INTO iris_enc_sample (id,sepal_length,sepal_width,petal_length,petal_width,species) VALUES (89,5.5,2.5,4.0,1.3,2); INSERT INTO iris_enc_sample (id,sepal_length,sepal_width,petal_length,petal_width,species) VALUES (7,5.0,3.4,1.5,0.2,1); INSERT INTO iris_enc_sample (id,sepal_length,sepal_width,petal_length,petal_width,species) VALUES (85,6.0,3.4,4.5,1.6,2); INSERT INTO iris_enc_sample (id,sepal_length,sepal_width,petal_length,petal_width,species) VALUES (5,5.4,3.9,1.7,0.4,1); INSERT INTO iris_enc_sample (id,sepal_length,sepal_width,petal_length,petal_width,species) VALUES (83,6.0,2.7,5.1,1.6,2); INSERT INTO iris_enc_sample (id,sepal_length,sepal_width,petal_length,petal_width,species) VALUES (39,5.1,3.4,1.5,0.2,1); INSERT INTO iris_enc_sample (id,sepal_length,sepal_width,petal_length,petal_width,species) VALUES (35,5.0,3.2,1.2,0.2,1); INSERT INTO iris_enc_sample (id,sepal_length,sepal_width,petal_length,petal_width,species) VALUES (94,5.6,2.7,4.2,1.3,2); INSERT INTO iris_enc_sample (id,sepal_length,sepal_width,petal_length,petal_width,species) VALUES (27,5.2,3.5,1.5,0.2,1); INSERT INTO iris_enc_sample (id,sepal_length,sepal_width,petal_length,petal_width,species) VALUES (111,6.4,2.7,5.3,1.9,3);
TD_SMOTE SQL Call Using SamplingStrategy with smote
SELECT * FROM TD_SMOTE ( ON iris_enc_sample AS InputTable PARTITION BY ANY USING NumberOfNeighbors(5) IDColumn('id') MinorityClass('3') ResponseColumn('species') InputColumns('sepal_length', 'sepal_width', 'petal_length', 'petal_width') OversamplingFactor(2) SamplingStrategy('smote') ) AS dt;
id | sepal_length | sepal_width | petal_length | petal_width | species |
---|---|---|---|---|---|
111 | 6.4169546624 | 2.9899449291 | 5.3430063214 | 2.0269648927 | 3 |
111 | 6.4995803363 | 2.7037404941 | 5.329428712 | 1.9927850369 | 3 |
112 | 6.4149847879 | 2.8598442445 | 5.4052837772 | 1.9678773908 | 3 |
112 | 6.5928787566 | 2.9323622059 | 5.4090139685 | 1.9253975269 | 3 |
141 | 6.8490544092 | 2.9613735225 | 5.1891754861 | 1.942250495 | 3 |
141 | 6.4820585 | 2.8475600946 | 5.2059669699 | 1.9552546123 | 3 |
TD_SMOTE SQL Call Using SamplingStrategy with adasyn
SELECT * FROM TD_SMOTE ( ON iris_enc_sample AS InputTable PARTITION BY ANY USING NumberOfNeighbors(5) IDColumn('id') MinorityClass('3') ResponseColumn('species') InputColumns('sepal_length', 'sepal_width', 'petal_length', 'petal_width') OversamplingFactor(2) SamplingStrategy('adasyn') ) AS dt;
id | sepal_length | sepal_width | petal_length | petal_width | species |
---|---|---|---|---|---|
111 | 6.3830453376 | 2.7 | 5.2569936786 | 1.709552661 | 3 |
111 | 6.3004196637 | 2.7024936628 | 5.182285152 | 1.7144299261 | 3 |
112 | 6.896253803 | 3.0467185852 | 5.3105675544 | 2.2321226092 | 3 |
112 | 6.3857575132 | 2.9323622059 | 5.3180279369 | 1.6634938172 | 3 |
141 | 6.8898108818 | 3.0653433806 | 5.2783509723 | 2.1211252475 | 3 |
141 | 6.1477053001 | 3.2893299291 | 4.7820990903 | 1.6966955715 | 3 |
TD_SMOTE SQL Call Using SamplingStrategy with borderline
SELECT * FROM TD_SMOTE ( ON iris_enc_sample AS InputTable PARTITION BY ANY USING NumberOfNeighbors(10) IDColumn('id') MinorityClass('3') ResponseColumn('species') InputColumns('sepal_length', 'sepal_width', 'petal_length', 'petal_width') OversamplingFactor(2) SamplingStrategy('borderline') ) AS dt;
id | sepal_length | sepal_width | petal_length | petal_width | species |
---|---|---|---|---|---|
111 | 6.3830453376 | 2.7 | 5.2569936786 | 1.709552661 | 3 |
111 | 6.1510491593 | 2.7149619766 | 4.770283184 | 1.204112223 | 3 |
112 | 6.4149847879 | 2.8598442445 | 5.4052837772 | 1.9678773908 | 3 |
112 | 6.4893181349 | 2.9774540686 | 4.9540838108 | 1.4015901076 | 3 |
141 | 6.8898108818 | 3.0653433806 | 5.2783509723 | 2.1211252475 | 3 |
141 | 5.6461755001 | 3.6048798108 | 3.2985615116 | 0.6624594084 | 3 |