Example: Cal Housing Data Set
Starting Data (cal_housing_ex_raw)
Only part of the dataset is shown in this example.
id |
MedInc |
HouseAge |
AveRooms |
AveBedrms |
Population |
AveOccup |
Latitude |
Longitude |
MedHouseVal |
14870 |
1.858 |
23 |
3.901 |
1.077 |
1025 |
2.47 |
32.64 |
-117.11 |
0.675 |
6044 |
2.114 |
27 |
3.855 |
1.072 |
1024 |
4.633 |
34.05 |
-117.74 |
1.109 |
3593 |
6.654 |
32 |
6.331 |
0.995 |
1285 |
3.104 |
34.24 |
-118.48 |
2.676 |
9454 |
1.228 |
25 |
5.504 |
1.154 |
991 |
2.629 |
39.77 |
-123.23 |
0.603 |
18760 |
3.282 |
16 |
5.998 |
1.076 |
1414 |
3.081 |
40.6 |
-122.25 |
1.283 |
11670 |
4.5 |
28 |
5.102 |
1.044 |
2112 |
2.63 |
33.84 |
-118.01 |
2.021 |
17768 |
2.756 |
29 |
4.53 |
1.04 |
3572 |
4.603 |
37.35 |
-121.85 |
1.601 |
244 |
2.391 |
44 |
4.866 |
1.164 |
2269 |
3.72 |
37.78 |
-122.22 |
1.117 |
5328 |
2.768 |
23 |
3.039 |
1.064 |
2031 |
1.637 |
34.04 |
-118.45 |
2.775 |
14365 |
2.164 |
43 |
4.533 |
0.995 |
392 |
1.867 |
32.72 |
-117.23 |
2.442 |
2313 |
2.486 |
15 |
5.468 |
1.045 |
649 |
2.449 |
36.94 |
-119.7 |
0.863 |
12342 |
2.588 |
28 |
6.268 |
1.372 |
3470 |
2.59 |
33.84 |
-116.53 |
1.59 |
6558 |
6.827 |
36 |
7.021 |
1.036 |
1897 |
2.71 |
34.2 |
-118.11 |
3.594 |
17157 |
9.78 |
20 |
6.678 |
0.918 |
324 |
2.219 |
37.43 |
-122.21 |
5 |
18099 |
5.753 |
27 |
6.437 |
1.027 |
1259 |
2.868 |
37.32 |
-122.01 |
4.314 |
Standardize Data Using TD_ScaleFit with Housing Data
SELECT * FROM TD_ScaleFit(
ON cal_housing_ex_raw AS InputTable
OUT VOLATILE TABLE OutputTable(scaleFitOut_cal_ex)
USING
TargetColumns('medinc', 'houseage', 'averooms', 'avebedrms', 'population', 'aveoccup', 'latitude',
'longitude')
ScaleMethod('STD')
) AS dt2;
Output from TD_ScaleFit
TD_STATYPE |
MedInc |
HouseAge |
AveRooms |
AveBedrms |
Population |
AveOccup |
Latitude |
Longitude |
min |
1.228 |
15 |
3.039 |
0.918 |
324 |
1.637 |
32.64 |
-123.23 |
max |
9.78 |
44 |
7.021 |
1.372 |
3572 |
4.633 |
40.6 |
-116.53 |
sum |
57.698 |
416 |
79.531 |
16.08 |
23714 |
43.208 |
536.76 |
-1795.16 |
count |
15 |
15 |
15 |
15 |
15 |
15 |
15 |
15 |
null |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
variance |
5.755 |
71.352 |
1.377 |
0.01 |
974223.638 |
0.746 |
6.407 |
5.488 |
std |
2.318 |
8.161 |
1.134 |
0.099 |
953.559 |
0.834 |
2.445 |
2.263 |
ustd |
2.318 |
8.161 |
1.134 |
0.099 |
953.559 |
0.834 |
2.445 |
2.263 |
multiplier |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
intercept |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
location |
3.847 |
27.733 |
5.302 |
1.072 |
1580.933 |
2.881 |
35.784 |
-119.677 |
scale |
2.318 |
8.161 |
1.134 |
0.099 |
953.559 |
0.834 |
2.445 |
2.263 |
globalscale_false |
null |
null |
null |
null |
null |
null |
null |
null |
ScaleMethodNumberMapping: [0:mean,1:sum,2:ustd,3:std,4:range,5:midrange,6:maxabs,7:rescale] |
3 |
3 |
3 |
3 |
3 |
3 |
3 |
3 |
missvalue_KEEP |
null |
null |
null |
null |
null |
null |
null |
null |
Standardize Data Using TD_ScaleTransform with Housing Data
CREATE MULTISET TABLE cal_housing_ex_scaled AS (
SELECT * from TD_ScaleTransform(
ON cal_housing_ex_raw AS InputTable
ON scaleFitOut_cal_ex AS FitTable DIMENSION
USING
Accumulate('id', 'MedHouseVal')
) AS dt1
) with data;
Output Using TD_ScaleTransform
id |
MedHouseVal |
MedInc |
HouseAge |
AveRooms |
AveBedrms |
Population |
AveOccup |
Latitude |
Longitude |
14870 |
0.675 |
-0.858 |
-0.58 |
-1.236 |
0.051 |
-0.583 |
-0.492 |
-1.286 |
1.134 |
6044 |
1.109 |
-0.747 |
-0.09 |
-1.276 |
0.004 |
-.584 |
2.101 |
-0.709 |
0.856 |
3593 |
2.676 |
1.211 |
0.523 |
0.908 |
-0.777 |
-.31 |
0.268 |
-0.631 |
0.529 |
9454 |
0.603 |
-1.13 |
-0.335 |
0.178 |
0.827 |
-.619 |
-0.302 |
1.63 |
-1.57 |
18760 |
1.283 |
-0.006 |
-1.438 |
0.614 |
0.043 |
-0.175 |
0.24 |
1.969 |
-1.137 |
11670 |
2.021 |
0.282 |
0.033 |
-0.176 |
-0.287 |
0.557 |
-0.3 |
-0.795 |
0.737 |
17768 |
1.601 |
-0.47 |
0.155 |
-0.681 |
-.324 |
2.088 |
2.065 |
0.64 |
-0.96 |
244 |
1.117 |
-0.628 |
1.993 |
-0.385 |
0.929 |
0.722 |
1.006 |
0.816 |
-1.123 |
5328 |
2.775 |
-0.465 |
-0.58 |
-1.996 |
-0.077 |
0.472 |
-1.491 |
-0.713 |
0.542 |
14365 |
2.442 |
-0.726 |
1.871 |
-0.678 |
-.0776 |
-1.247 |
-1.215 |
-1.253 |
1.081 |
2313 |
0.863 |
-.0587 |
-1.56 |
0.146 |
-0.27 |
-0.977 |
-0.517 |
0.473 |
-.01 |
12342 |
1.59 |
-0.543 |
0.033 |
0.852 |
3.035 |
1.981 |
-0.349 |
-0.795 |
1.391 |
6558 |
3.594 |
1.286 |
1.013 |
1.517 |
-0.367 |
0.331 |
-0.204 |
-0.648 |
0.692 |
17157 |
5 |
2.56 |
-0.948 |
1.214 |
-1.558 |
-1.318 |
-0.793 |
0.673 |
1.119 |
18099 |
4.314 |
0.822 |
-0.09 |
1.001 |
-0.452 |
-0.338 |
-0.015 |
0.628 |
-1.044 |
Example: Using TD_SVM SQL Using Regression Model
CREATE VOLATILE TABLE svm_model_cal_housing AS (
SELECT * FROM TD_SVM (
ON cal_housing_ex_scaled
USING
InputColumns('medinc', 'houseage', 'averooms', 'avebedrms', 'population', 'aveoccup', 'latitude', 'longitude')
ResponseColumn('MedHouseVal')
ModelType('Regression')
BatchSize(10)
MaxIterNum(300)
) AS dt
) WITH DATA
ON COMMIT PRESERVE ROWS;
Output from TD_SVM
attribute |
predictor |
estimate |
value |
-17 |
OneClass SVM |
NaN |
FALSE |
-16 |
Kernel |
NaN |
LINEAR |
-15 |
Intercept Scaling |
1.000000 |
None |
-14 |
Epsilon |
0.100000 |
None |
-13 |
LocalSGD Iterations |
0.000000 |
None |
-12 |
Nesterov |
NaN |
FALSE |
-11 |
Momentum |
0.000000 |
None |
-10 |
Learning Rate (Final) |
0.204246 |
None |
-9 |
Learning Rate (Initial) |
0.050000 |
None |
-8 |
Number of Iterations |
227.000000 |
CONVERGED |
-7 |
Alpha |
0.150000 |
Elasticnet |
-6 |
Regularization |
0.020000 |
ENABLED |
-5 |
BIC |
5.572674 |
None |
-4 |
AIC |
-0.799777 |
None |
-3 |
Number of Observations |
15.000000 |
None |
-2 |
MSE |
0.285556 |
None |
-1 |
Loss Function |
NaN |
EPSILON_INSENSITIVE |
0 |
(Intercept) |
2.191718 |
None |
1 |
MedInc |
1.226729 |
None |
2 |
HouseAge |
.0232213 |
None |
3 |
AveRooms |
-.332659 |
None |
4 |
AveBedrms |
0.000000 |
None |
5 |
Population |
0.113633 |
None |
6 |
AveOccup |
-0.286434 |
None |
7 |
Latitude |
-0.261509 |
None |
8 |
Longitude |
-0.198519 |
None |
Example: TD_SVM Classification iris Data Set
id |
sepal_length |
sepal_width |
petal_length |
petal_width |
variety |
61 |
5.0 |
2.0 |
3.5 |
1.0 |
Versicolor |
51 |
7.0 |
3.2 |
4.7 |
1.4 |
Versicolor |
50 |
5.1 |
3.4 |
1.5 |
0.2 |
Setosa |
59 |
6.6 |
2.9 |
4.6 |
1.3 |
Versicolor |
38 |
4.9 |
3.6 |
1.4 |
0.1 |
Setosa |
... |
... |
... |
... |
... |
... |
90 |
5.5 |
2.5 |
4.0 |
1.3 |
Versicolor |
67 |
5.6 |
3.0 |
4.5 |
1.5 |
Versicolor |
44 |
5.0 |
3.5 |
1.6 |
0.6 |
Setosa |
42 |
4.5 |
2.3 |
1.3 |
0.3 |
Setosa |
82 |
5.5 |
2.4 |
3.7 |
1.0 |
Versicolor |
TD_SVM Call Using Classification Model
CREATE VOLATILE TABLE svm_model_iris_data AS (
SELECT * FROM TD_SVM (
ON iris_data
USING
InputColumns('[1:4]')
ResponseColumn('label')
ModelType('Classification')
BatchSize(10)
MaxIterNum(300)
) AS dt
) WITH DATA
ON COMMIT PRESERVE ROWS;
Output from TD_SVM
attribute |
predictor |
estimate |
value |
-17 |
OneClass SVM |
NaN |
FALSE |
-16 |
Kernel |
NaN |
LINEAR |
-15 |
Intercept Scaling |
1.000000 |
None |
-13 |
LocalSGD Iterations |
0.000000 |
None |
-12 |
Nesterov |
NaN |
FALSE |
-11 |
Momentum |
0.000000 |
None |
-10 |
Learning Rate (Final) |
0.677479 |
None |
-9 |
Learning Rate (Initial) |
0.050000 |
None |
-8 |
Number of Iterations |
56.000000 |
CONVERGED |
-7 |
Alpha |
0.150000 |
Elasticnet |
-6 |
Regularization |
0.020000 |
ENABLED |
-5 |
BIC |
20.794415 |
None |
-4 |
AIC |
10.000000 |
None |
-3 |
Number of Observations |
64.000000 |
None |
-2 |
Loglik |
-0.000000 |
None |
-1 |
Loss Function |
NaN |
HINGE |
0 |
(Intercept) |
0.43063 |
None |
1 |
sepal_length |
0.163885 |
None |
2 |
sepal_width |
0.907101 |
None |
3 |
petal_length |
-1.411058 |
None |
4 |
petal_width |
-0.527589 |
None |