InputTable
create table kmeans_input_table (id int, c1 int, c2 int);
insert into kmeans_input_table values(1,1,1);
insert into kmeans_input_table values(2,2,2);
insert into kmeans_input_table values(3,8,8);
insert into kmeans_input_table values(4,9,9);
id C1 C2
-- -- --
1 1 1
2 2 2
3 8 8
4 9 9
InitialCentroidsTable
create table kmeans_initial_centroids_table as (select * from kmeans_input_table where id in (2,4)) with data;
id C1 C2
-- -- --
2 2 2
4 9 9
Example: TD_KMeans SQL Call: InitialCentroidsTable is Not Provided
SELECT * FROM TD_KMeans (
ON kmeans_input_table AS InputTable
USING
IdColumn('id')
TargetColumns('c1','c2')
NumClusters(2)
Seed(0)
StopThreshold(0.0395)
MaxIterNum(3)
)AS dt;
Output: InitialCentroidsTable is Not Provided
td_clusterid_kmeans C1 C2 td_size_kmeans td_withinss_kmeans id td_modelinfo_kmeans
------------------- --- --- --------------- ------------------ --- -------------------
0 1.5 1.5 2 1 NULL NULL
1 8.5 8.5 2 1 NULL NULL
NULL NULL NULL NULL NULL NULL Converged : True
NULL NULL NULL NULL NULL NULL Number of Iterations : 2
NULL NULL NULL NULL NULL NULL Number of Clusters : 2
NULL NULL NULL NULL NULL NULL Total_WithinSS 2.00000000000000E+00
NULL NULL NULL NULL NULL NULL Between_SS : 9.80000000000000E+01
NULL NULL NULL NULL NULL NULL Method for InitialCentroids : Random
Example: TD_KMeans SQL Call: InitialCentroidsTable is Provided
SELECT * FROM TD_KMeans (
ON kmeans_input_table AS InputTable
ON kmeans_initial_centroids_table AS InitialCentroidsTable DIMENSION
USING
IdColumn('id')
TargetColumns('c1','c2')
StopThreshold(0.0395)
MaxIterNum(3)
) AS dt;
Output: InitialCentroidsTable is Provided
td_clusterid_kmeans C1 C2 td_size_kmeans td_withinss_kmeans id td_modelinfo_kmeans
------------------- --- --- --------------- ------------------ ---- --------------------
0 1.5 1.5 2 1 NULL NULL
1 8.5 8.5 2 1 NULL NULL
NULL NULL NULL NULL NULL NULL Converged : True
NULL NULL NULL NULL NULL NULL Number of Iterations : 2
NULL NULL NULL NULL NULL NULL Number of Clusters : 2
NULL NULL NULL NULL NULL NULL Total_WithinSS : 2.00000000000000E+00
NULL NULL NULL NULL NULL NULL Between_SS : 9.80000000000000E+01
NULL NULL NULL NULL NULL NULL Method for InitialCentroids : Externally supplied InitialCentroidsTable
Example: TD_KMeans SQL Call: TD_KMeans Call with OutputClusterAssignment Set to true
SELECT * FROM TD_KMeans (
ON kmeans_input_table AS InputTable
ON kmeans_initial_centroids_table AS InitialCentroidsTable DIMENSION
USING
IdColumn('id')
TargetColumns('c1','c2')
StopThreshold(0.0395)
MaxIterNum(3)
OutputClusterAssignment('true')
)AS dt;
Output: TD_KMeans Call with OutputClusterAssignment Set to true
id td_clusterid_kmeans
----------- --------------------
1 0
2 0
3 1
4 1
Example: TD_KMeans SQL Call: with OutputClusterAssignment Set to true and ModelTable [Out Table Clause] is Provided
SELECT * FROM TD_KMeans (
ON kmeans_input_table AS InputTable
ON kmeans_initial_centroids_table AS InitialCentroidsTable DIMENSION
OUT TABLE ModelTable(kmeans_model)
USING
IdColumn('id')
TargetColumns('c1','c2')
StopThreshold(0.0395)
MaxIterNum(3)
OutputClusterAssignment('true')
)AS dt;
Output: TD_KMeans Call with OutputClusterAssignment Set to true and ModelTable [Out Table Clause] is Provided
id td_clusterid_kmeans
----------- --------------------
1 0
2 0
3 1
4 1
KMeans ModelTable Output:
td_clusterid_kmeans c1 c2 td_size_kmeans td_withinss_kmeans id td_modelinfo_kmeans
------------------- ----------- ----------- -------------- ------------------ ---- -----------
Null Null Null Null Null Null Converged : True
Null Null Null Null Null Null Number of Clusters : 2
Null Null Null Null Null Null Total_WithinSS : 2.00000000000000E+00
Null Null Null Null Null Null Between_SS : 9.80000000000000E+01
Null Null Null Null Null Null Number of Iterations : 2
0 1.500000000 1.500000000 2 1.000000000 Null Null
1 8.500000000 8.500000000 2 1.000000000 Null Null
Null Null Null Null Null Null Method for InitialCentroids : Externally supplied InitialCentroidsTable
Input [InitialCentroidsMethod : 'kmeans++']
create table kmeans_input_table_2 (id INTEGER, c1 FLOAT, c2 FLOAT);
insert into kmeans_input_table_2 values(1,18,18);
insert into kmeans_input_table_2 values(2,19,19);
insert into kmeans_input_table_2 values(3,20,20);
insert into kmeans_input_table_2 values(4,55,55);
insert into kmeans_input_table_2 values(5,56,56);
insert into kmeans_input_table_2 values(6,57,57);
insert into kmeans_input_table_2 values(7,88,88);
insert into kmeans_input_table_2 values(8,89,89);
insert into kmeans_input_table_2 values(9,90,90);
kmeans_input_table_2
id c1 c2
-- -- --
1 18 18
2 19 19
3 20 20
4 55 55
5 56 56
6 57 57
7 88 88
8 89 89
9 90 90
Example: TD_KMeans SQL Call [InitialCentroidsMethod : 'kmeans++']
SELECT * FROM TD_KMeans (
ON kmeans_input_table_2 AS InputTable
USING
IdColumn('id')
TargetColumns('c1','c2')
InitialCentroidsMethod('kmeans++')
NumClusters(3)
Seed(10)
StopThreshold(0.0395)
MaxIterNum(3)
) AS dt;
Output [InitialCentroidsMethod : 'kmeans++']
td_clusterid_kmeans c1 c2 td_size_kmeans td_withinss_kmeans id td_modelinfo_kmeans
------------------- ---- --- -------------- ------------------- --- -------------------
0 89 89 3 4 NULL NULL
1 19 19 3 4 NULL NULL
2 56 56 3 4 NULL NULL
NULL NULL NULL NULL NULL NULL Converged : True
NULL NULL NULL NULL NULL NULL Number of Iterations : 2
NULL NULL NULL NULL NULL NULL Number of Clusters : 3
NULL NULL NULL NULL NULL NULL Total_WithinSS :1.20000000000000E+01
NULL NULL NULL NULL NULL NULL Between_SS :1.47160000000000E+04
NULL NULL NULL NULL NULL NULL Method for InitialCentroids: KMeans++