TD_KMeans Examples | kmeans | Teradata Vantage - Example: How to Use TD_KMeans - Analytics Database

Database Analytic Functions

Deployment
VantageCloud
VantageCore
Edition
Enterprise
IntelliFlex
VMware
Product
Analytics Database
Release Number
17.20
Published
June 2022
Language
English (United States)
Last Update
2024-04-06
dita:mapPath
gjn1627595495337.ditamap
dita:ditavalPath
ayr1485454803741.ditaval
dita:id
jmh1512506877710
Product Category
Teradata Vantageā„¢

InputTable

create table kmeans_input_table (id int, c1 int, c2 int);
insert into kmeans_input_table values(1,1,1);
insert into kmeans_input_table values(2,2,2);
insert into kmeans_input_table values(3,8,8);
insert into kmeans_input_table values(4,9,9);
 id C1 C2 
 -- -- -- 
  1  1  1
  2  2  2
  3  8  8
  4  9  9 

InitialCentroidsTable

create table kmeans_initial_centroids_table as (select * from kmeans_input_table where id in (2,4)) with data;
 id C1 C2 
 -- -- -- 
  2  2  2
  4  9  9

Example: TD_KMeans SQL Call: InitialCentroidsTable is Not Provided

SELECT * FROM TD_KMeans (
ON kmeans_input_table AS InputTable 
USING
IdColumn('id') 
TargetColumns('c1','c2') 
NumClusters(2)
Seed(0) 
StopThreshold(0.0395) 
MaxIterNum(3)
)AS dt;

Output: InitialCentroidsTable is Not Provided

td_clusterid_kmeans  C1  C2  td_size_kmeans  td_withinss_kmeans  id  td_modelinfo_kmeans
-------------------  --- --- --------------- ------------------  ---  -------------------
0     1.5   1.5   2     1     NULL  NULL 
1     8.5   8.5   2     1     NULL  NULL
NULL  NULL  NULL  NULL  NULL  NULL  Converged : True
NULL  NULL  NULL  NULL  NULL  NULL  Number of Iterations : 2
NULL  NULL  NULL  NULL  NULL  NULL  Number of Clusters : 2
NULL  NULL  NULL  NULL  NULL  NULL  Total_WithinSS 2.00000000000000E+00
NULL  NULL  NULL  NULL  NULL  NULL  Between_SS : 9.80000000000000E+01
NULL  NULL  NULL  NULL  NULL  NULL  Method for InitialCentroids : Random

Example: TD_KMeans SQL Call: InitialCentroidsTable is Provided

SELECT * FROM TD_KMeans (
  ON kmeans_input_table AS InputTable
  ON kmeans_initial_centroids_table AS InitialCentroidsTable DIMENSION
  USING
  IdColumn('id')
  TargetColumns('c1','c2')
  StopThreshold(0.0395)
  MaxIterNum(3)
) AS dt;

Output: InitialCentroidsTable is Provided

td_clusterid_kmeans  C1  C2  td_size_kmeans  td_withinss_kmeans  id  td_modelinfo_kmeans
-------------------  --- --- --------------- ------------------ ---- --------------------
0     1.5   1.5   2     1     NULL  NULL  
1     8.5   8.5   2     1     NULL  NULL        
NULL  NULL  NULL  NULL  NULL  NULL  Converged : True
NULL  NULL  NULL  NULL  NULL  NULL  Number of Iterations : 2
NULL  NULL  NULL  NULL  NULL  NULL  Number of Clusters : 2
NULL  NULL  NULL  NULL  NULL  NULL  Total_WithinSS : 2.00000000000000E+00
NULL  NULL  NULL  NULL  NULL  NULL  Between_SS : 9.80000000000000E+01
NULL  NULL  NULL  NULL  NULL  NULL  Method for InitialCentroids : Externally supplied InitialCentroidsTable

Example: TD_KMeans SQL Call: TD_KMeans Call with OutputClusterAssignment Set to true

SELECT * FROM TD_KMeans (
ON kmeans_input_table AS InputTable
ON kmeans_initial_centroids_table AS InitialCentroidsTable DIMENSION
USING
IdColumn('id')
TargetColumns('c1','c2')
StopThreshold(0.0395)
MaxIterNum(3)
OutputClusterAssignment('true')
)AS dt;

Output: TD_KMeans Call with OutputClusterAssignment Set to true

        id   td_clusterid_kmeans
-----------  --------------------
          1                     0
          2                     0
          3                     1
          4                     1

Example: TD_KMeans SQL Call: with OutputClusterAssignment Set to true and ModelTable [Out Table Clause] is Provided

SELECT * FROM TD_KMeans (
ON kmeans_input_table AS InputTable
ON kmeans_initial_centroids_table AS InitialCentroidsTable DIMENSION
OUT TABLE ModelTable(kmeans_model)
USING
IdColumn('id')
TargetColumns('c1','c2')
StopThreshold(0.0395)
MaxIterNum(3)
OutputClusterAssignment('true')
)AS dt;

Output: TD_KMeans Call with OutputClusterAssignment Set to true and ModelTable [Out Table Clause] is Provided

         id   td_clusterid_kmeans
-----------  --------------------
          1                     0
          2                     0
          3                     1
          4                     1

KMeans ModelTable Output:

td_clusterid_kmeans  c1  c2  td_size_kmeans  td_withinss_kmeans  id  td_modelinfo_kmeans                   
------------------- ----------- ----------- -------------- ------------------ ---- -----------
Null  Null         Null          Null   Null         Null  Converged : True                     
Null  Null         Null          Null   Null         Null  Number of Clusters : 2               
Null  Null         Null          Null   Null         Null  Total_WithinSS : 2.00000000000000E+00
Null  Null         Null          Null   Null         Null  Between_SS : 9.80000000000000E+01    
Null  Null         Null          Null   Null         Null  Number of Iterations : 2             
0     1.500000000  1.500000000   2      1.000000000  Null  Null   
1     8.500000000  8.500000000   2      1.000000000  Null  Null 
Null  Null         Null          Null   Null         Null  Method for InitialCentroids : Externally supplied InitialCentroidsTable

Input [InitialCentroidsMethod : 'kmeans++']

create table kmeans_input_table_2 (id INTEGER, c1 FLOAT, c2 FLOAT);
insert into kmeans_input_table_2 values(1,18,18);
insert into kmeans_input_table_2 values(2,19,19);
insert into kmeans_input_table_2 values(3,20,20);
insert into kmeans_input_table_2 values(4,55,55);
insert into kmeans_input_table_2 values(5,56,56);
insert into kmeans_input_table_2 values(6,57,57);
insert into kmeans_input_table_2 values(7,88,88);
insert into kmeans_input_table_2 values(8,89,89);
insert into kmeans_input_table_2 values(9,90,90);

kmeans_input_table_2

 id c1 c2 
 -- -- -- 
  1  18  18
  2  19  19
  3  20  20
  4  55  55 
  5  56  56
  6  57  57
  7  88  88
  8  89  89
  9  90  90

Example: TD_KMeans SQL Call [InitialCentroidsMethod : 'kmeans++']

SELECT * FROM TD_KMeans (
ON kmeans_input_table_2 AS InputTable
USING
IdColumn('id')
TargetColumns('c1','c2')
InitialCentroidsMethod('kmeans++')
NumClusters(3)
Seed(10)
StopThreshold(0.0395)
MaxIterNum(3)
) AS dt;

Output [InitialCentroidsMethod : 'kmeans++']

td_clusterid_kmeans  c1  c2  td_size_kmeans  td_withinss_kmeans  id  td_modelinfo_kmeans
------------------- ---- --- --------------  ------------------- --- -------------------
0     89    89    3     4     NULL  NULL                                  
1     19    19    3     4     NULL  NULL
2     56    56    3     4     NULL  NULL                                  
NULL  NULL  NULL  NULL  NULL  NULL  Converged : True
NULL  NULL  NULL  NULL  NULL  NULL  Number of Iterations : 2              
NULL  NULL  NULL  NULL  NULL  NULL  Number of Clusters : 3
NULL  NULL  NULL  NULL  NULL  NULL  Total_WithinSS :1.20000000000000E+01  
NULL  NULL  NULL  NULL  NULL  NULL  Between_SS :1.47160000000000E+04
NULL  NULL  NULL  NULL  NULL  NULL  Method for InitialCentroids: KMeans++