Input - Aster Analytics

Teradata Aster Analytics Foundation User Guide

Product
Aster Analytics
Release Number
6.21
Published
November 2016
Language
English (United States)
Last Update
2018-04-14
dita:mapPath
kiu1466024880662.ditamap
dita:ditavalPath
AA-notempfilter_pdf_output.ditaval
dita:id
B700-1021
lifecycle
previous
Product Category
Software

Both examples use the same input table, fstrainer_input. The table is created from the output of the StringSimilarity function by the following SQL statements, which also add and populate the match_tag column (used only by the supervised FellegiSunter function).

-- Start from a clean slate so the example can be re-run.
DROP TABLE IF EXISTS fstrainer_input;

-- Materialize the StringSimilarity output as a fact table partitioned on id.
-- ComparisonColumnPairs defines one output column per similarity measure
-- (jaro1_sim, ld1_sim, ngram1_sim, jw1_sim), each comparing src_text1 with
-- tar_text; Accumulate names the input columns listed alongside them.
CREATE FACT TABLE fstrainer_input (PARTITION KEY (id)) AS
SELECT *
FROM StringSimilarity (
    ON strsimilarity_input PARTITION BY ANY
    ComparisonColumnPairs (
        'jaro (src_text1 , tar_text ) AS jaro1_sim',
        'LD (src_text1 , tar_text, 2) AS ld1_sim',
        'n_gram (src_text1 , tar_text, 2) AS ngram1_sim',
        'jaro_winkler (src_text1 , tar_text, 2) AS jw1_sim'
    )
    CaseSensitive ('true')
    Accumulate ('id', 'src_text1', 'tar_text')
);

-- Add the label column; no default is given, so it holds no value until it is
-- set explicitly for each row.
ALTER TABLE fstrainer_input ADD COLUMN match_tag VARCHAR;

-- Assign the match_tag label for ids 1-12 with one set-based statement per
-- tag value instead of one UPDATE per row; the resulting table contents are
-- the same.
UPDATE fstrainer_input
SET match_tag = 'M'
WHERE id IN (1, 2, 3, 6, 8, 9);

UPDATE fstrainer_input
SET match_tag = 'U'
WHERE id IN (4, 5, 7, 10, 11, 12);

-- Display the finished input table, sorted by the id column by name rather
-- than by ordinal position so the ordering does not depend on column layout.
SELECT * FROM fstrainer_input ORDER BY id;