Both examples use the same input table, fstrainer_input. The table is created from the output of the StringSimilarity function by the following SQL statements, which then add and populate the match_tag column (used by the supervised FellegiSunter function).
-- Build fstrainer_input: the StringSimilarity output for strsimilarity_input,
-- plus a manually assigned match_tag label per row.

-- Start from a clean slate so the script can be re-run.
DROP TABLE IF EXISTS fstrainer_input;

-- Materialize the StringSimilarity output as a fact table partitioned on id.
-- Each ComparisonColumnPairs entry produces one similarity column
-- (jaro1_sim, ld1_sim, ngram1_sim, jw1_sim); Accumulate carries id,
-- src_text1 and tar_text through from the input.
CREATE FACT TABLE fstrainer_input (PARTITION KEY (id)) AS
SELECT *
FROM StringSimilarity (
    ON strsimilarity_input PARTITION BY ANY
    ComparisonColumnPairs (
        'jaro (src_text1 , tar_text ) AS jaro1_sim',
        'LD (src_text1 , tar_text, 2) AS ld1_sim',
        'n_gram (src_text1 , tar_text, 2) AS ngram1_sim',
        'jaro_winkler (src_text1 , tar_text, 2) AS jw1_sim'
    )
    CaseSensitive ('true')
    Accumulate ('id', 'src_text1', 'tar_text')
);

-- Label column for the twelve rows tagged below.
ALTER TABLE fstrainer_input ADD COLUMN match_tag VARCHAR;

-- NOTE(review): 'M' / 'U' presumably mean "match" / "unmatch" for the
-- supervised FellegiSunter function -- confirm against its documentation.
UPDATE fstrainer_input
SET match_tag = 'M'
WHERE id IN (1, 2, 3, 6, 8, 9);

UPDATE fstrainer_input
SET match_tag = 'U'
WHERE id IN (4, 5, 7, 10, 11, 12);

-- Show the finished table. Ordering by name rather than by position keeps the
-- result stable regardless of the column order StringSimilarity emits.
SELECT *
FROM fstrainer_input
ORDER BY id;