Both examples use the same input table, fspredict_input, which is created from the output of the StringSimilarity function by the following SQL statements.
-- Build fspredict_input, the shared input table for the examples, from the
-- output of StringSimilarity run over strsimilarity_input.

-- Drop any previous copy so the script can be re-run from scratch.
DROP TABLE IF EXISTS fspredict_input;

-- Materialize the function output as a fact table with id as partition key.
-- SELECT * is kept on purpose: the output columns are defined by the
-- function arguments below (the Accumulate columns and the AS aliases).
CREATE FACT TABLE fspredict_input (PARTITION KEY (id)) AS
SELECT *
FROM StringSimilarity (
    ON strsimilarity_input PARTITION BY ANY
    -- One entry per similarity measure; each compares src_text2 with
    -- tar_text and names its result column with the AS alias.
    ComparisonColumnPairs (
        'jaro (src_text2 , tar_text ) AS jaro1_sim',
        'LD (src_text2 , tar_text, 2) AS ld1_sim',
        'n_gram (src_text2 , tar_text, 2) AS ngram1_sim',
        'jaro_winkler (src_text2 , tar_text, 2) AS jw1_sim'
    )
    CaseSensitive ('true')
    Accumulate ('id', 'src_text2', 'tar_text')
);

-- Show the generated table. Sort by column name rather than position so the
-- ordering does not depend on the column order produced by SELECT *.
-- NOTE(review): the original used ORDER BY 1; this assumes id (the first
-- Accumulate column) is the first output column - confirm.
SELECT *
FROM fspredict_input
ORDER BY id;