StringSimilarity Input
| id |
src_text1 |
src_text2 |
tar_text |
| 1 |
astre |
astter |
aster |
| 2 |
hone |
fone |
phone |
| 3 |
acqiese |
acquire |
acquiesce |
| 4 |
AAAACCCCCGGGGA |
CCCGGGAACCAACC |
CCAGGGAAACCCAC |
| 5 |
alice |
allen |
allies |
| 6 |
angela |
angle |
angels |
| 7 |
senter |
center |
centre |
| 8 |
chef |
cheap |
chief |
| 9 |
circus |
circle |
circuit |
| 10 |
debt |
debut |
debris |
| 11 |
deal |
dell |
lead |
| 12 |
bare |
bear |
bear |
StringSimilarity SQL Call with Specified Column Ranges
-- Compare src_text1 with tar_text using four similarity measures;
-- each comparison produces one output column named by its AS alias.
SELECT *
FROM StringSimilarity (
    ON strsimilarity_input PARTITION BY ANY
    USING
    ComparisonColumnPairs (
        'jaro (src_text1, tar_text) AS jaro1_sim',
        'LD (src_text1, tar_text) AS ld1_sim',
        'n_gram (src_text1, tar_text, 2) AS ngram1_sim',
        'jaro_winkler (src_text1, tar_text, 0.1) AS jw1_sim'
    )
    CaseSensitive ('true')
    -- Input columns carried through to the output: the column range [0:1]
    -- (id and src_text1 in the sample output) plus tar_text.
    Accumulate ('[0:1]', 'tar_text')
) AS dt
ORDER BY id;
StringSimilarity Output with Specified Column Ranges
Columns 1-3| id |
src_text1 |
tar_text |
| 1 |
astre |
aster |
| 2 |
hone |
phone |
| 3 |
acqiese |
acquiesce |
| 4 |
AAAACCCCCGGGGA |
CCAGGGAAACCCAC |
| 5 |
alice |
allies |
| 6 |
angela |
angels |
| 7 |
senter |
centre |
| 8 |
chef |
chief |
| 9 |
circus |
circuit |
| 10 |
debt |
debris |
| 11 |
deal |
lead |
| 12 |
bare |
bear |
Columns 4-7| jaro1_sim |
ld1_sim |
ngram1_sim |
jw1_sim |
| 0.933 |
0.6 |
0.5 |
0.953 |
| 0.933 |
0.8 |
0.75 |
0.933 |
| 0.926 |
0.778 |
0.5 |
0.948 |
| 0.824 |
0.214 |
0.385 |
0.824 |
| 0.822 |
0.5 |
0.4 |
0.858 |
| 0.889 |
0.833 |
0.8 |
0.933 |
| 0.822 |
0.5 |
0.4 |
0.822 |
| 0.933 |
0.8 |
0.5 |
0.947 |
| 0.849 |
0.714 |
0.667 |
0.91 |
| 0.75 |
0.5 |
0.4 |
0.825 |
| 0.667 |
0.5 |
0.333 |
0.667 |
| 0.833 |
0.5 |
0.333 |
0.85 |