Example 2: Using TD_TextParser to Get Token Frequency and List of Positions - Analytics Database

Database Analytic Functions

Deployment
VantageCloud
VantageCore
Edition
Enterprise
IntelliFlex
VMware
Product
Analytics Database
Release Number
17.20
Published
June 2022
ft:locale
en-US
ft:lastEdition
2025-07-09
dita:mapPath
gjn1627595495337.ditamap
dita:ditavalPath
qkf1628213546010.ditaval
dita:id
jmh1512506877710
Product Category
Teradata Vantage™

InputTable: test_table data

-- Input table for the examples on this page: one row per document.
-- id identifies the document; paragraph holds the text to be parsed.
CREATE TABLE test_table (
    id        INTEGER,
    paragraph VARCHAR(100)
);

-- Two sample documents used as InputTable by the TD_TextParser queries.
INSERT INTO test_table (id, paragraph)
VALUES (1, 'Programmers program with program, as.as   programming languages a program');

INSERT INTO test_table (id, paragraph)
VALUES (2, 'The quick brown fox jumps over the lazy dog');

SELECT Statement

-- Display the contents of the input table.
SELECT *
FROM test_table;

Result:

id paragraph
-- -----------------------------------------------------------------------
1  Programmers program with program, as.as programming languages a program
2  The quick brown fox jumps over the lazy dog

StopWords table: Custom set of words to be removed when parsing

-- Custom stop-word list: one word per row. The table is passed to
-- TD_TextParser as the StopWordsTable DIMENSION input in later queries.
CREATE TABLE stopwords (word VARCHAR(10));

-- Explicit column list and VALUES keyword, matching the test_table inserts.
INSERT INTO stopwords (word) VALUES ('a');
INSERT INTO stopwords (word) VALUES ('an');
INSERT INTO stopwords (word) VALUES ('and');
INSERT INTO stopwords (word) VALUES ('the');

SELECT Statement

-- Display the contents of the custom stop-word table.
SELECT *
FROM stopwords;

Result:

word
-----
the
and
an
a

Query 1 (Tokenizing with default delimiter)

-- Query 1: tokenize the paragraph column of test_table with the default
-- delimiter. RemoveStopWords is enabled, but no StopWordsTable input is
-- supplied in this query.
-- Rows are ordered by the first and fourth output columns.
SELECT *
FROM TD_TextParser (
    ON test_table AS InputTable
    USING
    TextColumn ('paragraph')
    RemoveStopWords ('true')
) AS dt
ORDER BY 1, 4;

Result:

id paragraph token locations
1 Programmers program with program, as.as programming languages a program programmers 1
1 Programmers program with program, as.as programming languages a program program 2
1 Programmers program with program, as.as programming languages a program programming 5
1 Programmers program with program, as.as programming languages a program language 6
1 Programmers program with program, as.as programming languages a program program 8
2 The quick brown fox jumps over the lazy dog quick 1
2 The quick brown fox jumps over the lazy dog brown 2
2 The quick brown fox jumps over the lazy dog fox 3
2 The quick brown fox jumps over the lazy dog jumps 4
2 The quick brown fox jumps over the lazy dog over 5
2 The quick brown fox jumps over the lazy dog lazy 7
2 The quick brown fox jumps over the lazy dog dog 8

Query 2 (Using StopWordsTable, ListPositions, and TokenFrequency)

-- Query 2: tokenize test_table.paragraph, passing the custom stopwords
-- table as the StopWordsTable DIMENSION input, with id as the document ID
-- column and both ListPositions and TokenFrequency enabled.
-- The boolean arguments are spelled 'true' to match the other queries.
-- NOTE(review): ORDER BY 1, 2 sorts on the first two output columns only;
-- confirm whether an additional sort key is wanted within each document.
SELECT *
FROM TD_TextParser (
    ON test_table AS InputTable
    ON stopwords AS StopWordsTable DIMENSION
    USING
    TextColumn ('paragraph')
    RemoveStopWords ('true')
    DocIDColumn ('id')
    ListPositions ('true')
    TokenFrequency ('true')
) AS dt
ORDER BY 1, 2;

Result:

id paragraph token frequency locations
1 Programmers program with program, as.as programming languages a program programmers 1 1
1 Programmers program with program, as.as programming languages a program program 2 2,8
1 Programmers program with program, as.as programming languages a program with 1 3
1 Programmers program with program, as.as programming languages a program the 1 4
1 Programmers program with program, as.as programming languages a program programming 1 5
1 Programmers program with program, as.as programming languages a program language 1 6
2 The quick brown fox jumps over the lazy dog the 2 0,6
2 The quick brown fox jumps over the lazy dog quick 1 1
2 The quick brown fox jumps over the lazy dog brown 1 2
2 The quick brown fox jumps over the lazy dog fox 1 3
2 The quick brown fox jumps over the lazy dog jumps 1 4
2 The quick brown fox jumps over the lazy dog over 1 5
2 The quick brown fox jumps over the lazy dog lazy 1 7
2 The quick brown fox jumps over the lazy dog dog 1 8

Query 3 (Using OutputByWord set to false and a blank space as the Delimiter)

-- Query 3: tokenize test_table.paragraph using a single blank space as the
-- delimiter, with the custom stopwords table as the StopWordsTable
-- DIMENSION input and OutputByWord set to 'false'.
-- Rows are ordered by the first and second output columns.
SELECT *
FROM TD_TextParser (
    ON test_table AS InputTable
    ON stopwords AS StopWordsTable DIMENSION
    USING
    TextColumn ('paragraph')
    RemoveStopWords ('true')
    Delimiter (' ')
    OutputByWord ('false')
) AS dt
ORDER BY 1, 2;

Result:

id paragraph tokens
1 Programmers program with program, as.as programming languages a program programmers program program programming languages program
2 The quick brown fox jumps over the lazy dog quick brown fox jumps over lazy dog

Query 4 (Using DelimiterRegex)

-- Query 4: tokenize test_table.paragraph by splitting on the regular
-- expression given in DelimiterRegex (one or more space, tab, form-feed,
-- carriage-return, or newline characters), with id as the document ID
-- column and ListPositions enabled. No StopWordsTable input is supplied.
-- Rows are ordered by the first and fourth output columns.
SELECT *
FROM TD_TextParser (
    ON test_table AS InputTable
    USING
    TextColumn ('paragraph')
    RemoveStopWords ('true')
    DocIDColumn ('id')
    DelimiterRegex ('[ \t\f\r\n]+')
    ListPositions ('true')
) AS dt
ORDER BY 1, 4;

Result:

id paragraph token locations
1 Programmers program with program, as.as programming languages a program programmers 1
1 Programmers program with program, as.as programming languages a program program 2,8
1 Programmers program with program, as.as programming languages a program programming 5
1 Programmers program with program, as.as programming languages a program language 6
2 The quick brown fox jumps over the lazy dog quick 1
2 The quick brown fox jumps over the lazy dog brown 2
2 The quick brown fox jumps over the lazy dog fox 3
2 The quick brown fox jumps over the lazy dog jumps 4
2 The quick brown fox jumps over the lazy dog over 5
2 The quick brown fox jumps over the lazy dog lazy 7
2 The quick brown fox jumps over the lazy dog dog 8