TextMorph Example: TextMorph with POSTagger and TextTagger - Teradata Vantage

Machine Learning Engine Analytic Function Reference

Product
Teradata Vantage
Release Number
8.10
1.1
Published
October 2019
Language
English (United States)
Last Update
2019-12-31
dita:mapPath
ima1540829771750.ditamap
dita:ditavalPath
jsj1481748799576.ditaval
dita:id
B700-4003
lifecycle
previous
Product Category
Teradata Vantage™

This example uses the function POSTagger (ML Engine) to create the input table for TextMorph, whose output table is input to the function TextTagger (ML Engine).

POSTagger Input: pos_input

id txt
s1 Roger Federer born on 8 August 1981, is a greatest tennis player, who has been continuously ranked inside the top 10 since October 2002 and has won Wimbledon, USOpen, Australian and FrenchOpen titles mutiple times

POSTagger SQL Call

-- Run POSTagger over the txt column of pos_input and store the result as
-- postagger_output, which the TextMorph call in this example reads.
-- Accumulate ('id') carries the id column through to the output rows.
CREATE MULTISET TABLE postagger_output AS (
    SELECT *
    FROM POSTagger (
        ON pos_input
        USING
            Accumulate ('id')
            TextColumn ('txt')
    ) AS dt
) WITH DATA;

POSTagger Output and TextMorph Input: postagger_output

-- No ORDER BY here, so the rows are listed in whatever order they are returned.
SELECT *
FROM postagger_output;
 id word_sn word         pos_tag 
 -- ------- ------------ ------- 
 s1       3 born         VBN    
 s1       5 8            CD     
 s1       6 august       NN     
 s1       7 1981         CD     
 s1       9 is           VBZ    
 s1      10 a            DT     
 s1      11 greatest     JJS    
 s1      12 tennis       NN     
 s1      13 player       NN     
 s1      14 ,            O      
 s1      15 who          WP     
 s1      17 been         VBN    
 s1      18 continuously RB     
 s1      19 ranked       VBN    
 s1      20 inside       IN     
 s1      21 the          DT     
 s1      22 top          JJ     
 s1      23 10           CD     
 s1      24 since        IN     
 s1      25 october      JJ     
 s1      26 2002         CD     
 s1      27 and          CC     
 s1      28 has          VBZ    
 s1      29 won          VBN    
 s1      30 wimbledon    NN     
 s1      31 ,            O      
 s1      33 ,            O      
 s1      34 australian   JJ     
 s1      35 and          CC     
 s1      36 frenchopen   JJ     
 s1      37 titles       NNS    
 s1      38 mutiple      JJ     
 s1      39 times        NNS    
 s1      32 usopen       JJ     
 s1      16 has          VBZ    
 s1       8 ,            O      
 s1       4 on           IN     
 s1       2 federer      NN     
 s1       1 roger        NN

TextMorph SQL Call

-- Run TextMorph over the tagged words and store the result as
-- textmorph_output, which the TextTagger call in this example reads.
-- WordColumn and POSTagColumn name the input columns holding the word and
-- its POS tag; Accumulate keeps the listed input columns in the output
-- alongside the new morph and pos columns.
CREATE MULTISET TABLE textmorph_output AS (
    SELECT *
    FROM TextMorph (
        ON postagger_output
        USING
            WordColumn ('word')
            POSTagColumn ('pos_tag')
            Accumulate ('id', 'word_sn', 'word', 'pos_tag')
    ) AS dt
) WITH DATA;

TextMorph Output and TextTagger Input: textmorph_output

-- Sort on (id, word_sn) so the tokens are listed in sentence order.
SELECT *
FROM textmorph_output
ORDER BY id, word_sn;
 id word_sn word         pos_tag morph        pos  
 -- ------- ------------ ------- ------------ ---- 
 s1       1 roger        NN      roger        noun
 s1       2 federer      NN      federer      noun
 s1       3 born         VBN     bear         verb
 s1       4 on           IN      on           NULL
 s1       5 8            CD      8            NULL
 s1       6 august       NN      august       noun
 s1       7 1981         CD      1981         NULL
 s1       8 ,            O       ,            NULL
 s1       9 is           VBZ     be           verb
 s1      10 a            DT      a            NULL
 s1      11 greatest     JJS     great        adj 
 s1      12 tennis       NN      tennis       noun
 s1      13 player       NN      player       noun
 s1      14 ,            O       ,            NULL
 s1      15 who          WP      who          NULL
 s1      16 has          VBZ     have         verb
 s1      17 been         VBN     be           verb
 s1      18 continuously RB      continuously adv 
 s1      19 ranked       VBN     rank         verb
 s1      20 inside       IN      inside       NULL
 s1      21 the          DT      the          NULL
 s1      22 top          JJ      top          adj 
 s1      23 10           CD      10           NULL
 s1      24 since        IN      since        NULL
 s1      25 october      JJ      october      adj 
 s1      26 2002         CD      2002         NULL
 s1      27 and          CC      and          NULL
 s1      28 has          VBZ     have         verb
 s1      29 won          VBN     win          verb
 s1      30 wimbledon    NN      wimbledon    noun
 s1      31 ,            O       ,            NULL
 s1      32 usopen       JJ      usopen       adj 
 s1      33 ,            O       ,            NULL
 s1      34 australian   JJ      australian   adj 
 s1      35 and          CC      and          NULL
 s1      36 frenchopen   JJ      frenchopen   adj 
 s1      37 titles       NNS     title        noun
 s1      38 mutiple      JJ      mutiple      adj 
 s1      39 times        NNS     time         noun

TextTagger SQL Call

-- Tag the words in the sample sentence that name Grand Slam titles with
-- 'grandslam'; rows that match no rule get an empty tag.
-- The rules compare against morph, whose values in textmorph_output are all
-- lowercase, so the literals are written in lowercase too. That way the
-- example does not rely on equal() matching case-insensitively.
SELECT *
FROM TextTagger (
    ON textmorph_output
    USING
        TaggingRules (
            'equal(morph, "australian") as grandslam',
            'equal(morph, "wimbledon") as grandslam',
            'equal(morph, "usopen") as grandslam',
            'equal(morph, "frenchopen") as grandslam'
        )
        Accumulate ('id', 'word_sn', 'morph')
) AS dt
ORDER BY id, word_sn;

TextTagger Output

 id word_sn morph        tag       
 -- ------- ------------ --------- 
 s1       1 roger                 
 s1       2 federer               
 s1       3 bear                  
 s1       4 on                    
 s1       5 8                     
 s1       6 august                
 s1       7 1981                  
 s1       8 ,                     
 s1       9 be                    
 s1      10 a                     
 s1      11 great                 
 s1      12 tennis                
 s1      13 player                
 s1      14 ,                     
 s1      15 who                   
 s1      16 have                  
 s1      17 be                    
 s1      18 continuously          
 s1      19 rank                  
 s1      20 inside                
 s1      21 the                   
 s1      22 top                   
 s1      23 10                    
 s1      24 since                 
 s1      25 october               
 s1      26 2002                  
 s1      27 and                   
 s1      28 have                  
 s1      29 win                   
 s1      30 wimbledon    grandslam
 s1      31 ,                     
 s1      32 usopen       grandslam
 s1      33 ,                     
 s1      34 australian   grandslam
 s1      35 and                   
 s1      36 frenchopen   grandslam
 s1      37 title                 
 s1      38 mutiple               
 s1      39 time

Download a zip file of all examples and a SQL script file that creates their input tables from the attachment in the left sidebar.