Description
The TextTagger function tags text documents according to user-defined rules that combine text-processing and logical operators.
Usage
td_text_tagger_mle (
data = NULL,
rules.data = NULL,
language = "en",
rules = NULL,
tokenize = FALSE,
outputby.tag = FALSE,
tag.delimiter = ",",
accumulate = NULL,
data.sequence.column = NULL,
rules.data.sequence.column = NULL,
data.order.column = NULL,
rules.data.order.column = NULL
)
Arguments
data |
Required Argument. |
data.order.column |
Optional Argument. |
rules.data |
Optional Argument. |
rules.data.order.column |
Optional Argument. |
language |
Optional Argument.
If the "tokenize" argument is set to TRUE, the function uses the language
specified in this argument to create the word tokenizer. Default: "en". |
rules |
Optional Argument. |
tokenize |
Optional Argument. |
outputby.tag |
Optional Argument. |
tag.delimiter |
Optional Argument. |
accumulate |
Optional Argument. |
data.sequence.column |
Optional Argument. |
rules.data.sequence.column |
Optional Argument. |
Value
The function returns an object of class "td_text_tagger_mle", which is a
named list containing an object of class "tbl_teradata".
The named list member can be referenced directly with the "$" operator
using the name: result.
Examples
# Retrieve the connection held by the current context.
con <- td_get_context()[["connection"]]

# Load the example tables used by the examples.
loadExampleData(
  "texttagger_example",
  "text_inputs",
  "rule_inputs"
)

# Create "tbl_teradata" objects that reference the loaded tables.
text_inputs <- tbl(con, "text_inputs")
rule_inputs <- tbl(con, "rule_inputs")
# Example 1 - Pass the tagging rules directly through the "rules" argument.
# Each rule string below has the form '<condition> AS <tag-name>'.
r1 <- 'contain(content, "floods",1,) or contain(content,"tsunamis",1,) AS Natural-Disaster'
r2 <- 'contain(content,"Roger",1,) and contain(content,"Nadal",1,) AS Tennis-Rivalry'
r3 <- 'contain(titles,"Tennis",1,) and contain(content,"Roger",1,) AS Tennis-Greats'
r4 <- 'contain(content,"India",1,) and contain(content,"Pakistan",1,) AS Cricket-Rivalry'
r5 <- 'contain(content,"Australia",1,) and contain(content,"England",1,) AS The-Ashes'

# Tag the documents in text_inputs with all five rules.
td_text_tagger_out1 <- td_text_tagger_mle(
  data = text_inputs,
  outputby.tag = TRUE,
  rules = c(r1, r2, r3, r4, r5),
  accumulate = c("id")
)
# Example 2 - Supply the rules through a tbl_teradata ("rules.data")
# instead of the "rules" argument.
td_text_tagger_out2 <- td_text_tagger_mle(
  data = text_inputs,
  rules.data = rule_inputs,
  accumulate = c("id")
)
# Example 3 - Use a dictionary file inside the rules via dict().
# NOTE(review): presumably "keywords.txt" must already be installed where the
# function can read it - confirm before running this example.
# Each rule string has the form '<condition> AS <tag-name>'.
r1 <- 'dict(content, "keywords.txt",1,) and equal(titles, "Chennai Floods") AS Natural-Disaster'
# The tag name is written without trailing whitespace so that no stray space
# can end up in the emitted tag.
r2 <- 'dict(content, "keywords.txt", 2,) and equal(catalog, "sports") AS Great-Sports-Rivalry'

# Tag the documents in text_inputs with both dictionary-based rules.
td_text_tagger_out3 <- td_text_tagger_mle(
  data = text_inputs,
  rules = c(r1, r2),
  accumulate = c("id")
)
# Example 4 - Use superdist() in the rules, plus one contain() rule that
# matches with a regular expression.
# Each rule string has the form '<condition> AS <tag-name>'.
r1 <- 'superdist(content,"Chennai","floods",sent,,) AS Chennai-Flood-Disaster'
r2 <- 'superdist(content,"Roger","titles",para, "Nadal",para) AS Roger-Champion'
r3 <- 'superdist(content,"Roger","Nadal",para,,) AS Tennis-Rivalry'
# Character class [Aa] matches "Ashes" or "ashes". The earlier form [A|a]
# also accepted a literal "|" because "|" is not alternation inside brackets.
r4 <- 'contain(content,regex"[Aa]shes",2,) AS Aus-Eng-Cricket'
r5 <- 'superdist(content,"Australia","won",nw5,,) AS Aus-victory'

# Tag the documents in text_inputs with all five rules.
td_text_tagger_out4 <- td_text_tagger_mle(
  data = text_inputs,
  rules = c(r1, r2, r3, r4, r5),
  accumulate = c("id")
)