Description
The LDAInference function uses the model tbl_teradata generated by the
function LDATrainer (td_lda_mle) to infer the topic distribution
in a set of new documents.
You can use the distribution for tasks such as classification and clustering.
Usage
td_lda_inference_mle (
object = NULL,
data = NULL,
docid.column = NULL,
word.column = NULL,
count.column = NULL,
out.topicnum = "all",
out.topicwordnum = "none",
data.sequence.column = NULL,
object.sequence.column = NULL
)
Arguments
object |
Required Argument. |
data |
Required Argument. |
docid.column |
Required Argument. |
word.column |
Required Argument. |
count.column |
Optional Argument. |
out.topicnum |
Optional Argument. |
out.topicwordnum |
Optional Argument. |
data.sequence.column |
Optional Argument. |
object.sequence.column |
Optional Argument. |
Value
Function returns an object of class "td_lda_inference_mle" which is a
named list containing objects of class "tbl_teradata".
Named list members can be referenced directly with the "$" operator
using the following names:
- doc.distribution.data
- output
Examples
# Fetch the connection held by the current Teradata context.
con <- td_get_context()$connection

# Load the example tables used below.
loadExampleData("ldainference_example", "complaints_testtoken")
loadExampleData("lda_example", "complaints_traintoken")

# Wrap each loaded table as an object of class "tbl_teradata".
complaints_testtoken <- tbl(con, "complaints_testtoken")
complaints_traintoken <- tbl(con, "complaints_traintoken")

# Example 1 - Generate a model with td_lda_mle(), then use that model to
# infer the topic distribution in a set of new documents.
td_lda_out <- td_lda_mle(
  data = complaints_traintoken,
  docid.column = "doc_id",
  word.column = "token",
  topic.num = 5,
  alpha = 0.1,
  eta = 0.1,
  maxiter = 50,
  convergence.delta = 0.0001,
  seed = 2,
  out.topicnum = "all",
  out.topicwordnum = "none"
)

td_lda_inference_mle_out <- td_lda_inference_mle(
  object = td_lda_out,
  data = complaints_testtoken,
  docid.column = "doc_id",
  word.column = "token",
  out.topicnum = "5",
  out.topicwordnum = "5"
)

# Example 2 - Only the required arguments are supplied; every optional
# argument keeps its default.
td_lda_inference_mle_out <- td_lda_inference_mle(
  object = td_lda_out,
  data = complaints_testtoken,
  docid.column = "doc_id",
  word.column = "token"
)

# Example 3 - Pass the "model.table" tbl_teradata member of "td_lda_out" as
# the model instead of the whole object, and supply "count.column".
td_lda_inference_mle_out <- td_lda_inference_mle(
  object = td_lda_out$model.table,
  data = complaints_testtoken,
  docid.column = "doc_id",
  word.column = "token",
  count.column = "frequency",
  out.topicwordnum = "none"
)