Input
The input is the same as in NaiveBayesTextClassifierPredict_MLE Example: TopK.
SQL Call
-- Example call: classify each document as 'crash' or 'no_crash' using the
-- model table complaints_tokens_model, with ModelType ('Bernoulli') and
-- OutputProb ('true').
SELECT *
FROM NaiveBayesTextClassifierPredict_MLE (
    -- Predictor input: TextTokenizer output over the 'text_data' column,
    -- with each token lower-cased and doc_id / doc_name carried along.
    ON (
        SELECT doc_id, doc_name, lower(token) AS token
        FROM TextTokenizer (
            ON complaints_with_docnames PARTITION BY ANY
            USING
            TextColumn ('text_data')
            OutputByWord ('true')
            Accumulate ('doc_id', 'doc_name')
        ) AS dt1
    ) AS PredictorValues PARTITION BY doc_id
    -- Model input, supplied as a DIMENSION table.
    ON complaints_tokens_model AS "model" DIMENSION
    USING
    InputTokenColumn ('token')
    ModelType ('Bernoulli')
    DocIdColumns ('doc_id')
    Accumulate ('doc_name')
    OutputProb ('true')
    Responses ('crash', 'no_crash')
) AS dt
ORDER BY doc_id;
Output
doc_id prediction loglik_crash        loglik_no_crash     prob_crash             prob_no_crash       doc_name
------ ---------- ------------------- ------------------- ---------------------- ------------------- --------
1      no_crash   -131.94910249765127 -98.37344066139619  2.619843435790522E-15  0.9999999999999974  A
2      no_crash   -106.37022582143115 -93.41932479796596  2.3740732728258604E-6  0.9999976259267271  B
3      no_crash   -104.57804977231783 -74.45875519616708  8.305323757638484E-14  0.9999999999999168  C
4      no_crash   -109.84041607458022 -80.04840368356992  1.1521085876129415E-13 0.9999999999998849  D
5      crash      -115.41886392381602 -117.8143759790836  0.9164844344558446     0.08351556554415548 E
6      no_crash   -131.7074631737852  -116.11212602416647 1.686673646176863E-7   0.9999998313326354  F
7      crash      -111.70603934552983 -115.48060778944125 0.9775677609771067     0.02243223902289321 G
8      no_crash   -111.0525856461103  -91.9733577040394   5.176027601098163E-9   0.9999999948239724  H
9      no_crash   -117.02384748590075 -108.85303741743412 2.8270889838546065E-4  0.9997172911016146  I
10     no_crash   -105.11394903363593 -82.65103082787691  1.7558142755826997E-10 0.9999999998244187  J