ModelTable: boston_db_rf_discretize_transformation.R
library(dplyr)
library(tdplyr)
library(randomForest)
library(pmml)
# Create connection
con <- td_create_context(host='server123@mydomain.com', uid='alice', pwd='alice', dType='native')
# Reference table from DBS
boston <- tbl(con, "boston_train")
# Convert boston to a local dataframe
boston_df <- as.data.frame(boston)
# Print the boston dataframe
boston_df
dataBox <- xform_wrap(boston_df)
#discretize column 'rm' using values from file boston_rm_intervals.csv
dataBox <- xform_discretize(dataBox,xform_info="[rm ->rm_new][double -> integer]"
,table="./transformations/boston_rm_intervals.csv",default_value = 6,
map_missing_to="4")
#remove column name "id" and "rm" from formula, keep new defined rm_new
fit <- randomForest(medv ~ ., data=dataBox$data[-c(1,7)], ntree=100, importance=TRUE, proximity=TRUE)
tpmml <- pmml(fit, transforms = dataBox)
save_pmml(tpmml,"../sql/boston_db_trans_dstize_R_model.pmml")
boston_rm_intervals.csv
3],3 (3:4],4
(4:5],5
(5:6],6
(6:7],7
(7,8
Query that Calls PMMLPredict Function
SELECT * FROM mldb.PMMLPredict (
ON boston_test
ON (SELECT * FROM pmml_models
WHERE model_id='boston_db_trans_dstize_R_model') DIMENSION
USING
Accumulate ('id')
) AS td;
Query Output
*** Query completed. 101 rows found. 3 columns returned.
*** Total elapsed time was 8 seconds.
id prediction json_report
----------- ------------------------- -------------------------------------
40 29.601149999999997 {"Predicted_medv":29.601149999999997}
265 38.08418333333333 {"Predicted_medv":38.08418333333333}
80 21.62221666666666 {"Predicted_medv":21.62221666666666}
305 34.96583333333334 {"Predicted_medv":34.96583333333334}
345 29.420766666666673 {"Predicted_medv":29.420766666666673}
120 20.92538333333334 {"Predicted_medv":20.92538333333334}
385 9.28720678769115 {"Predicted_medv":9.28720678769115}
160 25.555683333333334 {"Predicted_medv":25.555683333333334}
200 28.65589999999999 {"Predicted_medv":28.65589999999999}
425 14.45643333333333 {"Predicted_medv":14.45643333333333}
240 24.163116666666674 {"Predicted_medv":24.163116666666674}
465 20.628146464646466 {"Predicted_medv":20.628146464646466}
505 23.146850000000004 {"Predicted_medv":23.146850000000004}
15 19.669494871794882 {"Predicted_medv":19.669494871794882}
55 19.088999999999995 {"Predicted_medv":19.088999999999995}
280 30.91968333333333 {"Predicted_medv":30.91968333333333}
95 22.0883 {"Predicted_medv":22.0883}
320 20.567800000000013 {"Predicted_medv":20.567800000000013}
...