Random Forest Model | BYOM | Teradata Vantage - 2.0 - Random Forest Model - Teradata Vantage

Teradata Vantage™ - Bring Your Own Model User Guide

Product
Teradata Vantage
Release Number
2.0
Release Date
October 2021
Content Type
User Guide
Publication ID
B700-1111-051K
Language
English (United States)
# Train an H2O Distributed Random Forest on the loans table stored in
# Teradata Vantage and export it as a MOJO file so it can be used with
# Bring Your Own Model (BYOM).
import os

import h2o
import teradatasql
from h2o.estimators import H2ORandomForestEstimator

# Connection settings. Each value can be overridden through an environment
# variable; the fallbacks are the placeholder values of the documented
# sample and should be replaced for any real system.
passwd = os.environ.get("TD_PASSWORD", "alice")
uid = os.environ.get("TD_USER", "alice")
host = os.environ.get("TD_HOST", "TDCLOUD17-4-2")

# Column names applied to the fetched rows, in order.
# NOTE(review): this assumes "select *" returns the table columns in exactly
# this order -- confirm against the table definition.
columns = [
    'CREDIT_SCORE',
    'FIRST_PAYMENT_DATE',
    'FIRST_TIME_HOMEBUYER_FLAG',
    'MATURITY_DATE',
    'METROPOLITAN_STATISTICAL_AREA',
    'MORTGAGE_INSURANCE_PERCENTAGE',
    'NUMBER_OF_UNITS',
    'OCCUPANCY_STATUS',
    'ORIGINAL_COMBINED_LOAN_TO_VALUE',
    'ORIGINAL_DEBT_TO_INCOME_RATIO',
    'ORIGINAL_UPB',
    'ORIGINAL_LOAN_TO_VALUE',
    'ORIGINAL_INTEREST_RATE',
    'CHANNEL',
    'PREPAYMENT_PENALTY_MORTGAGE_FLAG',
    'PRODUCT_TYPE',
    'PROPERTY_STATE',
    'PROPERTY_TYPE',
    'POSTAL_CODE',
    'LOAN_SEQUENCE_NUMBER',
    'LOAN_PURPOSE',
    'ORIGINAL_LOAN_TERM',
    'NUMBER_OF_BORROWERS',
    'SELLER_NAME',
    'SERVICER_NAME',
    'PREPAID',
    'DELINQUENT',
]

# The model predicts DELINQUENT from every other column. Deriving the
# predictor list from `columns` keeps the two from drifting apart.
response = 'DELINQUENT'
predictors = [name for name in columns if name != response]

# Fetch training dataset from TD. The context managers close the cursor and
# the connection as soon as the rows are in memory, even if the query fails.
with teradatasql.connect(host=host, user=uid, password=passwd) as connection:
    with connection.cursor() as cur:
        cur.execute("select * from freediemac;")
        train_df = cur.fetchall()

# Server must be running. Use command java -jar h2o.jar to start the server
# from ~/Projects/H2O/h2o...
h2o.init()

loans = h2o.H2OFrame(train_df, column_names=columns)
# Convert the response to a factor (categorical) column before training.
loans[response] = loans[response].asfactor()

# 90% of the rows go to training, the rest to validation; the seed is fixed
# so the split is reproducible.
train, valid = loans.split_frame(ratios=[.9], seed=1234)

# Build and train the model:
# NOTE(review): max_depth=0 is understood to mean "no depth limit" in H2O --
# confirm against the DRF parameter reference for the installed version.
loans_drf = H2ORandomForestEstimator(ntrees=100000, max_depth=0)
loans_drf.train(
    x=predictors,
    y=response,
    training_frame=train,
    validation_frame=valid,
)

# Export the trained model as a MOJO, overwriting any existing file.
path = "../sql/loans_100000_0_fm"
model_path = loans_drf.save_mojo(path=path, force=True)

print(model_path)