"""Train an H2O random forest on the loan table and export it as a MOJO.

Steps:
  1. Fetch every row of the ``freediemac`` table from Teradata.
  2. Load the rows into an H2OFrame and mark ``DELINQUENT`` as categorical.
  3. Split 90/10 into training and validation frames (fixed seed).
  4. Train a distributed random forest and save it as a MOJO under ``path``.
"""

import os

import h2o
import teradatasql
from h2o.estimators import H2ORandomForestEstimator

# Connection settings. The literals are the values this script has always
# used; TD_PASSWORD / TD_USER / TD_HOST may be set in the environment to
# override them without editing the source.
# NOTE(review): credentials should not live in source control - consider
# dropping the defaults once every caller sets the environment variables.
passwd = os.environ.get("TD_PASSWORD", "alice")
uid = os.environ.get("TD_USER", "alice")
host = os.environ.get("TD_HOST", "TDCLOUD17-4-2")

# Column names applied to the fetched rows, in order.
# NOTE(review): the query below is ``select *``, so this list is assumed to
# match the table's column order exactly - confirm against the table DDL.
columns = [
    'CREDIT_SCORE',
    'FIRST_PAYMENT_DATE',
    'FIRST_TIME_HOMEBUYER_FLAG',
    'MATURITY_DATE',
    'METROPOLITAN_STATISTICAL_AREA',
    'MORTGAGE_INSURANCE_PERCENTAGE',
    'NUMBER_OF_UNITS',
    'OCCUPANCY_STATUS',
    'ORIGINAL_COMBINED_LOAN_TO_VALUE',
    'ORIGINAL_DEBT_TO_INCOME_RATIO',
    'ORIGINAL_UPB',
    'ORIGINAL_LOAN_TO_VALUE',
    'ORIGINAL_INTEREST_RATE',
    'CHANNEL',
    'PREPAYMENT_PENALTY_MORTGAGE_FLAG',
    'PRODUCT_TYPE',
    'PROPERTY_STATE',
    'PROPERTY_TYPE',
    'POSTAL_CODE',
    'LOAN_SEQUENCE_NUMBER',
    'LOAN_PURPOSE',
    'ORIGINAL_LOAN_TERM',
    'NUMBER_OF_BORROWERS',
    'SELLER_NAME',
    'SERVICER_NAME',
    'PREPAID',
    'DELINQUENT',
]

# Fetch training dataset from TD.
# The ``with`` blocks close the cursor and the connection as soon as the rows
# are in memory, so no database session is held open while the model trains.
with teradatasql.connect(host=host, user=uid, password=passwd) as connection:
    with connection.cursor() as cur:
        cur.execute("select * from freediemac;")
        train_df = cur.fetchall()

# Server must be running. Use command java -jar h2o.jar to start the server
# from ~/Projects/H2O/h2o...
h2o.init()

loans = h2o.H2OFrame(train_df, column_names=columns)

# Convert the response to a factor so it is treated as a categorical target.
loans['DELINQUENT'] = loans['DELINQUENT'].asfactor()

response = 'DELINQUENT'
# Every column except the response is a predictor. Deriving the list from
# ``columns`` keeps the two from drifting apart; the result is the same 26
# names, in the same order, as the previously hand-written list.
predictors = [name for name in columns if name != response]

# 90% training / 10% validation; the seed is passed so the split is repeatable.
train, valid = loans.split_frame(ratios=[.9], seed=1234)

# Build and train the model:
# max_depth=0 removes the tree-depth limit (see the H2O DRF parameter docs).
loans_drf = H2ORandomForestEstimator(ntrees=100000, max_depth=0)
loans_drf.train(
    x=predictors,
    y=response,
    training_frame=train,
    validation_frame=valid,
)

# Export the trained model as a MOJO; force=True overwrites an existing file.
path = "../sql/loans_100000_0_fm"
model_path = loans_drf.save_mojo(path=path, force=True)
print(model_path)