15.4 Alternative Model Training Process
Rather than using a single tuned base learner, we can stack multiple models generated from the same base learner and allow the super learner to perform the tuning process. The code that follows assumes the `train_h2o`, `test_h2o`, `X`, and `Y` objects created earlier in the chapter; a minimal setup sketch is shown below.
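A minimal setup sketch, assuming the Ames housing split (`ames_train`/`ames_test`) used earlier in the chapter; swap in your own data as needed.
# Start H2O and convert the training/test splits to H2O frames
library(h2o)
h2o.init()
train_h2o <- as.h2o(ames_train)
test_h2o <- as.h2o(ames_test)
# Response variable and predictor names (assumes the Ames data)
Y <- "Sale_Price"
X <- setdiff(names(ames_train), Y)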
- Create a grid of hyperparameters to tune.
# Define GBM hyperparameter grid
hyper_grid <- list(
  max_depth = c(1, 3, 5),
  min_rows = c(1, 5, 10),
  learn_rate = c(0.01, 0.05, 0.1),
  learn_rate_annealing = c(0.99, 1),
  sample_rate = c(0.5, 0.75, 1),
  col_sample_rate = c(0.8, 0.9, 1)
)
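A quick sanity check on the size of the full Cartesian grid (3 × 3 × 3 × 2 × 3 × 3 = 486 combinations), which motivates the random search used next:
# Number of combinations in the full grid
prod(lengths(hyper_grid))
## [1] 486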
- Define the search criteria. Rather than evaluating all 486 combinations, we perform a random search capped at 25 models.
search_criteria <- list(
  # Perform a random search of all the combinations
  strategy = "RandomDiscrete",
  # And stop after reaching the maximum number of models
  max_models = 25
)
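As an alternative sketch (not used below), the random search can be capped by wall-clock time rather than model count via H2O's `max_runtime_secs` criterion:
# Hypothetical variant: stop the random search after ten minutes
search_criteria_timed <- list(
  strategy = "RandomDiscrete",
  max_runtime_secs = 600
)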
- Train and cross-validate a model for each sampled combination. Identical fold assignment and saved cross-validation predictions are required so the models can be stacked later.
# Build random grid search
random_grid <- h2o.grid(
  algorithm = "gbm",
  grid_id = "gbm_grid",
  x = X,
  y = Y,
  training_frame = train_h2o,
  hyper_params = hyper_grid,
  search_criteria = search_criteria,
  ntrees = 5000,
  stopping_metric = "RMSE",
  stopping_rounds = 10,
  stopping_tolerance = 0,
  # Consistent folds and saved CV predictions are required for stacking
  nfolds = 10,
  fold_assignment = "Modulo",
  keep_cross_validation_predictions = TRUE,
  seed = 123
)
- Arrange the models by performance
# Sort results by RMSE
h2o.getGrid(
grid_id = "gbm_grid",
sort_by = "rmse"
)
## H2O Grid Details
## ================
##
## Grid ID: gbm_grid
## Used hyper parameters:
## - col_sample_rate
## - learn_rate
## - learn_rate_annealing
## - max_depth
## - min_rows
## - sample_rate
## Number of models: 25
## Number of failed models: 0
##
## Hyper-Parameter Search Summary: ordered by increasing rmse
## col_sample_rate learn_rate learn_rate_annealing max_depth min_rows
## 1 0.90000 0.01000 1.00000 5.00000 10.00000
## 2 0.90000 0.01000 1.00000 5.00000 1.00000
## 3 0.80000 0.10000 1.00000 3.00000 5.00000
## 4 0.80000 0.10000 0.99000 3.00000 1.00000
## 5 0.80000 0.05000 1.00000 5.00000 5.00000
## sample_rate model_ids rmse
## 1 0.50000 gbm_grid_model_11 22299.59397
## 2 1.00000 gbm_grid_model_2 23016.72073
## 3 0.75000 gbm_grid_model_1 23058.95350
## 4 0.75000 gbm_grid_model_20 23110.08213
## 5 0.75000 gbm_grid_model_9 23217.62738
##
## ---
## col_sample_rate learn_rate learn_rate_annealing max_depth min_rows
## 20 1.00000 0.01000 0.99000 5.00000 5.00000
## 21 0.80000 0.01000 0.99000 5.00000 1.00000
## 22 0.90000 0.01000 0.99000 5.00000 10.00000
## 23 0.90000 0.01000 0.99000 3.00000 10.00000
## 24 0.80000 0.01000 0.99000 1.00000 5.00000
## 25 0.90000 0.01000 0.99000 1.00000 10.00000
## sample_rate model_ids rmse
## 20 0.50000 gbm_grid_model_22 40781.63918
## 21 0.75000 gbm_grid_model_21 40793.61345
## 22 0.75000 gbm_grid_model_23 41133.16192
## 23 1.00000 gbm_grid_model_17 44768.53587
## 24 0.50000 gbm_grid_model_14 57401.59369
## 25 0.50000 gbm_grid_model_12 57405.33471
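The spread between the best and worst models is large (RMSE of roughly 22300 vs. 57405). A sketch for pulling the sorted RMSE values out of the grid for inspection; `summary_table` is a slot on the `H2OGrid` object returned by `h2o.getGrid()`:
# Extract the cross-validated RMSE values from the sorted grid
sorted_grid <- h2o.getGrid(grid_id = "gbm_grid", sort_by = "rmse")
rmse_values <- as.numeric(sorted_grid@summary_table$rmse)
range(rmse_values)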
- Train the stacked model
# Train a stacked ensemble using the GBM grid
ensemble <- h2o.stackedEnsemble(
  x = X,
  y = Y,
  training_frame = train_h2o,
  model_id = "ensemble_gbm_grid",
  base_models = random_grid@model_ids,
  metalearner_algorithm = "gbm"
)
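The metalearner here is itself a GBM; when `metalearner_algorithm` is omitted, `h2o.stackedEnsemble()` defaults to a GLM. A variant sketch using the default:
# Same ensemble with the default (GLM) metalearner
ensemble_glm <- h2o.stackedEnsemble(
  x = X,
  y = Y,
  training_frame = train_h2o,
  model_id = "ensemble_gbm_grid_glm",
  base_models = random_grid@model_ids
)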
- Compare the best individual model against the stacked ensemble on the test set.
# Tuned model
random_grid@model_ids[[1]] |>
  h2o.getModel() |>
  h2o.performance(newdata = test_h2o)
## H2ORegressionMetrics: gbm
##
## MSE: 357841477
## RMSE: 18916.7
## MAE: 12116.5
## RMSLE: 0.1004672
## Mean Residual Deviance : 357841477
# Stacked model
h2o.performance(ensemble, newdata = test_h2o)
## H2ORegressionMetrics: stackedensemble
##
## MSE: 420921340
## RMSE: 20516.37
## MAE: 12952.93
## RMSLE: 0.1035663
## Mean Residual Deviance : 420921340
In this case the single tuned GBM (RMSE 18916.7) outperforms the stacked grid (RMSE 20516.37) on the test set; stacking tends to pay off most when the base learners are accurate yet produce uncorrelated predictions, and these 25 GBMs are highly correlated. Finally, shut down the H2O cluster:
h2o.shutdown(prompt = FALSE)