Setting up the environment
Loading libraries
# Helper packages
# For creating our train-test splits
library(rsample)
# For minor feature engineering tasks
library(recipes)
# Modeling packages
library(h2o)
# Start (or connect to) an H2O instance for this session; max_mem_size = "8g"
# requests an 8 GB memory limit for it.
h2o.init(max_mem_size = "8g")
## Warning in h2o.clusterInfo():
## Your H2O cluster version is (3 months and 18 days) old. There may be a newer version available.
## Please download and install the latest version from: https://h2o-release.s3.amazonaws.com/h2o/latest_stable.html
Getting our training set
# Load the Ames housing data and split it into training and test partitions.
ames <- AmesHousing::make_ames()

# Fix the RNG seed so the random split is reproducible.
set.seed(123)

# Stratify on the response so both partitions have a similar Sale_Price
# distribution; the split proportion is left at the rsample default.
split <- initial_split(ames, strata = "Sale_Price")
ames_train <- training(split)
ames_test <- testing(split)
Loading data into the h2o session
# Preprocessing blueprint: model Sale_Price on every other column and pool
# infrequent levels of each nominal variable (frequency below the 0.005
# threshold) into a single catch-all level, so the training and test sets
# end up with consistent categorical levels.
blueprint <- recipe(Sale_Price ~ ., data = ames_train) %>%
  step_other(all_nominal(), threshold = 0.005)
# Create the training set for h2o: estimate the blueprint on the training
# data, keep the processed training set (retain = TRUE) so juice() can
# return it, then convert the result to an H2O frame.
train_h2o <- prep(blueprint, training = ames_train, retain = TRUE) %>%
  juice() %>%
  as.h2o()
# Create the testing set for h2o: estimate the blueprint on the training data
# only, apply it to the held-out test data with bake(), then convert the
# result to an H2O frame.
test_h2o <- prep(blueprint, training = ames_train) %>%
  bake(new_data = ames_test) %>%
  as.h2o()
# Response and feature names: Y is the response column, X is every other
# column of the training data.
Y <- "Sale_Price"
X <- setdiff(names(ames_train), Y)