Setting up the environment
Loading libraries
# Helper packages
# For creating our train-test splits
library(rsample)
# For minor feature engineering tasks
library(recipes)
# Modeling packages
library(h2o)
# Initialise the H2O session used by the modelling code, asking for a
# maximum memory size of "8g".
h2o.init(max_mem_size = "8g")
# NOTE(review): the `##` lines below appear to be console output captured
# from an earlier run (a cluster-version age warning), not instructions.
## Warning in h2o.clusterInfo():
## Your H2O cluster version is (3 months and 18 days) old. There may be a newer version available.
## Please download and install the latest version from: https://h2o-release.s3.amazonaws.com/h2o/latest_stable.html
Getting our training set
# Build the Ames housing data set from the AmesHousing package
ames <- AmesHousing::make_ames()

# Fix the RNG seed so the random partition below is repeatable
set.seed(123)

# Partition the rows, stratifying on the response (Sale_Price), and keep
# the training portion
split <- initial_split(data = ames, strata = "Sale_Price")
ames_train <- training(split)
ames_test <- testing(split)
Loading data to h2o session
# Preprocessing recipe: model Sale_Price on every other column of the
# training data, and pass all nominal columns through step_other() with a
# 0.005 threshold so categorical levels stay consistent across data sets.
blueprint <- step_other(
  recipe(Sale_Price ~ ., data = ames_train),
  all_nominal(),
  threshold = 0.005
)
# Estimate the recipe a single time on the training data. Both H2O frames
# below are derived from this one prepped recipe, so the training and
# testing sets are guaranteed to share the same learned steps and the
# recipe is not re-estimated for each frame.
prepped_blueprint <- prep(blueprint,
                          training = ames_train,
                          retain = TRUE)

# Create training set for h2o: the processed training data retained by
# prep(), converted to an H2O frame
train_h2o <- prepped_blueprint %>%
  juice() %>%
  as.h2o()

# Create testing set for h2o: apply the training-estimated steps to the
# held-out rows, then convert to an H2O frame
test_h2o <- prepped_blueprint %>%
  bake(new_data = ames_test) %>%
  as.h2o()
# Name of the response column
Y <- "Sale_Price"
# Feature names: every training-set column except the response
X <- setdiff(colnames(ames_train), Y)