Setting enviroment

Loading libraries

# Helper packages
#  For creating our train-test splits
library(rsample)
#  For minor feature engineering tasks
library(recipes)

# Modeling packages
library(h2o)
h2o.init(max_mem_size = "8g")

## Warning in h2o.clusterInfo(): 
## Your H2O cluster version is (3 months and 18 days) old. There may be a newer version available.
## Please download and install the latest version from: https://h2o-release.s3.amazonaws.com/h2o/latest_stable.html

Getting our training set

# Load and split the Ames housing data
ames <- AmesHousing::make_ames()

# for reproducibility
set.seed(123)  
split <- initial_split(ames, strata = "Sale_Price")
ames_train <- training(split)
ames_test <- testing(split)

Loading data to h2o session

# Make sure we have consistent categorical levels
blueprint <- recipe(Sale_Price ~ .,
                    data = ames_train) %>%
  step_other(all_nominal(),
             threshold = 0.005)

# Create training set for h2o
train_h2o <- prep(blueprint,
                  training = ames_train,
                  retain = TRUE) %>%
  juice() %>%
  as.h2o()

# Create testing set for h2o
test_h2o <- prep(blueprint,
                 training = ames_train) %>%
  bake(new_data = ames_test) %>%
  as.h2o()

# Get response and feature names
Y <- "Sale_Price"
X <- setdiff(names(ames_train), Y)