8.64 Tuning the xgboost regression model

We are using the Carseats dataset. Let’s create the training/testing split, this time with a different seed.

# Fix the RNG state so the random train/test partition is reproducible.
set.seed(1001)
# Split with the function's default settings (no proportion or strata given).
carseats_split <- initial_split(data = Carseats)

# Extract the two partitions from the split object.
carseats_test <- testing(carseats_split)
carseats_train <- training(carseats_split)

Create the bootstrap resamples.

# Seed the RNG so the same bootstrap resamples are drawn on every run.
set.seed(2341)
# Resample the training set only, stratifying on the outcome `Sales`.
# `bootstraps_samples` (defined elsewhere) sets the number of resamples;
# `apparent = TRUE` requests the additional "apparent" sample on top of those.
carseats_boot <- bootstraps(
  data = carseats_train,
  times = bootstraps_samples,
  strata = Sales,
  apparent = TRUE
)

# Print the resample set to inspect the analysis/assessment sizes.
carseats_boot
## # Bootstrap sampling using stratification with apparent sample 
## # A tibble: 101 × 2
##    splits            id          
##    <list>            <chr>       
##  1 <split [300/115]> Bootstrap001
##  2 <split [300/107]> Bootstrap002
##  3 <split [300/116]> Bootstrap003
##  4 <split [300/109]> Bootstrap004
##  5 <split [300/106]> Bootstrap005
##  6 <split [300/97]>  Bootstrap006
##  7 <split [300/104]> Bootstrap007
##  8 <split [300/99]>  Bootstrap008
##  9 <split [300/108]> Bootstrap009
## 10 <split [300/103]> Bootstrap010
## # ℹ 91 more rows

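Define the model specification: a boosted-tree regression model fitted with the xgboost engine, with a fixed number of trees and the remaining hyperparameters marked for tuning.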

# Boosted-tree regression specification using the xgboost engine.
# The number of trees is fixed at 2000; every argument set to `tune()` is a
# placeholder whose value is left to be chosen during tuning.
xgb_spec <- boost_tree(
  mtry = tune(),
  trees = 2000,
  min_n = tune(),
  tree_depth = tune(),
  learn_rate = tune()
) %>%
  set_engine("xgboost") %>%
  set_mode("regression")

Create the workflow()

# Bundle the preprocessing formula (Sales modelled on all other columns)
# together with the xgboost model specification in a single workflow.
xgb_wf <- workflow() %>%
  add_formula(Sales ~ .) %>%
  add_model(xgb_spec)