8.64 Tuning the xgboost regression model
We are using the Carseats dataset. Let’s do the initial training/testing split with a different seed.
# Fix the RNG seed so the random train/test partition is reproducible.
set.seed(1001)

# Partition Carseats once; the split object remembers which rows belong to
# the training portion and which to the testing portion.
carseats_split <- initial_split(Carseats)

# Materialise the two portions as separate data frames.
carseats_train <- training(carseats_split)
carseats_test <- testing(carseats_split)
Create the bootstrap resamples.
# Fix the RNG seed so the bootstrap resamples are reproducible.
set.seed(2341)

# Draw bootstrap resamples from the training data only, stratified on the
# outcome (Sales). `bootstraps_samples` is defined outside this excerpt.
# `apparent = TRUE` requests the extra "apparent" sample in addition to the
# `times` resamples.
carseats_boot <- bootstraps(
  carseats_train,
  times = bootstraps_samples,
  apparent = TRUE,
  strata = Sales
)

# Print the resampling object to inspect the splits.
carseats_boot
## # Bootstrap sampling using stratification with apparent sample
## # A tibble: 101 × 2
## splits id
## <list> <chr>
## 1 <split [300/115]> Bootstrap001
## 2 <split [300/107]> Bootstrap002
## 3 <split [300/116]> Bootstrap003
## 4 <split [300/109]> Bootstrap004
## 5 <split [300/106]> Bootstrap005
## 6 <split [300/97]> Bootstrap006
## 7 <split [300/104]> Bootstrap007
## 8 <split [300/99]> Bootstrap008
## 9 <split [300/108]> Bootstrap009
## 10 <split [300/103]> Bootstrap010
## # ℹ 91 more rows
Define the model specification.
# Boosted-tree specification for regression with the xgboost engine.
# The number of trees is fixed at 2000; mtry, min_n, tree_depth and
# learn_rate are marked with tune() so they can be chosen during tuning.
xgb_spec <-
  boost_tree(
    trees = 2000,
    mtry = tune(),
    min_n = tune(),
    tree_depth = tune(),
    learn_rate = tune()
  ) %>%
  set_engine("xgboost") %>%
  set_mode("regression")
Create the workflow()
# Bundle the model specification and the modelling formula into a single
# workflow: Sales is the outcome, every other column is a predictor.
xgb_wf <- workflow() %>%
  add_model(xgb_spec) %>%
  add_formula(Sales ~ .)