# Managing many workflows ----
# Random forest model specification for regression, fitted with the
# "ranger" engine. The hyperparameters are fixed here rather than tuned.
rand_forest_spec <- set_engine(
  rand_forest(mode = "regression", mtry = 2, trees = 25, min_n = 10),
  "ranger"
)
# Build a workflow set that pairs each preprocessor with each model:
# three preprocessors (a variable selection, a formula, a recipe) and two
# model specifications give six workflows, named "<preproc>_<model>".
tree_workflows <- workflow_set(
  preproc = list(
    variables = workflow_variables(
      diam,
      c(longitude, latitude, site_order)
    ),
    simple_formula = diam ~ is.na(date_planted) + longitude + latitude,
    trees_recipe = trees_recipe
  ),
  models = list(
    lm = linear_model_spec,
    rf = rand_forest_spec
  )
)

# Print the (not yet fitted) workflow set.
tree_workflows
## # A workflow set/tibble: 6 × 4
## wflow_id info option result
## <chr> <list> <list> <list>
## 1 variables_lm <tibble [1 × 4]> <opts[0]> <list [0]>
## 2 variables_rf <tibble [1 × 4]> <opts[0]> <list [0]>
## 3 simple_formula_lm <tibble [1 × 4]> <opts[0]> <list [0]>
## 4 simple_formula_rf <tibble [1 × 4]> <opts[0]> <list [0]>
## 5 trees_recipe_lm <tibble [1 × 4]> <opts[0]> <list [0]>
## 6 trees_recipe_rf <tibble [1 × 4]> <opts[0]> <list [0]>
# Fit every workflow on the training data and predict on the test data.
# rowwise() makes each mutate() expression operate on one workflow at a
# time; results are wrapped in list() so they are stored as list-columns.
tree_predictions <- tree_workflows %>%
  rowwise() %>%
  mutate(
    # The workflow object lives in the `workflow` column of the per-row
    # `info` tibble.
    fitted_wf = list(fit(info$workflow[[1]], data = trees_training))
  ) %>%
  mutate(
    pred = list(predict(fitted_wf, new_data = trees_testing))
  )
# Score each workflow: RMSE of its test-set predictions against the
# observed `diam` values. The input is still rowwise, so `pred` refers to
# the prediction tibble of the current row.
tree_predictions %>%
  mutate(
    rmse = rmse_vec(truth = trees_testing$diam, estimate = pred$.pred)
  )
## # A tibble: 6 × 7
## # Rowwise:
## wflow_id info option result fitted_wf pred rmse
## <chr> <list> <list> <list> <list> <list> <dbl>
## 1 variables_lm <tibble [1 × 4]> <opts[0]> <list> <workflow> <tibble> 0.367
## 2 variables_rf <tibble [1 × 4]> <opts[0]> <list> <workflow> <tibble> 0.310
## 3 simple_formula_lm <tibble [1 × 4]> <opts[0]> <list> <workflow> <tibble> 0.356
## 4 simple_formula_rf <tibble [1 × 4]> <opts[0]> <list> <workflow> <tibble> 0.337
## 5 trees_recipe_lm <tibble [1 × 4]> <opts[0]> <list> <workflow> <tibble> 0.316
## 6 trees_recipe_rf <tibble [1 × 4]> <opts[0]> <list> <workflow> <tibble> 0.319