13.3 Finalizing the model

There are two ways to choose the final parameter values:
- manually choose the parameters, or
- use select_best()
# Option 1: let select_best() report the tuning candidate that ranks best on
# a single metric.
# NOTE(review): this call ranks by "iic", while the workflow finalized further
# down uses metric = "rmse" -- confirm which metric is intended.
select_best(mlp_sfd_tune, metric = "iic")

# or, a manual selection that corresponds to the regular method
reg_param <-
  tibble(
    num_comp = 0,
    epochs = 200,
    hidden_units = 5,
    # NOTE(review): the original note says "log10"; confirm whether 1 is meant
    # as the raw penalty value or as a log10 exponent.
    penalty = 1 # log10
  )

# Printed output follows (presumably from the select_best() call above, since
# reg_param has only four columns):
# A tibble: 1 x 5
hidden_units penalty epochs num_comp .config
<int> <dbl> <int> <int> <chr>
1 9 0.00780 158 14 Preprocessor06_Model1
# Replace the tuning placeholders in the workflow with the parameter values of
# the candidate that select_best() ranks first on RMSE.
final_sfd_wflow <-
  mlp_wflow %>%
  finalize_workflow(select_best(mlp_sfd_tune, metric = "rmse"))

# Now the model can fit the entire training set:
# Fit the finalized workflow on Chicago_train.
final_sfd_fit <-
  final_sfd_wflow %>%
  fit(Chicago_train)

# This object can now be used to make future predictions on new data.
# Test-set performance: predict on Chicago_test, attach the predictions to the
# test rows so the observed ridership is alongside .pred, then compute RMSE.
final_sfd_fit %>%
  predict(new_data = Chicago_test) %>%
  bind_cols(Chicago_test) %>%
  rmse(truth = ridership, estimate = .pred)
# A tibble: 1 x 3
.metric .estimator .estimate
<chr> <chr> <dbl>
1 rmse standard 3.01
# Test-set predictions joined to the test rows, with the day of the week
# (as a labelled factor) added for colouring.
test_predictions <-
  predict(final_sfd_fit, new_data = Chicago_test) %>%
  bind_cols(Chicago_test) %>%
  mutate(weekday = wday(date, label = TRUE))

# Observed vs. predicted ridership, one point per test row.
ggplot(
  test_predictions,
  aes(x = ridership, y = .pred, color = weekday)
) +
  geom_point() +
  # geom_abline() defaults to intercept 0 / slope 1, i.e. the line of perfect
  # agreement between observed and predicted values.
  geom_abline(color = "red") +
  scale_color_brewer(type = "qual") +
  # Same scale on both axes, and make sure the origin is included.
  coord_fixed() +
  expand_limits(x = 0, y = 0)

# No plot argument: ggsave() writes the most recent plot.
ggsave("images/13_test_performance.png")
I learned here that the recipe must exclude the dependent variable for predict() to run on the test data.