13.3 Finalizing the model
Two methods:
manually choose parameters, or
select_best()
# Option 1: let tune pick the best candidate from the tuning results.
# NOTE(review): this call ranks by "iic", while the finalize step later in
# these notes uses "rmse" -- confirm which metric is intended.
select_best(mlp_sfd_tune, metric = "iic")

# or, a manual selection that corresponds to the regular method
reg_param <-
  tibble(
    num_comp = 0,
    epochs = 200,
    hidden_units = 5,
    penalty = 1 # log10
  )
# A tibble: 1 x 5
hidden_units penalty epochs num_comp .config
<int> <dbl> <int> <int> <chr>
1 9 0.00780 158 14 Preprocessor06_Model1
# Plug the best-RMSE parameter combination from the tuning results into the
# workflow, replacing its tuning placeholders with concrete values.
final_sfd_wflow <-
  mlp_wflow %>%
  finalize_workflow(select_best(mlp_sfd_tune, metric = "rmse"))
Now the model can fit the entire training set:
# Fit the finalized workflow on the full training set.
final_sfd_fit <-
  final_sfd_wflow %>%
  fit(Chicago_train)
This object can now be used to make future predictions on new data.
# Predict on the test set, attach the predictions to the test data, and
# compute RMSE of the predictions against the observed ridership.
final_sfd_fit %>%
  predict(new_data = Chicago_test) %>%
  bind_cols(Chicago_test) %>%
  rmse(truth = ridership, estimate = .pred)
# A tibble: 1 x 3
.metric .estimator .estimate
<chr> <chr> <dbl>
1 rmse standard 3.01
# Plot observed ridership (x) against test-set predictions (y), colored by
# day of the week.
final_sfd_fit %>%
  predict(new_data = Chicago_test) %>%
  bind_cols(Chicago_test) %>%
  mutate(weekday = wday(date, label = TRUE)) %>%
  ggplot(aes(ridership, .pred, color = weekday)) +
  geom_point() +
  # geom_abline() defaults to slope 1, intercept 0: the perfect-prediction line
  geom_abline(color = "red") +
  scale_color_brewer(type = "qual") +
  # equal axis scaling, with both axes extended to include the origin
  coord_fixed() +
  expand_limits(x = 0, y = 0)

# With no `plot` argument, ggsave() writes the last plot displayed.
ggsave("images/13_test_performance.png")
I learned here that the recipe steps must exclude the dependent (outcome)
variable in order for predict() to run on the test data.