29.10 Feature Engineering
# Count the rows of `flights` per calendar date. The date is assembled from
# the separate year/month/day columns directly inside group_by().
daily <- flights %>%
  group_by(date = make_date(year, month, day)) %>%
  summarise(n = n())

# Line chart of the daily row count over time.
ggplot(daily, aes(x = date, y = n)) +
  geom_line()
Feature engineering: using the existing data to derive new variables (features) for use in models.
# Derive a day-of-week feature from the date. `label = TRUE` requests day
# names instead of numbers; `week_start = 1` makes Monday the first level.
daily <- mutate(daily, wday = wday(date, label = TRUE, week_start = 1))

# Distribution of the daily count for each day of the week.
ggplot(daily, aes(x = wday, y = n)) +
  geom_boxplot()
# Linear model of the daily count with day of week as the only predictor.
mod <- lm(n ~ wday, data = daily)

# One row per distinct `wday` value, with the model prediction stored in a
# column named `n` so it shares the y aesthetic with the observed counts.
grid <- add_predictions(data_grid(daily, wday), mod, var = "n")

# Observed distributions with the model's per-weekday prediction overlaid
# as a large red point.
ggplot(daily, aes(x = wday, y = n)) +
  geom_boxplot() +
  geom_point(data = grid, colour = "red", size = 4)
# Attach the residuals of `mod` to the daily data (default column: `resid`).
daily <- add_residuals(daily, mod)

# Residuals over time with a reference line at zero; reused by the variants
# below.
base_plot <- ggplot(daily, aes(x = date, y = resid)) +
  geom_ref_line(h = 0) +
  geom_line()

# 1. Plain residual series.
base_plot

# 2. Same series, one coloured line per day of the week.
base_plot +
  aes(color = wday)

# 3. Same series with a smoother added (no confidence band, span of 0.2).
base_plot +
  geom_smooth(se = FALSE, span = 0.20)
# Label each date with the term ("spring", "summer" or "fall") whose date
# range it falls into.
#
# `date` is passed straight to cut(); the result is a factor with the three
# term labels as levels. Four break dates delimit the three intervals.
term <- function(date) {
  term_breaks <- ymd(20130101, 20130605, 20130825, 20140101)
  term_labels <- c("spring", "summer", "fall")
  cut(date, breaks = term_breaks, labels = term_labels)
}
# Add the term feature to the daily data.
daily <- mutate(daily, term = term(date))

# Robust linear model (MASS::rlm) with day of week, term and their
# interaction as predictors.
mod2 <- MASS::rlm(n ~ wday * term, data = daily)

# Residuals of `mod2` over time. They are computed inside the pipeline, so
# the `resid` column stored in `daily` is left untouched.
daily %>%
  add_residuals(mod2, var = "resid") %>%
  ggplot(aes(x = date, y = resid)) +
  geom_hline(yintercept = 0, linewidth = 2, colour = "white") +
  geom_line()