29.3 Prediction

To generate predictions from our model,

# Fit a linear model of y on x using the sim1 data.
sim1_mod <- lm(y ~ x, data = sim1)
# Display the fitted coefficients.
coef(sim1_mod)

we use data_grid() to build a grid of the x values for which we want predictions:

# Grid of x values taken from sim1; predictions are attached to it later.
grid <- data_grid(sim1, x)

and add the predictions with the add_predictions() function:

# Attach the predictions of sim1_mod to the grid; the plot below reads them
# from the `pred` column.
data_pred <- grid %>%
  add_predictions(sim1_mod)

head(data_pred)

# Observed points, the model predictions as a thick red line, and ggplot2's
# own lm smoother (without its confidence band) for comparison.
# `se = FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned, whereas `FALSE` is a reserved word.
ggplot(sim1, aes(x, y)) +
  geom_point() +
  geom_line(aes(y = pred), data = data_pred, colour = "red", linewidth = 2) +
  geom_smooth(method = "lm", se = FALSE)

As before, we can add residuals to our data with the add_residuals() function to visualize their trend:

# Append the residuals of sim1_mod to the original observations; the plot
# below reads them from the `resid` column.
data_res <- add_residuals(sim1, sim1_mod)

head(data_res)

# Residuals against x, with a reference line at zero.
ggplot(data = data_res, mapping = aes(x = x, y = resid)) +
  geom_ref_line(h = 0) +
  geom_point()

29.3.1 Interaction

When further investigation is needed, we can fit models with and without an interaction term:

# Additive model: y explained by x1 and x2 with no interaction term.
mod1 <- lm(y ~ x1 + x2, data = sim3)
# Interaction model: `x1 * x2` expands to x1 + x2 + x1:x2.
mod2 <- lm(y ~ x1 * x2, data = sim3)

In this case, since we have two models to compare, we use gather_predictions():

# Grid over x1 and x2 from sim3, with the predictions of both models stacked
# in long form; the `model` column identifies which model produced each row.
grid2 <- gather_predictions(data_grid(sim3, x1, x2), mod1, mod2)

# Number of prediction rows contributed by each model.
count(grid2, model)

head(grid2)

# Observations coloured by x2, with each model's predicted lines in its own
# facet.
ggplot(data = sim3, mapping = aes(x = x1, y = y, colour = x2)) +
  geom_point() +
  geom_line(mapping = aes(y = pred), data = grid2) +
  facet_wrap(~ model)

and, likewise, gather_residuals():

# Residuals of both models against x1: one row of panels per model, one
# column per level of x2.
ggplot(
  data = gather_residuals(sim3, mod1, mod2),
  mapping = aes(x = x1, y = resid, colour = x2)
) +
  geom_point() +
  facet_grid(model ~ x2)

To conclude, we look at the output of different transformations:

# Simulated data: 50 evenly spaced x values on [0, 3.5 * pi] and a noisy sine
# response. `length.out` is written in full rather than relying on partial
# matching of `length`.
sim5 <- tibble(
  x = seq(0, 3.5 * pi, length.out = 50),
  # Standard-normal noise, one draw per x value; results vary between runs
  # because no seed is set in this snippet.
  y = 4 * sin(x) + rnorm(length(x))
)

# Scatter plot of the simulated data.
ggplot(data = sim5, mapping = aes(x = x, y = y)) +
  geom_point()
# Fit five linear models on sim5, passing 1 to 5 as the second argument of
# ns(). NOTE(review): ns() is presumably splines::ns (natural spline basis,
# second argument = degrees of freedom) -- confirm splines is attached.
# These assignments overwrite the mod1 and mod2 fitted on sim3 earlier.
mod1 <- lm(y ~ ns(x, 1), data = sim5)
mod2 <- lm(y ~ ns(x, 2), data = sim5)
mod3 <- lm(y ~ ns(x, 3), data = sim5)
mod4 <- lm(y ~ ns(x, 4), data = sim5)
mod5 <- lm(y ~ ns(x, 5), data = sim5)

# Grid of 50 x values from seq_range() with expand = 0.1, carrying the
# predictions of all five models. `.pred = "y"` stores the predictions in a
# column named y, so the plot below can reuse the same aesthetics for the
# fitted lines. This assignment replaces the earlier `grid` object.
grid <- gather_predictions(
  data_grid(sim5, x = seq_range(x, n = 50, expand = 0.1)),
  mod1, mod2, mod3, mod4, mod5,
  .pred = "y"
)

# Observed points with each model's fitted curve in red, one facet per model.
ggplot(data = sim5, mapping = aes(x = x, y = y)) +
  geom_point() +
  geom_line(data = grid, colour = "red") +
  facet_wrap(~ model)