Example: Forming a linear predictor from a multiple regression
From https://github.com/behrman/ros
Simulated data.
set.seed(33)
n <- 100
k <- 10
a <- 1
b <- 1:k
theta <- 5
sigma <- 2
data_2 <-
tibble(
X = matrix(runif(n * k, min = 0, max = 1), nrow = n, ncol = k),
z = rep(0:1, n / 2) %>% sample(),
y = as.double(a + X %*% b + theta * z + rnorm(n, mean = 0, sd = sigma))
)
Fit linear regression model.
## stan_glm
## family: gaussian [identity]
## formula: y ~ X + z
## observations: 100
## predictors: 12
## ------
## Median MAD_SD
## (Intercept) 0.8 1.1
## X1 0.4 0.7
## X2 1.6 0.7
## X3 3.0 0.7
## X4 4.0 0.7
## X5 4.9 0.7
## X6 5.1 0.8
## X7 6.8 0.7
## X8 8.2 0.7
## X9 9.0 0.7
## X10 12.0 0.8
## z 5.5 0.4
##
## Auxiliary parameter(s):
## Median MAD_SD
## sigma 2.0 0.2
##
## ------
## * For help interpreting the printed output see ?print.stanreg
## * For info on the priors used see ?prior_summary.stanreg
Outcome vs. predicted value.
data_2 %>%
mutate(pred = predict(fit_2)) %>%
ggplot(aes(pred, y, color = factor(z))) +
geom_point() +
geom_abline(slope = 1, intercept = 0) +
coord_fixed() +
scale_y_continuous(breaks = scales::breaks_width(10)) +
labs(
title = "Outcome vs. predicted value",
x = "Predicted value",
y = "Outcome",
color = "Treatment"
)