13.3 Prediction & classification

weather%>%pull(humidity9am)%>%range()
## [1] 13 99

Posterior predictions of binary outcome:

set.seed(84735)
binary_prediction <- posterior_predict(
  rain_model_1, 
  newdata = data.frame(humidity9am = 99))
set.seed(84735)
rain_model_1_df <- as.data.frame(rain_model_1) %>% 
  mutate(log_odds = `(Intercept)` + humidity9am*99,
         odds = exp(log_odds),
         prob = odds / (1 + odds),
         Y = rbinom(20000, size = 1, prob = prob))

rain_model_1_df %>% head
##   (Intercept) humidity9am    log_odds      odds      prob Y
## 1   -4.244453  0.04454776  0.16577499 1.1803075 0.5413491 0
## 2   -4.207118  0.04209875 -0.03934156 0.9614223 0.4901659 1
## 3   -3.899987  0.03877941 -0.06082497 0.9409879 0.4847984 0
## 4   -3.942503  0.03948000 -0.03398243 0.9665885 0.4915052 1
## 5   -4.743743  0.05139157  0.34402150 1.4106090 0.5851671 0
## 6   -4.405777  0.04565130  0.11370120 1.1204173 0.5283947 1
weather%>%
  group_by(raintomorrow)%>%
  reframe(avg=mean(humidity9am))%>%
  mutate(prop=avg/sum(avg))
## # A tibble: 2 × 3
##   raintomorrow   avg  prop
##   <fct>        <dbl> <dbl>
## 1 No            58.4 0.449
## 2 Yes           71.6 0.551
mcmc_hist(binary_prediction) + 
  labs(x = "Y")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(rain_model_1_df, aes(x = Y)) + 
  stat_count()

# Summarize the posterior predictions of Y
table(binary_prediction)
## binary_prediction
##     0     1 
##  9196 10804
colMeans(binary_prediction)
##      1 
## 0.5402