8.5 Posterior analysis with MCMC

Markov chains of \(\pi\) for 10,000 iterations each.

8.5.1 Posterior simulation

library(rstan)
# STEP 1: DEFINE the model
art_model <- "
  data {
    int<lower = 0, upper = 100> Y;
  }
  parameters {
    real<lower = 0, upper = 1> pi;
  }
  model {
    Y ~ binomial(100, pi);
    pi ~ beta(4, 6);
  }
"

# STEP 2: SIMULATE the posterior
art_sim <- stan(model_code = art_model, 
                data = list(Y = 14), 
                chains = 4,
                iter = 5000*2, 
                seed = 84735)
library(bayesplot)
# Parallel trace plots & density plots
mcmc_trace(art_sim, pars = "pi", size = 0.5) + 
  xlab("iteration")

mcmc_dens_overlay(art_sim, pars = "pi")

# Autocorrelation plot
mcmc_acf(art_sim, pars = "pi")

# Markov chain diagnostics
rhat(art_sim, pars = "pi")
## [1] 1.000811
neff_ratio(art_sim, pars = "pi")
## [1] 0.399955

8.5.2 Posterior estimation & hypothesis testing

Combined 20,000 Markov chain values.

# The actual Beta(18, 92) posterior
plot_beta(alpha = 18, beta = 92) + 
  lims(x = c(0, 0.35))

# MCMC posterior approximation
mcmc_dens(art_sim, pars = "pi") + 
  lims(x = c(0,0.35))

library(broom.mixed)
tidy(art_sim, conf.int = TRUE, conf.level = 0.95)
## # A tibble: 1 × 5
##   term  estimate std.error conf.low conf.high
##   <chr>    <dbl>     <dbl>    <dbl>     <dbl>
## 1 pi       0.162    0.0351    0.101     0.237
# Shade in the middle 95% interval
mcmc_areas(art_sim, pars = "pi", prob = 0.95)

# Store the 4 chains in 1 data frame
art_chains_df <- as.data.frame(art_sim, pars = "lp__", include = FALSE)
dim(art_chains_df)
## [1] 20000     1
# Calculate posterior summaries of pi
art_chains_df %>% 
  summarize(post_mean = mean(pi), 
            post_median = median(pi),
            post_mode = sample_mode(pi),
            lower_95 = quantile(pi, 0.025),
            upper_95 = quantile(pi, 0.975))
##   post_mean post_median post_mode  lower_95  upper_95
## 1 0.1637786   0.1618674 0.1507449 0.1006319 0.2374311

Calculate summary statistics directly from the Markov chain values.

We can approximate the posterior probability \[P(\pi < 0.20 | Y =14)\]

library(janitor)
# Tabulate pi values that are below 0.20
art_chains_df %>% 
  mutate(exceeds = pi < 0.20) %>% 
  tabyl(exceeds)
##  exceeds     n percent
##    FALSE  3042  0.1521
##     TRUE 16958  0.8479

Compare results

8.5.3 Posterior prediction

Utilize the Markov chain values to approximate the posterior predictive model of \(Y'\). Repeating previous assumptions (sampling and posterior variability) and using rbinom(), we obtain:

# Set the seed
set.seed(1)

# Predict a value of Y' for each pi value in the chain
art_chains_df <- art_chains_df %>% 
  mutate(y_predict = rbinom(length(pi), 
                            size = 20, 
                            prob = pi))

# Check it out
art_chains_df %>% 
  head(3)
##           pi y_predict
## 1 0.10809936         1
## 2 0.08175123         1
## 3 0.09811126         2
# Plot the 20,000 predictions
ggplot(art_chains_df, aes(x = y_predict)) + 
  stat_count()

Approximate the posterior mean prediction \(E(Y'|Y=14)\) and the posterior prediction interval for \(Y'\).

art_chains_df %>% 
  summarize(mean = mean(y_predict),
            lower_80 = quantile(y_predict, 0.1),
            upper_80 = quantile(y_predict, 0.9))
##     mean lower_80 upper_80
## 1 3.2793        1        6