8.5 Posterior analysis with MCMC
Markov chains of \(\pi\) for 10,000 iterations each.
8.5.1 Posterior simulation
library(rstan)
# STEP 1: DEFINE the model
art_model <- "
data {
int<lower = 0, upper = 100> Y;
}
parameters {
real<lower = 0, upper = 1> pi;
}
model {
Y ~ binomial(100, pi);
pi ~ beta(4, 6);
}
"
# STEP 2: SIMULATE the posterior
art_sim <- stan(model_code = art_model,
data = list(Y = 14),
chains = 4,
iter = 5000*2,
seed = 84735)library(bayesplot)
# Parallel trace plots & density plots
mcmc_trace(art_sim, pars = "pi", size = 0.5) +
xlab("iteration")
mcmc_dens_overlay(art_sim, pars = "pi")
# Autocorrelation plot
mcmc_acf(art_sim, pars = "pi")
# Markov chain diagnostics
rhat(art_sim, pars = "pi")## [1] 1.000811
neff_ratio(art_sim, pars = "pi")## [1] 0.399955
8.5.2 Posterior estimation & hypothesis testing
Combined 20,000 Markov chain values.
# The actual Beta(18, 92) posterior
plot_beta(alpha = 18, beta = 92) +
lims(x = c(0, 0.35))
# MCMC posterior approximation
mcmc_dens(art_sim, pars = "pi") +
lims(x = c(0,0.35))
library(broom.mixed)
tidy(art_sim, conf.int = TRUE, conf.level = 0.95)## # A tibble: 1 × 5
## term estimate std.error conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 pi 0.162 0.0351 0.101 0.237
# Shade in the middle 95% interval
mcmc_areas(art_sim, pars = "pi", prob = 0.95)
# Store the 4 chains in 1 data frame
art_chains_df <- as.data.frame(art_sim, pars = "lp__", include = FALSE)
dim(art_chains_df)## [1] 20000 1
# Calculate posterior summaries of pi
art_chains_df %>%
summarize(post_mean = mean(pi),
post_median = median(pi),
post_mode = sample_mode(pi),
lower_95 = quantile(pi, 0.025),
upper_95 = quantile(pi, 0.975))## post_mean post_median post_mode lower_95 upper_95
## 1 0.1637786 0.1618674 0.1507449 0.1006319 0.2374311
Calculate summary statistics directly from the Markov chain values.
We can approximate the posterior probability \[P(\pi < 0.20 | Y =14)\]
library(janitor)
# Tabulate pi values that are below 0.20
art_chains_df %>%
mutate(exceeds = pi < 0.20) %>%
tabyl(exceeds)## exceeds n percent
## FALSE 3042 0.1521
## TRUE 16958 0.8479
Compare results

8.5.3 Posterior prediction
Utilize the Markov chain values to approximate the posterior predictive model of \(Y'\). Repeating previous assumptions (sampling and posterior variability) and using rbinom(), we obtain:
# Set the seed
set.seed(1)
# Predict a value of Y' for each pi value in the chain
art_chains_df <- art_chains_df %>%
mutate(y_predict = rbinom(length(pi),
size = 20,
prob = pi))
# Check it out
art_chains_df %>%
head(3)## pi y_predict
## 1 0.10809936 1
## 2 0.08175123 1
## 3 0.09811126 2
# Plot the 20,000 predictions
ggplot(art_chains_df, aes(x = y_predict)) +
stat_count()
Approximate the posterior mean prediction \(E(Y'|Y=14)\) and the posterior prediction interval for \(Y'\).
art_chains_df %>%
summarize(mean = mean(y_predict),
lower_80 = quantile(y_predict, 0.1),
upper_80 = quantile(y_predict, 0.9))## mean lower_80 upper_80
## 1 3.2793 1 6