8.5 Posterior analysis with MCMC
Markov chains of \(\pi\) for 10,000 iterations each.
8.5.1 Posterior simulation
library(rstan)
# STEP 1: DEFINE the model
<- "
art_model data {
int<lower = 0, upper = 100> Y;
}
parameters {
real<lower = 0, upper = 1> pi;
}
model {
Y ~ binomial(100, pi);
pi ~ beta(4, 6);
}
"
# STEP 2: SIMULATE the posterior
<- stan(model_code = art_model,
art_sim data = list(Y = 14),
chains = 4,
iter = 5000*2,
seed = 84735)
library(bayesplot)
# Parallel trace plots & density plots
mcmc_trace(art_sim, pars = "pi", size = 0.5) +
xlab("iteration")
mcmc_dens_overlay(art_sim, pars = "pi")
# Autocorrelation plot
mcmc_acf(art_sim, pars = "pi")
# Markov chain diagnostics
rhat(art_sim, pars = "pi")
## [1] 1.000811
neff_ratio(art_sim, pars = "pi")
## [1] 0.399955
8.5.2 Posterior estimation & hypothesis testing
Combined 20,000 Markov chain values.
# The actual Beta(18, 92) posterior
plot_beta(alpha = 18, beta = 92) +
lims(x = c(0, 0.35))
# MCMC posterior approximation
mcmc_dens(art_sim, pars = "pi") +
lims(x = c(0,0.35))
library(broom.mixed)
tidy(art_sim, conf.int = TRUE, conf.level = 0.95)
## # A tibble: 1 × 5
## term estimate std.error conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 pi 0.162 0.0351 0.101 0.237
# Shade in the middle 95% interval
mcmc_areas(art_sim, pars = "pi", prob = 0.95)
# Store the 4 chains in 1 data frame
<- as.data.frame(art_sim, pars = "lp__", include = FALSE)
art_chains_df dim(art_chains_df)
## [1] 20000 1
# Calculate posterior summaries of pi
%>%
art_chains_df summarize(post_mean = mean(pi),
post_median = median(pi),
post_mode = sample_mode(pi),
lower_95 = quantile(pi, 0.025),
upper_95 = quantile(pi, 0.975))
## post_mean post_median post_mode lower_95 upper_95
## 1 0.1637786 0.1618674 0.1507449 0.1006319 0.2374311
Calculate summary statistics directly from the Markov chain values.
We can approximate the posterior probability \[P(\pi < 0.20 | Y =14)\]
library(janitor)
# Tabulate pi values that are below 0.20
%>%
art_chains_df mutate(exceeds = pi < 0.20) %>%
tabyl(exceeds)
## exceeds n percent
## FALSE 3042 0.1521
## TRUE 16958 0.8479
Compare results
8.5.3 Posterior prediction
Utilize the Markov chain values to approximate the posterior predictive model of \(Y'\). Repeating previous assumptions (sampling and posterior variability) and using rbinom()
, we obtain:
# Set the seed
set.seed(1)
# Predict a value of Y' for each pi value in the chain
<- art_chains_df %>%
art_chains_df mutate(y_predict = rbinom(length(pi),
size = 20,
prob = pi))
# Check it out
%>%
art_chains_df head(3)
## pi y_predict
## 1 0.10809936 1
## 2 0.08175123 1
## 3 0.09811126 2
# Plot the 20,000 predictions
ggplot(art_chains_df, aes(x = y_predict)) +
stat_count()
Approximate the posterior mean prediction \(E(Y'|Y=14)\) and the posterior prediction interval for \(Y'\).
%>%
art_chains_df summarize(mean = mean(y_predict),
lower_80 = quantile(y_predict, 0.1),
upper_80 = quantile(y_predict, 0.9))
## mean lower_80 upper_80
## 1 3.2793 1 6