Confidence Interval Simulation
This is simulation of 100 draws from a distribution with mean 6 and standard deviation 40. The standard error will be close to 4.
library(dplyr)
library(ggplot2)
set.seed(42)
mu <- 6
sig <- 40
n_draws <- 100
n_reps <- 1000
sim_coverage <- tibble(
estimate = rep(0,n_reps), se = estimate
)
for(i in 1:n_reps){
sim_data <- rnorm(n_draws,mean=mu, sd= sig)
sim_coverage$estimate[[i]] <- mean(sim_data)
sim_coverage$se[[i]] = sd(sim_data)/sqrt(n_draws)
}
sim_coverage <- sim_coverage |>
mutate(min_95 = qt(0.025,n_draws-1)*se + estimate,
max_95 = qt(0.975,n_draws-1)*se + estimate,
min_50 = qt(0.25,n_draws-1)*se + estimate,
max_50 = qt(0.75,n_draws-1)*se + estimate,
covered_95 = min_95 <= mu & max_95 >= mu,
covered_50 = min_50 <= mu & max_50 >= mu
)
ggplot(head(sim_coverage,100),aes(x= 1:100, y = estimate)) +
geom_pointrange(aes(ymin = min_95,ymax = max_95))+
geom_pointrange(aes(ymin = min_50,ymax = max_50), linewidth= 1.5) +
geom_hline(aes(yintercept=6)) +
xlab('Simulation')
We expect on about 50% of the time that the 50% CI contains the true value, while we expect 95% of the time that the 95% CI contains the true value.
sim_coverage |> summarise(p_covered_95 = mean(sim_coverage$covered_95),
p_covered_50 = mean(sim_coverage$covered_50))
## # A tibble: 1 × 2
## p_covered_95 p_covered_50
## <dbl> <dbl>
## 1 0.953 0.493