Returning to the example

First, let's try the last idea: adjusting for pre-treatment variables.

exp <- gen_random_exp()
stan_glm(y ~ z + Female + Age, data = exp, refresh = 0)
## stan_glm
##  family:       gaussian [identity]
##  formula:      y ~ z + Female + Age
##  observations: 8
##  predictors:   4
## ------
##             Median MAD_SD
## (Intercept) 99.4    4.6  
## z           -7.5    1.9  
## Female       2.5    2.1  
## Age          1.0    0.1  
## 
## Auxiliary parameter(s):
##       Median MAD_SD
## sigma 1.9    0.8   
## 
## ------
## * For help interpreting the printed output see ?print.stanreg
## * For info on the priors used see ?prior_summary.stanreg

Better than our completely randomized analysis: adjusting for the pre-treatment variables gives a noticeably more precise estimate of the coefficient on z.
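For a direct comparison on the same draw, we can refit without the pre-treatment adjustments. This is just a sketch that repeats the unadjusted analysis from before (output omitted); its MAD_SD on z will typically come out larger than the 1.9 above.

# Sketch: unadjusted fit on the same draw, for comparison with the
# adjusted model above (rstanarm assumed loaded, as earlier).
stan_glm(y ~ z, data = exp, refresh = 0)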

We can also estimate the uncertainty in the effect estimate through simulation, using lm instead of stan_glm to avoid degenerate cases and to speed things up.

set.seed(42)
n <- 30
avg_eff <- rep(0,n)
for (i in 1:n) {
  # draw a new randomized experiment and fit the adjusted regression
  exp <- gen_random_exp()
  fit <- lm(y ~ z + Female + Age, data = exp)
  avg_eff[i] <- coef(fit)[2]  # coefficient on z
}

c(mean(avg_eff), sd(avg_eff))
## [1] -7.500000  0.379049

18.5.1 Block Randomization

One way to set up the blocking is to use only age: the four oldest in one block, the four youngest in the other.

gen_random_block_exp <- function() {
  hypo_data |>
    mutate(block = if_else(Age > 55, 1, 0)) |>          # two age blocks of four
    group_by(block) |>
    mutate(z = sample(c(rep(0, 2), rep(1, 2)), 4)) |>   # 2 treated, 2 control per block
    ungroup() |>
    mutate(y = if_else(z == 1, y1, y0))                 # observed potential outcome
}

  
n <- 100
avg_eff <- rep(0, n)
for (i in 1:n) {
  # difference in mean outcomes between treated and control for one blocked draw
  exp <- gen_random_block_exp() |> group_by(z) |> summarise(res = mean(y))
  avg_eff[i] <- (exp |> filter(z == 1))$res - (exp |> filter(z == 0))$res
}

c(mean(avg_eff), sd(avg_eff))
## [1] -7.012500  3.226973

Better than complete randomization, as expected.
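To make the comparison concrete, here is a sketch of the same difference-in-means simulation under complete randomization, reusing gen_random_exp() from before; the standard deviation it reports should come out larger than the 3.2 of the blocked design.

# Sketch: difference-in-means simulation under complete randomization,
# to compare its sd with the blocked design above.
n <- 100
avg_eff_cr <- rep(0, n)
for (i in 1:n) {
  sim <- gen_random_exp() |> group_by(z) |> summarise(res = mean(y))
  avg_eff_cr[i] <- (sim |> filter(z == 1))$res - (sim |> filter(z == 0))$res
}
c(mean(avg_eff_cr), sd(avg_eff_cr))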

Another way to get the same result is to regress y on z alone. Note that block is not really needed as a predictor since the design is balanced within blocks; we get the same result either way.

set.seed(33)
n <- 30
avg_eff <- rep(0,n)
for (i in 1:n) {
  exp <- gen_random_block_exp()
  fit <- lm(y ~ z, data = exp)
  # equivalently: fit <- lm(y ~ z + block, data = exp) gives the same z coefficient
  avg_eff[i] <- coef(fit)[2]  # coefficient on z
}

c(mean(avg_eff), sd(avg_eff))
## [1] -7.625000  3.188577
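We can check the "same result either way" claim directly on a single draw: because each block contains exactly two treated and two control units, z and block are uncorrelated, so adding block cannot change the estimated coefficient on z. A quick sketch:

# Check: with a balanced blocked design, the z coefficient is identical
# whether or not block is included in the regression.
exp <- gen_random_block_exp()
coef(lm(y ~ z, data = exp))["z"]
coef(lm(y ~ z + block, data = exp))["z"]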

If we add Age, it works better (Age completely splits the data into perfect pairs, though they were not perfectly sampled).

set.seed(33)
n <- 30
avg_eff <- rep(0,n)
for (i in 1:n) {
  exp <- gen_random_block_exp()
  fit <- lm(y ~ z + Age, data = exp)
  avg_eff[i] <- coef(fit)[2]  # coefficient on z
}

c(mean(avg_eff), sd(avg_eff))
## [1] -7.491228  1.026754
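To see why Age helps so much, we can inspect the ages directly (a sketch, assuming hypo_data as defined earlier); if Age really does split the eight units into pairs, each value will show up twice, or at least in closely matched pairs.

# Sketch: inspect how Age pairs up the eight units.
hypo_data |> select(Age, Female) |> arrange(Age)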

(If we also add block to the regression above, the errors increase due to collinearity between block and Age.)

set.seed(33)
n <- 30
avg_eff <- rep(0,n)
for (i in 1:n) {
  exp <- gen_random_block_exp()
  fit <- lm(y ~ z + block + Age, data = exp)
  avg_eff[i] <- coef(fit)[2]  # coefficient on z
}

c(mean(avg_eff), sd(avg_eff))
## [1] -7.500000  1.969596
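The collinearity is easy to see directly: block is defined as if_else(Age > 55, 1, 0), so it is a deterministic function of Age and the two predictors are strongly correlated. A quick check (the exact value depends on the ages in hypo_data):

# Quick check: block is a deterministic function of Age, so the two
# predictors are highly correlated, which inflates standard errors
# when both are included with only 8 observations.
with(gen_random_block_exp(), cor(block, Age))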