kidiq <-read.csv("data/kidiq.csv")# assign each row to a random foldkidiq$fold <-sample(rep(1:10,ceiling(nrow(kidiq)/10)))[1:nrow(kidiq)]kidiq <-as_tibble(kidiq) |>mutate(row_id =row_number())kidiq$score <-0
set.seed(33)for( i in1:10){ fit <-stan_glm(kid_score ~ mom_hs + mom_iq, data = kidiq[kidiq$fold != i,], refresh =0) testdat <- kidiq[kidiq$fold == i,] sigma <-as.data.frame(fit)$sigma# This for loop 'smells' , but worksfor(j in1:nrow(testdat)) { newdat <- testdat[j,] pred <-posterior_linpred(fit,newdata = newdat) testdat[j,]$score <-log(mean(dnorm(pred - newdat$kid_score, mean =0, sd = sigma))) } kidiq <-rows_update(kidiq, testdat, by ='row_id')}
sum(kidiq$score)# -1877.47
sd(kidiq$score)*sqrt(nrow(kidiq))# 14.3
There exists a function kfold that can do all this for you though!
set.seed(33)kfold_3 <-kfold(fit_3)kfold_3
##
## Based on 10-fold cross-validation.
##
## Estimate SE
## elpd_kfold -1876.6 14.2
## p_kfold 4.5 0.7
## kfoldic 3753.1 28.5