19.7 Case Studies (side note)

Sometimes you want to run a bunch of models, without having to copy/paste each one.

But you also want the summary function to show the actual model call (e.g., lm(formula = mpg ~ hp, data = mtcars)), not one that hides the real variables behind placeholders (e.g., lm(y ~ x, data = data)).

We can achieve this by building expressions and unquoting as needed:

library(purrr)
library(rlang)

# One row per model: predictor name in `x`, response name in `y`.
vars <- data.frame(
  x = rep("hp", 2),
  y = c("mpg", "cyl")
)

# Turn the column names from strings into symbols so they can be unquoted
# into expressions.
x_sym <- syms(vars[["x"]])
y_sym <- syms(vars[["y"]])

# Pair the symbols element-wise into `response ~ predictor` expressions.
formulae <- map2(x_sym, y_sym, \(pred, resp) expr(!!resp ~ !!pred))
formulae
#> [[1]]
#> mpg ~ hp
#> 
#> [[2]]
#> cyl ~ hp
# Splice each formula into an unevaluated lm() call, so the fitted model's
# `Call:` shows the real formula rather than a placeholder variable.
models <- map(formulae, \(fml) expr(lm(!!fml, data = mtcars)))

# Fit only the first model and print its summary.
models[[1]] |>
  eval() |>
  summary()
#> 
#> Call:
#> lm(formula = mpg ~ hp, data = mtcars)
#> 
#> Residuals:
#>     Min      1Q  Median      3Q     Max 
#> -5.7121 -2.1122 -0.8854  1.5819  8.2360 
#> 
#> Coefficients:
#>             Estimate Std. Error t value Pr(>|t|)    
#> (Intercept) 30.09886    1.63392  18.421  < 2e-16 ***
#> hp          -0.06823    0.01012  -6.742 1.79e-07 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Residual standard error: 3.863 on 30 degrees of freedom
#> Multiple R-squared:  0.6024, Adjusted R-squared:  0.5892 
#> F-statistic: 45.46 on 1 and 30 DF,  p-value: 1.788e-07

As a function:

#' Fit and summarise one simple linear model per predictor/response pair
#'
#' Each model call is built as an expression with the variable names and the
#' `data` expression spliced in, so every summary's `Call:` shows the real
#' formula and data set instead of placeholder variables.
#'
#' @param df A data frame (or list) whose `x` and `y` elements hold the
#'   predictor and response names as strings, paired element-wise.
#' @param data The data set to fit on. It is captured unevaluated, so pass it
#'   by name (e.g. `mtcars`); that name is what appears in each model call.
#' @return A list with one `summary()` result per `x`/`y` pair, in order.
lm_df <- function(df, data) {
  # Evaluate the generated calls where the caller wrote `data`. Evaluating in
  # this function's own frame would let the locals below (`data`, `df`, ...)
  # shadow a caller variable of the same name, and caller-local data sets
  # would not be found at all.
  caller <- parent.frame()

  x_sym <- map(df$x, as.symbol)
  y_sym <- map(df$y, as.symbol)
  # Keep the caller's expression (not its value) so the printed call stays
  # readable.
  data <- enexpr(data)

  formulae <- map2(x_sym, y_sym, \(x, y) expr(!!y ~ !!x))
  models <- map(formulae, \(f) expr(lm(!!f, data = !!data)))

  map(models, \(m) summary(eval(m, caller)))
}

# Model specification: regress mpg and cyl on hp, one model per row.
vars <- data.frame(
  x = rep("hp", 2),
  y = c("mpg", "cyl")
)

# Fit every model in `vars` against mtcars and print the summaries.
lm_df(vars, data = mtcars)
#> [[1]]
#> 
#> Call:
#> lm(formula = mpg ~ hp, data = mtcars)
#> 
#> Residuals:
#>     Min      1Q  Median      3Q     Max 
#> -5.7121 -2.1122 -0.8854  1.5819  8.2360 
#> 
#> Coefficients:
#>             Estimate Std. Error t value Pr(>|t|)    
#> (Intercept) 30.09886    1.63392  18.421  < 2e-16 ***
#> hp          -0.06823    0.01012  -6.742 1.79e-07 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Residual standard error: 3.863 on 30 degrees of freedom
#> Multiple R-squared:  0.6024, Adjusted R-squared:  0.5892 
#> F-statistic: 45.46 on 1 and 30 DF,  p-value: 1.788e-07
#> 
#> 
#> [[2]]
#> 
#> Call:
#> lm(formula = cyl ~ hp, data = mtcars)
#> 
#> Residuals:
#>      Min       1Q   Median       3Q      Max 
#> -2.27078 -0.74879 -0.06417  0.63512  1.74067 
#> 
#> Coefficients:
#>             Estimate Std. Error t value Pr(>|t|)    
#> (Intercept) 3.006795   0.425485   7.067 7.41e-08 ***
#> hp          0.021684   0.002635   8.229 3.48e-09 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Residual standard error: 1.006 on 30 degrees of freedom
#> Multiple R-squared:  0.693,  Adjusted R-squared:  0.6827 
#> F-statistic: 67.71 on 1 and 30 DF,  p-value: 3.478e-09