18.1 Chapter 18 Setup

Load the saved model and the rushing data, then set up the DALEX explainer.

library(tidymodels)
library(skimr)
library(DALEX)
library(DALEXtra)
library(iBreakDown)

# Read the two serialized objects from the project's data/ directory:
# the saved model object and the rushing data frame.
rush_model <- readRDS(file = here::here("data", "18-fit_rush_yards.RDS"))
rush_df <- readRDS(file = here::here("data", "18-nfl_rush_df.RDS"))

# Print a per-column summary of the rushing data, grouped by column type.
skimr::skim(rush_df)
Table 18.1: Data summary
Name rush_df
Number of rows 95186
Number of columns 40
_______________________
Column type frequency:
character 9
factor 15
numeric 16
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
posteam_type 0 1 4 4 0 2 0
game_wday 0 1 3 5 0 4 0
game_half 0 1 5 8 0 3 0
run_location 0 1 4 6 0 3 0
run_gap 0 1 3 6 0 3 0
run_gap_dir 0 1 8 12 0 7 0
surface 0 1 4 5 0 2 0
roof 0 1 4 8 0 4 0
position 0 1 2 2 0 4 0

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
season 0 1 TRUE 7 202: 13935, 201: 13792, 201: 13726, 201: 13517
week 0 1 TRUE 21 15: 5831, 17: 5815, 14: 5774, 3: 5687
score 0 1 FALSE 2 0: 91955, 1: 3231
first_down 0 1 FALSE 2 0: 71845, 1: 23341
game_month 0 1 TRUE 4 12: 31623, 10: 22281, 11: 22142, 9: 19140
game_week 0 1 TRUE 18 53: 7245, 50: 5911, 38: 5753, 51: 5667
game_time 0 1 TRUE 10 13: 49795, 16: 24222, 20: 17960, 18: 793
qtr 0 1 TRUE 5 1: 24079, 4: 23911, 2: 23315, 3: 23270
down 0 1 TRUE 4 1: 52324, 2: 31524, 3: 9908, 4: 1430
goal_to_go 0 1 FALSE 2 0: 88458, 1: 6728
shotgun 0 1 FALSE 2 0: 58260, 1: 36926
no_huddle 0 1 FALSE 2 0: 87972, 1: 7214
qb_dropback 0 1 FALSE 2 0: 89705, 1: 5481
qb_scramble 0 1 FALSE 2 0: 89705, 1: 5481
two_point_attempt 0 1 FALSE 2 0: 95006, 1: 180

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
rushing_yards 0 1 4.43 6.39 -17.00 1.00 3.00 6.00 99.00 ▇▂▁▁▁
rushing_fantasy_points 0 1 0.63 1.33 -3.70 0.10 0.30 0.60 15.90 ▆▇▁▁▁
wind 0 1 6.06 5.62 0.00 0.00 6.00 9.00 71.00 ▇▁▁▁▁
temp 0 1 60.98 15.24 -6.00 52.00 67.00 68.00 97.00 ▁▁▃▇▂
rusher_age 0 1 25.96 3.17 20.95 23.57 25.29 27.76 43.46 ▇▆▂▁▁
yardline_100 0 1 51.03 25.47 1.00 31.00 55.00 73.00 99.00 ▅▅▆▇▃
quarter_seconds_remaining 0 1 461.02 263.99 0.00 228.00 458.00 692.00 900.00 ▇▇▇▇▇
half_seconds_remaining 0 1 908.72 526.68 0.00 445.00 900.00 1367.00 1800.00 ▇▇▇▇▇
game_seconds_remaining 0 1 1804.95 1053.60 0.00 896.00 1800.00 2713.00 3600.00 ▇▇▇▇▇
fixed_drive 0 1 11.38 6.90 1.00 5.00 11.00 17.00 38.00 ▇▆▆▁▁
drive_play_count 0 1 7.86 3.58 0.00 5.00 8.00 10.00 21.00 ▅▇▆▂▁
ydstogo 0 1 8.14 3.88 0.00 5.00 10.00 10.00 46.00 ▅▇▁▁▁
score_differential 0 1 0.47 10.70 -56.00 -6.00 0.00 7.00 52.00 ▁▁▇▂▁
ep 0 1 2.35 1.72 -2.92 1.00 2.14 3.62 6.59 ▁▅▇▅▂
vegas_wp 0 1 0.57 0.31 0.00 0.30 0.60 0.85 1.00 ▅▅▅▅▇
total_line 0 1 45.75 4.25 35.00 43.00 45.50 48.50 63.50 ▂▇▆▁▁
# Build a DALEX explainer around the fitted tidymodels workflow so the
# model can be inspected with the DALEX / iBreakDown tooling.
#
# DALEX expects `data` to hold the predictors only, with the outcome
# supplied separately through `y`. Leaving `rushing_yards` in `data` would
# make the target itself show up as a "feature" in permutation importance
# and profile explanations, so it is dropped here.
# NOTE(review): the explainer log rendered after this chunk was produced
# with the target column still present and reports that the predict
# function returns an error. Re-run the chunk and confirm whether removing
# the column clears that warning or whether the saved model itself fails
# to predict; the printed log needs refreshing either way.
explainer_boost <-
  explain_tidymodels(
    rush_model,
    data = dplyr::select(rush_df, -rushing_yards),
    y = rush_df$rushing_yards,
    verbose = TRUE
  )
## Preparation of a new explainer is initiated
##   -> model label       :  workflow  (  default  )
##   -> data              :  95186  rows  40  cols 
##   -> data              :  tibble converted into a data.frame 
##   -> target variable   :  95186  values 
##   -> predict function  :  yhat.workflow  will be used (  default  )
##   -> predicted values  :  No value for predict function target column. (  default  )
##   -> model_info        :  package tidymodels , ver. 1.1.1 , task regression (  default  ) 
##   -> predicted values  :  the predict_function returns an error when executed (  WARNING  ) 
##   -> residual function :  difference between y and yhat (  default  )
##   -> residuals         :  the residual_function returns an error when executed (  WARNING  ) 
##   A new explainer has been created!