18.1 Chapter 18 Setup
Load in the data and set up the explainer.
library(tidymodels)
library(skimr)
library(DALEX)
library(DALEXtra)
library(iBreakDown)
# Read the saved objects from the project's data/ directory.
# rush_model: the fitted model stored in 18-fit_rush_yards.RDS.
# rush_df: the rushing data stored in 18-nfl_rush_df.RDS.
rush_model <- readRDS(here::here("data", "18-fit_rush_yards.RDS"))
rush_df <- readRDS(here::here("data", "18-nfl_rush_df.RDS"))

# Print a per-column summary of the data, grouped by variable type.
skim(rush_df)
Name | rush_df |
Number of rows | 95186 |
Number of columns | 40 |
_______________________ | |
Column type frequency: | |
character | 9 |
factor | 15 |
numeric | 16 |
________________________ | |
Group variables | None |
Variable type: character
skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
---|---|---|---|---|---|---|---|
posteam_type | 0 | 1 | 4 | 4 | 0 | 2 | 0 |
game_wday | 0 | 1 | 3 | 5 | 0 | 4 | 0 |
game_half | 0 | 1 | 5 | 8 | 0 | 3 | 0 |
run_location | 0 | 1 | 4 | 6 | 0 | 3 | 0 |
run_gap | 0 | 1 | 3 | 6 | 0 | 3 | 0 |
run_gap_dir | 0 | 1 | 8 | 12 | 0 | 7 | 0 |
surface | 0 | 1 | 4 | 5 | 0 | 2 | 0 |
roof | 0 | 1 | 4 | 8 | 0 | 4 | 0 |
position | 0 | 1 | 2 | 2 | 0 | 4 | 0 |
Variable type: factor
skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
---|---|---|---|---|---|
season | 0 | 1 | TRUE | 7 | 202: 13935, 201: 13792, 201: 13726, 201: 13517 |
week | 0 | 1 | TRUE | 21 | 15: 5831, 17: 5815, 14: 5774, 3: 5687 |
score | 0 | 1 | FALSE | 2 | 0: 91955, 1: 3231 |
first_down | 0 | 1 | FALSE | 2 | 0: 71845, 1: 23341 |
game_month | 0 | 1 | TRUE | 4 | 12: 31623, 10: 22281, 11: 22142, 9: 19140 |
game_week | 0 | 1 | TRUE | 18 | 53: 7245, 50: 5911, 38: 5753, 51: 5667 |
game_time | 0 | 1 | TRUE | 10 | 13: 49795, 16: 24222, 20: 17960, 18: 793 |
qtr | 0 | 1 | TRUE | 5 | 1: 24079, 4: 23911, 2: 23315, 3: 23270 |
down | 0 | 1 | TRUE | 4 | 1: 52324, 2: 31524, 3: 9908, 4: 1430 |
goal_to_go | 0 | 1 | FALSE | 2 | 0: 88458, 1: 6728 |
shotgun | 0 | 1 | FALSE | 2 | 0: 58260, 1: 36926 |
no_huddle | 0 | 1 | FALSE | 2 | 0: 87972, 1: 7214 |
qb_dropback | 0 | 1 | FALSE | 2 | 0: 89705, 1: 5481 |
qb_scramble | 0 | 1 | FALSE | 2 | 0: 89705, 1: 5481 |
two_point_attempt | 0 | 1 | FALSE | 2 | 0: 95006, 1: 180 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
rushing_yards | 0 | 1 | 4.43 | 6.39 | -17.00 | 1.00 | 3.00 | 6.00 | 99.00 | ▇▂▁▁▁ |
rushing_fantasy_points | 0 | 1 | 0.63 | 1.33 | -3.70 | 0.10 | 0.30 | 0.60 | 15.90 | ▆▇▁▁▁ |
wind | 0 | 1 | 6.06 | 5.62 | 0.00 | 0.00 | 6.00 | 9.00 | 71.00 | ▇▁▁▁▁ |
temp | 0 | 1 | 60.98 | 15.24 | -6.00 | 52.00 | 67.00 | 68.00 | 97.00 | ▁▁▃▇▂ |
rusher_age | 0 | 1 | 25.96 | 3.17 | 20.95 | 23.57 | 25.29 | 27.76 | 43.46 | ▇▆▂▁▁ |
yardline_100 | 0 | 1 | 51.03 | 25.47 | 1.00 | 31.00 | 55.00 | 73.00 | 99.00 | ▅▅▆▇▃ |
quarter_seconds_remaining | 0 | 1 | 461.02 | 263.99 | 0.00 | 228.00 | 458.00 | 692.00 | 900.00 | ▇▇▇▇▇ |
half_seconds_remaining | 0 | 1 | 908.72 | 526.68 | 0.00 | 445.00 | 900.00 | 1367.00 | 1800.00 | ▇▇▇▇▇ |
game_seconds_remaining | 0 | 1 | 1804.95 | 1053.60 | 0.00 | 896.00 | 1800.00 | 2713.00 | 3600.00 | ▇▇▇▇▇ |
fixed_drive | 0 | 1 | 11.38 | 6.90 | 1.00 | 5.00 | 11.00 | 17.00 | 38.00 | ▇▆▆▁▁ |
drive_play_count | 0 | 1 | 7.86 | 3.58 | 0.00 | 5.00 | 8.00 | 10.00 | 21.00 | ▅▇▆▂▁ |
ydstogo | 0 | 1 | 8.14 | 3.88 | 0.00 | 5.00 | 10.00 | 10.00 | 46.00 | ▅▇▁▁▁ |
score_differential | 0 | 1 | 0.47 | 10.70 | -56.00 | -6.00 | 0.00 | 7.00 | 52.00 | ▁▁▇▂▁ |
ep | 0 | 1 | 2.35 | 1.72 | -2.92 | 1.00 | 2.14 | 3.62 | 6.59 | ▁▅▇▅▂ |
vegas_wp | 0 | 1 | 0.57 | 0.31 | 0.00 | 0.30 | 0.60 | 0.85 | 1.00 | ▅▅▅▅▇ |
total_line | 0 | 1 | 45.75 | 4.25 | 35.00 | 43.00 | 45.50 | 48.50 | 63.50 | ▂▇▆▁▁ |
# Build a DALEX explainer around the fitted model.
# - data: the full rush_df data frame handed to the explainer.
# - y: the observed target, rushing_yards.
# - verbose = TRUE prints the explainer preparation log.
# NOTE(review): the recorded log for this call reports that the predict and
# residual functions return an error when executed; investigate before relying
# on explanations produced from this object.
explainer_boost <- explain_tidymodels(
  rush_model,
  data = rush_df,
  y = rush_df$rushing_yards,
  verbose = TRUE
)
## Preparation of a new explainer is initiated
## -> model label : workflow ( default )
## -> data : 95186 rows 40 cols
## -> data : tibble converted into a data.frame
## -> target variable : 95186 values
## -> predict function : yhat.workflow will be used ( default )
## -> predicted values : No value for predict function target column. ( default )
## -> model_info : package tidymodels , ver. 1.1.1 , task regression ( default )
## -> predicted values : the predict_function returns an error when executed ( WARNING )
## -> residual function : difference between y and yhat ( default )
## -> residuals : the residual_function returns an error when executed ( WARNING )
## A new explainer has been created!