Model assembly
Process:
- Set target to log of player value
- Remove a few variables from the dataset for training purposes: nationality, overall, potential, value_eur, and wage_eur
- Train four models: a deep GBM, a shallow GBM, a ranger random forest, and an OLS model with splines
Note: The textbook's R examples use all fifa records both to train the models and to create the explainer objects. In our examples, we fit the models on a separate training set and instantiate the explainers on the test set, which is consistent with the Python-based examples in the text.
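The code below assumes the following packages are attached; the list is inferred from the functions used (dplyr for the data wrangling, rsample for the split, gbm, ranger, and rms for the models, and DALEX for the fifa data and the explainers).
# packages assumed by the code in this section
library(dplyr)    # mutate, select
library(rsample)  # initial_split, training, testing
library(gbm)      # gbm
library(ranger)   # ranger
library(rms)      # ols, rcs
library(DALEX)    # fifa data, explain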
# logged target, remove some features
fifa_small <- fifa |>
  mutate(LogValue = log10(value_eur)) |>
  select(-nationality, -overall, -potential, -value_eur, -wage_eur)
# train/test split
set.seed(2687)
fs_split <- initial_split(fifa_small, prop = 0.75)
train <- training(fs_split)
test <- testing(fs_split)
# fit models
fifa_gbm_deep <- gbm(LogValue ~ ., data = train, n.trees = 250,
                     interaction.depth = 4, distribution = "gaussian")
fifa_gbm_shallow <- gbm(LogValue ~ ., data = train, n.trees = 250,
                        interaction.depth = 1, distribution = "gaussian")
fifa_rf <- ranger(LogValue ~ ., data = train, num.trees = 250)
# spline-based formula for ols
ols.formula <- formula(
  paste0("LogValue ~ ",
         paste0(paste0("rcs(", names(train)[-length(train)], ")"),
                collapse = "+")))
fifa_ols <- ols(ols.formula, data = train)
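The constructed formula wraps every remaining predictor in rcs(), the restricted cubic spline function from rms. A quick way to confirm the formula looks right before fitting is simply to print it:
# sanity check: every right-hand-side term should be wrapped in rcs()
print(ols.formula)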
Next, create the explainer objects. Both y and the predict functions are back-transformed from log10 to the original euro scale, so the explanations are expressed in euros rather than log units.
# explainer objects
fifa_gbm_exp_deep <- DALEX::explain(fifa_gbm_deep,
  data = test, y = 10^test$LogValue,
  predict_function = function(m, x) 10^predict(m, x, n.trees = 250),
  label = "GBM deep")
fifa_gbm_exp_shallow <- DALEX::explain(fifa_gbm_shallow,
  data = test, y = 10^test$LogValue,
  predict_function = function(m, x) 10^predict(m, x, n.trees = 250),
  label = "GBM shallow")
fifa_rf_exp <- DALEX::explain(fifa_rf,
  data = test, y = 10^test$LogValue,
  predict_function = function(m, x) 10^predict(m, x)$predictions,
  label = "RF")
fifa_rm_exp <- DALEX::explain(fifa_ols,
  data = test, y = 10^test$LogValue,
  predict_function = function(m, x) 10^predict(m, x),
  label = "RM")
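As a quick check that the four explainers behave as expected, we can compare residual-based fit summaries. This is a minimal sketch using DALEX::model_performance and its plot method; the boxplot geom is assumed to be supported by the installed DALEX version.
# compare residual distributions across the four models
mp_deep    <- model_performance(fifa_gbm_exp_deep)
mp_shallow <- model_performance(fifa_gbm_exp_shallow)
mp_rf      <- model_performance(fifa_rf_exp)
mp_rm      <- model_performance(fifa_rm_exp)
plot(mp_deep, mp_shallow, mp_rf, mp_rm, geom = "boxplot")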