Model assembly

Process:

  • Set the target to the base-10 log of player value (`value_eur`)
  • Remove a few variables from dataset for training purposes: nationality, overall, potential, value_eur, and wage_eur
  • Train four models: deep gbm, shallow gbm, ranger random forest, ols with splines

Note: The textbook R examples use all fifa records for both training models and creating explainer objects. In our examples, we use a separate training dataset for model creation. We also instantiate model explainers using the test set. This is consistent with the Python-based examples in the text.

# Build the modelling table: LogValue is log10 of value_eur; the raw value
# and the nationality, overall, potential and wage_eur columns are dropped.
fifa_small <- fifa |>
  mutate(LogValue = log10(value_eur)) |>
  select(-c(nationality, overall, potential, value_eur, wage_eur))

# Seeded train/test partition of fifa_small (prop = 0.75 assigned to train).
set.seed(2687)
fs_split <- fifa_small |>
  initial_split(prop = 0.75)
train <- fs_split |> training()
test <- fs_split |> testing()

# Fit the tree-based models on the training rows. Each one regresses
# LogValue on every other column of `train` and uses 250 trees.
# NOTE(review): these fits presumably draw from the RNG stream seeded before
# the split, so keep this order if exact reproducibility matters.

# Gradient boosting with interaction depth 4.
fifa_gbm_deep <- gbm(
  formula = LogValue ~ .,
  distribution = "gaussian",
  data = train,
  n.trees = 250,
  interaction.depth = 4
)

# Gradient boosting with interaction depth 1.
fifa_gbm_shallow <- gbm(
  formula = LogValue ~ .,
  distribution = "gaussian",
  data = train,
  n.trees = 250,
  interaction.depth = 1
)

# Random forest via ranger.
fifa_rf <- ranger(
  formula = LogValue ~ .,
  data = train,
  num.trees = 250
)

# Spline-based formula for OLS: every predictor in `train` is wrapped in
# rcs(), giving LogValue ~ rcs(x1) + rcs(x2) + ...
# Predictors are picked by name (everything except LogValue) rather than by
# dropping the last column, so the formula stays correct if the column order
# of `train` ever changes. setdiff() preserves the original column order.
ols.formula <- reformulate(
  paste0("rcs(", setdiff(names(train), "LogValue"), ")"),
  response = "LogValue"
)

# Fit the spline regression on the training rows.
fifa_ols <- ols(ols.formula, data = train)

Next, create DALEX explainer objects for each fitted model.

# Explainer objects, built on the held-out test rows.
#
# The models are fit on LogValue (log10 of value_eur), so both the observed
# target and every predict_function are back-transformed with 10^ to put the
# explanations on the original value scale.
#
# DALEX expects `data` WITHOUT the target column; leaving LogValue in would
# make it show up as a (meaningless) feature in permutation importance and
# profile plots. The target is therefore dropped here and supplied only
# through `y`.
test_predictors <- select(test, -LogValue)
test_value <- 10^test$LogValue

fifa_gbm_exp_deep <- DALEX::explain(
  fifa_gbm_deep,
  data = test_predictors,
  y = test_value,
  # n.trees matches the number of trees the GBM was fit with
  predict_function = function(m, x) 10^predict(m, x, n.trees = 250),
  label = "GBM deep"
)

fifa_gbm_exp_shallow <- DALEX::explain(
  fifa_gbm_shallow,
  data = test_predictors,
  y = test_value,
  predict_function = function(m, x) 10^predict(m, x, n.trees = 250),
  label = "GBM shallow"
)

fifa_rf_exp <- DALEX::explain(
  fifa_rf,
  data = test_predictors,
  y = test_value,
  # ranger returns a prediction object; the numeric vector is in $predictions
  predict_function = function(m, x) 10^predict(m, x)$predictions,
  label = "RF"
)

fifa_rm_exp <- DALEX::explain(
  fifa_ols,
  data = test_predictors,
  y = test_value,
  predict_function = function(m, x) 10^predict(m, x),
  label = "RM"
)