2.3 Example 2
Code for Section 2.4 at https://bookdown.org/max/FES/stroke-tour.html#stroke-exploration
Code to compare 2-way interaction models to their main effects model
a
and b
are two models from train()
<- function(a, b, metric = a$metric[1], ...) {
compare_models_1way <- list(a, b)
mods <- resamples(mods)
rs <- diff(rs, metric = metric[1], ...)
diffs $statistics[[1]][[1]]
diffs }
risk_preds
is contained in the original data file and has the predictor
names for the risk related variables
2.3.1 Create a “null model” with no predictors to get baseline performance
Compare the models with single predictors to the risk model. These data make
https://bookdown.org/max/FES/stroke-tour.html#tab:stroke-strokeRiskAssociations
VC_preds
and risk_preds
contain the predictor names for different sets.
<-
one_predictor_res data.frame(Predictor = c(VC_preds, risk_preds),
Improvement = NA,
Pvalue = NA,
ROC = NA,
stringsAsFactors = FALSE)
for (i in 1:nrow(one_predictor_res)) {
set.seed(63331)
<- train(Stroke ~ .,
var_mod data = stroke_train[, c("Stroke", one_predictor_res$Predictor[i])],
method = "glm",
metric = "ROC",
trControl = ctrl)
<- compare_models_1way(var_mod,
tmp_diff
null_mod, alternative = "greater")
$ROC[i] <- getTrainPerf(var_mod)[1, "TrainROC"]
one_predictor_res$Improvement[i] <- tmp_diff$estimate
one_predictor_res$Pvalue[i] <- tmp_diff$p.value
one_predictor_res }
2.3.2 With Tidymodels
attempting this with tidymodels, not exactly the same need to make some sort of resample object to feed workflow_map
<- vfold_cv(tidy_training, v = 10, repeats = 5)
model_folds # model_boots <- bootstraps(tidy_training, times = 50) #want to bootstrap this?
#defining the null model
<-
null_class_model null_model() %>%
set_engine("parsnip") %>%
set_mode("classification")
<- workflow_set(preproc = c(Stroke ~ .),
null_model models = list(null_mod = null_class_model))
# defining the model
<-
lm_model logistic_reg(
mode = "classification", # outcome is a classification Stroke Y/N
engine = "glm", #using glm like the example
penalty = NULL,
mixture = NULL
)
# make named list of single variable formulae
<-
single_var_formulae c(VC_preds, risk_preds) %>%
paste0("Stroke ~ ", .) %>%
set_names(., c(VC_preds, risk_preds)) %>%
as.list() %>%
map(., as.formula)
Create the workflow set, all of our models use the same type of model and input data
<- workflow_set(preproc = single_var_formulae,
single_var_models models = list(lm = lm_model))
<- bind_rows(null_model, single_var_models)
all_models
# control grid/resamples allow processing of resampled data and parallel processing
# here we are asking only to save the predictions
<- control_resamples(save_pred = TRUE) control
::registerDoParallel()
doParallel<-
all_models1 %>%
all_models #map over each model and its resamples, use the control parameters, and be noisy
workflow_map(.,
resamples = model_folds,
control = control,
verbose = TRUE)
# save(all_models1, file = "data/all_models1.RData", compress = "xz")
# per model what are the predictions, summmarize over resamples
collect_predictions(all_models1, summarize = TRUE)
## # A tibble: 2,492 × 9
## wflow_id .config preproc model .row Stroke .pred_N .pred_Y .pred…¹
## <chr> <fct> <chr> <chr> <int> <fct> <dbl> <dbl> <fct>
## 1 formula_null_mod Preproce… formula null… 1 N 0.495 0.505 Y
## 2 formula_null_mod Preproce… formula null… 2 N 0.478 0.522 Y
## 3 formula_null_mod Preproce… formula null… 3 N 0.495 0.505 Y
## 4 formula_null_mod Preproce… formula null… 4 N 0.475 0.525 Y
## 5 formula_null_mod Preproce… formula null… 5 N 0.485 0.515 Y
## 6 formula_null_mod Preproce… formula null… 6 N 0.498 0.502 Y
## 7 formula_null_mod Preproce… formula null… 7 N 0.488 0.512 Y
## 8 formula_null_mod Preproce… formula null… 8 N 0.485 0.515 Y
## 9 formula_null_mod Preproce… formula null… 9 N 0.491 0.509 Y
## 10 formula_null_mod Preproce… formula null… 10 N 0.482 0.518 Y
## # … with 2,482 more rows, and abbreviated variable name ¹.pred_class
# per model what are the outputs in terms of fit (auc_roc) and accuracy
collect_metrics(all_models1)
## # A tibble: 56 × 9
## wflow_id .config preproc model .metric .esti…¹ mean n std_err
## <chr> <fct> <chr> <chr> <chr> <chr> <dbl> <int> <dbl>
## 1 formula_null_mod Preproces… formula null… accura… binary 0.378 50 0.0131
## 2 formula_null_mod Preproces… formula null… roc_auc binary 0.5 50 0
## 3 CALCVol_lm Preproces… formula logi… accura… binary 0.388 50 0.0183
## 4 CALCVol_lm Preproces… formula logi… roc_auc binary 0.457 50 0.0291
## 5 CALCVolProp_lm Preproces… formula logi… accura… binary 0.441 50 0.0196
## 6 CALCVolProp_lm Preproces… formula logi… roc_auc binary 0.502 50 0.0260
## 7 MATXVol_lm Preproces… formula logi… accura… binary 0.346 50 0.0177
## 8 MATXVol_lm Preproces… formula logi… roc_auc binary 0.380 50 0.0277
## 9 MATXVolProp_lm Preproces… formula logi… accura… binary 0.415 50 0.0222
## 10 MATXVolProp_lm Preproces… formula logi… roc_auc binary 0.516 50 0.0285
## # … with 46 more rows, and abbreviated variable name ¹.estimator
# plot the output to show which individual parameters have the most impact
autoplot(
all_models1,rank_metric = "roc_auc", # <- how to order models
metric = "roc_auc", # <- which metric to visualize
select_best = FALSE # <- one point per workflow
+
) geom_text(aes(y = mean - 1/10*mean,
label = wflow_id),
angle = 90, hjust = 1) +
theme(legend.position = "none")
# per model, sort by auc_roc
%>%
all_models1 rank_results() %>%
filter(.metric == "roc_auc")
## # A tibble: 28 × 9
## wflow_id .config .metric mean std_err n prepr…¹ model rank
## <chr> <fct> <chr> <dbl> <dbl> <int> <chr> <chr> <int>
## 1 MaxRemodelingRatio_lm Prepro… roc_auc 0.682 0.0300 50 formula logi… 1
## 2 MaxMaxWallThickness_… Prepro… roc_auc 0.669 0.0276 50 formula logi… 2
## 3 MaxStenosisByArea_lm Prepro… roc_auc 0.651 0.0256 50 formula logi… 3
## 4 MaxStenosisByDiamete… Prepro… roc_auc 0.640 0.0281 50 formula logi… 4
## 5 MaxDilationByArea_lm Prepro… roc_auc 0.625 0.0302 50 formula logi… 5
## 6 MaxLRNCArea_lm Prepro… roc_auc 0.622 0.0281 50 formula logi… 6
## 7 LRNCVolProp_lm Prepro… roc_auc 0.598 0.0260 50 formula logi… 7
## 8 MaxMATXArea_lm Prepro… roc_auc 0.595 0.0298 50 formula logi… 8
## 9 MaxWallArea_lm Prepro… roc_auc 0.591 0.0300 50 formula logi… 9
## 10 CoronaryArteryDiseas… Prepro… roc_auc 0.574 0.0208 50 formula logi… 10
## # … with 18 more rows, and abbreviated variable name ¹preprocessor
# Data in table 2.3
# https://bookdown.org/max/FES/stroke-tour.html#tab:stroke-strokeRiskAssociations
%>%
one_predictor_res ::filter(Predictor %in% risk_preds) %>%
dplyrarrange(Pvalue)
## Predictor Improvement Pvalue ROC
## 1 CoronaryArteryDisease 0.079000 0.0002957741 0.579000
## 2 DiabetesHistory 0.066500 0.0003019908 0.566500
## 3 HypertensionHistory 0.065000 0.0004269919 0.565000
## 4 age 0.082975 0.0011215271 0.582975
## 5 AtrialFibrillation 0.044000 0.0013131334 0.544000
## 6 SmokingHistory -0.015000 0.7372947489 0.485000
## 7 sex -0.029000 0.8970159364 0.471000
## 8 HypercholesterolemiaHistory -0.101000 0.9999998987 0.399000
# Figure 2.4
# https://bookdown.org/max/FES/stroke-tour.html#fig:stroke-vascuCAPAssocations
<-
vc_pred recipe(Stroke ~ ., data = stroke_train %>% dplyr::select(Stroke, !!!VC_preds)) %>%
step_YeoJohnson(all_predictors()) %>%
prep(stroke_train %>% dplyr::select(Stroke, !!!VC_preds)) %>%
bake(., new_data = NULL) %>%
gather(Predictor, value, -Stroke)
%>%head vc_pred
## # A tibble: 6 × 3
## Stroke Predictor value
## <fct> <chr> <dbl>
## 1 N CALCVol 9.51
## 2 N CALCVol 4.90
## 3 N CALCVol 7.63
## 4 Y CALCVol 13.2
## 5 N CALCVol 6.93
## 6 N CALCVol 2.27
<-
pred_max %>%
vc_pred group_by(Predictor) %>%
summarize(max_val = max(value)) %>%
inner_join(one_predictor_res %>% dplyr::select(Pvalue, Predictor)) %>%
mutate(
x = 1.5,
value = 1.25 * max_val,
label = paste0("p-value: ", format.pval(Pvalue, digits = 2, sci = FALSE, eps = .0001))
)
## Joining, by = "Predictor"
%>%head pred_max
## # A tibble: 6 × 6
## Predictor max_val Pvalue x value label
## <chr> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 CALCVol 15.7 0.000872 1.5 19.6 p-value: 0.00087
## 2 CALCVolProp 0.440 0.000856 1.5 0.550 p-value: 0.00086
## 3 LRNCVol 23.2 1.00 1.5 29.0 p-value: 1.00000
## 4 LRNCVolProp 0.495 0.207 1.5 0.618 p-value: 0.20723
## 5 MATXVol 41681. 1.00 1.5 52101. p-value: 1.00000
## 6 MATXVolProp 0.870 1.00 1.5 1.09 p-value: 1.00000
<- pred_max$Predictor[order(pred_max$Pvalue)]
new_order
<-
vc_pred %>%
vc_pred mutate(Predictor = factor(Predictor, levels = new_order))
<-
pred_max %>%
pred_max mutate(Predictor = factor(Predictor, levels = new_order))
<-
fig_2_4 ggplot(vc_pred, aes(x = Stroke, y = value)) +
geom_boxplot() +
geom_point(alpha = 0.3, cex = .5) +
geom_text(data = pred_max, aes(x = x, label = label), size = 3) +
facet_wrap(~Predictor, scales = "free_y") +
ylab("")
fig_2_4
# Figure 2.5
# https://bookdown.org/max/FES/stroke-tour.html#fig:stroke-maxRemodelingRatioROC
<-
fig_2_5 roc_curve(stroke_train, Stroke, MaxRemodelingRatio) %>%
# used opposite values
ggplot(aes(x = specificity, y = 1-sensitivity)) +
geom_abline(alpha = .5, lty = 2) +
geom_path()
fig_2_5
2.3.3 Interaction exploration
Here they create all the pairs of all of the image analysis components there are 171 interactions
<-
pairs combn(VC_preds, 2) %>%
t() %>%
as.data.frame(stringsAsFactors = FALSE) %>%
mutate(
Improvement = NA,
Pvalue = NA,
ROC = NA
)
Run comparisons with caret
<-
retained_pairs %>%
pairs1 ::filter(ROC > 0.5 & Pvalue <= 0.2)
dplyr%>%head retained_pairs
## V1 V2 Improvement Pvalue ROC
## 1 MATXVol MaxMaxWallThickness 0.07635 0.001903838 0.65435
## 2 MATXVol MaxRemodelingRatio 0.11085 0.001345363 0.55150
## 3 MATXVol MaxStenosisByArea 0.02325 0.086210254 0.60960
## 4 MATXVol MaxStenosisByDiameter 0.01930 0.197228736 0.61550
## 5 LRNCVol MaxMATXArea 0.02590 0.152003535 0.55690
## 6 LRNCVol MaxRemodelingRatio 0.03345 0.119774508 0.59510
# Figure 2.6
# https://bookdown.org/max/FES/stroke-tour.html#fig:stroke-interactionScreening
<-
vol_plot %>%
pairs1 ::filter(ROC > 0.5) %>%
dplyrmutate(Term = paste(V1, "by", V2, "\nROC:", round(ROC, 2))) %>%
ggplot(aes(x = Improvement, y = -log10(Pvalue))) +
xlab("Improvement") +
geom_point(alpha = .2, aes(size = ROC, text = Term))
<- ggplotly(vol_plot, tooltip = "Term")
vol_plot vol_plot
Create interaction formula of things that matter most
<-
int_form %>%
pairs1 ::filter(ROC > 0.5 & Pvalue <= 0.2 & Improvement > 0) %>%
dplyrmutate(form = paste0(V1, ":", V2)) %>%
pull(form) %>%
paste(collapse = "+")
<- paste("~", int_form)
int_form <- as.formula(int_form)
int_form %>%head int_form
## ~MATXVol:MaxMaxWallThickness + MATXVol:MaxRemodelingRatio + MATXVol:MaxStenosisByArea +
## MATXVol:MaxStenosisByDiameter + LRNCVol:MaxMATXArea + LRNCVol:MaxRemodelingRatio +
## MaxCALCAreaProp:MaxMATXAreaProp + MaxCALCAreaProp:MaxRemodelingRatio +
## MaxDilationByArea:MaxMaxWallThickness + MaxDilationByArea:MaxRemodelingRatio +
## MaxMATXAreaProp:MaxLRNCArea + MaxMATXAreaProp:MaxRemodelingRatio +
## MaxMATXAreaProp:MaxWallArea + MaxMaxWallThickness:MaxStenosisByArea +
## MaxMaxWallThickness:WallVol + MaxRemodelingRatio:MaxWallArea +
## MaxRemodelingRatio:WallVol + MaxStenosisByArea:WallVol
This part of the script is to work through all of of the potential models:
- original risk set alone
- imaging predictors alone
- risk and imaging predictors together
- imaging predictors and interactions of imaging predictors, and
- risk, imaging predictors, and interactions of imaging predictors
All the models are run below.
<-
risk_train %>%
stroke_train ::select(one_of(risk_preds), Stroke)
dplyr
%>%head risk_train
## age sex SmokingHistory AtrialFibrillation CoronaryArteryDisease
## 1 72 1 1 0 0
## 2 76 1 1 0 0
## 4 72 0 0 0 0
## 5 61 1 1 0 0
## 7 65 1 0 0 0
## 8 64 1 1 0 1
## DiabetesHistory HypercholesterolemiaHistory HypertensionHistory Stroke
## 1 0 0 1 N
## 2 1 1 1 N
## 4 0 0 0 N
## 5 1 1 1 Y
## 7 0 0 1 N
## 8 0 1 1 N
<-
image_train %>%
stroke_train ::select(one_of(VC_preds), Stroke)
dplyr
%>%head image_train
## CALCVol CALCVolProp MATXVol MATXVolProp LRNCVol LRNCVolProp MaxCALCArea
## 1 235.252599 0.070442702 3156.835 0.7599582 224.87171 0.09108513 12.350494
## 2 31.433595 0.016164769 3032.861 0.8133063 368.56066 0.13398944 7.130660
## 4 113.404823 0.038081488 3835.220 0.7825256 321.15893 0.08303659 16.286916
## 5 780.823789 0.213432061 3518.877 0.7610895 140.51735 0.03206536 63.350869
## 7 84.055774 0.041383842 2990.273 0.7498691 293.26992 0.07539753 17.583561
## 8 5.644322 0.002824946 3359.323 0.8492801 55.76888 0.01983567 2.841252
## MaxCALCAreaProp MaxDilationByArea MaxMATXArea MaxMATXAreaProp MaxLRNCArea
## 1 0.36576842 520.98259 71.24743 0.9523705 21.686815
## 2 0.21124686 91.72005 27.21084 0.9455539 6.434661
## 4 0.40881121 270.96930 38.12211 0.9459098 5.705054
## 5 0.57620574 2270.45120 341.12089 0.9691989 6.046787
## 7 0.32150685 95.15505 56.57457 0.9213197 7.213451
## 8 0.07734609 298.42121 33.92709 0.9612049 4.595196
## MaxLRNCAreaProp MaxMaxWallThickness MaxRemodelingRatio MaxStenosisByArea
## 1 0.4295781 2.409943 5.697931 18.99554
## 2 0.2815101 2.540334 1.739927 30.23761
## 4 0.1547786 3.708515 2.831636 33.93906
## 5 0.1870965 6.115838 15.647750 34.30985
## 7 0.2169263 3.975168 1.912069 36.59954
## 8 0.2053274 2.581908 2.181675 40.31766
## MaxWallArea WallVol MaxStenosisByDiameter Stroke
## 1 106.20676 4192.170 10.54411 N
## 2 33.36714 3917.040 18.64620 N
## 4 55.34671 4935.327 19.73511 N
## 5 426.47858 4909.504 20.28832 Y
## 7 59.82696 4045.053 49.29705 N
## 8 34.79742 3960.832 30.50857 N
<- function(...) c(twoClassSummary(...), defaultSummary(...))
fiveStats = trainControl(method = "none", classProbs = TRUE,
internal_ctrl allowParallel = FALSE)
<- caretFuncs
lrFuncsNew $summary <- fiveStats
lrFuncsNew<- rfeControl(functions = lrFuncsNew,
rfeCtrl method = "repeatedcv",
repeats = 5,
rerank = FALSE,
returnResamp = "all",
saveDetails = TRUE,
verbose = FALSE)
RFE procedure using risk predictors
All pair-wise interactions.
<-
risk_int_filtered_recipe recipe(Stroke ~ ., data = risk_train) %>%
step_interact(~ all_predictors():all_predictors()) %>%
step_corr(all_predictors(), threshold = 0.75) %>%
step_center(all_predictors()) %>%
step_scale(all_predictors()) %>%
step_zv(all_predictors())
set.seed(63331)
<- rfe(
risk_int_filtered_rfe
risk_int_filtered_recipe,data = risk_train,
sizes = 1:36,
rfeControl = rfeCtrl,
metric = "ROC",
## train options
method = "glm",
trControl = internal_ctrl
)
# Main effects
<-
risk_main_filtered_recipe recipe(Stroke ~ ., data = risk_train) %>%
step_corr(all_predictors(), threshold = 0.75) %>%
step_center(all_predictors()) %>%
step_scale(all_predictors()) %>%
step_zv(all_predictors())
set.seed(63331)
<- rfe(
risk_main_filtered_rfe
risk_main_filtered_recipe,data = risk_train,
sizes = 1:8,
rfeControl = rfeCtrl,
metric = "ROC",
## train options
method = "glm",
trControl = internal_ctrl
)
RFE procedure using imaging predictors.
<-
img_int_filtered_recipe recipe(Stroke ~ ., data = image_train) %>%
step_interact(int_form) %>%
step_corr(all_predictors(), threshold = 0.75) %>%
step_center(all_predictors()) %>%
step_scale(all_predictors()) %>%
step_YeoJohnson(all_predictors()) %>%
step_zv(all_predictors())
set.seed(63331)
<- rfe(
img_int_filtered_rfe
img_int_filtered_recipe,data = image_train,
sizes = 1:35,
rfeControl = rfeCtrl,
metric = "ROC",
## train options
method = "glm",
trControl = internal_ctrl
)
<-
img_main_filtered_recipe recipe(Stroke ~ ., data = image_train) %>%
step_corr(all_predictors(), threshold = 0.75) %>%
step_center(all_predictors()) %>%
step_scale(all_predictors()) %>%
step_YeoJohnson(all_predictors()) %>%
step_zv(all_predictors())
set.seed(63331)
<- rfe(
img_main_filtered_rfe
img_main_filtered_recipe,data = image_train,
sizes = 1:19,
rfeControl = rfeCtrl,
metric = "ROC",
## train options
method = "glm",
trControl = internal_ctrl
)
<-
both_int_filtered_recipe recipe(Stroke ~ ., data = stroke_train) %>%
step_interact(int_form) %>%
step_corr(all_predictors(), threshold = 0.75) %>%
step_center(all_predictors()) %>%
step_scale(all_predictors()) %>%
step_YeoJohnson(all_predictors()) %>%
step_zv(all_predictors())
set.seed(63331)
<- rfe(
both_int_filtered_rfe
both_int_filtered_recipe,data = stroke_train,
sizes = 1:44,
rfeControl = rfeCtrl,
metric = "ROC",
## train options
method = "glm",
trControl = internal_ctrl
)
<-
both_main_filtered_recipe recipe(Stroke ~ ., data = stroke_train) %>%
step_corr(all_predictors(), threshold = 0.75) %>%
step_center(all_predictors()) %>%
step_scale(all_predictors()) %>%
step_YeoJohnson(all_predictors()) %>%
step_zv(all_predictors())
set.seed(63331)
<- rfe(
both_main_filtered_rfe
both_main_filtered_recipe,data = stroke_train,
sizes = 1:28,
rfeControl = rfeCtrl,
metric = "ROC",
## train options
method = "glm",
trControl = internal_ctrl
)
<-
risk_int_recipe recipe(Stroke ~ ., data = risk_train) %>%
step_interact(~ all_predictors():all_predictors()) %>%
step_center(all_predictors()) %>%
step_scale(all_predictors()) %>%
step_zv(all_predictors())
set.seed(63331)
<- rfe(
risk_int_rfe
risk_int_recipe,data = risk_train,
sizes = 1:36,
rfeControl = rfeCtrl,
metric = "ROC",
## train options
method = "glm",
trControl = internal_ctrl
)
<-
risk_main_recipe recipe(Stroke ~ ., data = risk_train) %>%
step_center(all_predictors()) %>%
step_scale(all_predictors()) %>%
step_zv(all_predictors())
set.seed(63331)
<- rfe(
risk_main_rfe
risk_main_recipe,data = risk_train,
sizes = 1:8,
rfeControl = rfeCtrl,
metric = "ROC",
## train options
method = "glm",
trControl = internal_ctrl
)
<-
img_int_recipe recipe(Stroke ~ ., data = image_train) %>%
step_interact(int_form) %>%
step_center(all_predictors()) %>%
step_scale(all_predictors()) %>%
step_YeoJohnson(all_predictors()) %>%
step_zv(all_predictors())
set.seed(63331)
<- rfe(
img_int_rfe
img_int_recipe,data = image_train,
sizes = 1:35,
rfeControl = rfeCtrl,
metric = "ROC",
## train options
method = "glm",
trControl = internal_ctrl
)
<-
img_main_recipe recipe(Stroke ~ ., data = image_train) %>%
step_center(all_predictors()) %>%
step_scale(all_predictors()) %>%
step_YeoJohnson(all_predictors()) %>%
step_zv(all_predictors())
set.seed(63331)
<- rfe(
img_main_rfe
img_main_recipe,data = image_train,
sizes = 1:19,
rfeControl = rfeCtrl,
metric = "ROC",
## train options
method = "glm",
trControl = internal_ctrl
)
<-
both_int_recipe recipe(Stroke ~ ., data = stroke_train) %>%
step_interact(int_form) %>%
step_center(all_predictors()) %>%
step_scale(all_predictors()) %>%
step_YeoJohnson(all_predictors()) %>%
step_zv(all_predictors())
set.seed(63331)
<- rfe(
both_int_rfe
both_int_recipe,data = stroke_train,
sizes = 1:44,
rfeControl = rfeCtrl,
metric = "ROC",
## train options
method = "glm",
trControl = internal_ctrl
)
<-
both_main_recipe recipe(Stroke ~ ., data = stroke_train) %>%
step_center(all_predictors()) %>%
step_scale(all_predictors()) %>%
step_YeoJohnson(all_predictors()) %>%
step_zv(all_predictors())
set.seed(63331)
<- rfe(
both_main_rfe
both_main_recipe,data = stroke_train,
sizes = 1:28,
rfeControl = rfeCtrl,
metric = "ROC",
## train options
method = "glm",
trControl = internal_ctrl
)
<- function(x, lab, int = FALSE) {
format_data <-
dat %>%
x pluck("results") %>%
mutate(Predictors = !!lab) %>%
::select(ROC, Variables, Predictors, Variables, Num_Resamples) %>%
dplyrmutate(Model = "Main Effects")
if (int)
$Model <- "Interactions"
dat
dat
}
<-
filtered_dat bind_rows(
format_data(risk_main_filtered_rfe, lab = "Risk Predictors"),
format_data(risk_int_filtered_rfe, lab = "Risk Predictors", TRUE),
format_data(img_main_filtered_rfe, lab = "Imaging Predictors"),
format_data(img_int_filtered_rfe, lab = "Imaging Predictors", TRUE),
format_data(both_main_filtered_rfe, lab = "All Predictors"),
format_data(both_int_filtered_rfe, lab = "All Predictors", TRUE)
%>%
) mutate(
Predictors = factor(
Predictors, levels = c("Risk Predictors", "Imaging Predictors", "All Predictors")
),Model = factor(Model, levels = c("Main Effects", "Interactions")),
Filtering = "Correlation Filter"
)
<-
unfiltered_dat bind_rows(
format_data(risk_main_rfe, lab = "Risk Predictors"),
format_data(risk_int_rfe, lab = "Risk Predictors", TRUE),
format_data(img_main_rfe, lab = "Imaging Predictors"),
format_data(img_int_rfe, lab = "Imaging Predictors", TRUE),
format_data(both_main_rfe, lab = "All Predictors"),
format_data(both_int_rfe, lab = "All Predictors", TRUE)
%>%
) mutate(
Predictors = factor(
Predictors, levels = c("Risk Predictors", "Imaging Predictors", "All Predictors")
),Model = factor(Model, levels = c("Main Effects", "Interactions")),
Filtering = "No Filter"
)
<-
rfe_data bind_rows(filtered_dat, unfiltered_dat) %>%
mutate(
Filtering = factor(Filtering, levels = c("No Filter", "Correlation Filter"))
)
# https://bookdown.org/max/FES/predictive-modeling-across-sets.html#fig:stroke-rfe-res
ggplot(rfe_data, aes(x = Variables, y = ROC, col = Model)) +
geom_point(size = 0.75) +
geom_line() +
facet_grid(Filtering ~ Predictors) +
scale_color_manual(values = c("#6A3D9A", "#CAB2D6"))
# https://bookdown.org/max/FES/predictive-modeling-across-sets.html#tab:stroke-rfe-tab
<-
rfe_tab %>%
img_main_filtered_rfe pluck("variables") %>%
filter(Variables == img_main_filtered_rfe$optsize) %>%
group_by(var) %>%
count() %>%
arrange(desc(n)) %>%
mutate(final = ifelse(var %in% img_main_filtered_rfe$optVariables, "Yes", "No")) %>%
ungroup()