5.7 Supervised algorithm #1: Random forest

“A (random) forest filled with decision trees”

set.seed(17)

# Resampling-based estimation of the prediction error is skipped here,
# even though it is recommended for random forests as well.
trctrl <- trainControl(method = "none")

# Fit the random forest model via train() with method "ranger".
# The tuning grid is a single, fixed parameter combination.
rfFit <- train(
  subtype ~ .,
  data = training,
  method = "ranger",
  trControl = trctrl,
  importance = "permutation", # have variable importance computed
  tuneGrid = data.frame(
    mtry = 100,
    min.node.size = 1,
    splitrule = "gini"
  )
)
# Report the out-of-bag (OOB) error stored on the final fitted model
rfFit[["finalModel"]][["prediction.error"]]
## [1] 0.03076923

# Plot the ten variables with the highest importance
plot(varImp(rfFit), top = 10)