9.13 Lab: Support Vector Classifier
library("tidymodels")
library("kernlab") # We'll use the plot method from this.
set.seed(1)
sim_data <- matrix(
  rnorm(20 * 2),
  ncol = 2,
  dimnames = list(NULL, c("x1", "x2"))
) %>%
  as_tibble() %>%
  mutate(
    y = factor(c(rep(-1, 10), rep(1, 10)))
  ) %>%
  mutate(
    x1 = ifelse(y == 1, x1 + 1, x1),
    x2 = ifelse(y == 1, x2 + 1, x2)
  )
sim_data %>%
  ggplot() +
  aes(x1, x2, color = y) +
  geom_point() +
  labs(
    title = "Trying to make a hyperplane classifier",
    subtitle = "simulated data",
    caption = "R4DS book club"
  ) +
  theme_minimal()
# We generated this test data using their process, then saved it to use here.
test_data <- readRDS("data/09-testdat.rds") %>%
  rename(x1 = x.1, x2 = x.2)
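If you don't have that .rds file, you can simulate a stand-in test set with the same kind of recipe we used for sim_data. This is a hypothetical reconstruction, not the saved 09-testdat.rds, so if you use it the plots and confusion matrices below will come out a little different.
# Hypothetical stand-in for data/09-testdat.rds (the actual draws will differ):
# set.seed(2)
# test_data <- matrix(
#   rnorm(20 * 2),
#   ncol = 2,
#   dimnames = list(NULL, c("x1", "x2"))
# ) %>%
#   as_tibble() %>%
#   mutate(y = factor(sample(c(-1, 1), 20, replace = TRUE))) %>%
#   mutate(
#     x1 = ifelse(y == 1, x1 + 1, x1),
#     x2 = ifelse(y == 1, x2 + 1, x2)
#   )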
test_data %>%
  ggplot() +
  aes(x1, x2, color = y) +
  geom_point() +
  labs(
    title = "Trying to make a hyperplane classifier",
    subtitle = "simulated test data",
    caption = "R4DS book club"
  ) +
  theme_minimal()
We create a model specification, which we'll update throughout this lab with different cost values.
# A degree-1 polynomial kernel gives a linear decision boundary.
svm_linear_spec <- svm_poly(degree = 1) %>%
  set_mode("classification") %>%
  set_engine("kernlab", scaled = FALSE)
Then we do a couple of fits with manually chosen costs.
svm_linear_fit_10 <- svm_linear_spec %>%
  set_args(cost = 10) %>%
  fit(y ~ ., data = sim_data)
svm_linear_fit_10
## parsnip model object
##
## Support Vector Machine object of class "ksvm"
##
## SV type: C-svc (classification)
## parameter : cost C = 10
##
## Polynomial kernel function.
## Hyperparameters : degree = 1 scale = 1 offset = 1
##
## Number of Support Vectors : 7
##
## Objective Function Value : -52.4483
## Training error : 0.15
## Probability model included.
svm_linear_fit_10 %>%
  extract_fit_engine() %>%
  plot()
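The printout says this fit uses 7 support vectors. If you want to see which training observations those are, kernlab exposes accessor functions on the underlying ksvm object; a minimal sketch (assuming the accessors behave as we expect):
# Row indices of sim_data that ended up as support vectors for this fit.
svm_linear_fit_10 %>%
  extract_fit_engine() %>%
  kernlab::alphaindex()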
svm_linear_fit_01 <- svm_linear_spec %>%
  set_args(cost = 0.1) %>%
  fit(y ~ ., data = sim_data)
svm_linear_fit_01
## parsnip model object
##
## Support Vector Machine object of class "ksvm"
##
## SV type: C-svc (classification)
## parameter : cost C = 0.1
##
## Polynomial kernel function.
## Hyperparameters : degree = 1 scale = 1 offset = 1
##
## Number of Support Vectors : 16
##
## Objective Function Value : -1.189
## Training error : 0.05
## Probability model included.
svm_linear_fit_01 %>%
  extract_fit_engine() %>%
  plot()
svm_linear_fit_001 <- svm_linear_spec %>%
  set_args(cost = 0.01) %>%
  fit(y ~ ., data = sim_data)
svm_linear_fit_001
## parsnip model object
##
## Support Vector Machine object of class "ksvm"
##
## SV type: C-svc (classification)
## parameter : cost C = 0.01
##
## Polynomial kernel function.
## Hyperparameters : degree = 1 scale = 1 offset = 1
##
## Number of Support Vectors : 20
##
## Objective Function Value : -0.1859
## Training error : 0.25
## Probability model included.
svm_linear_fit_001 %>%
  extract_fit_engine() %>%
  plot()
9.13.1 Tuning
Let’s use cross-validation to find the best cost.
svm_linear_wf <- workflow() %>%
  add_model(
    svm_linear_spec %>% set_args(cost = tune())
  ) %>%
  add_formula(y ~ .)

set.seed(1234)
sim_data_fold <- vfold_cv(sim_data, strata = y)

param_grid <- grid_regular(cost(), levels = 10)
# Our grid isn't identical to the book, but it's close enough.
param_grid
## # A tibble: 10 × 1
## cost
## <dbl>
## 1 0.000977
## 2 0.00310
## 3 0.00984
## 4 0.0312
## 5 0.0992
## 6 0.315
## 7 1
## 8 3.17
## 9 10.1
## 10 32
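The grid is roughly evenly spaced on a log scale because dials defines cost() on a log2-transformed range by default; note that the smallest and largest values above are 2^-10 and 2^5. You can print the parameter object to check the range on your installed version:
# Inspect the default range and transformation used for the cost parameter.
cost()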
tune_res <- tune_grid(
  svm_linear_wf,
  resamples = sim_data_fold,
  grid = param_grid
)
# We ran this locally and then saved it so everyone doesn't need to wait for
# this to process each time they build the book.
# saveRDS(tune_res, "data/09-tune_res.rds")
autoplot(tune_res)
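If you'd rather see numbers than the autoplot, tune can summarize the resampled metrics per candidate cost (output omitted here):
# Resampled performance for each candidate cost value.
collect_metrics(tune_res)

# Top candidates ranked by accuracy.
show_best(tune_res, metric = "accuracy")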
The tune package can pull out the best result for us.
best_cost <- select_best(tune_res, metric = "accuracy")

svm_linear_final <- finalize_workflow(svm_linear_wf, best_cost)

svm_linear_fit <- svm_linear_final %>% fit(sim_data)

svm_linear_fit %>%
  augment(new_data = test_data) %>%
  conf_mat(truth = y, estimate = .pred_class)
## Truth
## Prediction -1 1
## -1 9 1
## 1 2 8
\[\text{accuracy} = \frac{9 + 8}{9 + 1 + 2 + 8} = 0.85\]
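Instead of doing that arithmetic by hand, yardstick can compute the same number from the augmented predictions (a quick check; it should agree with 17/20 = 0.85):
# Test-set accuracy computed directly with yardstick.
svm_linear_fit %>%
  augment(new_data = test_data) %>%
  accuracy(truth = y, estimate = .pred_class)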
For comparison, the low-cost fit from earlier does worse on the same test data.
svm_linear_fit_001 %>%
  augment(new_data = test_data) %>%
  conf_mat(truth = y, estimate = .pred_class)
## Truth
## Prediction -1 1
## -1 11 6
## 1 0 3
\[\text{accuracy} = \frac{11 + 3}{11 + 6 + 0 + 3} = 0.70\]
9.13.2 Linearly separable data
Now we shift the y = 1 class a bit further, so the two classes are (just barely) linearly separable.
sim_data_sep <- sim_data %>%
  mutate(
    x1 = ifelse(y == 1, x1 + 0.5, x1),
    x2 = ifelse(y == 1, x2 + 0.5, x2)
  )
sim_data_sep %>%
  ggplot() +
  aes(x1, x2, color = y) +
  geom_point()
svm_fit_sep_1e5 <- svm_linear_spec %>%
  set_args(cost = 1e5) %>%
  fit(y ~ ., data = sim_data_sep)
svm_fit_sep_1e5
## parsnip model object
##
## Support Vector Machine object of class "ksvm"
##
## SV type: C-svc (classification)
## parameter : cost C = 1e+05
##
## Polynomial kernel function.
## Hyperparameters : degree = 1 scale = 1 offset = 1
##
## Number of Support Vectors : 3
##
## Objective Function Value : -24.3753
## Training error : 0
## Probability model included.
svm_fit_sep_1e5 %>%
  extract_fit_engine() %>%
  plot()
svm_fit_sep_1 <- svm_linear_spec %>%
  set_args(cost = 1) %>%
  fit(y ~ ., data = sim_data_sep)
svm_fit_sep_1
## parsnip model object
##
## Support Vector Machine object of class "ksvm"
##
## SV type: C-svc (classification)
## parameter : cost C = 1
##
## Polynomial kernel function.
## Hyperparameters : degree = 1 scale = 1 offset = 1
##
## Number of Support Vectors : 7
##
## Objective Function Value : -3.5451
## Training error : 0.05
## Probability model included.
svm_fit_sep_1 %>%
  extract_fit_engine() %>%
  plot()
We shift the test data in the same way before comparing the two fits.
test_data_sep <- test_data %>%
  mutate(
    x1 = ifelse(y == 1, x1 + 0.5, x1),
    x2 = ifelse(y == 1, x2 + 0.5, x2)
  )
svm_fit_sep_1e5 %>%
  augment(new_data = test_data_sep) %>%
  conf_mat(truth = y, estimate = .pred_class)
## Truth
## Prediction -1 1
## -1 9 1
## 1 2 8
\[\text{accuracy} = \frac{9 + 8}{9 + 1 + 2 + 8} = 0.85\]
svm_fit_sep_1 %>%
  augment(new_data = test_data_sep) %>%
  conf_mat(truth = y, estimate = .pred_class)
## Truth
## Prediction -1 1
## -1 9 0
## 1 2 9
\[\text{accuracy} = \frac{9 + 9}{9 + 0 + 2 + 9} = 0.90\]