PCA Example
R setup
# Restrict the data to the two species being compared, then drop every row
# that has a missing value in any column.
penguin_2_class <- na.omit(
  filter(penguins, species %in% c("Chinstrap", "Gentoo"))
)
# Scatter plot of the two predictors, with point colour mapped to species.
ggplot(
  data = penguin_2_class,
  mapping = aes(
    x = flipper_length_mm,
    y = bill_length_mm,
    color = species
  )
) +
  geom_point(size = 3) +
  scale_color_manual(values = c(chinstrap_color, gentoo_color)) +
  labs(
    title = "Two Predictor Variables",
    subtitle = "50mm-long bill and 195mm-long flipper",
    caption = "Data Science Learning Community"
  ) +
  theme_minimal() +
  # theme() is added after theme_minimal() so these overrides are not reset.
  theme(
    plot.title = element_markdown(face = "bold", size = 24),
    plot.subtitle = element_markdown(size = 16)
  )
# Predictor columns only: flipper length first, bill length second.
train_set <- select(penguin_2_class, flipper_length_mm, bill_length_mm)
PCA math
# Rows 1 and 2 of the first column of the rotation matrix.
# NOTE(review): presumably row 1 is flipper_length_mm and row 2 is
# bill_length_mm (the column order of train_set) -- confirm where
# pca_results is fitted.
del_x <- pca_results[["rotation"]][1, 1]
del_y <- pca_results[["rotation"]][2, 1]

# Rise over run of that direction.
pca_slope <- del_y / del_x

# Means of the two predictors.
xbar <- mean(train_set[["flipper_length_mm"]], na.rm = TRUE)
ybar <- mean(train_set[["bill_length_mm"]], na.rm = TRUE)

# Intercept chosen so that the line y = pca_slope * x + pca_intercept
# passes through the point (xbar, ybar).
pca_intercept <- ybar - xbar * pca_slope
# Scatter plot of both predictors with the line defined by pca_slope and
# pca_intercept drawn on top.
pca_plot_1 <- ggplot(
  data = penguin_2_class,
  mapping = aes(x = flipper_length_mm, y = bill_length_mm)
) +
  geom_point(size = 3) +
  geom_abline(
    intercept = pca_intercept,
    slope = pca_slope,
    linewidth = 3,
    color = adelie_color
  ) +
  labs(
    title = "Principal Component Analysis",
    subtitle = "<span style = 'color:#fb7504'>first principal component</span>",
    caption = "Data Science Learning Community"
  ) +
  # Species is not mapped to colour in this plot, so the manual colour
  # scale stays disabled.
  # scale_color_manual(values = c(chinstrap_color, gentoo_color)) +
  theme_minimal() +
  theme(
    plot.title = element_markdown(face = "bold", size = 14),
    plot.subtitle = element_markdown(size = 12)
  )

# Top-level reference to the plot object (shown when auto-printed).
pca_plot_1
- PC1 is the direction of greatest variance across the entire data set
Projection math
# Matrix form of the two predictor columns.
train_mat <- as.matrix(train_set)

# First column of the rotation matrix, as a one-column matrix.
proj_mat <- as.matrix(pca_results[["rotation"]][, 1])

# One projected value per row of train_mat.
# NOTE(review): train_mat is not centered before the multiplication; if
# pca_results was fitted with centering, these values differ from its
# scores by a constant offset -- confirm that this is intended.
projection_data <- train_mat %*% proj_mat

# Append the projected values to the penguin rows.
# NOTE(review): presumably the new column ends up named `projection_data`
# (taken from the argument expression) -- verify, since later plotting code
# refers to that name.
projection_df <- cbind(penguin_2_class, projection_data)
# Density of the projected values, one filled curve per species.
pca_plot_2 <- ggplot(
  data = projection_df,
  mapping = aes(x = projection_data)
) +
  geom_density(mapping = aes(fill = species), alpha = 0.5) +
  scale_fill_manual(values = c(chinstrap_color, gentoo_color)) +
  labs(
    title = "Classification via <br><span style = 'color:#fb7504'>Principal Component Analysis</span>",
    subtitle = "",
    caption = "Data Science Learning Community",
    x = "(PC1) first principal component",
    y = ""
  ) +
  theme_minimal() +
  # The y axis is stripped of its title, tick labels and tick marks.
  theme(
    plot.title = element_markdown(face = "bold", size = 24),
    plot.subtitle = element_markdown(size = 16),
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  )

# Top-level reference to the plot object (shown when auto-printed).
pca_plot_2