4.4 Preliminary exploratory visualizations
Ridership line plot by month
# Monthly ridership line plot: collapse the `s_40380` series to one total per
# calendar month, draw it as a line, and overlay a loess trend.
g1 <- train_plot_data %>%
  select(date, rides = s_40380) %>%
  # Snap every date to the first day of its month so rows pool by month.
  mutate(date = floor_date(date, "month")) %>%
  # No explicit arrange() is needed: summarise() returns one row per group
  # in sorted key order.
  group_by(date) %>%
  summarise(rides = sum(rides), .groups = "drop") %>%
  ggplot(aes(date, rides)) +
  # `linewidth` replaces the deprecated `size` aesthetic for line geoms
  # (ggplot2 >= 3.4.0).
  geom_line(linewidth = 1) +
  geom_smooth(method = "loess", se = FALSE, color = "steelblue") +
  scale_x_date(date_labels = "%b-%Y", date_breaks = "2 year") +
  labs(
    x = "",
    y = "Rides (000's)",
    title = "Chicago Clark/Lake Train Station Monthly Ridership Volume (Jan 2001 - Aug 2016)"
  ) +
  # Rotate the tick labels so the month-year strings do not overlap.
  theme(axis.text.x = element_text(angle = 60, hjust = 1))

# Convert the static ggplot into an interactive plotly widget.
ggplotly(g1)
## `geom_smooth()` using formula = 'y ~ x'
Boxplot of rides by day of the week
# Boxplot of the `s_40380` ride counts for each `dow` level. The fill colour
# mirrors the x axis, so the legend is redundant and hidden.
g2 <- train_plot_data %>%
  select(dow, rides = s_40380) %>%
  ggplot(aes(dow, rides, fill = dow)) +
  geom_boxplot() +
  labs(
    x = "",
    y = "Rides (000's)",
    title = "Chicago Clark/Lake Train Station Ridership by Day of the Week"
  ) +
  theme(legend.position = "none")

# Convert the static ggplot into an interactive plotly widget.
ggplotly(g2)

# Violin plot of rides by day of the week
# Violin plot of the `s_40380` ride counts for each `dow` level; printed
# directly as a static ggplot. Fill duplicates the x axis, so the legend is
# suppressed.
ggplot(
  data = select(train_plot_data, dow, rides = s_40380),
  mapping = aes(x = dow, y = rides, fill = dow)
) +
  geom_violin() +
  theme(legend.position = "none") +
  labs(
    title = "Chicago Clark/Lake Train Station Ridership by Day of the Week",
    x = "",
    y = "Rides (000's)"
  )
Boxplot of rides by month
# Boxplot of the `s_40380` ride counts for each `month` level. Fill duplicates
# the x axis, so the legend is suppressed.
g3 <- ggplot(
  data = select(train_plot_data, month, rides = s_40380),
  mapping = aes(x = month, y = rides, fill = month)
) +
  geom_boxplot() +
  theme(legend.position = "none") +
  labs(
    title = "Chicago Clark/Lake Train Monthly Station Ridership",
    x = "",
    y = "Rides (000's)"
  )

# Convert the static ggplot into an interactive plotly widget.
ggplotly(g3)