11.7 Querying Data from R

Let’s compare the attendance of two Chicago teams by day of the week since the 2006 season. - CHA - Chicago White Sox - CHN - Chicago Cubs

# query <- '
# SELECT "Date", "HomeTeam", "DayOfWeek", "Attendance"
# FROM "gamelogs"
# WHERE ("Date" > 20060101.0) 
#      AND ("HomeTeam" IN (\'CHN\', \'CHA\'))
# '
# 
# chi_attendance <- dbGetQuery(con, query) 
# slice_head(chi_attendance, n = 6)

# using dplyr (tidyverse)
chi_attendance <- gamelogs |>
     filter(Date > 20060101, HomeTeam %in% c('CHN', 'CHA')) |>
     select(Date, HomeTeam, DayOfWeek, Attendance)

chi_attendance |> 
  head()
## # A tibble: 6 × 4
##       Date HomeTeam DayOfWeek Attendance
##      <dbl> <chr>    <chr>          <dbl>
## 1 20060402 CHA      Sun            38802
## 2 20060404 CHA      Tue            37591
## 3 20060405 CHA      Wed            33586
## 4 20060407 CHN      Fri            40869
## 5 20060408 CHN      Sat            40182
## 6 20060409 CHN      Sun            39839
# show_query()
# gamelogs |>
#      filter(Date > 20060101, HomeTeam %in% c('CHN', 'CHA')) |>
#      select(Date, HomeTeam, DayOfWeek, Attendance) |>
#      show_query()

Another exercise: let’s compare for New York Yankees and Mets daily average attendance.

gamelogs |>
     filter(Date >= 20060101, Date <= 20061101, HomeTeam %in% c('NYN', 'NYA')) |>
     select(Date, HomeTeam, DayOfWeek, Attendance) |> 
     group_by(HomeTeam, DayOfWeek) |> 
     summarise(average_attendance = mean(Attendance, na.rm = TRUE), 
               .groups = 'drop') |> 
     as_tibble() |> 

     mutate(dayofweek = DayOfWeek %>% 
                 factor(levels = c('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'), 
                        ordered = TRUE)) |> 
     arrange(HomeTeam, dayofweek) |> 
     
     # plot attendance by team
     ggplot(aes(x = dayofweek, y = average_attendance, fill = HomeTeam)) + 
     geom_col(position = 'dodge') + 
     scale_fill_manual(values = c(crc_fc[1], crc_fc[2])) + 
     labs(
       x = NULL, 
       y = 'Average Attendance', 
       title = 'Daily Average Attendance betweeb NY Yankees & NY Mets for 2016 season'
         ) + 
    theme_classic()