11.13 Exercises
11.13.1 1. Runs Scored at the Astrodome
# dbSendQuery(con, "TRUNCATE TABLE gamelogs;")
# map(1965:1999, append_game_logs, conn = con)
#
# query <- '
# SELECT
# SUBSTRING("Date"::text FROM 1 FOR 4) AS year,
# COUNT(*) AS num_games
# FROM
# gamelogs
# GROUP BY
# year
# ORDER BY
# year;
# '
#
# dbGetQuery(con, query)
Select games featuring the Astros (as either the home or visiting team) during the years when the Astrodome was their home park (i.e., from 1965 to 1999) using dplyr to get SQL query.
# gamelogs |>
# select(Date, ParkID, HomeTeam, VisitingTeam, HomeRunsScore, VisitorRunsScored) |>
# filter(HomeTeam == 'HOU' | VisitingTeam == 'HOU') |>
# show_query()
#
# query <- '
# SELECT
# "Date",
# "ParkID",
# "HomeTeam",
# "VisitingTeam",
# "HomeRunsScore",
# "VisitorRunsScored"
# FROM "gamelogs"
# WHERE ("HomeTeam" = \'HOU\' OR "VisitingTeam" = \'HOU\');
# '
# astros <- dbGetQuery(con, query) |>
# mutate(
# total_runs_scored = HomeRunsScore + VisitorRunsScored,
# astrodome = ParkID == "HOU02"
# )
astros <- read_rds('./data/astros.rds')
head(astros, 10)
## Date ParkID HomeTeam VisitingTeam HomeRunsScore VisitorRunsScored
## 1 19650412 HOU02 HOU PHI 0 2
## 2 19650414 NYC17 NYN HOU 6 7
## 3 19650415 NYC17 NYN HOU 5 4
## 4 19650417 PIT06 PIT HOU 3 2
## 5 19650418 PIT06 PIT HOU 1 3
## 6 19650418 PIT06 PIT HOU 5 4
## 7 19650419 PHI11 PHI HOU 8 0
## 8 19650420 PHI11 PHI HOU 2 1
## 9 19650421 PHI11 PHI HOU 4 11
## 10 19650423 HOU02 HOU PIT 4 3
## total_runs_scored astrodome
## 1 2 TRUE
## 2 13 FALSE
## 3 9 FALSE
## 4 5 FALSE
## 5 4 FALSE
## 6 9 FALSE
## 7 8 FALSE
## 8 3 FALSE
## 9 15 FALSE
## 10 7 TRUE
11.13.2 2. Draw a plot to visually compare through the years the runs scored (both teams combined) in games played at the Astrodome and in other ballparks.
astros |>
select(Date, total_runs_scored, astrodome) |>
ggplot(aes(x = year(ymd(Date)), y = total_runs_scored, color = astrodome)) +
stat_summary(fun.data = "mean_cl_boot") +
xlab("Season") +
ylab("Runs per game (both teams combined)") +
scale_color_manual(
name = "Location", values = crc_fc,
labels = c("Other parks", "Astrodome")
)
Another visualization (ggbetweenstats)
astros |>
group_by(year(ymd(Date)), astrodome) |>
summarise(
avg_runs = mean(total_runs_scored), .groups = 'drop'
) |>
rename(date = 1) |>
pivot_wider(
id_cols = 'date',
names_from = 'astrodome',
values_from = 'avg_runs'
) |>
rename('Other_parks' = 2, Astrodome = 3) |>
mutate(delta = Other_parks - Astrodome) |>
ggplot(aes(x = date, y = delta)) +
geom_point(size = 2, color = 'steelblue') +
geom_hline(yintercept=0, linetype="dashed", color = "red")
Disconnect db connection