9.11 Simulate Many Seasons
We can wrap the previous codes into a simulate_1968
function
function code
simulate_1968 <- function(s_talent = 0.20){
teams_68 <- teams_68 |>
mutate(talent = rnorm(10, 0, s_talent))
# increase chances of Cardinals' and Tigers' success
teams_68$talent[teams_68$teamID == "DET"] <- rnorm(1, 0.3, 0.05)
teams_68$talent[teams_68$teamID == "SLN"] <- rnorm(1, 0.27, 0.04)
schedule_talent <- schedule |>
inner_join(teams_68, join_by(lgID, Home == teamID)) |>
rename(talent_home = talent) |>
inner_join(teams_68, join_by(lgID, Visitor == teamID)) |>
rename(talent_visitor = talent)
schedule_talent <- schedule_talent |>
mutate(
prob_home = exp(talent_home) /
(exp(talent_home) + exp(talent_visitor))
)
schedule_talent <- schedule_talent |>
mutate(
outcome = rbinom(nrow(schedule_talent), 1, prob_home),
winner = if_else(outcome == 1, Home, Visitor)
)
results <- schedule_talent |>
group_by(winner) |>
summarize(Wins = n()) |>
inner_join(teams_68, by = c("winner" = "teamID"))
}
and use the rep
function to simulate 100 copies of the 1968 season.
set.seed(20250611)
many_results <- rep(0.20, 100) |>
map(simulate_1968) |>
list_rbind() |>
rename(team = winner)
graph code
title_text <- "<span style='color:#0C2340'>Simulations of **1968 MLB Season**</span>"
subtitle_text <- "with focus on the highly talented <span style='color:#FA4616'>**Detroit Tigers**</span> team"
many_results |>
ggplot() +
geom_point(aes(x = talent, y = Wins),
alpha = 0.2,
color = "#0C2340",
data = many_results |>
filter(team != "DET")) +
geom_point(aes(x = talent, y = Wins),
alpha = 0.6,
size = 3,
color = "#FA4616",
data = many_results |>
filter(team == "DET")) +
labs(title = title_text,
subtitle = subtitle_text,
caption = "DSLC") +
theme_minimal() +
theme(plot.subtitle = ggtext::element_markdown(),
plot.title = ggtext::element_markdown()) +
xlim(-1,1)
- average team \(T = 0\) tend to win about 81 games.
- positive correlation: more talent \(\Rightarrow\) more wins
9.11.1 Parity
What is the relationship between a team’s talent and its post-season success?
set.seed(20250611)
many_results <- rep(0.20, 100) |>
map(abdwr3edata::one_simulation_68) |>
list_rbind()
\[P(\text{win championship}|T) = \frac{e^{a+bT}}{1 + e^{a+bT}}\]
fit1 <- glm(
Winner.Lg ~ Talent,
data = many_results, family = binomial
)
fit2 <- glm(
Winner.WS ~ Talent,
data = many_results, family = binomial
)
graph code
tdf <- tibble(
Talent = seq(-0.4, 0.4, length.out = 100)
)
title_text <- "**Championship Probability** versus **Team Talent**"
subtitle_text <- "<span style='color:#0C2340'>**Pennant Probabilities**</span> and <span style='color:#FA4616'>**World Series Probabilities**</span>"
tdf |>
mutate(
Pennant = predict(fit1, newdata = tdf, type = "response"),
`World Series` = predict(fit2, newdata = tdf, type = "response")
) |>
pivot_longer(
cols = -Talent,
names_to = "Outcome",
values_to = "Probability"
) |>
ggplot(aes(Talent, Probability, color = Outcome)) +
geom_line(linewidth = 3) +
labs(title = title_text,
subtitle = subtitle_text,
caption = "DSLC") +
scale_color_manual(values = c("#0C2340", "#FA4616")) +
theme_minimal() +
theme(legend.position = "none",
plot.subtitle = ggtext::element_markdown(hjust = 1.0),
plot.title = ggtext::element_markdown(hjust = 1.0)) +
ylim(0, 0.65)