9.4 Simulate Half-Inning

simulate_half_inning <- function(P, R, start = 1) {
  # INPUTS:
      ## P: probability transition matrix
      ## R: run matrix
      ## start: starting state, 1 <= s <= 24
  # OUTPUT: number of runs scored
  s <- start
  path <- NULL
  runs <- 0
  while (s < 25) {
    s_new <- sample(1:25, size = 1, prob = P[s, ])
    path <- c(path, s_new)
    runs <- runs + R[s, s_new]
    s <- s_new
  }
  runs
}

9.4.1 Many Iterations

set.seed(20250611)
simulated_runs <- 1:12345 |>
  map_int(~simulate_half_inning(T_matrix, R_runs))
table(simulated_runs)

## simulated_runs
##    0    1    2    3    4    5    6    7    8    9 
## 9018 1806  836  398  160   82   27   12    3    3

How many runs are scored (on average) during a half-inning?

mean(simulated_runs)

## [1] 0.4875658

9.4.2 All baserunner-outs states

runs_j <- function(j) {
  1:987 |> #smaller number than textbook
    map_int(~simulate_half_inning(T_matrix, R_runs, j)) |>
    mean()
}

erm_2016_mc <- tibble(
  state = row.names(T_matrix), 
  mean_run_value = map_dbl(1:24, runs_j)
) |>
  mutate(
    bases = str_sub(state, 1, 3),
    outs_ct = as.numeric(str_sub(state, 5, 5))
  ) |>
  select(-state)

bases	0	1	2
Runs Potential
2016 Season
000	0.51	0.23	0.10
001	1.32	0.95	0.32
010	1.09	0.63	0.30
011	1.87	1.26	0.49
100	0.84	0.46	0.21
101	1.70	1.11	0.46
110	1.41	0.90	0.44
111	2.19	1.50	0.60

table code

erm_2016_mc |>
  pivot_wider(names_from = outs_ct, values_from = mean_run_value) |>
  gt() |>
  cols_align(align = "center") |>
  data_color(columns = -bases,
             palette = "inferno")  |>
  fmt_number(columns = -bases,
             decimals = 2) |>
  tab_header(title = "Runs Potential",
             subtitle = "2016 Season")

To understand the effect of non-batting plays (stealing, caught stealing, wild pitches, etc.) on run scoring, we compare this run expectancy matrix with the one found in Chapter 5 using all batting and non-batting plays.

From Chapter 5, we previously computed an expected run matrix as erm_2016.

erm code

retro2016 <- retro2016 |>
  mutate(
    is_runner1 = as.numeric(
      run1_dest_id == 1 | bat_dest_id == 1
    ),
    is_runner2 = as.numeric(
      run1_dest_id == 2 | run2_dest_id == 2 | 
        bat_dest_id == 2
    ),
    is_runner3 = as.numeric(
      run1_dest_id == 3 | run2_dest_id == 3 |
        run3_dest_id == 3 | bat_dest_id == 3
    ),
    new_outs = outs_ct + event_outs_ct,
    new_bases = paste0(is_runner1, is_runner2, is_runner3),
    new_state = paste(new_bases, new_outs)
  )

retro2016 <- retro2016 |> 
  mutate(
    runs_before = away_score_ct + home_score_ct,
    half_inning = paste(game_id, inn_ct, bat_home_id),
    runs_scored = 
      (bat_dest_id > 3) + (run1_dest_id > 3) + 
      (run2_dest_id > 3) + (run3_dest_id > 3)
  )

half_innings <- retro2016 |>
  group_by(half_inning) |>
  summarize(
    outs_inning = sum(event_outs_ct), 
    runs_inning = sum(runs_scored),
    runs_start = first(runs_before),
    max_runs = runs_inning + runs_start
  )

retro2016 <- retro2016 |>
  inner_join(half_innings, by = "half_inning") |>
  mutate(runs_roi = max_runs - runs_before)

changes2016 <- retro2016 |> 
  filter(state != new_state | runs_scored > 0)

changes2016_complete <- changes2016 |>
 filter(outs_inning == 3)

erm_2016 <- changes2016_complete |> 
  group_by(bases, outs_ct) |>
  summarize(mean_run_value = mean(runs_scored)) |>
  ungroup()

erm_2016_df <- erm_2016 |>
  inner_join(erm_2016_mc, join_by(bases, outs_ct)) |>
  mutate(
    run_value_diff = round(mean_run_value.x - mean_run_value.y, 2)
  ) |>
  select(bases, outs_ct, run_value_diff) |>
  pivot_wider(names_from = outs_ct, values_from = run_value_diff)

bases	0	1	2
Value of Non-Batting Plays
2016 Season
000	−0.48	−0.20	−0.07
001	−0.86	−0.46	−0.08
010	−0.92	−0.46	−0.11
011	−1.25	−0.65	−0.15
100	−0.77	−0.38	−0.13
101	−1.06	−0.53	−0.17
110	−1.19	−0.65	−0.18
111	−1.36	−0.71	−0.05

table code

erm_2016_df  |>
  gt() |>
  cols_align(align = "center") |>
  data_color(columns = -bases,
             palette = "inferno")  |>
  fmt_number(columns = -bases,
             decimals = 2) |>
  tab_header(title = "Value of Non-Batting Plays",
             subtitle = "2016 Season")