Data preparation

  • Focus on players who have completed their careers
  • Use the midpoint of each career to position players' careers in time
  • Fit a quadratic aging curve to every player and extract the coefficients
# IDs of players who are no longer active: everyone in `People` whose final
# game was played before 2021-11-01.
not_current_playerID <- People |>
  filter(finalGame < "2021-11-01") |>
  pull(playerID)

# Drop the season rows of every player who is not in that set.
batting_2000 <- filter(batting_2000, playerID %in% not_current_playerID)

# One row per player:
#   Midyear  - halfway point between the first and last season in the data
#   AB_total - the first AB_career value found for the player
#              (presumably constant within a player; verify upstream)
midcareers <- batting_2000 |>
  group_by(playerID) |>
  summarize(
    Midyear = mean(range(yearID)),
    AB_total = AB_career[1]
  )

# Attach the per-player summary to every season row of that player.
batting_2000 <- inner_join(batting_2000, midcareers, by = "playerID")

# Group the season rows by player; the grouped frame feeds both the key
# lookup and the per-player split below.
batting_2000_grouped <- group_by(batting_2000, playerID)

# Player IDs used to label the fits. This relies on group_keys() and
# group_split() returning the groups in the same order.
ids <- batting_2000_grouped |>
  group_keys() |>
  pull(playerID)

# For each player, fit OPS as a quadratic in (Age - 30) and tidy the fit into
# a coefficient table; then label the tables by player and stack them into
# one data frame with a `playerID` column.
models <- batting_2000_grouped |>
  group_split() |>
  map(\(player_seasons) {
    fit <- lm(OPS ~ I(Age - 30) + I((Age - 30)^2), data = player_seasons)
    tidy(fit)
  }) |>
  set_names(ids) |>
  bind_rows(.id = "playerID")

# Collapse each player's coefficient table to a single row. Estimates are
# picked by position, so this depends on the rows of `models` being ordered
# intercept (A), linear term (B), quadratic term (C) within each player.
beta_coefs <- models |>
  group_by(playerID) |>
  summarize(
    A = first(estimate),
    B = nth(estimate, 2),
    C = nth(estimate, 3)
  ) |>
  # Vertex of A + B * x + C * x^2, where x = Age - 30.
  mutate(Peak_age = 30 - B / (2 * C)) |>
  inner_join(midcareers, by = "playerID") |>
  # No `by` is given here, so dplyr joins on the columns shared with
  # `Positions` and reports the key it chose.
  inner_join(Positions) |> # for use later!
  rename(Position = POS)
## Joining with `by = join_by(playerID)`