7.3 Getting the data

# Load the full 2022 Statcast data set from its RDS file; the path is
# resolved relative to the project root by here::here().
sc2022 <- read_rds(
  here::here("data_large/statcast_rds/statcast_2022.rds")
)
# Derive three helper columns on every pitch:
#   Outcome - pitch description collapsed to "ball", "swing", or
#             "called_strike"; any description not listed below is NA.
#   Home    - 1 when inning_topbot is "Bot", otherwise 0.
#   Count   - balls and strikes joined as "balls-strikes" (e.g. "3-2").
sc2022 <- mutate(
  sc2022,
  Outcome = case_when(
    description %in% c(
      "ball", "blocked_ball", "pitchout", "hit_by_pitch"
    ) ~ "ball",
    description %in% c(
      "swinging_strike", "swinging_strike_blocked",
      "foul", "foul_bunt", "foul_tip",
      "hit_into_play", "missed_bunt"
    ) ~ "swing",
    description %in% "called_strike" ~ "called_strike"
  ),
  Home = ifelse(inning_topbot == "Bot", yes = 1, no = 0),
  Count = paste(balls, strikes, sep = "-")
)
# Keep only pitches whose Outcome is something other than "swing".
# filter() also drops rows where the comparison is NA, so pitches with an
# NA Outcome are excluded as well.
taken <- filter(sc2022, Outcome != "swing")
# Narrow the taken pitches down to the columns that are saved below;
# the column order here is the column order of the saved table.
taken_select <- taken |>
  select(
    pitch_type,
    release_speed,
    description,
    stand,
    p_throws,
    Outcome,
    plate_x,
    plate_z,
    fielder_2_1,
    pitcher,
    batter,
    Count,
    Home,
    zone
  )
# Save the reduced table as an xz-compressed RDS file under data/.
taken_select |>
  write_rds(here::here("data/sc_taken_2022.rds"), compress = "xz")