# Code for creating the data file `data/sc_bip_2021_2023.parquet`
library(abdwr3edata)
library(tidyverse)
library(fs)
library(arrow)
# Build a parquet file of batted-ball rows from the 2021 and 2023 Statcast
# seasons, keeping only the columns listed in the final select().

# Locations ----
# Raw season CSVs live under <project root>/data_large/statcast_csv.
data_dir <- here::here("data_large")
statcast_dir <- path(data_dir, "statcast_csv")

# fs::dir_create() also creates missing parents (e.g. `data_large/`) and is a
# no-op when the directory already exists, so no existence check is needed.
dir_create(statcast_dir)

# Fetch raw data ----
# NOTE(review): statcast_season() comes from abdwr3edata; presumably it
# downloads one season of Statcast data as CSV files into `dir` -- confirm.
statcast_season(year = 2023, dir = statcast_dir)
statcast_season(year = 2021, dir = statcast_dir)

# Read each season ----
# The pattern picks out the CSV files whose names contain the season year.
sc2023 <- statcast_dir |>
  statcast_read_csv(pattern = "2023.+\\.csv")
sc2021 <- statcast_dir |>
  statcast_read_csv(pattern = "2021.+\\.csv")

# Keep rows with type == "X" ----
# Presumably balls in play, going by the `_bip` naming.
sc2021_bip <- sc2021 |>
  filter(type == "X")
sc2023_bip <- sc2023 |>
  filter(type == "X")

# Combine seasons (2021 rows first, then 2023) and trim the columns ----
sc_bip_2021_2023 <- bind_rows(sc2021_bip, sc2023_bip) |>
  select(
    game_pk, game_date, batter, pitcher, events,
    stand, p_throws, hit_distance_sc, hc_x, hc_y,
    launch_speed, launch_angle, home_team, away_team
  )

# Write the result ----
# Anchor the output at the project root (as the input paths are) rather than
# at the current working directory, and make sure `data/` exists first.
out_dir <- here::here("data")
dir_create(out_dir)
write_parquet(sc_bip_2021_2023, path(out_dir, "sc_bip_2021_2023.parquet"))