Read in data

# Read the parquet file (path resolved relative to the project root), then
# add two derived columns:
#   Season - calendar year extracted from game_date
#   HR     - numeric 0/1 flag, 1 when the recorded event is a home run
sc_two_seasons <- mutate(
  read_parquet(here::here("data/sc_bip_2021_2023.parquet")),
  Season = year(game_date),
  HR = as.numeric(events == "home_run")
)

# Keep only the rows belonging to the 2023 season.
sc_2023 <- filter(sc_two_seasons, Season == 2023)
Rows: 124,234
Columns: 16
$ game_pk         <dbl> 718773, 718774, 718773, 718778, 718781, 718778, 718772, 718770, 718776, 718778, 718…
$ game_date       <date> 2023-03-30, 2023-03-30, 2023-03-30, 2023-03-30, 2023-03-30, 2023-03-30, 2023-03-30…
$ batter          <dbl> 613564, 643446, 641584, 453568, 527038, 592178, 665489, 502110, 670623, 602074, 663…
$ pitcher         <dbl> 656605, 645261, 656605, 605483, 543037, 605483, 571945, 668678, 593958, 605483, 669…
$ events          <chr> "triple", "single", "single", "single", "grounded_into_double_play", "field_out", "…
$ stand           <chr> "L", "L", "L", "L", "R", "R", "R", "R", "R", "R", "L", "L", "R", "L", "L", "R", "R"…
$ p_throws        <chr> "R", "R", "R", "L", "R", "L", "R", "R", "L", "L", "R", "R", "L", "R", "R", "R", "R"…
$ hit_distance_sc <dbl> 134, 9, 254, 162, 51, 56, 42, 185, 143, 171, 240, 374, 356, 65, 392, 116, 422, 188,…
$ hc_x            <dbl> 215.08, 164.78, 196.95, 90.66, 110.24, 153.95, 184.78, 177.78, 116.88, 177.33, 73.8…
$ hc_y            <dbl> 107.23, 105.10, 95.17, 133.88, 148.44, 209.01, 100.21, 106.58, 74.73, 114.14, 117.8…
$ launch_speed    <dbl> 94.2, 93.7, 111.7, 59.1, 94.8, 69.5, 115.5, 102.7, 105.7, 93.8, 93.4, 96.4, 93.5, 8…
$ launch_angle    <dbl> 9, -19, 13, 27, 1, 81, -2, 9, 7, 9, 55, 28, 25, 76, 35, 6, 26, 12, 9, 32, -2, 1, -3…
$ home_team       <chr> "CIN", "MIA", "CIN", "SD", "NYY", "SD", "STL", "LAD", "TB", "SD", "SEA", "MIA", "TB…
$ away_team       <chr> "PIT", "NYM", "PIT", "COL", "SF", "COL", "TOR", "AZ", "DET", "COL", "CLE", "NYM", "…
$ Season          <dbl> 2023, 2023, 2023, 2023, 2023, 2023, 2023, 2023, 2023, 2023, 2023, 2023, 2023, 2023,…
$ HR              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
  • The Parquet format keeps the file small enough (3.4 MB) that we can include the data in the repo!