Local Patterns of Statcast Launch Velocity

# Load the 2017 Statcast data and keep only the rows with type == "X"
# (presumably balls in play, going by the `bip` naming below -- TODO confirm).
sc_2017_ls <- readr::read_rds(here::here("data/statcast_2017.rds"))
sc_ip2017 <- sc_2017_ls |>
  dplyr::filter(type == "X")

# One row per player and game date: the number of rows (bip) and the total
# launch speed (sum_LS). summarize() already orders its output by the grouping
# keys, so no arrange() is needed beforehand. `.groups = "drop"` returns an
# ungrouped tibble instead of one left grouped by player_name, which also
# avoids the "grouped output" message.
# NOTE(review): sum() is called without na.rm, so a single NA launch_speed
# makes that game's sum_LS NA -- confirm this is intended.
launch_speeds <- sc_ip2017 |>
  dplyr::group_by(player_name, game_date) |>
  dplyr::summarize(
    bip = dplyr::n(),
    sum_LS = sum(launch_speed),
    .groups = "drop"
  )
# Per-game launch-speed rows for players with at least 250 rows in sc_ip2017.
# The left-hand table holds the qualifying players and their total_bip; the
# inner join attaches every per-game row of launch_speeds for those players.
ls_250 <- dplyr::inner_join(
  sc_ip2017 |>
    dplyr::group_by(player_name) |>
    dplyr::summarize(total_bip = dplyr::n()) |>
    dplyr::filter(total_bip >= 250),
  launch_speeds,
  by = "player_name"
)
# Collapse consecutive rows of `data` into bins of `group_size` rows and total
# each bin.
#
# data:       data frame with `bip` and `sum_LS` columns. Rows are binned in
#             their current order, so sort before calling. Any existing
#             grouping is dropped so that rows are numbered across the whole
#             table.
# group_size: number of rows per bin; a single positive number.
#
# Returns one row per bin with `group_id` (0-based bin index), `G` (number of
# rows in the bin), and `bip` / `sum_LS` (sums over the bin).
regroup <- function(data, group_size) {
  stopifnot(
    is.numeric(group_size),
    length(group_size) == 1,
    group_size > 0
  )
  out <- data |>
    dplyr::ungroup() |>
    dplyr::mutate(
      id = dplyr::row_number() - 1,
      group_id = floor(id / group_size)
    )
  # Avoid a small leftover bin by folding it into the previous bin. This is
  # only done when a previous bin exists: with fewer than `group_size` rows
  # the single partial bin keeps group_id 0 rather than being relabelled -1.
  n_rows <- nrow(out)
  if (n_rows > group_size && n_rows %% group_size != 0) {
    last_full_id <- max(out$group_id) - 1
    out <- out |>
      dplyr::mutate(group_id = pmin(group_id, last_full_id))
  }
  out |>
    dplyr::group_by(group_id) |>
    dplyr::summarize(
      G = dplyr::n(), bip = sum(bip), sum_LS = sum(sum_LS)
    )
}

We focus on A.J. Pollock’s data

# Per-game rows for A.J. Pollock in date order, then a preview of the first
# five 5-game bins.
# NOTE(review): the recorded output below has zero rows, so this exact
# spelling may not occur in player_name -- verify against the data.
aj <- dplyr::filter(ls_250, player_name == "A.J. Pollock")
aj <- dplyr::arrange(aj, game_date)
dplyr::slice_head(regroup(aj, 5), n = 5)
## # A tibble: 0 × 4
## # ℹ 4 variables: group_id <dbl>, G <int>, bip <int>, sum_LS <dbl>
# Summarise one player's launch speed across consecutive windows of games.
#
# data:       per-game table with player_name, game_date, bip and sum_LS.
# name:       value of player_name to keep.
# group_size: number of games per window (default 5).
#
# Returns a one-row tibble with balls_in_play (total bip over all windows) and
# the Mean and SD of the per-window average launch speed (sum_LS / bip), with
# NA windows ignored.
summarize_streak_data <- function(data, name, group_size = 5) {
  player_games <- dplyr::arrange(
    dplyr::filter(data, player_name == name),
    game_date
  )
  regroup(player_games, group_size) |>
    dplyr::mutate(window_avg = sum_LS / bip) |>
    dplyr::summarize(
      balls_in_play = sum(bip),
      Mean = mean(window_avg, na.rm = TRUE),
      SD = sd(window_avg, na.rm = TRUE)
    )
}

Mean and SD for A.J. Pollock

# Window-level launch-speed summary for A.J. Pollock (5-game windows).
aj_sum <- ls_250 |>
  summarize_streak_data(name = "A.J. Pollock", group_size = 5)
aj_sum
## # A tibble: 1 × 3
##   balls_in_play  Mean    SD
##           <int> <dbl> <dbl>
## 1             0   NaN    NA

Let’s look at all the players (with at least 250 balls in play)

# Distinct player names present in ls_250.
player_list <- unique(dplyr::pull(ls_250, player_name))

# One summary row per player, stacked in player_list order; the names are then
# attached positionally as the Player column.
results <- player_list |>
  purrr::map(\(player) summarize_streak_data(ls_250, player)) |>
  purrr::list_rbind() |>
  dplyr::mutate(Player = player_list)
library(ggrepel)

# Mean vs. SD of window-average launch speed, labelling only the players whose
# SD falls outside the [2.3, 5.63] band.
results |>
  ggplot2::ggplot(ggplot2::aes(x = Mean, y = SD)) +
  ggplot2::geom_point() +
  ggrepel::geom_label_repel(
    mapping = ggplot2::aes(label = Player),
    data = dplyr::filter(results, SD < 2.3 | SD > 5.63)
  )

A new function to get average launch speed over a given game window

# Average launch speed for one player over consecutive windows of games.
#
# data:       per-game table with player_name, game_date, bip and sum_LS.
# name:       value of player_name to keep.
# group_size: number of games per window (default 5).
#
# Returns the regroup() output (one row per window) with two added columns:
# launch_speed_avg (sum_LS / bip) and Period (1-based window number).
get_streak_data <- function(data, name, group_size = 5) {
  player_games <- dplyr::arrange(
    dplyr::filter(data, player_name == name),
    game_date
  )
  windows <- regroup(player_games, group_size)
  dplyr::mutate(
    windows,
    launch_speed_avg = sum_LS / bip,
    Period = seq_along(group_id)
  )
}
# Window-level launch-speed series for two players, stacked into one table.
# set_names() names each element by the player, and list_rbind() turns those
# names into the Player column.
streaky <- c("Michael Conforto", "Dexter Fowler") |>
  purrr::set_names() |>
  purrr::map(get_streak_data, data = ls_250) |>
  purrr::list_rbind(names_to = "Player")

# One panel per player, stacked vertically. vars() is namespace-qualified like
# every other call in this file, so the plot does not depend on ggplot2 (or
# dplyr) happening to be attached.
ggplot2::ggplot(streaky, ggplot2::aes(Period, launch_speed_avg)) +
  ggplot2::geom_line(linewidth = 1) +
  ggplot2::facet_wrap(ggplot2::vars(Player), ncol = 1)