Function translations

How does {dbplyr} deal with mean() vs median()?

summarize_query <- function(df, ...) {
  df |> 
    summarize(...) |> 
    show_query()
}
mutate_query <- function(df, ...) {
  df |> 
    mutate(..., .keep = "none") |> 
    show_query()
}
flights |> 
  group_by(year, month, day) |>  
  summarize_query(
    mean = mean(arr_delay, na.rm = TRUE),
    median = median(arr_delay, na.rm = TRUE)
  )
## `summarise()` has grouped output by "year" and "month". You can override using
## the `.groups` argument.
## <SQL>
## SELECT
##   "year",
##   "month",
##   "day",
##   AVG(arr_delay) AS mean,
##   PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY arr_delay) AS median
## FROM flights
## GROUP BY "year", "month", "day"
flights |> 
  group_by(year, month, day) |>  
  mutate_query(
    mean = mean(arr_delay, na.rm = TRUE),
  )
## <SQL>
## SELECT
##   "year",
##   "month",
##   "day",
##   AVG(arr_delay) OVER (PARTITION BY "year", "month", "day") AS mean
## FROM flights
flights |> 
  group_by(dest) |>  
  arrange(time_hour) |> 
  mutate_query(
    lead = lead(arr_delay),
    lag = lag(arr_delay)
  )
## <SQL>
## SELECT
##   dest,
##   LEAD(arr_delay, 1, NULL) OVER (PARTITION BY dest ORDER BY time_hour) AS lead,
##   LAG(arr_delay, 1, NULL) OVER (PARTITION BY dest ORDER BY time_hour) AS lag
## FROM flights
## ORDER BY time_hour