Function translations
How does {dbplyr} deal with mean()
vs median()
?
summarize_query <- function(df, ...) {
df |>
summarize(...) |>
show_query()
}
mutate_query <- function(df, ...) {
df |>
mutate(..., .keep = "none") |>
show_query()
}
flights |>
group_by(year, month, day) |>
summarize_query(
mean = mean(arr_delay, na.rm = TRUE),
median = median(arr_delay, na.rm = TRUE)
)
## `summarise()` has grouped output by "year" and "month". You can override using
## the `.groups` argument.
## <SQL>
## SELECT
## "year",
## "month",
## "day",
## AVG(arr_delay) AS mean,
## PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY arr_delay) AS median
## FROM flights
## GROUP BY "year", "month", "day"
## <SQL>
## SELECT
## "year",
## "month",
## "day",
## AVG(arr_delay) OVER (PARTITION BY "year", "month", "day") AS mean
## FROM flights
flights |>
group_by(dest) |>
arrange(time_hour) |>
mutate_query(
lead = lead(arr_delay),
lag = lag(arr_delay)
)
## <SQL>
## SELECT
## dest,
## LEAD(arr_delay, 1, NULL) OVER (PARTITION BY dest ORDER BY time_hour) AS lead,
## LAG(arr_delay, 1, NULL) OVER (PARTITION BY dest ORDER BY time_hour) AS lag
## FROM flights
## ORDER BY time_hour