Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add $rolling_*_by() expressions #1115

Merged
merged 16 commits into from
Jun 3, 2024
7 changes: 5 additions & 2 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
- All `$rolling_*()` functions lose the arguments `by`, `closed` and
`warn_if_unsorted`. Rolling computations based on `by` must be made via the
corresponding `rolling_*_by()`, e.g. `rolling_mean_by()` instead of
`rolling_mean(by =)`.
`rolling_mean(by =)` (#1115).
- `pl$scan_parquet()` and `pl$read_parquet()` gain an argument `glob` which
defaults to `TRUE`. Set it to `FALSE` to avoid considering `*` as a globbing
pattern.
Expand Down Expand Up @@ -50,7 +50,10 @@
(#1112).
- In `$dt$combine()`, the arguments `tm` and `tu` are renamed `time` and
`time_unit` (#1116).
- The default value of the `rechunk` argument of `pl$concat()` is changed from `TRUE` to `FALSE` (#1125).
- The default value of the `rechunk` argument of `pl$concat()` is changed from
`TRUE` to `FALSE` (#1125).
- In all `$rolling_*()` functions, the arguments `center` and `ddof` must be
named (#1115).

### New features

Expand Down
248 changes: 241 additions & 7 deletions R/expr__expr.R
Original file line number Diff line number Diff line change
Expand Up @@ -2299,6 +2299,7 @@ prepare_rolling_window_args = function(
#' - 1i (1 index count)
#' If the dynamic string language is used, the `by` and `closed` arguments must
#' also be set.
#' @inherit Expr_rolling params return
#' @param weights An optional slice with the same length as the window that will
#' be multiplied elementwise with the values in the window.
#' @param min_periods The number of values in the window that should be non-null
Expand All @@ -2309,14 +2310,14 @@ prepare_rolling_window_args = function(
#' If you want to compute multiple aggregation statistics over the same dynamic
#' window, consider using `$rolling()`: this method can cache the window size
#' computation.
#' @return Expr
#' @examples
#' pl$DataFrame(a = c(1, 3, 2, 4, 5, 6))$
#' with_columns(roll_min = pl$col("a")$rolling_min(window_size = 2))
Expr_rolling_min = function(
window_size,
weights = NULL,
min_periods = NULL,
...,
center = FALSE) {
wargs = prepare_rolling_window_args(window_size, min_periods)
.pr$Expr$rolling_min(
Expand All @@ -2326,6 +2327,35 @@ Expr_rolling_min = function(
unwrap("in $rolling_min():")
}

#' Apply a rolling min based on another column.
#'
#' @inherit Expr_rolling_min params return details
#' @inheritParams Expr_rolling
#' @param by This column must be of dtype [`Date`][pl_date] or
#' [`Datetime`][DataType_Datetime].
#'
#' @examples
#' df_temporal = pl$DataFrame(
#' date = pl$datetime_range(as.Date("2001-1-1"), as.Date("2001-1-2"), "1h")
#' )$with_row_index("index")
#'
#' df_temporal
#'
#' df_temporal$with_columns(
#' rolling_row_min = pl$col("index")$rolling_min_by("date", window_size = "3h")
#' )
Expr_rolling_min_by = function(
    by,
    window_size,
    ...,
    min_periods = 1,
    closed = "right") {
  # `...` is never forwarded: it only forces `min_periods` and `closed` to be
  # supplied by name.
  unwrap(
    .pr$Expr$rolling_min_by(self, by, window_size, min_periods, closed),
    "in $rolling_min_by():"
  )
}

#' Rolling maximum
#'
#' Compute the rolling (= moving) max over the values in this array. A window of
Expand All @@ -2340,6 +2370,7 @@ Expr_rolling_max = function(
window_size,
weights = NULL,
min_periods = NULL,
...,
center = FALSE) {
wargs = prepare_rolling_window_args(window_size, min_periods)
.pr$Expr$rolling_max(
Expand All @@ -2349,6 +2380,31 @@ Expr_rolling_max = function(
unwrap("in $rolling_max()")
}

#' Apply a rolling max based on another column.
#'
#' @inherit Expr_rolling_min_by params return details
#' @examples
#' df_temporal = pl$DataFrame(
#' date = pl$datetime_range(as.Date("2001-1-1"), as.Date("2001-1-2"), "1h")
#' )$with_row_index("index")
#'
#' df_temporal
#'
#' df_temporal$with_columns(
#' rolling_row_max = pl$col("index")$rolling_max_by("date", window_size = "3h")
#' )
Expr_rolling_max_by = function(
    by,
    window_size,
    ...,
    min_periods = 1,
    closed = "right") {
  # `...` is never forwarded: it only forces `min_periods` and `closed` to be
  # supplied by name.
  unwrap(
    .pr$Expr$rolling_max_by(self, by, window_size, min_periods, closed),
    "in $rolling_max_by():"
  )
}

#' Rolling mean
#'
#' Compute the rolling (= moving) mean over the values in this array. A window of
Expand All @@ -2363,6 +2419,7 @@ Expr_rolling_mean = function(
window_size,
weights = NULL,
min_periods = NULL,
...,
center = FALSE) {
wargs = prepare_rolling_window_args(window_size, min_periods)
.pr$Expr$rolling_mean(
Expand All @@ -2372,6 +2429,32 @@ Expr_rolling_mean = function(
unwrap("in $rolling_mean():")
}

#' Apply a rolling mean based on another column.
#'
#' @inherit Expr_rolling_min_by params return details
#' @examples
#' df_temporal = pl$DataFrame(
#' date = pl$datetime_range(as.Date("2001-1-1"), as.Date("2001-1-2"), "1h")
#' )$with_row_index("index")
#'
#' df_temporal
#'
#' df_temporal$with_columns(
#' rolling_row_mean = pl$col("index")$rolling_mean_by("date", window_size = "3h")
#' )
Expr_rolling_mean_by = function(
    by,
    window_size,
    ...,
    min_periods = 1,
    closed = "right") {
  # `...` is never forwarded: it only forces `min_periods` and `closed` to be
  # supplied by name.
  unwrap(
    .pr$Expr$rolling_mean_by(self, by, window_size, min_periods, closed),
    "in $rolling_mean_by():"
  )
}

#' Rolling sum
#'
#' Compute the rolling (= moving) sum over the values in this array. A window of
Expand All @@ -2395,6 +2478,31 @@ Expr_rolling_sum = function(
unwrap("in $rolling_sum():")
}

#' Apply a rolling sum based on another column.
#'
#' @inherit Expr_rolling_min_by params return details
#' @examples
#' df_temporal = pl$DataFrame(
#' date = pl$datetime_range(as.Date("2001-1-1"), as.Date("2001-1-2"), "1h")
#' )$with_row_index("index")
#'
#' df_temporal
#'
#' df_temporal$with_columns(
#' rolling_row_sum = pl$col("index")$rolling_sum_by("date", window_size = "3h")
#' )
Expr_rolling_sum_by = function(
    by,
    window_size,
    ...,
    min_periods = 1,
    closed = "right") {
  # `...` is never forwarded: it only forces `min_periods` and `closed` to be
  # supplied by name.
  unwrap(
    .pr$Expr$rolling_sum_by(self, by, window_size, min_periods, closed),
    "in $rolling_sum_by():"
  )
}


#' Rolling standard deviation
#'
Expand All @@ -2404,22 +2512,57 @@ Expr_rolling_sum = function(
#' by the `weights` vector.
#'
#' @inherit Expr_rolling_min params details return
#' @inheritParams pl_std
#' @examples
#' pl$DataFrame(a = c(1, 3, 2, 4, 5, 6))$
#' with_columns(roll_std = pl$col("a")$rolling_std(window_size = 2))
Expr_rolling_std = function(
window_size,
weights = NULL,
min_periods = NULL,
center = FALSE) {
...,
center = FALSE,
ddof = 1) {
wargs = prepare_rolling_window_args(window_size, min_periods)
.pr$Expr$rolling_std(
self, wargs$window_size, weights,
wargs$min_periods, center
self, wargs$window_size, weights, wargs$min_periods, center, ddof
) |>
unwrap("in $rolling_std(): ")
}

#' Compute a rolling standard deviation based on another column
#'
#' @inherit Expr_rolling_min_by params return details
#' @inheritParams Expr_rolling_std
#' @examples
#' df_temporal = pl$DataFrame(
#' date = pl$datetime_range(as.Date("2001-1-1"), as.Date("2001-1-2"), "1h")
#' )$with_row_index("index")
#'
#' df_temporal
#'
#' # Compute the rolling std with the temporal windows closed on the right (default)
#' df_temporal$with_columns(
#' rolling_row_std = pl$col("index")$rolling_std_by("date", window_size = "2h")
#' )
#'
#' # Compute the rolling std with the closure of windows on both sides
#' df_temporal$with_columns(
#' rolling_row_std = pl$col("index")$rolling_std_by("date", window_size = "2h", closed = "both")
#' )
Expr_rolling_std_by = function(
    by,
    window_size,
    ...,
    min_periods = 1,
    closed = "right",
    ddof = 1) {
  # `...` is never forwarded: it only forces `min_periods`, `closed` and
  # `ddof` to be supplied by name.
  unwrap(
    .pr$Expr$rolling_std_by(
      self, by, window_size, min_periods, closed, ddof
    ),
    "in $rolling_std_by():"
  )
}

#' Rolling variance
#'
#' Compute the rolling (= moving) variance over the values in this array. A
Expand All @@ -2428,22 +2571,57 @@ Expr_rolling_std = function(
#' `weights` vector.
#'
#' @inherit Expr_rolling_min params details return
#' @inheritParams pl_std
#' @examples
#' pl$DataFrame(a = c(1, 3, 2, 4, 5, 6))$
#' with_columns(roll_var = pl$col("a")$rolling_var(window_size = 2))
Expr_rolling_var = function(
window_size,
weights = NULL,
min_periods = NULL,
center = FALSE) {
...,
center = FALSE,
ddof = 1) {
wargs = prepare_rolling_window_args(window_size, min_periods)
.pr$Expr$rolling_var(
self, wargs$window_size, weights,
wargs$min_periods, center
self, wargs$window_size, weights, wargs$min_periods, center, ddof
) |>
unwrap("in $rolling_var():")
}

#' Compute a rolling variance based on another column
#'
#' @inherit Expr_rolling_min_by params return details
#' @inheritParams Expr_rolling_var
#' @examples
#' df_temporal = pl$DataFrame(
#' date = pl$datetime_range(as.Date("2001-1-1"), as.Date("2001-1-2"), "1h")
#' )$with_row_index("index")
#'
#' df_temporal
#'
#' # Compute the rolling var with the temporal windows closed on the right (default)
#' df_temporal$with_columns(
#' rolling_row_var = pl$col("index")$rolling_var_by("date", window_size = "2h")
#' )
#'
#' # Compute the rolling var with the closure of windows on both sides
#' df_temporal$with_columns(
#' rolling_row_var = pl$col("index")$rolling_var_by("date", window_size = "2h", closed = "both")
#' )
Expr_rolling_var_by = function(
    by,
    window_size,
    ...,
    min_periods = 1,
    closed = "right",
    ddof = 1) {
  # `...` is never forwarded: it only forces `min_periods`, `closed` and
  # `ddof` to be supplied by name.
  unwrap(
    .pr$Expr$rolling_var_by(
      self, by, window_size, min_periods, closed, ddof
    ),
    "in $rolling_var_by():"
  )
}

#' Rolling median
#'
#' Compute the rolling (= moving) median over the values in this array. A window
Expand All @@ -2467,6 +2645,31 @@ Expr_rolling_median = function(
) |> unwrap("in $rolling_median():")
}

#' Apply a rolling median based on another column.
#'
#' @inherit Expr_rolling_min_by params return details
#' @examples
#' df_temporal = pl$DataFrame(
#' date = pl$datetime_range(as.Date("2001-1-1"), as.Date("2001-1-2"), "1h")
#' )$with_row_index("index")
#'
#' df_temporal
#'
#' df_temporal$with_columns(
#' rolling_row_median = pl$col("index")$rolling_median_by("date", window_size = "3h")
#' )
Expr_rolling_median_by = function(
    by,
    window_size,
    ...,
    min_periods = 1,
    closed = "right") {
  # `...` is never forwarded: it only forces `min_periods` and `closed` to be
  # supplied by name.
  unwrap(
    .pr$Expr$rolling_median_by(self, by, window_size, min_periods, closed),
    "in $rolling_median_by():"
  )
}

#' Rolling quantile
#'
#' Compute the rolling (= moving) quantile over the values in this array. A
Expand All @@ -2487,6 +2690,7 @@ Expr_rolling_quantile = function(
window_size,
weights = NULL,
min_periods = NULL,
...,
center = FALSE) {
wargs = prepare_rolling_window_args(window_size, min_periods)
.pr$Expr$rolling_quantile(
Expand All @@ -2496,6 +2700,36 @@ Expr_rolling_quantile = function(
unwrap("in $rolling_quantile():")
}

#' Compute a rolling quantile based on another column
#'
#' @inherit Expr_rolling_min_by params return details
#' @inheritParams Expr_quantile
#' @examples
#' df_temporal = pl$DataFrame(
#' date = pl$datetime_range(as.Date("2001-1-1"), as.Date("2001-1-2"), "1h")
#' )$with_row_index("index")
#'
#' df_temporal
#'
#' df_temporal$with_columns(
#' rolling_row_quantile = pl$col("index")$rolling_quantile_by(
#' "date",
#' window_size = "2h", quantile = 0.3
#' )
#' )
Expr_rolling_quantile_by = function(
    by,
    window_size,
    ...,
    quantile,
    interpolation = "nearest",
    min_periods = 1,
    closed = "right") {
  # `...` is never forwarded: it only forces `quantile` and the arguments
  # after it to be supplied by name. `quantile` has no default.
  # The internal binding takes `quantile` and `interpolation` before
  # `window_size`, so the order below differs from this signature.
  unwrap(
    .pr$Expr$rolling_quantile_by(
      self, by, quantile, interpolation, window_size, min_periods, closed
    ),
    "in $rolling_quantile_by():"
  )
}

#' Rolling skew
#'
Expand Down
Loading
Loading