diff --git a/NEWS.md b/NEWS.md index 85259e4b6..614c5507b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -12,7 +12,7 @@ - All `$rolling_*()` functions lose the arguments `by`, `closed` and `warn_if_unsorted`. Rolling computations based on `by` must be made via the corresponding `rolling_*_by()`, e.g `rolling_mean_by()` instead of - `rolling_mean(by =)`. + `rolling_mean(by =)` (#1115). - `pl$scan_parquet()` and `pl$read_parquet()` gain an argument `glob` which defaults to `TRUE`. Set it to `FALSE` to avoid considering `*` as a globing pattern. @@ -50,7 +50,10 @@ (#1112). - In `$dt$combine()`, the arguments `tm` and `tu` are renamed `time` and `time_unit` (#1116). -- The default value of the `rechunk` argument of `pl$concat()` is changed from `TRUE` to `FALSE` (#1125). +- The default value of the `rechunk` argument of `pl$concat()` is changed from + `TRUE` to `FALSE` (#1125). +- In all `$rolling_*()` functions, the arguments `center` and `ddof` must be + named (#1115). ### New features diff --git a/R/expr__expr.R b/R/expr__expr.R index c68d56442..129beaa90 100644 --- a/R/expr__expr.R +++ b/R/expr__expr.R @@ -2299,6 +2299,7 @@ prepare_rolling_window_args = function( #' - 1i (1 index count) #' If the dynamic string language is used, the `by` and `closed` arguments must #' also be set. +#' @inherit Expr_rolling params return #' @param weights An optional slice with the same length as the window that will #' be multiplied elementwise with the values in the window. #' @param min_periods The number of values in the window that should be non-null @@ -2309,7 +2310,6 @@ prepare_rolling_window_args = function( #' If you want to compute multiple aggregation statistics over the same dynamic #' window, consider using `$rolling()` this method can cache the window size #' computation. 
-#' @return Expr #' @examples #' pl$DataFrame(a = c(1, 3, 2, 4, 5, 6))$ #' with_columns(roll_min = pl$col("a")$rolling_min(window_size = 2)) @@ -2317,6 +2317,7 @@ Expr_rolling_min = function( window_size, weights = NULL, min_periods = NULL, + ..., center = FALSE) { wargs = prepare_rolling_window_args(window_size, min_periods) .pr$Expr$rolling_min( @@ -2326,6 +2327,35 @@ Expr_rolling_min = function( unwrap("in $rolling_min():") } +#' Apply a rolling min based on another column. +#' +#' @inherit Expr_rolling_min params return details +#' @inheritParams Expr_rolling +#' @param by This column must of dtype [`Date`][pl_date] or +#' [`Datetime`][DataType_Datetime]. +#' +#' @examples +#' df_temporal = pl$DataFrame( +#' date = pl$datetime_range(as.Date("2001-1-1"), as.Date("2001-1-2"), "1h") +#' )$with_row_index("index") +#' +#' df_temporal +#' +#' df_temporal$with_columns( +#' rolling_row_min = pl$col("index")$rolling_min_by("date", window_size = "3h") +#' ) +Expr_rolling_min_by = function( + by, + window_size, + ..., + min_periods = 1, + closed = "right") { + .pr$Expr$rolling_min_by( + self, by, window_size, min_periods, closed + ) |> + unwrap("in $rolling_min_by():") +} + #' Rolling maximum #' #' Compute the rolling (= moving) max over the values in this array. A window of @@ -2340,6 +2370,7 @@ Expr_rolling_max = function( window_size, weights = NULL, min_periods = NULL, + ..., center = FALSE) { wargs = prepare_rolling_window_args(window_size, min_periods) .pr$Expr$rolling_max( @@ -2349,6 +2380,31 @@ Expr_rolling_max = function( unwrap("in $rolling_max()") } +#' Apply a rolling max based on another column. 
+#' +#' @inherit Expr_rolling_min_by params return details +#' @examples +#' df_temporal = pl$DataFrame( +#' date = pl$datetime_range(as.Date("2001-1-1"), as.Date("2001-1-2"), "1h") +#' )$with_row_index("index") +#' +#' df_temporal +#' +#' df_temporal$with_columns( +#' rolling_row_max = pl$col("index")$rolling_max_by("date", window_size = "3h") +#' ) +Expr_rolling_max_by = function( + by, + window_size, + ..., + min_periods = 1, + closed = "right") { + .pr$Expr$rolling_max_by( + self, by, window_size, min_periods, closed + ) |> + unwrap("in $rolling_max_by():") +} + #' Rolling mean #' #' Compute the rolling (= moving) mean over the values in this array. A window of @@ -2363,6 +2419,7 @@ Expr_rolling_mean = function( window_size, weights = NULL, min_periods = NULL, + ..., center = FALSE) { wargs = prepare_rolling_window_args(window_size, min_periods) .pr$Expr$rolling_mean( @@ -2372,6 +2429,32 @@ Expr_rolling_mean = function( unwrap("in $rolling_mean():") } +#' Apply a rolling mean based on another column. +#' +#' @inherit Expr_rolling_min_by params return details +#' @examples +#' df_temporal = pl$DataFrame( +#' date = pl$datetime_range(as.Date("2001-1-1"), as.Date("2001-1-2"), "1h") +#' )$with_row_index("index") +#' +#' df_temporal +#' +#' df_temporal$with_columns( +#' rolling_row_mean = pl$col("index")$rolling_mean_by("date", window_size = "3h") +#' ) +Expr_rolling_mean_by = function( + by, + window_size, + ..., + min_periods = 1, + closed = "right") { + .pr$Expr$rolling_mean_by( + self, + by, window_size, min_periods, closed + ) |> + unwrap("in $rolling_mean_by():") +} + #' Rolling sum #' #' Compute the rolling (= moving) sum over the values in this array. A window of @@ -2395,6 +2478,31 @@ Expr_rolling_sum = function( unwrap("in $rolling_sum():") } +#' Apply a rolling sum based on another column. 
+#' +#' @inherit Expr_rolling_min_by params return details +#' @examples +#' df_temporal = pl$DataFrame( +#' date = pl$datetime_range(as.Date("2001-1-1"), as.Date("2001-1-2"), "1h") +#' )$with_row_index("index") +#' +#' df_temporal +#' +#' df_temporal$with_columns( +#' rolling_row_sum = pl$col("index")$rolling_sum_by("date", window_size = "3h") +#' ) +Expr_rolling_sum_by = function( + by, + window_size, + ..., + min_periods = 1, + closed = "right") { + .pr$Expr$rolling_sum_by( + self, by, window_size, min_periods, closed + ) |> + unwrap("in $rolling_sum_by():") +} + #' Rolling standard deviation #' @@ -2404,6 +2512,7 @@ Expr_rolling_sum = function( #' by the `weight` vector. #' #' @inherit Expr_rolling_min params details return +#' @inheritParams pl_std #' @examples #' pl$DataFrame(a = c(1, 3, 2, 4, 5, 6))$ #' with_columns(roll_std = pl$col("a")$rolling_std(window_size = 2)) @@ -2411,15 +2520,49 @@ Expr_rolling_std = function( window_size, weights = NULL, min_periods = NULL, - center = FALSE) { + ..., + center = FALSE, + ddof = 1) { wargs = prepare_rolling_window_args(window_size, min_periods) .pr$Expr$rolling_std( - self, wargs$window_size, weights, - wargs$min_periods, center + self, wargs$window_size, weights, wargs$min_periods, center, ddof ) |> unwrap("in $rolling_std(): ") } +#' Compute a rolling standard deviation based on another column +#' +#' @inherit Expr_rolling_min_by params return details +#' @inheritParams Expr_rolling_std +#' @examples +#' df_temporal = pl$DataFrame( +#' date = pl$datetime_range(as.Date("2001-1-1"), as.Date("2001-1-2"), "1h") +#' )$with_row_index("index") +#' +#' df_temporal +#' +#' # Compute the rolling std with the temporal windows closed on the right (default) +#' df_temporal$with_columns( +#' rolling_row_std = pl$col("index")$rolling_std_by("date", window_size = "2h") +#' ) +#' +#' # Compute the rolling std with the closure of windows on both sides +#' df_temporal$with_columns( +#' rolling_row_std = 
pl$col("index")$rolling_std_by("date", window_size = "2h", closed = "both") +#' ) +Expr_rolling_std_by = function( + by, + window_size, + ..., + min_periods = 1, + closed = "right", + ddof = 1) { + .pr$Expr$rolling_std_by( + self, by, window_size, min_periods, closed, ddof + ) |> + unwrap("in $rolling_std_by():") +} + #' Rolling variance #' #' Compute the rolling (= moving) variance over the values in this array. A @@ -2428,6 +2571,7 @@ Expr_rolling_std = function( #' `weight` vector. #' #' @inherit Expr_rolling_min params details return +#' @inheritParams pl_std #' @examples #' pl$DataFrame(a = c(1, 3, 2, 4, 5, 6))$ #' with_columns(roll_var = pl$col("a")$rolling_var(window_size = 2)) @@ -2435,15 +2579,49 @@ Expr_rolling_var = function( window_size, weights = NULL, min_periods = NULL, - center = FALSE) { + ..., + center = FALSE, + ddof = 1) { wargs = prepare_rolling_window_args(window_size, min_periods) .pr$Expr$rolling_var( - self, wargs$window_size, weights, - wargs$min_periods, center + self, wargs$window_size, weights, wargs$min_periods, center, ddof ) |> unwrap("in $rolling_var():") } +#' Compute a rolling variance based on another column +#' +#' @inherit Expr_rolling_min_by params return details +#' @inheritParams Expr_rolling_var +#' @examples +#' df_temporal = pl$DataFrame( +#' date = pl$datetime_range(as.Date("2001-1-1"), as.Date("2001-1-2"), "1h") +#' )$with_row_index("index") +#' +#' df_temporal +#' +#' # Compute the rolling var with the temporal windows closed on the right (default) +#' df_temporal$with_columns( +#' rolling_row_var = pl$col("index")$rolling_var_by("date", window_size = "2h") +#' ) +#' +#' # Compute the rolling var with the closure of windows on both sides +#' df_temporal$with_columns( +#' rolling_row_var = pl$col("index")$rolling_var_by("date", window_size = "2h", closed = "both") +#' ) +Expr_rolling_var_by = function( + by, + window_size, + ..., + min_periods = 1, + closed = "right", + ddof = 1) { + .pr$Expr$rolling_var_by( + self, by, 
window_size, min_periods, closed, ddof + ) |> + unwrap("in $rolling_var_by():") +} + #' Rolling median #' #' Compute the rolling (= moving) median over the values in this array. A window @@ -2467,6 +2645,31 @@ Expr_rolling_median = function( ) |> unwrap("in $rolling_median():") } +#' Apply a rolling median based on another column. +#' +#' @inherit Expr_rolling_min_by params return details +#' @examples +#' df_temporal = pl$DataFrame( +#' date = pl$datetime_range(as.Date("2001-1-1"), as.Date("2001-1-2"), "1h") +#' )$with_row_index("index") +#' +#' df_temporal +#' +#' df_temporal$with_columns( +#' rolling_row_median = pl$col("index")$rolling_median_by("date", window_size = "3h") +#' ) +Expr_rolling_median_by = function( + by, + window_size, + ..., + min_periods = 1, + closed = "right") { + .pr$Expr$rolling_median_by( + self, by, window_size, min_periods, closed + ) |> + unwrap("in $rolling_median_by():") +} + #' Rolling quantile #' #' Compute the rolling (= moving) quantile over the values in this array. 
A @@ -2487,6 +2690,7 @@ Expr_rolling_quantile = function( window_size, weights = NULL, min_periods = NULL, + ..., center = FALSE) { wargs = prepare_rolling_window_args(window_size, min_periods) .pr$Expr$rolling_quantile( @@ -2496,6 +2700,36 @@ Expr_rolling_quantile = function( unwrap("in $rolling_quantile():") } +#' Compute a rolling quantile based on another column +#' +#' @inherit Expr_rolling_min_by params return details +#' @inheritParams Expr_quantile +#' @examples +#' df_temporal = pl$DataFrame( +#' date = pl$datetime_range(as.Date("2001-1-1"), as.Date("2001-1-2"), "1h") +#' )$with_row_index("index") +#' +#' df_temporal +#' +#' df_temporal$with_columns( +#' rolling_row_quantile = pl$col("index")$rolling_quantile_by( +#' "date", +#' window_size = "2h", quantile = 0.3 +#' ) +#' ) +Expr_rolling_quantile_by = function( + by, + window_size, + ..., + quantile, + interpolation = "nearest", + min_periods = 1, + closed = "right") { + .pr$Expr$rolling_quantile_by( + self, by, quantile, interpolation, window_size, min_periods, closed + ) |> + unwrap("in $rolling_quantile_by():") +} #' Rolling skew #' diff --git a/R/extendr-wrappers.R b/R/extendr-wrappers.R index b961e6840..91ae47991 100644 --- a/R/extendr-wrappers.R +++ b/R/extendr-wrappers.R @@ -618,11 +618,13 @@ RPolarsExpr$rolling_sum <- function(window_size, weights, min_periods, center) . 
RPolarsExpr$rolling_sum_by <- function(by, window_size, min_periods, closed) .Call(wrap__RPolarsExpr__rolling_sum_by, self, by, window_size, min_periods, closed) -RPolarsExpr$rolling_std <- function(window_size, weights, min_periods, center) .Call(wrap__RPolarsExpr__rolling_std, self, window_size, weights, min_periods, center) +RPolarsExpr$rolling_std <- function(window_size, weights, min_periods, center, ddof) .Call(wrap__RPolarsExpr__rolling_std, self, window_size, weights, min_periods, center, ddof) -RPolarsExpr$rolling_std_by <- function(by, window_size, min_periods, closed) .Call(wrap__RPolarsExpr__rolling_std_by, self, by, window_size, min_periods, closed) +RPolarsExpr$rolling_std_by <- function(by, window_size, min_periods, closed, ddof) .Call(wrap__RPolarsExpr__rolling_std_by, self, by, window_size, min_periods, closed, ddof) -RPolarsExpr$rolling_var <- function(window_size, weights, min_periods, center) .Call(wrap__RPolarsExpr__rolling_var, self, window_size, weights, min_periods, center) +RPolarsExpr$rolling_var <- function(window_size, weights, min_periods, center, ddof) .Call(wrap__RPolarsExpr__rolling_var, self, window_size, weights, min_periods, center, ddof) + +RPolarsExpr$rolling_var_by <- function(by, window_size, min_periods, closed, ddof) .Call(wrap__RPolarsExpr__rolling_var_by, self, by, window_size, min_periods, closed, ddof) RPolarsExpr$rolling_median <- function(window_size, weights, min_periods, center) .Call(wrap__RPolarsExpr__rolling_median, self, window_size, weights, min_periods, center) @@ -630,6 +632,8 @@ RPolarsExpr$rolling_median_by <- function(by, window_size, min_periods, closed) RPolarsExpr$rolling_quantile <- function(quantile, interpolation, window_size, weights, min_periods, center) .Call(wrap__RPolarsExpr__rolling_quantile, self, quantile, interpolation, window_size, weights, min_periods, center) +RPolarsExpr$rolling_quantile_by <- function(by, quantile, interpolation, window_size, min_periods, closed) 
.Call(wrap__RPolarsExpr__rolling_quantile_by, self, by, quantile, interpolation, window_size, min_periods, closed) + RPolarsExpr$rolling_skew <- function(window_size, bias) .Call(wrap__RPolarsExpr__rolling_skew, self, window_size, bias) RPolarsExpr$abs <- function() .Call(wrap__RPolarsExpr__abs, self) diff --git a/man/Expr_rolling_max.Rd b/man/Expr_rolling_max.Rd index ae0fa7c80..a2991e4f8 100644 --- a/man/Expr_rolling_max.Rd +++ b/man/Expr_rolling_max.Rd @@ -8,6 +8,7 @@ Expr_rolling_max( window_size, weights = NULL, min_periods = NULL, + ..., center = FALSE ) } @@ -36,6 +37,8 @@ be multiplied elementwise with the values in the window.} \item{min_periods}{The number of values in the window that should be non-null before computing a result. If \code{NULL}, it will be set equal to window size.} +\item{...}{Ignored.} + \item{center}{Set the labels at the center of the window} } \value{ diff --git a/man/Expr_rolling_max_by.Rd b/man/Expr_rolling_max_by.Rd new file mode 100644 index 000000000..aea69f22b --- /dev/null +++ b/man/Expr_rolling_max_by.Rd @@ -0,0 +1,60 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/expr__expr.R +\name{Expr_rolling_max_by} +\alias{Expr_rolling_max_by} +\title{Apply a rolling max based on another column.} +\usage{ +Expr_rolling_max_by(by, window_size, ..., min_periods = 1, closed = "right") +} +\arguments{ +\item{by}{This column must of dtype \code{\link[=pl_date]{Date}} or +\code{\link[=DataType_Datetime]{Datetime}}.} + +\item{window_size}{The length of the window. 
Can be a fixed integer size, or a dynamic temporal +size indicated by the following string language: +\itemize{ +\item 1ns (1 nanosecond) +\item 1us (1 microsecond) +\item 1ms (1 millisecond) +\item 1s (1 second) +\item 1m (1 minute) +\item 1h (1 hour) +\item 1d (1 day) +\item 1w (1 week) +\item 1mo (1 calendar month) +\item 1y (1 calendar year) +\item 1i (1 index count) +If the dynamic string language is used, the \code{by} and \code{closed} arguments must +also be set. +}} + +\item{...}{Ignored.} + +\item{min_periods}{The number of values in the window that should be non-null +before computing a result. If \code{NULL}, it will be set equal to window size.} + +\item{closed}{Define which sides of the temporal interval are closed +(inclusive). This can be either \code{"left"}, \code{"right"}, \code{"both"} or \code{"none"}.} +} +\value{ +Expr +} +\description{ +Apply a rolling max based on another column. +} +\details{ +If you want to compute multiple aggregation statistics over the same dynamic +window, consider using \verb{$rolling()} this method can cache the window size +computation. +} +\examples{ +df_temporal = pl$DataFrame( + date = pl$datetime_range(as.Date("2001-1-1"), as.Date("2001-1-2"), "1h") +)$with_row_index("index") + +df_temporal + +df_temporal$with_columns( + rolling_row_max = pl$col("index")$rolling_max_by("date", window_size = "3h") +) +} diff --git a/man/Expr_rolling_mean.Rd b/man/Expr_rolling_mean.Rd index 250853a52..4aa2ca60f 100644 --- a/man/Expr_rolling_mean.Rd +++ b/man/Expr_rolling_mean.Rd @@ -8,6 +8,7 @@ Expr_rolling_mean( window_size, weights = NULL, min_periods = NULL, + ..., center = FALSE ) } @@ -36,6 +37,8 @@ be multiplied elementwise with the values in the window.} \item{min_periods}{The number of values in the window that should be non-null before computing a result. 
If \code{NULL}, it will be set equal to window size.} +\item{...}{Ignored.} + \item{center}{Set the labels at the center of the window} } \value{ diff --git a/man/Expr_rolling_mean_by.Rd b/man/Expr_rolling_mean_by.Rd new file mode 100644 index 000000000..6d6622ea7 --- /dev/null +++ b/man/Expr_rolling_mean_by.Rd @@ -0,0 +1,60 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/expr__expr.R +\name{Expr_rolling_mean_by} +\alias{Expr_rolling_mean_by} +\title{Apply a rolling mean based on another column.} +\usage{ +Expr_rolling_mean_by(by, window_size, ..., min_periods = 1, closed = "right") +} +\arguments{ +\item{by}{This column must of dtype \code{\link[=pl_date]{Date}} or +\code{\link[=DataType_Datetime]{Datetime}}.} + +\item{window_size}{The length of the window. Can be a fixed integer size, or a dynamic temporal +size indicated by the following string language: +\itemize{ +\item 1ns (1 nanosecond) +\item 1us (1 microsecond) +\item 1ms (1 millisecond) +\item 1s (1 second) +\item 1m (1 minute) +\item 1h (1 hour) +\item 1d (1 day) +\item 1w (1 week) +\item 1mo (1 calendar month) +\item 1y (1 calendar year) +\item 1i (1 index count) +If the dynamic string language is used, the \code{by} and \code{closed} arguments must +also be set. +}} + +\item{...}{Ignored.} + +\item{min_periods}{The number of values in the window that should be non-null +before computing a result. If \code{NULL}, it will be set equal to window size.} + +\item{closed}{Define which sides of the temporal interval are closed +(inclusive). This can be either \code{"left"}, \code{"right"}, \code{"both"} or \code{"none"}.} +} +\value{ +Expr +} +\description{ +Apply a rolling mean based on another column. +} +\details{ +If you want to compute multiple aggregation statistics over the same dynamic +window, consider using \verb{$rolling()} this method can cache the window size +computation. 
+} +\examples{ +df_temporal = pl$DataFrame( + date = pl$datetime_range(as.Date("2001-1-1"), as.Date("2001-1-2"), "1h") +)$with_row_index("index") + +df_temporal + +df_temporal$with_columns( + rolling_row_mean = pl$col("index")$rolling_mean_by("date", window_size = "3h") +) +} diff --git a/man/Expr_rolling_median_by.Rd b/man/Expr_rolling_median_by.Rd new file mode 100644 index 000000000..65ceff9ce --- /dev/null +++ b/man/Expr_rolling_median_by.Rd @@ -0,0 +1,60 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/expr__expr.R +\name{Expr_rolling_median_by} +\alias{Expr_rolling_median_by} +\title{Apply a rolling median based on another column.} +\usage{ +Expr_rolling_median_by(by, window_size, ..., min_periods = 1, closed = "right") +} +\arguments{ +\item{by}{This column must of dtype \code{\link[=pl_date]{Date}} or +\code{\link[=DataType_Datetime]{Datetime}}.} + +\item{window_size}{The length of the window. Can be a fixed integer size, or a dynamic temporal +size indicated by the following string language: +\itemize{ +\item 1ns (1 nanosecond) +\item 1us (1 microsecond) +\item 1ms (1 millisecond) +\item 1s (1 second) +\item 1m (1 minute) +\item 1h (1 hour) +\item 1d (1 day) +\item 1w (1 week) +\item 1mo (1 calendar month) +\item 1y (1 calendar year) +\item 1i (1 index count) +If the dynamic string language is used, the \code{by} and \code{closed} arguments must +also be set. +}} + +\item{...}{Ignored.} + +\item{min_periods}{The number of values in the window that should be non-null +before computing a result. If \code{NULL}, it will be set equal to window size.} + +\item{closed}{Define which sides of the temporal interval are closed +(inclusive). This can be either \code{"left"}, \code{"right"}, \code{"both"} or \code{"none"}.} +} +\value{ +Expr +} +\description{ +Apply a rolling median based on another column. 
+} +\details{ +If you want to compute multiple aggregation statistics over the same dynamic +window, consider using \verb{$rolling()} this method can cache the window size +computation. +} +\examples{ +df_temporal = pl$DataFrame( + date = pl$datetime_range(as.Date("2001-1-1"), as.Date("2001-1-2"), "1h") +)$with_row_index("index") + +df_temporal + +df_temporal$with_columns( + rolling_row_median = pl$col("index")$rolling_median_by("date", window_size = "3h") +) +} diff --git a/man/Expr_rolling_min.Rd b/man/Expr_rolling_min.Rd index 03d02a464..4b2416332 100644 --- a/man/Expr_rolling_min.Rd +++ b/man/Expr_rolling_min.Rd @@ -8,6 +8,7 @@ Expr_rolling_min( window_size, weights = NULL, min_periods = NULL, + ..., center = FALSE ) } @@ -36,6 +37,8 @@ be multiplied elementwise with the values in the window.} \item{min_periods}{The number of values in the window that should be non-null before computing a result. If \code{NULL}, it will be set equal to window size.} +\item{...}{Ignored.} + \item{center}{Set the labels at the center of the window} } \value{ diff --git a/man/Expr_rolling_min_by.Rd b/man/Expr_rolling_min_by.Rd new file mode 100644 index 000000000..c964c1be9 --- /dev/null +++ b/man/Expr_rolling_min_by.Rd @@ -0,0 +1,60 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/expr__expr.R +\name{Expr_rolling_min_by} +\alias{Expr_rolling_min_by} +\title{Apply a rolling min based on another column.} +\usage{ +Expr_rolling_min_by(by, window_size, ..., min_periods = 1, closed = "right") +} +\arguments{ +\item{by}{This column must of dtype \code{\link[=pl_date]{Date}} or +\code{\link[=DataType_Datetime]{Datetime}}.} + +\item{window_size}{The length of the window. 
Can be a fixed integer size, or a dynamic temporal +size indicated by the following string language: +\itemize{ +\item 1ns (1 nanosecond) +\item 1us (1 microsecond) +\item 1ms (1 millisecond) +\item 1s (1 second) +\item 1m (1 minute) +\item 1h (1 hour) +\item 1d (1 day) +\item 1w (1 week) +\item 1mo (1 calendar month) +\item 1y (1 calendar year) +\item 1i (1 index count) +If the dynamic string language is used, the \code{by} and \code{closed} arguments must +also be set. +}} + +\item{...}{Ignored.} + +\item{min_periods}{The number of values in the window that should be non-null +before computing a result. If \code{NULL}, it will be set equal to window size.} + +\item{closed}{Define which sides of the temporal interval are closed +(inclusive). This can be either \code{"left"}, \code{"right"}, \code{"both"} or \code{"none"}.} +} +\value{ +Expr +} +\description{ +Apply a rolling min based on another column. +} +\details{ +If you want to compute multiple aggregation statistics over the same dynamic +window, consider using \verb{$rolling()} this method can cache the window size +computation. +} +\examples{ +df_temporal = pl$DataFrame( + date = pl$datetime_range(as.Date("2001-1-1"), as.Date("2001-1-2"), "1h") +)$with_row_index("index") + +df_temporal + +df_temporal$with_columns( + rolling_row_min = pl$col("index")$rolling_min_by("date", window_size = "3h") +) +} diff --git a/man/Expr_rolling_quantile.Rd b/man/Expr_rolling_quantile.Rd index 3ea25b25c..d2c484853 100644 --- a/man/Expr_rolling_quantile.Rd +++ b/man/Expr_rolling_quantile.Rd @@ -10,6 +10,7 @@ Expr_rolling_quantile( window_size, weights = NULL, min_periods = NULL, + ..., center = FALSE ) } @@ -43,6 +44,8 @@ be multiplied elementwise with the values in the window.} \item{min_periods}{The number of values in the window that should be non-null before computing a result. 
If \code{NULL}, it will be set equal to window size.} +\item{...}{Ignored.} + \item{center}{Set the labels at the center of the window} } \value{ diff --git a/man/Expr_rolling_quantile_by.Rd b/man/Expr_rolling_quantile_by.Rd new file mode 100644 index 000000000..1923d7188 --- /dev/null +++ b/man/Expr_rolling_quantile_by.Rd @@ -0,0 +1,77 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/expr__expr.R +\name{Expr_rolling_quantile_by} +\alias{Expr_rolling_quantile_by} +\title{Compute a rolling quantile based on another column} +\usage{ +Expr_rolling_quantile_by( + by, + window_size, + ..., + quantile, + interpolation = "nearest", + min_periods = 1, + closed = "right" +) +} +\arguments{ +\item{by}{This column must of dtype \code{\link[=pl_date]{Date}} or +\code{\link[=DataType_Datetime]{Datetime}}.} + +\item{window_size}{The length of the window. Can be a fixed integer size, or a dynamic temporal +size indicated by the following string language: +\itemize{ +\item 1ns (1 nanosecond) +\item 1us (1 microsecond) +\item 1ms (1 millisecond) +\item 1s (1 second) +\item 1m (1 minute) +\item 1h (1 hour) +\item 1d (1 day) +\item 1w (1 week) +\item 1mo (1 calendar month) +\item 1y (1 calendar year) +\item 1i (1 index count) +If the dynamic string language is used, the \code{by} and \code{closed} arguments must +also be set. +}} + +\item{...}{Ignored.} + +\item{quantile}{Either a numeric value or an Expr whose value must be +between 0 and 1.} + +\item{interpolation}{One of \code{"nearest"}, \code{"higher"}, \code{"lower"}, +\code{"midpoint"}, or \code{"linear"}.} + +\item{min_periods}{The number of values in the window that should be non-null +before computing a result. If \code{NULL}, it will be set equal to window size.} + +\item{closed}{Define which sides of the temporal interval are closed +(inclusive). 
This can be either \code{"left"}, \code{"right"}, \code{"both"} or \code{"none"}.} +} +\value{ +Expr +} +\description{ +Compute a rolling quantile based on another column +} +\details{ +If you want to compute multiple aggregation statistics over the same dynamic +window, consider using \verb{$rolling()} this method can cache the window size +computation. +} +\examples{ +df_temporal = pl$DataFrame( + date = pl$datetime_range(as.Date("2001-1-1"), as.Date("2001-1-2"), "1h") +)$with_row_index("index") + +df_temporal + +df_temporal$with_columns( + rolling_row_quantile = pl$col("index")$rolling_quantile_by( + "date", + window_size = "2h", quantile = 0.3 + ) +) +} diff --git a/man/Expr_rolling_std.Rd b/man/Expr_rolling_std.Rd index 3225b5379..f30cf9084 100644 --- a/man/Expr_rolling_std.Rd +++ b/man/Expr_rolling_std.Rd @@ -8,7 +8,9 @@ Expr_rolling_std( window_size, weights = NULL, min_periods = NULL, - center = FALSE + ..., + center = FALSE, + ddof = 1 ) } \arguments{ @@ -36,7 +38,13 @@ be multiplied elementwise with the values in the window.} \item{min_periods}{The number of values in the window that should be non-null before computing a result. If \code{NULL}, it will be set equal to window size.} +\item{...}{Ignored.} + \item{center}{Set the labels at the center of the window} + +\item{ddof}{An integer representing "Delta Degrees of Freedom": +the divisor used in the calculation is \code{N - ddof}, +where \code{N} represents the number of elements. 
By default ddof is \code{1}.} } \value{ Expr diff --git a/man/Expr_rolling_std_by.Rd b/man/Expr_rolling_std_by.Rd new file mode 100644 index 000000000..c149b7a99 --- /dev/null +++ b/man/Expr_rolling_std_by.Rd @@ -0,0 +1,77 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/expr__expr.R +\name{Expr_rolling_std_by} +\alias{Expr_rolling_std_by} +\title{Compute a rolling standard deviation based on another column} +\usage{ +Expr_rolling_std_by( + by, + window_size, + ..., + min_periods = 1, + closed = "right", + ddof = 1 +) +} +\arguments{ +\item{by}{This column must of dtype \code{\link[=pl_date]{Date}} or +\code{\link[=DataType_Datetime]{Datetime}}.} + +\item{window_size}{The length of the window. Can be a fixed integer size, or a dynamic temporal +size indicated by the following string language: +\itemize{ +\item 1ns (1 nanosecond) +\item 1us (1 microsecond) +\item 1ms (1 millisecond) +\item 1s (1 second) +\item 1m (1 minute) +\item 1h (1 hour) +\item 1d (1 day) +\item 1w (1 week) +\item 1mo (1 calendar month) +\item 1y (1 calendar year) +\item 1i (1 index count) +If the dynamic string language is used, the \code{by} and \code{closed} arguments must +also be set. +}} + +\item{...}{Ignored.} + +\item{min_periods}{The number of values in the window that should be non-null +before computing a result. If \code{NULL}, it will be set equal to window size.} + +\item{closed}{Define which sides of the temporal interval are closed +(inclusive). This can be either \code{"left"}, \code{"right"}, \code{"both"} or \code{"none"}.} + +\item{ddof}{An integer representing "Delta Degrees of Freedom": +the divisor used in the calculation is \code{N - ddof}, +where \code{N} represents the number of elements. 
By default ddof is \code{1}.} +} +\value{ +Expr +} +\description{ +Compute a rolling standard deviation based on another column +} +\details{ +If you want to compute multiple aggregation statistics over the same dynamic +window, consider using \verb{$rolling()} this method can cache the window size +computation. +} +\examples{ +df_temporal = pl$DataFrame( + date = pl$datetime_range(as.Date("2001-1-1"), as.Date("2001-1-2"), "1h") +)$with_row_index("index") + +df_temporal + +# Compute the rolling std with the temporal windows closed on the right (default) +df_temporal$with_columns( + rolling_row_std = pl$col("index")$rolling_std_by("date", window_size = "2h") +) + +# Compute the rolling std with the closure of windows on both sides +df_temporal$with_columns( + rolling_row_std = pl$col("index")$rolling_std_by("date", window_size = "2h", closed = "both") +) +} diff --git a/man/Expr_rolling_sum_by.Rd b/man/Expr_rolling_sum_by.Rd new file mode 100644 index 000000000..734b73c08 --- /dev/null +++ b/man/Expr_rolling_sum_by.Rd @@ -0,0 +1,60 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/expr__expr.R +\name{Expr_rolling_sum_by} +\alias{Expr_rolling_sum_by} +\title{Apply a rolling sum based on another column.} +\usage{ +Expr_rolling_sum_by(by, window_size, ..., min_periods = 1, closed = "right") +} +\arguments{ +\item{by}{This column must of dtype \code{\link[=pl_date]{Date}} or +\code{\link[=DataType_Datetime]{Datetime}}.} + +\item{window_size}{The length of the window. Can be a fixed integer size, or a dynamic temporal +size indicated by the following string language: +\itemize{ +\item 1ns (1 nanosecond) +\item 1us (1 microsecond) +\item 1ms (1 millisecond) +\item 1s (1 second) +\item 1m (1 minute) +\item 1h (1 hour) +\item 1d (1 day) +\item 1w (1 week) +\item 1mo (1 calendar month) +\item 1y (1 calendar year) +\item 1i (1 index count) +If the dynamic string language is used, the \code{by} and \code{closed} arguments must +also be set. 
+}} + +\item{...}{Ignored.} + +\item{min_periods}{The number of values in the window that should be non-null +before computing a result. If \code{NULL}, it will be set equal to window size.} + +\item{closed}{Define which sides of the temporal interval are closed +(inclusive). This can be either \code{"left"}, \code{"right"}, \code{"both"} or \code{"none"}.} +} +\value{ +Expr +} +\description{ +Apply a rolling sum based on another column. +} +\details{ +If you want to compute multiple aggregation statistics over the same dynamic +window, consider using \verb{$rolling()} this method can cache the window size +computation. +} +\examples{ +df_temporal = pl$DataFrame( + date = pl$datetime_range(as.Date("2001-1-1"), as.Date("2001-1-2"), "1h") +)$with_row_index("index") + +df_temporal + +df_temporal$with_columns( + rolling_row_sum = pl$col("index")$rolling_sum_by("date", window_size = "3h") +) +} diff --git a/man/Expr_rolling_var.Rd b/man/Expr_rolling_var.Rd index 574e1d7ee..5d4472dec 100644 --- a/man/Expr_rolling_var.Rd +++ b/man/Expr_rolling_var.Rd @@ -8,7 +8,9 @@ Expr_rolling_var( window_size, weights = NULL, min_periods = NULL, - center = FALSE + ..., + center = FALSE, + ddof = 1 ) } \arguments{ @@ -36,7 +38,13 @@ be multiplied elementwise with the values in the window.} \item{min_periods}{The number of values in the window that should be non-null before computing a result. If \code{NULL}, it will be set equal to window size.} +\item{...}{Ignored.} + \item{center}{Set the labels at the center of the window} + +\item{ddof}{An integer representing "Delta Degrees of Freedom": +the divisor used in the calculation is \code{N - ddof}, +where \code{N} represents the number of elements. 
By default ddof is \code{1}.} } \value{ Expr diff --git a/man/Expr_rolling_var_by.Rd b/man/Expr_rolling_var_by.Rd new file mode 100644 index 000000000..677a3d880 --- /dev/null +++ b/man/Expr_rolling_var_by.Rd @@ -0,0 +1,77 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/expr__expr.R +\name{Expr_rolling_var_by} +\alias{Expr_rolling_var_by} +\title{Compute a rolling variance based on another column} +\usage{ +Expr_rolling_var_by( + by, + window_size, + ..., + min_periods = 1, + closed = "right", + ddof = 1 +) +} +\arguments{ +\item{by}{This column must be of dtype \code{\link[=pl_date]{Date}} or +\code{\link[=DataType_Datetime]{Datetime}}.} + +\item{window_size}{The length of the window. Can be a fixed integer size, or a dynamic temporal +size indicated by the following string language: +\itemize{ +\item 1ns (1 nanosecond) +\item 1us (1 microsecond) +\item 1ms (1 millisecond) +\item 1s (1 second) +\item 1m (1 minute) +\item 1h (1 hour) +\item 1d (1 day) +\item 1w (1 week) +\item 1mo (1 calendar month) +\item 1y (1 calendar year) +\item 1i (1 index count) +If the dynamic string language is used, the \code{by} and \code{closed} arguments must +also be set. +}} + +\item{...}{Ignored.} + +\item{min_periods}{The number of values in the window that should be non-null +before computing a result. If \code{NULL}, it will be set equal to window size.} + +\item{closed}{Define which sides of the temporal interval are closed +(inclusive). This can be either \code{"left"}, \code{"right"}, \code{"both"} or \code{"none"}.} + +\item{ddof}{An integer representing "Delta Degrees of Freedom": +the divisor used in the calculation is \code{N - ddof}, +where \code{N} represents the number of elements.
By default ddof is \code{1}.} +} +\value{ +Expr +} +\description{ +Compute a rolling variance based on another column +} +\details{ +If you want to compute multiple aggregation statistics over the same dynamic +window, consider using \verb{$rolling()}, as this method can cache the window size +computation. +} +\examples{ +df_temporal = pl$DataFrame( + date = pl$datetime_range(as.Date("2001-1-1"), as.Date("2001-1-2"), "1h") +)$with_row_index("index") + +df_temporal + +# Compute the rolling var with the temporal windows closed on the right (default) +df_temporal$with_columns( + rolling_row_var = pl$col("index")$rolling_var_by("date", window_size = "2h") +) + +# Compute the rolling var with the closure of windows on both sides +df_temporal$with_columns( + rolling_row_var = pl$col("index")$rolling_var_by("date", window_size = "2h", closed = "both") +) +} diff --git a/src/rust/src/lazy/dsl.rs b/src/rust/src/lazy/dsl.rs index 8f1d8b14d..129f1398e 100644 --- a/src/rust/src/lazy/dsl.rs +++ b/src/rust/src/lazy/dsl.rs @@ -18,11 +18,11 @@ use pl::{Duration, IntoSeries, RollingGroupOptions, SetOperation, TemporalMethod use polars::lazy::dsl; use polars::prelude as pl; use polars::prelude::{ExprEvalExtension, NestedType, SortOptions}; +use std::any::Any; use std::ops::{Add, Div, Mul, Rem, Sub}; use std::result::Result; pub type NameGenerator = pl::Arc String + Send + Sync>; use crate::rdatatype::robjs_to_ewm_options; - use crate::utils::r_expr_to_rust_expr; use crate::utils::unpack_r_eval; use smartstring::{LazyCompact, SmartString}; @@ -525,6 +525,7 @@ impl RPolarsExpr { weights, min_periods, center, + None, )?)
.into()) } @@ -539,8 +540,8 @@ impl RPolarsExpr { .0 .clone() .rolling_min_by( - robj_to!(PLExpr, by)?, - make_rolling_options_dynamic_window(window_size, min_periods, closed)?, + robj_to!(PLExprCol, by)?, + make_rolling_options_dynamic_window(window_size, min_periods, closed, None)?, ) .into()) } @@ -561,6 +562,7 @@ impl RPolarsExpr { weights, min_periods, center, + None, )?) .into()) } @@ -576,8 +578,8 @@ impl RPolarsExpr { .0 .clone() .rolling_max_by( - robj_to!(PLExpr, by)?, - make_rolling_options_dynamic_window(window_size, min_periods, closed)?, + robj_to!(PLExprCol, by)?, + make_rolling_options_dynamic_window(window_size, min_periods, closed, None)?, ) .into()) } @@ -598,6 +600,7 @@ impl RPolarsExpr { weights, min_periods, center, + None, )?) .into()) } @@ -613,8 +616,8 @@ impl RPolarsExpr { .0 .clone() .rolling_mean_by( - robj_to!(PLExpr, by)?, - make_rolling_options_dynamic_window(window_size, min_periods, closed)?, + robj_to!(PLExprCol, by)?, + make_rolling_options_dynamic_window(window_size, min_periods, closed, None)?, ) .into()) } @@ -635,6 +638,7 @@ impl RPolarsExpr { weights, min_periods, center, + None, )?) .into()) } @@ -650,8 +654,8 @@ impl RPolarsExpr { .0 .clone() .rolling_sum_by( - robj_to!(PLExpr, by)?, - make_rolling_options_dynamic_window(window_size, min_periods, closed)?, + robj_to!(PLExprCol, by)?, + make_rolling_options_dynamic_window(window_size, min_periods, closed, None)?, ) .into()) } @@ -663,7 +667,10 @@ impl RPolarsExpr { weights: Robj, min_periods: Robj, center: Robj, + ddof: Robj, ) -> RResult { + let ddof = robj_to!(u8, ddof)?; + Ok(self .0 .clone() @@ -672,25 +679,32 @@ impl RPolarsExpr { weights, min_periods, center, + Some(Arc::new(pl::RollingVarParams { ddof }) as Arc), )?) 
.into()) } - // TODO: in make_rolling_options_dynamic_window(), the fn_params - // arg is different for this one fn rolling_std_by( &self, by: Robj, window_size: &str, min_periods: Robj, closed: Robj, + ddof: Robj, ) -> RResult { + let ddof = robj_to!(u8, ddof)?; + Ok(self .0 .clone() .rolling_std_by( - robj_to!(PLExpr, by)?, - make_rolling_options_dynamic_window(window_size, min_periods, closed)?, + robj_to!(PLExprCol, by)?, + make_rolling_options_dynamic_window( + window_size, + min_periods, + closed, + Some(Arc::new(pl::RollingVarParams { ddof }) as Arc), + )?, ) .into()) } @@ -702,7 +716,10 @@ impl RPolarsExpr { weights: Robj, min_periods: Robj, center: Robj, + ddof: Robj, ) -> RResult { + let ddof = robj_to!(u8, ddof)?; + Ok(self .0 .clone() @@ -711,28 +728,35 @@ impl RPolarsExpr { weights, min_periods, center, + Some(Arc::new(pl::RollingVarParams { ddof }) as Arc), )?) .into()) } - // TODO: in make_rolling_options_dynamic_window(), the fn_params - // arg is different for this one - // fn rolling_var_by( - // &self, - // by: Robj, - // window_size: &str, - // min_periods: Robj, - // closed: Robj, - // ) -> RResult { - // Ok(self - // .0 - // .clone() - // .rolling_var_by( - // robj_to!(PLExpr, by)?, - // make_rolling_options_dynamic_window(window_size, min_periods, closed)?, - // ) - // .into()) - // } + fn rolling_var_by( + &self, + by: Robj, + window_size: &str, + min_periods: Robj, + closed: Robj, + ddof: Robj, + ) -> RResult { + let ddof = robj_to!(u8, ddof)?; + + Ok(self + .0 + .clone() + .rolling_var_by( + robj_to!(PLExprCol, by)?, + make_rolling_options_dynamic_window( + window_size, + min_periods, + closed, + Some(Arc::new(pl::RollingVarParams { ddof }) as Arc), + )?, + ) + .into()) + } #[allow(clippy::too_many_arguments)] pub fn rolling_median( @@ -750,6 +774,7 @@ impl RPolarsExpr { weights, min_periods, center, + None, )?) 
.into()) } @@ -765,8 +790,8 @@ impl RPolarsExpr { .0 .clone() .rolling_median_by( - robj_to!(PLExpr, by)?, - make_rolling_options_dynamic_window(window_size, min_periods, closed)?, + robj_to!(PLExprCol, by)?, + make_rolling_options_dynamic_window(window_size, min_periods, closed, None)?, ) .into()) } @@ -798,23 +823,29 @@ impl RPolarsExpr { .into()) } - // TODO: this takes args for interpolation - // fn rolling_quantile_by( - // &self, - // by: Robj, - // window_size: &str, - // min_periods: Robj, - // closed: Robj, - // ) -> RResult { - // Ok(self - // .0 - // .clone() - // .rolling_quantile_by( - // robj_to!(PLExpr, by)?, - // make_rolling_options_dynamic_window(window_size, min_periods, closed)?, - // ) - // .into()) - // } + fn rolling_quantile_by( + &self, + by: Robj, + quantile: Robj, + interpolation: Robj, + window_size: &str, + min_periods: Robj, + closed: Robj, + ) -> RResult { + let quantile = robj_to!(f64, quantile)?; + let interpolation = robj_to!(quantile_interpolation_option, interpolation)?; + + Ok(self + .0 + .clone() + .rolling_quantile_by( + robj_to!(PLExprCol, by)?, + interpolation, + quantile, + make_rolling_options_dynamic_window(window_size, min_periods, closed, None)?, + ) + .into()) + } pub fn rolling_skew(&self, window_size: f64, bias: bool) -> RResult { Ok(self @@ -2733,13 +2764,14 @@ pub fn make_rolling_options_fixed_window( weights: Robj, min_periods: Robj, center: Robj, + fn_params: Option>, ) -> RResult { Ok(pl::RollingOptionsFixedWindow { window_size: robj_to!(usize, window_size)?, weights: robj_to!(Option, Vec, f64, weights)?, min_periods: robj_to!(usize, min_periods)?, center: robj_to!(bool, center)?, - ..Default::default() + fn_params, }) } @@ -2747,12 +2779,13 @@ pub fn make_rolling_options_dynamic_window( window_size: &str, min_periods: Robj, closed_window: Robj, + fn_params: Option>, ) -> RResult { Ok(pl::RollingOptionsDynamicWindow { window_size: Duration::parse(window_size), min_periods: robj_to!(usize, min_periods)?, 
closed_window: robj_to!(ClosedWindow, closed_window)?, - fn_params: None, + fn_params, }) } diff --git a/tests/testthat/_snaps/after-wrappers.md b/tests/testthat/_snaps/after-wrappers.md index 7307f15a8..6dac0553b 100644 --- a/tests/testthat/_snaps/after-wrappers.md +++ b/tests/testthat/_snaps/after-wrappers.md @@ -215,65 +215,68 @@ Code ls(.pr$env[[class_name]]) Output - [1] "abs" "add" "agg_groups" - [4] "alias" "all" "and" - [7] "any" "append" "approx_n_unique" - [10] "arccos" "arccosh" "arcsin" - [13] "arcsinh" "arctan" "arctanh" - [16] "arg_max" "arg_min" "arg_sort" - [19] "arg_unique" "arr" "backward_fill" - [22] "bin" "bottom_k" "cast" - [25] "cat" "ceil" "clip" - [28] "clip_max" "clip_min" "cos" - [31] "cosh" "count" "cum_count" - [34] "cum_max" "cum_min" "cum_prod" - [37] "cum_sum" "cumulative_eval" "cut" - [40] "diff" "div" "dot" - [43] "drop_nans" "drop_nulls" "dt" - [46] "entropy" "eq" "eq_missing" - [49] "ewm_mean" "ewm_std" "ewm_var" - [52] "exclude" "exp" "explode" - [55] "extend_constant" "fill_nan" "fill_null" - [58] "filter" "first" "flatten" - [61] "floor" "floor_div" "forward_fill" - [64] "gather" "gather_every" "gt" - [67] "gt_eq" "hash" "head" - [70] "implode" "inspect" "interpolate" - [73] "is_between" "is_duplicated" "is_finite" - [76] "is_first_distinct" "is_in" "is_infinite" - [79] "is_last_distinct" "is_nan" "is_not_nan" - [82] "is_not_null" "is_null" "is_unique" - [85] "kurtosis" "last" "len" - [88] "limit" "list" "log" - [91] "log10" "lower_bound" "lt" - [94] "lt_eq" "map_batches" "map_elements" - [97] "max" "mean" "median" - [100] "meta" "min" "mod" - [103] "mode" "mul" "n_unique" - [106] "name" "nan_max" "nan_min" - [109] "neq" "neq_missing" "not" - [112] "null_count" "or" "over" - [115] "pct_change" "peak_max" "peak_min" - [118] "pow" "print" "product" - [121] "qcut" "quantile" "rank" - [124] "rechunk" "reinterpret" "rep" - [127] "repeat_by" "replace" "reshape" - [130] "reverse" "rle" "rle_id" - [133] "rolling" "rolling_max" 
"rolling_mean" - [136] "rolling_median" "rolling_min" "rolling_quantile" - [139] "rolling_skew" "rolling_std" "rolling_sum" - [142] "rolling_var" "round" "sample" - [145] "search_sorted" "set_sorted" "shift" - [148] "shift_and_fill" "shrink_dtype" "shuffle" - [151] "sign" "sin" "sinh" - [154] "skew" "slice" "sort" - [157] "sort_by" "sqrt" "std" - [160] "str" "struct" "sub" - [163] "sum" "tail" "tan" - [166] "tanh" "to_physical" "to_r" - [169] "to_series" "top_k" "unique" - [172] "unique_counts" "upper_bound" "value_counts" - [175] "var" "xor" + [1] "abs" "add" "agg_groups" + [4] "alias" "all" "and" + [7] "any" "append" "approx_n_unique" + [10] "arccos" "arccosh" "arcsin" + [13] "arcsinh" "arctan" "arctanh" + [16] "arg_max" "arg_min" "arg_sort" + [19] "arg_unique" "arr" "backward_fill" + [22] "bin" "bottom_k" "cast" + [25] "cat" "ceil" "clip" + [28] "clip_max" "clip_min" "cos" + [31] "cosh" "count" "cum_count" + [34] "cum_max" "cum_min" "cum_prod" + [37] "cum_sum" "cumulative_eval" "cut" + [40] "diff" "div" "dot" + [43] "drop_nans" "drop_nulls" "dt" + [46] "entropy" "eq" "eq_missing" + [49] "ewm_mean" "ewm_std" "ewm_var" + [52] "exclude" "exp" "explode" + [55] "extend_constant" "fill_nan" "fill_null" + [58] "filter" "first" "flatten" + [61] "floor" "floor_div" "forward_fill" + [64] "gather" "gather_every" "gt" + [67] "gt_eq" "hash" "head" + [70] "implode" "inspect" "interpolate" + [73] "is_between" "is_duplicated" "is_finite" + [76] "is_first_distinct" "is_in" "is_infinite" + [79] "is_last_distinct" "is_nan" "is_not_nan" + [82] "is_not_null" "is_null" "is_unique" + [85] "kurtosis" "last" "len" + [88] "limit" "list" "log" + [91] "log10" "lower_bound" "lt" + [94] "lt_eq" "map_batches" "map_elements" + [97] "max" "mean" "median" + [100] "meta" "min" "mod" + [103] "mode" "mul" "n_unique" + [106] "name" "nan_max" "nan_min" + [109] "neq" "neq_missing" "not" + [112] "null_count" "or" "over" + [115] "pct_change" "peak_max" "peak_min" + [118] "pow" "print" "product" + [121] 
"qcut" "quantile" "rank" + [124] "rechunk" "reinterpret" "rep" + [127] "repeat_by" "replace" "reshape" + [130] "reverse" "rle" "rle_id" + [133] "rolling" "rolling_max" "rolling_max_by" + [136] "rolling_mean" "rolling_mean_by" "rolling_median" + [139] "rolling_median_by" "rolling_min" "rolling_min_by" + [142] "rolling_quantile" "rolling_quantile_by" "rolling_skew" + [145] "rolling_std" "rolling_std_by" "rolling_sum" + [148] "rolling_sum_by" "rolling_var" "rolling_var_by" + [151] "round" "sample" "search_sorted" + [154] "set_sorted" "shift" "shift_and_fill" + [157] "shrink_dtype" "shuffle" "sign" + [160] "sin" "sinh" "skew" + [163] "slice" "sort" "sort_by" + [166] "sqrt" "std" "str" + [169] "struct" "sub" "sum" + [172] "tail" "tan" "tanh" + [175] "to_physical" "to_r" "to_series" + [178] "top_k" "unique" "unique_counts" + [181] "upper_bound" "value_counts" "var" + [184] "xor" --- @@ -405,47 +408,48 @@ [245] "rolling_mean" "rolling_mean_by" [247] "rolling_median" "rolling_median_by" [249] "rolling_min" "rolling_min_by" - [251] "rolling_quantile" "rolling_skew" - [253] "rolling_std" "rolling_std_by" - [255] "rolling_sum" "rolling_sum_by" - [257] "rolling_var" "round" - [259] "sample_frac" "sample_n" - [261] "search_sorted" "shift" - [263] "shift_and_fill" "shrink_dtype" - [265] "shuffle" "sign" - [267] "sin" "sinh" - [269] "skew" "slice" - [271] "sort_by" "sort_with" - [273] "std" "str_base64_decode" - [275] "str_base64_encode" "str_concat" - [277] "str_contains" "str_contains_any" - [279] "str_count_matches" "str_ends_with" - [281] "str_extract" "str_extract_all" - [283] "str_extract_groups" "str_find" - [285] "str_head" "str_hex_decode" - [287] "str_hex_encode" "str_json_decode" - [289] "str_json_path_match" "str_len_bytes" - [291] "str_len_chars" "str_pad_end" - [293] "str_pad_start" "str_replace" - [295] "str_replace_all" "str_replace_many" - [297] "str_reverse" "str_slice" - [299] "str_split" "str_split_exact" - [301] "str_splitn" "str_starts_with" - [303] 
"str_strip_chars" "str_strip_chars_end" - [305] "str_strip_chars_start" "str_tail" - [307] "str_to_date" "str_to_datetime" - [309] "str_to_integer" "str_to_lowercase" - [311] "str_to_time" "str_to_titlecase" - [313] "str_to_uppercase" "str_zfill" - [315] "struct_field_by_name" "struct_rename_fields" - [317] "struct_with_fields" "sub" - [319] "sum" "tail" - [321] "tan" "tanh" - [323] "to_physical" "top_k" - [325] "unique" "unique_counts" - [327] "unique_stable" "upper_bound" - [329] "value_counts" "var" - [331] "xor" + [251] "rolling_quantile" "rolling_quantile_by" + [253] "rolling_skew" "rolling_std" + [255] "rolling_std_by" "rolling_sum" + [257] "rolling_sum_by" "rolling_var" + [259] "rolling_var_by" "round" + [261] "sample_frac" "sample_n" + [263] "search_sorted" "shift" + [265] "shift_and_fill" "shrink_dtype" + [267] "shuffle" "sign" + [269] "sin" "sinh" + [271] "skew" "slice" + [273] "sort_by" "sort_with" + [275] "std" "str_base64_decode" + [277] "str_base64_encode" "str_concat" + [279] "str_contains" "str_contains_any" + [281] "str_count_matches" "str_ends_with" + [283] "str_extract" "str_extract_all" + [285] "str_extract_groups" "str_find" + [287] "str_head" "str_hex_decode" + [289] "str_hex_encode" "str_json_decode" + [291] "str_json_path_match" "str_len_bytes" + [293] "str_len_chars" "str_pad_end" + [295] "str_pad_start" "str_replace" + [297] "str_replace_all" "str_replace_many" + [299] "str_reverse" "str_slice" + [301] "str_split" "str_split_exact" + [303] "str_splitn" "str_starts_with" + [305] "str_strip_chars" "str_strip_chars_end" + [307] "str_strip_chars_start" "str_tail" + [309] "str_to_date" "str_to_datetime" + [311] "str_to_integer" "str_to_lowercase" + [313] "str_to_time" "str_to_titlecase" + [315] "str_to_uppercase" "str_zfill" + [317] "struct_field_by_name" "struct_rename_fields" + [319] "struct_with_fields" "sub" + [321] "sum" "tail" + [323] "tan" "tanh" + [325] "to_physical" "top_k" + [327] "unique" "unique_counts" + [329] "unique_stable" 
"upper_bound" + [331] "value_counts" "var" + [333] "xor" # public and private methods of each class When @@ -466,66 +470,68 @@ Code ls(.pr$env[[class_name]]) Output - [1] "abs" "add" "agg_groups" - [4] "alias" "all" "and" - [7] "any" "append" "approx_n_unique" - [10] "arccos" "arccosh" "arcsin" - [13] "arcsinh" "arctan" "arctanh" - [16] "arg_max" "arg_min" "arg_sort" - [19] "arg_unique" "arr" "backward_fill" - [22] "bin" "bottom_k" "cast" - [25] "cat" "ceil" "clip" - [28] "clip_max" "clip_min" "cos" - [31] "cosh" "count" "cum_count" - [34] "cum_max" "cum_min" "cum_prod" - [37] "cum_sum" "cumulative_eval" "cut" - [40] "diff" "div" "dot" - [43] "drop_nans" "drop_nulls" "dt" - [46] "entropy" "eq" "eq_missing" - [49] "ewm_mean" "ewm_std" "ewm_var" - [52] "exclude" "exp" "explode" - [55] "extend_constant" "fill_nan" "fill_null" - [58] "filter" "first" "flatten" - [61] "floor" "floor_div" "forward_fill" - [64] "gather" "gather_every" "gt" - [67] "gt_eq" "hash" "head" - [70] "implode" "inspect" "interpolate" - [73] "is_between" "is_duplicated" "is_finite" - [76] "is_first_distinct" "is_in" "is_infinite" - [79] "is_last_distinct" "is_nan" "is_not_nan" - [82] "is_not_null" "is_null" "is_unique" - [85] "kurtosis" "last" "len" - [88] "limit" "list" "log" - [91] "log10" "lower_bound" "lt" - [94] "lt_eq" "map_batches" "map_elements" - [97] "max" "mean" "median" - [100] "meta" "min" "mod" - [103] "mode" "mul" "n_unique" - [106] "name" "nan_max" "nan_min" - [109] "neq" "neq_missing" "not" - [112] "null_count" "or" "otherwise" - [115] "over" "pct_change" "peak_max" - [118] "peak_min" "pow" "print" - [121] "product" "qcut" "quantile" - [124] "rank" "rechunk" "reinterpret" - [127] "rep" "repeat_by" "replace" - [130] "reshape" "reverse" "rle" - [133] "rle_id" "rolling" "rolling_max" - [136] "rolling_mean" "rolling_median" "rolling_min" - [139] "rolling_quantile" "rolling_skew" "rolling_std" - [142] "rolling_sum" "rolling_var" "round" - [145] "sample" "search_sorted" "set_sorted" - 
[148] "shift" "shift_and_fill" "shrink_dtype" - [151] "shuffle" "sign" "sin" - [154] "sinh" "skew" "slice" - [157] "sort" "sort_by" "sqrt" - [160] "std" "str" "struct" - [163] "sub" "sum" "tail" - [166] "tan" "tanh" "to_physical" - [169] "to_r" "to_series" "top_k" - [172] "unique" "unique_counts" "upper_bound" - [175] "value_counts" "var" "when" - [178] "xor" + [1] "abs" "add" "agg_groups" + [4] "alias" "all" "and" + [7] "any" "append" "approx_n_unique" + [10] "arccos" "arccosh" "arcsin" + [13] "arcsinh" "arctan" "arctanh" + [16] "arg_max" "arg_min" "arg_sort" + [19] "arg_unique" "arr" "backward_fill" + [22] "bin" "bottom_k" "cast" + [25] "cat" "ceil" "clip" + [28] "clip_max" "clip_min" "cos" + [31] "cosh" "count" "cum_count" + [34] "cum_max" "cum_min" "cum_prod" + [37] "cum_sum" "cumulative_eval" "cut" + [40] "diff" "div" "dot" + [43] "drop_nans" "drop_nulls" "dt" + [46] "entropy" "eq" "eq_missing" + [49] "ewm_mean" "ewm_std" "ewm_var" + [52] "exclude" "exp" "explode" + [55] "extend_constant" "fill_nan" "fill_null" + [58] "filter" "first" "flatten" + [61] "floor" "floor_div" "forward_fill" + [64] "gather" "gather_every" "gt" + [67] "gt_eq" "hash" "head" + [70] "implode" "inspect" "interpolate" + [73] "is_between" "is_duplicated" "is_finite" + [76] "is_first_distinct" "is_in" "is_infinite" + [79] "is_last_distinct" "is_nan" "is_not_nan" + [82] "is_not_null" "is_null" "is_unique" + [85] "kurtosis" "last" "len" + [88] "limit" "list" "log" + [91] "log10" "lower_bound" "lt" + [94] "lt_eq" "map_batches" "map_elements" + [97] "max" "mean" "median" + [100] "meta" "min" "mod" + [103] "mode" "mul" "n_unique" + [106] "name" "nan_max" "nan_min" + [109] "neq" "neq_missing" "not" + [112] "null_count" "or" "otherwise" + [115] "over" "pct_change" "peak_max" + [118] "peak_min" "pow" "print" + [121] "product" "qcut" "quantile" + [124] "rank" "rechunk" "reinterpret" + [127] "rep" "repeat_by" "replace" + [130] "reshape" "reverse" "rle" + [133] "rle_id" "rolling" "rolling_max" + [136] 
"rolling_max_by" "rolling_mean" "rolling_mean_by" + [139] "rolling_median" "rolling_median_by" "rolling_min" + [142] "rolling_min_by" "rolling_quantile" "rolling_quantile_by" + [145] "rolling_skew" "rolling_std" "rolling_std_by" + [148] "rolling_sum" "rolling_sum_by" "rolling_var" + [151] "rolling_var_by" "round" "sample" + [154] "search_sorted" "set_sorted" "shift" + [157] "shift_and_fill" "shrink_dtype" "shuffle" + [160] "sign" "sin" "sinh" + [163] "skew" "slice" "sort" + [166] "sort_by" "sqrt" "std" + [169] "str" "struct" "sub" + [172] "sum" "tail" "tan" + [175] "tanh" "to_physical" "to_r" + [178] "to_series" "top_k" "unique" + [181] "unique_counts" "upper_bound" "value_counts" + [184] "var" "when" "xor" --- @@ -553,66 +559,68 @@ Code ls(.pr$env[[class_name]]) Output - [1] "abs" "add" "agg_groups" - [4] "alias" "all" "and" - [7] "any" "append" "approx_n_unique" - [10] "arccos" "arccosh" "arcsin" - [13] "arcsinh" "arctan" "arctanh" - [16] "arg_max" "arg_min" "arg_sort" - [19] "arg_unique" "arr" "backward_fill" - [22] "bin" "bottom_k" "cast" - [25] "cat" "ceil" "clip" - [28] "clip_max" "clip_min" "cos" - [31] "cosh" "count" "cum_count" - [34] "cum_max" "cum_min" "cum_prod" - [37] "cum_sum" "cumulative_eval" "cut" - [40] "diff" "div" "dot" - [43] "drop_nans" "drop_nulls" "dt" - [46] "entropy" "eq" "eq_missing" - [49] "ewm_mean" "ewm_std" "ewm_var" - [52] "exclude" "exp" "explode" - [55] "extend_constant" "fill_nan" "fill_null" - [58] "filter" "first" "flatten" - [61] "floor" "floor_div" "forward_fill" - [64] "gather" "gather_every" "gt" - [67] "gt_eq" "hash" "head" - [70] "implode" "inspect" "interpolate" - [73] "is_between" "is_duplicated" "is_finite" - [76] "is_first_distinct" "is_in" "is_infinite" - [79] "is_last_distinct" "is_nan" "is_not_nan" - [82] "is_not_null" "is_null" "is_unique" - [85] "kurtosis" "last" "len" - [88] "limit" "list" "log" - [91] "log10" "lower_bound" "lt" - [94] "lt_eq" "map_batches" "map_elements" - [97] "max" "mean" "median" - [100] 
"meta" "min" "mod" - [103] "mode" "mul" "n_unique" - [106] "name" "nan_max" "nan_min" - [109] "neq" "neq_missing" "not" - [112] "null_count" "or" "otherwise" - [115] "over" "pct_change" "peak_max" - [118] "peak_min" "pow" "print" - [121] "product" "qcut" "quantile" - [124] "rank" "rechunk" "reinterpret" - [127] "rep" "repeat_by" "replace" - [130] "reshape" "reverse" "rle" - [133] "rle_id" "rolling" "rolling_max" - [136] "rolling_mean" "rolling_median" "rolling_min" - [139] "rolling_quantile" "rolling_skew" "rolling_std" - [142] "rolling_sum" "rolling_var" "round" - [145] "sample" "search_sorted" "set_sorted" - [148] "shift" "shift_and_fill" "shrink_dtype" - [151] "shuffle" "sign" "sin" - [154] "sinh" "skew" "slice" - [157] "sort" "sort_by" "sqrt" - [160] "std" "str" "struct" - [163] "sub" "sum" "tail" - [166] "tan" "tanh" "to_physical" - [169] "to_r" "to_series" "top_k" - [172] "unique" "unique_counts" "upper_bound" - [175] "value_counts" "var" "when" - [178] "xor" + [1] "abs" "add" "agg_groups" + [4] "alias" "all" "and" + [7] "any" "append" "approx_n_unique" + [10] "arccos" "arccosh" "arcsin" + [13] "arcsinh" "arctan" "arctanh" + [16] "arg_max" "arg_min" "arg_sort" + [19] "arg_unique" "arr" "backward_fill" + [22] "bin" "bottom_k" "cast" + [25] "cat" "ceil" "clip" + [28] "clip_max" "clip_min" "cos" + [31] "cosh" "count" "cum_count" + [34] "cum_max" "cum_min" "cum_prod" + [37] "cum_sum" "cumulative_eval" "cut" + [40] "diff" "div" "dot" + [43] "drop_nans" "drop_nulls" "dt" + [46] "entropy" "eq" "eq_missing" + [49] "ewm_mean" "ewm_std" "ewm_var" + [52] "exclude" "exp" "explode" + [55] "extend_constant" "fill_nan" "fill_null" + [58] "filter" "first" "flatten" + [61] "floor" "floor_div" "forward_fill" + [64] "gather" "gather_every" "gt" + [67] "gt_eq" "hash" "head" + [70] "implode" "inspect" "interpolate" + [73] "is_between" "is_duplicated" "is_finite" + [76] "is_first_distinct" "is_in" "is_infinite" + [79] "is_last_distinct" "is_nan" "is_not_nan" + [82] "is_not_null" 
"is_null" "is_unique" + [85] "kurtosis" "last" "len" + [88] "limit" "list" "log" + [91] "log10" "lower_bound" "lt" + [94] "lt_eq" "map_batches" "map_elements" + [97] "max" "mean" "median" + [100] "meta" "min" "mod" + [103] "mode" "mul" "n_unique" + [106] "name" "nan_max" "nan_min" + [109] "neq" "neq_missing" "not" + [112] "null_count" "or" "otherwise" + [115] "over" "pct_change" "peak_max" + [118] "peak_min" "pow" "print" + [121] "product" "qcut" "quantile" + [124] "rank" "rechunk" "reinterpret" + [127] "rep" "repeat_by" "replace" + [130] "reshape" "reverse" "rle" + [133] "rle_id" "rolling" "rolling_max" + [136] "rolling_max_by" "rolling_mean" "rolling_mean_by" + [139] "rolling_median" "rolling_median_by" "rolling_min" + [142] "rolling_min_by" "rolling_quantile" "rolling_quantile_by" + [145] "rolling_skew" "rolling_std" "rolling_std_by" + [148] "rolling_sum" "rolling_sum_by" "rolling_var" + [151] "rolling_var_by" "round" "sample" + [154] "search_sorted" "set_sorted" "shift" + [157] "shift_and_fill" "shrink_dtype" "shuffle" + [160] "sign" "sin" "sinh" + [163] "skew" "slice" "sort" + [166] "sort_by" "sqrt" "std" + [169] "str" "struct" "sub" + [172] "sum" "tail" "tan" + [175] "tanh" "to_physical" "to_r" + [178] "to_series" "top_k" "unique" + [181] "unique_counts" "upper_bound" "value_counts" + [184] "var" "when" "xor" --- @@ -641,68 +649,71 @@ Code ls(.pr$env[[class_name]]) Output - [1] "abs" "add" "alias" - [4] "all" "and" "any" - [7] "append" "approx_n_unique" "arccos" - [10] "arccosh" "arcsin" "arcsinh" - [13] "arctan" "arctanh" "arg_max" - [16] "arg_min" "arg_sort" "arg_unique" - [19] "arr" "backward_fill" "bin" - [22] "bottom_k" "cast" "cat" - [25] "ceil" "chunk_lengths" "clear" - [28] "clip" "clip_max" "clip_min" - [31] "clone" "compare" "cos" - [34] "cosh" "count" "cum_count" - [37] "cum_max" "cum_min" "cum_prod" - [40] "cum_sum" "cumulative_eval" "cut" - [43] "diff" "div" "dot" - [46] "drop_nans" "drop_nulls" "dt" - [49] "dtype" "entropy" "eq" - [52] 
"eq_missing" "equals" "ewm_mean" - [55] "ewm_std" "ewm_var" "exp" - [58] "explode" "extend_constant" "fill_nan" - [61] "fill_null" "filter" "first" - [64] "flags" "flatten" "floor" - [67] "floor_div" "forward_fill" "gather" - [70] "gather_every" "gt" "gt_eq" - [73] "hash" "head" "implode" - [76] "interpolate" "is_between" "is_duplicated" - [79] "is_finite" "is_first_distinct" "is_in" - [82] "is_infinite" "is_last_distinct" "is_nan" - [85] "is_not_nan" "is_not_null" "is_null" - [88] "is_numeric" "is_sorted" "is_unique" - [91] "item" "kurtosis" "last" - [94] "len" "limit" "list" - [97] "log" "log10" "lower_bound" - [100] "lt" "lt_eq" "map_batches" - [103] "map_elements" "max" "mean" - [106] "median" "min" "mod" - [109] "mode" "mul" "n_chunks" - [112] "n_unique" "name" "nan_max" - [115] "nan_min" "neq" "neq_missing" - [118] "not" "null_count" "or" - [121] "pct_change" "peak_max" "peak_min" - [124] "pow" "print" "product" - [127] "qcut" "quantile" "rank" - [130] "rechunk" "reinterpret" "rename" - [133] "rep" "repeat_by" "replace" - [136] "reshape" "reverse" "rle" - [139] "rle_id" "rolling_max" "rolling_mean" - [142] "rolling_median" "rolling_min" "rolling_quantile" - [145] "rolling_skew" "rolling_std" "rolling_sum" - [148] "rolling_var" "round" "sample" - [151] "search_sorted" "set_sorted" "shape" - [154] "shift" "shift_and_fill" "shrink_dtype" - [157] "shuffle" "sign" "sin" - [160] "sinh" "skew" "slice" - [163] "sort" "sort_by" "sqrt" - [166] "std" "str" "struct" - [169] "sub" "sum" "tail" - [172] "tan" "tanh" "to_frame" - [175] "to_list" "to_lit" "to_physical" - [178] "to_r" "to_vector" "top_k" - [181] "unique" "unique_counts" "upper_bound" - [184] "value_counts" "var" "xor" + [1] "abs" "add" "alias" + [4] "all" "and" "any" + [7] "append" "approx_n_unique" "arccos" + [10] "arccosh" "arcsin" "arcsinh" + [13] "arctan" "arctanh" "arg_max" + [16] "arg_min" "arg_sort" "arg_unique" + [19] "arr" "backward_fill" "bin" + [22] "bottom_k" "cast" "cat" + [25] "ceil" 
"chunk_lengths" "clear" + [28] "clip" "clip_max" "clip_min" + [31] "clone" "compare" "cos" + [34] "cosh" "count" "cum_count" + [37] "cum_max" "cum_min" "cum_prod" + [40] "cum_sum" "cumulative_eval" "cut" + [43] "diff" "div" "dot" + [46] "drop_nans" "drop_nulls" "dt" + [49] "dtype" "entropy" "eq" + [52] "eq_missing" "equals" "ewm_mean" + [55] "ewm_std" "ewm_var" "exp" + [58] "explode" "extend_constant" "fill_nan" + [61] "fill_null" "filter" "first" + [64] "flags" "flatten" "floor" + [67] "floor_div" "forward_fill" "gather" + [70] "gather_every" "gt" "gt_eq" + [73] "hash" "head" "implode" + [76] "interpolate" "is_between" "is_duplicated" + [79] "is_finite" "is_first_distinct" "is_in" + [82] "is_infinite" "is_last_distinct" "is_nan" + [85] "is_not_nan" "is_not_null" "is_null" + [88] "is_numeric" "is_sorted" "is_unique" + [91] "item" "kurtosis" "last" + [94] "len" "limit" "list" + [97] "log" "log10" "lower_bound" + [100] "lt" "lt_eq" "map_batches" + [103] "map_elements" "max" "mean" + [106] "median" "min" "mod" + [109] "mode" "mul" "n_chunks" + [112] "n_unique" "name" "nan_max" + [115] "nan_min" "neq" "neq_missing" + [118] "not" "null_count" "or" + [121] "pct_change" "peak_max" "peak_min" + [124] "pow" "print" "product" + [127] "qcut" "quantile" "rank" + [130] "rechunk" "reinterpret" "rename" + [133] "rep" "repeat_by" "replace" + [136] "reshape" "reverse" "rle" + [139] "rle_id" "rolling_max" "rolling_max_by" + [142] "rolling_mean" "rolling_mean_by" "rolling_median" + [145] "rolling_median_by" "rolling_min" "rolling_min_by" + [148] "rolling_quantile" "rolling_quantile_by" "rolling_skew" + [151] "rolling_std" "rolling_std_by" "rolling_sum" + [154] "rolling_sum_by" "rolling_var" "rolling_var_by" + [157] "round" "sample" "search_sorted" + [160] "set_sorted" "shape" "shift" + [163] "shift_and_fill" "shrink_dtype" "shuffle" + [166] "sign" "sin" "sinh" + [169] "skew" "slice" "sort" + [172] "sort_by" "sqrt" "std" + [175] "str" "struct" "sub" + [178] "sum" "tail" "tan" + [181] 
"tanh" "to_frame" "to_list" + [184] "to_lit" "to_physical" "to_r" + [187] "to_vector" "top_k" "unique" + [190] "unique_counts" "upper_bound" "value_counts" + [193] "var" "xor" --- diff --git a/tests/testthat/test-expr_expr.R b/tests/testthat/test-expr_expr.R index e570e87fb..16c1acd29 100644 --- a/tests/testthat/test-expr_expr.R +++ b/tests/testthat/test-expr_expr.R @@ -1666,6 +1666,131 @@ test_that("Expr_rolling_", { ) }) +test_that("Expr_rolling_*_by", { + df = pl$DataFrame( + a = 1:6, + date = pl$datetime_range(as.Date("2001-1-1"), as.Date("2001-1-6"), "1d") + ) + + expected = data.frame( + min = c(1L, 1:5), + max = 1:6, + mean = c(1, 1.5, 2.5, 3.5, 4.5, 5.5), + sum = c(1L, 3L, 5L, 7L, 9L, 11L), + std = c(NA, rep(0.7071067811865476, 5)), + var = c(NA, rep(0.5, 5)), + median = c(1, 1.5, 2.5, 3.5, 4.5, 5.5), + quantile_linear = c(1, 1.33, 2.33, 3.33, 4.33, 5.33) + ) + + expect_identical( + df$select( + pl$col("a")$rolling_min_by("date", window_size = "2d")$alias("min"), + pl$col("a")$rolling_max_by("date", window_size = "2d")$alias("max"), + pl$col("a")$rolling_mean_by("date", window_size = "2d")$alias("mean"), + pl$col("a")$rolling_sum_by("date", window_size = "2d")$alias("sum"), + pl$col("a")$rolling_std_by("date", window_size = "2d")$alias("std"), + pl$col("a")$rolling_var_by("date", window_size = "2d")$alias("var"), + pl$col("a")$rolling_median_by("date", window_size = "2d")$alias("median"), + pl$col("a")$rolling_quantile_by( + quantile = .33, "date", window_size = "2d", interpolation = "linear" + )$alias("quantile_linear") + )$to_data_frame(), + expected + ) +}) + +test_that("Expr_rolling_*_by only works with date/datetime", { + df = pl$DataFrame(a = 1:6, id = 11:16) + + expect_error( + df$select(pl$col("a")$rolling_min_by("id", window_size = "2i")), + "`by` argument of dtype `i32` is not supported" + ) + + expect_error( + df$select(pl$col("a")$rolling_min_by(1, window_size = "2d")), + "must be the same length as values column" + ) +}) + 
+test_that("Expr_rolling_*_by: arg 'min_periods'", {  # same data and window as the default test, with min_periods = 2
+  df = pl$DataFrame(
+    a = 1:6,
+    date = pl$datetime_range(as.Date("2001-1-1"), as.Date("2001-1-6"), "1d")
+  )
+
+  expected = data.frame(  # every column starts with NA when min_periods = 2 is requested
+    min = c(NA_integer_, 1L:5L),  # NA_integer_ keeps the column integer
+    max = c(NA_integer_, 2L:6L),
+    mean = c(NA, 1.5, 2.5, 3.5, 4.5, 5.5),
+    sum = c(NA_integer_, 3L, 5L, 7L, 9L, 11L),
+    std = c(NA, rep(0.7071067811865476, 5)),
+    var = c(NA, rep(0.5, 5)),
+    median = c(NA, 1.5, 2.5, 3.5, 4.5, 5.5),
+    quantile_linear = c(NA, 1.33, 2.33, 3.33, 4.33, 5.33)
+  )
+
+  expect_identical(  # exact comparison: values and column types must both match
+    df$select(
+      pl$col("a")$rolling_min_by("date", window_size = "2d", min_periods = 2)$alias("min"),
+      pl$col("a")$rolling_max_by("date", window_size = "2d", min_periods = 2)$alias("max"),
+      pl$col("a")$rolling_mean_by("date", window_size = "2d", min_periods = 2)$alias("mean"),
+      pl$col("a")$rolling_sum_by("date", window_size = "2d", min_periods = 2)$alias("sum"),
+      pl$col("a")$rolling_std_by("date", window_size = "2d", min_periods = 2)$alias("std"),
+      pl$col("a")$rolling_var_by("date", window_size = "2d", min_periods = 2)$alias("var"),
+      pl$col("a")$rolling_median_by("date", window_size = "2d", min_periods = 2)$alias("median"),
+      pl$col("a")$rolling_quantile_by(
+        quantile = .33, "date", window_size = "2d", min_periods = 2, interpolation = "linear"
+      )$alias("quantile_linear")
+    )$to_data_frame(),
+    expected
+  )
+
+  expect_error(  # a negative min_periods must be rejected
+    df$select(pl$col("a")$rolling_min_by("date", window_size = "2d", min_periods = -1)),
+    "cannot be less than zero"
+  )
+})
+
+test_that("Expr_rolling_*_by: arg 'closed'", {  # same data and window as the default test, with closed = "left"
+  df = pl$DataFrame(
+    a = 1:6,
+    date = pl$datetime_range(as.Date("2001-1-1"), as.Date("2001-1-6"), "1d")
+  )
+
+  expected = data.frame(  # every column starts with NA; std and var have two leading NAs
+    min = c(NA_integer_, 1L, 1:4),
+    max = c(NA_integer_, 1:5),
+    mean = c(NA, 1, 1.5, 2.5, 3.5, 4.5),
+    sum = c(NA, 1L, 3L, 5L, 7L, 9L),  # c() coerces the leading logical NA to integer, so this column is integer too
+    std = c(NA, NA, rep(0.7071067811865476, 4)),
+    var = c(NA, NA, rep(0.5, 4)),
+    median = c(NA, 1, 1.5, 2.5, 3.5, 4.5),
+    quantile_linear = c(NA, 1.00, 1.33, 2.33, 3.33, 4.33)
+  )
+
+  expect_identical(  # exact comparison: values and column types must both match
+    df$select(
+      pl$col("a")$rolling_min_by("date", window_size = "2d", closed = "left")$alias("min"),
+      pl$col("a")$rolling_max_by("date", window_size = "2d", closed = "left")$alias("max"),
+      pl$col("a")$rolling_mean_by("date", window_size = "2d", closed = "left")$alias("mean"),
+      pl$col("a")$rolling_sum_by("date", window_size = "2d", closed = "left")$alias("sum"),
+      pl$col("a")$rolling_std_by("date", window_size = "2d", closed = "left")$alias("std"),
+      pl$col("a")$rolling_var_by("date", window_size = "2d", closed = "left")$alias("var"),
+      pl$col("a")$rolling_median_by("date", window_size = "2d", closed = "left")$alias("median"),
+      pl$col("a")$rolling_quantile_by(
+        quantile = .33, "date", window_size = "2d", closed = "left", interpolation = "linear"
+      )$alias("quantile_linear")
+    )$to_data_frame(),
+    expected
+  )
+
+  expect_error(  # an unrecognised `closed` value must be rejected with the list of accepted values
+    df$select(pl$col("a")$rolling_min_by("date", window_size = "2d", closed = "foo")),
+    "must be one of 'both', 'left', 'none', 'right'"
+  )
+})
test_that("Expr_rank", { l = list(a = c(3, 6, 1, 1, 6))