Skip to content

Commit

Permalink
feat!: make $rename()'s behavior closer to python polars' (#1129)
Browse files Browse the repository at this point in the history
  • Loading branch information
eitsupi authored Jun 4, 2024
1 parent 4a90617 commit 52cb990
Show file tree
Hide file tree
Showing 10 changed files with 96 additions and 206 deletions.
7 changes: 5 additions & 2 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,17 @@
`time_unit` (#1116).
- The default value of the `rechunk` argument of `pl$concat()` is changed from
`TRUE` to `FALSE` (#1125).
- In `$rename()` for LazyFrame and DataFrame, key-value pairs of names are changed to
`old_name = "new_name"` instead of `new_name = "old_name"` (#1129).
- In `$rename()` for LazyFrame and DataFrame, calling it with no arguments is no longer allowed (#1129).
- In all `$rolling_*()` functions, the arguments `center` and `ddof` must be
named (#1115).

### New features

- Experimental feature `$rename_with()` for LazyFrame and DataFrame.
- Allow specifying a function in `$rename()` for LazyFrame and DataFrame.
They are equivalent to `polars.LazyFrame.rename(mapping: Callable[[str], str])`
or `polars.DataFrame.rename(mapping: Callable[[str], str])` in Python Polars (#1122).
or `polars.DataFrame.rename(mapping: Callable[[str], str])` in Python Polars (#1122, #1129).

## Polars R Package 0.16.4

Expand Down
26 changes: 4 additions & 22 deletions R/dataframe__frame.R
Original file line number Diff line number Diff line change
Expand Up @@ -1595,40 +1595,22 @@ DataFrame_pivot = function(
#' Rename column names of a DataFrame
#' @inherit pl_DataFrame return
#' @inherit LazyFrame_rename params details
#' @seealso
#' - [`<DataFrame>$rename_with()`][DataFrame_rename_with]
#' @examples
#' df = pl$DataFrame(
#' foo = 1:3,
#' bar = 6:8,
#' ham = letters[1:3]
#' )
#'
#' df$rename(apple = "foo")
DataFrame_rename = function(...) {
  # Delegate to the LazyFrame implementation, forwarding all arguments
  # unchanged, then collect the lazy result.
  renamed_lazy = self$lazy()$rename(...)
  renamed_lazy$collect()
}

#' Rename column names of a DataFrame with a function
#' @inherit pl_DataFrame return
#' @inherit LazyFrame_rename_with description params details
#' @seealso
#' - [`<DataFrame>$rename()`][DataFrame_rename]
#' @examples
#' df = pl$DataFrame(
#' foo = 1:3,
#' bar = 6:8,
#' ham = letters[1:3]
#' )
#' df$rename(foo = "apple")
#'
#' df$rename_with(
#' df$rename(
#' \(column_name) paste0("c", substr(column_name, 2, 100))
#' )
DataFrame_rename_with = function(fun) {
self$lazy()$rename_with(fun)$collect()
DataFrame_rename = function(...) {
  # All argument handling lives in the LazyFrame method: go lazy, rename
  # with the forwarded arguments, and collect the outcome.
  lf = self$lazy()
  lf$rename(...)$collect()
}


#' @title Summary statistics for a DataFrame
#'
#' @description This returns the total number of rows, the number of missing
Expand Down
72 changes: 27 additions & 45 deletions R/lazyframe__lazy.R
Original file line number Diff line number Diff line change
Expand Up @@ -1540,66 +1540,48 @@ LazyFrame_melt = function(
#' polars will block projection and predicate pushdowns at this node.
#' @inherit pl_LazyFrame return
#' @param ... One of the following:
#' - params like `new_name = "old_name"` to rename selected variables.
#' - as above but with params wrapped in a list
#' @seealso
#' - [`<LazyFrame>$rename_with()`][LazyFrame_rename_with]
#' - Key value pairs that map from old name to new name, like `old_name = "new_name"`.
#' - As above but with params wrapped in a list
#' - An R function that takes the old names character vector as input and
#' returns the new names character vector.
#' @examples
#' lf = pl$LazyFrame(
#' foo = 1:3,
#' bar = 6:8,
#' ham = letters[1:3]
#' )
#'
#' lf$rename(apple = "foo")$collect()
LazyFrame_rename = function(...) {
  pairs = list2(...)

  # Nothing to rename: hand back the LazyFrame untouched.
  if (!length(pairs)) {
    return(self)
  }

  # If the first argument is itself a list, use it as the mapping.
  first_arg = pairs[[1L]]
  if (is.list(first_arg)) {
    pairs = first_arg
  }

  # Pairs are written as `new_name = "old_name"`: the values are the
  # existing column names and the argument names are their replacements.
  old_names = unname(unlist(pairs))
  new_names = names(pairs)

  .pr$LazyFrame$rename(self, old_names, new_names) |>
    unwrap("in $rename():")
}


#' Rename column names of a LazyFrame with a function
#' lf$rename(foo = "apple")$collect()
#'
#' This method is currently experimental and may
#' change without it being considered a breaking change.
#' @inherit LazyFrame_rename details return
#' @param fun An R function that takes the old names character vector as input and
#' returns the new names character vector.
#' @seealso
#' - [`<LazyFrame>$rename()`][LazyFrame_rename]
#' @examples
#' lf = pl$LazyFrame(
#' foo = 1:3,
#' bar = 6:8,
#' ham = letters[1:3]
#' )
#'
#' lf$rename_with(
#' lf$rename(
#' \(column_name) paste0("c", substr(column_name, 2, 100))
#' )$collect()
LazyFrame_rename_with = function(fun) {
uw = \(res) unwrap(res, "in $rename_with():")

{
existing = names(self)
new = fun(existing)
} |>
result() |>
uw()
LazyFrame_rename = function(...) {
  # Every failure below is reported with this method's name as context.
  uw = function(res) unwrap(res, "in $rename():")

  # Calling without any argument is an error.
  if (nargs() == 0L) {
    uw(Err_plain("No arguments provided for `$rename()`."))
  }

  dots = list2(...)
  first_arg = dots[[1L]]

  if (is.function(first_arg)) {
    # Function form: apply it to the current column names. Both the name
    # lookup and the user function run inside `result()` so that any error
    # they raise is passed through `uw()`.
    new = uw(result({
      existing = names(self)
      first_arg(existing)
    }))
  } else {
    # Mapping form: `old_name = "new_name"` pairs, either given directly or
    # wrapped in a list passed as the first argument.
    if (is.list(first_arg)) {
      dots = first_arg
    }
    existing = names(dots)
    new = unname(unlist(dots))
  }

  uw(.pr$LazyFrame$rename(self, existing, new))
}


#' Fetch `n` rows of a LazyFrame
#'
#' This is similar to `$collect()` but limits the number of rows to collect. It
Expand Down
17 changes: 9 additions & 8 deletions man/DataFrame_rename.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

39 changes: 0 additions & 39 deletions man/DataFrame_rename_with.Rd

This file was deleted.

17 changes: 9 additions & 8 deletions man/LazyFrame_rename.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

39 changes: 0 additions & 39 deletions man/LazyFrame_rename_with.Rd

This file was deleted.

49 changes: 24 additions & 25 deletions tests/testthat/_snaps/after-wrappers.md
Original file line number Diff line number Diff line change
Expand Up @@ -87,16 +87,15 @@
[29] "lazy" "limit" "max" "mean"
[33] "median" "melt" "min" "n_chunks"
[37] "null_count" "partition_by" "pivot" "print"
[41] "quantile" "rechunk" "rename" "rename_with"
[45] "reverse" "rolling" "sample" "schema"
[49] "select" "select_seq" "shape" "shift"
[53] "shift_and_fill" "slice" "sort" "sql"
[57] "std" "sum" "tail" "to_data_frame"
[61] "to_list" "to_raw_ipc" "to_series" "to_struct"
[65] "transpose" "unique" "unnest" "var"
[69] "width" "with_columns" "with_columns_seq" "with_row_index"
[73] "write_csv" "write_ipc" "write_json" "write_ndjson"
[77] "write_parquet"
[41] "quantile" "rechunk" "rename" "reverse"
[45] "rolling" "sample" "schema" "select"
[49] "select_seq" "shape" "shift" "shift_and_fill"
[53] "slice" "sort" "sql" "std"
[57] "sum" "tail" "to_data_frame" "to_list"
[61] "to_raw_ipc" "to_series" "to_struct" "transpose"
[65] "unique" "unnest" "var" "width"
[69] "with_columns" "with_columns_seq" "with_row_index" "write_csv"
[73] "write_ipc" "write_json" "write_ndjson" "write_parquet"

---

Expand Down Expand Up @@ -160,21 +159,21 @@
[27] "median" "melt"
[29] "min" "print"
[31] "profile" "quantile"
[33] "rename" "rename_with"
[35] "reverse" "rolling"
[37] "schema" "select"
[39] "select_seq" "serialize"
[41] "set_optimization_toggle" "shift"
[43] "shift_and_fill" "sink_csv"
[45] "sink_ipc" "sink_ndjson"
[47] "sink_parquet" "slice"
[49] "sort" "sql"
[51] "std" "sum"
[53] "tail" "to_dot"
[55] "unique" "unnest"
[57] "var" "width"
[59] "with_columns" "with_columns_seq"
[61] "with_context" "with_row_index"
[33] "rename" "reverse"
[35] "rolling" "schema"
[37] "select" "select_seq"
[39] "serialize" "set_optimization_toggle"
[41] "shift" "shift_and_fill"
[43] "sink_csv" "sink_ipc"
[45] "sink_ndjson" "sink_parquet"
[47] "slice" "sort"
[49] "sql" "std"
[51] "sum" "tail"
[53] "to_dot" "unique"
[55] "unnest" "var"
[57] "width" "with_columns"
[59] "with_columns_seq" "with_context"
[61] "with_row_index"

---

Expand Down
Loading

0 comments on commit 52cb990

Please sign in to comment.