diff --git a/DESCRIPTION b/DESCRIPTION index ff4c34d76..47a1b2cf9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -119,4 +119,4 @@ Collate: Config/rextendr/version: 0.3.1 VignetteBuilder: knitr Config/polars/LibVersion: 0.42.2 -Config/polars/RustToolchainVersion: nightly-2024-07-26 +Config/polars/RustToolchainVersion: nightly-2024-09-19 diff --git a/NEWS.md b/NEWS.md index a7c1331e7..e9751803a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,6 +2,15 @@ ## Polars R Package (development version) +- Updated rust-polars to 0.43.1 (#1230). + +### Breaking changes + +- In `pl$scan_ipc()` and `pl$read_ipc()`, the argument `memory_map` is removed + (#1230). +- In `$serialize()`, in the field `schema`, the field `inner` is renamed `fields`, + and the fields `output_schema` and `filter` are removed (#1230). + ### New features - New method `$cast()` for `DataFrame` and `LazyFrame` (#1219). diff --git a/R/as_polars.R b/R/as_polars.R index 697f6dcd5..40ede5aa6 100644 --- a/R/as_polars.R +++ b/R/as_polars.R @@ -383,14 +383,14 @@ as_polars_series.POSIXlt = function(x, name = NULL, ...) { #' @rdname as_polars_series #' @export as_polars_series.data.frame = function(x, name = NULL, ...) { - as_polars_df(x)$to_struct(name = name) + as_polars_df(x)$to_struct(name = name %||% "") } #' @rdname as_polars_series #' @export as_polars_series.vctrs_rcrd = function(x, name = NULL, ...) { - pl$select(unclass(x))$to_struct(name = name) + pl$select(unclass(x))$to_struct(name = name %||% "") } diff --git a/R/dataframe__frame.R b/R/dataframe__frame.R index 80d692ebb..6e02c2dad 100644 --- a/R/dataframe__frame.R +++ b/R/dataframe__frame.R @@ -245,7 +245,7 @@ DataFrame_width = method_as_active_binding(\() .pr$DataFrame$shape(self)[2L]) #' #' @examples #' pl$DataFrame( -#' a = list(c(1, 2, 3, 4, 5)), # NB if first column should be a list, wrap it in a Series +#' a = c(1, 2, 3, 4, 5), #' b = 1:5, #' c = letters[1:5], #' d = list(1:1, 1:2, 1:3, 1:4, 1:5) diff --git a/R/expr__expr.R b/R/expr__expr.R index 52afcc79e..2aaecda54 100644 --- a/R/expr__expr.R +++ b/R/expr__expr.R @@ -1181,7 +1181,7 @@ Expr_is_not_nan = use_extendr_wrapper #' ) #' #' # recycling -#' pl$DataFrame(mtcars)$with_columns(pl$col("mpg")$slice(0, 1)) +#' pl$DataFrame(mtcars)$with_columns(pl$col("mpg")$slice(0, 1)$first()) Expr_slice = function(offset, length = NULL) { .pr$Expr$slice(self, offset, wrap_e(length)) |> unwrap("in $slice():") diff --git a/R/extendr-wrappers.R b/R/extendr-wrappers.R index ba414817c..ff7c21b05 100644 --- a/R/extendr-wrappers.R +++ b/R/extendr-wrappers.R @@ -94,7 +94,7 @@ concat_series <- function(l, rechunk, to_supertypes) .Call(wrap__concat_series, new_from_csv <- function(path, has_header, separator, comment_prefix, quote_char, skip_rows, dtypes, null_values, ignore_errors, cache, infer_schema_length, n_rows, encoding, low_memory, rechunk, skip_rows_after_header, row_index_name, row_index_offset, try_parse_dates, eol_char, raise_if_empty, truncate_ragged_lines, include_file_paths) .Call(wrap__new_from_csv, path, has_header, separator, comment_prefix, quote_char, skip_rows, dtypes, null_values, ignore_errors, cache, infer_schema_length, n_rows, encoding, low_memory, rechunk, skip_rows_after_header, row_index_name, row_index_offset, try_parse_dates, eol_char, raise_if_empty, truncate_ragged_lines, include_file_paths) -import_arrow_ipc <- function(path, n_rows, cache, rechunk, row_name, row_index, memory_map, hive_partitioning, hive_schema, try_parse_hive_dates, include_file_paths) .Call(wrap__import_arrow_ipc, path, n_rows, cache, rechunk, row_name, row_index, memory_map, hive_partitioning, hive_schema, try_parse_hive_dates, include_file_paths) +import_arrow_ipc <- function(path, n_rows, cache, rechunk, row_name, row_index, hive_partitioning, hive_schema, try_parse_hive_dates, include_file_paths) .Call(wrap__import_arrow_ipc, path, n_rows, cache, rechunk, row_name, row_index, hive_partitioning, hive_schema, try_parse_hive_dates, include_file_paths) new_from_ndjson <- function(path, infer_schema_length, batch_size, n_rows, low_memory, rechunk, row_index_name, row_index_offset, ignore_errors) .Call(wrap__new_from_ndjson, path, infer_schema_length, batch_size, n_rows, low_memory, rechunk, row_index_name, row_index_offset, ignore_errors) diff --git a/R/io_ipc.R b/R/io_ipc.R index 1ced8b429..dbb5d6725 100644 --- a/R/io_ipc.R +++ b/R/io_ipc.R @@ -60,7 +60,6 @@ pl_scan_ipc = function( rechunk = rechunk, row_name = row_index_name, row_index = row_index_offset, - memory_map = memory_map, hive_partitioning = hive_partitioning, hive_schema = hive_schema, try_parse_hive_dates = try_parse_hive_dates, diff --git a/R/lazyframe__lazy.R b/R/lazyframe__lazy.R index d9dc74c6e..35ab42575 100644 --- a/R/lazyframe__lazy.R +++ b/R/lazyframe__lazy.R @@ -173,7 +173,7 @@ LazyFrame_width = method_as_active_binding(\() length(self$schema)) #' #' @examples #' pl$LazyFrame( -#' a = list(c(1, 2, 3, 4, 5)), +#' a = c(1, 2, 3, 4, 5), #' b = 1:5, #' c = letters[1:5], #' d = list(1:1, 1:2, 1:3, 1:4, 1:5) diff --git a/man/Expr_slice.Rd b/man/Expr_slice.Rd index 28fa0a8a1..1f5bd0cfe 100644 --- a/man/Expr_slice.Rd +++ b/man/Expr_slice.Rd @@ -37,5 +37,5 @@ pl$DataFrame(list(a = 0:100))$select( ) # recycling -pl$DataFrame(mtcars)$with_columns(pl$col("mpg")$slice(0, 1)) +pl$DataFrame(mtcars)$with_columns(pl$col("mpg")$slice(0, 1)$first()) } diff --git a/man/pl_DataFrame.Rd b/man/pl_DataFrame.Rd index 93cb4e334..a644e38b3 100644 --- a/man/pl_DataFrame.Rd +++ b/man/pl_DataFrame.Rd @@ -32,7 +32,7 @@ Create a new polars DataFrame } \examples{ pl$DataFrame( - a = list(c(1, 2, 3, 4, 5)), # NB if first column should be a list, wrap it in a Series + a = c(1, 2, 3, 4, 5), b = 1:5, c = letters[1:5], d = list(1:1, 1:2, 1:3, 1:4, 1:5) diff --git a/man/pl_LazyFrame.Rd b/man/pl_LazyFrame.Rd index 11efc24fc..829f3c961 100644 --- a/man/pl_LazyFrame.Rd +++ b/man/pl_LazyFrame.Rd @@ -19,7 +19,7 @@ be used for making examples and quick demonstrations. } \examples{ pl$LazyFrame( - a = list(c(1, 2, 3, 4, 5)), + a = c(1, 2, 3, 4, 5), b = 1:5, c = letters[1:5], d = list(1:1, 1:2, 1:3, 1:4, 1:5) diff --git a/src/rust/Cargo.lock b/src/rust/Cargo.lock index fd7a9d5ab..89f7017aa 100644 --- a/src/rust/Cargo.lock +++ b/src/rust/Cargo.lock @@ -302,6 +302,15 @@ version = "1.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8318a53db07bb3f8dca91a600466bdb3f2eaadeedfdbcf02e1accbad9271ba50" +[[package]] +name = "castaway" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0abae9be0aaf9ea96a3b1b8b1b55c602ca751eba1b1500220cea4ecbafe7c0d5" +dependencies = [ + "rustversion", +] + [[package]] name = "cc" version = "1.1.11" @@ -375,6 +384,21 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "compact_str" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6050c3a16ddab2e412160b31f2c871015704239bca62f72f6e5f0be631d3f644" +dependencies = [ + "castaway", + "cfg-if", + "itoa", + "rustversion", + "ryu", + "serde", + "static_assertions", +] + [[package]] name = "constant_time_eq" version = "0.3.0" @@ -1683,8 +1707,8 @@ dependencies = [ [[package]] name = "polars" -version = "0.42.0" -source = "git+https://github.com/pola-rs/polars.git?rev=67551b6594c581731f0e9ca814ff7c39377bd324#67551b6594c581731f0e9ca814ff7c39377bd324" +version = "0.43.1" +source = "git+https://github.com/pola-rs/polars.git?rev=54218e7e35e3defd4b0801e820c56eea6b91e525#54218e7e35e3defd4b0801e820c56eea6b91e525" dependencies = [ "getrandom", "polars-arrow", @@ -1703,8 +1727,8 @@ dependencies = [ [[package]] name = "polars-arrow" -version = "0.42.0" -source = "git+https://github.com/pola-rs/polars.git?rev=67551b6594c581731f0e9ca814ff7c39377bd324#67551b6594c581731f0e9ca814ff7c39377bd324" +version = "0.43.1" +source = "git+https://github.com/pola-rs/polars.git?rev=54218e7e35e3defd4b0801e820c56eea6b91e525#54218e7e35e3defd4b0801e820c56eea6b91e525" dependencies = [ "ahash", "atoi", @@ -1728,6 +1752,7 @@ dependencies = [ "parking_lot", "polars-arrow-format", "polars-error", + "polars-schema", "polars-utils", "ryu", "serde", @@ -1750,8 +1775,8 @@ dependencies = [ [[package]] name = "polars-compute" -version = "0.42.0" -source = "git+https://github.com/pola-rs/polars.git?rev=67551b6594c581731f0e9ca814ff7c39377bd324#67551b6594c581731f0e9ca814ff7c39377bd324" +version = "0.43.1" +source = "git+https://github.com/pola-rs/polars.git?rev=54218e7e35e3defd4b0801e820c56eea6b91e525#54218e7e35e3defd4b0801e820c56eea6b91e525" dependencies = [ "bytemuck", "either", @@ -1765,8 +1790,8 @@ dependencies = [ [[package]] name = "polars-core" -version = "0.42.0" -source = "git+https://github.com/pola-rs/polars.git?rev=67551b6594c581731f0e9ca814ff7c39377bd324#67551b6594c581731f0e9ca814ff7c39377bd324" +version = "0.43.1" +source = "git+https://github.com/pola-rs/polars.git?rev=54218e7e35e3defd4b0801e820c56eea6b91e525#54218e7e35e3defd4b0801e820c56eea6b91e525" dependencies = [ "ahash", "bitflags", @@ -1784,6 +1809,7 @@ dependencies = [ "polars-compute", "polars-error", "polars-row", + "polars-schema", "polars-utils", "rand", "rand_distr", @@ -1791,7 +1817,6 @@ dependencies = [ "regex", "serde", "serde_json", - "smartstring", "thiserror", "version_check", "xxhash-rust", @@ -1799,8 +1824,8 @@ dependencies = [ [[package]] name = "polars-error" -version = "0.42.0" -source = "git+https://github.com/pola-rs/polars.git?rev=67551b6594c581731f0e9ca814ff7c39377bd324#67551b6594c581731f0e9ca814ff7c39377bd324" +version = "0.43.1" +source = "git+https://github.com/pola-rs/polars.git?rev=54218e7e35e3defd4b0801e820c56eea6b91e525#54218e7e35e3defd4b0801e820c56eea6b91e525" dependencies = [ "avro-schema", "object_store", @@ -1812,13 +1837,14 @@ dependencies = [ [[package]] name = "polars-expr" -version = "0.42.0" -source = "git+https://github.com/pola-rs/polars.git?rev=67551b6594c581731f0e9ca814ff7c39377bd324#67551b6594c581731f0e9ca814ff7c39377bd324" +version = "0.43.1" +source = "git+https://github.com/pola-rs/polars.git?rev=54218e7e35e3defd4b0801e820c56eea6b91e525#54218e7e35e3defd4b0801e820c56eea6b91e525" dependencies = [ "ahash", "bitflags", "once_cell", "polars-arrow", + "polars-compute", "polars-core", "polars-io", "polars-ops", @@ -1826,13 +1852,12 @@ dependencies = [ "polars-time", "polars-utils", "rayon", - "smartstring", ] [[package]] name = "polars-io" -version = "0.42.0" -source = "git+https://github.com/pola-rs/polars.git?rev=67551b6594c581731f0e9ca814ff7c39377bd324#67551b6594c581731f0e9ca814ff7c39377bd324" +version = "0.43.1" +source = "git+https://github.com/pola-rs/polars.git?rev=54218e7e35e3defd4b0801e820c56eea6b91e525#54218e7e35e3defd4b0801e820c56eea6b91e525" dependencies = [ "ahash", "async-trait", @@ -1846,6 +1871,7 @@ dependencies = [ "fs4", "futures", "glob", + "hashbrown", "home", "itoa", "memchr", @@ -1859,6 +1885,7 @@ dependencies = [ "polars-error", "polars-json", "polars-parquet", + "polars-schema", "polars-time", "polars-utils", "rayon", @@ -1869,7 +1896,6 @@ dependencies = [ "serde_json", "simd-json", "simdutf8", - "smartstring", "tokio", "tokio-util", "url", @@ -1878,8 +1904,8 @@ dependencies = [ [[package]] name = "polars-json" -version = "0.42.0" -source = "git+https://github.com/pola-rs/polars.git?rev=67551b6594c581731f0e9ca814ff7c39377bd324#67551b6594c581731f0e9ca814ff7c39377bd324" +version = "0.43.1" +source = "git+https://github.com/pola-rs/polars.git?rev=54218e7e35e3defd4b0801e820c56eea6b91e525#54218e7e35e3defd4b0801e820c56eea6b91e525" dependencies = [ "ahash", "chrono", @@ -1899,8 +1925,8 @@ dependencies = [ [[package]] name = "polars-lazy" -version = "0.42.0" -source = "git+https://github.com/pola-rs/polars.git?rev=67551b6594c581731f0e9ca814ff7c39377bd324#67551b6594c581731f0e9ca814ff7c39377bd324" +version = "0.43.1" +source = "git+https://github.com/pola-rs/polars.git?rev=54218e7e35e3defd4b0801e820c56eea6b91e525#54218e7e35e3defd4b0801e820c56eea6b91e525" dependencies = [ "ahash", "bitflags", @@ -1919,15 +1945,14 @@ dependencies = [ "polars-time", "polars-utils", "rayon", - "smartstring", "tokio", "version_check", ] [[package]] name = "polars-mem-engine" -version = "0.42.0" -source = "git+https://github.com/pola-rs/polars.git?rev=67551b6594c581731f0e9ca814ff7c39377bd324#67551b6594c581731f0e9ca814ff7c39377bd324" +version = "0.43.1" +source = "git+https://github.com/pola-rs/polars.git?rev=54218e7e35e3defd4b0801e820c56eea6b91e525#54218e7e35e3defd4b0801e820c56eea6b91e525" dependencies = [ "futures", "memmap2", @@ -1947,8 +1972,8 @@ dependencies = [ [[package]] name = "polars-ops" -version = "0.42.0" -source = "git+https://github.com/pola-rs/polars.git?rev=67551b6594c581731f0e9ca814ff7c39377bd324#67551b6594c581731f0e9ca814ff7c39377bd324" +version = "0.43.1" +source = "git+https://github.com/pola-rs/polars.git?rev=54218e7e35e3defd4b0801e820c56eea6b91e525#54218e7e35e3defd4b0801e820c56eea6b91e525" dependencies = [ "ahash", "aho-corasick", @@ -1969,6 +1994,7 @@ dependencies = [ "polars-core", "polars-error", "polars-json", + "polars-schema", "polars-utils", "rand", "rand_distr", @@ -1976,15 +2002,14 @@ dependencies = [ "regex", "serde", "serde_json", - "smartstring", "unicode-reverse", "version_check", ] [[package]] name = "polars-parquet" -version = "0.42.0" -source = "git+https://github.com/pola-rs/polars.git?rev=67551b6594c581731f0e9ca814ff7c39377bd324#67551b6594c581731f0e9ca814ff7c39377bd324" +version = "0.43.1" +source = "git+https://github.com/pola-rs/polars.git?rev=54218e7e35e3defd4b0801e820c56eea6b91e525#54218e7e35e3defd4b0801e820c56eea6b91e525" dependencies = [ "ahash", "async-stream", @@ -1994,6 +2019,7 @@ dependencies = [ "ethnum", "flate2", "futures", + "hashbrown", "lz4", "num-traits", "parquet-format-safe", @@ -2010,8 +2036,8 @@ dependencies = [ [[package]] name = "polars-pipe" -version = "0.42.0" -source = "git+https://github.com/pola-rs/polars.git?rev=67551b6594c581731f0e9ca814ff7c39377bd324#67551b6594c581731f0e9ca814ff7c39377bd324" +version = "0.43.1" +source = "git+https://github.com/pola-rs/polars.git?rev=54218e7e35e3defd4b0801e820c56eea6b91e525#54218e7e35e3defd4b0801e820c56eea6b91e525" dependencies = [ "crossbeam-channel", "crossbeam-queue", @@ -2029,7 +2055,6 @@ dependencies = [ "polars-row", "polars-utils", "rayon", - "smartstring", "tokio", "uuid", "version_check", @@ -2037,12 +2062,13 @@ dependencies = [ [[package]] name = "polars-plan" -version = "0.42.0" -source = "git+https://github.com/pola-rs/polars.git?rev=67551b6594c581731f0e9ca814ff7c39377bd324#67551b6594c581731f0e9ca814ff7c39377bd324" +version = "0.43.1" +source = "git+https://github.com/pola-rs/polars.git?rev=54218e7e35e3defd4b0801e820c56eea6b91e525#54218e7e35e3defd4b0801e820c56eea6b91e525" dependencies = [ "ahash", "bitflags", "bytemuck", + "bytes", "chrono", "chrono-tz", "either", @@ -2063,15 +2089,14 @@ dependencies = [ "recursive", "regex", "serde", - "smartstring", "strum_macros", "version_check", ] [[package]] name = "polars-row" -version = "0.42.0" -source = "git+https://github.com/pola-rs/polars.git?rev=67551b6594c581731f0e9ca814ff7c39377bd324#67551b6594c581731f0e9ca814ff7c39377bd324" +version = "0.43.1" +source = "git+https://github.com/pola-rs/polars.git?rev=54218e7e35e3defd4b0801e820c56eea6b91e525#54218e7e35e3defd4b0801e820c56eea6b91e525" dependencies = [ "bytemuck", "polars-arrow", @@ -2079,10 +2104,22 @@ dependencies = [ "polars-utils", ] +[[package]] +name = "polars-schema" +version = "0.43.1" +source = "git+https://github.com/pola-rs/polars.git?rev=54218e7e35e3defd4b0801e820c56eea6b91e525#54218e7e35e3defd4b0801e820c56eea6b91e525" +dependencies = [ + "indexmap", + "polars-error", + "polars-utils", + "serde", + "version_check", +] + [[package]] name = "polars-sql" -version = "0.42.0" -source = "git+https://github.com/pola-rs/polars.git?rev=67551b6594c581731f0e9ca814ff7c39377bd324#67551b6594c581731f0e9ca814ff7c39377bd324" +version = "0.43.1" +source = "git+https://github.com/pola-rs/polars.git?rev=54218e7e35e3defd4b0801e820c56eea6b91e525#54218e7e35e3defd4b0801e820c56eea6b91e525" dependencies = [ "hex", "once_cell", @@ -2093,6 +2130,7 @@ dependencies = [ "polars-ops", "polars-plan", "polars-time", + "polars-utils", "rand", "serde", "serde_json", @@ -2101,8 +2139,8 @@ dependencies = [ [[package]] name = "polars-time" -version = "0.42.0" -source = "git+https://github.com/pola-rs/polars.git?rev=67551b6594c581731f0e9ca814ff7c39377bd324#67551b6594c581731f0e9ca814ff7c39377bd324" +version = "0.43.1" +source = "git+https://github.com/pola-rs/polars.git?rev=54218e7e35e3defd4b0801e820c56eea6b91e525#54218e7e35e3defd4b0801e820c56eea6b91e525" dependencies = [ "atoi", "bytemuck", @@ -2117,26 +2155,27 @@ dependencies = [ "polars-utils", "regex", "serde", - "smartstring", ] [[package]] name = "polars-utils" -version = "0.42.0" -source = "git+https://github.com/pola-rs/polars.git?rev=67551b6594c581731f0e9ca814ff7c39377bd324#67551b6594c581731f0e9ca814ff7c39377bd324" +version = "0.43.1" +source = "git+https://github.com/pola-rs/polars.git?rev=54218e7e35e3defd4b0801e820c56eea6b91e525#54218e7e35e3defd4b0801e820c56eea6b91e525" dependencies = [ "ahash", "bytemuck", "bytes", + "compact_str", "hashbrown", "indexmap", + "libc", "memmap2", "num-traits", "once_cell", "polars-error", "raw-cpuid", "rayon", - "smartstring", + "serde", "stacker", "sysinfo", "version_check", @@ -2238,7 +2277,7 @@ dependencies = [ [[package]] name = "r-polars" -version = "0.42.2" +version = "0.43.0" dependencies = [ "either", "extendr-api", @@ -2749,7 +2788,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fb72c633efbaa2dd666986505016c32c3044395ceaf881518399d2f4127ee29" dependencies = [ "autocfg", - "serde", "static_assertions", "version_check", ] diff --git a/src/rust/Cargo.toml b/src/rust/Cargo.toml index c6f16225c..53d9e50ef 100644 --- a/src/rust/Cargo.toml +++ b/src/rust/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "r-polars" -version = "0.42.2" +version = "0.43.0" edition = "2021" rust-version = "1.80.0" publish = false @@ -50,8 +50,8 @@ serde_json = "*" smartstring = "1.0.1" state = "0.6.0" thiserror = "1.0.63" -polars-core = { git = "https://github.com/pola-rs/polars.git", rev = "67551b6594c581731f0e9ca814ff7c39377bd324", default-features = false } -polars-lazy = { git = "https://github.com/pola-rs/polars.git", rev = "67551b6594c581731f0e9ca814ff7c39377bd324", default-features = false } +polars-core = { git = "https://github.com/pola-rs/polars.git", rev = "54218e7e35e3defd4b0801e820c56eea6b91e525", default-features = false } +polars-lazy = { git = "https://github.com/pola-rs/polars.git", rev = "54218e7e35e3defd4b0801e820c56eea6b91e525", default-features = false } either = "1" [dependencies.polars] @@ -160,4 +160,4 @@ features = [ "zip_with", ] git = "https://github.com/pola-rs/polars.git" -rev = "67551b6594c581731f0e9ca814ff7c39377bd324" +rev = "54218e7e35e3defd4b0801e820c56eea6b91e525" diff --git a/src/rust/src/arrow_interop/to_rust.rs b/src/rust/src/arrow_interop/to_rust.rs index 2dfe6ca1d..949b5e269 100644 --- a/src/rust/src/arrow_interop/to_rust.rs +++ b/src/rust/src/arrow_interop/to_rust.rs @@ -25,7 +25,7 @@ pub fn arrow_array_to_rust(arrow_array: Robj) -> Result { let array = unsafe { let field = ffi::import_field_from_c(schema.as_ref()).map_err(|err| err.to_string())?; - ffi::import_array_from_c(*array, field.data_type).map_err(|err| err.to_string())? + ffi::import_array_from_c(*array, field.dtype).map_err(|err| err.to_string())? }; Ok(array) } @@ -46,7 +46,10 @@ pub unsafe fn to_rust_df(rb: Robj) -> Result { }; let names = robj_record_batch_names .as_str_vector() - .ok_or_else(|| "internal error: Robj$schema$names is not a char vec".to_string())?; + .ok_or_else(|| "internal error: Robj$schema$names is not a char vec".to_string())? + .into_iter() + .map(PlSmallStr::from_str) + .collect::>(); //iterate over record batches let rb_len = rb.len(); @@ -65,7 +68,7 @@ pub unsafe fn to_rust_df(rb: Robj) -> Result { let array_iter = columns_list.into_iter().map(|(_, column)| { let arr = arrow_array_to_rust(column)?; run_parallel |= matches!( - arr.data_type(), + arr.dtype(), ArrowDataType::Utf8 | ArrowDataType::Dictionary(_, _, _) ); let list_res: Result<_, String> = Ok(arr); @@ -83,14 +86,15 @@ pub unsafe fn to_rust_df(rb: Robj) -> Result { .into_par_iter() .zip(names.par_iter()) .map(|(arr, name)| { - let s = Series::try_from((*name, arr)).map_err(|err| err.to_string())?; + let s = + Series::try_from((name.clone(), arr)).map_err(|err| err.to_string())?; Ok(s) }) .collect::, String>>() }) } else { let iter = arrays_vec.into_iter().zip(names.iter()).map(|(arr, name)| { - let s = Series::try_from((*name, arr)).map_err(|err| err.to_string())?; + let s = Series::try_from((name.clone(), arr)).map_err(|err| err.to_string())?; Ok(s) }); crate::utils::collect_hinted_result(n_columns, iter) diff --git a/src/rust/src/conversion.rs b/src/rust/src/conversion.rs deleted file mode 100644 index 62a656579..000000000 --- a/src/rust/src/conversion.rs +++ /dev/null @@ -1,8 +0,0 @@ -use smartstring::alias::String as SmartString; -pub(crate) fn strings_to_smartstrings(container: I) -> Vec -where - I: IntoIterator, - S: AsRef, -{ - container.into_iter().map(|s| s.as_ref().into()).collect() -} diff --git a/src/rust/src/conversion_r_to_s.rs b/src/rust/src/conversion_r_to_s.rs index 6335fb3ee..cc070fbf6 100644 --- a/src/rust/src/conversion_r_to_s.rs +++ b/src/rust/src/conversion_r_to_s.rs @@ -75,7 +75,7 @@ fn recursive_robjname2series_tree(x: &Robj, name: &str) -> pl::PolarsResult(real_slice) }; - Ok(SeriesTree::Series(pl::Series::new(name, i64_slice))) + Ok(SeriesTree::Series(pl::Series::new(name.into(), i64_slice))) } else { let mut s: pl::Series = rdouble //convert R NAs to rust options .iter() @@ -89,7 +89,7 @@ fn recursive_robjname2series_tree(x: &Robj, name: &str) -> pl::PolarsResult pl::PolarsResult pl::PolarsResult pl::PolarsResult Ok(SeriesTree::Series( @@ -138,14 +138,14 @@ fn recursive_robjname2series_tree(x: &Robj, name: &str) -> pl::PolarsResult { let rints = x.as_integers().expect("as matched"); let s = if rints.no_na().is_true() { - pl::Series::new(name, x.as_integer_slice().expect("as matched")) + pl::Series::new(name.into(), x.as_integer_slice().expect("as matched")) } else { //convert R NAs to rust options let mut s: pl::Series = rints .iter() .map(|x| if x.is_na() { None } else { Some(x.inner()) }) .collect(); - s.rename(name); + s.rename(name.into()); s }; @@ -175,7 +175,7 @@ fn recursive_robjname2series_tree(x: &Robj, name: &str) -> pl::PolarsResult pl::PolarsResult { Ok(SeriesTree::Series( (s * 1_000f64).cast(&pl::DataType::Int64)?.cast( - &pl::DataType::Datetime(pl::TimeUnit::Milliseconds, Some(tz)), + &pl::DataType::Datetime(pl::TimeUnit::Milliseconds, Some(tz.into())), )?, )) } @@ -235,17 +235,14 @@ fn recursive_robjname2series_tree(x: &Robj, name: &str) -> pl::PolarsResult Ok(s), // SeriesTree is just a regular Series, return as is SeriesTree::SeriesEmptyVec => { // Create Series of empty array and cast to the found leaf_dtype. use polars::prelude::ListBuilderTrait; - let empty_list_series = pl::ListBinaryChunkedBuilder::new(name, 0,0).finish().into_series(); + let empty_list_series = pl::ListBinaryChunkedBuilder::new(name.into(), 0,0).finish().into_series(); //cast to any discovered leaftype to allow concatenation without Error if let Some(leaf_dt_ref) = leaf_dtype { @@ -342,7 +339,7 @@ fn concat_series_tree( } // use polars new method to concat concatenated series - Ok(pl::Series::new(name, series_vec)) + Ok(pl::Series::new(name.into(), series_vec)) } } } @@ -350,7 +347,7 @@ fn concat_series_tree( //handle R character/strings to utf8 fn robj_to_utf8_series(rstrings: Strings, name: &str) -> pl::Series { if rstrings.no_na().is_true() { - pl::Series::new(name, rstrings.as_robj().as_str_vector().unwrap()) + pl::Series::new(name.into(), rstrings.as_robj().as_str_vector().unwrap()) } else { //convert R NAs to rust options let mut s: Vec> = Vec::with_capacity(rstrings.len()); @@ -360,6 +357,6 @@ fn robj_to_utf8_series(rstrings: Strings, name: &str) -> pl::Series { .map(|x| if x.is_na() { None } else { Some(x.as_str()) }), ); - pl::Series::new(name, s) + pl::Series::new(name.into(), s) } } diff --git a/src/rust/src/conversion_s_to_r.rs b/src/rust/src/conversion_s_to_r.rs index 8199148e1..2963b5b91 100644 --- a/src/rust/src/conversion_s_to_r.rs +++ b/src/rust/src/conversion_s_to_r.rs @@ -164,7 +164,7 @@ pub fn pl_series_to_list( Ok(l.into_robj()) } Struct(_) => { - let df = s.clone().into_frame().unnest([s.name()]).unwrap(); + let df = s.clone().into_frame().unnest([s.name().clone()]).unwrap(); let mut l = RPolarsDataFrame(df).to_list_result(int64_conversion)?; //TODO contribute extendr_api set_attrib mutates &self, change signature to surprise anyone @@ -225,8 +225,8 @@ pub fn pl_series_to_list( let s_name = s.name(); pl::DataFrame::new(vec![s.clone()])? .lazy() - .select([col(s_name).dt().replace_time_zone( - Some(sys_tz), + .select([col(s_name.clone()).dt().replace_time_zone( + Some(sys_tz.into()), pl::lit("raise"), pl::NonExistent::Raise, )]) diff --git a/src/rust/src/lazy/dataframe.rs b/src/rust/src/lazy/dataframe.rs index 22c2e60b0..04857e68a 100644 --- a/src/rust/src/lazy/dataframe.rs +++ b/src/rust/src/lazy/dataframe.rs @@ -342,7 +342,10 @@ impl RPolarsLazyFrame { let maintain_order = robj_to!(bool, maintain_order)?; let subset = robj_to!(Option, Vec, String, subset)?; let lf = if maintain_order { - self.0.clone().unique_stable(subset, ke) + self.0.clone().unique_stable( + subset.map(|x| x.into_iter().map(|y| y.into()).collect()), + ke, + ) } else { self.0.clone().unique(subset, ke) }; @@ -699,14 +702,14 @@ impl RPolarsLazyFrame { .iter() .map(|(k, v)| { let data_type = robj_to!(RPolarsDataType, v)?; - Ok(pl::Field::new(k, data_type.0)) + Ok(pl::Field::new(k.into(), data_type.0)) }) .collect::>>()?; let mut cast_map = PlHashMap::with_capacity(dtypes.len()); cast_map.extend( dtypes .iter() - .map(|f| (f.name().as_ref(), f.data_type().clone())), + .map(|f| (f.name().as_ref(), f.dtype().clone())), ); Ok(self.0.clone().cast(cast_map, strict).into()) } @@ -721,7 +724,7 @@ impl RPolarsLazyFrame { #[derive(Clone)] pub struct RPolarsLazyGroupBy { pub lgb: pl::LazyGroupBy, - opt_state: pl::OptState, + opt_state: pl::OptFlags, } #[extendr] diff --git a/src/rust/src/lazy/dsl.rs b/src/rust/src/lazy/dsl.rs index 8cd203ad1..52cfae57f 100644 --- a/src/rust/src/lazy/dsl.rs +++ b/src/rust/src/lazy/dsl.rs @@ -25,7 +25,6 @@ pub type NameGenerator = pl::Arc String + Send + Sync>; use crate::rdatatype::robjs_to_ewm_options; use crate::utils::r_expr_to_rust_expr; use crate::utils::unpack_r_eval; -use smartstring::{LazyCompact, SmartString}; use std::sync::Arc; #[derive(Clone, Debug)] @@ -88,8 +87,12 @@ impl RPolarsExpr { (Rtype::Raw, _) => Ok(dsl::lit(robj_to_binary_vec(robj)?)), // Raw in R is seen as a vector of bytes, in polars it is a Literal, not wrapped in a Series. (_, rlen) if rlen != 1 => to_series_then_lit(robj), (Rtype::List, _) => to_series_then_lit(robj), - (_, _) if robj_inherits(&robj, ["POSIXct", "PTime", "Date"]) => { - to_series_then_lit(robj) + (_, rlen) if robj_inherits(&robj, ["POSIXct", "PTime", "Date"]) => { + if rlen == 1 { + Ok(to_series_then_lit(robj)?.first()) + } else { + to_series_then_lit(robj) + } } (Rtype::Integers, 1) => { @@ -1030,7 +1033,7 @@ impl RPolarsExpr { } } - pub fn value_counts(&self, sort: bool, parallel: bool, name: String, normalize: bool) -> Self { + pub fn value_counts(&self, sort: bool, parallel: bool, name: &str, normalize: bool) -> Self { self.0 .clone() .value_counts(sort, parallel, name, normalize) @@ -1255,12 +1258,12 @@ impl RPolarsExpr { let width_strat = robj_to!(ListToStructWidthStrategy, n_field_strategy)?; let fields = robj_to!(Option, Robj, fields)?.map(|robj| { let par_fn: ParRObj = robj.into(); - let f: Arc<(dyn Fn(usize) -> SmartString + Send + Sync + 'static)> = + let f: Arc<(dyn Fn(usize) -> pl::PlSmallStr + Send + Sync + 'static)> = pl::Arc::new(move |idx: usize| { let thread_com = ThreadCom::from_global(&CONFIG); thread_com.send(RFnSignature::FnF64ToString(par_fn.clone(), idx as f64)); let s = thread_com.recv().unwrap_string(); - let s: SmartString = s.into(); + let s: pl::PlSmallStr = s.into(); s }); f @@ -1443,12 +1446,12 @@ impl RPolarsExpr { fn arr_to_struct(&self, fields: Robj) -> RResult { let fields = robj_to!(Option, Robj, fields)?.map(|robj| { let par_fn: ParRObj = robj.into(); - let f: Arc<(dyn Fn(usize) -> SmartString + Send + Sync + 'static)> = + let f: Arc<(dyn Fn(usize) -> pl::PlSmallStr + Send + Sync + 'static)> = pl::Arc::new(move |idx: usize| { let thread_com = ThreadCom::from_global(&CONFIG); thread_com.send(RFnSignature::FnF64ToString(par_fn.clone(), idx as f64)); let s = thread_com.recv().unwrap_string(); - let s: SmartString = s.into(); + let s: pl::PlSmallStr = s.into(); s }); f @@ -1583,22 +1586,23 @@ impl RPolarsExpr { .0 .clone() .dt() - .convert_time_zone(robj_to!(String, time_zone)?) + .convert_time_zone(robj_to!(String, time_zone)?.into()) .into()) } pub fn dt_replace_time_zone( &self, - time_zone: Nullable, + time_zone: Robj, ambiguous: Robj, non_existent: Robj, ) -> RResult { + let time_zone = robj_to!(Option, String, time_zone)?.map(|x| x.into()); Ok(self .0 .clone() .dt() .replace_time_zone( - time_zone.into_option(), + time_zone, robj_to!(PLExpr, ambiguous)?, robj_to!(NonExistent, non_existent)?, ) @@ -1985,7 +1989,7 @@ impl RPolarsExpr { // set expected type of output from R function let ot = robj_to!(Option, PLPolarsDataType, output_type)?; let output_map = pl::GetOutput::map_field(move |fld| match ot { - Some(ref dt) => Ok(pl::Field::new(fld.name(), dt.clone())), + Some(ref dt) => Ok(pl::Field::new(fld.name().clone(), dt.clone())), None => Ok(fld.clone()), }); @@ -2019,7 +2023,7 @@ impl RPolarsExpr { let ot = robj_to!(Option, PLPolarsDataType, output_type)?; let output_map = pl::GetOutput::map_field(move |fld| match ot { - Some(ref dt) => Ok(pl::Field::new(fld.name(), dt.clone())), + Some(ref dt) => Ok(pl::Field::new(fld.name().clone(), dt.clone())), None => Ok(fld.clone()), }); @@ -2052,7 +2056,7 @@ impl RPolarsExpr { let ot = null_to_opt(output_type).map(|rdt| rdt.0.clone()); let output_map = pl::GetOutput::map_field(move |fld| match ot { - Some(ref dt) => Ok(pl::Field::new(fld.name(), dt.clone())), + Some(ref dt) => Ok(pl::Field::new(fld.name().clone(), dt.clone())), None => Ok(fld.clone()), }); @@ -2117,13 +2121,13 @@ impl RPolarsExpr { // //wrap as series // }; - let f = move |name: &str| -> pl::PolarsResult { + let f = move |name: &pl::PlSmallStr| -> pl::PolarsResult { let robj = probj.clone().0; let rfun = robj .as_function() .expect("internal error: this is not an R function"); - let newname_robj = rfun.call(pairlist!(name)).map_err(|err| { + let newname_robj = rfun.call(pairlist!(name.as_str())).map_err(|err| { let es = format!("in $name$map(): user function raised this error: {:?}", err).into(); pl_error::ComputeError(es) @@ -2137,7 +2141,7 @@ impl RPolarsExpr { .into(); pl_error::ComputeError(es) }) - .map(|str| str.to_string()) + .map(|str| str.into()) }; Ok(self.clone().0.name().map(f).into()) @@ -2321,12 +2325,13 @@ impl RPolarsExpr { exact: Robj, cache: Robj, ) -> RResult { + let format = robj_to!(Option, String, format)?.map(|x| x.into()); Ok(self .0 .clone() .str() .to_date(pl::StrptimeOptions { - format: robj_to!(Option, String, format)?, + format, strict: robj_to!(bool, strict)?, exact: robj_to!(bool, exact)?, cache: robj_to!(bool, cache)?, @@ -2345,15 +2350,19 @@ impl RPolarsExpr { cache: Robj, ambiguous: Robj, ) -> RResult { + let format = robj_to!(Option, String, format)?.map(|x| x.into()); + let time_unit = robj_to!(Option, timeunit, time_unit)?.map(|x| x.into()); + let time_zone = robj_to!(Option, String, time_zone)?.map(|x| x.into()); + Ok(self .0 .clone() .str() .to_datetime( - robj_to!(Option, timeunit, time_unit)?, - robj_to!(Option, String, time_zone)?, + time_unit, + time_zone, pl::StrptimeOptions { - format: robj_to!(Option, String, format)?, + format: format, strict: robj_to!(bool, strict)?, exact: robj_to!(bool, exact)?, cache: robj_to!(bool, cache)?, @@ -2364,12 +2373,14 @@ impl RPolarsExpr { } pub fn str_to_time(&self, format: Robj, strict: Robj, cache: Robj) -> RResult { + let format = robj_to!(Option, String, format)?.map(|x| x.into()); + Ok(self .0 .clone() .str() .to_time(pl::StrptimeOptions { - format: robj_to!(Option, String, format)?, + format, strict: robj_to!(bool, strict)?, cache: robj_to!(bool, cache)?, exact: true, diff --git a/src/rust/src/lib.rs b/src/rust/src/lib.rs index f87968b3a..54a191ec1 100644 --- a/src/rust/src/lib.rs +++ b/src/rust/src/lib.rs @@ -14,7 +14,6 @@ pub mod lazy; pub mod arrow_interop; pub mod concat; -pub mod conversion; pub mod conversion_r_to_s; pub mod conversion_s_to_r; pub mod info; diff --git a/src/rust/src/rdataframe/mod.rs b/src/rust/src/rdataframe/mod.rs index db8df6b61..916164675 100644 --- a/src/rust/src/rdataframe/mod.rs +++ b/src/rust/src/rdataframe/mod.rs @@ -18,14 +18,13 @@ pub use lazy::dataframe::*; use crate::conversion_s_to_r::pl_series_to_list; pub use crate::series::*; +use crate::utils::{collect_hinted_result, r_result_list}; use arrow::datatypes::ArrowDataType; use polars::prelude::ArrowField; +use polars::prelude::SchemaExt; use polars_core::error::PolarsError; use polars_core::utils::arrow; -use crate::utils::{collect_hinted_result, r_result_list}; - -use crate::conversion::strings_to_smartstrings; use polars::frame::explode::UnpivotArgsIR; use polars::prelude::pivot::{pivot, pivot_stable}; @@ -40,7 +39,8 @@ pub struct OwnedDataFrameIterator { impl OwnedDataFrameIterator { pub fn new(df: polars::frame::DataFrame, compat_level: CompatLevel) -> Self { let schema = df.schema().to_arrow(compat_level); - let data_type = ArrowDataType::Struct(schema.fields); + // TODO: changed when bumping to 0.43.1, might need refactor + let data_type = ArrowDataType::Struct(schema.iter_values().map(|x| x.clone()).collect()); let vs = df.get_columns().to_vec(); Self { columns: vs, @@ -164,7 +164,7 @@ impl RPolarsDataFrame { .0 .clone() .with_row_index( - robj_to!(String, name)?.as_str(), + robj_to!(String, name)?.into(), robj_to!(Option, u32, offset)?, ) .map_err(polars_to_rpolars_err)? @@ -221,7 +221,8 @@ impl RPolarsDataFrame { pub fn schema(&self) -> List { let mut l = self.dtypes(); - l.set_names(self.0.get_column_names()).unwrap(); + let nms = self.0.get_column_names().into_iter().map(|x| x.as_str()); + l.set_names(nms).unwrap(); l } @@ -327,7 +328,7 @@ impl RPolarsDataFrame { pub fn to_struct(&self, name: Robj) -> RResult { use pl::IntoSeries; - let name = robj_to!(Option, str, name)?.unwrap_or(""); + let name = robj_to!(str, name)?.into(); let s = self.0.clone().into_struct(name); Ok(s.into_series().into()) } @@ -354,8 +355,9 @@ impl RPolarsDataFrame { pub fn export_stream(&self, stream_ptr: &str, compat_level: Robj) { let compat_level = robj_to!(CompatLevel, compat_level).unwrap(); let schema = self.0.schema().to_arrow(compat_level); - let data_type = ArrowDataType::Struct(schema.fields); - let field = ArrowField::new("", data_type, false); + // TODO: changed when bumping to 0.43.1, might need refactor + let data_type = ArrowDataType::Struct(schema.iter_values().map(|x| x.clone()).collect()); + let field = ArrowField::new("".into(), data_type, false); let iter_boxed = Box::new(OwnedDataFrameIterator::new(self.0.clone(), compat_level)); let mut stream = arrow::ffi::export_iterator(iter_boxed, field); @@ -393,8 +395,14 @@ impl RPolarsDataFrame { ) -> RResult { use polars::prelude::UnpivotDF; let args = UnpivotArgsIR { - on: strings_to_smartstrings(robj_to!(Vec, String, on)?), - index: strings_to_smartstrings(robj_to!(Vec, String, index)?), + on: robj_to!(Vec, String, on)? + .into_iter() + .map(|x| x.into()) + .collect(), + index: robj_to!(Vec, String, index)? + .into_iter() + .map(|x| x.into()) + .collect(), value_name: robj_to!(Option, String, value_name)?.map(|s| s.into()), variable_name: robj_to!(Option, String, variable_name)?.map(|s| s.into()), }; diff --git a/src/rust/src/rdataframe/read_csv.rs b/src/rust/src/rdataframe/read_csv.rs index 5c3af1735..9e16a67cf 100644 --- a/src/rust/src/rdataframe/read_csv.rs +++ b/src/rust/src/rdataframe/read_csv.rs @@ -22,18 +22,20 @@ use polars::prelude::LazyFileListReader; #[extendr] impl RPolarsRNullValues { pub fn new_all_columns(x: String) -> Self { - RPolarsRNullValues(pl::NullValues::AllColumnsSingle(x)) + RPolarsRNullValues(pl::NullValues::AllColumnsSingle(x.into())) } pub fn new_columns(x: Vec) -> Self { - RPolarsRNullValues(pl::NullValues::AllColumns(x)) + RPolarsRNullValues(pl::NullValues::AllColumns( + x.into_iter().map(|xi| xi.into()).collect(), + )) } pub fn new_named(robj: Robj) -> Self { let null_markers = robj.as_str_iter().expect("must be str"); let column_names = robj.names().expect("names were missing"); - let key_val_pair: Vec<(String, String)> = column_names + let key_val_pair: Vec<(pl::PlSmallStr, pl::PlSmallStr)> = column_names .zip(null_markers) - .map(|(k, v)| (k.to_owned(), v.to_owned())) + .map(|(k, v)| (k.into(), v.into())) .collect(); RPolarsRNullValues(pl::NullValues::Named(key_val_pair)) } @@ -96,7 +98,7 @@ pub fn new_from_csv( let schema = dtv.map(|some_od| { let fields = some_od.0.iter().map(|(name, dtype)| { if let Some(sname) = name { - pl::Field::new(sname, dtype.clone()) + pl::Field::new(sname.into(), dtype.clone()) } else { todo!("missing column name for dtype not implemented"); } @@ -114,7 +116,7 @@ pub fn new_from_csv( .with_cache(robj_to!(bool, cache)?) .with_dtype_overwrite(schema.map(|schema| std::sync::Arc::new(schema))) .with_low_memory(robj_to!(bool, low_memory)?) - .with_comment_prefix(robj_to!(Option, str, comment_prefix)?) + .with_comment_prefix(robj_to!(Option, str, comment_prefix)?.map(|x| x.into())) .with_quote_char(robj_to!(Option, Utf8Byte, quote_char)?) .with_eol_char(robj_to!(Utf8Byte, eol_char)?) .with_rechunk(robj_to!(bool, rechunk)?) diff --git a/src/rust/src/rdataframe/read_ipc.rs b/src/rust/src/rdataframe/read_ipc.rs index 5185be59a..543e41c82 100644 --- a/src/rust/src/rdataframe/read_ipc.rs +++ b/src/rust/src/rdataframe/read_ipc.rs @@ -13,7 +13,6 @@ pub fn import_arrow_ipc( rechunk: Robj, row_name: Robj, row_index: Robj, - memory_map: Robj, hive_partitioning: Robj, hive_schema: Robj, try_parse_hive_dates: Robj, @@ -38,10 +37,9 @@ pub fn import_arrow_ipc( }) }) .transpose()?, - memory_map: robj_to!(bool, memory_map)?, cloud_options: None, hive_options, - include_file_paths: robj_to!(Option, String, include_file_paths)?.map(Arc::from), + include_file_paths: robj_to!(Option, String, include_file_paths)?.map(|x| x.into()), }; let lf = LazyFrame::scan_ipc(robj_to!(String, path)?, args) .map_err(crate::rpolarserr::polars_to_rpolars_err)?; diff --git a/src/rust/src/rdataframe/read_parquet.rs b/src/rust/src/rdataframe/read_parquet.rs index 8cb03b6c4..f147d8d48 100644 --- a/src/rust/src/rdataframe/read_parquet.rs +++ b/src/rust/src/rdataframe/read_parquet.rs @@ -52,7 +52,7 @@ pub fn new_from_parquet( use_statistics: robj_to!(bool, use_statistics)?, hive_options, glob: robj_to!(bool, glob)?, - include_file_paths: robj_to!(Option, String, include_file_paths)?.map(Arc::from), + include_file_paths: robj_to!(Option, String, include_file_paths)?.map(|x| x.into()), }; pl::LazyFrame::scan_parquet(path, args) diff --git a/src/rust/src/rdatatype.rs b/src/rust/src/rdatatype.rs index bdf70764e..81678433e 100644 --- a/src/rust/src/rdatatype.rs +++ b/src/rust/src/rdatatype.rs @@ -8,7 +8,6 @@ use polars_core::prelude::QuantileInterpolOptions; use crate::rpolarserr::{polars_to_rpolars_err, rerr, RPolarsErr, RResult, WithRctx}; use crate::utils::collect_hinted_result; use crate::utils::robj_to_rchoice; -use crate::utils::wrappers::null_to_opt; use pl::UniqueKeepStrategy; use polars::prelude::AsofStrategy; use std::num::NonZeroUsize; @@ -97,14 +96,14 @@ impl RPolarsDataType { let s = robjname2series(categories, "").unwrap(); let ca = s.str()?; let categories = ca.downcast_iter().next().unwrap().clone(); - Ok(RPolarsDataType(pl::datatypes::create_enum_data_type( + Ok(RPolarsDataType(pl::datatypes::create_enum_dtype( categories, ))) } - pub fn new_datetime(tu: Robj, tz: Nullable) -> RResult { - robj_to!(timeunit, tu) - .map(|dt| RPolarsDataType(pl::DataType::Datetime(dt, null_to_opt(tz)))) + pub fn new_datetime(tu: Robj, tz: Robj) -> RResult { + let tz = robj_to!(Option, String, tz)?.map(|x| x.into()); + robj_to!(timeunit, tu).map(|dt| RPolarsDataType(pl::DataType::Datetime(dt, tz))) } pub fn new_duration(tu: Robj) -> RResult { @@ -452,7 +451,6 @@ pub fn robj_to_window_mapping(robj: Robj) -> RResult { pub fn literal_to_any_value(litval: pl::LiteralValue) -> RResult> { use pl::AnyValue as av; use pl::LiteralValue as lv; - use smartstring::alias::String as SString; match litval { lv::Boolean(x) => Ok(av::Boolean(x)), //lv::Datetime(datetime, unit) => Ok(av::Datetime(datetime, unit, &None)), #check how to convert @@ -475,12 +473,7 @@ pub fn literal_to_any_value(litval: pl::LiteralValue) -> RResult Ok(av::UInt64(x)), lv::UInt8(x) => Ok(av::UInt8(x)), // lv::Utf8(x) => Ok(av::Utf8(x.as_str())), - lv::String(x) => { - let mut s = SString::new(); - - s.push_str(x.as_str()); - Ok(av::StringOwned(s)) - } + lv::String(x) => Ok(av::StringOwned(x)), x => rerr().notachoice(format!("cannot convert LiteralValue {:?} to AnyValue", x)), } } @@ -741,8 +734,8 @@ pub fn robj_to_statistics_options(robj: Robj) -> RResult pub fn robj_to_wrap_schema(robj: Robj) -> RResult> { use pl::Schema; - let mut schema = Schema::new(); let hm = robj.as_list().unwrap().into_hashmap(); + let mut schema = Schema::with_capacity(hm.capacity()); for (key, value) in hm.into_iter() { let dt = crate::utils::robj_to_datatype(value)?; diff --git a/src/rust/src/rlib.rs b/src/rust/src/rlib.rs index 10df4c0e7..86190b638 100644 --- a/src/rust/src/rlib.rs +++ b/src/rust/src/rlib.rs @@ -6,61 +6,50 @@ use crate::RFnSignature; use crate::CONFIG; use extendr_api::prelude::*; use polars::chunked_array::ops::SortMultipleOptions; +use polars::lazy::dsl; use polars::prelude as pl; use std::result::Result; #[extendr] fn min_horizontal(dotdotdot: Robj) -> RResult { - Ok( - polars::lazy::dsl::min_horizontal(robj_to!(VecPLExprCol, dotdotdot)?) - .map_err(polars_to_rpolars_err)? - .into(), - ) + Ok(dsl::min_horizontal(robj_to!(VecPLExprCol, dotdotdot)?) + .map_err(polars_to_rpolars_err)? + .into()) } #[extendr] fn max_horizontal(dotdotdot: Robj) -> RResult { - Ok( - polars::lazy::dsl::max_horizontal(robj_to!(VecPLExprCol, dotdotdot)?) - .map_err(polars_to_rpolars_err)? - .into(), - ) + Ok(dsl::max_horizontal(robj_to!(VecPLExprCol, dotdotdot)?) + .map_err(polars_to_rpolars_err)? + .into()) } #[extendr] fn sum_horizontal(dotdotdot: Robj) -> RResult { - Ok( - polars::lazy::dsl::sum_horizontal(robj_to!(VecPLExprCol, dotdotdot)?) - .map_err(polars_to_rpolars_err)? - .into(), - ) + Ok(dsl::sum_horizontal(robj_to!(VecPLExprCol, dotdotdot)?) + .map_err(polars_to_rpolars_err)? + .into()) } #[extendr] fn mean_horizontal(dotdotdot: Robj) -> RResult { - Ok( - polars::lazy::dsl::mean_horizontal(robj_to!(VecPLExprCol, dotdotdot)?) - .map_err(polars_to_rpolars_err)? - .into(), - ) + Ok(dsl::mean_horizontal(robj_to!(VecPLExprCol, dotdotdot)?) + .map_err(polars_to_rpolars_err)? + .into()) } #[extendr] fn all_horizontal(dotdotdot: Robj) -> RResult { - Ok( - polars::lazy::dsl::all_horizontal(robj_to!(VecPLExprCol, dotdotdot)?) - .map_err(polars_to_rpolars_err)? - .into(), - ) + Ok(dsl::all_horizontal(robj_to!(VecPLExprCol, dotdotdot)?) + .map_err(polars_to_rpolars_err)? + .into()) } #[extendr] fn any_horizontal(dotdotdot: Robj) -> RResult { - Ok( - polars::lazy::dsl::any_horizontal(robj_to!(VecPLExprCol, dotdotdot)?) - .map_err(polars_to_rpolars_err)? - .into(), - ) + Ok(dsl::any_horizontal(robj_to!(VecPLExprCol, dotdotdot)?) + .map_err(polars_to_rpolars_err)? + .into()) } #[extendr] @@ -87,7 +76,7 @@ fn concat_str(dotdotdot: Robj, separator: Robj, ignore_nulls: Robj) -> RResult RResult { - Ok(RPolarsExpr(polars::lazy::prelude::date_range( + Ok(RPolarsExpr(dsl::date_range( robj_to!(PLExprCol, start)?, robj_to!(PLExprCol, end)?, pl::Duration::parse(interval), @@ -97,7 +86,7 @@ fn date_range(start: Robj, end: Robj, interval: &str, closed: Robj) -> RResult RResult { - Ok(RPolarsExpr(polars::lazy::prelude::date_ranges( + Ok(RPolarsExpr(dsl::date_ranges( robj_to!(PLExprCol, start)?, robj_to!(PLExprCol, end)?, pl::Duration::parse(interval), @@ -114,13 +103,13 @@ fn datetime_range( time_unit: Robj, time_zone: Robj, ) -> RResult { - Ok(RPolarsExpr(polars::lazy::prelude::datetime_range( + Ok(RPolarsExpr(dsl::datetime_range( robj_to!(PLExprCol, start)?, robj_to!(PLExprCol, end)?, pl::Duration::parse(interval), robj_to!(ClosedWindow, closed)?, robj_to!(Option, timeunit, time_unit)?, - robj_to!(Option, String, time_zone)?, + robj_to!(Option, String, time_zone)?.map(|x| x.into()), ))) } @@ -133,13 +122,13 @@ fn datetime_ranges( time_unit: Robj, time_zone: Robj, ) -> RResult { - Ok(RPolarsExpr(polars::lazy::prelude::datetime_ranges( + Ok(RPolarsExpr(dsl::datetime_ranges( robj_to!(PLExprCol, start)?, robj_to!(PLExprCol, end)?, pl::Duration::parse(interval), robj_to!(ClosedWindow, closed)?, robj_to!(Option, timeunit, time_unit)?, - robj_to!(Option, String, time_zone)?, + robj_to!(Option, String, time_zone)?.map(|x| x.into()), ))) } @@ -284,19 +273,17 @@ pub fn duration( time_unit: Robj, ) -> RResult { let args = pl::DurationArgs { - weeks: robj_to!(Option, PLExprCol, weeks)?.unwrap_or(polars::lazy::dsl::lit(0)), - days: robj_to!(Option, PLExprCol, days)?.unwrap_or(polars::lazy::dsl::lit(0)), - hours: robj_to!(Option, PLExprCol, hours)?.unwrap_or(polars::lazy::dsl::lit(0)), - minutes: robj_to!(Option, PLExprCol, minutes)?.unwrap_or(polars::lazy::dsl::lit(0)), - seconds: robj_to!(Option, PLExprCol, seconds)?.unwrap_or(polars::lazy::dsl::lit(0)), - milliseconds: robj_to!(Option, PLExprCol, milliseconds)? - .unwrap_or(polars::lazy::dsl::lit(0)), - microseconds: robj_to!(Option, PLExprCol, microseconds)? - .unwrap_or(polars::lazy::dsl::lit(0)), - nanoseconds: robj_to!(Option, PLExprCol, nanoseconds)?.unwrap_or(polars::lazy::dsl::lit(0)), + weeks: robj_to!(Option, PLExprCol, weeks)?.unwrap_or(dsl::lit(0)), + days: robj_to!(Option, PLExprCol, days)?.unwrap_or(dsl::lit(0)), + hours: robj_to!(Option, PLExprCol, hours)?.unwrap_or(dsl::lit(0)), + minutes: robj_to!(Option, PLExprCol, minutes)?.unwrap_or(dsl::lit(0)), + seconds: robj_to!(Option, PLExprCol, seconds)?.unwrap_or(dsl::lit(0)), + milliseconds: robj_to!(Option, PLExprCol, milliseconds)?.unwrap_or(dsl::lit(0)), + microseconds: robj_to!(Option, PLExprCol, microseconds)?.unwrap_or(dsl::lit(0)), + nanoseconds: robj_to!(Option, PLExprCol, nanoseconds)?.unwrap_or(dsl::lit(0)), time_unit: robj_to!(timeunit, time_unit)?, }; - Ok(polars::lazy::dsl::duration(args).into()) + Ok(dsl::duration(args).into()) } #[extendr] @@ -317,15 +304,15 @@ pub fn datetime( year: robj_to!(PLExprCol, year)?, month: robj_to!(PLExprCol, month)?, day: robj_to!(PLExprCol, day)?, - hour: robj_to!(Option, PLExprCol, hour)?.unwrap_or(polars::lazy::dsl::lit(0)), - minute: robj_to!(Option, PLExprCol, minute)?.unwrap_or(polars::lazy::dsl::lit(0)), - second: robj_to!(Option, PLExprCol, second)?.unwrap_or(polars::lazy::dsl::lit(0)), - microsecond: robj_to!(Option, PLExprCol, microsecond)?.unwrap_or(polars::lazy::dsl::lit(0)), + hour: robj_to!(Option, PLExprCol, hour)?.unwrap_or(dsl::lit(0)), + minute: robj_to!(Option, PLExprCol, minute)?.unwrap_or(dsl::lit(0)), + second: robj_to!(Option, PLExprCol, second)?.unwrap_or(dsl::lit(0)), + microsecond: robj_to!(Option, PLExprCol, microsecond)?.unwrap_or(dsl::lit(0)), time_unit: robj_to!(timeunit, time_unit)?, - time_zone: robj_to!(Option, String, time_zone)?, + time_zone: robj_to!(Option, String, time_zone)?.map(|x| x.into()), ambiguous: robj_to!(PLExpr, ambiguous)?, }; - Ok(polars::lazy::dsl::datetime(args).into()) + Ok(dsl::datetime(args).into()) } #[extendr] @@ -363,7 +350,7 @@ pub fn int_range(start: Robj, end: Robj, step: i64, dtype: Robj) -> RResult RResult RResult RResult { let names = robj_to!(Vec, String, names)?; - Ok(pl::Expr::Field( - names - .into_iter() - .map(|name| pl::Arc::from(name.as_str())) - .collect(), - ) - .into()) + Ok(pl::Expr::Field(names.into_iter().map(|name| name.into()).collect()).into()) } extendr_module! { diff --git a/src/rust/src/series.rs b/src/rust/src/series.rs index 29e9f4f0c..461683025 100644 --- a/src/rust/src/series.rs +++ b/src/rust/src/series.rs @@ -81,7 +81,7 @@ impl From<&RPolarsExpr> for pl::PolarsResult { .map(|df| { df.select_at_idx(0) .cloned() - .unwrap_or_else(|| pl::Series::new_empty("", &pl::DataType::Null)) + .unwrap_or_else(|| pl::Series::new_empty("".into(), &pl::DataType::Null)) .into() }) } @@ -118,7 +118,7 @@ impl RPolarsSeries { } //any mut method exposed in R suffixed _mut pub fn rename_mut(&mut self, name: &str) { - self.0.rename(name); + self.0.rename(name.into()); } //any other method or trait method in alphabetical order @@ -169,7 +169,7 @@ impl RPolarsSeries { normalize: bool, ) -> std::result::Result { self.0 - .value_counts(sort, parallel, name, normalize) + .value_counts(sort, parallel, name.into(), normalize) .map(RPolarsDataFrame) .map_err(|err| format!("in value_counts: {:?}", err)) } @@ -315,7 +315,7 @@ impl RPolarsSeries { pub fn alias(&self, name: &str) -> RPolarsSeries { let mut s = self.0.clone(); - s.rename(name); + s.rename(name.into()); RPolarsSeries(s) } @@ -512,11 +512,11 @@ impl RPolarsSeries { .cast(&DataType::UInt8) .map_err(polars_to_rpolars_err)? .mean_reduce() - .into_series(""); + .into_series("".into()); RPolarsSeries(s).to_r("double") } DataType::Datetime(_, _) | DataType::Duration(_) | DataType::Time => { - let s = self.0.mean_reduce().into_series(""); + let s = self.0.mean_reduce().into_series("".into()); RPolarsSeries(s).to_r("double") } _ => Ok(self.0.mean().into()), @@ -532,7 +532,7 @@ impl RPolarsSeries { .map_err(polars_to_rpolars_err)? .median_reduce() .map_err(polars_to_rpolars_err)? - .into_series(""); + .into_series("".into()); RPolarsSeries(s).to_r("double") } DataType::Datetime(_, _) | DataType::Duration(_) | DataType::Time => { @@ -540,7 +540,7 @@ impl RPolarsSeries { .0 .median_reduce() .map_err(polars_to_rpolars_err)? - .into_series(""); + .into_series("".into()); RPolarsSeries(s).to_r("double") } _ => Ok(self.0.median().into()), @@ -552,7 +552,7 @@ impl RPolarsSeries { self.0 .min_reduce() .map_err(polars_to_rpolars_err)? - .into_series(""), + .into_series("".into()), ) .to_r("double") } @@ -562,7 +562,7 @@ impl RPolarsSeries { self.0 .max_reduce() .map_err(polars_to_rpolars_err)? - .into_series(""), + .into_series("".into()), ) .to_r("double") } @@ -572,7 +572,7 @@ impl RPolarsSeries { self.0 .sum_reduce() .map_err(polars_to_rpolars_err)? - .into_series(""), + .into_series("".into()), ) .to_r("double") } @@ -584,7 +584,7 @@ impl RPolarsSeries { self.0 .std_reduce(ddof) .map_err(polars_to_rpolars_err)? - .into_series(""), + .into_series("".into()), ) .to_r("double") } @@ -596,7 +596,7 @@ impl RPolarsSeries { self.0 .var_reduce(ddof) .map_err(polars_to_rpolars_err)? - .into_series(""), + .into_series("".into()), ) .to_r("double") } @@ -631,7 +631,7 @@ impl RPolarsSeries { pub fn export_stream(&self, stream_ptr: &str, compat_level: Robj) { let compat_level = robj_to!(CompatLevel, compat_level).unwrap(); let data_type = self.0.dtype().to_arrow(compat_level); - let field = pl::ArrowField::new("", data_type, false); + let field = pl::ArrowField::new("".into(), data_type, false); let iter_boxed = Box::new(OwnedSeriesIterator::new(self.0.clone(), compat_level)); let mut stream = arrow::ffi::export_iterator(iter_boxed, field); @@ -647,7 +647,7 @@ impl RPolarsSeries { } pub fn import_stream(name: Robj, stream_ptr: Robj) -> RResult { - let name = robj_to!(str, name)?; + let name: PlSmallStr = robj_to!(str, name)?.into(); let stream_in_ptr_addr = robj_to!(usize, stream_ptr)?; let stream_in_ptr = unsafe { Box::from_raw(stream_in_ptr_addr as *mut arrow::ffi::ArrowArrayStream) }; @@ -665,10 +665,10 @@ impl RPolarsSeries { } pub fn from_arrow_array_robj(name: Robj, array: Robj) -> Result { - let name = robj_to!(str, name)?; + let name: PlSmallStr = robj_to!(str, name)?.into(); let arr = crate::arrow_interop::to_rust::arrow_array_to_rust(array)?; - match arr.data_type() { + match arr.dtype() { ArrowDataType::LargeList(_) => { let array = arr.as_any().downcast_ref::().unwrap(); @@ -681,7 +681,7 @@ impl RPolarsSeries { } previous = o; } - let mut out = unsafe { ListChunked::from_chunks(name, vec![arr]) }; + let mut out = unsafe { ListChunked::from_chunks(name.into(), vec![arr]) }; if fast_explode { out.set_fast_explode() } diff --git a/tests/testthat/_snaps/lazy.md b/tests/testthat/_snaps/lazy.md index 7d1810ac5..8591cf0d8 100644 --- a/tests/testthat/_snaps/lazy.md +++ b/tests/testthat/_snaps/lazy.md @@ -10,7 +10,7 @@ FILTER [(col("a")) == (2)] FROM DF ["a", "b"]; PROJECT */2 COLUMNS; SELECTION: None -# LazyFrame serialize/deseialize +# LazyFrame serialize/deserialize Code jsonlite::prettify(json) @@ -51,13 +51,11 @@ ] }, "schema": { - "inner": { + "fields": { "a": "Int32", "b": "String" } - }, - "output_schema": null, - "filter": null + } } }, "predicate": { diff --git a/tests/testthat/test-ipc.R b/tests/testthat/test-ipc.R index 9034e81e3..7273494a2 100644 --- a/tests/testthat/test-ipc.R +++ b/tests/testthat/test-ipc.R @@ -34,7 +34,6 @@ test_that("Test reading data from Apache Arrow IPC", { expect_grepl_error(pl$scan_ipc(tmpf, rechunk = list())) expect_grepl_error(pl$scan_ipc(tmpf, row_index_name = c("x", "y"))) expect_grepl_error(pl$scan_ipc(tmpf, row_index_name = "name", row_index_offset = data.frame())) - expect_grepl_error(pl$scan_ipc(tmpf, memory_map = NULL)) }) @@ -89,27 +88,6 @@ patrick::with_parameters_test_that("input/output DataFrame as raw vector", ) -test_that("memory_map", { - tmpf = tempfile(fileext = ".arrow") - on.exit(unlink(tmpf)) - pl$DataFrame(x = 1)$write_ipc(tmpf, compression = "uncompressed") - - df = pl$read_ipc(tmpf, memory_map = TRUE) - - expect_true( - df$equals(pl$DataFrame(x = 1)) - ) - - # On Windows, the file is still open so overwriting it is not allowed - skip_on_os("windows") - pl$DataFrame(y = 2)$write_ipc(tmpf, compression = "uncompressed") - - expect_true( - df$equals(pl$DataFrame(x = 2)) - ) -}) - - test_that("scanning from hive partition works", { skip_if_not_installed("arrow") skip_if_not_installed("withr") diff --git a/tests/testthat/test-lazy.R b/tests/testthat/test-lazy.R index 94c596ca3..ce42b9f88 100644 --- a/tests/testthat/test-lazy.R +++ b/tests/testthat/test-lazy.R @@ -27,7 +27,7 @@ test_that("create LazyFrame", { }) -test_that("LazyFrame serialize/deseialize", { +test_that("LazyFrame serialize/deserialize", { skip_if_not_installed("jsonlite") df = pl$DataFrame( @@ -48,7 +48,7 @@ test_that("LazyFrame serialize/deseialize", { df$lazy()$select( pl$col("a")$map_elements(\(x) -abs(x)) )$serialize(), - "serialize not supported for this 'opaque' function" + "serialization not supported for this 'opaque' function" ) }) diff --git a/tests/testthat/test-sink_stream.R b/tests/testthat/test-sink_stream.R index 9427ab15a..c49335b10 100644 --- a/tests/testthat/test-sink_stream.R +++ b/tests/testthat/test-sink_stream.R @@ -49,27 +49,27 @@ test_that("Test sinking data to IPC file", { on.exit(unlink(tmpf)) lf$sink_ipc(tmpf) expect_grepl_error(lf$sink_ipc(tmpf, compression = "rar")) - expect_identical(pl$scan_ipc(tmpf, memory_map = FALSE)$collect()$to_data_frame(), rdf) + expect_identical(pl$scan_ipc(tmpf)$collect()$to_data_frame(), rdf) # update with new data lf$slice(5, 5)$sink_ipc(tmpf) expect_equal( - pl$scan_ipc(tmpf, memory_map = FALSE)$collect()$to_data_frame(), + pl$scan_ipc(tmpf)$collect()$to_data_frame(), lf$slice(5, 5)$collect()$to_data_frame() ) lf$sink_ipc(tmpf) # from another process via rcall rdf_callr = callr::r(\(tmpf) { - polars::pl$scan_ipc(tmpf, memory_map = FALSE)$collect()$to_data_frame() + polars::pl$scan_ipc(tmpf)$collect()$to_data_frame() }, args = list(tmpf = tmpf)) expect_identical(rdf_callr, rdf) # from another process via rpool f_ipc_to_s = \(s) { - polars::pl$scan_ipc(s$to_r(), memory_map = FALSE)$ + polars::pl$scan_ipc(s$to_r())$ select(polars::pl$struct(polars::pl$all()))$ collect()$ to_series()