diff --git a/R/dataframe__frame.R b/R/dataframe__frame.R index 42a1de62a..f6be83bb5 100644 --- a/R/dataframe__frame.R +++ b/R/dataframe__frame.R @@ -1270,3 +1270,99 @@ DataFrame_join_asof = function( tolerance = tolerance )$collect() } + + + + +#' @inherit LazyFrame_melt +#' @keywords DataFrame +#' +#' @return A new `DataFrame` +#' +#' @examples +#' df = pl$DataFrame( +#' a = c("x", "y", "z"), +#' b = c(1, 3, 5), +#' c = c(2, 4, 6) +#' ) +#' df$melt(id_vars = "a", value_vars = c("b", "c")) +DataFrame_melt = function( + id_vars = NULL, + value_vars = NULL, + variable_name = NULL, + value_name = NULL) { + .pr$DataFrame$melt( + self, id_vars %||% character(), value_vars %||% character(), + value_name, variable_name + ) |> unwrap("in $melt( ): ") +} + + + +#' Create a spreadsheet-style pivot table as a DataFrame. +#' @param values Column values to aggregate. Can be multiple columns if the `columns` +#' arguments contains multiple columns as well. +#' @param index One or multiple keys to group by. +#' @param columns Name of the column(s) whose values will be used as the header of the output +#' DataFrame. +#' @param aggregate_function +#' String naming Expr to aggregate with, or an Expr e.g. `pl$element()$sum()`, +#' examples of strings:'first', 'sum', 'max', 'min', 'mean', 'median', 'last', 'count' +#' @param maintain_order Sort the grouped keys so that the output order is predictable. +#' @param sort_columns Sort the transposed columns by name. Default is by order of discovery. +#' @param separator Used as separator/delimiter in generated column names. +#' +#' @return DataFrame +#' @keywords DataFrame +#' @examples +#' df = pl$DataFrame( +#' foo = c("one", "one", "one", "two", "two", "two"), +#' bar = c("A", "B", "C", "A", "B", "C"), +#' baz = c(1, 2, 3, 4, 5, 6) +#' ) +#' df$pivot( +#' values = "baz", index = "foo", columns = "bar", aggregate_function = "first" +#' ) +#' +#' +#' # Run an expression as aggregation function +#' df = pl$DataFrame( +#' col1 = c("a", "a", "a", "b", "b", "b"), +#' col2 = c("x", "x", "x", "x", "y", "y"), +#' col3 = c(6, 7, 3, 2, 5, 7) +#' ) +#' df$pivot( +#' index = "col1", +#' columns = "col2", +#' values = "col3", +#' aggregate_function = pl$element()$tanh()$mean() +#' ) +DataFrame_pivot = function( + values, + index, + columns, + aggregate_function = NULL, + maintain_order = TRUE, + sort_columns = FALSE, + separator = "_") { + pcase( + # if string, call it on Expr-method of pl$element() and capture any Error as Result + is_string(aggregate_function), result(`$.Expr`(pl$element(), aggregate_function)()), + + # Expr or NULL pass as is + is.null(aggregate_function) || inherits(aggregate_function, "Expr"), Ok(aggregate_function), + + # anything else pass err + or_else = Err(" is neither a string, NULL or an Expr") + ) |> + # add param context + map_err(\(err_msg) paste( + "param [aggregate_function] being ", str_string(aggregate_function), err_msg + )) |> + # run pivot when valid aggregate_expr + and_then(\(aggregate_expr) .pr$DataFrame$pivot_expr( + self, values, index, columns, maintain_order, sort_columns, aggregate_expr, separator + )) |> + # unwrap and add method context name + unwrap("in $pivot():") +} diff --git a/R/extendr-wrappers.R b/R/extendr-wrappers.R index 58736603b..ea6f75ab1 100644 --- a/R/extendr-wrappers.R +++ b/R/extendr-wrappers.R @@ -115,6 +115,10 @@ DataFrame$estimated_size <- function() .Call(wrap__DataFrame__estimated_size, se DataFrame$null_count <- function() .Call(wrap__DataFrame__null_count, self) +DataFrame$melt <- function(id_vars, value_vars, value_name, variable_name) .Call(wrap__DataFrame__melt, self, id_vars, value_vars, value_name, variable_name) + +DataFrame$pivot_expr <- function(values, index, columns, maintain_order, sort_columns, aggregate_expr, separator) .Call(wrap__DataFrame__pivot_expr, self, values, index, columns, maintain_order, sort_columns, aggregate_expr, separator) + #' @export `$.DataFrame` <- function (self, name) { func <- DataFrame[[name]]; environment(func) <- environment(); func } @@ -887,6 +891,8 @@ LazyFrame$join <- function(other, left_on, right_on, how, suffix, allow_parallel LazyFrame$sort_by_exprs <- function(by, descending, nulls_last) .Call(wrap__LazyFrame__sort_by_exprs, self, by, descending, nulls_last) +LazyFrame$melt <- function(id_vars, value_vars, value_name, variable_name, streamable) .Call(wrap__LazyFrame__melt, self, id_vars, value_vars, value_name, variable_name, streamable) + #' @export `$.LazyFrame` <- function (self, name) { func <- LazyFrame[[name]]; environment(func) <- environment(); func } diff --git a/R/lazyframe__lazy.R b/R/lazyframe__lazy.R index d456ef930..b7a77f1fa 100644 --- a/R/lazyframe__lazy.R +++ b/R/lazyframe__lazy.R @@ -752,3 +752,49 @@ LazyFrame_join_asof = function( ) |> unwrap("in join_asof( ):") } + + +#' Unpivot a Frame from wide to long format +#' +#' @param id_vars char vec, columns to use as identifier variables. +#' @param value_vars char vec, Values to use as identifier variables. +#' If `value_vars` is empty all columns that are not in `id_vars` will be used. +#' @param variable_name string, Name to give to the `variable` column. Defaults to "variable" +#' @param value_name string, Name to give to the `value` column. Defaults to "value" +#' @param ... not used, forces to name streamable arg +#' @param streamable Allow this node to run in the streaming engine. +#' If this runs in streaming, the output of the melt operation +#' will not have a stable ordering. +#' +#' @details +#' Optionally leaves identifiers set. +#' +#' This function is useful to massage a DataFrame into a format where one or more +#' columns are identifier variables (id_vars), while all other columns, considered +#' measured variables (value_vars), are "unpivoted" to the row axis, leaving just +#' two non-identifier columns, 'variable' and 'value'. +#' +#' @keywords LazyFrame +#' +#' @return A new `LazyFrame` +#' +#' @examples +#' lf = pl$DataFrame( +#' a = c("x", "y", "z"), +#' b = c(1, 3, 5), +#' c = c(2, 4, 6) +#' )$lazy() +#' lf$melt(id_vars = "a", value_vars = c("b", "c"))$collect() +#' +LazyFrame_melt = function( + id_vars = NULL, + value_vars = NULL, + variable_name = NULL, + value_name = NULL, + ..., + streamable = TRUE) { + .pr$LazyFrame$melt( + self, id_vars %||% character(), value_vars %||% character(), + value_name, variable_name, streamable + ) |> unwrap("in $melt( ): ") +} diff --git a/man/DataFrame_melt.Rd b/man/DataFrame_melt.Rd new file mode 100644 index 000000000..ba27aae92 --- /dev/null +++ b/man/DataFrame_melt.Rd @@ -0,0 +1,46 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dataframe__frame.R +\name{DataFrame_melt} +\alias{DataFrame_melt} +\title{Unpivot a Frame from wide to long format} +\usage{ +DataFrame_melt( + id_vars = NULL, + value_vars = NULL, + variable_name = NULL, + value_name = NULL +) +} +\arguments{ +\item{id_vars}{char vec, columns to use as identifier variables.} + +\item{value_vars}{char vec, Values to use as identifier variables. +If \code{value_vars} is empty all columns that are not in \code{id_vars} will be used.} + +\item{variable_name}{string, Name to give to the \code{variable} column. Defaults to "variable"} + +\item{value_name}{string, Name to give to the \code{value} column. Defaults to "value"} +} +\value{ +A new \code{DataFrame} +} +\description{ +Unpivot a Frame from wide to long format +} +\details{ +Optionally leaves identifiers set. + +This function is useful to massage a DataFrame into a format where one or more +columns are identifier variables (id_vars), while all other columns, considered +measured variables (value_vars), are "unpivoted" to the row axis, leaving just +two non-identifier columns, 'variable' and 'value'. +} +\examples{ +df = pl$DataFrame( + a = c("x", "y", "z"), + b = c(1, 3, 5), + c = c(2, 4, 6) +) +df$melt(id_vars = "a", value_vars = c("b", "c")) +} +\keyword{DataFrame} diff --git a/man/DataFrame_pivot.Rd b/man/DataFrame_pivot.Rd new file mode 100644 index 000000000..bc144feb2 --- /dev/null +++ b/man/DataFrame_pivot.Rd @@ -0,0 +1,65 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dataframe__frame.R +\name{DataFrame_pivot} +\alias{DataFrame_pivot} +\title{Create a spreadsheet-style pivot table as a DataFrame.} +\usage{ +DataFrame_pivot( + values, + index, + columns, + aggregate_function = NULL, + maintain_order = TRUE, + sort_columns = FALSE, + separator = "_" +) +} +\arguments{ +\item{values}{Column values to aggregate. Can be multiple columns if the \code{columns} +arguments contains multiple columns as well.} + +\item{index}{One or multiple keys to group by.} + +\item{columns}{Name of the column(s) whose values will be used as the header of the output +DataFrame.} + +\item{aggregate_function}{String naming Expr to aggregate with, or an Expr e.g. \code{pl$element()$sum()}, +examples of strings:'first', 'sum', 'max', 'min', 'mean', 'median', 'last', 'count'} + +\item{maintain_order}{Sort the grouped keys so that the output order is predictable.} + +\item{sort_columns}{Sort the transposed columns by name. Default is by order of discovery.} + +\item{separator}{Used as separator/delimiter in generated column names.} +} +\value{ +DataFrame +} +\description{ +Create a spreadsheet-style pivot table as a DataFrame. +} +\examples{ +df = pl$DataFrame( + foo = c("one", "one", "one", "two", "two", "two"), + bar = c("A", "B", "C", "A", "B", "C"), + baz = c(1, 2, 3, 4, 5, 6) +) +df$pivot( + values = "baz", index = "foo", columns = "bar", aggregate_function = "first" +) + + +# Run an expression as aggregation function +df = pl$DataFrame( + col1 = c("a", "a", "a", "b", "b", "b"), + col2 = c("x", "x", "x", "x", "y", "y"), + col3 = c(6, 7, 3, 2, 5, 7) +) +df$pivot( + index = "col1", + columns = "col2", + values = "col3", + aggregate_function = pl$element()$tanh()$mean() +) +} +\keyword{DataFrame} diff --git a/man/LazyFrame_melt.Rd b/man/LazyFrame_melt.Rd new file mode 100644 index 000000000..204f69f97 --- /dev/null +++ b/man/LazyFrame_melt.Rd @@ -0,0 +1,55 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lazyframe__lazy.R +\name{LazyFrame_melt} +\alias{LazyFrame_melt} +\title{Unpivot a Frame from wide to long format} +\usage{ +LazyFrame_melt( + id_vars = NULL, + value_vars = NULL, + variable_name = NULL, + value_name = NULL, + ..., + streamable = TRUE +) +} +\arguments{ +\item{id_vars}{char vec, columns to use as identifier variables.} + +\item{value_vars}{char vec, Values to use as identifier variables. +If \code{value_vars} is empty all columns that are not in \code{id_vars} will be used.} + +\item{variable_name}{string, Name to give to the \code{variable} column. Defaults to "variable"} + +\item{value_name}{string, Name to give to the \code{value} column. Defaults to "value"} + +\item{...}{not used, forces to name streamable arg} + +\item{streamable}{Allow this node to run in the streaming engine. +If this runs in streaming, the output of the melt operation +will not have a stable ordering.} +} +\value{ +A new \code{LazyFrame} +} +\description{ +Unpivot a Frame from wide to long format +} +\details{ +Optionally leaves identifiers set. + +This function is useful to massage a DataFrame into a format where one or more +columns are identifier variables (id_vars), while all other columns, considered +measured variables (value_vars), are "unpivoted" to the row axis, leaving just +two non-identifier columns, 'variable' and 'value'. +} +\examples{ +lf = pl$DataFrame( + a = c("x", "y", "z"), + b = c(1, 3, 5), + c = c(2, 4, 6) +)$lazy() +lf$melt(id_vars = "a", value_vars = c("b", "c"))$collect() + +} +\keyword{LazyFrame} diff --git a/src/rust/src/conversion.rs b/src/rust/src/conversion.rs new file mode 100644 index 000000000..62a656579 --- /dev/null +++ b/src/rust/src/conversion.rs @@ -0,0 +1,8 @@ +use smartstring::alias::String as SmartString; +pub(crate) fn strings_to_smartstrings(container: I) -> Vec +where + I: IntoIterator, + S: AsRef, +{ + container.into_iter().map(|s| s.as_ref().into()).collect() +} diff --git a/src/rust/src/lazy/dataframe.rs b/src/rust/src/lazy/dataframe.rs index 69f5f239c..dcc7bde3c 100644 --- a/src/rust/src/lazy/dataframe.rs +++ b/src/rust/src/lazy/dataframe.rs @@ -1,4 +1,5 @@ use crate::concurrent::{handle_thread_r_requests, PolarsBackgroundHandle}; +use crate::conversion::strings_to_smartstrings; use crate::lazy::dsl::*; use crate::rdatatype::new_asof_strategy; use crate::rdatatype::new_join_type; @@ -9,6 +10,7 @@ use crate::utils::wrappers::null_to_opt; use crate::utils::{r_result_list, try_f64_into_usize}; use extendr_api::prelude::*; use polars::chunked_array::object::AsOfOptions; +use polars::frame::explode::MeltArgs; use polars::frame::hash_join::JoinType; use polars::prelude as pl; @@ -335,6 +337,24 @@ impl LazyFrame { let nulls_last = robj_to!(bool, nulls_last)?; Ok(ldf.sort_by_exprs(exprs, descending, nulls_last).into()) } + + fn melt( + &self, + id_vars: Robj, + value_vars: Robj, + value_name: Robj, + variable_name: Robj, + streamable: Robj, + ) -> Result { + let args = MeltArgs { + id_vars: strings_to_smartstrings(robj_to!(Vec, String, id_vars)?), + value_vars: strings_to_smartstrings(robj_to!(Vec, String, value_vars)?), + value_name: robj_to!(Option, String, value_name)?.map(|s| s.into()), + variable_name: robj_to!(Option, String, variable_name)?.map(|s| s.into()), + streamable: robj_to!(bool, streamable)?, + }; + Ok(self.0.clone().melt(args).into()) + } } #[derive(Clone)] diff --git a/src/rust/src/lib.rs b/src/rust/src/lib.rs index 55d0f0e3c..477ba5f7a 100644 --- a/src/rust/src/lib.rs +++ b/src/rust/src/lib.rs @@ -13,6 +13,7 @@ pub mod concurrent; pub mod lazy; pub mod arrow_interop; +pub mod conversion; pub mod conversion_r_to_s; pub mod conversion_s_to_r; pub mod rdataframe; diff --git a/src/rust/src/rdataframe/mod.rs b/src/rust/src/rdataframe/mod.rs index 3afd03cc6..72e466b7c 100644 --- a/src/rust/src/rdataframe/mod.rs +++ b/src/rust/src/rdataframe/mod.rs @@ -25,6 +25,10 @@ use polars_core::utils::arrow; use crate::utils::{collect_hinted_result, r_result_list}; +use crate::conversion::strings_to_smartstrings; +use polars::frame::explode::MeltArgs; +use polars::prelude::pivot::{pivot, pivot_stable}; + pub struct OwnedDataFrameIterator { columns: Vec, data_type: arrow::datatypes::DataType, @@ -345,6 +349,53 @@ impl DataFrame { pub fn null_count(&self) -> Self { self.0.clone().null_count().into() } + + fn melt( + &self, + id_vars: Robj, + value_vars: Robj, + value_name: Robj, + variable_name: Robj, + ) -> Result { + let args = MeltArgs { + id_vars: strings_to_smartstrings(robj_to!(Vec, String, id_vars)?), + value_vars: strings_to_smartstrings(robj_to!(Vec, String, value_vars)?), + value_name: robj_to!(Option, String, value_name)?.map(|s| s.into()), + variable_name: robj_to!(Option, String, variable_name)?.map(|s| s.into()), + streamable: false, + }; + let df = self.0.melt2(args).map_err(|s| s.to_string())?; + Ok(DataFrame(df)) + } + + pub fn pivot_expr( + &self, + values: Robj, + index: Robj, + columns: Robj, + maintain_order: Robj, + sort_columns: Robj, + aggregate_expr: Robj, + separator: Robj, + ) -> Result { + let fun = if robj_to!(bool, maintain_order)? { + pivot_stable + } else { + pivot + }; + + fun( + &self.0, + robj_to!(Vec, String, values)?, + robj_to!(Vec, String, index)?, + robj_to!(Vec, String, columns)?, + robj_to!(bool, sort_columns)?, + robj_to!(Option, PLExpr, aggregate_expr)?, + robj_to!(Option, str, separator)?, + ) + .map_err(|err| err.to_string()) + .map(|ok| ok.into()) + } } use crate::utils::wrappers::null_to_opt; impl DataFrame { diff --git a/src/rust/src/utils/mod.rs b/src/rust/src/utils/mod.rs index e082af50a..164e65811 100644 --- a/src/rust/src/utils/mod.rs +++ b/src/rust/src/utils/mod.rs @@ -746,6 +746,10 @@ macro_rules! robj_to_inner { $crate::utils::robj_to_rexpr($a, true) }; + (PLExpr, $a:ident) => { + $crate::utils::robj_to_rexpr($a, true).map(|ok| ok.0) + }; + (ExprCol, $a:ident) => { $crate::utils::robj_to_rexpr($a, false) }; @@ -807,7 +811,7 @@ macro_rules! robj_to { .map_err(|err| format!("the arg [{}] {}", stringify!($a), err)) .and_then(|x: Robj| { //coerce R vectors into list - let x = if !x.is_list() && x.len() > 1 { + let x = if !x.is_list() && x.len() != 1 { extendr_api::call!("as.list", x) .map_err(|err| format!("could not coerce to list: {}", err))? } else { diff --git a/tests/testthat/test-dataframe.R b/tests/testthat/test-dataframe.R index 3a32ebca6..dab77dc9c 100644 --- a/tests/testthat/test-dataframe.R +++ b/tests/testthat/test-dataframe.R @@ -412,17 +412,17 @@ test_that("to_Struct, unnest, to_frame, to_data_frame", { make_cases = function() { tibble::tribble( - ~.test_name, ~pola, ~base, - "max", "max", max, - "mean", "mean", mean, - "median", "median", median, - "max", "max", max, - "min", "min", min, - "std", "std", sd, - "sum", "sum", sum, - "var", "var", var, - "first", "first", function(x) head(x, 1), - "last", "last", function(x) tail(x, 1) + ~.test_name, ~pola, ~base, + "max", "max", max, + "mean", "mean", mean, + "median", "median", median, + "max", "max", max, + "min", "min", min, + "std", "std", sd, + "sum", "sum", sum, + "var", "var", var, + "first", "first", function(x) head(x, 1), + "last", "last", function(x) tail(x, 1) ) } @@ -742,3 +742,157 @@ test_that("join_asof_simple", { )$to_list() ) }) + + +test_that("melt example", { + df = pl$DataFrame( + a = c("x", "y", "z"), + b = c(1, 3, 5), + c = c(2, 4, 6) + ) + + expect_identical( + df$melt(id_vars = "a", value_vars = c("b", "c"))$to_list(), + list( + a = c("x", "y", "z", "x", "y", "z"), + variable = c("b", "b", "b", "c", "c", "c"), + value = c(1, 3, 5, 2, 4, 6) + ) + ) +}) + +test_that("melt vs data.table::melt", { + skip_if_not_installed("data.table") + pdf = pl$DataFrame( + a = c("x", "y", "z"), + b = c(1, 3, 5), + c = c(2, 4, 6) + ) + + rdf = pdf$to_data_frame() + dtt = data.table::data.table(rdf) + + melt_mod = \(...) { + data.table::melt(variable.factor = FALSE, value.factor = FALSE, ...) + } + + expect_identical( + pdf$melt(id_vars = "a", value_vars = c("b", "c"))$to_list(), + as.list(melt_mod(dtt, id.vars = "a", value_vars = c("b", "c"))) + ) + expect_identical( + pdf$melt(id_vars = c("c", "b"), value_vars = c("a"))$to_list(), + as.list(melt_mod(dtt, id.vars = c("c", "b"), value_vars = c("a"))) + ) + expect_identical( + pdf$melt(id_vars = c("a", "b"), value_vars = c("c"))$to_list(), + as.list(melt_mod(dtt, id.vars = c("a", "b"), value_vars = c("b", "c"))) + ) + + + expect_identical( + pdf$melt( + id_vars = c("a", "b"), value_vars = c("c"), value_name = "alice", variable_name = "bob" + )$to_list(), + as.list(melt_mod( + dtt, + id.vars = c("a", "b"), value_vars = c("b", "c"), value.name = "alice", variable.name = "bob" + )) + ) + + # check the check, this should not be equal + expect_error(expect_equal( + pdf$melt(id_vars = c("c", "b"), value_vars = c("a"))$to_list(), + as.list(melt_mod(dtt, id.vars = c("a", "b"), value_vars = c("c"))) + )) +}) + + + + + +test_that("pivot examples", { + df = pl$DataFrame( + foo = c("one", "one", "one", "two", "two", "two"), + bar = c("A", "B", "C", "A", "B", "C"), + baz = c(1, 2, 3, 4, 5, 6) + ) + + expect_identical( + df$pivot( + values = "baz", index = "foo", columns = "bar", aggregate_function = "first" + )$to_list(), + list(foo = c("one", "two"), A = c(1, 4), B = c(2, 5), C = c(3, 6)) + ) + + + # Run an expression as aggregation function + df = pl$DataFrame( + col1 = c("a", "a", "a", "b", "b", "b"), + col2 = c("x", "x", "x", "x", "y", "y"), + col3 = c(6, 7, 3, 2, 5, 7) + ) + + expect_equal( + df$pivot( + index = "col1", + columns = "col2", + values = "col3", + aggregate_function = pl$element()$tanh()$mean() + )$to_list(), + list( + col1 = c("a", "b"), + x = c(0.998346934093824, 0.964027580075817), + y = c(NA, 0.99995377060327) + ) + ) +}) + + +test_that("pivot args works", { + df = pl$DataFrame( + foo = c("one", "one", "one", "two", "two", "two"), + bar = c("A", "B", "C", "A", "B", "C"), + baz = c(1, 2, 3, 4, 5, 6), + jaz = 6:1 + ) + expect_identical( + df$pivot("foo", "bar", "baz")$to_list(), + list(bar = c("A", "B", "C"), `1.0` = c("one", NA, NA), `2.0` = c( + NA, + "one", NA + ), `3.0` = c(NA, NA, "one"), `4.0` = c("two", NA, NA), `5.0` = c(NA, "two", NA), `6.0` = c(NA, NA, "two")) + ) + + df = pl$DataFrame( + ann = c("one", "one", "one", "two", "two", "two"), + bob = c("A", "B", "A", "B", "A", "B"), + cat = c(1, 2, 3, 4, 5, 6) + ) + + # aggr functions + expect_identical( + df$pivot("cat", "ann", "bob", "mean")$to_list(), + list(ann = c("one", "two"), A = c(2, 5), B = c(2, 5)) + ) + expect_identical( + df$pivot("cat", "ann", "bob", pl$element()$mean())$to_list(), + df$pivot("cat", "ann", "bob", "mean")$to_list() + ) + expect_grepl_error(df$pivot("cat", "ann", "bob", 42), c("pivot", "param", "aggregate_function", "42")) + expect_grepl_error(df$pivot("cat", "ann", "bob", "dummy"), c("pivot", "dummy is not a method")) + + # maintain_order sort_columns + expect_grepl_error(df$pivot("cat", "ann", "bob", "mean", 42), c("pivot", "maintain_order", "bool")) + expect_grepl_error(df$pivot("cat", "ann", "bob", "mean", TRUE, 42), c("pivot", "sort_columns", "bool")) + + # separator + expect_identical( + names(df$pivot(c("ann", "bob"), "ann", "cat", "mean", sep = ".")), + c( + "ann", "ann.cat.1.0", "ann.cat.2.0", "ann.cat.3.0", "ann.cat.4.0", + "ann.cat.5.0", "ann.cat.6.0", "bob.cat.1.0", "bob.cat.2.0", "bob.cat.3.0", + "bob.cat.4.0", "bob.cat.5.0", "bob.cat.6.0" + ) + ) +}) diff --git a/tests/testthat/test-lazy.R b/tests/testthat/test-lazy.R index 7c0884f56..f1a393627 100644 --- a/tests/testthat/test-lazy.R +++ b/tests/testthat/test-lazy.R @@ -101,17 +101,17 @@ test_that("lazy filter", { make_cases = function() { tibble::tribble( - ~.test_name, ~pola, ~base, - "max", "max", max, - "mean", "mean", mean, - "median", "median", median, - "max", "max", max, - "min", "min", min, - "std", "std", sd, - "sum", "sum", sum, - "var", "var", var, - "first", "first", function(x) head(x, 1), - "last", "last", function(x) tail(x, 1) + ~.test_name, ~pola, ~base, + "max", "max", max, + "mean", "mean", mean, + "median", "median", median, + "max", "max", max, + "min", "min", min, + "std", "std", sd, + "sum", "sum", sum, + "var", "var", var, + "first", "first", function(x) head(x, 1), + "last", "last", function(x) tail(x, 1) ) } @@ -460,3 +460,65 @@ test_that("join_asof_simple", { expect_identical(get_reg(logical_json_plan_FF, allow_p_pat), "\"allow_parallel\": Bool(false)") expect_identical(get_reg(logical_json_plan_FF, force_p_pat), "\"force_parallel\": Bool(false)") }) + +test_that("melt example", { + lf = pl$DataFrame( + a = c("x", "y", "z"), + b = c(1, 3, 5), + c = c(2, 4, 6) + )$lazy() + + expect_identical( + lf$melt(id_vars = "a", value_vars = c("b", "c"))$collect()$to_list(), + list( + a = c("x", "y", "z", "x", "y", "z"), + variable = c("b", "b", "b", "c", "c", "c"), + value = c(1, 3, 5, 2, 4, 6) + ) + ) +}) + +test_that("melt vs data.table::melt", { + skip_if_not_installed("data.table") + plf = pl$DataFrame( + a = c("x", "y", "z"), + b = c(1, 3, 5), + c = c(2, 4, 6) + )$lazy() + + rdf = plf$collect()$to_data_frame() + dtt = data.table(rdf) + + melt_mod = \(...) { + data.table::melt(variable.factor = FALSE, value.factor = FALSE, ...) + } + + expect_identical( + plf$melt(id_vars = "a", value_vars = c("b", "c"))$collect()$to_list(), + as.list(melt_mod(dtt, id.vars = "a", value_vars = c("b", "c"))) + ) + expect_identical( + plf$melt(id_vars = c("c", "b"), value_vars = c("a"))$collect()$to_list(), + as.list(melt_mod(dtt, id.vars = c("c", "b"), value_vars = c("a"))) + ) + expect_identical( + plf$melt(id_vars = c("a", "b"), value_vars = c("c"))$collect()$to_list(), + as.list(melt_mod(dtt, id.vars = c("a", "b"), value_vars = c("b", "c"))) + ) + + expect_identical( + plf$melt( + id_vars = c("a", "b"), value_vars = c("c"), value_name = "alice", variable_name = "bob" + )$collect()$to_list(), + as.list(melt_mod( + dtt, + id.vars = c("a", "b"), value_vars = c("b", "c"), value.name = "alice", variable.name = "bob" + )) + ) + + # check the check, this should not be equal + expect_error(expect_equal( + plf$melt(id_vars = c("c", "b"), value_vars = c("a"))$collect()$to_list(), + as.list(melt_mod(dtt, id.vars = c("a", "b"), value_vars = c("c"))) + )) +})