Skip to content

Commit

Permalink
S3 methods for DataFrame and LazyFrame: simplest (#107)
Browse files Browse the repository at this point in the history
* S3 methods for DataFrame and LazyFrame: simplest

* @noRd tags

* remove upper case duplicates (docs warning)

* s3: patrick + series + agg

* docs

* vignette

* vignette

* vignette

* prefix, maintain_order not sort, drop cliffhanger

* responses to Grant's comments on Get Started vignette

* typo

---------

Co-authored-by: sorhawell <[email protected]>
  • Loading branch information
vincentarelbundock and sorhawell authored Apr 16, 2023
1 parent 99e5cd3 commit e9d98de
Show file tree
Hide file tree
Showing 10 changed files with 500 additions and 101 deletions.
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ Collate:
'pkg-nanoarrow.R'
'rlang.R'
'rust_result.R'
's3_methods.R'
'series__series.R'
'translation.R'
'vctrs.R'
Expand Down
31 changes: 31 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,29 @@ S3method(.DollarNames,WhenThen)
S3method(.DollarNames,WhenThenThen)
S3method(.DollarNames,method_environment)
S3method(as.data.frame,DataFrame)
S3method(as.data.frame,LazyFrame)
S3method(as.matrix,DataFrame)
S3method(as.matrix,LazyFrame)
S3method(c,Series)
S3method(dim,DataFrame)
S3method(dim,LazyFrame)
S3method(head,DataFrame)
S3method(head,LazyFrame)
S3method(length,DataFrame)
S3method(length,LazyFrame)
S3method(length,Series)
S3method(max,DataFrame)
S3method(max,LazyFrame)
S3method(max,Series)
S3method(mean,DataFrame)
S3method(mean,LazyFrame)
S3method(median,DataFrame)
S3method(median,LazyFrame)
S3method(min,DataFrame)
S3method(min,LazyFrame)
S3method(min,Series)
S3method(names,DataFrame)
S3method(names,LazyFrame)
S3method(print,DataFrame)
S3method(print,Expr)
S3method(print,GroupBy)
Expand All @@ -96,6 +117,11 @@ S3method(print,Series)
S3method(print,When)
S3method(print,WhenThen)
S3method(print,WhenThenThen)
S3method(sum,DataFrame)
S3method(sum,LazyFrame)
S3method(sum,Series)
S3method(tail,DataFrame)
S3method(tail,LazyFrame)
export("%**%")
export("%**%.Expr")
export(.pr)
Expand All @@ -104,10 +130,15 @@ export(GroupBy_agg)
export(GroupBy_as_data_frame)
export(LazyFrame_print)
export(csv_reader)
export(ncol.DataFrame)
export(ncol.LazyFrame)
export(nrow.DataFrame)
export(nrow.LazyFrame)
export(pl)
export(read_csv_)
export(scan_parquet)
export(unwrap)
importFrom(stats,median)
importFrom(utils,.DollarNames)
importFrom(utils,capture.output)
importFrom(utils,download.file)
Expand Down
136 changes: 136 additions & 0 deletions R/s3_methods.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
#' First rows of a DataFrame
#' @param x DataFrame
#' @param n number of rows, forwarded to `x$limit()`
#' @param ... ignored
#' @export
#' @noRd
head.DataFrame = function(x, n = 6L, ...) {
  x$limit(n = n)
}

#' First rows of a LazyFrame
#' @param x LazyFrame
#' @param n number of rows, forwarded to `x$limit()`
#' @param ... ignored
#' @export
#' @noRd
head.LazyFrame = function(x, n = 6L, ...) {
  # Same body as head.DataFrame, written out explicitly.
  x$limit(n = n)
}

#' Last rows of a DataFrame
#' @param x DataFrame
#' @param n number of rows, forwarded to `x$tail()`
#' @param ... ignored
#' @export
#' @noRd
tail.DataFrame = function(x, n = 6L, ...) {
  x$tail(n = n)
}

#' Last rows of a LazyFrame
#' @param x LazyFrame
#' @param n number of rows, forwarded to `x$tail()`
#' @param ... ignored
#' @export
#' @noRd
tail.LazyFrame = function(x, n = 6L, ...) {
  # Same body as tail.DataFrame, written out explicitly.
  x$tail(n = n)
}

#' Dimensions of a DataFrame
#' @param x DataFrame
#' @param ... ignored
#' @return the value of `x$shape`
#' @export
#' @noRd
dim.DataFrame = function(x, ...) {
  x$shape
}

#' Dimensions of a LazyFrame
#' @param x LazyFrame
#' @param ... ignored
#' @return the `$shape` of the collected frame
#' @export
#' @noRd
dim.LazyFrame = function(x, ...) {
  # The lazy query is collected first; the shape is read from the result.
  collected = x$collect()
  collected$shape
}

#' Length of a DataFrame
#' @param x DataFrame
#' @param ... ignored
#' @return the value of `x$width`
#' @export
#' @noRd
length.DataFrame = function(x, ...) {
  x$width
}

#' Length of a Series
#' @param x Series
#' @param ... ignored
#' @return the result of `x$len()`
#' @export
#' @noRd
length.Series = function(x, ...) {
  x$len()
}

#' Length of a LazyFrame
#' @param x LazyFrame
#' @param ... ignored
#' @return the `$width` of the collected frame
#' @export
#' @noRd
length.LazyFrame = function(x, ...) {
  # The lazy query is collected first; the width is read from the result.
  collected = x$collect()
  collected$width
}

#' The Number of Rows of a DataFrame
#'
#' Reads the `$height` field of the DataFrame.
#' @param x DataFrame
#' @return Integer
#' @export
nrow.DataFrame = function(x) {
  x$height
}

#' The Number of Rows of a LazyFrame
#'
#' Collects the LazyFrame and reads the `$height` field of the result.
#' @param x LazyFrame
#' @return Integer
#' @export
nrow.LazyFrame = function(x) {
  collected = x$collect()
  collected$height
}

#' The Number of Columns of a DataFrame
#'
#' Reads the `$width` field of the DataFrame, the same field that
#' `length.DataFrame()` reports.
#' @param x DataFrame
#' @return Integer
#' @export
ncol.DataFrame = function(x) {
  # `$height` is the row count (see nrow.DataFrame); columns are `$width`.
  x$width
}

#' The Number of Columns of a LazyFrame
#'
#' Collects the LazyFrame and reads the `$width` field of the result, the
#' same field that `length.LazyFrame()` reports.
#' @param x LazyFrame
#' @return Integer
#' @export
ncol.LazyFrame = function(x) {
  # `$height` is the row count (see nrow.LazyFrame); columns are `$width`.
  x$collect()$width
}

#' Column names of a DataFrame
#' @param x DataFrame
#' @return the value of `x$columns`
#' @export
#' @noRd
names.DataFrame = function(x) {
  x$columns
}

# TODO: collecting just to read the column names is inefficient, but the
# attribute is missing on LazyFrame.
#' Column names of a LazyFrame
#' @param x LazyFrame
#' @return the `$columns` of the collected frame
#' @export
#' @noRd
names.LazyFrame = function(x) {
  collected = x$collect()
  collected$columns
}

#' Convert a LazyFrame to an R data.frame
#' @param x LazyFrame
#' @param ... forwarded to `$as_data_frame()` of the collected frame
#' @export
#' @noRd
as.data.frame.LazyFrame = function(x, ...) {
  # The lazy query is collected first, then the result is converted.
  collected = x$collect()
  collected$as_data_frame(...)
}

#' Convert a DataFrame to a matrix
#' @param x DataFrame
#' @param ... forwarded to `x$as_data_frame()`
#' @export
#' @noRd
as.matrix.DataFrame = function(x, ...) {
  # Go through an R data.frame and let base `as.matrix()` do the conversion.
  r_df = x$as_data_frame(...)
  as.matrix(r_df)
}

#' Convert a LazyFrame to a matrix
#' @param x LazyFrame
#' @param ... forwarded to `$as_data_frame()` of the collected frame
#' @export
#' @noRd
as.matrix.LazyFrame = function(x, ...) {
  # Collect the lazy query, convert to an R data.frame, then to a matrix.
  collected = x$collect()
  r_df = collected$as_data_frame(...)
  as.matrix(r_df)
}

#' Mean of a DataFrame
#' @param x DataFrame
#' @param ... ignored
#' @return the result of `x$mean()`
#' @export
#' @noRd
mean.DataFrame = function(x, ...) {
  x$mean()
}

#' Mean of a LazyFrame
#' @param x LazyFrame
#' @param ... ignored
#' @return the result of `x$mean()`
#' @export
#' @noRd
mean.LazyFrame = function(x, ...) {
  x$mean()
}

#' Median of a DataFrame
#' @param x DataFrame
#' @param ... ignored
#' @return the result of `x$median()`
#' @export
#' @importFrom stats median
#' @noRd
median.DataFrame = function(x, ...) {
  x$median()
}

#' Median of a LazyFrame
#' @param x LazyFrame
#' @param ... ignored
#' @return the result of `x$median()`
#' @export
#' @importFrom stats median
#' @noRd
median.LazyFrame = function(x, ...) {
  x$median()
}

#' Minimum of a DataFrame
#' @param x DataFrame
#' @param ... ignored
#' @return the result of `x$min()`
#' @export
#' @noRd
min.DataFrame = function(x, ...) {
  x$min()
}

#' Minimum of a LazyFrame
#' @param x LazyFrame
#' @param ... ignored
#' @return the result of `x$min()`
#' @export
#' @noRd
min.LazyFrame = function(x, ...) {
  x$min()
}

#' Minimum of a Series
#' @param x Series
#' @param ... ignored
#' @return the result of `x$min()`
#' @export
#' @noRd
min.Series = function(x, ...) {
  x$min()
}

#' Maximum of a DataFrame
#' @param x DataFrame
#' @param ... ignored
#' @return the result of `x$max()`
#' @export
#' @noRd
max.DataFrame = function(x, ...) {
  x$max()
}

#' Maximum of a LazyFrame
#' @param x LazyFrame
#' @param ... ignored
#' @return the result of `x$max()`
#' @export
#' @noRd
max.LazyFrame = function(x, ...) {
  x$max()
}

#' Maximum of a Series
#' @param x Series
#' @param ... ignored
#' @return the result of `x$max()`
#' @export
#' @noRd
max.Series = function(x, ...) {
  x$max()
}

#' Sum of a DataFrame
#' @param x DataFrame
#' @param ... ignored
#' @return the result of `x$sum()`
#' @export
#' @noRd
sum.DataFrame = function(x, ...) {
  x$sum()
}

#' Sum of a LazyFrame
#' @param x LazyFrame
#' @param ... ignored
#' @return the result of `x$sum()`
#' @export
#' @noRd
sum.LazyFrame = function(x, ...) {
  x$sum()
}

#' Sum of a Series
#' @param x Series
#' @param ... ignored
#' @return the result of `x$sum()`
#' @export
#' @noRd
sum.Series = function(x, ...) {
  x$sum()
}
17 changes: 17 additions & 0 deletions man/ncol.DataFrame.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 17 additions & 0 deletions man/ncol.LazyFrame.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 17 additions & 0 deletions man/nrow.DataFrame.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 17 additions & 0 deletions man/nrow.LazyFrame.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions tests/testthat/test-expr.R
Original file line number Diff line number Diff line change
Expand Up @@ -1012,8 +1012,8 @@ test_that("sort_by", {
expect_grepl_error(pl$lit(1:4)$sort_by(1)$to_r(),"Expected length\\: 4")
expect_grepl_error(pl$lit(1:4)$sort_by("blop")$to_r(),"column 'blop' not available in schema")
expect_grepl_error(pl$lit(1:4)$sort_by("blop")$to_r(),"column 'blop' not available in schema")
expect_grepl_error(pl$lit(1:4)$sort_by(df)$to_r(),"of sequence not convertable into an Expr")
expect_grepl_error(pl$lit(1:4)$sort_by(df)$to_r(),"of sequence not convertable into an Expr")
expect_grepl_error(pl$lit(1:4)$sort_by(df)$to_r(),"not convertable into.* Expr")
expect_grepl_error(pl$lit(1:4)$sort_by(df)$to_r(),"not convertable into.* Expr")

#this test is minimal, if polars give better documentation on behaviour, expand the test.
})
Expand Down
74 changes: 74 additions & 0 deletions tests/testthat/test-s3_methods.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# Aggregation cases: `pola` is the name of the polars method, `base` is the
# R function whose S3 method should give the same answer.
make_cases <- function() {
  tibble::tribble(
    ~.test_name, ~pola,    ~base,
    "mean",      "mean",   mean,
    "median",    "median", stats::median,
    "min",       "min",    min,
    "max",       "max",    max,
    "sum",       "sum",    sum,
  )
}
patrick::with_parameters_test_that(
  "aggregations",
  {
    polars_df = pl$DataFrame(mtcars)
    # Reference: call the polars method directly.
    via_method = polars_df[[pola]]()$as_data_frame()
    # S3 dispatch on the eager and the lazy frame.
    via_s3_eager = base(polars_df)$as_data_frame()
    via_s3_lazy = base(polars_df$lazy())$collect()$as_data_frame()
    # Plain R computed column by column.
    via_base_r = data.frame(t(sapply(mtcars, base)))
    expect_equal(via_method, via_s3_eager, ignore_attr = TRUE)
    expect_equal(via_method, via_s3_lazy, ignore_attr = TRUE)
    expect_equal(via_method, via_base_r, ignore_attr = TRUE)
  },
  .cases = make_cases()
)


# Inspection cases: each `FUN` is applied to mtcars, to the polars DataFrame
# and to the LazyFrame, and the three results are compared.
make_cases <- function() {
  tibble::tribble(
    ~.test_name, ~FUN,
    "head",      head,
    "tail",      tail,
    "nrow",      nrow,
    "ncol",      ncol,
    "length",    length,
    "as.matrix", as.matrix,
    "names",     names,
  )
}
patrick::with_parameters_test_that(
  "inspection",
  {
    polars_df = pl$DataFrame(mtcars)
    expected = FUN(mtcars)
    from_eager = FUN(polars_df)
    from_lazy = FUN(polars_df$lazy())
    # head()/tail() return polars objects; bring them back to R first.
    if (inherits(from_eager, "DataFrame")) {
      from_eager = from_eager$as_data_frame()
    }
    if (inherits(from_lazy, "LazyFrame")) {
      from_lazy = from_lazy$collect()$as_data_frame()
    }
    expect_equal(expected, from_eager, ignore_attr = TRUE)
    expect_equal(expected, from_lazy, ignore_attr = TRUE)
  },
  .cases = make_cases()
)


# Series cases: `pola` is the name of the Series method, `base` is the R
# function whose S3 method should give the same answer.
make_cases <- function() {
  tibble::tribble(
    ~.test_name, ~pola, ~base,
    "length",    "len", length,
    "min",       "min", min,
    "max",       "max", max,
    "sum",       "sum", sum,
  )
}
patrick::with_parameters_test_that(
  "Series",
  {
    polars_series = pl$Series(mtcars$mpg)
    expected = base(mtcars$mpg)
    from_s3 = base(polars_series)
    from_method = polars_series[[pola]]()
    # Results that come back as a Series are converted to R vectors.
    if (inherits(from_s3, "Series")) {
      from_s3 = from_s3$to_r_vector()
    }
    if (inherits(from_method, "Series")) {
      from_method = from_method$to_r_vector()
    }
    expect_equal(expected, from_s3, ignore_attr = TRUE)
    expect_equal(expected, from_method, ignore_attr = TRUE)
  },
  .cases = make_cases()
)
Loading

0 comments on commit e9d98de

Please sign in to comment.