From 4a922320204e610bfe7bbd1358619159ac02cb19 Mon Sep 17 00:00:00 2001 From: Kyle Husmann Date: Wed, 17 Jul 2024 14:18:14 -0700 Subject: [PATCH] add na argument to collectors --- DESCRIPTION | 2 +- R/col_types.R | 55 ++++++++++++++++++++++++++++-------------------- R/generator.R | 4 +++- man/cols.Rd | 28 ++++++++++++++---------- man/vroom_fwf.Rd | 8 ++++--- 5 files changed, 58 insertions(+), 39 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 6fed3f35..991130e5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -79,4 +79,4 @@ Copyright: file COPYRIGHTS Encoding: UTF-8 Language: en-US Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.3.9000 +RoxygenNote: 7.3.2 diff --git a/R/col_types.R b/R/col_types.R index 6fd94d47..454679aa 100644 --- a/R/col_types.R +++ b/R/col_types.R @@ -32,6 +32,11 @@ #' @param .delim The delimiter to use when parsing. If the `delim` argument #' used in the call to `vroom()` it takes precedence over the one specified in #' `col_types`. +#' @param na Character vector of strings to interpret as missing values for the +#' column. If `NULL`, the column will use the missing values specified in the +#' `na` argument in the call to `vroom()`, otherwise, the missing values +#' specified here will take precedence. Set this option to `character()` to +#' indicate no missing values for the column. #' @export #' @aliases col_types #' @examples @@ -207,7 +212,11 @@ format.col_spec <- function(x, n = Inf, condense = NULL, colour = crayon::has_co vapply(seq_along(cols), function(i) { col_funs <- sub("^collector_", "col_", class(cols[[i]])[[1]]) - args <- vapply(cols[[i]], deparse2, character(1), sep = "\n ") + args <- cols[[i]] + if (is.null(args[["na"]])) { + args[["na"]] <- NULL + } + args <- vapply(args, deparse2, character(1), sep = "\n ") args <- paste(names(args), args, sep = " = ", collapse = ", ") col_funs <- paste0(col_funs, "(", args, ")") @@ -624,32 +633,32 @@ color_type <- function(type) { #' @rdname cols #' @export -col_logical <- function(...) { - collector("logical", ...) +col_logical <- function(na = NULL, ...) { + collector("logical", na = na, ...) } #' @rdname cols #' @export -col_integer <- function(...) { - collector("integer", ...) +col_integer <- function(na = NULL, ...) { + collector("integer", na = na, ...) } #' @rdname cols #' @export -col_big_integer <- function(...) { - collector("big_integer", ...) +col_big_integer <- function(na = NULL, ...) { + collector("big_integer", na = na, ...) } #' @rdname cols #' @export -col_double <- function(...) { - collector("double", ...) +col_double <- function(na = NULL, ...) { + collector("double", na = na, ...) } #' @rdname cols #' @export -col_character <- function(...) { - collector("character", ...) +col_character <- function(na = NULL, ...) { + collector("character", na = na, ...) } #' @rdname cols @@ -660,38 +669,38 @@ col_skip <- function(...) { #' @rdname cols #' @export -col_number <- function(...) { - collector("number", ...) +col_number <- function(na = NULL, ...) { + collector("number", na = na, ...) } #' @rdname cols #' @export -col_guess <- function(...) { - collector("guess", ...) +col_guess <- function(na = NULL, ...) { + collector("guess", na = na, ...) } #' @inheritParams readr::col_factor #' @rdname cols #' @export -col_factor <- function(levels = NULL, ordered = FALSE, include_na = FALSE, ...) { - collector("factor", levels = levels, ordered = ordered, include_na = include_na, ...) +col_factor <- function(levels = NULL, ordered = FALSE, include_na = FALSE, na = NULL, ...) { + collector("factor", levels = levels, ordered = ordered, include_na = include_na, na = na, ...) } #' @inheritParams readr::col_datetime #' @rdname cols #' @export -col_datetime <- function(format = "", ...) { - collector("datetime", format = format, ...) +col_datetime <- function(format = "", na = NULL, ...) { + collector("datetime", format = format, na = na, ...) } #' @rdname cols #' @export -col_date <- function(format = "", ...) { - collector("date", format = format, ...) +col_date <- function(format = "", na = NULL, ...) { + collector("date", format = format, na = na, ...) } #' @rdname cols #' @export -col_time <- function(format = "", ...) { - collector("time", format = format, ...) +col_time <- function(format = "", na = NULL, ...) { + collector("time", format = format, na = na, ...) } diff --git a/R/generator.R b/R/generator.R index a7c44f45..eb56286d 100644 --- a/R/generator.R +++ b/R/generator.R @@ -171,7 +171,9 @@ gen_tbl <- function(rows, cols = NULL, col_types = NULL, locale = default_locale specs$cols[[i]] <- do.call(paste0("col_", type), list()) } fun_nme <- paste0("gen_", type) - res[[i]] <- do.call(fun_nme, c(rows, specs$cols[[i]])) + args <- specs$cols[[i]] + args[["na"]] <- NULL + res[[i]] <- do.call(fun_nme, c(rows, args)) } if (missing > 0) { diff --git a/man/cols.Rd b/man/cols.Rd index c50b21f5..4930169c 100644 --- a/man/cols.Rd +++ b/man/cols.Rd @@ -22,29 +22,29 @@ cols(..., .default = col_guess(), .delim = NULL) cols_only(...) -col_logical(...) +col_logical(na = NULL, ...) -col_integer(...) +col_integer(na = NULL, ...) -col_big_integer(...) +col_big_integer(na = NULL, ...) -col_double(...) +col_double(na = NULL, ...) -col_character(...) +col_character(na = NULL, ...) col_skip(...) -col_number(...) +col_number(na = NULL, ...) -col_guess(...) +col_guess(na = NULL, ...) -col_factor(levels = NULL, ordered = FALSE, include_na = FALSE, ...) +col_factor(levels = NULL, ordered = FALSE, include_na = FALSE, na = NULL, ...) -col_datetime(format = "", ...) +col_datetime(format = "", na = NULL, ...) -col_date(format = "", ...) +col_date(format = "", na = NULL, ...) -col_time(format = "", ...) +col_time(format = "", na = NULL, ...) } \arguments{ \item{...}{Either column objects created by \verb{col_*()}, or their abbreviated @@ -61,6 +61,12 @@ will be read with this column type.} used in the call to \code{vroom()} it takes precedence over the one specified in \code{col_types}.} +\item{na}{Character vector of strings to interpret as missing values for the +column. If \code{NULL}, the column will use the missing values specified in the +\code{na} argument in the call to \code{vroom()}, otherwise, the missing values +specified here will take precedence. Set this option to \code{character()} to +indicate no missing values for the column.} + \item{levels}{Character vector of the allowed levels. When \code{levels = NULL} (the default), \code{levels} are discovered from the unique values of \code{x}, in the order in which they appear in \code{x}.} diff --git a/man/vroom_fwf.Rd b/man/vroom_fwf.Rd index 58b29d23..24eb1132 100644 --- a/man/vroom_fwf.Rd +++ b/man/vroom_fwf.Rd @@ -90,7 +90,7 @@ character represents one column: By default, reading a file without a column specification will print a message showing what \code{readr} guessed they were. To remove this message, -set \code{show_col_types = FALSE} or set `options(readr.show_col_types = FALSE).} +set \code{show_col_types = FALSE} or set \code{options(readr.show_col_types = FALSE)}.} \item{col_select}{Columns to include in the results. You can use the same mini-language as \code{dplyr::select()} to refer to the columns by name. Use @@ -159,9 +159,11 @@ supported: \itemize{ \item \code{"minimal"}: No name repair or checks, beyond basic existence of names. \item \code{"unique"} (default value): Make sure names are unique and not empty. -\item \code{"check_unique"}: no name repair, but check they are \code{unique}. +\item \code{"check_unique"}: No name repair, but check they are \code{unique}. +\item \code{"unique_quiet"}: Repair with the \code{unique} strategy, quietly. \item \code{"universal"}: Make the names \code{unique} and syntactic. -\item A function: apply custom name repair (e.g., \code{name_repair = make.names} +\item \code{"universal_quiet"}: Repair with the \code{universal} strategy, quietly. +\item A function: Apply custom name repair (e.g., \code{name_repair = make.names} for names in the style of base R). \item A purrr-style anonymous function, see \code{\link[rlang:as_function]{rlang::as_function()}}. }