From 4a922320204e610bfe7bbd1358619159ac02cb19 Mon Sep 17 00:00:00 2001
From: Kyle Husmann <kyle.husmann@gmail.com>
Date: Wed, 17 Jul 2024 14:18:14 -0700
Subject: [PATCH] add na argument to collectors

---
 DESCRIPTION      |  2 +-
 R/col_types.R    | 55 ++++++++++++++++++++++++++++--------------------
 R/generator.R    |  4 +++-
 man/cols.Rd      | 28 ++++++++++++++----------
 man/vroom_fwf.Rd |  8 ++++---
 5 files changed, 58 insertions(+), 39 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 6fed3f35..991130e5 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -79,4 +79,4 @@ Copyright: file COPYRIGHTS
 Encoding: UTF-8
 Language: en-US
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.2.3.9000
+RoxygenNote: 7.3.2
diff --git a/R/col_types.R b/R/col_types.R
index 6fd94d47..454679aa 100644
--- a/R/col_types.R
+++ b/R/col_types.R
@@ -32,6 +32,11 @@
 #' @param .delim The delimiter to use when parsing. If the `delim` argument
 #'   used in the call to `vroom()` it takes precedence over the one specified in
 #'   `col_types`.
+#' @param na Character vector of strings to interpret as missing values for the
+#'   column. If `NULL`, the column will use the missing values specified in the
+#'   `na` argument in the call to `vroom()`; otherwise, the missing values
+#'   specified here will take precedence. Set this option to `character()` to
+#'   indicate no missing values for the column.
 #' @export
 #' @aliases col_types
 #' @examples
@@ -207,7 +212,11 @@ format.col_spec <- function(x, n = Inf, condense = NULL, colour = crayon::has_co
     vapply(seq_along(cols),
       function(i) {
         col_funs <- sub("^collector_", "col_", class(cols[[i]])[[1]])
-        args <- vapply(cols[[i]], deparse2, character(1), sep = "\n    ")
+        args <- cols[[i]]
+        if (is.null(args[["na"]])) {
+          args[["na"]] <- NULL
+        }
+        args <- vapply(args, deparse2, character(1), sep = "\n    ")
         args <- paste(names(args), args, sep = " = ", collapse = ", ")
 
         col_funs <- paste0(col_funs, "(", args, ")")
@@ -624,32 +633,32 @@ color_type <- function(type) {
 
 #' @rdname cols
 #' @export
-col_logical <- function(...) {
-  collector("logical", ...)
+col_logical <- function(na = NULL, ...) {
+  collector("logical", na = na, ...)
 }
 
 #' @rdname cols
 #' @export
-col_integer <- function(...) {
-  collector("integer", ...)
+col_integer <- function(na = NULL, ...) {
+  collector("integer", na = na, ...)
 }
 
 #' @rdname cols
 #' @export
-col_big_integer <- function(...) {
-  collector("big_integer", ...)
+col_big_integer <- function(na = NULL, ...) {
+  collector("big_integer", na = na, ...)
 }
 
 #' @rdname cols
 #' @export
-col_double <- function(...) {
-  collector("double", ...)
+col_double <- function(na = NULL, ...) {
+  collector("double", na = na, ...)
 }
 
 #' @rdname cols
 #' @export
-col_character <- function(...) {
-  collector("character", ...)
+col_character <- function(na = NULL, ...) {
+  collector("character", na = na, ...)
 }
 
 #' @rdname cols
@@ -660,38 +669,38 @@ col_skip <- function(...) {
 
 #' @rdname cols
 #' @export
-col_number <- function(...) {
-  collector("number", ...)
+col_number <- function(na = NULL, ...) {
+  collector("number", na = na, ...)
 }
 
 #' @rdname cols
 #' @export
-col_guess <- function(...) {
-  collector("guess", ...)
+col_guess <- function(na = NULL, ...) {
+  collector("guess", na = na, ...)
 }
 
 #' @inheritParams readr::col_factor
 #' @rdname cols
 #' @export
-col_factor <- function(levels = NULL, ordered = FALSE, include_na = FALSE, ...) {
-  collector("factor", levels = levels, ordered = ordered, include_na = include_na, ...)
+col_factor <- function(levels = NULL, ordered = FALSE, include_na = FALSE, na = NULL, ...) {
+  collector("factor", levels = levels, ordered = ordered, include_na = include_na, na = na, ...)
 }
 
 #' @inheritParams readr::col_datetime
 #' @rdname cols
 #' @export
-col_datetime <- function(format = "", ...) {
-  collector("datetime", format = format, ...)
+col_datetime <- function(format = "", na = NULL, ...) {
+  collector("datetime", format = format, na = na, ...)
 }
 
 #' @rdname cols
 #' @export
-col_date <- function(format = "", ...) {
-  collector("date", format = format, ...)
+col_date <- function(format = "", na = NULL, ...) {
+  collector("date", format = format, na = na, ...)
 }
 
 #' @rdname cols
 #' @export
-col_time <- function(format = "", ...) {
-  collector("time", format = format, ...)
+col_time <- function(format = "", na = NULL, ...) {
+  collector("time", format = format, na = na, ...)
 }
diff --git a/R/generator.R b/R/generator.R
index a7c44f45..eb56286d 100644
--- a/R/generator.R
+++ b/R/generator.R
@@ -171,7 +171,9 @@ gen_tbl <- function(rows, cols = NULL, col_types = NULL, locale = default_locale
       specs$cols[[i]] <- do.call(paste0("col_", type), list())
     }
     fun_nme <- paste0("gen_", type)
-    res[[i]] <- do.call(fun_nme, c(rows, specs$cols[[i]]))
+    args <- specs$cols[[i]]
+    args[["na"]] <- NULL
+    res[[i]] <- do.call(fun_nme, c(rows, args))
   }
 
   if (missing > 0) {
diff --git a/man/cols.Rd b/man/cols.Rd
index c50b21f5..4930169c 100644
--- a/man/cols.Rd
+++ b/man/cols.Rd
@@ -22,29 +22,29 @@ cols(..., .default = col_guess(), .delim = NULL)
 
 cols_only(...)
 
-col_logical(...)
+col_logical(na = NULL, ...)
 
-col_integer(...)
+col_integer(na = NULL, ...)
 
-col_big_integer(...)
+col_big_integer(na = NULL, ...)
 
-col_double(...)
+col_double(na = NULL, ...)
 
-col_character(...)
+col_character(na = NULL, ...)
 
 col_skip(...)
 
-col_number(...)
+col_number(na = NULL, ...)
 
-col_guess(...)
+col_guess(na = NULL, ...)
 
-col_factor(levels = NULL, ordered = FALSE, include_na = FALSE, ...)
+col_factor(levels = NULL, ordered = FALSE, include_na = FALSE, na = NULL, ...)
 
-col_datetime(format = "", ...)
+col_datetime(format = "", na = NULL, ...)
 
-col_date(format = "", ...)
+col_date(format = "", na = NULL, ...)
 
-col_time(format = "", ...)
+col_time(format = "", na = NULL, ...)
 }
 \arguments{
 \item{...}{Either column objects created by \verb{col_*()}, or their abbreviated
@@ -61,6 +61,12 @@ will be read with this column type.}
 used in the call to \code{vroom()} it takes precedence over the one specified in
 \code{col_types}.}
 
+\item{na}{Character vector of strings to interpret as missing values for the
+column. If \code{NULL}, the column will use the missing values specified in the
+\code{na} argument in the call to \code{vroom()}; otherwise, the missing values
+specified here will take precedence. Set this option to \code{character()} to
+indicate no missing values for the column.}
+
 \item{levels}{Character vector of the allowed levels. When \code{levels = NULL}
 (the default), \code{levels} are discovered from the unique values of \code{x}, in
 the order in which they appear in \code{x}.}
diff --git a/man/vroom_fwf.Rd b/man/vroom_fwf.Rd
index 58b29d23..24eb1132 100644
--- a/man/vroom_fwf.Rd
+++ b/man/vroom_fwf.Rd
@@ -90,7 +90,7 @@ character represents one column:
 
 By default, reading a file without a column specification will print a
 message showing what \code{readr} guessed they were. To remove this message,
-set \code{show_col_types = FALSE} or set `options(readr.show_col_types = FALSE).}
+set \code{show_col_types = FALSE} or set \code{options(readr.show_col_types = FALSE)}.}
 
 \item{col_select}{Columns to include in the results. You can use the same
 mini-language as \code{dplyr::select()} to refer to the columns by name. Use
@@ -159,9 +159,11 @@ supported:
 \itemize{
 \item \code{"minimal"}: No name repair or checks, beyond basic existence of names.
 \item \code{"unique"} (default value): Make sure names are unique and not empty.
-\item \code{"check_unique"}: no name repair, but check they are \code{unique}.
+\item \code{"check_unique"}: No name repair, but check they are \code{unique}.
+\item \code{"unique_quiet"}: Repair with the \code{unique} strategy, quietly.
 \item \code{"universal"}: Make the names \code{unique} and syntactic.
-\item A function: apply custom name repair (e.g., \code{name_repair = make.names}
+\item \code{"universal_quiet"}: Repair with the \code{universal} strategy, quietly.
+\item A function: Apply custom name repair (e.g., \code{name_repair = make.names}
 for names in the style of base R).
 \item A purrr-style anonymous function, see \code{\link[rlang:as_function]{rlang::as_function()}}.
 }