From 4c0a3f127cb921a246a88d752ae17b4d968cbd93 Mon Sep 17 00:00:00 2001 From: Kyle Husmann Date: Wed, 17 Jul 2024 15:23:28 -0700 Subject: [PATCH] add support for collector-level na args to the backend --- src/collectors.h | 7 +++++-- src/columns.h | 5 ++++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/collectors.h b/src/collectors.h index 30fdad68..0854401c 100644 --- a/src/collectors.h +++ b/src/collectors.h @@ -54,6 +54,7 @@ class collector { type_(derive_type(cpp11::strings(data_.attr("class"))[0])), altrep_(altrep) {} column_type type() const { return type_; } + SEXP na() const { return data_["na"]; } SEXP name() const { return name_; } SEXP operator[](const char* nme) { return data_[nme]; } bool use_altrep() { @@ -170,6 +171,8 @@ inline collectors resolve_collectors( std::string my_col_type = cpp11::strings(my_collector.attr("class"))[0]; if (my_col_type == "collector_guess") { + auto my_col_na = my_collector["na"]; + auto my_col_na_res = Rf_isNull(my_col_na) ? na : my_col_na; cpp11::writable::strings col_vals(guess_num); for (R_xlen_t j = 0; j < guess_num - 1; ++j) { size_t row = j * guess_step; @@ -187,10 +190,10 @@ inline collectors resolve_collectors( locale_info->encoder_.makeSEXP(str.begin(), str.end(), true); } - auto type = guess_type__(col_vals, na, locale_info.get(), false); + auto type = guess_type__(col_vals, my_col_na_res, locale_info.get(), false); auto fun_name = std::string("col_") + type; auto col_type = vroom[fun_name.c_str()]; - my_collectors[col] = col_type(); + my_collectors[col] = col_type("na"_nm = my_col_na); } } diff --git a/src/columns.h b/src/columns.h index 68216e99..aee2037d 100644 --- a/src/columns.h +++ b/src/columns.h @@ -129,11 +129,14 @@ inline cpp11::list create_columns( continue; } + auto col_na = collector.na(); + auto col_na_res = Rf_isNull(col_na) ? na : cpp11::strings(col_na); + // This is deleted in the finalizers when the vectors are GC'd by R auto info = new vroom_vec_info{ idx->get_column(col), num_threads, - std::make_shared(na), + std::make_shared(col_na_res), locale_info, *errors, std::string()};