From d236a44c5f9d864a8f5cbdbade8475ea5c71b55a Mon Sep 17 00:00:00 2001 From: Alexander Beedie Date: Mon, 10 Jun 2024 18:20:17 +0400 Subject: [PATCH] feat: Support homedir expansion in lazy/scan read functions --- crates/polars-lazy/src/scan/csv.rs | 11 +++++++++-- crates/polars-lazy/src/scan/ipc.rs | 11 +++++++++-- crates/polars-lazy/src/scan/ndjson.rs | 9 ++++++++- crates/polars-lazy/src/scan/parquet.rs | 9 ++++++++- 4 files changed, 34 insertions(+), 6 deletions(-) diff --git a/crates/polars-lazy/src/scan/csv.rs b/crates/polars-lazy/src/scan/csv.rs index 577441e3dee7..4045ee718aae 100644 --- a/crates/polars-lazy/src/scan/csv.rs +++ b/crates/polars-lazy/src/scan/csv.rs @@ -5,6 +5,7 @@ use polars_io::cloud::CloudOptions; use polars_io::csv::read::{ infer_file_schema, CommentPrefix, CsvEncoding, CsvParseOptions, CsvReadOptions, NullValues, }; +use polars_io::prelude::resolve_homedir; use polars_io::utils::get_reader_bytes; use polars_io::RowIndex; @@ -35,7 +36,7 @@ impl LazyCsvReader { pub fn new(path: impl AsRef<Path>) -> Self { LazyCsvReader { - path: path.as_ref().to_owned(), + path: resolve_homedir(path.as_ref()), paths: Arc::new([]), glob: true, cache: true, @@ -305,11 +306,17 @@ impl LazyFileListReader for LazyCsvReader { } fn with_path(mut self, path: PathBuf) -> Self { - self.path = path; + self.path = resolve_homedir(&path); self } fn with_paths(mut self, paths: Arc<[PathBuf]>) -> Self { + let paths = paths + .iter() + .map(|p| resolve_homedir(p)) + .collect::<Vec<_>>() + .into(); + self.paths = paths; self } diff --git a/crates/polars-lazy/src/scan/ipc.rs b/crates/polars-lazy/src/scan/ipc.rs index 5672802a61f2..498eebf4a04d 100644 --- a/crates/polars-lazy/src/scan/ipc.rs +++ b/crates/polars-lazy/src/scan/ipc.rs @@ -3,6 +3,7 @@ use std::path::{Path, PathBuf}; use polars_core::prelude::*; use polars_io::cloud::CloudOptions; use polars_io::ipc::IpcScanOptions; +use polars_io::utils::resolve_homedir; use polars_io::RowIndex; use crate::prelude::*; @@ -41,7 +42,7 @@ impl 
LazyIpcReader { fn new(path: PathBuf, args: ScanArgsIpc) -> Self { Self { args, - path, + path: resolve_homedir(&path), paths: Arc::new([]), } } @@ -96,11 +97,17 @@ impl LazyFileListReader for LazyIpcReader { } fn with_path(mut self, path: PathBuf) -> Self { - self.path = path; + self.path = resolve_homedir(&path); self } fn with_paths(mut self, paths: Arc<[PathBuf]>) -> Self { + let paths = paths + .iter() + .map(|p| resolve_homedir(p)) + .collect::<Vec<_>>() + .into(); + self.paths = paths; self } diff --git a/crates/polars-lazy/src/scan/ndjson.rs b/crates/polars-lazy/src/scan/ndjson.rs index e2b2691e4e08..996d066a01c8 100644 --- a/crates/polars-lazy/src/scan/ndjson.rs +++ b/crates/polars-lazy/src/scan/ndjson.rs @@ -3,6 +3,7 @@ use std::path::{Path, PathBuf}; use std::sync::RwLock; use polars_core::prelude::*; +use polars_io::utils::resolve_homedir; use polars_io::RowIndex; use super::*; @@ -114,11 +115,17 @@ impl LazyFileListReader for LazyJsonLineReader { } fn with_path(mut self, path: PathBuf) -> Self { - self.path = path; + self.path = resolve_homedir(&path); self } fn with_paths(mut self, paths: Arc<[PathBuf]>) -> Self { + let paths = paths + .iter() + .map(|p| resolve_homedir(p)) + .collect::<Vec<_>>() + .into(); + self.paths = paths; self } diff --git a/crates/polars-lazy/src/scan/parquet.rs b/crates/polars-lazy/src/scan/parquet.rs index 4e57b25d42bf..e135099b13a8 100644 --- a/crates/polars-lazy/src/scan/parquet.rs +++ b/crates/polars-lazy/src/scan/parquet.rs @@ -3,6 +3,7 @@ use std::path::{Path, PathBuf}; use polars_core::prelude::*; use polars_io::cloud::CloudOptions; use polars_io::parquet::read::ParallelStrategy; +use polars_io::prelude::resolve_homedir; use polars_io::{HiveOptions, RowIndex}; use crate::prelude::*; @@ -112,11 +113,17 @@ impl LazyFileListReader for LazyParquetReader { } fn with_path(mut self, path: PathBuf) -> Self { - self.path = path; + self.path = resolve_homedir(&path); self } fn with_paths(mut self, paths: Arc<[PathBuf]>) -> Self { + let paths 
= paths + .iter() + .map(|p| resolve_homedir(p)) + .collect::<Vec<_>>() + .into(); + self.paths = paths; self }