Skip to content

Commit

Permalink
Improved rust docs (#926)
Browse files Browse the repository at this point in the history
  • Loading branch information
kylebarron authored Dec 10, 2024
1 parent 8205530 commit edc2919
Show file tree
Hide file tree
Showing 14 changed files with 148 additions and 26 deletions.
2 changes: 1 addition & 1 deletion python/geoarrow-io/src/io/csv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ pub fn read_csv(

file.seek(SeekFrom::Start(pos))?;

let record_batch_reader = csv::read_csv(file, schema.into(), options)?;
let record_batch_reader = csv::read_csv(file, schema, options)?;
let schema = record_batch_reader.schema();
let batches = record_batch_reader.collect::<std::result::Result<Vec<_>, _>>()?;
Ok(PyTable::try_new(batches, schema)?.into())
Expand Down
10 changes: 10 additions & 0 deletions rust/geoarrow/src/array/metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,10 @@ impl ArrayMetadata {
Self::default().with_unknown_crs_type(value)
}

/// Construct metadata whose CRS is the given authority code (for example `"EPSG:4326"`).
///
/// This starts from [`Self::default`] and then applies [`Self::with_authority_code`], so every
/// field other than the CRS and CRS type keeps its default value.
pub fn from_authority_code(value: String) -> Self {
    let metadata = Self::default();
    metadata.with_authority_code(value)
}

pub fn with_projjson(mut self, value: Value) -> Self {
self.crs = Some(value);
self.crs_type = Some(CRSType::Projjson);
Expand All @@ -122,6 +126,12 @@ impl ArrayMetadata {
self
}

/// Set the CRS to an authority code and return the updated metadata.
///
/// The code is stored as a JSON string value in `crs`, and `crs_type` is set to
/// [`CRSType::AuthorityCode`]. Takes `self` by value so calls can be chained builder-style.
pub fn with_authority_code(mut self, value: String) -> Self {
    let authority_code = Value::String(value);
    self.crs_type = Some(CRSType::AuthorityCode);
    self.crs = Some(authority_code);
    self
}

pub fn with_edges(mut self, edges: Edges) -> Self {
self.edges = Some(edges);
self
Expand Down
7 changes: 6 additions & 1 deletion rust/geoarrow/src/io/crs.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
//! Defines CRS transforms used for writing GeoArrow data to file formats that require different
//! CRS representations.
use std::fmt::Debug;

use serde_json::Value;
Expand Down Expand Up @@ -51,7 +54,9 @@ pub trait CRSTransform: Debug {
}
}

/// A default implementation for [CRSTransform] which errors on any CRS conversion.
/// A default implementation for [CRSTransform] which does not do any CRS conversion.
///
/// Instead of raising an error, this will **silently drop any CRS information when writing data**.
#[derive(Debug, Clone, Default)]
pub struct DefaultCRSTransform {}

Expand Down
42 changes: 42 additions & 0 deletions rust/geoarrow/src/io/csv/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,46 @@
//! Read from and write to CSV files.
//!
//! # Examples
//!
//! ```
//! use std::io::{Cursor, Seek};
//!
//! use arrow_array::RecordBatchReader;
//!
//! use geoarrow::array::CoordType;
//! use geoarrow::io::csv::{infer_csv_schema, read_csv, CSVReaderOptions};
//! use geoarrow::table::Table;
//!
//! let s = r#"
//! address,type,datetime,report location,incident number
//! 904 7th Av,Car Fire,05/22/2019 12:55:00 PM,POINT (-122.329051 47.6069),F190051945
//! 9610 53rd Av S,Aid Response,05/22/2019 12:55:00 PM,POINT (-122.266529 47.515984),F190051946"#;
//! let mut cursor = Cursor::new(s);
//!
//! let options = CSVReaderOptions {
//! coord_type: CoordType::Separated,
//! geometry_column_name: Some("report location".to_string()),
//! has_header: Some(true),
//! ..Default::default()
//! };
//!
//! // Note: this initial schema currently represents the CSV data _on disk_. That is, the
//! // geometry column is represented as a string. This may change in the future.
//! let (schema, _read_records, _geometry_column_name) =
//! infer_csv_schema(&mut cursor, &options).unwrap();
//! cursor.rewind().unwrap();
//!
//! // `read_csv` returns a RecordBatchReader, which enables streaming the CSV without reading
//! // all of it.
//! let record_batch_reader = read_csv(cursor, schema, options).unwrap();
//! let geospatial_schema = record_batch_reader.schema();
//! let table = Table::try_new(
//! record_batch_reader.collect::<Result<_, _>>().unwrap(),
//! geospatial_schema,
//! )
//! .unwrap();
//! ```
//!
pub use reader::{infer_csv_schema, read_csv, CSVReaderOptions};
pub use writer::write_csv;
Expand Down
19 changes: 12 additions & 7 deletions rust/geoarrow/src/io/csv/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,29 +95,34 @@ impl Default for CSVReaderOptions {
}
}

/// Infer a CSV file's schema
/// Infer a CSV file's schema.
///
/// By default, the reader will **scan the entire CSV file** to infer the data's
/// schema. If your data is large, you can limit the number of records scanned
/// with the [CSVReaderOptions].
///
/// Returns `(SchemaRef, records_read, geometry column name)`.
///
/// Note that the geometry column in the Schema is still left as a String.
pub fn infer_csv_schema(
reader: impl Read,
options: &CSVReaderOptions,
) -> Result<(Schema, usize, String)> {
) -> Result<(SchemaRef, usize, String)> {
let format = options.to_format();
let (schema, records_read) = format.infer_schema(reader, options.max_records)?;

let geometry_col_name = find_geometry_column(&schema, options.geometry_column_name.as_deref())?;

Ok((schema, records_read, geometry_col_name))
Ok((Arc::new(schema), records_read, geometry_col_name))
}

/// Read a CSV file to a Table
/// Read a CSV file to a [RecordBatchReader].
///
/// This expects a geometry to be encoded as WKT within one column.
///
/// Note that this is Read and not Read + Seek. This means that you must infer the schema yourself
/// before calling this function. This allows using with objects that are only `Read` in the case
/// when you already know the file's schema.
/// Note that the input required here is [`Read`] and not [`Read`] + [`Seek`][std::io::Seek]. This
/// means that you must infer the schema yourself before calling this function. This allows it to
/// be used with objects that only implement `Read` when you already know the file's schema.
///
/// This schema is expected to be the schema inferred by `arrow-csv`'s
/// [`infer_schema`][Format::infer_schema]. That means the geometry should be a string in the
Expand Down
2 changes: 2 additions & 0 deletions rust/geoarrow/src/io/gdal/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
//! Read-only integration with [GDAL][gdal].
mod reader;

pub use reader::read_gdal;
2 changes: 1 addition & 1 deletion rust/geoarrow/src/io/geos/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
//! Export to and import from data structures of the [`geos`] crate.
mod array;
pub mod scalar;
pub(crate) mod scalar;
4 changes: 2 additions & 2 deletions rust/geoarrow/src/io/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,15 @@
pub mod crs;
#[cfg(feature = "csv")]
pub mod csv;
pub mod display;
pub(crate) mod display;
#[cfg(feature = "flatgeobuf")]
pub mod flatgeobuf;
#[cfg(feature = "gdal")]
pub mod gdal;
pub mod geojson;
pub mod geojson_lines;
#[cfg(feature = "geos")]
pub mod geos;
pub(crate) mod geos;
pub mod geozero;
pub mod ipc;
#[cfg(feature = "parquet")]
Expand Down
4 changes: 4 additions & 0 deletions rust/geoarrow/src/io/shapefile/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
//! Read from [Shapefile](https://www.esri.com/content/dam/esrisites/sitecore-archive/Files/Pdfs/library/whitepapers/pdfs/shapefile.pdf) datasets.
//!
//! This wraps the [shapefile] crate.
mod reader;
mod scalar;

Expand Down
4 changes: 2 additions & 2 deletions rust/geoarrow/src/io/stream.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ use crate::table::Table;
use arrow_array::{RecordBatchIterator, RecordBatchReader as _RecordBatchReader};
use arrow_schema::SchemaRef;

/// A newtype wrapper around an [arrow_array::RecordBatchReader] so that we can impl the
/// [geozero::GeozeroDatasource] trait.
/// A newtype wrapper around an [`arrow_array::RecordBatchReader`] so that we can implement the
/// [`geozero::GeozeroDatasource`] trait on it.
pub struct RecordBatchReader(Option<Box<dyn _RecordBatchReader>>);

impl RecordBatchReader {
Expand Down
5 changes: 4 additions & 1 deletion rust/geoarrow/src/io/wkb/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
//! An optimized implementation of reading and writing ISO-flavored WKB-encoded geometries.
//! Read and write geometries encoded as [Well-Known Binary](https://libgeos.org/specifications/wkb/).
//!
//! This wraps the [wkb] crate. As such, it currently supports reading the ISO and extended (EWKB)
//! variants of WKB. Currently, it always writes the ISO WKB variant.
mod api;
pub(crate) mod writer;
Expand Down
57 changes: 57 additions & 0 deletions rust/geoarrow/src/io/wkt/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,60 @@
//! Read and write geometries encoded as [Well-Known Text](https://en.wikipedia.org/wiki/Well-known_text_representation_of_geometry).
//!
//! ## Example
//!
//! ```
//! use std::sync::Arc;
//!
//! use arrow_array::StringArray;
//!
//! use geoarrow::array::metadata::ArrayMetadata;
//! use geoarrow::array::{AsNativeArray, CoordType, GeometryArray, WKTArray};
//! use geoarrow::datatypes::NativeType;
//! use geoarrow::io::wkt::{read_wkt, ToWKT};
//! use geoarrow::trait_::ArrayAccessor;
//! use geoarrow::NativeArray;
//!
//! // Start with some WKT data
//! let wkt_strings = vec![
//! "POINT(30 10)",
//! "LINESTRING(30 10, 10 30, 40 40)",
//! "POLYGON((30 10, 40 40, 20 40, 10 20, 30 10))",
//! ];
//!
//! // Construct an Arrow StringArray from this data
//! let arrow_arr = StringArray::from_iter_values(wkt_strings);
//!
//! // GeoArrow has a `WKTArray` concept in order to associate geospatial metadata with WKT data.
//! // Here, we associate CRS information with the WKT array, which will be maintained in the
//! // parsed representation.
//! let array_metadata = Arc::new(ArrayMetadata::from_authority_code("EPSG:4326".to_string()));
//! let wkt_array = WKTArray::new(arrow_arr, array_metadata);
//!
//! // Parse this WKT array to an `Arc<dyn NativeArray>`
//! let geometry_array: Arc<dyn NativeArray> =
//! read_wkt(&wkt_array, CoordType::Separated, false).unwrap();
//!
//! // All parsed WKT data currently has `NativeType::Geometry`, because there's no way to know in
//! // advance what the geometry type of the WKT is.
//! assert!(matches!(
//! geometry_array.data_type(),
//! NativeType::Geometry(CoordType::Separated)
//! ));
//!
//! // Now we can downcast the dynamic reference to a concrete `GeometryArray`, and access a value
//! // as a `geo::Geometry`
//! let geometry_array_ref = geometry_array.as_ref();
//! let downcasted: &GeometryArray = geometry_array_ref.as_geometry();
//! assert!(matches!(
//! downcasted.value_as_geo(0),
//! geo::Geometry::Point(geo::Point(geo::Coord { x: 30.0, y: 10.0 }))
//! ));
//!
//! // Then we can write back to WKT
//! let wkt_array_again: WKTArray<i32> = downcasted.as_ref().to_wkt().unwrap();
//! assert_eq!(wkt_array_again.into_inner().value(0), "POINT(30 10)")
//! ```
mod reader;
mod writer;

Expand Down
14 changes: 4 additions & 10 deletions rust/geoarrow/src/io/wkt/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,13 @@ use wkt::to_wkt::{
write_multi_point, write_multi_polygon, write_point, write_polygon, write_rect,
};

/// Serialize a geometry array to Well-Known Text (WKT).
///
/// The offset size of the produced Arrow string array is chosen by the caller through the `O`
/// type parameter of [`ToWKT::to_wkt`].
pub trait ToWKT {
/// The output type of the operation, generic over the offset size `O`.
///
/// You can specify whether you want to use i32 or i64 offsets for the Arrow string array.
type Output<O: OffsetSizeTrait>;

/// Convert `self` to WKT, producing an output that uses offsets of type `O`.
fn to_wkt<O: OffsetSizeTrait>(&self) -> Self::Output<O>;
}

Expand All @@ -41,16 +45,6 @@ impl ToWKT for &dyn NativeArray {
}

match self.data_type() {
// Point(_, _) => {
// for maybe_geom in self.as_point().iter() {
// if let Some(geom) = maybe_geom {
// write_point(&mut output_array, &geom)?;
// output_array.append_value("");
// } else {
// output_array.append_null();
// }
// }
// }
Point(_, _) => impl_to_wkt!(as_point, write_point),
LineString(_, _) => impl_to_wkt!(as_line_string, write_linestring),
Polygon(_, _) => impl_to_wkt!(as_polygon, write_polygon),
Expand Down
2 changes: 1 addition & 1 deletion rust/geoarrow/src/table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ pub(crate) static GEOARROW_EXTENSION_NAMES: Set<&'static str> = phf_set! {
"ogc.wkb",
};

/// An Arrow table that MAY contain one or more geospatial columns.
/// An Arrow table that may contain one or more geospatial columns.
///
/// This Table object is designed to be interoperable with non-geospatial Arrow libraries, and thus
/// does not _require_ a geometry column.
Expand Down

0 comments on commit edc2919

Please sign in to comment.