From 21264b5092223898a5fedb407ce8ed0fe0f63686 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Fri, 20 Dec 2024 16:40:27 -0500 Subject: [PATCH] Cleanup pass (#957) ### Change list - Disallow missing documentation in `geoarrow::array` - Update documentation in `geoarrow::array`. - Use `ArrayRef` instead of `Arc` - Use `SchemaRef` instead of `Arc` --- rust/geoarrow/src/array/binary/array.rs | 11 ++-- rust/geoarrow/src/array/binary/builder.rs | 11 +++- rust/geoarrow/src/array/binary/capacity.rs | 3 + rust/geoarrow/src/array/cast.rs | 1 + .../src/array/coord/combined/array.rs | 33 +++++++---- .../src/array/coord/combined/builder.rs | 19 ++++--- .../src/array/coord/interleaved/array.rs | 36 ++++++------ .../src/array/coord/interleaved/builder.rs | 33 ++++++----- .../src/array/coord/separated/array.rs | 38 +++++++------ .../src/array/coord/separated/builder.rs | 34 +++++++----- rust/geoarrow/src/array/dynamic.rs | 15 +++-- rust/geoarrow/src/array/geometry/array.rs | 25 +++++---- rust/geoarrow/src/array/geometry/builder.rs | 30 +++++++++- rust/geoarrow/src/array/geometry/capacity.rs | 28 ++++++++++ .../src/array/geometrycollection/array.rs | 8 ++- .../src/array/geometrycollection/builder.rs | 48 +++++++++------- rust/geoarrow/src/array/linestring/array.rs | 9 ++- rust/geoarrow/src/array/linestring/builder.rs | 47 ++++++++++------ .../geoarrow/src/array/linestring/capacity.rs | 6 ++ rust/geoarrow/src/array/metadata.rs | 16 ++++++ rust/geoarrow/src/array/mixed/array.rs | 6 +- rust/geoarrow/src/array/mixed/builder.rs | 1 + rust/geoarrow/src/array/mod.rs | 55 ++++++++++++++++++- .../src/array/multilinestring/array.rs | 11 +++- .../src/array/multilinestring/builder.rs | 47 ++++++++++------ .../src/array/multilinestring/capacity.rs | 17 ++++-- rust/geoarrow/src/array/multipoint/array.rs | 13 +++-- rust/geoarrow/src/array/multipoint/builder.rs | 52 +++++++++++------- .../geoarrow/src/array/multipoint/capacity.rs | 20 ++++++- rust/geoarrow/src/array/multipolygon/array.rs | 15 
+++-- .../src/array/multipolygon/builder.rs | 47 ++++++++++------ .../src/array/multipolygon/capacity.rs | 22 ++++---- rust/geoarrow/src/array/point/array.rs | 13 ++++- rust/geoarrow/src/array/point/builder.rs | 37 ++++++++----- rust/geoarrow/src/array/polygon/array.rs | 11 +++- rust/geoarrow/src/array/polygon/builder.rs | 48 ++++++++++------ rust/geoarrow/src/array/polygon/capacity.rs | 12 ++++ rust/geoarrow/src/array/rect/array.rs | 11 +++- rust/geoarrow/src/array/rect/builder.rs | 40 +++++++------- rust/geoarrow/src/array/wkt/array.rs | 10 +++- rust/geoarrow/src/chunked_array/mod.rs | 10 ++-- .../io/geozero/table/builder/properties.rs | 2 +- .../src/io/geozero/table/builder/table.rs | 6 +- rust/geoarrow/src/io/parquet/reader/parse.rs | 12 ++-- rust/geoarrow/src/io/parquet/writer/encode.rs | 10 ++-- .../src/io/parquet/writer/metadata.rs | 9 +-- rust/geoarrow/src/lib.rs | 2 +- rust/geoarrow/src/table.rs | 11 +--- rust/geoarrow/src/test/geoarrow_data/util.rs | 5 +- rust/geoarrow/src/trait_.rs | 10 +++- 50 files changed, 680 insertions(+), 336 deletions(-) diff --git a/rust/geoarrow/src/array/binary/array.rs b/rust/geoarrow/src/array/binary/array.rs index a32dd0d9..0ed44fcd 100644 --- a/rust/geoarrow/src/array/binary/array.rs +++ b/rust/geoarrow/src/array/binary/array.rs @@ -9,8 +9,8 @@ use crate::error::{GeoArrowError, Result}; use crate::scalar::WKB; use crate::trait_::{ArrayAccessor, ArrayBase, IntoArrow, SerializedArray}; use arrow::array::AsArray; -use arrow_array::OffsetSizeTrait; use arrow_array::{Array, BinaryArray, GenericBinaryArray, LargeBinaryArray}; +use arrow_array::{ArrayRef, OffsetSizeTrait}; use arrow_buffer::NullBuffer; use arrow_schema::{DataType, Field}; use geo_traits::GeometryTrait; @@ -73,10 +73,6 @@ impl WKBArray { validity_len + self.buffer_lengths().num_bytes::() } - pub fn into_inner(self) -> GenericBinaryArray { - self.array - } - /// Slices this [`WKBArray`] in place. 
/// # Panic /// This function panics iff `offset + length > self.len()`. @@ -93,6 +89,7 @@ impl WKBArray { } } + /// Replace the [ArrayMetadata] in the array with the given metadata pub fn with_metadata(&self, metadata: Arc) -> Self { let mut arr = self.clone(); arr.metadata = metadata; @@ -119,12 +116,12 @@ impl ArrayBase for WKBArray { self.data_type.extension_name() } - fn into_array_ref(self) -> Arc { + fn into_array_ref(self) -> ArrayRef { // Recreate a BinaryArray so that we can force it to have geoarrow.wkb extension type Arc::new(self.into_arrow()) } - fn to_array_ref(&self) -> arrow_array::ArrayRef { + fn to_array_ref(&self) -> ArrayRef { self.clone().into_array_ref() } diff --git a/rust/geoarrow/src/array/binary/builder.rs b/rust/geoarrow/src/array/binary/builder.rs index 9de69d6d..404960e1 100644 --- a/rust/geoarrow/src/array/binary/builder.rs +++ b/rust/geoarrow/src/array/binary/builder.rs @@ -31,12 +31,11 @@ impl Default for WKBBuilder { impl WKBBuilder { /// Creates a new empty [`WKBBuilder`]. - /// # Implementation - /// This allocates a [`Vec`] of one element pub fn new() -> Self { Self::with_capacity(Default::default()) } + /// Creates a new empty [`WKBBuilder`] with the provided options. pub fn new_with_options(metadata: Arc) -> Self { Self::with_capacity_and_options(Default::default(), metadata) } @@ -46,6 +45,7 @@ impl WKBBuilder { Self::with_capacity_and_options(capacity, Default::default()) } + /// Creates a new empty [`WKBBuilder`] with the provided capacity and options. pub fn with_capacity_and_options(capacity: WKBCapacity, metadata: Arc) -> Self { Self( GenericBinaryBuilder::with_capacity( @@ -56,12 +56,16 @@ impl WKBBuilder { ) } + /// Creates a new empty [`WKBBuilder`] with a capacity inferred by the provided geometry + /// iterator. 
pub fn with_capacity_from_iter<'a>( geoms: impl Iterator + 'a)>>, ) -> Self { Self::with_capacity_and_options_from_iter(geoms, Default::default()) } + /// Creates a new empty [`WKBBuilder`] with the provided options and a capacity inferred by the + /// provided geometry iterator. pub fn with_capacity_and_options_from_iter<'a>( geoms: impl Iterator + 'a)>>, metadata: Arc, @@ -205,6 +209,9 @@ impl WKBBuilder { array } + /// Consume this builder and convert to a [WKBArray]. + /// + /// This is `O(1)`. pub fn finish(self) -> WKBArray { self.into() } diff --git a/rust/geoarrow/src/array/binary/capacity.rs b/rust/geoarrow/src/array/binary/capacity.rs index d2e32135..784c1a8b 100644 --- a/rust/geoarrow/src/array/binary/capacity.rs +++ b/rust/geoarrow/src/array/binary/capacity.rs @@ -39,10 +39,12 @@ impl WKBCapacity { self.buffer_capacity == 0 && self.offsets_capacity == 0 } + /// The capacity of the underlying data buffer pub fn buffer_capacity(&self) -> usize { self.buffer_capacity } + /// The capacity of the underlying offsets buffer pub fn offsets_capacity(&self) -> usize { self.offsets_capacity } @@ -223,6 +225,7 @@ impl WKBCapacity { counter } + /// Create a capacity counter from an iterator of Geometries. 
pub fn from_owned_geometries<'a>( geoms: impl Iterator + 'a)>>, ) -> Self { diff --git a/rust/geoarrow/src/array/cast.rs b/rust/geoarrow/src/array/cast.rs index 4e48b573..c7421a89 100644 --- a/rust/geoarrow/src/array/cast.rs +++ b/rust/geoarrow/src/array/cast.rs @@ -135,6 +135,7 @@ impl AsNativeArray for &dyn NativeArray { } } +/// Trait to downcast an Arrow array to a serialized array pub trait AsSerializedArray { /// Downcast this to a [`WKBArray`] with `i32` offsets returning `None` if not possible fn as_wkb_opt(&self) -> Option<&WKBArray>; diff --git a/rust/geoarrow/src/array/coord/combined/array.rs b/rust/geoarrow/src/array/coord/combined/array.rs index f235bb35..12f61461 100644 --- a/rust/geoarrow/src/array/coord/combined/array.rs +++ b/rust/geoarrow/src/array/coord/combined/array.rs @@ -8,7 +8,7 @@ use crate::datatypes::Dimension; use crate::error::{GeoArrowError, Result}; use crate::scalar::Coord; use crate::trait_::IntoArrow; -use arrow_array::{Array, FixedSizeListArray, StructArray}; +use arrow_array::{Array, ArrayRef, FixedSizeListArray, StructArray}; use arrow_schema::DataType; /// An Arrow representation of an array of coordinates. @@ -25,18 +25,22 @@ use arrow_schema::DataType; /// validity masks. 
#[derive(Debug, Clone)] pub enum CoordBuffer { + /// Interleaved coordinates Interleaved(InterleavedCoordBuffer), + /// Separated coordinates Separated(SeparatedCoordBuffer), } impl CoordBuffer { - pub fn slice(&self, offset: usize, length: usize) -> Self { + /// Slice this buffer + pub(crate) fn slice(&self, offset: usize, length: usize) -> Self { match self { CoordBuffer::Interleaved(c) => CoordBuffer::Interleaved(c.slice(offset, length)), CoordBuffer::Separated(c) => CoordBuffer::Separated(c.slice(offset, length)), } } + /// The underlying coordinate type pub fn coord_type(&self) -> CoordType { match self { CoordBuffer::Interleaved(cb) => cb.coord_type(), @@ -44,13 +48,15 @@ impl CoordBuffer { } } - pub fn storage_type(&self) -> DataType { + /// The arrow [DataType] for this coordinate buffer. + pub(crate) fn storage_type(&self) -> DataType { match self { CoordBuffer::Interleaved(c) => c.storage_type(), CoordBuffer::Separated(c) => c.storage_type(), } } + /// The length of this coordinate buffer pub fn len(&self) -> usize { match self { CoordBuffer::Interleaved(c) => c.len(), @@ -58,25 +64,23 @@ impl CoordBuffer { } } + /// Whether this coordinate buffer is empty pub fn is_empty(&self) -> bool { self.len() == 0 } - pub fn value(&self, index: usize) -> Coord<'_> { + pub(crate) fn value(&self, index: usize) -> Coord<'_> { match self { CoordBuffer::Interleaved(c) => Coord::Interleaved(c.value(index)), CoordBuffer::Separated(c) => Coord::Separated(c.value(index)), } } - pub fn into_array_ref(self) -> Arc { + pub(crate) fn into_array_ref(self) -> ArrayRef { self.into_arrow() } - pub fn to_array_ref(&self) -> arrow_array::ArrayRef { - self.clone().into_array_ref() - } - + /// The dimension of this coordinate buffer pub fn dim(&self) -> Dimension { match self { CoordBuffer::Interleaved(c) => c.dim(), @@ -84,11 +88,16 @@ impl CoordBuffer { } } - pub fn with_coords(self, coords: CoordBuffer) -> Self { + #[allow(dead_code)] + pub(crate) fn with_coords(self, coords: 
CoordBuffer) -> Self { assert_eq!(coords.len(), self.len()); coords } + /// Convert this coordinate array into the given [CoordType] + /// + /// This is a no-op if the coord_type matches the existing coord type. Otherwise a full clone + /// of the underlying coordinate buffers will be performed. pub fn into_coord_type(self, coord_type: CoordType) -> Self { let dim = self.dim(); match (self, coord_type) { @@ -113,7 +122,7 @@ impl CoordBuffer { } } - pub fn from_arrow(value: &dyn Array, dim: Dimension) -> Result { + pub(crate) fn from_arrow(value: &dyn Array, dim: Dimension) -> Result { match value.data_type() { DataType::Struct(_) => { let downcasted = value.as_any().downcast_ref::().unwrap(); @@ -136,7 +145,7 @@ impl CoordBuffer { } impl IntoArrow for CoordBuffer { - type ArrowArray = Arc; + type ArrowArray = ArrayRef; fn into_arrow(self) -> Self::ArrowArray { match self { diff --git a/rust/geoarrow/src/array/coord/combined/builder.rs b/rust/geoarrow/src/array/coord/combined/builder.rs index 5fc4e4c8..2e116376 100644 --- a/rust/geoarrow/src/array/coord/combined/builder.rs +++ b/rust/geoarrow/src/array/coord/combined/builder.rs @@ -12,11 +12,14 @@ use geo_traits::{CoordTrait, PointTrait}; /// Converting an [`CoordBufferBuilder`] into a [`CoordBuffer`] is `O(1)`. #[derive(Debug, Clone)] pub enum CoordBufferBuilder { + /// Interleaved coordinates Interleaved(InterleavedCoordBufferBuilder), + /// Separated coordinates Separated(SeparatedCoordBufferBuilder), } impl CoordBufferBuilder { + /// Initialize a buffer of a given length with all coordinates set to 0.0 pub fn initialize(len: usize, interleaved: bool, dim: Dimension) -> Self { match interleaved { true => { @@ -40,18 +43,17 @@ impl CoordBufferBuilder { } } - /// Reserves the minimum capacity for at least `additional` more coordinates to - /// be inserted in the given `Vec`. Unlike [`reserve`], this will not - /// deliberately over-allocate to speculatively avoid frequent allocations. 
- /// After calling `reserve_exact`, capacity will be greater than or equal to - /// `self.len() + additional`. Does nothing if the capacity is already - /// sufficient. + /// Reserves the minimum capacity for at least `additional` more coordinates. + /// + /// Unlike [`reserve`], this will not deliberately over-allocate to speculatively avoid + /// frequent allocations. After calling `reserve_exact`, capacity will be greater than or equal + /// to `self.len() + additional`. Does nothing if the capacity is already sufficient. /// /// Note that the allocator may give the collection more space than it /// requests. Therefore, capacity can not be relied upon to be precisely /// minimal. Prefer [`reserve`] if future insertions are expected. /// - /// [`reserve`]: Vec::reserve + /// [`reserve`]: Self::reserve pub fn reserve_exact(&mut self, additional: usize) { match self { CoordBufferBuilder::Interleaved(cb) => cb.reserve_exact(additional), @@ -67,6 +69,7 @@ impl CoordBufferBuilder { } } + /// The number of coordinates pub fn len(&self) -> usize { match self { CoordBufferBuilder::Interleaved(cb) => cb.len(), @@ -74,10 +77,12 @@ impl CoordBufferBuilder { } } + /// Whether the buffer is empty pub fn is_empty(&self) -> bool { self.len() == 0 } + /// The underlying coordinate type pub fn coord_type(&self) -> CoordType { match self { CoordBufferBuilder::Interleaved(_) => CoordType::Interleaved, diff --git a/rust/geoarrow/src/array/coord/interleaved/array.rs b/rust/geoarrow/src/array/coord/interleaved/array.rs index 02cec0f9..ffe2cb0c 100644 --- a/rust/geoarrow/src/array/coord/interleaved/array.rs +++ b/rust/geoarrow/src/array/coord/interleaved/array.rs @@ -48,11 +48,17 @@ impl InterleavedCoordBuffer { Ok(Self { coords, dim }) } - pub fn from_vec(coords: Vec, dim: Dimension) -> Result { + // Currently used by a test + #[allow(dead_code)] + pub(crate) fn from_vec(coords: Vec, dim: Dimension) -> Result { Self::try_new(coords.into(), dim) } - pub fn from_coords>(coords: &[G], 
dim: Dimension) -> Result { + /// Construct from an iterator of coordinates. + pub fn from_coords<'a>( + coords: impl ExactSizeIterator + 'a)>, + dim: Dimension, + ) -> Result { Ok(InterleavedCoordBufferBuilder::from_coords(coords, dim)?.into()) } @@ -61,22 +67,23 @@ impl InterleavedCoordBuffer { &self.coords } - pub fn values_array(&self) -> Float64Array { + pub(crate) fn values_array(&self) -> Float64Array { Float64Array::new(self.coords.clone(), None) } + /// The dimension of this coordinate buffer pub fn dim(&self) -> Dimension { self.dim } - pub fn values_field(&self) -> Field { + pub(crate) fn values_field(&self) -> Field { match self.dim { Dimension::XY => Field::new("xy", DataType::Float64, false), Dimension::XYZ => Field::new("xyz", DataType::Float64, false), } } - pub fn slice(&self, offset: usize, length: usize) -> Self { + pub(crate) fn slice(&self, offset: usize, length: usize) -> Self { assert!( offset + length <= self.len(), "offset + length may not exceed length of array" @@ -89,39 +96,34 @@ impl InterleavedCoordBuffer { } } - pub fn into_array_ref(self) -> Arc { - Arc::new(self.into_arrow()) - } - - pub fn to_array_ref(&self) -> arrow_array::ArrayRef { - self.clone().into_array_ref() - } - - pub fn storage_type(&self) -> DataType { + pub(crate) fn storage_type(&self) -> DataType { coord_type_to_data_type(CoordType::Interleaved, self.dim) } // todo switch to: // pub const coord_type: CoordType = CoordType::Interleaved; + /// The coordinate type pub fn coord_type(&self) -> CoordType { CoordType::Interleaved } + /// The number of coordinates pub fn len(&self) -> usize { self.coords.len() / self.dim.size() } + /// Whether this buffer is empty pub fn is_empty(&self) -> bool { self.len() == 0 } - pub fn value(&self, index: usize) -> InterleavedCoord<'_> { + pub(crate) fn value(&self, index: usize) -> InterleavedCoord<'_> { assert!(index <= self.len()); self.value_unchecked(index) } - pub fn value_unchecked(&self, index: usize) -> InterleavedCoord<'_> { + 
pub(crate) fn value_unchecked(&self, index: usize) -> InterleavedCoord<'_> { InterleavedCoord { coords: &self.coords, i: index, @@ -129,7 +131,7 @@ impl InterleavedCoordBuffer { } } - pub fn from_arrow(array: &FixedSizeListArray, dim: Dimension) -> Result { + pub(crate) fn from_arrow(array: &FixedSizeListArray, dim: Dimension) -> Result { if array.value_length() != dim.size() as i32 { return Err(GeoArrowError::General( format!( "Expected the FixedSizeListArray to match the dimension. Array length is {}, dimension is: {:?} have size 2", array.value_length(), dim) diff --git a/rust/geoarrow/src/array/coord/interleaved/builder.rs b/rust/geoarrow/src/array/coord/interleaved/builder.rs index 269d6051..a2d8fe4c 100644 --- a/rust/geoarrow/src/array/coord/interleaved/builder.rs +++ b/rust/geoarrow/src/array/coord/interleaved/builder.rs @@ -12,15 +12,17 @@ use geo_traits::{CoordTrait, PointTrait}; /// Converting an [`InterleavedCoordBufferBuilder`] into a [`InterleavedCoordBuffer`] is `O(1)`. #[derive(Debug, Clone)] pub struct InterleavedCoordBufferBuilder { - pub coords: Vec, + pub(crate) coords: Vec, dim: Dimension, } impl InterleavedCoordBufferBuilder { + /// Create a new empty builder with the given dimension pub fn new(dim: Dimension) -> Self { Self::with_capacity(0, dim) } + /// Create a new builder with the given capacity and dimension pub fn with_capacity(capacity: usize, dim: Dimension) -> Self { Self { coords: Vec::with_capacity(capacity * dim.size()), @@ -36,27 +38,26 @@ impl InterleavedCoordBufferBuilder { } } - /// Reserves capacity for at least `additional` more coordinates to be inserted - /// in the given `Vec`. The collection may reserve more space to - /// speculatively avoid frequent reallocations. After calling `reserve`, - /// capacity will be greater than or equal to `self.len() + additional`. + /// Reserves capacity for at least `additional` more coordinates. 
+ /// + /// The collection may reserve more space to speculatively avoid frequent reallocations. After + /// calling `reserve`, capacity will be greater than or equal to `self.len() + additional`. /// Does nothing if capacity is already sufficient. pub fn reserve(&mut self, additional: usize) { self.coords.reserve(additional * self.dim.size()); } - /// Reserves the minimum capacity for at least `additional` more coordinates to - /// be inserted in the given `Vec`. Unlike [`reserve`], this will not - /// deliberately over-allocate to speculatively avoid frequent allocations. - /// After calling `reserve_exact`, capacity will be greater than or equal to - /// `self.len() + additional`. Does nothing if the capacity is already - /// sufficient. + /// Reserves the minimum capacity for at least `additional` more coordinates. + /// + /// Unlike [`reserve`], this will not deliberately over-allocate to speculatively avoid + /// frequent allocations. After calling `reserve_exact`, capacity will be greater than or equal + /// to `self.len() + additional`. Does nothing if the capacity is already sufficient. /// /// Note that the allocator may give the collection more space than it /// requests. Therefore, capacity can not be relied upon to be precisely /// minimal. Prefer [`reserve`] if future insertions are expected. 
/// - /// [`reserve`]: Vec::reserve + /// [`reserve`]: Self::reserve pub fn reserve_exact(&mut self, additional: usize) { self.coords.reserve_exact(additional * self.dim.size()); } @@ -66,10 +67,12 @@ impl InterleavedCoordBufferBuilder { self.coords.capacity() / self.dim.size() } + /// The number of coordinates in this builder pub fn len(&self) -> usize { self.coords.len() / self.dim.size() } + /// Whether this builder is empty pub fn is_empty(&self) -> bool { self.len() == 0 } @@ -137,7 +140,11 @@ impl InterleavedCoordBufferBuilder { Ok(()) } - pub fn from_coords>(coords: &[G], dim: Dimension) -> Result { + /// Construct a new builder and pre-fill it with coordinates from the provided iterator + pub fn from_coords<'a>( + coords: impl ExactSizeIterator + 'a)>, + dim: Dimension, + ) -> Result { let mut buffer = InterleavedCoordBufferBuilder::with_capacity(coords.len(), dim); for coord in coords { buffer.push_coord(coord); diff --git a/rust/geoarrow/src/array/coord/separated/array.rs b/rust/geoarrow/src/array/coord/separated/array.rs index ea3220f3..28aab78f 100644 --- a/rust/geoarrow/src/array/coord/separated/array.rs +++ b/rust/geoarrow/src/array/coord/separated/array.rs @@ -2,7 +2,7 @@ use std::sync::Arc; use arrow::array::AsArray; use arrow::datatypes::Float64Type; -use arrow_array::{Array, ArrayRef, Float64Array, StructArray}; +use arrow_array::{ArrayRef, Float64Array, StructArray}; use arrow_buffer::ScalarBuffer; use arrow_schema::{DataType, Field}; @@ -13,6 +13,10 @@ use crate::scalar::SeparatedCoord; use crate::trait_::IntoArrow; use geo_traits::CoordTrait; +/// The GeoArrow equivalent to `Vec>`: an immutable collection of coordinates. +/// +/// This stores all coordinates in separated fashion as multiple underlying buffers: `xxx` and +/// `yyy`. #[derive(Debug, Clone, PartialEq)] pub struct SeparatedCoordBuffer { /// We always store a buffer for all 4 dimensions. 
The buffers for dimension 3 and 4 may be @@ -85,11 +89,12 @@ impl SeparatedCoordBuffer { } } + /// The dimension of this coordinate buffer pub fn dim(&self) -> Dimension { self.dim } - pub fn values_array(&self) -> Vec { + pub(crate) fn values_array(&self) -> Vec { match self.dim { Dimension::XY => { vec![ @@ -107,7 +112,7 @@ impl SeparatedCoordBuffer { } } - pub fn values_field(&self) -> Vec { + pub(crate) fn values_field(&self) -> Vec { match self.dim { Dimension::XY => { vec![ @@ -125,7 +130,7 @@ impl SeparatedCoordBuffer { } } - pub fn slice(&self, offset: usize, length: usize) -> Self { + pub(crate) fn slice(&self, offset: usize, length: usize) -> Self { assert!( offset + length <= self.len(), "offset + length may not exceed length of array" @@ -143,36 +148,31 @@ impl SeparatedCoordBuffer { } } - pub fn storage_type(&self) -> DataType { + pub(crate) fn storage_type(&self) -> DataType { coord_type_to_data_type(CoordType::Separated, self.dim) } - pub fn into_array_ref(self) -> Arc { - Arc::new(self.into_arrow()) - } - - pub fn to_array_ref(&self) -> arrow_array::ArrayRef { - self.clone().into_array_ref() - } - + /// The coordinate type pub fn coord_type(&self) -> CoordType { CoordType::Separated } + /// The number of coordinates pub fn len(&self) -> usize { self.buffers[0].len() } + /// Whether the coordinate buffer is empty pub fn is_empty(&self) -> bool { self.len() == 0 } - pub fn value(&self, index: usize) -> SeparatedCoord<'_> { + pub(crate) fn value(&self, index: usize) -> SeparatedCoord<'_> { assert!(index <= self.len()); self.value_unchecked(index) } - pub fn value_unchecked(&self, index: usize) -> SeparatedCoord<'_> { + pub(crate) fn value_unchecked(&self, index: usize) -> SeparatedCoord<'_> { SeparatedCoord { buffers: &self.buffers, i: index, @@ -180,7 +180,7 @@ impl SeparatedCoordBuffer { } } - pub fn from_arrow(array: &StructArray, dim: Dimension) -> Result { + pub(crate) fn from_arrow(array: &StructArray, dim: Dimension) -> Result { let arrays = 
array.columns(); assert_eq!(arrays.len(), dim.size()); @@ -193,7 +193,11 @@ impl SeparatedCoordBuffer { Self::try_new(buffers, dim) } - pub fn from_coords>(coords: &[G], dim: Dimension) -> Result { + /// Construct from an iterator of coordinates + pub fn from_coords<'a>( + coords: impl ExactSizeIterator + 'a)>, + dim: Dimension, + ) -> Result { Ok(SeparatedCoordBufferBuilder::from_coords(coords, dim)?.into()) } } diff --git a/rust/geoarrow/src/array/coord/separated/builder.rs b/rust/geoarrow/src/array/coord/separated/builder.rs index bc1fe2d7..ff49c94c 100644 --- a/rust/geoarrow/src/array/coord/separated/builder.rs +++ b/rust/geoarrow/src/array/coord/separated/builder.rs @@ -5,7 +5,7 @@ use crate::datatypes::Dimension; use crate::error::{GeoArrowError, Result}; use geo_traits::{CoordTrait, PointTrait}; -/// The GeoArrow equivalent to `Vec`: a mutable collection of coordinates. +/// The GeoArrow equivalent to `Vec>`: a mutable collection of coordinates. /// /// This stores all coordinates in separated fashion as multiple arrays: `xxx` and `yyy`. /// @@ -17,7 +17,7 @@ pub struct SeparatedCoordBufferBuilder { } impl SeparatedCoordBufferBuilder { - // TODO: switch this new (initializing to zero) to default? + /// Create a new empty builder with the given dimension pub fn new(dim: Dimension) -> Self { Self::with_capacity(0, dim) } @@ -43,6 +43,7 @@ impl SeparatedCoordBufferBuilder { // Self { buffers } // } + /// Create a new builder with the given capacity and dimension pub fn with_capacity(capacity: usize, dim: Dimension) -> Self { // Only allocate buffers for existant dimensions let buffers = core::array::from_fn(|i| { @@ -70,10 +71,10 @@ impl SeparatedCoordBufferBuilder { Self { buffers, dim } } - /// Reserves capacity for at least `additional` more coordinates to be inserted - /// in the given `Vec`. The collection may reserve more space to - /// speculatively avoid frequent reallocations. 
After calling `reserve`, - /// capacity will be greater than or equal to `self.len() + additional`. + /// Reserves capacity for at least `additional` more coordinates. + /// + /// The collection may reserve more space to speculatively avoid frequent reallocations. After + /// calling `reserve`, capacity will be greater than or equal to `self.len() + additional`. /// Does nothing if capacity is already sufficient. pub fn reserve(&mut self, additional: usize) { self.buffers @@ -81,18 +82,17 @@ impl SeparatedCoordBufferBuilder { .for_each(|buffer| buffer.reserve(additional)) } - /// Reserves the minimum capacity for at least `additional` more coordinates to - /// be inserted in the given `Vec`. Unlike [`reserve`], this will not - /// deliberately over-allocate to speculatively avoid frequent allocations. - /// After calling `reserve_exact`, capacity will be greater than or equal to - /// `self.len() + additional`. Does nothing if the capacity is already - /// sufficient. + /// Reserves the minimum capacity for at least `additional` more coordinates. + /// + /// Unlike [`reserve`], this will not deliberately over-allocate to speculatively avoid + /// frequent allocations. After calling `reserve_exact`, capacity will be greater than or equal + /// to `self.len() + additional`. Does nothing if the capacity is already sufficient. /// /// Note that the allocator may give the collection more space than it /// requests. Therefore, capacity can not be relied upon to be precisely /// minimal. Prefer [`reserve`] if future insertions are expected. 
/// - /// [`reserve`]: Vec::reserve + /// [`reserve`]: Self::reserve pub fn reserve_exact(&mut self, additional: usize) { self.buffers .iter_mut() @@ -104,10 +104,12 @@ impl SeparatedCoordBufferBuilder { self.buffers[0].capacity() } + /// The number of coordinates in this builder pub fn len(&self) -> usize { self.buffers[0].len() } + /// Whether this builder is empty pub fn is_empty(&self) -> bool { self.len() == 0 } @@ -175,7 +177,11 @@ impl SeparatedCoordBufferBuilder { Ok(()) } - pub fn from_coords>(coords: &[G], dim: Dimension) -> Result { + /// Construct a new builder and pre-fill it with coordinates from the provided iterator + pub fn from_coords<'a>( + coords: impl ExactSizeIterator + 'a)>, + dim: Dimension, + ) -> Result { let mut buffer = SeparatedCoordBufferBuilder::with_capacity(coords.len(), dim); for coord in coords { buffer.try_push_coord(coord)?; diff --git a/rust/geoarrow/src/array/dynamic.rs b/rust/geoarrow/src/array/dynamic.rs index c1707af2..00168337 100644 --- a/rust/geoarrow/src/array/dynamic.rs +++ b/rust/geoarrow/src/array/dynamic.rs @@ -22,16 +22,16 @@ use crate::{ArrayBase, NativeArray}; pub struct NativeArrayDyn(Arc); impl NativeArrayDyn { + /// Construct a new [NativeArrayDyn] pub fn new(array: Arc) -> Self { Self(array) } + /// Construct a new [NativeArrayDyn] from an Arrow [Array] and [Field]. + // TODO: add an option to parse a serialized array to a native array here. pub fn from_arrow_array(array: &dyn Array, field: &Field) -> Result { - let data_type = NativeType::try_from(field)?; - - // TODO: have to figure out when to parse as a MixedGeometry array vs Unknown Array use NativeType::*; - let geo_arr: Arc = match data_type { + let geo_arr: Arc = match NativeType::try_from(field)? 
{ Point(_, _) => Arc::new(PointArray::try_from((array, field))?), LineString(_, _) => Arc::new(LineStringArray::try_from((array, field))?), Polygon(_, _) => Arc::new(PolygonArray::try_from((array, field))?), @@ -48,10 +48,12 @@ impl NativeArrayDyn { Ok(Self(geo_arr)) } + /// Access the underlying [`Arc`] pub fn inner(&self) -> &NativeArrayRef { &self.0 } + /// Consume self and access the underlying [`Arc`] pub fn into_inner(self) -> NativeArrayRef { self.0 } @@ -140,15 +142,18 @@ impl Display for NativeArrayDyn { } } +/// A wrapper around a SerializedArray of unknown type. #[derive(Debug, Clone)] #[repr(transparent)] pub struct SerializedArrayDyn(pub(crate) SerializedArrayRef); impl SerializedArrayDyn { + /// Construct a new [SerializedArrayDyn] pub fn new(array: SerializedArrayRef) -> Self { Self(array) } + /// Construct a new [SerializedArrayDyn] from an Arrow [Array] and [Field]. pub fn from_arrow_array(array: &dyn Array, field: &Field) -> Result { let data_type = SerializedType::try_from(field)?; @@ -162,10 +167,12 @@ impl SerializedArrayDyn { Ok(Self(geo_arr)) } + /// Access the underlying [`Arc`] pub fn inner(&self) -> &SerializedArrayRef { &self.0 } + /// Consume self and access the underlying [`Arc`] pub fn into_inner(self) -> SerializedArrayRef { self.0 } diff --git a/rust/geoarrow/src/array/geometry/array.rs b/rust/geoarrow/src/array/geometry/array.rs index b1c0fce6..0820a3f4 100644 --- a/rust/geoarrow/src/array/geometry/array.rs +++ b/rust/geoarrow/src/array/geometry/array.rs @@ -1,7 +1,7 @@ use std::collections::HashSet; use std::sync::Arc; -use arrow_array::{Array, OffsetSizeTrait, UnionArray}; +use arrow_array::{Array, ArrayRef, OffsetSizeTrait, UnionArray}; use arrow_buffer::{NullBuffer, ScalarBuffer}; use arrow_schema::{DataType, Field, UnionMode}; @@ -250,49 +250,49 @@ impl GeometryArray { } // TODO: handle slicing - pub fn has_points(&self, dim: Dimension) -> bool { + pub(crate) fn has_points(&self, dim: Dimension) -> bool { match dim { 
Dimension::XY => !self.point_xy.is_empty(), Dimension::XYZ => !self.point_xyz.is_empty(), } } - pub fn has_line_strings(&self, dim: Dimension) -> bool { + pub(crate) fn has_line_strings(&self, dim: Dimension) -> bool { match dim { Dimension::XY => !self.line_string_xy.is_empty(), Dimension::XYZ => !self.line_string_xyz.is_empty(), } } - pub fn has_polygons(&self, dim: Dimension) -> bool { + pub(crate) fn has_polygons(&self, dim: Dimension) -> bool { match dim { Dimension::XY => !self.polygon_xy.is_empty(), Dimension::XYZ => !self.polygon_xyz.is_empty(), } } - pub fn has_multi_points(&self, dim: Dimension) -> bool { + pub(crate) fn has_multi_points(&self, dim: Dimension) -> bool { match dim { Dimension::XY => !self.mpoint_xy.is_empty(), Dimension::XYZ => !self.mpoint_xyz.is_empty(), } } - pub fn has_multi_line_strings(&self, dim: Dimension) -> bool { + pub(crate) fn has_multi_line_strings(&self, dim: Dimension) -> bool { match dim { Dimension::XY => !self.mline_string_xy.is_empty(), Dimension::XYZ => !self.mline_string_xyz.is_empty(), } } - pub fn has_multi_polygons(&self, dim: Dimension) -> bool { + pub(crate) fn has_multi_polygons(&self, dim: Dimension) -> bool { match dim { Dimension::XY => !self.mpolygon_xy.is_empty(), Dimension::XYZ => !self.mpolygon_xyz.is_empty(), } } - pub fn has_geometry_collections(&self, dim: Dimension) -> bool { + pub(crate) fn has_geometry_collections(&self, dim: Dimension) -> bool { match dim { Dimension::XY => !self.gc_xy.is_empty(), Dimension::XYZ => !self.gc_xyz.is_empty(), @@ -500,10 +500,12 @@ impl GeometryArray { } } + /// Change the coordinate type of this array. pub fn to_coord_type(&self, coord_type: CoordType) -> Self { self.clone().into_coord_type(coord_type) } + /// Change the coordinate type of this array. 
pub fn into_coord_type(self, coord_type: CoordType) -> Self { Self::new( self.type_ids, @@ -527,7 +529,8 @@ impl GeometryArray { } // TODO: recursively expand the types from the geometry collection array - pub fn contained_types(&self) -> HashSet { + #[allow(dead_code)] + pub(crate) fn contained_types(&self) -> HashSet { let mut types = HashSet::new(); if self.has_points(Dimension::XY) { types.insert(self.point_xy.data_type()); @@ -597,11 +600,11 @@ impl ArrayBase for GeometryArray { self.data_type.extension_name() } - fn into_array_ref(self) -> Arc { + fn into_array_ref(self) -> ArrayRef { Arc::new(self.into_arrow()) } - fn to_array_ref(&self) -> arrow_array::ArrayRef { + fn to_array_ref(&self) -> ArrayRef { self.clone().into_array_ref() } diff --git a/rust/geoarrow/src/array/geometry/builder.rs b/rust/geoarrow/src/array/geometry/builder.rs index 5455596d..86302186 100644 --- a/rust/geoarrow/src/array/geometry/builder.rs +++ b/rust/geoarrow/src/array/geometry/builder.rs @@ -89,6 +89,7 @@ impl<'a> GeometryBuilder { Self::new_with_options(Default::default(), Default::default(), DEFAULT_PREFER_MULTI) } + /// Creates a new empty [`GeometryBuilder`] with the given options. pub fn new_with_options( coord_type: CoordType, metadata: Arc, @@ -97,7 +98,7 @@ impl<'a> GeometryBuilder { Self::with_capacity_and_options(Default::default(), coord_type, metadata, prefer_multi) } - /// Creates a new [`MixedGeometryBuilder`] with given capacity and no validity. + /// Creates a new [`GeometryBuilder`] with given capacity and no validity. pub fn with_capacity(capacity: GeometryCapacity) -> Self { Self::with_capacity_and_options( capacity, @@ -107,6 +108,7 @@ impl<'a> GeometryBuilder { ) } + /// Creates a new empty [`GeometryBuilder`] with the given capacity and options. pub fn with_capacity_and_options( capacity: GeometryCapacity, coord_type: CoordType, @@ -211,6 +213,11 @@ impl<'a> GeometryBuilder { } } + /// Reserves capacity for at least `additional` more geometries. 
+ /// + /// The collection may reserve more space to speculatively avoid frequent reallocations. After + /// calling `reserve`, capacity will be greater than or equal to `self.len() + additional`. + /// Does nothing if capacity is already sufficient. pub fn reserve(&mut self, capacity: GeometryCapacity) { let total_num_geoms = capacity.total_num_geoms(); self.types.reserve(total_num_geoms); @@ -233,6 +240,17 @@ impl<'a> GeometryBuilder { self.gc_xyz.reserve(capacity.gc_xyz()); } + /// Reserves the minimum capacity for at least `additional` more Geometries. + /// + /// Unlike [`reserve`], this will not deliberately over-allocate to speculatively avoid + /// frequent allocations. After calling `reserve_exact`, capacity will be greater than or equal + /// to `self.len() + additional`. Does nothing if the capacity is already sufficient. + /// + /// Note that the allocator may give the collection more space than it + /// requests. Therefore, capacity can not be relied upon to be precisely + /// minimal. Prefer [`reserve`] if future insertions are expected. + /// + /// [`reserve`]: Self::reserve pub fn reserve_exact(&mut self, capacity: GeometryCapacity) { let total_num_geoms = capacity.total_num_geoms(); @@ -288,10 +306,12 @@ impl<'a> GeometryBuilder { // }) // } + /// Consume the builder and convert to an immutable [`GeometryArray`] pub fn finish(self) -> GeometryArray { self.into() } + /// Creates a new builder with a capacity inferred by the provided iterator. pub fn with_capacity_from_iter( geoms: impl Iterator>, ) -> Result { @@ -303,6 +323,8 @@ impl<'a> GeometryBuilder { ) } + /// Creates a new builder with the provided options and a capacity inferred by the provided + /// iterator. pub fn with_capacity_and_options_from_iter( geoms: impl Iterator>, coord_type: CoordType, @@ -318,6 +340,8 @@ impl<'a> GeometryBuilder { )) } + /// Reserve more space in the underlying buffers with the capacity inferred from the provided + /// geometries. 
pub fn reserve_from_iter( &mut self, geoms: impl Iterator>, @@ -328,6 +352,8 @@ impl<'a> GeometryBuilder { Ok(()) } + /// Reserve more space in the underlying buffers with the capacity inferred from the provided + /// geometries. pub fn reserve_exact_from_iter( &mut self, geoms: impl Iterator>, @@ -745,6 +771,7 @@ impl<'a> GeometryBuilder { } } + /// Add a new geometry to this builder #[inline] pub fn push_geometry(&mut self, value: Option<&'a impl GeometryTrait>) -> Result<()> { use geo_traits::GeometryType::*; @@ -866,6 +893,7 @@ impl<'a> GeometryBuilder { } } + /// Extend this builder with the given geometries pub fn extend_from_iter( &mut self, geoms: impl Iterator + 'a)>>, diff --git a/rust/geoarrow/src/array/geometry/capacity.rs b/rust/geoarrow/src/array/geometry/capacity.rs index 0836ffa4..af2f7550 100644 --- a/rust/geoarrow/src/array/geometry/capacity.rs +++ b/rust/geoarrow/src/array/geometry/capacity.rs @@ -40,6 +40,7 @@ pub struct GeometryCapacity { } impl GeometryCapacity { + /// Create a new capacity with known sizes. #[allow(clippy::too_many_arguments)] pub fn new( nulls: usize, @@ -102,6 +103,8 @@ impl GeometryCapacity { } } + /// Set whether this capacity counter should prefer allocating "single-type" geometries like + /// Point/LineString/Polygon in the arrays of their "Multi" counterparts. pub fn with_prefer_multi(mut self, prefer_multi: bool) -> Self { self.prefer_multi = prefer_multi; self @@ -123,6 +126,7 @@ impl GeometryCapacity { && self.mpolygon_xyz.is_empty() } + /// The total number of geometries across all geometry types. 
pub fn total_num_geoms(&self) -> usize { let mut total = 0; total += self.point_xy; @@ -140,58 +144,72 @@ impl GeometryCapacity { total } + /// The number of point_xy geometries pub fn point_xy(&self) -> usize { self.point_xy } + /// The number of line_string_xy geometries pub fn line_string_xy(&self) -> LineStringCapacity { self.line_string_xy } + /// The number of polygon_xy geometries pub fn polygon_xy(&self) -> PolygonCapacity { self.polygon_xy } + /// The number of multi point_xy geometries pub fn mpoint_xy(&self) -> MultiPointCapacity { self.mpoint_xy } + /// The number of multi line_string_xy geometries pub fn mline_string_xy(&self) -> MultiLineStringCapacity { self.mline_string_xy } + /// The number of multi polygon_xy geometries pub fn mpolygon_xy(&self) -> MultiPolygonCapacity { self.mpolygon_xy } + /// The number of gc_xy geometries pub fn gc_xy(&self) -> GeometryCollectionCapacity { self.gc_xy } + /// The number of point_xyz geometries pub fn point_xyz(&self) -> usize { self.point_xyz } + /// The number of line_string_xyz geometries pub fn line_string_xyz(&self) -> LineStringCapacity { self.line_string_xyz } + /// The number of polygon_xyz geometries pub fn polygon_xyz(&self) -> PolygonCapacity { self.polygon_xyz } + /// The number of multi point_xyz geometries pub fn mpoint_xyz(&self) -> MultiPointCapacity { self.mpoint_xyz } + /// The number of multi line_string_xyz geometries pub fn mline_string_xyz(&self) -> MultiLineStringCapacity { self.mline_string_xyz } + /// The number of multi polygon_xyz geometries pub fn mpolygon_xyz(&self) -> MultiPolygonCapacity { self.mpolygon_xyz } + /// The number of gc_xyz geometries pub fn gc_xyz(&self) -> GeometryCollectionCapacity { self.gc_xyz } @@ -241,6 +259,7 @@ impl GeometryCapacity { // && self.multi_line_string.is_empty() // } + /// Add the capacity of the given Point #[inline] pub fn add_point(&mut self, point: Option<&impl PointTrait>) { if let Some(point) = point { @@ -266,6 +285,7 @@ impl GeometryCapacity 
{ } } + /// Add the capacity of the given LineString #[inline] pub fn add_line_string(&mut self, line_string: Option<&impl LineStringTrait>) { if let Some(line_string) = line_string { @@ -291,6 +311,7 @@ impl GeometryCapacity { } } + /// Add the capacity of the given Polygon #[inline] pub fn add_polygon(&mut self, polygon: Option<&impl PolygonTrait>) { if let Some(polygon) = polygon { @@ -316,6 +337,7 @@ impl GeometryCapacity { } } + /// Add the capacity of the given MultiPoint #[inline] pub fn add_multi_point(&mut self, multi_point: Option<&impl MultiPointTrait>) { if let Some(multi_point) = multi_point { @@ -333,6 +355,7 @@ impl GeometryCapacity { } } + /// Add the capacity of the given MultiLineString #[inline] pub fn add_multi_line_string(&mut self, multi_line_string: Option<&impl MultiLineStringTrait>) { if let Some(multi_line_string) = multi_line_string { @@ -352,6 +375,7 @@ impl GeometryCapacity { } } + /// Add the capacity of the given MultiPolygon #[inline] pub fn add_multi_polygon(&mut self, multi_polygon: Option<&impl MultiPolygonTrait>) { if let Some(multi_polygon) = multi_polygon { @@ -369,6 +393,7 @@ impl GeometryCapacity { } } + /// Add the capacity of the given Geometry #[inline] pub fn add_geometry(&mut self, geom: Option<&impl GeometryTrait>) -> Result<()> { if let Some(geom) = geom { @@ -390,6 +415,7 @@ impl GeometryCapacity { Ok(()) } + /// Add the capacity of the given GeometryCollection #[inline] pub fn add_geometry_collection( &mut self, @@ -411,6 +437,7 @@ impl GeometryCapacity { Ok(()) } + /// Construct a new counter pre-filled with the given geometries pub fn from_geometries<'a>( geoms: impl Iterator>, prefer_multi: bool, @@ -422,6 +449,7 @@ impl GeometryCapacity { Ok(counter) } + /// Construct a new counter pre-filled with the given geometries pub fn from_owned_geometries<'a>( geoms: impl Iterator>, prefer_multi: bool, diff --git a/rust/geoarrow/src/array/geometrycollection/array.rs b/rust/geoarrow/src/array/geometrycollection/array.rs 
index 4a37452e..f23077f3 100644 --- a/rust/geoarrow/src/array/geometrycollection/array.rs +++ b/rust/geoarrow/src/array/geometrycollection/array.rs @@ -1,7 +1,7 @@ use std::sync::Arc; use arrow::array::AsArray; -use arrow_array::{Array, GenericListArray, OffsetSizeTrait}; +use arrow_array::{Array, ArrayRef, GenericListArray, OffsetSizeTrait}; use arrow_buffer::{NullBuffer, OffsetBuffer}; use arrow_schema::{DataType, Field}; @@ -115,10 +115,12 @@ impl GeometryCollectionArray { } } + /// Change the coordinate type of this array. pub fn to_coord_type(&self, coord_type: CoordType) -> Self { self.clone().into_coord_type(coord_type) } + /// Change the coordinate type of this array. pub fn into_coord_type(self, coord_type: CoordType) -> Self { Self::new( self.array.into_coord_type(coord_type), @@ -148,11 +150,11 @@ impl ArrayBase for GeometryCollectionArray { self.data_type.extension_name() } - fn into_array_ref(self) -> Arc { + fn into_array_ref(self) -> ArrayRef { Arc::new(self.into_arrow()) } - fn to_array_ref(&self) -> arrow_array::ArrayRef { + fn to_array_ref(&self) -> ArrayRef { self.clone().into_array_ref() } diff --git a/rust/geoarrow/src/array/geometrycollection/builder.rs b/rust/geoarrow/src/array/geometrycollection/builder.rs index fbf2c815..74004578 100644 --- a/rust/geoarrow/src/array/geometrycollection/builder.rs +++ b/rust/geoarrow/src/array/geometrycollection/builder.rs @@ -1,6 +1,6 @@ use std::sync::Arc; -use arrow_array::{Array, GenericListArray, OffsetSizeTrait}; +use arrow_array::{ArrayRef, GenericListArray, OffsetSizeTrait}; use arrow_buffer::NullBufferBuilder; use crate::array::geometrycollection::GeometryCollectionCapacity; @@ -43,6 +43,7 @@ impl<'a> GeometryCollectionBuilder { ) } + /// Creates a new empty [`GeometryCollectionBuilder`] with the provided options. 
pub fn new_with_options( dim: Dimension, coord_type: CoordType, @@ -52,6 +53,7 @@ impl<'a> GeometryCollectionBuilder { Self::with_capacity_and_options(dim, Default::default(), coord_type, metadata, prefer_multi) } + /// Creates a new empty [`GeometryCollectionBuilder`] with the provided capacity. pub fn with_capacity(dim: Dimension, capacity: GeometryCollectionCapacity) -> Self { Self::with_capacity_and_options( dim, @@ -62,6 +64,7 @@ impl<'a> GeometryCollectionBuilder { ) } + /// Creates a new empty [`GeometryCollectionBuilder`] with the provided capacity and options. pub fn with_capacity_and_options( dim: Dimension, capacity: GeometryCollectionCapacity, @@ -69,7 +72,6 @@ impl<'a> GeometryCollectionBuilder { metadata: Arc, prefer_multi: bool, ) -> Self { - // Should we be storing array metadata on child arrays? Self { geoms: MixedGeometryBuilder::with_capacity_and_options( dim, @@ -84,42 +86,39 @@ impl<'a> GeometryCollectionBuilder { } } - /// Reserves capacity for at least `additional` more LineStrings to be inserted - /// in the given `Vec`. The collection may reserve more space to - /// speculatively avoid frequent reallocations. After calling `reserve`, - /// capacity will be greater than or equal to `self.len() + additional`. + /// Reserves capacity for at least `additional` more GeometryCollections. + /// + /// The collection may reserve more space to speculatively avoid frequent reallocations. After + /// calling `reserve`, capacity will be greater than or equal to `self.len() + additional`. /// Does nothing if capacity is already sufficient. pub fn reserve(&mut self, additional: GeometryCollectionCapacity) { self.geoms.reserve(additional.mixed_capacity); self.geom_offsets.reserve(additional.geom_capacity); } - /// Reserves the minimum capacity for at least `additional` more LineStrings to - /// be inserted in the given `Vec`. Unlike [`reserve`], this will not - /// deliberately over-allocate to speculatively avoid frequent allocations. 
- /// After calling `reserve_exact`, capacity will be greater than or equal to - /// `self.len() + additional`. Does nothing if the capacity is already - /// sufficient. + /// Reserves the minimum capacity for at least `additional` more GeometryCollections. + /// + /// Unlike [`reserve`], this will not deliberately over-allocate to speculatively avoid + /// frequent allocations. After calling `reserve_exact`, capacity will be greater than or equal + /// to `self.len() + additional`. Does nothing if the capacity is already sufficient. /// /// Note that the allocator may give the collection more space than it /// requests. Therefore, capacity can not be relied upon to be precisely /// minimal. Prefer [`reserve`] if future insertions are expected. /// - /// [`reserve`]: Vec::reserve + /// [`reserve`]: Self::reserve pub fn reserve_exact(&mut self, additional: GeometryCollectionCapacity) { self.geoms.reserve_exact(additional.mixed_capacity); self.geom_offsets.reserve_exact(additional.geom_capacity); } - /// Extract the low-level APIs from the [`GeometryCollectionBuilder`]. - pub fn into_inner(self) -> (MixedGeometryBuilder, OffsetsBuilder, NullBufferBuilder) { - (self.geoms, self.geom_offsets, self.validity) - } - + /// Consume the builder and convert to an immutable [`GeometryCollectionArray`] pub fn finish(self) -> GeometryCollectionArray { self.into() } + /// Creates a new [`GeometryCollectionBuilder`] with a capacity inferred by the provided + /// iterator. pub fn with_capacity_from_iter( geoms: impl Iterator>, dim: Dimension, @@ -133,6 +132,8 @@ impl<'a> GeometryCollectionBuilder { ) } + /// Creates a new [`GeometryCollectionBuilder`] with the provided options and a capacity + /// inferred by the provided iterator. 
pub fn with_capacity_and_options_from_iter( geoms: impl Iterator>, dim: Dimension, @@ -150,6 +151,8 @@ impl<'a> GeometryCollectionBuilder { )) } + /// Reserve more space in the underlying buffers with the capacity inferred from the provided + /// geometries. pub fn reserve_from_iter( &mut self, geoms: impl Iterator>, @@ -159,6 +162,8 @@ impl<'a> GeometryCollectionBuilder { Ok(()) } + /// Reserve more space in the underlying buffers with the capacity inferred from the provided + /// geometries. pub fn reserve_exact_from_iter( &mut self, geoms: impl Iterator>, @@ -276,6 +281,7 @@ impl<'a> GeometryCollectionBuilder { Ok(()) } + /// Extend this builder with the given geometries pub fn extend_from_iter( &mut self, geoms: impl Iterator + 'a)>>, @@ -299,6 +305,7 @@ impl<'a> GeometryCollectionBuilder { self.validity.append(false); } + /// Construct a new builder, pre-filling it with the provided geometries pub fn from_geometry_collections( geoms: &[impl GeometryCollectionTrait], dim: Dimension, @@ -317,6 +324,7 @@ impl<'a> GeometryCollectionBuilder { Ok(array) } + /// Construct a new builder, pre-filling it with the provided geometries pub fn from_nullable_geometry_collections( geoms: &[Option>], dim: Dimension, @@ -335,6 +343,7 @@ impl<'a> GeometryCollectionBuilder { Ok(array) } + /// Construct a new builder, pre-filling it with the provided geometries pub fn from_geometries( geoms: &[impl GeometryTrait], dim: Dimension, @@ -351,6 +360,7 @@ impl<'a> GeometryCollectionBuilder { Ok(array) } + /// Construct a new builder, pre-filling it with the provided geometries pub fn from_nullable_geometries( geoms: &[Option>], dim: Dimension, @@ -414,7 +424,7 @@ impl GeometryArrayBuilder for GeometryCollectionBuilder { &self.validity } - fn into_array_ref(self) -> Arc { + fn into_array_ref(self) -> ArrayRef { Arc::new(self.into_arrow()) } diff --git a/rust/geoarrow/src/array/linestring/array.rs b/rust/geoarrow/src/array/linestring/array.rs index a08f73c1..ab36eefd 100644 --- 
a/rust/geoarrow/src/array/linestring/array.rs +++ b/rust/geoarrow/src/array/linestring/array.rs @@ -114,14 +114,17 @@ impl LineStringArray { Field::new("vertices", self.coords.storage_type(), false).into() } + /// Access the underlying coordinate buffer pub fn coords(&self) -> &CoordBuffer { &self.coords } - pub fn into_inner(self) -> (CoordBuffer, OffsetBuffer, Option) { + #[allow(dead_code)] + pub(crate) fn into_inner(self) -> (CoordBuffer, OffsetBuffer, Option) { (self.coords, self.geom_offsets, self.validity) } + /// Access the underlying geometry offsets buffer pub fn geom_offsets(&self) -> &OffsetBuffer { &self.geom_offsets } @@ -171,10 +174,12 @@ impl LineStringArray { } } + /// Change the coordinate type of this array. pub fn to_coord_type(&self, coord_type: CoordType) -> Self { self.clone().into_coord_type(coord_type) } + /// Change the coordinate type of this array. pub fn into_coord_type(self, coord_type: CoordType) -> Self { Self::new( self.coords.into_coord_type(coord_type), @@ -217,7 +222,7 @@ impl ArrayBase for LineStringArray { Arc::new(self.into_arrow()) } - fn to_array_ref(&self) -> arrow_array::ArrayRef { + fn to_array_ref(&self) -> ArrayRef { self.clone().into_array_ref() } diff --git a/rust/geoarrow/src/array/linestring/builder.rs b/rust/geoarrow/src/array/linestring/builder.rs index e165b767..22b05666 100644 --- a/rust/geoarrow/src/array/linestring/builder.rs +++ b/rust/geoarrow/src/array/linestring/builder.rs @@ -10,7 +10,7 @@ use crate::datatypes::Dimension; use crate::error::{GeoArrowError, Result}; use crate::scalar::WKB; use crate::trait_::{ArrayAccessor, GeometryArrayBuilder, IntoArrow}; -use arrow_array::{Array, GenericListArray, OffsetSizeTrait}; +use arrow_array::{ArrayRef, GenericListArray, OffsetSizeTrait}; use arrow_buffer::NullBufferBuilder; use geo_traits::{CoordTrait, GeometryTrait, GeometryType, LineStringTrait, MultiLineStringTrait}; use std::convert::From; @@ -38,6 +38,7 @@ impl LineStringBuilder { Self::new_with_options(dim, 
Default::default(), Default::default()) } + /// Creates a new empty [`LineStringBuilder`] with the provided options. pub fn new_with_options( dim: Dimension, coord_type: CoordType, @@ -51,6 +52,7 @@ impl LineStringBuilder { Self::with_capacity_and_options(dim, capacity, Default::default(), Default::default()) } + /// Creates a new empty [`LineStringBuilder`] with the provided capacity and options. pub fn with_capacity_and_options( dim: Dimension, capacity: LineStringCapacity, @@ -73,28 +75,27 @@ impl LineStringBuilder { } } - /// Reserves capacity for at least `additional` more LineStrings to be inserted - /// in the given `Vec`. The collection may reserve more space to - /// speculatively avoid frequent reallocations. After calling `reserve`, - /// capacity will be greater than or equal to `self.len() + additional`. + /// Reserves capacity for at least `additional` more LineStrings. + /// + /// The collection may reserve more space to speculatively avoid frequent reallocations. After + /// calling `reserve`, capacity will be greater than or equal to `self.len() + additional`. /// Does nothing if capacity is already sufficient. pub fn reserve(&mut self, additional: LineStringCapacity) { self.coords.reserve(additional.coord_capacity()); self.geom_offsets.reserve(additional.geom_capacity()); } - /// Reserves the minimum capacity for at least `additional` more LineStrings to - /// be inserted in the given `Vec`. Unlike [`reserve`], this will not - /// deliberately over-allocate to speculatively avoid frequent allocations. - /// After calling `reserve_exact`, capacity will be greater than or equal to - /// `self.len() + additional`. Does nothing if the capacity is already - /// sufficient. + /// Reserves the minimum capacity for at least `additional` more LineStrings. + /// + /// Unlike [`reserve`], this will not deliberately over-allocate to speculatively avoid + /// frequent allocations. 
After calling `reserve_exact`, capacity will be greater than or equal + /// to `self.len() + additional`. Does nothing if the capacity is already sufficient. /// /// Note that the allocator may give the collection more space than it /// requests. Therefore, capacity can not be relied upon to be precisely /// minimal. Prefer [`reserve`] if future insertions are expected. /// - /// [`reserve`]: Vec::reserve + /// [`reserve`]: Self::reserve pub fn reserve_exact(&mut self, additional: LineStringCapacity) { self.coords.reserve_exact(additional.coord_capacity()); self.geom_offsets.reserve_exact(additional.geom_capacity()); @@ -151,14 +152,12 @@ impl LineStringBuilder { self.validity.append(false); } - pub fn into_array_ref(self) -> Arc { - Arc::new(self.into_arrow()) - } - + /// Consume the builder and convert to an immutable [`LineStringArray`] pub fn finish(self) -> LineStringArray { self.into() } + /// Creates a new builder with a capacity inferred by the provided iterator. pub fn with_capacity_from_iter<'a>( geoms: impl Iterator>, dim: Dimension, @@ -171,6 +170,8 @@ impl LineStringBuilder { ) } + /// Creates a new builder with the provided options and a capacity inferred by the provided + /// iterator. pub fn with_capacity_and_options_from_iter<'a>( geoms: impl Iterator>, dim: Dimension, @@ -181,6 +182,8 @@ impl LineStringBuilder { Self::with_capacity_and_options(dim, counter, coord_type, metadata) } + /// Reserve more space in the underlying buffers with the capacity inferred from the provided + /// geometries. pub fn reserve_from_iter<'a>( &mut self, geoms: impl Iterator>, @@ -189,6 +192,8 @@ impl LineStringBuilder { self.reserve(counter) } + /// Reserve more space in the underlying buffers with the capacity inferred from the provided + /// geometries. 
pub fn reserve_exact_from_iter<'a>( &mut self, geoms: impl Iterator>, @@ -197,6 +202,7 @@ impl LineStringBuilder { self.reserve_exact(counter) } + /// Construct a new builder, pre-filling it with the provided geometries pub fn from_line_strings( geoms: &[impl LineStringTrait], dim: Dimension, @@ -213,6 +219,7 @@ impl LineStringBuilder { array } + /// Construct a new builder, pre-filling it with the provided geometries pub fn from_nullable_line_strings( geoms: &[Option>], dim: Dimension, @@ -251,6 +258,7 @@ impl LineStringBuilder { Ok(()) } + /// Extend this builder with the given geometries pub fn extend_from_iter<'a>( &mut self, geoms: impl Iterator + 'a)>>, @@ -261,6 +269,7 @@ impl LineStringBuilder { .unwrap(); } + /// Extend this builder with the given geometries pub fn extend_from_geometry_iter<'a>( &mut self, geoms: impl Iterator + 'a)>>, @@ -280,6 +289,9 @@ impl LineStringBuilder { self.coords.try_push_coord(coord) } + /// Add a new geometry to this builder + /// + /// This will error if the geometry type is not LineString or a MultiLineString with length 1. 
#[inline] pub fn push_geometry(&mut self, value: Option<&impl GeometryTrait>) -> Result<()> { if let Some(value) = value { @@ -300,6 +312,7 @@ impl LineStringBuilder { Ok(()) } + /// Construct a new builder, pre-filling it with the provided geometries pub fn from_nullable_geometries( geoms: &[Option>], dim: Dimension, @@ -357,7 +370,7 @@ impl GeometryArrayBuilder for LineStringBuilder { &self.validity } - fn into_array_ref(self) -> Arc { + fn into_array_ref(self) -> ArrayRef { Arc::new(self.into_arrow()) } diff --git a/rust/geoarrow/src/array/linestring/capacity.rs b/rust/geoarrow/src/array/linestring/capacity.rs index 0b3002ee..57b5adb0 100644 --- a/rust/geoarrow/src/array/linestring/capacity.rs +++ b/rust/geoarrow/src/array/linestring/capacity.rs @@ -45,6 +45,9 @@ impl LineStringCapacity { self.coord_capacity += line_string.num_coords(); } + /// Add the capacity of the given Geometry + /// + /// The type of the geometry must be LineString #[inline] pub fn add_geometry(&mut self, value: Option<&impl GeometryTrait>) -> Result<()> { self.geom_capacity += 1; @@ -58,10 +61,12 @@ impl LineStringCapacity { Ok(()) } + /// The coordinate buffer capacity pub fn coord_capacity(&self) -> usize { self.coord_capacity } + /// The geometry offset buffer capacity pub fn geom_capacity(&self) -> usize { self.geom_capacity } @@ -79,6 +84,7 @@ impl LineStringCapacity { counter } + /// Construct a new counter pre-filled with the given geometries pub fn from_geometries<'a>( geoms: impl Iterator>, ) -> Result { diff --git a/rust/geoarrow/src/array/metadata.rs b/rust/geoarrow/src/array/metadata.rs index 8cab7577..0f7bf6b9 100644 --- a/rust/geoarrow/src/array/metadata.rs +++ b/rust/geoarrow/src/array/metadata.rs @@ -92,46 +92,62 @@ impl ArrayMetadata { self.crs.is_some() || self.edges.is_some() } + /// Construct from a PROJJSON object. + /// + /// Note that `value` should be a _parsed_ JSON object; this should not contain + /// `Value::String`. 
pub fn from_projjson(value: Value) -> Self { Self::default().with_projjson(value) } + /// Construct from a WKT:2019 string. pub fn from_wkt2_2019(value: String) -> Self { Self::default().with_wkt2_2019(value) } + /// Construct from an opaque string. pub fn from_unknown_crs_type(value: String) -> Self { Self::default().with_unknown_crs_type(value) } + /// Construct from an authority:code string. pub fn from_authority_code(value: String) -> Self { Self::default().with_authority_code(value) } + /// Set the CRS using a PROJJSON object. + /// + /// Note that `value` should be a _parsed_ JSON object; this should not contain + /// `Value::String`. pub fn with_projjson(mut self, value: Value) -> Self { self.crs = Some(value); self.crs_type = Some(CRSType::Projjson); self } + /// Set the CRS using a WKT:2019 string. pub fn with_wkt2_2019(mut self, value: String) -> Self { self.crs = Some(Value::String(value)); self.crs_type = Some(CRSType::Wkt2_2019); self } + /// Set the CRS using an opaque string. pub fn with_unknown_crs_type(mut self, value: String) -> Self { self.crs = Some(Value::String(value)); self.crs_type = None; self } + /// Set the CRS using an authority:code string. pub fn with_authority_code(mut self, value: String) -> Self { + assert!(value.contains(':'), "':' should be authority:code CRS"); self.crs = Some(Value::String(value)); self.crs_type = Some(CRSType::AuthorityCode); self } + /// Set the edge type. 
pub fn with_edges(mut self, edges: Edges) -> Self { self.edges = Some(edges); self diff --git a/rust/geoarrow/src/array/mixed/array.rs b/rust/geoarrow/src/array/mixed/array.rs index c893a37c..9a769298 100644 --- a/rust/geoarrow/src/array/mixed/array.rs +++ b/rust/geoarrow/src/array/mixed/array.rs @@ -1,7 +1,7 @@ use std::collections::{HashMap, HashSet}; use std::sync::Arc; -use arrow_array::{Array, OffsetSizeTrait, UnionArray}; +use arrow_array::{Array, ArrayRef, OffsetSizeTrait, UnionArray}; use arrow_buffer::{NullBuffer, ScalarBuffer}; use arrow_schema::{DataType, Field, UnionMode}; @@ -497,11 +497,11 @@ impl ArrayBase for MixedGeometryArray { "geoarrow.geometry" } - fn into_array_ref(self) -> Arc { + fn into_array_ref(self) -> ArrayRef { Arc::new(self.into_arrow()) } - fn to_array_ref(&self) -> arrow_array::ArrayRef { + fn to_array_ref(&self) -> ArrayRef { self.clone().into_array_ref() } diff --git a/rust/geoarrow/src/array/mixed/builder.rs b/rust/geoarrow/src/array/mixed/builder.rs index 2ce259c5..9e1cbe5d 100644 --- a/rust/geoarrow/src/array/mixed/builder.rs +++ b/rust/geoarrow/src/array/mixed/builder.rs @@ -447,6 +447,7 @@ impl<'a> MixedGeometryBuilder { todo!("push null geometry") } + /// Extend this builder with the given geometries pub fn extend_from_iter( &mut self, geoms: impl Iterator + 'a)>>, diff --git a/rust/geoarrow/src/array/mod.rs b/rust/geoarrow/src/array/mod.rs index b445f3b8..ce69c4a3 100644 --- a/rust/geoarrow/src/array/mod.rs +++ b/rust/geoarrow/src/array/mod.rs @@ -1,6 +1,59 @@ //! Implementations of immutable GeoArrow arrays plus builders to more easily create arrays. +//! +//! There are three primary types of structs in this module: arrays, builders, and capacity +//! counters. +//! +//! ## Arrays +//! +//! Arrays are immutable collections of geometries. +//! +//! These arrays implement the binary layout defined in the [GeoArrow specification](https://github.com/geoarrow/geoarrow). +//! +//! +//! +//! These include: +//! +//! - [`PointArray`] +//! - [`LineStringArray`] +//! 
- [`PolygonArray`] +//! - [`MultiPointArray`] +//! - [`MultiLineStringArray`] +//! - [`MultiPolygonArray`] +//! - [`GeometryArray`] +//! - [`GeometryCollectionArray`] +//! - [`RectArray`] +//! +//! ## Builders +//! +//! Builders are designed to make it easier to create arrays. +//! +//! There's a builder for each of the above array types: +//! +//! +//! - [`PointBuilder`] +//! - [`LineStringBuilder`] +//! - [`PolygonBuilder`] +//! - [`MultiPointBuilder`] +//! - [`MultiLineStringBuilder`] +//! - [`MultiPolygonBuilder`] +//! - [`GeometryBuilder`] +//! - [`GeometryCollectionBuilder`] +//! - [`RectBuilder`] +//! +//! Once you've finished adding geometries to a builder, it's `O(1)` to convert a builder to an +//! array, by calling `finish()`. +//! +//! ## Capacity Counters +//! +//! Underlying the builders are growable `Vec`s. E.g. you can think of a `PointBuilder` as a buffer of `x` coordinates and a buffer of `y` coordinates. +//! +//! The fastest and most memory-efficient way to construct an array from a set of known geometries +//! is to make a first pass over these geometries to count exactly how big each part of the Arrow +//! array must be, allocate _once_ for exactly what you need, and then fill those buffers in a +//! second pass. +//! 
-#![allow(missing_docs)] // FIXME +// #![allow(missing_docs)] // FIXME pub use binary::{WKBArray, WKBBuilder, WKBCapacity}; pub use cast::{AsChunkedNativeArray, AsNativeArray, AsSerializedArray}; diff --git a/rust/geoarrow/src/array/multilinestring/array.rs b/rust/geoarrow/src/array/multilinestring/array.rs index 517df2e3..a48b094f 100644 --- a/rust/geoarrow/src/array/multilinestring/array.rs +++ b/rust/geoarrow/src/array/multilinestring/array.rs @@ -14,7 +14,7 @@ use crate::scalar::{Geometry, MultiLineString}; use crate::trait_::{ArrayAccessor, GeometryArraySelfMethods, IntoArrow, NativeGeometryAccessor}; use crate::{ArrayBase, NativeArray}; use arrow::array::AsArray; -use arrow_array::{Array, GenericListArray, OffsetSizeTrait}; +use arrow_array::{Array, ArrayRef, GenericListArray, OffsetSizeTrait}; use arrow_buffer::{NullBuffer, OffsetBuffer}; use arrow_schema::{DataType, Field}; use geo_traits::MultiLineStringTrait; @@ -136,14 +136,17 @@ impl MultiLineStringArray { Field::new_list("linestrings", self.vertices_field(), false).into() } + /// Access the underlying coordinate buffer pub fn coords(&self) -> &CoordBuffer { &self.coords } + /// Access the underlying geometry offsets buffer pub fn geom_offsets(&self) -> &OffsetBuffer { &self.geom_offsets } + /// Access the underlying ring offsets buffer pub fn ring_offsets(&self) -> &OffsetBuffer { &self.ring_offsets } @@ -184,10 +187,12 @@ impl MultiLineStringArray { } } + /// Change the coordinate type of this array. pub fn to_coord_type(&self, coord_type: CoordType) -> Self { self.clone().into_coord_type(coord_type) } + /// Change the coordinate type of this array. 
pub fn into_coord_type(self, coord_type: CoordType) -> Self { Self::new( self.coords.into_coord_type(coord_type), @@ -218,11 +223,11 @@ impl ArrayBase for MultiLineStringArray { self.data_type.extension_name() } - fn into_array_ref(self) -> Arc { + fn into_array_ref(self) -> ArrayRef { Arc::new(self.into_arrow()) } - fn to_array_ref(&self) -> arrow_array::ArrayRef { + fn to_array_ref(&self) -> ArrayRef { self.clone().into_array_ref() } diff --git a/rust/geoarrow/src/array/multilinestring/builder.rs b/rust/geoarrow/src/array/multilinestring/builder.rs index 1c6c4d37..7c08b318 100644 --- a/rust/geoarrow/src/array/multilinestring/builder.rs +++ b/rust/geoarrow/src/array/multilinestring/builder.rs @@ -12,7 +12,7 @@ use crate::datatypes::Dimension; use crate::error::{GeoArrowError, Result}; use crate::scalar::WKB; use crate::trait_::{ArrayAccessor, GeometryArrayBuilder, IntoArrow}; -use arrow_array::{Array, GenericListArray, OffsetSizeTrait}; +use arrow_array::{ArrayRef, GenericListArray, OffsetSizeTrait}; use arrow_buffer::{NullBufferBuilder, OffsetBuffer}; use geo_traits::{CoordTrait, GeometryTrait, GeometryType, LineStringTrait, MultiLineStringTrait}; @@ -49,6 +49,7 @@ impl MultiLineStringBuilder { Self::new_with_options(dim, Default::default(), Default::default()) } + /// Creates a new empty [`MultiLineStringBuilder`] with the provided options. pub fn new_with_options( dim: Dimension, coord_type: CoordType, @@ -62,6 +63,7 @@ impl MultiLineStringBuilder { Self::with_capacity_and_options(dim, capacity, Default::default(), Default::default()) } + /// Creates a new empty [`MultiLineStringBuilder`] with the provided capacity and options. pub fn with_capacity_and_options( dim: Dimension, capacity: MultiLineStringCapacity, @@ -85,10 +87,10 @@ impl MultiLineStringBuilder { } } - /// Reserves capacity for at least `additional` more LineStrings to be inserted - /// in the given `Vec`. The collection may reserve more space to - /// speculatively avoid frequent reallocations. 
After calling `reserve`, - /// capacity will be greater than or equal to `self.len() + additional`. + /// Reserves capacity for at least `additional` more MultiLineStrings. + /// + /// The collection may reserve more space to speculatively avoid frequent reallocations. After + /// calling `reserve`, capacity will be greater than or equal to `self.len() + additional`. /// Does nothing if capacity is already sufficient. pub fn reserve(&mut self, additional: MultiLineStringCapacity) { self.coords.reserve(additional.coord_capacity); @@ -96,18 +98,17 @@ impl MultiLineStringBuilder { self.geom_offsets.reserve(additional.geom_capacity); } - /// Reserves the minimum capacity for at least `additional` more LineStrings to - /// be inserted in the given `Vec`. Unlike [`reserve`], this will not - /// deliberately over-allocate to speculatively avoid frequent allocations. - /// After calling `reserve_exact`, capacity will be greater than or equal to - /// `self.len() + additional`. Does nothing if the capacity is already - /// sufficient. + /// Reserves the minimum capacity for at least `additional` more MultiLineStrings. + /// + /// Unlike [`reserve`], this will not deliberately over-allocate to speculatively avoid + /// frequent allocations. After calling `reserve_exact`, capacity will be greater than or equal + /// to `self.len() + additional`. Does nothing if the capacity is already sufficient. /// /// Note that the allocator may give the collection more space than it /// requests. Therefore, capacity can not be relied upon to be precisely /// minimal. Prefer [`reserve`] if future insertions are expected. 
/// - /// [`reserve`]: Vec::reserve + /// [`reserve`]: Self::reserve pub fn reserve_exact(&mut self, additional: MultiLineStringCapacity) { self.coords.reserve_exact(additional.coord_capacity); self.ring_offsets.reserve_exact(additional.ring_capacity); @@ -158,10 +159,6 @@ impl MultiLineStringBuilder { ) } - pub fn into_array_ref(self) -> Arc { - Arc::new(self.into_arrow()) - } - /// Push a raw offset to the underlying geometry offsets buffer. /// /// # Safety @@ -187,10 +184,12 @@ impl MultiLineStringBuilder { Ok(()) } + /// Consume the builder and convert to an immutable [`MultiLineStringArray`] pub fn finish(self) -> MultiLineStringArray { self.into() } + /// Creates a new builder with a capacity inferred by the provided iterator. pub fn with_capacity_from_iter<'a>( geoms: impl Iterator>, dim: Dimension, @@ -203,6 +202,8 @@ impl MultiLineStringBuilder { ) } + /// Creates a new builder with the provided options and a capacity inferred by the provided + /// iterator. pub fn with_capacity_and_options_from_iter<'a>( geoms: impl Iterator>, dim: Dimension, @@ -213,6 +214,8 @@ impl MultiLineStringBuilder { Self::with_capacity_and_options(dim, counter, coord_type, metadata) } + /// Reserve more space in the underlying buffers with the capacity inferred from the provided + /// geometries. pub fn reserve_from_iter<'a>( &mut self, geoms: impl Iterator>, @@ -221,6 +224,8 @@ impl MultiLineStringBuilder { self.reserve(counter) } + /// Reserve more space in the underlying buffers with the capacity inferred from the provided + /// geometries. pub fn reserve_exact_from_iter<'a>( &mut self, geoms: impl Iterator>, @@ -302,6 +307,9 @@ impl MultiLineStringBuilder { Ok(()) } + /// Add a new geometry to this builder + /// + /// This will error if the geometry type is not LineString or MultiLineString. 
#[inline] pub fn push_geometry(&mut self, value: Option<&impl GeometryTrait>) -> Result<()> { if let Some(value) = value { @@ -316,6 +324,7 @@ impl MultiLineStringBuilder { Ok(()) } + /// Extend this builder with the given geometries pub fn extend_from_iter<'a>( &mut self, geoms: impl Iterator + 'a)>>, @@ -326,6 +335,7 @@ impl MultiLineStringBuilder { .unwrap(); } + /// Extend this builder with the given geometries pub fn extend_from_geometry_iter<'a>( &mut self, geoms: impl Iterator + 'a)>>, @@ -354,6 +364,7 @@ impl MultiLineStringBuilder { self.validity.append(false); } + /// Construct a new builder, pre-filling it with the provided geometries pub fn from_multi_line_strings( geoms: &[impl MultiLineStringTrait], dim: Dimension, @@ -370,6 +381,7 @@ impl MultiLineStringBuilder { array } + /// Construct a new builder, pre-filling it with the provided geometries pub fn from_nullable_multi_line_strings( geoms: &[Option>], dim: Dimension, @@ -386,6 +398,7 @@ impl MultiLineStringBuilder { array } + /// Construct a new builder, pre-filling it with the provided geometries pub fn from_nullable_geometries( geoms: &[Option>], dim: Dimension, @@ -443,7 +456,7 @@ impl GeometryArrayBuilder for MultiLineStringBuilder { &self.validity } - fn into_array_ref(self) -> Arc { + fn into_array_ref(self) -> ArrayRef { Arc::new(self.into_arrow()) } diff --git a/rust/geoarrow/src/array/multilinestring/capacity.rs b/rust/geoarrow/src/array/multilinestring/capacity.rs index d97a2254..67810aac 100644 --- a/rust/geoarrow/src/array/multilinestring/capacity.rs +++ b/rust/geoarrow/src/array/multilinestring/capacity.rs @@ -35,18 +35,22 @@ impl MultiLineStringCapacity { self.coord_capacity == 0 && self.ring_capacity == 0 && self.geom_capacity == 0 } + /// The coordinate buffer capacity pub fn coord_capacity(&self) -> usize { self.coord_capacity } + /// The ring offset buffer capacity pub fn ring_capacity(&self) -> usize { self.ring_capacity } + /// The geometry offset buffer capacity pub fn 
geom_capacity(&self) -> usize { self.geom_capacity } + /// Add the capacity of the given LineString #[inline] pub fn add_line_string(&mut self, maybe_line_string: Option<&impl LineStringTrait>) { self.geom_capacity += 1; @@ -57,6 +61,7 @@ impl MultiLineStringCapacity { } } + /// Add the capacity of the given MultiLineString #[inline] pub fn add_multi_line_string(&mut self, multi_line_string: Option<&impl MultiLineStringTrait>) { self.geom_capacity += 1; @@ -71,6 +76,10 @@ impl MultiLineStringCapacity { } } + /// Add the capacity of the given Geometry + /// + /// The type of the geometry must be either LineString or MultiLineString + #[inline] pub fn add_geometry(&mut self, value: Option<&impl GeometryTrait>) -> Result<()> { if let Some(geom) = value { match geom.as_type() { @@ -84,12 +93,7 @@ impl MultiLineStringCapacity { Ok(()) } - pub fn add_line_string_capacity(&mut self, line_string_capacity: LineStringCapacity) { - self.coord_capacity += line_string_capacity.coord_capacity(); - self.ring_capacity += line_string_capacity.geom_capacity(); - self.geom_capacity += line_string_capacity.geom_capacity(); - } - + /// Construct a new counter pre-filled with the given MultiLineStrings pub fn from_multi_line_strings<'a>( geoms: impl Iterator>, ) -> Self { @@ -100,6 +104,7 @@ impl MultiLineStringCapacity { counter } + /// Construct a new counter pre-filled with the given geometries pub fn from_geometries<'a>( geoms: impl Iterator>, ) -> Result { diff --git a/rust/geoarrow/src/array/multipoint/array.rs b/rust/geoarrow/src/array/multipoint/array.rs index 06fe7862..7358d97e 100644 --- a/rust/geoarrow/src/array/multipoint/array.rs +++ b/rust/geoarrow/src/array/multipoint/array.rs @@ -15,7 +15,7 @@ use crate::scalar::{Geometry, MultiPoint}; use crate::trait_::{ArrayAccessor, GeometryArraySelfMethods, IntoArrow, NativeGeometryAccessor}; use crate::{ArrayBase, NativeArray}; use arrow::array::AsArray; -use arrow_array::{Array, GenericListArray, OffsetSizeTrait}; +use 
arrow_array::{Array, ArrayRef, GenericListArray, OffsetSizeTrait}; use arrow_buffer::{NullBuffer, OffsetBuffer}; use arrow_schema::{DataType, Field}; use geo_traits::MultiPointTrait; @@ -111,14 +111,17 @@ impl MultiPointArray { Field::new("points", self.coords.storage_type(), false).into() } + /// Access the underlying coord buffer pub fn coords(&self) -> &CoordBuffer { &self.coords } - pub fn into_inner(self) -> (CoordBuffer, OffsetBuffer, Option) { + #[allow(dead_code)] + pub(crate) fn into_inner(self) -> (CoordBuffer, OffsetBuffer, Option) { (self.coords, self.geom_offsets, self.validity) } + /// Access the underlying geometry offsets buffer pub fn geom_offsets(&self) -> &OffsetBuffer { &self.geom_offsets } @@ -167,10 +170,12 @@ impl MultiPointArray { } } + /// Change the coordinate type of this array. pub fn to_coord_type(&self, coord_type: CoordType) -> Self { self.clone().into_coord_type(coord_type) } + /// Change the coordinate type of this array. pub fn into_coord_type(self, coord_type: CoordType) -> Self { Self::new( self.coords.into_coord_type(coord_type), @@ -200,11 +205,11 @@ impl ArrayBase for MultiPointArray { self.data_type.extension_name() } - fn into_array_ref(self) -> Arc { + fn into_array_ref(self) -> ArrayRef { Arc::new(self.into_arrow()) } - fn to_array_ref(&self) -> arrow_array::ArrayRef { + fn to_array_ref(&self) -> ArrayRef { self.clone().into_array_ref() } diff --git a/rust/geoarrow/src/array/multipoint/builder.rs b/rust/geoarrow/src/array/multipoint/builder.rs index 9c8a9dc5..38d8d531 100644 --- a/rust/geoarrow/src/array/multipoint/builder.rs +++ b/rust/geoarrow/src/array/multipoint/builder.rs @@ -12,7 +12,7 @@ use crate::datatypes::Dimension; use crate::error::{GeoArrowError, Result}; use crate::scalar::WKB; use crate::trait_::{ArrayAccessor, GeometryArrayBuilder, IntoArrow}; -use arrow_array::{Array, GenericListArray, OffsetSizeTrait}; +use arrow_array::{ArrayRef, GenericListArray, OffsetSizeTrait}; use arrow_buffer::NullBufferBuilder; 
use geo_traits::{CoordTrait, GeometryTrait, GeometryType, MultiPointTrait, PointTrait}; @@ -37,7 +37,7 @@ impl MultiPointBuilder { Self::new_with_options(dim, Default::default(), Default::default()) } - /// Creates a new [`MultiPointBuilder`] with a specified [`CoordType`] + /// Creates a new [`MultiPointBuilder`] with options pub fn new_with_options( dim: Dimension, coord_type: CoordType, @@ -45,12 +45,13 @@ impl MultiPointBuilder { ) -> Self { Self::with_capacity_and_options(dim, Default::default(), coord_type, metadata) } + /// Creates a new [`MultiPointBuilder`] with a capacity. pub fn with_capacity(dim: Dimension, capacity: MultiPointCapacity) -> Self { Self::with_capacity_and_options(dim, capacity, Default::default(), Default::default()) } - // with capacity and options enables us to write with_capacity based on this method + /// Creates a new [`MultiPointBuilder`] with capacity and options pub fn with_capacity_and_options( dim: Dimension, capacity: MultiPointCapacity, @@ -73,28 +74,27 @@ impl MultiPointBuilder { } } - /// Reserves capacity for at least `additional` more MultiPoints to be inserted - /// in the given `Vec`. The collection may reserve more space to - /// speculatively avoid frequent reallocations. After calling `reserve`, - /// capacity will be greater than or equal to `self.len() + additional`. + /// Reserves capacity for at least `additional` more MultiPoints. + /// + /// The collection may reserve more space to speculatively avoid frequent reallocations. After + /// calling `reserve`, capacity will be greater than or equal to `self.len() + additional`. /// Does nothing if capacity is already sufficient. pub fn reserve(&mut self, capacity: MultiPointCapacity) { self.coords.reserve(capacity.coord_capacity); self.geom_offsets.reserve(capacity.geom_capacity); } - /// Reserves the minimum capacity for at least `additional` more MultiPoints to - /// be inserted in the given `Vec`. 
Unlike [`reserve`], this will not - /// deliberately over-allocate to speculatively avoid frequent allocations. - /// After calling `reserve_exact`, capacity will be greater than or equal to - /// `self.len() + additional`. Does nothing if the capacity is already - /// sufficient. + /// Reserves the minimum capacity for at least `additional` more MultiPoints. + /// + /// Unlike [`reserve`], this will not deliberately over-allocate to speculatively avoid + /// frequent allocations. After calling `reserve_exact`, capacity will be greater than or equal + /// to `self.len() + additional`. Does nothing if the capacity is already sufficient. /// /// Note that the allocator may give the collection more space than it /// requests. Therefore, capacity can not be relied upon to be precisely /// minimal. Prefer [`reserve`] if future insertions are expected. /// - /// [`reserve`]: Vec::reserve + /// [`reserve`]: Self::reserve pub fn reserve_exact(&mut self, capacity: MultiPointCapacity) { self.coords.reserve_exact(capacity.coord_capacity); self.geom_offsets.reserve_exact(capacity.geom_capacity); @@ -136,14 +136,12 @@ impl MultiPointBuilder { (self.coords, self.geom_offsets, self.validity) } - pub fn into_array_ref(self) -> Arc { - Arc::new(self.into_arrow()) - } - + /// Consume the builder and convert to an immutable [`MultiPointArray`] pub fn finish(self) -> MultiPointArray { self.into() } + /// Creates a new builder with a capacity inferred by the provided iterator. pub fn with_capacity_from_iter<'a>( geoms: impl Iterator>, dim: Dimension, @@ -156,6 +154,8 @@ impl MultiPointBuilder { ) } + /// Creates a new builder with the provided options and a capacity inferred by the provided + /// iterator. 
pub fn with_capacity_and_options_from_iter<'a>( geoms: impl Iterator>, dim: Dimension, @@ -166,6 +166,8 @@ impl MultiPointBuilder { Self::with_capacity_and_options(dim, counter, coord_type, metadata) } + /// Reserve more space in the underlying buffers with the capacity inferred from the provided + /// geometries. pub fn reserve_from_iter<'a>( &mut self, geoms: impl Iterator>, @@ -174,6 +176,8 @@ impl MultiPointBuilder { self.reserve(counter) } + /// Reserve more space in the underlying buffers with the capacity inferred from the provided + /// geometries. pub fn reserve_exact_from_iter<'a>( &mut self, geoms: impl Iterator>, @@ -182,6 +186,7 @@ impl MultiPointBuilder { self.reserve_exact(counter) } + /// Extend this builder with the given geometries pub fn extend_from_iter<'a>( &mut self, geoms: impl Iterator + 'a)>>, @@ -192,6 +197,7 @@ impl MultiPointBuilder { .unwrap(); } + /// Extend this builder with the given geometries pub fn extend_from_geometry_iter<'a>( &mut self, geoms: impl Iterator + 'a)>>, @@ -239,6 +245,9 @@ impl MultiPointBuilder { Ok(()) } + /// Add a new geometry to this builder + /// + /// This will error if the geometry type is not Point or MultiPoint. 
#[inline] pub fn push_geometry(&mut self, value: Option<&impl GeometryTrait>) -> Result<()> { if let Some(value) = value { @@ -295,6 +304,7 @@ impl MultiPointBuilder { self.validity.append(false); } + /// Construct a new builder, pre-filling it with the provided geometries pub fn from_multi_points( geoms: &[impl MultiPointTrait], dim: Dimension, @@ -311,6 +321,7 @@ impl MultiPointBuilder { array } + /// Construct a new builder, pre-filling it with the provided geometries pub fn from_nullable_multi_points( geoms: &[Option>], dim: Dimension, @@ -327,6 +338,7 @@ impl MultiPointBuilder { array } + /// Construct a new builder, pre-filling it with the provided geometries pub fn from_nullable_geometries( geoms: &[Option>], dim: Dimension, @@ -389,8 +401,8 @@ impl GeometryArrayBuilder for MultiPointBuilder { &self.validity } - fn into_array_ref(self) -> Arc { - self.into_array_ref() + fn into_array_ref(self) -> ArrayRef { + Arc::new(self.into_arrow()) } fn coord_type(&self) -> CoordType { diff --git a/rust/geoarrow/src/array/multipoint/capacity.rs b/rust/geoarrow/src/array/multipoint/capacity.rs index 1453fca7..64db4649 100644 --- a/rust/geoarrow/src/array/multipoint/capacity.rs +++ b/rust/geoarrow/src/array/multipoint/capacity.rs @@ -1,4 +1,4 @@ -use std::ops::Add; +use std::ops::{Add, AddAssign}; use crate::error::{GeoArrowError, Result}; use geo_traits::{GeometryTrait, GeometryType, MultiPointTrait, PointTrait}; @@ -31,6 +31,7 @@ impl MultiPointCapacity { self.coord_capacity == 0 && self.geom_capacity == 0 } + /// Add the capacity of a point #[inline] pub fn add_point(&mut self, point: Option<&impl PointTrait>) { self.geom_capacity += 1; @@ -44,6 +45,7 @@ impl MultiPointCapacity { self.coord_capacity += 1; } + /// Add the capacity of the given MultiPoint #[inline] pub fn add_multi_point(&mut self, maybe_multi_point: Option<&impl MultiPointTrait>) { self.geom_capacity += 1; @@ -58,6 +60,9 @@ impl MultiPointCapacity { self.coord_capacity += multi_point.num_points(); } + 
/// Add the capacity of the given Geometry + /// + /// The type of the geometry must be either Point or MultiPoint #[inline] pub fn add_geometry(&mut self, value: Option<&impl GeometryTrait>) -> Result<()> { self.geom_capacity += 1; @@ -72,19 +77,22 @@ impl MultiPointCapacity { Ok(()) } - pub fn add_point_capacity(&mut self, point_capacity: usize) { + pub(crate) fn add_point_capacity(&mut self, point_capacity: usize) { self.coord_capacity += point_capacity; self.geom_capacity += point_capacity; } + /// The coordinate buffer capacity pub fn coord_capacity(&self) -> usize { self.coord_capacity } + /// The geometry offsets buffer capacity pub fn geom_capacity(&self) -> usize { self.geom_capacity } + /// Construct a new counter pre-filled with the given MultiPoints pub fn from_multi_points<'a>( geoms: impl Iterator>, ) -> Self { @@ -97,6 +105,7 @@ impl MultiPointCapacity { counter } + /// Construct a new counter pre-filled with the given geometries pub fn from_geometries<'a>( geoms: impl Iterator>, ) -> Result { @@ -130,3 +139,10 @@ impl Add for MultiPointCapacity { Self::new(coord_capacity, geom_capacity) } } + +impl AddAssign for MultiPointCapacity { + fn add_assign(&mut self, rhs: Self) { + self.coord_capacity += rhs.coord_capacity; + self.geom_capacity += rhs.geom_capacity; + } +} diff --git a/rust/geoarrow/src/array/multipolygon/array.rs b/rust/geoarrow/src/array/multipolygon/array.rs index 7d2d247b..ec4f6d70 100644 --- a/rust/geoarrow/src/array/multipolygon/array.rs +++ b/rust/geoarrow/src/array/multipolygon/array.rs @@ -13,7 +13,7 @@ use crate::scalar::{Geometry, MultiPolygon}; use crate::trait_::{ArrayAccessor, GeometryArraySelfMethods, IntoArrow, NativeGeometryAccessor}; use crate::{ArrayBase, NativeArray}; use arrow::array::AsArray; -use arrow_array::{Array, GenericListArray, OffsetSizeTrait}; +use arrow_array::{Array, ArrayRef, GenericListArray, OffsetSizeTrait}; use geo_traits::MultiPolygonTrait; use arrow_buffer::{NullBuffer, OffsetBuffer}; @@ -165,11 
+165,13 @@ impl MultiPolygonArray { Field::new_list(name, self.rings_field(), false).into() } + /// Access the underlying coordinate buffer pub fn coords(&self) -> &CoordBuffer { &self.coords } - pub fn into_inner( + #[allow(dead_code)] + pub(crate) fn into_inner( self, ) -> ( CoordBuffer, @@ -185,14 +187,17 @@ impl MultiPolygonArray { ) } + /// Access the underlying geometry offsets buffer pub fn geom_offsets(&self) -> &OffsetBuffer { &self.geom_offsets } + /// Access the underlying polygon offsets buffer pub fn polygon_offsets(&self) -> &OffsetBuffer { &self.polygon_offsets } + /// Access the underlying ring offsets buffer pub fn ring_offsets(&self) -> &OffsetBuffer { &self.ring_offsets } @@ -235,10 +240,12 @@ impl MultiPolygonArray { } } + /// Change the coordinate type of this array. pub fn to_coord_type(&self, coord_type: CoordType) -> Self { self.clone().into_coord_type(coord_type) } + /// Change the coordinate type of this array. pub fn into_coord_type(self, coord_type: CoordType) -> Self { Self::new( self.coords.into_coord_type(coord_type), @@ -270,11 +277,11 @@ impl ArrayBase for MultiPolygonArray { self.data_type.extension_name() } - fn into_array_ref(self) -> Arc { + fn into_array_ref(self) -> ArrayRef { Arc::new(self.into_arrow()) } - fn to_array_ref(&self) -> arrow_array::ArrayRef { + fn to_array_ref(&self) -> ArrayRef { self.clone().into_array_ref() } diff --git a/rust/geoarrow/src/array/multipolygon/builder.rs b/rust/geoarrow/src/array/multipolygon/builder.rs index 4247f5da..4a004869 100644 --- a/rust/geoarrow/src/array/multipolygon/builder.rs +++ b/rust/geoarrow/src/array/multipolygon/builder.rs @@ -12,7 +12,7 @@ use crate::datatypes::Dimension; use crate::error::{GeoArrowError, Result}; use crate::scalar::WKB; use crate::trait_::{ArrayAccessor, GeometryArrayBuilder, IntoArrow}; -use arrow_array::{Array, GenericListArray, OffsetSizeTrait}; +use arrow_array::{ArrayRef, GenericListArray, OffsetSizeTrait}; use arrow_buffer::{NullBufferBuilder, 
OffsetBuffer}; use geo_traits::{ CoordTrait, GeometryTrait, GeometryType, LineStringTrait, MultiPolygonTrait, PolygonTrait, @@ -54,6 +54,7 @@ impl MultiPolygonBuilder { Self::new_with_options(dim, Default::default(), Default::default()) } + /// Creates a new empty [`MultiPolygonBuilder`] with the provided options. pub fn new_with_options( dim: Dimension, coord_type: CoordType, @@ -67,6 +68,7 @@ impl MultiPolygonBuilder { Self::with_capacity_and_options(dim, capacity, Default::default(), Default::default()) } + /// Creates a new empty [`MultiPolygonBuilder`] with the provided capacity and options. pub fn with_capacity_and_options( dim: Dimension, capacity: MultiPolygonCapacity, @@ -92,10 +94,10 @@ impl MultiPolygonBuilder { } } - /// Reserves capacity for at least `additional` more LineStrings to be inserted - /// in the given `Vec`. The collection may reserve more space to - /// speculatively avoid frequent reallocations. After calling `reserve`, - /// capacity will be greater than or equal to `self.len() + additional`. + /// Reserves capacity for at least `additional` more MultiPolygons. + /// + /// The collection may reserve more space to speculatively avoid frequent reallocations. After + /// calling `reserve`, capacity will be greater than or equal to `self.len() + additional`. /// Does nothing if capacity is already sufficient. pub fn reserve(&mut self, additional: MultiPolygonCapacity) { self.coords.reserve(additional.coord_capacity); @@ -104,18 +106,17 @@ impl MultiPolygonBuilder { self.geom_offsets.reserve(additional.geom_capacity); } - /// Reserves the minimum capacity for at least `additional` more LineStrings to - /// be inserted in the given `Vec`. Unlike [`reserve`], this will not - /// deliberately over-allocate to speculatively avoid frequent allocations. - /// After calling `reserve_exact`, capacity will be greater than or equal to - /// `self.len() + additional`. Does nothing if the capacity is already - /// sufficient. 
+ /// Reserves the minimum capacity for at least `additional` more MultiPolygons. + /// + /// Unlike [`reserve`], this will not deliberately over-allocate to speculatively avoid + /// frequent allocations. After calling `reserve_exact`, capacity will be greater than or equal + /// to `self.len() + additional`. Does nothing if the capacity is already sufficient. /// /// Note that the allocator may give the collection more space than it /// requests. Therefore, capacity can not be relied upon to be precisely /// minimal. Prefer [`reserve`] if future insertions are expected. /// - /// [`reserve`]: Vec::reserve + /// [`reserve`]: Self::reserve pub fn reserve_exact(&mut self, additional: MultiPolygonCapacity) { self.coords.reserve_exact(additional.coord_capacity); self.ring_offsets.reserve_exact(additional.ring_capacity); @@ -173,14 +174,12 @@ impl MultiPolygonBuilder { ) } - pub fn into_array_ref(self) -> Arc { - Arc::new(self.into_arrow()) - } - + /// Consume the builder and convert to an immutable [`MultiPolygonArray`] pub fn finish(self) -> MultiPolygonArray { self.into() } + /// Creates a new builder with a capacity inferred by the provided iterator. pub fn with_capacity_from_iter<'a>( geoms: impl Iterator>, dim: Dimension, @@ -193,6 +192,8 @@ impl MultiPolygonBuilder { ) } + /// Creates a new builder with the provided options and a capacity inferred by the provided + /// iterator. pub fn with_capacity_and_options_from_iter<'a>( geoms: impl Iterator>, dim: Dimension, @@ -203,6 +204,8 @@ impl MultiPolygonBuilder { Self::with_capacity_and_options(dim, capacity, coord_type, metadata) } + /// Reserve more space in the underlying buffers with the capacity inferred from the provided + /// geometries. pub fn reserve_from_iter<'a>( &mut self, geoms: impl Iterator>, @@ -211,6 +214,8 @@ impl MultiPolygonBuilder { self.reserve(capacity) } + /// Reserve more space in the underlying buffers with the capacity inferred from the provided + /// geometries. 
pub fn reserve_exact_from_iter<'a>( &mut self, geoms: impl Iterator>, @@ -317,6 +322,9 @@ impl MultiPolygonBuilder { Ok(()) } + /// Add a new geometry to this builder + /// + /// This will error if the geometry type is not Polygon or MultiPolygon. #[inline] pub fn push_geometry(&mut self, value: Option<&impl GeometryTrait>) -> Result<()> { if let Some(value) = value { @@ -332,6 +340,7 @@ impl MultiPolygonBuilder { Ok(()) } + /// Extend this builder with the given geometries pub fn extend_from_iter<'a>( &mut self, geoms: impl Iterator + 'a)>>, @@ -342,6 +351,7 @@ impl MultiPolygonBuilder { .unwrap(); } + /// Extend this builder with the given geometries pub fn extend_from_geometry_iter<'a>( &mut self, geoms: impl Iterator + 'a)>>, @@ -414,6 +424,7 @@ impl MultiPolygonBuilder { self.validity.append(false); } + /// Construct a new builder, pre-filling it with the provided geometries pub fn from_multi_polygons( geoms: &[impl MultiPolygonTrait], dim: Dimension, @@ -430,6 +441,7 @@ impl MultiPolygonBuilder { array } + /// Construct a new builder, pre-filling it with the provided geometries pub fn from_nullable_multi_polygons( geoms: &[Option>], dim: Dimension, @@ -446,6 +458,7 @@ impl MultiPolygonBuilder { array } + /// Construct a new builder, pre-filling it with the provided geometries pub fn from_nullable_geometries( geoms: &[Option>], dim: Dimension, @@ -514,7 +527,7 @@ impl GeometryArrayBuilder for MultiPolygonBuilder { &self.validity } - fn into_array_ref(self) -> Arc { + fn into_array_ref(self) -> ArrayRef { Arc::new(self.into_arrow()) } diff --git a/rust/geoarrow/src/array/multipolygon/capacity.rs b/rust/geoarrow/src/array/multipolygon/capacity.rs index 653f0850..ba8ff54d 100644 --- a/rust/geoarrow/src/array/multipolygon/capacity.rs +++ b/rust/geoarrow/src/array/multipolygon/capacity.rs @@ -44,22 +44,27 @@ impl MultiPolygonCapacity { && self.geom_capacity == 0 } + /// The coordinate buffer capacity pub fn coord_capacity(&self) -> usize { self.coord_capacity } + 
/// The ring offset buffer capacity pub fn ring_capacity(&self) -> usize { self.ring_capacity } + /// The polygon offset buffer capacity pub fn polygon_capacity(&self) -> usize { self.polygon_capacity } + /// The geometry offset buffer capacity pub fn geom_capacity(&self) -> usize { self.geom_capacity } + /// Add the capacity of the given Polygon #[inline] pub fn add_polygon<'a>(&mut self, polygon: Option<&'a (impl PolygonTrait + 'a)>) { self.geom_capacity += 1; @@ -82,6 +87,7 @@ impl MultiPolygonCapacity { } } + /// Add the capacity of the given MultiPolygon #[inline] pub fn add_multi_polygon<'a>( &mut self, @@ -110,6 +116,10 @@ impl MultiPolygonCapacity { } } + /// Add the capacity of the given Geometry + /// + /// The type of the geometry must be either Polygon or MultiPolygon + #[inline] pub fn add_geometry(&mut self, value: Option<&impl GeometryTrait>) -> Result<()> { if let Some(geom) = value { match geom.as_type() { @@ -123,16 +133,7 @@ impl MultiPolygonCapacity { Ok(()) } - pub fn add_polygon_capacity(&mut self, capacity: PolygonCapacity) { - // NOTE: I think this will overallocate if there are null values? - // Because it assumes that every geometry has exactly one polygon, which won't be true if - // there are null values? 
- self.coord_capacity += capacity.coord_capacity(); - self.ring_capacity += capacity.ring_capacity(); - self.polygon_capacity += capacity.geom_capacity(); - self.geom_capacity += capacity.geom_capacity(); - } - + /// Construct a new counter pre-filled with the given MultiPolygons pub fn from_multi_polygons<'a>( geoms: impl Iterator>, ) -> Self { @@ -143,6 +144,7 @@ impl MultiPolygonCapacity { counter } + /// Construct a new counter pre-filled with the given geometries pub fn from_geometries<'a>( geoms: impl Iterator>, ) -> Result { diff --git a/rust/geoarrow/src/array/point/array.rs b/rust/geoarrow/src/array/point/array.rs index 4fa11225..f21f0810 100644 --- a/rust/geoarrow/src/array/point/array.rs +++ b/rust/geoarrow/src/array/point/array.rs @@ -30,6 +30,9 @@ pub struct PointArray { pub(crate) validity: Option, } +/// Perform checks: +/// +/// - Validity mask must have the same length as the coordinates. pub(super) fn check(coords: &CoordBuffer, validity_len: Option) -> Result<()> { if validity_len.map_or(false, |len| len != coords.len()) { return Err(GeoArrowError::General( @@ -82,10 +85,14 @@ impl PointArray { }) } + /// Access the underlying coordinate buffer + /// + /// Note that some coordinates may be null, depending on the value of [`Self::nulls`] pub fn coords(&self) -> &CoordBuffer { &self.coords } + /// Consume self and access the underlying coordinate buffer and validity. pub fn into_inner(self) -> (CoordBuffer, Option) { (self.coords, self.validity) } @@ -118,10 +125,12 @@ impl PointArray { } } + /// Change the coordinate type of this array. pub fn to_coord_type(&self, coord_type: CoordType) -> Self { self.clone().into_coord_type(coord_type) } + /// Change the coordinate type of this array.
pub fn into_coord_type(self, coord_type: CoordType) -> Self { Self::new( self.coords.into_coord_type(coord_type), @@ -154,7 +163,7 @@ impl ArrayBase for PointArray { self.into_arrow() } - fn to_array_ref(&self) -> arrow_array::ArrayRef { + fn to_array_ref(&self) -> ArrayRef { self.clone().into_array_ref() } @@ -241,7 +250,7 @@ impl<'a> ArrayAccessor<'a> for PointArray { } impl IntoArrow for PointArray { - type ArrowArray = Arc; + type ArrowArray = ArrayRef; fn into_arrow(self) -> Self::ArrowArray { let validity = self.validity; diff --git a/rust/geoarrow/src/array/point/builder.rs b/rust/geoarrow/src/array/point/builder.rs index c9d61f8f..8f8dd37f 100644 --- a/rust/geoarrow/src/array/point/builder.rs +++ b/rust/geoarrow/src/array/point/builder.rs @@ -11,7 +11,7 @@ use crate::datatypes::Dimension; use crate::error::{GeoArrowError, Result}; use crate::scalar::WKB; use crate::trait_::{ArrayAccessor, GeometryArrayBuilder, IntoArrow}; -use arrow_array::{Array, OffsetSizeTrait}; +use arrow_array::{ArrayRef, OffsetSizeTrait}; use arrow_buffer::NullBufferBuilder; use geo_traits::{CoordTrait, GeometryTrait, GeometryType, MultiPointTrait, PointTrait}; @@ -21,8 +21,8 @@ use geo_traits::{CoordTrait, GeometryTrait, GeometryType, MultiPointTrait, Point #[derive(Debug)] pub struct PointBuilder { metadata: Arc, - pub coords: CoordBufferBuilder, - pub validity: NullBufferBuilder, + pub(crate) coords: CoordBufferBuilder, + pub(crate) validity: NullBufferBuilder, } impl PointBuilder { @@ -31,6 +31,7 @@ impl PointBuilder { Self::new_with_options(dim, Default::default(), Default::default()) } + /// Creates a new empty [`PointBuilder`] with the provided options. pub fn new_with_options( dim: Dimension, coord_type: CoordType, @@ -44,7 +45,7 @@ impl PointBuilder { Self::with_capacity_and_options(dim, capacity, Default::default(), Default::default()) } - /// Creates a new [`PointBuilder`] with a capacity. + /// Creates a new empty [`PointBuilder`] with the provided capacity and options. 
pub fn with_capacity_and_options( dim: Dimension, capacity: usize, @@ -75,18 +76,17 @@ impl PointBuilder { self.coords.reserve(additional); } - /// Reserves the minimum capacity for at least `additional` more points to - /// be inserted in the given `Vec`. Unlike [`reserve`], this will not - /// deliberately over-allocate to speculatively avoid frequent allocations. - /// After calling `reserve_exact`, capacity will be greater than or equal to - /// `self.len() + additional`. Does nothing if the capacity is already - /// sufficient. + /// Reserves the minimum capacity for at least `additional` more points. + /// + /// Unlike [`reserve`], this will not deliberately over-allocate to speculatively avoid + /// frequent allocations. After calling `reserve_exact`, capacity will be greater than or equal + /// to `self.len() + additional`. Does nothing if the capacity is already sufficient. /// /// Note that the allocator may give the collection more space than it /// requests. Therefore, capacity can not be relied upon to be precisely /// minimal. Prefer [`reserve`] if future insertions are expected. /// - /// [`reserve`]: Vec::reserve + /// [`reserve`]: Self::reserve pub fn reserve_exact(&mut self, additional: usize) { self.coords.reserve_exact(additional); } @@ -120,6 +120,7 @@ impl PointBuilder { (self.coords, self.validity) } + /// Consume the builder and convert to an immutable [`PointArray`] pub fn finish(self) -> PointArray { self.into() } @@ -190,6 +191,9 @@ impl PointBuilder { self.validity.append_null(); } + /// Add a new geometry to this builder + /// + /// This will error if the geometry type is not Point or a MultiPoint with length 1. 
#[inline] pub fn push_geometry(&mut self, value: Option<&impl GeometryTrait>) -> Result<()> { if let Some(value) = value { @@ -210,6 +214,7 @@ impl PointBuilder { Ok(()) } + /// Extend this builder with the given geometries pub fn extend_from_iter<'a>( &mut self, geoms: impl Iterator + 'a)>>, @@ -219,6 +224,7 @@ impl PointBuilder { .for_each(|maybe_polygon| self.push_point(maybe_polygon)); } + /// Extend this builder with the given geometries pub fn extend_from_geometry_iter<'a>( &mut self, geoms: impl Iterator + 'a)>>, @@ -227,6 +233,7 @@ impl PointBuilder { Ok(()) } + /// Construct a new builder, pre-filling it with the provided geometries pub fn from_points<'a>( geoms: impl ExactSizeIterator + 'a)>, dim: Dimension, @@ -241,6 +248,7 @@ impl PointBuilder { mutable_array } + /// Construct a new builder, pre-filling it with the provided geometries pub fn from_nullable_points<'a>( geoms: impl ExactSizeIterator + 'a)>>, dim: Dimension, @@ -255,6 +263,7 @@ impl PointBuilder { mutable_array } + /// Construct a new builder, pre-filling it with the provided geometries pub fn from_nullable_geometries( geoms: &[Option>], dim: Dimension, @@ -315,7 +324,7 @@ impl GeometryArrayBuilder for PointBuilder { &self.validity } - fn into_array_ref(self) -> Arc { + fn into_array_ref(self) -> ArrayRef { self.into_arrow() } @@ -335,7 +344,7 @@ impl Default for PointBuilder { } impl IntoArrow for PointBuilder { - type ArrowArray = Arc; + type ArrowArray = ArrayRef; fn into_arrow(self) -> Self::ArrowArray { let point_array: PointArray = self.into(); @@ -350,7 +359,7 @@ impl From for PointArray { } } -impl From for Arc { +impl From for ArrayRef { fn from(arr: PointBuilder) -> Self { arr.into_array_ref() } diff --git a/rust/geoarrow/src/array/polygon/array.rs b/rust/geoarrow/src/array/polygon/array.rs index a254750f..b9df856e 100644 --- a/rust/geoarrow/src/array/polygon/array.rs +++ b/rust/geoarrow/src/array/polygon/array.rs @@ -15,8 +15,8 @@ use crate::scalar::{Geometry, Polygon}; use 
crate::trait_::{ArrayAccessor, GeometryArraySelfMethods, IntoArrow, NativeGeometryAccessor}; use crate::{ArrayBase, NativeArray}; use arrow::array::AsArray; -use arrow_array::GenericListArray; use arrow_array::{Array, OffsetSizeTrait}; +use arrow_array::{ArrayRef, GenericListArray}; use geo_traits::PolygonTrait; use arrow_buffer::{NullBuffer, OffsetBuffer}; @@ -140,14 +140,17 @@ impl PolygonArray { Field::new_list(name, self.vertices_field(), false).into() } + /// Access the underlying coordinate buffer pub fn coords(&self) -> &CoordBuffer { &self.coords } + /// Access the underlying geometry offsets buffer pub fn geom_offsets(&self) -> &OffsetBuffer { &self.geom_offsets } + /// Access the underlying ring offsets buffer pub fn ring_offsets(&self) -> &OffsetBuffer { &self.ring_offsets } @@ -188,10 +191,12 @@ impl PolygonArray { } } + /// Change the coordinate type of this array. pub fn to_coord_type(&self, coord_type: CoordType) -> Self { self.clone().into_coord_type(coord_type) } + /// Change the coordinate type of this array. 
pub fn into_coord_type(self, coord_type: CoordType) -> Self { Self::new( self.coords.into_coord_type(coord_type), @@ -222,11 +227,11 @@ impl ArrayBase for PolygonArray { self.data_type.extension_name() } - fn into_array_ref(self) -> Arc { + fn into_array_ref(self) -> ArrayRef { Arc::new(self.into_arrow()) } - fn to_array_ref(&self) -> arrow_array::ArrayRef { + fn to_array_ref(&self) -> ArrayRef { self.clone().into_array_ref() } diff --git a/rust/geoarrow/src/array/polygon/builder.rs b/rust/geoarrow/src/array/polygon/builder.rs index 46401e60..9728b7ab 100644 --- a/rust/geoarrow/src/array/polygon/builder.rs +++ b/rust/geoarrow/src/array/polygon/builder.rs @@ -12,7 +12,7 @@ use crate::datatypes::Dimension; use crate::error::{GeoArrowError, Result}; use crate::scalar::WKB; use crate::trait_::{ArrayAccessor, GeometryArrayBuilder, IntoArrow}; -use arrow_array::{Array, GenericListArray, OffsetSizeTrait}; +use arrow_array::{ArrayRef, GenericListArray, OffsetSizeTrait}; use arrow_buffer::{NullBufferBuilder, OffsetBuffer}; use geo_traits::{ CoordTrait, GeometryTrait, GeometryType, LineStringTrait, MultiPolygonTrait, PolygonTrait, @@ -51,6 +51,7 @@ impl PolygonBuilder { Self::new_with_options(dim, Default::default(), Default::default()) } + /// Creates a new empty [`PolygonBuilder`] with the provided options. pub fn new_with_options( dim: Dimension, coord_type: CoordType, @@ -64,6 +65,7 @@ impl PolygonBuilder { Self::with_capacity_and_options(dim, capacity, Default::default(), Default::default()) } + /// Creates a new empty [`PolygonBuilder`] with the provided capacity and options. pub fn with_capacity_and_options( dim: Dimension, capacity: PolygonCapacity, @@ -87,10 +89,10 @@ impl PolygonBuilder { } } - /// Reserves capacity for at least `additional` more LineStrings to be inserted - /// in the given `Vec`. The collection may reserve more space to - /// speculatively avoid frequent reallocations. 
After calling `reserve`, - /// capacity will be greater than or equal to `self.len() + additional`. + /// Reserves capacity for at least `additional` more Polygons. + /// + /// The collection may reserve more space to speculatively avoid frequent reallocations. After + /// calling `reserve`, capacity will be greater than or equal to `self.len() + additional`. /// Does nothing if capacity is already sufficient. pub fn reserve(&mut self, capacity: PolygonCapacity) { self.coords.reserve(capacity.coord_capacity); @@ -98,24 +100,25 @@ impl PolygonBuilder { self.geom_offsets.reserve(capacity.geom_capacity); } - /// Reserves the minimum capacity for at least `additional` more LineStrings to - /// be inserted in the given `Vec`. Unlike [`reserve`], this will not - /// deliberately over-allocate to speculatively avoid frequent allocations. - /// After calling `reserve_exact`, capacity will be greater than or equal to - /// `self.len() + additional`. Does nothing if the capacity is already - /// sufficient. + /// Reserves the minimum capacity for at least `additional` more Polygons. + /// + /// Unlike [`reserve`], this will not deliberately over-allocate to speculatively avoid + /// frequent allocations. After calling `reserve_exact`, capacity will be greater than or equal + /// to `self.len() + additional`. Does nothing if the capacity is already sufficient. /// /// Note that the allocator may give the collection more space than it /// requests. Therefore, capacity can not be relied upon to be precisely /// minimal. Prefer [`reserve`] if future insertions are expected. /// - /// [`reserve`]: Vec::reserve + /// [`reserve`]: Self::reserve pub fn reserve_exact(&mut self, capacity: PolygonCapacity) { self.coords.reserve_exact(capacity.coord_capacity); self.ring_offsets.reserve_exact(capacity.ring_capacity); self.geom_offsets.reserve_exact(capacity.geom_capacity); } + /// Reserve more space in the underlying buffers with the capacity inferred from the provided + /// geometries. 
pub fn reserve_from_iter<'a>( &mut self, geoms: impl Iterator>, @@ -124,6 +127,8 @@ impl PolygonBuilder { self.reserve(counter) } + /// Reserve more space in the underlying buffers with the capacity inferred from the provided + /// geometries. pub fn reserve_exact_from_iter<'a>( &mut self, geoms: impl Iterator>, @@ -175,10 +180,6 @@ impl PolygonBuilder { ) } - pub fn into_array_ref(self) -> Arc { - Arc::new(self.into_arrow()) - } - /// Push a raw offset to the underlying geometry offsets buffer. /// /// # Safety @@ -204,10 +205,12 @@ impl PolygonBuilder { Ok(()) } + /// Consume the builder and convert to an immutable [`PolygonArray`] pub fn finish(self) -> PolygonArray { self.into() } + /// Creates a new builder with a capacity inferred by the provided iterator. pub fn with_capacity_from_iter<'a>( geoms: impl Iterator>, dim: Dimension, @@ -220,6 +223,8 @@ impl PolygonBuilder { ) } + /// Creates a new builder with the provided options and a capacity inferred by the provided + /// iterator. pub fn with_capacity_and_options_from_iter<'a>( geoms: impl Iterator>, dim: Dimension, @@ -275,6 +280,7 @@ impl PolygonBuilder { Ok(()) } + /// Add a new Rect to this builder #[inline] pub fn push_rect(&mut self, value: Option<&impl RectTrait>) -> Result<()> { if let Some(rect) = value { @@ -315,6 +321,9 @@ impl PolygonBuilder { Ok(()) } + /// Add a new geometry to this builder + /// + /// This will error if the geometry type is not Polygon, a MultiPolygon of length 1, or Rect. 
#[inline] pub fn push_geometry(&mut self, value: Option<&impl GeometryTrait>) -> Result<()> { if let Some(value) = value { @@ -336,6 +345,7 @@ impl PolygonBuilder { Ok(()) } + /// Extend this builder with the given geometries pub fn extend_from_iter<'a>( &mut self, geoms: impl Iterator + 'a)>>, @@ -346,6 +356,7 @@ impl PolygonBuilder { .unwrap(); } + /// Extend this builder with the given geometries pub fn extend_from_geometry_iter<'a>( &mut self, geoms: impl Iterator + 'a)>>, @@ -380,6 +391,7 @@ impl PolygonBuilder { self.validity.append(false); } + /// Construct a new builder, pre-filling it with the provided geometries pub fn from_polygons( geoms: &[impl PolygonTrait], dim: Dimension, @@ -396,6 +408,7 @@ impl PolygonBuilder { array } + /// Construct a new builder, pre-filling it with the provided geometries pub fn from_nullable_polygons( geoms: &[Option>], dim: Dimension, @@ -412,6 +425,7 @@ impl PolygonBuilder { array } + /// Construct a new builder, pre-filling it with the provided geometries pub fn from_nullable_geometries( geoms: &[Option>], dim: Dimension, @@ -475,7 +489,7 @@ impl GeometryArrayBuilder for PolygonBuilder { &self.validity } - fn into_array_ref(self) -> Arc { + fn into_array_ref(self) -> ArrayRef { Arc::new(self.into_arrow()) } diff --git a/rust/geoarrow/src/array/polygon/capacity.rs b/rust/geoarrow/src/array/polygon/capacity.rs index b568d4ba..39ef387e 100644 --- a/rust/geoarrow/src/array/polygon/capacity.rs +++ b/rust/geoarrow/src/array/polygon/capacity.rs @@ -34,18 +34,22 @@ impl PolygonCapacity { self.coord_capacity == 0 && self.ring_capacity == 0 && self.geom_capacity == 0 } + /// The coordinate buffer capacity pub fn coord_capacity(&self) -> usize { self.coord_capacity } + /// The ring offset buffer capacity pub fn ring_capacity(&self) -> usize { self.ring_capacity } + /// The geometry offset buffer capacity pub fn geom_capacity(&self) -> usize { self.geom_capacity } + /// Add the capacity of the given Polygon #[inline] pub fn 
add_polygon<'a>(&mut self, polygon: Option<&'a (impl PolygonTrait + 'a)>) { self.geom_capacity += 1; @@ -65,6 +69,7 @@ impl PolygonCapacity { } } + /// Add the capacity of the given Rect #[inline] pub fn add_rect<'a>(&mut self, rect: Option<&'a (impl RectTrait + 'a)>) { self.geom_capacity += 1; @@ -76,6 +81,10 @@ impl PolygonCapacity { } } + /// Add the capacity of the given Geometry + /// + /// The type of the geometry must be either Polygon or Rect + #[inline] pub fn add_geometry(&mut self, value: Option<&impl GeometryTrait>) -> Result<()> { if let Some(geom) = value { match geom.as_type() { @@ -89,6 +98,7 @@ impl PolygonCapacity { Ok(()) } + /// Construct a new counter pre-filled with the given Polygons pub fn from_polygons<'a>( geoms: impl Iterator>, ) -> Self { @@ -99,6 +109,7 @@ impl PolygonCapacity { counter } + /// Construct a new counter pre-filled with the given Rects pub fn from_rects<'a>(geoms: impl Iterator>) -> Self { let mut counter = Self::new_empty(); for maybe_rect in geoms.into_iter() { @@ -107,6 +118,7 @@ impl PolygonCapacity { counter } + /// Construct a new counter pre-filled with the given geometries pub fn from_geometries<'a>( geoms: impl Iterator>, ) -> Result { diff --git a/rust/geoarrow/src/array/rect/array.rs b/rust/geoarrow/src/array/rect/array.rs index d4ece60f..19a2c446 100644 --- a/rust/geoarrow/src/array/rect/array.rs +++ b/rust/geoarrow/src/array/rect/array.rs @@ -40,6 +40,7 @@ pub struct RectArray { } impl RectArray { + /// Construct a new [`RectArray`] from parts pub fn new( lower: SeparatedCoordBuffer, upper: SeparatedCoordBuffer, @@ -57,10 +58,16 @@ impl RectArray { } } + /// Access the coordinate buffer of the "lower" corner of the RectArray + /// + /// Note that this needs to be interpreted in conjunction with the [null buffer][Self::nulls]. 
pub fn lower(&self) -> &SeparatedCoordBuffer { &self.lower } + /// Access the coordinate buffer of the "upper" corner of the RectArray + /// + /// Note that this needs to be interpreted in conjunction with the [null buffer][Self::nulls]. pub fn upper(&self) -> &SeparatedCoordBuffer { &self.upper } @@ -104,11 +111,11 @@ impl ArrayBase for RectArray { self.data_type.extension_name() } - fn into_array_ref(self) -> Arc { + fn into_array_ref(self) -> ArrayRef { Arc::new(self.into_arrow()) } - fn to_array_ref(&self) -> arrow_array::ArrayRef { + fn to_array_ref(&self) -> ArrayRef { self.clone().into_array_ref() } diff --git a/rust/geoarrow/src/array/rect/builder.rs b/rust/geoarrow/src/array/rect/builder.rs index 3669e85a..9f0cc0c8 100644 --- a/rust/geoarrow/src/array/rect/builder.rs +++ b/rust/geoarrow/src/array/rect/builder.rs @@ -4,7 +4,7 @@ use crate::datatypes::Dimension; use crate::error::GeoArrowError; use crate::scalar::Rect; use crate::trait_::IntoArrow; -use arrow_array::{Array, StructArray}; +use arrow_array::{ArrayRef, StructArray}; use arrow_buffer::NullBufferBuilder; use geo_traits::{CoordTrait, RectTrait}; use std::sync::Arc; @@ -14,10 +14,10 @@ use std::sync::Arc; /// Converting an [`RectBuilder`] into a [`RectArray`] is `O(1)`. #[derive(Debug)] pub struct RectBuilder { - pub metadata: Arc, - pub lower: SeparatedCoordBufferBuilder, - pub upper: SeparatedCoordBufferBuilder, - pub validity: NullBufferBuilder, + pub(crate) metadata: Arc, + pub(crate) lower: SeparatedCoordBufferBuilder, + pub(crate) upper: SeparatedCoordBufferBuilder, + pub(crate) validity: NullBufferBuilder, } impl RectBuilder { @@ -26,6 +26,7 @@ impl RectBuilder { Self::new_with_options(dim, Default::default()) } + /// Creates a new empty [`RectBuilder`] with the provided options. 
pub fn new_with_options(dim: Dimension, metadata: Arc) -> Self { Self::with_capacity_and_options(dim, 0, metadata) } @@ -35,7 +36,7 @@ impl RectBuilder { Self::with_capacity_and_options(dim, capacity, Default::default()) } - /// Creates a new [`RectBuilder`] with a capacity. + /// Creates a new [`RectBuilder`] with a capacity and options. pub fn with_capacity_and_options( dim: Dimension, capacity: usize, @@ -49,28 +50,27 @@ impl RectBuilder { } } - /// Reserves capacity for at least `additional` more points to be inserted - /// in the given `Vec`. The collection may reserve more space to - /// speculatively avoid frequent reallocations. After calling `reserve`, - /// capacity will be greater than or equal to `self.len() + additional`. + /// Reserves capacity for at least `additional` more Rects. + /// + /// The collection may reserve more space to speculatively avoid frequent reallocations. After + /// calling `reserve`, capacity will be greater than or equal to `self.len() + additional`. /// Does nothing if capacity is already sufficient. pub fn reserve(&mut self, additional: usize) { self.lower.reserve(additional); self.upper.reserve(additional); } - /// Reserves the minimum capacity for at least `additional` more points to - /// be inserted in the given `Vec`. Unlike [`reserve`], this will not - /// deliberately over-allocate to speculatively avoid frequent allocations. - /// After calling `reserve_exact`, capacity will be greater than or equal to - /// `self.len() + additional`. Does nothing if the capacity is already - /// sufficient. + /// Reserves the minimum capacity for at least `additional` more Rects. + /// + /// Unlike [`reserve`], this will not deliberately over-allocate to speculatively avoid + /// frequent allocations. After calling `reserve_exact`, capacity will be greater than or equal + /// to `self.len() + additional`. Does nothing if the capacity is already sufficient. 
/// /// Note that the allocator may give the collection more space than it /// requests. Therefore, capacity can not be relied upon to be precisely /// minimal. Prefer [`reserve`] if future insertions are expected. /// - /// [`reserve`]: Vec::reserve + /// [`reserve`]: Self::reserve pub fn reserve_exact(&mut self, additional: usize) { self.lower.reserve_exact(additional); self.upper.reserve_exact(additional); @@ -117,10 +117,12 @@ impl RectBuilder { (self.lower, self.upper, self.validity) } - pub fn into_arrow_ref(self) -> Arc { + /// Convert to an [`ArrayRef`] + pub fn into_arrow_ref(self) -> ArrayRef { Arc::new(self.into_arrow()) } + /// Consume the builder and convert to an immutable [`RectArray`] pub fn finish(self) -> RectArray { self.into() } @@ -224,7 +226,7 @@ impl From for RectArray { other.lower.into(), other.upper.into(), other.validity.finish(), - Default::default(), + other.metadata, ) } } diff --git a/rust/geoarrow/src/array/wkt/array.rs b/rust/geoarrow/src/array/wkt/array.rs index 430230c9..d4333f7f 100644 --- a/rust/geoarrow/src/array/wkt/array.rs +++ b/rust/geoarrow/src/array/wkt/array.rs @@ -1,7 +1,9 @@ use std::sync::Arc; use arrow::array::AsArray; -use arrow_array::{Array, GenericStringArray, LargeStringArray, OffsetSizeTrait, StringArray}; +use arrow_array::{ + Array, ArrayRef, GenericStringArray, LargeStringArray, OffsetSizeTrait, StringArray, +}; use arrow_buffer::NullBuffer; use arrow_schema::{DataType, Field}; @@ -49,6 +51,7 @@ impl WKTArray { self.len() == 0 } + /// Consume self and access the underlying data. pub fn into_inner(self) -> GenericStringArray { self.array } @@ -69,6 +72,7 @@ impl WKTArray { } } + /// Replace the [`ArrayMetadata`] contained in this array. 
pub fn with_metadata(&self, metadata: Arc) -> Self { let mut arr = self.clone(); arr.metadata = metadata; @@ -95,12 +99,12 @@ impl ArrayBase for WKTArray { self.data_type.extension_name() } - fn into_array_ref(self) -> Arc { + fn into_array_ref(self) -> ArrayRef { // Recreate a BinaryArray so that we can force it to have geoarrow.wkb extension type Arc::new(self.into_arrow()) } - fn to_array_ref(&self) -> arrow_array::ArrayRef { + fn to_array_ref(&self) -> ArrayRef { self.clone().into_array_ref() } diff --git a/rust/geoarrow/src/chunked_array/mod.rs b/rust/geoarrow/src/chunked_array/mod.rs index 1c68d5d1..3be9f7e2 100644 --- a/rust/geoarrow/src/chunked_array/mod.rs +++ b/rust/geoarrow/src/chunked_array/mod.rs @@ -712,7 +712,7 @@ pub trait ChunkedArrayBase: std::fmt::Debug + Send + Sync { /// let chunked_array = ChunkedGeometryArray::new(vec![array_0, array_1]); /// let arrays = chunked_array.array_refs(); /// ``` - fn array_refs(&self) -> Vec>; + fn array_refs(&self) -> Vec; } /// A trait implemented by all chunked geometry arrays. @@ -833,7 +833,7 @@ impl ChunkedArrayBase for ChunkedPointArray { self.chunks.len() } - fn array_refs(&self) -> Vec> { + fn array_refs(&self) -> Vec { self.chunks .iter() .map(|chunk| chunk.to_array_ref()) @@ -892,7 +892,7 @@ impl ChunkedArrayBase for ChunkedWKBArray { // self // } - fn array_refs(&self) -> Vec> { + fn array_refs(&self) -> Vec { self.chunks .iter() .map(|chunk| chunk.to_array_ref()) @@ -921,7 +921,7 @@ macro_rules! 
impl_trait { self.chunks.len() } - fn array_refs(&self) -> Vec> { + fn array_refs(&self) -> Vec { self.chunks .iter() .map(|chunk| chunk.to_array_ref()) @@ -976,7 +976,7 @@ impl ChunkedArrayBase for ChunkedRectArray { self.chunks.len() } - fn array_refs(&self) -> Vec> { + fn array_refs(&self) -> Vec { self.chunks .iter() .map(|chunk| chunk.to_array_ref()) diff --git a/rust/geoarrow/src/io/geozero/table/builder/properties.rs b/rust/geoarrow/src/io/geozero/table/builder/properties.rs index a5bd1316..e85df409 100644 --- a/rust/geoarrow/src/io/geozero/table/builder/properties.rs +++ b/rust/geoarrow/src/io/geozero/table/builder/properties.rs @@ -10,7 +10,7 @@ use crate::io::geozero::table::builder::anyvalue::AnyBuilder; use indexmap::IndexMap; /// A builder for a single RecordBatch of properties -// TODO: store an Arc on this struct? Especially when known or user-provided? +// TODO: store a SchemaRef on this struct? Especially when known or user-provided? // TODO: switch to ordered Vec of builders instead of a hashmap for sources like postgis pub(crate) struct PropertiesBatchBuilder { /// A mapping from column name to its builder. diff --git a/rust/geoarrow/src/io/geozero/table/builder/table.rs b/rust/geoarrow/src/io/geozero/table/builder/table.rs index 13840e3c..1c7340ac 100644 --- a/rust/geoarrow/src/io/geozero/table/builder/table.rs +++ b/rust/geoarrow/src/io/geozero/table/builder/table.rs @@ -2,7 +2,7 @@ use std::mem::replace; use std::sync::Arc; use arrow_array::RecordBatch; -use arrow_schema::Schema; +use arrow_schema::SchemaRef; use geozero::{FeatureProcessor, GeomProcessor, PropertyProcessor}; use crate::array::metadata::ArrayMetadata; @@ -30,7 +30,7 @@ pub struct GeoTableBuilderOptions { pub batch_size: usize, /// If known, the schema of properties. Must not include the schema of the geometry. 
- pub properties_schema: Option>, + pub properties_schema: Option, /// The number of rows to be read pub num_rows: Option, @@ -41,7 +41,7 @@ impl GeoTableBuilderOptions { coord_type: CoordType, prefer_multi: bool, batch_size: Option, - properties_schema: Option>, + properties_schema: Option, num_rows: Option, metadata: Arc, ) -> Self { diff --git a/rust/geoarrow/src/io/parquet/reader/parse.rs b/rust/geoarrow/src/io/parquet/reader/parse.rs index 7e283258..b010fd1e 100644 --- a/rust/geoarrow/src/io/parquet/reader/parse.rs +++ b/rust/geoarrow/src/io/parquet/reader/parse.rs @@ -167,11 +167,7 @@ pub fn parse_record_batch(batch: RecordBatch, target_schema: SchemaRef) -> Resul } /// Parse a single column based on provided GeoParquet metadata and target field -fn parse_array( - array: ArrayRef, - orig_field: &Field, - target_field: &Field, -) -> Result> { +fn parse_array(array: ArrayRef, orig_field: &Field, target_field: &Field) -> Result { use NativeType::*; let orig_type = AnyType::try_from(orig_field)?; @@ -202,7 +198,7 @@ fn parse_array( } } -fn parse_wkb_column(arr: &dyn Array, target_geo_data_type: NativeType) -> Result> { +fn parse_wkb_column(arr: &dyn Array, target_geo_data_type: NativeType) -> Result { match arr.data_type() { DataType::Binary => { let wkb_arr = WKBArray::::try_from(arr)?; @@ -221,14 +217,14 @@ fn parse_wkb_column(arr: &dyn Array, target_geo_data_type: NativeType) -> Result } } -fn parse_point_column(array: &dyn Array, dim: Dimension) -> Result> { +fn parse_point_column(array: &dyn Array, dim: Dimension) -> Result { let geom_arr: PointArray = (array, dim).try_into()?; Ok(geom_arr.into_array_ref()) } macro_rules! 
impl_parse_fn { ($fn_name:ident, $geoarrow_type:ty) => { - fn $fn_name(array: &dyn Array, dim: Dimension) -> Result> { + fn $fn_name(array: &dyn Array, dim: Dimension) -> Result { match array.data_type() { DataType::List(_) | DataType::LargeList(_) => { let geom_arr: $geoarrow_type = (array, dim).try_into()?; diff --git a/rust/geoarrow/src/io/parquet/writer/encode.rs b/rust/geoarrow/src/io/parquet/writer/encode.rs index e698f710..526a6380 100644 --- a/rust/geoarrow/src/io/parquet/writer/encode.rs +++ b/rust/geoarrow/src/io/parquet/writer/encode.rs @@ -1,6 +1,4 @@ -use std::sync::Arc; - -use arrow_array::{Array, RecordBatch}; +use arrow_array::{Array, ArrayRef, RecordBatch}; use arrow_schema::Field; use crate::algorithm::native::bounding_rect::BoundingRect; @@ -38,7 +36,7 @@ fn encode_column( array: &dyn Array, field: &Field, column_info: &mut ColumnInfo, -) -> Result<(Arc, BoundingRect)> { +) -> Result<(ArrayRef, BoundingRect)> { let geo_arr = NativeArrayDyn::from_arrow_array(array, field)?.into_inner(); let array_bounds = geo_arr.as_ref().total_bounds(); let encoded_array = match column_info.encoding { @@ -49,13 +47,13 @@ fn encode_column( } /// Encode column as WKB -fn encode_wkb_column(geo_arr: &dyn NativeArray) -> Result> { +fn encode_wkb_column(geo_arr: &dyn NativeArray) -> Result { Ok(geo_arr.as_ref().to_wkb::().to_array_ref()) } /// Encode column as GeoArrow. /// /// Note that the GeoParquet specification requires separated coord type! 
-fn encode_native_column(geo_arr: &dyn NativeArray) -> Result> { +fn encode_native_column(geo_arr: &dyn NativeArray) -> Result { Ok(geo_arr.to_coord_type(CoordType::Separated).to_array_ref()) } diff --git a/rust/geoarrow/src/io/parquet/writer/metadata.rs b/rust/geoarrow/src/io/parquet/writer/metadata.rs index 3f6b83af..975b8f9b 100644 --- a/rust/geoarrow/src/io/parquet/writer/metadata.rs +++ b/rust/geoarrow/src/io/parquet/writer/metadata.rs @@ -2,7 +2,7 @@ use std::collections::{HashMap, HashSet}; use std::sync::Arc; use arrow_array::ArrayRef; -use arrow_schema::{Field, Schema}; +use arrow_schema::{Field, Schema, SchemaRef}; use serde_json::Value; use crate::algorithm::native::bounding_rect::BoundingRect; @@ -155,7 +155,7 @@ impl ColumnInfo { } pub struct GeoParquetMetadataBuilder { - pub output_schema: Arc, + pub output_schema: SchemaRef, pub primary_column: Option, pub columns: HashMap, } @@ -287,10 +287,7 @@ pub fn get_geometry_types(data_type: &NativeType) -> HashSet, -) -> Arc { +fn create_output_schema(input_schema: &Schema, columns: &HashMap) -> SchemaRef { let mut fields = input_schema.fields().to_vec(); for (column_idx, column_info) in columns.iter() { let existing_field = input_schema.field(*column_idx); diff --git a/rust/geoarrow/src/lib.rs b/rust/geoarrow/src/lib.rs index dd3d937a..7f132f11 100644 --- a/rust/geoarrow/src/lib.rs +++ b/rust/geoarrow/src/lib.rs @@ -58,7 +58,7 @@ #![cfg_attr(docsrs, feature(doc_auto_cfg))] #![cfg_attr(not(test), deny(unused_crate_dependencies))] -#![deny(missing_docs)] // FIXME some modules allow missing docs +#![warn(missing_docs)] // FIXME some modules allow missing docs pub use trait_::{ArrayBase, NativeArray}; diff --git a/rust/geoarrow/src/table.rs b/rust/geoarrow/src/table.rs index 1b40934e..b822d31f 100644 --- a/rust/geoarrow/src/table.rs +++ b/rust/geoarrow/src/table.rs @@ -5,7 +5,7 @@ use std::ops::Deref; use std::sync::Arc; -use arrow_array::{Array, ArrayRef, RecordBatch, RecordBatchIterator, RecordBatchReader}; 
+use arrow_array::{ArrayRef, RecordBatch, RecordBatchIterator, RecordBatchReader}; use arrow_schema::{ArrowError, FieldRef, Schema, SchemaBuilder, SchemaRef}; use crate::algorithm::native::{Cast, Downcast}; @@ -487,12 +487,7 @@ impl Table { /// table.set_column(0, field.into(), vec![Arc::new(array)]).unwrap(); /// # } /// ``` - pub fn set_column( - &mut self, - i: usize, - field: FieldRef, - column: Vec>, - ) -> Result<()> { + pub fn set_column(&mut self, i: usize, field: FieldRef, column: Vec) -> Result<()> { let mut fields = self.schema().fields().deref().to_vec(); fields[i] = field; let schema = Arc::new(Schema::new_with_metadata( @@ -552,7 +547,7 @@ impl Table { /// assert_eq!(index, 7); /// # } /// ``` - pub fn append_column(&mut self, field: FieldRef, column: Vec>) -> Result { + pub fn append_column(&mut self, field: FieldRef, column: Vec) -> Result { assert_eq!(self.batches().len(), column.len()); let new_batches = self diff --git a/rust/geoarrow/src/test/geoarrow_data/util.rs b/rust/geoarrow/src/test/geoarrow_data/util.rs index 6f66186c..62ee33b7 100644 --- a/rust/geoarrow/src/test/geoarrow_data/util.rs +++ b/rust/geoarrow/src/test/geoarrow_data/util.rs @@ -1,10 +1,9 @@ use std::fs::File; -use std::sync::Arc; -use arrow_array::Array; +use arrow_array::ArrayRef; use arrow_ipc::reader::FileReader; -pub(super) fn read_geometry_column(path: &str) -> Arc { +pub(super) fn read_geometry_column(path: &str) -> ArrayRef { let file = File::open(path).unwrap(); let reader = FileReader::try_new(file, None).unwrap(); diff --git a/rust/geoarrow/src/trait_.rs b/rust/geoarrow/src/trait_.rs index 1ab51443..d68868bf 100644 --- a/rust/geoarrow/src/trait_.rs +++ b/rust/geoarrow/src/trait_.rs @@ -5,7 +5,7 @@ use crate::array::{CoordBuffer, CoordType}; use crate::datatypes::{Dimension, NativeType, SerializedType}; use crate::error::Result; use crate::scalar::Geometry; -use arrow_array::{Array, ArrayRef}; +use arrow_array::ArrayRef; use arrow_buffer::{NullBuffer, 
NullBufferBuilder}; use arrow_schema::{DataType, Field}; use geo_traits::GeometryTrait; @@ -87,6 +87,8 @@ pub trait ArrayBase: std::fmt::Debug + Send + Sync { /// /// This is `O(1)`. /// + /// Note that **this will omit any spatial extension information**. + /// /// # Examples /// /// ``` @@ -105,6 +107,8 @@ pub trait ArrayBase: std::fmt::Debug + Send + Sync { /// /// This is `O(1)`. /// + /// Note that **this will omit any spatial extension information**. + /// /// # Examples /// /// ``` @@ -1024,6 +1028,8 @@ pub trait GeometryArrayBuilder: std::fmt::Debug + Send + Sync + Sized { /// Converts this builder into an [`ArrayRef`], a dynamic array reference. /// + /// Note that **this will omit any spatial extension information**. + /// /// # Examples /// /// ``` @@ -1033,5 +1039,5 @@ pub trait GeometryArrayBuilder: std::fmt::Debug + Send + Sync + Sized { /// let builder = PointBuilder::new(Dimension::XY); /// let array_ref = builder.into_array_ref(); /// ``` - fn into_array_ref(self) -> Arc; + fn into_array_ref(self) -> ArrayRef; }