Skip to content

Commit

Permalink
Cleanup pass (#957)
Browse files Browse the repository at this point in the history
### Change list

- Disallow missing documentation in `geoarrow::array`
- Update documentation in `geoarrow::array`.
- Use `ArrayRef` instead of `Arc<dyn Array>`
- Use `SchemaRef` instead of `Arc<Schema>`
  • Loading branch information
kylebarron authored Dec 20, 2024
1 parent b4bc4f0 commit 21264b5
Show file tree
Hide file tree
Showing 50 changed files with 680 additions and 336 deletions.
11 changes: 4 additions & 7 deletions rust/geoarrow/src/array/binary/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ use crate::error::{GeoArrowError, Result};
use crate::scalar::WKB;
use crate::trait_::{ArrayAccessor, ArrayBase, IntoArrow, SerializedArray};
use arrow::array::AsArray;
use arrow_array::OffsetSizeTrait;
use arrow_array::{Array, BinaryArray, GenericBinaryArray, LargeBinaryArray};
use arrow_array::{ArrayRef, OffsetSizeTrait};
use arrow_buffer::NullBuffer;
use arrow_schema::{DataType, Field};
use geo_traits::GeometryTrait;
Expand Down Expand Up @@ -73,10 +73,6 @@ impl<O: OffsetSizeTrait> WKBArray<O> {
validity_len + self.buffer_lengths().num_bytes::<O>()
}

pub fn into_inner(self) -> GenericBinaryArray<O> {
self.array
}

/// Slices this [`WKBArray`] in place.
/// # Panic
/// This function panics iff `offset + length > self.len()`.
Expand All @@ -93,6 +89,7 @@ impl<O: OffsetSizeTrait> WKBArray<O> {
}
}

/// Replace the [ArrayMetadata] in the array with the given metadata
pub fn with_metadata(&self, metadata: Arc<ArrayMetadata>) -> Self {
let mut arr = self.clone();
arr.metadata = metadata;
Expand All @@ -119,12 +116,12 @@ impl<O: OffsetSizeTrait> ArrayBase for WKBArray<O> {
self.data_type.extension_name()
}

fn into_array_ref(self) -> Arc<dyn Array> {
fn into_array_ref(self) -> ArrayRef {
// Recreate a BinaryArray so that we can force it to have geoarrow.wkb extension type
Arc::new(self.into_arrow())
}

fn to_array_ref(&self) -> arrow_array::ArrayRef {
fn to_array_ref(&self) -> ArrayRef {
self.clone().into_array_ref()
}

Expand Down
11 changes: 9 additions & 2 deletions rust/geoarrow/src/array/binary/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,11 @@ impl<O: OffsetSizeTrait> Default for WKBBuilder<O> {

impl<O: OffsetSizeTrait> WKBBuilder<O> {
/// Creates a new empty [`WKBBuilder`].
/// # Implementation
/// This allocates a [`Vec`] of one element
pub fn new() -> Self {
Self::with_capacity(Default::default())
}

/// Creates a new empty [`WKBBuilder`] with the provided options.
pub fn new_with_options(metadata: Arc<ArrayMetadata>) -> Self {
Self::with_capacity_and_options(Default::default(), metadata)
}
Expand All @@ -46,6 +45,7 @@ impl<O: OffsetSizeTrait> WKBBuilder<O> {
Self::with_capacity_and_options(capacity, Default::default())
}

/// Creates a new empty [`WKBBuilder`] with the provided capacity and options.
pub fn with_capacity_and_options(capacity: WKBCapacity, metadata: Arc<ArrayMetadata>) -> Self {
Self(
GenericBinaryBuilder::with_capacity(
Expand All @@ -56,12 +56,16 @@ impl<O: OffsetSizeTrait> WKBBuilder<O> {
)
}

/// Creates a new empty [`WKBBuilder`] with a capacity inferred by the provided geometry
/// iterator.
pub fn with_capacity_from_iter<'a>(
geoms: impl Iterator<Item = Option<&'a (impl GeometryTrait<T = f64> + 'a)>>,
) -> Self {
Self::with_capacity_and_options_from_iter(geoms, Default::default())
}

/// Creates a new empty [`WKBBuilder`] with the provided options and a capacity inferred by the
/// provided geometry iterator.
pub fn with_capacity_and_options_from_iter<'a>(
geoms: impl Iterator<Item = Option<&'a (impl GeometryTrait<T = f64> + 'a)>>,
metadata: Arc<ArrayMetadata>,
Expand Down Expand Up @@ -205,6 +209,9 @@ impl<O: OffsetSizeTrait> WKBBuilder<O> {
array
}

/// Consume this builder and convert to a [WKBArray].
///
/// This is `O(1)`.
pub fn finish(self) -> WKBArray<O> {
self.into()
}
Expand Down
3 changes: 3 additions & 0 deletions rust/geoarrow/src/array/binary/capacity.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,12 @@ impl WKBCapacity {
self.buffer_capacity == 0 && self.offsets_capacity == 0
}

/// The capacity of the underlying data buffer
pub fn buffer_capacity(&self) -> usize {
self.buffer_capacity
}

/// The capacity of the underlying offsets buffer
pub fn offsets_capacity(&self) -> usize {
self.offsets_capacity
}
Expand Down Expand Up @@ -223,6 +225,7 @@ impl WKBCapacity {
counter
}

/// Create a capacity counter from an iterator of Geometries.
pub fn from_owned_geometries<'a>(
geoms: impl Iterator<Item = Option<(impl GeometryTrait<T = f64> + 'a)>>,
) -> Self {
Expand Down
1 change: 1 addition & 0 deletions rust/geoarrow/src/array/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ impl AsNativeArray for &dyn NativeArray {
}
}

/// Trait to downcast an Arrow array to a serialized array
pub trait AsSerializedArray {
/// Downcast this to a [`WKBArray`] with `i32` offsets returning `None` if not possible
fn as_wkb_opt(&self) -> Option<&WKBArray<i32>>;
Expand Down
33 changes: 21 additions & 12 deletions rust/geoarrow/src/array/coord/combined/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use crate::datatypes::Dimension;
use crate::error::{GeoArrowError, Result};
use crate::scalar::Coord;
use crate::trait_::IntoArrow;
use arrow_array::{Array, FixedSizeListArray, StructArray};
use arrow_array::{Array, ArrayRef, FixedSizeListArray, StructArray};
use arrow_schema::DataType;

/// An Arrow representation of an array of coordinates.
Expand All @@ -25,70 +25,79 @@ use arrow_schema::DataType;
/// validity masks.
#[derive(Debug, Clone)]
pub enum CoordBuffer {
/// Interleaved coordinates
Interleaved(InterleavedCoordBuffer),
/// Separated coordinates
Separated(SeparatedCoordBuffer),
}

impl CoordBuffer {
pub fn slice(&self, offset: usize, length: usize) -> Self {
/// Slice this buffer
pub(crate) fn slice(&self, offset: usize, length: usize) -> Self {
match self {
CoordBuffer::Interleaved(c) => CoordBuffer::Interleaved(c.slice(offset, length)),
CoordBuffer::Separated(c) => CoordBuffer::Separated(c.slice(offset, length)),
}
}

/// The underlying coordinate type
pub fn coord_type(&self) -> CoordType {
match self {
CoordBuffer::Interleaved(cb) => cb.coord_type(),
CoordBuffer::Separated(cb) => cb.coord_type(),
}
}

pub fn storage_type(&self) -> DataType {
/// The arrow [DataType] for this coordinate buffer.
pub(crate) fn storage_type(&self) -> DataType {
match self {
CoordBuffer::Interleaved(c) => c.storage_type(),
CoordBuffer::Separated(c) => c.storage_type(),
}
}

/// The length of this coordinate buffer
pub fn len(&self) -> usize {
match self {
CoordBuffer::Interleaved(c) => c.len(),
CoordBuffer::Separated(c) => c.len(),
}
}

/// Whether this coordinate buffer is empty
pub fn is_empty(&self) -> bool {
self.len() == 0
}

pub fn value(&self, index: usize) -> Coord<'_> {
pub(crate) fn value(&self, index: usize) -> Coord<'_> {
match self {
CoordBuffer::Interleaved(c) => Coord::Interleaved(c.value(index)),
CoordBuffer::Separated(c) => Coord::Separated(c.value(index)),
}
}

pub fn into_array_ref(self) -> Arc<dyn Array> {
pub(crate) fn into_array_ref(self) -> ArrayRef {
self.into_arrow()
}

pub fn to_array_ref(&self) -> arrow_array::ArrayRef {
self.clone().into_array_ref()
}

/// The dimension of this coordinate buffer
pub fn dim(&self) -> Dimension {
match self {
CoordBuffer::Interleaved(c) => c.dim(),
CoordBuffer::Separated(c) => c.dim(),
}
}

pub fn with_coords(self, coords: CoordBuffer) -> Self {
#[allow(dead_code)]
pub(crate) fn with_coords(self, coords: CoordBuffer) -> Self {
assert_eq!(coords.len(), self.len());
coords
}

/// Convert this coordinate array into the given [CoordType]
///
/// This is a no-op if the coord_type matches the existing coord type. Otherwise a full clone
/// of the underlying coordinate buffers will be performed.
pub fn into_coord_type(self, coord_type: CoordType) -> Self {
let dim = self.dim();
match (self, coord_type) {
Expand All @@ -113,7 +122,7 @@ impl CoordBuffer {
}
}

pub fn from_arrow(value: &dyn Array, dim: Dimension) -> Result<Self> {
pub(crate) fn from_arrow(value: &dyn Array, dim: Dimension) -> Result<Self> {
match value.data_type() {
DataType::Struct(_) => {
let downcasted = value.as_any().downcast_ref::<StructArray>().unwrap();
Expand All @@ -136,7 +145,7 @@ impl CoordBuffer {
}

impl IntoArrow for CoordBuffer {
type ArrowArray = Arc<dyn Array>;
type ArrowArray = ArrayRef;

fn into_arrow(self) -> Self::ArrowArray {
match self {
Expand Down
19 changes: 12 additions & 7 deletions rust/geoarrow/src/array/coord/combined/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,14 @@ use geo_traits::{CoordTrait, PointTrait};
/// Converting an [`CoordBufferBuilder`] into a [`CoordBuffer`] is `O(1)`.
#[derive(Debug, Clone)]
pub enum CoordBufferBuilder {
/// Interleaved coordinates
Interleaved(InterleavedCoordBufferBuilder),
/// Separated coordinates
Separated(SeparatedCoordBufferBuilder),
}

impl CoordBufferBuilder {
/// Initialize a buffer of a given length with all coordinates set to 0.0
pub fn initialize(len: usize, interleaved: bool, dim: Dimension) -> Self {
match interleaved {
true => {
Expand All @@ -40,18 +43,17 @@ impl CoordBufferBuilder {
}
}

/// Reserves the minimum capacity for at least `additional` more coordinates to
/// be inserted in the given `Vec<T>`. Unlike [`reserve`], this will not
/// deliberately over-allocate to speculatively avoid frequent allocations.
/// After calling `reserve_exact`, capacity will be greater than or equal to
/// `self.len() + additional`. Does nothing if the capacity is already
/// sufficient.
/// Reserves the minimum capacity for at least `additional` more coordinates.
///
/// Unlike [`reserve`], this will not deliberately over-allocate to speculatively avoid
/// frequent allocations. After calling `reserve_exact`, capacity will be greater than or equal
/// to `self.len() + additional`. Does nothing if the capacity is already sufficient.
///
/// Note that the allocator may give the collection more space than it
/// requests. Therefore, capacity can not be relied upon to be precisely
/// minimal. Prefer [`reserve`] if future insertions are expected.
///
/// [`reserve`]: Vec::reserve
/// [`reserve`]: Self::reserve
pub fn reserve_exact(&mut self, additional: usize) {
match self {
CoordBufferBuilder::Interleaved(cb) => cb.reserve_exact(additional),
Expand All @@ -67,17 +69,20 @@ impl CoordBufferBuilder {
}
}

/// The number of coordinates
pub fn len(&self) -> usize {
match self {
CoordBufferBuilder::Interleaved(cb) => cb.len(),
CoordBufferBuilder::Separated(cb) => cb.len(),
}
}

/// Whether the buffer is empty
pub fn is_empty(&self) -> bool {
self.len() == 0
}

/// The underlying coordinate type
pub fn coord_type(&self) -> CoordType {
match self {
CoordBufferBuilder::Interleaved(_) => CoordType::Interleaved,
Expand Down
Loading

0 comments on commit 21264b5

Please sign in to comment.