From 9e68c71fba77bed86b4eabac7db4d93b6f2cc920 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Tue, 10 Dec 2024 12:07:18 -0500 Subject: [PATCH] downcast geometry array (#918) todo: - [x] Change downcast to be implemented in terms of `resolve_types` and `cast`. - [x] Handle slicing in `GeometryArray::has_points`. I.e. bring back the slice offset and length. Then if the array has been sliced and the point array exists, check if the point values are within the bounds of the current slice. The fastest way to do this should be to check the length of the point array, then as you're iterating through the type_ids array, once you've seen that many ids pointing to the point array, you know the array is fully there. Or, in the case of downcasting, you really only care about whether _any_ geometry exists. So you can short-circuit as soon as you've seen the first point. - [x] Ensure we apply slicing when exporting the GeometryArray. E.g. in the `TryFrom` impl from GeometryArray to `PointArray`, we need to call the slice when we export. - [ ] recursively expand the types from the geometry collection array. But only if the geometry collection array has only a single geometry per row and it could be split up. 
Closes https://github.com/geoarrow/geoarrow-rs/issues/416 --- rust/geoarrow/src/algorithm/native/cast.rs | 107 ++--- .../geoarrow/src/algorithm/native/downcast.rs | 380 +++++++----------- rust/geoarrow/src/array/geometry/array.rs | 312 +++++++------- rust/geoarrow/src/array/linestring/array.rs | 5 +- rust/geoarrow/src/array/mixed/array.rs | 158 +++++++- .../src/array/multilinestring/array.rs | 5 +- rust/geoarrow/src/array/multipoint/array.rs | 5 +- rust/geoarrow/src/array/multipolygon/array.rs | 5 +- rust/geoarrow/src/array/point/array.rs | 5 +- rust/geoarrow/src/array/polygon/array.rs | 5 +- rust/geoarrow/src/datatypes.rs | 25 +- rust/geoarrow/src/io/wkb/api.rs | 7 +- rust/geoarrow/src/table.rs | 5 +- 13 files changed, 540 insertions(+), 484 deletions(-) diff --git a/rust/geoarrow/src/algorithm/native/cast.rs b/rust/geoarrow/src/algorithm/native/cast.rs index 22da66c9..33dce570 100644 --- a/rust/geoarrow/src/algorithm/native/cast.rs +++ b/rust/geoarrow/src/algorithm/native/cast.rs @@ -28,7 +28,7 @@ impl Default for CastOptions { /// Note: not currently used and outdated #[allow(dead_code)] -fn can_cast_types(from_type: &NativeType, to_type: &NativeType) -> bool { +fn can_cast_types(from_type: NativeType, to_type: NativeType) -> bool { if from_type == to_type { return true; } @@ -51,13 +51,13 @@ pub trait Cast { type Output; /// Note: **does not currently implement dimension casts** - fn cast(&self, to_type: &NativeType) -> Self::Output; + fn cast(&self, to_type: NativeType) -> Self::Output; } impl Cast for PointArray { type Output = Result>; - fn cast(&self, to_type: &NativeType) -> Self::Output { + fn cast(&self, to_type: NativeType) -> Self::Output { use NativeType::*; let array = self.to_coord_type(to_type.coord_type()); @@ -66,6 +66,7 @@ impl Cast for PointArray { MultiPoint(_, _) => Ok(Arc::new(MultiPointArray::from(array))), Mixed(_, _) => Ok(Arc::new(MixedGeometryArray::from(array))), GeometryCollection(_, _) => 
Ok(Arc::new(GeometryCollectionArray::from(array))), + Geometry(_) => Ok(Arc::new(GeometryArray::from(array))), dt => Err(GeoArrowError::General(format!( "invalid cast to type {dt:?}" ))), @@ -76,7 +77,7 @@ impl Cast for PointArray { impl Cast for LineStringArray { type Output = Result>; - fn cast(&self, to_type: &NativeType) -> Self::Output { + fn cast(&self, to_type: NativeType) -> Self::Output { use NativeType::*; let array = self.to_coord_type(to_type.coord_type()); @@ -86,6 +87,7 @@ impl Cast for LineStringArray { MultiLineString(_, _) => Ok(Arc::new(MultiLineStringArray::from(array))), Mixed(_, _) => Ok(Arc::new(MixedGeometryArray::from(array))), GeometryCollection(_, _) => Ok(Arc::new(GeometryCollectionArray::from(array))), + Geometry(_) => Ok(Arc::new(GeometryArray::from(array))), dt => Err(GeoArrowError::General(format!( "invalid cast to type {dt:?}" ))), @@ -96,7 +98,7 @@ impl Cast for LineStringArray { impl Cast for PolygonArray { type Output = Result>; - fn cast(&self, to_type: &NativeType) -> Self::Output { + fn cast(&self, to_type: NativeType) -> Self::Output { use NativeType::*; let array = self.to_coord_type(to_type.coord_type()); @@ -106,6 +108,7 @@ impl Cast for PolygonArray { MultiPolygon(_, _) => Ok(Arc::new(MultiPolygonArray::from(array))), Mixed(_, _) => Ok(Arc::new(MixedGeometryArray::from(array))), GeometryCollection(_, _) => Ok(Arc::new(GeometryCollectionArray::from(array))), + Geometry(_) => Ok(Arc::new(GeometryArray::from(array))), dt => Err(GeoArrowError::General(format!( "invalid cast to type {dt:?}" ))), @@ -116,7 +119,7 @@ impl Cast for PolygonArray { impl Cast for MultiPointArray { type Output = Result>; - fn cast(&self, to_type: &NativeType) -> Self::Output { + fn cast(&self, to_type: NativeType) -> Self::Output { use NativeType::*; let array = self.to_coord_type(to_type.coord_type()); @@ -126,6 +129,7 @@ impl Cast for MultiPointArray { MultiPoint(_, _) => Ok(Arc::new(array)), Mixed(_, _) => 
Ok(Arc::new(MixedGeometryArray::from(array))), GeometryCollection(_, _) => Ok(Arc::new(GeometryCollectionArray::from(array))), + Geometry(_) => Ok(Arc::new(GeometryArray::from(array))), dt => Err(GeoArrowError::General(format!( "invalid cast to type {dt:?}" ))), @@ -136,7 +140,7 @@ impl Cast for MultiPointArray { impl Cast for MultiLineStringArray { type Output = Result>; - fn cast(&self, to_type: &NativeType) -> Self::Output { + fn cast(&self, to_type: NativeType) -> Self::Output { use NativeType::*; let array = self.to_coord_type(to_type.coord_type()); @@ -145,6 +149,7 @@ impl Cast for MultiLineStringArray { LineString(_, _) => Ok(Arc::new(LineStringArray::try_from(array)?)), Mixed(_, _) => Ok(Arc::new(MixedGeometryArray::from(array))), GeometryCollection(_, _) => Ok(Arc::new(GeometryCollectionArray::from(array))), + Geometry(_) => Ok(Arc::new(GeometryArray::from(array))), dt => Err(GeoArrowError::General(format!( "invalid cast to type {dt:?}" ))), @@ -155,7 +160,7 @@ impl Cast for MultiLineStringArray { impl Cast for MultiPolygonArray { type Output = Result>; - fn cast(&self, to_type: &NativeType) -> Self::Output { + fn cast(&self, to_type: NativeType) -> Self::Output { use NativeType::*; let array = self.to_coord_type(to_type.coord_type()); @@ -164,6 +169,7 @@ impl Cast for MultiPolygonArray { Polygon(_, _) => Ok(Arc::new(PolygonArray::try_from(array)?)), Mixed(_, _) => Ok(Arc::new(MixedGeometryArray::from(array))), GeometryCollection(_, _) => Ok(Arc::new(GeometryCollectionArray::from(array))), + Geometry(_) => Ok(Arc::new(GeometryArray::from(array))), dt => Err(GeoArrowError::General(format!( "invalid cast to type {dt:?}" ))), @@ -174,7 +180,7 @@ impl Cast for MultiPolygonArray { impl Cast for MixedGeometryArray { type Output = Result>; - fn cast(&self, to_type: &NativeType) -> Self::Output { + fn cast(&self, to_type: NativeType) -> Self::Output { use NativeType::*; let array = self.to_coord_type(to_type.coord_type()); @@ -198,7 +204,7 @@ impl Cast for 
MixedGeometryArray { impl Cast for GeometryCollectionArray { type Output = Result>; - fn cast(&self, to_type: &NativeType) -> Self::Output { + fn cast(&self, to_type: NativeType) -> Self::Output { use NativeType::*; let array = self.to_coord_type(to_type.coord_type()); @@ -212,6 +218,7 @@ impl Cast for GeometryCollectionArray { MultiPolygon(_, _) => Ok(Arc::new(MultiPolygonArray::try_from(array)?)), Mixed(_, _) => Ok(Arc::new(MixedGeometryArray::try_from(array)?)), GeometryCollection(_, _) => Ok(Arc::new(array)), + Geometry(_) => Ok(Arc::new(GeometryArray::from(array))), dt => Err(GeoArrowError::General(format!( "invalid cast to type {dt:?}" ))), @@ -219,10 +226,21 @@ impl Cast for GeometryCollectionArray { } } +impl Cast for GeometryArray { + type Output = Result>; + + fn cast(&self, to_type: NativeType) -> Self::Output { + // TODO: validate dimension + let array = self.to_coord_type(to_type.coord_type()); + let mixed_array = MixedGeometryArray::try_from(array)?; + mixed_array.cast(to_type) + } +} + impl Cast for &dyn NativeArray { type Output = Result>; - fn cast(&self, to_type: &NativeType) -> Self::Output { + fn cast(&self, to_type: NativeType) -> Self::Output { // TODO: not working :/ // if self.data_type() == to_type { // return Ok(Arc::new(self.to_owned())); @@ -239,56 +257,18 @@ impl Cast for &dyn NativeArray { MultiPolygon(_, _) => self.as_ref().as_multi_polygon().cast(to_type), Mixed(_, _) => self.as_ref().as_mixed().cast(to_type), GeometryCollection(_, _) => self.as_ref().as_geometry_collection().cast(to_type), + Geometry(_) => self.as_ref().as_geometry().cast(to_type), _ => todo!(), } } } -macro_rules! impl_chunked_cast_non_generic { - ($chunked_array:ty) => { - impl Cast for $chunked_array { - type Output = Result>; - - fn cast(&self, to_type: &NativeType) -> Self::Output { - macro_rules! 
impl_cast { - ($method:ident) => { - Arc::new(ChunkedGeometryArray::new( - self.geometry_chunks() - .iter() - .map(|chunk| { - Ok(chunk.as_ref().cast(to_type)?.as_ref().$method().clone()) - }) - .collect::>>()?, - )) - }; - } - - use NativeType::*; - - let result: Arc = match to_type { - Point(_, _) => impl_cast!(as_point), - LineString(_, _) => impl_cast!(as_line_string), - Polygon(_, _) => impl_cast!(as_polygon), - MultiPoint(_, _) => impl_cast!(as_multi_point), - MultiLineString(_, _) => impl_cast!(as_multi_line_string), - MultiPolygon(_, _) => impl_cast!(as_multi_polygon), - Mixed(_, _) => impl_cast!(as_mixed), - GeometryCollection(_, _) => impl_cast!(as_geometry_collection), - Rect(_) => impl_cast!(as_rect), - Geometry(_) => todo!("cast to unknown"), - }; - Ok(result) - } - } - }; -} - -macro_rules! impl_chunked_cast_generic { +macro_rules! impl_chunked_cast { ($chunked_array:ty) => { impl Cast for $chunked_array { type Output = Result>; - fn cast(&self, to_type: &NativeType) -> Self::Output { + fn cast(&self, to_type: NativeType) -> Self::Output { macro_rules! impl_cast { ($method:ident) => { Arc::new(ChunkedGeometryArray::new( @@ -314,7 +294,7 @@ macro_rules! impl_chunked_cast_generic { Mixed(_, _) => impl_cast!(as_mixed), GeometryCollection(_, _) => impl_cast!(as_geometry_collection), Rect(_) => impl_cast!(as_rect), - Geometry(_) => todo!("cast to unknown"), + Geometry(_) => impl_cast!(as_geometry), }; Ok(result) } @@ -322,13 +302,14 @@ macro_rules! 
impl_chunked_cast_generic { }; } -impl_chunked_cast_non_generic!(ChunkedPointArray); -impl_chunked_cast_non_generic!(ChunkedRectArray); -impl_chunked_cast_non_generic!(&dyn ChunkedNativeArray); -impl_chunked_cast_generic!(ChunkedLineStringArray); -impl_chunked_cast_generic!(ChunkedPolygonArray); -impl_chunked_cast_generic!(ChunkedMultiPointArray); -impl_chunked_cast_generic!(ChunkedMultiLineStringArray); -impl_chunked_cast_generic!(ChunkedMultiPolygonArray); -impl_chunked_cast_generic!(ChunkedMixedGeometryArray); -impl_chunked_cast_generic!(ChunkedGeometryCollectionArray); +impl_chunked_cast!(ChunkedPointArray); +impl_chunked_cast!(ChunkedRectArray); +impl_chunked_cast!(&dyn ChunkedNativeArray); +impl_chunked_cast!(ChunkedLineStringArray); +impl_chunked_cast!(ChunkedPolygonArray); +impl_chunked_cast!(ChunkedMultiPointArray); +impl_chunked_cast!(ChunkedMultiLineStringArray); +impl_chunked_cast!(ChunkedMultiPolygonArray); +impl_chunked_cast!(ChunkedMixedGeometryArray); +impl_chunked_cast!(ChunkedGeometryCollectionArray); +impl_chunked_cast!(ChunkedUnknownGeometryArray); diff --git a/rust/geoarrow/src/algorithm/native/downcast.rs b/rust/geoarrow/src/algorithm/native/downcast.rs index cad3c47a..5704afe8 100644 --- a/rust/geoarrow/src/algorithm/native/downcast.rs +++ b/rust/geoarrow/src/algorithm/native/downcast.rs @@ -7,8 +7,6 @@ use arrow_array::OffsetSizeTrait; use arrow_buffer::OffsetBuffer; use crate::algorithm::native::cast::Cast; -use crate::array::offset_builder::OffsetsBuilder; -use crate::array::util::OffsetBufferUtils; use crate::array::*; use crate::chunked_array::*; use crate::datatypes::{Dimension, NativeType}; @@ -18,6 +16,8 @@ use crate::table::Table; use crate::NativeArray; /// Downcast will change between geometry types but will not affect the dimension of the data. +/// +/// Downcast will not change the coordinate type of the data. 
pub trait Downcast { type Output; @@ -49,61 +49,30 @@ impl Downcast for PointArray { } } -/// Returns `true` if this offsets buffer is type `i64` and would fit in an `i32` -/// -/// If the offset type `O` is already `i32`, will return false -#[allow(dead_code)] -fn can_downcast_offsets_i32(buffer: &OffsetBuffer) -> bool { - if O::IS_LARGE { - buffer.last().to_usize().unwrap() < i32::MAX as usize - } else { - false - } -} - -/// Downcast an i64 offset buffer to i32 -/// -/// This copies the buffer into an i32 -#[allow(dead_code)] -fn downcast_offsets(buffer: &OffsetBuffer) -> OffsetBuffer { - if O::IS_LARGE { - let mut builder = OffsetsBuilder::with_capacity(buffer.len_proxy()); - buffer - .iter() - .for_each(|x| builder.try_push(x.to_usize().unwrap() as i32).unwrap()); - builder.finish() - } else { - // This function should never be called when offsets are i32 - unreachable!() - } -} - /// Returns `true` if this Multi-geometry array can fit into a non-multi array /// /// Note that we can't just check the value of the last offset, because there could be a null /// element with length 0 and then a multi point of length 2. We need to check that every offset is /// <= 1. +/// +/// Also note that for now, we explicitly check `== 1` instead of `<= 1`. Having an offset of +/// length 0 means that the geometry is empty, and the cast functionality would need to handle +/// that. 
pub(crate) fn can_downcast_multi(buffer: &OffsetBuffer) -> bool { buffer .windows(2) - .all(|slice| *slice.get(1).unwrap() - *slice.first().unwrap() <= O::one()) + .all(|slice| *slice.get(1).unwrap() - *slice.first().unwrap() == O::one()) } impl Downcast for LineStringArray { type Output = Arc; fn downcasted_data_type(&self) -> NativeType { - match self.data_type() { - NativeType::LineString(ct, dim) => NativeType::LineString(ct, dim), - _ => unreachable!(), - } + self.data_type() } fn downcast(&self) -> Self::Output { - match (self.data_type(), self.downcasted_data_type()) { - (NativeType::LineString(_, _), NativeType::LineString(_, _)) => Arc::new(self.clone()), - _ => unreachable!(), - } + Arc::new(self.clone()) } } @@ -111,10 +80,7 @@ impl Downcast for PolygonArray { type Output = Arc; fn downcasted_data_type(&self) -> NativeType { - match self.data_type() { - NativeType::Polygon(ct, dim) => NativeType::Polygon(ct, dim), - _ => unreachable!(), - } + self.data_type() } fn downcast(&self) -> Self::Output { @@ -138,16 +104,11 @@ impl Downcast for MultiPointArray { } } fn downcast(&self) -> Self::Output { - // Note: this won't allow a downcast for empty MultiPoints - if *self.geom_offsets.last() as usize == self.len() { - return Arc::new(PointArray::new( - self.coords.clone(), - self.validity.clone(), - self.metadata(), - )); + if let Ok(array) = PointArray::try_from(self.clone()) { + Arc::new(array) + } else { + Arc::new(self.clone()) } - - Arc::new(self.clone()) } } @@ -168,16 +129,11 @@ impl Downcast for MultiLineStringArray { } fn downcast(&self) -> Self::Output { - if *self.geom_offsets.last() as usize == self.len() { - return Arc::new(LineStringArray::new( - self.coords.clone(), - self.ring_offsets.clone(), - self.validity.clone(), - self.metadata(), - )); + if let Ok(array) = LineStringArray::try_from(self.clone()) { + Arc::new(array) + } else { + Arc::new(self.clone()) } - - Arc::new(self.clone()) } } @@ -198,58 +154,36 @@ impl Downcast for 
MultiPolygonArray { } fn downcast(&self) -> Self::Output { - if *self.geom_offsets.last() as usize == self.len() { - return Arc::new(PolygonArray::new( - self.coords.clone(), - self.polygon_offsets.clone(), - self.ring_offsets.clone(), - self.validity.clone(), - self.metadata(), - )); + if let Ok(array) = PolygonArray::try_from(self.clone()) { + Arc::new(array) + } else { + Arc::new(self.clone()) } - - Arc::new(self.clone()) } } +// Note: this will not downcast on sliced data when it otherwise could, because the children +// haven't been sliced, just the offsets. So it still looks like the children have data. impl Downcast for MixedGeometryArray { - type Output = Arc; + type Output = Result>; fn downcasted_data_type(&self) -> NativeType { - let coord_type = self.coord_type(); - - if self.has_points() - && !self.has_line_strings() - && !self.has_polygons() - && !self.has_multi_points() - && !self.has_multi_line_strings() - && !self.has_multi_polygons() - { - return NativeType::Point(coord_type, Dimension::XY); - } - - if !self.has_points() - && self.has_line_strings() - && !self.has_polygons() - && !self.has_multi_points() - && !self.has_multi_line_strings() - && !self.has_multi_polygons() - { - return self.line_strings.downcasted_data_type(); - } - - if !self.has_points() - && !self.has_line_strings() - && self.has_polygons() - && !self.has_multi_points() - && !self.has_multi_line_strings() - && !self.has_multi_polygons() - { - return self.polygons.downcasted_data_type(); + let types = self.contained_types(); + if types.len() == 1 { + let typ = *types.iter().next().unwrap(); + + // Only has non-multi geometry children + if matches!(typ, NativeType::Point(_, _)) + || matches!(typ, NativeType::LineString(_, _)) + || matches!(typ, NativeType::Polygon(_, _)) + { + return typ; + } } - if !self.has_points() - && !self.has_line_strings() + // Whether or not we have the single-geom type, if we only otherwise have the multi-geom + // type, then we can downcast if we can 
downcast the multi-geom type. + if !self.has_line_strings() && !self.has_polygons() && self.has_multi_points() && !self.has_multi_line_strings() @@ -259,7 +193,6 @@ impl Downcast for MixedGeometryArray { } if !self.has_points() - && !self.has_line_strings() && !self.has_polygons() && !self.has_multi_points() && self.has_multi_line_strings() @@ -270,7 +203,6 @@ impl Downcast for MixedGeometryArray { if !self.has_points() && !self.has_line_strings() - && !self.has_polygons() && !self.has_multi_points() && !self.has_multi_line_strings() && self.has_multi_polygons() @@ -282,85 +214,24 @@ impl Downcast for MixedGeometryArray { } fn downcast(&self) -> Self::Output { - // TODO: do I need to handle the slice offset? - if self.has_points() - && !self.has_line_strings() - && !self.has_polygons() - && !self.has_multi_points() - && !self.has_multi_line_strings() - && !self.has_multi_polygons() - { - return Arc::new(self.points.clone()); - } - - if !self.has_points() - && self.has_line_strings() - && !self.has_polygons() - && !self.has_multi_points() - && !self.has_multi_line_strings() - && !self.has_multi_polygons() - { - return self.line_strings.downcast(); - } - - if !self.has_points() - && !self.has_line_strings() - && self.has_polygons() - && !self.has_multi_points() - && !self.has_multi_line_strings() - && !self.has_multi_polygons() - { - return self.polygons.downcast(); - } - - if !self.has_points() - && !self.has_line_strings() - && !self.has_polygons() - && self.has_multi_points() - && !self.has_multi_line_strings() - && !self.has_multi_polygons() - { - return self.multi_points.downcast(); - } - - if !self.has_points() - && !self.has_line_strings() - && !self.has_polygons() - && !self.has_multi_points() - && self.has_multi_line_strings() - && !self.has_multi_polygons() - { - return self.multi_line_strings.downcast(); - } - - if !self.has_points() - && !self.has_line_strings() - && !self.has_polygons() - && !self.has_multi_points() - && !self.has_multi_line_strings() - 
&& self.has_multi_polygons() - { - return self.multi_polygons.downcast(); - } - - Arc::new(self.clone()) + self.cast(self.downcasted_data_type()) } } impl Downcast for GeometryCollectionArray { - type Output = Arc; + type Output = Result>; fn downcasted_data_type(&self) -> NativeType { - todo!() - } - fn downcast(&self) -> Self::Output { // TODO: support downcasting with null elements - if *self.geom_offsets.last() as usize == self.len() && self.null_count() == 0 { - // Call downcast on the mixed array - return self.array.downcast(); + if can_downcast_multi(&self.geom_offsets) && self.null_count() == 0 { + self.array.downcasted_data_type() + } else { + self.data_type() } + } - Arc::new(self.clone()) + fn downcast(&self) -> Self::Output { + self.cast(self.downcasted_data_type()) } } @@ -370,13 +241,30 @@ impl Downcast for RectArray { fn downcasted_data_type(&self) -> NativeType { self.data_type() } + fn downcast(&self) -> Self::Output { Arc::new(self.clone()) } } +impl Downcast for GeometryArray { + type Output = Result>; + + fn downcasted_data_type(&self) -> NativeType { + if let Ok(mixed_array) = MixedGeometryArray::try_from(self.clone()) { + mixed_array.downcasted_data_type() + } else { + self.data_type() + } + } + + fn downcast(&self) -> Self::Output { + self.cast(self.downcasted_data_type()) + } +} + impl Downcast for &dyn NativeArray { - type Output = Arc; + type Output = Result>; fn downcasted_data_type(&self) -> NativeType { use NativeType::*; @@ -391,7 +279,7 @@ impl Downcast for &dyn NativeArray { Mixed(_, _) => self.as_mixed().downcasted_data_type(), GeometryCollection(_, _) => self.as_geometry_collection().downcasted_data_type(), Rect(_) => self.as_rect().downcasted_data_type(), - _ => todo!("3d support"), + Geometry(_) => self.as_geometry().downcasted_data_type(), } } @@ -399,16 +287,16 @@ impl Downcast for &dyn NativeArray { use NativeType::*; match self.data_type() { - Point(_, _) => self.as_point().downcast(), - LineString(_, _) => 
self.as_line_string().downcast(), - Polygon(_, _) => self.as_polygon().downcast(), - MultiPoint(_, _) => self.as_multi_point().downcast(), - MultiLineString(_, _) => self.as_multi_line_string().downcast(), - MultiPolygon(_, _) => self.as_multi_polygon().downcast(), + Point(_, _) => Ok(self.as_point().downcast()), + LineString(_, _) => Ok(self.as_line_string().downcast()), + Polygon(_, _) => Ok(self.as_polygon().downcast()), + MultiPoint(_, _) => Ok(self.as_multi_point().downcast()), + MultiLineString(_, _) => Ok(self.as_multi_line_string().downcast()), + MultiPolygon(_, _) => Ok(self.as_multi_polygon().downcast()), Mixed(_, _) => self.as_mixed().downcast(), GeometryCollection(_, _) => self.as_geometry_collection().downcast(), - Rect(_) => self.as_rect().downcast(), - _ => todo!("3d support"), + Rect(_) => Ok(self.as_rect().downcast()), + Geometry(_) => self.as_geometry().downcast(), } } } @@ -416,35 +304,62 @@ impl Downcast for &dyn NativeArray { /// Given a set of types, return a single type that the result should be casted to fn resolve_types(types: &HashSet) -> NativeType { if types.is_empty() { + // TODO: error here panic!("empty types"); - } else if types.len() == 1 { - *types.iter().next().unwrap() - } else if types.len() == 2 { - let mut extension_name_set = HashSet::new(); - // let mut coord_types = HashSet::new(); - types.iter().for_each(|t| { - extension_name_set.insert(t.extension_name()); - }); - if extension_name_set.contains("geoarrow.point") - && extension_name_set.contains("geoarrow.multipoint") + } + + // If only one type, we can cast to that. + if types.len() == 1 { + return *types.iter().next().unwrap(); + } + + // If Geometry is in the type set, short circuit to that. 
+ if types.contains(&NativeType::Geometry(CoordType::Interleaved)) { + return NativeType::Geometry(CoordType::Interleaved); + } else if types.contains(&NativeType::Geometry(CoordType::Separated)) { + return NativeType::Geometry(CoordType::Separated); + } + + // Since we don't have NativeType::Geometry, dimension should never be null + let dimensions: HashSet = + HashSet::from_iter(types.iter().map(|ty| ty.dimension().unwrap())); + let coord_types: HashSet = + HashSet::from_iter(types.iter().map(|ty| ty.coord_type())); + + // Just take the first one + let coord_type = *coord_types.iter().next().unwrap(); + + // For data with multiple dimensions, we must cast to GeometryArray + if dimensions.len() > 1 { + return NativeType::Geometry(coord_type); + } + // Otherwise, we have just one dimension + let dimension = *dimensions.iter().next().unwrap(); + + // We want to compare geometry types without looking at dimension or coord type. This is a + // slight hack but for now we do that by the string geometry type. 
+ let geometry_type_names: HashSet<&str> = + HashSet::from_iter(types.iter().map(|x| x.extension_name())); + + if geometry_type_names.len() == 2 { + if geometry_type_names.contains("geoarrow.point") + && geometry_type_names.contains("geoarrow.multipoint") { - NativeType::MultiPoint(Default::default(), Dimension::XY) - } else if extension_name_set.contains("geoarrow.linestring") - && extension_name_set.contains("geoarrow.multilinestring") + return NativeType::MultiPoint(coord_type, dimension); + } else if geometry_type_names.contains("geoarrow.linestring") + && geometry_type_names.contains("geoarrow.multilinestring") { - NativeType::MultiLineString(Default::default(), Dimension::XY) - } else if extension_name_set.contains("geoarrow.polygon") - && extension_name_set.contains("geoarrow.multipolygon") + return NativeType::MultiLineString(coord_type, dimension); + } else if geometry_type_names.contains("geoarrow.polygon") + && geometry_type_names.contains("geoarrow.multipolygon") { - NativeType::MultiPolygon(Default::default(), Dimension::XY) - } else if extension_name_set.contains("geoarrow.geometrycollection") { - NativeType::GeometryCollection(Default::default(), Dimension::XY) - } else { - NativeType::Mixed(Default::default(), Dimension::XY) + return NativeType::MultiPolygon(coord_type, dimension); + } else if geometry_type_names.contains("geoarrow.geometrycollection") { + return NativeType::GeometryCollection(coord_type, dimension); } - } else { - NativeType::Mixed(Default::default(), Dimension::XY) } + + NativeType::Geometry(coord_type) } impl Downcast for ChunkedPointArray { @@ -477,7 +392,7 @@ macro_rules! 
impl_chunked_downcast { return Arc::new(self.clone()); } - self.cast(&to_data_type).unwrap() + self.cast(to_data_type).unwrap() } } }; @@ -490,6 +405,7 @@ impl_chunked_downcast!(ChunkedMultiLineStringArray); impl_chunked_downcast!(ChunkedMultiPolygonArray); impl_chunked_downcast!(ChunkedMixedGeometryArray); impl_chunked_downcast!(ChunkedGeometryCollectionArray); +impl_chunked_downcast!(ChunkedUnknownGeometryArray); impl Downcast for ChunkedRectArray { type Output = Arc; @@ -506,38 +422,36 @@ impl Downcast for &dyn ChunkedNativeArray { type Output = Arc; fn downcasted_data_type(&self) -> NativeType { - use Dimension::*; use NativeType::*; match self.data_type() { - Point(_, XY) => self.as_point().downcasted_data_type(), - LineString(_, XY) => self.as_line_string().downcasted_data_type(), - Polygon(_, XY) => self.as_polygon().downcasted_data_type(), - MultiPoint(_, XY) => self.as_multi_point().downcasted_data_type(), - MultiLineString(_, XY) => self.as_multi_line_string().downcasted_data_type(), - MultiPolygon(_, XY) => self.as_multi_polygon().downcasted_data_type(), - Mixed(_, XY) => self.as_mixed().downcasted_data_type(), - GeometryCollection(_, XY) => self.as_geometry_collection().downcasted_data_type(), - Rect(XY) => self.as_rect().downcasted_data_type(), - _ => todo!("3d support"), + Point(_, _) => self.as_point().downcasted_data_type(), + LineString(_, _) => self.as_line_string().downcasted_data_type(), + Polygon(_, _) => self.as_polygon().downcasted_data_type(), + MultiPoint(_, _) => self.as_multi_point().downcasted_data_type(), + MultiLineString(_, _) => self.as_multi_line_string().downcasted_data_type(), + MultiPolygon(_, _) => self.as_multi_polygon().downcasted_data_type(), + Mixed(_, _) => self.as_mixed().downcasted_data_type(), + GeometryCollection(_, _) => self.as_geometry_collection().downcasted_data_type(), + Rect(_) => self.as_rect().downcasted_data_type(), + Geometry(_) => self.as_geometry().downcasted_data_type(), } } fn downcast(&self) -> 
Self::Output { - use Dimension::*; use NativeType::*; match self.data_type() { - Point(_, XY) => self.as_point().downcast(), - LineString(_, XY) => self.as_line_string().downcast(), - Polygon(_, XY) => self.as_polygon().downcast(), - MultiPoint(_, XY) => self.as_multi_point().downcast(), - MultiLineString(_, XY) => self.as_multi_line_string().downcast(), - MultiPolygon(_, XY) => self.as_multi_polygon().downcast(), - Mixed(_, XY) => self.as_mixed().downcast(), - GeometryCollection(_, XY) => self.as_geometry_collection().downcast(), - Rect(XY) => self.as_rect().downcast(), - _ => todo!("3d support"), + Point(_, _) => self.as_point().downcast(), + LineString(_, _) => self.as_line_string().downcast(), + Polygon(_, _) => self.as_polygon().downcast(), + MultiPoint(_, _) => self.as_multi_point().downcast(), + MultiLineString(_, _) => self.as_multi_line_string().downcast(), + MultiPolygon(_, _) => self.as_multi_polygon().downcast(), + Mixed(_, _) => self.as_mixed().downcast(), + GeometryCollection(_, _) => self.as_geometry_collection().downcast(), + Rect(_) => self.as_rect().downcast(), + Geometry(_) => self.as_geometry().downcast(), } } } diff --git a/rust/geoarrow/src/array/geometry/array.rs b/rust/geoarrow/src/array/geometry/array.rs index 81a9ecc8..cd804ff6 100644 --- a/rust/geoarrow/src/array/geometry/array.rs +++ b/rust/geoarrow/src/array/geometry/array.rs @@ -9,8 +9,8 @@ use crate::array::geometry::GeometryBuilder; use crate::array::geometry::GeometryCapacity; use crate::array::metadata::ArrayMetadata; use crate::array::{ - CoordType, GeometryCollectionArray, LineStringArray, MultiLineStringArray, MultiPointArray, - MultiPolygonArray, PointArray, PolygonArray, WKBArray, + CoordType, GeometryCollectionArray, LineStringArray, MixedGeometryArray, MultiLineStringArray, + MultiPointArray, MultiPolygonArray, PointArray, PolygonArray, WKBArray, }; use crate::datatypes::{Dimension, NativeType}; use crate::error::{GeoArrowError, Result}; @@ -181,6 +181,7 @@ impl 
GeometryArray { ) } + // TODO: handle slicing pub fn has_points(&self, dim: Dimension) -> bool { match dim { Dimension::XY => !self.point_xy.is_empty(), @@ -223,6 +224,13 @@ impl GeometryArray { } } + pub fn has_geometry_collections(&self, dim: Dimension) -> bool { + match dim { + Dimension::XY => !self.gc_xy.is_empty(), + Dimension::XYZ => !self.gc_xyz.is_empty(), + } + } + /// Return `true` if this array holds at least one geometry array of the given dimension pub fn has_dimension(&self, dim: Dimension) -> bool { use Dimension::*; @@ -256,6 +264,9 @@ impl GeometryArray { } } + // Handle sliced data before downcasting. + // pub fn compact_children() + // /// The number of non-empty child arrays // fn num_non_empty_children(&self) -> usize { // let mut count = 0; @@ -446,6 +457,56 @@ impl GeometryArray { self.metadata, ) } + + // TODO: recursively expand the types from the geometry collection array + pub fn contained_types(&self) -> HashSet { + let mut types = HashSet::new(); + if self.has_points(Dimension::XY) { + types.insert(self.point_xy.data_type()); + } + if self.has_line_strings(Dimension::XY) { + types.insert(self.line_string_xy.data_type()); + } + if self.has_polygons(Dimension::XY) { + types.insert(self.polygon_xy.data_type()); + } + if self.has_multi_points(Dimension::XY) { + types.insert(self.mpoint_xy.data_type()); + } + if self.has_multi_line_strings(Dimension::XY) { + types.insert(self.mline_string_xy.data_type()); + } + if self.has_multi_polygons(Dimension::XY) { + types.insert(self.mpolygon_xy.data_type()); + } + if self.has_geometry_collections(Dimension::XY) { + types.insert(self.gc_xy.data_type()); + } + + if self.has_points(Dimension::XYZ) { + types.insert(self.point_xyz.data_type()); + } + if self.has_line_strings(Dimension::XYZ) { + types.insert(self.line_string_xyz.data_type()); + } + if self.has_polygons(Dimension::XYZ) { + types.insert(self.polygon_xyz.data_type()); + } + if self.has_multi_points(Dimension::XYZ) { + 
types.insert(self.mpoint_xyz.data_type()); + } + if self.has_multi_line_strings(Dimension::XYZ) { + types.insert(self.mline_string_xyz.data_type()); + } + if self.has_multi_polygons(Dimension::XYZ) { + types.insert(self.mpolygon_xyz.data_type()); + } + if self.has_geometry_collections(Dimension::XYZ) { + types.insert(self.gc_xyz.data_type()); + } + + types + } } impl ArrayBase for GeometryArray { @@ -867,155 +928,104 @@ impl TryFrom> for GeometryArray { } } -// impl From for GeometryArray { -// fn from(value: PointArray) -> Self { -// let type_ids = match value.dimension() { -// Dimension::XY => vec![1; value.len()], -// Dimension::XYZ => vec![11; value.len()], -// }; -// let metadata = value.metadata.clone(); -// Self::new( -// ScalarBuffer::from(type_ids), -// ScalarBuffer::from_iter(0..value.len() as i32), -// value, -// Default::default(), -// Default::default(), -// Default::default(), -// Default::default(), -// Default::default(), -// Default::default(), -// Default::default(), -// Default::default(), -// Default::default(), -// Default::default(), -// Default::default(), -// metadata, -// ) -// } -// } - -// impl From for GeometryArray { -// fn from(value: LineStringArray) -> Self { -// let type_ids = match value.dimension() { -// Dimension::XY => vec![2; value.len()], -// Dimension::XYZ => vec![12; value.len()], -// }; -// let metadata = value.metadata.clone(); -// Self::new( -// ScalarBuffer::from(type_ids), -// ScalarBuffer::from_iter(0..value.len() as i32), -// Default::default(), -// value, -// Default::default(), -// Default::default(), -// Default::default(), -// Default::default(), -// metadata, -// ) -// } -// } - -// impl From for GeometryArray { -// fn from(value: PolygonArray) -> Self { -// let type_ids = match value.dimension() { -// Dimension::XY => vec![3; value.len()], -// Dimension::XYZ => vec![13; value.len()], -// }; -// let metadata = value.metadata.clone(); -// Self::new( -// ScalarBuffer::from(type_ids), -// 
ScalarBuffer::from_iter(0..value.len() as i32), -// Default::default(), -// Default::default(), -// value, -// Default::default(), -// Default::default(), -// Default::default(), -// metadata, -// ) -// } -// } - -// impl From for GeometryArray { -// fn from(value: MultiPointArray) -> Self { -// let type_ids = match value.dimension() { -// Dimension::XY => vec![4; value.len()], -// Dimension::XYZ => vec![14; value.len()], -// }; -// let metadata = value.metadata.clone(); -// Self::new( -// ScalarBuffer::from(type_ids), -// ScalarBuffer::from_iter(0..value.len() as i32), -// Default::default(), -// Default::default(), -// Default::default(), -// value, -// Default::default(), -// Default::default(), -// metadata, -// ) -// } -// } - -// impl From for GeometryArray { -// fn from(value: MultiLineStringArray) -> Self { -// let type_ids = match value.dimension() { -// Dimension::XY => vec![5; value.len()], -// Dimension::XYZ => vec![15; value.len()], -// }; -// let metadata = value.metadata.clone(); -// Self::new( -// ScalarBuffer::from(type_ids), -// ScalarBuffer::from_iter(0..value.len() as i32), -// Default::default(), -// Default::default(), -// Default::default(), -// Default::default(), -// value, -// Default::default(), -// metadata, -// ) -// } -// } - -// impl From for GeometryArray { -// fn from(value: MultiPolygonArray) -> Self { -// let type_ids = match value.dimension() { -// Dimension::XY => vec![6; value.len()], -// Dimension::XYZ => vec![16; value.len()], -// }; -// let metadata = value.metadata.clone(); -// Self::new( -// ScalarBuffer::from(type_ids), -// ScalarBuffer::from_iter(0..value.len() as i32), -// Default::default(), -// Default::default(), -// Default::default(), -// Default::default(), -// Default::default(), -// value, -// metadata, -// ) -// } -// } - -// impl TryFrom for GeometryArray { -// type Error = GeoArrowError; - -// fn try_from(value: GeometryCollectionArray) -> std::result::Result { -// if !can_downcast_multi(&value.geom_offsets) 
{ -// return Err(GeoArrowError::General("Unable to cast".to_string())); -// } - -// if value.null_count() > 0 { -// return Err(GeoArrowError::General( -// "Unable to cast with nulls".to_string(), -// )); -// } - -// Ok(value.array) -// } -// } +macro_rules! impl_to_geometry_array { + ($source_array:ty, $typeid_xy:expr, $typeid_xyz:expr, $child_xy:ident, $child_xyz:ident) => { + impl From<$source_array> for GeometryArray { + fn from(value: $source_array) -> Self { + let dim = value.dimension(); + let type_ids = match dim { + Dimension::XY => vec![$typeid_xy; value.len()], + Dimension::XYZ => vec![$typeid_xyz; value.len()], + }; + let mut slf = Self { + data_type: NativeType::Geometry(value.coord_type()), + metadata: value.metadata().clone(), + type_ids: type_ids.into(), + offsets: ScalarBuffer::from_iter(0..value.len() as i32), + ..Default::default() + }; + match dim { + Dimension::XY => { + slf.$child_xy = value; + } + Dimension::XYZ => { + slf.$child_xyz = value; + } + } + slf + } + } + }; +} + +impl_to_geometry_array!(PointArray, 1, 11, point_xy, point_xyz); +impl_to_geometry_array!(LineStringArray, 1, 11, line_string_xy, line_string_xy); +impl_to_geometry_array!(PolygonArray, 1, 11, polygon_xy, polygon_xyz); +impl_to_geometry_array!(MultiPointArray, 1, 11, mpoint_xy, mpoint_xyz); +impl_to_geometry_array!( + MultiLineStringArray, + 1, + 11, + mline_string_xy, + mline_string_xyz +); +impl_to_geometry_array!(MultiPolygonArray, 1, 11, mpolygon_xy, mpolygon_xyz); +impl_to_geometry_array!(GeometryCollectionArray, 1, 11, gc_xy, gc_xyz); + +impl TryFrom for MixedGeometryArray { + type Error = GeoArrowError; + + /// Will error if: + /// + /// - the contained geometries are not all of the same dimension + /// - any geometry collection child exists + fn try_from(value: GeometryArray) -> std::result::Result { + if value.has_only_dimension(Dimension::XY) { + if value.gc_xy.is_empty() { + Ok(MixedGeometryArray::new( + value.type_ids, + value.offsets, + value.point_xy, + 
value.line_string_xy, + value.polygon_xy, + value.mpoint_xy, + value.mline_string_xy, + value.mpolygon_xy, + value.metadata, + )) + } else { + Err(GeoArrowError::General( + "Cannot cast to MixedGeometryArray with non-empty GeometryCollection child." + .to_string(), + )) + } + } else if value.has_only_dimension(Dimension::XYZ) { + if value.gc_xyz.is_empty() { + Ok(MixedGeometryArray::new( + value.type_ids, + value.offsets, + value.point_xyz, + value.line_string_xyz, + value.polygon_xyz, + value.mpoint_xyz, + value.mline_string_xyz, + value.mpolygon_xyz, + value.metadata, + )) + } else { + Err(GeoArrowError::General( + "Cannot cast to MixedGeometryArray with non-empty GeometryCollection child." + .to_string(), + )) + } + } else { + Err(GeoArrowError::General( + "Cannot cast to MixedGeometryArray when GeometryArray contains multiple dimensions" + .to_string(), + )) + } + } +} /// Default to an empty array impl Default for GeometryArray { diff --git a/rust/geoarrow/src/array/linestring/array.rs b/rust/geoarrow/src/array/linestring/array.rs index 8c400c17..a08f73c1 100644 --- a/rust/geoarrow/src/array/linestring/array.rs +++ b/rust/geoarrow/src/array/linestring/array.rs @@ -477,12 +477,13 @@ impl TryFrom for LineStringArray { return Err(GeoArrowError::General("Unable to cast".to_string())); } + let (offset, length) = value.slice_offset_length(); if value.has_only_line_strings() { - return Ok(value.line_strings); + return Ok(value.line_strings.slice(offset, length)); } if value.has_only_multi_line_strings() { - return value.multi_line_strings.try_into(); + return value.multi_line_strings.slice(offset, length).try_into(); } let mut capacity = value.line_strings.buffer_lengths(); diff --git a/rust/geoarrow/src/array/mixed/array.rs b/rust/geoarrow/src/array/mixed/array.rs index cbd1d49c..0f2d4be3 100644 --- a/rust/geoarrow/src/array/mixed/array.rs +++ b/rust/geoarrow/src/array/mixed/array.rs @@ -73,6 +73,10 @@ pub struct MixedGeometryArray { pub(crate) multi_points: 
MultiPointArray, pub(crate) multi_line_strings: MultiLineStringArray, pub(crate) multi_polygons: MultiPolygonArray, + + /// We don't need a separate slice_length, because that's the length of the full + /// MixedGeometryArray + slice_offset: usize, } impl MixedGeometryArray { @@ -121,7 +125,6 @@ impl MixedGeometryArray { let dim = dimensions.into_iter().next().unwrap(); let data_type = NativeType::Mixed(coord_type, dim); - Self { data_type, type_ids, @@ -133,6 +136,7 @@ impl MixedGeometryArray { multi_line_strings, multi_polygons, metadata, + slice_offset: 0, } } @@ -148,28 +152,143 @@ impl MixedGeometryArray { ) } + /// Return `true` if this array has been sliced. + pub(crate) fn is_sliced(&self) -> bool { + // Note this is still not a valid check, because it could've been sliced with start 0 but + // length less than the full length. + // self.slice_offset > 0 || self.slice_length + + let mut child_lengths = 0; + child_lengths += self.points.len(); + child_lengths += self.line_strings.len(); + child_lengths += self.polygons.len(); + child_lengths += self.multi_points.len(); + child_lengths += self.multi_line_strings.len(); + child_lengths += self.multi_polygons.len(); + + child_lengths > self.len() + } + + /// The offset and length by which this array has been sliced. + /// + /// If this array has not been sliced, the slice offset will be `0`. The length will always be + /// equal to `self.len()`. 
+ pub(crate) fn slice_offset_length(&self) -> (usize, usize) { + (self.slice_offset, self.len()) + } + pub fn has_points(&self) -> bool { - !self.points.is_empty() + if self.points.is_empty() { + return false; + } + + // If the array has been sliced, check a point type id still exists + if self.is_sliced() { + for t in self.type_ids.iter() { + if *t % 10 == 1 { + return true; + } + } + + return false; + } + + true } pub fn has_line_strings(&self) -> bool { - !self.line_strings.is_empty() + if self.line_strings.is_empty() { + return false; + } + + // If the array has been sliced, check a point type id still exists + if self.is_sliced() { + for t in self.type_ids.iter() { + if *t % 10 == 2 { + return true; + } + } + + return false; + } + + true } pub fn has_polygons(&self) -> bool { - !self.polygons.is_empty() + if self.polygons.is_empty() { + return false; + } + + // If the array has been sliced, check a point type id still exists + if self.is_sliced() { + for t in self.type_ids.iter() { + if *t % 10 == 3 { + return true; + } + } + + return false; + } + + true } pub fn has_multi_points(&self) -> bool { - !self.multi_points.is_empty() + if self.multi_points.is_empty() { + return false; + } + + // If the array has been sliced, check a point type id still exists + if self.is_sliced() { + for t in self.type_ids.iter() { + if *t % 10 == 4 { + return true; + } + } + + return false; + } + + true } pub fn has_multi_line_strings(&self) -> bool { - !self.multi_line_strings.is_empty() + if self.multi_line_strings.is_empty() { + return false; + } + + // If the array has been sliced, check a point type id still exists + if self.is_sliced() { + for t in self.type_ids.iter() { + if *t % 10 == 5 { + return true; + } + } + + return false; + } + + true } pub fn has_multi_polygons(&self) -> bool { - !self.multi_polygons.is_empty() + if self.multi_polygons.is_empty() { + return false; + } + + // If the array has been sliced, check a point type id still exists + if self.is_sliced() { + 
for t in self.type_ids.iter() { + if *t % 10 == 6 { + return true; + } + } + + return false; + } + + true } pub fn has_only_points(&self) -> bool { @@ -257,6 +376,7 @@ impl MixedGeometryArray { multi_line_strings: self.multi_line_strings.clone(), multi_polygons: self.multi_polygons.clone(), metadata: self.metadata.clone(), + slice_offset: self.slice_offset + offset, } } @@ -277,6 +397,30 @@ impl MixedGeometryArray { self.metadata, ) } + + pub fn contained_types(&self) -> HashSet { + let mut types = HashSet::new(); + if self.has_points() { + types.insert(self.points.data_type()); + } + if self.has_line_strings() { + types.insert(self.line_strings.data_type()); + } + if self.has_polygons() { + types.insert(self.polygons.data_type()); + } + if self.has_multi_points() { + types.insert(self.multi_points.data_type()); + } + if self.has_multi_line_strings() { + types.insert(self.multi_line_strings.data_type()); + } + if self.has_multi_polygons() { + types.insert(self.multi_polygons.data_type()); + } + + types + } } impl ArrayBase for MixedGeometryArray { diff --git a/rust/geoarrow/src/array/multilinestring/array.rs b/rust/geoarrow/src/array/multilinestring/array.rs index d9ae018e..517df2e3 100644 --- a/rust/geoarrow/src/array/multilinestring/array.rs +++ b/rust/geoarrow/src/array/multilinestring/array.rs @@ -513,12 +513,13 @@ impl TryFrom for MultiLineStringArray { return Err(GeoArrowError::General("Unable to cast".to_string())); } + let (offset, length) = value.slice_offset_length(); if value.has_only_line_strings() { - return Ok(value.line_strings.into()); + return Ok(value.line_strings.slice(offset, length).into()); } if value.has_only_multi_line_strings() { - return Ok(value.multi_line_strings); + return Ok(value.multi_line_strings.slice(offset, length)); } let mut capacity = value.multi_line_strings.buffer_lengths(); diff --git a/rust/geoarrow/src/array/multipoint/array.rs b/rust/geoarrow/src/array/multipoint/array.rs index 1722470e..06fe7862 100644 --- 
a/rust/geoarrow/src/array/multipoint/array.rs +++ b/rust/geoarrow/src/array/multipoint/array.rs @@ -457,12 +457,13 @@ impl TryFrom for MultiPointArray { return Err(GeoArrowError::General("Unable to cast".to_string())); } + let (offset, length) = value.slice_offset_length(); if value.has_only_points() { - return Ok(value.points.into()); + return Ok(value.points.slice(offset, length).into()); } if value.has_only_multi_points() { - return Ok(value.multi_points); + return Ok(value.multi_points.slice(offset, length)); } let mut capacity = value.multi_points.buffer_lengths(); diff --git a/rust/geoarrow/src/array/multipolygon/array.rs b/rust/geoarrow/src/array/multipolygon/array.rs index 5390ed2d..7d2d247b 100644 --- a/rust/geoarrow/src/array/multipolygon/array.rs +++ b/rust/geoarrow/src/array/multipolygon/array.rs @@ -595,12 +595,13 @@ impl TryFrom for MultiPolygonArray { return Err(GeoArrowError::General("Unable to cast".to_string())); } + let (offset, length) = value.slice_offset_length(); if value.has_only_polygons() { - return Ok(value.polygons.into()); + return Ok(value.polygons.slice(offset, length).into()); } if value.has_only_multi_polygons() { - return Ok(value.multi_polygons); + return Ok(value.multi_polygons.slice(offset, length)); } let mut capacity = value.multi_polygons.buffer_lengths(); diff --git a/rust/geoarrow/src/array/point/array.rs b/rust/geoarrow/src/array/point/array.rs index 24798f80..4fa11225 100644 --- a/rust/geoarrow/src/array/point/array.rs +++ b/rust/geoarrow/src/array/point/array.rs @@ -412,12 +412,13 @@ impl TryFrom for PointArray { return Err(GeoArrowError::General("Unable to cast".to_string())); } + let (offset, length) = value.slice_offset_length(); if value.has_only_points() { - return Ok(value.points); + return Ok(value.points.slice(offset, length)); } if value.has_only_multi_points() { - return value.multi_points.try_into(); + return value.multi_points.slice(offset, length).try_into(); } let mut builder = 
PointBuilder::with_capacity_and_options( diff --git a/rust/geoarrow/src/array/polygon/array.rs b/rust/geoarrow/src/array/polygon/array.rs index a1660c72..a254750f 100644 --- a/rust/geoarrow/src/array/polygon/array.rs +++ b/rust/geoarrow/src/array/polygon/array.rs @@ -552,12 +552,13 @@ impl TryFrom for PolygonArray { return Err(GeoArrowError::General("Unable to cast".to_string())); } + let (offset, length) = value.slice_offset_length(); if value.has_only_polygons() { - return Ok(value.polygons); + return Ok(value.polygons.slice(offset, length)); } if value.has_only_multi_polygons() { - return value.multi_polygons.try_into(); + return value.multi_polygons.slice(offset, length).try_into(); } let mut capacity = value.polygons.buffer_lengths(); diff --git a/rust/geoarrow/src/datatypes.rs b/rust/geoarrow/src/datatypes.rs index de9e8408..666756f3 100644 --- a/rust/geoarrow/src/datatypes.rs +++ b/rust/geoarrow/src/datatypes.rs @@ -469,7 +469,7 @@ impl NativeType { Mixed(_, _) => "geoarrow.geometry", GeometryCollection(_, _) => "geoarrow.geometrycollection", Rect(_) => "geoarrow.box", - Geometry(_) => "geoarrow.unknown", + Geometry(_) => "geoarrow.geometry", } } @@ -794,7 +794,7 @@ fn parse_multi_polygon(field: &Field) -> Result { } } -fn parse_geometry(field: &Field) -> Result { +fn parse_mixed(field: &Field) -> Result { match field.data_type() { DataType::Union(fields, _) => { let mut coord_types: HashSet = HashSet::new(); @@ -927,13 +927,13 @@ fn parse_geometry_collection(field: &Field) -> Result { // We need to parse the _inner_ type of the geometry collection as a union so that we can check // what coordinate type it's using. match field.data_type() { - DataType::List(inner_field) => match parse_geometry(inner_field)? { + DataType::List(inner_field) => match parse_mixed(inner_field)? { NativeType::Mixed(coord_type, dim) => { Ok(NativeType::GeometryCollection(coord_type, dim)) } _ => panic!(), }, - DataType::LargeList(inner_field) => match parse_geometry(inner_field)? 
{ + DataType::LargeList(inner_field) => match parse_mixed(inner_field)? { NativeType::Mixed(coord_type, dim) => { Ok(NativeType::GeometryCollection(coord_type, dim)) } @@ -970,7 +970,7 @@ fn parse_rect(field: &Field) -> NativeType { } } -fn parse_unknown(field: &Field) -> Result { +fn parse_geometry(field: &Field) -> Result { if let DataType::Union(fields, _mode) = field.data_type() { let mut coord_types: HashSet = HashSet::new(); @@ -1090,10 +1090,11 @@ impl TryFrom<&Field> for NativeType { "geoarrow.multipoint" => parse_multi_point(field)?, "geoarrow.multilinestring" => parse_multi_linestring(field)?, "geoarrow.multipolygon" => parse_multi_polygon(field)?, - "geoarrow.geometry" => parse_geometry(field)?, "geoarrow.geometrycollection" => parse_geometry_collection(field)?, "geoarrow.box" => parse_rect(field), - "geoarrow.unknown" => parse_unknown(field)?, + "geoarrow.geometry" => parse_geometry(field)?, + // We always parse geoarrow.geometry to a GeometryArray + // "geoarrow.geometry" => parse_mixed(field)?, name => return Err(GeoArrowError::General(format!("Expected GeoArrow native type, got '{}'.\nIf you're passing a serialized GeoArrow type like 'geoarrow.wkb' or 'geoarrow.wkt', you need to parse to a native representation.", name))), }; Ok(data_type) @@ -1162,7 +1163,7 @@ impl TryFrom<&Field> for AnyType { #[cfg(test)] mod test { use super::*; - use crate::array::MixedGeometryBuilder; + use crate::array::GeometryBuilder; use crate::{ArrayBase, NativeArray}; #[test] @@ -1177,7 +1178,7 @@ mod test { let data_type: NativeType = field.as_ref().try_into().unwrap(); assert_eq!(ml_array.data_type(), data_type); - let mut builder = MixedGeometryBuilder::new(Dimension::XY); + let mut builder = GeometryBuilder::new(); builder.push_point(Some(&crate::test::point::p0())).unwrap(); builder.push_point(Some(&crate::test::point::p1())).unwrap(); builder.push_point(Some(&crate::test::point::p2())).unwrap(); @@ -1187,9 +1188,9 @@ mod test { builder 
.push_multi_line_string(Some(&crate::test::multilinestring::ml1())) .unwrap(); - let mixed_array = builder.finish(); - let field = mixed_array.extension_field(); + let geom_array = builder.finish(); + let field = geom_array.extension_field(); let data_type: NativeType = field.as_ref().try_into().unwrap(); - assert_eq!(mixed_array.data_type(), data_type); + assert_eq!(geom_array.data_type(), data_type); } } diff --git a/rust/geoarrow/src/io/wkb/api.rs b/rust/geoarrow/src/io/wkb/api.rs index b55b713c..95e4e0ea 100644 --- a/rust/geoarrow/src/io/wkb/api.rs +++ b/rust/geoarrow/src/io/wkb/api.rs @@ -116,7 +116,7 @@ impl FromWKB for Arc { arr.metadata(), true, )?; - Ok(builder.finish().downcast()) + builder.finish().downcast() } } @@ -303,7 +303,7 @@ impl ToWKB for &dyn ChunkedNativeArray { ChunkedGeometryArray::new(self.as_geometry_collection().map(|chunk| chunk.into())) } Rect(_) => todo!(), - Geometry(_) => ChunkedGeometryArray::new(self.as_mixed().map(|chunk| chunk.into())), + Geometry(_) => ChunkedGeometryArray::new(self.as_geometry().map(|chunk| chunk.into())), } } } @@ -356,9 +356,10 @@ mod test { true, ) .unwrap(); + let rt_ref = roundtrip.as_ref(); let rt_mixed_arr = rt_ref.as_mixed(); - let downcasted = rt_mixed_arr.downcast(); + let downcasted = rt_mixed_arr.downcast().unwrap(); let downcasted_ref = downcasted.as_ref(); let rt_point_arr = downcasted_ref.as_point(); assert_eq!(&arr, rt_point_arr); diff --git a/rust/geoarrow/src/table.rs b/rust/geoarrow/src/table.rs index 9bc87bb6..6052483f 100644 --- a/rust/geoarrow/src/table.rs +++ b/rust/geoarrow/src/table.rs @@ -29,7 +29,6 @@ pub(crate) static GEOARROW_EXTENSION_NAMES: Set<&'static str> = phf_set! 
{ "geoarrow.geometrycollection", "geoarrow.wkb", "geoarrow.wkt", - "geoarrow.unknown", "ogc.wkb", }; @@ -153,10 +152,10 @@ impl Table { /// let index = table.default_geometry_column_idx().unwrap(); /// /// // Change to separated storage of coordinates - /// table.cast_geometry(index, &NativeType::LineString(CoordType::Separated, Dimension::XY)).unwrap(); + /// table.cast_geometry(index, NativeType::LineString(CoordType::Separated, Dimension::XY)).unwrap(); /// # } /// ``` - pub fn cast_geometry(&mut self, index: usize, to_type: &NativeType) -> Result<()> { + pub fn cast_geometry(&mut self, index: usize, to_type: NativeType) -> Result<()> { let orig_field = self.schema().field(index); let array_slices = self