From 649d74ca7a084e6867328451ac5b2d975817ca3e Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Fri, 20 Dec 2024 10:00:59 +0100 Subject: [PATCH] Remove `Chunk::iter_component_arrays` (#8548) This method makes no sense. It's a complete anti-pattern. The whole point of the Chunk level methods is to pay the cost of reflection/downcasting/deserialization once for the whole Chunk, this can never happen with the way this method is defined. I'm not sure what I was thinking back then. I likely wasn't. There is never a good reason to use this. --- crates/store/re_chunk/src/iter.rs | 34 ++-------------- crates/store/re_grpc_client/src/lib.rs | 10 +---- .../src/max_image_dimension_subscriber.rs | 40 +++++++++---------- 3 files changed, 23 insertions(+), 61 deletions(-) diff --git a/crates/store/re_chunk/src/iter.rs b/crates/store/re_chunk/src/iter.rs index c417e814c6ad..4bce90046356 100644 --- a/crates/store/re_chunk/src/iter.rs +++ b/crates/store/re_chunk/src/iter.rs @@ -2,9 +2,9 @@ use std::sync::Arc; use arrow2::{ array::{ - Array as Arrow2Array, BooleanArray as Arrow2BooleanArray, - FixedSizeListArray as Arrow2FixedSizeListArray, ListArray as Arrow2ListArray, - PrimitiveArray as Arrow2PrimitiveArray, Utf8Array as Arrow2Utf8Array, + BooleanArray as Arrow2BooleanArray, FixedSizeListArray as Arrow2FixedSizeListArray, + ListArray as Arrow2ListArray, PrimitiveArray as Arrow2PrimitiveArray, + Utf8Array as Arrow2Utf8Array, }, bitmap::Bitmap as Arrow2Bitmap, Either, @@ -200,27 +200,6 @@ impl Chunk { } } - /// Returns an iterator over the raw arrays of a [`Chunk`], for a given component. - /// - /// See also: - /// * [`Self::iter_primitive`] - /// * [`Self::iter_primitive_array`] - /// * [`Self::iter_primitive_array_list`] - /// * [`Self::iter_string`] - /// * [`Self::iter_buffer`]. - /// * [`Self::iter_component`]. - #[inline] - pub fn iter_component_arrays( - &self, - component_name: &ComponentName, - ) -> impl Iterator> + '_ { - let Some(list_array) = self.get_first_component(component_name) else { - return Either::Left(std::iter::empty()); - }; - - Either::Right(list_array.iter().flatten()) - } - /// Returns an iterator over the raw primitive values of a [`Chunk`], for a given component. /// /// This is a very fast path: the entire column will be downcasted at once, and then every @@ -233,7 +212,6 @@ impl Chunk { /// * [`Self::iter_primitive_array_list`] /// * [`Self::iter_string`] /// * [`Self::iter_buffer`]. - /// * [`Self::iter_component_arrays`]. /// * [`Self::iter_component`]. #[inline] pub fn iter_primitive( @@ -276,7 +254,6 @@ impl Chunk { /// * [`Self::iter_primitive_array_list`] /// * [`Self::iter_string`] /// * [`Self::iter_buffer`]. - /// * [`Self::iter_component_arrays`]. /// * [`Self::iter_component`]. #[inline] pub fn iter_bool( @@ -319,7 +296,6 @@ impl Chunk { /// * [`Self::iter_primitive`] /// * [`Self::iter_string`] /// * [`Self::iter_buffer`]. - /// * [`Self::iter_component_arrays`]. /// * [`Self::iter_component`]. pub fn iter_primitive_array( &self, @@ -381,7 +357,6 @@ impl Chunk { /// * [`Self::iter_primitive_array`] /// * [`Self::iter_string`] /// * [`Self::iter_buffer`]. - /// * [`Self::iter_component_arrays`]. /// * [`Self::iter_component`]. pub fn iter_primitive_array_list( &self, @@ -466,7 +441,6 @@ impl Chunk { /// * [`Self::iter_primitive_array`] /// * [`Self::iter_primitive_array_list`] /// * [`Self::iter_buffer`]. - /// * [`Self::iter_component_arrays`]. /// * [`Self::iter_component`]. pub fn iter_string( &self, @@ -517,7 +491,6 @@ impl Chunk { /// * [`Self::iter_primitive_array`] /// * [`Self::iter_primitive_array_list`] /// * [`Self::iter_string`]. - /// * [`Self::iter_component_arrays`]. /// * [`Self::iter_component`]. pub fn iter_buffer( &self, @@ -740,7 +713,6 @@ impl Chunk { /// * [`Self::iter_primitive_array_list`] /// * [`Self::iter_string`] /// * [`Self::iter_buffer`]. - /// * [`Self::iter_component_arrays`]. #[inline] pub fn iter_component( &self, diff --git a/crates/store/re_grpc_client/src/lib.rs b/crates/store/re_grpc_client/src/lib.rs index 2fe879e632eb..9be46a4b8260 100644 --- a/crates/store/re_grpc_client/src/lib.rs +++ b/crates/store/re_grpc_client/src/lib.rs @@ -480,15 +480,9 @@ async fn stream_catalog_async( )))?; let recording_uri_arrays: Vec> = chunk - .iter_component_arrays(&"id".into()) + .iter_string(&"id".into()) .map(|id| { - let rec_id = id - .as_any() - .downcast_ref::>() - .ok_or(StreamError::ChunkError(re_chunk::ChunkError::Malformed { - reason: format!("id must be a utf8 array: {:?}", tc.schema), - }))? - .value(0); // each component batch is of length 1 i.e. single 'id' value + let rec_id = &id[0]; // each component batch is of length 1 i.e. single 'id' value let recording_uri = format!("rerun://{host}:{port}/recording/{rec_id}"); diff --git a/crates/viewer/re_view_spatial/src/max_image_dimension_subscriber.rs b/crates/viewer/re_view_spatial/src/max_image_dimension_subscriber.rs index 735cba3f50ff..327d24b60db5 100644 --- a/crates/viewer/re_view_spatial/src/max_image_dimension_subscriber.rs +++ b/crates/viewer/re_view_spatial/src/max_image_dimension_subscriber.rs @@ -1,4 +1,3 @@ -use arrow2::array::Array; use nohash_hasher::IntMap; use once_cell::sync::OnceCell; @@ -85,41 +84,38 @@ impl PerStoreChunkSubscriber for MaxImageDimensionsStoreSubscriber { } // Handle `ImageEncoded`, `AssetVideo`… - let blobs = event.diff.chunk.iter_component_arrays(&Blob::name()); - let media_types = event.diff.chunk.iter_component_arrays(&MediaType::name()); + let blobs = event.diff.chunk.iter_buffer(&Blob::name()); + let media_types = event.diff.chunk.iter_string(&MediaType::name()); for (blob, media_type) in itertools::izip!(blobs, media_types.map(Some).chain(std::iter::repeat(None))) { - if let Some([width, height]) = size_from_blob(blob.as_ref(), media_type.as_deref()) - { - let max_dim = self - .max_dimensions - .entry(event.diff.chunk.entity_path().clone()) - .or_default(); - max_dim.width = max_dim.width.max(width); - max_dim.height = max_dim.height.max(height); + if let Some(blob) = blob.first() { + if let Some([width, height]) = size_from_blob( + blob.as_slice(), + media_type.and_then(|v| v.first().map(|v| MediaType(v.clone().into()))), + ) { + let max_dim = self + .max_dimensions + .entry(event.diff.chunk.entity_path().clone()) + .or_default(); + max_dim.width = max_dim.width.max(width); + max_dim.height = max_dim.height.max(height); + } } } } } } -fn size_from_blob(blob: &dyn Array, media_type: Option<&dyn Array>) -> Option<[u32; 2]> { +fn size_from_blob(blob: &[u8], media_type: Option) -> Option<[u32; 2]> { re_tracing::profile_function!(); - let blob = Blob::from_arrow2_opt(blob).ok()?.first()?.clone()?; - - let media_type: Option = media_type - .and_then(|media_type| MediaType::from_arrow2_opt(media_type).ok()) - .and_then(|list| list.first().cloned()) - .flatten(); - - let media_type = MediaType::or_guess_from_data(media_type, &blob)?; + let media_type = MediaType::or_guess_from_data(media_type, blob)?; if media_type.is_image() { re_tracing::profile_scope!("image"); - let image_bytes = blob.0.as_slice(); + let image_bytes = blob; let mut reader = image::ImageReader::new(std::io::Cursor::new(image_bytes)); @@ -137,7 +133,7 @@ fn size_from_blob(blob: &dyn Array, media_type: Option<&dyn Array>) -> Option<[u reader.into_dimensions().ok().map(|size| size.into()) } else if media_type.is_video() { re_tracing::profile_scope!("video"); - re_video::VideoData::load_from_bytes(&blob, &media_type) + re_video::VideoData::load_from_bytes(blob, &media_type) .ok() .map(|video| video.dimensions()) } else {