Remove Chunk::iter_component_arrays (#8548)
This method makes no sense; it's a complete anti-pattern. The whole
point of the Chunk-level methods is to pay the cost of
reflection/downcasting/deserialization once for the whole Chunk, and
that can never happen with the way this method is defined.

I'm not sure what I was thinking back then. I likely wasn't. There is
never a good reason to use this.
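For context, a rough before/after sketch of the two styles, based on the `id` column handling in `re_grpc_client` shown in the diff below (not part of the commit; the import paths and the exact item types of the typed iterators are assumptions inferred from that diff):

```rust
use arrow2::array::Utf8Array as Arrow2Utf8Array;
use re_chunk::Chunk;

// Before this commit: `iter_component_arrays` yields one untyped
// `Box<dyn arrow2::array::Array>` per row, so the caller has to downcast
// every single element -- the "once per Chunk" cost becomes "once per row".
fn ids_via_untyped_arrays(chunk: &Chunk) -> Vec<String> {
    chunk
        .iter_component_arrays(&"id".into())
        .filter_map(|array| {
            array
                .as_any()
                .downcast_ref::<Arrow2Utf8Array<i32>>() // repeated for every row
                .map(|utf8| utf8.value(0).to_owned())
        })
        .collect()
}

// With the typed iterators (`iter_string`, `iter_buffer`, `iter_primitive`,
// `iter_component`, ...) the whole column is downcast exactly once and each
// row comes back already typed.
fn ids_via_typed_iterator(chunk: &Chunk) -> Vec<String> {
    chunk
        .iter_string(&"id".into())
        .map(|batch| batch[0].to_string()) // each batch holds a single `id`
        .collect()
}
```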
teh-cmc authored Dec 20, 2024
1 parent 3532706 commit 649d74c
Showing 3 changed files with 23 additions and 61 deletions.
34 changes: 3 additions & 31 deletions crates/store/re_chunk/src/iter.rs
@@ -2,9 +2,9 @@ use std::sync::Arc;

use arrow2::{
array::{
Array as Arrow2Array, BooleanArray as Arrow2BooleanArray,
FixedSizeListArray as Arrow2FixedSizeListArray, ListArray as Arrow2ListArray,
PrimitiveArray as Arrow2PrimitiveArray, Utf8Array as Arrow2Utf8Array,
BooleanArray as Arrow2BooleanArray, FixedSizeListArray as Arrow2FixedSizeListArray,
ListArray as Arrow2ListArray, PrimitiveArray as Arrow2PrimitiveArray,
Utf8Array as Arrow2Utf8Array,
},
bitmap::Bitmap as Arrow2Bitmap,
Either,
@@ -200,27 +200,6 @@ impl Chunk {
}
}

/// Returns an iterator over the raw arrays of a [`Chunk`], for a given component.
///
/// See also:
/// * [`Self::iter_primitive`]
/// * [`Self::iter_primitive_array`]
/// * [`Self::iter_primitive_array_list`]
/// * [`Self::iter_string`]
/// * [`Self::iter_buffer`].
/// * [`Self::iter_component`].
#[inline]
pub fn iter_component_arrays(
&self,
component_name: &ComponentName,
) -> impl Iterator<Item = Box<dyn Arrow2Array>> + '_ {
let Some(list_array) = self.get_first_component(component_name) else {
return Either::Left(std::iter::empty());
};

Either::Right(list_array.iter().flatten())
}

/// Returns an iterator over the raw primitive values of a [`Chunk`], for a given component.
///
/// This is a very fast path: the entire column will be downcasted at once, and then every
@@ -233,7 +212,6 @@ impl Chunk {
/// * [`Self::iter_primitive_array_list`]
/// * [`Self::iter_string`]
/// * [`Self::iter_buffer`].
/// * [`Self::iter_component_arrays`].
/// * [`Self::iter_component`].
#[inline]
pub fn iter_primitive<T: arrow2::types::NativeType>(
@@ -276,7 +254,6 @@ impl Chunk {
/// * [`Self::iter_primitive_array_list`]
/// * [`Self::iter_string`]
/// * [`Self::iter_buffer`].
/// * [`Self::iter_component_arrays`].
/// * [`Self::iter_component`].
#[inline]
pub fn iter_bool(
@@ -319,7 +296,6 @@ impl Chunk {
/// * [`Self::iter_primitive`]
/// * [`Self::iter_string`]
/// * [`Self::iter_buffer`].
/// * [`Self::iter_component_arrays`].
/// * [`Self::iter_component`].
pub fn iter_primitive_array<const N: usize, T: arrow2::types::NativeType>(
&self,
@@ -381,7 +357,6 @@ impl Chunk {
/// * [`Self::iter_primitive_array`]
/// * [`Self::iter_string`]
/// * [`Self::iter_buffer`].
/// * [`Self::iter_component_arrays`].
/// * [`Self::iter_component`].
pub fn iter_primitive_array_list<const N: usize, T: arrow2::types::NativeType>(
&self,
@@ -466,7 +441,6 @@ impl Chunk {
/// * [`Self::iter_primitive_array`]
/// * [`Self::iter_primitive_array_list`]
/// * [`Self::iter_buffer`].
/// * [`Self::iter_component_arrays`].
/// * [`Self::iter_component`].
pub fn iter_string(
&self,
@@ -517,7 +491,6 @@ impl Chunk {
/// * [`Self::iter_primitive_array`]
/// * [`Self::iter_primitive_array_list`]
/// * [`Self::iter_string`].
/// * [`Self::iter_component_arrays`].
/// * [`Self::iter_component`].
pub fn iter_buffer<T: arrow::datatypes::ArrowNativeType + arrow2::types::NativeType>(
&self,
@@ -740,7 +713,6 @@ impl Chunk {
/// * [`Self::iter_primitive_array_list`]
/// * [`Self::iter_string`]
/// * [`Self::iter_buffer`].
/// * [`Self::iter_component_arrays`].
#[inline]
pub fn iter_component<C: Component>(
&self,
10 changes: 2 additions & 8 deletions crates/store/re_grpc_client/src/lib.rs
@@ -480,15 +480,9 @@ async fn stream_catalog_async(
)))?;

let recording_uri_arrays: Vec<Box<dyn Arrow2Array>> = chunk
.iter_component_arrays(&"id".into())
.iter_string(&"id".into())
.map(|id| {
let rec_id = id
.as_any()
.downcast_ref::<Arrow2Utf8Array<i32>>()
.ok_or(StreamError::ChunkError(re_chunk::ChunkError::Malformed {
reason: format!("id must be a utf8 array: {:?}", tc.schema),
}))?
.value(0); // each component batch is of length 1 i.e. single 'id' value
let rec_id = &id[0]; // each component batch is of length 1 i.e. single 'id' value

let recording_uri = format!("rerun://{host}:{port}/recording/{rec_id}");

40 changes: 18 additions & 22 deletions crates/viewer/re_view_spatial/src/max_image_dimension_subscriber.rs
@@ -1,4 +1,3 @@
use arrow2::array::Array;
use nohash_hasher::IntMap;
use once_cell::sync::OnceCell;

@@ -85,41 +84,38 @@ impl PerStoreChunkSubscriber for MaxImageDimensionsStoreSubscriber {
}

// Handle `ImageEncoded`, `AssetVideo`…
let blobs = event.diff.chunk.iter_component_arrays(&Blob::name());
let media_types = event.diff.chunk.iter_component_arrays(&MediaType::name());
let blobs = event.diff.chunk.iter_buffer(&Blob::name());
let media_types = event.diff.chunk.iter_string(&MediaType::name());
for (blob, media_type) in
itertools::izip!(blobs, media_types.map(Some).chain(std::iter::repeat(None)))
{
if let Some([width, height]) = size_from_blob(blob.as_ref(), media_type.as_deref())
{
let max_dim = self
.max_dimensions
.entry(event.diff.chunk.entity_path().clone())
.or_default();
max_dim.width = max_dim.width.max(width);
max_dim.height = max_dim.height.max(height);
if let Some(blob) = blob.first() {
if let Some([width, height]) = size_from_blob(
blob.as_slice(),
media_type.and_then(|v| v.first().map(|v| MediaType(v.clone().into()))),
) {
let max_dim = self
.max_dimensions
.entry(event.diff.chunk.entity_path().clone())
.or_default();
max_dim.width = max_dim.width.max(width);
max_dim.height = max_dim.height.max(height);
}
}
}
}
}
}

fn size_from_blob(blob: &dyn Array, media_type: Option<&dyn Array>) -> Option<[u32; 2]> {
fn size_from_blob(blob: &[u8], media_type: Option<MediaType>) -> Option<[u32; 2]> {
re_tracing::profile_function!();

let blob = Blob::from_arrow2_opt(blob).ok()?.first()?.clone()?;

let media_type: Option<MediaType> = media_type
.and_then(|media_type| MediaType::from_arrow2_opt(media_type).ok())
.and_then(|list| list.first().cloned())
.flatten();

let media_type = MediaType::or_guess_from_data(media_type, &blob)?;
let media_type = MediaType::or_guess_from_data(media_type, blob)?;

if media_type.is_image() {
re_tracing::profile_scope!("image");

let image_bytes = blob.0.as_slice();
let image_bytes = blob;

let mut reader = image::ImageReader::new(std::io::Cursor::new(image_bytes));

@@ -137,7 +133,7 @@ fn size_from_blob(blob: &dyn Array, media_type: Option<&dyn Array>) -> Option<[u32; 2]> {
reader.into_dimensions().ok().map(|size| size.into())
} else if media_type.is_video() {
re_tracing::profile_scope!("video");
re_video::VideoData::load_from_bytes(&blob, &media_type)
re_video::VideoData::load_from_bytes(blob, &media_type)
.ok()
.map(|video| video.dimensions())
} else {
