diff --git a/arrow-row/src/fixed.rs b/arrow-row/src/fixed.rs index 0f3c3d0912f6..3d9920708f9b 100644 --- a/arrow-row/src/fixed.rs +++ b/arrow-row/src/fixed.rs @@ -20,7 +20,8 @@ use crate::null_sentinel; use arrow_array::builder::BufferBuilder; use arrow_array::{ArrowPrimitiveType, BooleanArray, FixedSizeBinaryArray}; use arrow_buffer::{ - bit_util, i256, ArrowNativeType, Buffer, IntervalDayTime, IntervalMonthDayNano, MutableBuffer, + bit_util, i256, ArrowNativeType, BooleanBuffer, Buffer, IntervalDayTime, IntervalMonthDayNano, + MutableBuffer, NullBuffer, }; use arrow_data::{ArrayData, ArrayDataBuilder}; use arrow_schema::{DataType, SortOptions}; @@ -216,18 +217,75 @@ where /// /// - 1 byte `0` if null or `1` if valid /// - bytes of [`FixedLengthEncoding`] -pub fn encode>>( +pub fn encode( data: &mut [u8], offsets: &mut [usize], - i: I, + values: &[T], + nulls: &NullBuffer, opts: SortOptions, ) { - for (offset, maybe_val) in offsets.iter_mut().skip(1).zip(i) { + for (value_idx, is_valid) in nulls.iter().enumerate() { + let offset = &mut offsets[value_idx + 1]; let end_offset = *offset + T::ENCODED_LEN; - if let Some(val) = maybe_val { + if is_valid { + let to_write = &mut data[*offset..end_offset]; + to_write[0] = 1; + let mut encoded = values[value_idx].encode(); + if opts.descending { + // Flip bits to reverse order + encoded.as_mut().iter_mut().for_each(|v| *v = !*v) + } + to_write[1..].copy_from_slice(encoded.as_ref()) + } else { + data[*offset] = null_sentinel(opts); + } + *offset = end_offset; + } +} + +/// Encoding for non-nullable primitive arrays. +/// Iterates directly over the `values`, and skips NULLs-checking. +pub fn encode_not_null( + data: &mut [u8], + offsets: &mut [usize], + values: &[T], + opts: SortOptions, +) { + for (value_idx, val) in values.iter().enumerate() { + let offset = &mut offsets[value_idx + 1]; + let end_offset = *offset + T::ENCODED_LEN; + + let to_write = &mut data[*offset..end_offset]; + to_write[0] = 1; + let mut encoded = val.encode(); + if opts.descending { + // Flip bits to reverse order + encoded.as_mut().iter_mut().for_each(|v| *v = !*v) + } + to_write[1..].copy_from_slice(encoded.as_ref()); + + *offset = end_offset; + } +} + +/// Boolean values are encoded as +/// +/// - 1 byte `0` if null or `1` if valid +/// - bytes of [`FixedLengthEncoding`] +pub fn encode_boolean( + data: &mut [u8], + offsets: &mut [usize], + values: &BooleanBuffer, + nulls: &NullBuffer, + opts: SortOptions, +) { + for (idx, is_valid) in nulls.iter().enumerate() { + let offset = &mut offsets[idx + 1]; + let end_offset = *offset + bool::ENCODED_LEN; + if is_valid { let to_write = &mut data[*offset..end_offset]; to_write[0] = 1; - let mut encoded = val.encode(); + let mut encoded = values.value(idx).encode(); if opts.descending { // Flip bits to reverse order encoded.as_mut().iter_mut().for_each(|v| *v = !*v) @@ -240,6 +298,31 @@ pub fn encode>>( } } +/// Encoding for non-nullable boolean arrays. +/// Iterates directly over `values`, and skips NULLs-checking. +pub fn encode_boolean_not_null( + data: &mut [u8], + offsets: &mut [usize], + values: &BooleanBuffer, + opts: SortOptions, +) { + for (value_idx, val) in values.iter().enumerate() { + let offset = &mut offsets[value_idx + 1]; + let end_offset = *offset + bool::ENCODED_LEN; + + let to_write = &mut data[*offset..end_offset]; + to_write[0] = 1; + let mut encoded = val.encode(); + if opts.descending { + // Flip bits to reverse order + encoded.as_mut().iter_mut().for_each(|v| *v = !*v) + } + to_write[1..].copy_from_slice(encoded.as_ref()); + + *offset = end_offset; + } +} + pub fn encode_fixed_size_binary( data: &mut [u8], offsets: &mut [usize], diff --git a/arrow-row/src/lib.rs b/arrow-row/src/lib.rs index 5dce771c85a9..a6fd03b5bcec 100644 --- a/arrow-row/src/lib.rs +++ b/arrow-row/src/lib.rs @@ -1153,9 +1153,21 @@ fn encode_column( match encoder { Encoder::Stateless => { downcast_primitive_array! { - column => fixed::encode(data, offsets, column, opts), + column => { + if let Some(nulls) = column.nulls().filter(|n| n.null_count() > 0){ + fixed::encode(data, offsets, column.values(), nulls, opts) + } else { + fixed::encode_not_null(data, offsets, column.values(), opts) + } + } DataType::Null => {} - DataType::Boolean => fixed::encode(data, offsets, column.as_boolean(), opts), + DataType::Boolean => { + if let Some(nulls) = column.nulls().filter(|n| n.null_count() > 0){ + fixed::encode_boolean(data, offsets, column.as_boolean().values(), nulls, opts) + } else { + fixed::encode_boolean_not_null(data, offsets, column.as_boolean().values(), opts) + } + } DataType::Binary => { variable::encode(data, offsets, as_generic_binary_array::(column).iter(), opts) }