Skip to content

Commit

Permalink
special cases for not-null primitives encoding
Browse files Browse the repository at this point in the history
  • Loading branch information
korowa committed Jun 9, 2024
1 parent a413c26 commit e0929da
Show file tree
Hide file tree
Showing 2 changed files with 112 additions and 7 deletions.
103 changes: 98 additions & 5 deletions arrow-row/src/fixed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
use crate::array::PrimitiveArray;
use crate::null_sentinel;
use arrow_array::builder::BufferBuilder;
use arrow_array::{ArrowPrimitiveType, BooleanArray, FixedSizeBinaryArray};
use arrow_array::{Array, ArrowPrimitiveType, BooleanArray, FixedSizeBinaryArray};
use arrow_buffer::{
bit_util, i256, ArrowNativeType, Buffer, IntervalDayTime, IntervalMonthDayNano, MutableBuffer,
};
Expand Down Expand Up @@ -216,16 +216,80 @@ where
///
/// - 1 byte `0` if null or `1` if valid
/// - bytes of [`FixedLengthEncoding`]
pub fn encode<T: FixedLengthEncoding, I: IntoIterator<Item = Option<T>>>(
pub fn encode<T: ArrowPrimitiveType>(
data: &mut [u8],
offsets: &mut [usize],
i: I,
array: &PrimitiveArray<T>,
opts: SortOptions,
) where
T::Native: FixedLengthEncoding,
{
// Element `k` of `array` uses `offsets[k + 1]`; slot 0 is not touched by this loop.
let mut offset_idx = 1;
for maybe_val in array {
let offset = &mut offsets[offset_idx];
// The slot is `ENCODED_LEN` bytes wide whether or not the element is null:
// one marker byte followed by the encoded value bytes.
let end_offset = *offset + T::Native::ENCODED_LEN;
if let Some(val) = maybe_val {
let to_write = &mut data[*offset..end_offset];
// Marker byte `1` flags a valid (non-null) value.
to_write[0] = 1;
let mut encoded = val.encode();
if opts.descending {
// Flip bits to reverse order
encoded.as_mut().iter_mut().for_each(|v| *v = !*v)
}
to_write[1..].copy_from_slice(encoded.as_ref())
} else {
// Null: only the first byte of the slot is written, using the sentinel
// returned by `null_sentinel(opts)`; the rest of the slot is left as-is.
data[*offset] = null_sentinel(opts);
}
// Advance the stored offset past this element's slot.
*offset = end_offset;
offset_idx += 1;
}
}

/// Row-encodes a primitive array that holds no nulls.
///
/// Reads the raw `values` buffer directly, so no per-element validity check
/// is performed. The value at index `i` is written into `data` starting at
/// `offsets[i + 1]` as:
///
/// - 1 byte `1` (valid marker)
/// - bytes of [`FixedLengthEncoding`], bit-inverted when `opts.descending` is set
///
/// `offsets[i + 1]` is then advanced past the bytes just written.
///
/// # Panics
///
/// Panics if `array.is_nullable()` returns `true`, or if `offsets` / `data`
/// are too short for the values being written.
pub fn encode_not_null<T: ArrowPrimitiveType>(
    data: &mut [u8],
    offsets: &mut [usize],
    array: &PrimitiveArray<T>,
    opts: SortOptions,
) where
    T::Native: FixedLengthEncoding,
{
    assert!(!array.is_nullable());

    // Slot 0 of `offsets` is never touched here; value `i` uses slot `i + 1`.
    for (slot_idx, value) in (1usize..).zip(array.values()) {
        let slot = &mut offsets[slot_idx];
        let start = *slot;
        let end = start + T::Native::ENCODED_LEN;

        let dst = &mut data[start..end];
        dst[0] = 1;

        let mut bytes = value.encode();
        if opts.descending {
            // Inverting every bit reverses the byte-wise sort order.
            for byte in bytes.as_mut().iter_mut() {
                *byte = !*byte;
            }
        }
        dst[1..].copy_from_slice(bytes.as_ref());

        *slot = end;
    }
}

/// Boolean values are encoded as
///
/// - 1 byte `0` if null or `1` if valid
/// - bytes of [`FixedLengthEncoding`]
pub fn encode_bool(
data: &mut [u8],
offsets: &mut [usize],
array: &BooleanArray,
opts: SortOptions,
) {
let mut offset_idx = 1;
for maybe_val in i {
for maybe_val in array {
let offset = &mut offsets[offset_idx];
let end_offset = *offset + T::ENCODED_LEN;
let end_offset = *offset + bool::ENCODED_LEN;
if let Some(val) = maybe_val {
let to_write = &mut data[*offset..end_offset];
to_write[0] = 1;
Expand All @@ -243,6 +307,35 @@ pub fn encode<T: FixedLengthEncoding, I: IntoIterator<Item = Option<T>>>(
}
}

/// Row-encodes a boolean array that holds no nulls.
///
/// Iterates the `values` of the array directly, so no per-element validity
/// check is performed. The value at index `i` is written into `data` starting
/// at `offsets[i + 1]` as:
///
/// - 1 byte `1` (valid marker)
/// - bytes of [`FixedLengthEncoding`], bit-inverted when `opts.descending` is set
///
/// `offsets[i + 1]` is then advanced past the bytes just written.
///
/// # Panics
///
/// Panics if `array.is_nullable()` returns `true`, or if `offsets` / `data`
/// are too short for the values being written.
pub fn encode_bool_not_null(
    data: &mut [u8],
    offsets: &mut [usize],
    array: &BooleanArray,
    opts: SortOptions,
) {
    assert!(!array.is_nullable());

    // Slot 0 of `offsets` is never touched here; value `i` uses slot `i + 1`.
    for (slot_idx, value) in (1usize..).zip(array.values()) {
        let slot = &mut offsets[slot_idx];
        let start = *slot;
        let end = start + bool::ENCODED_LEN;

        let dst = &mut data[start..end];
        dst[0] = 1;

        let mut bytes = value.encode();
        if opts.descending {
            // Inverting every bit reverses the byte-wise sort order.
            for byte in bytes.as_mut().iter_mut() {
                *byte = !*byte;
            }
        }
        dst[1..].copy_from_slice(bytes.as_ref());

        *slot = end;
    }
}

pub fn encode_fixed_size_binary(
data: &mut [u8],
offsets: &mut [usize],
Expand Down
16 changes: 14 additions & 2 deletions arrow-row/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1146,9 +1146,21 @@ fn encode_column(
match encoder {
Encoder::Stateless => {
downcast_primitive_array! {
column => fixed::encode(data, offsets, column, opts),
column => {
if column.is_nullable(){
fixed::encode(data, offsets, column, opts)
} else {
fixed::encode_not_null(data, offsets, column, opts)
}
}
DataType::Null => {}
DataType::Boolean => fixed::encode(data, offsets, column.as_boolean(), opts),
DataType::Boolean => {
if column.is_nullable(){
fixed::encode_bool(data, offsets, column.as_boolean(), opts)
} else {
fixed::encode_bool_not_null(data, offsets, column.as_boolean(), opts)
}
}
DataType::Binary => {
variable::encode(data, offsets, as_generic_binary_array::<i32>(column).iter(), opts)
}
Expand Down

0 comments on commit e0929da

Please sign in to comment.