/// A growable bitmap backed by a byte vector.
///
/// Bits are stored most-significant-bit first inside each byte: bit `index`
/// lives in byte `index / 8`, at shift position `7 - (index % 8)`.
#[derive(Debug, Clone, Default, Eq, PartialEq)]
pub struct DynamicBitmap {
    map: Vec<u8>,
}

impl DynamicBitmap {
    /// Creates an empty bitmap that holds no bits.
    pub fn new() -> Self {
        Self { map: Vec::new() }
    }

    /// Sets the bit at `index` to `value`.
    ///
    /// The backing storage is extended with zero bytes when `index` falls
    /// beyond the bytes currently held, so every newly covered bit starts out
    /// as `false`.
    pub fn set(&mut self, index: usize, value: bool) {
        let byte_idx = index >> 3; // index / 8
        if byte_idx >= self.map.len() {
            self.map.resize(byte_idx + 1, 0);
        }
        // MSB-first layout: bit 0 of a byte is its highest-order bit.
        let mask = 1u8 << (7 - (index & 0b111)); // index % 8
        if value {
            self.map[byte_idx] |= mask;
        } else {
            self.map[byte_idx] &= !mask;
        }
    }

    /// Returns the bit at `index`, or `None` when `index` lies beyond the
    /// bytes currently stored.
    pub fn get(&self, index: usize) -> Option<bool> {
        // Checked lookup of the owning byte. The previous guard compared
        // `index` against `len << 8` (256 bits per byte) instead of
        // `len << 3`, so out-of-range indices reached the indexing
        // expression and panicked.
        let byte = *self.map.get(index >> 3)?;
        let offset = index & 0b111; // index % 8
        Some((byte >> (7 - offset)) & 1 == 1)
    }

    /// Returns a copy of the backing bytes.
    pub fn to_bytes(&self) -> Vec<u8> {
        self.map.clone()
    }

    /// Builds a bitmap whose backing storage is a copy of `bytes`.
    pub fn from_bytes(bytes: &[u8]) -> Self {
        Self {
            map: bytes.to_vec(),
        }
    }
}

#[cfg(test)]
mod tests {
    use super::DynamicBitmap;

    #[test]
    fn dynamic_bitmap() {
        let mut bitmap = DynamicBitmap::new();
        assert_eq!(bitmap.get(0), None);

        bitmap.set(3, true);
        assert_eq!(bitmap.map.len(), 1);
        // Index 8 belongs to a second byte that does not exist yet.
        assert_eq!(bitmap.get(8), None);

        bitmap.set(10, true);
        assert_eq!(bitmap.map.len(), 2);

        assert_eq!(bitmap.get(0), Some(false));
        assert_eq!(bitmap.get(3), Some(true));
        assert_eq!(bitmap.get(10), Some(true));
        assert_eq!(bitmap.get(16), None);

        let new_bitmap = DynamicBitmap::from_bytes(&bitmap.to_bytes());
        assert_eq!(new_bitmap, bitmap);
    }
}
util; +pub use bitmap::DynamicBitmap; pub use scalar::ScalarValue; pub use table_ref::TableReference; diff --git a/bustubx/src/storage/codec/common.rs b/bustubx/src/storage/codec/common.rs index 724feb7..969075a 100644 --- a/bustubx/src/storage/codec/common.rs +++ b/bustubx/src/storage/codec/common.rs @@ -4,6 +4,25 @@ use crate::{BustubxError, BustubxResult}; pub struct CommonCodec; impl CommonCodec { + pub fn encode_bool(data: bool) -> Vec { + if data { + vec![1] + } else { + vec![0] + } + } + + pub fn decode_bool(bytes: &[u8]) -> BustubxResult> { + if bytes.len() < 1 { + return Err(BustubxError::Storage(format!( + "bytes length {} is less than {}", + bytes.len(), + 1 + ))); + } + Ok((if bytes[0] == 0 { false } else { true }, 1)) + } + pub fn encode_u8(data: u8) -> Vec { data.to_be_bytes().to_vec() } @@ -141,6 +160,18 @@ mod tests { #[test] fn common_codec() { + assert_eq!( + true, + CommonCodec::decode_bool(&CommonCodec::encode_bool(true)) + .unwrap() + .0 + ); + assert_eq!( + false, + CommonCodec::decode_bool(&CommonCodec::encode_bool(false)) + .unwrap() + .0 + ); assert_eq!( 5u8, CommonCodec::decode_u8(&CommonCodec::encode_u8(5u8)) diff --git a/bustubx/src/storage/codec/mod.rs b/bustubx/src/storage/codec/mod.rs index 8b92c93..cb014c6 100644 --- a/bustubx/src/storage/codec/mod.rs +++ b/bustubx/src/storage/codec/mod.rs @@ -1,7 +1,9 @@ mod common; +mod scalar; mod tuple; pub use common::CommonCodec; +pub use scalar::ScalarValueCodec; pub use tuple::TupleCodec; // data + consumed offset diff --git a/bustubx/src/storage/codec/scalar.rs b/bustubx/src/storage/codec/scalar.rs new file mode 100644 index 0000000..97fdb65 --- /dev/null +++ b/bustubx/src/storage/codec/scalar.rs @@ -0,0 +1,55 @@ +use crate::catalog::DataType; +use crate::common::ScalarValue; +use crate::storage::codec::{CommonCodec, DecodedData}; +use crate::BustubxResult; + +pub struct ScalarValueCodec; + +impl ScalarValueCodec { + pub fn encode(value: &ScalarValue) -> Vec { + match value { + 
ScalarValue::Boolean(Some(v)) => CommonCodec::encode_bool(*v), + ScalarValue::Int8(Some(v)) => CommonCodec::encode_i8(*v), + ScalarValue::Int16(Some(v)) => CommonCodec::encode_i16(*v), + ScalarValue::Int32(Some(v)) => CommonCodec::encode_i32(*v), + ScalarValue::Int64(Some(v)) => CommonCodec::encode_i64(*v), + ScalarValue::UInt64(Some(v)) => CommonCodec::encode_u64(*v), + // null + ScalarValue::Boolean(None) + | ScalarValue::Int8(None) + | ScalarValue::Int16(None) + | ScalarValue::Int32(None) + | ScalarValue::Int64(None) + | ScalarValue::UInt64(None) => vec![], + } + } + + pub fn decode(bytes: &[u8], data_type: DataType) -> BustubxResult> { + match data_type { + DataType::Boolean => { + let (value, offset) = CommonCodec::decode_bool(bytes)?; + Ok((ScalarValue::Boolean(Some(value)), offset)) + } + DataType::Int8 => { + let (value, offset) = CommonCodec::decode_i8(bytes)?; + Ok((ScalarValue::Int8(Some(value)), offset)) + } + DataType::Int16 => { + let (value, offset) = CommonCodec::decode_i16(bytes)?; + Ok((ScalarValue::Int16(Some(value)), offset)) + } + DataType::Int32 => { + let (value, offset) = CommonCodec::decode_i32(bytes)?; + Ok((ScalarValue::Int32(Some(value)), offset)) + } + DataType::Int64 => { + let (value, offset) = CommonCodec::decode_i64(bytes)?; + Ok((ScalarValue::Int64(Some(value)), offset)) + } + DataType::UInt64 => { + let (value, offset) = CommonCodec::decode_u64(bytes)?; + Ok((ScalarValue::UInt64(Some(value)), offset)) + } + } + } +} diff --git a/bustubx/src/storage/codec/tuple.rs b/bustubx/src/storage/codec/tuple.rs index c5f0f32..c7d4740 100644 --- a/bustubx/src/storage/codec/tuple.rs +++ b/bustubx/src/storage/codec/tuple.rs @@ -1,14 +1,76 @@ use crate::catalog::SchemaRef; -use crate::Tuple; +use crate::common::{DynamicBitmap, ScalarValue}; +use crate::storage::codec::{DecodedData, ScalarValueCodec}; +use crate::{BustubxError, BustubxResult, Tuple}; pub struct TupleCodec; impl TupleCodec { pub fn encode(tuple: &Tuple) -> Vec { - todo!() + // null 
map + let mut null_map = DynamicBitmap::new(); + let mut attributes = Vec::new(); + for (idx, value) in tuple.data.iter().enumerate() { + null_map.set(idx, value.is_null()); + if !value.is_null() { + attributes.extend(ScalarValueCodec::encode(value)); + } + } + + let mut bytes = null_map.to_bytes(); + bytes.extend(attributes); + bytes + } + + pub fn decode(bytes: &[u8], schema: SchemaRef) -> BustubxResult> { + let mut total_offset = 0; + + let null_map_bytes = (schema.column_count() >> 3) + 1; + let null_map = DynamicBitmap::from_bytes(&bytes[0..null_map_bytes]); + total_offset += null_map_bytes; + let mut bytes = &bytes[null_map_bytes..]; + + let mut data = vec![]; + for (idx, col) in schema.columns.iter().enumerate() { + let null = null_map.get(idx).ok_or(BustubxError::Internal( + "null map size should be greater than or equal to col count".to_string(), + ))?; + if null { + data.push(ScalarValue::new_empty(col.data_type)); + } else { + let (value, offset) = ScalarValueCodec::decode(bytes, col.data_type)?; + data.push(value); + total_offset += offset; + bytes = &bytes[offset..]; + } + } + + Ok((Tuple::new(schema, data), total_offset)) } +} + +#[cfg(test)] +mod tests { + use crate::catalog::{Column, DataType, Schema}; + use crate::common::ScalarValue; + use crate::storage::codec::TupleCodec; + use crate::Tuple; + use std::sync::Arc; - pub fn decode(bytes: &[u8], schema: SchemaRef) -> Tuple { - todo!() + #[test] + fn tuple_codec() { + let schema = Arc::new(Schema::new(vec![ + Column::new("a".to_string(), DataType::Boolean, true), + Column::new("b".to_string(), DataType::Int32, true), + Column::new("c".to_string(), DataType::UInt64, true), + ])); + let tuple = Tuple::new( + schema.clone(), + vec![true.into(), ScalarValue::Int32(None), 1234u64.into()], + ); + let new_tuple = TupleCodec::decode(&TupleCodec::encode(&tuple), schema) + .unwrap() + .0; + assert_eq!(new_tuple, tuple); } } diff --git a/bustubx/src/storage/tuple.rs b/bustubx/src/storage/tuple.rs index 
2943d42..5a9820c 100644 --- a/bustubx/src/storage/tuple.rs +++ b/bustubx/src/storage/tuple.rs @@ -10,7 +10,7 @@ pub struct TupleMeta { pub is_deleted: bool, } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Eq, PartialEq)] pub struct Tuple { pub schema: SchemaRef, pub data: Vec,