Skip to content

Commit

Permalink
Implement ScalarValueCodec and TupleCodec
Browse files Browse the repository at this point in the history
  • Loading branch information
lewiszlw committed Feb 6, 2024
1 parent ba96442 commit 1031403
Show file tree
Hide file tree
Showing 7 changed files with 225 additions and 5 deletions.
68 changes: 68 additions & 0 deletions bustubx/src/common/bitmap.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
/// A growable bitmap backed by a byte vector.
///
/// Bits are addressed most-significant-bit first inside each byte: index 0 is
/// the highest bit of byte 0, index 7 its lowest bit, index 8 the highest bit
/// of byte 1, and so on.
#[derive(Debug, Clone, Default, Eq, PartialEq)]
pub struct DynamicBitmap {
    map: Vec<u8>,
}

impl DynamicBitmap {
    /// Creates an empty bitmap that holds no bytes.
    pub fn new() -> Self {
        Self { map: Vec::new() }
    }

    /// Sets the bit at `index` to `value`.
    ///
    /// When `index` lies past the current end, the bitmap first grows with
    /// zeroed bytes so that the byte containing `index` exists. This happens
    /// even when `value` is `false`.
    pub fn set(&mut self, index: usize, value: bool) {
        let byte_idx = index >> 3; // index / 8
        if byte_idx >= self.map.len() {
            self.map.resize(byte_idx + 1, 0);
        }
        // MSB-first: bit offset 0 maps to 0b1000_0000.
        let mask = 0x80u8 >> (index & 0b111); // index % 8
        if value {
            self.map[byte_idx] |= mask;
        } else {
            self.map[byte_idx] &= !mask;
        }
    }

    /// Returns the bit at `index`, or `None` when `index` is beyond the bytes
    /// currently stored.
    pub fn get(&self, index: usize) -> Option<bool> {
        // Checked lookup of the containing byte: each byte covers 8 bits, so
        // any index whose byte is missing is out of range.
        let byte = *self.map.get(index >> 3)?;
        let mask = 0x80u8 >> (index & 0b111);
        Some((byte & mask) != 0)
    }

    /// Returns a copy of the underlying bytes.
    pub fn to_bytes(&self) -> Vec<u8> {
        self.map.clone()
    }

    /// Builds a bitmap from a copy of `bytes`, using the same bit layout that
    /// `to_bytes` produces.
    pub fn from_bytes(bytes: &[u8]) -> Self {
        Self {
            map: bytes.to_vec(),
        }
    }
}

#[cfg(test)]
mod tests {
    use crate::common::bitmap::DynamicBitmap;

    /// Exercises growth on `set`, lookups through `get`, and the byte round trip.
    #[test]
    fn dynamic_bitmap() {
        let mut bits = DynamicBitmap::new();
        // Nothing has been stored yet, so bit 0 is not addressable.
        assert_eq!(bits.get(0), None);

        // Writing past the end grows the backing vector just far enough.
        for (index, expected_len) in [(3usize, 1usize), (10, 2)] {
            bits.set(index, true);
            assert_eq!(bits.map.len(), expected_len);
        }

        // Untouched bits read as false, written bits as true.
        for (index, expected) in [(0usize, false), (3, true), (10, true)] {
            assert_eq!(bits.get(index), Some(expected));
        }

        let restored = DynamicBitmap::from_bytes(&bits.to_bytes());
        assert_eq!(restored, bits);
    }
}
2 changes: 2 additions & 0 deletions bustubx/src/common/mod.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
mod bitmap;
pub mod rid;
mod scalar;
mod table_ref;
pub mod util;

pub use bitmap::DynamicBitmap;
pub use scalar::ScalarValue;
pub use table_ref::TableReference;

Expand Down
31 changes: 31 additions & 0 deletions bustubx/src/storage/codec/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,25 @@ use crate::{BustubxError, BustubxResult};
/// Unit type that groups the byte-level encode/decode helpers for primitive values.
pub struct CommonCodec;

impl CommonCodec {
/// Encodes a boolean as a single byte: `1` for `true`, `0` for `false`.
pub fn encode_bool(data: bool) -> Vec<u8> {
    vec![u8::from(data)]
}

/// Decodes a boolean from the first byte of `bytes`.
///
/// A zero byte decodes to `false`, any other value to `true`. On success the
/// second tuple element is `1`, the number of bytes consumed.
///
/// # Errors
///
/// Returns `BustubxError::Storage` when `bytes` is empty.
pub fn decode_bool(bytes: &[u8]) -> BustubxResult<DecodedData<bool>> {
    match bytes.first() {
        Some(&first) => Ok((first != 0, 1)),
        None => Err(BustubxError::Storage(format!(
            "bytes length {} is less than {}",
            bytes.len(),
            1
        ))),
    }
}

/// Encodes a `u8` as a one-element byte vector.
pub fn encode_u8(data: u8) -> Vec<u8> {
    // A single byte has no endianness, so the value is its own encoding.
    vec![data]
}
Expand Down Expand Up @@ -141,6 +160,18 @@ mod tests {

#[test]
fn common_codec() {
assert_eq!(
true,
CommonCodec::decode_bool(&CommonCodec::encode_bool(true))
.unwrap()
.0
);
assert_eq!(
false,
CommonCodec::decode_bool(&CommonCodec::encode_bool(false))
.unwrap()
.0
);
assert_eq!(
5u8,
CommonCodec::decode_u8(&CommonCodec::encode_u8(5u8))
Expand Down
2 changes: 2 additions & 0 deletions bustubx/src/storage/codec/mod.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
mod common;
mod scalar;
mod tuple;

pub use common::CommonCodec;
pub use scalar::ScalarValueCodec;
pub use tuple::TupleCodec;

Check warning on line 7 in bustubx/src/storage/codec/mod.rs

View workflow job for this annotation

GitHub Actions / Test Suite

unused import: `tuple::TupleCodec`

// data + consumed offset
Expand Down
55 changes: 55 additions & 0 deletions bustubx/src/storage/codec/scalar.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
use crate::catalog::DataType;
use crate::common::ScalarValue;
use crate::storage::codec::{CommonCodec, DecodedData};
use crate::BustubxResult;

/// Unit type that groups the encode/decode routines for a single `ScalarValue`.
pub struct ScalarValueCodec;

impl ScalarValueCodec {
    /// Encodes `value` through the matching `CommonCodec` encoder.
    ///
    /// A NULL value (`None` payload) of any type encodes to an empty vector.
    pub fn encode(value: &ScalarValue) -> Vec<u8> {
        match value {
            // NULLs carry no payload bytes.
            ScalarValue::Boolean(None)
            | ScalarValue::Int8(None)
            | ScalarValue::Int16(None)
            | ScalarValue::Int32(None)
            | ScalarValue::Int64(None)
            | ScalarValue::UInt64(None) => Vec::new(),
            ScalarValue::Boolean(Some(flag)) => CommonCodec::encode_bool(*flag),
            ScalarValue::Int8(Some(num)) => CommonCodec::encode_i8(*num),
            ScalarValue::Int16(Some(num)) => CommonCodec::encode_i16(*num),
            ScalarValue::Int32(Some(num)) => CommonCodec::encode_i32(*num),
            ScalarValue::Int64(Some(num)) => CommonCodec::encode_i64(*num),
            ScalarValue::UInt64(Some(num)) => CommonCodec::encode_u64(*num),
        }
    }

    /// Decodes one non-null value of `data_type` from the front of `bytes`.
    ///
    /// On success returns the decoded value (always with a `Some` payload)
    /// together with the offset reported by the underlying `CommonCodec`
    /// decoder.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by the underlying `CommonCodec` decoder.
    pub fn decode(bytes: &[u8], data_type: DataType) -> BustubxResult<DecodedData<ScalarValue>> {
        let decoded = match data_type {
            DataType::Boolean => {
                let (raw, consumed) = CommonCodec::decode_bool(bytes)?;
                (ScalarValue::Boolean(Some(raw)), consumed)
            }
            DataType::Int8 => {
                let (raw, consumed) = CommonCodec::decode_i8(bytes)?;
                (ScalarValue::Int8(Some(raw)), consumed)
            }
            DataType::Int16 => {
                let (raw, consumed) = CommonCodec::decode_i16(bytes)?;
                (ScalarValue::Int16(Some(raw)), consumed)
            }
            DataType::Int32 => {
                let (raw, consumed) = CommonCodec::decode_i32(bytes)?;
                (ScalarValue::Int32(Some(raw)), consumed)
            }
            DataType::Int64 => {
                let (raw, consumed) = CommonCodec::decode_i64(bytes)?;
                (ScalarValue::Int64(Some(raw)), consumed)
            }
            DataType::UInt64 => {
                let (raw, consumed) = CommonCodec::decode_u64(bytes)?;
                (ScalarValue::UInt64(Some(raw)), consumed)
            }
        };
        Ok(decoded)
    }
}
70 changes: 66 additions & 4 deletions bustubx/src/storage/codec/tuple.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,76 @@
use crate::catalog::SchemaRef;
use crate::Tuple;
use crate::common::{DynamicBitmap, ScalarValue};
use crate::storage::codec::{DecodedData, ScalarValueCodec};
use crate::{BustubxError, BustubxResult, Tuple};

/// Unit type that groups the routines for serializing a `Tuple` to bytes and back.
///
/// Layout: a null bitmap with one bit per value (set when the value is NULL),
/// followed by the encoded non-null values in order.
pub struct TupleCodec;

impl TupleCodec {
    /// Encodes `tuple` as its null bitmap followed by the non-null attributes.
    pub fn encode(tuple: &Tuple) -> Vec<u8> {
        // null map
        let mut null_map = DynamicBitmap::new();
        let mut attributes = Vec::new();
        for (idx, value) in tuple.data.iter().enumerate() {
            let is_null = value.is_null();
            null_map.set(idx, is_null);
            if !is_null {
                attributes.extend(ScalarValueCodec::encode(value));
            }
        }

        let mut bytes = null_map.to_bytes();
        bytes.extend(attributes);
        bytes
    }

    /// Decodes a tuple laid out by `encode` from the front of `bytes`.
    ///
    /// On success returns the tuple together with the total number of bytes
    /// consumed (null bitmap plus all non-null attributes).
    ///
    /// # Errors
    ///
    /// Returns `BustubxError::Storage` when `bytes` is too short to hold the
    /// null bitmap, `BustubxError::Internal` when the bitmap does not cover a
    /// column, and propagates any error from decoding an attribute.
    pub fn decode(bytes: &[u8], schema: SchemaRef) -> BustubxResult<DecodedData<Tuple>> {
        // `encode` sets one bit per value, so the bitmap occupies
        // ceil(column_count / 8) bytes. `(n >> 3) + 1` would read one byte too
        // many whenever the column count is a multiple of 8.
        let null_map_bytes = (schema.column_count() + 7) >> 3;
        if bytes.len() < null_map_bytes {
            return Err(BustubxError::Storage(format!(
                "bytes length {} is less than {}",
                bytes.len(),
                null_map_bytes
            )));
        }
        let (null_map_raw, mut bytes) = bytes.split_at(null_map_bytes);
        let null_map = DynamicBitmap::from_bytes(null_map_raw);
        let mut total_offset = null_map_bytes;

        let mut data = Vec::with_capacity(schema.column_count());
        for (idx, col) in schema.columns.iter().enumerate() {
            let null = null_map.get(idx).ok_or_else(|| {
                BustubxError::Internal(
                    "null map size should be greater than or equal to col count".to_string(),
                )
            })?;
            if null {
                data.push(ScalarValue::new_empty(col.data_type));
            } else {
                let (value, offset) = ScalarValueCodec::decode(bytes, col.data_type)?;
                data.push(value);
                total_offset += offset;
                bytes = &bytes[offset..];
            }
        }

        Ok((Tuple::new(schema, data), total_offset))
    }
}

#[cfg(test)]
mod tests {
    use crate::catalog::{Column, DataType, Schema};
    use crate::common::ScalarValue;
    use crate::storage::codec::TupleCodec;
    use crate::Tuple;
    use std::sync::Arc;

    /// Round-trips a tuple that mixes non-null values with a NULL column.
    #[test]
    fn tuple_codec() {
        let schema = Arc::new(Schema::new(vec![
            Column::new("a".to_string(), DataType::Boolean, true),
            Column::new("b".to_string(), DataType::Int32, true),
            Column::new("c".to_string(), DataType::UInt64, true),
        ]));
        let tuple = Tuple::new(
            schema.clone(),
            vec![true.into(), ScalarValue::Int32(None), 1234u64.into()],
        );
        let new_tuple = TupleCodec::decode(&TupleCodec::encode(&tuple), schema)
            .unwrap()
            .0;
        assert_eq!(new_tuple, tuple);
    }
}
2 changes: 1 addition & 1 deletion bustubx/src/storage/tuple.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ pub struct TupleMeta {
pub is_deleted: bool,
}

#[derive(Debug, Clone)]
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct Tuple {
pub schema: SchemaRef,
pub data: Vec<ScalarValue>,
Expand Down

0 comments on commit 1031403

Please sign in to comment.