Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: Optimize Hexadecimal Serialization and Deserialization for u64 #21

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 33 additions & 42 deletions crates/starknet-types-rpc/src/custom_serde/num_as_hex.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use alloc::vec::Vec;
use core::marker::PhantomData;

/// A trait for types that should be serialized or deserialized as hexadecimal strings.
Expand All @@ -18,8 +19,6 @@ impl<'de> NumAsHex<'de> for u64 {
{
/// The symbols to be used for the hexadecimal representation.
const HEX_DIGITS: [u8; 16] = *b"0123456789abcdef";
/// The maximum number of digits in the hexadecimal representation of a `u64`.
const MAX_NUMBER_SIZE: usize = u64::MAX.ilog(16) as usize + 1;

if *self == 0 {
return serializer.serialize_str("0x0");
Expand Down Expand Up @@ -51,25 +50,22 @@ impl<'de> NumAsHex<'de> for u64 {
// The output string is the part of the buffer that has been written. In other
// words, we have to skip all the bytes that *were not* written yet (remaining).

let mut buffer = [0u8; MAX_NUMBER_SIZE + 2]; // + 2 to account for 0x
let mut cursor = buffer.iter_mut().rev();
let mut buffer = [0u8; 18]; // Enough for "0x" prefix and 16 hex digits
let mut n = *self;
let mut length = 0;

while n != 0 {
*cursor.next().unwrap() = HEX_DIGITS[(n % 16) as usize];
length += 1;
buffer[18 - length] = HEX_DIGITS[(n % 16) as usize];
n /= 16;
}
*cursor.next().unwrap() = b'x';
*cursor.next().unwrap() = b'0';

let remaining = cursor.len();

// SAFETY:
// We only wrote ASCII characters to the buffer, ensuring that it is only composed
// of valid UTF-8 code points. This unwrap can never fail. Just like the code above,
// using `from_utf8_unchecked` is safe.
let s = core::str::from_utf8(&buffer[remaining..]).unwrap();
buffer[18 - length - 1] = b'x';
buffer[18 - length - 2] = b'0';
length += 2;

serializer.serialize_str(s)
let hex_str = core::str::from_utf8(&buffer[18 - length..]).unwrap();
serializer.serialize_str(hex_str)
}

fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
Expand All @@ -93,30 +89,13 @@ impl<'de> NumAsHex<'de> for u64 {
// unsafe code and pointers. Though the gain will probably be less interesting.

// Explicitly avoid being UTF-8 aware.
let mut bytes = v.as_bytes();
let bytes = v.as_bytes();

// If the input string does not start with the `0x` prefix, then it's an
// error. The `NUM_AS_HEX` regex defined in the specification specifies
// this prefix as mandatory.
bytes = bytes
.strip_prefix(b"0x")
.ok_or_else(|| E::custom("expected a hexadecimal string starting with 0x"))?;

if bytes.is_empty() {
return Err(E::custom("expected a hexadecimal string"));
}

// Remove the leading zeros from the string, if any.
// We need this in order to optimize the code below with the knowledge of the
// length of the hexadecimal representation of the number.
while let Some(rest) = bytes.strip_prefix(b"0") {
bytes = rest;
}

// If the string has a size larger than the maximum size of the hexadecimal
// representation of a `u64`, then we're forced to overflow.
if bytes.len() > u64::MAX.ilog(16) as usize + 1 {
return Err(E::custom("integer overflowed 64-bit"));
if bytes.len() < 2 || &bytes[0..2] != b"0x" {
return Err(E::custom("expected a hexadecimal string starting with 0x"));
}

// Aggregate the digits into `n`,
Expand All @@ -128,16 +107,28 @@ impl<'de> NumAsHex<'de> for u64 {
//
// Because we already checked the size of the string earlier, we know that
// the following code will never overflow.
let hex_bytes = &bytes[2..];

// Trim leading zeros from the hexadecimal part for efficient processing
let trimmed_hex = hex_bytes
.iter()
.skip_while(|&&b| b == b'0')
.collect::<Vec<_>>();

// Check if the significant part of the hexadecimal string is too long for a 64-bit number
if trimmed_hex.len() > 16 {
return Err(E::custom("hexadecimal string too long for a 64-bit number"));
}

let mut n = 0u64;
for &b in bytes.iter() {
let unit = match b {
b'0'..=b'9' => b as u64 - b'0' as u64,
b'a'..=b'f' => b as u64 - b'a' as u64 + 10,
b'A'..=b'F' => b as u64 - b'A' as u64 + 10,
for &b in &trimmed_hex {
let digit = match b {
b'0'..=b'9' => b - b'0',
b'a'..=b'f' => 10 + b - b'a',
b'A'..=b'F' => 10 + b - b'A',
_ => return Err(E::custom("invalid hexadecimal digit")),
};

n = n * 16 + unit;
n = n * 16 + digit as u64;
}

Ok(n)
Expand Down