From 2d682f042e7726d8a9d06a7625c1ef717fb221d1 Mon Sep 17 00:00:00 2001
From: Evan Hedbor
Date: Mon, 8 Jul 2024 22:08:50 +0200
Subject: [PATCH] Create skeleton for Deserializer

---
Notes (this section is ignored when the patch is applied):

Review changes relative to the original commit, all in
src/de/deserializer.rs:
- peek_nth compared the buffer length with `index - 1`, which underflows
  for index 0 and otherwise returns the wrong byte or drains the reader;
  it now fills the lookahead buffer until position `index` is available.
- next no longer calls `Vec::remove(0)` on an empty buffer at end of input.
- parse_string: braces inside a quoted string no longer terminate it,
  invalid UTF-8 yields Error::BadEncoding instead of panicking, and the
  two input-triggered todo!() panics now return Error::ToDo.
- Added docs for the items that become public through `pub mod de`.
The blob id on that file's `index` line predates these edits.

 src/de.rs              |   5 +-
 src/de/deserializer.rs | 431 +++++++++++++++++++++++++++++++++++++++++
 src/error.rs           |  14 ++
 src/lib.rs             |   3 +-
 4 files changed, 450 insertions(+), 3 deletions(-)
 create mode 100644 src/de/deserializer.rs

diff --git a/src/de.rs b/src/de.rs
index a280e30..d40f74e 100644
--- a/src/de.rs
+++ b/src/de.rs
@@ -1,13 +1,14 @@
 //! Deserialize KeyValues text to Rust types.
 
-// TODO: impl de; remove
-#![allow(dead_code)]
+mod deserializer;
 
 use crate::Result;
 use serde::de::DeserializeOwned;
 use serde::Deserialize;
 use std::io::Read;
 
+pub use deserializer::Deserializer;
+
 /// Deserialize a KeyValues value representing some type `T`.
 ///
 /// # Errors
diff --git a/src/de/deserializer.rs b/src/de/deserializer.rs
new file mode 100644
index 0000000..413e9a0
--- /dev/null
+++ b/src/de/deserializer.rs
@@ -0,0 +1,431 @@
+use crate::{Error, Result};
+use serde::de::{
+    DeserializeSeed, EnumAccess, IntoDeserializer, MapAccess, SeqAccess, VariantAccess, Visitor,
+};
+use std::borrow::Cow;
+use std::io::{Bytes, Read};
+
+/// A streaming deserializer for KeyValues text.
+///
+/// Bytes are pulled from the underlying reader one at a time. Bytes that have
+/// been peeked but not yet consumed are kept in `buffer`, oldest first.
+pub struct Deserializer<R> {
+    reader: Bytes<R>,
+    buffer: Vec<u8>,
+}
+
+impl<R: Read> Deserializer<R> {
+    /// Creates a deserializer that reads KeyValues text from `reader`.
+    pub fn from_reader(reader: R) -> Self {
+        Self {
+            reader: reader.bytes(),
+            buffer: Vec::new(),
+        }
+    }
+}
+
+impl<'a> Deserializer<&'a [u8]> {
+    /// Creates a deserializer that reads KeyValues text from the bytes of `input`.
+    pub fn from_str(input: &'a str) -> Self {
+        Self::from_reader(input.as_bytes())
+    }
+}
+
+impl<R: Read> Deserializer<R> {
+    /// Returns the next byte without consuming it, or `None` at end of input.
+    fn peek(&mut self) -> Result<Option<u8>> {
+        self.peek_nth(0)
+    }
+
+    /// Returns the byte `index` positions ahead without consuming anything, or
+    /// `None` if the input ends before that position.
+    fn peek_nth(&mut self, index: usize) -> Result<Option<u8>> {
+        // Fill the lookahead buffer until it holds position `index`.
+        while self.buffer.len() <= index {
+            match self.reader.next() {
+                Some(Ok(ch)) => self.buffer.push(ch),
+                Some(Err(e)) => return Err(Error::Io(e)),
+                None => return Ok(None),
+            }
+        }
+        Ok(self.buffer.get(index).copied())
+    }
+
+    /// Consumes and returns the next byte, or `None` at end of input.
+    fn next(&mut self) -> Result<Option<u8>> {
+        let ch = self.peek()?;
+        // The buffer is empty when `peek` reports end of input, and
+        // `Vec::remove` panics on an empty vector.
+        if ch.is_some() {
+            self.buffer.remove(0);
+        }
+        Ok(ch)
+    }
+
+    /// Skips ASCII whitespace and `//` line comments, then returns the next
+    /// byte without consuming it.
+    fn peek_no_whitespace(&mut self) -> Result<Option<u8>> {
+        loop {
+            match self.peek()? {
+                Some(ch) if ch.is_ascii_whitespace() => {
+                    self.next()?;
+                }
+                Some(ch) if ch == b'/' && self.peek_nth(1)? == Some(b'/') => {
+                    // line comment
+                    while !matches!(self.peek()?, Some(b'\n') | None) {
+                        self.next()?;
+                    }
+                }
+                _ => break,
+            }
+        }
+
+        self.peek()
+    }
+
+    /// Skips ASCII whitespace and `//` line comments, then consumes and returns
+    /// the next byte.
+    fn next_no_whitespace(&mut self) -> Result<Option<u8>> {
+        self.peek_no_whitespace()?;
+        self.next()
+    }
+
+    /// Parses a quoted or unquoted string, skipping leading whitespace and line
+    /// comments.
+    ///
+    /// A quoted string runs to the closing `"`, which is consumed. An unquoted
+    /// string stops before ASCII whitespace, `{`, `}`, `"`, or at end of input.
+    fn parse_string(&mut self) -> Result<String> {
+        let is_quoted = match self.peek_no_whitespace()? {
+            Some(b'"') => {
+                self.next()?;
+                true
+            }
+            Some(b'{' | b'}' | b'[') => {
+                return Err(Error::ToDo(String::from(
+                    "expected string, got unexpected control character",
+                )));
+            }
+            Some(_) => false,
+            None => return Err(Error::UnexpectedEof),
+        };
+
+        let mut result = Vec::new();
+        loop {
+            let ch = match self.peek()? {
+                // TODO: handle escape characters
+                Some(b'\\') => {
+                    return Err(Error::ToDo(String::from(
+                        "escape characters are not supported yet",
+                    )));
+                }
+                Some(ch) => ch,
+                None if !is_quoted => break,
+                None => return Err(Error::UnexpectedEof),
+            };
+
+            if is_quoted {
+                // Inside quotes only the closing quote ends the string; braces
+                // and whitespace are ordinary content.
+                self.next()?;
+                if ch == b'"' {
+                    break;
+                }
+                result.push(ch);
+            } else if ch.is_ascii_whitespace() || matches!(ch, b'{' | b'}' | b'"') {
+                // The terminator belongs to the next token; leave it unread.
+                break;
+            } else {
+                self.next()?;
+                result.push(ch);
+            }
+        }
+
+        String::from_utf8(result).map_err(|_| Error::BadEncoding)
+    }
+
+    fn try_parse_tag(&mut self) -> Result<Option<Cow<'_, str>>> {
+        todo!()
+    }
+}
+
+// Generates `deserialize_<ty>` methods that parse a string token with `FromStr`.
+macro_rules! deserialize_as_str_impl {
+    ($ty:ident) => {
+        paste::paste! {
+            fn [<deserialize_ $ty>]<V: Visitor<'de>>(self, visitor: V) -> $crate::Result<V::Value> {
+                let s = self.parse_string()?;
+                let value = <$ty as std::str::FromStr>::from_str(&s)
+                    .map_err(|_| $crate::error::Error::ToDo(String::from("FromStr error")))?;
+                visitor.[<visit_ $ty>](value)
+            }
+        }
+    };
+    ($first:ident, $($rest:ident),+) => {
+        deserialize_as_str_impl!($first);
+        deserialize_as_str_impl!($($rest),+);
+    }
+}
+
+impl<'de, 'a, R: Read> serde::Deserializer<'de> for &'a mut Deserializer<R> {
+    type Error = Error;
+
+    fn deserialize_any<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value> {
+        match self.peek_no_whitespace()? {
+            Some(b'{') => self.deserialize_map(visitor),
+            Some(_) => self.deserialize_str(visitor),
+            None => Err(Error::UnexpectedEof),
+        }
+    }
+
+    fn deserialize_bool<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value> {
+        let value = match self.parse_string() {
+            Ok(ref s) if s == "0" => Ok(false),
+            Ok(ref s) if s == "1" => Ok(true),
+            Ok(_) => Err(Error::ToDo(String::from("string not convertible to bool"))),
+            Err(e) => Err(e),
+        }?;
+        visitor.visit_bool(value)
+    }
+
+    deserialize_as_str_impl!(i8, i16, i32, i64, i128, u8, u16, u32, u64, u128, f32, f64, char);
+
+    fn deserialize_str<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value> {
+        let value = self.parse_string()?;
+        // TODO: use visit_borrowed_str
+        visitor.visit_str(&value)
+    }
+
+    fn deserialize_string<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value> {
+        let value = self.parse_string()?;
+        visitor.visit_string(value)
+    }
+
+    fn deserialize_bytes<V: Visitor<'de>>(self, _visitor: V) -> Result<V::Value> {
+        Err(Error::UnsupportedType(String::from("bytes")))
+    }
+
+    fn deserialize_byte_buf<V: Visitor<'de>>(self, _visitor: V) -> Result<V::Value> {
+        Err(Error::UnsupportedType(String::from("bytes")))
+    }
+
+    fn deserialize_option<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value> {
+        // TODO: check if this is an empty string
+        visitor.visit_some(self)
+    }
+
+    fn deserialize_unit<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value> {
+        // TODO: check if this is an empty string
+        visitor.visit_unit()
+    }
+
+    fn deserialize_unit_struct<V: Visitor<'de>>(
+        self,
+        _name: &'static str,
+        visitor: V,
+    ) -> Result<V::Value> {
+        self.deserialize_unit(visitor)
+    }
+
+    fn deserialize_newtype_struct<V: Visitor<'de>>(
+        self,
+        _name: &'static str,
+        visitor: V,
+    ) -> Result<V::Value> {
+        visitor.visit_newtype_struct(self)
+    }
+
+    fn deserialize_seq<V: Visitor<'de>>(self, _visitor: V) -> Result<V::Value> {
+        todo!("figure out how to deserialize tuples")
+    }
+
+    fn deserialize_tuple<V: Visitor<'de>>(self, _len: usize, visitor: V) -> Result<V::Value> {
+        self.deserialize_seq(visitor)
+    }
+
+    fn deserialize_tuple_struct<V: Visitor<'de>>(
+        self,
+        _name: &'static str,
+        _len: usize,
+        visitor: V,
+    ) -> Result<V::Value> {
+        self.deserialize_seq(visitor)
+    }
+
+    fn deserialize_map<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value> {
+        match self.next_no_whitespace()? {
+            Some(b'{') => {
+                let value = visitor.visit_map(KeyValueSeq::new(self))?;
+                match self.next_no_whitespace()? {
+                    Some(b'}') => Ok(value),
+                    Some(_) => Err(Error::ToDo(String::from("expected map end"))),
+                    None => Err(Error::UnexpectedEof),
+                }
+            }
+            Some(_) => Err(Error::ToDo(String::from("expected map start"))),
+            None => Err(Error::UnexpectedEof),
+        }
+    }
+
+    fn deserialize_struct<V: Visitor<'de>>(
+        self,
+        _name: &'static str,
+        _fields: &'static [&'static str],
+        visitor: V,
+    ) -> Result<V::Value> {
+        self.deserialize_map(visitor)
+    }
+
+    fn deserialize_enum<V: Visitor<'de>>(
+        self,
+        _name: &'static str,
+        _variants: &'static [&'static str],
+        visitor: V,
+    ) -> Result<V::Value> {
+        // TODO: check for any valid string start
+        match self.peek_no_whitespace()? {
+            Some(b'{') => {
+                // Consume the `{` that was just peeked.
+                self.next()?;
+                let value = visitor.visit_enum(Enum::new(self))?;
+                match self.next_no_whitespace()? {
+                    Some(b'}') => Ok(value),
+                    Some(_) => Err(Error::ToDo(String::from("expected map end"))),
+                    None => Err(Error::UnexpectedEof),
+                }
+            }
+            Some(_) => {
+                // unit variant
+                visitor.visit_enum(self.parse_string()?.into_deserializer())
+            }
+            None => Err(Error::UnexpectedEof),
+        }
+    }
+
+    fn deserialize_identifier<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value> {
+        self.deserialize_str(visitor)
+    }
+
+    fn deserialize_ignored_any<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value> {
+        self.deserialize_any(visitor)
+    }
+}
+
+/// Accessor over the entries of a `{ ... }` block; stops at the closing `}`.
+struct KeyValueSeq<'a, R> {
+    de: &'a mut Deserializer<R>,
+    first: bool,
+}
+
+impl<'a, R> KeyValueSeq<'a, R> {
+    fn new(de: &'a mut Deserializer<R>) -> Self {
+        Self { de, first: true }
+    }
+}
+
+impl<'de, 'a, R: Read> SeqAccess<'de> for KeyValueSeq<'a, R> {
+    type Error = Error;
+
+    fn next_element_seed<T: DeserializeSeed<'de>>(&mut self, seed: T) -> Result<Option<T::Value>> {
+        match self.de.peek_no_whitespace()? {
+            Some(b'}') => return Ok(None),
+            Some(_) => {}
+            None => return Err(Error::UnexpectedEof),
+        }
+
+        if !self.first {
+            let _key = self.de.parse_string()?;
+            // TODO: Parse all key-values where the key equals the start key,
+            // ignoring other key-values at this level.
+            // This is annoying because related keys don't have to be next to each other.
+            // For example, parsing:
+            //     "foo" "1"
+            //     "bar" "2"
+            //     "foo" "3"
+            //     "bar" "4"
+            // with:
+            //     #[derive(Debug, Deserialize)]
+            //     struct Example {
+            //         foo: Vec<i32>,
+            //         bar: Vec<i32>,
+            //     }
+            // should result in:
+            //     Example { foo: [1, 3], bar: [2, 4] }
+            // and NOT:
+            //     Example { foo: [1], bar: [2] }
+        }
+        self.first = false;
+
+        seed.deserialize(&mut *self.de).map(Some)
+    }
+}
+
+impl<'de, 'a, R: Read> MapAccess<'de> for KeyValueSeq<'a, R> {
+    type Error = Error;
+
+    fn next_key_seed<K: DeserializeSeed<'de>>(&mut self, seed: K) -> Result<Option<K::Value>> {
+        match self.de.peek_no_whitespace()? {
+            Some(b'}') => return Ok(None),
+            None => return Err(Error::UnexpectedEof),
+            _ => {}
+        }
+
+        self.first = false;
+
+        seed.deserialize(&mut *self.de).map(Some)
+    }
+
+    fn next_value_seed<V: DeserializeSeed<'de>>(&mut self, seed: V) -> Result<V::Value> {
+        seed.deserialize(&mut *self.de)
+    }
+}
+
+/// Accessor for an enum written as `{ "variant" ... }`, used after the `{` is consumed.
+struct Enum<'a, R> {
+    de: &'a mut Deserializer<R>,
+}
+
+impl<'a, R> Enum<'a, R> {
+    fn new(de: &'a mut Deserializer<R>) -> Self {
+        Self { de }
+    }
+}
+
+impl<'de, 'a, R: Read> EnumAccess<'de> for Enum<'a, R> {
+    type Error = Error;
+    type Variant = Self;
+
+    fn variant_seed<V: DeserializeSeed<'de>>(self, seed: V) -> Result<(V::Value, Self::Variant)> {
+        // Deserialize the enum variant name
+        let value = seed.deserialize(&mut *self.de)?;
+        Ok((value, self))
+    }
+}
+
+impl<'de, 'a, R: Read> VariantAccess<'de> for Enum<'a, R> {
+    type Error = Error;
+
+    fn unit_variant(self) -> Result<()> {
+        // If the visitor thought this was a unit variant, it should have already been handled.
+        Err(Error::ToDo(String::from("expected string")))
+    }
+
+    fn newtype_variant_seed<T: DeserializeSeed<'de>>(self, seed: T) -> Result<T::Value> {
+        // Newtype variants are represented as `{ "name" "value" }`
+        seed.deserialize(self.de)
+    }
+
+    fn tuple_variant<V: Visitor<'de>>(self, _len: usize, visitor: V) -> Result<V::Value> {
+        // Tuple variants are represented as `{ "name" "value1" "name" "value2" ... }`
+        serde::Deserializer::deserialize_seq(self.de, visitor)
+    }
+
+    fn struct_variant<V: Visitor<'de>>(
+        self,
+        _fields: &'static [&'static str],
+        visitor: V,
+    ) -> Result<V::Value> {
+        // Struct variants are represented as `{ "name" { "k1" "v1" "k2" "v2" ... } }`
+        serde::Deserializer::deserialize_map(self.de, visitor)
+    }
+}
diff --git a/src/error.rs b/src/error.rs
index 073ae8b..ab767be 100644
--- a/src/error.rs
+++ b/src/error.rs
@@ -5,6 +5,7 @@ use std::io;
 use thiserror::Error;
 
 // TODO: this struct is incomplete and subject to change
+// TODO: error positions
 /// Represents all errors that can occur during serialization or deserialization.
 #[derive(Debug, Error)]
 #[non_exhaustive]
@@ -13,6 +14,7 @@ pub enum Error {
     #[error("unexpected io error")]
     Io(io::Error),
 
+    // TODO: only the Bytes type is unsupported.
     /// Indicates that the given type is not supported.
     ///
     /// # Explanation
@@ -124,6 +126,18 @@ pub enum Error {
     #[error("key must be a string, but it was a `{0}`")]
     KeyMustBeAString(String),
 
+    // TODO: is this necessary?
+    #[error("unexpected end of file")]
+    UnexpectedEof,
+
+    // TODO: maybe this should have a different name. this represents non-utf8 text
+    #[error("expected UTF-8")]
+    BadEncoding,
+
+    // TODO: create error variants for each usage of ToDo
+    #[error("temporary error: {0}")]
+    ToDo(String),
+
     /// Indicates that a Serde error occurred.
     #[error("a serde error occurred: {0}")]
     Serde(String),
diff --git a/src/lib.rs b/src/lib.rs
index 20ba41c..c68f5ef 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -119,10 +119,11 @@
 
 #![warn(missing_docs)]
 
-mod de;
+pub mod de;
 pub mod error;
 pub mod ser;
 
+pub use de::{from_reader, from_str, kv_from_reader, kv_from_str};
 pub use error::{Error, Result};
 pub use ser::{
     kv_to_string, kv_to_string_pretty, kv_to_writer, kv_to_writer_pretty, to_string,