diff --git a/src/buffer.rs b/src/buffer.rs
deleted file mode 100644
index 31663be..0000000
--- a/src/buffer.rs
+++ /dev/null
@@ -1,114 +0,0 @@
-//! Buffer types for collecting tokens into.
-
-use core::{
-    array,
-    ops::{Deref, DerefMut},
-    str::FromStr,
-};
-
-/// A type that can collect [`char`]s on the stack and can be used in conjunction
-/// with [`crate::Tokens::parse()`].
-///
-/// # Panics
-///
-/// Using `FromIter` to collect more chars than will fit in the buffer will
-/// panic. `FromIter` is called implicitly as a result of calling
-/// [`crate::Tokens::parse()`].
-#[derive(Clone, Debug, Hash, PartialEq, Eq)]
-pub struct StackString<const N: usize> {
-    buf: [u8; N],
-    len: usize,
-}
-
-impl<const N: usize> StackString<N> {
-    /// Attempt to push a new [`char`] onto the [`StackString`].
-    /// Returns false if there is no more space.
-    pub fn push(&mut self, val: char) -> bool {
-        let mut buf = [0; 4];
-        let encoded = val.encode_utf8(&mut buf);
-
-        let remaining = N - self.len;
-        if remaining < encoded.len() {
-            return false;
-        }
-
-        for b in encoded.bytes() {
-            self.buf[self.len] = b;
-            self.len += 1;
-        }
-        true
-    }
-}
-
-impl<const N: usize> Default for StackString<N> {
-    fn default() -> Self {
-        Self {
-            buf: array::from_fn(|_| Default::default()),
-            len: Default::default(),
-        }
-    }
-}
-
-impl<const N: usize> FromIterator<char> for StackString<N> {
-    /// Creates a [`StackString`] from an iterator.
-    ///
-    /// # Panics
-    ///
-    /// Panics if the iterator is longer than the internal buffer of the [`StackString`].
-    fn from_iter<I: IntoIterator<Item = char>>(iter: I) -> Self {
-        let mut out = Self::default();
-        for char in iter.into_iter() {
-            if !out.push(char) {
-                panic!("Iterator longer than max buffer length ({N})");
-            }
-        }
-        out
-    }
-}
-
-impl<const N: usize> FromStr for StackString<N> {
-    type Err = ();
-
-    fn from_str(s: &str) -> Result<Self, Self::Err> {
-        // String is too big to fit; error.
-        if s.len() > N {
-            return Err(());
-        }
-
-        // String will fit; manually copy bytes into the array:
-        let mut out = StackString::default();
-        out.buf[..s.len()].copy_from_slice(s.as_bytes());
-        out.len = s.len();
-
-        Ok(out)
-    }
-}
-
-impl<const N: usize> Deref for StackString<N> {
-    type Target = str;
-
-    /// Dereferences the stack string to a `&str`.
-    fn deref(&self) -> &Self::Target {
-        core::str::from_utf8(&self.buf[..self.len]).expect("Valid Utf8")
-    }
-}
-
-impl<const N: usize> DerefMut for StackString<N> {
-    /// Dereferences the stack string to a `&mut str`.
-    fn deref_mut(&mut self) -> &mut Self::Target {
-        core::str::from_utf8_mut(&mut self.buf[..self.len]).expect("Valid Utf8")
-    }
-}
-
-#[cfg(test)]
-mod test {
-    use super::*;
-
-    #[test]
-    fn stack_string_from_iter() {
-        assert_eq!(
-            StackString::<20>::from_iter("123🗻∈🌏".chars()).to_string(),
-            "123🗻∈🌏"
-        );
-    }
-}
diff --git a/src/lib.rs b/src/lib.rs
index 4ee6156..2b074bf 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -115,6 +115,5 @@ assert_eq!(remaining, ",foobar");
 mod one_of;
 mod tokens;
 
-pub mod buffer;
 pub mod types;
 pub use tokens::{IntoTokens, TokenLocation, Tokens};
diff --git a/src/tokens.rs b/src/tokens.rs
index de23f9c..094e4dd 100644
--- a/src/tokens.rs
+++ b/src/tokens.rs
@@ -117,14 +117,14 @@ pub trait Tokens: Sized {
         TokensIter { tokens: self }
     }
 
-    /// Consume the remaining tokens into the buffer type denoted by the second `Buf` generic, and
-    /// attempt to parse them using [`str::parse`] into the first `Out` generic type.
+    /// Attempt to parse the remaining tokens into the first `Out` generic using [`str::parse()`].
+    /// The second generic type may be used to buffer tokens, and can be any type that implements
+    /// `FromIterator<Self::Item> + Deref<Target = str>`.
     ///
     /// If the parsing fails, then no tokens are consumed.
     ///
-    /// The buffer type used can be heap allocated (ie `String` would be a common choice) or stack
-    /// allocated; anything that implements [`core::iter::FromIterator`] and
-    /// derefs to `str` is allowed.
+    /// As an optimisation, implementations may choose not to use the provided buffer type if they have a
+    /// suitable internal buffer of their own already. This is the case for [`crate::types::StrTokens`].
    ///
     /// This is mostly expected to be used in conjunction with [`Tokens::take`] and [`Tokens::take_while`],
     /// which themselves return the matching [`Tokens`].
@@ -133,18 +133,12 @@
     ///
     /// ```
     /// use yap::{ Tokens, IntoTokens };
-    /// use yap::buffer::StackString;
     ///
     /// let mut tokens = "123abc456".into_tokens();
     ///
-    /// // The provided buffer::StackString is a bounded stack allocated buffer
-    /// // which can be used when the maximum number of tokens you'll need to buffer
-    /// // is known (in this case, 3):
-    /// let n = tokens.take(3).parse::<u8, StackString<3>>().unwrap();
+    /// let n = tokens.take(3).parse::<u8, String>().unwrap();
     /// assert_eq!(n, 123);
     ///
-    /// // A heap allocated type like String can be used when the number of tokens
-    /// // you'll want to buffer before parsing is not known:
     /// let s = tokens.take_while(|t| t.is_alphabetic()).parse::<String, String>().unwrap();
     /// assert_eq!(s, "abc".to_string());
     ///
@@ -156,7 +150,7 @@
     /// let n2 = tokens.parse::<u16, String>().unwrap();
     /// assert_eq!(n2, 456);
     /// ```
-    fn parse<Out, Buf>(&'_ mut self) -> Result<Out, <Out as FromStr>::Err>
+    fn parse<Out, Buf>(&mut self) -> Result<Out, <Out as FromStr>::Err>
     where
         Out: FromStr,
         Buf: FromIterator<Self::Item> + Deref<Target = str>,
@@ -164,6 +158,54 @@
         self.optional_err(|toks| toks.as_iter().collect::<Buf>().parse::<Out>())
     }
 
+    /// Attempt to parse the tokens between the `from` and `to` locations into the first `Out` generic,
+    /// using [`str::parse()`]. The second generic type may be used to buffer tokens, and can be any
+    /// type that implements `FromIterator<Self::Item> + Deref<Target = str>`.
+    ///
+    /// If the parsing fails, then no tokens are consumed.
+    ///
+    /// As an optimisation, implementations may choose not to use the provided buffer type if they have a
+    /// suitable internal buffer of their own already. This is the case for [`crate::types::StrTokens`].
+    ///
+    /// See [`Tokens::parse`] for a version of this that is often a little more ergonomic to work with.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use yap::{ Tokens, IntoTokens };
+    ///
+    /// let mut tokens = "123abc456".into_tokens();
+    ///
+    /// let from = tokens.location();
+    ///
+    /// // arbitrary complex parsing logic here that consumes some tokens.
+    /// tokens.take_while(|t| t.is_numeric()).for_each(drop);
+    ///
+    /// let to = tokens.location();
+    ///
+    /// // Now, use external logic to parse the matching tokens:
+    /// let n = tokens.parse_slice::<u8, String>(from, to).unwrap();
+    ///
+    /// assert_eq!(n, 123);
+    /// assert_eq!(tokens.remaining(), "abc456");
+    /// ```
+    fn parse_slice<Out, Buf>(
+        &mut self,
+        from: Self::Location,
+        to: Self::Location,
+    ) -> Result<Out, <Out as FromStr>::Err>
+    where
+        Out: FromStr,
+        Buf: FromIterator<Self::Item> + Deref<Target = str>,
+    {
+        self.optional_err(|toks| {
+            toks.slice(from, to)
+                .as_iter()
+                .collect::<Buf>()
+                .parse::<Out>()
+        })
+    }
+
     /// Attach some context to your tokens. The returned struct, [`WithContext`], also implements
     /// [`Tokens`], and so has can be used in much the same way. Since this consumes your tokens, it's
     /// better suited to permanent context that you'd like throughout the parsing.
diff --git a/src/tokens/slice.rs b/src/tokens/slice.rs
index 578e6f5..2e7d52a 100644
--- a/src/tokens/slice.rs
+++ b/src/tokens/slice.rs
@@ -55,6 +55,29 @@ impl<'a, T: Tokens> Tokens for Slice<'a, T> {
     fn is_at_location(&self, location: &Self::Location) -> bool {
         self.tokens.is_at_location(location)
     }
+
+    // This is an optimisation, because some impls like `StrTokens` can parse things
+    // more efficiently, so delegate to those impls if they exist rather than using
+    // the default impls which will buffer tokens first.
+    fn parse<Out, Buf>(&mut self) -> Result<Out, <Out as core::str::FromStr>::Err>
+    where
+        Out: core::str::FromStr,
+        Buf: FromIterator<Self::Item> + core::ops::Deref<Target = str>,
+    {
+        self.tokens
+            .parse_slice::<Out, Buf>(self.from.clone(), self.to.clone())
+    }
+    fn parse_slice<Out, Buf>(
+        &mut self,
+        from: Self::Location,
+        to: Self::Location,
+    ) -> Result<Out, <Out as core::str::FromStr>::Err>
+    where
+        Out: core::str::FromStr,
+        Buf: FromIterator<Self::Item> + core::ops::Deref<Target = str>,
+    {
+        self.tokens.parse_slice::<Out, Buf>(from, to)
+    }
 }
 
 impl<'a, T: Tokens> Drop for Slice<'a, T> {
diff --git a/src/types.rs b/src/types.rs
index 12a20d8..84411c4 100644
--- a/src/types.rs
+++ b/src/types.rs
@@ -146,15 +146,39 @@ impl<'a> Tokens for StrTokens<'a> {
         self.cursor = next_char_boundary;
         Some(next_char)
     }
+
     fn location(&self) -> Self::Location {
         StrTokensLocation(self.cursor)
     }
+
     fn set_location(&mut self, location: Self::Location) {
         self.cursor = location.0;
     }
+
     fn is_at_location(&self, location: &Self::Location) -> bool {
         self.cursor == location.0
     }
+
+    // We can do better than the default impl here; we have a &str that we
+    // can call parse on without needing to buffer anything,
+    fn parse<Out, Buf>(&mut self) -> Result<Out, <Out as core::str::FromStr>::Err>
+    where
+        Out: core::str::FromStr,
+        Buf: FromIterator<Self::Item> + core::ops::Deref<Target = str>,
+    {
+        self.optional_err(|toks| toks.remaining().parse())
+    }
+    fn parse_slice<Out, Buf>(
+        &mut self,
+        from: Self::Location,
+        to: Self::Location,
+    ) -> Result<Out, <Out as core::str::FromStr>::Err>
+    where
+        Out: core::str::FromStr,
+        Buf: FromIterator<Self::Item> + core::ops::Deref<Target = str>,
+    {
+        self.optional_err(|toks| toks.str[from.0..to.0].parse())
+    }
 }
 
 impl<'a> IntoTokens for StrTokens<'a> {
@@ -371,4 +395,42 @@ mod tests {
         tokens.set_location(loc);
         assert!(tokens.tokens("hello".chars()));
     }
+
+    #[test]
+    fn str_tokens_parse_optimisations_work() {
+        // This buffer will panic if it's used.
+        struct BadBuffer;
+        impl core::iter::FromIterator<char> for BadBuffer {
+            fn from_iter<T: IntoIterator<Item = char>>(_: T) -> Self {
+                panic!("FromIterator impl shouldn't be used")
+            }
+        }
+        impl core::ops::Deref for BadBuffer {
+            type Target = str;
+            fn deref(&self) -> &Self::Target {
+                panic!("Deref impl shouldn't be used")
+            }
+        }
+
+        let mut tokens = "123abc".into_tokens();
+
+        // Find locations to the number:
+        let from = tokens.location();
+        tokens.take_while(|t| t.is_numeric()).for_each(drop);
+        let to = tokens.location();
+
+        // These shouldn't use the provided buffer, since StrTokens
+        // can make use of its own internal one:
+
+        let n = tokens
+            .slice(from.clone(), to.clone())
+            .parse::<u8, BadBuffer>()
+            .expect("parse worked (1)");
+        assert_eq!(n, 123);
+
+        let n = tokens
+            .parse_slice::<u8, BadBuffer>(from, to)
+            .expect("parse worked (2)");
+        assert_eq!(n, 123);
+    }
 }
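
Note for downstream users (not part of the patch): with `yap::buffer::StackString` removed, callers that still want stack-only buffering can supply their own `Buf` type, since `parse` only requires `FromIterator<Self::Item> + Deref<Target = str>`. The sketch below shows one way that might look; the `TinyBuf` type and the `main` harness are illustrative, not part of this diff.

```rust
use yap::{IntoTokens, Tokens};

/// Hypothetical stand-in for the removed `yap::buffer::StackString`: a fixed-capacity,
/// stack-allocated buffer that satisfies the two bounds `Tokens::parse` places on `Buf`.
struct TinyBuf<const N: usize> {
    buf: [u8; N],
    len: usize,
}

impl<const N: usize> core::iter::FromIterator<char> for TinyBuf<N> {
    fn from_iter<I: IntoIterator<Item = char>>(iter: I) -> Self {
        let mut out = TinyBuf { buf: [0; N], len: 0 };
        for c in iter {
            let mut tmp = [0u8; 4];
            let encoded = c.encode_utf8(&mut tmp).as_bytes();
            // Panics if the input overflows the fixed capacity, much like the removed type did.
            out.buf[out.len..out.len + encoded.len()].copy_from_slice(encoded);
            out.len += encoded.len();
        }
        out
    }
}

impl<const N: usize> core::ops::Deref for TinyBuf<N> {
    type Target = str;
    fn deref(&self) -> &str {
        // Only whole UTF-8 encodings are written above, so this cannot fail.
        core::str::from_utf8(&self.buf[..self.len]).expect("valid utf8")
    }
}

fn main() {
    let mut tokens = "123abc".into_tokens();

    // `take(3)` has no internal string of its own, so its default `parse`
    // really does collect into the supplied buffer type:
    let n = tokens.take(3).parse::<u8, TinyBuf<8>>().unwrap();
    assert_eq!(n, 123);

    // Parsing the `StrTokens` directly hits the optimised impl added in this diff,
    // so no buffer is constructed at all:
    let s = tokens.parse::<String, TinyBuf<8>>().unwrap();
    assert_eq!(s, "abc");
}
```

In other words, the `Buf` parameter remains in the signature purely as an opt-in for implementations that genuinely need to buffer; specialised impls such as `StrTokens` and `Slice` are free to ignore it.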