Skip to content

Commit

Permalink
add parse_slice and optimise StrTokens to avoid needing buffer
Browse files Browse the repository at this point in the history
  • Loading branch information
jsdw committed Nov 12, 2023
1 parent 1aeccdb commit 2dcc808
Show file tree
Hide file tree
Showing 5 changed files with 140 additions and 128 deletions.
114 changes: 0 additions & 114 deletions src/buffer.rs

This file was deleted.

1 change: 0 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,5 @@ assert_eq!(remaining, ",foobar");
mod one_of;
mod tokens;

pub mod buffer;
pub mod types;
pub use tokens::{IntoTokens, TokenLocation, Tokens};
68 changes: 55 additions & 13 deletions src/tokens.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,14 +117,14 @@ pub trait Tokens: Sized {
TokensIter { tokens: self }
}

/// Consume the remaining tokens into the buffer type denoted by the second `Buf` generic, and
/// attempt to parse them using [`str::parse`] into the first `Out` generic type.
/// Attempt to parse the remaining tokens into the first `Out` generic using [`str::parse()`].
/// The second generic type may be used to buffer tokens, and can be any type that implements
/// `FromIterator<Self::Item> + Deref<Target = str>`.
///
/// If the parsing fails, then no tokens are consumed.
///
/// The buffer type used can be heap allocated (ie `String` would be a common choice) or stack
/// allocated; anything that implements [`core::iter::FromIterator`] and
/// derefs to `str` is allowed.
/// As an optimisation, implementations may choose not to use the provided buffer type if they have a
/// suitable internal buffer of their own already. This is the case for [`crate::types::StrTokens`].
///
/// This is mostly expected to be used in conjunction with [`Tokens::take`] and [`Tokens::take_while`],
/// which themselves return the matching [`Tokens`].
Expand All @@ -133,18 +133,12 @@ pub trait Tokens: Sized {
///
/// ```
/// use yap::{ Tokens, IntoTokens };
/// use yap::buffer::StackString;
///
/// let mut tokens = "123abc456".into_tokens();
///
/// // The provided buffer::StackString is a bounded stack allocated buffer
/// // which can be used when the maximum number of tokens you'll need to buffer
/// // is known (in this case, 3):
/// let n = tokens.take(3).parse::<u8, StackString<3>>().unwrap();
/// let n = tokens.take(3).parse::<u8, String>().unwrap();
/// assert_eq!(n, 123);
///
/// // A heap allocated type like String can be used when the number of tokens
/// // you'll want to buffer before parsing is not known:
/// let s = tokens.take_while(|t| t.is_alphabetic()).parse::<String, String>().unwrap();
/// assert_eq!(s, "abc".to_string());
///
Expand All @@ -156,14 +150,62 @@ pub trait Tokens: Sized {
/// let n2 = tokens.parse::<u16, String>().unwrap();
/// assert_eq!(n2, 456);
/// ```
fn parse<Out, Buf>(&'_ mut self) -> Result<Out, <Out as FromStr>::Err>
fn parse<Out, Buf>(&mut self) -> Result<Out, <Out as FromStr>::Err>
where
Out: FromStr,
Buf: FromIterator<Self::Item> + Deref<Target = str>,
{
self.optional_err(|toks| toks.as_iter().collect::<Buf>().parse::<Out>())
}

/// Attempt to parse the tokens between the `from` and `to` locations into the first `Out` generic,
/// using [`str::parse()`]. The second generic type may be used to buffer tokens, and can be any
/// type that implements `FromIterator<Self::Item> + Deref<Target = str>`.
///
/// If the parsing fails, then no tokens are consumed.
///
/// As an optimisation, implementations may choose not to use the provided buffer type if they have a
/// suitable internal buffer of their own already. This is the case for [`crate::types::StrTokens`].
///
/// See [`Tokens::parse`] for a version of this that is often a little more ergonomic to work with.
///
/// # Example
///
/// ```
/// use yap::{ Tokens, IntoTokens };
///
/// let mut tokens = "123abc456".into_tokens();
///
/// let from = tokens.location();
///
/// // arbitrary complex parsing logic here that consumes some tokens.
/// tokens.take_while(|t| t.is_numeric()).for_each(drop);
///
/// let to = tokens.location();
///
/// // Now, use external logic to parse the matching tokens:
/// let n = tokens.parse_slice::<u16, String>(from, to).unwrap();
///
/// assert_eq!(n, 123);
/// assert_eq!(tokens.remaining(), "abc456");
/// ```
fn parse_slice<Out, Buf>(
    &mut self,
    from: Self::Location,
    to: Self::Location,
) -> Result<Out, <Out as FromStr>::Err>
where
    Out: FromStr,
    Buf: FromIterator<Self::Item> + Deref<Target = str>,
{
    // Generic fallback: gather the tokens lying between `from` and `to` into
    // a `Buf`, then hand the buffered text over to `str::parse`.
    self.optional_err(|toks| {
        let buffered: Buf = toks.slice(from, to).as_iter().collect();
        buffered.parse::<Out>()
    })
}

/// Attach some context to your tokens. The returned struct, [`WithContext`], also implements
/// [`Tokens`], and so can be used in much the same way. Since this consumes your tokens, it's
/// better suited to permanent context that you'd like throughout the parsing.
Expand Down
23 changes: 23 additions & 0 deletions src/tokens/slice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,29 @@ impl<'a, T: Tokens> Tokens for Slice<'a, T> {
// Forwards the location check to the wrapped `tokens`, passing `location`
// through unchanged.
fn is_at_location(&self, location: &Self::Location) -> bool {
self.tokens.is_at_location(location)
}

// This is an optimisation: some impls like `StrTokens` can parse things more
// efficiently, so delegate to their `parse_slice` if it exists rather than
// using the default impl, which will buffer tokens first.
//
// The default `parse` consumes the tokens it parsed when it succeeds, so this
// override has to do the same by hand, because `parse_slice` leaves the
// location where it was.
//
// NOTE(review): the span parsed is always `self.from..self.to`, even if some
// tokens of this slice were already consumed before `parse` was called;
// confirm whether the current location should be used as the start instead.
fn parse<Out, Buf>(&mut self) -> Result<Out, <Out as core::str::FromStr>::Err>
where
    Out: core::str::FromStr,
    Buf: FromIterator<Self::Item> + core::ops::Deref<Target = str>,
{
    let end = self.to.clone();
    // On failure `?` returns early and the location is left untouched.
    let parsed = self
        .tokens
        .parse_slice::<Out, Buf>(self.from.clone(), end.clone())?;
    // Parsing succeeded: mark every token in the slice as consumed.
    self.tokens.set_location(end);
    Ok(parsed)
}
// Forwards to the wrapped tokens' `parse_slice`, passing `from` and `to`
// through unchanged, so that an optimised implementation on the wrapped type
// (if it has one) is used instead of the buffering default.
fn parse_slice<Out, Buf>(
&mut self,
from: Self::Location,
to: Self::Location,
) -> Result<Out, <Out as core::str::FromStr>::Err>
where
Out: core::str::FromStr,
Buf: FromIterator<Self::Item> + core::ops::Deref<Target = str>,
{
self.tokens.parse_slice::<Out, Buf>(from, to)
}
}

impl<'a, T: Tokens> Drop for Slice<'a, T> {
Expand Down
62 changes: 62 additions & 0 deletions src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -146,15 +146,39 @@ impl<'a> Tokens for StrTokens<'a> {
self.cursor = next_char_boundary;
Some(next_char)
}

fn location(&self) -> Self::Location {
    // A location is just a snapshot of the current cursor value.
    let offset = self.cursor;
    StrTokensLocation(offset)
}

fn set_location(&mut self, location: Self::Location) {
self.cursor = location.0;
}

fn is_at_location(&self, location: &Self::Location) -> bool {
self.cursor == location.0
}

// We can do better than the default impl here; we have a &str that we
// can call parse on without needing to buffer anything, so the `Buf`
// type parameter is never instantiated.
fn parse<Out, Buf>(&mut self) -> Result<Out, <Out as core::str::FromStr>::Err>
where
    Out: core::str::FromStr,
    Buf: FromIterator<Self::Item> + core::ops::Deref<Target = str>,
{
    // On failure `?` returns early, leaving the cursor where it was so that
    // no tokens are consumed.
    let parsed = self.remaining().parse::<Out>()?;
    // The default impl drains every remaining token when parsing succeeds;
    // mirror that by moving the cursor to the end of the string.
    self.cursor = self.str.len();
    Ok(parsed)
}
fn parse_slice<Out, Buf>(
&mut self,
from: Self::Location,
to: Self::Location,
) -> Result<Out, <Out as core::str::FromStr>::Err>
where
Out: core::str::FromStr,
Buf: FromIterator<Self::Item> + core::ops::Deref<Target = str>,
{
self.optional_err(|toks| toks.str[from.0..to.0].parse())
}
}

impl<'a> IntoTokens<char> for StrTokens<'a> {
Expand Down Expand Up @@ -371,4 +395,42 @@ mod tests {
tokens.set_location(loc);
assert!(tokens.tokens("hello".chars()));
}

#[test]
fn str_tokens_parse_optimisations_work() {
    // A buffer type that panics as soon as anything tries to build it or
    // read from it. If any call below fell back to the buffering default
    // impl, the test would fail with one of these panics.
    struct BadBuffer;
    impl core::iter::FromIterator<char> for BadBuffer {
        fn from_iter<T: IntoIterator<Item = char>>(_: T) -> Self {
            panic!("FromIterator impl shouldn't be used")
        }
    }
    impl core::ops::Deref for BadBuffer {
        type Target = str;
        fn deref(&self) -> &Self::Target {
            panic!("Deref impl shouldn't be used")
        }
    }

    let mut tokens = "123abc".into_tokens();

    // Find the locations either side of the leading number:
    let from = tokens.location();
    tokens.take_while(|t| t.is_numeric()).for_each(drop);
    let to = tokens.location();

    // None of these should use the provided buffer, since StrTokens
    // can make use of its own internal one.

    // (1) Parsing via a slice delegates to StrTokens::parse_slice:
    let n = tokens
        .slice(from.clone(), to.clone())
        .parse::<u16, BadBuffer>()
        .expect("parse worked (1)");
    assert_eq!(n, 123);

    // (2) Calling parse_slice directly:
    let n = tokens
        .parse_slice::<u16, BadBuffer>(from, to)
        .expect("parse worked (2)");
    assert_eq!(n, 123);

    // (3) Calling parse directly on StrTokens parses everything remaining:
    let mut all_digits = "456".into_tokens();
    let n = all_digits
        .parse::<u16, BadBuffer>()
        .expect("parse worked (3)");
    assert_eq!(n, 456);
}
}

0 comments on commit 2dcc808

Please sign in to comment.