diff --git a/src/bin/doodle/main.rs b/src/bin/doodle/main.rs index ca2922ff..ee43daac 100644 --- a/src/bin/doodle/main.rs +++ b/src/bin/doodle/main.rs @@ -5,8 +5,9 @@ use std::fs; use std::path::PathBuf; use clap::{Parser, ValueEnum}; -use doodle::decoder::Compiler; +use doodle::decoder; use doodle::read::ReadCtxt; +use doodle::streamer; use doodle::FormatModule; mod format; @@ -61,7 +62,7 @@ fn main() -> Result<(), Box> { FormatOutput::Debug => println!("{module:?}"), FormatOutput::Json => serde_json::to_writer(std::io::stdout(), &module).unwrap(), FormatOutput::Rust => { - let program = Compiler::compile_program(&module, &format)?; + let program = decoder::Compiler::compile_program(&module, &format)?; doodle::codegen::print_program(&program); } } @@ -71,7 +72,7 @@ fn main() -> Result<(), Box> { Command::File { output, filename } => { let mut module = FormatModule::new(); let format = format::main(&mut module).call(); - let program = Compiler::compile_program(&module, &format)?; + let program = streamer::Compiler::compile_program(&module, &format)?; let input = fs::read(filename)?; let (value, _) = program.run(ReadCtxt::new(&input))?; diff --git a/src/decoder.rs b/src/decoder.rs index 5bcc5baa..e78a08b5 100644 --- a/src/decoder.rs +++ b/src/decoder.rs @@ -67,7 +67,7 @@ impl Value { } } - fn unwrap_usize(self) -> usize { + pub fn unwrap_usize(self) -> usize { match self { Value::U8(n) => usize::from(n), Value::U16(n) => usize::from(n), @@ -76,22 +76,21 @@ impl Value { } } - fn unwrap_tuple(self) -> Vec { + pub fn unwrap_tuple(self) -> Vec { match self { Value::Tuple(values) => values, _ => panic!("value is not a tuple"), } } - fn unwrap_bool(self) -> bool { + pub fn unwrap_bool(self) -> bool { match self { Value::Bool(b) => b, _ => panic!("value is not a bool"), } } - #[allow(dead_code)] - fn unwrap_char(self) -> char { + pub fn unwrap_char(self) -> char { match self { Value::Char(c) => c, _ => panic!("value is not a char"), @@ -107,7 +106,7 @@ impl Value { 
.then_some(pattern_scope) } - fn matches_inner(&self, scope: &mut MultiScope<'_>, pattern: &Pattern) -> bool { + pub fn matches_inner(&self, scope: &mut dyn ScopeBinding, pattern: &Pattern) -> bool { match (pattern, self) { (Pattern::Binding(name), head) => { scope.push(name.clone(), head.clone()); @@ -403,7 +402,7 @@ impl Expr { self.eval(scope).coerce_mapped_value().clone() } - fn eval_lambda<'a>(&self, scope: &'a Scope<'a>, arg: &Value) -> Value { + pub fn eval_lambda<'a>(&self, scope: &'a Scope<'a>, arg: &Value) -> Value { match self { Expr::Lambda(name, expr) => { let child_scope = SingleScope::new(scope, name, arg); @@ -451,7 +450,7 @@ pub struct Program { } impl Program { - fn new() -> Self { + pub fn new() -> Self { let decoders = Vec::new(); Program { decoders } } @@ -696,6 +695,7 @@ pub enum Scope<'a> { Multi(&'a MultiScope<'a>), Single(SingleScope<'a>), Decoder(DecoderScope<'a>), + Other(&'a dyn ScopeLookup), } pub struct MultiScope<'a> { @@ -715,13 +715,24 @@ pub struct DecoderScope<'a> { decoder: Decoder, } -impl<'a> Scope<'a> { +pub trait ScopeLookup { + fn get_value_by_name(&self, name: &str) -> &Value; + fn get_decoder_by_name(&self, name: &str) -> &Decoder; + fn get_bindings(&self, bindings: &mut Vec<(Label, ScopeEntry)>); +} + +pub trait ScopeBinding { + fn push(&mut self, name: Label, v: Value); +} + +impl<'a> ScopeLookup for Scope<'a> { fn get_value_by_name(&self, name: &str) -> &Value { match self { Scope::Empty => panic!("value not found: {name}"), Scope::Multi(multi) => multi.get_value_by_name(name), Scope::Single(single) => single.get_value_by_name(name), Scope::Decoder(decoder) => decoder.parent.get_value_by_name(name), + Scope::Other(other) => other.get_value_by_name(name), } } @@ -731,19 +742,27 @@ impl<'a> Scope<'a> { Scope::Multi(multi) => multi.parent.get_decoder_by_name(name), Scope::Single(single) => single.parent.get_decoder_by_name(name), Scope::Decoder(decoder) => decoder.get_decoder_by_name(name), + Scope::Other(other) => 
other.get_decoder_by_name(name), } } - pub fn get_bindings(&self, bindings: &mut Vec<(Label, ScopeEntry)>) { + fn get_bindings(&self, bindings: &mut Vec<(Label, ScopeEntry)>) { match self { Scope::Empty => {} Scope::Multi(multi) => multi.get_bindings(bindings), Scope::Single(single) => single.get_bindings(bindings), Scope::Decoder(decoder) => decoder.get_bindings(bindings), + Scope::Other(other) => other.get_bindings(bindings), } } } +impl<'a> ScopeBinding for MultiScope<'a> { + fn push(&mut self, name: Label, v: Value) { + self.entries.push((name, v)); + } +} + impl<'a> MultiScope<'a> { fn new(parent: &'a Scope<'a>) -> MultiScope<'a> { let entries = Vec::new(); @@ -759,10 +778,6 @@ impl<'a> MultiScope<'a> { Value::Record(self.entries) } - pub fn push(&mut self, name: Label, v: Value) { - self.entries.push((name, v)); - } - fn get_value_by_name(&self, name: &str) -> &Value { for (n, v) in self.entries.iter().rev() { if n == name { @@ -807,7 +822,7 @@ impl<'a> SingleScope<'a> { } impl<'a> DecoderScope<'a> { - fn new(parent: &'a Scope<'a>, name: &'a str, decoder: Decoder) -> DecoderScope<'a> { + pub fn new(parent: &'a Scope<'a>, name: &'a str, decoder: Decoder) -> DecoderScope<'a> { DecoderScope { parent, name, @@ -1080,7 +1095,7 @@ impl Decoder { } } -fn value_to_vec_usize(v: &Value) -> Vec { +pub fn value_to_vec_usize(v: &Value) -> Vec { let vs = match v { Value::Seq(vs) => vs, _ => panic!("expected Seq"), @@ -1094,7 +1109,7 @@ fn value_to_vec_usize(v: &Value) -> Vec { .collect::>() } -fn make_huffman_codes(lengths: &[usize]) -> Format { +pub fn make_huffman_codes(lengths: &[usize]) -> Format { let max_length = *lengths.iter().max().unwrap(); let mut bl_count = [0].repeat(max_length + 1); diff --git a/src/error.rs b/src/error.rs index 1d39e0cf..073d2f4e 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1,5 +1,5 @@ use crate::byte_set::ByteSet; -use crate::decoder::{Scope, ScopeEntry}; +use crate::decoder::{Scope, ScopeEntry, ScopeLookup}; use crate::read::ReadCtxt; 
use crate::Label;
diff --git a/src/lib.rs b/src/lib.rs index 1f21e2a5..72f41b78 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -16,6 +16,7 @@ pub mod byte_set; pub mod codegen; pub mod decoder; pub mod error; +pub mod streamer; pub mod output; pub mod prelude;
diff --git a/src/output/flat.rs b/src/output/flat.rs index 0af1507b..2f08cfdf 100644 --- a/src/output/flat.rs +++ b/src/output/flat.rs @@ -1,6 +1,6 @@ use std::io; -use crate::decoder::{MultiScope, Scope, SingleScope, Value}; +use crate::decoder::{MultiScope, Scope, ScopeBinding, SingleScope, Value}; use crate::Label; use crate::{Format, FormatModule};
diff --git a/src/output/tree.rs b/src/output/tree.rs index 003d1f7e..127aba19 100644 --- a/src/output/tree.rs +++ b/src/output/tree.rs @@ -1,6 +1,6 @@ use std::{borrow::Cow, fmt, io, ops::Deref, rc::Rc}; -use crate::decoder::{MultiScope, Scope, SingleScope, Value}; +use crate::decoder::{MultiScope, Scope, ScopeBinding, SingleScope, Value}; use crate::Label; use crate::{DynFormat, Expr, Format, FormatModule};
diff --git a/src/streamer.rs b/src/streamer.rs new file mode 100644 index 00000000..9e32fa80 --- /dev/null +++ b/src/streamer.rs @@ -0,0 +1,1846 @@
+use crate::byte_set::ByteSet;
+use crate::decoder::{
+    self, make_huffman_codes, value_to_vec_usize, Decoder, Scope, ScopeBinding, ScopeEntry,
+    ScopeLookup, Value,
+};
+use crate::error::{ParseError, ParseResult};
+use crate::read::ReadCtxt;
+use crate::{DynFormat, Expr, Format, FormatModule, Label, MatchTree, Next, Pattern};
+use std::collections::HashMap;
+use std::rc::Rc;
+
+/// Mutable interpreter state used while evaluating the ops of a block.
+///
+/// The element types of the three stacks were reconstructed from how the
+/// interpreter uses them: frames are pushed as `StreamFrame` values, decoded
+/// results as `Value`, and counters come from `Value::unwrap_usize`.
+struct StreamCtxt<'a> {
+    /// Enclosing context; name lookups fall back to it when no local frame
+    /// binds the requested name.
+    parent: Option<&'a StreamCtxt<'a>>,
+    /// Scope frames, searched from the most recently pushed one backwards.
+    frames: Vec<StreamFrame<'a>>,
+    /// Stack of values produced by the ops executed so far.
+    values: Vec<Value>,
+    /// Stack of loop counters pushed by `Op::PushCounter`.
+    counters: Vec<usize>,
+}
+
+/// One entry of the scope stack kept in `StreamCtxt::frames`.
+enum StreamFrame<'a> {
+    /// Arguments bound by `Op::Call`.
+    Call(CallScope),
+    /// Single binding introduced by `Op::PushLet`.
+    Let(LetScope<'a>),
+    /// Fields of the record currently being built (`Op::PushRecord`).
+    Record(RecordScope),
+    /// Bindings captured by a successful pattern match (`Op::MatchExpr`).
+    Pattern(RecordScope),
+    /// Named dynamic decoder introduced by `Op::PushDynamic`.
+    Decoder(DecoderScope<'a>),
+}
+
+/// Named argument values visible inside a called block.
+struct CallScope {
+    args: Vec<(Label, Value)>,
+}
+
+/// A single `name = value` binding.
+struct LetScope<'a> {
+    name: &'a str,
+    value: Value,
+}
+
+/// Ordered `(name, value)` bindings; later entries shadow earlier ones on lookup.
+struct RecordScope {
+    record: Vec<(Label, Value)>,
+}
+
+struct
DecoderScope<'a> { + name: &'a str, + decoder: Decoder, +} + +impl CallScope { + fn new(args: Vec<(Label, Value)>) -> CallScope { + CallScope { args } + } +} + +impl<'a> LetScope<'a> { + fn new(name: &'a str, value: Value) -> LetScope<'a> { + LetScope { name, value } + } +} + +impl RecordScope { + fn new(num_fields: usize) -> RecordScope { + let record = Vec::with_capacity(num_fields); + RecordScope { record } + } +} + +impl ScopeBinding for RecordScope { + fn push(&mut self, name: Label, v: Value) { + self.record.push((name, v)); + } +} + +impl<'a> DecoderScope<'a> { + fn new(name: &'a str, decoder: Decoder) -> DecoderScope<'a> { + DecoderScope { name, decoder } + } +} + +impl<'a> ScopeLookup for StreamCtxt<'a> { + fn get_value_by_name(&self, name: &str) -> &Value { + for frame in self.frames.iter().rev() { + match frame { + StreamFrame::Call(call_scope) => { + for (n, v) in call_scope.args.iter().rev() { + if n == name { + return v; + } + } + } + StreamFrame::Let(let_scope) => { + if let_scope.name == name { + return &let_scope.value; + } + } + StreamFrame::Record(record_scope) => { + for (n, v) in record_scope.record.iter().rev() { + if n == name { + return v; + } + } + } + StreamFrame::Pattern(pattern_scope) => { + for (n, v) in pattern_scope.record.iter().rev() { + if n == name { + return v; + } + } + } + StreamFrame::Decoder(_decoder_scope) => {} + } + } + match self.parent { + None => panic!("could not get value: {name}"), + Some(parent) => parent.get_value_by_name(name), + } + } + + fn get_decoder_by_name(&self, name: &str) -> &Decoder { + for frame in self.frames.iter().rev() { + match frame { + StreamFrame::Call(_call_scope) => {} + StreamFrame::Let(_let_scope) => {} + StreamFrame::Record(_record_scope) => {} + StreamFrame::Pattern(_pattern_scope) => {} + StreamFrame::Decoder(decoder_scope) => { + if decoder_scope.name == name { + return &decoder_scope.decoder; + } + } + } + } + match self.parent { + None => panic!("could not get decoder: {name}"), + 
Some(parent) => parent.get_decoder_by_name(name), + } + } + + fn get_bindings(&self, bindings: &mut Vec<(Label, ScopeEntry)>) { + for frame in self.frames.iter().rev() { + match frame { + StreamFrame::Call(call_scope) => { + for (name, value) in call_scope.args.iter().rev() { + bindings.push((name.clone(), ScopeEntry::Value(value.clone()))); + } + } + StreamFrame::Let(let_scope) => { + bindings.push(( + let_scope.name.to_string().into(), + ScopeEntry::Value(let_scope.value.clone()), + )); + } + StreamFrame::Record(record_scope) => { + for (name, value) in record_scope.record.iter().rev() { + bindings.push((name.clone(), ScopeEntry::Value(value.clone()))); + } + } + StreamFrame::Pattern(pattern_scope) => { + for (name, value) in pattern_scope.record.iter().rev() { + bindings.push((name.clone(), ScopeEntry::Value(value.clone()))); + } + } + StreamFrame::Decoder(_decoder_scope) => {} // FIXME + } + } + match self.parent { + None => {} + Some(parent) => parent.get_bindings(bindings), + } + } +} + +#[derive(Clone, Debug)] +pub struct Block { + ops: Vec, +} + +#[derive(Clone, Debug)] +pub enum Target { + Block(usize), + Local(usize), + Forward(usize), + Back(usize), +} + +#[derive(Clone, Debug)] +pub enum Op { + Jump(Target), + Call(usize, Vec<(Label, Expr)>), + Fail, + EndOfInput, + Align(usize), + Byte(ByteSet), + Variant(Label), + Branch(usize), + Parallel(Vec), + MatchTree(MatchTree, Vec), + MatchExpr(Expr, Vec<(Pattern, Target)>), + PushTuple(usize), + TupleField, + PushRecord(usize), + RecordField(Label), + PopRecord, + PushSeq, + SeqItem, + PushPattern, + PopPattern, + PushCounter(Expr), + PopCounter, + DecCounter, + CounterZero(Target, Target), + Lambda(Expr), + LambdaSeqLast(Expr), + If(Target, Target), + Map(Expr), + Compute(Expr), + PushLet(Label, Expr), + PopLet, + PushDynamic(Label, DynFormat), + PopDynamic, + ApplyDynamic(Label), + PushInput, + PushInputSlice(Expr), + PushInputOffset(Expr), + PopInput, + Bits(Block), + Negated(Block), +} + +pub struct 
Program { + blocks: Vec, +} + +impl Program { + fn new() -> Self { + let blocks = Vec::new(); + Program { blocks } + } + + pub fn run<'input>(&self, input: ReadCtxt<'input>) -> ParseResult<(Value, ReadCtxt<'input>)> { + self.blocks[0].eval_clean(self, None, input) + } +} + +pub struct Compiler<'a> { + module: &'a FormatModule, + program: Program, + decoder_map: HashMap<(usize, Rc>), usize>, + compile_queue: Vec<(&'a Format, Rc>, usize)>, +} + +impl<'a> Compiler<'a> { + fn new(module: &'a FormatModule) -> Self { + let program = Program::new(); + let decoder_map = HashMap::new(); + let compile_queue = Vec::new(); + Compiler { + module, + program, + decoder_map, + compile_queue, + } + } + + pub fn compile_program(module: &FormatModule, format: &Format) -> Result { + let mut compiler = Compiler::new(module); + compiler.queue_compile(format, Rc::new(Next::Empty)); + while let Some((f, next, n)) = compiler.compile_queue.pop() { + let b = compiler.compile_format(f, next)?; + compiler.program.blocks[n] = b; + } + Ok(compiler.program) + } + + fn chain_block(&mut self, mut fst: Block, mut snd: Block) -> Block { + fst.ops.append(&mut snd.ops); + fst + } + + fn queue_compile(&mut self, f: &'a Format, next: Rc>) -> usize { + let n = self.program.blocks.len(); + self.program.blocks.push(Block { ops: vec![] }); + self.compile_queue.push((f, next, n)); + n + } + + pub fn compile_one(format: &Format) -> Result { + let module = FormatModule::new(); + Compiler::compile_program(&module, format) + } + + fn concat_blocks(bs: Vec) -> (Vec, Vec) { + let mut indices = Vec::with_capacity(bs.len()); + let mut offset = 0; + for block in &bs { + let size = block.ops.len() + 1; + indices.push((offset, size)); + offset += size; + } + let mut mbs = Vec::with_capacity(bs.len()); + let mut ops = Vec::new(); + for (index, mut block) in bs.into_iter().enumerate() { + let (offset, _) = indices[index]; + let mut remain = 1; + for (_, size) in &indices[index + 1..] 
{ + remain += size; + } + ops.append(&mut block.ops); + ops.push(Op::Jump(Target::Forward(remain))); + mbs.push(Target::Forward(offset + 1)); + } + (ops, mbs) + } + + fn concat_blocks2(bs: Vec<(Pattern, Block)>) -> (Vec, Vec<(Pattern, Target)>) { + let mut indices = Vec::with_capacity(bs.len()); + let mut offset = 0; + for (_pattern, block) in &bs { + let size = block.ops.len() + 1; + indices.push((offset, size)); + offset += size; + } + let mut mbs = Vec::with_capacity(bs.len()); + let mut ops = Vec::new(); + for (index, (pattern, mut block)) in bs.into_iter().enumerate() { + let (offset, _) = indices[index]; + let mut remain = 1; + for (_, size) in &indices[index + 1..] { + remain += size; + } + ops.append(&mut block.ops); + ops.push(Op::Jump(Target::Forward(remain))); + mbs.push((pattern, Target::Forward(offset + 1))); + } + (ops, mbs) + } + + fn compile_format(&mut self, format: &'a Format, next: Rc>) -> Result { + match format { + Format::ItemVar(level, arg_exprs) => { + let f = self.module.get_format(*level); + let next = if f.depends_on_next(self.module) { + next + } else { + Rc::new(Next::Empty) + }; + let n = if let Some(n) = self.decoder_map.get(&(*level, next.clone())) { + *n + } else { + let n = self.queue_compile(f, next.clone()); + self.decoder_map.insert((*level, next.clone()), n); + n + }; + let arg_names = self.module.get_args(*level); + let mut args = Vec::new(); + for ((name, _type), expr) in Iterator::zip(arg_names.iter(), arg_exprs.iter()) { + args.push((name.clone(), expr.clone())); + } + Ok(Block::op(Op::Call(n, args))) + } + Format::Fail => Ok(Block::op(Op::Fail)), + Format::EndOfInput => Ok(Block::op(Op::EndOfInput)), + Format::Align(n) => Ok(Block::op(Op::Align(*n))), + Format::Byte(bs) => Ok(Block::op(Op::Byte(*bs))), + Format::Variant(label, f) => { + let s = self.compile_format(f, next.clone())?; + Ok(self.chain_block(s, Block::op(Op::Variant(label.clone())))) + } + Format::Union(branches) => { + let mut fs = 
Vec::with_capacity(branches.len()); + let mut bs = Vec::with_capacity(branches.len()); + for (index, f) in branches.iter().enumerate() { + let mut block = self.compile_format(f, next.clone())?; + block.ops.push(Op::Branch(index)); + bs.push(block); + fs.push(f.clone()); + } + if let Some(tree) = MatchTree::build(self.module, &fs, next.clone()) { + let (mut ops, mbs) = Self::concat_blocks(bs); + ops.insert(0, Op::MatchTree(tree, mbs)); + Ok(Block::ops(ops)) + } else { + Err(format!("cannot build match tree for {:?}", format)) + } + } + Format::UnionNondet(branches) => { + let mut ds = Vec::with_capacity(branches.len()); + for (label, f) in branches { + let s = self.compile_format(f, next.clone())?; + ds.push(self.chain_block(s, Block::op(Op::Variant(label.clone())))); + } + Ok(Block::op(Op::Parallel(ds))) + } + Format::Tuple(fields) => { + let mut chain = Block::op(Op::PushTuple(fields.len())); + let mut fields = fields.iter(); + while let Some(f) = fields.next() { + let next = Rc::new(Next::Tuple(fields.as_slice(), next.clone())); + let s = self.compile_format(f, next)?; + chain = self.chain_block(chain, s); + chain = self.chain_block(chain, Block::op(Op::TupleField)); + } + Ok(chain) + } + Format::Record(fields) => { + let mut chain = Block::op(Op::PushRecord(fields.len())); + let mut fields = fields.iter(); + while let Some((name, f)) = fields.next() { + let next = Rc::new(Next::Record(fields.as_slice(), next.clone())); + let s = self.compile_format(f, next)?; + chain = self.chain_block(chain, s); + chain = self.chain_block(chain, Block::op(Op::RecordField(name.clone()))); + } + chain = self.chain_block(chain, Block::op(Op::PopRecord)); + Ok(chain) + } + Format::Repeat(a) => { + if a.is_nullable(self.module) { + return Err(format!("cannot repeat nullable format: {a:?}")); + } + let astar = Format::Repeat(a.clone()); + let fa = Format::Tuple(vec![(**a).clone(), astar]); + let fb = Format::EMPTY; + if let Some(tree) = MatchTree::build(self.module, &[fa, fb], 
next.clone()) {
+                    let mut block = self.compile_format(a, Rc::new(Next::Repeat(a, next)))?;
+                    let body_length = block.ops.len();
+                    // Layout: [PushSeq, MatchTree, body.., SeqItem, Jump].
+                    // Tree branch 0 enters the body, branch 1 skips past the
+                    // trailing Jump; the Jump returns to the MatchTree.
+                    block.ops.insert(0, Op::PushSeq);
+                    block.ops.insert(
+                        1,
+                        Op::MatchTree(
+                            tree,
+                            vec![Target::Forward(1), Target::Forward(body_length + 3)],
+                        ),
+                    );
+                    block.ops.push(Op::SeqItem);
+                    block.ops.push(Op::Jump(Target::Back(body_length + 2)));
+                    let chain = Block::ops(block.ops);
+                    Ok(chain)
+                } else {
+                    Err(format!("cannot build match tree for {:?}", format))
+                }
+            }
+            Format::Repeat1(a) => {
+                if a.is_nullable(self.module) {
+                    return Err(format!("cannot repeat nullable format: {a:?}"));
+                }
+                let astar = Format::Repeat(a.clone());
+                let fa = Format::EMPTY;
+                let fb = Format::Tuple(vec![(**a).clone(), astar]);
+                if let Some(tree) = MatchTree::build(self.module, &[fa, fb], next.clone()) {
+                    let mut block = self.compile_format(a, Rc::new(Next::Repeat(a, next)))?;
+                    let body_length = block.ops.len();
+                    // Layout: [PushSeq, body.., SeqItem, MatchTree].
+                    // The body always runs once; tree branch 0 (nothing more to
+                    // repeat) falls off the end, branch 1 jumps back to the body.
+                    block.ops.insert(0, Op::PushSeq);
+                    block.ops.push(Op::SeqItem);
+                    block.ops.push(Op::MatchTree(
+                        tree,
+                        vec![Target::Forward(1), Target::Back(body_length + 1)],
+                    ));
+                    Ok(Block::ops(block.ops))
+                } else {
+                    Err(format!("cannot build match tree for {:?}", format))
+                }
+            }
+            Format::RepeatCount(expr, a) => {
+                let mut block = self.compile_format(a, next)?;
+                let body_length = block.ops.len();
+                // Layout: [PushSeq, PushCounter, CounterZero, body.., SeqItem,
+                // DecCounter, Jump, PopCounter].
+                block.ops.insert(0, Op::PushSeq);
+                block.ops.insert(1, Op::PushCounter(expr.clone()));
+                block.ops.insert(
+                    2,
+                    Op::CounterZero(Target::Forward(body_length + 4), Target::Forward(1)),
+                );
+                block.ops.push(Op::SeqItem);
+                block.ops.push(Op::DecCounter);
+                block.ops.push(Op::Jump(Target::Back(body_length + 3)));
+                // The zero branch of CounterZero lands exactly here. Drop the
+                // exhausted counter so that an enclosing counted repeat sees its
+                // own counter again; otherwise its DecCounter would pop this
+                // loop's 0 and panic on the checked subtraction.
+                block.ops.push(Op::PopCounter);
+                Ok(Block::ops(block.ops))
+            }
+            Format::RepeatUntilLast(expr, a) => {
+                let mut block = self.compile_format(a, next)?;
+                let body_length = block.ops.len();
+                // Layout: [PushSeq, body.., SeqItem, LambdaSeqLast, If].
+                block.ops.insert(0, Op::PushSeq);
+                block.ops.push(Op::SeqItem);
+                block.ops.push(Op::LambdaSeqLast(expr.clone()));
+                block
+                    .ops
+                    .push(Op::If(Target::Forward(1),
Target::Back(body_length + 2))); + Ok(Block::ops(block.ops)) + } + Format::RepeatUntilSeq(expr, a) => { + let mut block = self.compile_format(a, next)?; + let body_length = block.ops.len(); + block.ops.insert(0, Op::PushSeq); + block.ops.push(Op::SeqItem); + block.ops.push(Op::Lambda(expr.clone())); + block + .ops + .push(Op::If(Target::Forward(1), Target::Back(body_length + 2))); + Ok(Block::ops(block.ops)) + } + Format::Peek(a) => { + let mut chain = Block::op(Op::PushInput); + let s = self.compile_format(a, Rc::new(Next::Empty))?; + chain = self.chain_block(chain, s); + chain = self.chain_block(chain, Block::op(Op::PopInput)); + Ok(chain) + } + Format::PeekNot(a) => { + const MAX_LOOKAHEAD: usize = 1024; + match a.match_bounds(self.module).max { + None => return Err("PeekNot cannot require unbounded lookahead".to_string()), + Some(n) if n > MAX_LOOKAHEAD => { + return Err(format!( + "PeekNot cannot require > {MAX_LOOKAHEAD} bytes lookahead" + )) + } + _ => {} + } + let s = self.compile_format(a, Rc::new(Next::Empty))?; + Ok(Block::ops(vec![ + Op::PushInput, + Op::Negated(s), + Op::PopInput, + ])) + } + Format::Slice(expr, a) => { + let mut chain = Block::op(Op::PushInputSlice(expr.clone())); + let s = self.compile_format(a, Rc::new(Next::Empty))?; + chain = self.chain_block(chain, s); + chain = self.chain_block(chain, Block::op(Op::PopInput)); + Ok(chain) + } + Format::Bits(a) => { + let s = self.compile_format(a, Rc::new(Next::Empty))?; + Ok(Block::op(Op::Bits(s))) + } + Format::WithRelativeOffset(expr, a) => { + let mut chain = Block::op(Op::PushInputOffset(expr.clone())); + let s = self.compile_format(a, Rc::new(Next::Empty))?; + chain = self.chain_block(chain, s); + chain = self.chain_block(chain, Block::op(Op::PopInput)); + Ok(chain) + } + Format::Map(a, expr) => { + let s = self.compile_format(a, next.clone())?; + Ok(self.chain_block(s, Block::op(Op::Map(expr.clone())))) + } + Format::Compute(expr) => Ok(Block::op(Op::Compute(expr.clone()))), + 
Format::Let(name, expr, a) => { + let mut chain = Block::op(Op::PushLet(name.clone(), expr.clone())); + let s = self.compile_format(a, next.clone())?; + chain = self.chain_block(chain, s); + chain = self.chain_block(chain, Block::op(Op::PopLet)); + Ok(chain) + } + Format::Match(head, branches) => { + let mut bs = Vec::with_capacity(branches.len()); + for (index, (pattern, f)) in branches.iter().enumerate() { + let mut block = self.compile_format(f, next.clone())?; + block.ops.push(Op::Branch(index)); + block.ops.push(Op::PopPattern); + bs.push((pattern.clone(), block)); + } + let (mut ops, mbs) = Self::concat_blocks2(bs); + ops.insert(0, Op::MatchExpr(head.clone(), mbs)); + Ok(Block::ops(ops)) + } + Format::Dynamic(name, dynformat, a) => { + let mut chain = Block::op(Op::PushDynamic(name.clone(), dynformat.clone())); + let s = self.compile_format(a, next.clone())?; + chain = self.chain_block(chain, s); + chain = self.chain_block(chain, Block::op(Op::PopDynamic)); + Ok(chain) + } + Format::Apply(name) => Ok(Block::op(Op::ApplyDynamic(name.clone()))), + } + } +} + +impl Block { + fn op(op: Op) -> Block { + Block { ops: vec![op] } + } + + fn ops(ops: Vec) -> Block { + Block { ops } + } + + fn eval_clean<'input>( + &self, + program: &Program, + parent_scope: Option<&StreamCtxt<'_>>, + input: ReadCtxt<'input>, + ) -> ParseResult<(Value, ReadCtxt<'input>)> { + let mut stack = StreamCtxt { + parent: parent_scope, + frames: Vec::new(), + values: Vec::new(), + counters: Vec::new(), + }; + let mut input_stack = Vec::new(); + self.eval(program, &mut stack, &mut input_stack, input) + } + + fn eval<'a, 'input>( + &'a self, + program: &'a Program, + ctxt: &mut StreamCtxt<'a>, + input_stack: &mut Vec>, + mut input: ReadCtxt<'input>, + ) -> ParseResult<(Value, ReadCtxt<'input>)> { + let mut block = self; + 'blockloop: loop { + match block { + Block { ops } => { + let mut start = 0; + match 'oploop: loop { + for (ip, op) in ops[start..].iter().enumerate() { + match op { + 
Op::Jump(target) => match target { + Target::Block(b) => { + block = &program.blocks[*b]; + continue 'blockloop; + } + Target::Local(offset) => { + start = *offset; + continue 'oploop; + } + Target::Forward(offset) => { + start = start + ip + offset; + continue 'oploop; + } + Target::Back(offset) => { + start = start + ip - offset; + continue 'oploop; + } + }, + Op::Call(n, es) => { + let scope = &Scope::Other(ctxt); + let mut args = Vec::with_capacity(es.len()); + for (name, e) in es { + let v = e.eval_value(scope); + args.push((name.clone(), v)); + } + let call_scope = CallScope::new(args); + let mut new_ctxt = StreamCtxt { + parent: None, + frames: vec![StreamFrame::Call(call_scope)], + values: Vec::new(), + counters: Vec::new(), + }; + match program.blocks[*n].eval( + program, + &mut new_ctxt, + input_stack, + input, + ) { + Ok((v, new_input)) => { + ctxt.values.push(v); + input = new_input; + } + Err(err) => break 'oploop Err(err), + } + } + Op::Fail => { + let scope = &Scope::Other(ctxt); + break 'oploop Err(ParseError::fail(scope, input)); + } + Op::EndOfInput => match input.read_byte() { + None => ctxt.values.push(Value::UNIT), + Some((b, _)) => { + break 'oploop Err(ParseError::trailing(b, input.offset)) + } + }, + Op::Align(n) => { + let skip = (n - (input.offset % n)) % n; + match input + .split_at(skip) + .ok_or(ParseError::overrun(skip, input.offset)) + { + Ok((_, new_input)) => { + ctxt.values.push(Value::UNIT); + input = new_input; + } + Err(err) => break 'oploop Err(err), + } + } + Op::Byte(bs) => { + match input + .read_byte() + .ok_or(ParseError::overbyte(input.offset)) + { + Ok((b, new_input)) => { + if bs.contains(b) { + let v = Value::U8(b); + ctxt.values.push(v); + input = new_input; + } else { + break 'oploop Err(ParseError::unexpected( + b, + *bs, + input.offset, + )); + } + } + Err(err) => break 'oploop Err(err), + } + } + Op::Variant(label) => { + let v = ctxt.values.pop().unwrap(); + let v = Value::Variant(label.clone(), Box::new(v)); + 
ctxt.values.push(v); + } + Op::Branch(index) => { + let v = ctxt.values.pop().unwrap(); + let v = Value::Branch(*index, Box::new(v)); + ctxt.values.push(v); + } + Op::Parallel(branches) => { + match (|| { + for (index, d) in branches.iter().enumerate() { + let res = d.eval_clean(program, Some(ctxt), input); + if let Ok((v, input)) = res { + return Ok(( + Value::Branch(index, Box::new(v)), + input, + )); + } + } + Err(ParseError::fail(&Scope::Other(ctxt), input)) + })() { + Ok((v, new_input)) => { + input = new_input; + ctxt.values.push(v); + } + Err(err) => break 'oploop Err(err), + } + } + Op::MatchTree(tree, bs) => { + match tree.matches(input).ok_or(ParseError::NoValidBranch { + offset: input.offset, + }) { + Ok(index) => match bs[index] { + Target::Block(b) => { + block = &program.blocks[b]; + continue 'blockloop; + } + Target::Local(offset) => { + start = offset; + continue 'oploop; + } + Target::Forward(offset) => { + start = start + ip + offset; + continue 'oploop; + } + Target::Back(offset) => { + start = start + ip - offset; + continue 'oploop; + } + }, + Err(err) => break 'oploop Err(err), + } + } + Op::MatchExpr(head, branches) => { + let scope = &Scope::Other(ctxt); + let head = head.eval(scope); + for (pattern, target) in branches.iter() { + let mut pattern_scope = RecordScope::new(0); + if head + .coerce_mapped_value() + .matches_inner(&mut pattern_scope, pattern) + { + ctxt.frames.push(StreamFrame::Pattern(pattern_scope)); + match target { + Target::Block(b) => { + block = &program.blocks[*b]; + continue 'blockloop; + } + Target::Local(offset) => { + start = *offset; + continue 'oploop; + } + Target::Forward(offset) => { + start = start + ip + offset; + continue 'oploop; + } + Target::Back(offset) => { + start = start + ip - offset; + continue 'oploop; + } + } + } + } + panic!("non-exhaustive patterns"); + } + Op::PushTuple(num_fields) => { + let v = Value::Tuple(Vec::with_capacity(*num_fields)); + ctxt.values.push(v); + } + Op::TupleField => { + let 
v = ctxt.values.pop().unwrap(); + if let Value::Tuple(ref mut vs) = + ctxt.values.last_mut().unwrap() + { + vs.push(v); + } else { + panic!("expected tuple value"); + } + } + Op::PushRecord(num_fields) => { + let record_scope = RecordScope::new(*num_fields); + ctxt.frames.push(StreamFrame::Record(record_scope)); + } + Op::RecordField(name) => { + let v = ctxt.values.pop().unwrap(); + if let StreamFrame::Record(ref mut record_scope) = + ctxt.frames.last_mut().unwrap() + { + record_scope.record.push((name.clone(), v)); + } else { + panic!("expected record stack frame"); + } + } + Op::PopRecord => { + if let StreamFrame::Record(record_scope) = + ctxt.frames.pop().unwrap() + { + let v = Value::Record(record_scope.record); + ctxt.values.push(v); + } else { + panic!("expected record stack frame"); + } + } + Op::PushSeq => { + let v = Value::Seq(Vec::new()); + ctxt.values.push(v); + } + Op::SeqItem => { + let v = ctxt.values.pop().unwrap(); + if let Value::Seq(ref mut vs) = ctxt.values.last_mut().unwrap() + { + vs.push(v); + } else { + panic!("expected seq value"); + } + } + Op::PushPattern => { + let pattern_scope = RecordScope::new(0); + ctxt.frames.push(StreamFrame::Pattern(pattern_scope)); + } + Op::PopPattern => { + if let StreamFrame::Pattern(_pattern_scope) = + ctxt.frames.pop().unwrap() + { + } else { + panic!("expected pattern stack frame"); + } + } + Op::PushCounter(expr) => { + let scope = &Scope::Other(ctxt); + let n = expr.eval_value(scope).unwrap_usize(); + ctxt.counters.push(n); + } + Op::PopCounter => { + ctxt.counters.pop().unwrap(); + } + Op::DecCounter => { + let mut n = ctxt.counters.pop().unwrap(); + n = n.checked_sub(1).unwrap(); + ctxt.counters.push(n); + } + Op::CounterZero(bzero, bnonzero) => { + let target = if *ctxt.counters.last().unwrap() == 0 { + bzero + } else { + bnonzero + }; + match target { + Target::Block(b) => { + block = &program.blocks[*b]; + continue 'blockloop; + } + Target::Local(offset) => { + start = *offset; + continue 'oploop; 
+ } + Target::Forward(offset) => { + start = start + ip + offset; + continue 'oploop; + } + Target::Back(offset) => { + start = start + ip - offset; + continue 'oploop; + } + } + } + Op::Lambda(expr) => { + let arg = ctxt.values.last().unwrap(); + let scope = &Scope::Other(ctxt); + let v = expr.eval_lambda(scope, arg); + ctxt.values.push(v); + } + Op::LambdaSeqLast(expr) => { + if let Value::Seq(vs) = ctxt.values.last().unwrap() { + let arg = vs.last().unwrap(); + let scope = &Scope::Other(ctxt); + let v = expr.eval_lambda(scope, arg); + ctxt.values.push(v); + } else { + panic!("expected seq"); + } + } + Op::If(btrue, bfalse) => { + let target = if ctxt.values.pop().unwrap().unwrap_bool() { + btrue + } else { + bfalse + }; + match target { + Target::Block(b) => { + block = &program.blocks[*b]; + continue 'blockloop; + } + Target::Local(offset) => { + start = *offset; + continue 'oploop; + } + Target::Forward(offset) => { + start = start + ip + offset; + continue 'oploop; + } + Target::Back(offset) => { + start = start + ip - offset; + continue 'oploop; + } + } + } + Op::Map(expr) => { + let old_v = ctxt.values.pop().unwrap(); + let scope = &Scope::Other(ctxt); + let new_v = expr.eval_lambda(scope, &old_v); + let v = Value::Mapped(Box::new(old_v), Box::new(new_v)); + ctxt.values.push(v); + } + Op::Compute(expr) => { + let scope = &Scope::Other(ctxt); + let v = expr.eval_value(scope); + ctxt.values.push(v); + } + Op::PushLet(name, expr) => { + let scope = &Scope::Other(ctxt); + let v = expr.eval_value(scope); + let let_scope = LetScope::new(name, v); + ctxt.frames.push(StreamFrame::Let(let_scope)); + } + Op::PopLet => { + if let StreamFrame::Let(_let_scope) = ctxt.frames.pop().unwrap() + { + } else { + panic!("expected let scope"); + } + } + Op::PushDynamic( + name, + DynFormat::Huffman(lengths_expr, opt_values_expr), + ) => { + let scope = &Scope::Other(ctxt); + let lengths_val = lengths_expr.eval(scope); + let lengths = value_to_vec_usize(&lengths_val); + let 
lengths = match opt_values_expr { + None => lengths, + Some(e) => { + let values = value_to_vec_usize(&e.eval(scope)); + let mut new_lengths = [0].repeat(values.len()); + for i in 0..lengths.len() { + new_lengths[values[i]] = lengths[i]; + } + new_lengths + } + }; + let f = make_huffman_codes(&lengths); + let dyn_d = decoder::Compiler::compile_one(&f).unwrap(); + let decoder_scope = DecoderScope::new(name, dyn_d); + ctxt.frames.push(StreamFrame::Decoder(decoder_scope)); + } + Op::PopDynamic => { + if let StreamFrame::Decoder(_decoder_scope) = + ctxt.frames.pop().unwrap() + { + } else { + panic!("expected decoder scope"); + } + } + Op::ApplyDynamic(name) => { + let scope = &Scope::Other(ctxt); + let d = scope.get_decoder_by_name(&name); + match d.parse(&decoder::Program::new(), scope, input) { + Ok((v, new_input)) => { + input = new_input; + ctxt.values.push(v); + } + Err(err) => break 'oploop Err(err), + } + } + Op::PushInput => { + input_stack.push(input.clone()); + } + Op::PushInputSlice(expr) => { + let scope = &Scope::Other(ctxt); + let size = expr.eval_value(scope).unwrap_usize(); + match input + .split_at(size) + .ok_or(ParseError::overrun(size, input.offset)) + { + Ok((slice_input, next_input)) => { + input_stack.push(next_input); + input = slice_input; + } + Err(err) => break 'oploop Err(err), + } + } + Op::PushInputOffset(expr) => { + let scope = &Scope::Other(ctxt); + let offset = expr.eval_value(scope).unwrap_usize(); + match input + .split_at(offset) + .ok_or(ParseError::overrun(offset, input.offset)) + { + Ok((_, offset_input)) => { + input_stack.push(input); + input = offset_input; + } + Err(err) => break 'oploop Err(err), + } + } + Op::PopInput => { + input = input_stack.pop().unwrap(); + } + Op::Bits(s) => { + let mut bits = Vec::with_capacity(input.remaining().len() * 8); + let mut new_input_stack: Vec> = Vec::new(); + for b in input.remaining() { + for i in 0..8 { + bits.push((b & (1 << i)) >> i); + } + } + match s.eval( + program, + ctxt, + &mut 
new_input_stack, + ReadCtxt::new(&bits), + ) { + Ok((v, bits)) => { + let bytes_remain = bits.remaining().len() >> 3; + let bytes_read = input.remaining().len() - bytes_remain; + let (_, new_input) = + input.split_at(bytes_read).unwrap(); + ctxt.values.push(v); + input = new_input; + } + Err(err) => break 'oploop Err(err), + } + } + Op::Negated(s) => { + if s.eval_clean(program, Some(ctxt), input).is_ok() { + break 'oploop Err(ParseError::fail( + &Scope::Other(ctxt), + input, + )); + } else { + ctxt.values.push(Value::UNIT); + } + } + } + } + break 'oploop Ok(()); + } { + Ok(()) => { + let v = ctxt.values.pop().unwrap(); + return Ok((v, input)); + } + Err(err) => return Err(err), + } + } + } + } + } +} + +#[cfg(test)] +#[allow(clippy::redundant_clone)] +mod tests { + use crate::IntoLabel; + + use super::*; + + fn alts(fields: impl IntoIterator) -> Format { + Format::Union( + (fields.into_iter()) + .map(|(label, format)| Format::Variant(label.into(), Box::new(format))) + .collect(), + ) + } + + fn record(fields: impl IntoIterator) -> Format { + Format::Record( + (fields.into_iter()) + .map(|(label, format)| (label.into(), format)) + .collect(), + ) + } + + fn optional(format: Format) -> Format { + alts([("some", format), ("none", Format::EMPTY)]) + } + + fn repeat(format: Format) -> Format { + Format::Repeat(Box::new(format)) + } + + fn repeat1(format: Format) -> Format { + Format::Repeat1(Box::new(format)) + } + + fn is_byte(b: u8) -> Format { + Format::Byte(ByteSet::from([b])) + } + + fn not_byte(b: u8) -> Format { + Format::Byte(!ByteSet::from([b])) + } + + fn accepts(program: &Program, input: &[u8], tail: &[u8], expect: Value) { + let (val, remain) = program.run(ReadCtxt::new(input)).unwrap(); + assert_eq!(val, expect); + assert_eq!(remain.remaining(), tail); + } + + fn rejects(program: &Program, input: &[u8]) { + assert!(program.run(ReadCtxt::new(input)).is_err()); + } + + #[test] + fn compile_fail() { + let f = Format::Fail; + let d = 
Compiler::compile_one(&f).unwrap(); + rejects(&d, &[]); + rejects(&d, &[0x00]); + } + + #[test] + fn compile_empty() { + let f = Format::EMPTY; + let d = Compiler::compile_one(&f).unwrap(); + accepts(&d, &[], &[], Value::UNIT); + accepts(&d, &[0x00], &[0x00], Value::UNIT); + } + + #[test] + fn compile_byte_is() { + let f = is_byte(0x00); + let d = Compiler::compile_one(&f).unwrap(); + accepts(&d, &[0x00], &[], Value::U8(0)); + accepts(&d, &[0x00, 0xFF], &[0xFF], Value::U8(0)); + rejects(&d, &[0xFF]); + rejects(&d, &[]); + } + + #[test] + fn compile_byte_not() { + let f = not_byte(0x00); + let d = Compiler::compile_one(&f).unwrap(); + accepts(&d, &[0xFF], &[], Value::U8(0xFF)); + accepts(&d, &[0xFF, 0x00], &[0x00], Value::U8(0xFF)); + rejects(&d, &[0x00]); + rejects(&d, &[]); + } + + #[test] + fn compile_alt() { + let f = alts::<&str>([]); + let d = Compiler::compile_one(&f).unwrap(); + rejects(&d, &[]); + rejects(&d, &[0x00]); + } + + #[test] + fn compile_alt_byte() { + let f = alts([("a", is_byte(0x00)), ("b", is_byte(0xFF))]); + let d = Compiler::compile_one(&f).unwrap(); + accepts( + &d, + &[0x00], + &[], + Value::Branch(0, Box::new(Value::variant("a", Value::U8(0x00)))), + ); + accepts( + &d, + &[0xFF], + &[], + Value::Branch(1, Box::new(Value::variant("b", Value::U8(0xFF)))), + ); + rejects(&d, &[0x11]); + rejects(&d, &[]); + } + + #[test] + fn compile_alt_ambiguous() { + let f = alts([("a", is_byte(0x00)), ("b", is_byte(0x00))]); + assert!(Compiler::compile_one(&f).is_err()); + } + + #[test] + fn compile_alt_slice_byte() { + let slice_a = Format::Slice(Expr::U8(1), Box::new(is_byte(0x00))); + let slice_b = Format::Slice(Expr::U8(1), Box::new(is_byte(0xFF))); + let f = alts([("a", slice_a), ("b", slice_b)]); + let d = Compiler::compile_one(&f).unwrap(); + accepts( + &d, + &[0x00], + &[], + Value::Branch(0, Box::new(Value::variant("a", Value::U8(0x00)))), + ); + accepts( + &d, + &[0xFF], + &[], + Value::Branch(1, Box::new(Value::variant("b", Value::U8(0xFF)))), 
+ ); + rejects(&d, &[0x11]); + rejects(&d, &[]); + } + + #[test] + fn compile_alt_slice_ambiguous1() { + let slice_a = Format::Slice(Expr::U8(1), Box::new(is_byte(0x00))); + let slice_b = Format::Slice(Expr::U8(1), Box::new(is_byte(0x00))); + let f = alts([("a", slice_a), ("b", slice_b)]); + assert!(Compiler::compile_one(&f).is_err()); + } + + #[test] + fn compile_alt_slice_ambiguous2() { + let tuple_a = Format::Tuple(vec![is_byte(0x00), is_byte(0x00)]); + let tuple_b = Format::Tuple(vec![is_byte(0x00), is_byte(0xFF)]); + let slice_a = Format::Slice(Expr::U8(1), Box::new(tuple_a)); + let slice_b = Format::Slice(Expr::U8(1), Box::new(tuple_b)); + let f = alts([("a", slice_a), ("b", slice_b)]); + assert!(Compiler::compile_one(&f).is_err()); + } + + #[test] + fn compile_alt_fail() { + let f = alts([("a", Format::Fail), ("b", Format::Fail)]); + let d = Compiler::compile_one(&f).unwrap(); + rejects(&d, &[]); + } + + #[test] + fn compile_alt_end_of_input() { + let f = alts([("a", Format::EndOfInput), ("b", Format::EndOfInput)]); + assert!(Compiler::compile_one(&f).is_err()); + } + + #[test] + fn compile_alt_empty() { + let f = alts([("a", Format::EMPTY), ("b", Format::EMPTY)]); + assert!(Compiler::compile_one(&f).is_err()); + } + + #[test] + fn compile_alt_fail_end_of_input() { + let f = alts([("a", Format::Fail), ("b", Format::EndOfInput)]); + let d = Compiler::compile_one(&f).unwrap(); + accepts( + &d, + &[], + &[], + Value::Branch(1, Box::new(Value::variant("b", Value::UNIT))), + ); + } + + #[test] + fn compile_alt_end_of_input_or_byte() { + let f = alts([("a", Format::EndOfInput), ("b", is_byte(0x00))]); + let d = Compiler::compile_one(&f).unwrap(); + accepts( + &d, + &[], + &[], + Value::Branch(0, Box::new(Value::variant("a", Value::UNIT))), + ); + accepts( + &d, + &[0x00], + &[], + Value::Branch(1, Box::new(Value::variant("b", Value::U8(0x00)))), + ); + accepts( + &d, + &[0x00, 0x00], + &[0x00], + Value::Branch(1, Box::new(Value::variant("b", Value::U8(0x00)))), + 
); + rejects(&d, &[0x11]); + } + + #[test] + fn compile_alt_opt() { + let f = alts([("a", Format::EMPTY), ("b", is_byte(0x00))]); + let d = Compiler::compile_one(&f).unwrap(); + accepts( + &d, + &[0x00], + &[], + Value::Branch(1, Box::new(Value::variant("b", Value::U8(0x00)))), + ); + accepts( + &d, + &[], + &[], + Value::Branch(0, Box::new(Value::variant("a", Value::UNIT))), + ); + accepts( + &d, + &[0xFF], + &[0xFF], + Value::Branch(0, Box::new(Value::variant("a", Value::UNIT))), + ); + } + + #[test] + fn compile_alt_opt_next() { + let f = Format::Tuple(vec![optional(is_byte(0x00)), is_byte(0xFF)]); + let d = Compiler::compile_one(&f).unwrap(); + accepts( + &d, + &[0x00, 0xFF], + &[], + Value::Tuple(vec![ + Value::Branch(0, Box::new(Value::variant("some", Value::U8(0)))), + Value::U8(0xFF), + ]), + ); + accepts( + &d, + &[0xFF], + &[], + Value::Tuple(vec![ + Value::Branch(1, Box::new(Value::variant("none", Value::UNIT))), + Value::U8(0xFF), + ]), + ); + rejects(&d, &[0x00]); + rejects(&d, &[]); + } + + #[test] + fn compile_alt_opt_opt() { + let f = Format::Tuple(vec![optional(is_byte(0x00)), optional(is_byte(0xFF))]); + let d = Compiler::compile_one(&f).unwrap(); + accepts( + &d, + &[0x00, 0xFF], + &[], + Value::Tuple(vec![ + Value::Branch(0, Box::new(Value::variant("some", Value::U8(0)))), + Value::Branch(0, Box::new(Value::variant("some", Value::U8(0xFF)))), + ]), + ); + accepts( + &d, + &[0x00], + &[], + Value::Tuple(vec![ + Value::Branch(0, Box::new(Value::variant("some", Value::U8(0)))), + Value::Branch(1, Box::new(Value::variant("none", Value::UNIT))), + ]), + ); + accepts( + &d, + &[0xFF], + &[], + Value::Tuple(vec![ + Value::Branch(1, Box::new(Value::variant("none", Value::UNIT))), + Value::Branch(0, Box::new(Value::variant("some", Value::U8(0xFF)))), + ]), + ); + accepts( + &d, + &[], + &[], + Value::Tuple(vec![ + Value::Branch(1, Box::new(Value::variant("none", Value::UNIT))), + Value::Branch(1, Box::new(Value::variant("none", Value::UNIT))), + ]), + ); 
+ accepts( + &d, + &[], + &[], + Value::Tuple(vec![ + Value::Branch(1, Box::new(Value::variant("none", Value::UNIT))), + Value::Branch(1, Box::new(Value::variant("none", Value::UNIT))), + ]), + ); + accepts( + &d, + &[0x7F], + &[0x7F], + Value::Tuple(vec![ + Value::Branch(1, Box::new(Value::variant("none", Value::UNIT))), + Value::Branch(1, Box::new(Value::variant("none", Value::UNIT))), + ]), + ); + } + + #[test] + fn compile_alt_opt_ambiguous() { + let f = Format::Tuple(vec![optional(is_byte(0x00)), optional(is_byte(0x00))]); + assert!(Compiler::compile_one(&f).is_err()); + } + + #[test] + fn compile_alt_opt_ambiguous_slow() { + let alt = alts([ + ("0x00", is_byte(0x00)), + ("0x01", is_byte(0x01)), + ("0x02", is_byte(0x02)), + ("0x03", is_byte(0x03)), + ("0x04", is_byte(0x04)), + ("0x05", is_byte(0x05)), + ("0x06", is_byte(0x06)), + ("0x07", is_byte(0x07)), + ]); + let rec = record([ + ("0", alt.clone()), + ("1", alt.clone()), + ("2", alt.clone()), + ("3", alt.clone()), + ("4", alt.clone()), + ("5", alt.clone()), + ("6", alt.clone()), + ("7", alt.clone()), + ]); + let f = alts([("a", rec.clone()), ("b", rec.clone())]); + assert!(Compiler::compile_one(&f).is_err()); + } + + #[test] + fn compile_repeat_alt_repeat1_slow() { + let f = repeat(alts([ + ("a", repeat1(is_byte(0x00))), + ("b", is_byte(0x01)), + ("c", is_byte(0x02)), + ])); + assert!(Compiler::compile_one(&f).is_err()); + } + + #[test] + fn compile_repeat() { + let f = repeat(is_byte(0x00)); + let d = Compiler::compile_one(&f).unwrap(); + accepts(&d, &[], &[], Value::Seq(vec![])); + accepts(&d, &[0xFF], &[0xFF], Value::Seq(vec![])); + accepts(&d, &[0x00], &[], Value::Seq(vec![Value::U8(0x00)])); + accepts( + &d, + &[0x00, 0x00], + &[], + Value::Seq(vec![Value::U8(0x00), Value::U8(0x00)]), + ); + } + + #[test] + fn compile_repeat_repeat() { + let f = repeat(repeat(is_byte(0x00))); + assert!(Compiler::compile_one(&f).is_err()); + } + + #[test] + fn compile_cat_repeat() { + let f = 
Format::Tuple(vec![repeat(is_byte(0x00)), repeat(is_byte(0xFF))]); + let d = Compiler::compile_one(&f).unwrap(); + accepts( + &d, + &[], + &[], + Value::Tuple(vec![Value::Seq(vec![]), Value::Seq(vec![])]), + ); + accepts( + &d, + &[0x00], + &[], + Value::Tuple(vec![Value::Seq(vec![Value::U8(0x00)]), Value::Seq(vec![])]), + ); + accepts( + &d, + &[0xFF], + &[], + Value::Tuple(vec![Value::Seq(vec![]), Value::Seq(vec![Value::U8(0xFF)])]), + ); + accepts( + &d, + &[0x00, 0xFF], + &[], + Value::Tuple(vec![ + Value::Seq(vec![Value::U8(0x00)]), + Value::Seq(vec![Value::U8(0xFF)]), + ]), + ); + accepts( + &d, + &[0x00, 0xFF, 0x00], + &[0x00], + Value::Tuple(vec![ + Value::Seq(vec![Value::U8(0x00)]), + Value::Seq(vec![Value::U8(0xFF)]), + ]), + ); + accepts( + &d, + &[0x7F], + &[0x7F], + Value::Tuple(vec![Value::Seq(vec![]), Value::Seq(vec![])]), + ); + } + + #[test] + fn compile_cat_end_of_input() { + let f = Format::Tuple(vec![is_byte(0x00), Format::EndOfInput]); + let d = Compiler::compile_one(&f).unwrap(); + accepts( + &d, + &[0x00], + &[], + Value::Tuple(vec![Value::U8(0x00), Value::UNIT]), + ); + rejects(&d, &[]); + rejects(&d, &[0x00, 0x00]); + } + + #[test] + fn compile_cat_repeat_end_of_input() { + let f = Format::Tuple(vec![repeat(is_byte(0x00)), Format::EndOfInput]); + let d = Compiler::compile_one(&f).unwrap(); + accepts( + &d, + &[], + &[], + Value::Tuple(vec![Value::Seq(vec![]), Value::UNIT]), + ); + accepts( + &d, + &[0x00, 0x00, 0x00], + &[], + Value::Tuple(vec![ + Value::Seq(vec![Value::U8(0x00), Value::U8(0x00), Value::U8(0x00)]), + Value::UNIT, + ]), + ); + rejects(&d, &[0x00, 0x10]); + } + + #[test] + fn compile_cat_repeat_ambiguous() { + let f = Format::Tuple(vec![repeat(is_byte(0x00)), repeat(is_byte(0x00))]); + assert!(Compiler::compile_one(&f).is_err()); + } + + #[test] + fn compile_repeat_fields() { + let f = record([ + ("first", repeat(is_byte(0x00))), + ("second", repeat(is_byte(0xFF))), + ("third", repeat(is_byte(0x7F))), + ]); + 
assert!(Compiler::compile_one(&f).is_ok()); + } + + #[test] + fn compile_repeat_fields_ambiguous() { + let f = record([ + ("first", repeat(is_byte(0x00))), + ("second", repeat(is_byte(0xFF))), + ("third", repeat(is_byte(0x00))), + ]); + assert!(Compiler::compile_one(&f).is_err()); + } + + #[test] + fn compile_repeat_fields_okay() { + let f = record([ + ("first", repeat(is_byte(0x00))), + ( + "second-and-third", + optional(record([ + ( + "second", + Format::Tuple(vec![is_byte(0xFF), repeat(is_byte(0xFF))]), + ), + ("third", repeat(is_byte(0x00))), + ])), + ), + ]); + let d = Compiler::compile_one(&f).unwrap(); + accepts( + &d, + &[], + &[], + Value::record([ + ("first", Value::Seq(vec![])), + ( + "second-and-third", + Value::Branch(1, Box::new(Value::variant("none", Value::UNIT))), + ), + ]), + ); + accepts( + &d, + &[0x00], + &[], + Value::record([ + ("first", Value::Seq(vec![Value::U8(0x00)])), + ( + "second-and-third", + Value::Branch(1, Box::new(Value::variant("none", Value::UNIT))), + ), + ]), + ); + accepts( + &d, + &[0x00, 0xFF], + &[], + Value::record([ + ("first", Value::Seq(vec![Value::U8(0x00)])), + ( + "second-and-third", + Value::Branch( + 0, + Box::new(Value::variant( + "some", + Value::record([ + ( + "second", + Value::Tuple(vec![Value::U8(0xFF), Value::Seq(vec![])]), + ), + ("third", Value::Seq(vec![])), + ]), + )), + ), + ), + ]), + ); + accepts( + &d, + &[0x00, 0xFF, 0x00], + &[], + Value::record(vec![ + ("first", Value::Seq(vec![Value::U8(0x00)])), + ( + "second-and-third", + Value::Branch( + 0, + Box::new(Value::variant( + "some", + Value::record(vec![ + ( + "second", + Value::Tuple(vec![Value::U8(0xFF), Value::Seq(vec![])]), + ), + ("third", Value::Seq(vec![Value::U8(0x00)])), + ]), + )), + ), + ), + ]), + ); + accepts( + &d, + &[0x00, 0x7F], + &[0x7F], + Value::record(vec![ + ("first", Value::Seq(vec![Value::U8(0x00)])), + ( + "second-and-third", + Value::Branch(1, Box::new(Value::variant("none", Value::UNIT))), + ), + ]), + ); + } + + #[test] 
+ fn compile_repeat1() { + let f = repeat1(is_byte(0x00)); + let d = Compiler::compile_one(&f).unwrap(); + rejects(&d, &[]); + rejects(&d, &[0xFF]); + accepts(&d, &[0x00], &[], Value::Seq(vec![Value::U8(0x00)])); + accepts( + &d, + &[0x00, 0xFF], + &[0xFF], + Value::Seq(vec![Value::U8(0x00)]), + ); + accepts( + &d, + &[0x00, 0x00], + &[], + Value::Seq(vec![Value::U8(0x00), Value::U8(0x00)]), + ); + } + + #[test] + fn compile_align1() { + let f = Format::Tuple(vec![is_byte(0x00), Format::Align(1), is_byte(0xFF)]); + let d = Compiler::compile_one(&f).unwrap(); + accepts( + &d, + &[0x00, 0xFF], + &[], + Value::Tuple(vec![Value::U8(0x00), Value::UNIT, Value::U8(0xFF)]), + ); + } + + #[test] + fn compile_align2() { + let f = Format::Tuple(vec![is_byte(0x00), Format::Align(2), is_byte(0xFF)]); + let d = Compiler::compile_one(&f).unwrap(); + rejects(&d, &[0x00, 0xFF]); + rejects(&d, &[0x00, 0x99, 0x99, 0xFF]); + accepts( + &d, + &[0x00, 0x99, 0xFF], + &[], + Value::Tuple(vec![Value::U8(0x00), Value::UNIT, Value::U8(0xFF)]), + ); + } + + #[test] + fn compile_peek_not() { + let any_byte = Format::Byte(ByteSet::full()); + let a = Format::Tuple(vec![is_byte(0xFF), is_byte(0xFF)]); + let peek_not = Format::PeekNot(Box::new(a)); + let f = Format::Tuple(vec![peek_not, any_byte.clone(), any_byte.clone()]); + let d = Compiler::compile_one(&f).unwrap(); + rejects(&d, &[]); + rejects(&d, &[0xFF]); + rejects(&d, &[0xFF, 0xFF]); + accepts( + &d, + &[0x00, 0xFF], + &[], + Value::Tuple(vec![Value::Tuple(vec![]), Value::U8(0x00), Value::U8(0xFF)]), + ); + accepts( + &d, + &[0xFF, 0x00], + &[], + Value::Tuple(vec![Value::Tuple(vec![]), Value::U8(0xFF), Value::U8(0x00)]), + ); + } + + #[test] + fn compile_peek_not_switch() { + let any_byte = Format::Byte(ByteSet::full()); + let guard = Format::PeekNot(Box::new(Format::Tuple(vec![is_byte(0xFF), is_byte(0xFF)]))); + let a = Format::Tuple(vec![guard, Format::Repeat(Box::new(any_byte.clone()))]); + let b = Format::Tuple(vec![is_byte(0xFF), 
is_byte(0xFF)]); + let f = alts([("a", a), ("b", b)]); + let d = Compiler::compile_one(&f).unwrap(); + accepts( + &d, + &[], + &[], + Value::Branch( + 0, + Box::new(Value::Variant( + "a".into(), + Box::new(Value::Tuple(vec![Value::Tuple(vec![]), Value::Seq(vec![])])), + )), + ), + ); + accepts( + &d, + &[0xFF], + &[], + Value::Branch( + 0, + Box::new(Value::Variant( + "a".into(), + Box::new(Value::Tuple(vec![ + Value::Tuple(vec![]), + Value::Seq(vec![Value::U8(0xFF)]), + ])), + )), + ), + ); + accepts( + &d, + &[0x00, 0xFF], + &[], + Value::Branch( + 0, + Box::new(Value::Variant( + "a".into(), + Box::new(Value::Tuple(vec![ + Value::Tuple(vec![]), + Value::Seq(vec![Value::U8(0x00), Value::U8(0xFF)]), + ])), + )), + ), + ); + accepts( + &d, + &[0xFF, 0x00], + &[], + Value::Branch( + 0, + Box::new(Value::Variant( + "a".into(), + Box::new(Value::Tuple(vec![ + Value::Tuple(vec![]), + Value::Seq(vec![Value::U8(0xFF), Value::U8(0x00)]), + ])), + )), + ), + ); + accepts( + &d, + &[0xFF, 0xFF], + &[], + Value::Branch( + 1, + Box::new(Value::Variant( + "b".into(), + Box::new(Value::Tuple(vec![Value::U8(0xFF), Value::U8(0xFF)])), + )), + ), + ); + } + + #[test] + fn compile_peek_not_lookahead() { + let peek_not = Format::PeekNot(Box::new(repeat1(is_byte(0x00)))); + let any_byte = Format::Byte(ByteSet::full()); + let f = Format::Tuple(vec![peek_not, repeat1(any_byte)]); + assert!(Compiler::compile_one(&f).is_err()); + } +}