Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Attempt to improve the input manipulation formats #48

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 34 additions & 17 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -210,10 +210,14 @@ pub enum Format {
Repeat1(Box<Format>),
/// Repeat a format an exact number of times
RepeatCount(Expr, Box<Format>),
/// Restrict a format to a sub-stream of a given number of bytes
Slice(Expr, Box<Format>),
/// Matches a format at a byte offset relative to the current stream position
WithRelativeOffset(Expr, Box<Format>),
/// Matches a format without consuming the input
WithInput(Box<Format>),
Comment on lines +213 to +214
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not quite happy with the name of this format.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Interesting, so instead of, say, Intersection(A, B), there would be Cat(WithInput(A), B) 🤔

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, maybe intersection would be better… not sure!

/// Takes a given number of bytes from the start of the input, matching
/// the format against those bytes
TakeBytes(Expr, Box<Format>),
/// Drops a given number of bytes from the start of the input, matching
/// the format against the remaining input
DropBytes(Expr, Box<Format>),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

DropBytes seems unnecessarily parallel to TakeBytes: it seems like it doesn't need the Format argument and could just skip N bytes, whereas TakeBytes does require the Format argument.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh right, TakeBytes can be used to implement something like DropBytes?

def drop-bytes (n : Size) (f : Format) :=
  map _.data {
    skipped <- take-bytes n {},
    data <- f,
  };

Hadn’t thought of that! 🤔

/// Transform a decoded value with a function
Map(Func, Box<Format>),
/// Pattern match on an expression
Expand Down Expand Up @@ -261,8 +265,9 @@ pub enum Decoder {
While(MatchTree, Box<Decoder>),
Until(MatchTree, Box<Decoder>),
RepeatCount(Expr, Box<Decoder>),
Slice(Expr, Box<Decoder>),
WithRelativeOffset(Expr, Box<Decoder>),
WithInput(Box<Decoder>),
TakeBytes(Expr, Box<Decoder>),
DropBytes(Expr, Box<Decoder>),
Map(Func, Box<Decoder>),
Match(Expr, Vec<(Pattern, Decoder)>),
}
Expand Down Expand Up @@ -408,8 +413,9 @@ impl Format {
Format::Repeat(_a) => true,
Format::Repeat1(_a) => false,
Format::RepeatCount(_expr, _a) => true,
Format::Slice(_expr, _a) => true,
Format::WithRelativeOffset(_, _) => true,
Format::WithInput(_) => true,
Format::TakeBytes(_, _) => true,
Format::DropBytes(_, _) => true,
Format::Map(_f, a) => a.is_nullable(),
Format::Match(_, branches) => branches.iter().any(|(_, f)| f.is_nullable()),
}
Expand Down Expand Up @@ -523,10 +529,13 @@ impl<'a> MatchTreeLevel<'a> {
Format::RepeatCount(_expr, _a) => {
self.accept(index) // FIXME
}
Format::Slice(_expr, _a) => {
Format::WithInput(_) => {
self.accept(index) // FIXME
}
Format::WithRelativeOffset(_expr, _a) => {
Format::TakeBytes(_, _) => {
self.accept(index) // FIXME
}
Format::DropBytes(_, _) => {
self.accept(index) // FIXME
}
Format::Map(_f, a) => self.add(index, a, next),
Expand Down Expand Up @@ -684,13 +693,17 @@ impl Decoder {
let da = Box::new(Decoder::compile_next(a, next)?);
Ok(Decoder::RepeatCount(expr.clone(), da))
}
Format::Slice(expr, a) => {
Format::WithInput(a) => {
let da = Box::new(Decoder::compile_next(a, Rc::new(Next::Empty))?);
Ok(Decoder::WithInput(da))
}
Format::TakeBytes(expr, a) => {
let da = Box::new(Decoder::compile_next(a, Rc::new(Next::Empty))?);
Ok(Decoder::Slice(expr.clone(), da))
Ok(Decoder::TakeBytes(expr.clone(), da))
}
Format::WithRelativeOffset(expr, a) => {
Format::DropBytes(expr, a) => {
let da = Box::new(Decoder::compile_next(a, Rc::new(Next::Empty))?);
Ok(Decoder::WithRelativeOffset(expr.clone(), da))
Ok(Decoder::DropBytes(expr.clone(), da))
}
Format::Map(f, a) => {
let da = Box::new(Decoder::compile_next(a, next)?);
Expand Down Expand Up @@ -791,7 +804,11 @@ impl Decoder {
}
Some((Value::Seq(v), input))
}
Decoder::Slice(expr, a) => {
Decoder::WithInput(a) => {
let (v, _) = a.parse(stack, input)?;
Some((v, input))
}
Decoder::TakeBytes(expr, a) => {
let size = expr.eval_usize(stack);
if size <= input.len() {
let (slice, input) = input.split_at(size);
Expand All @@ -801,11 +818,11 @@ impl Decoder {
None
}
}
Decoder::WithRelativeOffset(expr, a) => {
Decoder::DropBytes(expr, a) => {
let offset = expr.eval_usize(stack);
if offset <= input.len() {
let (_, slice) = input.split_at(offset);
let (v, _) = a.parse(stack, slice)?;
let (v, input) = a.parse(stack, slice)?;
Some((v, input))
} else {
None
Expand Down
10 changes: 5 additions & 5 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,7 @@ fn jpeg_format() -> Format {
("length", u16be()),
(
"data",
Format::Slice(
Format::TakeBytes(
Expr::Sub(
Box::new(Expr::Var(0)), // length
Box::new(Expr::U16(2)),
Expand Down Expand Up @@ -736,7 +736,7 @@ fn png_format() -> Format {
record([
("length", u32be()), // FIXME < 2^31
("tag", tag),
("data", Format::Slice(Expr::Var(1), Box::new(data))),
("data", Format::TakeBytes(Expr::Var(1), Box::new(data))),
("crc", u32be()), // FIXME check this
])
}
Expand Down Expand Up @@ -793,7 +793,7 @@ fn riff_format() -> Format {
record([
("tag", tag),
("length", u32le()),
("data", Format::Slice(Expr::Var(0), Box::new(data))),
("data", Format::TakeBytes(Expr::Var(0), Box::new(data))),
(
"pad",
if_then_else(is_even(Expr::Var(1)), Format::EMPTY, is_byte(0x00)),
Expand Down Expand Up @@ -875,7 +875,7 @@ fn tiff_format() -> Format {
),
(
"ifd",
Format::WithRelativeOffset(
Format::WithInput(Box::new(Format::DropBytes(
// TODO: Offset from start of the TIFF header
Expr::Sub(Box::new(Expr::Var(0)), Box::new(Expr::U32(8))),
Box::new(Format::Match(
Expand All @@ -885,7 +885,7 @@ fn tiff_format() -> Format {
(Pattern::variant("be", Pattern::UNIT), ifd(true)),
],
)),
),
))),
),
])
}
17 changes: 11 additions & 6 deletions src/output/tree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,9 @@ impl<W: io::Write> Context<W> {
_ => panic!("expected sequence"),
}
}
Format::Slice(_, format) => self.write_decoded_value(value, format),
Format::WithRelativeOffset(_, format) => self.write_decoded_value(value, format),
Format::WithInput(format) => self.write_decoded_value(value, format),
Format::TakeBytes(_, format) => self.write_decoded_value(value, format),
Format::DropBytes(_, format) => self.write_decoded_value(value, format),
Format::Map(Func::Expr(_), _) => self.write_value(value),
Format::Map(Func::TupleProj(index), format) => match format.as_ref() {
Format::Tuple(formats) => self.write_decoded_value(value, &formats[*index]),
Expand Down Expand Up @@ -368,14 +369,18 @@ impl<W: io::Write> Context<W> {
write!(&mut self.writer, " ")?;
self.write_atomic_format(format)
}
Format::Slice(len, format) => {
write!(&mut self.writer, "slice ")?;
Format::WithInput(format) => {
write!(&mut self.writer, "with-input ")?;
self.write_atomic_format(format)
}
Format::TakeBytes(len, format) => {
write!(&mut self.writer, "take-bytes ")?;
self.write_atomic_expr(len)?;
write!(&mut self.writer, " ")?;
self.write_atomic_format(format)
}
Format::WithRelativeOffset(offset, format) => {
write!(&mut self.writer, "with-relative-offset ")?;
Format::DropBytes(offset, format) => {
write!(&mut self.writer, "drop-bytes ")?;
self.write_atomic_expr(offset)?;
write!(&mut self.writer, " ")?;
self.write_atomic_format(format)
Expand Down
18 changes: 9 additions & 9 deletions tests/expected/decode/test.jpg.stdout
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
│ │ └── app0 <- { ... } :=
│ │ ├── marker <- map _.1 (...) := 224
│ │ ├── length <- u16be := 16
│ │ └── data <- slice (length - 2) { ... } :=
│ │ └── data <- take-bytes (length - 2) { ... } :=
│ │ ├── identifier <- map _.string { ... } :=
│ │ │ ├── 0 <- [!= 0] := 74
│ │ │ ├── 1 <- [!= 0] := 70
Expand All @@ -25,7 +25,7 @@
│ │ │ └── dqt <- { ... } :=
│ │ │ ├── marker <- map _.1 (...) := 219
│ │ │ ├── length <- u16be := 67
│ │ │ └── data <- slice (length - 2) { ... } :=
│ │ │ └── data <- take-bytes (length - 2) { ... } :=
│ │ │ ├── precision-table-id <- u8 := 0
│ │ │ └── elements <- repeat u8 :=
│ │ │ ├── 0 <- u8 := 8
Expand All @@ -44,7 +44,7 @@
│ │ └── dqt <- { ... } :=
│ │ ├── marker <- map _.1 (...) := 219
│ │ ├── length <- u16be := 67
│ │ └── data <- slice (length - 2) { ... } :=
│ │ └── data <- take-bytes (length - 2) { ... } :=
│ │ ├── precision-table-id <- u8 := 1
│ │ └── elements <- repeat u8 :=
│ │ ├── 0 <- u8 := 9
Expand All @@ -63,7 +63,7 @@
│ │ └── sof0 <- { ... } :=
│ │ ├── marker <- map _.1 (...) := 192
│ │ ├── length <- u16be := 17
│ │ └── data <- slice (length - 2) { ... } :=
│ │ └── data <- take-bytes (length - 2) { ... } :=
│ │ ├── sample-precision <- u8 := 8
│ │ ├── num-lines <- u16be := 97
│ │ ├── num-samples-per-line <- u16be := 105
Expand All @@ -87,7 +87,7 @@
│ │ │ │ └── dht <- { ... } :=
│ │ │ │ ├── marker <- map _.1 (...) := 196
│ │ │ │ ├── length <- u16be := 27
│ │ │ │ └── data <- slice (length - 2) { ... } :=
│ │ │ │ └── data <- take-bytes (length - 2) { ... } :=
│ │ │ │ ├── class-table-id <- u8 := 0
│ │ │ │ ├── num-codes <- repeat-count 16 u8 :=
│ │ │ │ │ ├── 0 <- u8 := 0
Expand Down Expand Up @@ -115,7 +115,7 @@
│ │ │ │ └── dht <- { ... } :=
│ │ │ │ ├── marker <- map _.1 (...) := 196
│ │ │ │ ├── length <- u16be := 58
│ │ │ │ └── data <- slice (length - 2) { ... } :=
│ │ │ │ └── data <- take-bytes (length - 2) { ... } :=
│ │ │ │ ├── class-table-id <- u8 := 16
│ │ │ │ ├── num-codes <- repeat-count 16 u8 :=
│ │ │ │ │ ├── 0 <- u8 := 0
Expand Down Expand Up @@ -147,7 +147,7 @@
│ │ │ │ └── dht <- { ... } :=
│ │ │ │ ├── marker <- map _.1 (...) := 196
│ │ │ │ ├── length <- u16be := 26
│ │ │ │ └── data <- slice (length - 2) { ... } :=
│ │ │ │ └── data <- take-bytes (length - 2) { ... } :=
│ │ │ │ ├── class-table-id <- u8 := 1
│ │ │ │ ├── num-codes <- repeat-count 16 u8 :=
│ │ │ │ │ ├── 0 <- u8 := 0
Expand All @@ -174,7 +174,7 @@
│ │ │ └── dht <- { ... } :=
│ │ │ ├── marker <- map _.1 (...) := 196
│ │ │ ├── length <- u16be := 38
│ │ │ └── data <- slice (length - 2) { ... } :=
│ │ │ └── data <- take-bytes (length - 2) { ... } :=
│ │ │ ├── class-table-id <- u8 := 17
│ │ │ ├── num-codes <- repeat-count 16 u8 :=
│ │ │ │ ├── 0 <- u8 := 0
Expand Down Expand Up @@ -205,7 +205,7 @@
│ │ ├── sos <- { ... } :=
│ │ │ ├── marker <- map _.1 (...) := 218
│ │ │ ├── length <- u16be := 12
│ │ │ └── data <- slice (length - 2) { ... } :=
│ │ │ └── data <- take-bytes (length - 2) { ... } :=
│ │ │ ├── num-image-components <- u8 := 3
│ │ │ ├── image-components <- repeat-count num-image-components { ... } :=
│ │ │ │ ├── 0 <- { ... } :=
Expand Down
16 changes: 8 additions & 8 deletions tests/expected/decode/test.png.stdout
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
│ │ ├── 1 <- [= 72] := 72
│ │ ├── 2 <- [= 68] := 68
│ │ └── 3 <- [= 82] := 82
│ ├── data <- slice length { ... } :=
│ ├── data <- take-bytes length { ... } :=
│ │ ├── width <- u32be := 50
│ │ ├── height <- u32be := 50
│ │ ├── bit-depth <- u8 := 8
Expand All @@ -33,7 +33,7 @@
│ │ │ ├── 1 <- [= 76] := 76
│ │ │ ├── 2 <- [= 84] := 84
│ │ │ └── 3 <- [= 69] := 69
│ │ ├── data <- slice length (repeat u8) :=
│ │ ├── data <- take-bytes length (repeat u8) :=
│ │ │ ├── 0 <- u8 := 192
│ │ │ ├── 1 <- u8 := 192
│ │ │ ├── 2 <- u8 := 192
Expand All @@ -55,7 +55,7 @@
│ │ │ ├── 1 <- [= 82] := 82
│ │ │ ├── 2 <- [= 78] := 78
│ │ │ └── 3 <- [= 83] := 83
│ │ ├── data <- slice length (repeat u8) :=
│ │ ├── data <- take-bytes length (repeat u8) :=
│ │ │ └── 0 <- u8 := 0
│ │ └── crc <- u32be := 1088870502
│ ├── 2 <- { ... } :=
Expand All @@ -66,7 +66,7 @@
│ │ │ ├── 1 <- [= 75] := 75
│ │ │ ├── 2 <- [= 71] := 71
│ │ │ └── 3 <- [= 68] := 68
│ │ ├── data <- slice length (repeat u8) :=
│ │ ├── data <- take-bytes length (repeat u8) :=
│ │ │ └── 0 <- u8 := 0
│ │ └── crc <- u32be := 2282036552
│ ├── 3 <- { ... } :=
Expand All @@ -77,7 +77,7 @@
│ │ │ ├── 1 <- [= 72] := 72
│ │ │ ├── 2 <- [= 89] := 89
│ │ │ └── 3 <- [= 115] := 115
│ │ ├── data <- slice length (repeat u8) :=
│ │ ├── data <- take-bytes length (repeat u8) :=
│ │ │ ├── 0 <- u8 := 0
│ │ │ ├── 1 <- u8 := 0
│ │ │ ├── 2 <- u8 := 11
Expand All @@ -96,7 +96,7 @@
│ │ ├── 1 <- [= 73] := 73
│ │ ├── 2 <- [= 77] := 77
│ │ └── 3 <- [= 69] := 69
│ ├── data <- slice length (repeat u8) :=
│ ├── data <- take-bytes length (repeat u8) :=
│ │ ├── 0 <- u8 := 7
│ │ ├── 1 <- u8 := 213
│ │ ├── 2 <- u8 := 11
Expand All @@ -113,7 +113,7 @@
│ │ ├── 1 <- [= 68] := 68
│ │ ├── 2 <- [= 65] := 65
│ │ └── 3 <- [= 84] := 84
│ ├── data <- slice length (repeat u8) :=
│ ├── data <- take-bytes length (repeat u8) :=
│ │ ├── 0 <- u8 := 72
│ │ ├── 1 <- u8 := 199
│ │ ├── 2 <- u8 := 165
Expand All @@ -135,5 +135,5 @@
│ ├── 1 <- [= 69] := 69
│ ├── 2 <- [= 78] := 78
│ └── 3 <- [= 68] := 68
├── data <- slice length () := ()
├── data <- take-bytes length () := ()
└── crc <- u32be := 2923585666
8 changes: 4 additions & 4 deletions tests/expected/decode/test.webp.stdout
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
│ ├── 2 <- [= 70] := 70
│ └── 3 <- [= 70] := 70
├── length <- u32le := 1140
├── data <- slice length { ... } :=
├── data <- take-bytes length { ... } :=
│ ├── tag <- (...) :=
│ │ ├── 0 <- u8 := 87
│ │ ├── 1 <- u8 := 69
Expand All @@ -19,7 +19,7 @@
│ │ │ ├── 2 <- u8 := 56
│ │ │ └── 3 <- u8 := 88
│ │ ├── length <- u32le := 10
│ │ ├── data <- slice length (repeat u8) :=
│ │ ├── data <- take-bytes length (repeat u8) :=
│ │ │ ├── 0 <- u8 := 8
│ │ │ ├── 1 <- u8 := 0
│ │ │ ├── 2 <- u8 := 0
Expand All @@ -38,7 +38,7 @@
│ │ │ ├── 2 <- u8 := 56
│ │ │ └── 3 <- u8 := 76
│ │ ├── length <- u32le := 963
│ │ ├── data <- slice length (repeat u8) :=
│ │ ├── data <- take-bytes length (repeat u8) :=
│ │ │ ├── 0 <- u8 := 47
│ │ │ ├── 1 <- u8 := 72
│ │ │ ├── 2 <- u8 := 128
Expand All @@ -59,7 +59,7 @@
│ │ ├── 2 <- u8 := 73
│ │ └── 3 <- u8 := 70
│ ├── length <- u32le := 138
│ ├── data <- slice length (repeat u8) :=
│ ├── data <- take-bytes length (repeat u8) :=
│ │ ├── 0 <- u8 := 69
│ │ ├── 1 <- u8 := 120
│ │ ├── 2 <- u8 := 105
Expand Down
Loading