diff --git a/Cargo.lock b/Cargo.lock index ee6b204..c7800c1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -143,6 +143,21 @@ dependencies = [ "rustc-demangle", ] +[[package]] +name = "bit-set" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" + [[package]] name = "bitflags" version = "1.3.2" @@ -574,6 +589,12 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "fastrand" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" + [[package]] name = "fnv" version = "1.0.7" @@ -610,6 +631,7 @@ dependencies = [ "owo-colors", "parking_lot 0.12.1", "pixel-canvas", + "proptest", "rand", "serde", "thiserror", @@ -873,6 +895,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "libm" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" + [[package]] name = "linux-raw-sys" version = "0.4.13" @@ -1069,6 +1097,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", + "libm", ] [[package]] @@ -1259,6 +1288,32 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "proptest" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "31b476131c3c86cb68032fdc5cb6d5a1045e3e42d96b69fa599fd77701e1f5bf" +dependencies = [ + "bit-set", + "bit-vec", + "bitflags 2.5.0", + "lazy_static", + "num-traits", + "rand", + "rand_chacha", + "rand_xorshift", + "regex-syntax 
0.8.4", + "rusty-fork", + "tempfile", + "unarray", +] + +[[package]] +name = "quick-error" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" + [[package]] name = "quote" version = "1.0.28" @@ -1298,6 +1353,15 @@ dependencies = [ "getrandom", ] +[[package]] +name = "rand_xorshift" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d25bf25ec5ae4a3f1b92f929810509a2f53d7dca2f50b794ff57e3face536c8f" +dependencies = [ + "rand_core", +] + [[package]] name = "raw-window-handle" version = "0.4.3" @@ -1345,7 +1409,7 @@ dependencies = [ "aho-corasick", "memchr", "regex-automata", - "regex-syntax", + "regex-syntax 0.7.5", ] [[package]] @@ -1356,7 +1420,7 @@ checksum = "49530408a136e16e5b486e883fbb6ba058e8e4e8ae6621a77b048b314336e629" dependencies = [ "aho-corasick", "memchr", - "regex-syntax", + "regex-syntax 0.7.5", ] [[package]] @@ -1365,6 +1429,12 @@ version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" +[[package]] +name = "regex-syntax" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" + [[package]] name = "rustc-demangle" version = "0.1.23" @@ -1384,6 +1454,18 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "rusty-fork" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb3dcc6e454c328bb824492db107ab7c0ae8fcffe4ad210136ef014458c1bc4f" +dependencies = [ + "fnv", + "quick-error", + "tempfile", + "wait-timeout", +] + [[package]] name = "ryu" version = "1.0.18" @@ -1526,6 +1608,18 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"36ae8932fcfea38b7d3883ae2ab357b0d57a02caaa18ebb4f5ece08beaec4aa0" +[[package]] +name = "tempfile" +version = "3.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1" +dependencies = [ + "cfg-if 1.0.0", + "fastrand", + "rustix", + "windows-sys 0.52.0", +] + [[package]] name = "terminal_size" version = "0.3.0" @@ -1600,6 +1694,12 @@ dependencies = [ "winnow", ] +[[package]] +name = "unarray" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94" + [[package]] name = "unicode-ident" version = "1.0.9" @@ -1618,6 +1718,15 @@ version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +[[package]] +name = "wait-timeout" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f200f5b12eb75f8c1ed65abd4b2db8a6e1b138a20de009dacee265a2498f3f6" +dependencies = [ + "libc", +] + [[package]] name = "walkdir" version = "2.5.0" diff --git a/Cargo.toml b/Cargo.toml index f818f30..68f5ffb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -51,6 +51,7 @@ owo-colors = "3.5.0" serde = { version = "1.0.171", features = ["derive"] } toml = "0.7.6" ahash = "0.8.11" +proptest = "1.4.0" [dependencies.clap] version = "4.5" diff --git a/proptest-regressions/parser/mod.txt b/proptest-regressions/parser/mod.txt new file mode 100644 index 0000000..f2395a3 --- /dev/null +++ b/proptest-regressions/parser/mod.txt @@ -0,0 +1,11 @@ +# Seeds for failure cases proptest has generated in the past. It is +# automatically read and these particular cases re-run before any +# novel cases are generated. +# +# It is recommended to check this file in to source control so that +# everyone who runs the test benefits from these saved cases. 
+cc 407ea58689884bfff9c2a39384df853776b4dd55a0122283144837573b6eac0a # shrinks to s = ";" +cc 20080fd2cb666a3ffd533f75b19a24947f138ae0bd2af352127b1f709220013c # shrinks to s = "\"" +cc 4a9f4b7272dadf4249b422581a333ae5645ac9e75d51115272e3af1a90400ad0 # shrinks to s = "\"\\" +cc 87448d7f9120eba78b89edebf33cc84139071f52a0137cdfcbc191274ca6f006 # shrinks to s = "-" +cc 78096578d744f3a6e05d123f95540e1bdab2d2cf815e870f2a65c9b023cd0404 # shrinks to s = "'" diff --git a/src/parser/error.rs b/src/parser/error.rs index 588ca20..d42b3db 100644 --- a/src/parser/error.rs +++ b/src/parser/error.rs @@ -65,6 +65,21 @@ pub enum LexerError { #[error("Expected '{}', but found '{}'", expected.bright_blue(), found.bright_yellow())] UnexpectedChar { expected: char, found: char }, + + #[error("Unknown char '{0}'")] + UnknownChar(char), + + #[error("Unexpected escape sequence '\\{0}'")] + UnexpectedEscapeSequence(char), + + #[error("Expected escape sequence after '\\', found nothing")] + ExpectedEscapeSequence, + + #[error("Expected character, found nothing")] + ExpectedChar, + + #[error("Invalid number '{0}'")] + InvalidNumber(String), } #[derive(Debug, thiserror::Error)] diff --git a/src/parser/lexer.rs b/src/parser/lexer.rs index ad87b38..710affc 100644 --- a/src/parser/lexer.rs +++ b/src/parser/lexer.rs @@ -104,24 +104,26 @@ impl Lexer { } // WARN: assumes the '\' has already been consumed - fn next_escape_sequence(&mut self) -> char { + fn next_escape_sequence(&mut self) -> Result { match self.consume() { - Some('n') => '\n', - Some('t') => '\t', - Some('r') => '\r', - Some('\\') => '\\', - Some('"') => '"', - _ => todo!("Return an Error::InvalidEscapeSequence"), + Some('n') => Ok('\n'), + Some('t') => Ok('\t'), + Some('r') => Ok('\r'), + Some('\\') => Ok('\\'), + Some('"') => Ok('"'), + Some(c) => Err(LexerError::UnexpectedEscapeSequence(c).into()), + None => Err(LexerError::ExpectedEscapeSequence.into()), } } fn next_string_literal(&mut self) -> Result { let mut string = 
String::new(); expect!(Some('"') = self.consume()); - while self.peek() != Some('"') { + // Consume chars while there are some to consume, but they're not the end of the string yet + while self.peek().is_some() && self.peek() != Some('"') { let mut c = self.consume().unwrap(); if c == '\\' { - c = self.next_escape_sequence(); + c = self.next_escape_sequence()?; } string.push(c); } @@ -132,16 +134,19 @@ impl Lexer { fn next_char_literal(&mut self) -> Result { expect!(Some('\'') = self.consume()); - let mut c = self.consume().unwrap(); + let mut c = match self.consume() { + Some(c) => c, + None => return Err(LexerError::ExpectedChar.into()), + }; if c == '\\' { - c = self.next_escape_sequence(); + c = self.next_escape_sequence()?; } expect!(Some('\'') = self.consume()); Ok(Token::new(Data::CharLiteral(c))) } - fn next_number(&mut self) -> Token { + fn next_number(&mut self) -> Result { let cursor = self.cursor; let mut i = 0; while let Some('-' | '.' | '0'..='9' | 'x' | 'o' | 'a'..='f' | 'A'..='F') = self.peek() { @@ -170,20 +175,20 @@ impl Lexer { }; if res.is_err() { - let fres = slice.parse::(); - if let Ok(mut x) = fres { - if negative { - x = -x; - } - return Token::new(Data::Float(x)); + let mut fres = slice + .parse::() + .map_err(|_| LexerError::InvalidNumber(slice.to_string()))?; + if negative { + fres = -fres; } + return Ok(Token::new(Data::Float(fres))); } let mut x = res.unwrap() as i32; if negative { x = -x; } - Token::new(Data::Integer(x)) + Ok(Token::new(Data::Integer(x))) } } @@ -237,7 +242,7 @@ impl Iterator for Lexer { Some(Ok(Token::new(Data::Char(next_char)).with_ctx(ctx))) } - '-' | '0'..='9' => Some(Ok(self.next_number().with_ctx(ctx))), + '-' | '0'..='9' => Some(self.next_number().with_ctx(ctx)), allowed_identifier!(start) => { let identifier = self.next_identifier(); @@ -255,7 +260,7 @@ impl Iterator for Lexer { } } - _ => panic!("Unimplemented character: {}", next_char), + other => Some(Err(LexerError::UnknownChar(other).into())), } } } diff 
--git a/src/parser/mod.rs b/src/parser/mod.rs index 49bd3ef..af04fb1 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -143,6 +143,11 @@ pub fn parse(entry_file: &str, data_segment_size: usize) -> ParseResult { parse_tokens(tokens, data_segment_size) } +pub fn parse_str(content: String, data_segment_size: usize) -> ParseResult { + let tokens = Lexer::from_content(content, "").preprocess().peekable(); + parse_tokens(tokens, data_segment_size) +} + pub fn parse_tokens>>( mut tokens: Peekable, data_segment_size: usize, @@ -261,3 +266,16 @@ fn parse_globl( _ => Err(ParserError::UnexpectedToken(Some(label.data)).with_context(label.ctx)), } } + +#[cfg(test)] +mod tests { + use super::*; + use proptest::prelude::*; + + proptest! { + #[test] + fn parse_doesnt_crash(s in "\\PC*") { + let _ = parse_str(s, 0x100); + } + } +}