From f2806e9ec743bda560dffbad5e4ee750efe9cf72 Mon Sep 17 00:00:00 2001 From: "liuqiang.06" Date: Thu, 19 Dec 2024 11:29:30 +0800 Subject: [PATCH] fix: should validate when parsing into lazyvalue --- fuzz/Cargo.toml | 1 + fuzz/src/lib.rs | 19 ++++++++++++++++--- src/lazyvalue/owned.rs | 17 +++++++++++++++++ src/parser.rs | 26 ++++++++++++++++---------- src/serde/de.rs | 2 +- 5 files changed, 51 insertions(+), 14 deletions(-) diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 02339d7..16f6037 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -16,6 +16,7 @@ faststr = "0.2" libfuzzer-sys = "0.4" serde = { version = "1.0", features = ["derive"] } serde_json = { version = "1.0", features = ["float_roundtrip"] } +simdutf8 = "0.1" sonic-rs = { path = ".." } [[bin]] diff --git a/fuzz/src/lib.rs b/fuzz/src/lib.rs index 44a9a5a..28138e0 100644 --- a/fuzz/src/lib.rs +++ b/fuzz/src/lib.rs @@ -111,9 +111,22 @@ pub fn sonic_rs_fuzz_data(data: &[u8]) { } } } - Err(_) => { - let _ = from_slice::(data) - .expect_err(&format!("parse invalid json {:?} failed", data)); + Err(e) => { + let _ = from_slice::(data).expect_err(&format!( + "parse invalid json {:?} failed, should return error {e} ", + data + )); + + // LazyValue should return error if the json is invalid + let msg = e.to_string(); + if (msg.starts_with("expected ") || msg.starts_with("EOF")) + && simdutf8::basic::from_utf8(data).is_ok() + { + let _ = from_slice::(data).expect_err(&format!( + "parse invalid json {:?} failed, should return error {msg}", + data + )); + } } } diff --git a/src/lazyvalue/owned.rs b/src/lazyvalue/owned.rs index 6f316da..c97719c 100644 --- a/src/lazyvalue/owned.rs +++ b/src/lazyvalue/owned.rs @@ -890,4 +890,21 @@ mod test { assert_eq!(to_string(&root).unwrap(), to_string(&root2).unwrap()); assert_eq!(to_string(&root).unwrap(), to_string(&root3).unwrap()); } + + #[test] + fn test_owned_from_invalid() { + for json in [ + r#"", + r#"{"a":"#, + r#"{"a":123"#, + r#"{"a":}"#, + r#"{"a": x}"#, + r#"{"a":1.}"#, + r#"{"a:1.}"#, + r#"{"a" 1}"#, + r#"{"a"[1}}"#, + ] { + crate::from_str::(json).expect_err(json); + } + } } diff --git a/src/parser.rs b/src/parser.rs index 937293e..8c8b2c9 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -581,28 +581,34 @@ where #[inline(always)] pub(crate) fn get_owned_lazyvalue(&mut self, strict: bool) -> Result { let c = self.skip_space(); - let start = self.read.index() - 1; - match c { - Some(b'"') => match self.skip_string()? { - ParseStatus::None => { - let slice = self.read.slice_unchecked(start, self.read.index()); - let raw = unsafe { self.read.slice_ref(slice).as_faststr() }; - return Ok(OwnedLazyValue::from_non_esc_str(raw)); + let start = match c { + Some(b'"') => { + let start = self.read.index() - 1; + match self.skip_string()? { + ParseStatus::None => { + let slice = self.read.slice_unchecked(start, self.read.index()); + let raw = unsafe { self.read.slice_ref(slice).as_faststr() }; + return Ok(OwnedLazyValue::from_non_esc_str(raw)); + } + ParseStatus::HasEscaped => {} } - ParseStatus::HasEscaped => {} - }, + start + } Some(b't') if self.match_literal("rue")? => return Ok(true.into()), Some(b'f') if self.match_literal("alse")? => return Ok(false.into()), Some(b'n') if self.match_literal("ull")? => return Ok(().into()), + None => return perr!(self, EofWhileParsing), _ => { + let start = self.read.index() - 1; self.read.backward(1); if strict { self.skip_one()?; } else { self.skip_one_unchecked()?; } + start } - } + }; let end = self.read.index(); let sub = self.read.slice_unchecked(start, end); let raw = unsafe { self.read.slice_ref(sub).as_faststr() }; diff --git a/src/serde/de.rs b/src/serde/de.rs index 7bc0f12..0b35b81 100644 --- a/src/serde/de.rs +++ b/src/serde/de.rs @@ -373,7 +373,7 @@ impl<'de, R: Reader<'de>> Deserializer { where V: de::Visitor<'de>, { - let val = ManuallyDrop::new(self.parser.get_owned_lazyvalue(false)?); + let val = ManuallyDrop::new(self.parser.get_owned_lazyvalue(true)?); // #Safety // the json is validate before parsing json, and we pass the document using visit_bytes // here.