optimize all-ascii json string parsing - webauthn_rp

commit 5239f6a43dda53268908438de4449f56910c8405
parent 8300b9ebdfeed0354c33efbf4720577b93b443d2
Author: Zack Newman <zack@philomathiclife.com>
Date:   Sat, 25 Jan 2025 17:54:19 -0700

optimize all-ascii json string parsing

Diffstat:
M Cargo.toml  | 6 +++---
M src/response.rs  | 28 ++++++++++++++++++++++++++++

2 files changed, 31 insertions(+), 3 deletions(-)
diff --git a/Cargo.toml b/Cargo.toml
@@ -17,7 +17,7 @@ all-features = true
 rustdoc-args = ["--cfg", "docsrs"]
 
 [dependencies]
-data-encoding = { version = "2.6.0", default-features = false }
+data-encoding = { version = "2.7.0", default-features = false }
 ed25519-dalek = { version = "2.1.1", default-features = false, features = ["fast"] }
 p256 = { version = "0.13.2", default-features = false, features = ["ecdsa"] }
 p384 = { version = "0.13.0", default-features = false, features = ["ecdsa"] }
@@ -25,7 +25,7 @@ precis-profiles = { version = "0.1.11", default-features = false }
 rand = { version = "0.8.5", default-features = false, features = ["std", "std_rng"] }
 rsa = { version = "0.9.7", default-features = false, features = ["sha2"] }
 serde = { version = "1.0.217", default-features = false, features = ["alloc"], optional = true }
-serde_json = { version = "1.0.135", default-features = false, features = ["alloc"], optional = true }
+serde_json = { version = "1.0.137", default-features = false, features = ["alloc"], optional = true }
 url = { version = "2.5.4", default-features = false }
 
 [dev-dependencies]
@@ -33,7 +33,7 @@ data-encoding = { version = "2.6.0", default-features = false, features = ["allo
 ed25519-dalek = { version = "2.1.1", default-features = false, features = ["alloc", "pkcs8"] }
 p256 = { version = "0.13.2", default-features = false, features = ["pem"] }
 p384 = { version = "0.13.0", default-features = false, features = ["pkcs8"] }
-serde_json = { version = "1.0.135", default-features = false, features = ["preserve_order"] }
+serde_json = { version = "1.0.137", default-features = false, features = ["preserve_order"] }
 
 
 ### FEATURES #################################################################
diff --git a/src/response.rs b/src/response.rs
@@ -870,6 +870,7 @@ impl<const R: bool> LimitedVerificationParser<R> {
     /// portion of `val` _after_ the closing quote. The limited verification algorithm is adhered to; thus the
     /// _only_ Unicode scalar values that are allowed (and must) be hex-escaped are U+0000 to U+001F inclusively.
     /// Similarly only `b'\\'` and `b'"'` are allowed (and must) be escaped with `b'\\'`.
+    #[expect(unsafe_code, reason = "comment justifies its correctness")] 
     #[expect(clippy::arithmetic_side_effects, clippy::indexing_slicing, reason = "comments justify their correctness")]
     fn parse_string(val: &[u8]) -> Result<(Cow<'_, str>, &'_ [u8]), CollectedClientDataErr> {
         /// Tracks the state of the current Unicode scalar value that is being parsed.
@@ -891,6 +892,9 @@ impl<const R: bool> LimitedVerificationParser<R> {
         // We parse this as UTF-8 only at the end iff it is not empty. This contains all the potential Unicode scalar
         // values after de-escaping.
         let mut utf8 = Vec::new();
+        // We check for all `u8`s already; thus we might as well check if we encounter a non-ASCII `u8`.
+        // If we don't, then we can rely on `str::from_utf8_unchecked`.
+        let mut all_ascii = true;
         // This tracks the start index of the next slice to add. We add slices iff we encounter the escape character or
         // we return the parsed `Cow` (i.e., encounter an unescaped `b'"'`).
         let mut cur_idx = 0;
@@ -902,6 +906,18 @@ impl<const R: bool> LimitedVerificationParser<R> {
                     match b {
                         b'"' => {
                             if utf8.is_empty() {
+                                if all_ascii {
+                                    // `cur_idx` is 0 or 1. The latter is true iff `val` starts with a
+                                    // `b'\\'` or `b'"'` but contains no other escaped characters.
+                                    let s = &val[cur_idx..counter];
+                                    // SAFETY:
+                                    // `all_ascii` is `false` iff we encountered any `u8` that was not
+                                    // an ASCII `u8`; thus we know `s` is valid ASCII which in turn means
+                                    // it's valid UTF-8.
+                                    let v = unsafe { str::from_utf8_unchecked(s) };
+                                    // `val.len() > counter`, so indexing is fine and overflow cannot happen.
+                                    return Ok((Cow::Borrowed(v), &val[counter + 1..]));
+                                }
                                 // `cur_idx` is 0 or 1. The latter is true iff `val` starts with a
                                 // `b'\\'` or `b'"'` but contains no other escaped characters.
                                 return str::from_utf8(&val[cur_idx..counter])
@@ -909,7 +925,18 @@ impl<const R: bool> LimitedVerificationParser<R> {
                                     // `val.len() > counter`, so indexing is fine and overflow cannot happen.
                                     .map(|v| (Cow::Borrowed(v), &val[counter + 1..]));
                             }
+                            // `val.len() > counter && counter >= cur_idx`, so indexing is fine and overflow
+                            // cannot happen.
                             utf8.extend_from_slice(&val[cur_idx..counter]);
+                            if all_ascii {
+                                // SAFETY:
+                                // `all_ascii` is `false` iff we encountered any `u8` that was not
+                                // an ASCII `u8`; thus we know `utf8` is valid ASCII which in turn means
+                                // it's valid UTF-8.
+                                let v = unsafe { String::from_utf8_unchecked(utf8) };
+                                // `val.len() > counter`, so indexing is fine and overflow cannot happen.
+                                return Ok((Cow::Owned(v), &val[counter + 1..]));
+                            }
                             return String::from_utf8(utf8)
                                 .map_err(CollectedClientDataErr::Utf8Owned)
                                 // `val.len() > counter`, so indexing is fine and overflow cannot happen.
@@ -924,6 +951,7 @@ impl<const R: bool> LimitedVerificationParser<R> {
                         // ASCII Unicode scalar value _never_ appears in multi-code-unit Unicode scalar values; thus we
                         // error immediately.
                         ..=0x1f => return Err(CollectedClientDataErr::InvalidEscapedString),
+                        128.. => all_ascii = false,
                         _ => (),
                     }
                 }

	webauthn_rp WebAuthn Level 3 RP library.
	git clone https://git.philomathiclife.com/repos/webauthn_rp
	Log | Files | Refs | README

M	Cargo.toml	|	6	+++---
M	src/response.rs	|	28	++++++++++++++++++++++++++++