Skip to content

Commit d24eac3

Browse files
committed
perf(scan): fuse validate_brackets into NEON scanner
Eliminate the separate validate_brackets pass in the NEON scanner by carrying a depth stack inline during emit. This mirrors the scalar scanner's fused scan_and_validate approach.

Changes:
- Add emit_bits_validate(), which validates brackets while emitting
- Add scan_tail_validate() for the scalar tail with inline validation
- Gate emit_bits, validate_brackets, and scan_emit_resume with #[cfg] so they are compiled for AVX2 only (they remain used by the AVX2 scanner)

Profiling on the bench fixtures showed validate_brackets consuming ~30% of scan time on structure-dense payloads (small_api.json). The fusion eliminates this second pass.

Closes #25
1 parent 28ad4b7 commit d24eac3

3 files changed

Lines changed: 104 additions & 4 deletions

File tree

src/scan/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ pub(crate) fn find_escape_mask_with_carry(bs: u64, prev_carry: &mut u64) -> u64
8080
}
8181

8282
/// Emit all set-bit positions in `mask` (relative to `base`) into `out`.
83+
#[cfg(all(target_arch = "x86_64", feature = "avx2"))]
8384
#[inline(always)]
8485
pub(crate) fn emit_bits(mut mask: u64, base: u32, out: &mut Vec<u32>) {
8586
while mask != 0 {
@@ -99,6 +100,7 @@ pub(crate) fn emit_bits(mut mask: u64, base: u32, out: &mut Vec<u32>) {
99100
///
100101
/// On the first mismatch, returns `Err(offset_in_buf)`. On unmatched
101102
/// openers at end of input, returns `Err(buf.len())`.
103+
#[cfg(all(target_arch = "x86_64", feature = "avx2"))]
102104
pub(crate) fn validate_brackets(buf: &[u8], indices: &[u32]) -> Result<(), usize> {
103105
let mut stack: Vec<u8> = Vec::with_capacity(32);
104106
let mut in_string = false;

src/scan/neon.rs

Lines changed: 101 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,7 @@ unsafe fn scan_neon_impl(buf: &[u8], out: &mut Vec<u32>) -> Result<(), usize> {
142142
let mut i = 0usize;
143143
let mut bs_carry: u64 = 0;
144144
let mut in_string: u64 = 0;
145+
let mut stack: Vec<u8> = Vec::with_capacity(32);
145146

146147
while i + 64 <= buf.len() {
147148
let c0 = vld1q_u8(buf.as_ptr().add(i));
@@ -199,23 +200,119 @@ unsafe fn scan_neon_impl(buf: &[u8], out: &mut Vec<u32>) -> Result<(), usize> {
199200

200201
let struct_mask = tag_mask64(t0, t1, t2, t3, TAG_STRUCTURAL);
201202
let final_mask = (struct_mask & !inside) | real_quote;
202-
super::emit_bits(final_mask, i as u32, out);
203+
emit_bits_validate(final_mask, i as u32, buf, out, &mut stack)?;
203204
i += 64;
204205
}
205206

206-
// Tail (<64 bytes): hand off to scalar emit, carrying in_string / bs_carry state.
207+
// Tail (<64 bytes): hand off to scalar, carrying in_string / bs_carry / stack state.
207208
if i < buf.len() {
208209
let scalar_start = if in_string != 0 && bs_carry != 0 {
209210
i + 1
210211
} else {
211212
i
212213
};
213-
super::scalar::scan_emit_resume(buf, scalar_start, in_string != 0, out)?;
214+
scan_tail_validate(buf, scalar_start, in_string != 0, out, &mut stack)?;
214215
} else if in_string != 0 {
215216
return Err(buf.len());
216217
}
217218

218-
super::validate_brackets(buf, out)
219+
if !stack.is_empty() {
220+
return Err(buf.len());
221+
}
222+
Ok(())
223+
}
224+
225+
/// Emit structural offsets and validate brackets inline.
///
/// Walks the set bits of `mask` from least to most significant. For every set
/// bit `k` the absolute offset `base + k` is appended to `out`, and the byte
/// found at that offset in `buf` updates the bracket `stack`:
///
/// * `{` / `[` are pushed onto `stack`;
/// * `}` / `]` pop `stack` and must find the matching opener on top;
/// * any other byte (e.g. a quote, colon or comma) is emitted without
///   touching `stack`.
///
/// `stack` is carried across calls, so openers seen in one call can be
/// closed in a later one.
///
/// # Errors
///
/// Returns `Err(offset)` for the first closer that does not match the top of
/// `stack` (an empty `stack` counts as a mismatch). The offending offset has
/// already been appended to `out` when the error is returned.
///
/// # Panics
///
/// Panics if `base + k` is not a valid index into `buf` for some set bit `k`.
#[inline(always)]
fn emit_bits_validate(
    mut mask: u64,
    base: u32,
    buf: &[u8],
    out: &mut Vec<u32>,
    stack: &mut Vec<u8>,
) -> Result<(), usize> {
    while mask != 0 {
        // Lowest set bit = next structural offset inside this window.
        let pos = base + mask.trailing_zeros();
        out.push(pos);

        let byte = buf[pos as usize];
        match byte {
            b'{' | b'[' => stack.push(byte),
            b'}' | b']' => {
                let opener = if byte == b'}' { b'{' } else { b'[' };
                if stack.pop() != Some(opener) {
                    return Err(pos as usize);
                }
            }
            _ => {}
        }

        // Clear the lowest set bit and move on to the next one.
        mask &= mask - 1;
    }
    Ok(())
}
257+
258+
/// Scalar tail with inline bracket validation, continuing from NEON state.
///
/// Scans `buf[start..]` one byte at a time, appending the offset of every
/// structural byte (`"`, `{`, `}`, `[`, `]`, `:`, `,`) found outside a string,
/// plus the offset of each closing quote, to `out`.
///
/// * `start` – index of the first byte to examine.
/// * `in_str_init` – whether scanning resumes inside a string literal.
/// * `stack` – bracket stack carried over from earlier scanning; openers are
///   pushed and closers must match the opener on top.
///
/// Inside a string a backslash skips the byte that follows it, so an escaped
/// quote does not terminate the string.
///
/// Openers still on `stack` at the end of `buf` are NOT reported here; the
/// caller is expected to check `stack` afterwards.
///
/// # Errors
///
/// * `Err(i)` – the closer at offset `i` does not match the top of `stack`
///   (or `stack` is empty). The offset has already been appended to `out`.
/// * `Err(buf.len())` – the input ends while still inside a string.
fn scan_tail_validate(
    buf: &[u8],
    start: usize,
    in_str_init: bool,
    out: &mut Vec<u32>,
    stack: &mut Vec<u8>,
) -> Result<(), usize> {
    let mut i = start;
    let mut in_str = in_str_init;

    while i < buf.len() {
        let b = buf[i];

        if in_str {
            match b {
                // Escape sequence: skip the backslash and the escaped byte.
                b'\\' => {
                    i += 2;
                    continue;
                }
                // Unescaped quote closes the string and is itself structural.
                b'"' => {
                    in_str = false;
                    out.push(i as u32);
                }
                _ => {}
            }
            i += 1;
            continue;
        }

        // NOTE(review): offsets are narrowed to u32 — assumes the caller
        // rejects inputs longer than u32::MAX; confirm.
        match b {
            b'"' => {
                in_str = true;
                out.push(i as u32);
            }
            b'{' | b'[' => {
                out.push(i as u32);
                stack.push(b);
            }
            b'}' => {
                out.push(i as u32);
                if stack.pop() != Some(b'{') {
                    return Err(i);
                }
            }
            b']' => {
                out.push(i as u32);
                if stack.pop() != Some(b'[') {
                    return Err(i);
                }
            }
            b':' | b',' => out.push(i as u32),
            _ => {}
        }
        i += 1;
    }

    if in_str {
        return Err(buf.len());
    }
    Ok(())
}
220317

221318
#[cfg(test)]

src/scan/scalar.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ pub(crate) fn scan_and_validate(buf: &[u8], out: &mut Vec<u32>) -> Result<(), us
4545
/// Used by `ScalarScanner::scan` (with start=0, in_str_init=false) and as
4646
/// the unaligned-tail handler by `Avx2Scanner::scan` (with the carried
4747
/// in-string state from the last AVX2 chunk).
48+
#[cfg(all(target_arch = "x86_64", feature = "avx2"))]
4849
pub(crate) fn scan_emit_resume(
4950
buf: &[u8],
5051
start: usize,

0 commit comments

Comments
 (0)