Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions benches/lua_bench.lua
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,26 @@ end
-- the final image falls through to `math.max(1024, remaining)` — undershoot
-- is at most a few hundred bytes; worst-case overshoot is ~1 KB (only when
-- `remaining < 1024`, which the seed=42 walk does not hit for our ladder).
-- Structure-dense payload: many small key-value pairs with short string values.
-- Targets ~10-12% structural density (vs <0.1% for multimodal payloads).
-- NOTE(review): each generated object is 261 bytes and holds 2 braces,
-- 20 colons, 19 commas and 80 quotes, i.e. ~16% without quotes and ~46% with
-- them -- confirm which metric the "~10-12%" figure refers to.
-- Shape: {"items":[{"k0":"v0","k1":"v1",...}, {...}, ...]}
--
-- target_bytes: approximate size of the returned JSON text. Objects are
-- appended until the running estimate reaches this value, so the result may
-- exceed the target by up to roughly one object.
-- Returns the JSON text as a string; a target of 20 or less yields an empty
-- "items" array.
local function make_dense_payload(target_bytes)
    -- Every item is the same 20-pair object, so format it once up front
    -- instead of re-running string.format 20 times per appended item.
    local pairs_text = {}
    for i = 0, 19 do
        pairs_text[#pairs_text + 1] = string.format('"k%d":"val%d"', i, i)
    end
    local obj = "{" .. table.concat(pairs_text, ",") .. "}"
    local step = #obj + 1 -- object plus its separating comma

    local items = {}
    local current = 20 -- outer envelope: {"items":[...]} (rounded-up estimate)

    while current < target_bytes do
        items[#items + 1] = obj
        current = current + step
    end

    return '{"items":[' .. table.concat(items, ",") .. ']}'
end

local function make_payload(target_bytes)
local rng_state = 42
local function rng_range(lo, hi)
Expand Down Expand Up @@ -97,6 +117,7 @@ end
local scenarios = {
{name = "small", iters = 5000, payload = read_file("benches/fixtures/small_api.json")},
{name = "medium", iters = 500, payload = read_file("benches/fixtures/medium_resp.json")},
{name = "dense-100k", iters = 100, payload = make_dense_payload(100 * 1024)},
{name = "100k", iters = 100, payload = make_payload(100 * 1024)},
{name = "200k", iters = 50, payload = make_payload(200 * 1024)},
{name = "500k", iters = 20, payload = make_payload(500 * 1024)},
Expand Down
2 changes: 2 additions & 0 deletions src/scan/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ pub(crate) fn find_escape_mask_with_carry(bs: u64, prev_carry: &mut u64) -> u64
}

/// Emit all set-bit positions in `mask` (relative to `base`) into `out`.
#[cfg(all(target_arch = "x86_64", feature = "avx2"))]
#[inline(always)]
pub(crate) fn emit_bits(mut mask: u64, base: u32, out: &mut Vec<u32>) {
while mask != 0 {
Expand All @@ -99,6 +100,7 @@ pub(crate) fn emit_bits(mut mask: u64, base: u32, out: &mut Vec<u32>) {
///
/// On the first mismatch, returns `Err(offset_in_buf)`. On unmatched
/// openers at end of input, returns `Err(buf.len())`.
#[cfg(all(target_arch = "x86_64", feature = "avx2"))]
pub(crate) fn validate_brackets(buf: &[u8], indices: &[u32]) -> Result<(), usize> {
let mut stack: Vec<u8> = Vec::with_capacity(32);
let mut in_string = false;
Expand Down
105 changes: 101 additions & 4 deletions src/scan/neon.rs
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ unsafe fn scan_neon_impl(buf: &[u8], out: &mut Vec<u32>) -> Result<(), usize> {
let mut i = 0usize;
let mut bs_carry: u64 = 0;
let mut in_string: u64 = 0;
let mut stack: Vec<u8> = Vec::with_capacity(32);

while i + 64 <= buf.len() {
let c0 = vld1q_u8(buf.as_ptr().add(i));
Expand Down Expand Up @@ -199,23 +200,119 @@ unsafe fn scan_neon_impl(buf: &[u8], out: &mut Vec<u32>) -> Result<(), usize> {

let struct_mask = tag_mask64(t0, t1, t2, t3, TAG_STRUCTURAL);
let final_mask = (struct_mask & !inside) | real_quote;
super::emit_bits(final_mask, i as u32, out);
emit_bits_validate(final_mask, i as u32, buf, out, &mut stack)?;
i += 64;
}

// Tail (<64 bytes): hand off to scalar emit, carrying in_string / bs_carry state.
// Tail (<64 bytes): hand off to scalar, carrying in_string / bs_carry / stack state.
if i < buf.len() {
let scalar_start = if in_string != 0 && bs_carry != 0 {
i + 1
} else {
i
};
super::scalar::scan_emit_resume(buf, scalar_start, in_string != 0, out)?;
scan_tail_validate(buf, scalar_start, in_string != 0, out, &mut stack)?;
} else if in_string != 0 {
return Err(buf.len());
}

super::validate_brackets(buf, out)
if !stack.is_empty() {
return Err(buf.len());
}
Ok(())
}

/// Record every set bit of `mask` as an offset in `out`, checking bracket
/// nesting as each offset is produced.
///
/// Bits are visited from least to most significant. For each one the offset
/// `base + bit_index` is pushed onto `out`, and the byte of `buf` at that
/// offset is then examined: `{` and `[` are pushed onto `stack`; `}` and `]`
/// pop `stack` and must find the matching opener. Any other byte is recorded
/// without touching `stack`.
///
/// # Errors
///
/// Returns `Err(offset)` for the first closer whose popped entry is not its
/// matching opener (an empty `stack` counts as a mismatch). The offending
/// offset has already been appended to `out` when the error is returned.
///
/// # Panics
///
/// Panics if a produced offset lies outside `buf`.
#[inline(always)]
fn emit_bits_validate(
    mask: u64,
    base: u32,
    buf: &[u8],
    out: &mut Vec<u32>,
    stack: &mut Vec<u8>,
) -> Result<(), usize> {
    let mut remaining = mask;
    while remaining != 0 {
        let offset = base + remaining.trailing_zeros();
        out.push(offset);
        let idx = offset as usize;

        // Openers are stacked immediately; closers name the opener they need.
        let wanted_opener = match buf[idx] {
            opener @ (b'{' | b'[') => {
                stack.push(opener);
                None
            }
            b'}' => Some(b'{'),
            b']' => Some(b'['),
            _ => None,
        };
        if let Some(opener) = wanted_opener {
            if stack.pop() != Some(opener) {
                return Err(idx);
            }
        }

        // Clear the lowest set bit and move on to the next one.
        remaining &= remaining - 1;
    }
    Ok(())
}

/// Byte-at-a-time scan of `buf[start..]` that records structural offsets and
/// validates bracket nesting, resuming from caller-supplied state.
///
/// `in_str_init` says whether the scan begins inside a string literal, and
/// `stack` holds the openers still awaiting their closers. Inside a string a
/// backslash skips itself and the following byte, and only the closing quote
/// is recorded. Outside a string, quotes, `{`, `[`, `}`, `]`, `:` and `,` are
/// pushed onto `out`; openers are pushed onto `stack` and closers pop it.
///
/// # Errors
///
/// Returns `Err(offset)` at the first closer whose popped entry is not its
/// matching opener (the offset is already in `out` at that point), and
/// `Err(buf.len())` when the input ends while still inside a string.
/// Openers left on `stack` at the end are not reported here.
fn scan_tail_validate(
    buf: &[u8],
    start: usize,
    in_str_init: bool,
    out: &mut Vec<u32>,
    stack: &mut Vec<u8>,
) -> Result<(), usize> {
    let mut inside_string = in_str_init;
    let mut cursor = start;

    while let Some(&byte) = buf.get(cursor) {
        // Every byte advances the cursor by one, except an escape introducer,
        // which also consumes the byte it escapes.
        let mut advance = 1;

        match (inside_string, byte) {
            (true, b'\\') => advance = 2,
            (true, b'"') => {
                inside_string = false;
                out.push(cursor as u32);
            }
            (true, _) => {}
            (false, b'"') => {
                inside_string = true;
                out.push(cursor as u32);
            }
            (false, b'{' | b'[') => {
                out.push(cursor as u32);
                stack.push(byte);
            }
            (false, b'}' | b']') => {
                out.push(cursor as u32);
                let opener = if byte == b'}' { b'{' } else { b'[' };
                if stack.pop() != Some(opener) {
                    return Err(cursor);
                }
            }
            (false, b':' | b',') => out.push(cursor as u32),
            (false, _) => {}
        }

        cursor += advance;
    }

    if inside_string {
        Err(buf.len())
    } else {
        Ok(())
    }
}

#[cfg(test)]
Expand Down
1 change: 1 addition & 0 deletions src/scan/scalar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ pub(crate) fn scan_and_validate(buf: &[u8], out: &mut Vec<u32>) -> Result<(), us
/// Used by `ScalarScanner::scan` (with start=0, in_str_init=false) and as
/// the unaligned-tail handler by `Avx2Scanner::scan` (with the carried
/// in-string state from the last AVX2 chunk).
#[cfg(all(target_arch = "x86_64", feature = "avx2"))]
pub(crate) fn scan_emit_resume(
buf: &[u8],
start: usize,
Expand Down
Loading