Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,12 @@ test: build ## Run cargo tests + busted Lua tests
lint: ## Run clippy with -D warnings
cargo clippy --release --all-targets -- -D warnings

bench: build vendor/lua-cjson/cjson.so ## Run the OpenResty LuaJIT benchmark
$(LUA_ENV) $(RESTY) benches/lua_bench.lua
BENCH_SCENARIOS := small medium github-100k 100k 200k 500k 1m 2m 5m 10m interleaved

bench: build vendor/lua-cjson/cjson.so ## Run each scenario in a fresh LuaJIT process
@for s in $(BENCH_SCENARIOS); do \
$(LUA_ENV) $(RESTY) benches/lua_bench.lua $$s; \
done

vendor/lua-cjson/cjson.so: | vendor/lua-cjson/Makefile
ifeq ($(shell uname),Darwin)
Expand Down
49 changes: 28 additions & 21 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -99,29 +99,36 @@ LD_LIBRARY_PATH="$PWD/target/release" \
## Benchmarks

`qjson` vs. `lua-cjson` and `lua-resty-simdjson` on multimodal
chat-completion payloads, "parse + access model, temperature, and all
messages[*].content paths" workload (median ops/s under OpenResty LuaJIT 2.1,
AMD EPYC Rome (Zen 2, 4 vCPUs); 5 rounds, deterministic payload):
chat-completion payloads (median ops/s under OpenResty LuaJIT 2.1,
AMD EPYC Rome, Zen 2, 4 vCPUs; 5 rounds, deterministic payload).

| Size | cjson | simdjson | `qjson.parse` | `qjson.decode + access content` | speedup vs. cjson |
### Parse + access (read-only)

| Size | cjson | simdjson | `qjson.parse` | `qjson.decode + access` | speedup vs. cjson (`parse` / `decode`) |
|---:|---:|---:|---:|---:|---:|
| 2 KB | 94,075 | 108,108 | 127,214 | 120,398 | 1.4× / 1.3× |
| 60 KB | 9,041 | 83,043 | 123,487 | 214,500 | 13.7× / 23.7× |
| 100 KB | 5,302 | 32,248 | 109,649 | 102,564 | 20.7× / 19.3× |
| 1 MB | 517 | 3,538 | 16,520 | 16,988 | 32.0× / 32.9× |
| 10 MB | 50 | 402 | 1,899 | 1,918 | 38.0× / 38.4× |

`qjson.parse` wins because it skips building a Lua table for the parts you
never read; `qjson.decode + t.field` adds a cjson-shaped table proxy on top
with similar throughput. Memory retention for `qjson` is essentially
flat in payload size (a few KB for the reusable buffers), while `cjson`
and `simdjson` retain more Lua heap because they materialize the table tree.

See [`docs/benchmarks.md`](docs/benchmarks.md) for the full size ladder,
memory numbers, an "encode round-trip" row (passthrough emit via
`memcpy`), exact environment, and the reproduction command. `make bench`
uses `lua-resty-simdjson` when `resty.simdjson` is available in the
OpenResty environment; otherwise it skips the simdjson rows.
| 2 KB | 92,716 | 102,602 | 128,005 | 125,815 | 1.4× / 1.4× |
| 60 KB | 9,007 | 82,699 | 116,198 | 219,491 | 12.9× / 24.4× |
| 100 KB | 2,769 | 40,437 | 84,034 | 121,803 | 30.3× / 44.0× |
| 1 MB | 512 | 4,020 | 16,056 | 15,400 | 31.4× / 30.1× |
| 10 MB | 51 | 363 | 1,830 | 1,783 | 35.9× / 35.0× |

### Encode (unmodified) + modify-then-re-encode

| Size | encode (unmodified) | modify top (cjson / qjson) | modify nested (cjson / qjson) | qjson speedup vs. cjson (top / nested) |
|---:|---:|---:|---:|---:|
| 2 KB | 219,925 | 59,761 / 56,909 | 61,685 / 49,798 | 1.0× / 0.8× |
| 60 KB | 143,843 | 4,590 / **44,370** | 4,616 / **196,386** | 9.7× / 42.5× |
| 100 KB | 119,617 | 2,645 / **32,712** | 5,263 / **59,809** | 12.4× / 11.4× |
| 1 MB | 16,269 | 241 / **3,108** | 516 / **14,134** | 12.9× / 27.4× |

> **qjson.encode(unmodified)** re-emits the original byte range via `memcpy` —
> no fields touched means zero serializer work.
> **qjson modify+encode** materializes only the mutated subtree; unmodified
> siblings stay on the fast path. cjson always does a full materialize +
> re-serialize on every encode. At 60 KB+, qjson modify+encode is **10–43×**
> faster than the cjson equivalent.
> See [`docs/benchmarks.md`](docs/benchmarks.md) for the full size ladder,
> memory numbers, and environment.

```sh
make bench # qjson vs cjson and lua-resty-simdjson
Expand Down
135 changes: 129 additions & 6 deletions benches/lua_bench.lua
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,10 @@ local ROUNDS = 5
local function bench(name, iters, fn)
-- Warmup pass: lets JIT compile hot traces and any one-time pools fill
-- before measurement starts. Excluded from timing and memory delta.
local warmup = math.max(3, math.floor(iters / 5))
-- Floor at 50: LuaJIT hotloop default is 56, so fewer iterations leave
-- the bench measuring interpreter mode for the large-payload scenarios
Comment thread
coderabbitai[bot] marked this conversation as resolved.
-- (e.g. 500k has iters=100, iters/5=20 → without floor, traces may not compile).
local warmup = math.max(50, math.floor(iters / 5))
Comment on lines +148 to +151
for _ = 1, warmup do fn() end

collectgarbage("collect")
Expand Down Expand Up @@ -220,6 +223,21 @@ local function default_table_access(t)
end
end

--- Default "modify top" mutation used by the modify+encode benchmarks.
-- Overwrites two top-level fields of a decoded payload in place. The
-- benchmarks call this on both `cjson.decode` and `qjson.decode` results, and
-- it is the fallback when a scenario does not supply its own `modify_top`.
-- @param t decoded payload (mutated in place)
local function default_table_modify_top(t)
-- Replace existing top-level scalar values; nothing nested is touched.
t.model = "new-model"
t.temperature = 0.0
end

--- Default "add field" mutation used by the modify+encode benchmarks.
-- Sets the top-level `stream` field to `true` on a decoded payload. It is the
-- fallback when a scenario does not supply its own `modify_add`.
-- NOTE(review): presumably `stream` is absent from the fixture payloads, so
-- this exercises key insertion rather than overwrite — confirm against fixtures.
-- @param t decoded payload (mutated in place)
local function default_table_modify_add(t)
t.stream = true
end

--- Default "modify nested" mutation used by the modify+encode benchmarks.
-- Rewrites the `content` of the first entry in `t.messages`, when there is one.
-- It is the fallback when a scenario does not supply its own `modify_nested`.
-- NOTE(review): the benchmarks also pass plain `cjson.decode` tables here, so
-- `qjson.len` is presumably able to measure ordinary Lua tables — confirm.
-- @param t decoded payload (mutated in place)
local function default_table_modify_nested(t)
    -- Nothing to do when the payload has no messages or the list is empty.
    if not t.messages or not (qjson.len(t.messages) > 0) then
        return
    end
    t.messages[1].content = "modified"
end

-- GitHub issues accessors: array of issues, access first issue's fields
local function github_cjson_access(obj)
local _ = obj[1] and obj[1].id
Expand All @@ -239,15 +257,32 @@ local function github_table_access(t)
local _ = t[1] and t[1].user and t[1].user.login
end

--- GitHub-issues "modify top" mutation for the modify+encode benchmarks.
-- Overwrites the `title` of the first issue in a decoded issues array.
-- An empty array is left untouched instead of raising an "attempt to index a
-- nil value" error, matching the guards in the other GitHub mutators.
-- @param t decoded array of issues (mutated in place)
local function github_table_modify_top(t)
    local first = t[1]
    if first then
        first.title = "modified title"
    end
end

--- GitHub-issues "add field" mutation for the modify+encode benchmarks.
-- Sets `extra_field = true` on the first issue of a decoded issues array;
-- does nothing when the array has no first element.
-- @param t decoded array of issues (mutated in place)
local function github_table_modify_add(t)
    -- Bail out early on an empty array.
    if not t[1] then
        return
    end
    t[1].extra_field = true
end
Comment thread
coderabbitai[bot] marked this conversation as resolved.

--- GitHub-issues "modify nested" mutation for the modify+encode benchmarks.
-- Overwrites `user.login` on the first issue of a decoded issues array;
-- does nothing when the first issue or its `user` object is missing.
-- @param t decoded array of issues (mutated in place)
local function github_table_modify_nested(t)
    -- Both the first issue and its `user` must exist before writing.
    if not (t[1] and t[1].user) then
        return
    end
    t[1].user.login = "modified-user"
end

local scenarios = {
{name = "small", iters = 5000, payload = read_file("benches/fixtures/small_api.json")},
{name = "medium", iters = 500, payload = read_file("benches/fixtures/medium_resp.json")},
{name = "github-100k", iters = 100, payload = make_github_issues_payload(100 * 1024),
cjson_access = github_cjson_access, qjson_access = github_qjson_access, table_access = github_table_access},
cjson_access = github_cjson_access, qjson_access = github_qjson_access, table_access = github_table_access,
modify_top = github_table_modify_top, modify_add = github_table_modify_add, modify_nested = github_table_modify_nested},
{name = "100k", iters = 100, payload = make_payload(100 * 1024)},
{name = "200k", iters = 50, payload = make_payload(200 * 1024)},
{name = "500k", iters = 20, payload = make_payload(500 * 1024)},
{name = "1m", iters = 15, payload = make_payload(1024 * 1024)},
{name = "500k", iters = 100, payload = make_payload(500 * 1024)},
{name = "1m", iters = 60, payload = make_payload(1024 * 1024)},
{name = "2m", iters = 20, payload = make_payload(2 * 1024 * 1024)},
{name = "5m", iters = 20, payload = make_payload(5 * 1024 * 1024)},
{name = "10m", iters = 20, payload = make_payload(10 * 1024 * 1024)},
Expand All @@ -258,23 +293,56 @@ local scenarios = {
local has_pooled_api = type(qjson.new_decoder) == "function"
local pooled_decoder = has_pooled_api and qjson.new_decoder() or nil

-- Optional scenario filter: arg[1] = scenario name (e.g. "small").
-- When set, only that single scenario runs in a fresh LuaJIT process,
-- avoiding accumulated GC/JIT state from prior payloads.
local filter = arg[1]

if not simdjson then
print("lua-resty-simdjson unavailable; skipping simdjson rows: "
.. tostring(simdjson_or_err))
end

for _, s in ipairs(scenarios) do
if filter and s.name ~= filter then goto continue_scenario end
Comment on lines +299 to +307
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor | ⚡ Quick win

Fail fast when arg[1] does not match any scenario.

A typo in the filter currently exits with no benchmark rows, which is easy to misread as success in CI/manual runs. Please track whether any scenario matched and error if none did.

Suggested patch
 local filter = arg[1]
+local matched = false
@@
 for _, s in ipairs(scenarios) do
     if filter and s.name ~= filter then goto continue_scenario end
+    matched = true
     print(string.format("=== %s (%d bytes) ===", s.name, `#s.payload`))
@@
 end
+
+if filter and filter ~= "interleaved" and not matched then
+    error("unknown benchmark scenario filter: " .. tostring(filter))
+end

Also applies to: 378-379

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@benches/lua_bench.lua` around lines 299 - 307, The filter check using local
variable "filter" can silently skip all scenarios when arg[1] is mistyped;
modify the loop over "scenarios" that uses "if filter and s.name ~= filter then
goto continue_scenario end" to set a boolean flag (e.g., "matched = true") when
any scenario is executed, and after the loop check that flag and call
error/print and os.exit(1) if no scenario matched; apply the same pattern to the
second loop that uses the same filter logic (the block using the
"continue_scenario" label) so CI fails fast when an invalid filter was supplied.

print(string.format("=== %s (%d bytes) ===", s.name, #s.payload))
Comment on lines 306 to 308

local cjson_access = s.cjson_access or default_cjson_access
local qjson_access = s.qjson_access or default_qjson_access
local table_access = s.table_access or default_table_access
local modify_top = s.modify_top or default_table_modify_top
local modify_add = s.modify_add or default_table_modify_add
local modify_nested = s.modify_nested or default_table_modify_nested

bench("cjson.decode + access fields", s.iters, function()
local obj = cjson.decode(s.payload)
cjson_access(obj)
end)

-- cjson always fully materializes on decode, so modify+encode is the
-- same cost as a full re-encode — useful as a realistic baseline for
-- modify workloads.
bench("cjson.decode + modify top + encode", s.iters, function()
local obj = cjson.decode(s.payload)
modify_top(obj)
local _enc = cjson.encode(obj)
if #_enc < 2 then error("cjson.encode produced too-short result") end
end)

bench("cjson.decode + add field + encode", s.iters, function()
local obj = cjson.decode(s.payload)
modify_add(obj)
local _enc = cjson.encode(obj)
if #_enc < 2 then error("cjson.encode produced too-short result") end
end)

bench("cjson.decode + modify nested + encode", s.iters, function()
local obj = cjson.decode(s.payload)
modify_nested(obj)
local _enc = cjson.encode(obj)
if #_enc < 2 then error("cjson.encode produced too-short result") end
end)

if simdjson then
bench("simdjson.decode + access fields", s.iters, function()
local obj = simdjson:decode(s.payload)
Expand Down Expand Up @@ -307,8 +375,31 @@ for _, s in ipairs(scenarios) do

bench("qjson.decode + qjson.encode (unmodified)", s.iters, function()
local t = qjson.decode(s.payload)
local _ = qjson.encode(t)
local _enc = qjson.encode(t)
if #_enc < 2 then error("qjson.encode produced too-short result") end
end)

bench("qjson.decode + modify top + encode", s.iters, function()
local t = qjson.decode(s.payload)
modify_top(t)
local _enc = qjson.encode(t)
if #_enc < 2 then error("qjson.encode produced too-short result") end
end)

bench("qjson.decode + add field + encode", s.iters, function()
local t = qjson.decode(s.payload)
modify_add(t)
local _enc = qjson.encode(t)
if #_enc < 2 then error("qjson.encode produced too-short result") end
end)

bench("qjson.decode + modify nested + encode", s.iters, function()
local t = qjson.decode(s.payload)
modify_nested(t)
local _enc = qjson.encode(t)
if #_enc < 2 then error("qjson.encode produced too-short result") end
end)
::continue_scenario::
end

-- Interleaved scenario: cycle through several payloads of different sizes
Expand Down Expand Up @@ -338,6 +429,8 @@ local function make_cycler(items)
end
end

if not filter or filter == "interleaved" then

print(string.format("=== interleaved %s ===", table.concat(interleaved_names, ",")))

do
Expand Down Expand Up @@ -384,6 +477,36 @@ do
bench("qjson.decode + qjson.encode (unmodified)", 400, function()
local p = next_p()
local t = qjson.decode(p)
local _ = qjson.encode(t)
local _enc = qjson.encode(t)
if #_enc < 2 then error("qjson.encode produced too-short result") end
end)

next_p = make_cycler(interleaved)
bench("qjson.decode + modify top + encode", 400, function()
local p = next_p()
local t = qjson.decode(p)
default_table_modify_top(t)
local _enc = qjson.encode(t)
if #_enc < 2 then error("qjson.encode produced too-short result") end
end)

next_p = make_cycler(interleaved)
bench("qjson.decode + add field + encode", 400, function()
local p = next_p()
local t = qjson.decode(p)
default_table_modify_add(t)
local _enc = qjson.encode(t)
if #_enc < 2 then error("qjson.encode produced too-short result") end
end)

next_p = make_cycler(interleaved)
bench("qjson.decode + modify nested + encode", 400, function()
local p = next_p()
local t = qjson.decode(p)
default_table_modify_nested(t)
local _enc = qjson.encode(t)
if #_enc < 2 then error("qjson.encode produced too-short result") end
end)
end

end -- filter == "interleaved"
Loading
Loading