perf: double magazine capacity to 128 to reduce global pool contention #38
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: Benchmark Matrix

on:
  push:
    branches: [feature/wire-advanced-features]
  workflow_dispatch:

jobs:
  # Build the allocator shared library once; downstream jobs reuse the artifact.
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: cachix/install-nix-action@v27
        with:
          nix_path: nixpkgs=channel:nixos-unstable
      # Keyed on lockfiles so the cache invalidates when dependencies change.
      - name: Cache Nix store
        uses: actions/cache@v4
        with:
          path: |
            ~/.cache/nix
            /nix/store
          key: nix-${{ runner.os }}-${{ hashFiles('**/Cargo.lock', '**/flake.nix', '**/flake.lock') }}
          restore-keys: |
            nix-${{ runner.os }}-
      - name: Cache Cargo
        uses: actions/cache@v4
        with:
          path: |
            ~/.cargo/registry
            ~/.cargo/git
            target
          key: cargo-${{ runner.os }}-${{ hashFiles('**/Cargo.lock') }}
          restore-keys: |
            cargo-${{ runner.os }}-
      - name: Build
        run: nix build
      # `result` is the nix build output symlink; only the shared object is shipped.
      - name: Upload artifact
        uses: actions/upload-artifact@v4
        with:
          name: libaethalloc
          path: result/lib/*.so
| benchmark-matrix: | |
| needs: build | |
| runs-on: ubuntu-latest | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| benchmark: | |
| - name: packet_churn | |
| cmd: "/tmp/packet_churn 100000 10000" | |
| metric: throughput_ops_per_sec | |
| unit: ops/s | |
| direction: higher | |
| - name: multithread_churn | |
| cmd: "/tmp/multithread_churn 8 100000" | |
| metric: throughput_ops_per_sec | |
| unit: ops/s | |
| direction: higher | |
| - name: kv_store | |
| cmd: "/tmp/kv_store" | |
| metric: throughput_ops_per_sec | |
| unit: ops/s | |
| direction: higher | |
| - name: producer_consumer | |
| cmd: "/tmp/producer_consumer" | |
| metric: throughput_ops_per_sec | |
| unit: ops/s | |
| direction: higher | |
| - name: realloc_churn | |
| cmd: "/tmp/realloc_churn 100000 2" | |
| metric: latency_ns.avg | |
| unit: ns | |
| direction: lower | |
| - name: realloc_large | |
| cmd: "/tmp/realloc_large 10000" | |
| metric: latency_ns.avg | |
| unit: ns | |
| direction: lower | |
| - name: fragmentation_churn | |
| cmd: "/tmp/fragmentation_churn 50000 10000" | |
| metric: latency_ns.avg | |
| unit: ns | |
| direction: lower | |
| - name: fragmentation_rss | |
| cmd: "/tmp/fragmentation" | |
| metric: summary.final_rss_kb | |
| unit: KB | |
| direction: lower | |
| run_id: [1, 2, 3, 4, 5] | |
| steps: | |
| - uses: actions/checkout@v4 | |
| - name: Download artifact | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: libaethalloc | |
| path: ./lib | |
| - name: Compile benchmarks | |
| run: | | |
| gcc -O3 -pthread benches/packet_churn.c -o /tmp/packet_churn | |
| gcc -O3 -pthread benches/kv_store.c -o /tmp/kv_store | |
| gcc -O3 -pthread benches/producer_consumer.c -o /tmp/producer_consumer | |
| gcc -O3 -pthread benches/multithread_churn.c -o /tmp/multithread_churn | |
| gcc -O3 -pthread benches/fragmentation.c -o /tmp/fragmentation | |
| gcc -O3 -pthread benches/realloc_churn.c -o /tmp/realloc_churn | |
| gcc -O3 -pthread benches/realloc_large.c -o /tmp/realloc_large | |
| gcc -O3 -pthread benches/fragmentation_churn.c -o /tmp/fragmentation_churn | |
| - name: Run glibc baseline | |
| id: glibc | |
| run: | | |
| RESULT=$(${{ matrix.benchmark.cmd }} 2>&1) | |
| echo "result<<EOF" >> $GITHUB_OUTPUT | |
| echo "$RESULT" >> $GITHUB_OUTPUT | |
| echo "EOF" >> $GITHUB_OUTPUT | |
| - name: Run aethalloc | |
| id: aethalloc | |
| run: | | |
| LIB=$(realpath lib/*.so) | |
| RESULT=$(LD_PRELOAD="$LIB" ${{ matrix.benchmark.cmd }} 2>&1) | |
| echo "result<<EOF" >> $GITHUB_OUTPUT | |
| echo "$RESULT" >> $GITHUB_OUTPUT | |
| echo "EOF" >> $GITHUB_OUTPUT | |
| - name: Compare | |
| run: | | |
| python3 -c " | |
| import json, os | |
| glibc = json.loads(os.environ['GLIBC_RESULT']) | |
| aeth = json.loads(os.environ['AETH_RESULT']) | |
| metric_path = os.environ['METRIC'].split('.') | |
| def get_nested(d, path): | |
| for key in path: | |
| if isinstance(d, dict): | |
| d = d.get(key, 0) | |
| else: | |
| return 0 | |
| return d | |
| glibc_val = get_nested(glibc, metric_path) | |
| aeth_val = get_nested(aeth, metric_path) | |
| delta = ((aeth_val - glibc_val) / glibc_val * 100) if glibc_val > 0 else 0 | |
| direction = os.environ['DIRECTION'] | |
| if direction == 'higher': | |
| emoji = '🟢' if delta > 0 else '🔴' if delta < 0 else '➖' | |
| else: | |
| emoji = '🟢' if delta < 0 else '🔴' if delta > 0 else '➖' | |
| print(f'{emoji} {os.environ[\"BENCH_NAME\"]} run {os.environ[\"RUN_ID\"]}: glibc={glibc_val:,.2f} | aethalloc={aeth_val:,.2f} | delta={delta:+.1f}%') | |
| " | |
| env: | |
| GLIBC_RESULT: ${{ steps.glibc.outputs.result }} | |
| AETH_RESULT: ${{ steps.aethalloc.outputs.result }} | |
| METRIC: ${{ matrix.benchmark.metric }} | |
| DIRECTION: ${{ matrix.benchmark.direction }} | |
| BENCH_NAME: ${{ matrix.benchmark.name }} | |
| RUN_ID: ${{ matrix.run_id }} | |
| summarize: | |
| needs: benchmark-matrix | |
| runs-on: ubuntu-latest | |
| if: always() | |
| steps: | |
| - uses: actions/checkout@v4 | |
| - name: Download artifact | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: libaethalloc | |
| path: ./lib | |
| - name: Compile all benchmarks | |
| run: | | |
| gcc -O3 -pthread benches/packet_churn.c -o /tmp/packet_churn | |
| gcc -O3 -pthread benches/kv_store.c -o /tmp/kv_store | |
| gcc -O3 -pthread benches/producer_consumer.c -o /tmp/producer_consumer | |
| gcc -O3 -pthread benches/multithread_churn.c -o /tmp/multithread_churn | |
| gcc -O3 -pthread benches/fragmentation.c -o /tmp/fragmentation | |
| gcc -O3 -pthread benches/realloc_churn.c -o /tmp/realloc_churn | |
| gcc -O3 -pthread benches/realloc_large.c -o /tmp/realloc_large | |
| gcc -O3 -pthread benches/fragmentation_churn.c -o /tmp/fragmentation_churn | |
| gcc -O3 benches/tail_latency.c -o /tmp/tail_latency | |
| - name: Run full benchmark suite | |
| run: | | |
| python3 << 'PYEOF' | |
| import subprocess, json, statistics, os | |
| LIB_PATH = subprocess.check_output("realpath lib/*.so", shell=True).decode().strip() | |
| benchmarks = [ | |
| ("packet_churn", "/tmp/packet_churn 100000 10000", "throughput_ops_per_sec", "ops/s", "higher"), | |
| ("multithread_churn", "/tmp/multithread_churn 8 100000", "throughput_ops_per_sec", "ops/s", "higher"), | |
| ("kv_store", "/tmp/kv_store", "throughput_ops_per_sec", "ops/s", "higher"), | |
| ("producer_consumer", "/tmp/producer_consumer", "throughput_ops_per_sec", "ops/s", "higher"), | |
| ("realloc_churn", "/tmp/realloc_churn 100000 2", "latency_ns.avg", "ns", "lower"), | |
| ("realloc_large", "/tmp/realloc_large 10000", "latency_ns.avg", "ns", "lower"), | |
| ("fragmentation_churn", "/tmp/fragmentation_churn 50000 10000", "latency_ns.avg", "ns", "lower"), | |
| ("fragmentation_rss", "/tmp/fragmentation", "summary.final_rss_kb", "KB", "lower"), | |
| ] | |
| runs = 5 | |
| summary = "# Benchmark Results\n\n" | |
| summary += f"**System:** GitHub Actions ubuntu-latest ({subprocess.check_output('nproc', shell=True).decode().strip()} cores)\n\n" | |
| summary += f"**Runs per benchmark:** {runs}\n\n" | |
| summary += "---\n\n" | |
| for bench_name, cmd, metric, unit, direction in benchmarks: | |
| glibc_vals = [] | |
| aeth_vals = [] | |
| for i in range(runs): | |
| try: | |
| out = subprocess.check_output(cmd, shell=True, stderr=subprocess.STDOUT, timeout=120).decode() | |
| d = json.loads(out.strip()) | |
| parts = metric.split(".") | |
| val = d | |
| for p in parts: | |
| val = val.get(p, 0) if isinstance(val, dict) else 0 | |
| glibc_vals.append(val) | |
| except Exception as e: | |
| print(f"WARNING: glibc {bench_name} run {i+1} failed: {e}") | |
| try: | |
| out = subprocess.check_output(f"LD_PRELOAD={LIB_PATH} {cmd}", shell=True, stderr=subprocess.STDOUT, timeout=120).decode() | |
| d = json.loads(out.strip()) | |
| parts = metric.split(".") | |
| val = d | |
| for p in parts: | |
| val = val.get(p, 0) if isinstance(val, dict) else 0 | |
| aeth_vals.append(val) | |
| except Exception as e: | |
| print(f"WARNING: aethalloc {bench_name} run {i+1} failed: {e}") | |
| g_mean = statistics.mean(glibc_vals) if glibc_vals else 0 | |
| g_stdev = statistics.stdev(glibc_vals) if len(glibc_vals) > 1 else 0 | |
| a_mean = statistics.mean(aeth_vals) if aeth_vals else 0 | |
| a_stdev = statistics.stdev(aeth_vals) if len(aeth_vals) > 1 else 0 | |
| delta = ((a_mean - g_mean) / g_mean * 100) if g_mean > 0 else 0 | |
| if direction == "higher": | |
| emoji = "🟢" if delta > 2 else "🔴" if delta < -2 else "➖" | |
| else: | |
| emoji = "🟢" if delta < -2 else "🔴" if delta > 2 else "➖" | |
| summary += f"{emoji} **{bench_name}**\n" | |
| if glibc_vals or aeth_vals: | |
| summary += f"- glibc: {g_mean:,.0f} ± {g_stdev:,.0f} {unit}\n" | |
| summary += f"- aethalloc: {a_mean:,.0f} ± {a_stdev:,.0f} {unit}\n" | |
| summary += f"- **delta: {delta:+.1f}%**\n\n" | |
| else: | |
| summary += f"- ⚠️ All runs failed (benchmark may not work on this platform)\n\n" | |
| # Tail latency | |
| summary += "---\n\n## Tail Latency (8 threads, 50K ops)\n\n" | |
| summary += "| Allocator | P50 | P99 | P99.9 | P99.99 | Max |\n" | |
| summary += "|-----------|-----|-----|-------|--------|-----|\n" | |
| for label, pre in [("glibc", ""), ("AethAlloc", f"LD_PRELOAD={LIB_PATH}")]: | |
| try: | |
| out = subprocess.check_output(f"{pre} /tmp/tail_latency 8 50000", shell=True, stderr=subprocess.STDOUT, timeout=120).decode() | |
| d = json.loads(out.strip()) | |
| lat = d.get("latency_ns", {}) | |
| summary += f"| {label} | {lat.get('p50', 0):,.0f}ns | {lat.get('p99', 0):,.0f}ns | {lat.get('p99.9', 0):,.0f}ns | {lat.get('p99.99', 0):,.0f}ns | {lat.get('max', 0):,.0f}ns |\n" | |
| except Exception as e: | |
| summary += f"| {label} | ⚠️ | ⚠️ | ⚠️ | ⚠️ | ⚠️ |\n" | |
| print(f"WARNING: {label} tail_latency failed: {e}") | |
| with open(os.environ["GITHUB_STEP_SUMMARY"], "w") as f: | |
| f.write(summary) | |
| # Also save raw JSON | |
| raw = {} | |
| for bench_name, cmd, metric, unit, direction in benchmarks: | |
| glibc_vals = [] | |
| aeth_vals = [] | |
| for i in range(runs): | |
| try: | |
| out = subprocess.check_output(cmd, shell=True, stderr=subprocess.STDOUT, timeout=120).decode() | |
| d = json.loads(out.strip()) | |
| parts = metric.split(".") | |
| val = d | |
| for p in parts: | |
| val = val.get(p, 0) if isinstance(val, dict) else 0 | |
| glibc_vals.append(val) | |
| except: | |
| pass | |
| try: | |
| out = subprocess.check_output(f"LD_PRELOAD={LIB_PATH} {cmd}", shell=True, stderr=subprocess.STDOUT, timeout=120).decode() | |
| d = json.loads(out.strip()) | |
| val = d | |
| for p in parts: | |
| val = val.get(p, 0) if isinstance(val, dict) else 0 | |
| aeth_vals.append(val) | |
| except: | |
| pass | |
| if glibc_vals or aeth_vals: | |
| raw[bench_name] = { | |
| "glibc": {"mean": statistics.mean(glibc_vals) if glibc_vals else 0, "stdev": statistics.stdev(glibc_vals) if len(glibc_vals) > 1 else 0, "runs": glibc_vals}, | |
| "aethalloc": {"mean": statistics.mean(aeth_vals) if aeth_vals else 0, "stdev": statistics.stdev(aeth_vals) if len(aeth_vals) > 1 else 0, "runs": aeth_vals}, | |
| } | |
| with open("benchmark-results.json", "w") as f: | |
| json.dump(raw, f, indent=2) | |
| PYEOF | |
| - name: Upload results | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: benchmark-results | |
| path: benchmark-results.json |