perf: double magazine capacity to 128 to reduce global pool contention #38

Workflow file for this run

name: Benchmark Matrix
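# Builds libaethalloc once, then compares it against the stock glibc allocator across
# a matrix of allocation benchmarks (5 runs each) and publishes an aggregated summary.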
on:
push:
branches: [feature/wire-advanced-features]
workflow_dispatch:
jobs:
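# build: compile libaethalloc with Nix and publish the resulting .so as an artifact
# that the benchmark jobs download and LD_PRELOAD.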
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: cachix/install-nix-action@v27
with:
nix_path: nixpkgs=channel:nixos-unstable
- name: Cache Nix store
uses: actions/cache@v4
with:
path: |
~/.cache/nix
/nix/store
key: nix-${{ runner.os }}-${{ hashFiles('**/Cargo.lock', '**/flake.nix', '**/flake.lock') }}
restore-keys: |
nix-${{ runner.os }}-
- name: Cache Cargo
uses: actions/cache@v4
with:
path: |
~/.cargo/registry
~/.cargo/git
target
key: cargo-${{ runner.os }}-${{ hashFiles('**/Cargo.lock') }}
restore-keys: |
cargo-${{ runner.os }}-
- name: Build
run: nix build
- name: Upload artifact
uses: actions/upload-artifact@v4
with:
name: libaethalloc
path: result/lib/*.so
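# benchmark-matrix: one job per (benchmark, run_id) pair. Each job runs the same
# command twice, once against the stock glibc allocator and once with the built
# library LD_PRELOADed, and logs the per-run delta for the selected metric.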
benchmark-matrix:
needs: build
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
benchmark:
- name: packet_churn
cmd: "/tmp/packet_churn 100000 10000"
metric: throughput_ops_per_sec
unit: ops/s
direction: higher
- name: multithread_churn
cmd: "/tmp/multithread_churn 8 100000"
metric: throughput_ops_per_sec
unit: ops/s
direction: higher
- name: kv_store
cmd: "/tmp/kv_store"
metric: throughput_ops_per_sec
unit: ops/s
direction: higher
- name: producer_consumer
cmd: "/tmp/producer_consumer"
metric: throughput_ops_per_sec
unit: ops/s
direction: higher
- name: realloc_churn
cmd: "/tmp/realloc_churn 100000 2"
metric: latency_ns.avg
unit: ns
direction: lower
- name: realloc_large
cmd: "/tmp/realloc_large 10000"
metric: latency_ns.avg
unit: ns
direction: lower
- name: fragmentation_churn
cmd: "/tmp/fragmentation_churn 50000 10000"
metric: latency_ns.avg
unit: ns
direction: lower
- name: fragmentation_rss
cmd: "/tmp/fragmentation"
metric: summary.final_rss_kb
unit: KB
direction: lower
run_id: [1, 2, 3, 4, 5]
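# run_id is not passed to the benchmark; it only fans each benchmark out into five
# independent jobs so run-to-run variance is visible in the logs.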
steps:
- uses: actions/checkout@v4
- name: Download artifact
uses: actions/download-artifact@v4
with:
name: libaethalloc
path: ./lib
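# Each benchmark binary is expected to print a single JSON object to stdout. The field
# names below are inferred from the metric paths used in this workflow; the exact shape
# of each benchmark's output is an assumption, e.g.:
#   {"throughput_ops_per_sec": 1234567.0, "latency_ns": {"avg": 85.2}, "summary": {"final_rss_kb": 20480}}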
- name: Compile benchmarks
run: |
gcc -O3 -pthread benches/packet_churn.c -o /tmp/packet_churn
gcc -O3 -pthread benches/kv_store.c -o /tmp/kv_store
gcc -O3 -pthread benches/producer_consumer.c -o /tmp/producer_consumer
gcc -O3 -pthread benches/multithread_churn.c -o /tmp/multithread_churn
gcc -O3 -pthread benches/fragmentation.c -o /tmp/fragmentation
gcc -O3 -pthread benches/realloc_churn.c -o /tmp/realloc_churn
gcc -O3 -pthread benches/realloc_large.c -o /tmp/realloc_large
gcc -O3 -pthread benches/fragmentation_churn.c -o /tmp/fragmentation_churn
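# The next two steps run the benchmark bare (glibc baseline) and under LD_PRELOAD
# (aethalloc), capturing the JSON output as a multiline step output via the
# "result<<EOF" $GITHUB_OUTPUT syntax.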
- name: Run glibc baseline
id: glibc
run: |
RESULT=$(${{ matrix.benchmark.cmd }} 2>&1)
echo "result<<EOF" >> $GITHUB_OUTPUT
echo "$RESULT" >> $GITHUB_OUTPUT
echo "EOF" >> $GITHUB_OUTPUT
- name: Run aethalloc
id: aethalloc
run: |
LIB=$(realpath lib/*.so)
RESULT=$(LD_PRELOAD="$LIB" ${{ matrix.benchmark.cmd }} 2>&1)
echo "result<<EOF" >> $GITHUB_OUTPUT
echo "$RESULT" >> $GITHUB_OUTPUT
echo "EOF" >> $GITHUB_OUTPUT
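# Parse both JSON outputs, walk the dotted metric path, and log the percentage delta,
# color-coded according to whether higher or lower is better for this metric.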
- name: Compare
run: |
python3 -c "
import json, os
glibc = json.loads(os.environ['GLIBC_RESULT'])
aeth = json.loads(os.environ['AETH_RESULT'])
metric_path = os.environ['METRIC'].split('.')
def get_nested(d, path):
for key in path:
if isinstance(d, dict):
d = d.get(key, 0)
else:
return 0
return d
glibc_val = get_nested(glibc, metric_path)
aeth_val = get_nested(aeth, metric_path)
delta = ((aeth_val - glibc_val) / glibc_val * 100) if glibc_val > 0 else 0
direction = os.environ['DIRECTION']
if direction == 'higher':
emoji = '🟢' if delta > 0 else '🔴' if delta < 0 else '➖'
else:
emoji = '🟢' if delta < 0 else '🔴' if delta > 0 else '➖'
print(f'{emoji} {os.environ[\"BENCH_NAME\"]} run {os.environ[\"RUN_ID\"]}: glibc={glibc_val:,.2f} | aethalloc={aeth_val:,.2f} | delta={delta:+.1f}%')
"
env:
GLIBC_RESULT: ${{ steps.glibc.outputs.result }}
AETH_RESULT: ${{ steps.aethalloc.outputs.result }}
METRIC: ${{ matrix.benchmark.metric }}
DIRECTION: ${{ matrix.benchmark.direction }}
BENCH_NAME: ${{ matrix.benchmark.name }}
RUN_ID: ${{ matrix.run_id }}
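# summarize: runs even if some matrix jobs fail (if: always()). It re-runs the whole
# suite on a single runner, writes a Markdown report to the job summary, and uploads
# the raw per-run numbers as a JSON artifact.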
summarize:
needs: benchmark-matrix
runs-on: ubuntu-latest
if: always()
steps:
- uses: actions/checkout@v4
- name: Download artifact
uses: actions/download-artifact@v4
with:
name: libaethalloc
path: ./lib
- name: Compile all benchmarks
run: |
gcc -O3 -pthread benches/packet_churn.c -o /tmp/packet_churn
gcc -O3 -pthread benches/kv_store.c -o /tmp/kv_store
gcc -O3 -pthread benches/producer_consumer.c -o /tmp/producer_consumer
gcc -O3 -pthread benches/multithread_churn.c -o /tmp/multithread_churn
gcc -O3 -pthread benches/fragmentation.c -o /tmp/fragmentation
gcc -O3 -pthread benches/realloc_churn.c -o /tmp/realloc_churn
gcc -O3 -pthread benches/realloc_large.c -o /tmp/realloc_large
gcc -O3 -pthread benches/fragmentation_churn.c -o /tmp/fragmentation_churn
gcc -O3 -pthread benches/tail_latency.c -o /tmp/tail_latency
- name: Run full benchmark suite
run: |
python3 << 'PYEOF'
import subprocess, json, statistics, os
LIB_PATH = subprocess.check_output("realpath lib/*.so", shell=True).decode().strip()
benchmarks = [
("packet_churn", "/tmp/packet_churn 100000 10000", "throughput_ops_per_sec", "ops/s", "higher"),
("multithread_churn", "/tmp/multithread_churn 8 100000", "throughput_ops_per_sec", "ops/s", "higher"),
("kv_store", "/tmp/kv_store", "throughput_ops_per_sec", "ops/s", "higher"),
("producer_consumer", "/tmp/producer_consumer", "throughput_ops_per_sec", "ops/s", "higher"),
("realloc_churn", "/tmp/realloc_churn 100000 2", "latency_ns.avg", "ns", "lower"),
("realloc_large", "/tmp/realloc_large 10000", "latency_ns.avg", "ns", "lower"),
("fragmentation_churn", "/tmp/fragmentation_churn 50000 10000", "latency_ns.avg", "ns", "lower"),
("fragmentation_rss", "/tmp/fragmentation", "summary.final_rss_kb", "KB", "lower"),
]
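# Each entry: (name, command, dotted path to the metric in the benchmark's JSON output,
# unit, and whether a higher or lower value is better).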
runs = 5
summary = "# Benchmark Results\n\n"
summary += f"**System:** GitHub Actions ubuntu-latest ({subprocess.check_output('nproc', shell=True).decode().strip()} cores)\n\n"
summary += f"**Runs per benchmark:** {runs}\n\n"
summary += "---\n\n"
for bench_name, cmd, metric, unit, direction in benchmarks:
glibc_vals = []
aeth_vals = []
for i in range(runs):
try:
out = subprocess.check_output(cmd, shell=True, stderr=subprocess.STDOUT, timeout=120).decode()
d = json.loads(out.strip())
parts = metric.split(".")
val = d
for p in parts:
val = val.get(p, 0) if isinstance(val, dict) else 0
glibc_vals.append(val)
except Exception as e:
print(f"WARNING: glibc {bench_name} run {i+1} failed: {e}")
try:
out = subprocess.check_output(f"LD_PRELOAD={LIB_PATH} {cmd}", shell=True, stderr=subprocess.STDOUT, timeout=120).decode()
d = json.loads(out.strip())
parts = metric.split(".")
val = d
for p in parts:
val = val.get(p, 0) if isinstance(val, dict) else 0
aeth_vals.append(val)
except Exception as e:
print(f"WARNING: aethalloc {bench_name} run {i+1} failed: {e}")
g_mean = statistics.mean(glibc_vals) if glibc_vals else 0
g_stdev = statistics.stdev(glibc_vals) if len(glibc_vals) > 1 else 0
a_mean = statistics.mean(aeth_vals) if aeth_vals else 0
a_stdev = statistics.stdev(aeth_vals) if len(aeth_vals) > 1 else 0
delta = ((a_mean - g_mean) / g_mean * 100) if g_mean > 0 else 0
if direction == "higher":
emoji = "🟢" if delta > 2 else "🔴" if delta < -2 else "➖"
else:
emoji = "🟢" if delta < -2 else "🔴" if delta > 2 else "➖"
summary += f"{emoji} **{bench_name}**\n"
if glibc_vals or aeth_vals:
summary += f"- glibc: {g_mean:,.0f} ± {g_stdev:,.0f} {unit}\n"
summary += f"- aethalloc: {a_mean:,.0f} ± {a_stdev:,.0f} {unit}\n"
summary += f"- **delta: {delta:+.1f}%**\n\n"
else:
summary += f"- ⚠️ All runs failed (benchmark may not work on this platform)\n\n"
# Tail latency
summary += "---\n\n## Tail Latency (8 threads, 50K ops)\n\n"
summary += "| Allocator | P50 | P99 | P99.9 | P99.99 | Max |\n"
summary += "|-----------|-----|-----|-------|--------|-----|\n"
for label, pre in [("glibc", ""), ("AethAlloc", f"LD_PRELOAD={LIB_PATH}")]:
try:
out = subprocess.check_output(f"{pre} /tmp/tail_latency 8 50000", shell=True, stderr=subprocess.STDOUT, timeout=120).decode()
d = json.loads(out.strip())
lat = d.get("latency_ns", {})
summary += f"| {label} | {lat.get('p50', 0):,.0f}ns | {lat.get('p99', 0):,.0f}ns | {lat.get('p99.9', 0):,.0f}ns | {lat.get('p99.99', 0):,.0f}ns | {lat.get('max', 0):,.0f}ns |\n"
except Exception as e:
summary += f"| {label} | ⚠️ | ⚠️ | ⚠️ | ⚠️ | ⚠️ |\n"
print(f"WARNING: {label} tail_latency failed: {e}")
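# GITHUB_STEP_SUMMARY renders this Markdown on the workflow run page.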
with open(os.environ["GITHUB_STEP_SUMMARY"], "w") as f:
f.write(summary)
# Also save raw JSON
raw = {}
for bench_name, cmd, metric, unit, direction in benchmarks:
parts = metric.split(".")
glibc_vals = []
aeth_vals = []
for i in range(runs):
try:
out = subprocess.check_output(cmd, shell=True, stderr=subprocess.STDOUT, timeout=120).decode()
d = json.loads(out.strip())
val = d
for p in parts:
val = val.get(p, 0) if isinstance(val, dict) else 0
glibc_vals.append(val)
except Exception:
pass
try:
out = subprocess.check_output(f"LD_PRELOAD={LIB_PATH} {cmd}", shell=True, stderr=subprocess.STDOUT, timeout=120).decode()
d = json.loads(out.strip())
val = d
for p in parts:
val = val.get(p, 0) if isinstance(val, dict) else 0
aeth_vals.append(val)
except Exception:
pass
if glibc_vals or aeth_vals:
raw[bench_name] = {
"glibc": {"mean": statistics.mean(glibc_vals) if glibc_vals else 0, "stdev": statistics.stdev(glibc_vals) if len(glibc_vals) > 1 else 0, "runs": glibc_vals},
"aethalloc": {"mean": statistics.mean(aeth_vals) if aeth_vals else 0, "stdev": statistics.stdev(aeth_vals) if len(aeth_vals) > 1 else 0, "runs": aeth_vals},
}
with open("benchmark-results.json", "w") as f:
json.dump(raw, f, indent=2)
PYEOF
- name: Upload results
uses: actions/upload-artifact@v4
with:
name: benchmark-results
path: benchmark-results.json