-
Notifications
You must be signed in to change notification settings - Fork 0
134 lines (110 loc) · 4.64 KB
/
Copy pathperformance.yml
File metadata and controls
134 lines (110 loc) · 4.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# Performance regression workflow: runs Criterion micro-benchmarks and
# release-mode integration performance tests for the Rust workspace.
name: Performance Regression Detection

# Triggered by pull requests targeting main, pushes to main, and manual runs.
on:
  pull_request:
    branches: [main]
  push:
    branches: [main]
  workflow_dispatch:

# Least privilege: the jobs only read the repository contents. Caching and
# artifact upload do not rely on GITHUB_TOKEN scopes, so read access suffices.
permissions:
  contents: read

# Environment shared by every job in this workflow.
env:
  CARGO_TERM_COLOR: always
  # Quoted so the value is unambiguously a string.
  RUST_BACKTRACE: "1"
jobs:
  # Runs the three Criterion benchmark targets, maintains a "main" baseline for
  # pull-request comparisons, and publishes the HTML reports as an artifact.
  micro-benchmarks:
    name: Criterion Micro-benchmarks
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install Rust stable
        uses: dtolnay/rust-toolchain@stable

      - name: Cache cargo registry
        uses: actions/cache@v4
        with:
          path: ~/.cargo/registry
          key: ${{ runner.os }}-cargo-registry-${{ hashFiles('**/Cargo.lock') }}

      - name: Cache cargo index
        uses: actions/cache@v4
        with:
          path: ~/.cargo/git
          key: ${{ runner.os }}-cargo-index-${{ hashFiles('**/Cargo.lock') }}

      - name: Cache build artifacts
        uses: actions/cache@v4
        with:
          path: target
          key: ${{ runner.os }}-cargo-build-${{ hashFiles('**/Cargo.lock') }}

      # The build cache above is keyed on Cargo.lock only and is not re-saved
      # on a key hit, so it cannot carry an up-to-date baseline. Pull requests
      # therefore restore the most recent baseline saved by a run on main
      # (exact base commit first, newest matching prefix otherwise).
      - name: Restore Criterion baseline from main
        if: github.event_name == 'pull_request'
        uses: actions/cache/restore@v4
        with:
          path: target/criterion
          key: ${{ runner.os }}-criterion-baseline-${{ github.event.pull_request.base.sha }}
          restore-keys: |
            ${{ runner.os }}-criterion-baseline-

      # Plain runs are skipped on main: the baseline step below already runs
      # every benchmark there, so running them here would double the CI time.
      - name: Run event_serialization benchmark
        if: github.ref != 'refs/heads/main'
        run: cargo bench --bench event_serialization

      - name: Run broadcaster_throughput benchmark
        if: github.ref != 'refs/heads/main'
        run: cargo bench --bench broadcaster_throughput

      - name: Run coordination_overhead benchmark
        if: github.ref != 'refs/heads/main'
        run: cargo bench --bench coordination_overhead

      - name: Store baseline on main branch
        if: github.ref == 'refs/heads/main'
        run: |
          cargo bench --bench event_serialization -- --save-baseline main
          cargo bench --bench broadcaster_throughput -- --save-baseline main
          cargo bench --bench coordination_overhead -- --save-baseline main

      # One cache entry per main commit so later pull requests can restore it.
      # If an entry for this commit already exists (e.g. a manual re-run), the
      # save is expected to log a warning rather than fail the job.
      - name: Save Criterion baseline for pull requests
        if: github.ref == 'refs/heads/main'
        uses: actions/cache/save@v4
        with:
          path: target/criterion
          key: ${{ runner.os }}-criterion-baseline-${{ github.sha }}

      # Best-effort comparison: if no "main" baseline could be restored the
      # command fails and only a notice is printed. The plain benchmark steps
      # above have already failed the job if a benchmark cannot build or run.
      # NOTE(review): the baseline was measured on a different hosted runner,
      # so treat the reported deltas as indicative rather than exact.
      - name: Compare against main baseline on PRs
        if: github.event_name == 'pull_request'
        run: |
          cargo bench --bench event_serialization -- --baseline main || echo "No baseline to compare"
          cargo bench --bench broadcaster_throughput -- --baseline main || echo "No baseline to compare"
          cargo bench --bench coordination_overhead -- --baseline main || echo "No baseline to compare"

      # Uploaded last, and even on failure, so the artifact also contains the
      # output of the baseline and comparison runs.
      - name: Upload Criterion HTML reports
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: criterion-reports
          path: target/criterion/
          retention-days: 14

  # Release-mode integration performance tests.
  integration-performance:
    name: Integration Performance Tests
    runs-on: ubuntu-latest
    # Runs only when the triggering ref is main (a push to main or a manual
    # run on main); skipped for pull requests to avoid excessive CI time.
    if: github.ref == 'refs/heads/main'
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install Rust stable
        uses: dtolnay/rust-toolchain@stable

      - name: Cache cargo registry
        uses: actions/cache@v4
        with:
          path: ~/.cargo/registry
          key: ${{ runner.os }}-cargo-registry-${{ hashFiles('**/Cargo.lock') }}

      - name: Cache cargo index
        uses: actions/cache@v4
        with:
          path: ~/.cargo/git
          key: ${{ runner.os }}-cargo-index-${{ hashFiles('**/Cargo.lock') }}

      # Separate key from the benchmark job's build cache.
      - name: Cache build artifacts
        uses: actions/cache@v4
        with:
          path: target
          key: ${{ runner.os }}-cargo-build-perf-${{ hashFiles('**/Cargo.lock') }}

      - name: Build release binary
        run: cargo build --release

      - name: Run baseline single agent tests
        run: cargo test --test perf_baseline_single_agent --release -- --nocapture

      - name: Run concurrent agents test
        run: cargo test --test perf_concurrent_agents --release -- --nocapture

      # These tests are marked #[ignore], hence the explicit --ignored flag.
      - name: Run memory stability tests (ignored by default)
        run: cargo test --test perf_memory_stability --release -- --ignored --nocapture

  # Gate job: fails when an upstream performance job did not finish cleanly,
  # so it can serve as a single required status check.
  regression-check:
    name: Regression Failure Detection
    runs-on: ubuntu-latest
    needs: [micro-benchmarks, integration-performance]
    # always() so this job still runs, and can report failure, when an
    # upstream job failed or was skipped.
    if: always()
    steps:
      - name: Check benchmark results
        env:
          # Passed through the environment rather than interpolated into the
          # script text.
          MICRO_RESULT: ${{ needs.micro-benchmarks.result }}
          INTEGRATION_RESULT: ${{ needs.integration-performance.result }}
        run: |
          # What this gate covers:
          # 1. micro-benchmarks must succeed (benchmarks build and run).
          # 2. integration-performance must succeed, or be skipped (it only
          #    runs on main). Per the original notes its tests assert on
          #    timing (>10s for 20 agents, >100ms p95 latency) and on
          #    unbounded memory growth.
          # What it does NOT cover: Criterion reports performance changes in
          # its output but does not fail the process on a regression. The
          # benchmark code reportedly sets significance_level(0.1); that is a
          # p-value threshold, not a 10% slowdown limit. Parsing Criterion
          # output to fail on regressions is still to be implemented.
          echo "micro-benchmarks result: ${MICRO_RESULT}"
          echo "integration-performance result: ${INTEGRATION_RESULT}"

          status=0
          if [ "${MICRO_RESULT}" != "success" ]; then
            echo "::error::micro-benchmarks finished with result '${MICRO_RESULT}'"
            status=1
          fi
          case "${INTEGRATION_RESULT}" in
            success|skipped) ;;
            *)
              echo "::error::integration-performance finished with result '${INTEGRATION_RESULT}'"
              status=1
              ;;
          esac

          echo "Benchmark results checked. See micro-benchmarks job for details."
          echo "Criterion reports regressions in its output; inspect the criterion-reports artifact."
          exit "${status}"