Skip to content

feat: curated public benchmark dataset and leaderboard #467

feat: curated public benchmark dataset and leaderboard

feat: curated public benchmark dataset and leaderboard #467

Workflow file for this run

# Repository validation workflow: link, marketplace, eval, and benchmark-result
# checks (see the entries under `jobs`).
name: Validate

# Run on every push to `main` and on pull requests. `pull_request` declares no
# `types`, so GitHub's default activity types apply.
on:
  push:
    branches: [main]
  pull_request:

# Least privilege: the jobs check out the repository and run validators, so the
# GITHUB_TOKEN is limited to read-only access to repository contents.
# NOTE(review): confirm ./.github/actions/setup-bun needs no additional scopes.
permissions:
  contents: read
jobs:
  # Runs lychee over every Markdown file in the repository. `--offline` is
  # passed, so (per lychee's documentation) only local links are checked.
  links:
    name: Check Links
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Check relative markdown links
        uses: lycheeverse/lychee-action@v2
        with:
          # Folded scalar: the lines below are joined with single spaces into
          # one argument string.
          args: >-
            --offline
            --no-progress
            --glob-ignore-case
            --root-dir .
            "**/*.md"

  # Runs the three marketplace validation scripts with Bun.
  marketplace:
    name: Validate Marketplace
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/setup-bun
      - name: Validate marketplace.json (schema + sync)
        run: bun scripts/marketplace/validate-marketplace.ts
      - name: Check marketplace sorted
        run: bun scripts/marketplace/check-sorted.ts
      - name: Validate frontmatter
        run: bun scripts/marketplace/validate-frontmatter.ts

  # Builds the project, then validates the eval directories and eval files
  # under examples/features.
  evals:
    name: Validate Evals
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/setup-bun
      # The CLI invoked below lives in apps/cli/dist, so it is run after the
      # build step.
      - name: Build
        run: bun run build
      - name: Check evals directories have eval files
        run: bun scripts/validate-eval-dirs.ts
      - name: Validate eval schemas
        # Folded scalar: joined into a single command line. The globs are
        # single-quoted so the shell passes them to the CLI unexpanded.
        run: >-
          bun apps/cli/dist/cli.js validate
          'examples/features/**/evals/**/*.eval.yaml'
          'examples/features/**/*.EVAL.yaml'

  # Validates committed SWE-bench Lite result files.
  # Runs on every push event; on pull requests it runs only when the PR title
  # or one of its labels contains "benchmark".
  # NOTE(review): the workflow's `pull_request` trigger declares no `types`, so
  # with GitHub's defaults (opened, synchronize, reopened) adding the label or
  # editing the title later does not re-run this job until the next push to the
  # PR. Confirm that is intended.
  benchmark-results:
    name: Validate Benchmark Results
    runs-on: ubuntu-latest
    if: >-
      contains(github.event.pull_request.title, 'benchmark') ||
      contains(join(github.event.pull_request.labels.*.name, ','), 'benchmark') ||
      github.event_name == 'push'
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/setup-bun
      - name: Validate SWE-bench Lite result JSON files
        # `ls` is used only as an existence probe: when no *.json file matches,
        # it fails and the step prints a skip message instead of running the
        # validator.
        run: |
          if ls benchmarks/swe-bench-lite/results/*.json 1> /dev/null 2>&1; then
            bun benchmarks/swe-bench-lite/validate-result.ts benchmarks/swe-bench-lite/results/*.json
          else
            echo "No result files found — skipping"
          fi