34 changes: 34 additions & 0 deletions .github/workflows/bench.yml
@@ -0,0 +1,34 @@
name: Bench

on:
workflow_dispatch: {}
push:
branches:
- tb-performance

jobs:
run-bench:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4

- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.11'

- name: Install requirements (if any)
run: |
python -m pip install --upgrade pip
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi

- name: Run bench runner
run: |
python -m bench.runner --module bench.targets.game_runner_target --iters 3 > bench_output.json

- name: Upload results
uses: actions/upload-artifact@v4
with:
name: bench-results
path: bench_output.json
1 change: 1 addition & 0 deletions BRANCH_FEATURE.md
@@ -0,0 +1 @@
TB-PERFORMANCE: Focus = benchmarks, profiling, perf harnesses.
5 changes: 5 additions & 0 deletions FEATURES_SUMMARY.md
@@ -76,3 +76,8 @@ This document is an inventory of the major features and modules present in the T


*This inventory is intentionally high-level. The next step (SIMPLIFICATIONS.md) will analyze pros/cons and propose simplifications.*

Additions from the TB-Seed work:
- `src/vision/schema.py` — small validator for detection outputs (`bbox`, `label`, `confidence`/`score`).
- `scripts/run_minimal.py` and `src/training/game_runner.py` — updated to use the minimal runner and the opt-in stub API for CI.
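The validator in `src/vision/schema.py` is not shown in this diff; a minimal sketch of what such a detection-output check might look like, where only the field names (`bbox`, `label`, `confidence`/`score`) come from the summary above and everything else is assumed:

```python
from typing import Any, Dict


def validate_detection(det: Dict[str, Any]) -> bool:
    """Hypothetical check for one detection dict: bbox, label, confidence/score."""
    bbox = det.get("bbox")
    if not (isinstance(bbox, (list, tuple)) and len(bbox) == 4):
        return False
    if not isinstance(det.get("label"), str):
        return False
    # Accept either key name for the confidence value.
    conf = det.get("confidence", det.get("score"))
    return isinstance(conf, (int, float)) and 0.0 <= conf <= 1.0
```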

22 changes: 22 additions & 0 deletions README_MINIMAL.md
@@ -0,0 +1,22 @@
# Running TB-Seed minimal mode

This README explains the minimal mode runner used for quick experiments and CI smoke tests.

Usage (developer):

- Run with real API manager (if configured):

```powershell
python scripts\run_minimal.py --game-id mygame --max-actions 50
```

- Run in CI/test mode using the stub ARC3 API (explicit opt-in flag required):

```powershell
python scripts\run_minimal.py --use-stub-api-for-ci --game-id test_game --max-actions 2
```

Notes:
- Per the seed contract, production/research runs must use a real ARC3 API and DB-backed persistence.
- The stub API is allowed only for CI/test runs and must be explicitly opted-in with `--use-stub-api-for-ci` or the environment variable `USE_STUB_API_FOR_CI=1`.
- Tests live in `/tests`. The CI workflow runs those tests and a minimal smoke invocation.
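The opt-in gate described above could be implemented roughly like this (a sketch: the actual `run_minimal.py` logic is not shown in this diff, and only the flag and environment-variable names come from the notes):

```python
import os
from argparse import Namespace


def stub_api_allowed(args: Namespace) -> bool:
    """The stub ARC3 API is permitted only with an explicit opt-in (flag or env var)."""
    if getattr(args, "use_stub_api_for_ci", False):
        return True
    return os.environ.get("USE_STUB_API_FOR_CI") == "1"
```

A runner would call this once at startup and refuse to construct the stub API manager when it returns `False`, keeping production runs on the real API by default.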
42 changes: 42 additions & 0 deletions bench/README.md
@@ -0,0 +1,42 @@
# Bench

Lightweight benchmarking helpers for the project. The bench harness is intentionally minimal and designed to be CI-friendly.

Usage
------

Run a target locally (recommended to run as module so package imports resolve):

```powershell
python -m bench.runner --module bench.targets.game_runner_target --iters 3
```

Write JSON results to a file (useful for CI):

```powershell
python -m bench.runner --module bench.targets.game_runner_target --iters 3 --out bench/results.json
```

Notes
------
- Bench targets live under `bench/targets/`. Each target should expose a `main()` callable; it may accept an optional `iterations` keyword, but the runner invokes it without arguments.
- Running as a module (`python -m bench.runner`) ensures `bench` is on sys.path and relative imports work.
- Targets should use the project's `StubAPIManager` for deterministic runs in CI.
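A new target is therefore just a module with a `main()`. An illustrative (hypothetical) microbenchmark target, kept dependency-free so CI timings stay stable, might look like:

```python
"""Hypothetical bench target, e.g. bench/targets/sum_squares_target.py."""


def main(iterations: int = 1) -> int:
    # Deterministic, dependency-free work; the runner calls main() with no args,
    # so the iterations parameter must have a default.
    total = 0
    for _ in range(max(iterations, 1)):
        total = sum(i * i for i in range(10_000))
    return total
```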

CI integration
--------------
The repository includes a GitHub Actions workflow `.github/workflows/bench.yml` that runs this harness on `workflow_dispatch` and on pushes to the `tb-performance` branch. The workflow writes the runner stdout to `bench_output.json` and uploads it as an artifact named `bench-results`.

Next steps
-----------
- Add more targets for microbenchmarks (DB, vision preprocessing, etc.).
- Consider adding a small history store (CSV/JSON) to track performance over time.
Performance harness
-------------------
Simple utilities to measure runtime and memory for key operations, using small inputs so they run quickly in CI. `python -m bench.runner --help` lists all options.
16 changes: 16 additions & 0 deletions bench/history.json
@@ -0,0 +1,16 @@
[
{
"timestamp": "2025-10-16T21:01:50.518805",
"results": {
"module": "bench.targets.game_runner_target",
"func": "main",
"iterations": 1,
"stats": {
"runs": 1,
"avg": 0.7940893999766558,
"min": 0.7940893999766558,
"max": 0.7940893999766558
}
}
}
]
11 changes: 11 additions & 0 deletions bench/results.json
@@ -0,0 +1,11 @@
{
"module": "bench.targets.game_runner_target",
"func": "main",
"iterations": 1,
"stats": {
"runs": 1,
"avg": 0.7940893999766558,
"min": 0.7940893999766558,
"max": 0.7940893999766558
}
}
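Downstream tooling (CI checks, dashboards) can consume this `--out` payload directly. A sketch of a simple regression gate over that shape — the function name and the 2-second budget are illustrative, not part of the repository:

```python
def check_regression(payload: dict, max_avg_seconds: float = 2.0) -> bool:
    """Return True when the benchmark average time is within budget."""
    stats = payload.get("stats") or {}
    avg = stats.get("avg")
    return avg is not None and avg <= max_avg_seconds


# The payload shape mirrors bench/results.json above.
payload = {
    "module": "bench.targets.game_runner_target",
    "func": "main",
    "iterations": 1,
    "stats": {"runs": 1, "avg": 0.794, "min": 0.794, "max": 0.794},
}
```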
66 changes: 66 additions & 0 deletions bench/runner.py
@@ -0,0 +1,66 @@
"""Simple benchmark runner for the project.

This runner provides a tiny harness to measure execution time for a provided target function.
It is intentionally minimal and includes an option to write JSON results for CI.
"""
import time
import argparse
import json
from importlib import import_module
from typing import Dict, Any, List


def time_function(module_path: str, func_name: str = "main", iterations: int = 10) -> List[float]:
module = import_module(module_path)
fn = getattr(module, func_name)
times = []
for _ in range(iterations):
start = time.perf_counter()
        # Call the target with no arguments; targets that accept an
        # iterations parameter must give it a default value.
fn()
elapsed = time.perf_counter() - start
times.append(elapsed)
return times


def summarize(times: List[float]) -> Dict[str, Any]:
if not times:
return {"runs": 0, "avg": None, "min": None, "max": None}
return {
"runs": len(times),
"avg": sum(times) / len(times),
"min": min(times),
"max": max(times),
}


def main():
parser = argparse.ArgumentParser()
parser.add_argument("--module", required=True, help="Module path to benchmark, e.g., src.training.game_runner_demo")
parser.add_argument("--func", default="main", help="Function name to call inside module")
parser.add_argument("--iters", type=int, default=3, help="Iterations to run")
parser.add_argument("--out", help="Path to write JSON output (also prints to stdout)")
args = parser.parse_args()

times = time_function(args.module, args.func, args.iters)
stats = summarize(times)

# Human-friendly output
if stats["runs"]:
print(f"Runs: {stats['runs']}, avg: {stats['avg']:.6f}s, min: {stats['min']:.6f}s, max: {stats['max']:.6f}s")
else:
print("No runs executed")

# Write JSON results if requested
if args.out:
payload = {"module": args.module, "func": args.func, "iterations": args.iters, "stats": stats}
try:
with open(args.out, "w", encoding="utf-8") as fh:
json.dump(payload, fh, indent=2)
print(f"Wrote results to {args.out}")
except Exception as e:
print(f"Failed to write output file {args.out}: {e}")


if __name__ == "__main__":
main()
1 change: 1 addition & 0 deletions bench/targets/__init__.py
@@ -0,0 +1 @@
# Bench targets package
64 changes: 64 additions & 0 deletions bench/targets/game_runner_target.py
@@ -0,0 +1,64 @@
"""Benchmark target: Run a minimal GameRunner invocation once.

This module exposes a `main()` function that the bench runner imports to measure.
It uses the project's stub API manager and GameRunner where available.

This target is intentionally lightweight and deterministic for CI.
"""
from __future__ import annotations

import time
from typing import Dict, Any
import asyncio

# Import project modules with defensive fallbacks in case bench is run outside of PYTHONPATH
try:
from src.api.stub_api_manager import StubAPIManager
from src.training.game_runner import GameRunner
except Exception:
    # Fallback: relative imports, which only resolve if the repository root is itself a package
try:
from ..src.api.stub_api_manager import StubAPIManager # type: ignore
from ..src.training.game_runner import GameRunner # type: ignore
except Exception as e: # pragma: no cover - best-effort import
raise ImportError(
"Could not import project GameRunner or StubAPIManager. Ensure bench is run with repository root on PYTHONPATH"
) from e


def _run_once(api: Any) -> Dict[str, Any]:
"""Helper to run the async GameRunner.run_game synchronously via asyncio.run."""
runner = GameRunner(api_manager=api)
# GameRunner exposes async run_game(game_id, max_actions)
return asyncio.run(runner.run_game("bench_game", max_actions=10))


def main(iterations: int = 1) -> Dict[str, Any]:
"""Run GameRunner `iterations` times and return timing summary.

Returns a dict with keys: iterations, total_time, avg_time, sample_result
"""
api = StubAPIManager()
results = []
start = time.perf_counter()
sample_result = None
for _ in range(iterations):
res = _run_once(api)
sample_result = res
results.append(res)
end = time.perf_counter()
total = end - start
avg = total / iterations if iterations else 0
return {
"iterations": iterations,
"total_time": total,
"avg_time": avg,
"sample_result": sample_result,
}


if __name__ == "__main__":
import json

out = main(iterations=1)
print(json.dumps(out, indent=2))
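The `_run_once` helper relies on `asyncio.run` to drive the async entry point synchronously. The same pattern in isolation, with a stand-in coroutine since `GameRunner` itself is defined elsewhere in the project:

```python
import asyncio


async def fake_run_game(game_id: str, max_actions: int) -> dict:
    """Stand-in for GameRunner.run_game (hypothetical; the real method is async)."""
    await asyncio.sleep(0)  # yield control once, as a real runner would await I/O
    return {"game_id": game_id, "actions_taken": max_actions}


def run_once() -> dict:
    # Mirrors _run_once above: one asyncio.run per call, a fresh event loop each time.
    return asyncio.run(fake_run_game("bench_game", max_actions=10))
```

Because `asyncio.run` creates and tears down a fresh event loop per call, invoking this once per benchmark iteration is safe, though loop setup cost is included in the timing.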
3 changes: 3 additions & 0 deletions bench_output.json
@@ -0,0 +1,3 @@
[OK] ADVANCED SYSTEMS IMPORTED SUCCESSFULLY
[OK] Environment variables loaded from .env file in centralized_config
Runs: 1, avg: 0.771294s, min: 0.771294s, max: 0.771294s