janelia-python
diff --git a/‎README.md‎
Lines changed: 68 additions & 2 deletions b/‎README.md‎
Lines changed: 68 additions & 2 deletions
diff --git a/‎pixi.lock‎
Lines changed: 1 addition & 1 deletion b/‎pixi.lock‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎pyproject.toml‎
Lines changed: 2 additions & 0 deletions b/‎pyproject.toml‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎scripts/bench_matrix.py‎
Lines changed: 24 additions & 4 deletions b/‎scripts/bench_matrix.py‎
Lines changed: 24 additions & 4 deletions
@@ -85,14 +85,16 @@ For Ethernet benchmarks, the simplest setup is:
 
 ```sh
 export ARENA_ETH_IP=192.168.10.104
-pixi run bench -- --stream-path patterns/pat0004.pat --json-out bench_results.jsonl
+pixi run bench-full -- --json-out bench_results.jsonl
 ```
 
 Useful pre-defined tasks:
 
 - `pixi run all-on`: turn all LEDs on as a communication sanity check
 - `pixi run all-off`: turn all LEDs off as a communication sanity check
-- `pixi run bench`: default host benchmark suite
+- `pixi run bench`: default host-side suite (`command_rtt` + `spf_updates`)
+- `pixi run bench-full`: default host-side suite plus `stream_frames` using `patterns/pat0004.pat`
+- `pixi run bench-smoke`: shortened `bench-full` run (250 command iterations, 2 s of SPF updates, 2 s of streaming) for quick confidence checks
 - `pixi run bench-persistent`: force persistent TCP sockets for small-command RTT
 - `pixi run bench-new-connection`: open a new TCP connection per command
 - `pixi run bench-no-quickack`: disable Linux `TCP_QUICKACK` but keep `TCP_NODELAY`
@@ -103,6 +105,70 @@ Useful pre-defined tasks:
 Extra arguments after the task are forwarded to the CLI or script, so you can
 still customize labels, durations, rates, and pattern paths.
 
+Examples:
+
+```sh
+pixi run bench -- --json-out host_only.jsonl
+pixi run bench-full -- --json-out host_plus_stream.jsonl
+pixi run bench-full -- --stream-rate 250 --stream-seconds 8 --json-out stream_250hz.jsonl
+```
+
+## Benchmark progress, timeouts, and failure reporting
+
+The benchmark command now prints phase start and finish lines as it runs, along
+with throttled in-phase progress for the long loops. That makes it much easier
+to tell whether a run is healthy, slow, or stuck.
+
+By default, the benchmark suite applies a temporary I/O timeout of `5.0` seconds
+to each read and connect operation. This avoids the old behavior where a missing
+reply could block a run forever.
+
+You can override that timeout from the CLI:
+
+```sh
+pixi run bench -- --io-timeout 10
+pixi run bench-full -- --io-timeout 0
+```
+
+Use `--io-timeout 0` to disable the temporary benchmark timeout and fall back to
+blocking I/O; as before, a missing reply can then block the run forever.
+
+If a phase fails, the suite now:
+
+- records `status=error`
+- records the failed phase name and exception in the JSON result
+- attempts a best-effort `ALL_OFF` cleanup before returning
+- exits the CLI with a nonzero status
+
+This makes it much easier to automate benchmarks in CI-like shell scripts or
+lab orchestration scripts.
+
+## Host-only benchmarks versus QS logs
+
+The Python benchmark suite is enough to compare host-visible and end-to-end
+behavior across:
+
+- operating systems
+- host machines
+- NICs, switches, and cables
+- socket-option policies such as `TCP_NODELAY` and `TCP_QUICKACK`
+
+The JSON output is therefore a good default artifact for broad comparisons
+across rigs.
+
+QS logs are still important when you need firmware-internal detail, including:
+
+- `PERF_NET` poll cadence and command processing cost
+- `PERF_UPD` receive / process / commit / applied / coalesced counts
+- display-transfer and SPI bottlenecks
+- confirmation that the rig applied what the host sent
+
+A practical workflow is:
+
+1. Run Python-only benchmarks everywhere for broad comparison.
+2. Capture QS logs on representative runs or anomalous runs.
+3. Use the QS logs to explain why two host-visible results differ.
+
 ## Socket latency tuning
 
 The host code exposes both `TCP_NODELAY` and `TCP_QUICKACK` as explicit options.
 
@@ -86,6 +86,8 @@ check = [{ task = "lint" }, { task = "test" }]
 build = "python -m build"
 archive = "git archive --format=zip --output=../arena_interface_python.zip HEAD"
 bench = "arena-interface bench"
+bench-full = "arena-interface bench --stream-path patterns/pat0004.pat"
+bench-smoke = "arena-interface bench --cmd-iters 250 --spf-seconds 2 --stream-path patterns/pat0004.pat --stream-seconds 2"
 bench-persistent = "arena-interface bench --cmd-connect-mode persistent"
 bench-new-connection = "arena-interface bench --cmd-connect-mode new_connection"
 bench-no-quickack = "arena-interface --no-tcp-quickack bench"
 
@@ -13,7 +13,7 @@
         sys.path.insert(0, str(src_root))
 
 from arena_interface import ArenaInterface
-from arena_interface.arena_interface import SERIAL_BAUDRATE
+from arena_interface.arena_interface import BENCH_IO_TIMEOUT_S, SERIAL_BAUDRATE
 
 VARIANTS: dict[str, dict[str, bool]] = {
     "default": {"tcp_nodelay": True, "tcp_quickack": True},
@@ -59,6 +59,12 @@ def build_parser() -> argparse.ArgumentParser:
     parser.add_argument("--stream-seconds", type=float, default=5.0)
     parser.add_argument("--stream-coalesced", action=argparse.BooleanOptionalAction, default=True)
     parser.add_argument("--progress-interval", type=float, default=1.0)
+    parser.add_argument(
+        "--io-timeout",
+        type=float,
+        default=BENCH_IO_TIMEOUT_S,
+        help="Temporary per-read/connect timeout for each suite run in seconds. Use 0 to disable.",
+    )
     return parser
 
 
@@ -67,11 +73,20 @@ def variant_label(base_label: str | None, variant_name: str) -> str:
 
 
 def print_summary(variant_name: str, suite: dict) -> None:
+    meta = suite.get("meta", {})
+    quickack = meta.get("tcp_quickack_supported") and meta.get("tcp_quickack_requested")
+    status = suite.get("status", "unknown")
+
+    if status != "ok":
+        error = suite.get("error") or {}
+        print(
+            f"{variant_name:>18} | FAILED {error.get('phase')} {error.get('type')}: {error.get('message')}"
+        )
+        return
+
     cmd = suite["command_rtt"]
     spf = suite["spf_updates"]
     stream = suite.get("stream_frames")
-    meta = suite.get("meta", {})
-    quickack = meta.get("tcp_quickack_supported") and meta.get("tcp_quickack_requested")
 
     line = (
         f"{variant_name:>18} | cmd mean={cmd['mean_ms']:.3f} ms p99={cmd['p99_ms']:.3f} | "
@@ -96,6 +111,7 @@ def configure_transport(ai: ArenaInterface, args: argparse.Namespace) -> None:
 
 def main() -> int:
     args = build_parser().parse_args()
+    exit_code = 0
 
     print("variant               | command RTT               | SPF        | socket policy")
     print("----------------------+---------------------------+------------+-------------------------------")
@@ -125,12 +141,16 @@ def main() -> int:
                 stream_seconds=float(args.stream_seconds),
                 stream_coalesced=bool(args.stream_coalesced),
                 progress_interval_s=float(args.progress_interval),
+                bench_io_timeout_s=float(args.io_timeout),
+                status_callback=print,
             )
             if args.json_out is not None:
                 ArenaInterface.write_bench_jsonl(str(args.json_out), suite)
             print_summary(variant_name, suite)
+            if suite.get("status") != "ok":
+                exit_code = 1
 
-    return 0
+    return exit_code
 
 
 if __name__ == "__main__":