From 949b09b5518e93277254c992eae937d80d4c6b3c Mon Sep 17 00:00:00 2001 From: Yibai Meng Date: Tue, 2 Jun 2026 01:47:34 +0000 Subject: [PATCH 1/2] feat: show progress bar during multi-turn ISL pre-compute The serial apply_chat_template loop over every client turn runs silently for minutes on large multi-turn datasets, so it looks like a hang. Show a throttled tqdm bar (2s mininterval) over the loop for liveness + ETA in captured logs. No behavior change. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/inference_endpoint/commands/benchmark/execute.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/inference_endpoint/commands/benchmark/execute.py b/src/inference_endpoint/commands/benchmark/execute.py index 88e337f89..4221e1d4e 100644 --- a/src/inference_endpoint/commands/benchmark/execute.py +++ b/src/inference_endpoint/commands/benchmark/execute.py @@ -335,7 +335,15 @@ def _precompute_isl_for_multi_turn( return skipped = 0 first_failure_logged = False - for sample in dataloader.data or []: + samples = dataloader.data or [] + for sample in tqdm( + samples, + total=len(samples), + desc="Pre-computing ISL token counts", + unit="turn", + smoothing=0, + mininterval=2.0, + ): messages = sample.get("messages") if not messages: continue From 6b6d35ca6d8cc3f51e6f509a7237e55428bd47a2 Mon Sep 17 00:00:00 2001 From: Yibai Meng Date: Mon, 1 Jun 2026 19:48:25 -0700 Subject: [PATCH 2/2] Update src/inference_endpoint/commands/benchmark/execute.py Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- src/inference_endpoint/commands/benchmark/execute.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/inference_endpoint/commands/benchmark/execute.py b/src/inference_endpoint/commands/benchmark/execute.py index 4221e1d4e..090f43e50 100644 --- a/src/inference_endpoint/commands/benchmark/execute.py +++ b/src/inference_endpoint/commands/benchmark/execute.py @@ -343,6 +343,7 @@ def 
_precompute_isl_for_multi_turn( unit="turn", smoothing=0, mininterval=2.0, + disable=not logger.isEnabledFor(logging.INFO), ): messages = sample.get("messages") if not messages: