From 737f111578846f3d53ad1a7b81eb6200c032aecc Mon Sep 17 00:00:00 2001 From: Sohil Kshirsagar Date: Fri, 20 Mar 2026 10:58:55 -0400 Subject: [PATCH 1/9] fix: service and test logging improvements (wip) --- DEFAULT_ENABLE_SERVICE_LOGS.md | 140 ++++++++ cmd/run.go | 35 +- docs/tui-testing.md | 326 ++++++++++++++++++ internal/agent/prompts/phase_simple_test.md | 7 +- .../prompts/phase_verify_complex_test.md | 2 +- .../agent/prompts/phase_verify_simple_test.md | 6 +- internal/log/log.go | 27 +- internal/runner/environment.go | 18 + internal/runner/environment_replay.go | 15 +- internal/runner/executor.go | 68 +++- internal/runner/server.go | 6 +- internal/runner/service.go | 84 +++-- internal/runner/service_test.go | 11 +- internal/tui/test_executor.go | 112 +++++- 14 files changed, 792 insertions(+), 65 deletions(-) create mode 100644 DEFAULT_ENABLE_SERVICE_LOGS.md create mode 100644 docs/tui-testing.md diff --git a/DEFAULT_ENABLE_SERVICE_LOGS.md b/DEFAULT_ENABLE_SERVICE_LOGS.md new file mode 100644 index 0000000..8bf0b12 --- /dev/null +++ b/DEFAULT_ENABLE_SERVICE_LOGS.md @@ -0,0 +1,140 @@ +# Slack thread context + +sohil  [4:56 PM] +when service fails to start in CLI (but `--enable-service-logs` is false), thoughts on trying to start the service but with service logs enabled? rn i feel like it's hard to debug in CI when things go wrong + +Sohan  [4:58 PM] +yea i like that. can enforce adding the following in CI too to upload log file + +```yaml +- name: Upload Tusk service logs + if: always() + uses: actions/upload-artifact@v4 + with: + name: tusk-service-logs + path: .tusk/logs + if-no-files-found: ignore +``` + +sohil  [4:59 PM] +nice, yeah could do that our just output to logs and they'll show up in CI anyways +marcel  [4:59 PM] +nice nice +Sohan  [5:00 PM] +sdks should also support env var override for log level so we can run in debug mode (edited)  +sohil  [7:00 PM] +@Jun Yu thoughts on this? 
feels like this could even be default behavior +(as in, if service doesn't start, then retry starting with service logs enabled) +Jun Yu  [7:09 PM] +why not just explicitly add `--enable-service-logs` to our CI workflow? (edited)  +sohil  [7:11 PM] +i'm thinking for customers, that's just an extra step they may not realize. so when they're iterating on their CI workflow, if smth fails, immediately they'll get logs explaining why +Jun Yu  [7:15 PM] +not a huge fan of your suggested pattern :thinking_face: bc of the obscure magic + +i'm thinking smth like this: `--service-logs [enable | enable-with-retry]` or something like that + +- `enable` = enable every time +- `enable-with-retry` = retry with service logs + - honestly still could be better (we're coupling retry logic with service logs) + +sohil  [7:18 PM] +hmm but that doesn't solve problem for customers who are iterating on workflow +i feel like it could get frustrating to iterate when things are failing, and then having to wait for iteration +another solution could just be to enable service logs by default in CI +Jun Yu  [7:19 PM] +hmm service logs by default in CI feels cleaner, can just add to the help text +sohil  [7:20 PM] +or our github action adds it by default, and we can update docs/prompts for setup agent +rn we don't output service logs anywhere though right, it just persists to file system? we could also add logic in CLI where if it's run in `--print` mode, and service fails to start, it outputs service logs +Jun Yu  [7:22 PM] +do you think that could be verbose? with run output and service logs being interleaved +sohil  [7:22 PM] +it would only show if service failed to start, so there's no run output really? +Jun Yu  [7:23 PM] +oh yea makes sense +sohil  [7:23 PM] +maybe this is only in CI mode, since user doesn't have access to file system + +# Final solution + +Service logs (stdout/stderr from the user's service) are always captured during startup. 
This gives users immediate visibility when things go wrong, without needing to know about any flags. + +## Behavior + +### During service startup (always) +- Service stdout/stderr is always captured during startup +- If `--enable-service-logs` is set: stream directly to `.tusk/logs/` file during startup (file acts as the buffer) +- If `--enable-service-logs` is NOT set: buffer in memory during startup + +### On startup failure +- **TUI**: dump captured service logs into the right side panel +- **Print mode**: dump last N lines to **stderr** (not stdout, to avoid breaking `--output-format json` consumers) +- Update the startup failure help message (`executor.go:503-521`) — currently it suggests using `--enable-service-logs`, which is no longer relevant. New suggestions: + - "Check the service logs above for details" + - "For more verbose SDK logging, set the log level to debug in your SDK initialization" + - Note: `--debug` on the CLI only controls fence sandbox debug mode, not useful for customers debugging service startup + +### Sandbox retry and service logs +- Default sandbox mode will change to `strict` (in a subsequent commit) +- In `auto` mode (`environment.go:24-37`): if sandboxed startup fails, it retries without sandbox. Both attempts' logs should be kept — append a separator (e.g. `"⚠️ Retrying without sandbox..."`) and continue capturing. If the retry also fails, the user sees logs from both attempts. If the retry succeeds and `--enable-service-logs` isn't set, discard everything +- For the in-memory buffer: just keep appending across retries +- For the file buffer (`--enable-service-logs`): reuse the same file handle across retries rather than creating a new file + +### On startup success +- If `--enable-service-logs` is set: continue streaming to `.tusk/logs/` file for the entire run (same as today's behavior) +- If `--enable-service-logs` is NOT set: swap `e.serviceCmd.Stdout/Stderr` to `io.Discard` and nil out the buffer. 
This stops memory growth for the rest of the run while the process keeps writing harmlessly to nowhere + +### In-memory buffer implementation +- Use a `bytes.Buffer` as the `io.Writer` for `e.serviceCmd.Stdout/Stderr` — it satisfies the same interface as `*os.File` +- Add `startupLogBuffer *bytes.Buffer` field to `Executor` +- In `service.go:150-158`, the wiring becomes: + - `--enable-service-logs` on → stream to file (existing path) + - `--enable-service-logs` off → `e.startupLogBuffer = &bytes.Buffer{}`, assign to stdout/stderr +- On failure: read `startupLogBuffer.String()` (or read back the file if using file path) to dump to TUI/stderr +- On success without flag: swap to `io.Discard`, nil the buffer + +### Flag changes +- `--enable-service-logs` keeps its current name, no deprecation needed +- No `--disable-service-logs` flag needed +- `--debug` stays separate — controls fence sandbox debug mode + +### Early exit on process failure +- Currently if the service process exits immediately (e.g. bad config, missing dependency), `waitForReadiness` keeps polling until the full timeout — wasted time +- Improvement: after `e.serviceCmd.Start()`, run a goroutine that waits on `e.serviceCmd.Wait()`. If the process exits with a non-zero code before readiness is reached, bail immediately and dump service logs +- Pairs well with the service logs change — user gets immediate feedback (fast failure + logs) instead of waiting 10+ seconds for timeout + +## Other improvements + +### Print mode timestamp logging +Add duration logging to print mode phases that currently have none: +- Test loading (`"➤ Found/Loaded %d tests"`) +- Pre-app-start span fetching (`"✓ Loaded %d pre-app-start spans"`) +- Suite span preparation (`"Loaded %d suite spans"`) +- Environment start (`"➤ Starting environment..."` → `"✓ Environment ready"`) +- Overall test execution and summary + +Per-test results already log duration (e.g. `"NO DEVIATION - traceId (150ms)"`). 

### SDK debug log level via CLI
When `--debug` is used with the CLI, SDKs should automatically run with log level "debug" (may need a separate env var or command to propagate this)

### Setup agent
- Update `internal/agent/prompts/phase_simple_test.md` — remove the "Logs only appear if `debug: true` is set" note since logs are always captured during startup
- Setup agent can always check service logs when iterating on start commands
- Setup agent should know that if sandbox fails to start, it can set `sandbox_mode: "off"` to test if the service works without sandbox (currently referenced in prompts but should be more explicit given the move to strict default)

### Disk accumulation
- With the new design, `.tusk/logs/` only accumulates when `--enable-service-logs` is explicitly set, so this is less of a concern
- Could still add a retention policy (e.g. older than 7 days) as a nice-to-have

## Out of scope (future)
- Deviation analysis with interlaced test markers in service logs
- Concurrent run log file collision (timestamp has second-level granularity, low risk)

## Considerations

Does enabling service logs cause any meaningful performance degradation?

Enabling service logs could potentially help us with deviation analysis in the future; how could we use this?
Potentially in the service logs file, we interlace Tusk Drift logs that say "X trace test started" and "X trace test completed" within the service logs? That way, we can programmatically extract the service logs for a specific trace test and use it in deviation analysis? 
diff --git a/cmd/run.go b/cmd/run.go index 6982f8e..96bc0df 100644 --- a/cmd/run.go +++ b/cmd/run.go @@ -453,11 +453,14 @@ func runTests(cmd *cobra.Command, args []string) error { return nil } + // Track overall timing for print mode + overallStart := time.Now() + if !interactive && !quiet { if isValidation { log.Stderrln(fmt.Sprintf("\n➤ Found %d traces to validate", len(tests))) } else if !cloud { - log.Stderrln(fmt.Sprintf("\n➤ Loaded %d tests from local traces", len(tests))) + log.Stderrln(fmt.Sprintf("\n➤ Loaded %d tests from local traces (%.1fs)", len(tests), time.Since(overallStart).Seconds())) } } @@ -732,6 +735,7 @@ func runTests(cmd *cobra.Command, args []string) error { } // Step 4: Run tests by environment + testPhaseStart := time.Now() var results []runner.TestResult if groupResult != nil && len(groupResult.Groups) > 0 { // Use environment-based replay @@ -739,6 +743,17 @@ func runTests(cmd *cobra.Command, args []string) error { if err != nil { cmd.SilenceUsage = true + // Dump startup logs so user can diagnose startup failures + startupLogs := executor.GetStartupLogs() + if startupLogs != "" { + log.Stderrln("\n📋 Service startup logs:") + for _, line := range strings.Split(strings.TrimRight(startupLogs, "\n"), "\n") { + log.Stderrln(" " + line) + } + log.Stderrln("") + } + log.Stderr(executor.GetStartupFailureHelpMessage()) + // Update CI status to FAILURE if in cloud mode if cloud && client != nil && (ci || isValidation) { statusReq := &backend.UpdateDriftRunCIStatusRequest{ @@ -780,6 +795,14 @@ func runTests(cmd *cobra.Command, args []string) error { } } + startupLogs := executor.GetStartupLogs() + if startupLogs != "" { + log.Stderrln("\n📋 Service startup logs:") + for _, line := range strings.Split(strings.TrimRight(startupLogs, "\n"), "\n") { + log.Stderrln(" " + line) + } + log.Stderrln("") + } log.Stderr(executor.GetStartupFailureHelpMessage()) return fmt.Errorf("failed to start environment: %w", err) } @@ -790,7 +813,7 @@ func runTests(cmd 
*cobra.Command, args []string) error { }() if !interactive && !quiet { - log.Stderrln(" ✓ Environment ready") + log.Stderrln(fmt.Sprintf(" ✓ Environment ready (%.1fs)", time.Since(testPhaseStart).Seconds())) log.Stderrln(fmt.Sprintf("➤ Running %d tests (concurrency: %d)...\n", len(tests), executor.GetConcurrency())) } @@ -824,12 +847,20 @@ func runTests(cmd *cobra.Command, args []string) error { _ = os.Stdout.Sync() time.Sleep(1 * time.Millisecond) + if !interactive && !quiet { + log.Stderrln(fmt.Sprintf("✓ Tests completed (%.1fs)", time.Since(testPhaseStart).Seconds())) + } + var outputErr error if !interactive { // Results already streamed, just print summary outputErr = runner.OutputResultsSummary(results, outputFormat, quiet) } + if !interactive && !quiet { + log.Stderrln(fmt.Sprintf("Total elapsed: %.1fs", time.Since(overallStart).Seconds())) + } + // Step 5: Upload results to backend if in cloud mode // Do this before returning any error so CI status is always updated if cloud && client != nil && (ci || isValidation) { diff --git a/docs/tui-testing.md b/docs/tui-testing.md new file mode 100644 index 0000000..0a0ad43 --- /dev/null +++ b/docs/tui-testing.md @@ -0,0 +1,326 @@ +# CLI & TUI Testing Guide + +How to manually test the tusk CLI in both print mode and interactive TUI mode. + +## Print Mode Testing + +Print mode (`--print`) runs headlessly — no interactive UI. Run it directly and inspect stderr: + +```bash +cd /path/to/test-project +/path/to/tusk drift run --print 2>&1 +``` + +Filter for specific output: + +```bash +/path/to/tusk drift run --print 2>&1 | grep -E "(➤|✓|Tests:|Error:)" +``` + +### Testing failure scenarios + +To test startup failures, temporarily change the start command in `.tusk/config.yaml`: + +```yaml +start: + command: 'node -e "console.log(''boot log line''); console.error(''some error''); process.exit(1)"' +``` + +To test with a service that starts but behaves differently, adjust the command or example codebase as needed. 
+ +**Always restore the config after testing.** + +## TUI Testing with tmux + +The TUI (interactive mode, no `--print`) requires a terminal. We use tmux for programmatic control — it lets us send keystrokes and capture output without needing to be in the terminal ourselves. + +### Option A: Native screenshots (recommended) + +Opens a real Terminal.app window with tmux inside it, then uses macOS `screencapture -l` to capture that specific window by ID. This produces pixel-perfect Retina screenshots and should always be used to verify TUI visual changes. + +**One-time setup:** Grant Screen Recording permission to Terminal.app in System Settings > Privacy & Security > Screen Recording. + +```bash +# 1. Open Terminal.app with a tmux session +osascript -e 'tell application "Terminal" + do script "tmux new-session -s tui-test -x 160 -y 45" +end tell' +sleep 3 + +# 2. Hide tmux status bar (otherwise a green bar appears at the bottom) +tmux set -t tui-test status off + +# 3. Launch the TUI +tmux send-keys -t tui-test 'cd /path/to/test-project && /path/to/tusk drift run' Enter + +# 4. Wait for the state you want to capture +# - Normal run with tests: ~15-20s (environment start + test execution) +# - Startup failure with sandbox retry: ~10-15s +# - Just initial render: ~3-5s +sleep 15 + +# 5. Navigate if needed +tmux send-keys -t tui-test g # go to top (select Service Logs) +tmux send-keys -t tui-test j # move selection down +tmux send-keys -t tui-test J # scroll log panel down +tmux send-keys -t tui-test D # half-page down in log panel +sleep 1 + +# 6. Find the Terminal.app window ID +WINDOW_ID=$(python3 -c " +import Quartz +windows = Quartz.CGWindowListCopyWindowInfo(Quartz.kCGWindowListOptionOnScreenOnly, Quartz.kCGNullWindowID) +for w in windows: + if w.get('kCGWindowOwnerName') == 'Terminal' and w.get('kCGWindowLayer', 0) == 0: + print(w['kCGWindowNumber']) + break +") + +# 7. Capture the window +screencapture -l "$WINDOW_ID" -o screenshot.png + +# 8. 
Cleanup +tmux send-keys -t tui-test q +sleep 2 +tmux kill-session -t tui-test +osascript -e 'tell application "Terminal" to close front window' 2>/dev/null +``` + +**Output:** ~2800x1800 Retina PNG with native font rendering. + +**Notes:** + +- `screencapture -l` captures by window ID — the Terminal window doesn't need to be in the foreground. You can keep working in other windows. +- The `-o` flag removes the window shadow. +- `screencapture -l` fails silently without Screen Recording permission — you get a blank or tiny image. + +### Option B: Text capture (quick functional checks) + +Uses a detached tmux session — no visible window, no permissions needed. Good for verifying that specific text appears in the TUI or that navigation works. **Not a substitute for screenshots** when verifying layout, colors, or visual rendering. + +```bash +# 1. Detached tmux session (no visible window) +tmux new-session -d -s tui-test -x 160 -y 45 + +# 2. Launch the TUI +tmux send-keys -t tui-test 'cd /path/to/test-project && /path/to/tusk drift run' Enter +sleep 15 + +# 3. Capture the screen as plain text +SCREEN=$(tmux capture-pane -t tui-test -p) + +# 4. Assert on content +echo "$SCREEN" | grep -q "TEST EXECUTION" || echo "FAIL: header not found" +echo "$SCREEN" | grep -q "Environment ready" || echo "FAIL: environment didn't start" + +# 5. Navigate and capture again +tmux send-keys -t tui-test j +sleep 0.5 +SCREEN=$(tmux capture-pane -t tui-test -p) + +# 6. 
Cleanup +tmux send-keys -t tui-test q +sleep 1 +tmux kill-session -t tui-test +``` + +### TUI keyboard shortcuts reference + +| Key | Action | +| --------- | --------------------------------------- | +| `j` / `k` | Select next/previous test in left panel | +| `g` / `G` | Jump to top/bottom of test list | +| `u` / `d` | Half-page up/down in test list | +| `J` / `K` | Scroll log panel down/up | +| `U` / `D` | Half-page up/down in log panel | +| `y` | Copy all logs | +| `q` | Quit | + +## Recommended Dimensions + +| Setting | Value | Notes | +| ------------ | ----- | ------------------------------- | +| tmux columns | 160 | Wide enough for both TUI panels | +| tmux rows | 45 | Tall enough to see tests + logs | + +## Common Gotchas + +1. **Timing is critical.** The TUI renders asynchronously. Capturing too early gives an incomplete screen. When in doubt, wait longer. +2. **tmux status bar.** The green bar at the bottom of native screenshots is tmux's status line. Hide it with `tmux set -t tui-test status off` before capturing. +3. **Scrolling.** Content often extends below the visible area of the log panel. Send `J` or `D` keys to scroll down before capturing. +4. **Screen Recording permission.** Native `screencapture -l` fails silently without it. Grant it to Terminal.app in System Settings > Privacy & Security > Screen Recording. +5. **YAML quoting.** When editing config.yaml with commands containing colons or quotes, wrap the entire value in double quotes and escape inner quotes. +6. **Restore config.** Always restore `.tusk/config.yaml` after testing with modified start commands. +7. **tmux targets by session name** (`-t tui-test`), so commands work regardless of which terminal you're focused on. You can keep working while tests run. 
+ +## Fallback: Rendering ANSI captures to images + +If you have an ANSI text capture and need an image (e.g., for a bug report when screencapture isn't available): + +```bash +# Capture with ANSI codes +tmux capture-pane -t tui-test -p -e > /tmp/tui-capture.txt +``` + +### Pillow + +```bash +python3 render_ansi_png.py # see script below +``` + +
+render_ansi_png.py + +```python +#!/usr/bin/env python3 +"""Render tmux ANSI capture to a high-resolution PNG using Pillow.""" + +import re +from PIL import Image, ImageDraw, ImageFont + +INPUT_FILE = "/tmp/tui-capture.txt" +OUTPUT_FILE = "tui-screenshot.png" +FONT_SIZE = 22 +BG_COLOR = (30, 30, 30) +FONT_PATH = "/System/Library/Fonts/Menlo.ttc" # macOS; adjust for Linux + + +def load_font(size): + try: + return ImageFont.truetype(FONT_PATH, size, index=0) + except Exception: + try: + return ImageFont.truetype("/System/Library/Fonts/Monaco.ttf", size) + except Exception: + return ImageFont.load_default() + + +def build_256_palette(): + palette = {} + base16 = [ + (0, 0, 0), (205, 49, 49), (13, 188, 121), (229, 229, 16), + (36, 114, 200), (188, 63, 188), (17, 168, 205), (204, 204, 204), + (128, 128, 128), (241, 76, 76), (35, 209, 139), (245, 245, 67), + (59, 142, 234), (214, 112, 214), (41, 184, 219), (255, 255, 255), + ] + for i, c in enumerate(base16): + palette[i] = c + for i in range(216): + r, g, b = i // 36, (i % 36) // 6, i % 6 + palette[16 + i] = ( + 0 if r == 0 else 55 + r * 40, + 0 if g == 0 else 55 + g * 40, + 0 if b == 0 else 55 + b * 40, + ) + for i in range(24): + v = 8 + i * 10 + palette[232 + i] = (v, v, v) + return palette + + +PALETTE_256 = build_256_palette() +ANSI_RE = re.compile(r'\x1b\[([0-9;]*)m') + + +def parse_ansi_line(line): + segments = [] + fg, bg, bold = (204, 204, 204), None, False + pos = 0 + + for match in ANSI_RE.finditer(line): + start, end = match.span() + for ch in line[pos:start]: + segments.append((ch, fg, bg, bold)) + pos = end + + params = [int(p) if p else 0 for p in (match.group(1) or "0").split(';')] + i = 0 + while i < len(params): + p = params[i] + if p == 0: + fg, bg, bold = (204, 204, 204), None, False + elif p == 1: + bold = True + elif p == 22: + bold = False + elif 30 <= p <= 37: + fg = PALETTE_256[p - 30] + elif p == 38: + if i + 1 < len(params) and params[i + 1] == 5 and i + 2 < len(params): + fg = 
PALETTE_256.get(params[i + 2], fg) + i += 2 + elif i + 1 < len(params) and params[i + 1] == 2 and i + 4 < len(params): + fg = (params[i + 2], params[i + 3], params[i + 4]) + i += 4 + elif p == 39: + fg = (204, 204, 204) + elif 40 <= p <= 47: + bg = PALETTE_256[p - 40] + elif p == 48: + if i + 1 < len(params) and params[i + 1] == 5 and i + 2 < len(params): + bg = PALETTE_256.get(params[i + 2], bg) + i += 2 + elif i + 1 < len(params) and params[i + 1] == 2 and i + 4 < len(params): + bg = (params[i + 2], params[i + 3], params[i + 4]) + i += 4 + elif p == 49: + bg = None + elif 90 <= p <= 97: + fg = PALETTE_256[p - 90 + 8] + elif 100 <= p <= 107: + bg = PALETTE_256[p - 100 + 8] + i += 1 + + for ch in line[pos:]: + segments.append((ch, fg, bg, bold)) + return segments + + +def main(): + with open(INPUT_FILE, 'r') as f: + lines = [line.rstrip('\n') for line in f.readlines()] + + font = load_font(FONT_SIZE) + bbox = font.getbbox("M") + char_w = bbox[2] - bbox[0] + char_h = int(FONT_SIZE * 1.45) + + padding = 16 + max_cols = 160 + img_w = padding * 2 + char_w * max_cols + img_h = padding * 2 + char_h * len(lines) + + img = Image.new('RGB', (img_w, img_h), BG_COLOR) + draw = ImageDraw.Draw(img) + + for row, line in enumerate(lines): + segments = parse_ansi_line(line) + y = padding + row * char_h + for col, (ch, fg, bg, is_bold) in enumerate(segments): + x = padding + col * char_w + if bg: + draw.rectangle([x, y, x + char_w, y + char_h], fill=bg) + if ch and ch != ' ': + draw.text((x, y), ch, fill=fg, font=font) + + img.save(OUTPUT_FILE) + print(f"Saved: {OUTPUT_FILE} ({img_w}x{img_h})") + + +if __name__ == "__main__": + main() +``` + +
+ +### ansi2html + Chrome headless + +If `ansi2html` is installed (`pip3 install ansi2html`): + +```bash +cat /tmp/tui-capture.txt | ansi2html > /tmp/tui.html +"/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" \ + --headless --screenshot=/tmp/screenshot.png \ + --window-size=1600,900 --force-device-scale-factor=2 \ + /tmp/tui.html +``` diff --git a/internal/agent/prompts/phase_simple_test.md b/internal/agent/prompts/phase_simple_test.md index 4bd8a4b..f94af1e 100644 --- a/internal/agent/prompts/phase_simple_test.md +++ b/internal/agent/prompts/phase_simple_test.md @@ -44,9 +44,10 @@ TuskDrift.initialize({ Run tusk_run to replay the trace. If it fails: -- Run with `debug: true` (keep running it in debug mode until it passes) -- If startup fails in sandbox (for example secret manager bootstrapping), retry with `sandbox_mode: "off"` -- Check for errors in the output or in the logs (in .tusk/logs/). Logs only appear if `debug: true` is set. +- Check the service startup logs for errors (always shown on startup failure) +- If the service starts but a test replay fails, run with `debug: true` to see runtime logs (keep running it in debug mode until it passes) +- If you need more detail from the SDK itself, set `logLevel: "debug"` in the SDK initialization to see SDK-level diagnostics +- If startup fails in sandbox (for example secret manager bootstrapping), retry with `sandbox_mode: "off"` and if that works, add a comment in config.yaml explaining why sandbox was disabled (e.g., `# sandbox disabled: service requires external secret manager during startup`) - If you see config-related errors (e.g., "no start command"), run `tusk_validate_config` to check for config issues - Try to fix issues and retry (max 3 attempts) - If still failing, ask the user for help diff --git a/internal/agent/prompts/phase_verify_complex_test.md b/internal/agent/prompts/phase_verify_complex_test.md index 408e52f..4fb77f0 100644 --- 
a/internal/agent/prompts/phase_verify_complex_test.md +++ b/internal/agent/prompts/phase_verify_complex_test.md @@ -29,7 +29,7 @@ Follow the same process as the simple test: 4. Wait 3 seconds for trace to be written 5. Stop the service 6. Run tusk_list to verify trace was recorded -7. Run tusk_run to replay (try with `debug: true` on failure; if startup fails in sandbox, retry with `sandbox_mode: "off"`) +7. Run tusk_run to replay (startup logs are always shown on startup failure; if a test replay fails, retry with `debug: true` for runtime logs; if startup fails in sandbox, retry with `sandbox_mode: "off"` and if that works, add a comment in config.yaml explaining why) ### Step 3: Save to Cache diff --git a/internal/agent/prompts/phase_verify_simple_test.md b/internal/agent/prompts/phase_verify_simple_test.md index 7fec9b3..c9732d4 100644 --- a/internal/agent/prompts/phase_verify_simple_test.md +++ b/internal/agent/prompts/phase_verify_simple_test.md @@ -32,8 +32,10 @@ If no traces appear, this is a verification failure. Run tusk_run to replay the trace. 
If it fails: -- Run with `debug: true` once more -- If startup fails in sandbox, retry with `sandbox_mode: "off"` +- Check the service startup logs for errors (always shown on startup failure) +- If the service starts but a test replay fails, retry with `debug: true` to see runtime logs +- If you need more detail from the SDK, set `logLevel: "debug"` in the SDK initialization +- If startup fails in sandbox, retry with `sandbox_mode: "off"` and if that works, add a comment in config.yaml explaining why sandbox was disabled - If still failing, mark as failed ### Step 5: Save to Cache diff --git a/internal/log/log.go b/internal/log/log.go index 6dca3cb..da4bec7 100644 --- a/internal/log/log.go +++ b/internal/log/log.go @@ -2,6 +2,7 @@ package log import ( + "context" "io" "log/slog" "os" @@ -90,15 +91,24 @@ func (l *Logger) process() { func (l *Logger) handleLogMessage(msg logMessage) { tuiPtr := l.tuiLogger.Load() - if tuiPtr == nil { + if tuiPtr != nil { + tui := *tuiPtr + switch msg.msgType { + case logTypeService: + tui.LogToService(msg.message) + case logTypeTest: + tui.LogToCurrentTest(msg.testID, msg.message) + } return } - tui := *tuiPtr + + // No TUI active (print mode). Route through slog so messages respect + // the configured log level and use the standard slog format. switch msg.msgType { case logTypeService: - tui.LogToService(msg.message) + slog.Debug(msg.message) case logTypeTest: - tui.LogToCurrentTest(msg.testID, msg.message) + slog.Debug(msg.message, "testID", msg.testID) } } @@ -261,6 +271,15 @@ func TestLog(testID, msg string) { } } +// TestDebug logs a debug-level message to a specific test's log panel. +// Only emits when debug logging is enabled (--debug flag). 
+func TestDebug(testID, msg string) { + if !slog.Default().Enabled(context.Background(), slog.LevelDebug) { + return + } + TestLog(testID, msg) +} + // TestOrServiceLog tries to log to test, falls back to service if testID is empty func TestOrServiceLog(testID, msg string) { if testID != "" { diff --git a/internal/runner/environment.go b/internal/runner/environment.go index 5203fc1..4a4f9cd 100644 --- a/internal/runner/environment.go +++ b/internal/runner/environment.go @@ -24,7 +24,20 @@ func (e *Executor) StartEnvironment() error { if err := e.StartService(); err != nil { if e.GetSandboxMode() == SandboxModeAuto && e.lastServiceSandboxed { log.ServiceLog("⚠️ Service failed to start in sandbox; retrying once without sandbox...") + + // Preserve log file/buffer across the retry so startup logs accumulate + savedLogFile := e.serviceLogFile + e.serviceLogFile = nil // prevent StopService → cleanupLogFiles from closing it _ = e.StopService() + e.serviceLogFile = savedLogFile + + // Write separator so the user can see where the retry begins + if e.enableServiceLogs && savedLogFile != nil { + _, _ = savedLogFile.WriteString("\n⚠️ Retrying without sandbox...\n\n") + } else if e.startupLogBuffer != nil { + _, _ = e.startupLogBuffer.Write([]byte("\n⚠️ Retrying without sandbox...\n\n")) + } + e.sandboxBypass = true e.lastServiceSandboxed = false @@ -52,6 +65,11 @@ waitForSDK: log.ServiceLog("✅ SDK acknowledged") log.Debug("Environment is ready") + + // Discard the in-memory startup buffer now that startup succeeded. + // File-based logging (--enable-service-logs) persists for the full run. 
+ e.DiscardStartupBuffer() + return nil } diff --git a/internal/runner/environment_replay.go b/internal/runner/environment_replay.go index fd4c71a..de19674 100644 --- a/internal/runner/environment_replay.go +++ b/internal/runner/environment_replay.go @@ -4,6 +4,8 @@ import ( "context" "fmt" "os" + "strings" + "time" "github.com/Use-Tusk/tusk-drift-cli/internal/config" "github.com/Use-Tusk/tusk-drift-cli/internal/log" @@ -24,6 +26,8 @@ func ReplayTestsByEnvironment( allResults := make([]TestResult, 0) for i, group := range groups { + envStart := time.Now() + log.Debug("Starting replay for environment group", "environment", group.Name, "test_count", len(group.Tests), @@ -41,6 +45,14 @@ func ReplayTestsByEnvironment( // 2. Start environment (server + service) if err := executor.StartEnvironment(); err != nil { + // Dump startup logs before returning so the caller's help message makes sense + startupLogs := executor.GetStartupLogs() + if startupLogs != "" { + log.ServiceLog("📋 Service startup logs:") + for _, line := range strings.Split(strings.TrimRight(startupLogs, "\n"), "\n") { + log.ServiceLog(line) + } + } cleanup() // Restore env vars before returning return allResults, fmt.Errorf("failed to start environment for %s: %w", group.Name, err) } @@ -70,7 +82,8 @@ func ReplayTestsByEnvironment( log.Debug("Completed replay for environment group", "environment", group.Name, - "results_count", len(results)) + "results_count", len(results), + "duration_seconds", time.Since(envStart).Seconds()) } log.Debug("Completed all environment group replays", diff --git a/internal/runner/executor.go b/internal/runner/executor.go index daa9981..f753ce2 100644 --- a/internal/runner/executor.go +++ b/internal/runner/executor.go @@ -12,6 +12,7 @@ import ( "os/exec" "path/filepath" "strings" + "sync" "time" "github.com/Use-Tusk/fence/pkg/fence" @@ -33,6 +34,24 @@ const ( SandboxModeOff = "off" ) +// syncBuffer is a thread-safe buffer for capturing service stdout/stderr concurrently. 
+type syncBuffer struct { + mu sync.Mutex + buf bytes.Buffer +} + +func (s *syncBuffer) Write(p []byte) (int, error) { + s.mu.Lock() + defer s.mu.Unlock() + return s.buf.Write(p) +} + +func (s *syncBuffer) String() string { + s.mu.Lock() + defer s.mu.Unlock() + return s.buf.String() +} + type Executor struct { serviceURL string parallel int @@ -40,6 +59,8 @@ type Executor struct { serviceCmd *exec.Cmd server *Server serviceLogFile *os.File + startupLogBuffer *syncBuffer // in-memory buffer when --enable-service-logs is off + processExitCh chan error // signals early process exit enableServiceLogs bool servicePort int resultsDir string @@ -511,24 +532,43 @@ func countPassedTests(results []TestResult) int { return count } -// GetStartupFailureHelpMessage returns a user-friendly help message when the service fails to start. -// It explains how to use --enable-service-logs and shows where logs will be saved. -func (e *Executor) GetStartupFailureHelpMessage() string { - var msg strings.Builder +// GetStartupLogs returns the captured service startup logs. +// When --enable-service-logs is set, it reads back from the log file. +// Otherwise, it returns the contents of the in-memory startup buffer. +func (e *Executor) GetStartupLogs() string { + if e.enableServiceLogs && e.serviceLogFile != nil { + data, err := os.ReadFile(e.serviceLogFile.Name()) + if err != nil { + return "" + } + return string(data) + } + if e.startupLogBuffer != nil { + return e.startupLogBuffer.String() + } + return "" +} - msg.WriteString("\n") - msg.WriteString("💡 Tip: Use --enable-service-logs to see detailed service logs and diagnose startup issues.\n") - msg.WriteString(" Service logs show the stdout/stderr output from your service, which can help identify why the service failed to start.\n") +// DiscardStartupBuffer detaches the in-memory startup log buffer from the service process +// and discards it. 
This is called after the service starts successfully to avoid unbounded +// memory growth during the test run. Has no effect when --enable-service-logs is set +// (file-based logging persists for the full run). +func (e *Executor) DiscardStartupBuffer() { + if !e.enableServiceLogs && e.startupLogBuffer != nil { + if e.serviceCmd != nil { + e.serviceCmd.Stdout = io.Discard + e.serviceCmd.Stderr = io.Discard + } + e.startupLogBuffer = nil + } +} - // Always show where logs would be saved - logsDir := utils.GetLogsDir() +// GetStartupFailureHelpMessage returns a user-friendly help message when the service fails to start. +func (e *Executor) GetStartupFailureHelpMessage() string { if e.enableServiceLogs && e.serviceLogFile != nil { - msg.WriteString(fmt.Sprintf("📄 Service logs are available at: %s\n", e.serviceLogFile.Name())) - } else { - msg.WriteString(fmt.Sprintf("📁 Service logs will be saved to: %s/\n", logsDir)) + return fmt.Sprintf("\n📄 Service logs are available at: %s\n", e.serviceLogFile.Name()) } - - return msg.String() + return "" } // RunSingleTest replays a single trace on the service under test. 
diff --git a/internal/runner/server.go b/internal/runner/server.go index 0123e26..5b63efe 100644 --- a/internal/runner/server.go +++ b/internal/runner/server.go @@ -1027,11 +1027,7 @@ func (ms *Server) handleInstrumentationVersionMismatchAlert(alert *core.Instrume } func (ms *Server) handleUnpatchedDependencyAlert(alert *core.UnpatchedDependencyAlert) { - log.Info("Unpatched dependency alert", - "traceTestServerSpanId", alert.TraceTestServerSpanId, - "stackTrace", alert.StackTrace, - "sdkVersion", alert.SdkVersion, - ) + log.TestDebug(alert.TraceTestServerSpanId, fmt.Sprintf("Unpatched dependency alert (sdk %s)\n%s", alert.SdkVersion, alert.StackTrace)) // Send to PostHog if client := ms.GetAnalyticsClient(); client != nil { diff --git a/internal/runner/service.go b/internal/runner/service.go index 003a9ee..9489124 100644 --- a/internal/runner/service.go +++ b/internal/runner/service.go @@ -147,15 +147,19 @@ func (e *Executor) StartService() error { env = append(env, "TUSK_DRIFT_MODE=REPLAY") e.serviceCmd.Env = env - // Dump service logs to file in .tusk/logs instead of suppressing - // TODO: provide option whether to store these logs + // Always capture service logs during startup. + // When --enable-service-logs is set, logs go to a file. + // Otherwise, logs go to an in-memory buffer that is shown on failure and discarded on success. 
if err := e.setupServiceLogging(); err != nil { log.Debug("Failed to setup service logging, suppressing output", "error", err) e.serviceCmd.Stdout = nil e.serviceCmd.Stderr = nil - } else { + } else if e.enableServiceLogs { e.serviceCmd.Stdout = e.serviceLogFile e.serviceCmd.Stderr = e.serviceLogFile + } else { + e.serviceCmd.Stdout = e.startupLogBuffer + e.serviceCmd.Stderr = e.startupLogBuffer } if err := e.serviceCmd.Start(); err != nil { @@ -166,6 +170,12 @@ func (e *Executor) StartService() error { return fmt.Errorf("failed to start service: %w", err) } + // Monitor process for early exit so waitForReadiness can fail fast + e.processExitCh = make(chan error, 1) + go func() { + e.processExitCh <- e.serviceCmd.Wait() + }() + if err := e.waitForReadiness(cfg); err != nil { _ = e.StopService() return fmt.Errorf("service readiness check failed: %w", err) @@ -400,8 +410,13 @@ func (e *Executor) waitForReadiness(cfg *config.Config) error { waitTime = parsed } } - time.Sleep(waitTime) - return nil + // Wait for the specified duration, but fail fast if the process exits + select { + case exitErr := <-e.processExitCh: + return fmt.Errorf("service process exited during startup: %w", exitErr) + case <-time.After(waitTime): + return nil + } } timeout := 10 * time.Second @@ -421,13 +436,25 @@ func (e *Executor) waitForReadiness(cfg *config.Config) error { deadline := time.Now().Add(timeout) for time.Now().Before(deadline) { + // Check for early process exit before each readiness attempt + select { + case exitErr := <-e.processExitCh: + return fmt.Errorf("service process exited during startup: %w", exitErr) + default: + } + cmd := createReadinessCommand(cfg.Service.Readiness.Command) cmd.Env = e.buildCommandEnv() if err := cmd.Run(); err == nil { return nil } - time.Sleep(interval) + // Wait for the interval, but fail fast if the process exits + select { + case exitErr := <-e.processExitCh: + return fmt.Errorf("service process exited during startup: %w", exitErr) + case 
<-time.After(interval): + } } return fmt.Errorf("service failed to become ready within %v. You can increase the timeout in .tusk/config.yaml under service.readiness.timeout", timeout) @@ -438,34 +465,37 @@ func (e *Executor) SetEnableServiceLogs(enable bool) { e.enableServiceLogs = enable } -// setupServiceLogging creates a log file for service stdout and stderr if service logging is not disabled. +// setupServiceLogging prepares log capture for the service process. +// When --enable-service-logs is set, logs are written to a file in .tusk/logs/. +// Otherwise, an in-memory buffer captures startup output (shown on failure, discarded on success). func (e *Executor) setupServiceLogging() error { - if !e.enableServiceLogs { - return fmt.Errorf("service logging disabled") - } + if e.enableServiceLogs { + // Allow tests to override the logs directory + logsDir := utils.GetLogsDir() + if testLogsDir := os.Getenv("TUSK_TEST_LOGS_DIR"); testLogsDir != "" { + logsDir = testLogsDir + } - // Allow tests to override the logs directory - logsDir := utils.GetLogsDir() - if testLogsDir := os.Getenv("TUSK_TEST_LOGS_DIR"); testLogsDir != "" { - logsDir = testLogsDir - } + if err := os.MkdirAll(logsDir, 0o750); err != nil { + return fmt.Errorf("failed to create logs directory: %w", err) + } - if err := os.MkdirAll(logsDir, 0o750); err != nil { - return fmt.Errorf("failed to create logs directory: %w", err) - } + timestamp := time.Now().Format("20060102-150405") - timestamp := time.Now().Format("20060102-150405") + logPath := filepath.Join(logsDir, fmt.Sprintf("tusk-replay-%s.log", timestamp)) + logFile, err := os.Create(logPath) // #nosec G304 + if err != nil { + return fmt.Errorf("failed to create service log file: %w", err) + } - logPath := filepath.Join(logsDir, fmt.Sprintf("tusk-replay-%s.log", timestamp)) - logFile, err := os.Create(logPath) // #nosec G304 - if err != nil { - return fmt.Errorf("failed to create service log file: %w", err) + e.serviceLogFile = logFile + 
log.ServiceLog(fmt.Sprintf("Service logs will be written to: %s", logPath)) + } else if e.startupLogBuffer == nil { + // Only create a new buffer if one doesn't already exist. + // During sandbox retry, the existing buffer preserves the first attempt's logs. + e.startupLogBuffer = &syncBuffer{} } - e.serviceLogFile = logFile - - log.ServiceLog(fmt.Sprintf("Service logs will be written to: %s", logPath)) - return nil } diff --git a/internal/runner/service_test.go b/internal/runner/service_test.go index edd7105..93920b2 100644 --- a/internal/runner/service_test.go +++ b/internal/runner/service_test.go @@ -820,10 +820,10 @@ func TestSetupServiceLogging(t *testing.T) { expectError bool }{ { - name: "logging_disabled", + name: "logging_disabled_creates_buffer", enableServiceLogs: false, setupFunc: noopCleanup, - expectError: true, + expectError: false, }, { name: "logging_enabled_creates_file", @@ -881,7 +881,7 @@ func TestSetupServiceLogging(t *testing.T) { if tt.expectError { assert.Error(t, err) assert.Nil(t, e.serviceLogFile) - } else { + } else if tt.enableServiceLogs { assert.NoError(t, err) assert.NotNil(t, e.serviceLogFile) @@ -892,6 +892,11 @@ func TestSetupServiceLogging(t *testing.T) { _ = e.serviceLogFile.Close() _ = os.Remove(e.serviceLogFile.Name()) + } else { + // Logging disabled: no error, no file, but buffer is created + assert.NoError(t, err) + assert.Nil(t, e.serviceLogFile) + assert.NotNil(t, e.startupLogBuffer) } }) } diff --git a/internal/tui/test_executor.go b/internal/tui/test_executor.go index c98fc4f..5ec9c71 100644 --- a/internal/tui/test_executor.go +++ b/internal/tui/test_executor.go @@ -3,6 +3,7 @@ package tui import ( "context" "fmt" + "io" "log/slog" "os" "slices" @@ -245,6 +246,13 @@ func RunTestsInteractiveWithOpts(tests []runner.Test, executor *runner.Executor, // Register this model as the global test logger log.SetTUILogger(m) + // Redirect stderr to the TUI service log panel. 
+ // Libraries like fence write directly to os.Stderr with fmt.Fprintf, + // which corrupts the Bubble Tea alt screen. This pipe captures those + // writes and routes them through the TUI log panel instead. + stderrRestore := redirectStderrToTUI(m) + defer stderrRestore() + // Prepend initial service logs if opts != nil && len(opts.InitialServiceLogs) > 0 { for _, line := range opts.InitialServiceLogs { @@ -270,6 +278,48 @@ func RunTestsInteractiveWithOpts(tests []runner.Test, executor *runner.Executor, return m.results, nil } +// redirectStderrToTUI replaces os.Stderr with a pipe that routes lines to the +// TUI service log panel. Returns a function that restores the original stderr. +// This prevents libraries that write directly to os.Stderr (e.g. fence) from +// corrupting the Bubble Tea alt screen. +func redirectStderrToTUI(m *testExecutorModel) func() { + origStderr := os.Stderr + pr, pw, err := os.Pipe() + if err != nil { + // If pipe fails, don't redirect — better to have some corruption than crash + return func() {} + } + os.Stderr = pw + + done := make(chan struct{}) + go func() { + defer close(done) + buf := make([]byte, 4096) + for { + n, readErr := pr.Read(buf) + if n > 0 { + lines := strings.Split(strings.TrimRight(string(buf[:n]), "\n"), "\n") + for _, line := range lines { + line = strings.TrimSpace(line) + if line != "" { + m.addServiceLog(line) + } + } + } + if readErr != nil { + return + } + } + }() + + return func() { + os.Stderr = origStderr + _ = pw.Close() + <-done // wait for reader goroutine to drain + _ = pr.Close() + } +} + func newTestExecutorModel(tests []runner.Test, executor *runner.Executor, opts *InteractiveOpts) *testExecutorModel { model := &testExecutorModel{ tests: tests, @@ -306,12 +356,59 @@ func (m *testExecutorModel) setupTUILogging() { level = slog.LevelDebug } - handler := slog.NewTextHandler(tuiWriter, &slog.HandlerOptions{ - Level: level, - }) + handler := &tuiSlogHandler{ + level: level, + writer: tuiWriter, + } 
slog.SetDefault(slog.New(handler)) } +// tuiSlogHandler formats slog records cleanly for the TUI (no time/level prefix). +// It extracts just the message and key=value pairs. +type tuiSlogHandler struct { + level slog.Level + writer io.Writer + attrs []slog.Attr + group string +} + +func (h *tuiSlogHandler) Enabled(_ context.Context, level slog.Level) bool { + return level >= h.level +} + +func (h *tuiSlogHandler) Handle(_ context.Context, r slog.Record) error { + var b strings.Builder + b.WriteString(r.Message) + // Append pre-set attrs + for _, a := range h.attrs { + b.WriteString(" ") + b.WriteString(a.Key) + b.WriteString("=") + b.WriteString(fmt.Sprintf("%v", a.Value.Any())) + } + // Append record attrs + r.Attrs(func(a slog.Attr) bool { + b.WriteString(" ") + b.WriteString(a.Key) + b.WriteString("=") + b.WriteString(fmt.Sprintf("%v", a.Value.Any())) + return true + }) + _, err := h.writer.Write([]byte(b.String())) + return err +} + +func (h *tuiSlogHandler) WithAttrs(attrs []slog.Attr) slog.Handler { + newAttrs := make([]slog.Attr, len(h.attrs)+len(attrs)) + copy(newAttrs, h.attrs) + copy(newAttrs[len(h.attrs):], attrs) + return &tuiSlogHandler{level: h.level, writer: h.writer, attrs: newAttrs, group: h.group} +} + +func (h *tuiSlogHandler) WithGroup(name string) slog.Handler { + return &tuiSlogHandler{level: h.level, writer: h.writer, attrs: h.attrs, group: name} +} + func (m *testExecutorModel) loadTestsIfNeeded() tea.Cmd { if m.opts == nil || m.opts.LoadTests == nil { return nil @@ -922,6 +1019,15 @@ func (m *testExecutorModel) startNextEnvironmentGroup() tea.Cmd { // Start environment if err := m.executor.StartEnvironment(); err != nil { m.groupCleanup() + + startupLogs := m.executor.GetStartupLogs() + if startupLogs != "" { + m.addServiceLog("📋 Service startup logs:") + for _, line := range strings.Split(strings.TrimRight(startupLogs, "\n"), "\n") { + m.addServiceLog(line) + } + } + m.addServiceLog(fmt.Sprintf("❌ Failed to start environment for %s: %v", 
group.Name, err)) helpMsg := m.executor.GetStartupFailureHelpMessage() From 734bb86fce79e98193ebdffa525d1f1f61b344a9 Mon Sep 17 00:00:00 2001 From: Sohil Kshirsagar Date: Fri, 20 Mar 2026 16:29:47 -0400 Subject: [PATCH 2/9] minor fixes --- cmd/run.go | 10 +++------- internal/runner/environment.go | 4 ++-- internal/runner/environment_replay.go | 8 +++++++- internal/runner/suite_spans.go | 5 ++++- internal/tui/test_executor.go | 8 ++++---- 5 files changed, 20 insertions(+), 15 deletions(-) diff --git a/cmd/run.go b/cmd/run.go index 96bc0df..1aaf207 100644 --- a/cmd/run.go +++ b/cmd/run.go @@ -391,6 +391,9 @@ func runTests(cmd *cobra.Command, args []string) error { var tests []runner.Test var err error + // Track overall timing for print mode (includes test loading) + overallStart := time.Now() + // Step 3: Load tests - in cloud mode, fetch from backend; otherwise use local files deferLoadTests := interactive if deferLoadTests { @@ -453,9 +456,6 @@ func runTests(cmd *cobra.Command, args []string) error { return nil } - // Track overall timing for print mode - overallStart := time.Now() - if !interactive && !quiet { if isValidation { log.Stderrln(fmt.Sprintf("\n➤ Found %d traces to validate", len(tests))) @@ -847,10 +847,6 @@ func runTests(cmd *cobra.Command, args []string) error { _ = os.Stdout.Sync() time.Sleep(1 * time.Millisecond) - if !interactive && !quiet { - log.Stderrln(fmt.Sprintf("✓ Tests completed (%.1fs)", time.Since(testPhaseStart).Seconds())) - } - var outputErr error if !interactive { // Results already streamed, just print summary diff --git a/internal/runner/environment.go b/internal/runner/environment.go index 4a4f9cd..c693189 100644 --- a/internal/runner/environment.go +++ b/internal/runner/environment.go @@ -33,9 +33,9 @@ func (e *Executor) StartEnvironment() error { // Write separator so the user can see where the retry begins if e.enableServiceLogs && savedLogFile != nil { - _, _ = savedLogFile.WriteString("\n⚠️ Retrying without sandbox...\n\n") 
+ _, _ = savedLogFile.WriteString("⚠️ Retrying without sandbox...\n") } else if e.startupLogBuffer != nil { - _, _ = e.startupLogBuffer.Write([]byte("\n⚠️ Retrying without sandbox...\n\n")) + _, _ = e.startupLogBuffer.Write([]byte("⚠️ Retrying without sandbox...\n")) } e.sandboxBypass = true diff --git a/internal/runner/environment_replay.go b/internal/runner/environment_replay.go index de19674..06c5153 100644 --- a/internal/runner/environment_replay.go +++ b/internal/runner/environment_replay.go @@ -44,6 +44,7 @@ func ReplayTestsByEnvironment( } // 2. Start environment (server + service) + envStartTime := time.Now() if err := executor.StartEnvironment(); err != nil { // Dump startup logs before returning so the caller's help message makes sense startupLogs := executor.GetStartupLogs() @@ -57,6 +58,10 @@ func ReplayTestsByEnvironment( return allResults, fmt.Errorf("failed to start environment for %s: %w", group.Name, err) } + envStartDuration := time.Since(envStartTime).Seconds() + log.ServiceLog(fmt.Sprintf("✓ Environment ready (%.1fs)", envStartDuration)) + log.Stderrln(fmt.Sprintf("✓ Environment ready (%.1fs)", envStartDuration)) + // 3. Run tests for this environment results, err := executor.RunTests(group.Tests) if err != nil { @@ -80,10 +85,11 @@ func ReplayTestsByEnvironment( // 6. 
Restore environment variables cleanup() + envDuration := time.Since(envStart).Seconds() log.Debug("Completed replay for environment group", "environment", group.Name, "results_count", len(results), - "duration_seconds", time.Since(envStart).Seconds()) + "duration_seconds", envDuration) } log.Debug("Completed all environment group replays", diff --git a/internal/runner/suite_spans.go b/internal/runner/suite_spans.go index 3be14a0..03cb3dd 100644 --- a/internal/runner/suite_spans.go +++ b/internal/runner/suite_spans.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "path/filepath" + "time" "github.com/Use-Tusk/tusk-drift-cli/internal/api" "github.com/Use-Tusk/tusk-drift-cli/internal/log" @@ -134,17 +135,19 @@ func PrepareAndSetSuiteSpans( opts SuiteSpanOptions, currentTests []Test, ) error { + buildStart := time.Now() result, err := BuildSuiteSpansForRun(ctx, opts, currentTests) if err != nil { return err } + buildDuration := time.Since(buildStart).Seconds() if opts.Interactive { log.ServiceLog(fmt.Sprintf( "Loading %d suite spans for matching (%d unique traces, %d pre-app-start)", len(result.SuiteSpans), result.UniqueTraceCount, result.PreAppStartCount, )) } else if !opts.Quiet { - log.UserProgress(fmt.Sprintf(" ↳ Loaded %d suite spans (%d unique traces, %d pre-app-start)", len(result.SuiteSpans), result.UniqueTraceCount, result.PreAppStartCount)) + log.UserProgress(fmt.Sprintf(" ↳ Loaded %d suite spans (%d unique traces, %d pre-app-start) (%.1fs)", len(result.SuiteSpans), result.UniqueTraceCount, result.PreAppStartCount, buildDuration)) } log.Debug("Prepared suite spans for matching", "count", len(result.SuiteSpans), diff --git a/internal/tui/test_executor.go b/internal/tui/test_executor.go index 5ec9c71..851dc41 100644 --- a/internal/tui/test_executor.go +++ b/internal/tui/test_executor.go @@ -1161,17 +1161,17 @@ func (m *testExecutorModel) executeTest(index int) tea.Cmd { } } - // Determine if there are more tests to run + // Determine if there are more tests to run 
(including pending retries) hasMoreTests := false if m.inRetryPhase { // In retry phase, check if there are more tests in the retry queue hasMoreTests = m.currentEnvTestsStarted < len(m.currentEnvTestIndices) } else { - // In normal phase, check if there are more tests overall - hasMoreTests = index < len(m.tests)-1 + // In normal phase, check if there are more tests overall OR tests queued for retry + hasMoreTests = index < len(m.tests)-1 || len(m.testsToRetry) > 0 } - // Attempt to restart the server for next test + // Attempt to restart the server for next test (or pending retries) if hasMoreTests { m.addServiceLog("🔄 Restarting server...") if restartErr := m.executor.RestartServerWithRetry(0); restartErr != nil { From b2b1599ead40c62fd07351031033f29d7cd0e6d5 Mon Sep 17 00:00:00 2001 From: Sohil Kshirsagar Date: Fri, 20 Mar 2026 16:41:36 -0400 Subject: [PATCH 3/9] remove planning doc --- DEFAULT_ENABLE_SERVICE_LOGS.md | 140 --------------------------------- 1 file changed, 140 deletions(-) delete mode 100644 DEFAULT_ENABLE_SERVICE_LOGS.md diff --git a/DEFAULT_ENABLE_SERVICE_LOGS.md b/DEFAULT_ENABLE_SERVICE_LOGS.md deleted file mode 100644 index 8bf0b12..0000000 --- a/DEFAULT_ENABLE_SERVICE_LOGS.md +++ /dev/null @@ -1,140 +0,0 @@ -# Slack thread context - -sohil  [4:56 PM] -when service fails to start in CLI (but `--enable-service-logs` is false), thoughts on trying to start the service but with service logs enabled? rn i feel like it's hard to debug in CI when things go wrong - -Sohan  [4:58 PM] -yea i like that. 
can enforce adding the following in CI too to upload log file - -```yaml -- name: Upload Tusk service logs - if: always() - uses: actions/upload-artifact@v4 - with: - name: tusk-service-logs - path: .tusk/logs - if-no-files-found: ignore -``` - -sohil  [4:59 PM] -nice, yeah could do that our just output to logs and they'll show up in CI anyways -marcel  [4:59 PM] -nice nice -Sohan  [5:00 PM] -sdks should also support env var override for log level so we can run in debug mode (edited)  -sohil  [7:00 PM] -@Jun Yu thoughts on this? feels like this could even be default behavior -(as in, if service doesn't start, then retry starting with service logs enabled) -Jun Yu  [7:09 PM] -why not just explicitly add `--enable-service-logs` to our CI workflow? (edited)  -sohil  [7:11 PM] -i'm thinking for customers, that's just an extra step they may not realize. so when they're iterating on their CI workflow, if smth fails, immediately they'll get logs explaining why -Jun Yu  [7:15 PM] -not a huge fan of your suggested pattern :thinking_face: bc of the obscure magic - -i'm thinking smth like this: `--service-logs [enable | enable-with-retry]` or something like that - -- `enable` = enable every time -- `enable-with-retry` = retry with service logs - - honestly still could be better (we're coupling retry logic with service logs) - -sohil  [7:18 PM] -hmm but that doesn't solve problem for customers who are iterating on workflow -i feel like it could get frustrating to iterate when things are failing, and then having to wait for iteration -another solution could just be to enable service logs by default in CI -Jun Yu  [7:19 PM] -hmm service logs by default in CI feels cleaner, can just add to the help text -sohil  [7:20 PM] -or our github action adds it by default, and we can update docs/prompts for setup agent -rn we don't output service logs anywhere though right, it just persists to file system? 
we could also add logic in CLI where if it's run in `--print` mode, and service fails to start, it outputs service logs -Jun Yu  [7:22 PM] -do you think that could be verbose? with run output and service logs being interleaved -sohil  [7:22 PM] -it would only show if service failed to start, so there's no run output really? -Jun Yu  [7:23 PM] -oh yea makes sense -sohil  [7:23 PM] -maybe this is only in CI mode, since user doesn't have access to file system - -# Final solution - -Service logs (stdout/stderr from the user's service) are always captured during startup. This gives users immediate visibility when things go wrong, without needing to know about any flags. - -## Behavior - -### During service startup (always) -- Service stdout/stderr is always captured during startup -- If `--enable-service-logs` is set: stream directly to `.tusk/logs/` file during startup (file acts as the buffer) -- If `--enable-service-logs` is NOT set: buffer in memory during startup - -### On startup failure -- **TUI**: dump captured service logs into the right side panel -- **Print mode**: dump last N lines to **stderr** (not stdout, to avoid breaking `--output-format json` consumers) -- Update the startup failure help message (`executor.go:503-521`) — currently it suggests using `--enable-service-logs`, which is no longer relevant. New suggestions: - - "Check the service logs above for details" - - "For more verbose SDK logging, set the log level to debug in your SDK initialization" - - Note: `--debug` on the CLI only controls fence sandbox debug mode, not useful for customers debugging service startup - -### Sandbox retry and service logs -- Default sandbox mode will change to `strict` (in a subsequent commit) -- In `auto` mode (`environment.go:24-37`): if sandboxed startup fails, it retries without sandbox. Both attempts' logs should be kept — append a separator (e.g. `"⚠️ Retrying without sandbox..."`) and continue capturing. 
If the retry also fails, the user sees logs from both attempts. If the retry succeeds and `--enable-service-logs` isn't set, discard everything -- For the in-memory buffer: just keep appending across retries -- For the file buffer (`--enable-service-logs`): reuse the same file handle across retries rather than creating a new file - -### On startup success -- If `--enable-service-logs` is set: continue streaming to `.tusk/logs/` file for the entire run (same as today's behavior) -- If `--enable-service-logs` is NOT set: swap `e.serviceCmd.Stdout/Stderr` to `io.Discard` and nil out the buffer. This stops memory growth for the rest of the run while the process keeps writing harmlessly to nowhere - -### In-memory buffer implementation -- Use a `bytes.Buffer` as the `io.Writer` for `e.serviceCmd.Stdout/Stderr` — it satisfies the same interface as `*os.File` -- Add `startupLogBuffer *bytes.Buffer` field to `Executor` -- In `service.go:150-158`, the wiring becomes: - - `--enable-service-logs` on → stream to file (existing path) - - `--enable-service-logs` off → `e.startupLogBuffer = &bytes.Buffer{}`, assign to stdout/stderr -- On failure: read `startupLogBuffer.String()` (or read back the file if using file path) to dump to TUI/stderr -- On success without flag: swap to `io.Discard`, nil the buffer - -### Flag changes -- `--enable-service-logs` keeps its current name, no deprecation needed -- No `--disable-service-logs` flag needed -- `--debug` stays separate — controls fence sandbox debug mode - -### Early exit on process failure -- Currently if the service process exits immediately (e.g. bad config, missing dependency), `waitForReadiness` keeps polling until the full timeout — wasted time -- Improvement: after `e.serviceCmd.Start()`, run a goroutine that waits on `e.serviceCmd.Wait()`. 
If the process exits with a non-zero code before readiness is reached, bail immediately and dump service logs -- Pairs well with the service logs change — user gets immediate feedback (fast failure + logs) instead of waiting 10+ seconds for timeout - -## Other improvements - -### Print mode timestamp logging -Add duration logging to print mode phases that currently have none: -- Test loading (`"➤ Found/Loaded %d tests"`) -- Pre-app-start span fetching (`"✓ Loaded %d pre-app-start spans"`) -- Suite span preparation (`"Loaded %d suite spans"`) -- Environment start (`"➤ Starting environment..."` → `"✓ Environment ready"`) -- Overall test execution and summary - -Per-test results already log duration (e.g. `"NO DEVIATION - traceId (150ms)"`). - -### SDK debug log level via CLI -When `--debug` is used with the CLI, SDKs should automatically run with log level "debug" (may need a separate env var or command to propagate this) - -### Setup agent -- Update `internal/agent/prompts/phase_simple_test.md` — remove the "Logs only appear if `debug: true` is set" note since logs are always captured during startup -- Setup agent can always check service logs when iterating on start commands -- Setup agent should know that if sandbox fails to start, it can set `sandbox_mode: "off"` to test if the service works without sandbox (currently referenced in prompts but should be more explicit given the move to strict default) - -### Disk accumulation -- With the new design, `.tusk/logs/` only accumulates when `--enable-service-logs` is explicitly set, so this is less of a concern -- Could still add a retention policy (e.g. older than 7 days) as a nice-to-have - -## Out of scope (future) -- Deviation analysis with interlaced test markers in service logs -- Concurrent run log file collision (timestamp has second-level granularity, low risk) - -## Considerations - -Does enabling service logs cause any meaningful performance degradation? 
-
-Enabling service logs could potentially help us with deviation analysis in the future, how could we use this?
-Potentially in the service logs file, we interlace Tusk Drift logs that say "X trace test started" and "X trace test completed" within the service logs? That way, we can programtically extract the service logs for a specific trace test and use it in deviation analysis?

From 174d51a62293dd6cbd47e97859dddc70513b647 Mon Sep 17 00:00:00 2001
From: Sohil Kshirsagar
Date: Fri, 20 Mar 2026 16:44:26 -0400
Subject: [PATCH 4/9] update tui testing doc

---
 docs/tui-testing.md | 216 +++++---------------------------------
 1 file changed, 25 insertions(+), 191 deletions(-)

diff --git a/docs/tui-testing.md b/docs/tui-testing.md
index 0a0ad43..29f36fc 100644
--- a/docs/tui-testing.md
+++ b/docs/tui-testing.md
@@ -23,7 +23,7 @@ To test startup failures, temporarily change the start command in `.tusk/config.
 
 ```yaml
 start:
-  command: 'node -e "console.log(''boot log line''); console.error(''some error''); process.exit(1)"'
+  command: node -e "console.log('boot log line'); console.error('some error'); process.exit(1)"
 ```
 
 To test with a service that starts but behaves differently, adjust the command or example codebase as needed.
@@ -43,30 +43,34 @@ Opens a real Terminal.app window with tmux inside it, then uses macOS `screencap
 
 ```bash
 # 1. Open Terminal.app with a tmux session
 osascript -e 'tell application "Terminal"
-  do script "tmux new-session -s tui-test -x 160 -y 45"
+  do script "tmux new-session -s tui-test -x 200 -y 55"
 end tell'
 sleep 3
 
-# 2. Hide tmux status bar (otherwise a green bar appears at the bottom)
+# 2. Resize the window to fill most of the screen (fits 14"/16" MacBook Pro)
+osascript -e 'tell application "Terminal" to set bounds of front window to {0, 25, 1700, 1100}'
+sleep 1
+
+# 3. Hide tmux status bar (otherwise a green bar appears at the bottom)
 tmux set -t tui-test status off
 
-# 3. Launch the TUI
+# 4. 
Launch the TUI tmux send-keys -t tui-test 'cd /path/to/test-project && /path/to/tusk drift run' Enter -# 4. Wait for the state you want to capture -# - Normal run with tests: ~15-20s (environment start + test execution) -# - Startup failure with sandbox retry: ~10-15s +# 5. Wait for the state you want to capture +# - Normal run with tests: ~25-30s (environment start + test execution) +# - Startup failure with sandbox retry: ~15-18s # - Just initial render: ~3-5s -sleep 15 +sleep 25 -# 5. Navigate if needed +# 6. Navigate if needed tmux send-keys -t tui-test g # go to top (select Service Logs) tmux send-keys -t tui-test j # move selection down tmux send-keys -t tui-test J # scroll log panel down tmux send-keys -t tui-test D # half-page down in log panel sleep 1 -# 6. Find the Terminal.app window ID +# 7. Find the Terminal.app window ID WINDOW_ID=$(python3 -c " import Quartz windows = Quartz.CGWindowListCopyWindowInfo(Quartz.kCGWindowListOptionOnScreenOnly, Quartz.kCGNullWindowID) @@ -76,10 +80,10 @@ for w in windows: break ") -# 7. Capture the window +# 8. Capture the window screencapture -l "$WINDOW_ID" -o screenshot.png -# 8. Cleanup +# 9. Cleanup tmux send-keys -t tui-test q sleep 2 tmux kill-session -t tui-test @@ -89,10 +93,10 @@ osascript -e 'tell application "Terminal" to close front window' 2>/dev/null **Output:** ~2800x1800 Retina PNG with native font rendering. **Notes:** - - `screencapture -l` captures by window ID — the Terminal window doesn't need to be in the foreground. You can keep working in other windows. - The `-o` flag removes the window shadow. - `screencapture -l` fails silently without Screen Recording permission — you get a blank or tiny image. +- When finding the window ID, make sure to match `kCGWindowOwnerName == 'Terminal'` — other apps (Chrome, etc.) may be in front. ### Option B: Text capture (quick functional checks) @@ -100,11 +104,11 @@ Uses a detached tmux session — no visible window, no permissions needed. Good ```bash # 1. 
Detached tmux session (no visible window) -tmux new-session -d -s tui-test -x 160 -y 45 +tmux new-session -d -s tui-test -x 200 -y 55 # 2. Launch the TUI tmux send-keys -t tui-test 'cd /path/to/test-project && /path/to/tusk drift run' Enter -sleep 15 +sleep 25 # 3. Capture the screen as plain text SCREEN=$(tmux capture-pane -t tui-test -p) @@ -138,10 +142,11 @@ tmux kill-session -t tui-test ## Recommended Dimensions -| Setting | Value | Notes | -| ------------ | ----- | ------------------------------- | -| tmux columns | 160 | Wide enough for both TUI panels | -| tmux rows | 45 | Tall enough to see tests + logs | +| Setting | Value | Notes | +| ------------ | ----- | ------------------------------------------ | +| tmux columns | 200 | Wide enough for both TUI panels + detail | +| tmux rows | 55 | Tall enough to see tests + logs | +| Window bounds | {0, 25, 1700, 1100} | Fits 14"/16" MacBook Pro (adjust for your display) | ## Common Gotchas @@ -152,175 +157,4 @@ tmux kill-session -t tui-test 5. **YAML quoting.** When editing config.yaml with commands containing colons or quotes, wrap the entire value in double quotes and escape inner quotes. 6. **Restore config.** Always restore `.tusk/config.yaml` after testing with modified start commands. 7. **tmux targets by session name** (`-t tui-test`), so commands work regardless of which terminal you're focused on. You can keep working while tests run. - -## Fallback: Rendering ANSI captures to images - -If you have an ANSI text capture and need an image (e.g., for a bug report when screencapture isn't available): - -```bash -# Capture with ANSI codes -tmux capture-pane -t tui-test -p -e > /tmp/tui-capture.txt -``` - -### Pillow - -```bash -python3 render_ansi_png.py # see script below -``` - -
-render_ansi_png.py - -```python -#!/usr/bin/env python3 -"""Render tmux ANSI capture to a high-resolution PNG using Pillow.""" - -import re -from PIL import Image, ImageDraw, ImageFont - -INPUT_FILE = "/tmp/tui-capture.txt" -OUTPUT_FILE = "tui-screenshot.png" -FONT_SIZE = 22 -BG_COLOR = (30, 30, 30) -FONT_PATH = "/System/Library/Fonts/Menlo.ttc" # macOS; adjust for Linux - - -def load_font(size): - try: - return ImageFont.truetype(FONT_PATH, size, index=0) - except Exception: - try: - return ImageFont.truetype("/System/Library/Fonts/Monaco.ttf", size) - except Exception: - return ImageFont.load_default() - - -def build_256_palette(): - palette = {} - base16 = [ - (0, 0, 0), (205, 49, 49), (13, 188, 121), (229, 229, 16), - (36, 114, 200), (188, 63, 188), (17, 168, 205), (204, 204, 204), - (128, 128, 128), (241, 76, 76), (35, 209, 139), (245, 245, 67), - (59, 142, 234), (214, 112, 214), (41, 184, 219), (255, 255, 255), - ] - for i, c in enumerate(base16): - palette[i] = c - for i in range(216): - r, g, b = i // 36, (i % 36) // 6, i % 6 - palette[16 + i] = ( - 0 if r == 0 else 55 + r * 40, - 0 if g == 0 else 55 + g * 40, - 0 if b == 0 else 55 + b * 40, - ) - for i in range(24): - v = 8 + i * 10 - palette[232 + i] = (v, v, v) - return palette - - -PALETTE_256 = build_256_palette() -ANSI_RE = re.compile(r'\x1b\[([0-9;]*)m') - - -def parse_ansi_line(line): - segments = [] - fg, bg, bold = (204, 204, 204), None, False - pos = 0 - - for match in ANSI_RE.finditer(line): - start, end = match.span() - for ch in line[pos:start]: - segments.append((ch, fg, bg, bold)) - pos = end - - params = [int(p) if p else 0 for p in (match.group(1) or "0").split(';')] - i = 0 - while i < len(params): - p = params[i] - if p == 0: - fg, bg, bold = (204, 204, 204), None, False - elif p == 1: - bold = True - elif p == 22: - bold = False - elif 30 <= p <= 37: - fg = PALETTE_256[p - 30] - elif p == 38: - if i + 1 < len(params) and params[i + 1] == 5 and i + 2 < len(params): - fg = 
PALETTE_256.get(params[i + 2], fg) - i += 2 - elif i + 1 < len(params) and params[i + 1] == 2 and i + 4 < len(params): - fg = (params[i + 2], params[i + 3], params[i + 4]) - i += 4 - elif p == 39: - fg = (204, 204, 204) - elif 40 <= p <= 47: - bg = PALETTE_256[p - 40] - elif p == 48: - if i + 1 < len(params) and params[i + 1] == 5 and i + 2 < len(params): - bg = PALETTE_256.get(params[i + 2], bg) - i += 2 - elif i + 1 < len(params) and params[i + 1] == 2 and i + 4 < len(params): - bg = (params[i + 2], params[i + 3], params[i + 4]) - i += 4 - elif p == 49: - bg = None - elif 90 <= p <= 97: - fg = PALETTE_256[p - 90 + 8] - elif 100 <= p <= 107: - bg = PALETTE_256[p - 100 + 8] - i += 1 - - for ch in line[pos:]: - segments.append((ch, fg, bg, bold)) - return segments - - -def main(): - with open(INPUT_FILE, 'r') as f: - lines = [line.rstrip('\n') for line in f.readlines()] - - font = load_font(FONT_SIZE) - bbox = font.getbbox("M") - char_w = bbox[2] - bbox[0] - char_h = int(FONT_SIZE * 1.45) - - padding = 16 - max_cols = 160 - img_w = padding * 2 + char_w * max_cols - img_h = padding * 2 + char_h * len(lines) - - img = Image.new('RGB', (img_w, img_h), BG_COLOR) - draw = ImageDraw.Draw(img) - - for row, line in enumerate(lines): - segments = parse_ansi_line(line) - y = padding + row * char_h - for col, (ch, fg, bg, is_bold) in enumerate(segments): - x = padding + col * char_w - if bg: - draw.rectangle([x, y, x + char_w, y + char_h], fill=bg) - if ch and ch != ' ': - draw.text((x, y), ch, fill=fg, font=font) - - img.save(OUTPUT_FILE) - print(f"Saved: {OUTPUT_FILE} ({img_w}x{img_h})") - - -if __name__ == "__main__": - main() -``` - -
- -### ansi2html + Chrome headless - -If `ansi2html` is installed (`pip3 install ansi2html`): - -```bash -cat /tmp/tui-capture.txt | ansi2html > /tmp/tui.html -"/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" \ - --headless --screenshot=/tmp/screenshot.png \ - --window-size=1600,900 --force-device-scale-factor=2 \ - /tmp/tui.html -``` +8. **Window ID targeting.** When using `screencapture -l`, make sure the Python script finds the Terminal window, not Chrome or another app that may be in front. From 77bee0acccb90522314055b149ee010385b5ee02 Mon Sep 17 00:00:00 2001 From: Sohil Kshirsagar Date: Fri, 20 Mar 2026 16:52:41 -0400 Subject: [PATCH 5/9] fix: lint --- internal/runner/executor.go | 2 +- internal/runner/service_test.go | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/internal/runner/executor.go b/internal/runner/executor.go index f753ce2..9ace40c 100644 --- a/internal/runner/executor.go +++ b/internal/runner/executor.go @@ -60,7 +60,7 @@ type Executor struct { server *Server serviceLogFile *os.File startupLogBuffer *syncBuffer // in-memory buffer when --enable-service-logs is off - processExitCh chan error // signals early process exit + processExitCh chan error // signals early process exit enableServiceLogs bool servicePort int resultsDir string diff --git a/internal/runner/service_test.go b/internal/runner/service_test.go index 93920b2..f3b8559 100644 --- a/internal/runner/service_test.go +++ b/internal/runner/service_test.go @@ -878,10 +878,11 @@ func TestSetupServiceLogging(t *testing.T) { err := e.setupServiceLogging() - if tt.expectError { + switch { + case tt.expectError: assert.Error(t, err) assert.Nil(t, e.serviceLogFile) - } else if tt.enableServiceLogs { + case tt.enableServiceLogs: assert.NoError(t, err) assert.NotNil(t, e.serviceLogFile) @@ -892,7 +893,7 @@ func TestSetupServiceLogging(t *testing.T) { _ = e.serviceLogFile.Close() _ = os.Remove(e.serviceLogFile.Name()) - } else { + default: // Logging 
disabled: no error, no file, but buffer is created assert.NoError(t, err) assert.Nil(t, e.serviceLogFile) From 8efd44107309b1ca2f34429eb3fa8088a2bed5e0 Mon Sep 17 00:00:00 2001 From: Sohil Kshirsagar Date: Fri, 20 Mar 2026 16:57:44 -0400 Subject: [PATCH 6/9] fix: windows tests --- internal/runner/service_test_helpers_windows.go | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/internal/runner/service_test_helpers_windows.go b/internal/runner/service_test_helpers_windows.go index f40dcc9..f9b8a2d 100644 --- a/internal/runner/service_test_helpers_windows.go +++ b/internal/runner/service_test_helpers_windows.go @@ -10,8 +10,9 @@ import ( // createTestCommand creates a test command that can be gracefully killed func createTestCommand(ctx context.Context, duration string) *exec.Cmd { - // Windows: use timeout command (similar to sleep, but in seconds) - cmd := exec.CommandContext(ctx, "cmd.exe", "/c", "timeout /t "+duration+" /nobreak >nul") + // Windows: use ping for delay (timeout command fails in non-interactive CI) + // ping -n gives roughly seconds of delay + cmd := exec.CommandContext(ctx, "cmd.exe", "/c", "ping -n "+duration+" 127.0.0.1 >nul") cmd.SysProcAttr = &syscall.SysProcAttr{ CreationFlags: syscall.CREATE_NEW_PROCESS_GROUP, } @@ -31,18 +32,18 @@ func createUnkillableTestCommand(ctx context.Context, duration string) *exec.Cmd // getLongRunningCommand returns a shell command that runs indefinitely func getLongRunningCommand() string { - // Windows: infinite loop with timeout - return "cmd.exe /c :loop & timeout /t 1 /nobreak >nul & goto loop" + // Windows: infinite loop with ping delay (timeout command fails in non-interactive CI) + return "cmd.exe /c :loop & ping -n 2 127.0.0.1 >nul & goto loop" } // getSimpleSleepCommand returns a simple sleep command for testing func getSimpleSleepCommand() string { - // Windows: 1 second timeout (timeout is in seconds, minimum is 1) - return "cmd.exe /c timeout /t 1 /nobreak >nul" + // 
Windows: use ping for ~1 second delay (timeout command fails in non-interactive CI) + return "cmd.exe /c ping -n 2 127.0.0.1 >nul" } // getMediumSleepCommand returns a medium duration sleep for integration tests func getMediumSleepCommand() string { - // Windows: 1 second is minimum, but this is acceptable for medium duration - return "cmd.exe /c timeout /t 2 /nobreak >nul" + // Windows: use ping for ~2 second delay (timeout command fails in non-interactive CI) + return "cmd.exe /c ping -n 3 127.0.0.1 >nul" } From 5d1d704f97ba7424c4f24ea6ee721e208a77dc80 Mon Sep 17 00:00:00 2001 From: Sohil Kshirsagar Date: Fri, 20 Mar 2026 17:17:03 -0400 Subject: [PATCH 7/9] fix: discard buffer --- internal/runner/executor.go | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/internal/runner/executor.go b/internal/runner/executor.go index b058958..1db9a7d 100644 --- a/internal/runner/executor.go +++ b/internal/runner/executor.go @@ -36,13 +36,17 @@ const ( // syncBuffer is a thread-safe buffer for capturing service stdout/stderr concurrently. type syncBuffer struct { - mu sync.Mutex - buf bytes.Buffer + mu sync.Mutex + buf bytes.Buffer + discard bool } func (s *syncBuffer) Write(p []byte) (int, error) { s.mu.Lock() defer s.mu.Unlock() + if s.discard { + return len(p), nil + } return s.buf.Write(p) } @@ -52,6 +56,14 @@ func (s *syncBuffer) String() string { return s.buf.String() } +// Discard makes future writes no-ops and frees the buffer memory. +func (s *syncBuffer) Discard() { + s.mu.Lock() + defer s.mu.Unlock() + s.discard = true + s.buf.Reset() +} + type Executor struct { serviceURL string parallel int @@ -558,16 +570,17 @@ func (e *Executor) GetStartupLogs() string { return "" } -// DiscardStartupBuffer detaches the in-memory startup log buffer from the service process -// and discards it. This is called after the service starts successfully to avoid unbounded -// memory growth during the test run. 
Has no effect when --enable-service-logs is set -// (file-based logging persists for the full run). +// DiscardStartupBuffer makes the in-memory startup log buffer discard future writes +// and frees its memory. This is called after the service starts successfully to avoid +// unbounded memory growth during the test run. Has no effect when --enable-service-logs +// is set (file-based logging persists for the full run). +// +// Note: we can't swap cmd.Stdout after Start() because Go's exec package captures the +// writer by reference in an internal goroutine at Start() time. Instead, we set a flag +// on the buffer itself to make Write a no-op. func (e *Executor) DiscardStartupBuffer() { if !e.enableServiceLogs && e.startupLogBuffer != nil { - if e.serviceCmd != nil { - e.serviceCmd.Stdout = io.Discard - e.serviceCmd.Stderr = io.Discard - } + e.startupLogBuffer.Discard() e.startupLogBuffer = nil } } From 75baaa3ba16ca28337d2beef736b9f19e1681248 Mon Sep 17 00:00:00 2001 From: Sohil Kshirsagar Date: Sun, 22 Mar 2026 14:28:59 -0400 Subject: [PATCH 8/9] fix: minor fixes --- cmd/run.go | 4 ++-- internal/runner/service.go | 7 +++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/cmd/run.go b/cmd/run.go index a61d878..ff83980 100644 --- a/cmd/run.go +++ b/cmd/run.go @@ -748,7 +748,7 @@ func runTests(cmd *cobra.Command, args []string) error { if startupLogs != "" { log.Stderrln("\n📋 Service startup logs:") for _, line := range strings.Split(strings.TrimRight(startupLogs, "\n"), "\n") { - log.Stderrln(" " + line) + log.Stderrln(line) } log.Stderrln("") } @@ -799,7 +799,7 @@ func runTests(cmd *cobra.Command, args []string) error { if startupLogs != "" { log.Stderrln("\n📋 Service startup logs:") for _, line := range strings.Split(strings.TrimRight(startupLogs, "\n"), "\n") { - log.Stderrln(" " + line) + log.Stderrln(line) } log.Stderrln("") } diff --git a/internal/runner/service.go b/internal/runner/service.go index f553d6d..915b407 100644 --- 
a/internal/runner/service.go +++ b/internal/runner/service.go @@ -171,10 +171,13 @@ func (e *Executor) StartService() error { return fmt.Errorf("failed to start service: %w", err) } - // Monitor process for early exit so waitForReadiness can fail fast + // Monitor process for early exit so waitForReadiness can fail fast. + // Capture the channel locally so that if a sandbox retry creates a new channel, + // this goroutine still sends to the original one (not the new one). e.processExitCh = make(chan error, 1) + exitCh := e.processExitCh go func() { - e.processExitCh <- e.serviceCmd.Wait() + exitCh <- e.serviceCmd.Wait() }() if err := e.waitForReadiness(cfg); err != nil { From 4d3938e566093259ec8cd616189dfc90d3cc3835 Mon Sep 17 00:00:00 2001 From: Sohil Kshirsagar Date: Sun, 22 Mar 2026 15:05:50 -0400 Subject: [PATCH 9/9] fix: startup log capture correctness and defensive goroutine captures --- internal/runner/environment.go | 18 ++++---- internal/runner/executor.go | 9 ++-- internal/runner/service.go | 45 ++++++++++++------- .../runner/service_test_helpers_windows.go | 11 +++-- internal/tui/test_executor.go | 7 +-- 5 files changed, 54 insertions(+), 36 deletions(-) diff --git a/internal/runner/environment.go b/internal/runner/environment.go index 9a025c5..79acf5c 100644 --- a/internal/runner/environment.go +++ b/internal/runner/environment.go @@ -24,18 +24,18 @@ func (e *Executor) StartEnvironment() error { if err := e.StartService(); err != nil { if e.GetEffectiveSandboxMode() == SandboxModeAuto && e.lastServiceSandboxed { log.ServiceLog("⚠️ Service failed to start in sandbox; retrying once without sandbox...") - - // Preserve log file/buffer across the retry so startup logs accumulate - savedLogFile := e.serviceLogFile - e.serviceLogFile = nil // prevent StopService → cleanupLogFiles from closing it _ = e.StopService() - e.serviceLogFile = savedLogFile - // Write separator so the user can see where the retry begins - if e.enableServiceLogs && savedLogFile != nil 
{ - _, _ = savedLogFile.WriteString("⚠️ Retrying without sandbox...\n") + // Write separator so the user can see where the retry begins. + // The in-memory buffer survives StopService; the file path persists + // via serviceLogPath and setupServiceLogging will reopen in append mode. + if e.enableServiceLogs && e.serviceLogPath != "" { + if f, err := os.OpenFile(e.serviceLogPath, os.O_APPEND|os.O_WRONLY, 0o600); err == nil { // #nosec G304 + _, _ = f.WriteString("⚠️ Retrying without sandbox...\n") + _ = f.Close() + } } else if e.startupLogBuffer != nil { - _, _ = e.startupLogBuffer.Write([]byte("⚠️ Retrying without sandbox...\n")) + _, _ = e.startupLogBuffer.Write([]byte("⚠️ Retrying without sandbox...\n")) } e.sandboxBypass = true diff --git a/internal/runner/executor.go b/internal/runner/executor.go index 1db9a7d..bb3629c 100644 --- a/internal/runner/executor.go +++ b/internal/runner/executor.go @@ -71,6 +71,7 @@ type Executor struct { serviceCmd *exec.Cmd server *Server serviceLogFile *os.File + serviceLogPath string // persists across StopService so GetStartupLogs can read it back startupLogBuffer *syncBuffer // in-memory buffer when --enable-service-logs is off processExitCh chan error // signals early process exit enableServiceLogs bool @@ -557,8 +558,8 @@ func countPassedTests(results []TestResult) int { // When --enable-service-logs is set, it reads back from the log file. // Otherwise, it returns the contents of the in-memory startup buffer. func (e *Executor) GetStartupLogs() string { - if e.enableServiceLogs && e.serviceLogFile != nil { - data, err := os.ReadFile(e.serviceLogFile.Name()) + if e.enableServiceLogs && e.serviceLogPath != "" { + data, err := os.ReadFile(e.serviceLogPath) if err != nil { return "" } @@ -587,8 +588,8 @@ func (e *Executor) DiscardStartupBuffer() { // GetStartupFailureHelpMessage returns a user-friendly help message when the service fails to start. 
func (e *Executor) GetStartupFailureHelpMessage() string { - if e.enableServiceLogs && e.serviceLogFile != nil { - return fmt.Sprintf("\n📄 Service logs are available at: %s\n", e.serviceLogFile.Name()) + if e.enableServiceLogs && e.serviceLogPath != "" { + return fmt.Sprintf("\n📄 Service logs are available at: %s\n", e.serviceLogPath) } return "" } diff --git a/internal/runner/service.go b/internal/runner/service.go index 915b407..1d84e6a 100644 --- a/internal/runner/service.go +++ b/internal/runner/service.go @@ -176,8 +176,9 @@ func (e *Executor) StartService() error { // this goroutine still sends to the original one (not the new one). e.processExitCh = make(chan error, 1) exitCh := e.processExitCh + cmd := e.serviceCmd go func() { - exitCh <- e.serviceCmd.Wait() + exitCh <- cmd.Wait() }() if err := e.waitForReadiness(cfg); err != nil { @@ -474,26 +475,36 @@ func (e *Executor) SetEnableServiceLogs(enable bool) { // Otherwise, an in-memory buffer captures startup output (shown on failure, discarded on success). func (e *Executor) setupServiceLogging() error { if e.enableServiceLogs { - // Allow tests to override the logs directory - logsDir := utils.GetLogsDir() - if testLogsDir := os.Getenv("TUSK_TEST_LOGS_DIR"); testLogsDir != "" { - logsDir = testLogsDir - } + // Reuse existing log file across sandbox retries (same as the buffer path guard below). + // The file was closed by StopService → cleanupLogFiles, but we can reopen it for append. 
+ if e.serviceLogPath != "" { + logFile, err := os.OpenFile(e.serviceLogPath, os.O_APPEND|os.O_WRONLY, 0o600) // #nosec G304 + if err != nil { + return fmt.Errorf("failed to reopen service log file: %w", err) + } + e.serviceLogFile = logFile + } else { + // Allow tests to override the logs directory + logsDir := utils.GetLogsDir() + if testLogsDir := os.Getenv("TUSK_TEST_LOGS_DIR"); testLogsDir != "" { + logsDir = testLogsDir + } - if err := os.MkdirAll(logsDir, 0o750); err != nil { - return fmt.Errorf("failed to create logs directory: %w", err) - } + if err := os.MkdirAll(logsDir, 0o750); err != nil { + return fmt.Errorf("failed to create logs directory: %w", err) + } - timestamp := time.Now().Format("20060102-150405") + timestamp := time.Now().Format("20060102-150405") + logPath := filepath.Join(logsDir, fmt.Sprintf("tusk-replay-%s.log", timestamp)) + logFile, err := os.Create(logPath) // #nosec G304 + if err != nil { + return fmt.Errorf("failed to create service log file: %w", err) + } - logPath := filepath.Join(logsDir, fmt.Sprintf("tusk-replay-%s.log", timestamp)) - logFile, err := os.Create(logPath) // #nosec G304 - if err != nil { - return fmt.Errorf("failed to create service log file: %w", err) + e.serviceLogFile = logFile + e.serviceLogPath = logPath + log.ServiceLog(fmt.Sprintf("Service logs will be written to: %s", logPath)) } - - e.serviceLogFile = logFile - log.ServiceLog(fmt.Sprintf("Service logs will be written to: %s", logPath)) } else if e.startupLogBuffer == nil { // Only create a new buffer if one doesn't already exist. // During sandbox retry, the existing buffer preserves the first attempt's logs. 
diff --git a/internal/runner/service_test_helpers_windows.go b/internal/runner/service_test_helpers_windows.go index f9b8a2d..5a70c11 100644 --- a/internal/runner/service_test_helpers_windows.go +++ b/internal/runner/service_test_helpers_windows.go @@ -4,15 +4,20 @@ package runner import ( "context" + "fmt" "os/exec" + "strconv" "syscall" ) // createTestCommand creates a test command that can be gracefully killed func createTestCommand(ctx context.Context, duration string) *exec.Cmd { - // Windows: use ping for delay (timeout command fails in non-interactive CI) - // ping -n gives roughly seconds of delay - cmd := exec.CommandContext(ctx, "cmd.exe", "/c", "ping -n "+duration+" 127.0.0.1 >nul") + // Windows: use ping for delay (timeout command fails in non-interactive CI). + // ping -n N sends N pings with ~1s between them, so total delay ≈ N-1 seconds. + // Add 1 to match the requested duration. + n, _ := strconv.Atoi(duration) + pingCount := fmt.Sprintf("%d", n+1) + cmd := exec.CommandContext(ctx, "cmd.exe", "/c", "ping -n "+pingCount+" 127.0.0.1 >nul") cmd.SysProcAttr = &syscall.SysProcAttr{ CreationFlags: syscall.CREATE_NEW_PROCESS_GROUP, } diff --git a/internal/tui/test_executor.go b/internal/tui/test_executor.go index 851dc41..8b9e1eb 100644 --- a/internal/tui/test_executor.go +++ b/internal/tui/test_executor.go @@ -1030,9 +1030,10 @@ func (m *testExecutorModel) startNextEnvironmentGroup() tea.Cmd { m.addServiceLog(fmt.Sprintf("❌ Failed to start environment for %s: %v", group.Name, err)) - helpMsg := m.executor.GetStartupFailureHelpMessage() - for _, line := range strings.Split(strings.TrimSpace(helpMsg), "\n") { - m.addServiceLog(line) + if helpMsg := m.executor.GetStartupFailureHelpMessage(); helpMsg != "" { + for _, line := range strings.Split(strings.TrimSpace(helpMsg), "\n") { + m.addServiceLog(line) + } } return executionFailedMsg{reason: fmt.Sprintf("Failed to start environment: %v", err)}