diff --git a/cmd/run.go b/cmd/run.go index 5ceb0e1..ff83980 100644 --- a/cmd/run.go +++ b/cmd/run.go @@ -391,6 +391,9 @@ func runTests(cmd *cobra.Command, args []string) error { var tests []runner.Test var err error + // Track overall timing for print mode (includes test loading) + overallStart := time.Now() + // Step 3: Load tests - in cloud mode, fetch from backend; otherwise use local files deferLoadTests := interactive if deferLoadTests { @@ -457,7 +460,7 @@ func runTests(cmd *cobra.Command, args []string) error { if isValidation { log.Stderrln(fmt.Sprintf("\nāž¤ Found %d traces to validate", len(tests))) } else if !cloud { - log.Stderrln(fmt.Sprintf("\nāž¤ Loaded %d tests from local traces", len(tests))) + log.Stderrln(fmt.Sprintf("\nāž¤ Loaded %d tests from local traces (%.1fs)", len(tests), time.Since(overallStart).Seconds())) } } @@ -732,6 +735,7 @@ func runTests(cmd *cobra.Command, args []string) error { } // Step 4: Run tests by environment + testPhaseStart := time.Now() var results []runner.TestResult if groupResult != nil && len(groupResult.Groups) > 0 { // Use environment-based replay @@ -739,6 +743,17 @@ func runTests(cmd *cobra.Command, args []string) error { if err != nil { cmd.SilenceUsage = true + // Dump startup logs so user can diagnose startup failures + startupLogs := executor.GetStartupLogs() + if startupLogs != "" { + log.Stderrln("\nšŸ“‹ Service startup logs:") + for _, line := range strings.Split(strings.TrimRight(startupLogs, "\n"), "\n") { + log.Stderrln(line) + } + log.Stderrln("") + } + log.Stderr(executor.GetStartupFailureHelpMessage()) + // Update CI status to FAILURE if in cloud mode if cloud && client != nil && (ci || isValidation) { statusReq := &backend.UpdateDriftRunCIStatusRequest{ @@ -780,6 +795,14 @@ func runTests(cmd *cobra.Command, args []string) error { } } + startupLogs := executor.GetStartupLogs() + if startupLogs != "" { + log.Stderrln("\nšŸ“‹ Service startup logs:") + for _, line := range 
strings.Split(strings.TrimRight(startupLogs, "\n"), "\n") { + log.Stderrln(line) + } + log.Stderrln("") + } log.Stderr(executor.GetStartupFailureHelpMessage()) return fmt.Errorf("failed to start environment: %w", err) } @@ -790,7 +813,7 @@ func runTests(cmd *cobra.Command, args []string) error { }() if !interactive && !quiet { - log.Stderrln(" āœ“ Environment ready") + log.Stderrln(fmt.Sprintf(" āœ“ Environment ready (%.1fs)", time.Since(testPhaseStart).Seconds())) log.Stderrln(fmt.Sprintf("āž¤ Running %d tests (concurrency: %d)...\n", len(tests), executor.GetConcurrency())) } @@ -830,6 +853,10 @@ func runTests(cmd *cobra.Command, args []string) error { outputErr = runner.OutputResultsSummary(results, outputFormat, quiet) } + if !interactive && !quiet { + log.Stderrln(fmt.Sprintf("Total elapsed: %.1fs", time.Since(overallStart).Seconds())) + } + // Step 5: Upload results to backend if in cloud mode // Do this before returning any error so CI status is always updated if cloud && client != nil && (ci || isValidation) { diff --git a/docs/tui-testing.md b/docs/tui-testing.md new file mode 100644 index 0000000..29f36fc --- /dev/null +++ b/docs/tui-testing.md @@ -0,0 +1,160 @@ +# CLI & TUI Testing Guide + +How to manually test the tusk CLI in both print mode and interactive TUI mode. + +## Print Mode Testing + +Print mode (`--print`) runs headlessly — no interactive UI. Run it directly and inspect stderr: + +```bash +cd /path/to/test-project +/path/to/tusk drift run --print 2>&1 +``` + +Filter for specific output: + +```bash +/path/to/tusk drift run --print 2>&1 | grep -E "(āž¤|āœ“|Tests:|Error:)" +``` + +### Testing failure scenarios + +To test startup failures, temporarily change the start command in `.tusk/config.yaml`: + +```yaml +start: + command: node -e "console.log('boot log line'); console.error('some error'); process.exit(1)" +``` + +To test with a service that starts but behaves differently, adjust the command or example codebase as needed. 
+ +**Always restore the config after testing.** + +## TUI Testing with tmux + +The TUI (interactive mode, no `--print`) requires a terminal. We use tmux for programmatic control — it lets us send keystrokes and capture output without needing to be in the terminal ourselves. + +### Option A: Native screenshots (recommended) + +Opens a real Terminal.app window with tmux inside it, then uses macOS `screencapture -l` to capture that specific window by ID. This produces pixel-perfect Retina screenshots and should always be used to verify TUI visual changes. + +**One-time setup:** Grant Screen Recording permission to Terminal.app in System Settings > Privacy & Security > Screen Recording. + +```bash +# 1. Open Terminal.app with a tmux session +osascript -e 'tell application "Terminal" + do script "tmux new-session -s tui-test -x 200 -y 55" +end tell' +sleep 3 + +# 2. Resize the window to fill most of the screen (fits 14"/16" MacBook Pro) +osascript -e 'tell application "Terminal" to set bounds of front window to {0, 25, 1700, 1100}' +sleep 1 + +# 3. Hide tmux status bar (otherwise a green bar appears at the bottom) +tmux set -t tui-test status off + +# 4. Launch the TUI +tmux send-keys -t tui-test 'cd /path/to/test-project && /path/to/tusk drift run' Enter + +# 5. Wait for the state you want to capture +# - Normal run with tests: ~25-30s (environment start + test execution) +# - Startup failure with sandbox retry: ~15-18s +# - Just initial render: ~3-5s +sleep 25 + +# 6. Navigate if needed +tmux send-keys -t tui-test g # go to top (select Service Logs) +tmux send-keys -t tui-test j # move selection down +tmux send-keys -t tui-test J # scroll log panel down +tmux send-keys -t tui-test D # half-page down in log panel +sleep 1 + +# 7. 
Find the Terminal.app window ID +WINDOW_ID=$(python3 -c " +import Quartz +windows = Quartz.CGWindowListCopyWindowInfo(Quartz.kCGWindowListOptionOnScreenOnly, Quartz.kCGNullWindowID) +for w in windows: + if w.get('kCGWindowOwnerName') == 'Terminal' and w.get('kCGWindowLayer', 0) == 0: + print(w['kCGWindowNumber']) + break +") + +# 8. Capture the window +screencapture -l "$WINDOW_ID" -o screenshot.png + +# 9. Cleanup +tmux send-keys -t tui-test q +sleep 2 +tmux kill-session -t tui-test +osascript -e 'tell application "Terminal" to close front window' 2>/dev/null +``` + +**Output:** ~2800x1800 Retina PNG with native font rendering. + +**Notes:** +- `screencapture -l` captures by window ID — the Terminal window doesn't need to be in the foreground. You can keep working in other windows. +- The `-o` flag removes the window shadow. +- `screencapture -l` fails silently without Screen Recording permission — you get a blank or tiny image. +- When finding the window ID, make sure to match `kCGWindowOwnerName == 'Terminal'` — other apps (Chrome, etc.) may be in front. + +### Option B: Text capture (quick functional checks) + +Uses a detached tmux session — no visible window, no permissions needed. Good for verifying that specific text appears in the TUI or that navigation works. **Not a substitute for screenshots** when verifying layout, colors, or visual rendering. + +```bash +# 1. Detached tmux session (no visible window) +tmux new-session -d -s tui-test -x 200 -y 55 + +# 2. Launch the TUI +tmux send-keys -t tui-test 'cd /path/to/test-project && /path/to/tusk drift run' Enter +sleep 25 + +# 3. Capture the screen as plain text +SCREEN=$(tmux capture-pane -t tui-test -p) + +# 4. Assert on content +echo "$SCREEN" | grep -q "TEST EXECUTION" || echo "FAIL: header not found" +echo "$SCREEN" | grep -q "Environment ready" || echo "FAIL: environment didn't start" + +# 5. 
Navigate and capture again +tmux send-keys -t tui-test j +sleep 0.5 +SCREEN=$(tmux capture-pane -t tui-test -p) + +# 6. Cleanup +tmux send-keys -t tui-test q +sleep 1 +tmux kill-session -t tui-test +``` + +### TUI keyboard shortcuts reference + +| Key | Action | +| --------- | --------------------------------------- | +| `j` / `k` | Select next/previous test in left panel | +| `g` / `G` | Jump to top/bottom of test list | +| `u` / `d` | Half-page up/down in test list | +| `J` / `K` | Scroll log panel down/up | +| `U` / `D` | Half-page up/down in log panel | +| `y` | Copy all logs | +| `q` | Quit | + +## Recommended Dimensions + +| Setting | Value | Notes | +| ------------ | ----- | ------------------------------------------ | +| tmux columns | 200 | Wide enough for both TUI panels + detail | +| tmux rows | 55 | Tall enough to see tests + logs | +| Window bounds | {0, 25, 1700, 1100} | Fits 14"/16" MacBook Pro (adjust for your display) | + +## Common Gotchas + +1. **Timing is critical.** The TUI renders asynchronously. Capturing too early gives an incomplete screen. When in doubt, wait longer. +2. **tmux status bar.** The green bar at the bottom of native screenshots is tmux's status line. Hide it with `tmux set -t tui-test status off` before capturing. +3. **Scrolling.** Content often extends below the visible area of the log panel. Send `J` or `D` keys to scroll down before capturing. +4. **Screen Recording permission.** Native `screencapture -l` fails silently without it. Grant it to Terminal.app in System Settings > Privacy & Security > Screen Recording. +5. **YAML quoting.** When editing config.yaml with commands containing colons or quotes, wrap the entire value in double quotes and escape inner quotes. +6. **Restore config.** Always restore `.tusk/config.yaml` after testing with modified start commands. +7. **tmux targets by session name** (`-t tui-test`), so commands work regardless of which terminal you're focused on. You can keep working while tests run. 
+8. **Window ID targeting.** When using `screencapture -l`, make sure the Python script finds the Terminal window, not Chrome or another app that may be in front. diff --git a/internal/agent/prompts/phase_simple_test.md b/internal/agent/prompts/phase_simple_test.md index 4bd8a4b..f94af1e 100644 --- a/internal/agent/prompts/phase_simple_test.md +++ b/internal/agent/prompts/phase_simple_test.md @@ -44,9 +44,10 @@ TuskDrift.initialize({ Run tusk_run to replay the trace. If it fails: -- Run with `debug: true` (keep running it in debug mode until it passes) -- If startup fails in sandbox (for example secret manager bootstrapping), retry with `sandbox_mode: "off"` -- Check for errors in the output or in the logs (in .tusk/logs/). Logs only appear if `debug: true` is set. +- Check the service startup logs for errors (always shown on startup failure) +- If the service starts but a test replay fails, run with `debug: true` to see runtime logs (keep running it in debug mode until it passes) +- If you need more detail from the SDK itself, set `logLevel: "debug"` in the SDK initialization to see SDK-level diagnostics +- If startup fails in sandbox (for example secret manager bootstrapping), retry with `sandbox_mode: "off"` and if that works, add a comment in config.yaml explaining why sandbox was disabled (e.g., `# sandbox disabled: service requires external secret manager during startup`) - If you see config-related errors (e.g., "no start command"), run `tusk_validate_config` to check for config issues - Try to fix issues and retry (max 3 attempts) - If still failing, ask the user for help diff --git a/internal/agent/prompts/phase_verify_complex_test.md b/internal/agent/prompts/phase_verify_complex_test.md index 408e52f..4fb77f0 100644 --- a/internal/agent/prompts/phase_verify_complex_test.md +++ b/internal/agent/prompts/phase_verify_complex_test.md @@ -29,7 +29,7 @@ Follow the same process as the simple test: 4. Wait 3 seconds for trace to be written 5. Stop the service 6. 
Run tusk_list to verify trace was recorded -7. Run tusk_run to replay (try with `debug: true` on failure; if startup fails in sandbox, retry with `sandbox_mode: "off"`) +7. Run tusk_run to replay (startup logs are always shown on startup failure; if a test replay fails, retry with `debug: true` for runtime logs; if startup fails in sandbox, retry with `sandbox_mode: "off"` and if that works, add a comment in config.yaml explaining why) ### Step 3: Save to Cache diff --git a/internal/agent/prompts/phase_verify_simple_test.md b/internal/agent/prompts/phase_verify_simple_test.md index 7fec9b3..c9732d4 100644 --- a/internal/agent/prompts/phase_verify_simple_test.md +++ b/internal/agent/prompts/phase_verify_simple_test.md @@ -32,8 +32,10 @@ If no traces appear, this is a verification failure. Run tusk_run to replay the trace. If it fails: -- Run with `debug: true` once more -- If startup fails in sandbox, retry with `sandbox_mode: "off"` +- Check the service startup logs for errors (always shown on startup failure) +- If the service starts but a test replay fails, retry with `debug: true` to see runtime logs +- If you need more detail from the SDK, set `logLevel: "debug"` in the SDK initialization +- If startup fails in sandbox, retry with `sandbox_mode: "off"` and if that works, add a comment in config.yaml explaining why sandbox was disabled - If still failing, mark as failed ### Step 5: Save to Cache diff --git a/internal/log/log.go b/internal/log/log.go index 6dca3cb..da4bec7 100644 --- a/internal/log/log.go +++ b/internal/log/log.go @@ -2,6 +2,7 @@ package log import ( + "context" "io" "log/slog" "os" @@ -90,15 +91,24 @@ func (l *Logger) process() { func (l *Logger) handleLogMessage(msg logMessage) { tuiPtr := l.tuiLogger.Load() - if tuiPtr == nil { + if tuiPtr != nil { + tui := *tuiPtr + switch msg.msgType { + case logTypeService: + tui.LogToService(msg.message) + case logTypeTest: + tui.LogToCurrentTest(msg.testID, msg.message) + } return } - tui := *tuiPtr + + 
// No TUI active (print mode). Route through slog so messages respect + // the configured log level and use the standard slog format. switch msg.msgType { case logTypeService: - tui.LogToService(msg.message) + slog.Debug(msg.message) case logTypeTest: - tui.LogToCurrentTest(msg.testID, msg.message) + slog.Debug(msg.message, "testID", msg.testID) } } @@ -261,6 +271,15 @@ func TestLog(testID, msg string) { } } +// TestDebug logs a debug-level message to a specific test's log panel. +// Only emits when debug logging is enabled (--debug flag). +func TestDebug(testID, msg string) { + if !slog.Default().Enabled(context.Background(), slog.LevelDebug) { + return + } + TestLog(testID, msg) +} + // TestOrServiceLog tries to log to test, falls back to service if testID is empty func TestOrServiceLog(testID, msg string) { if testID != "" { diff --git a/internal/runner/environment.go b/internal/runner/environment.go index d64049f..79acf5c 100644 --- a/internal/runner/environment.go +++ b/internal/runner/environment.go @@ -25,6 +25,19 @@ func (e *Executor) StartEnvironment() error { if e.GetEffectiveSandboxMode() == SandboxModeAuto && e.lastServiceSandboxed { log.ServiceLog("āš ļø Service failed to start in sandbox; retrying once without sandbox...") _ = e.StopService() + + // Write separator so the user can see where the retry begins. + // The in-memory buffer survives StopService; the file path persists + // via serviceLogPath and setupServiceLogging will reopen in append mode. 
+ if e.enableServiceLogs && e.serviceLogPath != "" { + if f, err := os.OpenFile(e.serviceLogPath, os.O_APPEND|os.O_WRONLY, 0o600); err == nil { // #nosec G304 + _, _ = f.WriteString("āš ļø Retrying without sandbox...\n") + _ = f.Close() + } + } else if e.startupLogBuffer != nil { + _, _ = e.startupLogBuffer.Write([]byte("āš ļø Retrying without sandbox...\n")) + } + e.sandboxBypass = true e.lastServiceSandboxed = false @@ -52,6 +65,11 @@ waitForSDK: log.ServiceLog("āœ… SDK acknowledged") log.Debug("Environment is ready") + + // Discard the in-memory startup buffer now that startup succeeded. + // File-based logging (--enable-service-logs) persists for the full run. + e.DiscardStartupBuffer() + return nil } diff --git a/internal/runner/environment_replay.go b/internal/runner/environment_replay.go index fd4c71a..06c5153 100644 --- a/internal/runner/environment_replay.go +++ b/internal/runner/environment_replay.go @@ -4,6 +4,8 @@ import ( "context" "fmt" "os" + "strings" + "time" "github.com/Use-Tusk/tusk-drift-cli/internal/config" "github.com/Use-Tusk/tusk-drift-cli/internal/log" @@ -24,6 +26,8 @@ func ReplayTestsByEnvironment( allResults := make([]TestResult, 0) for i, group := range groups { + envStart := time.Now() + log.Debug("Starting replay for environment group", "environment", group.Name, "test_count", len(group.Tests), @@ -40,11 +44,24 @@ func ReplayTestsByEnvironment( } // 2. 
Start environment (server + service) + envStartTime := time.Now() if err := executor.StartEnvironment(); err != nil { + // Dump startup logs before returning so the caller's help message makes sense + startupLogs := executor.GetStartupLogs() + if startupLogs != "" { + log.ServiceLog("šŸ“‹ Service startup logs:") + for _, line := range strings.Split(strings.TrimRight(startupLogs, "\n"), "\n") { + log.ServiceLog(line) + } + } cleanup() // Restore env vars before returning return allResults, fmt.Errorf("failed to start environment for %s: %w", group.Name, err) } + envStartDuration := time.Since(envStartTime).Seconds() + log.ServiceLog(fmt.Sprintf("āœ“ Environment ready (%.1fs)", envStartDuration)) + log.Stderrln(fmt.Sprintf("āœ“ Environment ready (%.1fs)", envStartDuration)) + // 3. Run tests for this environment results, err := executor.RunTests(group.Tests) if err != nil { @@ -68,9 +85,11 @@ func ReplayTestsByEnvironment( // 6. Restore environment variables cleanup() + envDuration := time.Since(envStart).Seconds() log.Debug("Completed replay for environment group", "environment", group.Name, - "results_count", len(results)) + "results_count", len(results), + "duration_seconds", envDuration) } log.Debug("Completed all environment group replays", diff --git a/internal/runner/executor.go b/internal/runner/executor.go index 7f9d2da..bb3629c 100644 --- a/internal/runner/executor.go +++ b/internal/runner/executor.go @@ -12,6 +12,7 @@ import ( "os/exec" "path/filepath" "strings" + "sync" "time" "github.com/Use-Tusk/fence/pkg/fence" @@ -33,6 +34,36 @@ const ( SandboxModeOff = "off" ) +// syncBuffer is a thread-safe buffer for capturing service stdout/stderr concurrently. 
+type syncBuffer struct { + mu sync.Mutex + buf bytes.Buffer + discard bool +} + +func (s *syncBuffer) Write(p []byte) (int, error) { + s.mu.Lock() + defer s.mu.Unlock() + if s.discard { + return len(p), nil + } + return s.buf.Write(p) +} + +func (s *syncBuffer) String() string { + s.mu.Lock() + defer s.mu.Unlock() + return s.buf.String() +} + +// Discard makes future writes no-ops and frees the buffer memory. +func (s *syncBuffer) Discard() { + s.mu.Lock() + defer s.mu.Unlock() + s.discard = true + s.buf.Reset() +} + type Executor struct { serviceURL string parallel int @@ -40,6 +71,9 @@ type Executor struct { serviceCmd *exec.Cmd server *Server serviceLogFile *os.File + serviceLogPath string // persists across StopService so GetStartupLogs can read it back + startupLogBuffer *syncBuffer // in-memory buffer when --enable-service-logs is off + processExitCh chan error // signals early process exit enableServiceLogs bool servicePort int resultsDir string @@ -520,24 +554,44 @@ func countPassedTests(results []TestResult) int { return count } -// GetStartupFailureHelpMessage returns a user-friendly help message when the service fails to start. -// It explains how to use --enable-service-logs and shows where logs will be saved. -func (e *Executor) GetStartupFailureHelpMessage() string { - var msg strings.Builder - - msg.WriteString("\n") - msg.WriteString("šŸ’” Tip: Use --enable-service-logs to see detailed service logs and diagnose startup issues.\n") - msg.WriteString(" Service logs show the stdout/stderr output from your service, which can help identify why the service failed to start.\n") +// GetStartupLogs returns the captured service startup logs. +// When --enable-service-logs is set, it reads back from the log file. +// Otherwise, it returns the contents of the in-memory startup buffer. 
+func (e *Executor) GetStartupLogs() string { + if e.enableServiceLogs && e.serviceLogPath != "" { + data, err := os.ReadFile(e.serviceLogPath) + if err != nil { + return "" + } + return string(data) + } + if e.startupLogBuffer != nil { + return e.startupLogBuffer.String() + } + return "" +} - // Always show where logs would be saved - logsDir := utils.GetLogsDir() - if e.enableServiceLogs && e.serviceLogFile != nil { - msg.WriteString(fmt.Sprintf("šŸ“„ Service logs are available at: %s\n", e.serviceLogFile.Name())) - } else { - msg.WriteString(fmt.Sprintf("šŸ“ Service logs will be saved to: %s/\n", logsDir)) +// DiscardStartupBuffer makes the in-memory startup log buffer discard future writes +// and frees its memory. This is called after the service starts successfully to avoid +// unbounded memory growth during the test run. Has no effect when --enable-service-logs +// is set (file-based logging persists for the full run). +// +// Note: we can't swap cmd.Stdout after Start() because Go's exec package captures the +// writer by reference in an internal goroutine at Start() time. Instead, we set a flag +// on the buffer itself to make Write a no-op. +func (e *Executor) DiscardStartupBuffer() { + if !e.enableServiceLogs && e.startupLogBuffer != nil { + e.startupLogBuffer.Discard() + e.startupLogBuffer = nil } +} - return msg.String() +// GetStartupFailureHelpMessage returns a user-friendly help message when the service fails to start. +func (e *Executor) GetStartupFailureHelpMessage() string { + if e.enableServiceLogs && e.serviceLogPath != "" { + return fmt.Sprintf("\nšŸ“„ Service logs are available at: %s\n", e.serviceLogPath) + } + return "" } // RunSingleTest replays a single trace on the service under test. 
diff --git a/internal/runner/server.go b/internal/runner/server.go index cfc37e8..ff60729 100644 --- a/internal/runner/server.go +++ b/internal/runner/server.go @@ -1085,11 +1085,7 @@ func (ms *Server) handleInstrumentationVersionMismatchAlert(alert *core.Instrume } func (ms *Server) handleUnpatchedDependencyAlert(alert *core.UnpatchedDependencyAlert) { - log.Info("Unpatched dependency alert", - "traceTestServerSpanId", alert.TraceTestServerSpanId, - "stackTrace", alert.StackTrace, - "sdkVersion", alert.SdkVersion, - ) + log.TestDebug(alert.TraceTestServerSpanId, fmt.Sprintf("Unpatched dependency alert (sdk %s)\n%s", alert.SdkVersion, alert.StackTrace)) // Send to PostHog if client := ms.GetAnalyticsClient(); client != nil { diff --git a/internal/runner/service.go b/internal/runner/service.go index a29539f..1d84e6a 100644 --- a/internal/runner/service.go +++ b/internal/runner/service.go @@ -148,15 +148,19 @@ func (e *Executor) StartService() error { env = append(env, "TUSK_DRIFT_MODE=REPLAY") e.serviceCmd.Env = env - // Dump service logs to file in .tusk/logs instead of suppressing - // TODO: provide option whether to store these logs + // Always capture service logs during startup. + // When --enable-service-logs is set, logs go to a file. + // Otherwise, logs go to an in-memory buffer that is shown on failure and discarded on success. if err := e.setupServiceLogging(); err != nil { log.Debug("Failed to setup service logging, suppressing output", "error", err) e.serviceCmd.Stdout = nil e.serviceCmd.Stderr = nil - } else { + } else if e.enableServiceLogs { e.serviceCmd.Stdout = e.serviceLogFile e.serviceCmd.Stderr = e.serviceLogFile + } else { + e.serviceCmd.Stdout = e.startupLogBuffer + e.serviceCmd.Stderr = e.startupLogBuffer } if err := e.serviceCmd.Start(); err != nil { @@ -167,6 +171,16 @@ func (e *Executor) StartService() error { return fmt.Errorf("failed to start service: %w", err) } + // Monitor process for early exit so waitForReadiness can fail fast. 
+ // Capture the channel locally so that if a sandbox retry creates a new channel, + // this goroutine still sends to the original one (not the new one). + e.processExitCh = make(chan error, 1) + exitCh := e.processExitCh + cmd := e.serviceCmd + go func() { + exitCh <- cmd.Wait() + }() + if err := e.waitForReadiness(cfg); err != nil { _ = e.StopService() return fmt.Errorf("service readiness check failed: %w", err) @@ -401,8 +415,13 @@ func (e *Executor) waitForReadiness(cfg *config.Config) error { waitTime = parsed } } - time.Sleep(waitTime) - return nil + // Wait for the specified duration, but fail fast if the process exits + select { + case exitErr := <-e.processExitCh: + return fmt.Errorf("service process exited during startup: %w", exitErr) + case <-time.After(waitTime): + return nil + } } timeout := 10 * time.Second @@ -422,13 +441,25 @@ func (e *Executor) waitForReadiness(cfg *config.Config) error { deadline := time.Now().Add(timeout) for time.Now().Before(deadline) { + // Check for early process exit before each readiness attempt + select { + case exitErr := <-e.processExitCh: + return fmt.Errorf("service process exited during startup: %w", exitErr) + default: + } + cmd := createReadinessCommand(cfg.Service.Readiness.Command) cmd.Env = e.buildCommandEnv() if err := cmd.Run(); err == nil { return nil } - time.Sleep(interval) + // Wait for the interval, but fail fast if the process exits + select { + case exitErr := <-e.processExitCh: + return fmt.Errorf("service process exited during startup: %w", exitErr) + case <-time.After(interval): + } } return fmt.Errorf("service failed to become ready within %v. You can increase the timeout in .tusk/config.yaml under service.readiness.timeout", timeout) @@ -439,34 +470,47 @@ func (e *Executor) SetEnableServiceLogs(enable bool) { e.enableServiceLogs = enable } -// setupServiceLogging creates a log file for service stdout and stderr if service logging is not disabled. 
+// setupServiceLogging prepares log capture for the service process. +// When --enable-service-logs is set, logs are written to a file in .tusk/logs/. +// Otherwise, an in-memory buffer captures startup output (shown on failure, discarded on success). func (e *Executor) setupServiceLogging() error { - if !e.enableServiceLogs { - return fmt.Errorf("service logging disabled") - } - - // Allow tests to override the logs directory - logsDir := utils.GetLogsDir() - if testLogsDir := os.Getenv("TUSK_TEST_LOGS_DIR"); testLogsDir != "" { - logsDir = testLogsDir - } + if e.enableServiceLogs { + // Reuse existing log file across sandbox retries (same as the buffer path guard below). + // The file was closed by StopService → cleanupLogFiles, but we can reopen it for append. + if e.serviceLogPath != "" { + logFile, err := os.OpenFile(e.serviceLogPath, os.O_APPEND|os.O_WRONLY, 0o600) // #nosec G304 + if err != nil { + return fmt.Errorf("failed to reopen service log file: %w", err) + } + e.serviceLogFile = logFile + } else { + // Allow tests to override the logs directory + logsDir := utils.GetLogsDir() + if testLogsDir := os.Getenv("TUSK_TEST_LOGS_DIR"); testLogsDir != "" { + logsDir = testLogsDir + } - if err := os.MkdirAll(logsDir, 0o750); err != nil { - return fmt.Errorf("failed to create logs directory: %w", err) - } + if err := os.MkdirAll(logsDir, 0o750); err != nil { + return fmt.Errorf("failed to create logs directory: %w", err) + } - timestamp := time.Now().Format("20060102-150405") + timestamp := time.Now().Format("20060102-150405") + logPath := filepath.Join(logsDir, fmt.Sprintf("tusk-replay-%s.log", timestamp)) + logFile, err := os.Create(logPath) // #nosec G304 + if err != nil { + return fmt.Errorf("failed to create service log file: %w", err) + } - logPath := filepath.Join(logsDir, fmt.Sprintf("tusk-replay-%s.log", timestamp)) - logFile, err := os.Create(logPath) // #nosec G304 - if err != nil { - return fmt.Errorf("failed to create service log file: %w", err) + 
e.serviceLogFile = logFile + e.serviceLogPath = logPath + log.ServiceLog(fmt.Sprintf("Service logs will be written to: %s", logPath)) + } + } else if e.startupLogBuffer == nil { + // Only create a new buffer if one doesn't already exist. + // During sandbox retry, the existing buffer preserves the first attempt's logs. + e.startupLogBuffer = &syncBuffer{} } - e.serviceLogFile = logFile - - log.ServiceLog(fmt.Sprintf("Service logs will be written to: %s", logPath)) - return nil } diff --git a/internal/runner/service_test.go b/internal/runner/service_test.go index dabf7c5..af378f8 100644 --- a/internal/runner/service_test.go +++ b/internal/runner/service_test.go @@ -820,10 +820,10 @@ func TestSetupServiceLogging(t *testing.T) { expectError bool }{ { - name: "logging_disabled", + name: "logging_disabled_creates_buffer", enableServiceLogs: false, setupFunc: noopCleanup, - expectError: true, + expectError: false, }, { name: "logging_enabled_creates_file", @@ -878,10 +878,11 @@ func TestSetupServiceLogging(t *testing.T) { err := e.setupServiceLogging() - if tt.expectError { + switch { + case tt.expectError: assert.Error(t, err) assert.Nil(t, e.serviceLogFile) - } else { + case tt.enableServiceLogs: assert.NoError(t, err) assert.NotNil(t, e.serviceLogFile) @@ -892,6 +893,11 @@ func TestSetupServiceLogging(t *testing.T) { _ = e.serviceLogFile.Close() _ = os.Remove(e.serviceLogFile.Name()) + default: + // Logging disabled: no error, no file, but buffer is created + assert.NoError(t, err) + assert.Nil(t, e.serviceLogFile) + assert.NotNil(t, e.startupLogBuffer) } }) } diff --git a/internal/runner/service_test_helpers_windows.go b/internal/runner/service_test_helpers_windows.go index f40dcc9..5a70c11 100644 --- a/internal/runner/service_test_helpers_windows.go +++ b/internal/runner/service_test_helpers_windows.go @@ -4,14 +4,20 @@ package runner import ( "context" + "fmt" "os/exec" + "strconv" "syscall" ) // createTestCommand creates a test command that can be gracefully 
killed func createTestCommand(ctx context.Context, duration string) *exec.Cmd { - // Windows: use timeout command (similar to sleep, but in seconds) - cmd := exec.CommandContext(ctx, "cmd.exe", "/c", "timeout /t "+duration+" /nobreak >nul") + // Windows: use ping for delay (timeout command fails in non-interactive CI). + // ping -n N sends N pings with ~1s between them, so total delay ā‰ˆ N-1 seconds. + // Add 1 to match the requested duration. + n, _ := strconv.Atoi(duration) + pingCount := fmt.Sprintf("%d", n+1) + cmd := exec.CommandContext(ctx, "cmd.exe", "/c", "ping -n "+pingCount+" 127.0.0.1 >nul") cmd.SysProcAttr = &syscall.SysProcAttr{ CreationFlags: syscall.CREATE_NEW_PROCESS_GROUP, } @@ -31,18 +37,18 @@ func createUnkillableTestCommand(ctx context.Context, duration string) *exec.Cmd // getLongRunningCommand returns a shell command that runs indefinitely func getLongRunningCommand() string { - // Windows: infinite loop with timeout - return "cmd.exe /c :loop & timeout /t 1 /nobreak >nul & goto loop" + // Windows: infinite loop with ping delay (timeout command fails in non-interactive CI) + return "cmd.exe /c :loop & ping -n 2 127.0.0.1 >nul & goto loop" } // getSimpleSleepCommand returns a simple sleep command for testing func getSimpleSleepCommand() string { - // Windows: 1 second timeout (timeout is in seconds, minimum is 1) - return "cmd.exe /c timeout /t 1 /nobreak >nul" + // Windows: use ping for ~1 second delay (timeout command fails in non-interactive CI) + return "cmd.exe /c ping -n 2 127.0.0.1 >nul" } // getMediumSleepCommand returns a medium duration sleep for integration tests func getMediumSleepCommand() string { - // Windows: 1 second is minimum, but this is acceptable for medium duration - return "cmd.exe /c timeout /t 2 /nobreak >nul" + // Windows: use ping for ~2 second delay (timeout command fails in non-interactive CI) + return "cmd.exe /c ping -n 3 127.0.0.1 >nul" } diff --git a/internal/runner/suite_spans.go 
b/internal/runner/suite_spans.go index 3be14a0..03cb3dd 100644 --- a/internal/runner/suite_spans.go +++ b/internal/runner/suite_spans.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "path/filepath" + "time" "github.com/Use-Tusk/tusk-drift-cli/internal/api" "github.com/Use-Tusk/tusk-drift-cli/internal/log" @@ -134,17 +135,19 @@ func PrepareAndSetSuiteSpans( opts SuiteSpanOptions, currentTests []Test, ) error { + buildStart := time.Now() result, err := BuildSuiteSpansForRun(ctx, opts, currentTests) if err != nil { return err } + buildDuration := time.Since(buildStart).Seconds() if opts.Interactive { log.ServiceLog(fmt.Sprintf( "Loading %d suite spans for matching (%d unique traces, %d pre-app-start)", len(result.SuiteSpans), result.UniqueTraceCount, result.PreAppStartCount, )) } else if !opts.Quiet { - log.UserProgress(fmt.Sprintf(" ↳ Loaded %d suite spans (%d unique traces, %d pre-app-start)", len(result.SuiteSpans), result.UniqueTraceCount, result.PreAppStartCount)) + log.UserProgress(fmt.Sprintf(" ↳ Loaded %d suite spans (%d unique traces, %d pre-app-start) (%.1fs)", len(result.SuiteSpans), result.UniqueTraceCount, result.PreAppStartCount, buildDuration)) } log.Debug("Prepared suite spans for matching", "count", len(result.SuiteSpans), diff --git a/internal/tui/test_executor.go b/internal/tui/test_executor.go index c98fc4f..8b9e1eb 100644 --- a/internal/tui/test_executor.go +++ b/internal/tui/test_executor.go @@ -3,6 +3,7 @@ package tui import ( "context" "fmt" + "io" "log/slog" "os" "slices" @@ -245,6 +246,13 @@ func RunTestsInteractiveWithOpts(tests []runner.Test, executor *runner.Executor, // Register this model as the global test logger log.SetTUILogger(m) + // Redirect stderr to the TUI service log panel. + // Libraries like fence write directly to os.Stderr with fmt.Fprintf, + // which corrupts the Bubble Tea alt screen. This pipe captures those + // writes and routes them through the TUI log panel instead. 
+	stderrRestore := redirectStderrToTUI(m)
+	defer stderrRestore()
+
 	// Prepend initial service logs
 	if opts != nil && len(opts.InitialServiceLogs) > 0 {
 		for _, line := range opts.InitialServiceLogs {
@@ -270,6 +278,48 @@ func RunTestsInteractiveWithOpts(tests []runner.Test, executor *runner.Executor,
 	return m.results, nil
 }
 
+// redirectStderrToTUI replaces os.Stderr with a pipe that routes lines to the
+// TUI service log panel. Returns a function that restores the original stderr.
+// This prevents libraries that write directly to os.Stderr (e.g. fence) from
+// corrupting the Bubble Tea alt screen.
+func redirectStderrToTUI(m *testExecutorModel) func() {
+	origStderr := os.Stderr
+	pr, pw, err := os.Pipe()
+	if err != nil {
+		// If pipe fails, don't redirect — better to have some corruption than crash
+		return func() {}
+	}
+	os.Stderr = pw
+
+	done := make(chan struct{})
+	go func() {
+		defer close(done)
+		buf := make([]byte, 4096)
+		for {
+			n, readErr := pr.Read(buf)
+			if n > 0 {
+				lines := strings.Split(strings.TrimRight(string(buf[:n]), "\n"), "\n")
+				for _, line := range lines {
+					line = strings.TrimSpace(line)
+					if line != "" {
+						m.addServiceLog(line)
+					}
+				}
+			}
+			if readErr != nil {
+				return
+			}
+		}
+	}()
+
+	return func() {
+		os.Stderr = origStderr
+		_ = pw.Close()
+		<-done // wait for reader goroutine to drain
+		_ = pr.Close()
+	}
+}
+
 func newTestExecutorModel(tests []runner.Test, executor *runner.Executor, opts *InteractiveOpts) *testExecutorModel {
 	model := &testExecutorModel{
 		tests: tests,
@@ -306,12 +356,59 @@ func (m *testExecutorModel) setupTUILogging() {
 		level = slog.LevelDebug
 	}
 
-	handler := slog.NewTextHandler(tuiWriter, &slog.HandlerOptions{
-		Level: level,
-	})
+	handler := &tuiSlogHandler{
+		level:  level,
+		writer: tuiWriter,
+	}
 	slog.SetDefault(slog.New(handler))
 }
 
+// tuiSlogHandler formats slog records cleanly for the TUI (no time/level prefix).
+// It extracts just the message and key=value pairs.
+type tuiSlogHandler struct {
+	level  slog.Level
+	writer io.Writer
+	attrs  []slog.Attr
+	group  string
+}
+
+func (h *tuiSlogHandler) Enabled(_ context.Context, level slog.Level) bool {
+	return level >= h.level
+}
+
+func (h *tuiSlogHandler) Handle(_ context.Context, r slog.Record) error {
+	var b strings.Builder
+	b.WriteString(r.Message)
+	// Append pre-set attrs
+	for _, a := range h.attrs {
+		b.WriteString(" ")
+		b.WriteString(a.Key)
+		b.WriteString("=")
+		b.WriteString(fmt.Sprintf("%v", a.Value.Any()))
+	}
+	// Append record attrs
+	r.Attrs(func(a slog.Attr) bool {
+		b.WriteString(" ")
+		b.WriteString(a.Key)
+		b.WriteString("=")
+		b.WriteString(fmt.Sprintf("%v", a.Value.Any()))
+		return true
+	})
+	_, err := h.writer.Write([]byte(b.String()))
+	return err
+}
+
+func (h *tuiSlogHandler) WithAttrs(attrs []slog.Attr) slog.Handler {
+	newAttrs := make([]slog.Attr, len(h.attrs)+len(attrs))
+	copy(newAttrs, h.attrs)
+	copy(newAttrs[len(h.attrs):], attrs)
+	return &tuiSlogHandler{level: h.level, writer: h.writer, attrs: newAttrs, group: h.group}
+}
+
+func (h *tuiSlogHandler) WithGroup(name string) slog.Handler {
+	return &tuiSlogHandler{level: h.level, writer: h.writer, attrs: h.attrs, group: name}
+}
+
 func (m *testExecutorModel) loadTestsIfNeeded() tea.Cmd {
 	if m.opts == nil || m.opts.LoadTests == nil {
 		return nil
 	}
@@ -922,11 +1019,21 @@ func (m *testExecutorModel) startNextEnvironmentGroup() tea.Cmd {
 	// Start environment
 	if err := m.executor.StartEnvironment(); err != nil {
 		m.groupCleanup()
+
+		startupLogs := m.executor.GetStartupLogs()
+		if startupLogs != "" {
+			m.addServiceLog("šŸ“‹ Service startup logs:")
+			for _, line := range strings.Split(strings.TrimRight(startupLogs, "\n"), "\n") {
+				m.addServiceLog(line)
+			}
+		}
+
 		m.addServiceLog(fmt.Sprintf("āŒ Failed to start environment for %s: %v", group.Name, err))
-		helpMsg := m.executor.GetStartupFailureHelpMessage()
-		for _, line := range strings.Split(strings.TrimSpace(helpMsg), "\n") {
-			m.addServiceLog(line)
+		if helpMsg := m.executor.GetStartupFailureHelpMessage(); helpMsg != "" {
+			for _, line := range strings.Split(strings.TrimSpace(helpMsg), "\n") {
+				m.addServiceLog(line)
+			}
 		}
 
 		return executionFailedMsg{reason: fmt.Sprintf("Failed to start environment: %v", err)}
@@ -1055,17 +1162,17 @@ func (m *testExecutorModel) executeTest(index int) tea.Cmd {
 		}
 	}
 
-	// Determine if there are more tests to run
+	// Determine if there are more tests to run (including pending retries)
 	hasMoreTests := false
 	if m.inRetryPhase {
 		// In retry phase, check if there are more tests in the retry queue
 		hasMoreTests = m.currentEnvTestsStarted < len(m.currentEnvTestIndices)
 	} else {
-		// In normal phase, check if there are more tests overall
-		hasMoreTests = index < len(m.tests)-1
+		// In normal phase, check if there are more tests overall OR tests queued for retry
+		hasMoreTests = index < len(m.tests)-1 || len(m.testsToRetry) > 0
 	}
 
-	// Attempt to restart the server for next test
+	// Attempt to restart the server for next test (or pending retries)
 	if hasMoreTests {
 		m.addServiceLog("šŸ”„ Restarting server...")
 		if restartErr := m.executor.RestartServerWithRetry(0); restartErr != nil {