Skip to content
31 changes: 29 additions & 2 deletions cmd/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -391,6 +391,9 @@ func runTests(cmd *cobra.Command, args []string) error {
var tests []runner.Test
var err error

// Track overall timing for print mode (includes test loading)
overallStart := time.Now()

// Step 3: Load tests - in cloud mode, fetch from backend; otherwise use local files
deferLoadTests := interactive
if deferLoadTests {
Expand Down Expand Up @@ -457,7 +460,7 @@ func runTests(cmd *cobra.Command, args []string) error {
if isValidation {
log.Stderrln(fmt.Sprintf("\n➤ Found %d traces to validate", len(tests)))
} else if !cloud {
log.Stderrln(fmt.Sprintf("\n➤ Loaded %d tests from local traces", len(tests)))
log.Stderrln(fmt.Sprintf("\n➤ Loaded %d tests from local traces (%.1fs)", len(tests), time.Since(overallStart).Seconds()))
}
}

Expand Down Expand Up @@ -732,13 +735,25 @@ func runTests(cmd *cobra.Command, args []string) error {
}

// Step 4: Run tests by environment
testPhaseStart := time.Now()
var results []runner.TestResult
if groupResult != nil && len(groupResult.Groups) > 0 {
// Use environment-based replay
results, err = runner.ReplayTestsByEnvironment(context.Background(), executor, groupResult.Groups)
if err != nil {
cmd.SilenceUsage = true

// Dump startup logs so user can diagnose startup failures
startupLogs := executor.GetStartupLogs()
if startupLogs != "" {
log.Stderrln("\n📋 Service startup logs:")
for _, line := range strings.Split(strings.TrimRight(startupLogs, "\n"), "\n") {
log.Stderrln(line)
}
log.Stderrln("")
}
log.Stderr(executor.GetStartupFailureHelpMessage())

// Update CI status to FAILURE if in cloud mode
if cloud && client != nil && (ci || isValidation) {
statusReq := &backend.UpdateDriftRunCIStatusRequest{
Expand Down Expand Up @@ -780,6 +795,14 @@ func runTests(cmd *cobra.Command, args []string) error {
}
}

startupLogs := executor.GetStartupLogs()
if startupLogs != "" {
log.Stderrln("\n📋 Service startup logs:")
for _, line := range strings.Split(strings.TrimRight(startupLogs, "\n"), "\n") {
log.Stderrln(line)
}
log.Stderrln("")
}
log.Stderr(executor.GetStartupFailureHelpMessage())
return fmt.Errorf("failed to start environment: %w", err)
}
Expand All @@ -790,7 +813,7 @@ func runTests(cmd *cobra.Command, args []string) error {
}()

if !interactive && !quiet {
log.Stderrln(" ✓ Environment ready")
log.Stderrln(fmt.Sprintf(" ✓ Environment ready (%.1fs)", time.Since(testPhaseStart).Seconds()))
log.Stderrln(fmt.Sprintf("➤ Running %d tests (concurrency: %d)...\n", len(tests), executor.GetConcurrency()))
}

Expand Down Expand Up @@ -830,6 +853,10 @@ func runTests(cmd *cobra.Command, args []string) error {
outputErr = runner.OutputResultsSummary(results, outputFormat, quiet)
}

if !interactive && !quiet {
log.Stderrln(fmt.Sprintf("Total elapsed: %.1fs", time.Since(overallStart).Seconds()))
}

// Step 5: Upload results to backend if in cloud mode
// Do this before returning any error so CI status is always updated
if cloud && client != nil && (ci || isValidation) {
Expand Down
160 changes: 160 additions & 0 deletions docs/tui-testing.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
# CLI & TUI Testing Guide

How to manually test the tusk CLI in both print mode and interactive TUI mode.

## Print Mode Testing

Print mode (`--print`) runs headlessly — no interactive UI. Run it directly and inspect stderr:

```bash
cd /path/to/test-project
/path/to/tusk drift run --print 2>&1
```

Filter for specific output:

```bash
/path/to/tusk drift run --print 2>&1 | grep -E "(➤|✓|Tests:|Error:)"
```

### Testing failure scenarios

To test startup failures, temporarily change the start command in `.tusk/config.yaml`:

```yaml
start:
  command: node -e "console.log('boot log line'); console.error('some error'); process.exit(1)"
```

To test with a service that starts but behaves differently, adjust the command or example codebase as needed.

**Always restore the config after testing.**

## TUI Testing with tmux

The TUI (interactive mode, no `--print`) requires a terminal. We use tmux for programmatic control — it lets us send keystrokes and capture output without needing to be in the terminal ourselves.

### Option A: Native screenshots (recommended)

Opens a real Terminal.app window with tmux inside it, then uses macOS `screencapture -l` to capture that specific window by ID. This produces pixel-perfect Retina screenshots and should always be used to verify TUI visual changes.

**One-time setup:** Grant Screen Recording permission to Terminal.app in System Settings > Privacy & Security > Screen Recording.

```bash
# 1. Open Terminal.app with a tmux session
osascript -e 'tell application "Terminal"
do script "tmux new-session -s tui-test -x 200 -y 55"
end tell'
sleep 3

# 2. Resize the window to fill most of the screen (fits 14"/16" MacBook Pro)
osascript -e 'tell application "Terminal" to set bounds of front window to {0, 25, 1700, 1100}'
sleep 1

# 3. Hide tmux status bar (otherwise a green bar appears at the bottom)
tmux set -t tui-test status off

# 4. Launch the TUI
tmux send-keys -t tui-test 'cd /path/to/test-project && /path/to/tusk drift run' Enter

# 5. Wait for the state you want to capture
# - Normal run with tests: ~25-30s (environment start + test execution)
# - Startup failure with sandbox retry: ~15-18s
# - Just initial render: ~3-5s
sleep 25

# 6. Navigate if needed
tmux send-keys -t tui-test g # go to top (select Service Logs)
tmux send-keys -t tui-test j # move selection down
tmux send-keys -t tui-test J # scroll log panel down
tmux send-keys -t tui-test D # half-page down in log panel
sleep 1

# 7. Find the Terminal.app window ID
WINDOW_ID=$(python3 -c "
import Quartz
windows = Quartz.CGWindowListCopyWindowInfo(Quartz.kCGWindowListOptionOnScreenOnly, Quartz.kCGNullWindowID)
for w in windows:
    if w.get('kCGWindowOwnerName') == 'Terminal' and w.get('kCGWindowLayer', 0) == 0:
        print(w['kCGWindowNumber'])
        break
")

# 8. Capture the window
screencapture -l "$WINDOW_ID" -o screenshot.png

# 9. Cleanup
tmux send-keys -t tui-test q
sleep 2
tmux kill-session -t tui-test
osascript -e 'tell application "Terminal" to close front window' 2>/dev/null
```

**Output:** ~2800x1800 Retina PNG with native font rendering.

**Notes:**
- `screencapture -l` captures by window ID — the Terminal window doesn't need to be in the foreground. You can keep working in other windows.
- The `-o` flag removes the window shadow.
- `screencapture -l` fails silently without Screen Recording permission — you get a blank or tiny image.
- When finding the window ID, make sure to match `kCGWindowOwnerName == 'Terminal'` — other apps (Chrome, etc.) may be in front.

### Option B: Text capture (quick functional checks)

Uses a detached tmux session — no visible window, no permissions needed. Good for verifying that specific text appears in the TUI or that navigation works. **Not a substitute for screenshots** when verifying layout, colors, or visual rendering.

```bash
# 1. Detached tmux session (no visible window)
tmux new-session -d -s tui-test -x 200 -y 55

# 2. Launch the TUI
tmux send-keys -t tui-test 'cd /path/to/test-project && /path/to/tusk drift run' Enter
sleep 25

# 3. Capture the screen as plain text
SCREEN=$(tmux capture-pane -t tui-test -p)

# 4. Assert on content
echo "$SCREEN" | grep -q "TEST EXECUTION" || echo "FAIL: header not found"
echo "$SCREEN" | grep -q "Environment ready" || echo "FAIL: environment didn't start"

# 5. Navigate and capture again
tmux send-keys -t tui-test j
sleep 0.5
SCREEN=$(tmux capture-pane -t tui-test -p)

# 6. Cleanup
tmux send-keys -t tui-test q
sleep 1
tmux kill-session -t tui-test
```

### TUI keyboard shortcuts reference

| Key | Action |
| --------- | --------------------------------------- |
| `j` / `k` | Select next/previous test in left panel |
| `g` / `G` | Jump to top/bottom of test list |
| `u` / `d` | Half-page up/down in test list |
| `J` / `K` | Scroll log panel down/up |
| `U` / `D` | Half-page up/down in log panel |
| `y` | Copy all logs |
| `q` | Quit |

## Recommended Dimensions

| Setting | Value | Notes |
| ------------ | ----- | ------------------------------------------ |
| tmux columns | 200 | Wide enough for both TUI panels + detail |
| tmux rows | 55 | Tall enough to see tests + logs |
| Window bounds | {0, 25, 1700, 1100} | Fits 14"/16" MacBook Pro (adjust for your display) |

## Common Gotchas

1. **Timing is critical.** The TUI renders asynchronously. Capturing too early gives an incomplete screen. When in doubt, wait longer.
2. **tmux status bar.** The green bar at the bottom of native screenshots is tmux's status line. Hide it with `tmux set -t tui-test status off` before capturing.
3. **Scrolling.** Content often extends below the visible area of the log panel. Send `J` or `D` keys to scroll down before capturing.
4. **Screen Recording permission.** Native `screencapture -l` fails silently without it. Grant it to Terminal.app in System Settings > Privacy & Security > Screen Recording.
5. **YAML quoting.** When editing config.yaml with commands containing colons or quotes, wrap the entire value in double quotes and escape inner quotes.
6. **Restore config.** Always restore `.tusk/config.yaml` after testing with modified start commands.
7. **tmux targets by session name** (`-t tui-test`), so commands work regardless of which terminal you're focused on. You can keep working while tests run.
8. **Window ID targeting.** When using `screencapture -l`, make sure the Python script finds the Terminal window, not Chrome or another app that may be in front.
7 changes: 4 additions & 3 deletions internal/agent/prompts/phase_simple_test.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,10 @@ TuskDrift.initialize({
Run tusk_run to replay the trace.
If it fails:

- Run with `debug: true` (keep running it in debug mode until it passes)
- If startup fails in sandbox (for example secret manager bootstrapping), retry with `sandbox_mode: "off"`
- Check for errors in the output or in the logs (in .tusk/logs/). Logs only appear if `debug: true` is set.
- Check the service startup logs for errors (always shown on startup failure)
- If the service starts but a test replay fails, run with `debug: true` to see runtime logs (keep running it in debug mode until it passes)
- If you need more detail from the SDK itself, set `logLevel: "debug"` in the SDK initialization to see SDK-level diagnostics
- If startup fails in sandbox (for example secret manager bootstrapping), retry with `sandbox_mode: "off"` and if that works, add a comment in config.yaml explaining why sandbox was disabled (e.g., `# sandbox disabled: service requires external secret manager during startup`)
- If you see config-related errors (e.g., "no start command"), run `tusk_validate_config` to check for config issues
- Try to fix issues and retry (max 3 attempts)
- If still failing, ask the user for help
Expand Down
2 changes: 1 addition & 1 deletion internal/agent/prompts/phase_verify_complex_test.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ Follow the same process as the simple test:
4. Wait 3 seconds for trace to be written
5. Stop the service
6. Run tusk_list to verify trace was recorded
7. Run tusk_run to replay (try with `debug: true` on failure; if startup fails in sandbox, retry with `sandbox_mode: "off"`)
7. Run tusk_run to replay (startup logs are always shown on startup failure; if a test replay fails, retry with `debug: true` for runtime logs; if startup fails in sandbox, retry with `sandbox_mode: "off"` and if that works, add a comment in config.yaml explaining why)

### Step 3: Save to Cache

Expand Down
6 changes: 4 additions & 2 deletions internal/agent/prompts/phase_verify_simple_test.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,10 @@ If no traces appear, this is a verification failure.
Run tusk_run to replay the trace.
If it fails:

- Run with `debug: true` once more
- If startup fails in sandbox, retry with `sandbox_mode: "off"`
- Check the service startup logs for errors (always shown on startup failure)
- If the service starts but a test replay fails, retry with `debug: true` to see runtime logs
- If you need more detail from the SDK, set `logLevel: "debug"` in the SDK initialization
- If startup fails in sandbox, retry with `sandbox_mode: "off"` and if that works, add a comment in config.yaml explaining why sandbox was disabled
- If still failing, mark as failed

### Step 5: Save to Cache
Expand Down
27 changes: 23 additions & 4 deletions internal/log/log.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
package log

import (
"context"
"io"
"log/slog"
"os"
Expand Down Expand Up @@ -90,15 +91,24 @@ func (l *Logger) process() {

func (l *Logger) handleLogMessage(msg logMessage) {
tuiPtr := l.tuiLogger.Load()
if tuiPtr == nil {
if tuiPtr != nil {
tui := *tuiPtr
switch msg.msgType {
case logTypeService:
tui.LogToService(msg.message)
case logTypeTest:
tui.LogToCurrentTest(msg.testID, msg.message)
}
return
}
tui := *tuiPtr

// No TUI active (print mode). Route through slog so messages respect
// the configured log level and use the standard slog format.
switch msg.msgType {
case logTypeService:
tui.LogToService(msg.message)
slog.Debug(msg.message)
case logTypeTest:
tui.LogToCurrentTest(msg.testID, msg.message)
slog.Debug(msg.message, "testID", msg.testID)
}
}

Expand Down Expand Up @@ -261,6 +271,15 @@ func TestLog(testID, msg string) {
}
}

// TestDebug forwards msg to TestLog for the given test, but only when the
// default slog logger has debug level enabled (the --debug flag); otherwise
// it is a no-op.
func TestDebug(testID, msg string) {
	debugEnabled := slog.Default().Enabled(context.Background(), slog.LevelDebug)
	if debugEnabled {
		TestLog(testID, msg)
	}
}

// TestOrServiceLog tries to log to test, falls back to service if testID is empty
func TestOrServiceLog(testID, msg string) {
if testID != "" {
Expand Down
18 changes: 18 additions & 0 deletions internal/runner/environment.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,19 @@ func (e *Executor) StartEnvironment() error {
if e.GetEffectiveSandboxMode() == SandboxModeAuto && e.lastServiceSandboxed {
log.ServiceLog("⚠️ Service failed to start in sandbox; retrying once without sandbox...")
_ = e.StopService()

// Write separator so the user can see where the retry begins.
// The in-memory buffer survives StopService; the file path persists
// via serviceLogPath and setupServiceLogging will reopen in append mode.
if e.enableServiceLogs && e.serviceLogPath != "" {
if f, err := os.OpenFile(e.serviceLogPath, os.O_APPEND|os.O_WRONLY, 0o600); err == nil { // #nosec G304
_, _ = f.WriteString("⚠️ Retrying without sandbox...\n")
_ = f.Close()
}
} else if e.startupLogBuffer != nil {
_, _ = e.startupLogBuffer.Write([]byte("⚠️ Retrying without sandbox...\n"))
}

e.sandboxBypass = true
e.lastServiceSandboxed = false

Expand Down Expand Up @@ -52,6 +65,11 @@ waitForSDK:
log.ServiceLog("✅ SDK acknowledged")

log.Debug("Environment is ready")

// Discard the in-memory startup buffer now that startup succeeded.
// File-based logging (--enable-service-logs) persists for the full run.
e.DiscardStartupBuffer()

return nil
}

Expand Down
Loading
Loading