From 00e2d9c1f01ab8c9d1e92c1946be59a6d2f72ce9 Mon Sep 17 00:00:00 2001
From: Christopher Tso <christso@gmail.com>
Date: Mon, 13 Apr 2026 13:52:21 +1000
Subject: [PATCH 1/3] fix(core): use resolved absolute path for file references
 in agent mode

formatSegment was using the display path (relative) instead of
resolvedPath (absolute) when rendering <file: path="..."> tags in agent
mode. This caused agents running in isolated workspaces to fail when
trying to read input_files, since the relative paths did not exist in
the workspace directory.

Now uses segment.resolvedPath (set by message-processor.ts during file
resolution) with a fallback to the display path for synthetic segments.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../formatting/segment-formatter.ts           |   8 +-
 .../formatting/segment-formatter.test.ts      | 101 ++++++++++++++++++
 2 files changed, 107 insertions(+), 2 deletions(-)
 create mode 100644 packages/core/test/evaluation/formatting/segment-formatter.test.ts
diff --git a/packages/core/src/evaluation/formatting/segment-formatter.ts b/packages/core/src/evaluation/formatting/segment-formatter.ts
index 8596a947f..666ac857a 100644
--- a/packages/core/src/evaluation/formatting/segment-formatter.ts
+++ b/packages/core/src/evaluation/formatting/segment-formatter.ts
@@ -54,9 +54,13 @@ export function formatSegment(
       return undefined;
     }
 
-    // Agent mode: return file reference only
+    // Agent mode: return file reference with absolute path so agents can locate
+    // the file regardless of their working directory. resolvedPath is set by
+    // message-processor.ts during file resolution; fall back to the display
+    // path when it is absent (e.g. in unit tests with synthetic segments).
     if (mode === 'agent') {
-      return `<file: path="${filePath}">`;
+      const absolutePath = asString(segment.resolvedPath) ?? filePath;
+      return `<file: path="${absolutePath}">`;
     }
 
     // LM mode: return embedded content with XML tags
diff --git a/packages/core/test/evaluation/formatting/segment-formatter.test.ts b/packages/core/test/evaluation/formatting/segment-formatter.test.ts
new file mode 100644
index 000000000..339ca872d
--- /dev/null
+++ b/packages/core/test/evaluation/formatting/segment-formatter.test.ts
@@ -0,0 +1,101 @@
+import { describe, expect, it } from 'bun:test';
+
+import {
+  formatFileContents,
+  formatSegment,
+  hasVisibleContent,
+} from '../../../src/evaluation/formatting/segment-formatter.js';
+
+describe('formatSegment', () => {
+  describe('text segments', () => {
+    it('returns value for text segments', () => {
+      expect(formatSegment({ type: 'text', value: 'hello' })).toBe('hello');
+    });
+
+    it('returns undefined for missing value', () => {
+      expect(formatSegment({ type: 'text' })).toBeUndefined();
+    });
+  });
+
+  describe('file segments in lm mode', () => {
+    it('returns embedded content with XML tags', () => {
+      const segment = { type: 'file', path: 'data.csv', text: 'a,b\n1,2' };
+      const result = formatSegment(segment, 'lm');
+      expect(result).toContain('<file path="data.csv">');
+      expect(result).toContain('a,b\n1,2');
+      expect(result).toContain('</file>');
+    });
+
+    it('returns undefined when text is missing', () => {
+      const segment = { type: 'file', path: 'data.csv' };
+      expect(formatSegment(segment, 'lm')).toBeUndefined();
+    });
+  });
+
+  describe('file segments in agent mode', () => {
+    it('uses resolvedPath (absolute) when available', () => {
+      const segment = {
+        type: 'file',
+        path: 'snippets/data.csv',
+        text: 'content',
+        resolvedPath: '/abs/path/to/snippets/data.csv',
+      };
+      expect(formatSegment(segment, 'agent')).toBe('<file: path="/abs/path/to/snippets/data.csv">');
+    });
+
+    it('falls back to display path when resolvedPath is absent', () => {
+      const segment = { type: 'file', path: 'snippets/data.csv', text: 'content' };
+      expect(formatSegment(segment, 'agent')).toBe('<file: path="snippets/data.csv">');
+    });
+
+    it('returns undefined when path is missing', () => {
+      const segment = { type: 'file', text: 'content' };
+      expect(formatSegment(segment, 'agent')).toBeUndefined();
+    });
+  });
+
+  describe('unknown segment types', () => {
+    it('returns undefined for unknown types', () => {
+      expect(formatSegment({ type: 'video', value: 'test' })).toBeUndefined();
+    });
+
+    it('returns undefined for missing type', () => {
+      expect(formatSegment({ value: 'test' })).toBeUndefined();
+    });
+  });
+});
+
+describe('formatFileContents', () => {
+  it('wraps file parts in XML tags', () => {
+    const result = formatFileContents([
+      { content: 'a,b\n1,2', isFile: true, displayPath: 'data.csv' },
+    ]);
+    expect(result).toBe('<file path="data.csv">\na,b\n1,2\n</file>');
+  });
+
+  it('joins non-file parts with spaces when no files present', () => {
+    const result = formatFileContents([
+      { content: 'hello', isFile: false },
+      { content: 'world', isFile: false },
+    ]);
+    expect(result).toBe('hello world');
+  });
+});
+
+describe('hasVisibleContent', () => {
+  it('returns true for non-empty text segments', () => {
+    expect(hasVisibleContent([{ type: 'text', value: 'hello' }])).toBe(true);
+  });
+
+  it('returns false for whitespace-only text', () => {
+    expect(hasVisibleContent([{ type: 'text', value: '   ' }])).toBe(false);
+  });
+
+  it('returns true for file segments with text', () => {
+    expect(hasVisibleContent([{ type: 'file', text: 'content' }])).toBe(true);
+  });
+
+  it('returns false for empty segments', () => {
+    expect(hasVisibleContent([])).toBe(false);
+  });
+});

From 234934c7bd397f800e4db7580b780c248dc21d34 Mon Sep 17 00:00:00 2001
From: Christopher Tso <christso@gmail.com>
Date: Mon, 13 Apr 2026 14:52:38 +1000
Subject: [PATCH 2/3] test: trim to only regression-relevant tests

Remove tests for existing behavior already covered by the 1626 core
tests. Keep only the two agent-mode file path tests that guard the fix.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../formatting/segment-formatter.test.ts      | 113 +++---------------
 1 file changed, 16 insertions(+), 97 deletions(-)

diff --git a/packages/core/test/evaluation/formatting/segment-formatter.test.ts b/packages/core/test/evaluation/formatting/segment-formatter.test.ts
index 339ca872d..d44642b02 100644
--- a/packages/core/test/evaluation/formatting/segment-formatter.test.ts
+++ b/packages/core/test/evaluation/formatting/segment-formatter.test.ts
@@ -1,101 +1,20 @@
 import { describe, expect, it } from 'bun:test';
 
-import {
-  formatFileContents,
-  formatSegment,
-  hasVisibleContent,
-} from '../../../src/evaluation/formatting/segment-formatter.js';
-
-describe('formatSegment', () => {
-  describe('text segments', () => {
-    it('returns value for text segments', () => {
-      expect(formatSegment({ type: 'text', value: 'hello' })).toBe('hello');
-    });
-
-    it('returns undefined for missing value', () => {
-      expect(formatSegment({ type: 'text' })).toBeUndefined();
-    });
-  });
-
-  describe('file segments in lm mode', () => {
-    it('returns embedded content with XML tags', () => {
-      const segment = { type: 'file', path: 'data.csv', text: 'a,b\n1,2' };
-      const result = formatSegment(segment, 'lm');
-      expect(result).toContain('<file path="data.csv">');
-      expect(result).toContain('a,b\n1,2');
-      expect(result).toContain('</file>');
-    });
-
-    it('returns undefined when text is missing', () => {
-      const segment = { type: 'file', path: 'data.csv' };
-      expect(formatSegment(segment, 'lm')).toBeUndefined();
-    });
-  });
-
-  describe('file segments in agent mode', () => {
-    it('uses resolvedPath (absolute) when available', () => {
-      const segment = {
-        type: 'file',
-        path: 'snippets/data.csv',
-        text: 'content',
-        resolvedPath: '/abs/path/to/snippets/data.csv',
-      };
-      expect(formatSegment(segment, 'agent')).toBe('<file: path="/abs/path/to/snippets/data.csv">');
-    });
-
-    it('falls back to display path when resolvedPath is absent', () => {
-      const segment = { type: 'file', path: 'snippets/data.csv', text: 'content' };
-      expect(formatSegment(segment, 'agent')).toBe('<file: path="snippets/data.csv">');
-    });
-
-    it('returns undefined when path is missing', () => {
-      const segment = { type: 'file', text: 'content' };
-      expect(formatSegment(segment, 'agent')).toBeUndefined();
-    });
-  });
-
-  describe('unknown segment types', () => {
-    it('returns undefined for unknown types', () => {
-      expect(formatSegment({ type: 'video', value: 'test' })).toBeUndefined();
-    });
-
-    it('returns undefined for missing type', () => {
-      expect(formatSegment({ value: 'test' })).toBeUndefined();
-    });
-  });
-});
-
-describe('formatFileContents', () => {
-  it('wraps file parts in XML tags', () => {
-    const result = formatFileContents([
-      { content: 'a,b\n1,2', isFile: true, displayPath: 'data.csv' },
-    ]);
-    expect(result).toBe('<file path="data.csv">\na,b\n1,2\n</file>');
-  });
-
-  it('joins non-file parts with spaces when no files present', () => {
-    const result = formatFileContents([
-      { content: 'hello', isFile: false },
-      { content: 'world', isFile: false },
-    ]);
-    expect(result).toBe('hello world');
-  });
-});
-
-describe('hasVisibleContent', () => {
-  it('returns true for non-empty text segments', () => {
-    expect(hasVisibleContent([{ type: 'text', value: 'hello' }])).toBe(true);
-  });
-
-  it('returns false for whitespace-only text', () => {
-    expect(hasVisibleContent([{ type: 'text', value: '   ' }])).toBe(false);
-  });
-
-  it('returns true for file segments with text', () => {
-    expect(hasVisibleContent([{ type: 'file', text: 'content' }])).toBe(true);
-  });
-
-  it('returns false for empty segments', () => {
-    expect(hasVisibleContent([])).toBe(false);
+import { formatSegment } from '../../../src/evaluation/formatting/segment-formatter.js';
+
+describe('formatSegment agent mode file paths', () => {
+  it('uses resolvedPath (absolute) when available', () => {
+    const segment = {
+      type: 'file',
+      path: 'snippets/data.csv',
+      text: 'content',
+      resolvedPath: '/abs/path/to/snippets/data.csv',
+    };
+    expect(formatSegment(segment, 'agent')).toBe('<file: path="/abs/path/to/snippets/data.csv">');
+  });
+
+  it('falls back to display path when resolvedPath is absent', () => {
+    const segment = { type: 'file', path: 'snippets/data.csv', text: 'content' };
+    expect(formatSegment(segment, 'agent')).toBe('<file: path="snippets/data.csv">');
   });
 });

From ca90e457d001c97c6e36bbca4df3f92d6086b9d5 Mon Sep 17 00:00:00 2001
From: Christopher Tso <christso@gmail.com>
Date: Mon, 13 Apr 2026 14:54:29 +1000
Subject: [PATCH 3/3] docs: add test authoring guidelines to AGENTS.md

Add "Writing Tests" section to prevent over-engineering tests. Focus on
regression-relevant tests over comprehensive coverage of existing behavior.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 AGENTS.md | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/AGENTS.md b/AGENTS.md
index 4256a3c75..4ca7d7e8b 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -250,6 +250,15 @@ wait
 
 This does not apply to lightweight LLM-only targets (azure, openai, gemini, openrouter) which can run with higher concurrency.
 
+### Writing Tests
+
+Tests should be lean and focused on what matters. Follow these principles:
+
+- **Only test new or changed behavior.** Don't write tests for existing behavior that's already covered by the 1600+ core tests. If you fix a bug, test the fix and its edge cases — not the surrounding module.
+- **One test per distinct behavior.** Don't write separate tests for trivially different inputs that exercise the same code path.
+- **No tests for obvious code.** If a function returns `undefined` for missing input and that's a one-line null check, you don't need a test for it unless it's a regression risk.
+- **Regression tests > comprehensive tests.** A test that would have caught the bug is worth more than five tests that exercise happy paths.
+
 ### Verifying Evaluator Changes
 
 Unit tests alone are insufficient for evaluator changes. After implementing or modifying evaluators: