Skip to content

Commit d60d0d5

Browse files
authored
Merge pull request #9 from agent-ecosystem/fix/llms-txt-links-markdown
fix: llms-txt-links-markdown: false positive
2 parents c2be240 + a33b847 commit d60d0d5

2 files changed

Lines changed: 75 additions & 0 deletions

File tree

src/checks/llms-txt/llms-txt-links-markdown.ts

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import { registerCheck } from '../registry.js';
22
import { extractMarkdownLinks } from './llms-txt-valid.js';
33
import { toMdUrls } from '../../helpers/to-md-urls.js';
4+
import { looksLikeMarkdown } from '../../helpers/detect-markdown.js';
45
import type { CheckContext, CheckResult, DiscoveredFile } from '../../types.js';
56

67
interface LinkMarkdownResult {
@@ -107,6 +108,27 @@ async function checkLlmsTxtLinksMarkdown(ctx: CheckContext): Promise<CheckResult
107108
};
108109
}
109110

111+
// For .txt URLs (e.g. llms-full.txt, llms-small.txt companion files),
112+
// content-sniff because they may contain markdown served as text/plain
113+
if (new URL(url).pathname.endsWith('.txt')) {
114+
try {
115+
const getResp = await ctx.http.fetch(url);
116+
if (getResp.ok) {
117+
const body = await getResp.text();
118+
if (looksLikeMarkdown(body)) {
119+
return {
120+
url,
121+
hasMarkdownExtension: false,
122+
servesMarkdown: true,
123+
status: response.status,
124+
};
125+
}
126+
}
127+
} catch {
128+
// Fall through to .md variant check
129+
}
130+
}
131+
110132
// Try .md variant candidates
111133
const candidates = toMdUrls(url);
112134
for (const mdUrl of candidates) {

test/unit/checks/llms-txt-links-markdown.test.ts

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,59 @@ Just text, no links here.
210210
expect(result.message).toContain('sampled links');
211211
});
212212

213+
// Verifies that a linked .txt URL whose body is markdown is accepted even
// though both the HEAD and GET responses declare content-type text/plain.
it('passes for .txt companion files that contain markdown', async () => {
214+
server.use(
215+
http.head(
216+
'http://test.local/llms-full.txt',
217+
() =>
218+
new HttpResponse(null, {
219+
status: 200,
220+
headers: { 'content-type': 'text/plain' },
221+
}),
222+
),
223+
http.get(
224+
'http://test.local/llms-full.txt',
225+
() =>
226+
new HttpResponse(
227+
// Body uses markdown syntax (headings and a list item with a link).
'# Full docs\n\n## API Reference\n\n- [Endpoint](http://test.local/api.md): API',
228+
{
229+
status: 200,
230+
headers: { 'content-type': 'text/plain' },
231+
},
232+
),
233+
),
234+
);
235+
236+
// Input document whose only link points at the mocked .txt URL.
const content = `# Test\n> Summary\n## Links\n- [Full docs](http://test.local/llms-full.txt): Complete documentation\n`;
237+
const result = await check.run(makeCtx(content));
238+
expect(result.status).toBe('pass');
239+
});
240+
241+
// Counterpart to the markdown-in-.txt case: a linked .txt URL whose body is
// plain robots directives (no markdown syntax) must still yield 'fail'.
it('fails for .txt files that do not contain markdown', async () => {
242+
server.use(
243+
http.head(
244+
'http://test.local/robots.txt',
245+
() =>
246+
new HttpResponse(null, {
247+
status: 200,
248+
headers: { 'content-type': 'text/plain' },
249+
}),
250+
),
251+
http.get(
252+
'http://test.local/robots.txt',
253+
() =>
254+
// Body is robots.txt-style directives, not markdown.
new HttpResponse('User-agent: *\nDisallow: /private/', {
255+
status: 200,
256+
headers: { 'content-type': 'text/plain' },
257+
}),
258+
),
259+
);
260+
261+
// Input document whose only link points at the mocked robots.txt URL.
const content = `# Test\n> Summary\n## Links\n- [Robots](http://test.local/robots.txt): Robots file\n`;
262+
const result = await check.run(makeCtx(content));
263+
expect(result.status).toBe('fail');
264+
});
265+
213266
it('uses toMdUrls to find .md variants (handles trailing slash and .html)', async () => {
214267
server.use(
215268
http.head(

0 commit comments

Comments
 (0)