Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions src/checks/llms-txt/llms-txt-links-markdown.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { registerCheck } from '../registry.js';
import { extractMarkdownLinks } from './llms-txt-valid.js';
import { toMdUrls } from '../../helpers/to-md-urls.js';
import { looksLikeMarkdown } from '../../helpers/detect-markdown.js';
import type { CheckContext, CheckResult, DiscoveredFile } from '../../types.js';

interface LinkMarkdownResult {
Expand Down Expand Up @@ -107,6 +108,27 @@ async function checkLlmsTxtLinksMarkdown(ctx: CheckContext): Promise<CheckResult
};
}

// For .txt URLs (e.g. llms-full.txt, llms-small.txt companion files),
// content-sniff because they may contain markdown served as text/plain.
// Only the parsed URL's pathname is tested, so a query string or fragment
// does not affect the '.txt' match.
if (new URL(url).pathname.endsWith('.txt')) {
try {
// Request the URL with ctx.http.fetch (no options passed) so the body can be read.
const getResp = await ctx.http.fetch(url);
if (getResp.ok) {
const body = await getResp.text();
if (looksLikeMarkdown(body)) {
// The body looks like markdown: report the link as serving markdown even
// though it is flagged as having no markdown extension.
return {
url,
hasMarkdownExtension: false,
servesMarkdown: true,
// NOTE(review): this is the status of `response` (declared outside this
// fragment), not of getResp — confirm that is intended.
status: response.status,
};
}
}
} catch {
// Best-effort: an error from the fetch or body read is deliberately ignored.
// Fall through to .md variant check
}
}

// Try .md variant candidates
const candidates = toMdUrls(url);
for (const mdUrl of candidates) {
Expand Down
53 changes: 53 additions & 0 deletions test/unit/checks/llms-txt-links-markdown.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,59 @@ Just text, no links here.
expect(result.message).toContain('sampled links');
});

it('passes for .txt companion files that contain markdown', async () => {
  // A companion file answered as text/plain on both HEAD and GET; the GET body
  // carries headings and a link list.
  const companionUrl = 'http://test.local/llms-full.txt';
  const companionBody =
    '# Full docs\n\n## API Reference\n\n- [Endpoint](http://test.local/api.md): API';

  const respondToHead = () =>
    new HttpResponse(null, {
      status: 200,
      headers: { 'content-type': 'text/plain' },
    });
  const respondToGet = () =>
    new HttpResponse(companionBody, {
      status: 200,
      headers: { 'content-type': 'text/plain' },
    });

  server.use(http.head(companionUrl, respondToHead), http.get(companionUrl, respondToGet));

  // llms.txt content whose single link points at the companion file.
  const content = `# Test\n> Summary\n## Links\n- [Full docs](http://test.local/llms-full.txt): Complete documentation\n`;
  const result = await check.run(makeCtx(content));

  expect(result.status).toBe('pass');
});

it('fails for .txt files that do not contain markdown', async () => {
  // A .txt file answered as text/plain on both HEAD and GET; the GET body is
  // robots directives only.
  const robotsUrl = 'http://test.local/robots.txt';
  const robotsBody = 'User-agent: *\nDisallow: /private/';

  const respondToHead = () =>
    new HttpResponse(null, {
      status: 200,
      headers: { 'content-type': 'text/plain' },
    });
  const respondToGet = () =>
    new HttpResponse(robotsBody, {
      status: 200,
      headers: { 'content-type': 'text/plain' },
    });

  server.use(http.head(robotsUrl, respondToHead), http.get(robotsUrl, respondToGet));

  // llms.txt content whose single link points at the robots file.
  const content = `# Test\n> Summary\n## Links\n- [Robots](http://test.local/robots.txt): Robots file\n`;
  const result = await check.run(makeCtx(content));

  expect(result.status).toBe('fail');
});

it('uses toMdUrls to find .md variants (handles trailing slash and .html)', async () => {
server.use(
http.head(
Expand Down
Loading