diff --git a/src/__tests__/commands/scrape.test.ts b/src/__tests__/commands/scrape.test.ts
index 28ddea2..1103775 100644
--- a/src/__tests__/commands/scrape.test.ts
+++ b/src/__tests__/commands/scrape.test.ts
@@ -3,7 +3,10 @@
  */
 
 import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
-import { executeScrape } from '../../commands/scrape';
+import * as fs from 'fs';
+import * as os from 'os';
+import * as path from 'path';
+import { executeScrape, handleScrapeCommand } from '../../commands/scrape';
 import { getClient } from '../../utils/client';
 import { initializeConfig } from '../../utils/config';
 import { setupTest, teardownTest } from '../utils/mock-client';
@@ -387,6 +390,81 @@ describe('executeScrape', () => {
     });
   });
 
+  describe('Screenshot binary output', () => {
+    let outputDir: string;
+
+    beforeEach(() => {
+      // Write to a real temporary directory instead of mocking `fs`: vitest
+      // hoists vi.mock() to the top of the file, so a mock declared inside a
+      // test would silently stub `fs` for every test in this module.
+      outputDir = fs.mkdtempSync(path.join(os.tmpdir(), 'scrape-screenshot-'));
+    });
+
+    afterEach(() => {
+      // Undo vi.stubGlobal('fetch', ...) so the stub cannot leak into other tests.
+      vi.unstubAllGlobals();
+      fs.rmSync(outputDir, { recursive: true, force: true });
+    });
+
+    it('should download screenshot binary when output is an image file', async () => {
+      const pngBytes = new Uint8Array([0x89, 0x50, 0x4e, 0x47]);
+      const mockResponse = {
+        screenshot: 'https://cdn.firecrawl.dev/screenshot-abc.png',
+        metadata: { title: 'Test' },
+      };
+      mockClient.scrape.mockResolvedValue(mockResponse);
+
+      const mockFetch = vi.fn().mockResolvedValue({
+        ok: true,
+        arrayBuffer: () => Promise.resolve(pngBytes.buffer),
+      });
+      vi.stubGlobal('fetch', mockFetch);
+
+      // A nested path also exercises creation of a missing output directory.
+      const outputPath = path.join(outputDir, 'nested', 'test-screenshot.png');
+
+      await handleScrapeCommand({
+        url: 'https://example.com',
+        formats: ['screenshot'],
+        output: outputPath,
+      });
+
+      expect(mockFetch).toHaveBeenCalledWith(
+        'https://cdn.firecrawl.dev/screenshot-abc.png'
+      );
+      // Assert on the bytes actually written, not just that a write happened.
+      expect(fs.readFileSync(outputPath)).toEqual(Buffer.from(pngBytes));
+    });
+
+    it('should not fetch binary when output is not an image extension', async () => {
+      const mockResponse = {
+        screenshot: 'https://cdn.firecrawl.dev/screenshot-abc.png',
+        metadata: { title: 'Test' },
+      };
+      mockClient.scrape.mockResolvedValue(mockResponse);
+
+      const mockFetch = vi.fn();
+      vi.stubGlobal('fetch', mockFetch);
+
+      const stdoutSpy = vi
+        .spyOn(process.stdout, 'write')
+        .mockImplementation(() => true);
+
+      try {
+        await handleScrapeCommand({
+          url: 'https://example.com',
+          formats: ['screenshot'],
+          output: path.join(outputDir, 'result.txt'),
+        });
+      } finally {
+        stdoutSpy.mockRestore();
+      }
+
+      // Should NOT call fetch — falls through to handleScrapeOutput
+      expect(mockFetch).not.toHaveBeenCalled();
+    });
+  });
+
   describe('Type safety', () => {
     it('should accept valid ScrapeFormat types', async () => {
       const formatList: Array<'markdown' | 'html' | 'rawHtml' | 'links'> = [
diff --git a/src/commands/scrape.ts b/src/commands/scrape.ts
index 271acb6..2631161 100644
--- a/src/commands/scrape.ts
+++ b/src/commands/scrape.ts
@@ -183,6 +183,48 @@ export async function handleScrapeCommand(
     effectiveFormats.push('screenshot');
   }
 
+  // When outputting a single screenshot to an image file, fetch the actual
+  // binary instead of writing the URL as text. The multi-URL code path
+  // (handleAllScrapeCommand) already does this, but the single-URL path
+  // was missing it — resulting in a text file containing the CDN URL.
+  if (
+    options.output &&
+    result.success &&
+    result.data?.screenshot &&
+    effectiveFormats.length === 1 &&
+    effectiveFormats[0] === 'screenshot' &&
+    /\.(png|jpg|jpeg|webp)$/i.test(options.output)
+  ) {
+    const fs = await import('fs');
+    const path = await import('path');
+
+    // fetch() rejects (rather than resolving with !ok) on network-level
+    // failures such as DNS errors, so report those as a download failure too.
+    let response: Response;
+    try {
+      response = await fetch(result.data.screenshot);
+    } catch (error: unknown) {
+      const reason = error instanceof Error ? error.message : String(error);
+      console.error(`Failed to download screenshot: ${reason}`);
+      process.exit(1);
+    }
+
+    if (!response.ok) {
+      console.error(
+        `Failed to download screenshot: ${response.status} ${response.statusText}`
+      );
+      process.exit(1);
+    }
+
+    const buffer = Buffer.from(await response.arrayBuffer());
+
+    // Create the target directory only after a successful download so a failed
+    // request leaves nothing behind; `recursive` makes this a no-op if it exists.
+    fs.mkdirSync(path.dirname(options.output), { recursive: true });
+    fs.writeFileSync(options.output, buffer);
+    return;
+  }
+
   handleScrapeOutput(
     result,
     effectiveFormats,