From 6dc05ba640ad254eca1993e2e192ca63a639359c Mon Sep 17 00:00:00 2001 From: Azhar Zaman Date: Sun, 15 Mar 2026 22:02:54 +0500 Subject: [PATCH 1/3] add metadata clear_cache support --- .prettierrc | 4 + package-lock.json | 2 + src/cache/disk.ts | 161 ++++++++++------- src/cli.ts | 280 +++++++++++++++-------------- src/crawler/queue.ts | 417 +++++++++++++++++++++++-------------------- src/parser/meta.ts | 58 ++++++ src/types.ts | 88 +++++---- 7 files changed, 577 insertions(+), 433 deletions(-) create mode 100644 .prettierrc create mode 100644 src/parser/meta.ts diff --git a/.prettierrc b/.prettierrc new file mode 100644 index 0000000..7a44207 --- /dev/null +++ b/.prettierrc @@ -0,0 +1,4 @@ +{ + "singleQuote": true, + "trailingComma": false +} diff --git a/package-lock.json b/package-lock.json index da3f172..d3ba9b7 100644 --- a/package-lock.json +++ b/package-lock.json @@ -141,6 +141,7 @@ } ], "license": "MIT", + "peer": true, "engines": { "node": ">=18" }, @@ -163,6 +164,7 @@ } ], "license": "MIT", + "peer": true, "engines": { "node": ">=18" } diff --git a/src/cache/disk.ts b/src/cache/disk.ts index b757564..0ed7828 100644 --- a/src/cache/disk.ts +++ b/src/cache/disk.ts @@ -1,85 +1,108 @@ -import { createHash } from 'crypto'; -import { mkdir, readFile, writeFile, access } from 'fs/promises'; -import { join } from 'path'; -import { CacheEntry } from '../types.js'; +import { createHash } from "crypto"; +import { mkdir, readFile, writeFile, access, rm } from "fs/promises"; +import { join } from "path"; +import { CacheEntry, PageMetadata } from "../types.js"; export class DiskCache { - private cacheDir: string; - private cacheEnabled: boolean = true; + private cacheDir: string; + private cacheEnabled: boolean = true; - constructor(cacheDir: string = '.cache') { - this.cacheDir = cacheDir; - } + constructor(cacheDir: string = ".cache") { + this.cacheDir = cacheDir; + } - async init(): Promise { - try { - await mkdir(this.cacheDir, { recursive: true }); - } catch (error) { - // If we can't create the cache directory, disable caching - // Only show warning in debug mode or when explicitly requested - if (process.env.DEBUG || process.env.CRAWL_DEBUG) { - console.warn(`Cache directory creation failed: ${error instanceof Error ? error.message : 'Unknown error'}. Caching will be disabled.`); - } - this.cacheEnabled = false; - } + async init(): Promise { + try { + await mkdir(this.cacheDir, { recursive: true }); + } catch (error) { + // If we can't create the cache directory, disable caching + // Only show warning in debug mode or when explicitly requested + if (process.env.DEBUG || process.env.CRAWL_DEBUG) { + console.warn( + `Cache directory creation failed: ${error instanceof Error ? error.message : "Unknown error"}. Caching will be disabled.`, + ); + } + this.cacheEnabled = false; } + } - private getCacheKey(url: string): string { - return createHash('sha256').update(url).digest('hex'); - } + private getCacheKey(url: string): string { + return createHash("sha256").update(url).digest("hex"); + } - private getCachePath(url: string): string { - const key = this.getCacheKey(url); - return join(this.cacheDir, `${key}.json`); - } + private getCachePath(url: string): string { + const key = this.getCacheKey(url); + return join(this.cacheDir, `${key}.json`); + } + + async has(url: string): Promise { + if (!this.cacheEnabled) return false; - async has(url: string): Promise { - if (!this.cacheEnabled) return false; - - try { - await access(this.getCachePath(url)); - return true; - } catch { - return false; - } + try { + await access(this.getCachePath(url)); + return true; + } catch { + return false; } + } - async get(url: string): Promise { - if (!this.cacheEnabled) return null; - - try { - const path = this.getCachePath(url); - const data = await readFile(path, 'utf-8'); - return JSON.parse(data) as CacheEntry; - } catch { - return null; - } + async get(url: string): Promise { + if (!this.cacheEnabled) return null; + + try { + const path = this.getCachePath(url); + const data = await readFile(path, "utf-8"); + return JSON.parse(data) as CacheEntry; + } catch { + return null; } + } + + async put( + url: string, + markdown: string, + title?: string, + metadata?: PageMetadata, + ): Promise { + if (!this.cacheEnabled) return; - async put(url: string, markdown: string, title?: string): Promise { - if (!this.cacheEnabled) return; - - const entry: CacheEntry = { - url, - markdown, - timestamp: Date.now(), - title, - }; + const entry: CacheEntry = { + url, + markdown, + timestamp: Date.now(), + title, + metadata, + }; - const path = this.getCachePath(url); - try { - await writeFile(path, JSON.stringify(entry, null, 2)); - } catch (error) { - // Only show warning in debug mode - if (process.env.DEBUG || process.env.CRAWL_DEBUG) { - console.warn(`Failed to write to cache: ${error instanceof Error ? error.message : 'Unknown error'}`); - } - } + const path = this.getCachePath(url); + try { + await writeFile(path, JSON.stringify(entry, null, 2)); + } catch (error) { + // Only show warning in debug mode + if (process.env.DEBUG || process.env.CRAWL_DEBUG) { + console.warn( + `Failed to write to cache: ${error instanceof Error ? error.message : "Unknown error"}`, + ); + } } + } + + async getAge(url: string): Promise { + const entry = await this.get(url); + if (!entry) return null; + return Date.now() - entry.timestamp; + } - async getAge(url: string): Promise { - const entry = await this.get(url); - if (!entry) return null; - return Date.now() - entry.timestamp; + async clear(): Promise { + try { + await rm(this.cacheDir, { recursive: true, force: true }); + } catch (error) { + // Only show warning in debug mode + if (process.env.DEBUG || process.env.CRAWL_DEBUG) { + console.warn( + `Failed to clear cache: ${error instanceof Error ? error.message : "Unknown error"}`, + ); + } } -} \ No newline at end of file + } +} diff --git a/src/cli.ts b/src/cli.ts index 6f4408c..773e518 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -1,152 +1,158 @@ #!/usr/bin/env node -import yargs from 'yargs'; -import { hideBin } from 'yargs/helpers'; -import { fetch } from './index.js'; -import { CrawlOptions } from './types.js'; +import yargs from "yargs"; +import { hideBin } from "yargs/helpers"; +import { fetch } from "./index.js"; +import { CrawlOptions } from "./types.js"; const argv = yargs(hideBin(process.argv)) - .command( - '$0 ', - 'Fetch a URL and convert to Markdown', - (yargs) => { - return yargs - .positional('url', { - describe: 'The URL to fetch', - type: 'string', - demandOption: true, - }) - .option('depth', { - alias: 'd', - describe: 'Crawl depth (0 = single page)', - type: 'number', - default: 0, - }) - .option('concurrency', { - alias: 'c', - describe: 'Max concurrent requests', - type: 'number', - default: 3, - }) - .option('robots', { - describe: 'Respect robots.txt', - type: 'boolean', - default: true, - }) - .option('all-origins', { - describe: 'Allow cross-origin crawling', - type: 'boolean', - default: false, - }) - .option('user-agent', { - alias: 'u', - describe: 'Custom user agent', - type: 'string', - }) - .option('cache-dir', { - describe: 'Cache directory', - type: 'string', - default: '.cache', - }) - .option('timeout', { - alias: 't', - describe: 'Request timeout in milliseconds', - type: 'number', - default: 30000, - }) - .option('output', { - alias: 'o', - describe: 'Output format', - type: 'string', - choices: ['json', 'markdown', 'both'], - default: 'markdown', - }); - }, - async (argv) => { - try { - const crawlOptions: CrawlOptions = { - depth: argv.depth, - maxConcurrency: argv.concurrency, - respectRobots: argv.robots, - sameOriginOnly: !argv.allOrigins, - userAgent: argv.userAgent, - cacheDir: argv.cacheDir, - timeout: argv.timeout, - }; + .command( + "$0 ", + "Fetch a URL and convert to Markdown", + (yargs) => { + return yargs + .positional("url", { + describe: "The URL to fetch", + type: "string", + demandOption: true, + }) + .option("depth", { + alias: "d", + describe: "Crawl depth (0 = single page)", + type: "number", + default: 0, + }) + .option("concurrency", { + alias: "c", + describe: "Max concurrent requests", + type: "number", + default: 3, + }) + .option("robots", { + describe: "Respect robots.txt", + type: "boolean", + default: true, + }) + .option("all-origins", { + describe: "Allow cross-origin crawling", + type: "boolean", + default: false, + }) + .option("user-agent", { + alias: "u", + describe: "Custom user agent", + type: "string", + }) + .option("cache-dir", { + describe: "Cache directory", + type: "string", + default: ".cache", + }) + .option("timeout", { + alias: "t", + describe: "Request timeout in milliseconds", + type: "number", + default: 30000, + }) + .option("output", { + alias: "o", + describe: "Output format", + type: "string", + choices: ["json", "markdown", "both"], + default: "markdown", + }) + .option("include-metadata", { + alias: "meta", + describe: "Include metadata in the crawl results", + type: "boolean", + default: false, + }) + .option("clear-cache", { + describe: "Clear the cache directory", + type: "boolean", + default: false, + }); + }, + async (argv) => { + try { + const crawlOptions: CrawlOptions = { + depth: argv.depth, + maxConcurrency: argv.concurrency, + respectRobots: argv.robots, + sameOriginOnly: !argv.allOrigins, + userAgent: argv.userAgent, + cacheDir: argv.cacheDir, + timeout: argv.timeout, + includeMetadata: argv.includeMetadata, + clearCache: argv.clearCache, + }; - console.error(`Fetching ${argv.url}...`); - const results = await fetch(argv.url as string, crawlOptions); + console.error(`Fetching ${argv.url}...`); + const results = await fetch(argv.url as string, crawlOptions); - if (argv.output === 'json') { - console.log(JSON.stringify(results, null, 2)); - } else if (argv.output === 'markdown') { - results.forEach(result => { - // Always output markdown if we have it, even with errors - if (result.markdown) { - console.log(result.markdown); - if (results.length > 1) { - console.log('\n---\n'); // Separator between multiple pages - } - } - // Show error as warning if we also have content - if (result.error && result.markdown) { - console.error( - `Warning for ${result.url}: ${result.error}` - ); - } else if (result.error && !result.markdown) { - console.error( - `Error for ${result.url}: ${result.error}` - ); - } - }); - } else if (argv.output === 'both') { - results.forEach(result => { - console.log(`\n## URL: ${result.url}\n`); - if (result.markdown) { - console.log(result.markdown); - } - if (result.error) { - console.error( - `${result.markdown ? 'Warning' : 'Error'}: ${result.error}` - ); - } - }); - } - - // Exit with error only if we have errors without content - const hasFatalErrors = results.some(r => r.error && !r.markdown); - if (hasFatalErrors) { - process.exit(1); - } - } catch (error) { - console.error( - 'Error:', - error instanceof Error ? error.message : error - ); - process.exit(1); + if (argv.output === "json") { + console.log(JSON.stringify(results, null, 2)); + } else if (argv.output === "markdown") { + results.forEach((result) => { + // Always output markdown if we have it, even with errors + if (result.markdown) { + console.log(result.markdown); + if (results.length > 1) { + console.log("\n---\n"); // Separator between multiple pages + } + } + // Show error as warning if we also have content + if (result.error && result.markdown) { + console.error(`Warning for ${result.url}: ${result.error}`); + } else if (result.error && !result.markdown) { + console.error(`Error for ${result.url}: ${result.error}`); + } + }); + } else if (argv.output === "both") { + results.forEach((result) => { + console.log(`\n## URL: ${result.url}\n`); + if (result.markdown) { + console.log(result.markdown); + } + if (result.error) { + console.error( + `${result.markdown ? "Warning" : "Error"}: ${result.error}`, + ); } + }); } - ) - .command( - 'clear-cache', - 'Clear the cache directory', - (yargs) => { + + // Exit with error only if we have errors without content + const hasFatalErrors = results.some((r) => r.error && !r.markdown); + if (hasFatalErrors) { + process.exit(1); + } + } catch (error) { + console.error("Error:", error instanceof Error ? error.message : error); + process.exit(1); + } + }, + ) + .command( + "clear-cache", + "Clear the cache directory", + (yargs) => { return yargs.option('cache-dir', { describe: 'Cache directory', type: 'string', default: '.cache', - }); - }, - async (argv) => { - try { + }); + }, + async (argv) => { + try { const { rm } = await import('fs/promises'); - await rm(argv.cacheDir, { recursive: true, force: true }); - console.log(`Cache cleared: ${argv.cacheDir}`); - } catch (error) { + await rm(argv.cacheDir, { recursive: true, force: true }); + console.log(`Cache cleared: ${argv.cacheDir}`); + } catch (error) { console.error('Error clearing cache:', error); - process.exit(1); - } + process.exit(1); + } } - ) - .help() - .parse(); \ No newline at end of file + ) + .help() + .parse(); \ No newline at end of file diff --git a/src/crawler/queue.ts b/src/crawler/queue.ts index f331a78..837fc3a 100644 --- a/src/crawler/queue.ts +++ b/src/crawler/queue.ts @@ -1,214 +1,241 @@ -import pLimit from 'p-limit'; -import { CrawlOptions, CrawlResult } from '../types.js'; -import { normalizeUrl, isSameOrigin } from '../cache/normalize.js'; -import { DiskCache } from '../cache/disk.js'; -import { fetchStream, isValidUrl } from './fetch.js'; -import { parseNetscapeCookieFile, buildCookieHeaderForUrl, type CookieEntry } from './cookies.js'; -import { isAllowedByRobots, getCrawlDelay } from './robots.js'; -import { htmlToDom, extractLinks } from '../parser/dom.js'; -import { extractArticle } from '../parser/article.js'; -import { formatArticleMarkdown } from '../parser/markdown.js'; +import pLimit from "p-limit"; +import { CrawlOptions, CrawlResult, PageMetadata } from "../types.js"; +import { normalizeUrl, isSameOrigin } from "../cache/normalize.js"; +import { DiskCache } from "../cache/disk.js"; +import { fetchStream, isValidUrl } from "./fetch.js"; +import { + parseNetscapeCookieFile, + buildCookieHeaderForUrl, + type CookieEntry, +} from "./cookies.js"; +import { isAllowedByRobots, getCrawlDelay } from "./robots.js"; +import { htmlToDom, extractLinks } from "../parser/dom.js"; +import { extractArticle } from "../parser/article.js"; +import { formatArticleMarkdown } from "../parser/markdown.js"; +import { extractMetadata } from "../parser/meta.js"; export class CrawlQueue { - private visited = new Set(); - private queue: string[] = []; - private limit: ReturnType; - private cache: DiskCache; - private options: Required> & Pick; - private results: CrawlResult[] = []; - private cookieJar?: CookieEntry[]; - - constructor(options: CrawlOptions = {}) { - this.options = { - depth: options.depth ?? 0, - maxConcurrency: options.maxConcurrency ?? 3, - respectRobots: options.respectRobots ?? true, - sameOriginOnly: options.sameOriginOnly ?? true, - userAgent: options.userAgent ?? 'MCP/0.1', - cacheDir: options.cacheDir ?? '.cache', - timeout: options.timeout ?? 30000, - cookieHeader: options.cookieHeader, - cookiesFile: options.cookiesFile, - }; - - this.limit = pLimit(this.options.maxConcurrency); - this.cache = new DiskCache(this.options.cacheDir); - // Load cookies file once if provided - if (options.cookiesFile) { - try { - this.cookieJar = parseNetscapeCookieFile(options.cookiesFile); - } catch { - // Ignore cookie file errors and proceed without cookies - this.cookieJar = undefined; - } - } + private visited = new Set(); + private queue: string[] = []; + private limit: ReturnType; + private cache: DiskCache; + private options: Required< + Omit + > & + Pick; + private results: CrawlResult[] = []; + private cookieJar?: CookieEntry[]; + + constructor(options: CrawlOptions = {}) { + this.options = { + depth: options.depth ?? 0, + maxConcurrency: options.maxConcurrency ?? 3, + respectRobots: options.respectRobots ?? true, + sameOriginOnly: options.sameOriginOnly ?? true, + userAgent: options.userAgent ?? "MCP/0.1", + cacheDir: options.cacheDir ?? ".cache", + timeout: options.timeout ?? 30000, + cookieHeader: options.cookieHeader, + cookiesFile: options.cookiesFile, + includeMetadata: options.includeMetadata ?? false, + clearCache: options.clearCache ?? false, + }; + + this.limit = pLimit(this.options.maxConcurrency); + this.cache = new DiskCache(this.options.cacheDir); + // Load cookies file once if provided + if (options.cookiesFile) { + try { + this.cookieJar = parseNetscapeCookieFile(options.cookiesFile); + } catch { + // Ignore cookie file errors and proceed without cookies + this.cookieJar = undefined; + } } + } - async init(): Promise { - await this.cache.init(); + async init(): Promise { + await this.cache.init(); + if (this.options.clearCache) { + console.log("Clearing cache..."); + await this.cache.clear(); } + } - async crawl(startUrl: string): Promise { - const normalizedUrl = normalizeUrl(startUrl); + async crawl(startUrl: string): Promise { + const normalizedUrl = normalizeUrl(startUrl); - if (!isValidUrl(normalizedUrl)) { - throw new Error(`Invalid URL: ${startUrl}`); - } + if (!isValidUrl(normalizedUrl)) { + throw new Error(`Invalid URL: ${startUrl}`); + } - this.queue.push(normalizedUrl); - await this.processQueue(0); + this.queue.push(normalizedUrl); + await this.processQueue(0); - return this.results; - } + return this.results; + } - private async processQueue(currentDepth: number): Promise { - if (currentDepth > this.options.depth) return; + private async processQueue(currentDepth: number): Promise { + if (currentDepth > this.options.depth) return; - const urls = [...this.queue]; - this.queue = []; + const urls = [...this.queue]; + this.queue = []; - const tasks = urls.map(url => - this.limit(() => this.processUrl(url, currentDepth)) - ); + const tasks = urls.map((url) => + this.limit(() => this.processUrl(url, currentDepth)), + ); - await Promise.all(tasks); + await Promise.all(tasks); - if (this.queue.length > 0) { - await this.processQueue(currentDepth + 1); - } + if (this.queue.length > 0) { + await this.processQueue(currentDepth + 1); } + } + + private async processUrl(url: string, depth: number): Promise { + const normalizedUrl = normalizeUrl(url); + + if (this.visited.has(normalizedUrl)) return; + this.visited.add(normalizedUrl); + + try { + // Check cache first + const cached = await this.cache.get(normalizedUrl); + if (cached) { + console.log("Cache hit:", normalizedUrl); + this.results.push({ + url: normalizedUrl, + markdown: cached.markdown, + title: cached.title, + metadata: cached.metadata, + }); + return; + } + + // Check robots.txt + if (this.options.respectRobots) { + const allowed = await isAllowedByRobots( + normalizedUrl, + this.options.userAgent, + ); + if (!allowed) { + this.results.push({ + url: normalizedUrl, + markdown: "", + error: "Blocked by robots.txt", + }); + return; + } - private async processUrl(url: string, depth: number): Promise { - const normalizedUrl = normalizeUrl(url); - - if (this.visited.has(normalizedUrl)) return; - this.visited.add(normalizedUrl); - - try { - // Check cache first - const cached = await this.cache.get(normalizedUrl); - if (cached) { - this.results.push({ - url: normalizedUrl, - markdown: cached.markdown, - title: cached.title, - }); - return; - } - - // Check robots.txt - if (this.options.respectRobots) { - const allowed = await isAllowedByRobots( - normalizedUrl, - this.options.userAgent - ); - if (!allowed) { - this.results.push({ - url: normalizedUrl, - markdown: '', - error: 'Blocked by robots.txt', - }); - return; - } - - const delay = await getCrawlDelay( - normalizedUrl, - this.options.userAgent - ); - if (delay > 0) { - await new Promise(resolve => - setTimeout(resolve, delay * 1000) - ); - } - } - - // Fetch and parse - const cookieHeader = this.options.cookieHeader || (this.cookieJar ? buildCookieHeaderForUrl(normalizedUrl, this.cookieJar) : undefined); - const html = await fetchStream(normalizedUrl, { - userAgent: this.options.userAgent, - timeout: this.options.timeout, - cookieHeader, - }); - - // Check if we got valid HTML - if (!html || html.trim().length === 0) { - this.results.push({ - url: normalizedUrl, - markdown: '', - error: 'Empty response from server', - }); - return; - } - - const dom = htmlToDom(html, normalizedUrl); - const article = extractArticle(dom); - - if (!article) { - this.results.push({ - url: normalizedUrl, - markdown: '', - error: 'Failed to extract article content', - }); - return; - } - - // Check if we got meaningful content - // For SPAs and JavaScript-heavy sites, we still want to extract whatever we can - if (!article.content || article.content.trim().length < 50) { - // Try to provide some basic content instead of failing - const fallbackMarkdown = - `# ${article.title || 'Page Content'}\n\n` + - `*Note: This page appears to be JavaScript-rendered. Limited content extracted.*\n\n` + - (article.textContent - ? article.textContent.substring(0, 1000) + '...' - : 'No text content available'); - - this.results.push({ - url: normalizedUrl, - markdown: fallbackMarkdown, - title: article.title || normalizedUrl, - error: 'Limited content extracted (JavaScript-rendered page)', - }); - return; - } - - const markdown = formatArticleMarkdown(article); - - // Cache the result - await this.cache.put(normalizedUrl, markdown, article.title); - - // Extract links for further crawling - let links: string[] = []; - if (depth < this.options.depth) { - links = extractLinks(dom); - - if (this.options.sameOriginOnly) { - links = links.filter(link => - isSameOrigin(normalizedUrl, link) - ); - } - - // Add to queue - links.forEach(link => { - const normalized = normalizeUrl(link); - if (!this.visited.has(normalized)) { - this.queue.push(normalized); - } - }); - } - - this.results.push({ - url: normalizedUrl, - markdown, - title: article.title, - links: links.length > 0 ? links : undefined, - }); - } catch (error) { - this.results.push({ - url: normalizedUrl, - markdown: '', - error: error instanceof Error ? error.message : 'Unknown error', - }); + const delay = await getCrawlDelay( + normalizedUrl, + this.options.userAgent, + ); + if (delay > 0) { + await new Promise((resolve) => setTimeout(resolve, delay * 1000)); } + } + + // Fetch and parse + const cookieHeader = + this.options.cookieHeader || + (this.cookieJar + ? buildCookieHeaderForUrl(normalizedUrl, this.cookieJar) + : undefined); + + const html = await fetchStream(normalizedUrl, { + userAgent: this.options.userAgent, + timeout: this.options.timeout, + cookieHeader, + }); + + // Check if we got valid HTML + if (!html || html.trim().length === 0) { + this.results.push({ + url: normalizedUrl, + markdown: "", + error: "Empty response from server", + }); + return; + } + + const dom = htmlToDom(html, normalizedUrl); + + let metadata: PageMetadata | undefined; + + if (this.options.includeMetadata) { + metadata = extractMetadata(dom); + } + + const article = extractArticle(dom); + + if (!article) { + this.results.push({ + url: normalizedUrl, + markdown: "", + error: "Failed to extract article content", + metadata, + }); + return; + } + + // Check if we got meaningful content + // For SPAs and JavaScript-heavy sites, we still want to extract whatever we can + if (!article.content || article.content.trim().length < 50) { + // Try to provide some basic content instead of failing + const fallbackMarkdown = + `# ${article.title || "Page Content"}\n\n` + + `*Note: This page appears to be JavaScript-rendered. Limited content extracted.*\n\n` + + (article.textContent + ? article.textContent.substring(0, 1000) + "..." + : "No text content available"); + + this.results.push({ + url: normalizedUrl, + markdown: fallbackMarkdown, + title: article.title || normalizedUrl, + error: "Limited content extracted (JavaScript-rendered page)", + metadata, + }); + return; + } + + const markdown = formatArticleMarkdown(article); + + // Cache the result + await this.cache.put(normalizedUrl, markdown, article.title, metadata); + + // Extract links for further crawling + let links: string[] = []; + if (depth < this.options.depth) { + links = extractLinks(dom); + + if (this.options.sameOriginOnly) { + links = links.filter((link) => isSameOrigin(normalizedUrl, link)); + } + + // Add to queue + links.forEach((link) => { + const normalized = normalizeUrl(link); + if (!this.visited.has(normalized)) { + this.queue.push(normalized); + } + }); + } + + this.results.push({ + url: normalizedUrl, + markdown, + title: article.title, + links: links.length > 0 ? links : undefined, + metadata, + }); + } catch (error) { + this.results.push({ + url: normalizedUrl, + markdown: "", + error: error instanceof Error ? error.message : "Unknown error", + }); } + } } diff --git a/src/parser/meta.ts b/src/parser/meta.ts new file mode 100644 index 0000000..0862b28 --- /dev/null +++ b/src/parser/meta.ts @@ -0,0 +1,58 @@ +import { JSDOM } from "jsdom"; +import { PageMetadata } from "../types.js"; + +export function extractMetadata(dom: JSDOM): PageMetadata { + const document = dom.window.document; + + // Helper to get attribute content from various selectors + const getMeta = (selector: string): string | undefined => { + return ( + document.querySelector(selector)?.getAttribute("content")?.trim() || + undefined + ); + }; + + const metadata: PageMetadata = { + // Standard SEO + title: document.title || getMeta('meta[name="title"]'), + description: getMeta('meta[name="description"]'), + author: + getMeta('meta[name="author"]') || + getMeta('meta[property="article:author"]'), + keywords: getMeta('meta[name="keywords"]') + ?.split(",") + .map((k) => k.trim()) + .filter(Boolean), + canonical: + document.querySelector('link[rel="canonical"]')?.getAttribute("href") || + undefined, + language: + document.documentElement.lang || getMeta('meta[property="og:locale"]'), + + // OpenGraph + ogTitle: getMeta('meta[property="og:title"]'), + ogDescription: getMeta('meta[property="og:description"]'), + ogImage: getMeta('meta[property="og:image"]'), + ogType: getMeta('meta[property="og:type"]'), + + // Twitter Cards + twitterCard: getMeta('meta[name="twitter:card"]'), + twitterSite: getMeta('meta[name="twitter:site"]'), + twitterCreator: getMeta('meta[name="twitter:creator"]'), + + // Articles/Blogs + publishedTime: + getMeta('meta[property="article:published_time"]') || + getMeta('meta[itemprop="datePublished"]'), + modifiedTime: + getMeta('meta[property="article:modified_time"]') || + getMeta('meta[itemprop="dateModified"]'), + }; + + // Fallback: If title is still missing, try H1 + if (!metadata.title) { + metadata.title = document.querySelector("h1")?.textContent?.trim(); + } + + return metadata; +} diff --git a/src/types.ts b/src/types.ts index ace1b43..a8b655b 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1,43 +1,67 @@ export interface CrawlOptions { - depth?: number; - maxConcurrency?: number; - respectRobots?: boolean; - sameOriginOnly?: boolean; - userAgent?: string; - cacheDir?: string; - timeout?: number; - // Optional cookies - // Raw Cookie header to send on all requests (advanced override) - cookieHeader?: string; - // Path to a Netscape cookie file (.txt) used to build Cookie headers per URL - cookiesFile?: string; + depth?: number; + maxConcurrency?: number; + respectRobots?: boolean; + sameOriginOnly?: boolean; + userAgent?: string; + cacheDir?: string; + timeout?: number; + // Optional cookies + // Raw Cookie header to send on all requests (advanced override) + cookieHeader?: string; + // Path to a Netscape cookie file (.txt) used to build Cookie headers per URL + cookiesFile?: string; + // Whether to include metadata in the crawl results + includeMetadata?: boolean; + // Whether to clear the cache directory before crawling + clearCache?: boolean; } export interface Article { - title: string; - content: string; - textContent: string; - length: number; - excerpt: string; - byline: string | null; - dir: string | null; - siteName: string | null; - lang: string | null; - publishedTime: string | null; - baseUrl?: string; + title: string; + content: string; + textContent: string; + length: number; + excerpt: string; + byline: string | null; + dir: string | null; + siteName: string | null; + lang: string | null; + publishedTime: string | null; + baseUrl?: string; } export interface CrawlResult { - url: string; - markdown: string; - title?: string; - links?: string[]; - error?: string; + url: string; + markdown: string; + title?: string; + links?: string[]; + error?: string; + metadata?: PageMetadata; } export interface CacheEntry { - url: string; - markdown: string; - timestamp: number; - title?: string; + url: string; + markdown: string; + timestamp: number; + title?: string; + metadata?: PageMetadata; +} + +export interface PageMetadata { + title?: string; + description?: string; + keywords?: string[]; + author?: string; + ogTitle?: string; + ogDescription?: string; + ogImage?: string; + ogType?: string; + twitterCard?: string; + twitterSite?: string; + twitterCreator?: string; + canonical?: string; + publishedTime?: string; + modifiedTime?: string; + language?: string; } From 7df5013744db5cab4ea13962261aee5f99d0b433 Mon Sep 17 00:00:00 2001 From: Azhar Zaman Date: Sun, 15 Mar 2026 22:22:48 +0500 Subject: [PATCH 2/3] tsc --- dist/cli.js | 111 +++++++++++++++++++++++++++--------------------- dist/cli.js.map | 2 +- 2 files changed, 63 insertions(+), 50 deletions(-) diff --git a/dist/cli.js b/dist/cli.js index 360c0cf..53cf4ab 100755 --- a/dist/cli.js +++ b/dist/cli.js @@ -1,59 +1,70 @@ #!/usr/bin/env node -import yargs from 'yargs'; -import { hideBin } from 'yargs/helpers'; -import { fetch } from './index.js'; +import yargs from "yargs"; +import { hideBin } from "yargs/helpers"; +import { fetch } from "./index.js"; const argv = yargs(hideBin(process.argv)) - .command('$0 ', 'Fetch a URL and convert to Markdown', (yargs) => { + .command("$0 ", "Fetch a URL and convert to Markdown", (yargs) => { return yargs - .positional('url', { - describe: 'The URL to fetch', - type: 'string', + .positional("url", { + describe: "The URL to fetch", + type: "string", demandOption: true, }) - .option('depth', { - alias: 'd', - describe: 'Crawl depth (0 = single page)', - type: 'number', + .option("depth", { + alias: "d", + describe: "Crawl depth (0 = single page)", + type: "number", default: 0, }) - .option('concurrency', { - alias: 'c', - describe: 'Max concurrent requests', - type: 'number', + .option("concurrency", { + alias: "c", + describe: "Max concurrent requests", + type: "number", default: 3, }) - .option('robots', { - describe: 'Respect robots.txt', - type: 'boolean', + .option("robots", { + describe: "Respect robots.txt", + type: "boolean", default: true, }) - .option('all-origins', { - describe: 'Allow cross-origin crawling', - type: 'boolean', + .option("all-origins", { + describe: "Allow cross-origin crawling", + type: "boolean", default: false, }) - .option('user-agent', { - alias: 'u', - describe: 'Custom user agent', - type: 'string', + .option("user-agent", { + alias: "u", + describe: "Custom user agent", + type: "string", }) - .option('cache-dir', { - describe: 'Cache directory', - type: 'string', - default: '.cache', + .option("cache-dir", { + describe: "Cache directory", + type: "string", + default: ".cache", }) - .option('timeout', { - alias: 't', - describe: 'Request timeout in milliseconds', - type: 'number', + .option("timeout", { + alias: "t", + describe: "Request timeout in milliseconds", + type: "number", default: 30000, }) - .option('output', { - alias: 'o', - describe: 'Output format', - type: 'string', - choices: ['json', 'markdown', 'both'], - default: 'markdown', + .option("output", { + alias: "o", + describe: "Output format", + type: "string", + choices: ["json", "markdown", "both"], + default: "markdown", + }) + .option("include-metadata", { + alias: "meta", + describe: "Include metadata in the crawl results", + type: "boolean", + default: false, + }) + .option("clear-cache", { + describe: "Clear the cache directory", + type: "boolean", + default: false, }); }, async (argv) => { try { @@ -65,19 +76,21 @@ const argv = yargs(hideBin(process.argv)) userAgent: argv.userAgent, cacheDir: argv.cacheDir, timeout: argv.timeout, + includeMetadata: argv.includeMetadata, + clearCache: argv.clearCache, }; console.error(`Fetching ${argv.url}...`); const results = await fetch(argv.url, crawlOptions); - if (argv.output === 'json') { + if (argv.output === "json") { console.log(JSON.stringify(results, null, 2)); } - else if (argv.output === 'markdown') { - results.forEach(result => { + else if (argv.output === "markdown") { + results.forEach((result) => { // Always output markdown if we have it, even with errors if (result.markdown) { console.log(result.markdown); if (results.length > 1) { - console.log('\n---\n'); // Separator between multiple pages + console.log("\n---\n"); // Separator between multiple pages } } // Show error as warning if we also have content @@ -89,29 +102,29 @@ const argv = yargs(hideBin(process.argv)) } }); } - else if (argv.output === 'both') { - results.forEach(result => { + else if (argv.output === "both") { + results.forEach((result) => { console.log(`\n## URL: ${result.url}\n`); if (result.markdown) { console.log(result.markdown); } if (result.error) { - console.error(`${result.markdown ? 'Warning' : 'Error'}: ${result.error}`); + console.error(`${result.markdown ? "Warning" : "Error"}: ${result.error}`); } }); } // Exit with error only if we have errors without content - const hasFatalErrors = results.some(r => r.error && !r.markdown); + const hasFatalErrors = results.some((r) => r.error && !r.markdown); if (hasFatalErrors) { process.exit(1); } } catch (error) { - console.error('Error:', error instanceof Error ? error.message : error); + console.error("Error:", error instanceof Error ? error.message : error); process.exit(1); } }) - .command('clear-cache', 'Clear the cache directory', (yargs) => { + .command("clear-cache", "Clear the cache directory", (yargs) => { return yargs.option('cache-dir', { describe: 'Cache directory', type: 'string', diff --git a/dist/cli.js.map b/dist/cli.js.map index 8675719..a2f1ccb 100644 --- a/dist/cli.js.map +++ b/dist/cli.js.map @@ -1 +1 @@ -{"version":3,"file":"cli.js","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AAEA,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,OAAO,EAAE,MAAM,eAAe,CAAC;AACxC,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AAGnC,MAAM,IAAI,GAAG,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;KACpC,OAAO,CACJ,UAAU,EACV,qCAAqC,EACrC,CAAC,KAAK,EAAE,EAAE;IACN,OAAO,KAAK;SACP,UAAU,CAAC,KAAK,EAAE;QACf,QAAQ,EAAE,kBAAkB;QAC5B,IAAI,EAAE,QAAQ;QACd,YAAY,EAAE,IAAI;KACrB,CAAC;SACD,MAAM,CAAC,OAAO,EAAE;QACb,KAAK,EAAE,GAAG;QACV,QAAQ,EAAE,+BAA+B;QACzC,IAAI,EAAE,QAAQ;QACd,OAAO,EAAE,CAAC;KACb,CAAC;SACD,MAAM,CAAC,aAAa,EAAE;QACnB,KAAK,EAAE,GAAG;QACV,QAAQ,EAAE,yBAAyB;QACnC,IAAI,EAAE,QAAQ;QACd,OAAO,EAAE,CAAC;KACb,CAAC;SACD,MAAM,CAAC,QAAQ,EAAE;QACd,QAAQ,EAAE,oBAAoB;QAC9B,IAAI,EAAE,SAAS;QACf,OAAO,EAAE,IAAI;KAChB,CAAC;SACD,MAAM,CAAC,aAAa,EAAE;QACnB,QAAQ,EAAE,6BAA6B;QACvC,IAAI,EAAE,SAAS;QACf,OAAO,EAAE,KAAK;KACjB,CAAC;SACD,MAAM,CAAC,YAAY,EAAE;QAClB,KAAK,EAAE,GAAG;QACV,QAAQ,EAAE,mBAAmB;QAC7B,IAAI,EAAE,QAAQ;KACjB,CAAC;SACD,MAAM,CAAC,WAAW,EAAE;QACjB,QAAQ,EAAE,iBAAiB;QAC3B,IAAI,EAAE,QAAQ;QACd,OAAO,EAAE,QAAQ;KACpB,CAAC;SACD,MAAM,CAAC,SAAS,EAAE;QACf,KAAK,EAAE,GAAG;QACV,QAAQ,EAAE,iCAAiC;QAC3C,IAAI,EAAE,QAAQ;QACd,OAAO,EAAE,KAAK;KACjB,CAAC;SACD,MAAM,CAAC,QAAQ,EAAE;QACd,KAAK,EAAE,GAAG;QACV,QAAQ,EAAE,eAAe;QACzB,IAAI,EAAE,QAAQ;QACd,OAAO,EAAE,CAAC,MAAM,EAAE,UAAU,EAAE,MAAM,CAAC;QACrC,OAAO,EAAE,UAAU;KACtB,CAAC,CAAC;AACX,CAAC,EACD,KAAK,EAAE,IAAI,EAAE,EAAE;IACX,IAAI,CAAC;QACD,MAAM,YAAY,GAAiB;YAC/B,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,cAAc,EAAE,IAAI,CAAC,WAAW;YAChC,aAAa,EAAE,IAAI,CAAC,MAAM;YAC1B,cAAc,EAAE,CAAC,IAAI,CAAC,UAAU;YAChC,SAAS,EAAE,IAAI,CAAC,SAAS;YACzB,QAAQ,EAAE,IAAI,CAAC,QAAQ;YACvB,OAAO,EAAE,IAAI,CAAC,OAAO;SACxB,CAAC;QAEF,OAAO,CAAC,KAAK,CAAC,YAAY,IAAI,CAAC,GAAG,KAAK,CAAC,CAAC;QACzC,MAAM,OAAO,GAAG,MAAM,KAAK,CAAC,IAAI,CAAC,GAAa,EAAE,YAAY,CAAC,CAAC;QAE9D,IAAI,IAAI,CAAC,MAAM,KAAK,MAAM,EAAE,CAAC;YACzB,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;QAClD,CAAC;aAAM,IAAI,IAAI,CAAC,MAAM,KAAK,UAAU,EAAE,CAAC;YACpC,OAAO,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE;gBACrB,yDAAyD;gBACzD,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;oBAClB,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;oBAC7B,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;wBACrB,OAAO,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC,mCAAmC;oBAC/D,CAAC;gBACL,CAAC;gBACD,gDAAgD;gBAChD,IAAI,MAAM,CAAC,KAAK,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;oBAClC,OAAO,CAAC,KAAK,CACT,eAAe,MAAM,CAAC,GAAG,KAAK,MAAM,CAAC,KAAK,EAAE,CAC/C,CAAC;gBACN,CAAC;qBAAM,IAAI,MAAM,CAAC,KAAK,IAAI,CAAC,MAAM,CAAC,QAAQ,EAAE,CAAC;oBAC1C,OAAO,CAAC,KAAK,CACT,aAAa,MAAM,CAAC,GAAG,KAAK,MAAM,CAAC,KAAK,EAAE,CAC7C,CAAC;gBACN,CAAC;YACL,CAAC,CAAC,CAAC;QACP,CAAC;aAAM,IAAI,IAAI,CAAC,MAAM,KAAK,MAAM,EAAE,CAAC;YAChC,OAAO,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE;gBACrB,OAAO,CAAC,GAAG,CAAC,aAAa,MAAM,CAAC,GAAG,IAAI,CAAC,CAAC;gBACzC,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;oBAClB,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;gBACjC,CAAC;gBACD,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC;oBACf,OAAO,CAAC,KAAK,CACT,GAAG,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,OAAO,KAAK,MAAM,CAAC,KAAK,EAAE,CAC9D,CAAC;gBACN,CAAC;YACL,CAAC,CAAC,CAAC;QACP,CAAC;QAED,yDAAyD;QACzD,MAAM,cAAc,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;QACjE,IAAI,cAAc,EAAE,CAAC;YACjB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACpB,CAAC;IACL,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACb,OAAO,CAAC,KAAK,CACT,QAAQ,EACR,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,CACjD,CAAC;QACF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACpB,CAAC;AACL,CAAC,CACJ;KACA,OAAO,CACJ,aAAa,EACb,2BAA2B,EAC3B,CAAC,KAAK,EAAE,EAAE;IACN,OAAO,KAAK,CAAC,MAAM,CAAC,WAAW,EAAE;QAC7B,QAAQ,EAAE,iBAAiB;QAC3B,IAAI,EAAE,QAAQ;QACd,OAAO,EAAE,QAAQ;KACpB,CAAC,CAAC;AACP,CAAC,EACD,KAAK,EAAE,IAAI,EAAE,EAAE;IACX,IAAI,CAAC;QACD,MAAM,EAAE,EAAE,EAAE,GAAG,MAAM,MAAM,CAAC,aAAa,CAAC,CAAC;QAC3C,MAAM,EAAE,CAAC,IAAI,CAAC,QAAQ,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;QAC1D,OAAO,CAAC,GAAG,CAAC,kBAAkB,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC;IACnD,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACb,OAAO,CAAC,KAAK,CAAC,uBAAuB,EAAE,KAAK,CAAC,CAAC;QAC9C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACpB,CAAC;AACL,CAAC,CACJ;KACA,IAAI,EAAE;KACN,KAAK,EAAE,CAAC"} \ No newline at end of file +{"version":3,"file":"cli.js","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AAEA,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,OAAO,EAAE,MAAM,eAAe,CAAC;AACxC,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AAGnC,MAAM,IAAI,GAAG,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;KACtC,OAAO,CACN,UAAU,EACV,qCAAqC,EACrC,CAAC,KAAK,EAAE,EAAE;IACR,OAAO,KAAK;SACT,UAAU,CAAC,KAAK,EAAE;QACjB,QAAQ,EAAE,kBAAkB;QAC5B,IAAI,EAAE,QAAQ;QACd,YAAY,EAAE,IAAI;KACnB,CAAC;SACD,MAAM,CAAC,OAAO,EAAE;QACf,KAAK,EAAE,GAAG;QACV,QAAQ,EAAE,+BAA+B;QACzC,IAAI,EAAE,QAAQ;QACd,OAAO,EAAE,CAAC;KACX,CAAC;SACD,MAAM,CAAC,aAAa,EAAE;QACrB,KAAK,EAAE,GAAG;QACV,QAAQ,EAAE,yBAAyB;QACnC,IAAI,EAAE,QAAQ;QACd,OAAO,EAAE,CAAC;KACX,CAAC;SACD,MAAM,CAAC,QAAQ,EAAE;QAChB,QAAQ,EAAE,oBAAoB;QAC9B,IAAI,EAAE,SAAS;QACf,OAAO,EAAE,IAAI;KACd,CAAC;SACD,MAAM,CAAC,aAAa,EAAE;QACrB,QAAQ,EAAE,6BAA6B;QACvC,IAAI,EAAE,SAAS;QACf,OAAO,EAAE,KAAK;KACf,CAAC;SACD,MAAM,CAAC,YAAY,EAAE;QACpB,KAAK,EAAE,GAAG;QACV,QAAQ,EAAE,mBAAmB;QAC7B,IAAI,EAAE,QAAQ;KACf,CAAC;SACD,MAAM,CAAC,WAAW,EAAE;QACnB,QAAQ,EAAE,iBAAiB;QAC3B,IAAI,EAAE,QAAQ;QACd,OAAO,EAAE,QAAQ;KAClB,CAAC;SACD,MAAM,CAAC,SAAS,EAAE;QACjB,KAAK,EAAE,GAAG;QACV,QAAQ,EAAE,iCAAiC;QAC3C,IAAI,EAAE,QAAQ;QACd,OAAO,EAAE,KAAK;KACf,CAAC;SACD,MAAM,CAAC,QAAQ,EAAE;QAChB,KAAK,EAAE,GAAG;QACV,QAAQ,EAAE,eAAe;QACzB,IAAI,EAAE,QAAQ;QACd,OAAO,EAAE,CAAC,MAAM,EAAE,UAAU,EAAE,MAAM,CAAC;QACrC,OAAO,EAAE,UAAU;KACpB,CAAC;SACD,MAAM,CAAC,kBAAkB,EAAE;QAC1B,KAAK,EAAE,MAAM;QACb,QAAQ,EAAE,uCAAuC;QACjD,IAAI,EAAE,SAAS;QACf,OAAO,EAAE,KAAK;KACf,CAAC;SACD,MAAM,CAAC,aAAa,EAAE;QACrB,QAAQ,EAAE,2BAA2B;QACrC,IAAI,EAAE,SAAS;QACf,OAAO,EAAE,KAAK;KACf,CAAC,CAAC;AACP,CAAC,EACD,KAAK,EAAE,IAAI,EAAE,EAAE;IACb,IAAI,CAAC;QACH,MAAM,YAAY,GAAiB;YACjC,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,cAAc,EAAE,IAAI,CAAC,WAAW;YAChC,aAAa,EAAE,IAAI,CAAC,MAAM;YAC1B,cAAc,EAAE,CAAC,IAAI,CAAC,UAAU;YAChC,SAAS,EAAE,IAAI,CAAC,SAAS;YACzB,QAAQ,EAAE,IAAI,CAAC,QAAQ;YACvB,OAAO,EAAE,IAAI,CAAC,OAAO;YACrB,eAAe,EAAE,IAAI,CAAC,eAAe;YACrC,UAAU,EAAE,IAAI,CAAC,UAAU;SAC5B,CAAC;QAEF,OAAO,CAAC,KAAK,CAAC,YAAY,IAAI,CAAC,GAAG,KAAK,CAAC,CAAC;QACzC,MAAM,OAAO,GAAG,MAAM,KAAK,CAAC,IAAI,CAAC,GAAa,EAAE,YAAY,CAAC,CAAC;QAE9D,IAAI,IAAI,CAAC,MAAM,KAAK,MAAM,EAAE,CAAC;YAC3B,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;QAChD,CAAC;aAAM,IAAI,IAAI,CAAC,MAAM,KAAK,UAAU,EAAE,CAAC;YACtC,OAAO,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,EAAE;gBACzB,yDAAyD;gBACzD,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;oBACpB,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;oBAC7B,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;wBACvB,OAAO,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC,mCAAmC;oBAC7D,CAAC;gBACH,CAAC;gBACD,gDAAgD;gBAChD,IAAI,MAAM,CAAC,KAAK,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;oBACpC,OAAO,CAAC,KAAK,CAAC,eAAe,MAAM,CAAC,GAAG,KAAK,MAAM,CAAC,KAAK,EAAE,CAAC,CAAC;gBAC9D,CAAC;qBAAM,IAAI,MAAM,CAAC,KAAK,IAAI,CAAC,MAAM,CAAC,QAAQ,EAAE,CAAC;oBAC5C,OAAO,CAAC,KAAK,CAAC,aAAa,MAAM,CAAC,GAAG,KAAK,MAAM,CAAC,KAAK,EAAE,CAAC,CAAC;gBAC5D,CAAC;YACH,CAAC,CAAC,CAAC;QACL,CAAC;aAAM,IAAI,IAAI,CAAC,MAAM,KAAK,MAAM,EAAE,CAAC;YAClC,OAAO,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,EAAE;gBACzB,OAAO,CAAC,GAAG,CAAC,aAAa,MAAM,CAAC,GAAG,IAAI,CAAC,CAAC;gBACzC,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;oBACpB,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;gBAC/B,CAAC;gBACD,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC;oBACjB,OAAO,CAAC,KAAK,CACX,GAAG,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,OAAO,KAAK,MAAM,CAAC,KAAK,EAAE,CAC5D,CAAC;gBACJ,CAAC;YACH,CAAC,CAAC,CAAC;QACL,CAAC;QAED,yDAAyD;QACzD,MAAM,cAAc,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;QACnE,IAAI,cAAc,EAAE,CAAC;YACnB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;IACH,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,CAAC,KAAK,CAAC,QAAQ,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;QACxE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC,CACF;KACA,OAAO,CACN,aAAa,EACb,2BAA2B,EAC3B,CAAC,KAAK,EAAE,EAAE;IACF,OAAO,KAAK,CAAC,MAAM,CAAC,WAAW,EAAE;QAC7B,QAAQ,EAAE,iBAAiB;QAC3B,IAAI,EAAE,QAAQ;QACd,OAAO,EAAE,QAAQ;KAC1B,CAAC,CAAC;AACL,CAAC,EACD,KAAK,EAAE,IAAI,EAAE,EAAE;IACb,IAAI,CAAC;QACK,MAAM,EAAE,EAAE,EAAE,GAAG,MAAM,MAAM,CAAC,aAAa,CAAC,CAAC;QACnD,MAAM,EAAE,CAAC,IAAI,CAAC,QAAQ,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;QAC1D,OAAO,CAAC,GAAG,CAAC,kBAAkB,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC;IACjD,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACP,OAAO,CAAC,KAAK,CAAC,uBAAuB,EAAE,KAAK,CAAC,CAAC;QACtD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACC,CAAC,CACN;KACA,IAAI,EAAE;KACN,KAAK,EAAE,CAAC"} \ No newline at end of file From abfedca1632883320a07d17ad0d15e7110a96c69 Mon Sep 17 00:00:00 2001 From: Azhar Zaman Date: Sun, 15 Mar 2026 22:55:08 +0500 Subject: [PATCH 3/3] prepare script --- package.json | 1 + 1 file changed, 1 insertion(+) diff --git a/package.json b/package.json index f08ed68..2efde5f 100644 --- a/package.json +++ b/package.json @@ -17,6 +17,7 @@ "build": "tsc", "dev": "tsx src/cli.ts", "prepublishOnly": "npm run build", + "prepare": "npm run build", "typecheck": "tsc --noEmit" }, "keywords": [