From 015eae834c284c427dd90e62ad9e3d5cf8aa07c9 Mon Sep 17 00:00:00 2001 From: qduc Date: Mon, 15 Dec 2025 18:21:05 +0700 Subject: [PATCH 1/8] feat: Implement browser fallback for SPAs in webFetch tool - Added support for headless browser fetching using @sparticuz/chromium and puppeteer-core. - Introduced BrowserService to handle fetching logic based on environment (Electron vs Server). - Created PuppeteerProvider and ElectronProvider for managing browser instances and fetching content. - Enhanced webFetch.js to first attempt fetching with JSDOM, falling back to browser if content is insufficient or requires JavaScript. - Updated package.json to include new dependencies for browser functionality. - Improved SPA detection logic to trigger browser fallback based on content length and specific error messages. --- backend/Dockerfile | 24 +- backend/package-lock.json | 632 ++++++++++++++++++- backend/package.json | 6 +- backend/src/lib/browser/BrowserService.js | 23 + backend/src/lib/browser/ElectronProvider.js | 64 ++ backend/src/lib/browser/PuppeteerProvider.js | 99 +++ backend/src/lib/tools/webFetch.js | 240 ++++--- docs/webfetch-spa-support-plan.md | 438 +++---------- 8 files changed, 1030 insertions(+), 496 deletions(-) create mode 100644 backend/src/lib/browser/BrowserService.js create mode 100644 backend/src/lib/browser/ElectronProvider.js create mode 100644 backend/src/lib/browser/PuppeteerProvider.js diff --git a/backend/Dockerfile b/backend/Dockerfile index e86b2670..edfbe46b 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -4,6 +4,7 @@ WORKDIR /app COPY package*.json ./ RUN chown -R node:node /app USER node +ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true RUN npm ci ARG NODE_IMAGE @@ -12,6 +13,7 @@ WORKDIR /app COPY package*.json ./ RUN chown -R node:node /app USER node +ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true RUN npm ci --omit=dev ARG NODE_IMAGE @@ -19,7 +21,16 @@ FROM ${NODE_IMAGE} AS dev WORKDIR /app ENV NODE_ENV=development COPY --chown=node:node 
package*.json ./ -RUN apk add --no-cache su-exec sqlite-libs +RUN apk add --no-cache su-exec sqlite-libs \ + chromium \ + nss \ + freetype \ + harfbuzz \ + ca-certificates \ + ttf-freefont + +ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true +ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium-browser COPY --from=deps --chown=node:node /app/node_modules ./node_modules COPY --chown=node:node . . RUN chmod +x entrypoint.sh @@ -38,7 +49,16 @@ ENV PORT=3001 ENV INSTALL_ON_START=0 COPY --from=prod-deps --chown=node:node /app/node_modules ./node_modules COPY --chown=node:node . . -RUN apk add --no-cache su-exec +RUN apk add --no-cache su-exec \ + chromium \ + nss \ + freetype \ + harfbuzz \ + ca-certificates \ + ttf-freefont + +ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true +ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium-browser RUN chmod +x entrypoint.sh RUN mkdir -p logs && chown -R node:node logs USER node diff --git a/backend/package-lock.json b/backend/package-lock.json index 0e31c0e0..9c699bff 100644 --- a/backend/package-lock.json +++ b/backend/package-lock.json @@ -11,6 +11,7 @@ "dependencies": { "@blackglory/better-sqlite3-migrations": "^0.1.20", "@mozilla/readability": "^0.6.0", + "@sparticuz/chromium": "^143.0.0", "bcryptjs": "^3.0.2", "better-sqlite3": "^12.4.6", "cors": "^2.8.5", @@ -26,6 +27,7 @@ "p-limit": "^4.0.0", "pino": "^9.3.2", "pino-roll": "^3.1.0", + "puppeteer-core": "^24.33.0", "turndown": "^7.2.1", "uuid": "^9.0.1", "zod": "^3.23.8" @@ -1550,6 +1552,64 @@ "url": "https://opencollective.com/pkgr" } }, + "node_modules/@puppeteer/browsers": { + "version": "2.11.0", + "resolved": "https://registry.npmjs.org/@puppeteer/browsers/-/browsers-2.11.0.tgz", + "integrity": "sha512-n6oQX6mYkG8TRPuPXmbPidkUbsSRalhmaaVAQxvH1IkQy63cwsH+kOjB3e4cpCDHg0aSvsiX9bQ4s2VB6mGWUQ==", + "license": "Apache-2.0", + "dependencies": { + "debug": "^4.4.3", + "extract-zip": "^2.0.1", + "progress": "^2.0.3", + "proxy-agent": "^6.5.0", + "semver": "^7.7.3", + "tar-fs": "^3.1.1", + "yargs": 
"^17.7.2" + }, + "bin": { + "browsers": "lib/cjs/main-cli.js" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/@puppeteer/browsers/node_modules/semver": { + "version": "7.7.3", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.3.tgz", + "integrity": "sha512-SdsKMrI9TdgjdweUSR9MweHA4EJ8YxHn8DFaDisvhVlUOe4BF1tLD7GAj0lIqWVl+dPb/rExr0Btby5loQm20Q==", + "license": "ISC", + "bin": { + "semver": "bin/semver.js" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/@puppeteer/browsers/node_modules/tar-fs": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-3.1.1.tgz", + "integrity": "sha512-LZA0oaPOc2fVo82Txf3gw+AkEd38szODlptMYejQUhndHMLQ9M059uXR+AfS7DNo0NpINvSqDsvyaCrBVkptWg==", + "license": "MIT", + "dependencies": { + "pump": "^3.0.0", + "tar-stream": "^3.1.5" + }, + "optionalDependencies": { + "bare-fs": "^4.0.1", + "bare-path": "^3.0.0" + } + }, + "node_modules/@puppeteer/browsers/node_modules/tar-stream": { + "version": "3.1.7", + "resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-3.1.7.tgz", + "integrity": "sha512-qJj60CXt7IU1Ffyc3NJMjh6EkuCFej46zUqJ4J7pqYlThyd9bO0XBTmcOIhSzZJVWfsLks0+nle/j538YAW9RQ==", + "license": "MIT", + "dependencies": { + "b4a": "^1.6.4", + "fast-fifo": "^1.2.0", + "streamx": "^2.15.0" + } + }, "node_modules/@sinclair/typebox": { "version": "0.34.41", "resolved": "https://registry.npmjs.org/@sinclair/typebox/-/typebox-0.34.41.tgz", @@ -1577,6 +1637,50 @@ "@sinonjs/commons": "^3.0.1" } }, + "node_modules/@sparticuz/chromium": { + "version": "143.0.0", + "resolved": "https://registry.npmjs.org/@sparticuz/chromium/-/chromium-143.0.0.tgz", + "integrity": "sha512-/vvplFwu1yDNt/Q1uwXQUwxmfZutU9K+WtzSYwptLWV8IeSdSF6vwW9hW11fZA8Gl9+2mytAGUe6/otgd2VeXA==", + "license": "MIT", + "dependencies": { + "follow-redirects": "^1.15.11", + "tar-fs": "^3.1.1" + }, + "engines": { + "node": ">=20.11.0" + } + }, + "node_modules/@sparticuz/chromium/node_modules/tar-fs": { + "version": 
"3.1.1", + "resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-3.1.1.tgz", + "integrity": "sha512-LZA0oaPOc2fVo82Txf3gw+AkEd38szODlptMYejQUhndHMLQ9M059uXR+AfS7DNo0NpINvSqDsvyaCrBVkptWg==", + "license": "MIT", + "dependencies": { + "pump": "^3.0.0", + "tar-stream": "^3.1.5" + }, + "optionalDependencies": { + "bare-fs": "^4.0.1", + "bare-path": "^3.0.0" + } + }, + "node_modules/@sparticuz/chromium/node_modules/tar-stream": { + "version": "3.1.7", + "resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-3.1.7.tgz", + "integrity": "sha512-qJj60CXt7IU1Ffyc3NJMjh6EkuCFej46zUqJ4J7pqYlThyd9bO0XBTmcOIhSzZJVWfsLks0+nle/j538YAW9RQ==", + "license": "MIT", + "dependencies": { + "b4a": "^1.6.4", + "fast-fifo": "^1.2.0", + "streamx": "^2.15.0" + } + }, + "node_modules/@tootallnate/quickjs-emscripten": { + "version": "0.23.0", + "resolved": "https://registry.npmjs.org/@tootallnate/quickjs-emscripten/-/quickjs-emscripten-0.23.0.tgz", + "integrity": "sha512-C5Mc6rdnsaJDjO3UpGW/CQTHtCKaYlScZTly4JIu97Jxo/odCiH0ITnDXSJPTOrEKk/ycSZ0AOgTmkDtkOsvIA==", + "license": "MIT" + }, "node_modules/@tybys/wasm-util": { "version": "0.10.1", "resolved": "https://registry.npmjs.org/@tybys/wasm-util/-/wasm-util-0.10.1.tgz", @@ -1678,7 +1782,7 @@ "version": "24.10.1", "resolved": "https://registry.npmjs.org/@types/node/-/node-24.10.1.tgz", "integrity": "sha512-GNWcUTRBgIRJD5zj+Tq0fKOJ5XZajIiBroOF0yvj2bSU1WvNdYS/dn9UxwsujGW4JX06dnHyjV2y9rRaybH0iQ==", - "dev": true, + "devOptional": true, "license": "MIT", "dependencies": { "undici-types": "~7.16.0" @@ -1708,6 +1812,16 @@ "dev": true, "license": "MIT" }, + "node_modules/@types/yauzl": { + "version": "2.10.3", + "resolved": "https://registry.npmjs.org/@types/yauzl/-/yauzl-2.10.3.tgz", + "integrity": "sha512-oJoftv0LSuaDZE3Le4DbKX+KS9G36NzOeSap90UIK0yMA/NhKJhqlSGtNDORNRaIbQfzjXDrQa0ytJ6mNRGz/Q==", + "license": "MIT", + "optional": true, + "dependencies": { + "@types/node": "*" + } + }, "node_modules/@ungap/structured-clone": { "version": 
"1.3.0", "resolved": "https://registry.npmjs.org/@ungap/structured-clone/-/structured-clone-1.3.0.tgz", @@ -2092,7 +2206,6 @@ "version": "4.3.0", "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", - "dev": true, "license": "MIT", "dependencies": { "color-convert": "^2.0.1" @@ -2137,6 +2250,18 @@ "dev": true, "license": "MIT" }, + "node_modules/ast-types": { + "version": "0.13.4", + "resolved": "https://registry.npmjs.org/ast-types/-/ast-types-0.13.4.tgz", + "integrity": "sha512-x1FCFnFifvYDDzTaLII71vG5uvDwgtmDTEVWAxrgeiR8VjMONcCXJx7E+USjDtHlwFmt9MysbqgF9b9Vjr6w+w==", + "license": "MIT", + "dependencies": { + "tslib": "^2.0.1" + }, + "engines": { + "node": ">=4" + } + }, "node_modules/asynckit": { "version": "0.4.0", "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", @@ -2152,6 +2277,20 @@ "node": ">=8.0.0" } }, + "node_modules/b4a": { + "version": "1.7.3", + "resolved": "https://registry.npmjs.org/b4a/-/b4a-1.7.3.tgz", + "integrity": "sha512-5Q2mfq2WfGuFp3uS//0s6baOJLMoVduPYVeNmDYxu5OUA1/cBfvr2RIS7vi62LdNj/urk1hfmj867I3qt6uZ7Q==", + "license": "Apache-2.0", + "peerDependencies": { + "react-native-b4a": "*" + }, + "peerDependenciesMeta": { + "react-native-b4a": { + "optional": true + } + } + }, "node_modules/babel-jest": { "version": "30.2.0", "resolved": "https://registry.npmjs.org/babel-jest/-/babel-jest-30.2.0.tgz", @@ -2258,6 +2397,97 @@ "dev": true, "license": "MIT" }, + "node_modules/bare-events": { + "version": "2.8.2", + "resolved": "https://registry.npmjs.org/bare-events/-/bare-events-2.8.2.tgz", + "integrity": "sha512-riJjyv1/mHLIPX4RwiK+oW9/4c3TEUeORHKefKAKnZ5kyslbN+HXowtbaVEqt4IMUB7OXlfixcs6gsFeo/jhiQ==", + "license": "Apache-2.0", + "peerDependencies": { + "bare-abort-controller": "*" + }, + "peerDependenciesMeta": { + "bare-abort-controller": { + "optional": true + } + } + }, + 
"node_modules/bare-fs": { + "version": "4.5.2", + "resolved": "https://registry.npmjs.org/bare-fs/-/bare-fs-4.5.2.tgz", + "integrity": "sha512-veTnRzkb6aPHOvSKIOy60KzURfBdUflr5VReI+NSaPL6xf+XLdONQgZgpYvUuZLVQ8dCqxpBAudaOM1+KpAUxw==", + "license": "Apache-2.0", + "optional": true, + "dependencies": { + "bare-events": "^2.5.4", + "bare-path": "^3.0.0", + "bare-stream": "^2.6.4", + "bare-url": "^2.2.2", + "fast-fifo": "^1.3.2" + }, + "engines": { + "bare": ">=1.16.0" + }, + "peerDependencies": { + "bare-buffer": "*" + }, + "peerDependenciesMeta": { + "bare-buffer": { + "optional": true + } + } + }, + "node_modules/bare-os": { + "version": "3.6.2", + "resolved": "https://registry.npmjs.org/bare-os/-/bare-os-3.6.2.tgz", + "integrity": "sha512-T+V1+1srU2qYNBmJCXZkUY5vQ0B4FSlL3QDROnKQYOqeiQR8UbjNHlPa+TIbM4cuidiN9GaTaOZgSEgsvPbh5A==", + "license": "Apache-2.0", + "optional": true, + "engines": { + "bare": ">=1.14.0" + } + }, + "node_modules/bare-path": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/bare-path/-/bare-path-3.0.0.tgz", + "integrity": "sha512-tyfW2cQcB5NN8Saijrhqn0Zh7AnFNsnczRcuWODH0eYAXBsJ5gVxAUuNr7tsHSC6IZ77cA0SitzT+s47kot8Mw==", + "license": "Apache-2.0", + "optional": true, + "dependencies": { + "bare-os": "^3.0.1" + } + }, + "node_modules/bare-stream": { + "version": "2.7.0", + "resolved": "https://registry.npmjs.org/bare-stream/-/bare-stream-2.7.0.tgz", + "integrity": "sha512-oyXQNicV1y8nc2aKffH+BUHFRXmx6VrPzlnaEvMhram0nPBrKcEdcyBg5r08D0i8VxngHFAiVyn1QKXpSG0B8A==", + "license": "Apache-2.0", + "optional": true, + "dependencies": { + "streamx": "^2.21.0" + }, + "peerDependencies": { + "bare-buffer": "*", + "bare-events": "*" + }, + "peerDependenciesMeta": { + "bare-buffer": { + "optional": true + }, + "bare-events": { + "optional": true + } + } + }, + "node_modules/bare-url": { + "version": "2.3.2", + "resolved": "https://registry.npmjs.org/bare-url/-/bare-url-2.3.2.tgz", + "integrity": 
"sha512-ZMq4gd9ngV5aTMa5p9+UfY0b3skwhHELaDkhEHetMdX0LRkW9kzaym4oo/Eh+Ghm0CCDuMTsRIGM/ytUc1ZYmw==", + "license": "Apache-2.0", + "optional": true, + "dependencies": { + "bare-path": "^3.0.0" + } + }, "node_modules/base64-js": { "version": "1.5.1", "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz", @@ -2288,6 +2518,15 @@ "baseline-browser-mapping": "dist/cli.js" } }, + "node_modules/basic-ftp": { + "version": "5.0.5", + "resolved": "https://registry.npmjs.org/basic-ftp/-/basic-ftp-5.0.5.tgz", + "integrity": "sha512-4Bcg1P8xhUuqcii/S0Z9wiHIrQVPMermM1any+MX5GeGD7faD3/msQUDGLol9wOcz4/jbg/WJnGqoJF6LiBdtg==", + "license": "MIT", + "engines": { + "node": ">=10.0.0" + } + }, "node_modules/bcryptjs": { "version": "3.0.3", "resolved": "https://registry.npmjs.org/bcryptjs/-/bcryptjs-3.0.3.tgz", @@ -2460,6 +2699,15 @@ "ieee754": "^1.1.13" } }, + "node_modules/buffer-crc32": { + "version": "0.2.13", + "resolved": "https://registry.npmjs.org/buffer-crc32/-/buffer-crc32-0.2.13.tgz", + "integrity": "sha512-VO9Ht/+p3SN7SKWqcrgEzjGbRSJYTx+Q1pTQC0wrWqHx0vpJraQ6GtHx8tvcg1rlK1byhU5gccxgOgj7B0TDkQ==", + "license": "MIT", + "engines": { + "node": "*" + } + }, "node_modules/buffer-equal-constant-time": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/buffer-equal-constant-time/-/buffer-equal-constant-time-1.0.1.tgz", @@ -2633,6 +2881,19 @@ "integrity": "sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg==", "license": "ISC" }, + "node_modules/chromium-bidi": { + "version": "11.0.0", + "resolved": "https://registry.npmjs.org/chromium-bidi/-/chromium-bidi-11.0.0.tgz", + "integrity": "sha512-cM3DI+OOb89T3wO8cpPSro80Q9eKYJ7hGVXoGS3GkDPxnYSqiv+6xwpIf6XERyJ9Tdsl09hmNmY94BkgZdVekw==", + "license": "Apache-2.0", + "dependencies": { + "mitt": "^3.0.1", + "zod": "^3.24.1" + }, + "peerDependencies": { + "devtools-protocol": "*" + } + }, "node_modules/ci-info": { "version": "4.3.1", "resolved": 
"https://registry.npmjs.org/ci-info/-/ci-info-4.3.1.tgz", @@ -2660,7 +2921,6 @@ "version": "8.0.1", "resolved": "https://registry.npmjs.org/cliui/-/cliui-8.0.1.tgz", "integrity": "sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ==", - "dev": true, "license": "ISC", "dependencies": { "string-width": "^4.2.0", @@ -2675,7 +2935,6 @@ "version": "5.0.1", "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", - "dev": true, "license": "MIT", "engines": { "node": ">=8" @@ -2685,14 +2944,12 @@ "version": "8.0.0", "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", - "dev": true, "license": "MIT" }, "node_modules/cliui/node_modules/string-width": { "version": "4.2.3", "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", - "dev": true, "license": "MIT", "dependencies": { "emoji-regex": "^8.0.0", @@ -2707,7 +2964,6 @@ "version": "6.0.1", "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", - "dev": true, "license": "MIT", "dependencies": { "ansi-regex": "^5.0.1" @@ -2720,7 +2976,6 @@ "version": "7.0.0", "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz", "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==", - "dev": true, "license": "MIT", "dependencies": { "ansi-styles": "^4.0.0", @@ -2756,7 +3011,6 @@ "version": "2.0.1", "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", "integrity": 
"sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", - "dev": true, "license": "MIT", "dependencies": { "color-name": "~1.1.4" @@ -2769,7 +3023,6 @@ "version": "1.1.4", "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", - "dev": true, "license": "MIT" }, "node_modules/colorette": { @@ -3045,6 +3298,20 @@ "node": ">=0.10.0" } }, + "node_modules/degenerator": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/degenerator/-/degenerator-5.0.1.tgz", + "integrity": "sha512-TllpMR/t0M5sqCXfj85i4XaAzxmS5tVA16dqvdkMwGmzI+dXLXnw3J+3Vdv7VKw+ThlTMboK6i9rnZ6Nntj5CQ==", + "license": "MIT", + "dependencies": { + "ast-types": "^0.13.4", + "escodegen": "^2.1.0", + "esprima": "^4.0.1" + }, + "engines": { + "node": ">= 14" + } + }, "node_modules/delayed-stream": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", @@ -3082,6 +3349,12 @@ "node": ">=8" } }, + "node_modules/devtools-protocol": { + "version": "0.0.1534754", + "resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1534754.tgz", + "integrity": "sha512-26T91cV5dbOYnXdJi5qQHoTtUoNEqwkHcAyu/IKtjIAxiEqPMrDiRkDOPWVsGfNZGmlQVHQbZRSjD8sxagWVsQ==", + "license": "BSD-3-Clause" + }, "node_modules/dezalgo": { "version": "1.0.4", "resolved": "https://registry.npmjs.org/dezalgo/-/dezalgo-1.0.4.tgz", @@ -3257,7 +3530,6 @@ "version": "3.2.0", "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.2.0.tgz", "integrity": "sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==", - "dev": true, "license": "MIT", "engines": { "node": ">=6" @@ -3282,6 +3554,27 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/escodegen": { + "version": "2.1.0", + "resolved": 
"https://registry.npmjs.org/escodegen/-/escodegen-2.1.0.tgz", + "integrity": "sha512-2NlIDTwUWJN0mRPQOdtQBzbUHvdGY2P1VXSyU83Q3xKxM7WHX2Ql8dKq782Q9TgQUNOLEzEYu9bzLNj1q88I5w==", + "license": "BSD-2-Clause", + "dependencies": { + "esprima": "^4.0.1", + "estraverse": "^5.2.0", + "esutils": "^2.0.2" + }, + "bin": { + "escodegen": "bin/escodegen.js", + "esgenerate": "bin/esgenerate.js" + }, + "engines": { + "node": ">=6.0" + }, + "optionalDependencies": { + "source-map": "~0.6.1" + } + }, "node_modules/eslint": { "version": "9.39.1", "resolved": "https://registry.npmjs.org/eslint/-/eslint-9.39.1.tgz", @@ -3477,7 +3770,6 @@ "version": "4.0.1", "resolved": "https://registry.npmjs.org/esprima/-/esprima-4.0.1.tgz", "integrity": "sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A==", - "dev": true, "license": "BSD-2-Clause", "bin": { "esparse": "bin/esparse.js", @@ -3517,7 +3809,6 @@ "version": "5.3.0", "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.3.0.tgz", "integrity": "sha512-MMdARuVEQziNTeJD8DgMqmhwR11BRQ/cBP+pLtYdSTnf3MIO8fFeiINEbX36ZdNlfU/7A9f3gUw49B3oQsvwBA==", - "dev": true, "license": "BSD-2-Clause", "engines": { "node": ">=4.0" @@ -3527,7 +3818,6 @@ "version": "2.0.3", "resolved": "https://registry.npmjs.org/esutils/-/esutils-2.0.3.tgz", "integrity": "sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g==", - "dev": true, "license": "BSD-2-Clause", "engines": { "node": ">=0.10.0" @@ -3562,6 +3852,15 @@ "node": ">=0.8.x" } }, + "node_modules/events-universal": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/events-universal/-/events-universal-1.0.1.tgz", + "integrity": "sha512-LUd5euvbMLpwOF8m6ivPCbhQeSiYVNb8Vs0fQ8QjXo0JTkEHpz8pxdQf0gStltaPpw0Cca8b39KxvK9cfKRiAw==", + "license": "Apache-2.0", + "dependencies": { + "bare-events": "^2.7.0" + } + }, "node_modules/execa": { "version": "5.1.1", "resolved": 
"https://registry.npmjs.org/execa/-/execa-5.1.1.tgz", @@ -3696,6 +3995,41 @@ "integrity": "sha512-C55Cr/dQWQHdwuMTF+ySNMYqydVclmKdgFHoC/8gTu5Zoe1Nrx6jgArwfZ+7jKU78VjDfhrkGAJ38ucf0lozeQ==", "license": "MIT" }, + "node_modules/extract-zip": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/extract-zip/-/extract-zip-2.0.1.tgz", + "integrity": "sha512-GDhU9ntwuKyGXdZBUgTIe+vXnWj0fppUEtMDL0+idd5Sta8TGpHssn/eusA9mrPr9qNDym6SxAYZjNvCn/9RBg==", + "license": "BSD-2-Clause", + "dependencies": { + "debug": "^4.1.1", + "get-stream": "^5.1.0", + "yauzl": "^2.10.0" + }, + "bin": { + "extract-zip": "cli.js" + }, + "engines": { + "node": ">= 10.17.0" + }, + "optionalDependencies": { + "@types/yauzl": "^2.9.1" + } + }, + "node_modules/extract-zip/node_modules/get-stream": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/get-stream/-/get-stream-5.2.0.tgz", + "integrity": "sha512-nBF+F1rAZVCu/p7rjzgA+Yb4lfYXrpl7a6VmJrU8wF9I1CKvP/QwPNZHnOlwbTkY6dvtFIzFMSyQXbLoTQPRpA==", + "license": "MIT", + "dependencies": { + "pump": "^3.0.0" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/fast-copy": { "version": "3.0.2", "resolved": "https://registry.npmjs.org/fast-copy/-/fast-copy-3.0.2.tgz", @@ -3710,6 +4044,12 @@ "dev": true, "license": "MIT" }, + "node_modules/fast-fifo": { + "version": "1.3.2", + "resolved": "https://registry.npmjs.org/fast-fifo/-/fast-fifo-1.3.2.tgz", + "integrity": "sha512-/d9sfos4yxzpwkDkuN7k2SqFKtYNmCTzgfEpz82x34IM9/zc8KGxQoXg1liNC/izpRM/MBdt44Nmx41ZWqk+FQ==", + "license": "MIT" + }, "node_modules/fast-json-stable-stringify": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/fast-json-stable-stringify/-/fast-json-stable-stringify-2.1.0.tgz", @@ -3741,6 +4081,15 @@ "bser": "2.1.1" } }, + "node_modules/fd-slicer": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/fd-slicer/-/fd-slicer-1.1.0.tgz", + "integrity": 
"sha512-cE1qsB/VwyQozZ+q1dGxR8LBYNZeofhEdUNGSMbQD3Gw2lAzX9Zb3uIU6Ebc/Fmyjo9AWWfnn0AUCHqtevs/8g==", + "license": "MIT", + "dependencies": { + "pend": "~1.2.0" + } + }, "node_modules/fetch-blob": { "version": "3.2.0", "resolved": "https://registry.npmjs.org/fetch-blob/-/fetch-blob-3.2.0.tgz", @@ -3851,6 +4200,26 @@ "dev": true, "license": "ISC" }, + "node_modules/follow-redirects": { + "version": "1.15.11", + "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.11.tgz", + "integrity": "sha512-deG2P0JfjrTxl50XGCDyfI97ZGVCxIpfKYmfyrQ54n5FO/0gfIES8C/Psl6kWVDolizcaaxZJnTS0QSMxvnsBQ==", + "funding": [ + { + "type": "individual", + "url": "https://github.com/sponsors/RubenVerborgh" + } + ], + "license": "MIT", + "engines": { + "node": ">=4.0" + }, + "peerDependenciesMeta": { + "debug": { + "optional": true + } + } + }, "node_modules/foreground-child": { "version": "3.3.1", "resolved": "https://registry.npmjs.org/foreground-child/-/foreground-child-3.3.1.tgz", @@ -4004,7 +4373,6 @@ "version": "2.0.5", "resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz", "integrity": "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==", - "dev": true, "license": "ISC", "engines": { "node": "6.* || 8.* || >= 10.*" @@ -4070,6 +4438,29 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/get-uri": { + "version": "6.0.5", + "resolved": "https://registry.npmjs.org/get-uri/-/get-uri-6.0.5.tgz", + "integrity": "sha512-b1O07XYq8eRuVzBNgJLstU6FYc1tS6wnMtF1I1D9lE8LxZSOGZ7LhxN54yPP6mGw5f2CkXY2BQUL9Fx41qvcIg==", + "license": "MIT", + "dependencies": { + "basic-ftp": "^5.0.2", + "data-uri-to-buffer": "^6.0.2", + "debug": "^4.3.4" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/get-uri/node_modules/data-uri-to-buffer": { + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-6.0.2.tgz", + "integrity": 
"sha512-7hvf7/GW8e86rW0ptuwS3OcBGDjIi6SZva7hCyWC0yYry2cOPmLIjXAUHI6DK2HsnwJd9ifmt57i8eV2n4YNpw==", + "license": "MIT", + "engines": { + "node": ">= 14" + } + }, "node_modules/github-from-package": { "version": "0.0.0", "resolved": "https://registry.npmjs.org/github-from-package/-/github-from-package-0.0.0.tgz", @@ -4491,7 +4882,6 @@ "version": "3.0.0", "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", - "dev": true, "license": "MIT", "engines": { "node": ">=8" @@ -5853,6 +6243,12 @@ "node": ">=16 || 14 >=14.17" } }, + "node_modules/mitt": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/mitt/-/mitt-3.0.1.tgz", + "integrity": "sha512-vKivATfr97l2/QBCYAkXYDbrIWPM2IIKEl7YPhjCvKlG3kE2gm+uBo6nEXK3M5/Ffh/FLpKExzOQ3JJoJGFKBw==", + "license": "MIT" + }, "node_modules/mkdirp": { "version": "0.5.6", "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-0.5.6.tgz", @@ -5994,6 +6390,15 @@ "node": ">= 0.6" } }, + "node_modules/netmask": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/netmask/-/netmask-2.0.2.tgz", + "integrity": "sha512-dBpDMdxv9Irdq66304OLfEmQ9tbNRFnFTuZiLo+bD+r332bBmMJ8GBLXklIXXgxd3+v9+KUnZaUR5PJMa75Gsg==", + "license": "MIT", + "engines": { + "node": ">= 0.4.0" + } + }, "node_modules/node-abi": { "version": "3.85.0", "resolved": "https://registry.npmjs.org/node-abi/-/node-abi-3.85.0.tgz", @@ -6326,6 +6731,38 @@ "node": ">=6" } }, + "node_modules/pac-proxy-agent": { + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/pac-proxy-agent/-/pac-proxy-agent-7.2.0.tgz", + "integrity": "sha512-TEB8ESquiLMc0lV8vcd5Ql/JAKAoyzHFXaStwjkzpOpC5Yv+pIzLfHvjTSdf3vpa2bMiUQrg9i6276yn8666aA==", + "license": "MIT", + "dependencies": { + "@tootallnate/quickjs-emscripten": "^0.23.0", + "agent-base": "^7.1.2", + "debug": "^4.3.4", + "get-uri": "^6.0.1", + "http-proxy-agent": 
"^7.0.0", + "https-proxy-agent": "^7.0.6", + "pac-resolver": "^7.0.1", + "socks-proxy-agent": "^8.0.5" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/pac-resolver": { + "version": "7.0.1", + "resolved": "https://registry.npmjs.org/pac-resolver/-/pac-resolver-7.0.1.tgz", + "integrity": "sha512-5NPgf87AT2STgwa2ntRMr45jTKrYBGkVU36yT0ig/n/GMAa3oPqhZfIQ2kMEimReg0+t9kZViDVZ83qfVUlckg==", + "license": "MIT", + "dependencies": { + "degenerator": "^5.0.0", + "netmask": "^2.0.2" + }, + "engines": { + "node": ">= 14" + } + }, "node_modules/package-json-from-dist": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/package-json-from-dist/-/package-json-from-dist-1.0.1.tgz", @@ -6457,6 +6894,12 @@ "url": "https://opencollective.com/express" } }, + "node_modules/pend": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/pend/-/pend-1.2.0.tgz", + "integrity": "sha512-F3asv42UuXchdzt+xXqfW1OGlVBe+mxa2mqI0pg5yAHZPvFmY3Y6drSf/GQ1A86WgWEN9Kzh/WrgKa6iGcHXLg==", + "license": "MIT" + }, "node_modules/picocolors": { "version": "1.1.1", "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.1.1.tgz", @@ -6777,6 +7220,15 @@ ], "license": "MIT" }, + "node_modules/progress": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/progress/-/progress-2.0.3.tgz", + "integrity": "sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA==", + "license": "MIT", + "engines": { + "node": ">=0.4.0" + } + }, "node_modules/proxy-addr": { "version": "2.0.7", "resolved": "https://registry.npmjs.org/proxy-addr/-/proxy-addr-2.0.7.tgz", @@ -6790,6 +7242,40 @@ "node": ">= 0.10" } }, + "node_modules/proxy-agent": { + "version": "6.5.0", + "resolved": "https://registry.npmjs.org/proxy-agent/-/proxy-agent-6.5.0.tgz", + "integrity": "sha512-TmatMXdr2KlRiA2CyDu8GqR8EjahTG3aY3nXjdzFyoZbmB8hrBsTyMezhULIXKnC0jpfjlmiZ3+EaCzoInSu/A==", + "license": "MIT", + "dependencies": { + "agent-base": "^7.1.2", + "debug": 
"^4.3.4", + "http-proxy-agent": "^7.0.1", + "https-proxy-agent": "^7.0.6", + "lru-cache": "^7.14.1", + "pac-proxy-agent": "^7.1.0", + "proxy-from-env": "^1.1.0", + "socks-proxy-agent": "^8.0.5" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/proxy-agent/node_modules/lru-cache": { + "version": "7.18.3", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-7.18.3.tgz", + "integrity": "sha512-jumlc0BIUrS3qJGgIkWZsyfAM7NCWiBcCDhnd+3NNM5KbBmLTgHVfWBcg6W+rLUsIpzpERPsvwUP7CckAQSOoA==", + "license": "ISC", + "engines": { + "node": ">=12" + } + }, + "node_modules/proxy-from-env": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz", + "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==", + "license": "MIT" + }, "node_modules/psl": { "version": "1.15.0", "resolved": "https://registry.npmjs.org/psl/-/psl-1.15.0.tgz", @@ -6828,6 +7314,24 @@ "node": ">=6" } }, + "node_modules/puppeteer-core": { + "version": "24.33.0", + "resolved": "https://registry.npmjs.org/puppeteer-core/-/puppeteer-core-24.33.0.tgz", + "integrity": "sha512-tPTxVg+Qdj/8av4cy6szv3GlhxeOoNhiiMZ955fjxQyvPQE/6DjCa6ZyF/x0WJrlgBZtaLSP8TQgJb7FdLDXXA==", + "license": "Apache-2.0", + "dependencies": { + "@puppeteer/browsers": "2.11.0", + "chromium-bidi": "11.0.0", + "debug": "^4.4.3", + "devtools-protocol": "0.0.1534754", + "typed-query-selector": "^2.12.0", + "webdriver-bidi-protocol": "0.3.9", + "ws": "^8.18.3" + }, + "engines": { + "node": ">=18" + } + }, "node_modules/pure-rand": { "version": "7.0.1", "resolved": "https://registry.npmjs.org/pure-rand/-/pure-rand-7.0.1.tgz", @@ -6980,7 +7484,6 @@ "version": "2.1.1", "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz", "integrity": "sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==", - "dev": true, "license": "MIT", "engines": { "node": 
">=0.10.0" @@ -7361,6 +7864,44 @@ "node": ">=8" } }, + "node_modules/smart-buffer": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/smart-buffer/-/smart-buffer-4.2.0.tgz", + "integrity": "sha512-94hK0Hh8rPqQl2xXc3HsaBoOXKV20MToPkcXvwbISWLEs+64sBq5kFgn2kJDHb1Pry9yrP0dxrCI9RRci7RXKg==", + "license": "MIT", + "engines": { + "node": ">= 6.0.0", + "npm": ">= 3.0.0" + } + }, + "node_modules/socks": { + "version": "2.8.7", + "resolved": "https://registry.npmjs.org/socks/-/socks-2.8.7.tgz", + "integrity": "sha512-HLpt+uLy/pxB+bum/9DzAgiKS8CX1EvbWxI4zlmgGCExImLdiad2iCwXT5Z4c9c3Eq8rP2318mPW2c+QbtjK8A==", + "license": "MIT", + "dependencies": { + "ip-address": "^10.0.1", + "smart-buffer": "^4.2.0" + }, + "engines": { + "node": ">= 10.0.0", + "npm": ">= 3.0.0" + } + }, + "node_modules/socks-proxy-agent": { + "version": "8.0.5", + "resolved": "https://registry.npmjs.org/socks-proxy-agent/-/socks-proxy-agent-8.0.5.tgz", + "integrity": "sha512-HehCEsotFqbPW9sJ8WVYB6UbmIMv7kUUORIF2Nncq4VQvBfNBLibW9YZR5dlYCSUhwcD628pRllm7n+E+YTzJw==", + "license": "MIT", + "dependencies": { + "agent-base": "^7.1.2", + "debug": "^4.3.4", + "socks": "^2.8.3" + }, + "engines": { + "node": ">= 14" + } + }, "node_modules/sonic-boom": { "version": "4.2.0", "resolved": "https://registry.npmjs.org/sonic-boom/-/sonic-boom-4.2.0.tgz", @@ -7374,7 +7915,7 @@ "version": "0.6.1", "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==", - "dev": true, + "devOptional": true, "license": "BSD-3-Clause", "engines": { "node": ">=0.10.0" @@ -7447,6 +7988,17 @@ "node": ">=10.0.0" } }, + "node_modules/streamx": { + "version": "2.23.0", + "resolved": "https://registry.npmjs.org/streamx/-/streamx-2.23.0.tgz", + "integrity": "sha512-kn+e44esVfn2Fa/O0CPFcex27fjIL6MkVae0Mm6q+E6f0hWv578YCERbv+4m02cjxvDsPKLnmxral/rR6lBMAg==", + "license": "MIT", + "dependencies": { + 
"events-universal": "^1.0.0", + "fast-fifo": "^1.3.2", + "text-decoder": "^1.1.0" + } + }, "node_modules/string_decoder": { "version": "1.3.0", "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz", @@ -7778,6 +8330,15 @@ "url": "https://github.com/sponsors/isaacs" } }, + "node_modules/text-decoder": { + "version": "1.2.3", + "resolved": "https://registry.npmjs.org/text-decoder/-/text-decoder-1.2.3.tgz", + "integrity": "sha512-3/o9z3X0X0fTupwsYvR03pJ/DjWuqqrfwBgTQzdWDiQSm9KitAyz/9WqsT2JQW7KV2m+bC2ol/zqpW37NHxLaA==", + "license": "Apache-2.0", + "dependencies": { + "b4a": "^1.6.4" + } + }, "node_modules/thread-stream": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/thread-stream/-/thread-stream-3.1.0.tgz", @@ -7857,9 +8418,7 @@ "version": "2.8.1", "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", - "dev": true, - "license": "0BSD", - "optional": true + "license": "0BSD" }, "node_modules/tunnel-agent": { "version": "0.6.0", @@ -7932,6 +8491,12 @@ "node": ">= 0.6" } }, + "node_modules/typed-query-selector": { + "version": "2.12.0", + "resolved": "https://registry.npmjs.org/typed-query-selector/-/typed-query-selector-2.12.0.tgz", + "integrity": "sha512-SbklCd1F0EiZOyPiW192rrHZzZ5sBijB6xM+cpmrwDqObvdtunOHHIk9fCGsoK5JVIYXoyEp4iEdE3upFH3PAg==", + "license": "MIT" + }, "node_modules/typedarray": { "version": "0.0.6", "resolved": "https://registry.npmjs.org/typedarray/-/typedarray-0.0.6.tgz", @@ -7949,7 +8514,7 @@ "version": "7.16.0", "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.16.0.tgz", "integrity": "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw==", - "dev": true, + "devOptional": true, "license": "MIT" }, "node_modules/universalify": { @@ -8130,6 +8695,12 @@ "node": ">= 8" } }, + "node_modules/webdriver-bidi-protocol": { + 
"version": "0.3.9", + "resolved": "https://registry.npmjs.org/webdriver-bidi-protocol/-/webdriver-bidi-protocol-0.3.9.tgz", + "integrity": "sha512-uIYvlRQ0PwtZR1EzHlTMol1G0lAlmOe6wPykF9a77AK3bkpvZHzIVxRE2ThOx5vjy2zISe0zhwf5rzuUfbo1PQ==", + "license": "Apache-2.0" + }, "node_modules/webidl-conversions": { "version": "7.0.0", "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-7.0.0.tgz", @@ -8375,7 +8946,6 @@ "version": "5.0.8", "resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz", "integrity": "sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==", - "dev": true, "license": "ISC", "engines": { "node": ">=10" @@ -8392,7 +8962,6 @@ "version": "17.7.2", "resolved": "https://registry.npmjs.org/yargs/-/yargs-17.7.2.tgz", "integrity": "sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w==", - "dev": true, "license": "MIT", "dependencies": { "cliui": "^8.0.1", @@ -8411,7 +8980,6 @@ "version": "21.1.1", "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-21.1.1.tgz", "integrity": "sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==", - "dev": true, "license": "ISC", "engines": { "node": ">=12" @@ -8421,7 +8989,6 @@ "version": "5.0.1", "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", - "dev": true, "license": "MIT", "engines": { "node": ">=8" @@ -8431,14 +8998,12 @@ "version": "8.0.0", "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", - "dev": true, "license": "MIT" }, "node_modules/yargs/node_modules/string-width": { "version": "4.2.3", "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", 
"integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", - "dev": true, "license": "MIT", "dependencies": { "emoji-regex": "^8.0.0", @@ -8453,7 +9018,6 @@ "version": "6.0.1", "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", - "dev": true, "license": "MIT", "dependencies": { "ansi-regex": "^5.0.1" @@ -8462,6 +9026,16 @@ "node": ">=8" } }, + "node_modules/yauzl": { + "version": "2.10.0", + "resolved": "https://registry.npmjs.org/yauzl/-/yauzl-2.10.0.tgz", + "integrity": "sha512-p4a9I6X6nu6IhoGmBqAcbJy1mlC4j27vEPZX9F4L4/vZT3Lyq1VkFHw/V/PUcB9Buo+DG3iHkT0x3Qya58zc3g==", + "license": "MIT", + "dependencies": { + "buffer-crc32": "~0.2.3", + "fd-slicer": "~1.1.0" + } + }, "node_modules/yocto-queue": { "version": "1.2.2", "resolved": "https://registry.npmjs.org/yocto-queue/-/yocto-queue-1.2.2.tgz", diff --git a/backend/package.json b/backend/package.json index f9b20cca..bf703d71 100644 --- a/backend/package.json +++ b/backend/package.json @@ -17,6 +17,7 @@ "dependencies": { "@blackglory/better-sqlite3-migrations": "^0.1.20", "@mozilla/readability": "^0.6.0", + "@sparticuz/chromium": "^143.0.0", "bcryptjs": "^3.0.2", "better-sqlite3": "^12.4.6", "cors": "^2.8.5", @@ -29,12 +30,13 @@ "multer": "^2.0.2", "nanoid": "^5.1.6", "node-fetch": "^3.3.2", + "p-limit": "^4.0.0", "pino": "^9.3.2", "pino-roll": "^3.1.0", + "puppeteer-core": "^24.33.0", "turndown": "^7.2.1", "uuid": "^9.0.1", - "zod": "^3.23.8", - "p-limit": "^4.0.0" + "zod": "^3.23.8" }, "devDependencies": { "eslint": "^9.34.0", diff --git a/backend/src/lib/browser/BrowserService.js b/backend/src/lib/browser/BrowserService.js new file mode 100644 index 00000000..3274a75f --- /dev/null +++ b/backend/src/lib/browser/BrowserService.js @@ -0,0 +1,23 @@ +class BrowserService { + /** + * Fetches content from a URL using a browser 
engine. + * Logic splits based on environment: + * - Electron: Uses native BrowserWindow + * - Server/Docker: Uses Puppeteer/Chromium + * @param {string} url + * @returns {Promise<string>} HTML content + */ + async fetchPageContent(url) { + if (process.env.IS_ELECTRON) { + // Use Electron's native browser capabilities (singleton) + const { electronProvider } = await import('./ElectronProvider.js'); + return electronProvider.fetchPageContent(url); + } else { + // Use Puppeteer (headless Chrome) + const { puppeteerProvider } = await import('./PuppeteerProvider.js'); + return puppeteerProvider.fetchPageContent(url); + } + } +} + +export const browserService = new BrowserService(); diff --git a/backend/src/lib/browser/ElectronProvider.js b/backend/src/lib/browser/ElectronProvider.js new file mode 100644 index 00000000..3f8bbb16 --- /dev/null +++ b/backend/src/lib/browser/ElectronProvider.js @@ -0,0 +1,64 @@ +export class ElectronProvider { + /** + * Load URL with timeout to prevent hanging on slow/broken pages + * @param {Electron.BrowserWindow} win + * @param {string} url + * @param {number} timeout - Timeout in milliseconds (default 30000) + * @returns {Promise<void>} + */ + _loadWithTimeout(win, url, timeout = 30000) { + return new Promise((resolve, reject) => { + const timer = setTimeout(() => reject(new Error('Page load timeout')), timeout); + win.webContents.once('did-finish-load', () => { + clearTimeout(timer); + resolve(); + }); + win.webContents.once('did-fail-load', (_, code, desc) => { + clearTimeout(timer); + reject(new Error(`Load failed: ${desc}`)); + }); + win.loadURL(url); + }); + } + + async fetchPageContent(url) { + let BrowserWindow; + try { + const electron = await import('electron'); + BrowserWindow = electron.BrowserWindow; + } catch (error) { + throw new Error(`Failed to import electron: ${error.message}`); + } + + if (!BrowserWindow) { + throw new Error('BrowserWindow is not defined in electron module'); + } + + const win = new BrowserWindow({ + show: false,
+ webPreferences: { + offscreen: true, + nodeIntegration: false, + contextIsolation: true, + } + }); + + try { + await this._loadWithTimeout(win, url); + const content = await win.webContents.executeJavaScript('document.documentElement.outerHTML'); + return content; + } catch (error) { + // Add a comprehensive error message + console.error('[ElectronProvider] Error fetching page:', error); + throw error; + } finally { + // Ensure the window is destroyed to free memory + if (!win.isDestroyed()) { + win.destroy(); + } + } + } +} + +// Export singleton instance for consistency with PuppeteerProvider +export const electronProvider = new ElectronProvider(); diff --git a/backend/src/lib/browser/PuppeteerProvider.js b/backend/src/lib/browser/PuppeteerProvider.js new file mode 100644 index 00000000..a1e6dbae --- /dev/null +++ b/backend/src/lib/browser/PuppeteerProvider.js @@ -0,0 +1,99 @@ +import puppeteer from 'puppeteer-core'; +import chromium from '@sparticuz/chromium'; +import pLimit from 'p-limit'; + +class PuppeteerProvider { + constructor() { + this.browser = null; + this.browserPromise = null; // Promise-based lock for concurrent initialization + this.timeoutId = null; + this.limit = pLimit(5); // Max 5 concurrent pages + } + + async getBrowser() { + if (this.browser) { + this.rescheduleCleanup(); + return this.browser; + } + + // Return existing initialization if in progress (prevents race condition) + if (this.browserPromise) { + return this.browserPromise; + } + + this.browserPromise = this._initBrowser(); + try { + this.browser = await this.browserPromise; + this.rescheduleCleanup(); + return this.browser; + } finally { + this.browserPromise = null; + } + } + + /** + * Initialize browser instance (separated for race condition handling) + * @returns {Promise} + */ + async _initBrowser() { + // Determine executable path + // In Docker (Alpine), we set PUPPETEER_EXECUTABLE_PATH. + // Use it if available, otherwise try @sparticuz/chromium or default. 
+ let executablePath = process.env.PUPPETEER_EXECUTABLE_PATH; + + if (!executablePath) { + try { + executablePath = await chromium.executablePath(); + } catch (error) { + // Fallback or ignore if running locally without the lambda layer + console.debug('Could not get chromium executable path from @sparticuz/chromium', error); + } + } + + const browser = await puppeteer.launch({ + args: chromium.args, + defaultViewport: chromium.defaultViewport, + executablePath: executablePath || '/usr/bin/chromium-browser', + headless: chromium.headless, + ignoreHTTPSErrors: true, + }); + + return browser; + } + + rescheduleCleanup() { + if (this.timeoutId) clearTimeout(this.timeoutId); + this.timeoutId = setTimeout(() => this.cleanup(), 5 * 60 * 1000); // 5 minutes + } + + async cleanup() { + if (this.browser) { + console.log('[PuppeteerProvider] Closing idle browser'); + await this.browser.close(); + this.browser = null; + } + } + + async fetchPageContent(url) { + return this.limit(async () => { + const browser = await this.getBrowser(); + const page = await browser.newPage(); + try { + // Basic bot evasion / settings + await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36'); + + await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 }); + const content = await page.content(); + return content; + } catch (error) { + console.error('[PuppeteerProvider] Error fetching page:', error); + throw error; + } finally { + if (page) await page.close(); + this.rescheduleCleanup(); + } + }); + } +} + +export const puppeteerProvider = new PuppeteerProvider(); diff --git a/backend/src/lib/tools/webFetch.js b/backend/src/lib/tools/webFetch.js index bb02feb8..310734a0 100644 --- a/backend/src/lib/tools/webFetch.js +++ b/backend/src/lib/tools/webFetch.js @@ -2,6 +2,7 @@ import { createTool } from './baseTool.js'; import TurndownService from 'turndown'; import { Readability } from '@mozilla/readability'; 
import { JSDOM } from 'jsdom'; +import { browserService } from '../browser/BrowserService.js'; const TOOL_NAME = 'web_fetch'; @@ -170,136 +171,177 @@ function handleContinuation(token, maxChars) { }; } -async function handler({ url, maxChars, targetHeading, headingRange, continuation_token }) { - // Handle continuation token (fetch next chunk from cache) - if (continuation_token) { - return handleContinuation(continuation_token, maxChars); - } +// Helper: detect if a small binary buffer looks like text +function isProbablyText(buffer) { + if (!buffer || buffer.length === 0) return false; - try { - // Fetch the web page - const response = await fetch(url, { - headers: { - 'User-Agent': 'Mozilla/5.0 (compatible; ChatForge/1.0; +https://chatforge.app)', - }, - redirect: 'follow', - // 10 second timeout - signal: AbortSignal.timeout(10000), - }); + // Quick null-byte check (very likely binary) + const sampleLen = Math.min(buffer.length, 1024); + for (let i = 0; i < sampleLen; i++) { + if (buffer[i] === 0) return false; + } - if (!response.ok) { - throw new Error(`HTTP error! status: ${response.status}`); + // Decode and examine printable vs control chars + const sample = new TextDecoder('utf-8', { fatal: false }).decode(buffer.slice(0, sampleLen)); + let nonPrintable = 0; + let total = 0; + for (let i = 0; i < sample.length; i++) { + const code = sample.charCodeAt(i); + // allow common whitespace: tab, line feed, carriage return + if (code === 9 || code === 10 || code === 13) { + total++; + continue; } + if (code < 32) { + nonPrintable++; + } + total++; + } + if (total === 0) return false; + // If less than 10% of the sample are non-printable control chars, + // treat it as text. 
+ return (nonPrintable / total) < 0.10; +} - const contentType = response.headers.get('content-type') || ''; +async function basicFetch(url) { + // Fetch the web page + const response = await fetch(url, { + headers: { + 'User-Agent': 'Mozilla/5.0 (compatible; ChatForge/1.0; +https://chatforge.app)', + }, + redirect: 'follow', + // 10 second timeout + signal: AbortSignal.timeout(10000), + }); - // If the Content-Type clearly indicates text-like content, accept it. - // Otherwise we'll peek at the first chunk of the body and apply a - // lightweight binary-vs-text heuristic to decide if the response is - // text-parsable. This allows fetching resources that may not set - // Content-Type correctly but are still text (e.g., some servers). - const contentTypeLooksLikeText = /^(?:text\/)|(?:application\/(?:xml|xhtml\+xml|json))|html|xml|json/i.test(contentType); + if (!response.ok) { + throw new Error(`HTTP error! status: ${response.status}`); + } - // Stream response body with size limit to prevent memory blowup - const reader = response.body && typeof response.body.getReader === 'function' - ? response.body.getReader() - : null; + const contentType = response.headers.get('content-type') || ''; - if (!reader) { - throw new Error('Response body is not readable'); - } + // If the Content-Type clearly indicates text-like content, accept it. + // Otherwise we'll peek at the first chunk of the body and apply a + // lightweight binary-vs-text heuristic to decide if the response is + // text-parsable. This allows fetching resources that may not set + // Content-Type correctly but are still text (e.g., some servers). + const contentTypeLooksLikeText = /^(?:text\/)|(?:application\/(?:xml|xhtml\+xml|json))|html|xml|json/i.test(contentType); - const decoder = new TextDecoder(); - let html = ''; - let bytesDownloaded = 0; + // Stream response body with size limit to prevent memory blowup + const reader = response.body && typeof response.body.getReader === 'function' + ? 
response.body.getReader() + : null; - // Helper: detect if a small binary buffer looks like text - function isProbablyText(buffer) { - if (!buffer || buffer.length === 0) return false; + if (!reader) { + throw new Error('Response body is not readable'); + } - // Quick null-byte check (very likely binary) - const sampleLen = Math.min(buffer.length, 1024); - for (let i = 0; i < sampleLen; i++) { - if (buffer[i] === 0) return false; - } + const decoder = new TextDecoder(); + let html = ''; + let bytesDownloaded = 0; - // Decode and examine printable vs control chars - const sample = new TextDecoder('utf-8', { fatal: false }).decode(buffer.slice(0, sampleLen)); - let nonPrintable = 0; - let total = 0; - for (let i = 0; i < sample.length; i++) { - const code = sample.charCodeAt(i); - // allow common whitespace: tab, line feed, carriage return - if (code === 9 || code === 10 || code === 13) { - total++; - continue; - } - if (code < 32) { - nonPrintable++; - } - total++; - } - if (total === 0) return false; - // If less than 10% of the sample are non-printable control chars, - // treat it as text. - return (nonPrintable / total) < 0.10; + try { + // Read the first chunk to allow content sniffing when needed + const first = await reader.read(); + if (first.done) { + reader.releaseLock(); + throw new Error('Empty response body'); } - try { - // Read the first chunk to allow content sniffing when needed - const first = await reader.read(); - if (first.done) { - reader.releaseLock(); - throw new Error('Empty response body'); + const firstChunk = first.value; + bytesDownloaded += firstChunk.length; + + if (bytesDownloaded > MAX_BODY_SIZE) { + reader.cancel(); + throw new Error(`Response body exceeds maximum size limit of ${MAX_BODY_SIZE / (1024 * 1024)} MB`); + } + + if (!contentTypeLooksLikeText) { + // If the header doesn't clearly say text, use the heuristic on the + // first chunk to avoid reading binary blobs. 
+ if (!isProbablyText(firstChunk)) { + reader.cancel(); + throw new Error(`URL does not return text-parsable content. Content-Type: ${contentType}`); } + } - const firstChunk = first.value; - bytesDownloaded += firstChunk.length; + // Append first chunk and continue streaming the rest + html += decoder.decode(firstChunk, { stream: true }); + + while (true) { + const { done, value } = await reader.read(); + + if (done) break; + + bytesDownloaded += value.length; if (bytesDownloaded > MAX_BODY_SIZE) { reader.cancel(); throw new Error(`Response body exceeds maximum size limit of ${MAX_BODY_SIZE / (1024 * 1024)} MB`); } - if (!contentTypeLooksLikeText) { - // If the header doesn't clearly say text, use the heuristic on the - // first chunk to avoid reading binary blobs. - if (!isProbablyText(firstChunk)) { - reader.cancel(); - throw new Error(`URL does not return text-parsable content. Content-Type: ${contentType}`); - } - } - - // Append first chunk and continue streaming the rest - html += decoder.decode(firstChunk, { stream: true }); - - while (true) { - const { done, value } = await reader.read(); + html += decoder.decode(value, { stream: true }); + } - if (done) break; + // Flush any remaining bytes in the decoder + html += decoder.decode(); + } finally { + try { + reader.releaseLock(); + } catch { + // ignore + } + } + return html; +} - bytesDownloaded += value.length; +async function handler({ url, maxChars, targetHeading, headingRange, continuation_token }) { + // Handle continuation token (fetch next chunk from cache) + if (continuation_token) { + return handleContinuation(continuation_token, maxChars); + } - if (bytesDownloaded > MAX_BODY_SIZE) { - reader.cancel(); - throw new Error(`Response body exceeds maximum size limit of ${MAX_BODY_SIZE / (1024 * 1024)} MB`); - } + let html = ''; + let errorMessages = []; - html += decoder.decode(value, { stream: true }); - } + // 1. 
Try simple fetch + JSDOM first (fastest) + try { + html = await basicFetch(url); + } catch (error) { + errorMessages.push(`Basic fetch failed: ${error.message}`); + } - // Flush any remaining bytes in the decoder - html += decoder.decode(); - } finally { - try { - reader.releaseLock(); - } catch { - // ignore + // 2. Check for failure triggers (SPA detection) + // - No content (fetch failed) + // - Very short content (<300 chars usually means stub) + // - Specific "Enable JS" messages + // - noscript tag containing JavaScript requirement messages (not just any noscript tag) + const noscriptNeedsJs = /]*>.*?(?:enable|require|need).*?javascript/is.test(html); + const isFailure = !html + || html.length < 300 + || html.includes("You need to enable JavaScript") + || noscriptNeedsJs; + + // 3. Fallback to Browser Engine if needed + if (isFailure) { + try { + // console.log(`Triggering browser fallback for ${url}`); + html = await browserService.fetchPageContent(url); + } catch (browserError) { + console.error('[webFetch] Browser fallback failed:', browserError); + errorMessages.push(`Browser fallback failed: ${browserError.message}`); + + // If we have some content from basic fetch, using it despite being "low quality" is better than crashing + // But if we have NO content, throw exception. + if (!html) { + throw new Error(`Failed to fetch URL. Errors: ${errorMessages.join('; ')}`); } } + } + try { const dom = new JSDOM(html, { url }); const document = dom.window.document; diff --git a/docs/webfetch-spa-support-plan.md b/docs/webfetch-spa-support-plan.md index 6a810db8..a52cac71 100644 --- a/docs/webfetch-spa-support-plan.md +++ b/docs/webfetch-spa-support-plan.md @@ -8,404 +8,114 @@ Upgrade the `webFetch` tool (`backend/src/lib/tools/webFetch.js`) with a browser **Goal**: Add browser fallback for SPAs while keeping JSDOM as the primary (lightweight) method.
-## Recommended Approach +## Approved Strategy -**Hybrid Strategy**: Try JSDOM first (current behavior), fall back to headless browser only when needed. +**Hybrid Engine Approach**: +1. **Server/Docker**: Use `@sparticuz/chromium` + `puppeteer-core`. +2. **Electron App**: Use native `BrowserWindow` (no extra dependencies). +3. **Trigger**: Try JSDOM first. Auto-fallback to browser if content is missing or requires JS. -**Browser Engine**: chrome-aws-lambda with puppeteer-core (lightest option at ~80-120MB) +## Research Decisions (Tasks Resolved) -## Research Tasks (Do These First) - -### 1. Research chrome-aws-lambda current status and alternatives -**Why**: -- chrome-aws-lambda may be deprecated or have maintenance issues -- Newer alternatives like @sparticuz/chromium might be better -- Need to verify 2025 compatibility with Node.js versions -- Check if there are lighter-weight alternatives - -**What to research**: -- Current maintenance status of chrome-aws-lambda -- Alternative packages (@sparticuz/chromium, playwright-chromium-headless) -- Version compatibility with Node.js 20+ -- Known issues in Docker environments - -### 2. Research Chromium Docker optimization best practices -**Why**: -- Docker Chromium setup has specific requirements that evolve -- Security concerns (sandboxing, capabilities) -- Minimal dependency list changes over time -- Multi-stage build optimizations - -**What to research**: -- Minimal Chromium dependencies for 2025 Debian/Alpine images -- Chrome flags for resource optimization in containers -- Security best practices (--no-sandbox implications) -- Font and locale requirements - -### 3. Research browser pooling patterns and resource limits -**Why**: -- Browser pooling libraries may exist (generic-pool, etc.) 
-- Best practices for connection limits have evolved -- Memory leak prevention strategies -- Graceful shutdown patterns - -**What to research**: -- Existing browser pooling libraries -- Recommended pool sizes for different memory constraints -- Browser instance lifecycle management -- Memory leak detection and prevention - -### 4. Research modern SPA detection techniques -**Why**: -- New JavaScript frameworks emerge constantly -- Detection patterns need to catch Vue 3, React 18+, Svelte, etc. -- Meta tags and data attributes have evolved -- Better heuristics may exist - -**What to research**: -- Current SPA framework detection methods -- Meta tags used by modern frameworks (2024-2025) -- DOM patterns that indicate client-side rendering -- Reliable heuristics (script-to-content ratio thresholds) +- **Engine Selection**: `@sparticuz/chromium` chosen for server environments (Node 20+ compatible). +- **Docker Optimization**: Will use Alpine Linux packages (`apk add chromium`) instead of complex manual builds. +- **Pooling**: Will implement a simple LRU-style pool (max 2 instances) directly. +- **SPA Detection**: "Failure-driven" detection (fallback if JSDOM gets <300 chars or specific "enable JS" warnings) instead of complex heuristics. ## Implementation Steps -### Step 1: Install Dependencies +### Step 1: Install Server Dependencies +For `backend/` only: ```bash -./dev.sh exec backend npm install chrome-aws-lambda puppeteer-core --save +npm install puppeteer-core @sparticuz/chromium ``` +*Note: These will be mostly unused in the Electron build, but required for the server build.* -**Note**: After research, this may change to a different package. 
- -### Step 2: Update Dockerfile -Update `backend/Dockerfile` to include Chromium and dependencies: +### Step 2: Update Dockerfile (Alpine) +Update `backend/Dockerfile` to install system Chromium for Alpine: ```dockerfile -# Install Chromium dependencies -RUN apt-get update && apt-get install -y \ +# Add to "prod-deps" and "runner" stages +RUN apk add --no-cache \ chromium \ - chromium-driver \ - fonts-liberation \ - libnss3 \ - libxss1 \ - && rm -rf /var/lib/apt/lists/* + nss \ + freetype \ + harfbuzz \ + ca-certificates \ + ttf-freefont -# Set Chromium environment variables -ENV CHROME_BIN=/usr/bin/chromium ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true -ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium -``` - -**Note**: Final dependency list should come from research task #2. - -### Step 3: Create Browser Utility Module -Create `backend/src/lib/browserFetcher.js` with: - -**Core functionality**: -- Browser initialization with optimized flags -- Browser instance pooling (reuse instances) -- Timeout handling (10s default) -- Resource cleanup -- Error handling - -**Example structure**: -```javascript -import chromium from 'chrome-aws-lambda'; -import puppeteer from 'puppeteer-core'; - -class BrowserPool { - constructor(maxInstances = 2) { - this.pool = []; - this.maxInstances = maxInstances; - this.idleTimeout = 5 * 60 * 1000; // 5 minutes - } - - async acquireBrowser() { - // Get or create browser instance - } - - async releaseBrowser(browser) { - // Return to pool or close if pool is full - } - - async fetchWithBrowser(url, options = {}) { - const browser = await this.acquireBrowser(); - try { - const page = await browser.newPage(); - await page.goto(url, { - waitUntil: 'networkidle0', - timeout: options.timeout || 10000 - }); - const html = await page.content(); - await page.close(); - return html; - } finally { - await this.releaseBrowser(browser); - } - } - - async cleanup() { - // Close all browser instances - } -} - -export const browserPool = new BrowserPool(); 
-``` - -**Optimized Chromium flags** (adjust based on research): -```javascript -const args = [ - '--disable-gpu', - '--disable-dev-shm-usage', - '--disable-setuid-sandbox', - '--no-sandbox', - '--no-zygote', - '--single-process', - '--disable-accelerated-2d-canvas', - '--disable-background-networking', - '--disable-default-apps', - '--disable-extensions', - '--disable-sync', - '--metrics-recording-only', - '--mute-audio', - '--no-first-run', -]; +ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium-browser ``` -### Step 4: Add SPA Detection Logic -In `webFetch.js`, create function to detect if a page needs JavaScript: +### Step 3: Create Abstract Browser Interface +Create `backend/src/lib/browser/BrowserService.js` to handle the environment split: ```javascript -function isSPA(html, extractedContent) { - // Check if extracted content is too short - if (extractedContent.textContent.length < 300) { - return true; - } - - // Detect common SPA frameworks - const spaIndicators = [ - /
<\/div>/, // React - /
<\/div>/, // Vue - /ng-app=/, // Angular - /__NEXT_DATA__/, // Next.js - /__nuxt/, // Nuxt - ]; - - for (const pattern of spaIndicators) { - if (pattern.test(html)) { - return true; +class BrowserService { + async fetchPageContent(url) { + if (process.env.IS_ELECTRON) { + return this.fetchWithElectron(url); + } else { + return this.fetchWithPuppeteer(url); } } - // Check script-to-content ratio - const scriptMatches = html.match(/]*>[\s\S]*?<\/script>/gi) || []; - const totalScriptLength = scriptMatches.join('').length; - const contentLength = extractedContent.textContent.length; - - if (totalScriptLength > contentLength * 3) { - // More script than content likely indicates SPA - return true; + async fetchWithElectron(url) { + // Dynamic import to avoid bundling electron in server build + const { BrowserWindow } = await import('electron'); + // Create invisible window, load URL, get content, destroy window } - return false; -} -``` - -**Note**: Detection logic should be refined based on research task #4. - -### Step 5: Implement Browser Fallback -Update the `handler` function in `webFetch.js`: - -```javascript -import { browserPool } from './browserFetcher.js'; - -async function handler({ url, maxChars, targetHeading, headingRange, continuation_token }) { - // ... existing continuation handling ... - - try { - // Fetch the web page (existing code) - const response = await fetch(url, { /* ... 
*/ }); - let html = await streamResponse(response); - - // Try JSDOM extraction first - const dom = new JSDOM(html, { url }); - const document = dom.window.document; - let extractedContent = tryExtractionStrategies(document); - - // Check if we need browser fallback - let usedBrowser = false; - let browserError = null; - - if (isSPA(html, extractedContent)) { - try { - // Re-fetch with browser - html = await browserPool.fetchWithBrowser(url); - - // Re-extract with browser-rendered HTML - const browserDom = new JSDOM(html, { url }); - extractedContent = tryExtractionStrategies(browserDom.window.document); - usedBrowser = true; - } catch (error) { - // Browser failed, use JSDOM result with warning - browserError = `Browser fallback failed: ${error.message}`; - } - } - - // ... rest of existing extraction logic ... - - return { - url, - title: extractedContent.title || 'Untitled', - markdown: finalMarkdown, - // ... existing fields ... - ...(usedBrowser && { extractionMethod: 'browser' }), - ...(browserError && { browserError }), - }; - } catch (error) { - // ... existing error handling ... + async fetchWithPuppeteer(url) { + // Check pool, acquire instance, new page, get content, release } } ``` -### Step 6: Browser Instance Pooling -Implement efficient resource management in `browserFetcher.js`: +### Step 4: Implement Puppeteer Provider (Server) +Create `backend/src/lib/browser/PuppeteerProvider.js`: +- Manages `@sparticuz/chromium` instance. +- Implements simple pooling (reuse browser instance). +- Handles resource cleanup (close browser after 5m idle). -**Features**: -- Create browser pool manager (max 2-3 instances) -- Reuse browser instances across requests -- Implement idle timeout (close after 5 min inactivity) -- Add graceful shutdown on process exit +### Step 5: Implement Electron Provider (Desktop) +Create `backend/src/lib/browser/ElectronProvider.js`: +- Uses `new BrowserWindow({ show: false, webPreferences: { offscreen: true } })`. 
+- Much faster as it shares the main process engine. +- Zero extra memory overhead compared to spawning a whole new Chromium. -**Pool configuration**: -```javascript -const POOL_CONFIG = { - maxInstances: 2, // Max concurrent browsers - idleTimeout: 5 * 60 * 1000, // 5 minutes - pageTimeout: 10000, // 10 seconds per page -}; -``` +### Step 6: Update WebFetch Logic +Update `webFetch.js` to use the fallback strategy: -**Graceful shutdown**: ```javascript -process.on('SIGTERM', async () => { - await browserPool.cleanup(); - process.exit(0); -}); - -process.on('SIGINT', async () => { - await browserPool.cleanup(); - process.exit(0); -}); +// 1. Try simple fetch + JSDOM +let content = await basicFetch(url); + +// 2. Check for failure triggers +const isFailure = content.length < 300 + || content.includes("You need to enable JavaScript") + || content.includes("