shebinleo · shebinleo · Jun 1, 2025 · Jun 1, 2025 · Jun 1, 2025 · Jun 1, 2025
diff --git a/.github/workflows/node.js.yml b/.github/workflows/node.js.yml
@@ -22,10 +22,10 @@ jobs:
 
     steps:
       - name: Checkout code
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
 
       - name: Setup Node.js ${{ matrix.node-version }}
-        uses: actions/setup-node@v3
+        uses: actions/setup-node@v4
         with:
           node-version: ${{ matrix.node-version }}
           cache: 'npm'
@@ -37,6 +37,61 @@ jobs:
       - name: Build (if present)
         run: npm run build --if-present
 
-      - name: Run tests
-        run: npm test
+      - name: Run tests with coverage
+        run: npm run test:coverage
         continue-on-error: ${{ matrix.experimental == true }}
+
+      - name: Generate coverage reports
+        if: matrix.node-version == '20.x'
+        run: |
+          npm run test:coverage:text
+          npm run test:coverage:lcov
+          echo "### Test Coverage Report" >> $GITHUB_STEP_SUMMARY
+          echo '```' >> $GITHUB_STEP_SUMMARY
+          npx nyc report --reporter=text-summary >> $GITHUB_STEP_SUMMARY
+          echo '```' >> $GITHUB_STEP_SUMMARY
+
+      - name: Upload coverage artifacts
+        if: matrix.node-version == '20.x'
+        uses: actions/upload-artifact@v4
+        with:
+          name: coverage-report
+          path: coverage/
+          retention-days: 7
+
+      - name: Comment PR with coverage
+        if: matrix.node-version == '20.x' && github.event_name == 'pull_request'
+        uses: romeovs/lcov-reporter-action@v0.3.1
+        with:
+          lcov-file: ./coverage/lcov.info
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+
+  coverage-check:
+    needs: build
+    runs-on: ubuntu-latest
+    if: github.event_name == 'pull_request'
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Setup Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: '20.x'
+          cache: 'npm'
+
+      - name: Install dependencies
+        run: npm ci
+
+      - name: Check coverage thresholds
+        run: |
+          npm run test:coverage
+          echo "### Coverage Threshold Check" >> $GITHUB_STEP_SUMMARY
+          echo '```' >> $GITHUB_STEP_SUMMARY
+          npx nyc report --reporter=text >> $GITHUB_STEP_SUMMARY
+          echo '```' >> $GITHUB_STEP_SUMMARY
+
+      - name: Enforce coverage thresholds
+        run: |
+          npx nyc check-coverage --lines 80 --functions 80 --branches 80 --statements 80
diff --git a/.gitignore b/.gitignore
@@ -1,5 +1,59 @@
-.DS_STORE
+# Node.js dependencies
+node_modules/
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+pnpm-debug.log*
+package-lock.json
+yarn.lock
+pnpm-lock.yaml
+
+# Environment variables
+.env
+.env.*.local
+
+# Logs
+logs/
 *.log
+log.txt
 
-node_modules/
+# Runtime data
+pids/
+*.pid
+*.seed
+*.pid.lock
+
+# Coverage directories
+coverage/
+.nyc_output/
+
+# Optional npm cache directory
+.npm/
+
+# IDEs and editors
 .idea/
+.vscode/
+*.sublime-workspace
+*.sublime-project
+
+# OS-specific
+.DS_Store
+Thumbs.db
+
+# Build directories
+dist/
+build/
+tmp/
+temp/
+
+# TypeScript
+*.tsbuildinfo
+
+# Optional ESLint cache
+.eslintcache
+
+# Optional stylelint cache
+.stylelintcache
+
+# Optional REPL history
+.node_repl_history
diff --git a/README.md b/README.md
@@ -15,6 +15,7 @@
 - **Page-by-page processing** - Process PDFs page by page
 - **Metadata extraction** - Extract author, title, creation date, and more
 - **Thumbnail generation** - Generate preview images from PDF pages
+- **Buffer support** - Process PDFs from memory buffers or file paths
 - **TypeScript support** - Full type definitions included
 - **Async/Promise based** - Modern async API
 - **Configurable** - Extensive options for customization
@@ -54,44 +55,59 @@ The installation process will automatically download the required Apache Tika an
 
 ```javascript
 const pdf2html = require('pdf2html');
+const fs = require('fs');
 
-// Simple conversion
+// From file path
 const html = await pdf2html.html('path/to/document.pdf');
 console.log(html);
 
+// From buffer
+const pdfBuffer = fs.readFileSync('path/to/document.pdf');
+const html = await pdf2html.html(pdfBuffer);
+console.log(html);
+
 // With options
-const html = await pdf2html.html('path/to/document.pdf', {
+const html = await pdf2html.html(pdfBuffer, {
     maxBuffer: 1024 * 1024 * 10, // 10MB buffer
 });
 ```
 
 ### Extract Text
 
 ```javascript
-// Extract all text from PDF
+// From file path
 const text = await pdf2html.text('path/to/document.pdf');
+
+// From buffer
+const pdfBuffer = fs.readFileSync('path/to/document.pdf');
+const text = await pdf2html.text(pdfBuffer);
 console.log(text);
 ```
 
 ### Process Pages Individually
 
 ```javascript
-// Get HTML for each page
+// From file path
 const htmlPages = await pdf2html.pages('path/to/document.pdf');
+
+// From buffer
+const pdfBuffer = fs.readFileSync('path/to/document.pdf');
+const htmlPages = await pdf2html.pages(pdfBuffer);
 htmlPages.forEach((page, index) => {
     console.log(`Page ${index + 1}:`, page);
 });
 
 // Get text for each page
-const textPages = await pdf2html.pages('path/to/document.pdf', {
+const textPages = await pdf2html.pages(pdfBuffer, {
     text: true,
 });
 ```
 
 ### Extract Metadata
 
 ```javascript
-const metadata = await pdf2html.meta('path/to/document.pdf');
+// Accepts a file path or a Buffer (here, the pdfBuffer read in the earlier examples)
+const metadata = await pdf2html.meta(pdfBuffer);
 console.log(metadata);
 // Output: {
 //   title: 'Document Title',
@@ -109,12 +125,16 @@ console.log(metadata);
 ### Generate Thumbnails
 
 ```javascript
-// Generate thumbnail with default settings
+// From file path
 const thumbnailPath = await pdf2html.thumbnail('path/to/document.pdf');
+
+// From buffer
+const pdfBuffer = fs.readFileSync('path/to/document.pdf');
+const thumbnailPath = await pdf2html.thumbnail(pdfBuffer);
 console.log('Thumbnail saved to:', thumbnailPath);
 
 // Custom thumbnail options
-const thumbnailPath = await pdf2html.thumbnail('path/to/document.pdf', {
+const thumbnailPath = await pdf2html.thumbnail(pdfBuffer, {
     page: 1, // Page number (default: 1)
     imageType: 'png', // 'png' or 'jpg' (default: 'png')
     width: 300, // Width in pixels (default: 160)
@@ -162,48 +182,48 @@ try {
 
 ## 🏗️ API Reference
 
-### `pdf2html.html(filepath, [options])`
+### `pdf2html.html(input, [options])`
 
 Converts PDF to HTML format.
 
-- **filepath** `string` - Path to the PDF file
+- **input** `string | Buffer` - Path to the PDF file, or a Buffer containing the PDF data
 - **options** `object` (optional)
     - `maxBuffer` `number` - Maximum buffer size in bytes (default: 2MB)
 - **Returns:** `Promise<string>` - HTML content
 
-### `pdf2html.text(filepath, [options])`
+### `pdf2html.text(input, [options])`
 
 Extracts text from PDF.
 
-- **filepath** `string` - Path to the PDF file
+- **input** `string | Buffer` - Path to the PDF file, or a Buffer containing the PDF data
 - **options** `object` (optional)
     - `maxBuffer` `number` - Maximum buffer size in bytes
 - **Returns:** `Promise<string>` - Extracted text
 
-### `pdf2html.pages(filepath, [options])`
+### `pdf2html.pages(input, [options])`
 
 Processes PDF page by page.
 
-- **filepath** `string` - Path to the PDF file
+- **input** `string | Buffer` - Path to the PDF file, or a Buffer containing the PDF data
 - **options** `object` (optional)
     - `text` `boolean` - Extract text instead of HTML (default: false)
     - `maxBuffer` `number` - Maximum buffer size in bytes
 - **Returns:** `Promise<string[]>` - Array of HTML or text strings
 
-### `pdf2html.meta(filepath, [options])`
+### `pdf2html.meta(input, [options])`
 
 Extracts PDF metadata.
 
-- **filepath** `string` - Path to the PDF file
+- **input** `string | Buffer` - Path to the PDF file, or a Buffer containing the PDF data
 - **options** `object` (optional)
     - `maxBuffer` `number` - Maximum buffer size in bytes
 - **Returns:** `Promise<object>` - Metadata object
 
-### `pdf2html.thumbnail(filepath, [options])`
+### `pdf2html.thumbnail(input, [options])`
 
 Generates a thumbnail image from PDF.
 
-- **filepath** `string` - Path to the PDF file
+- **input** `string | Buffer` - Path to the PDF file, or a Buffer containing the PDF data
 - **options** `object` (optional)
     - `page` `number` - Page to thumbnail (default: 1)
     - `imageType` `string` - 'png' or 'jpg' (default: 'png')

diff --git a/lib/FileManager.js b/lib/FileManager.js
@@ -3,13 +3,20 @@ const debug = require('debug')('pdf2html');
 const fse = require('fs-extra');
 const path = require('path');
 const URI = require('urijs');
+const crypto = require('crypto');
 const constants = require('../constants');
 
 /**
  * File management utilities
  */
 class FileManager {
     static async withTempFile(sourceFile, tempDir, operation) {
+        // If a source file is already in the temp directory, don't copy it
+        if (sourceFile.includes(tempDir)) {
+            const uri = new URI(sourceFile);
+            return operation(sourceFile, uri);
+        }
+
         const uri = new URI(sourceFile);
         const tempFilePath = path.join(tempDir, uri.filename());
 
@@ -25,6 +32,63 @@ class FileManager {
         const dirs = Object.values(constants.DIRECTORY);
         await Promise.all(dirs.map((dir) => fse.ensureDir(dir)));
     }
+
+    /**
+     * Writes a buffer to a uniquely named file under constants.DIRECTORY.PDF
+     * @param {Buffer} buffer - Data written to the file via fse.writeFile
+     * @param {string} extension - Suffix appended to the generated name, including the dot (default: '.pdf')
+     * @returns {Promise<string>} - Path of the written file; this method does not delete it
+     */
+    static async createTempFileFromBuffer(buffer, extension = '.pdf') {
+        await this.ensureDirectories();
+
+        // Name is built from the current timestamp plus 8 random bytes (hex), not from the buffer content
+        const timestamp = Date.now();
+        const randomBytes = crypto.randomBytes(8).toString('hex');
+        const tempFileName = `temp_${timestamp}_${randomBytes}${extension}`;
+        const tempFilePath = path.join(constants.DIRECTORY.PDF, tempFileName);
+
+        await fse.writeFile(tempFilePath, buffer);
+        return tempFilePath;
+    }
+
+    /**
+     * Validates the input, writes a Buffer to a temp PDF file if needed, runs the processor, then removes that temp file
+     * @param {string|Buffer} input - Non-blank file path, or a Buffer; any other value throws an Error
+     * @param {Function} processor - Called as processor(filePath, isBuffer, tempFilePath); tempFilePath is null for string input
+     * @returns {Promise<*>} - Whatever processor resolves to; temp-file removal failures are only logged via debug
+     */
+    static async processInput(input, processor) {
+        // Reject null/undefined, blank strings, and anything that is neither a string nor a Buffer
+        if (input === null || input === undefined) {
+            throw new Error('Input cannot be null or undefined');
+        }
+
+        if (typeof input === 'string') {
+            if (input.trim() === '') {
+                throw new Error('File path cannot be empty');
+            }
+        } else if (!Buffer.isBuffer(input)) {
+            throw new Error('Input must be a file path (string) or Buffer');
+        }
+
+        const isBuffer = Buffer.isBuffer(input);
+        let filePath = input;
+        let tempFilePath = null;
+
+        try {
+            if (isBuffer) {
+                tempFilePath = await this.createTempFileFromBuffer(input, '.pdf');
+                filePath = tempFilePath;
+            }
+
+            return await processor(filePath, isBuffer, tempFilePath);
+        } finally {
+            if (tempFilePath) {
+                await fse.remove(tempFilePath).catch((err) => debug(`Failed to remove temp file ${tempFilePath}: ${err.message}`));
+            }
+        }
+    }
 }
 
 module.exports = FileManager;