Gyyyn · 5h4h6y46 · Feb 12, 2026 · Feb 14, 2026
diff --git a/.gitignore b/.gitignore
@@ -19,6 +19,10 @@ models/kokoro/*
 # Ignore audio files
 static/audio_cache/*
 
+# Ignore PDF position caches
+users/**/.pdf_cache/
+**/.pdf_cache/
+
 # Ignore build dirs
 .flatpak-builder
 build-dir

diff --git a/.idea/.gitignore b/.idea/.gitignore
diff --git a/.idea/OpenWebTTS.iml b/.idea/OpenWebTTS.iml
diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
diff --git a/.idea/misc.xml b/.idea/misc.xml
diff --git a/.idea/modules.xml b/.idea/modules.xml
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
diff --git a/BUILD.md b/BUILD.md
@@ -50,6 +50,10 @@ OpenWebTTS/
 2. `pip` for managing dependencies (usually comes with Python).
 3. `espeak-ng` for fallback.
 4. `ffmpeg` or `libav` for audio processing.
+5. **Tesseract OCR** for PDF text extraction (scanned PDFs). [Download for Windows](https://github.com/UB-Mannheim/tesseract/wiki)
+6. **Poppler** for PDF rendering and OCR preprocessing. [Download for Windows](https://github.com/oschwartz10612/poppler-windows/releases/)
+   - After installation, add Poppler's `bin` directory to your system PATH
+   - Or use Chocolatey: `choco install poppler` (requires admin)
 
 > **Note:** Other Python versions might not be fully compatible due to dependencies. Later version might work, but use at your own risk.
 

diff --git a/Dockerfile b/Dockerfile
@@ -10,16 +10,22 @@ RUN apt-get update && apt-get install -y \
   ffmpeg \
   pkg-config \
   espeak-ng \
+  curl \
   && rm -rf /var/lib/apt/lists/*
 
+# Install Node.js and npm (Node.js 20.x via the NodeSource setup script)
+RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \
+  apt-get install -y nodejs && \
+  rm -rf /var/lib/apt/lists/*
+
 # Set working directory
 WORKDIR /app
 
 # Copy requirements first for better caching
 COPY requirements.txt .
 
 # Install Python dependencies
 RUN pip install --no-cache-dir -r requirements.txt
 RUN pip install https://github.com/KittenML/KittenTTS/releases/download/0.1/kittentts-0.1.0-py3-none-any.whl
 RUN pip install https://github.com/rsxdalv/chatterbox/releases/download/v0.4.4/tts_webui_chatterbox_tts-0.4.4-py3-none-any.whl
 

diff --git a/README.md b/README.md
@@ -26,6 +26,33 @@ Features marked with an `*` are *paid* on other platforms!
 
 See `BUILD.md` for detailed instructions. If you know what you're doing: clone the repo, install Python dependencies with a venv and build with `npm`.
 
+## Browser Extension 🔊
+
+OpenWebTTS now includes browser extensions for **Chrome** and **Firefox** that let you read any webpage aloud with word-by-word highlighting!
+
+### Features:
+- 📖 Read entire webpages or just selected text
+- 🎯 Real-time word-by-word highlighting as text is spoken
+- 🎨 Customizable highlight colors (yellow, green, blue, pink, orange)
+- ⚡ Adjustable reading speed (0.5x to 2.0x)
+- 🔄 Auto-scroll to keep reading position visible
+- 🎭 Support for all OpenWebTTS voice engines
+
+### Quick Install:
+
+1. **Start the backend:**
+   ```bash
+   python app.py
+   ```
+
+2. **Load extension:**
+   - **Chrome:** Navigate to `chrome://extensions/`, enable Developer mode, click "Load unpacked", and select the `browser-extension/` folder
+   - **Firefox:** Navigate to `about:debugging#/runtime/this-firefox`, click "Load Temporary Add-on", select `browser-extension/manifest_firefox.json`
+
+3. **Start reading:** Navigate to any webpage, click the extension icon, and then click "📖 Read Page"!
+
+See [browser-extension/README.md](browser-extension/README.md) for the complete installation guide and feature list.
+
 ## Using TTS models
 
 ### Piper

diff --git a/app.py b/app.py
@@ -4,18 +4,78 @@
 import threading
 import time
 import socket
+import mimetypes
+import logging
+import warnings
 
 from fastapi import FastAPI
 from fastapi.staticfiles import StaticFiles
+from starlette.responses import JSONResponse
 
 # Import config and router
 import config
 from functions.routes import router
 from functions.openai_api import openai_api_router
 
+# Suppress noisy connection reset errors (benign - occur when browser cancels requests).
+# NOTE: this raises the threshold of the whole "asyncio" logger, so every asyncio
+# record below CRITICAL is hidden, not only connection resets.
+logging.getLogger("asyncio").setLevel(logging.CRITICAL)
+warnings.filterwarnings("ignore", message=".*Connection reset.*")
+
+# Configure MIME types for JavaScript modules
+mimetypes.add_type('application/javascript', '.mjs')
+mimetypes.add_type('application/javascript', '.js')
+
+# Custom StaticFiles with better error handling and CORS support
+class AudioStaticFiles(StaticFiles):
+    """Serve static files with permissive CORS headers on every response.
+
+    A RuntimeError containing "Response content shorter than Content-Length"
+    (treated as a file accessed while being written) becomes a 503 JSON reply if
+    no response has started yet, and is logged otherwise. Other RuntimeErrors propagate.
+    """
+
+    async def __call__(self, scope, receive, send):
+        response_started = False
+
+        async def send_with_cors(message):
+            nonlocal response_started
+            if message["type"] == "http.response.start":
+                response_started = True
+                # Add CORS headers to allow cross-origin access
+                headers = list(message.get("headers", []))
+                headers.append((b"access-control-allow-origin", b"*"))
+                headers.append((b"access-control-allow-methods", b"GET, OPTIONS"))
+                headers.append((b"access-control-allow-headers", b"*"))
+                headers.append((b"access-control-allow-private-network", b"true"))
+                message["headers"] = headers
+            await send(message)
+
+        try:
+            await super().__call__(scope, receive, send_with_cors)
+        except RuntimeError as e:
+            if "Response content shorter than Content-Length" not in str(e):
+                raise
+            # File accessed while being written
+            if response_started:
+                # Response already started, so an error response can no longer
+                # be sent; log the truncation instead of dropping it silently.
+                logging.getLogger(__name__).warning(
+                    "Truncated static response for %s: %s", scope.get("path"), e
+                )
+                return
+            # Nothing sent yet: tell the client to retry.
+            response = JSONResponse(
+                status_code=503,
+                content={"detail": "Audio file still being generated, please retry"}
+            )
+            await response(scope, receive, send_with_cors)
+
 # --- FastAPI Setup ---
 app = FastAPI()
-app.mount("/static", StaticFiles(directory=config.STATIC_DIR), name="static")
+app.mount("/static", AudioStaticFiles(directory=config.STATIC_DIR), name="static")
+app.mount("/audio_cache", AudioStaticFiles(directory=config.AUDIO_CACHE_DIR), name="audio_cache")
 
 app.include_router(router)
 app.include_router(openai_api_router)

diff --git a/browser-extension/.gitignore b/browser-extension/.gitignore
@@ -0,0 +1,31 @@
+# Node modules
+node_modules/
+npm-debug.log
+package-lock.json
+
+# Build artifacts
+dist/
+*.zip
+*.xpi
+*.crx
+
+# Generated icons (if regenerated)
+icons/*.png
+
+# Development files
+.web-ext-config.js
+web-ext-artifacts/
+
+# OS files
+.DS_Store
+Thumbs.db
+desktop.ini
+
+# IDE
+.vscode/
+.idea/
+*.swp
+*.swo
+
+# Logs
+*.log