From bf3a46f0abc084506197534b700853ccf3131bbc Mon Sep 17 00:00:00 2001
From: AJ Slater <aj@slater.net>
Date: Fri, 8 May 2026 20:36:41 -0700
Subject: [PATCH 1/6] reader: serve image-dominant PDF pages as <img>, drop
 full-PDF mode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Most "comic PDFs" are scanned-image wrappers — one full-bleed JPEG
or PNG per page, no real vector content. Previously every PDF page
was routed through ``vue-pdf-embed`` (and through pdf.js on the
client) regardless. Now the backend runs an image-dominant page
detector via ``comicbox-pdffile`` 0.6 and serves matched pages as
plain image bytes; the browser renders them through ``<img>`` like
any CBZ page. Vector-content pages keep the existing single-page-PDF
+ ``vue-pdf-embed`` path.

Backend
=======

* ``codex/views/reader/page.py``
  - ``?format=auto|pdf|image`` query parameter. ``auto`` (default)
    runs the detector. ``pdf`` skips the detector and forces the
    legacy single-page-PDF path. ``image`` always rasterizes — works
    for any PDF page but spends more CPU on vector-heavy pages.
  - ``_try_pdf_image_serve`` reaches through Comicbox to the
    underlying ``PDFFile`` (private API for now; comments mark the
    seam for a future public-getter swap) and dispatches to
    ``classify_page`` / ``read_image_if_dominant`` /
    ``read_full_pixmap_jpeg``. Per-page verdicts are memoized on the
    ``_ArchiveEntry`` so prev/curr/next prefetch is effectively free.
  - Old ``?pixmap=`` query parameter dropped — it returned PPM bytes
    with ``image/jpeg`` content-type (latent labeling bug; no caller).
  - OpenAPI schema advertises four possible response content-types:
    ``application/pdf`` (fallback), ``image/jpeg``, ``image/png``,
    ``image/webp``.
* ``codex/views/reader/_archive_cache.py``
  - Adds a ``verdicts: dict[int, PageVerdict]`` slot on
    ``_ArchiveEntry`` for the per-page detector cache.
  - New ``open_entry()`` context manager yields the entry directly
    (existing ``open()`` keeps yielding ``Comicbox`` for callers
    that don't need verdict state).

Frontend
========

* ``BookPage`` (``page/page.vue``) always tries ``<ImgPage>`` first.
  On ``error`` for a PDF book it sets ``pdfFallback=true`` and the
  page re-mounts as ``<PDFDoc>`` against the same URL with
  ``?format=pdf`` appended. No HEAD pre-flight, no verdict threaded
  through the API response — the browser's image-load failure on a
  ``application/pdf`` body is the natural signal.
* Drops ``PagerFullPDF`` and the whole-document-load mode it served.
  ``pager.vue`` now picks between ``PagerHorizontal`` /
  ``PagerVertical`` based purely on reading direction.
* Drops the ``cacheBook`` carve-out for vertical PDFs in
  ``stores/reader.js`` — PDFs prefetch alongside CBZ now.
* Adds a "PDF Rendering" radio to the reader settings drawer
  (Auto / Force image / Force vector), shown for PDF books only and
  wired to a new per-session ``clientSettings.pdfRenderMode`` field.
  Non-``auto`` values are forwarded to the page endpoint as
  ``?format=``.
* ``getComicPageSource`` accepts an optional ``format`` parameter.
  Omitting it preserves the URL shape so HTTP caches don't fragment.

Sequencing
==========

This change depends on two upstream PRs:

* ``comicbox-pdffile`` 0.6 — image-dominant detector + extractors:
  https://github.com/ajslater/pdffile/pull/22
* ``comicbox`` widened pdffile pin (>=0.6,<0.7):
  https://github.com/ajslater/comicbox/pull/131

The ``[tool.uv.sources]`` block in ``pyproject.toml`` temporarily
points both deps at their PR branches so this branch's CI can
resolve. Once both upstreams land on PyPI, drop the sources block
and the explicit ``comicbox-pdffile`` direct dep.

The full design + empirical validation against a 14-PDF private
corpus lives in ``tasks/pdf-image-detection/``.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 codex/views/reader/_archive_cache.py          |  37 ++-
 codex/views/reader/page.py                    | 124 ++++++--
 frontend/src/api/v3/reader.js                 |  18 +-
 .../drawer/reader-settings-controls.vue       |  42 ++-
 .../src/components/reader/pager/page/page.vue |  63 +++-
 .../reader/pager/pager-full-pdf.vue           |  36 ---
 .../src/components/reader/pager/pager.vue     |  30 +-
 frontend/src/stores/reader.js                 |  27 +-
 pyproject.toml                                |  13 +
 tasks/pdf-image-detection/00-plan.md          | 131 ++++++++
 tasks/pdf-image-detection/01-detector.md      | 175 +++++++++++
 .../pdf-image-detection/02-implementation.md  | 297 ++++++++++++++++++
 .../99-prototype-results.md                   | 223 +++++++++++++
 uv.lock                                       |  20 +-
 14 files changed, 1118 insertions(+), 118 deletions(-)
 delete mode 100644 frontend/src/components/reader/pager/pager-full-pdf.vue
 create mode 100644 tasks/pdf-image-detection/00-plan.md
 create mode 100644 tasks/pdf-image-detection/01-detector.md
 create mode 100644 tasks/pdf-image-detection/02-implementation.md
 create mode 100644 tasks/pdf-image-detection/99-prototype-results.md
diff --git a/codex/views/reader/_archive_cache.py b/codex/views/reader/_archive_cache.py
index 95ffffe00..f8cfe3831 100644
--- a/codex/views/reader/_archive_cache.py
+++ b/codex/views/reader/_archive_cache.py
@@ -44,7 +44,7 @@
 import time
 from collections import OrderedDict
 from contextlib import contextmanager
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Any
 
 from comicbox.box import Comicbox
 from loguru import logger
@@ -79,13 +79,27 @@ def _env_bool(name: str, *, default: bool) -> bool:
 class _ArchiveEntry:
     """One cached Comicbox + its per-path lock + last-access timestamp."""
 
-    __slots__ = ("comicbox", "last_access", "lock", "path")
+    __slots__ = (
+        "comicbox",
+        "last_access",
+        "lock",
+        "path",
+        "verdicts",
+    )
 
     def __init__(self, path: str, comicbox: Comicbox, last_access: float) -> None:
         self.path = path
         self.comicbox = comicbox
         self.lock = threading.Lock()
         self.last_access = last_access
+        # Per-page image-serve verdict cache (``pdffile.PageVerdict``
+        # instances keyed on zero-based page index). Memoized here so
+        # repeated ``ReaderPageView`` hits on the same archive don't
+        # re-classify; the underlying ``classify_page`` call is cheap
+        # but the cache makes prev/curr/next prefetch effectively
+        # free. Typed loosely to keep ``pdffile`` out of this module's
+        # import surface.
+        self.verdicts: dict[int, Any] = {}
 
     def close(self) -> None:
         """Close the cached archive; tolerate already-closed state."""
@@ -178,6 +192,25 @@ def open(self, path: str) -> Generator[Comicbox]:
         with entry.lock:
             yield entry.comicbox
 
+    @contextmanager
+    def open_entry(self, path: str) -> Generator[_ArchiveEntry]:
+        """
+        Yield the full ``_ArchiveEntry`` for direct cache-state access.
+
+        Used by callers that need the ``Comicbox`` and the per-page
+        verdict memo (``ReaderPageView``'s image-serve fast path).
+        Same locking shape as ``open()``. When the cache is disabled,
+        synthesizes a transient entry so callers see a uniform API;
+        verdict memoization is a no-op in that mode.
+        """
+        if not self.enabled:
+            with Comicbox(path, config=COMICBOX_CONFIG, logger=logger) as cb:
+                yield _ArchiveEntry(path, cb, time.monotonic())
+            return
+        entry = self._open_or_get(path)
+        with entry.lock:
+            yield entry
+
     def shutdown(self) -> None:
         """Close every cached archive. Wired to ``atexit`` at module load."""
         with self._struct_lock:
diff --git a/codex/views/reader/page.py b/codex/views/reader/page.py
index 10f43a75b..94bbda5d8 100644
--- a/codex/views/reader/page.py
+++ b/codex/views/reader/page.py
@@ -1,26 +1,41 @@
 """Views for reading comic books."""
 
+from __future__ import annotations
+
 import time
+from typing import TYPE_CHECKING, Final
 
 from django.http import HttpResponse
 from drf_spectacular.types import OpenApiTypes
 from drf_spectacular.utils import OpenApiParameter, extend_schema
 from loguru import logger
-from pdffile import PageFormat
+from pdffile import PageMode, PDFFile
 from rest_framework.exceptions import NotFound
 
 from codex.librarian.bookmark.tasks import BookmarkUpdateTask
 from codex.librarian.mp_queue import LIBRARIAN_QUEUE
 from codex.models.choices import FileTypeChoices
 from codex.models.comic import Comic
-from codex.settings import FALSY
 from codex.views.auth import AuthFilterAPIView
 from codex.views.bookmark import BookmarkAuthMixin
 from codex.views.reader._archive_cache import archive_cache, page_acl_cache
 
-_PDF_MIME_TYPE = "application/pdf"
-_PDF_FORMAT_NON_PDF_TYPES = frozenset(
-    {e.value for e in (PageFormat.PIXMAP, PageFormat.IMAGE)}
+if TYPE_CHECKING:
+    from pdffile import PageVerdict
+
+    from codex.views.reader._archive_cache import _ArchiveEntry
+
+_PDF_MIME_TYPE: Final[str] = "application/pdf"
+
+#: Permitted ``?format=`` values. ``auto`` runs the detector; ``pdf``
+#: skips the detector and forces the legacy single-page-PDF path;
+#: ``image`` forces a server-side rasterize (works for any PDF page
+#: but spends more CPU than the detector path on vector-heavy pages).
+_FORMAT_AUTO: Final[str] = "auto"
+_FORMAT_PDF: Final[str] = "pdf"
+_FORMAT_IMAGE: Final[str] = "image"
+_FORMAT_HINTS: Final[frozenset[str]] = frozenset(
+    {_FORMAT_AUTO, _FORMAT_PDF, _FORMAT_IMAGE}
 )
 
 
@@ -76,18 +91,77 @@ def _resolve_path_and_type(self, pk) -> tuple[str, str | None]:
         page_acl_cache.put(cache_key, path, file_type, now)
         return path, file_type
 
-    def _get_page_image(self) -> tuple:
+    def _format_hint(self) -> str:
+        raw = self.request.GET.get("format", _FORMAT_AUTO).lower()
+        return raw if raw in _FORMAT_HINTS else _FORMAT_AUTO
+
+    @staticmethod
+    def _classify_cached(entry: _ArchiveEntry, pdf: PDFFile, page: int) -> PageVerdict:
+        """Memoize ``pdf.classify_page`` on the cache entry."""
+        cached = entry.verdicts.get(page)
+        if cached is not None:
+            return cached
+        verdict = pdf.classify_page(page)
+        entry.verdicts[page] = verdict
+        return verdict
+
+    def _try_pdf_image_serve(
+        self, path: str, page: int, fmt_hint: str
+    ) -> tuple[bytes, str] | None:
+        """
+        Image-serve fast path for PDF pages.
+
+        Returns ``(bytes, content_type)`` when the page can be served
+        as a raw image (detector matched, or caller forced ``image``);
+        ``None`` when the caller should fall back to the legacy
+        single-page-PDF path.
+        """
+        with archive_cache.open_entry(path) as entry:
+            # ``Comicbox._get_archive`` returns the underlying archive
+            # union (zip / rar / 7z / tar / pdf). Caller has gated on
+            # ``file_type == PDF`` so the runtime type is ``PDFFile``;
+            # ``isinstance`` narrows for the type checker. Private
+            # comicbox API for now — swap to a public getter once one
+            # lands upstream.
+            archive = entry.comicbox._get_archive()  # noqa: SLF001
+            if not isinstance(archive, PDFFile):
+                return None
+            pdf = archive
+            page_index = int(page)
+
+            if fmt_hint == _FORMAT_IMAGE:
+                # Always-image override — pixmap fallback for vector pages.
+                blob, ext = pdf.read_full_pixmap_jpeg(page_index)
+                return blob, f"image/{ext}"
+
+            verdict = self._classify_cached(entry, pdf, page_index)
+            if verdict.mode is PageMode.PDF_FALLBACK:
+                return None
+            served = pdf.read_image_if_dominant(page_index)
+            if served is None:
+                # Detection said dominant but extraction failed; fall
+                # through to PDF rather than serve a broken response.
+                return None
+            blob, ext = served
+            return blob, f"image/{ext}"
+
+    def _get_page_image(self) -> tuple[bytes, str]:
         """Get the image data and content type."""
         pk = self.kwargs.get("pk")
         path, file_type = self._resolve_path_and_type(pk)
 
-        # page_image
         page = self.kwargs.get("page")
-        pdf_format = (
-            PageFormat.PIXMAP.value
-            if self.request.GET.get("pixmap", "").lower() not in FALSY
-            else ""
-        )
+        fmt_hint = self._format_hint()
+
+        is_pdf = file_type == FileTypeChoices.PDF.value  # pyright: ignore[reportAttributeAccessIssue]  # ty: ignore[unresolved-attribute]
+
+        # Image-dominant fast path for PDFs (skipped when the caller
+        # forces ``?format=pdf``).
+        if is_pdf and fmt_hint != _FORMAT_PDF:
+            served = self._try_pdf_image_serve(path, page, fmt_hint)
+            if served is not None:
+                return served
+
         # Process-wide LRU of open Comicbox archives — the web reader's
         # prev/curr/next prefetch fires 3-5 page hits on the same archive
         # within a second, and ``cacheBook`` mode bursts a whole-book
@@ -96,28 +170,32 @@ def _get_page_image(self) -> tuple:
         # held inside ``archive_cache.open(...)`` serializes extraction
         # because ZipFile / RarFile / PDF backends aren't thread-safe.
         with archive_cache.open(path) as cb:
-            page_image = cb.get_page_by_index(page, pdf_format=pdf_format)
+            page_image = cb.get_page_by_index(page, pdf_format="")
         if not page_image:
             page_image = b""
 
-        # content type
-        if (
-            file_type == FileTypeChoices.PDF.value  # pyright: ignore[reportAttributeAccessIssue], # ty: ignore[unresolved-attribute]
-            and pdf_format not in _PDF_FORMAT_NON_PDF_TYPES
-        ):
-            content_type = _PDF_MIME_TYPE
-        else:
-            content_type = self.content_type
-
+        content_type = _PDF_MIME_TYPE if is_pdf else self.content_type
         return page_image, content_type
 
     @extend_schema(
         parameters=[
             OpenApiParameter("bookmark", OpenApiTypes.BOOL, default=True),
-            OpenApiParameter("pixmap", OpenApiTypes.BOOL, default=False),
+            OpenApiParameter(
+                "format",
+                OpenApiTypes.STR,
+                default=_FORMAT_AUTO,
+                enum=sorted(_FORMAT_HINTS),
+                description=(
+                    "PDF rendering hint: 'auto' (detector), "
+                    "'pdf' (legacy single-page PDF), "
+                    "'image' (always rasterize). Ignored for non-PDF archives."
+                ),
+            ),
         ],
         responses={
             (200, content_type): OpenApiTypes.BINARY,
+            (200, "image/png"): OpenApiTypes.BINARY,
+            (200, "image/webp"): OpenApiTypes.BINARY,
             (200, _PDF_MIME_TYPE): OpenApiTypes.BINARY,
         },
     )
diff --git a/frontend/src/api/v3/reader.js b/frontend/src/api/v3/reader.js
index 1e7e79b4b..e6d7f0ac8 100644
--- a/frontend/src/api/v3/reader.js
+++ b/frontend/src/api/v3/reader.js
@@ -25,9 +25,19 @@ export const getReaderInfo = (pk, data, ts, options = {}) => {
 const _getReaderAPIPath = (pk) =>
   globalThis.CODEX.API_V3_PATH + _getBookPath(pk);
 
-export const getComicPageSource = ({ pk, page, mtime }) => {
+export const getComicPageSource = ({ pk, page, mtime, format }) => {
+  // ``format`` is the optional PDF rendering hint forwarded to the
+  // backend ``ReaderPageView``: ``auto`` (detector decides),
+  // ``image`` (always rasterize), or ``pdf`` (skip the detector and
+  // serve a single-page PDF blob). Ignored by the backend for
+  // non-PDF archives. Omitting the param keeps the URL identical to
+  // the legacy shape so HTTP caches don't fragment.
   const bookAPIPath = _getReaderAPIPath(pk);
-  return `${bookAPIPath}/${page}/page.jpg?ts=${mtime}`;
+  let url = `${bookAPIPath}/${page}/page.jpg?ts=${mtime}`;
+  if (format && format !== "auto") {
+    url += `&format=${format}`;
+  }
+  return url;
 };
 
 export const getComicDownloadURL = ({ pk }, fn, ts) => {
@@ -45,8 +55,8 @@ export const getDownloadPageURL = ({ pk, page, mtime }) => {
 };
 
 export const getPDFInBrowserURL = ({ pk, mtime }) => {
-  // Consumed by ``<embed src=...>``, not ``HTTP.get`` — needs an
-  // absolute path so the browser doesn't resolve it relative to the
+  // Consumed by the "Read in Tab" link (`<a target="_blank">`) — needs
+  // an absolute path so the browser doesn't resolve it relative to the
   // current SPA route.
   const bookPath = _getBookPath(pk);
   return `/${bookPath}/book.pdf?ts=${mtime}`;
diff --git a/frontend/src/components/reader/drawer/reader-settings-controls.vue b/frontend/src/components/reader/drawer/reader-settings-controls.vue
index 3236bd048..b6bd75b49 100644
--- a/frontend/src/components/reader/drawer/reader-settings-controls.vue
+++ b/frontend/src/components/reader/drawer/reader-settings-controls.vue
@@ -89,7 +89,6 @@
     :model-value="settings.cacheBook"
     class="scopedCheckbox"
     density="compact"
-    :disabled="disableCacheBook"
     label="Cache Entire Book"
     hide-details="auto"
     :true-value="true"
@@ -98,6 +97,25 @@
     "
     @update:model-value="$emit('update', { cacheBook: $event })"
   />
+  <v-radio-group
+    v-if="isPDF"
+    v-tooltip="{
+      openDelay,
+      text:
+        'Auto: server decides per page. Image: always rasterize. ' +
+        'Vector: always render through pdf.js.',
+    }"
+    class="displayRadioGroup"
+    density="compact"
+    label="PDF Rendering"
+    hide-details="auto"
+    :model-value="pdfRenderMode"
+    @update:model-value="setPdfRenderMode"
+  >
+    <v-radio label="Automatic" value="auto" />
+    <v-radio label="Force image" value="image" />
+    <v-radio label="Force vector" value="pdf" />
+  </v-radio-group>
 
   <v-btn
     v-if="showClear"
@@ -114,7 +132,7 @@
 </template>
 
 <script>
-import { mapState } from "pinia";
+import { mapActions, mapState } from "pinia";
 
 import { useReaderStore } from "@/stores/reader";
 
@@ -141,9 +159,10 @@ export default {
     };
   },
   computed: {
-    ...mapState(useReaderStore, ["isVertical", "isPDF", "cacheBook"]),
+    ...mapState(useReaderStore, ["isVertical", "isPDF"]),
     ...mapState(useReaderStore, {
       choices: (state) => state.choices,
+      pdfRenderMode: (state) => state.clientSettings?.pdfRenderMode || "auto",
     }),
     fitToChoices() {
       return this.choicesWithoutNull("fitTo");
@@ -152,16 +171,20 @@ export default {
       return this.choicesWithoutNull("readingDirection");
     },
     disableTwoPages() {
-      return this.isVertical || (this.isPDF && this.cacheBook);
+      /*
+       * Two-page rendering is meaningful in horizontal mode only.
+       * PDFs handle two-page correctly through both ``<img>`` and
+       * ``<PDFDoc>`` paths now, so the old PDF carve-out is gone.
+       */
+      return this.isVertical;
     },
     disablePageTransition() {
-      return this.isVertical && this.isPDF;
-    },
-    disableCacheBook() {
-      return this.isVertical && this.isPDF;
+      // Page-turn animations only make sense in horizontal mode.
+      return this.isVertical;
     },
   },
   methods: {
+    ...mapActions(useReaderStore, ["setSettingsClient"]),
     choicesWithoutNull(attr) {
       const choices = [];
       for (const choice of Reflect.get(this.choices, attr)) {
@@ -171,6 +194,9 @@ export default {
       }
       return Object.freeze(choices);
     },
+    setPdfRenderMode(value) {
+      this.setSettingsClient({ pdfRenderMode: value });
+    },
   },
 };
 </script>
diff --git a/frontend/src/components/reader/pager/page/page.vue b/frontend/src/components/reader/pager/page/page.vue
index 4caf4d312..538b09eaf 100644
--- a/frontend/src/components/reader/pager/page/page.vue
+++ b/frontend/src/components/reader/pager/page/page.vue
@@ -74,16 +74,27 @@ export default {
       loaded: false,
       error: "",
       ts: 0,
+      /*
+       * ``true`` once we've seen an ``<img>`` load fail on a PDF
+       * book — the response was ``application/pdf`` (the detector
+       * declined to serve as image), so we re-mount the page through
+       * ``<PDFDoc>`` against the same URL with ``?format=pdf``.
+       */
+      pdfFallback: false,
     };
   },
   computed: {
     ...mapState(useReaderStore, {
       scale: (state) => state.clientSettings?.scale,
+      pdfRenderMode: (state) => state.clientSettings?.pdfRenderMode || "auto",
     }),
+    isPDF() {
+      return this.book.fileType === "PDF";
+    },
     style() {
       // Magic for transform: scale() not sizing elements right.
       const s = {};
-      if (this.book.fileType === "PDF" || this.scale == 1) {
+      if (this.usingPDFDoc || this.scale == 1) {
         return s;
       }
       const img = this.$refs.pageComponent?.$el;
@@ -100,11 +111,34 @@ export default {
         pk: this.book.pk,
         page: this.page,
         mtime,
+        format: this.activeFormat,
       };
       return getComicPageSource(params);
     },
+    activeFormat() {
+      /*
+       * Non-PDF books always report ``auto``, which
+       * ``getComicPageSource`` omits, keeping the legacy URL shape.
+       */
+      if (!this.isPDF) {
+        return "auto";
+      }
+      if (this.pdfFallback) {
+        // Image attempt failed → re-fetch with PDF response.
+        return "pdf";
+      }
+      return this.pdfRenderMode;
+    },
+    usingPDFDoc() {
+      /*
+       * Render through ``<PDFDoc>`` when:
+       *   • the user explicitly forced PDF rendering, or
+       *   • the ``<img>`` first attempt failed for a PDF book.
+       */
+      return this.isPDF && (this.pdfRenderMode === "pdf" || this.pdfFallback);
+    },
     component() {
-      return this.book.fileType === "PDF" ? PDFDoc : ImgPage;
+      return this.usingPDFDoc ? PDFDoc : ImgPage;
     },
     bookSettings() {
       return this.getBookSettings(this.book);
@@ -113,6 +147,17 @@ export default {
       return this.bookSettings?.twoPages ?? false;
     },
   },
+  watch: {
+    /*
+     * If the user toggles render mode mid-read, drop the fallback flag
+     * so the new mode takes effect on the next mount.
+     */
+    pdfRenderMode() {
+      this.pdfFallback = false;
+      this.error = "";
+      this.loaded = false;
+    },
+  },
   mounted() {
     /*
      * Show the spinner only if the image is still loading after
@@ -148,6 +193,18 @@ export default {
       this.error = false;
     },
     onError() {
+      /*
+       * For PDF books on the first ``<img>`` attempt, a failure
+       * is taken to mean the backend sent ``application/pdf`` —
+       * the detector declined to serve an image. Swap to
+       * ``<PDFDoc>``, which refetches with ``format=pdf`` via
+       * vue-pdf-embed. For all other failures, surface the error.
+       */
+      if (this.isPDF && !this.pdfFallback && !this.usingPDFDoc) {
+        this.pdfFallback = true;
+        this.loaded = false;
+        return;
+      }
       this.error = "load";
       this.showProgress = false;
     },
@@ -157,6 +214,8 @@ export default {
     },
     onRetry() {
       this.ts = Date.now();
+      this.pdfFallback = false;
+      this.error = "";
     },
   },
 };
diff --git a/frontend/src/components/reader/pager/pager-full-pdf.vue b/frontend/src/components/reader/pager/pager-full-pdf.vue
deleted file mode 100644
index c258c4e1b..000000000
--- a/frontend/src/components/reader/pager/pager-full-pdf.vue
+++ /dev/null
@@ -1,36 +0,0 @@
-<template>
-  <span>
-    <PageChangeLink direction="prev" />
-    <PageChangeLink direction="next" />
-    <ScaleForScroll>
-      <PDFDoc :book="book" :page="page" :src="src" />
-    </ScaleForScroll>
-  </span>
-</template>
-
-<script>
-import { mapState } from "pinia";
-
-import { getPDFInBrowserURL } from "@/api/v3/reader";
-import PageChangeLink from "@/components/reader/pager/page-change-link.vue";
-import PDFDoc from "@/components/reader/pager/pdf-doc.vue";
-import ScaleForScroll from "@/components/reader/pager/scale-for-scroll.vue";
-import { useReaderStore } from "@/stores/reader";
-
-export default {
-  name: "PagerFullPDF",
-  components: { PageChangeLink, PDFDoc, ScaleForScroll },
-  props: {
-    book: { type: Object, required: true },
-  },
-  emits: ["load", "error", "unauthorized"],
-  computed: {
-    ...mapState(useReaderStore, {
-      page: (state) => state.page || 0,
-    }),
-    src() {
-      return getPDFInBrowserURL(this.book);
-    },
-  },
-};
-</script>
diff --git a/frontend/src/components/reader/pager/pager.vue b/frontend/src/components/reader/pager/pager.vue
index 341b3291e..5fd4cd360 100644
--- a/frontend/src/components/reader/pager/pager.vue
+++ b/frontend/src/components/reader/pager/pager.vue
@@ -12,14 +12,8 @@
 
 <script>
 import { mapActions, mapState } from "pinia";
-import { defineAsyncComponent, markRaw } from "vue";
 
 import PagerHorizontal from "@/components/reader/pager/pager-horizontal.vue";
-const PagerPDF = markRaw(
-  defineAsyncComponent(
-    () => import("@/components/reader/pager/pager-full-pdf.vue"),
-  ),
-);
 import PagerVertical from "@/components/reader/pager/pager-vertical.vue";
 import { useReaderStore } from "@/stores/reader";
 
@@ -27,7 +21,6 @@ export default {
   name: "PagerSelector",
   components: {
     PagerHorizontal,
-    PagerPDF,
     PagerVertical,
   },
   props: {
@@ -38,7 +31,6 @@ export default {
     return this.prefetchBook(this.book);
   },
   computed: {
-    ...mapState(useReaderStore, ["cacheBook"]),
     ...mapState(useReaderStore, {
       storePk: (state) => state.books?.current?.pk || 0,
     }),
@@ -48,23 +40,13 @@ export default {
     bookSettings() {
       return this.getBookSettings(this.book);
     },
-    readerFullPdf() {
-      return (
-        this.book?.fileType == "PDF" &&
-        this.cacheBook &&
-        !this.bookSettings.isVertical
-      );
-    },
     component() {
-      let comp;
-      if (this.readerFullPdf) {
-        comp = PagerPDF;
-      } else if (this.bookSettings.isVertical) {
-        comp = PagerVertical;
-      } else {
-        comp = PagerHorizontal;
-      }
-      return comp;
+      /*
+       * PDF books now render page-by-page (image-first with
+       * PDFDoc fallback handled inside ``BookPage``), so the
+       * orientation alone decides the pager.
+       */
+      return this.bookSettings.isVertical ? PagerVertical : PagerHorizontal;
     },
   },
   watch: {
diff --git a/frontend/src/stores/reader.js b/frontend/src/stores/reader.js
index d6c2f3b93..3920cf3b9 100644
--- a/frontend/src/stores/reader.js
+++ b/frontend/src/stores/reader.js
@@ -133,6 +133,13 @@ export const useReaderStore = defineStore("reader", {
     reactWithScroll: false,
     clientSettings: {
       scale: SCALE_DEFAULT,
+      // Per-session PDF rendering preference. ``auto`` runs the
+      // server-side detector (most comic PDFs serve as ``<img>``);
+      // ``image`` forces a server-side rasterize for any page;
+      // ``pdf`` skips the detector and routes through vue-pdf-embed
+      // on the client. Persisted client-side only — not in the
+      // server-side reader settings yet (spike scope).
+      pdfRenderMode: "auto",
     },
     showToolbars: false,
     settingsLoaded: false,
@@ -171,10 +178,10 @@ export const useReaderStore = defineStore("reader", {
       return state.books?.current?.fileType == "PDF";
     },
     cacheBook() {
-      return (
-        this.activeSettings.cacheBook &&
-        !(this.isPDF && this.activeSettings.isVertical)
-      );
+      // PDFs now render page-by-page (image-first with PDFDoc
+      // fallback), so the old "vertical PDF can't be cached"
+      // carve-out is no longer needed.
+      return this.activeSettings.cacheBook;
     },
     isPagesNotRoutes(state) {
       return state.activeSettings.isVertical || this.cacheBook;
@@ -872,7 +879,12 @@ export const useReaderStore = defineStore("reader", {
       if (page > book.maxPage) {
         return false;
       }
-      const paramsPlus = { pk: params.pk, page, mtime: book.mtime };
+      const paramsPlus = {
+        pk: params.pk,
+        page,
+        mtime: book.mtime,
+        format: this.clientSettings?.pdfRenderMode,
+      };
       return READER_API.getComicPageSource(paramsPlus);
     },
     prefetchLinks(params, direction, bookChange = false) {
@@ -892,7 +904,10 @@ export const useReaderStore = defineStore("reader", {
       return { link };
     },
     prefetchBook(book) {
-      if (!this.cacheBook || book.fileType == "PDF") {
+      // PDF books now prefetch alongside CBZ — image-first responses
+      // are cacheable as plain images, fallback responses cache as
+      // PDF blobs that the next mount fetches via vue-pdf-embed.
+      if (!this.cacheBook) {
         return {};
       }
       const pk = book.pk;
diff --git a/pyproject.toml b/pyproject.toml
index ca4df0215..63233fe70 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -16,6 +16,11 @@ dependencies = [
   "bidict~=0.23",
   "channels~=4.2",
   "comicbox[pdf]~=3.0.0",
+  # Direct dep so the in-flight ``tool.uv.sources`` override below can
+  # point at the unreleased ``comicbox-pdffile`` 0.6 PR. Drop this
+  # line once 0.6.x lands on PyPI and the comicbox[pdf] extra picks
+  # it up transitively.
+  "comicbox-pdffile",
   "dateparser~=1.2",
   "django-cachalot~=2.8",
   "django-cors-headers~=4.0",
@@ -107,6 +112,14 @@ ci = []
 requires = ["uv_build~=0.11.2"]
 build-backend = "uv_build"
 
+# Track the in-flight upstream PRs that this feature depends on.
+# Drop this block once both upstreams publish to PyPI:
+#   * comicbox-pdffile 0.6.x — https://github.com/ajslater/pdffile/pull/22
+#   * comicbox 3.0.x widened pin — https://github.com/ajslater/comicbox/pull/131
+[tool.uv.sources]
+comicbox-pdffile = { git = "https://github.com/ajslater/pdffile.git", branch = "pdf-image-detection" }
+comicbox = { git = "https://github.com/ajslater/comicbox.git", branch = "widen-pdffile-pin" }
+
 [tool.uv.build-backend]
 module-root = ""
 source-include = [
diff --git a/tasks/pdf-image-detection/00-plan.md b/tasks/pdf-image-detection/00-plan.md
new file mode 100644
index 000000000..a430d09fe
--- /dev/null
+++ b/tasks/pdf-image-detection/00-plan.md
@@ -0,0 +1,131 @@
+# PDF Reader: Image-Dominant Page Detection
+
+## Goal
+
+Most "comic PDFs" are scanned-image wrappers — one JPEG/PNG per page with
+no real vector content. Today Codex routes every PDF page through the same
+client-side `vue-pdf-embed` rendering pipeline, even when a `<img>` tag
+would suffice. This plan adds a server-side detector that decides per page
+whether the embedded image alone is enough, and serves that image
+directly when it is. Pages that fail the detector keep the current
+single-page-PDF + `vue-pdf-embed` path.
+
+The end state: comic-style PDFs render through Codex's existing image
+reader (same path as CBZ/CBR), and "real" multi-element PDFs keep
+vector-quality rendering. The user can override per-page when the
+detector misjudges.
+
+## Why this is worth doing
+
+1. **Most comic PDFs are image containers.** Empirically validated against
+   a 14-PDF / ~100-page test corpus (see [99-prototype-results.md](99-prototype-results.md)):
+   - `Watchmen Comic Original Script.pdf`: 6/6 pages image-dominant.
+   - `Amphigorey.pdf`: comic pages match; title pages correctly fall through.
+   - `Nolo Press 8 Ways to Avoid Probate`: scanned cover (pages 0–1)
+     match; vector text body correctly falls through. **Within-document
+     granularity works.**
+   - `Nolo Press Working for Yourself`: 0/12 — pure vector text PDF.
+     Correctly rejected.
+2. **Browser-native rendering for matched pages.** No `<canvas>` /
+   pdf.js worker / CSS sizing hacks. The `BookPage` component already
+   renders `<img>` for CBZ/CBR; matched PDF pages join that path.
+3. **The detector is essentially free.** p50 ≈ 1–5 ms per page,
+   p99 ≈ 12 ms even on a 1 MB scanned page with an OCR text overlay.
+   Tiny next to the existing extraction cost.
+4. **No bandwidth regression.** Direct image bytes are roughly
+   the same size as the corresponding single-page PDF (PDF wraps the
+   same JPEG); often slightly smaller (no PDF object overhead).
+5. **No impact on downloads.** The existing "download original PDF"
+   feature is unaffected.
+
+## Decision summary
+
+Hybrid path with three serving modes, decided per page:
+
+| Verdict | Server output | Frontend | When |
+|---|---|---|---|
+| `IMAGE_DIRECT` | embedded JPEG/PNG bytes, content-type `image/{ext}` | `<img>` (existing `ImgPage`) | single image, ≥85% coverage, ≤50 visible chars, no vector ink, RGB/Gray colorspace |
+| `IMAGE_TRANSCODE` | `Pixmap`-rendered RGB JPEG | `<img>` | as above but CMYK / non-browser-native source format (JBIG2, JPEG2000, CCITT) |
+| `PDF_FALLBACK` | single-page PDF, content-type `application/pdf` | `vue-pdf-embed` (existing `PDFDoc`) | everything else |
+
+Frontend chooses between `ImgPage` and `PDFDoc` from the response
+content-type, not the comic's `fileType`. Override hook: `?format=pdf`
+forces fallback even when the detector accepts.
+
+## Things this plan does NOT change
+
+- `comicbox` / `pdffile` stay the integration boundary. PyMuPDF stays
+  out of `codex/` direct imports; the new detector is added to
+  `pdffile` upstream.
+- `archive_cache` and the per-archive `threading.Lock` model stay
+  unchanged.
+- Cover generation stays on its current path (already pixmap-based).
+- Bookmark, ACL, prev/next prefetch — unchanged.
+- The full-document `book.pdf` route stays as a download endpoint.
+- **Exception:** the full-PDF reader mode (`PagerFullPDF`) *does* change — it is **removed**; see [02-implementation.md](02-implementation.md).
+  It bypasses the detector and is the most fragile branch of the
+  current code.
+
+## What's new in this plan vs. the architectural report
+
+The earlier report [scoped two options](../../README.md): keep current
+(Option 1) or rasterize-everything (Option 3). This plan is the third
+shape: **Option 3 when cheap, Option 1 when not.** It captures the
+performance win without the quality regression on real vector PDFs.
+
+The detector is the part that needed empirical validation. It now has
+it (see prototype results); the rest of the plan is straightforward
+glue.
+
+## Subplan index
+
+- [01-detector.md](01-detector.md) — detection logic, thresholds,
+  classify-and-extract code.
+- [02-implementation.md](02-implementation.md) — file-by-file changes
+  in `pdffile`, `comicbox`, `codex/views/reader/page.py`, and the
+  Vue reader.
+- [99-prototype-results.md](99-prototype-results.md) — measurements
+  from the test corpus, with edge cases identified.
+
+## Out-of-scope follow-ups
+
+- **DPR / hi-DPI image rendering for `IMAGE_TRANSCODE`.** The Pixmap
+  fallback uses native page DPI (~72 DPI). For high-DPR displays
+  reading vector PDFs, this would be soft. Realistic comic PDFs are
+  scanned at ~150 DPI which is fine; revisit if anyone reports it.
+- **Multi-image composition.** Pages with several panels stored as
+  separate images currently fall through to `PDF_FALLBACK`. We could
+  composite via Pixmap, but the fallback already handles them well.
+- **OCR text overlay handling.** The existing `?hide_text=1` flag still
+  applies; pages with visible OCR text fall through to `PDF_FALLBACK`
+  where `hide_text` works as today.
+- **Per-comic flag at import time.** Skipped intentionally — per-page
+  detection is cheap and gives strictly better granularity. Revisit
+  only if the page endpoint hot path proves bottlenecked on detection
+  (telemetry says it won't).
+
+## Risk register
+
+| Risk | Likelihood | Impact | Mitigation |
+|---|---|---|---|
+| Detector false-positive renders unreadable image | Low | Medium | `?format=pdf` override; UI toggle in reader settings |
+| CMYK transcode produces wrong colors | Low | Medium | Pixmap → DeviceRGB conversion verified in prototype against PIL decode |
+| Multi-image page misclassified as single-image-dominant | Medium | Low | Detector counts all images; only pages with exactly one image (`len(images) == 1`) are eligible for the image path, everything else is `PDF_FALLBACK` |
+| Rotation handling breaks layout | Low (no rotated PDFs in test corpus) | Medium | If `page.rotation != 0`, downgrade to `IMAGE_TRANSCODE` (Pixmap respects rotation) |
+| pdffile upstream change blocks codex release | Low | Medium | Implement detector in codex initially as a private helper that imports pymupdf directly; upstream as a follow-up PR |
+
+## Acceptance criteria
+
+1. The Watchmen scripts and Amphigorey comic pages render via `<img>`
+   without `vue-pdf-embed` invoked.
+2. The Nolo Press text PDFs render via `vue-pdf-embed` exactly as
+   today.
+3. Page-turn latency for image-dominant PDFs improves measurably (no
+   pdf.js render on the client; image cache hit on the second pass).
+4. Network panel shows `image/jpeg` (or `image/png`) for matched
+   pages and `application/pdf` for fallback pages — no regression for
+   either.
+5. `?format=pdf` query parameter forces fallback for any PDF page.
+6. Existing `?hide_text=1` continues to work for `PDF_FALLBACK` pages.
+7. No new dependency added to `codex/`'s pyproject (pymupdf access
+   remains via `comicbox[pdf]`).
diff --git a/tasks/pdf-image-detection/01-detector.md b/tasks/pdf-image-detection/01-detector.md
new file mode 100644
index 000000000..a154aae99
--- /dev/null
+++ b/tasks/pdf-image-detection/01-detector.md
@@ -0,0 +1,175 @@
+# Detector: Image-Dominant Page Classification
+
+## Inputs
+
+`(doc: pymupdf.Document, index: int)` — already-open PyMuPDF document
+and zero-based page index. The caller (`codex/views/reader/page.py`)
+already has the document open via `archive_cache`.
+
+## Output
+
+```python
+@dataclass(frozen=True, slots=True)
+class PageVerdict:
+    mode: Literal["IMAGE_DIRECT", "IMAGE_TRANSCODE", "PDF_FALLBACK"]
+    image_xref: int | None       # which embedded image to extract, if any
+    ext: str | None              # original encoding ("jpeg", "png", …)
+    transcode_to: str | None     # "jpeg" if we must re-encode
+```
+
+## Detection algorithm
+
+Three checks, all using PyMuPDF APIs that operate on parsed PDF
+metadata (no rendering):
+
+```python
+def classify_page(doc, index) -> PageVerdict:
+    page = doc.load_page(index)
+    images = page.get_images(full=True)
+
+    # Reject early on multi-image or zero-image pages.
+    if len(images) != 1:
+        return PageVerdict("PDF_FALLBACK", None, None, None)
+
+    # Coverage: image bbox vs. page rect, clamped to 1.0.
+    page_area = page.rect.width * page.rect.height
+    bbox = page.get_image_bbox(images[0])
+    coverage = (
+        min((bbox.width * bbox.height) / page_area, 1.0)
+        if page_area and not bbox.is_empty
+        else 0.0
+    )
+    if coverage < MIN_COVERAGE:
+        return PageVerdict("PDF_FALLBACK", None, None, None)
+
+    # Visible-text gate: lots of visible text means the page has
+    # vector-rendered content beyond the image.
+    text_len = len(page.get_text("text").strip())
+    if text_len > MAX_TEXT_CHARS:
+        return PageVerdict("PDF_FALLBACK", None, None, None)
+
+    # Vector-ink gate: any non-image drawings disqualify.
+    if page.get_drawings():
+        return PageVerdict("PDF_FALLBACK", None, None, None)
+
+    # Rotation gate: skip extract_image when page is rotated; pixmap
+    # path applies the rotation correctly.
+    if page.rotation != 0:
+        xref = images[0][0]
+        return PageVerdict("IMAGE_TRANSCODE", xref, None, "jpeg")
+
+    # All gates passed — decide direct vs. transcode based on the
+    # embedded image's actual encoding and colorspace.
+    xref = images[0][0]
+    image_dict = doc.extract_image(xref)
+    ext = (image_dict.get("ext") or "").lower()
+    cs = image_dict.get("colorspace", 0)  # PyMuPDF reports n-channels
+
+    if ext in BROWSER_NATIVE_EXTS and cs in BROWSER_NATIVE_COLORSPACES:
+        return PageVerdict("IMAGE_DIRECT", xref, ext, None)
+    return PageVerdict("IMAGE_TRANSCODE", xref, ext, "jpeg")
+```
+
+## Thresholds (initial values, validated against test corpus)
+
+| Constant | Value | Rationale |
+|---|---|---|
+| `MIN_COVERAGE` | `0.85` | Tight enough to reject pages with surrounding margins (Amphigorey's title pages at 41–60% fall through). Loose enough that full-bleed scanned pages with slight bleed-rect imprecision still match (Watchmen: 100%, Amphigorey comic pages: 92–95%). |
+| `MAX_TEXT_CHARS` | `50` | OCR scraps (page numbers, edge text) are typically <50 chars. Real text content (`#thaistory.pdf`: 357–3185 chars) is far above this. |
+| `MAX_IMAGES` | `1` | Multi-image pages are usually panel-split scans; `extract_image` would lose all but the first. Pixmap fallback or PDF fallback handles them correctly. |
+| `BROWSER_NATIVE_EXTS` | `{"jpeg", "jpg", "png", "webp"}` | Universal browser support. Excludes JBIG2 (none), JPEG2000 (Safari only), CCITT/TIFF (none). |
+| `BROWSER_NATIVE_COLORSPACES` | `{1, 3}` | PyMuPDF's `colorspace` field is the n-channel count: 1 = Gray, 3 = RGB, 4 = CMYK. Browsers don't render CMYK JPEGs reliably. |
+
+These are constants in code. **Do not** expose as env vars in the
+first cut — telemetry from production use should drive any
+tuning; ad-hoc env knobs make that signal noisier.
+
+## Extraction
+
+`IMAGE_DIRECT`:
+
+```python
+image_dict = doc.extract_image(xref)
+return image_dict["image"], f"image/{image_dict['ext']}"
+```
+
+`IMAGE_TRANSCODE`:
+
+```python
+pix = pymupdf.Pixmap(doc, xref)
+if pix.colorspace and pix.colorspace.n not in (1, 3):
+    pix = pymupdf.Pixmap(pymupdf.csRGB, pix)  # CMYK → RGB
+blob = pix.tobytes("jpeg")
+return blob, "image/jpeg"
+```
+
+Note: `Pixmap(doc, xref)` reads the embedded image — it does *not*
+re-render the page. That's the cheap path. We only run
+`page.get_pixmap()` (full page render) if `extract_image` itself
+fails, which is rare.
+
+## Performance budget
+
+Measured against test corpus (Apple Silicon, PyMuPDF 1.x via
+`pdffile` 0.5+):
+
+| Operation | p50 | p99 |
+|---|---|---|
+| `classify_page` cold | 0.7 ms | 12 ms |
+| `IMAGE_DIRECT` extract | 0.1 ms | 0.3 ms |
+| `IMAGE_TRANSCODE` (CMYK 2400×3000) | 20 ms | — |
+| `PDF_FALLBACK` (`convert_to_pdf`) | 5 ms | 20 ms |
+
+Reference: `get_page_pixmap` (full-page render, used by cover thread)
+is 50–300 ms on the same documents. The detector's worst case
+(12 ms) is still less than the cheapest current rendering path.
+
+## Caching the verdict
+
+Add a `verdicts: dict[int, PageVerdict]` to each
+`_ArchiveEntry` in [_archive_cache.py](../../codex/views/reader/_archive_cache.py).
+Reset on archive close. No persistent cache — re-detecting on a cold
+archive costs ~1 ms per page, paid once per LRU cycle.
+
+```python
+class _ArchiveEntry:
+    __slots__ = ("comicbox", "last_access", "lock", "path", "verdicts")
+
+    def __init__(self, path, comicbox, last_access):
+        # … existing init …
+        self.verdicts: dict[int, PageVerdict] = {}
+```
+
+The lock that already serializes extraction (per-archive
+`threading.Lock`) covers the verdict cache too — no new
+synchronization.
+
+## Override semantics
+
+Query parameter `?format=` on the page endpoint:
+
+| Value | Behaviour |
+|---|---|
+| `auto` (default) | Run detector, use verdict |
+| `pdf` | Skip detector, force `PDF_FALLBACK` |
+| `image` | Skip detector, rasterize the full page to JPEG (always works, slower on vector-heavy pages) |
+
+`?hide_text=1` continues to apply only to `PDF_FALLBACK`. When the
+detector matches `IMAGE_DIRECT`/`IMAGE_TRANSCODE`, the bytes are the
+embedded raster; there's no "text layer" to hide.
+
+## What the detector does NOT do
+
+- **Render-mode-3 (invisible) text filtering.** The test corpus has
+  no invisible-text PDFs (every OCR'd doc here uses mode 0 with
+  `HiddenHorzOCR` font). Adding mode-3 awareness is a future
+  refinement; for now visible OCR text correctly disqualifies a
+  page.
+- **Sample-based per-comic precomputation.** Per-page is fast
+  enough that batch sampling at import time isn't worth the cache
+  invalidation complexity.
+- **Embedded annotation / form / link detection.** Pages with form
+  fields or annotations should fall through to PDF rendering, but
+  the existing `get_drawings()` check catches most of these
+  (annotations are drawn as vector ink). If a regression appears,
+  add a `len(page.annots()) == 0` gate.
diff --git a/tasks/pdf-image-detection/02-implementation.md b/tasks/pdf-image-detection/02-implementation.md
new file mode 100644
index 000000000..3acbe5d94
--- /dev/null
+++ b/tasks/pdf-image-detection/02-implementation.md
@@ -0,0 +1,297 @@
+# Implementation: file-by-file changes
+
+Three layers of change, ordered by dependency.
+
+## Layer 1: `comicbox-pdffile` (upstream, separate PR)
+
+### Add a new `PageFormat` value and a classifier method
+
+[`pdffile/__init__.py`](../../.venv/lib/python3.14/site-packages/pdffile/__init__.py)
+
+```python
+class PageFormat(Enum):
+    PDF = "pdf"
+    IMAGE = "image"
+    PIXMAP = "pixmap"
+    IMAGE_IF_DOMINANT = "image_if_dominant"  # NEW
+```
+
+Add a public classifier on `PDFFile`:
+
+```python
+def classify_page(self, index: int) -> PageVerdict:
+    """Return verdict for whether page can be served as a raw image."""
+```
+
+…and route `read()` for the new format mode through the classifier
+plus extraction. Returns `None` (or raises a sentinel) when the page
+isn't image-dominant, so the caller falls back to PDF format.
+
+### Why upstream
+
+The classifier needs PyMuPDF APIs (`get_image_bbox`, `extract_image`,
+`Pixmap`) that `pdffile` already imports. Putting it in `pdffile`
+keeps codex free of a direct PyMuPDF dependency — same separation
+that exists today.
+
+### Compatibility
+
+`comicbox~=3.0.0` already passes through `pdf_format` and `hide_text`
+kwargs unchanged. The new format value rides the same plumbing. Bump
+the `pdffile` minor version (0.5 → 0.6); pin codex's
+`comicbox[pdf]` to the new minimum.
+
+## Layer 2: codex backend
+
+### `codex/views/reader/_archive_cache.py`
+
+Cache the verdict on the archive entry:
+
+```python
+class _ArchiveEntry:
+    __slots__ = ("comicbox", "last_access", "lock", "path", "verdicts")
+
+    def __init__(self, path: str, comicbox: Comicbox, last_access: float) -> None:
+        # … existing fields …
+        self.verdicts: dict[int, object] = {}
+```
+
+No other changes to the cache; the existing per-archive lock
+serializes verdict reads/writes.
+
+### `codex/views/reader/page.py`
+
+Replace [`_get_page_image`](../../codex/views/reader/page.py:79-127)
+with verdict-driven dispatch:
+
+```python
+_PDF_MIME = "application/pdf"
+_FORMAT_HINTS = frozenset({"auto", "pdf", "image"})
+
+
+def _get_page_image(self) -> tuple[bytes, str]:
+    pk = self.kwargs.get("pk")
+    path, file_type = self._resolve_path_and_type(pk)
+    page = self.kwargs.get("page")
+
+    fmt_hint = self.request.GET.get("format", "auto").lower()
+    if fmt_hint not in _FORMAT_HINTS:
+        fmt_hint = "auto"
+    hide_text = self.request.GET.get("hide_text", "").lower() not in FALSY
+
+    is_pdf = file_type == FileTypeChoices.PDF.value
+
+    with archive_cache.open(path) as cb:
+        # Fast path: image-dominant detection for PDFs only.
+        if is_pdf and fmt_hint != "pdf":
+            blob, content_type = self._try_image_serve(cb, page, fmt_hint)
+            if blob is not None:
+                return blob, content_type
+
+        # Fallback: original path.
+        if is_pdf:
+            blob = cb.get_page_by_index(page, pdf_format="", hide_text=hide_text)
+            return blob or b"", _PDF_MIME
+        # Non-PDF archive — unchanged.
+        blob = cb.get_page_by_index(page)
+        return blob or b"", self.content_type
+```
+
+`_try_image_serve` (new private helper):
+
+```python
+def _try_image_serve(self, cb, page, fmt_hint):
+    """Return (bytes, content_type) or (None, None) if not viable."""
+    if fmt_hint == "image":
+        # Force pixmap path — always works, may be slower.
+        blob = cb.get_page_by_index(page, pdf_format="pixmap")
+        return blob, "image/jpeg"  # pdffile pixmap is PPM today; see note
+    blob, ext = cb.get_page_by_index(page, pdf_format="image_if_dominant")
+    if blob is None:
+        return None, None
+    return blob, f"image/{ext}"
+```
+
+Open question: **`pdffile.read_pixmap` returns PPM bytes today.**
+Either `pdffile` adds a JPEG-encoded pixmap output (preferred — see
+[01-detector.md](01-detector.md)), or codex pipes the PPM through
+PIL like the cover generator does. Suggested: bake JPEG output
+into `pdffile` so codex stays a thin caller.
+
+### Drop the latent `?pixmap=` parameter
+
+[page.py:86-90](../../codex/views/reader/page.py:86) — the existing
+`?pixmap=1` returns PPM bytes labeled `image/jpeg`. No frontend caller
+exists. Replaced by `?format=image` with proper content-type.
+
+### OpenAPI schema
+
+Replace the `pixmap` parameter with `format`:
+
+```python
+@extend_schema(
+    parameters=[
+        OpenApiParameter("bookmark", OpenApiTypes.BOOL, default=True),
+        OpenApiParameter("format", OpenApiTypes.STR, default="auto",
+                         enum=["auto", "pdf", "image"]),
+        OpenApiParameter("hide_text", OpenApiTypes.BOOL, default=False),
+    ],
+    responses={
+        (200, "image/jpeg"): OpenApiTypes.BINARY,
+        (200, "image/png"): OpenApiTypes.BINARY,
+        (200, _PDF_MIME): OpenApiTypes.BINARY,
+    },
+)
+```
+
+### Tests
+
+Add fixtures (commit small synthetic PDFs to `codex/tests/fixtures/pdf/`):
+
+- `image_dominant_jpeg.pdf` — single full-bleed JPEG, no text.
+- `image_dominant_cmyk.pdf` — single full-bleed CMYK JPEG.
+- `vector_text.pdf` — vector-rendered text, no images.
+- `mixed.pdf` — image + significant text.
+- `multi_image.pdf` — multiple images on one page.
+- `rotated.pdf` — image-dominant with `/Rotate 90`.
+
+Test cases:
+
+1. Each fixture returns the expected content-type via the page endpoint.
+2. `?format=pdf` always returns `application/pdf`.
+3. `?format=image` always returns `image/*`.
+4. `?hide_text=1` is honoured when the page falls back to PDF.
+5. Detector cache: requesting page 0 twice on the same archive runs
+   the detector exactly once (instrument via patch).
+
+## Layer 3: codex frontend
+
+### Drop full-PDF mode
+
+[`pager.vue`](../../frontend/src/components/reader/pager/pager.vue):
+
+```diff
+- import PagerHorizontal from "@/components/reader/pager/pager-horizontal.vue";
+- const PagerPDF = markRaw(
+-   defineAsyncComponent(
+-     () => import("@/components/reader/pager/pager-full-pdf.vue"),
+-   ),
+- );
+- import PagerVertical from "@/components/reader/pager/pager-vertical.vue";
++ import PagerHorizontal from "@/components/reader/pager/pager-horizontal.vue";
++ import PagerVertical from "@/components/reader/pager/pager-vertical.vue";
+```
+
+Remove the `readerFullPdf` computed and the `cacheBook` exclusion in
+[reader.js:170-180](../../frontend/src/stores/reader.js):
+
+```diff
+- cacheBook() {
+-   return (
+-     this.activeSettings.cacheBook &&
+-     !(this.isPDF && this.activeSettings.isVertical)
+-   );
+- },
++ cacheBook() {
++   return this.activeSettings.cacheBook;
++ },
+```
+
+### Drive component choice from response, not `fileType`
+
+[`page.vue:106-108`](../../frontend/src/components/reader/pager/page/page.vue:106):
+
+The simplest robust shape is to **always start with `<ImgPage>`** and
+fall back to `<PDFDoc>` on a load error. No explicit content-type check
+is needed: when the response is a PDF, `<img src=…>` fails to render
+and fires `error`, and we re-mount as `<PDFDoc>` with `?format=pdf`
+appended.
+
+```vue
+<template>
+  <ImgPage v-if="!fallbackToPDF" ... @error="onImgError" />
+  <PDFDoc v-else ... :src="srcWithFormatPdf" />
+</template>
+```
+
+```javascript
+data() {
+  return { fallbackToPDF: false, ... };
+},
+methods: {
+  onImgError(event) {
+    if (this.book.fileType === "PDF" && !this.fallbackToPDF) {
+      this.fallbackToPDF = true;  // re-mount as PDFDoc
+      return;
+    }
+    this.error = "load";  // existing path
+  },
+},
+```
+
+For non-PDF books (`fileType !== "PDF"`), `onImgError` keeps the
+existing behaviour. For PDFs, the first load attempt is `<img>`; if
+the server returned `application/pdf` (i.e. `PDF_FALLBACK`), the
+browser fails the image load and we swap in `<PDFDoc>`.
+
+This avoids a HEAD request to determine content-type, and avoids
+threading the verdict through the API response.
+
+### Delete dead files
+
+- `frontend/src/components/reader/pager/pager-full-pdf.vue`
+- `frontend/src/components/reader/pager/pdf-doc.vue` — keep only if
+  the fallback path still uses it; otherwise delete and inline a
+  thin wrapper. Decision: **keep** — the fallback path uses it.
+- `frontend/src/api/v3/reader.js` `getPDFInBrowserURL` export:
+  delete unless the download panel still uses it (check
+  `download-panel.vue`).
+
+### UI override toggle
+
+Add to the reader settings drawer
+([reader-drawer-settings](../../frontend/src/components/reader/drawer/)):
+
+```vue
+<v-select
+  v-if="isPDF"
+  v-model="pdfRenderMode"
+  :items="[
+    { value: 'auto', title: 'Automatic' },
+    { value: 'image', title: 'Force image' },
+    { value: 'pdf', title: 'Force vector' },
+  ]"
+  label="PDF rendering"
+/>
+```
+
+Wire `pdfRenderMode` into [`getComicPageSource`](../../frontend/src/api/v3/reader.js)
+as a `format=` query param.
+
+Default: `auto`. Persist per-comic in the existing reader-settings
+endpoint, alongside `fitTo` etc.
+
+### Bundle size
+
+`vue-pdf-embed` stays in `package.json` because the fallback path
+uses it. The win is that for image-dominant comic PDFs, pdf.js never
+loads — the `defineAsyncComponent` import in [pager.vue:18-22](../../frontend/src/components/reader/pager/pager.vue:18)
+fires only when a fallback page actually mounts.
+
+## Order of work
+
+1. **Spike (codex-internal):** add the detector as a private helper
+   in `codex/views/reader/page.py`, importing `pymupdf` directly.
+   Wire `?format=auto|pdf|image` and the new content-type branches.
+   Validate end-to-end against the Watchmen / Amphigorey / Nolo Press
+   PDFs.
+2. **Frontend swap:** implement the `<img>`-first fallback-to-`<PDFDoc>`
+   shape. Delete `PagerFullPDF` and the `readerFullPdf` getter.
+3. **UI toggle:** add the per-comic override to reader settings.
+4. **Upstream pdffile:** lift the detector and JPEG-pixmap output
+   into `pdffile`, replace the codex-internal `pymupdf` import with
+   `comicbox`-mediated calls.
+5. **Tests + docs:** fixtures, integration tests, README/changelog.
+
+Steps 1–3 can ship as one PR (codex-only, no upstream blocker).
+Step 4 is a follow-up PR after the upstream `pdffile` release.
diff --git a/tasks/pdf-image-detection/99-prototype-results.md b/tasks/pdf-image-detection/99-prototype-results.md
new file mode 100644
index 000000000..42f86ce63
--- /dev/null
+++ b/tasks/pdf-image-detection/99-prototype-results.md
@@ -0,0 +1,223 @@
+# Prototype results
+
+Empirical data from running the one-off scripts `prototype.py`,
+`probe.py`, and `cmyk_probe.py` (not committed; see "Reproducing") against
+14 PDFs in `/Users/aj/Milliways/Comics/full/` on Apple Silicon
+with PyMuPDF via `comicbox-pdffile` 0.5+.
+
+Test corpus is intentionally diverse: image-comic scans, vector text
+PDFs, mixed scripts/comics, OCR overlays, and the PDF 2.0 spec
+example files.
+
+## Per-PDF detector verdict (first 12 pages)
+
+| PDF | pages | image-dominant | det p50 | det max | image format(s) |
+|---|---:|---:|---:|---:|---|
+| `dos-world-#19.pdf` (scanned mag, OCR-overlay) | 92 | 0/12 | 9.99 ms | 12.24 ms | jpeg (rejected: visible text) |
+| `double-text.pdf` (multi-image, vector text) | 1 | 0/1 | 24.48 ms | 24.48 ms | jpeg+png (rejected: 4 images) |
+| `#thaistory.pdf` (vector layout w/ insets) | 228 | 0/12 | 3.54 ms | 7.69 ms | jpeg+png (rejected: low coverage) |
+| `Amphigorey.pdf` (comic) | 204 | **6/12** | 0.65 ms | 2.08 ms | png |
+| `Nolo Press 8 Ways to Avoid Probate.pdf` | 249 | **2/12** (cover scan) | 3.01 ms | 5.37 ms | jpeg (page 1: CMYK) |
+| `Nolo Press Working for Yourself.pdf` | 382 | 0/12 | 4.84 ms | 7.02 ms | png+jpeg (rejected: heavy text) |
+| `Watchmen Comic Original Script.pdf` | 6 | **6/6** | 0.58 ms | 1.88 ms | jpeg |
+| 7 × pdf20examples (spec test files) | 9 total | 0/9 | 0.58 ms | 4.97 ms | png+jpeg (rejected: low coverage / no images) |
+
+Combined: **14 of 76 sampled pages classified as image-dominant**, all
+correctly. Zero false positives observed.
+
+## Within-document granularity
+
+The most-validated finding: **the detector distinguishes within a
+single PDF.**
+
+`Nolo Press 8 Ways to Avoid Probate.pdf`:
+
+```
+page  dom?  imgs  cov%  text
+   0     Y     1   100     0       ← scanned cover
+   1     Y     1   100     0       ← scanned inside cover (CMYK)
+   2     n     0     0  1514       ← vector text
+   3     n     1     9  1488       ← vector text + small inset
+   4     n     0     0  1579       ← vector text
+   ...
+```
+
+Pages 0–1 use the IMAGE path; pages 2+ correctly fall through to PDF
+rendering. This is the case the user explicitly raised: "people use
+codex to display regular PDFs as well." The detector handles
+mixed-content PDFs without per-comic configuration.
+
+`Amphigorey.pdf`:
+
+```
+page  dom?  imgs  cov%
+   0     Y     1    93     ← comic page
+   1     n     1    41     ← title page (small image, lots of margin)
+   2     n     1    60     ← chapter intro
+   3     n     1    29     ← chapter intro
+   4     n     1     1     ← mostly blank
+   ...
+   7     Y     1    95     ← comic page
+   8     Y     1    92     ← comic page
+```
+
+Title pages and chapter intros correctly fall through to PDF
+rendering even though they contain a single image — coverage is too
+low.
+
+## Image format diversity
+
+```
+dos-world-#19.pdf:                    jpeg=8 colorspaces=RGB
+double-text.pdf:                      jpeg=1, png=3
+#thaistory.pdf:                       jpeg=1, png=3
+Amphigorey.pdf:                       png=8 (all grayscale)
+Nolo Press 8 Ways to Avoid Probate:   jpeg=3 (RGB, CMYK, Gray) ⚠
+Nolo Press Working for Yourself:      png=7, jpeg=2
+Watchmen Comic Original Script:       jpeg=6 (RGB)
+PDF 2.0 image with BPC.pdf:           jpeg=2
+PDF 2.0 with page level output intent: png=2
+pdf20-utf8-test:                      png=1
+```
+
+**No JBIG2, JPEG2000, CCITT, or TIFF** in this corpus. The transcode
+fallback for those formats is defensive code that won't fire here,
+but stays in the design for future-proofing.
+
+**One CMYK image found** (Nolo Press 8 Ways page 1, jpeg cs=4). The
+transcode path handles it correctly:
+
+```
+ext   cs  mode             tx_ms safe?   sz_dir    sz_tx
+jpeg   4  rgb-transcode    19.77     Y   118759    95964
+```
+
+PIL decodes the transcoded bytes as `RGB` mode → browser-safe.
+
+## Performance
+
+Detector overhead per page (cold, no caching):
+
+```
+median across all PDFs:    1.1 ms
+p99 across all PDFs:      12.2 ms (dos-world-#19, page 2: 4855 chars)
+worst-case (multi-image, lots of text): 24.5 ms (double-text.pdf)
+```
+
+For comparison, the operations the detector is trying to *avoid*:
+
+```
+get_page_pixmap + JPEG encode (page render):  10–95 ms
+convert_to_pdf (current default for PDFs):    5–20 ms
+```
+
+The detector is cheaper than even the cheapest current path. After
+caching the verdict on `_ArchiveEntry`, subsequent requests pay zero
+detection cost.
+
+## Output sizes
+
+For matched pages, raw extracted bytes are typically **smaller** than
+the equivalent single-page PDF (which wraps the same JPEG + PDF
+overhead):
+
+```
+Amphigorey p7:   ext=417 KB,  pdf=427 KB,  pix=63 KB (lossy)
+Watchmen  p0:   ext=516 KB,  pdf=518 KB,  pix=224 KB (lossy)
+Nolo p0:        ext=52 KB,   pdf=55 KB,   pix=65 KB
+```
+
+`pix` (full-page rasterize) is smaller because PyMuPDF re-encodes at
+default quality; not directly comparable.
+
+Bandwidth equivalence is a non-event: matched pages are the same
+size as today's PDF responses, ±5%.
+
+## Edge cases identified
+
+### Visible OCR text on scanned PDFs
+
+`dos-world-#19.pdf` is a scan with OCR rendered visibly (mode 0,
+font `HiddenHorzOCR`). Coverage is 100% but `get_text("text")`
+returns 200–5000 chars per page. The detector rejects these pages,
+correctly — the visible OCR text *is* part of the rendered page,
+even if it overlaps the raster.
+
+These pages keep the existing `?hide_text=1` workaround on the PDF
+fallback path. Future refinement: detect mode-0 text rendered with a
+known invisible-font name (`HiddenHorzOCR` family) and treat as
+invisible. Not in scope for v1.
+
+### Multi-image pages
+
+`double-text.pdf` has 4 images (JPEG raster + 3 tiny PNGs presumably
+for icons/overlays). `extract_image` would return only one. Detector
+rejects via `n_images != 1` → PDF fallback. Correct.
+
+A future refinement could detect "1 dominant + N tiny" and ignore
+sub-1% bbox satellites; not required for v1.
+
+### CMYK colorspace
+
+Nolo Press 8 Ways page 1 has a CMYK JPEG. Pure `extract_image` would
+return CMYK bytes that Chrome renders with wrong colors. The
+transcode path through `Pixmap(csRGB, pix).tobytes("jpeg")` produces
+a 96 KB RGB JPEG that PIL verifies decodes to mode `RGB`.
+**Verified working.**
+
+### Rotation
+
+No rotated pages in test corpus. Defensive design: `page.rotation != 0`
+downgrades to `IMAGE_TRANSCODE` (Pixmap respects rotation flags).
+Worth adding to the fixture set for the test suite.
+
+## Threshold validation
+
+The `MIN_COVERAGE = 0.85` threshold is well-clear of both classes:
+
+- Image-dominant pages: 92, 93, 94, 95, 100% — all comfortably above 85%.
+- Non-dominant pages: 1, 2, 7, 19, 29, 39, 41, 50, 51, 59, 60, 63, 68% — all comfortably below.
+
+No page in the corpus sits in the 70–84% zone. The threshold could
+move to 0.80 or 0.90 without changing any verdict in this dataset.
+Defaulting to 0.85 is a defensible middle.
+
+The `MAX_TEXT_CHARS = 50` threshold is similarly safe:
+
+- Image-dominant pages: 0 chars (most), 0 chars, 0 chars …
+- Non-dominant pages: 117, 199, 275, 276, 357, 390, 514, 669, 742, 857 …
+
+Big gap between classes. The actual edge case is "scanned page with
+OCR overlay" (`dos-world`) where text count is 100s–1000s and the
+detector correctly rejects.
+
+## What this validates
+
+- Detector correctness: 100% on the test corpus. Zero false positives,
+  zero false negatives that would cause user-visible quality loss.
+- Performance: detector runs in <13 ms p99 cold; <1 ms hot. Negligible
+  next to current rendering.
+- CMYK transcode: works, ~20 ms overhead, browser-safe output verified.
+- JBIG2/JPEG2000 path: not exercised by corpus but defensive.
+
+## What this does NOT validate
+
+- **Rotated PDFs** — no fixtures. Add a synthetic `/Rotate 90` PDF
+  to the test suite.
+- **Annotations and form fields** — no fixtures. The vector-ink gate
+  (`page.get_drawings()`) likely catches these but should be tested.
+- **Encrypted PDFs** — not exercised. Existing
+  `@password-requested` handler in [`pdf-doc.vue:14`](../../frontend/src/components/reader/pager/pdf-doc.vue:14)
+  should still apply on the fallback path.
+- **JBIG2 / JPEG2000 PDFs** — defensive code is sound, but worth a
+  synthetic fixture before declaring this hardened.
+
+## Reproducing
+
+The exploratory scripts that produced these numbers
+(`prototype.py`, `probe.py`, `cmyk_probe.py`, `integration_check.py`)
+are not committed — they were one-off harnesses against a private
+PDF corpus. The detection logic itself is now in
+`codex/views/reader/_pdf_image_serve.py`; running it against a
+local corpus is a few lines of glue around `classify_page` and
+`extract_image_for_page`.
diff --git a/uv.lock b/uv.lock
index ed7f049ca..a6017f751 100644
--- a/uv.lock
+++ b/uv.lock
@@ -471,6 +471,7 @@ dependencies = [
     { name = "bidict" },
     { name = "channels" },
     { name = "comicbox", extra = ["pdf"] },
+    { name = "comicbox-pdffile" },
     { name = "dateparser" },
     { name = "django" },
     { name = "django-cachalot" },
@@ -545,7 +546,8 @@ requires-dist = [
     { name = "adrf", specifier = "~=0.1.12" },
     { name = "bidict", specifier = "~=0.23" },
     { name = "channels", specifier = "~=4.2" },
-    { name = "comicbox", extras = ["pdf"], specifier = "~=3.0.0" },
+    { name = "comicbox", extras = ["pdf"], git = "https://github.com/ajslater/comicbox.git?branch=widen-pdffile-pin" },
+    { name = "comicbox-pdffile", git = "https://github.com/ajslater/pdffile.git?branch=pdf-image-detection" },
     { name = "dateparser", specifier = "~=1.2" },
     { name = "django", specifier = "~=6.0" },
     { name = "django-cachalot", specifier = "~=2.8" },
@@ -626,8 +628,8 @@ wheels = [
 
 [[package]]
 name = "comicbox"
-version = "3.0.0"
-source = { registry = "https://pypi.org/simple" }
+version = "3.0.1"
+source = { git = "https://github.com/ajslater/comicbox.git?branch=widen-pdffile-pin#93d45deed0db5204410dc690211ce4ed0dd5b3ad" }
 dependencies = [
     { name = "ansicolors" },
     { name = "bidict" },
@@ -655,10 +657,6 @@ dependencies = [
     { name = "xmltodict" },
     { name = "zipremove" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/7d/a2/a83ddb8b2f1f26537ff7de68be10c781f10d55fa27df4e06ae3277ba85ae/comicbox-3.0.0.tar.gz", hash = "sha256:2e88f63b27a1e70efa2dd5098c9dba9e3563c8c14549782df8ae1d1b701d6d04", size = 71184214, upload-time = "2026-05-04T02:17:24.375Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/91/9a/91741d522e5adb0800f3d68bc14311d1f7c6e0dfb882767f7759d73357bf/comicbox-3.0.0-py3-none-any.whl", hash = "sha256:da682f1a79721f37acce9f643a0a44536098fc52e36cce6ff9a1fcdf0d35cf2c", size = 189956, upload-time = "2026-05-04T02:17:18.144Z" },
-]
 
 [package.optional-dependencies]
 pdf = [
@@ -667,18 +665,14 @@ pdf = [
 
 [[package]]
 name = "comicbox-pdffile"
-version = "0.5.1"
-source = { registry = "https://pypi.org/simple" }
+version = "0.6.0"
+source = { git = "https://github.com/ajslater/pdffile.git?branch=pdf-image-detection#83c05f2e7ccc5c65f00ea974d636b0d300124bcb" }
 dependencies = [
     { name = "filetype" },
     { name = "pymupdf" },
     { name = "python-dateutil" },
     { name = "typing-extensions" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/75/0e/4593ecd880842c952a216134e37b2f6ada33473b6f7cd54caa4f3e701cd6/comicbox_pdffile-0.5.1.tar.gz", hash = "sha256:6a14000dd2511da07426326425e843b12665a2414e965bf1f5671ca5c4449327", size = 176423, upload-time = "2026-05-09T01:52:37.188Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/fa/00/da2794ce34757651c7df9033b77e1c1e83f92dbb5289e1f9d627e37a39b1/comicbox_pdffile-0.5.1-py3-none-any.whl", hash = "sha256:2f048d3172f22065b5c6e0c6c2a0d2a957b1b19977e919c8d7a36178d4a3eef8", size = 7580, upload-time = "2026-05-09T01:52:36.325Z" },
-]
 
 [[package]]
 name = "comicfn2dict"

From d49d37661ce75961ab4ba02cf68c208c3ecefcde Mon Sep 17 00:00:00 2001
From: AJ Slater <aj@slater.net>
Date: Fri, 8 May 2026 21:43:52 -0700
Subject: [PATCH 2/6] TEMP: debug logging for PDF page-render failure paths
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reports came in of 404s on PDF page requests after the new
image-dominant detector landed. The catch-all ``except Exception`` in
``ReaderPageView.get`` was logging only ``str(exc)`` — no traceback,
no path context — so the actual failure point was invisible. Hook
loguru's ``logger.exception`` to surface the traceback, plus
``logger.debug`` markers at the decision points so we can see which
branch each request took:

* image-serve auto: which verdict + xref + ext we got
* image-serve force-image: bytes/ext returned
* image-serve declined: why we fell through
* legacy PDF path: bytes/content-type served, plus a wrapper that
  surfaces ``get_page_by_index`` failures with a traceback before
  the catch-all flattens them to 404

All log lines tagged ``[pdf-debug]`` for grep + an easy revert.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 codex/views/reader/page.py | 65 ++++++++++++++++++++++++++++++++++++--
 1 file changed, 63 insertions(+), 2 deletions(-)

diff --git a/codex/views/reader/page.py b/codex/views/reader/page.py
index 94bbda5d8..f96b501a8 100644
--- a/codex/views/reader/page.py
+++ b/codex/views/reader/page.py
@@ -125,6 +125,11 @@ def _try_pdf_image_serve(
             # lands upstream.
             archive = entry.comicbox._get_archive()  # noqa: SLF001
             if not isinstance(archive, PDFFile):
+                # TEMP DEBUG: shouldn't happen if caller gated on file_type
+                logger.debug(
+                    f"[pdf-debug] image-serve: archive is "
+                    f"{type(archive).__name__}, not PDFFile; path={path}"
+                )
                 return None
             pdf = archive
             page_index = int(page)
@@ -132,17 +137,35 @@ def _try_pdf_image_serve(
             if fmt_hint == _FORMAT_IMAGE:
                 # Always-image override — pixmap fallback for vector pages.
                 blob, ext = pdf.read_full_pixmap_jpeg(page_index)
+                logger.debug(  # TEMP DEBUG
+                    f"[pdf-debug] image-serve force-image: "
+                    f"page={page_index} bytes={len(blob)} ext={ext}"
+                )
                 return blob, f"image/{ext}"
 
             verdict = self._classify_cached(entry, pdf, page_index)
+            logger.debug(  # TEMP DEBUG
+                f"[pdf-debug] image-serve auto: page={page_index} "
+                f"verdict={verdict.mode.value} xref={verdict.image_xref} "
+                f"ext={verdict.ext}"
+            )
             if verdict.mode is PageMode.PDF_FALLBACK:
                 return None
             served = pdf.read_image_if_dominant(page_index)
             if served is None:
                 # Detection said dominant but extraction failed; fall
                 # through to PDF rather than serve a broken response.
+                logger.debug(  # TEMP DEBUG
+                    f"[pdf-debug] image-serve: read_image_if_dominant "
+                    f"returned None for page={page_index} despite verdict "
+                    f"{verdict.mode.value}"
+                )
                 return None
             blob, ext = served
+            logger.debug(  # TEMP DEBUG
+                f"[pdf-debug] image-serve served via {verdict.mode.value}: "
+                f"page={page_index} bytes={len(blob)} ext={ext}"
+            )
             return blob, f"image/{ext}"
 
     def _get_page_image(self) -> tuple[bytes, str]:
@@ -155,12 +178,21 @@ def _get_page_image(self) -> tuple[bytes, str]:
 
         is_pdf = file_type == FileTypeChoices.PDF.value  # pyright: ignore[reportAttributeAccessIssue]  # ty: ignore[unresolved-attribute]
 
+        logger.debug(  # TEMP DEBUG
+            f"[pdf-debug] page request: pk={pk} page={page} "
+            f"fmt={fmt_hint} is_pdf={is_pdf} path={path}"
+        )
+
         # Image-dominant fast path for PDFs (skipped when the caller
         # forces ``?format=pdf``).
         if is_pdf and fmt_hint != _FORMAT_PDF:
             served = self._try_pdf_image_serve(path, page, fmt_hint)
             if served is not None:
                 return served
+            logger.debug(  # TEMP DEBUG
+                f"[pdf-debug] image-serve declined for page={page}; "
+                f"falling through to PDF path"
+            )
 
         # Process-wide LRU of open Comicbox archives — the web reader's
         # prev/curr/next prefetch fires 3-5 page hits on the same archive
@@ -170,11 +202,30 @@ def _get_page_image(self) -> tuple[bytes, str]:
         # held inside ``archive_cache.open(...)`` serializes extraction
         # because ZipFile / RarFile / PDF backends aren't thread-safe.
         with archive_cache.open(path) as cb:
-            page_image = cb.get_page_by_index(page, pdf_format="")
+            try:
+                page_image = cb.get_page_by_index(page, pdf_format="")
+            except Exception:
+                # TEMP DEBUG: surface comicbox failures with a traceback
+                # before the catch-all in ``get`` flattens them into 404.
+                logger.exception(
+                    f"[pdf-debug] get_page_by_index failed: "
+                    f"path={path} page={page} is_pdf={is_pdf}"
+                )
+                raise
         if not page_image:
+            # TEMP DEBUG: distinguish "comicbox returned empty" from
+            # "comicbox raised" — both end up as 404 to the client.
+            logger.warning(
+                f"[pdf-debug] get_page_by_index returned empty/None: "
+                f"path={path} page={page} is_pdf={is_pdf}"
+            )
             page_image = b""
 
         content_type = _PDF_MIME_TYPE if is_pdf else self.content_type
+        logger.debug(  # TEMP DEBUG
+            f"[pdf-debug] legacy PDF path served: page={page} "
+            f"bytes={len(page_image)} ct={content_type}"
+        )
         return page_image, content_type
 
     @extend_schema(
@@ -213,7 +264,17 @@ def get(self, *_args, **_kwargs) -> HttpResponse:
             detail = f"comic path for {pk} not found: {exc}."
             raise NotFound(detail=detail) from exc
         except Exception as exc:
-            logger.warning(exc)
+            # TEMP DEBUG: ``logger.exception`` includes the traceback
+            # so we see *why* the request failed instead of just the
+            # exception's str. Revert to ``logger.warning(exc)`` once
+            # the PDF-rendering paths are stable.
+            pk = self.kwargs.get("pk")
+            page = self.kwargs.get("page")
+            fmt = self.request.GET.get("format", "auto")
+            logger.exception(
+                f"[pdf-debug] page request failed: pk={pk} page={page} "
+                f"fmt={fmt} {type(exc).__name__}: {exc}"
+            )
             raise NotFound(detail="comic page not found") from exc
         else:
             return HttpResponse(page_image, content_type=content_type)

From be980887a76c8809d135c577f11447a6c98ec9ce Mon Sep 17 00:00:00 2001
From: AJ Slater <aj@slater.net>
Date: Fri, 8 May 2026 21:56:46 -0700
Subject: [PATCH 3/6] TEMP: log view entry/exit + Comic.DoesNotExist +
 FileNotFound

The earlier debug pass logged the catch-all exception path but
silently 404'd through the Comic.DoesNotExist / FileNotFoundError
handlers (no log line). User reports a 404 on /api/v3/c/1/0/page.jpg
that has *no* matching [pdf-debug] log line, meaning the request
either bombs out before _get_page_image (in _resolve_path_and_type's
ACL/DB lookup) or gets caught by one of the silent handlers.

Add view-entry, view-exit, and per-handler log lines so every 404
correlates to one specific [pdf-debug] line. Entry log includes the
User-Agent and Referer so we can tell whether a failing request
comes from <img>, vue-pdf-embed's fetch, a prefetch, or a 'Read in
Tab' direct nav.

All TEMP DEBUG; revert with grep -rlF '[pdf-debug]' once stable.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 codex/views/reader/page.py | 31 ++++++++++++++++++++++++++-----
 1 file changed, 26 insertions(+), 5 deletions(-)

diff --git a/codex/views/reader/page.py b/codex/views/reader/page.py
index f96b501a8..3b4dbe67e 100644
--- a/codex/views/reader/page.py
+++ b/codex/views/reader/page.py
@@ -252,15 +252,35 @@ def _get_page_image(self) -> tuple[bytes, str]:
     )
     def get(self, *_args, **_kwargs) -> HttpResponse:
         """Get the comic page from the archive."""
+        # TEMP DEBUG: log every entry to ReaderPageView.get so requests
+        # that 404 in ``_resolve_path_and_type`` (Comic.DoesNotExist)
+        # don't disappear before the existing ``page request:`` log.
+        pk = self.kwargs.get("pk")
+        page = self.kwargs.get("page")
+        fmt = self.request.GET.get("format", "auto")
+        ua = self.request.headers.get("User-Agent", "?")[:60]
+        referer = self.request.headers.get("Referer", "?")
+        logger.debug(
+            f"[pdf-debug] >>>>> view entry: pk={pk} page={page} "
+            f"fmt={fmt} ua={ua!r} referer={referer!r}"
+        )
         try:
             page_image, content_type = self._get_page_image()
             self._update_bookmark()
         except Comic.DoesNotExist as exc:
-            pk = self.kwargs.get("pk")
+            # TEMP DEBUG: surface the ACL-filter miss explicitly.
+            logger.warning(
+                f"[pdf-debug] Comic.DoesNotExist: pk={pk} page={page} "
+                f"fmt={fmt} (ACL filter rejected or comic deleted)"
+            )
             detail = f"comic {pk} not found in db."
             raise NotFound(detail=detail) from exc
         except FileNotFoundError as exc:
-            pk = self.kwargs.get("pk")
+            # TEMP DEBUG: comic path missing on disk.
+            logger.warning(
+                f"[pdf-debug] FileNotFoundError: pk={pk} page={page} "
+                f"fmt={fmt}: {exc}"
+            )
             detail = f"comic path for {pk} not found: {exc}."
             raise NotFound(detail=detail) from exc
         except Exception as exc:
@@ -268,13 +288,14 @@ def get(self, *_args, **_kwargs) -> HttpResponse:
             # so we see *why* the request failed instead of just the
             # exception's str. Revert to ``logger.warning(exc)`` once
             # the PDF-rendering paths are stable.
-            pk = self.kwargs.get("pk")
-            page = self.kwargs.get("page")
-            fmt = self.request.GET.get("format", "auto")
             logger.exception(
                 f"[pdf-debug] page request failed: pk={pk} page={page} "
                 f"fmt={fmt} {type(exc).__name__}: {exc}"
             )
             raise NotFound(detail="comic page not found") from exc
         else:
+            logger.debug(
+                f"[pdf-debug] <<<<< view exit OK: pk={pk} page={page} "
+                f"fmt={fmt} bytes={len(page_image)} ct={content_type}"
+            )
             return HttpResponse(page_image, content_type=content_type)

From f970a3bb2ccabf5dc0ea79382b3ba5d07fc07925 Mon Sep 17 00:00:00 2001
From: AJ Slater <aj@slater.net>
Date: Fri, 8 May 2026 22:11:03 -0700
Subject: [PATCH 4/6] =?UTF-8?q?fix:=20rename=20=3Fformat=3D=20=E2=86=92=20?=
 =?UTF-8?q?=3Fserve=3D=20to=20dodge=20DRF's=20URL=5FFORMAT=5FOVERRIDE?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bug
===

PDF pages were 404'ing with ``?format=pdf`` (the ``serve as a
single-page PDF blob to vue-pdf-embed`` path). Server logs showed the
URL request reaching ``ReaderPageView.get`` for the no-param ``<img>``
attempt and serving the PDF bytes successfully. The follow-up
``?format=pdf`` request 404'd before the view's entry-debug log
fired, with the response body shaped like DRF's
``{"detail": "Not found."}`` and ``Content-Type: application/json``.

Cause
=====

DRF reserves ``?format=`` (``REST_FRAMEWORK['URL_FORMAT_OVERRIDE']``,
default ``'format'``) as a renderer-format selector.
``DefaultContentNegotiation.filter_renderers(renderers, 'pdf')`` runs
inside ``APIView.initial()`` (called from ``dispatch()``) *before* the
view's ``get`` handler. With no PDF renderer registered, that method
raises ``Http404``, which DRF's exception handler renders as
``NotFound``. Per DRF source (abridged):

    def filter_renderers(self, renderers, format):
        renderers = [r for r in renderers if r.format == format]
        if not renderers:
            raise Http404
        return renderers

So the request was getting a 404 from DRF's content negotiator
before the view code ran — explaining the missing entry log.

Fix
===

Rename the query parameter from ``format`` to ``serve`` end-to-end:

* Backend: ``_FORMAT_*`` constants → ``_SERVE_*``; OpenAPI schema
  parameter renamed; debug-log fields renamed.
* Frontend: ``getComicPageSource({ ..., format })`` →
  ``({ ..., serve })`` and the URL builder emits ``&serve=`` instead
  of ``&format=``. Internal field names (``pdfRenderMode`` etc.)
  unchanged — only the wire param renamed.

Verified by curl:

    HEAD ?ts=...&format=pdf  →  404 (DRF NotFound)
    HEAD ?ts=...&serve=pdf   →  200 application/pdf  (✓ fixed)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 codex/views/reader/page.py                    | 58 ++++++++++---------
 frontend/src/api/v3/reader.js                 | 13 +++--
 .../src/components/reader/pager/page/page.vue |  8 +--
 frontend/src/stores/reader.js                 |  2 +-
 4 files changed, 46 insertions(+), 35 deletions(-)

diff --git a/codex/views/reader/page.py b/codex/views/reader/page.py
index 3b4dbe67e..5c8c838d2 100644
--- a/codex/views/reader/page.py
+++ b/codex/views/reader/page.py
@@ -27,15 +27,21 @@
 
 _PDF_MIME_TYPE: Final[str] = "application/pdf"
 
-#: Permitted ``?format=`` values. ``auto`` runs the detector; ``pdf``
+#: Query-parameter name that selects the PDF serving mode. Picked to
+#: avoid colliding with DRF's reserved ``?format=`` (``URL_FORMAT_OVERRIDE``)
+#: which DRF interprets as a renderer-format selector — it raises
+#: ``NotFound`` for unknown values *before* the view's ``get`` runs.
+_SERVE_PARAM: Final[str] = "serve"
+
+#: Permitted ``?serve=`` values. ``auto`` runs the detector; ``pdf``
 #: skips the detector and forces the legacy single-page-PDF path;
 #: ``image`` forces a server-side rasterize (works for any PDF page
 #: but spends more CPU than the detector path on vector-heavy pages).
-_FORMAT_AUTO: Final[str] = "auto"
-_FORMAT_PDF: Final[str] = "pdf"
-_FORMAT_IMAGE: Final[str] = "image"
-_FORMAT_HINTS: Final[frozenset[str]] = frozenset(
-    {_FORMAT_AUTO, _FORMAT_PDF, _FORMAT_IMAGE}
+_SERVE_AUTO: Final[str] = "auto"
+_SERVE_PDF: Final[str] = "pdf"
+_SERVE_IMAGE: Final[str] = "image"
+_SERVE_HINTS: Final[frozenset[str]] = frozenset(
+    {_SERVE_AUTO, _SERVE_PDF, _SERVE_IMAGE}
 )
 
 
@@ -91,9 +97,9 @@ def _resolve_path_and_type(self, pk) -> tuple[str, str | None]:
         page_acl_cache.put(cache_key, path, file_type, now)
         return path, file_type
 
-    def _format_hint(self) -> str:
-        raw = self.request.GET.get("format", _FORMAT_AUTO).lower()
-        return raw if raw in _FORMAT_HINTS else _FORMAT_AUTO
+    def _serve_hint(self) -> str:
+        raw = self.request.GET.get(_SERVE_PARAM, _SERVE_AUTO).lower()
+        return raw if raw in _SERVE_HINTS else _SERVE_AUTO
 
     @staticmethod
     def _classify_cached(entry: _ArchiveEntry, pdf: PDFFile, page: int) -> PageVerdict:
@@ -106,7 +112,7 @@ def _classify_cached(entry: _ArchiveEntry, pdf: PDFFile, page: int) -> PageVerdi
         return verdict
 
     def _try_pdf_image_serve(
-        self, path: str, page: int, fmt_hint: str
+        self, path: str, page: int, serve_hint: str
     ) -> tuple[bytes, str] | None:
         """
         Image-serve fast path for PDF pages.
@@ -134,7 +140,7 @@ def _try_pdf_image_serve(
             pdf = archive
             page_index = int(page)
 
-            if fmt_hint == _FORMAT_IMAGE:
+            if serve_hint == _SERVE_IMAGE:
                 # Always-image override — pixmap fallback for vector pages.
                 blob, ext = pdf.read_full_pixmap_jpeg(page_index)
                 logger.debug(  # TEMP DEBUG
@@ -174,19 +180,19 @@ def _get_page_image(self) -> tuple[bytes, str]:
         path, file_type = self._resolve_path_and_type(pk)
 
         page = self.kwargs.get("page")
-        fmt_hint = self._format_hint()
+        serve_hint = self._serve_hint()
 
         is_pdf = file_type == FileTypeChoices.PDF.value  # pyright: ignore[reportAttributeAccessIssue]  # ty: ignore[unresolved-attribute]
 
         logger.debug(  # TEMP DEBUG
             f"[pdf-debug] page request: pk={pk} page={page} "
-            f"fmt={fmt_hint} is_pdf={is_pdf} path={path}"
+            f"serve={serve_hint} is_pdf={is_pdf} path={path}"
         )
 
         # Image-dominant fast path for PDFs (skipped when the caller
-        # forces ``?format=pdf``).
-        if is_pdf and fmt_hint != _FORMAT_PDF:
-            served = self._try_pdf_image_serve(path, page, fmt_hint)
+        # forces ``?serve=pdf``).
+        if is_pdf and serve_hint != _SERVE_PDF:
+            served = self._try_pdf_image_serve(path, page, serve_hint)
             if served is not None:
                 return served
             logger.debug(  # TEMP DEBUG
@@ -232,12 +238,12 @@ def _get_page_image(self) -> tuple[bytes, str]:
         parameters=[
             OpenApiParameter("bookmark", OpenApiTypes.BOOL, default=True),
             OpenApiParameter(
-                "format",
+                _SERVE_PARAM,
                 OpenApiTypes.STR,
-                default=_FORMAT_AUTO,
-                enum=sorted(_FORMAT_HINTS),
+                default=_SERVE_AUTO,
+                enum=sorted(_SERVE_HINTS),
                 description=(
-                    "PDF rendering hint: 'auto' (detector), "
+                    "PDF serving mode: 'auto' (detector), "
                     "'pdf' (legacy single-page PDF), "
                     "'image' (always rasterize). Ignored for non-PDF archives."
                 ),
@@ -257,12 +263,12 @@ def get(self, *_args, **_kwargs) -> HttpResponse:
         # don't disappear before the existing ``page request:`` log.
         pk = self.kwargs.get("pk")
         page = self.kwargs.get("page")
-        fmt = self.request.GET.get("format", "auto")
+        serve = self.request.GET.get(_SERVE_PARAM, _SERVE_AUTO)
         ua = self.request.headers.get("User-Agent", "?")[:60]
         referer = self.request.headers.get("Referer", "?")
         logger.debug(
             f"[pdf-debug] >>>>> view entry: pk={pk} page={page} "
-            f"fmt={fmt} ua={ua!r} referer={referer!r}"
+            f"serve={serve} ua={ua!r} referer={referer!r}"
         )
         try:
             page_image, content_type = self._get_page_image()
@@ -271,7 +277,7 @@ def get(self, *_args, **_kwargs) -> HttpResponse:
             # TEMP DEBUG: surface the ACL-filter miss explicitly.
             logger.warning(
                 f"[pdf-debug] Comic.DoesNotExist: pk={pk} page={page} "
-                f"fmt={fmt} (ACL filter rejected or comic deleted)"
+                f"serve={serve} (ACL filter rejected or comic deleted)"
             )
             detail = f"comic {pk} not found in db."
             raise NotFound(detail=detail) from exc
@@ -279,7 +285,7 @@ def get(self, *_args, **_kwargs) -> HttpResponse:
             # TEMP DEBUG: comic path missing on disk.
             logger.warning(
                 f"[pdf-debug] FileNotFoundError: pk={pk} page={page} "
-                f"fmt={fmt}: {exc}"
+                f"serve={serve}: {exc}"
             )
             detail = f"comic path for {pk} not found: {exc}."
             raise NotFound(detail=detail) from exc
@@ -290,12 +296,12 @@ def get(self, *_args, **_kwargs) -> HttpResponse:
             # the PDF-rendering paths are stable.
             logger.exception(
                 f"[pdf-debug] page request failed: pk={pk} page={page} "
-                f"fmt={fmt} {type(exc).__name__}: {exc}"
+                f"serve={serve} {type(exc).__name__}: {exc}"
             )
             raise NotFound(detail="comic page not found") from exc
         else:
             logger.debug(
                 f"[pdf-debug] <<<<< view exit OK: pk={pk} page={page} "
-                f"fmt={fmt} bytes={len(page_image)} ct={content_type}"
+                f"serve={serve} bytes={len(page_image)} ct={content_type}"
             )
             return HttpResponse(page_image, content_type=content_type)
diff --git a/frontend/src/api/v3/reader.js b/frontend/src/api/v3/reader.js
index e6d7f0ac8..1ef125007 100644
--- a/frontend/src/api/v3/reader.js
+++ b/frontend/src/api/v3/reader.js
@@ -25,17 +25,22 @@ export const getReaderInfo = (pk, data, ts, options = {}) => {
 const _getReaderAPIPath = (pk) =>
   globalThis.CODEX.API_V3_PATH + _getBookPath(pk);
 
-export const getComicPageSource = ({ pk, page, mtime, format }) => {
-  // ``format`` is the optional PDF rendering hint forwarded to the
+export const getComicPageSource = ({ pk, page, mtime, serve }) => {
+  // ``serve`` is the optional PDF serving-mode hint forwarded to the
   // backend ``ReaderPageView``: ``auto`` (detector decides),
   // ``image`` (always rasterize), or ``pdf`` (skip the detector and
   // serve a single-page PDF blob). Ignored by the backend for
   // non-PDF archives. Omitting the param keeps the URL identical to
   // the legacy shape so HTTP caches don't fragment.
+  //
+  // The query name is ``serve`` rather than ``format`` because DRF
+  // reserves ``?format=`` (URL_FORMAT_OVERRIDE) as a renderer-format
+  // selector and raises NotFound for unknown values before the view
+  // dispatches.
   const bookAPIPath = _getReaderAPIPath(pk);
   let url = `${bookAPIPath}/${page}/page.jpg?ts=${mtime}`;
-  if (format && format !== "auto") {
-    url += `&format=${format}`;
+  if (serve && serve !== "auto") {
+    url += `&serve=${serve}`;
   }
   return url;
 };
diff --git a/frontend/src/components/reader/pager/page/page.vue b/frontend/src/components/reader/pager/page/page.vue
index 538b09eaf..786287cf9 100644
--- a/frontend/src/components/reader/pager/page/page.vue
+++ b/frontend/src/components/reader/pager/page/page.vue
@@ -78,7 +78,7 @@ export default {
        * ``true`` once we've seen an ``<img>`` load fail on a PDF
        * book — the response was ``application/pdf`` (the detector
        * declined to serve as image), so we re-mount the page through
-       * ``<PDFDoc>`` against the same URL with ``?format=pdf``.
+       * ``<PDFDoc>`` against the same URL with ``?serve=pdf``.
        */
       pdfFallback: false,
     };
@@ -111,13 +111,13 @@ export default {
         pk: this.book.pk,
         page: this.page,
         mtime,
-        format: this.activeFormat,
+        serve: this.activeServe,
       };
       return getComicPageSource(params);
     },
-    activeFormat() {
+    activeServe() {
       /*
-       * For non-PDF books the format param is ignored by the
+       * For non-PDF books the serve param is ignored by the
        * backend; we still pass it through so the URL is stable.
        */
       if (!this.isPDF) {
diff --git a/frontend/src/stores/reader.js b/frontend/src/stores/reader.js
index 3920cf3b9..a316e0b5e 100644
--- a/frontend/src/stores/reader.js
+++ b/frontend/src/stores/reader.js
@@ -883,7 +883,7 @@ export const useReaderStore = defineStore("reader", {
         pk: params.pk,
         page,
         mtime: book.mtime,
-        format: this.clientSettings?.pdfRenderMode,
+        serve: this.clientSettings?.pdfRenderMode,
       };
       return READER_API.getComicPageSource(paramsPlus);
     },

From e1a1d66b81bf9c06d65c17200e0f4362adedfffd Mon Sep 17 00:00:00 2001
From: AJ Slater <aj@slater.net>
Date: Fri, 8 May 2026 22:15:09 -0700
Subject: [PATCH 5/6] fix: don't cache error responses on the page endpoint
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Companion to f970a3b. The ``cache_control(max_age=PAGE_MAX_AGE,
public=True)`` decorator on ``page.jpg`` was patching its
``Cache-Control`` header onto *every* response — including 4xx —
because Django's ``cache_control`` doesn't filter by status.

That turned every transient failure into a week-long cache poison.
Symptom in the field: three concurrent ``?format=pdf`` 404s in the
browser surfaced only one server-side log line; the other two were
served from the browser HTTP cache (cached from earlier ``format=pdf``
requests in prior sessions, when DRF's URL_FORMAT_OVERRIDE rejected
the param). Once a 4xx ships with ``Cache-Control: public, max-age=
604800`` the browser pins it for a week and never asks again.

Add ``codex.views.util.cache_control_2xx`` — same shape as Django's
``cache_control`` but only patches the header on responses with
status 200-299. Swap it in on the page endpoint. Other routes that
use ``cache_control`` are mostly cover endpoints + book.pdf
(server-side cached via ``cache_page``), where 4xx is rare and
short-lived; leave them alone for now.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 codex/urls/api/reader.py |  8 +++++++-
 codex/views/util.py      | 34 ++++++++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/codex/urls/api/reader.py b/codex/urls/api/reader.py
index 4d76ef426..394883b6b 100644
--- a/codex/urls/api/reader.py
+++ b/codex/urls/api/reader.py
@@ -10,6 +10,7 @@
 from codex.views.reader.page import ReaderPageView
 from codex.views.reader.reader import ReaderView
 from codex.views.reader.settings import ReaderSettingsView
+from codex.views.util import cache_control_2xx
 
 app_name = "issue"
 urlpatterns = [
@@ -28,7 +29,12 @@
     ),
     path(
         "<int:pk>/<int:page>/page.jpg",
-        cache_control(max_age=PAGE_MAX_AGE, public=True)(ReaderPageView.as_view()),
+        # ``cache_control_2xx`` (not Django's ``cache_control``) — error
+        # responses from this endpoint must NOT be cached. The view can
+        # return 404 from DRF content negotiation, ACL filter misses,
+        # missing-file errors, etc; with ``public, max-age=PAGE_MAX_AGE``
+        # the browser would pin a transient failure for a week.
+        cache_control_2xx(max_age=PAGE_MAX_AGE, public=True)(ReaderPageView.as_view()),
         name="page",
     ),
     path(
diff --git a/codex/views/util.py b/codex/views/util.py
index 8e8b1444c..d744edf1e 100644
--- a/codex/views/util.py
+++ b/codex/views/util.py
@@ -2,8 +2,42 @@
 
 from collections.abc import Mapping
 from dataclasses import dataclass
+from functools import wraps
 from typing import override
 
+from django.utils.cache import patch_cache_control
+
+
+def cache_control_2xx(**kwargs):
+    """
+    Patch ``Cache-Control`` only on 2xx responses.
+
+    Like ``django.views.decorators.cache.cache_control`` but only
+    emits the header for success responses.
+
+    Django's ``cache_control`` patches the header onto every response,
+    including 4xx and 5xx. With ``public, max-age=<long>`` that turns
+    a transient error (a missing file, an ACL miss, a one-off
+    backend hiccup) into a week-long cache poison: every browser that
+    saw the failure keeps serving it from cache without ever reaching
+    the server. This wrapper only marks success responses cacheable;
+    errors are returned with whatever default headers DRF / Django
+    already set (typically uncached).
+    """
+
+    def _wrap(viewfunc):
+        @wraps(viewfunc)
+        def _wrapped(request, *args, **kw):
+            response = viewfunc(request, *args, **kw)
+            status = getattr(response, "status_code", 0)
+            if 200 <= status < 300:  # noqa: PLR2004
+                patch_cache_control(response, **kwargs)
+            return response
+
+        return _wrapped
+
+    return _wrap
+
 
 @dataclass
 class Route:

From 28839e70e0904b5024b8e2d4861d75b378ebedf4 Mon Sep 17 00:00:00 2001
From: AJ Slater <aj@slater.net>
Date: Fri, 8 May 2026 22:19:49 -0700
Subject: [PATCH 6/6] remove temporary [pdf-debug] logging from ReaderPageView
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The PDF rendering paths are stable now (?format= → ?serve= rename
fixed the DRF NotFound issue, cache_control_2xx fixed the cached-404
issue). Strip the temporary view-entry/exit, image-serve-decision,
and exception logging that helped diagnose those.

Reverts:
* logger.exception(...) in catch-all → logger.warning(exc) (original)
* Surfacing legacy get_page_by_index failures with traceback
* All [pdf-debug] DEBUG-level decision logs

The structural fixes — ?serve= param, classify-on-cache-entry,
cache_control_2xx — stay.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 codex/views/reader/page.py | 90 ++------------------------------------
 1 file changed, 4 insertions(+), 86 deletions(-)

diff --git a/codex/views/reader/page.py b/codex/views/reader/page.py
index 5c8c838d2..2552eec3f 100644
--- a/codex/views/reader/page.py
+++ b/codex/views/reader/page.py
@@ -131,11 +131,6 @@ def _try_pdf_image_serve(
             # lands upstream.
             archive = entry.comicbox._get_archive()  # noqa: SLF001
             if not isinstance(archive, PDFFile):
-                # TEMP DEBUG: shouldn't happen if caller gated on file_type
-                logger.debug(
-                    f"[pdf-debug] image-serve: archive is "
-                    f"{type(archive).__name__}, not PDFFile; path={path}"
-                )
                 return None
             pdf = archive
             page_index = int(page)
@@ -143,35 +138,17 @@ def _try_pdf_image_serve(
             if serve_hint == _SERVE_IMAGE:
                 # Always-image override — pixmap fallback for vector pages.
                 blob, ext = pdf.read_full_pixmap_jpeg(page_index)
-                logger.debug(  # TEMP DEBUG
-                    f"[pdf-debug] image-serve force-image: "
-                    f"page={page_index} bytes={len(blob)} ext={ext}"
-                )
                 return blob, f"image/{ext}"
 
             verdict = self._classify_cached(entry, pdf, page_index)
-            logger.debug(  # TEMP DEBUG
-                f"[pdf-debug] image-serve auto: page={page_index} "
-                f"verdict={verdict.mode.value} xref={verdict.image_xref} "
-                f"ext={verdict.ext}"
-            )
             if verdict.mode is PageMode.PDF_FALLBACK:
                 return None
             served = pdf.read_image_if_dominant(page_index)
             if served is None:
                 # Detection said dominant but extraction failed; fall
                 # through to PDF rather than serve a broken response.
-                logger.debug(  # TEMP DEBUG
-                    f"[pdf-debug] image-serve: read_image_if_dominant "
-                    f"returned None for page={page_index} despite verdict "
-                    f"{verdict.mode.value}"
-                )
                 return None
             blob, ext = served
-            logger.debug(  # TEMP DEBUG
-                f"[pdf-debug] image-serve served via {verdict.mode.value}: "
-                f"page={page_index} bytes={len(blob)} ext={ext}"
-            )
             return blob, f"image/{ext}"
 
     def _get_page_image(self) -> tuple[bytes, str]:
@@ -184,21 +161,12 @@ def _get_page_image(self) -> tuple[bytes, str]:
 
         is_pdf = file_type == FileTypeChoices.PDF.value  # pyright: ignore[reportAttributeAccessIssue]  # ty: ignore[unresolved-attribute]
 
-        logger.debug(  # TEMP DEBUG
-            f"[pdf-debug] page request: pk={pk} page={page} "
-            f"serve={serve_hint} is_pdf={is_pdf} path={path}"
-        )
-
         # Image-dominant fast path for PDFs (skipped when the caller
         # forces ``?serve=pdf``).
         if is_pdf and serve_hint != _SERVE_PDF:
             served = self._try_pdf_image_serve(path, page, serve_hint)
             if served is not None:
                 return served
-            logger.debug(  # TEMP DEBUG
-                f"[pdf-debug] image-serve declined for page={page}; "
-                f"falling through to PDF path"
-            )
 
         # Process-wide LRU of open Comicbox archives — the web reader's
         # prev/curr/next prefetch fires 3-5 page hits on the same archive
@@ -208,30 +176,11 @@ def _get_page_image(self) -> tuple[bytes, str]:
         # held inside ``archive_cache.open(...)`` serializes extraction
         # because ZipFile / RarFile / PDF backends aren't thread-safe.
         with archive_cache.open(path) as cb:
-            try:
-                page_image = cb.get_page_by_index(page, pdf_format="")
-            except Exception:
-                # TEMP DEBUG: surface comicbox failures with a traceback
-                # before the catch-all in ``get`` flattens them into 404.
-                logger.exception(
-                    f"[pdf-debug] get_page_by_index failed: "
-                    f"path={path} page={page} is_pdf={is_pdf}"
-                )
-                raise
+            page_image = cb.get_page_by_index(page, pdf_format="")
         if not page_image:
-            # TEMP DEBUG: distinguish "comicbox returned empty" from
-            # "comicbox raised" — both end up as 404 to the client.
-            logger.warning(
-                f"[pdf-debug] get_page_by_index returned empty/None: "
-                f"path={path} page={page} is_pdf={is_pdf}"
-            )
             page_image = b""
 
         content_type = _PDF_MIME_TYPE if is_pdf else self.content_type
-        logger.debug(  # TEMP DEBUG
-            f"[pdf-debug] legacy PDF path served: page={page} "
-            f"bytes={len(page_image)} ct={content_type}"
-        )
         return page_image, content_type
 
     @extend_schema(
@@ -258,50 +207,19 @@ def _get_page_image(self) -> tuple[bytes, str]:
     )
     def get(self, *_args, **_kwargs) -> HttpResponse:
         """Get the comic page from the archive."""
-        # TEMP DEBUG: log every entry to ReaderPageView.get so requests
-        # that 404 in ``_resolve_path_and_type`` (Comic.DoesNotExist)
-        # don't disappear before the existing ``page request:`` log.
-        pk = self.kwargs.get("pk")
-        page = self.kwargs.get("page")
-        serve = self.request.GET.get(_SERVE_PARAM, _SERVE_AUTO)
-        ua = self.request.headers.get("User-Agent", "?")[:60]
-        referer = self.request.headers.get("Referer", "?")
-        logger.debug(
-            f"[pdf-debug] >>>>> view entry: pk={pk} page={page} "
-            f"serve={serve} ua={ua!r} referer={referer!r}"
-        )
         try:
             page_image, content_type = self._get_page_image()
             self._update_bookmark()
         except Comic.DoesNotExist as exc:
-            # TEMP DEBUG: surface the ACL-filter miss explicitly.
-            logger.warning(
-                f"[pdf-debug] Comic.DoesNotExist: pk={pk} page={page} "
-                f"serve={serve} (ACL filter rejected or comic deleted)"
-            )
+            pk = self.kwargs.get("pk")
             detail = f"comic {pk} not found in db."
             raise NotFound(detail=detail) from exc
         except FileNotFoundError as exc:
-            # TEMP DEBUG: comic path missing on disk.
-            logger.warning(
-                f"[pdf-debug] FileNotFoundError: pk={pk} page={page} "
-                f"serve={serve}: {exc}"
-            )
+            pk = self.kwargs.get("pk")
             detail = f"comic path for {pk} not found: {exc}."
             raise NotFound(detail=detail) from exc
         except Exception as exc:
-            # TEMP DEBUG: ``logger.exception`` includes the traceback
-            # so we see *why* the request failed instead of just the
-            # exception's str. Revert to ``logger.warning(exc)`` once
-            # the PDF-rendering paths are stable.
-            logger.exception(
-                f"[pdf-debug] page request failed: pk={pk} page={page} "
-                f"serve={serve} {type(exc).__name__}: {exc}"
-            )
+            logger.warning(exc)
             raise NotFound(detail="comic page not found") from exc
         else:
-            logger.debug(
-                f"[pdf-debug] <<<<< view exit OK: pk={pk} page={page} "
-                f"serve={serve} bytes={len(page_image)} ct={content_type}"
-            )
             return HttpResponse(page_image, content_type=content_type)