diff --git a/codex/urls/api/reader.py b/codex/urls/api/reader.py index 4d76ef426..394883b6b 100644 --- a/codex/urls/api/reader.py +++ b/codex/urls/api/reader.py @@ -10,6 +10,7 @@ from codex.views.reader.page import ReaderPageView from codex.views.reader.reader import ReaderView from codex.views.reader.settings import ReaderSettingsView +from codex.views.util import cache_control_2xx app_name = "issue" urlpatterns = [ @@ -28,7 +29,12 @@ ), path( "//page.jpg", - cache_control(max_age=PAGE_MAX_AGE, public=True)(ReaderPageView.as_view()), + # ``cache_control_2xx`` (not Django's ``cache_control``) — error + # responses from this endpoint must NOT be cached. The view can + # return 404 from DRF content negotiation, ACL filter misses, + # missing-file errors, etc; with ``public, max-age=PAGE_MAX_AGE`` + # the browser would pin a transient failure for a week. + cache_control_2xx(max_age=PAGE_MAX_AGE, public=True)(ReaderPageView.as_view()), name="page", ), path( diff --git a/codex/views/reader/_archive_cache.py b/codex/views/reader/_archive_cache.py index 95ffffe00..f8cfe3831 100644 --- a/codex/views/reader/_archive_cache.py +++ b/codex/views/reader/_archive_cache.py @@ -44,7 +44,7 @@ import time from collections import OrderedDict from contextlib import contextmanager -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any from comicbox.box import Comicbox from loguru import logger @@ -79,13 +79,27 @@ def _env_bool(name: str, *, default: bool) -> bool: class _ArchiveEntry: """One cached Comicbox + its per-path lock + last-access timestamp.""" - __slots__ = ("comicbox", "last_access", "lock", "path") + __slots__ = ( + "comicbox", + "last_access", + "lock", + "path", + "verdicts", + ) def __init__(self, path: str, comicbox: Comicbox, last_access: float) -> None: self.path = path self.comicbox = comicbox self.lock = threading.Lock() self.last_access = last_access + # Per-page image-serve verdict cache (``pdffile.PageVerdict`` + # instances keyed on zero-based page index). Memoized here so + # repeated ``ReaderPageView`` hits on the same archive don't + # re-classify; the underlying ``classify_page`` call is cheap + # but the cache makes prev/curr/next prefetch effectively + # free. Typed loosely to keep ``pdffile`` out of this module's + # import surface. + self.verdicts: dict[int, Any] = {} def close(self) -> None: """Close the cached archive; tolerate already-closed state.""" @@ -178,6 +192,25 @@ def open(self, path: str) -> Generator[Comicbox]: with entry.lock: yield entry.comicbox + @contextmanager + def open_entry(self, path: str) -> Generator[_ArchiveEntry]: + """ + Yield the full ``_ArchiveEntry`` for direct cache-state access. + + Used by callers that need the ``Comicbox`` and the per-page + verdict memo (``ReaderPageView``'s image-serve fast path). + Same locking shape as ``open()``. When the cache is disabled, + synthesizes a transient entry so callers see a uniform API; + verdict memoization is a no-op in that mode. + """ + if not self.enabled: + with Comicbox(path, config=COMICBOX_CONFIG, logger=logger) as cb: + yield _ArchiveEntry(path, cb, time.monotonic()) + return + entry = self._open_or_get(path) + with entry.lock: + yield entry + def shutdown(self) -> None: """Close every cached archive. Wired to ``atexit`` at module load.""" with self._struct_lock: diff --git a/codex/views/reader/page.py b/codex/views/reader/page.py index 10f43a75b..2552eec3f 100644 --- a/codex/views/reader/page.py +++ b/codex/views/reader/page.py @@ -1,26 +1,47 @@ """Views for reading comic books.""" +from __future__ import annotations + import time +from typing import TYPE_CHECKING, Final from django.http import HttpResponse from drf_spectacular.types import OpenApiTypes from drf_spectacular.utils import OpenApiParameter, extend_schema from loguru import logger -from pdffile import PageFormat +from pdffile import PageMode, PDFFile from rest_framework.exceptions import NotFound from codex.librarian.bookmark.tasks import BookmarkUpdateTask from codex.librarian.mp_queue import LIBRARIAN_QUEUE from codex.models.choices import FileTypeChoices from codex.models.comic import Comic -from codex.settings import FALSY from codex.views.auth import AuthFilterAPIView from codex.views.bookmark import BookmarkAuthMixin from codex.views.reader._archive_cache import archive_cache, page_acl_cache -_PDF_MIME_TYPE = "application/pdf" -_PDF_FORMAT_NON_PDF_TYPES = frozenset( - {e.value for e in (PageFormat.PIXMAP, PageFormat.IMAGE)} +if TYPE_CHECKING: + from pdffile import PageVerdict + + from codex.views.reader._archive_cache import _ArchiveEntry + +_PDF_MIME_TYPE: Final[str] = "application/pdf" + +#: Query-parameter name that selects the PDF serving mode. Picked to +#: avoid colliding with DRF's reserved ``?format=`` (``URL_FORMAT_OVERRIDE``) +#: which DRF interprets as a renderer-format selector — it raises +#: ``NotFound`` for unknown values *before* the view's ``get`` runs. +_SERVE_PARAM: Final[str] = "serve" + +#: Permitted ``?serve=`` values. ``auto`` runs the detector; ``pdf`` +#: skips the detector and forces the legacy single-page-PDF path; +#: ``image`` forces a server-side rasterize (works for any PDF page +#: but spends more CPU than the detector path on vector-heavy pages). +_SERVE_AUTO: Final[str] = "auto" +_SERVE_PDF: Final[str] = "pdf" +_SERVE_IMAGE: Final[str] = "image" +_SERVE_HINTS: Final[frozenset[str]] = frozenset( + {_SERVE_AUTO, _SERVE_PDF, _SERVE_IMAGE} ) @@ -76,18 +97,77 @@ def _resolve_path_and_type(self, pk) -> tuple[str, str | None]: page_acl_cache.put(cache_key, path, file_type, now) return path, file_type - def _get_page_image(self) -> tuple: + def _serve_hint(self) -> str: + raw = self.request.GET.get(_SERVE_PARAM, _SERVE_AUTO).lower() + return raw if raw in _SERVE_HINTS else _SERVE_AUTO + + @staticmethod + def _classify_cached(entry: _ArchiveEntry, pdf: PDFFile, page: int) -> PageVerdict: + """Memoize ``pdf.classify_page`` on the cache entry.""" + cached = entry.verdicts.get(page) + if cached is not None: + return cached + verdict = pdf.classify_page(page) + entry.verdicts[page] = verdict + return verdict + + def _try_pdf_image_serve( + self, path: str, page: int, serve_hint: str + ) -> tuple[bytes, str] | None: + """ + Image-serve fast path for PDF pages. + + Returns ``(bytes, content_type)`` when the page can be served + as a raw image (detector matched, or caller forced ``image``); + ``None`` when the caller should fall back to the legacy + single-page-PDF path. + """ + with archive_cache.open_entry(path) as entry: + # ``Comicbox._get_archive`` returns the underlying archive + # union (zip / rar / 7z / tar / pdf). Caller has gated on + # ``file_type == PDF`` so the runtime type is ``PDFFile``; + # ``isinstance`` narrows for the type checker. Private + # comicbox API for now — swap to a public getter once one + # lands upstream. + archive = entry.comicbox._get_archive() # noqa: SLF001 + if not isinstance(archive, PDFFile): + return None + pdf = archive + page_index = int(page) + + if serve_hint == _SERVE_IMAGE: + # Always-image override — pixmap fallback for vector pages. + blob, ext = pdf.read_full_pixmap_jpeg(page_index) + return blob, f"image/{ext}" + + verdict = self._classify_cached(entry, pdf, page_index) + if verdict.mode is PageMode.PDF_FALLBACK: + return None + served = pdf.read_image_if_dominant(page_index) + if served is None: + # Detection said dominant but extraction failed; fall + # through to PDF rather than serve a broken response. + return None + blob, ext = served + return blob, f"image/{ext}" + + def _get_page_image(self) -> tuple[bytes, str]: """Get the image data and content type.""" pk = self.kwargs.get("pk") path, file_type = self._resolve_path_and_type(pk) - # page_image page = self.kwargs.get("page") - pdf_format = ( - PageFormat.PIXMAP.value - if self.request.GET.get("pixmap", "").lower() not in FALSY - else "" - ) + serve_hint = self._serve_hint() + + is_pdf = file_type == FileTypeChoices.PDF.value # pyright: ignore[reportAttributeAccessIssue] # ty: ignore[unresolved-attribute] + + # Image-dominant fast path for PDFs (skipped when the caller + # forces ``?serve=pdf``). + if is_pdf and serve_hint != _SERVE_PDF: + served = self._try_pdf_image_serve(path, page, serve_hint) + if served is not None: + return served + # Process-wide LRU of open Comicbox archives — the web reader's # prev/curr/next prefetch fires 3-5 page hits on the same archive # within a second, and ``cacheBook`` mode bursts a whole-book @@ -96,28 +176,32 @@ def _get_page_image(self) -> tuple: # held inside ``archive_cache.open(...)`` serializes extraction # because ZipFile / RarFile / PDF backends aren't thread-safe. with archive_cache.open(path) as cb: - page_image = cb.get_page_by_index(page, pdf_format=pdf_format) + page_image = cb.get_page_by_index(page, pdf_format="") if not page_image: page_image = b"" - # content type - if ( - file_type == FileTypeChoices.PDF.value # pyright: ignore[reportAttributeAccessIssue], # ty: ignore[unresolved-attribute] - and pdf_format not in _PDF_FORMAT_NON_PDF_TYPES - ): - content_type = _PDF_MIME_TYPE - else: - content_type = self.content_type - + content_type = _PDF_MIME_TYPE if is_pdf else self.content_type return page_image, content_type @extend_schema( parameters=[ OpenApiParameter("bookmark", OpenApiTypes.BOOL, default=True), - OpenApiParameter("pixmap", OpenApiTypes.BOOL, default=False), + OpenApiParameter( + _SERVE_PARAM, + OpenApiTypes.STR, + default=_SERVE_AUTO, + enum=sorted(_SERVE_HINTS), + description=( + "PDF serving mode: 'auto' (detector), " + "'pdf' (legacy single-page PDF), " + "'image' (always rasterize). Ignored for non-PDF archives." + ), + ), ], responses={ (200, content_type): OpenApiTypes.BINARY, + (200, "image/png"): OpenApiTypes.BINARY, + (200, "image/webp"): OpenApiTypes.BINARY, (200, _PDF_MIME_TYPE): OpenApiTypes.BINARY, }, ) diff --git a/codex/views/util.py b/codex/views/util.py index 8e8b1444c..d744edf1e 100644 --- a/codex/views/util.py +++ b/codex/views/util.py @@ -2,8 +2,42 @@ from collections.abc import Mapping from dataclasses import dataclass +from functools import wraps from typing import override +from django.utils.cache import patch_cache_control + + +def cache_control_2xx(**kwargs): + """ + Patch ``Cache-Control`` only on 2xx responses. + + Like ``django.views.decorators.cache.cache_control`` but only + emits the header for success responses. + + Django's ``cache_control`` patches the header onto every response, + including 4xx and 5xx. With ``public, max-age=`` that turns + a transient error (a missing file, an ACL miss, a one-off + backend hiccup) into a week-long cache poison: every browser that + saw the failure keeps serving it from cache without ever reaching + the server. This wrapper only marks success responses cacheable; + errors are returned with whatever default headers DRF / Django + already set (typically uncached). + """ + + def _wrap(viewfunc): + @wraps(viewfunc) + def _wrapped(request, *args, **kw): + response = viewfunc(request, *args, **kw) + status = getattr(response, "status_code", 0) + if 200 <= status < 300: # noqa: PLR2004 + patch_cache_control(response, **kwargs) + return response + + return _wrapped + + return _wrap + @dataclass class Route: diff --git a/frontend/src/api/v3/reader.js b/frontend/src/api/v3/reader.js index 1e7e79b4b..1ef125007 100644 --- a/frontend/src/api/v3/reader.js +++ b/frontend/src/api/v3/reader.js @@ -25,9 +25,24 @@ export const getReaderInfo = (pk, data, ts, options = {}) => { const _getReaderAPIPath = (pk) => globalThis.CODEX.API_V3_PATH + _getBookPath(pk); -export const getComicPageSource = ({ pk, page, mtime }) => { +export const getComicPageSource = ({ pk, page, mtime, serve }) => { + // ``serve`` is the optional PDF serving-mode hint forwarded to the + // backend ``ReaderPageView``: ``auto`` (detector decides), + // ``image`` (always rasterize), or ``pdf`` (skip the detector and + // serve a single-page PDF blob). Ignored by the backend for + // non-PDF archives. Omitting the param keeps the URL identical to + // the legacy shape so HTTP caches don't fragment. + // + // The query name is ``serve`` rather than ``format`` because DRF + // reserves ``?format=`` (URL_FORMAT_OVERRIDE) as a renderer-format + // selector and raises NotFound for unknown values before the view + // dispatches. const bookAPIPath = _getReaderAPIPath(pk); - return `${bookAPIPath}/${page}/page.jpg?ts=${mtime}`; + let url = `${bookAPIPath}/${page}/page.jpg?ts=${mtime}`; + if (serve && serve !== "auto") { + url += `&serve=${serve}`; + } + return url; }; export const getComicDownloadURL = ({ pk }, fn, ts) => { @@ -45,8 +60,8 @@ export const getDownloadPageURL = ({ pk, page, mtime }) => { }; export const getPDFInBrowserURL = ({ pk, mtime }) => { - // Consumed by ````, not ``HTTP.get`` — needs an - // absolute path so the browser doesn't resolve it relative to the + // Consumed by the "Read in Tab" link (``) — needs + // an absolute path so the browser doesn't resolve it relative to the // current SPA route. const bookPath = _getBookPath(pk); return `/${bookPath}/book.pdf?ts=${mtime}`; diff --git a/frontend/src/components/reader/drawer/reader-settings-controls.vue b/frontend/src/components/reader/drawer/reader-settings-controls.vue index 3236bd048..b6bd75b49 100644 --- a/frontend/src/components/reader/drawer/reader-settings-controls.vue +++ b/frontend/src/components/reader/drawer/reader-settings-controls.vue @@ -89,7 +89,6 @@ :model-value="settings.cacheBook" class="scopedCheckbox" density="compact" - :disabled="disableCacheBook" label="Cache Entire Book" hide-details="auto" :true-value="true" @@ -98,6 +97,25 @@ " @update:model-value="$emit('update', { cacheBook: $event })" /> + + + + + diff --git a/frontend/src/components/reader/pager/page/page.vue b/frontend/src/components/reader/pager/page/page.vue index 4caf4d312..786287cf9 100644 --- a/frontend/src/components/reader/pager/page/page.vue +++ b/frontend/src/components/reader/pager/page/page.vue @@ -74,16 +74,27 @@ export default { loaded: false, error: "", ts: 0, + /* + * ``true`` once we've seen an ```` load fail on a PDF + * book — the response was ``application/pdf`` (the detector + * declined to serve as image), so we re-mount the page through + * ```` against the same URL with ``?serve=pdf``. + */ + pdfFallback: false, }; }, computed: { ...mapState(useReaderStore, { scale: (state) => state.clientSettings?.scale, + pdfRenderMode: (state) => state.clientSettings?.pdfRenderMode || "auto", }), + isPDF() { + return this.book.fileType === "PDF"; + }, style() { // Magic for transform: scale() not sizing elements right. const s = {}; - if (this.book.fileType === "PDF" || this.scale == 1) { + if (this.usingPDFDoc || this.scale == 1) { return s; } const img = this.$refs.pageComponent?.$el; @@ -100,11 +111,34 @@ export default { pk: this.book.pk, page: this.page, mtime, + serve: this.activeServe, }; return getComicPageSource(params); }, + activeServe() { + /* + * For non-PDF books the serve param is ignored by the + * backend; we still pass it through so the URL is stable. + */ + if (!this.isPDF) { + return "auto"; + } + if (this.pdfFallback) { + // Image attempt failed → re-fetch with PDF response. + return "pdf"; + } + return this.pdfRenderMode; + }, + usingPDFDoc() { + /* + * Render through ```` when: + * • the user explicitly forced PDF rendering, or + * • the ```` first attempt failed for a PDF book. + */ + return this.isPDF && (this.pdfRenderMode === "pdf" || this.pdfFallback); + }, component() { - return this.book.fileType === "PDF" ? PDFDoc : ImgPage; + return this.usingPDFDoc ? PDFDoc : ImgPage; }, bookSettings() { return this.getBookSettings(this.book); @@ -113,6 +147,17 @@ export default { return this.bookSettings?.twoPages ?? false; }, }, + watch: { + /* + * If the user toggles render mode mid-read, drop the fallback flag + * so the new mode takes effect on the next mount. + */ + pdfRenderMode() { + this.pdfFallback = false; + this.error = ""; + this.loaded = false; + }, + }, mounted() { /* * Show the spinner only if the image is still loading after @@ -148,6 +193,18 @@ export default { this.error = false; }, onError() { + /* + * For PDF books on the first ```` attempt, the failure + * means the backend sent ``application/pdf`` — the detector + * declined to serve as image. Swap to ```` against + * the same URL and let it render via vue-pdf-embed. For all + * other failures, surface the error. + */ + if (this.isPDF && !this.pdfFallback && !this.usingPDFDoc) { + this.pdfFallback = true; + this.loaded = false; + return; + } this.error = "load"; this.showProgress = false; }, @@ -157,6 +214,8 @@ export default { }, onRetry() { this.ts = Date.now(); + this.pdfFallback = false; + this.error = ""; }, }, }; diff --git a/frontend/src/components/reader/pager/pager-full-pdf.vue b/frontend/src/components/reader/pager/pager-full-pdf.vue deleted file mode 100644 index c258c4e1b..000000000 --- a/frontend/src/components/reader/pager/pager-full-pdf.vue +++ /dev/null @@ -1,36 +0,0 @@ - - - diff --git a/frontend/src/components/reader/pager/pager.vue b/frontend/src/components/reader/pager/pager.vue index 341b3291e..5fd4cd360 100644 --- a/frontend/src/components/reader/pager/pager.vue +++ b/frontend/src/components/reader/pager/pager.vue @@ -12,14 +12,8 @@