Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ border-radius: 128px;

## v1.11.5

- Features
- Reader: add `?hide_text=1` query param for PDF pages — suppresses
visible text rendering on badly OCR'd scans where a visible OCR text
layer is drawn on top of the page's raster image, doubling the text.
- Fixes
- Fix occasional recreation of comics on docker or network filesystems.
- Fix double polling of some libraries.
Expand Down
18 changes: 17 additions & 1 deletion codex/views/reader/page.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,13 @@ def _get_page_image(self) -> tuple:
if self.request.GET.get("pixmap", "").lower() not in FALSY
else ""
)
# ``?hide_text=1`` suppresses visible text rendering on PDF
# pages — useful for badly-OCR'd scans that draw the OCR
# layer with rendering mode 0 (visible) on top of the page's
# raster, doubling the text. Forwarded straight through
# comicbox to the pdffile backend; non-PDF archives ignore
# it.
hide_text = self.request.GET.get("hide_text", "").lower() not in FALSY
# Process-wide LRU of open Comicbox archives — the web reader's
# prev/curr/next prefetch fires 3-5 page hits on the same archive
# within a second, and ``cacheBook`` mode bursts a whole-book
Expand All @@ -96,7 +103,15 @@ def _get_page_image(self) -> tuple:
# held inside ``archive_cache.open(...)`` serializes extraction
# because ZipFile / RarFile / PDF backends aren't thread-safe.
with archive_cache.open(path) as cb:
page_image = cb.get_page_by_index(page, pdf_format=pdf_format)
# ``hide_text`` requires comicbox > 3.0.0 / comicbox-pdffile
# > 0.5.0. The type checkers (pyright, ty) resolve the
# lock-file-pinned releases, which lag behind the editable
# install used during dev and don't declare the keyword yet.
# Drop both ignores once the two deps land on PyPI and
# ``pyproject.toml`` is bumped.
page_image = cb.get_page_by_index(
page,
pdf_format=pdf_format,
hide_text=hide_text, # pyright: ignore[reportCallIssue] # ty: ignore[unknown-argument]
)
if not page_image:
page_image = b""

Expand All @@ -115,6 +130,7 @@ def _get_page_image(self) -> tuple:
parameters=[
OpenApiParameter("bookmark", OpenApiTypes.BOOL, default=True),
OpenApiParameter("pixmap", OpenApiTypes.BOOL, default=False),
OpenApiParameter("hide_text", OpenApiTypes.BOOL, default=False),
],
responses={
(200, content_type): OpenApiTypes.BINARY,
Expand Down