Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
278 changes: 278 additions & 0 deletions app-review.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,278 @@
# -*- coding: utf-8 -*-
"""
@file: app.py
Generate PNG thumbnails (respecting freedesktop sizes).
Usage: app.py [-h] [-o OUTPUT] [-j JOBS] [-f|--force] input

This script generates PNG thumbnails of images in various sizes
(normal, large, x-large, xx-large) and saves them in a specified
output directory (should be `~/.cache/thumbnails/`).

- `Multiprocessing` to execute processing in parallel.
- `Pillow` library for image processing (resize).
- `Pillow.ImageDraw` to add a label to the thumbnail indicating the original file format.
- `Subprocess` to extract preview images from RAW files using `ExifTool`.
- `Subprocess` to handle rotation from Exif files using `ExifTool`.

The input can be either a file or a directory (which will be searched recursively).

Example of usage:
$ python3 app.py /mnt/sda1/photos/2025/ -o /home/thomas/.cache/thumbnails/ -j 16
$ python3 app.py /mnt/sda1/photos/2025/20250127/2J0A1537.cr3

Set thumnail-cache to handle more storage and keep it longer:
$ gsettings set org.gnome.nautilus.preferences thumbnail-limit 536870912000 (~500Gb)
$ gsettings set org.gnome.desktop.thumbnail-cache maximum-age 365
"""

import hashlib
import logging
import multiprocessing
import subprocess
import io

from pathlib import Path
from concurrent.futures import ProcessPoolExecutor, as_completed
from typing import List, Optional, Union

from PIL import Image, ImageOps, ImageDraw, ImageFont
from PIL.PngImagePlugin import PngInfo

from tqdm import tqdm

logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Insufficient Log Format category Logging

Tell me more
What is the issue?

Basic logging format missing crucial debugging information like timestamp and module name.

Why this matters

Current format makes it difficult to trace when issues occurred and from which module they originated, complicating debugging in production.

Suggested change ∙ Feature Preview

Update to: logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')

Provide feedback to improve future suggestions

Nice Catch Incorrect Not in Scope Not in coding standard Other

💬 Looking for more details? Reply to this comment to chat with Korbit.


SUPPORTED_EXTENSIONS = {".jpg", ".jpeg", ".png", ".cr2", ".cr3", ".arw"}
FONT = "/usr/share/fonts/truetype/jetbrains-mono/JetBrainsMono-Bold.ttf"
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hardcoded font path category Readability

Tell me more
What is the issue?

Hardcoded font path that may not exist on all systems.

Why this matters

The script may fail on systems with different font paths or missing the specific font, making it less portable.

Suggested change ∙ Feature Preview

Move to configuration and provide fallback options:

FONT_PATHS = [
    "/usr/share/fonts/truetype/jetbrains-mono/JetBrainsMono-Bold.ttf",
    "/usr/share/fonts/TTF/DejaVuSans-Bold.ttf",
    # Add more fallback paths
]

def get_system_font():
    for path in FONT_PATHS:
        if Path(path).exists():
            return path
    return None

FONT = get_system_font()
Provide feedback to improve future suggestions

Nice Catch Incorrect Not in Scope Not in coding standard Other

💬 Looking for more details? Reply to this comment to chat with Korbit.

THUMBNAIL_CONFIG = {
"normal": {
"size": (128, 128),
"font_size": 20,
},
"large": {
"size": (256, 256),
"font_size": 30,
},
"x-large": {
"size": (512, 512),
"font_size": 40,
},
# "xx-large": {
# "size": (1024, 1024),
# "font_size": 100,
# },
}

from PIL import ImageDraw, ImageFont


def add_raw_label(image: Image.Image, label: str, text: str) -> Image.Image:
"""Add a label to the image indicating the original file format."""
Comment on lines +69 to +70
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Incomplete function docstring category Documentation

Tell me more
What is the issue?

The docstring doesn't explain the parameters or return value, and doesn't clarify why the label is being added.

Why this matters

Makes it harder for developers to understand the expected inputs and purpose of the label addition.

Suggested change ∙ Feature Preview

"""Add a label to the image showing the original file format for RAW files.

Args:
image: The PIL Image to modify
label: The thumbnail size category (e.g. 'normal', 'large')
text: The file extension to display

Returns:
The modified image with label added"""

Provide feedback to improve future suggestions

Nice Catch Incorrect Not in Scope Not in coding standard Other

💬 Looking for more details? Reply to this comment to chat with Korbit.

draw = ImageDraw.Draw(image)
size = THUMBNAIL_CONFIG[label]["font_size"]
x, y = 0, 0

font = ImageFont.truetype(FONT, size)

text_bbox = draw.textbbox((x, y), text, font=font)
text_width = text_bbox[2] - text_bbox[0]
text_height = text_bbox[3] - text_bbox[1]
box_padding = int(size * 0.5)
draw.rectangle(
[
x - box_padding,
y - box_padding,
x + text_width + box_padding,
y + text_height + box_padding,
],
fill="black",
)
draw.text((x, y), text, fill="white", font=font)
return image


def extract_cr3_preview(image_path: Path, tag: str = "PreviewImage") -> io.BytesIO:
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Misleading function name category Readability

Tell me more
What is the issue?

Function name is too specific (cr3) despite handling multiple RAW formats (cr2, cr3, arw).

Why this matters

The function name doesn't accurately reflect its capabilities, making it misleading for maintainers.

Suggested change ∙ Feature Preview

Rename to better reflect functionality:

def extract_raw_preview(image_path: Path, tag: str = "PreviewImage") -> io.BytesIO:
Provide feedback to improve future suggestions

Nice Catch Incorrect Not in Scope Not in coding standard Other

💬 Looking for more details? Reply to this comment to chat with Korbit.

"""Extract preview image from RAW file using exiftool."""
logging.info(f"Extracting preview from RAW: {image_path}")
result = subprocess.run(
["exiftool", f"-{tag}", "-b", str(image_path)],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
check=False,
)
Comment on lines +97 to +102
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unsafe subprocess execution with unvalidated paths category Security

Tell me more
What is the issue?

Direct use of user-provided input (image_path) in subprocess calls without proper path validation could lead to command injection.

Why this matters

An attacker could craft a malicious filename that includes shell commands, potentially leading to arbitrary code execution.

Suggested change ∙ Feature Preview
from pathlib import Path
def extract_cr3_preview(image_path: Path, tag: str = "PreviewImage") -> io.BytesIO:
    if not image_path.is_file():
        raise ValueError("Invalid file path")
    image_path_str = str(image_path.resolve())
    result = subprocess.run(
        ["exiftool", f"-{tag}", "-b", image_path_str],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        check=False,
    )
Provide feedback to improve future suggestions

Nice Catch Incorrect Not in Scope Not in coding standard Other

💬 Looking for more details? Reply to this comment to chat with Korbit.

if result.returncode != 0 or not result.stdout:
raise RuntimeError(
f"ExifTool failed on {image_path}:\n{result.stderr.decode().strip()}"
)

orient_proc = subprocess.run(
["exiftool", "-Orientation", "-s3", str(image_path)],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
check=False,
text=True,
)
orientation_str = orient_proc.stdout.strip()

rotate_map = {
"Rotate 90 CW": -90,
"Rotate 270 CW": -270,
"Rotate 90 CCW": 90,
"Rotate 180": 180,
}
img = Image.open(io.BytesIO(result.stdout))
rotation = rotate_map.get(orientation_str, 0)
if rotation:
img = img.rotate(rotation, expand=True)

img_bytes_io = io.BytesIO()
img.save(img_bytes_io, format="JPEG")

return img_bytes_io
Comment on lines +128 to +131
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

BytesIO stream position not reset category Functionality

Tell me more
What is the issue?

The BytesIO object is returned without seeking to the start, making it unusable for subsequent read operations.

Why this matters

When the BytesIO object is used later, it will be at the end of the stream, resulting in no data being read.

Suggested change ∙ Feature Preview

Reset the stream position before returning:

img_bytes_io = io.BytesIO()
img.save(img_bytes_io, format="JPEG")
img_bytes_io.seek(0)
return img_bytes_io
Provide feedback to improve future suggestions

Nice Catch Incorrect Not in Scope Not in coding standard Other

💬 Looking for more details? Reply to this comment to chat with Korbit.



def generate_thumbnails(
image_path: Path, output_base: Path, overwrite=False
) -> Optional[str]:
"""Generate multiple size thumbnails for one image.
If one size is missing, it will generate all sizes.
overwrite (bool): If True, overwrite existing thumbnails."""

uri = image_path.resolve().as_uri()
hash_hex = hashlib.md5(uri.encode("utf-8")).hexdigest()
out_filename = f"{hash_hex}.png"

if not overwrite:
all_exist = True
for label in THUMBNAIL_CONFIG.keys():
out_dir = output_base / label
out_path = out_dir / out_filename
if not out_path.exists():
print(f"Thumbnail does not exist: {uri} {out_path}")
Comment on lines +150 to +151
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Print Statement Instead of Logger category Logging

Tell me more
What is the issue?

Using print() statement instead of logging for notifying about missing thumbnails.

Why this matters

Using print() makes it impossible to control output verbosity and breaks logging consistency. This message would not appear in log files or monitoring systems.

Suggested change ∙ Feature Preview

Replace with: logging.debug(f"Thumbnail does not exist: {uri} {out_path}")

Provide feedback to improve future suggestions

Nice Catch Incorrect Not in Scope Not in coding standard Other

💬 Looking for more details? Reply to this comment to chat with Korbit.

all_exist = False
break
if all_exist:
logging.info(f"All thumbnails already exist for: {image_path}")
return None

try:
if image_path.suffix.lower() in {".cr3", ".cr2", ".arw"}:
img = Image.open(extract_cr3_preview(image_path))
else:
img = Image.open(image_path)

img = ImageOps.exif_transpose(img)

metadata = PngInfo()
metadata.add_text("Thumb::URI", uri)
metadata.add_text("Thumb::MTime", str(int(image_path.stat().st_mtime)))
metadata.add_text("Software", "make-thumbnail")

for label, config in THUMBNAIL_CONFIG.items():
size = config["size"]
thumb = img.copy()
thumb.thumbnail(size, Image.LANCZOS)

out_dir = output_base / label
out_dir.mkdir(parents=True, exist_ok=True)
out_path = out_dir / out_filename
thumb.save(out_path, format="PNG", pnginfo=metadata)

file_extension = image_path.suffix.lower()
if file_extension in SUPPORTED_EXTENSIONS:
thumb = add_raw_label(thumb, label, file_extension)
thumb.save(out_path, format="PNG", pnginfo=metadata)
Comment on lines +179 to +184
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Redundant thumbnail saving category Performance

Tell me more
What is the issue?

The thumbnail is being saved twice for each size, once without the label and immediately after with the label, overwriting the first save.

Why this matters

This causes unnecessary I/O operations and processing time, impacting the overall performance of the thumbnail generation process.

Suggested change ∙ Feature Preview

Remove the first save operation and only save after adding the label:

            out_dir = output_base / label
            out_dir.mkdir(parents=True, exist_ok=True)
            out_path = out_dir / out_filename
            
            file_extension = image_path.suffix.lower()
            if file_extension in SUPPORTED_EXTENSIONS:
                thumb = add_raw_label(thumb, label, file_extension)
            thumb.save(out_path, format="PNG", pnginfo=metadata)
Provide feedback to improve future suggestions

Nice Catch Incorrect Not in Scope Not in coding standard Other

💬 Looking for more details? Reply to this comment to chat with Korbit.


except Exception as e:
return f"{image_path}: {e}"
Comment on lines +186 to +187
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Loss of Error Context category Error Handling

Tell me more
What is the issue?

Using a bare Exception catch without logging the full stack trace loses valuable debugging information.

Why this matters

When an error occurs, only the string representation of the error is returned, discarding the stack trace that could help diagnose the root cause.

Suggested change ∙ Feature Preview

Log the full exception details before returning the error message:

except Exception as e:
    logging.exception(f"Error processing {image_path}")
    return f"{image_path}: {e}"
Provide feedback to improve future suggestions

Nice Catch Incorrect Not in Scope Not in coding standard Other

💬 Looking for more details? Reply to this comment to chat with Korbit.

return None


def collect_image_paths(input_path: Path) -> List[Path]:
"""Recursively collect image files from a path."""
if input_path.is_file():
return [input_path] if input_path.suffix.lower() in SUPPORTED_EXTENSIONS else []
elif input_path.is_dir():
return [
p for p in input_path.rglob("*") if p.suffix.lower() in SUPPORTED_EXTENSIONS
]
else:
raise ValueError("Input must be a file or directory.")


def process_images(
image_paths: List[Path], output_dir: Path, num_workers: int, overwrite: bool = False
) -> List[str]:
"""Prepare threads to generate thumbnails in parallel."""
errors = []
with ProcessPoolExecutor(max_workers=num_workers) as executor:
futures = {
executor.submit(generate_thumbnails, path, output_dir, overwrite): path
for path in image_paths
}
for future in tqdm(
as_completed(futures),
total=len(futures),
desc="Generating thumbnails",
):
error = future.result()
if error:
errors.append(error)
return errors


def main(
input_path: Union[str, Path],
output_dir: Union[str, Path],
num_workers: Optional[int] = None,
overwrite: bool = False,
) -> None:
input_path = Path(input_path)
output_dir = Path(output_dir)
num_workers = num_workers or multiprocessing.cpu_count()

try:
image_paths = collect_image_paths(input_path)
except ValueError as ve:
logging.error(str(ve))
return

if not image_paths:
logging.warning("No supported image files found.")
return

errors = process_images(image_paths, output_dir, num_workers, overwrite=overwrite)

if errors:
logging.warning("Some images failed to process:")
for err in errors:
logging.warning(err)
else:
logging.info("All thumbnails generated successfully.")


if __name__ == "__main__":
import argparse

parser = argparse.ArgumentParser(
description="Generate PNG thumbnails (freedesktop sizes)."
)
parser.add_argument("input", help="Input image file or directory.")
parser.add_argument(
"-o", "--output", default="thumbnails", help="Output directory."
)
parser.add_argument(
"-j",
"--jobs",
type=int,
default=None,
help="Number of worker processes (default: all cores).",
)
parser.add_argument(
"-f",
action="store_true",
help="Overwrite existing thumbnails.",
)

args = parser.parse_args()
main(args.input, args.output, args.jobs, overwrite=args.f)