From 3909a77d6eea2b0bad01ae3c745d8c9c3cc0574a Mon Sep 17 00:00:00 2001 From: cargocultprogramming <10373572+cargocultprogramming@users.noreply.github.com> Date: Thu, 9 Apr 2026 20:33:54 +0200 Subject: [PATCH 1/2] First stab at implementation. --- .../plugins/image_process/image_process.py | 225 +++++++++++++----- .../image_process/test_format_conversion.py | 191 +++++++++++++++ 2 files changed, 356 insertions(+), 60 deletions(-) create mode 100644 pelican/plugins/image_process/test_format_conversion.py diff --git a/pelican/plugins/image_process/image_process.py b/pelican/plugins/image_process/image_process.py index 644fc5e..cd4cfb3 100644 --- a/pelican/plugins/image_process/image_process.py +++ b/pelican/plugins/image_process/image_process.py @@ -140,6 +140,47 @@ def _send_command(self, params): ) +def get_target_format(config, default_format=None): + """Extract the target format from various configuration structures. + + Target format can be specified in a number of different ways in the configuration: + + - Top-level default format: "output-format": "webp" + - Responsive image: "srcset": [ ("small", ["scale_in 100 100 True"], "webp"),] + - Picture: "sources": [ ("small", ["scale_in 100 100 True"], "webp"),] + + Returns the target format string (e.g., "webp") or None. + """ + if isinstance(config, dict): + return config.get("output-format", default_format) + + if isinstance(config, (list, str)): + return default_format + + if isinstance(config, tuple): + # Handle (condition, ops, format) or (ops, format) + match config: + # Matches (condition, ops, format) where 1st element is a string + case (str(), ops, str() as format_str): + return format_str + + # Matches (ops, format) with ops not string + case (ops, format_str) if not isinstance(ops, str): + return format_str + + return default_format + + +def get_target_filename(filename, target_format): + """Return the filename with the target format extension.""" + if not target_format or target_format == "original": + return filename + + base, _ext = os.path.splitext(filename) + target_format = target_format.lstrip(".") + return f"{base}.{target_format}" + + def convert_box(image, top, left, right, bottom): """Convert box coordinates strings to integer. @@ -448,8 +489,11 @@ def process_img_tag(img, settings, derivative): path = compute_paths(img["src"], settings, derivative) process = settings["IMAGE_PROCESS"][derivative] - img["src"] = posixpath.join(path.base_url, path.filename) - destination = os.path.join(str(path.base_path), path.filename) + target_format = get_target_format(process) + filename = get_target_filename(path.filename, target_format) + + img["src"] = posixpath.join(path.base_url, filename) + destination = os.path.join(str(path.base_path), filename) if not isinstance(process, list): process = process["ops"] @@ -474,10 +518,20 @@ def build_srcset(img, settings, derivative): path = compute_paths(img["src"], settings, derivative) process = settings["IMAGE_PROCESS"][derivative] + # Top-level default format. + top_default_format = get_target_format(process) + default = process["default"] default_name = "" + default_format = top_default_format if isinstance(default, str): - breakpoints = {i for i, _ in process["srcset"]} + # find the entry in srcset to get its format + for entry in process["srcset"]: + if entry[0] == default: + default_format = get_target_format(entry, top_default_format) + break + + breakpoints = {entry[0] for entry in process["srcset"]} if default not in breakpoints: logger.error( '%s srcset "%s" does not define default "%s"', @@ -486,34 +540,36 @@ def build_srcset(img, settings, derivative): default, ) default_name = default - elif isinstance(default, list): + elif isinstance(default, (list, tuple)): default_name = "default" - destination = os.path.join(str(path.base_path), default_name, path.filename) - process_image((path.source, destination, default), settings) + default_format = get_target_format(default, top_default_format) + ops = default[0] if isinstance(default, tuple) else default + filename = get_target_filename(path.filename, default_format) + destination = os.path.join(str(path.base_path), default_name, filename) + process_image((path.source, destination, ops), settings) - img["src"] = posixpath.join(path.base_url, default_name, path.filename) + filename = get_target_filename(path.filename, default_format) + img["src"] = posixpath.join(path.base_url, default_name, filename) if "sizes" in process: img["sizes"] = process["sizes"] srcset = [] for src in process["srcset"]: - file_path = posixpath.join(path.base_url, src[0], path.filename) + entry_format = get_target_format(src, top_default_format) + filename = get_target_filename(path.filename, entry_format) + file_path = posixpath.join(path.base_url, src[0], filename) srcset.append(format_srcset_element(file_path, src[0])) - destination = os.path.join(str(path.base_path), src[0], path.filename) + destination = os.path.join(str(path.base_path), src[0], filename) process_image((path.source, destination, src[1]), settings) if len(srcset) > 0: img["srcset"] = ", ".join(srcset) -def convert_div_to_picture_tag(soup, img, group, settings, derivative): - """Convert a div containing multiple images to a picture.""" +def prepare_image_sources(img, group, settings, derivative): + """Prepare image sources for the picture tag.""" process_dir = settings["IMAGE_PROCESS_DIR"] - # Compile sources URL. Special source "default" uses the main - # image URL. Other sources use the img with classes - # [source['name'], 'image-process']. We also remove the img from - # the DOM. sources = copy.deepcopy(settings["IMAGE_PROCESS"][derivative]["sources"]) for s in sources: if s["name"] == "default": @@ -529,6 +585,42 @@ def convert_div_to_picture_tag(soup, img, group, settings, derivative): url_path, s["filename"] = os.path.split(s["url"]) s["base_url"] = os.path.join(url_path, process_dir, derivative) s["base_path"] = os.path.join(settings["OUTPUT_PATH"], s["base_url"][1:]) + return sources + + +def construct_picture_tag(soup, img, sources, settings): + """Construct the picture tag and add it to the DOM.""" + picture_tag = soup.new_tag("picture") + for s in sources: + # Create new + source_attrs = {k: s[k] for k in s if k in ["media", "sizes"]} + source_tag = soup.new_tag("source", **source_attrs) + + top_source_format = get_target_format(s) + + srcset = [] + for src in s["srcset"]: + entry_format = get_target_format(src, top_source_format) + filename = get_target_filename(s["filename"], entry_format) + url = os.path.join(s["base_url"], s["name"], src[0], filename) + srcset.append(format_srcset_element(str(url), src[0])) + + source = os.path.join(settings["PATH"], s["url"][1:]) + destination = os.path.join(s["base_path"], s["name"], src[0], filename) + process_image((source, destination, src[1]), settings) + + if len(srcset) > 0: + source_tag["srcset"] = ", ".join(srcset) + + picture_tag.append(source_tag) + + # Wrap img with + img.wrap(picture_tag) + + +def convert_div_to_picture_tag(soup, img, group, settings, derivative): + """Convert a div containing multiple images to a picture.""" + sources = prepare_image_sources(img, group, settings, derivative) # If default is not None, change default img source to the image # derivative referenced. @@ -550,18 +642,29 @@ def convert_div_to_picture_tag(soup, img, group, settings, derivative): if isinstance(default[1], str): default_item_name = default[1] + # find format from srcset + default_item_format = None + for entry in default_source["srcset"]: + if entry[0] == default_item_name: + default_item_format = get_target_format(entry) + break - elif isinstance(default[1], list): + elif isinstance(default[1], (list, tuple)): default_item_name = "default" + default_item_format = get_target_format(default[1]) + ops = default[1][0] if isinstance(default[1], tuple) else default[1] source = os.path.join(settings["PATH"], default_source["url"][1:]) + filename = get_target_filename( + default_source["filename"], default_item_format + ) destination = os.path.join( default_source["base_path"], default_source_name, default_item_name, - default_source["filename"], + filename, ) - process_image((source, destination, default[1]), settings) + process_image((source, destination, ops), settings) else: raise RuntimeError( "Unexpected type for the second value of tuple " @@ -569,37 +672,39 @@ def convert_div_to_picture_tag(soup, img, group, settings, derivative): (derivative,), ) + filename = get_target_filename(default_source["filename"], default_item_format) # Change img src to url of default processed image. img["src"] = os.path.join( default_source["base_url"], default_source_name, default_item_name, - default_source["filename"], + filename, ) - # Create picture tag. - picture_tag = soup.new_tag("picture") - for s in sources: - # Create new - source_attrs = {k: s[k] for k in s if k in ["media", "sizes"]} - source_tag = soup.new_tag("source", **source_attrs) + construct_picture_tag(soup, img, sources, settings) - srcset = [] - for src in s["srcset"]: - url = os.path.join(s["base_url"], s["name"], src[0], s["filename"]) - srcset.append(format_srcset_element(str(url), src[0])) - source = os.path.join(settings["PATH"], s["url"][1:]) - destination = os.path.join(s["base_path"], s["name"], src[0], s["filename"]) - process_image((source, destination, src[1]), settings) +def generate_srcset_and_insert_source(img, s, settings): + """Generate srcset for a source and insert it into the DOM.""" + top_source_format = get_target_format(s) - if len(srcset) > 0: - source_tag["srcset"] = ", ".join(srcset) + srcset = [] + for src in s["srcset"]: + entry_format = get_target_format(src, top_source_format) + filename = get_target_filename(s["filename"], entry_format) + url = posixpath.join(s["base_url"], s["name"], src[0], filename) + srcset.append(format_srcset_element(str(url), src[0])) - picture_tag.append(source_tag) + source = os.path.join(settings["PATH"], s["url"][1:]) + destination = os.path.join(s["base_path"], s["name"], src[0], filename) + process_image((source, destination, src[1]), settings) - # Wrap img with - img.wrap(picture_tag) + if len(srcset) > 0: + # Append source elements to the picture in the same order + # as they are found in + # settings['IMAGE_PROCESS'][derivative]['sources']. + s["element"]["srcset"] = ", ".join(srcset) + img.insert_before(s["element"]) def process_picture(soup, img, group, settings, derivative): @@ -662,18 +767,29 @@ def process_picture(soup, img, group, settings, derivative): if isinstance(default[1], str): default_item_name = default[1] + # find format from srcset + default_item_format = None + for entry in default_source["srcset"]: + if entry[0] == default_item_name: + default_item_format = get_target_format(entry) + break - elif isinstance(default[1], list): + elif isinstance(default[1], (list, tuple)): default_item_name = "default" + default_item_format = get_target_format(default[1]) + ops = default[1][0] if isinstance(default[1], tuple) else default[1] source = os.path.join(settings["PATH"], default_source["url"][1:]) + filename = get_target_filename( + default_source["filename"], default_item_format + ) destination = os.path.join( default_source["base_path"], default_source_name, default_item_name, - default_source["filename"], + filename, ) - process_image((source, destination, default[1]), settings) + process_image((source, destination, ops), settings) else: raise RuntimeError( @@ -682,31 +798,18 @@ def process_picture(soup, img, group, settings, derivative): (derivative,), ) + filename = get_target_filename(default_source["filename"], default_item_format) # Change img src to url of default processed image. img["src"] = posixpath.join( default_source["base_url"], default_source_name, default_item_name, - default_source["filename"], + filename, ) # Generate srcsets and put back s in . for s in sources: - srcset = [] - for src in s["srcset"]: - url = posixpath.join(s["base_url"], s["name"], src[0], s["filename"]) - srcset.append(format_srcset_element(str(url), src[0])) - - source = os.path.join(settings["PATH"], s["url"][1:]) - destination = os.path.join(s["base_path"], s["name"], src[0], s["filename"]) - process_image((source, destination, src[1]), settings) - - if len(srcset) > 0: - # Append source elements to the picture in the same order - # as they are found in - # settings['IMAGE_PROCESS'][derivative]['sources']. - s["element"]["srcset"] = ", ".join(srcset) - img.insert_before(s["element"]) + generate_srcset_and_insert_source(img, s, settings) def try_open_image(path): @@ -828,10 +931,12 @@ def process_metadata(generator, metadata): path = compute_paths(value, generator.context, derivative) original_values[key] = value - metadata[key] = urljoin( - site_url, posixpath.join(path.base_url, path.filename) - ) - destination = os.path.join(str(path.base_path), path.filename) + + target_format = get_target_format(process) + filename = get_target_filename(path.filename, target_format) + + metadata[key] = urljoin(site_url, posixpath.join(path.base_url, filename)) + destination = os.path.join(str(path.base_path), filename) if not isinstance(process, list): process = process["ops"] diff --git a/pelican/plugins/image_process/test_format_conversion.py b/pelican/plugins/image_process/test_format_conversion.py new file mode 100644 index 0000000..b417d3d --- /dev/null +++ b/pelican/plugins/image_process/test_format_conversion.py @@ -0,0 +1,191 @@ +from pathlib import Path + +from bs4 import BeautifulSoup +from PIL import Image +import pytest + +from pelican.plugins.image_process import ( + harvest_images_in_fragment, + process_metadata, + set_default_settings, +) + +HERE = Path(__file__).resolve().parent +TEST_DATA = HERE.joinpath("test_data").resolve() + + +def get_settings(**kwargs): + DEFAULT_CONFIG = { + "PATH": str(TEST_DATA), + "OUTPUT_PATH": "output", + "static_content": {}, + "filenames": {}, + "SITEURL": "https://www.example.com", + "IMAGE_PROCESS": {}, + } + settings = DEFAULT_CONFIG.copy() + settings.update(kwargs) + set_default_settings(settings) + return settings + + +@pytest.fixture +def output_dir(tmp_path): + out = tmp_path / "output" + out.mkdir() + return out + + +def test_single_image_conversion(output_dir): + settings = get_settings( + OUTPUT_PATH=str(output_dir), + IMAGE_PROCESS={ + "webp": { + "type": "image", + "ops": ["scale_in 100 100 True"], + "output-format": "webp", + } + }, + ) + + fragment = '' + result = harvest_images_in_fragment(fragment, settings) + + soup = BeautifulSoup(result, "html.parser") + img = soup.find("img") + + assert img["src"] == "/derivatives/webp/pelican-bird.webp" + + dest_path = output_dir / "derivatives" / "webp" / "pelican-bird.webp" + assert dest_path.exists() + + with Image.open(dest_path) as im: + assert im.format == "WEBP" + + +def test_responsive_image_conversion(output_dir): + settings = get_settings( + OUTPUT_PATH=str(output_dir), + IMAGE_PROCESS={ + "responsive": { + "type": "responsive-image", + "srcset": [ + ("small", ["scale_in 100 100 True"], "webp"), + ( + "large", + ["scale_in 800 800 True"], + ), # uses top-level default or original + ], + "default": "small", + "output-format": "jpg", + } + }, + ) + + fragment = '' + result = harvest_images_in_fragment(fragment, settings) + + soup = BeautifulSoup(result, "html.parser") + img = soup.find("img") + + assert img["src"] == "/derivatives/responsive/small/pelican-bird.webp" + assert "srcset" in img.attrs + srcset = img["srcset"] + assert "/derivatives/responsive/small/pelican-bird.webp small" in srcset + assert "/derivatives/responsive/large/pelican-bird.jpg large" in srcset + + assert ( + output_dir / "derivatives" / "responsive" / "small" / "pelican-bird.webp" + ).exists() + assert ( + output_dir / "derivatives" / "responsive" / "large" / "pelican-bird.jpg" + ).exists() + + +def test_picture_conversion(output_dir): + settings = get_settings( + OUTPUT_PATH=str(output_dir), + IMAGE_PROCESS={ + "viz": { + "type": "picture", + "sources": [ + { + "name": "default", + "srcset": [("small", ["scale_in 100 100 True"], "webp")], + }, + { + "name": "source-1", + "srcset": [("large", ["scale_in 800 800 True"], "jpg")], + }, + ], + "default": ("default", "small"), + } + }, + ) + + fragment = """ +
+ + +
+ """ + result = harvest_images_in_fragment(fragment, settings) + + soup = BeautifulSoup(result, "html.parser") + picture = soup.find("picture") + assert picture is not None + + sources = picture.find_all("source") + assert "webp" in sources[0]["srcset"] + assert "jpg" in sources[1]["srcset"] + + assert "/derivatives/viz/default/small/pelican-bird.webp" in sources[0]["srcset"] + assert "/derivatives/viz/source-1/large/black-borders.jpg" in sources[1]["srcset"] + + img = picture.find("img") + assert img["src"] == "/derivatives/viz/default/small/pelican-bird.webp" + + +def test_metadata_conversion(output_dir): + settings = get_settings( + OUTPUT_PATH=str(output_dir), + IMAGE_PROCESS={ + "webp-meta": { + "type": "image", + "ops": ["scale_in 100 100 True"], + "output-format": "webp", + } + }, + IMAGE_PROCESS_METADATA={"og_image": "webp-meta"}, + ) + + class MockGenerator: + def __init__(self, context): + self.context = context + + generator = MockGenerator(settings) + metadata = {"og_image": "/pelican-bird.png"} + + process_metadata(generator, metadata) + + assert ( + metadata["og_image"] + == "https://www.example.com/derivatives/webp-meta/pelican-bird.webp" + ) + assert (output_dir / "derivatives" / "webp-meta" / "pelican-bird.webp").exists() + + +def test_backward_compatibility(output_dir): + # Ensure that without output-format, it keeps the original extension + settings = get_settings( + OUTPUT_PATH=str(output_dir), IMAGE_PROCESS={"legacy": ["scale_in 100 100 True"]} + ) + + fragment = '' + result = harvest_images_in_fragment(fragment, settings) + + soup = BeautifulSoup(result, "html.parser") + img = soup.find("img") + + assert img["src"] == "/derivatives/legacy/pelican-bird.png" + assert (output_dir / "derivatives" / "legacy" / "pelican-bird.png").exists() From c394c50feef9084fefbd06c978681072632f3ada Mon Sep 17 00:00:00 2001 From: cargocultprogramming <10373572+cargocultprogramming@users.noreply.github.com> Date: Thu, 9 Apr 2026 22:26:56 +0200 Subject: [PATCH 2/2] Added release. --- RELEASE.md | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 RELEASE.md diff --git a/RELEASE.md b/RELEASE.md new file mode 100644 index 0000000..a5ff239 --- /dev/null +++ b/RELEASE.md @@ -0,0 +1,3 @@ +Release type: major + +Added option to specify output image format to automatically convert images.