From 215488734d6620f1ad8027e7b1e6fb447ad929e0 Mon Sep 17 00:00:00 2001 From: dex0shubham Date: Tue, 23 Jun 2026 10:10:39 +0100 Subject: [PATCH] fix(eval): sanitize example_id to prevent path traversal (fixes #78, #79) example_id flowed unsanitized into os.path.join output filenames in eval/lib/pixel_query.py, so an id like '../../evil' could write outside output_dir. Add a _safe_stem() helper (Path(x).name) and apply it at all three sites: PixelQueryRenderer.render (#78), .render_all (missed by the audit), and QueryImageTextRenderer.render (#79). Add tests/test_pixel_query_paths.py. Co-Authored-By: Claude Opus 4.8 --- eval/lib/pixel_query.py | 20 +++++-- tests/test_pixel_query_paths.py | 92 +++++++++++++++++++++++++++++++++ 2 files changed, 109 insertions(+), 3 deletions(-) create mode 100644 tests/test_pixel_query_paths.py diff --git a/eval/lib/pixel_query.py b/eval/lib/pixel_query.py index c28c071..c62ada8 100644 --- a/eval/lib/pixel_query.py +++ b/eval/lib/pixel_query.py @@ -11,10 +11,22 @@ import logging import os +from pathlib import Path from PIL import Image, ImageDraw, ImageFont logger = logging.getLogger(__name__) + +def _safe_stem(example_id: str) -> str: + """Strip directory components from an id before using it in a filename. + + ``Path(x).name`` keeps only the final path component, which never contains a + path separator and is never absolute — so a crafted id like ``../../evil`` can + no longer escape the output directory (path traversal). + """ + return Path(str(example_id)).name + + # Default font paths (tried in order) _FONT_CANDIDATES = [ "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", @@ -118,7 +130,7 @@ def render(self, example_id: str, query_text: str) -> str: If the image already exists on disk it is *not* re-rendered. """ - out_path = os.path.join(self.output_dir, f"{example_id}_query.png") + out_path = os.path.join(self.output_dir, f"{_safe_stem(example_id)}_query.png") if os.path.exists(out_path): return out_path @@ -140,7 +152,7 @@ def render_all(self, examples: list[dict]) -> dict[str, str]: rendered, cached = 0, 0 for ex in examples: eid = ex["id"] - path = os.path.join(self.output_dir, f"{eid}_query.png") + path = os.path.join(self.output_dir, f"{_safe_stem(eid)}_query.png") if os.path.exists(path): cached += 1 else: @@ -270,7 +282,9 @@ def render( Returns: Path to the saved PNG. """ - out_path = os.path.join(self.output_dir, f"{example_id}_query_card.png") + out_path = os.path.join( + self.output_dir, f"{_safe_stem(example_id)}_query_card.png" + ) if os.path.exists(out_path) and not force: return out_path diff --git a/tests/test_pixel_query_paths.py b/tests/test_pixel_query_paths.py new file mode 100644 index 0000000..e74c68a --- /dev/null +++ b/tests/test_pixel_query_paths.py @@ -0,0 +1,92 @@ +"""Path-traversal hardening for eval/lib/pixel_query.py (issues #78, #79). + +`example_id` is interpolated into output filenames; a crafted id like ``../../evil`` +must not let the renderer write outside ``output_dir``. These tests assert the +sanitizer neutralizes separators and that constructed output paths stay contained. + +The eval lib is a standalone module (not part of the installed ``pixelrag`` wheel), +so it is imported by file path. Only ``_safe_stem`` and path construction are exercised +by default — an integration test that actually renders is skipped unless a TTF font is +available, since rendering requires one. +""" + +import importlib.util +import os +from pathlib import Path + +import pytest + +_PIXEL_QUERY = Path(__file__).resolve().parents[1] / "eval" / "lib" / "pixel_query.py" + + +def _load_module(): + spec = importlib.util.spec_from_file_location("pixel_query", _PIXEL_QUERY) + mod = importlib.util.module_from_spec(spec) + spec.loader.exec_module(mod) + return mod + + +pixel_query = _load_module() + +# Ids that try to escape the output directory. +MALICIOUS_IDS = [ + "../evil", + "../../etc/passwd", + "a/b/c", + "/abs/evil", + "..\\..\\evil", # Windows-style separators + "foo/../bar", +] + + +@pytest.mark.parametrize("bad_id", MALICIOUS_IDS) +def test_safe_stem_has_no_separators(bad_id): + stem = pixel_query._safe_stem(bad_id) + assert "/" not in stem + assert os.sep not in stem + if os.altsep: + assert os.altsep not in stem + assert not os.path.isabs(stem) + + +@pytest.mark.parametrize("bad_id", MALICIOUS_IDS) +def test_constructed_path_stays_in_output_dir(tmp_path, bad_id): + # Mirror exactly how the renderers build their paths. + out_dir = tmp_path / "out" + out_dir.mkdir() + out_path = os.path.join(str(out_dir), f"{pixel_query._safe_stem(bad_id)}_query.png") + resolved = Path(out_path).resolve() + assert out_dir.resolve() == resolved.parent, ( + f"{bad_id!r} escaped output_dir: resolved to {resolved}" + ) + + +def test_safe_stem_preserves_normal_ids(): + assert pixel_query._safe_stem("abc123") == "abc123" + assert pixel_query._safe_stem("example_42") == "example_42" + + +def _find_font(): + for c in ( + *pixel_query._FONT_CANDIDATES, + r"C:\Windows\Fonts\arial.ttf", + "/System/Library/Fonts/Supplemental/Arial.ttf", + ): + if os.path.exists(c): + return c + return None + + +@pytest.mark.skipif(_find_font() is None, reason="no TTF font available to render") +def test_render_malicious_id_does_not_escape(tmp_path): + out_dir = tmp_path / "out" + sentinel = tmp_path / "evil_query.png" # where ../evil would land pre-fix + renderer = pixel_query.PixelQueryRenderer( + output_dir=str(out_dir), font_path=_find_font() + ) + written = renderer.render("../evil", "what is the capital of France?") + written_path = Path(written).resolve() + + assert out_dir.resolve() == written_path.parent, "render escaped output_dir" + assert written_path.exists() + assert not sentinel.exists(), "file was written outside output_dir"