Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 17 additions & 3 deletions eval/lib/pixel_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,22 @@

import logging
import os
from pathlib import Path
from PIL import Image, ImageDraw, ImageFont

logger = logging.getLogger(__name__)


def _safe_stem(example_id: str) -> str:
"""Strip directory components from an id before using it in a filename.

``Path(x).name`` keeps only the final path component, which never contains a
path separator and is never absolute — so a crafted id like ``../../evil`` can
no longer escape the output directory (path traversal).
"""
return Path(str(example_id)).name


# Default font paths (tried in order)
_FONT_CANDIDATES = [
"/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
Expand Down Expand Up @@ -118,7 +130,7 @@ def render(self, example_id: str, query_text: str) -> str:

If the image already exists on disk it is *not* re-rendered.
"""
out_path = os.path.join(self.output_dir, f"{example_id}_query.png")
out_path = os.path.join(self.output_dir, f"{_safe_stem(example_id)}_query.png")
if os.path.exists(out_path):
return out_path

Expand All @@ -140,7 +152,7 @@ def render_all(self, examples: list[dict]) -> dict[str, str]:
rendered, cached = 0, 0
for ex in examples:
eid = ex["id"]
path = os.path.join(self.output_dir, f"{eid}_query.png")
path = os.path.join(self.output_dir, f"{_safe_stem(eid)}_query.png")
if os.path.exists(path):
cached += 1
else:
Expand Down Expand Up @@ -270,7 +282,9 @@ def render(
Returns:
Path to the saved PNG.
"""
out_path = os.path.join(self.output_dir, f"{example_id}_query_card.png")
out_path = os.path.join(
self.output_dir, f"{_safe_stem(example_id)}_query_card.png"
)
if os.path.exists(out_path) and not force:
return out_path

Expand Down
92 changes: 92 additions & 0 deletions tests/test_pixel_query_paths.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
"""Path-traversal hardening for eval/lib/pixel_query.py (issues #78, #79).

`example_id` is interpolated into output filenames; a crafted id like ``../../evil``
must not let the renderer write outside ``output_dir``. These tests assert the
sanitizer neutralizes separators and that constructed output paths stay contained.

The eval lib is a standalone module (not part of the installed ``pixelrag`` wheel),
so it is imported by file path. Only ``_safe_stem`` and path construction are exercised
by default — an integration test that actually renders is skipped unless a TTF font is
available, since rendering requires one.
"""

import importlib.util
import os
from pathlib import Path

import pytest

_PIXEL_QUERY = Path(__file__).resolve().parents[1] / "eval" / "lib" / "pixel_query.py"


def _load_module():
spec = importlib.util.spec_from_file_location("pixel_query", _PIXEL_QUERY)
mod = importlib.util.module_from_spec(spec)
spec.loader.exec_module(mod)
return mod


pixel_query = _load_module()

# Ids that try to escape the output directory.
MALICIOUS_IDS = [
"../evil",
"../../etc/passwd",
"a/b/c",
"/abs/evil",
"..\\..\\evil", # Windows-style separators
"foo/../bar",
]


@pytest.mark.parametrize("bad_id", MALICIOUS_IDS)
def test_safe_stem_has_no_separators(bad_id):
stem = pixel_query._safe_stem(bad_id)
assert "/" not in stem
assert os.sep not in stem
if os.altsep:
assert os.altsep not in stem
assert not os.path.isabs(stem)


@pytest.mark.parametrize("bad_id", MALICIOUS_IDS)
def test_constructed_path_stays_in_output_dir(tmp_path, bad_id):
# Mirror exactly how the renderers build their paths.
out_dir = tmp_path / "out"
out_dir.mkdir()
out_path = os.path.join(str(out_dir), f"{pixel_query._safe_stem(bad_id)}_query.png")
resolved = Path(out_path).resolve()
assert out_dir.resolve() == resolved.parent, (
f"{bad_id!r} escaped output_dir: resolved to {resolved}"
)


def test_safe_stem_preserves_normal_ids():
assert pixel_query._safe_stem("abc123") == "abc123"
assert pixel_query._safe_stem("example_42") == "example_42"


def _find_font():
for c in (
*pixel_query._FONT_CANDIDATES,
r"C:\Windows\Fonts\arial.ttf",
"/System/Library/Fonts/Supplemental/Arial.ttf",
):
if os.path.exists(c):
return c
return None


@pytest.mark.skipif(_find_font() is None, reason="no TTF font available to render")
def test_render_malicious_id_does_not_escape(tmp_path):
out_dir = tmp_path / "out"
sentinel = tmp_path / "evil_query.png" # where ../evil would land pre-fix
renderer = pixel_query.PixelQueryRenderer(
output_dir=str(out_dir), font_path=_find_font()
)
written = renderer.render("../evil", "what is the capital of France?")
written_path = Path(written).resolve()

assert out_dir.resolve() == written_path.parent, "render escaped output_dir"
assert written_path.exists()
assert not sentinel.exists(), "file was written outside output_dir"
Loading