Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions helpers/images.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,19 @@ def prepare_content(content: Any) -> Any:
return {key: prepare_content(value) for key, value in content.items()}


def strip_images(content: Any) -> Any:
    """Drop every ``image_url`` block from message content (vision disabled).

    Non-list content is handed back untouched. For a list, each dict item
    whose ``"type"`` is ``"image_url"`` is discarded and the remaining items
    are processed recursively. The result is then normalised:

    * nothing left  -> ``""`` (instead of an empty list),
    * exactly one ``"text"`` block left -> that block's ``"text"`` value
      (``""`` when the key is absent),
    * otherwise -> the list of remaining items.
    """
    if not isinstance(content, list):
        return content

    kept = []
    for block in content:
        if isinstance(block, dict) and block.get("type") == "image_url":
            continue  # image block: leave it out of the result
        kept.append(strip_images(block))

    # Image-only (or empty) message: hand back an empty string rather than [].
    if not kept:
        return ""

    # A single surviving text block is flattened back to a plain string.
    if len(kept) == 1:
        only = kept[0]
        if isinstance(only, dict) and only.get("type") == "text":
            return only.get("text", "")

    return kept


def is_local_ref(url: str) -> bool:
if not url:
return False
Expand Down
6 changes: 5 additions & 1 deletion models.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,9 +327,13 @@ def _convert_messages(self, messages: List[BaseMessage], explicit_caching: bool
"system": "system",
"tool": "tool",
}
vision_enabled = self.a0_model_conf.vision if self.a0_model_conf else True
for m in messages:
role = role_mapping.get(m.type, m.type)
message_dict = {"role": role, "content": images.prepare_content(m.content)}
content = images.prepare_content(m.content)
if not vision_enabled:
content = images.strip_images(content)
message_dict = {"role": role, "content": content}

# Handle tool calls for AI messages
tool_calls = getattr(m, "tool_calls", None)
Expand Down
45 changes: 45 additions & 0 deletions tests/test_vision_load_image_refs.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,51 @@ def __init__(self, raw_content, preview):
monkeypatch.delitem(sys.modules, "tools.vision_load", raising=False)


def test_strip_images_passthrough_string():
    """Plain string content, including the empty string, comes back as-is."""
    for text in ("hello", ""):
        assert images.strip_images(text) == text


def test_strip_images_image_only_returns_empty_string():
    """A list holding only one image block is reduced to an empty string."""
    image_block = {"type": "image_url", "image_url": {"url": "data:image/png;base64,abc"}}
    stripped = images.strip_images([image_block])
    assert stripped == ""


def test_strip_images_multi_image_only_returns_empty_string():
    """A list holding several image blocks and nothing else yields ``""``."""
    urls = ("data:image/png;base64,abc", "data:image/png;base64,xyz")
    blocks = [{"type": "image_url", "image_url": {"url": url}} for url in urls]
    assert images.strip_images(blocks) == ""


def test_strip_images_text_and_image_collapses_to_string():
    """One text block plus one image block collapses to the bare text."""
    text_block = {"type": "text", "text": "describe this"}
    image_block = {"type": "image_url", "image_url": {"url": "data:image/png;base64,abc"}}
    stripped = images.strip_images([text_block, image_block])
    assert stripped == "describe this"


def test_strip_images_multiple_text_blocks_preserved():
    """With two text blocks around an image, both text blocks stay in a list."""
    image_block = {"type": "image_url", "image_url": {"url": "data:image/png;base64,abc"}}
    mixed = [
        {"type": "text", "text": "first"},
        image_block,
        {"type": "text", "text": "second"},
    ]
    # Expected value is spelled out with fresh literals, independent of the input.
    expected = [
        {"type": "text", "text": "first"},
        {"type": "text", "text": "second"},
    ]
    assert images.strip_images(mixed) == expected


def test_strip_images_no_images_unchanged():
    """A lone text block with no images comes back as its bare text string."""
    only_text = [{"type": "text", "text": "plain text"}]
    stripped = images.strip_images(only_text)
    assert stripped == "plain text"


def test_strip_images_plain_string_content_unchanged():
    """String content is returned with the same value it was given."""
    stripped = images.strip_images("no images here")
    assert stripped == "no images here"


def test_prepare_content_keeps_missing_local_image_refs_strict():
missing_path = "/tmp/a0-missing-desktop-screenshot.png"

Expand Down