From 9bb35c9a46771efefff3fb82f4a7f4f88317b1c4 Mon Sep 17 00:00:00 2001 From: Pluviobyte Date: Thu, 28 May 2026 06:03:41 +0000 Subject: [PATCH] fix(file): convert unsupported image formats to PNG in ReadMediaFile Kimi (and Anthropic/Google) image input only accepts image/png, image/jpeg, image/gif, and image/webp. When the agent calls ReadMediaFile on a .ico file (mime image/x-icon), the resulting data URL was written straight into session history. The next provider request then crashed with `400 unsupported image format: image/x-icon`, and because the offending turn was already persisted, every subsequent resume hit the same error -- the conversation could never be continued. Re-encode any non-supported image format to PNG at the ReadMediaFile boundary so the persisted ImageURLPart is always model-compatible. Falls back to the original bytes when Pillow cannot decode the file, so genuine read failures keep their previous behaviour instead of turning into tool errors. Fixes #2017 Co-authored-by: Cursor --- CHANGELOG.md | 2 ++ src/kimi_cli/tools/file/read_media.py | 45 +++++++++++++++++++++++++-- tests/tools/test_read_media_file.py | 24 ++++++++++++++ 3 files changed, 69 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 73272051f..5e804bea6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ Only write entries that are worth mentioning to users. ## Unreleased +- Tools: `ReadMediaFile` now re-encodes images whose MIME type the model gateway does not accept (e.g. `image/x-icon` from `.ico`, `image/bmp`, `image/tiff`) as PNG before they enter session history, so unsupported-format `400`s no longer poison the conversation + ## 1.45.0 (2026-05-26) - Shell: `/clear` is now an alias for `/new` — both commands start a new session; previously `/clear` only cleared context without creating a new session diff --git a/src/kimi_cli/tools/file/read_media.py b/src/kimi_cli/tools/file/read_media.py index 1a1df422b..40c4529d6 100644 --- a/src/kimi_cli/tools/file/read_media.py +++ b/src/kimi_cli/tools/file/read_media.py @@ -19,6 +19,15 @@ MAX_MEDIA_MEGABYTES = 100 +# Kimi (and Anthropic/Google) image input only accepts these MIME types. +# Other formats (e.g. ``image/x-icon`` from ``.ico``, ``image/bmp``, +# ``image/tiff``) cause the model gateway to reject the entire request with +# ``400 unsupported image format``, which then poisons the session history +# and prevents the conversation from continuing on resume. +_PROVIDER_SUPPORTED_IMAGE_MIME_TYPES = frozenset( + {"image/png", "image/jpeg", "image/gif", "image/webp"} +) + def _to_data_url(mime_type: str, data: bytes) -> str: encoded = base64.b64encode(data).decode("ascii") @@ -38,6 +47,35 @@ def _extract_image_size(data: bytes) -> tuple[int, int] | None: return None +def _normalize_image_for_provider(data: bytes, mime_type: str) -> tuple[bytes, str]: + """Re-encode unsupported image formats to PNG before sending to the model. + + Returns the original ``(data, mime_type)`` for already-supported formats. + If conversion fails (e.g. corrupt image, missing Pillow plugin), falls + back to the original bytes; the provider may still reject the request, + but at least the failure mode is unchanged. + """ + if mime_type in _PROVIDER_SUPPORTED_IMAGE_MIME_TYPES: + return data, mime_type + try: + from PIL import Image + + with Image.open(BytesIO(data)) as image: + image.load() + if image.mode not in ("RGB", "RGBA"): + image = image.convert("RGBA") + buffer = BytesIO() + image.save(buffer, format="PNG") + return buffer.getvalue(), "image/png" + except Exception as exc: + logger.warning( + "Failed to re-encode {mime} image as PNG; sending original bytes: {error}", + mime=mime_type, + error=exc, + ) + return data, mime_type + + class Params(BaseModel): path: str = Field( description=( @@ -112,10 +150,13 @@ async def _read_media(self, path: KaosPath, file_type: FileType) -> ToolReturnVa match file_type.kind: case "image": data = await path.read_bytes() - data_url = _to_data_url(file_type.mime_type, data) + image_size = _extract_image_size(data) + normalized_data, normalized_mime = _normalize_image_for_provider( + data, file_type.mime_type + ) + data_url = _to_data_url(normalized_mime, normalized_data) part = ImageURLPart(image_url=ImageURLPart.ImageURL(url=data_url)) wrapped = wrap_media_part(part, tag="image", attrs={"path": media_path}) - image_size = _extract_image_size(data) case "video": data = await path.read_bytes() if (llm := self._runtime.llm) and isinstance(llm.chat_provider, Kimi): diff --git a/tests/tools/test_read_media_file.py b/tests/tools/test_read_media_file.py index a1933a412..07611cf19 100644 --- a/tests/tools/test_read_media_file.py +++ b/tests/tools/test_read_media_file.py @@ -91,6 +91,30 @@ async def test_read_image_file_with_size( ) +async def test_read_ico_file_converts_to_png( + read_media_file_tool: ReadMediaFile, temp_work_dir: KaosPath +): + """``.ico`` files have ``image/x-icon`` MIME, which Kimi rejects as + ``unsupported image format`` (regression: #2017). The tool must + re-encode them to PNG so the conversation can continue.""" + Image = pytest.importorskip("PIL.Image") + image_file = temp_work_dir / "favicon.ico" + image = Image.new("RGBA", (16, 16), (255, 0, 0, 255)) + buffer = BytesIO() + image.save(buffer, format="ICO") + await image_file.write_bytes(buffer.getvalue()) + + result = await read_media_file_tool(Params(path=str(image_file))) + + assert not result.is_error + assert isinstance(result.output, list) + part = result.output[1] + assert isinstance(part, ImageURLPart) + assert part.image_url.url.startswith("data:image/png;base64,"), ( + f"expected ico to be re-encoded as PNG, got: {part.image_url.url[:64]!r}" + ) + + async def test_read_video_file(read_media_file_tool: ReadMediaFile, temp_work_dir: KaosPath): """Test reading a video file.""" video_file = temp_work_dir / "sample.mp4"