Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ Only write entries that are worth mentioning to users.

## Unreleased

- Tools: `ReadMediaFile` now re-encodes images whose MIME type the model gateway does not accept (e.g. `image/x-icon` from `.ico`, `image/bmp`, `image/tiff`) as PNG before they enter session history, so unsupported-format `400`s no longer poison the conversation

## 1.47.0 (2026-06-05)

- Shell: Guide users to the new standalone Kimi Code — adds a `/upgrade` command that installs it (migrating your config & sessions automatically), a welcome-screen nudge, and a once-per-day tip shown on exit
Expand Down
45 changes: 43 additions & 2 deletions src/kimi_cli/tools/file/read_media.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,15 @@

MAX_MEDIA_MEGABYTES = 100

# The only image MIME types accepted as model input by Kimi (and by
# Anthropic/Google). Anything else -- for example ``image/x-icon`` (from
# ``.ico``), ``image/bmp`` or ``image/tiff`` -- makes the model gateway reject
# the entire request with ``400 unsupported image format``. The rejected image
# then poisons the session history and keeps the conversation from continuing
# on resume.
_PROVIDER_SUPPORTED_IMAGE_MIME_TYPES = frozenset(
    (
        "image/gif",
        "image/jpeg",
        "image/png",
        "image/webp",
    )
)


def _to_data_url(mime_type: str, data: bytes) -> str:
encoded = base64.b64encode(data).decode("ascii")
Expand All @@ -38,6 +47,35 @@ def _extract_image_size(data: bytes) -> tuple[int, int] | None:
return None


def _normalize_image_for_provider(data: bytes, mime_type: str) -> tuple[bytes, str]:
    """Make image bytes acceptable to the model provider, re-encoding if needed.

    Args:
        data: Raw image bytes as read from disk.
        mime_type: MIME type detected for ``data``.

    Returns:
        ``(data, mime_type)`` untouched when ``mime_type`` is one of
        ``_PROVIDER_SUPPORTED_IMAGE_MIME_TYPES``. Otherwise the image
        re-encoded as PNG together with ``"image/png"``.

    This is best-effort: any failure while converting (Pillow not importable,
    undecodable or corrupt image, unsupported mode conversion, ...) is logged
    as a warning and the original ``(data, mime_type)`` is returned, so the
    provider may still reject the request exactly as it would have before.
    """
    if mime_type not in _PROVIDER_SUPPORTED_IMAGE_MIME_TYPES:
        try:
            # Imported lazily and inside the ``try`` so that a missing Pillow
            # degrades to the fallback below instead of raising.
            from PIL import Image

            png_stream = BytesIO()
            with Image.open(BytesIO(data)) as source:
                # Force a full decode now so corrupt files fail here.
                source.load()
                if source.mode in ("RGB", "RGBA"):
                    exportable = source
                else:
                    # Every other mode is normalized to RGBA before saving.
                    exportable = source.convert("RGBA")
                exportable.save(png_stream, format="PNG")
            return png_stream.getvalue(), "image/png"
        except Exception as exc:
            logger.warning(
                "Failed to re-encode {mime} image as PNG; sending original bytes: {error}",
                mime=mime_type,
                error=exc,
            )
    # Reached for already-supported formats and for failed conversions alike.
    return data, mime_type


class Params(BaseModel):
path: str = Field(
description=(
Expand Down Expand Up @@ -112,10 +150,13 @@ async def _read_media(self, path: KaosPath, file_type: FileType) -> ToolReturnVa
match file_type.kind:
case "image":
data = await path.read_bytes()
data_url = _to_data_url(file_type.mime_type, data)
image_size = _extract_image_size(data)
normalized_data, normalized_mime = _normalize_image_for_provider(
data, file_type.mime_type
)
data_url = _to_data_url(normalized_mime, normalized_data)
part = ImageURLPart(image_url=ImageURLPart.ImageURL(url=data_url))
wrapped = wrap_media_part(part, tag="image", attrs={"path": media_path})
image_size = _extract_image_size(data)
case "video":
data = await path.read_bytes()
if (llm := self._runtime.llm) and isinstance(llm.chat_provider, Kimi):
Expand Down
24 changes: 24 additions & 0 deletions tests/tools/test_read_media_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,30 @@ async def test_read_image_file_with_size(
)


async def test_read_ico_file_converts_to_png(
    read_media_file_tool: ReadMediaFile, temp_work_dir: KaosPath
):
    """Regression test for #2017: an ``.ico`` file must come back as PNG.

    ``.ico`` files carry the ``image/x-icon`` MIME type, which Kimi rejects
    with ``unsupported image format``. The tool therefore has to re-encode
    them to PNG so the conversation can continue.
    """
    pil_image = pytest.importorskip("PIL.Image")

    # Build a tiny 16x16 opaque-red icon in memory and write it to the workdir.
    ico_stream = BytesIO()
    pil_image.new("RGBA", (16, 16), (255, 0, 0, 255)).save(ico_stream, format="ICO")
    ico_path = temp_work_dir / "favicon.ico"
    await ico_path.write_bytes(ico_stream.getvalue())

    result = await read_media_file_tool(Params(path=str(ico_path)))

    assert not result.is_error
    assert isinstance(result.output, list)
    # The image part is expected at index 1 of the tool output.
    part = result.output[1]
    assert isinstance(part, ImageURLPart)
    assert part.image_url.url.startswith("data:image/png;base64,"), (
        f"expected ico to be re-encoded as PNG, got: {part.image_url.url[:64]!r}"
    )


async def test_read_video_file(read_media_file_tool: ReadMediaFile, temp_work_dir: KaosPath):
"""Test reading a video file."""
video_file = temp_work_dir / "sample.mp4"
Expand Down