Skip to content

Commit 42eceb3

Browse files
Raman369AIGWeale
authored and committed
fix: raise ValueError for unsupported MIME types in file_data URI path
Merge #5023

Fixes #5022

## Problem

When a `Part` with `file_data.file_uri` has no determinable MIME type, the library fell back to `application/octet-stream` via `_DEFAULT_MIME_TYPE`. This value then propagated to LiteLLM, which raised a cryptic internal `ValueError` with no guidance for the user. The same failure occurred when the caller explicitly set `mime_type = "application/octet-stream"` on the Part. Both cases reach the same failure point.

There was also an inconsistency between the two content paths:

- The `inline_data` path raises `ValueError` immediately for unsupported MIME types.
- The `file_data` path silently used a fallback and failed later with a cryptic message.

`GcsArtifactService` generates URIs like `gs://bucket/artifact/0` with no extension and no MIME type, making ADK's own artifact system the primary trigger for this fallback.

## Fix

Removes `_DEFAULT_MIME_TYPE` and raises `ValueError` early with an actionable message when the resolved MIME type is either unknown or `application/octet-stream`. This aligns the `file_data` path with the existing fail-fast behavior of the `inline_data` path. The logic order is also corrected so providers that always produce a text fallback (anthropic, non-Gemini Vertex AI) and OpenAI/Azure HTTP media URLs are handled before the MIME type guard, keeping those paths unaffected.

## Changes

- `src/google/adk/models/lite_llm.py`: remove `_DEFAULT_MIME_TYPE`, restructure file_uri handling block, raise `ValueError` for missing or generic MIME types
- `tests/unittests/models/test_litellm.py`: update two existing tests to assert the new `ValueError`, add one new test covering explicit `application/octet-stream`

## Testing

```
pytest tests/unittests/models/test_litellm.py
241 passed, 5 errors (pre-existing, missing pytest-mock fixture)
```

Format verified with `pyink`. mypy error count unchanged from main (26).
Co-authored-by: George Weale <gweale@google.com>
COPYBARA_INTEGRATE_REVIEW=#5023 from Raman369AI:fix/file-uri-unknown-mime-type-error c7eb917
PiperOrigin-RevId: 938679610
1 parent 3c7d65a commit 42eceb3

2 files changed

Lines changed: 87 additions & 61 deletions

File tree

src/google/adk/models/lite_llm.py

Lines changed: 30 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -330,10 +330,6 @@ def _get_provider_from_model(model: str) -> str:
330330
return ""
331331

332332

333-
# Default MIME type when none can be inferred
334-
_DEFAULT_MIME_TYPE = "application/octet-stream"
335-
336-
337333
def _infer_mime_type_from_uri(uri: str) -> Optional[str]:
338334
"""Attempts to infer MIME type from a URI's path extension.
339335
@@ -1217,33 +1213,33 @@ async def _get_content(
12171213
})
12181214
continue
12191215

1220-
# Determine MIME type: use explicit value, infer from URI, or use default.
1216+
# Resolve MIME type early: needed before the media-URL shortcut below,
1217+
# which must run before the generic text-fallback check. The raise is
1218+
# deferred until after all early-continue paths so that providers which
1219+
# always fall back to text (anthropic, non-Gemini Vertex AI) are never
1220+
# asked for a MIME type they cannot supply.
12211221
mime_type = part.file_data.mime_type
12221222
if not mime_type:
12231223
mime_type = _infer_mime_type_from_uri(part.file_data.file_uri)
12241224
if not mime_type and part.file_data.display_name:
12251225
guessed_mime_type, _ = mimetypes.guess_type(part.file_data.display_name)
12261226
mime_type = guessed_mime_type
1227-
if not mime_type:
1228-
# LiteLLM's Vertex AI backend requires format for GCS URIs.
1229-
mime_type = _DEFAULT_MIME_TYPE
1230-
logger.debug(
1231-
"Could not determine MIME type for file_uri %s, using default: %s",
1232-
part.file_data.file_uri,
1233-
mime_type,
1234-
)
1235-
mime_type = _normalize_mime_type(mime_type)
1227+
if mime_type:
1228+
mime_type = _normalize_mime_type(mime_type)
12361229

1230+
# For OpenAI/Azure: HTTP media URLs (image, video, audio) are sent as
1231+
# typed URL blocks and must be handled before the generic text fallback.
12371232
if provider in _FILE_ID_REQUIRED_PROVIDERS and _is_http_url(
12381233
part.file_data.file_uri
12391234
):
1240-
url_content_type = _media_url_content_type(mime_type)
1241-
if url_content_type:
1242-
content_objects.append({
1243-
"type": url_content_type,
1244-
url_content_type: {"url": part.file_data.file_uri},
1245-
})
1246-
continue
1235+
if mime_type:
1236+
url_content_type = _media_url_content_type(mime_type)
1237+
if url_content_type:
1238+
content_objects.append({
1239+
"type": url_content_type,
1240+
url_content_type: {"url": part.file_data.file_uri},
1241+
})
1242+
continue
12471243

12481244
if not _is_file_uri_supported(provider, model, part.file_data.file_uri):
12491245
redacted_file_uri = _redact_file_uri_for_log(
@@ -1255,6 +1251,19 @@ async def _get_content(
12551251
f" {provider}."
12561252
)
12571253

1254+
# All remaining providers (e.g. Vertex AI + Gemini) require a specific
1255+
# MIME type in the file object. Both a missing type and
1256+
# 'application/octet-stream' cause a downstream ValueError from LiteLLM
1257+
# regardless of whether the value was set explicitly by the caller or
1258+
# arrived via a default fallback; raise early with an actionable message.
1259+
if not mime_type or mime_type == "application/octet-stream":
1260+
type_label = mime_type or "(unknown)"
1261+
raise ValueError(
1262+
f"Cannot process file_uri {part.file_data.file_uri!r}: MIME type"
1263+
f" {type_label!r} is not supported. Please set a specific MIME"
1264+
" type on `file_data.mime_type`."
1265+
)
1266+
12581267
file_object: ChatCompletionFileUrlObject = {
12591268
"file_id": part.file_data.file_uri,
12601269
}

tests/unittests/models/test_litellm.py

Lines changed: 57 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -1972,14 +1972,12 @@ async def test_content_to_message_param_user_message_file_uri_only(
19721972

19731973
@pytest.mark.asyncio
19741974
async def test_content_to_message_param_user_message_file_uri_without_mime_type():
1975-
"""Test handling of file_data without mime_type (GcsArtifactService scenario).
1975+
"""Test that file_data without an inferable mime_type raises ValueError.
19761976
19771977
When using GcsArtifactService, artifacts may have file_uri (gs://...) but
1978-
without mime_type set. LiteLLM's Vertex AI backend requires the format
1979-
field to be present, so we infer MIME type from the URI extension or use
1980-
a default fallback to ensure compatibility.
1981-
1982-
See: https://github.com/google/adk-python/issues/3787
1978+
without mime_type set. When the MIME type cannot be determined from the URI
1979+
extension or display_name, ADK raises a clear ValueError rather than
1980+
forwarding an unsupported 'application/octet-stream' to LiteLLM.
19831981
"""
19841982
file_part = types.Part(
19851983
file_data=types.FileData(
@@ -1994,22 +1992,34 @@ async def test_content_to_message_param_user_message_file_uri_without_mime_type(
19941992
],
19951993
)
19961994

1997-
message = await _content_to_message_param(content)
1998-
assert message == {
1999-
"role": "user",
2000-
"content": [
2001-
{"type": "text", "text": "Analyze this file."},
2002-
{
2003-
"type": "file",
2004-
"file": {
2005-
"file_id": (
2006-
"gs://agent-artifact-bucket/app/user/session/artifact/0"
2007-
),
2008-
"format": "application/octet-stream",
2009-
},
2010-
},
1995+
with pytest.raises(ValueError, match="Cannot process file_uri"):
1996+
await _content_to_message_param(content)
1997+
1998+
1999+
@pytest.mark.asyncio
2000+
async def test_content_to_message_param_user_message_file_uri_explicit_octet_stream():
2001+
"""Test that an explicit application/octet-stream MIME type raises ValueError.
2002+
2003+
Upstream callers may explicitly set mime_type to 'application/octet-stream'
2004+
when the true type is unknown. ADK treats this identically to a missing MIME
2005+
type and raises early rather than forwarding the unsupported type to LiteLLM.
2006+
"""
2007+
file_part = types.Part(
2008+
file_data=types.FileData(
2009+
file_uri="gs://agent-artifact-bucket/app/user/session/artifact/0",
2010+
mime_type="application/octet-stream",
2011+
)
2012+
)
2013+
content = types.Content(
2014+
role="user",
2015+
parts=[
2016+
types.Part.from_text(text="Analyze this file."),
2017+
file_part,
20112018
],
2012-
}
2019+
)
2020+
2021+
with pytest.raises(ValueError, match="application/octet-stream"):
2022+
await _content_to_message_param(content)
20132023

20142024

20152025
@pytest.mark.asyncio
@@ -2018,8 +2028,6 @@ async def test_content_to_message_param_user_message_file_uri_infer_mime_type():
20182028
20192029
When file_data has a file_uri with a recognizable extension but no explicit
20202030
mime_type, the MIME type should be inferred from the extension.
2021-
2022-
See: https://github.com/google/adk-python/issues/3787
20232031
"""
20242032
file_part = types.Part(
20252033
file_data=types.FileData(
@@ -3293,8 +3301,6 @@ async def test_get_content_file_uri_infer_mime_type():
32933301
32943302
When file_data has a file_uri with a recognizable extension but no explicit
32953303
mime_type, the MIME type should be inferred from the extension.
3296-
3297-
See: https://github.com/google/adk-python/issues/3787
32983304
"""
32993305
# Use Part constructor directly to test MIME type inference in _get_content
33003306
# (types.Part.from_uri does its own inference, so we bypass it)
@@ -3344,27 +3350,38 @@ async def test_get_content_file_uri_infers_from_display_name():
33443350

33453351
@pytest.mark.asyncio
33463352
async def test_get_content_file_uri_default_mime_type():
3347-
"""Test that file_uri without extension uses default MIME type.
3353+
"""Test that file_uri without an inferable extension raises ValueError.
33483354
33493355
When file_data has a file_uri without a recognizable extension and no explicit
3350-
mime_type, a default MIME type should be used to ensure compatibility with
3351-
LiteLLM backends.
3352-
3353-
See: https://github.com/google/adk-python/issues/3787
3356+
mime_type, ADK raises a clear ValueError instead of forwarding the unsupported
3357+
'application/octet-stream' MIME type to LiteLLM.
33543358
"""
3355-
# Use Part constructor directly to create file_data without mime_type
3356-
# (types.Part.from_uri requires a valid mime_type when it can't infer)
33573359
parts = [
33583360
types.Part(file_data=types.FileData(file_uri="gs://bucket/artifact/0"))
33593361
]
3360-
content = await _get_content(parts)
3361-
assert content[0] == {
3362-
"type": "file",
3363-
"file": {
3364-
"file_id": "gs://bucket/artifact/0",
3365-
"format": "application/octet-stream",
3366-
},
3367-
}
3362+
with pytest.raises(ValueError, match="Cannot process file_uri"):
3363+
await _get_content(parts)
3364+
3365+
3366+
@pytest.mark.asyncio
3367+
async def test_get_content_file_uri_explicit_octet_stream_raises():
3368+
"""Test that an explicit application/octet-stream MIME type raises ValueError.
3369+
3370+
'application/octet-stream' is semantically equivalent to an unknown type and
3371+
causes the same downstream ValueError from LiteLLM whether it arrives as a
3372+
default fallback or is set explicitly by the caller. ADK raises early with
3373+
an actionable message in both cases.
3374+
"""
3375+
parts = [
3376+
types.Part(
3377+
file_data=types.FileData(
3378+
file_uri="gs://bucket/artifact/0",
3379+
mime_type="application/octet-stream",
3380+
)
3381+
)
3382+
]
3383+
with pytest.raises(ValueError, match="application/octet-stream"):
3384+
await _get_content(parts)
33683385

33693386

33703387
@pytest.mark.asyncio

0 commit comments

Comments
 (0)