Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion swift/template/vision_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,10 @@ def rescale_image(img: Image.Image, max_pixels: int) -> Image.Image:

def _check_path(path: str) -> Union[str, None]:
"""If it is a path, return the string; if it is base64, return None."""
if not isinstance(path, str):
# bytes audio/image data is not a path; let the caller fall back to it
# instead of crashing on the str-only checks below (e.g. startswith).
return None
MAX_PATH_HEURISTIC = 2000
if len(path) > MAX_PATH_HEURISTIC:
return
Expand Down Expand Up @@ -302,7 +306,7 @@ def load_audio(audio: Union[str, bytes], sampling_rate: int, return_sr: bool = F
audio_io = load_file(audio)
res = librosa.load(audio_io, sr=sampling_rate)
except Exception:
if audio.startswith(('http://', 'https://')):
if isinstance(audio, str) and audio.startswith(('http://', 'https://')):

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

While guarding the startswith check with isinstance(audio, str) prevents a TypeError on this line, a similar TypeError will still occur on line 309 in the else block:

audio_io = _check_path(audio) or audio

If audio is of type bytes, _check_path(audio) is called. _check_path expects a str and performs string-specific operations (like data.startswith('data:') on line 118), which will raise TypeError: startswith first arg must be bytes or a tuple of bytes, not str.

To fully support bytes input without crashing, we should also avoid calling _check_path when audio is bytes.

import audioread
audio_io = audioread.ffdec.FFmpegAudioFile(audio)
else:
Expand Down
37 changes: 37 additions & 0 deletions tests/general/test_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,3 +77,40 @@ def test_mllm_dataset_map():
test_mllm()
test_llm_dataset_map()
test_mllm_dataset_map()


def test_load_audio_bytes_input_does_not_crash_on_fallback(monkeypatch):
    """Exercise the fallback path of ``load_audio`` with raw ``bytes`` input.

    ``librosa`` is replaced in ``sys.modules`` by a stub module whose ``load``
    raises on its first invocation and returns a fixed waveform afterwards.
    The test passes when ``load_audio`` returns that waveform rather than
    raising while handling the first failure.
    """
    import sys
    import types

    from swift.template import vision_utils

    seen_inputs = []

    def stub_load(audio_io, sr):
        # Record every input so the stub can tell the first attempt apart
        # from any later retry.
        seen_inputs.append(audio_io)
        if len(seen_inputs) > 1:
            return ([0.1, 0.2], sr)
        # Simulate an initial decode failure (e.g. a format soundfile can't
        # read); this is what pushes the caller into its except branch, where
        # bytes.startswith used to be called and crash.
        raise RuntimeError('first load fails')

    stub_librosa = types.ModuleType('librosa')
    stub_librosa.load = stub_load
    monkeypatch.setitem(sys.modules, 'librosa', stub_librosa)

    # The signature is Union[str, bytes], so bytes must be accepted: neither
    # `audio.startswith(...)` nor `_check_path(bytes)` may raise a TypeError
    # once the first decode has failed and the except branch is running.
    raw_audio = b'\x00\x01raw-audio-bytes'
    waveform = vision_utils.load_audio(raw_audio, sampling_rate=16000)

    assert waveform == [0.1, 0.2]


def test_check_path_with_bytes_returns_none():
    """``_check_path`` must answer ``None`` for ``bytes`` input, not raise."""
    from swift.template.vision_utils import _check_path

    # Raw bytes are never a filesystem path, so the helper has to bail out
    # with None before reaching its str-only checks (len/os.path/startswith),
    # which would otherwise raise a TypeError.
    outcome = _check_path(b'\x00\x01raw-bytes')

    assert outcome is None