From 09ae59a85fe6ef2efe001ab471df48f1f62bcbc7 Mon Sep 17 00:00:00 2001
From: Yufeng He <40085740+he-yufeng@users.noreply.github.com>
Date: Mon, 22 Jun 2026 17:33:32 +0800
Subject: [PATCH 1/2] fix(template): preserve non-string prefix elements in
 _replace_system

_replace_system replaced {{SYSTEM}} in string prefix elements but
filtered out non-string elements entirely. Token-ID lists such as
['bos_token_id'] are valid prefix components used by ziya, bluelm,
emu3_chat and other templates (prefix=[['bos_token_id'], '{{SYSTEM}}']).
When no system message is given, those elements were silently dropped,
causing every training/inference sequence for those templates to start
without the expected BOS token.

Change the filter comprehension to a conditional expression so that
non-strings pass through unchanged.
---
 swift/template/template_meta.py     |  2 +-
 tests/general/test_template_meta.py | 24 ++++++++++++++++++++++++
 2 files changed, 25 insertions(+), 1 deletion(-)
 create mode 100644 tests/general/test_template_meta.py

diff --git a/swift/template/template_meta.py b/swift/template/template_meta.py
index 7a8b6dcb3b..7e7a5e171c 100644
--- a/swift/template/template_meta.py
+++ b/swift/template/template_meta.py
@@ -69,7 +69,7 @@ def _has_system(prefix_or_prompt: Prompt) -> bool:
 
     @staticmethod
     def _replace_system(prefix: Prompt) -> Prompt:
-        return [p.replace('{{SYSTEM}}', '') for p in prefix if isinstance(p, str)]
+        return [p.replace('{{SYSTEM}}', '') if isinstance(p, str) else p for p in prefix]
 
     def _check_template_meta(self):
         # check
diff --git a/tests/general/test_template_meta.py b/tests/general/test_template_meta.py
new file mode 100644
index 0000000000..37dad66878
--- /dev/null
+++ b/tests/general/test_template_meta.py
@@ -0,0 +1,24 @@
+# Copyright (c) ModelScope Contributors. All rights reserved.
+from swift.template import TemplateMeta
+
+
+def test_replace_system_preserves_non_string_elements():
+    """_replace_system must not drop list elements like ['bos_token_id'].
+
+    Templates such as ziya, bluelm and emu3_chat use
+    ``prefix=[['bos_token_id'], '{{SYSTEM}}']``. When no system message is
+    provided the prefix is produced by _replace_system, which should keep every
+    non-string element intact and only strip the placeholder from strings.
+    """
+    meta = TemplateMeta(
+        template_type='_test_replace_system_bug',
+        prefix=[['bos_token_id'], '{{SYSTEM}}'],
+        prompt=['{{QUERY}}'],
+        chat_sep=['\n'],
+    )
+    # __post_init__ moves prefix to system_prefix and builds a no-system prefix
+    # via _replace_system. The list element must survive.
+    assert any(isinstance(p, list) for p in meta.prefix), (
+        f'_replace_system dropped the bos_token_id list; '
+        f'meta.prefix={meta.prefix!r}'
+    )

From 3ec09580d78d3f6f7274224c6bb6bea38fed0554 Mon Sep 17 00:00:00 2001
From: Yufeng He <40085740+he-yufeng@users.noreply.github.com>
Date: Wed, 24 Jun 2026 11:07:32 +0800
Subject: [PATCH 2/2] style: format the template-meta regression test to
 satisfy yapf

---
 tests/general/test_template_meta.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/tests/general/test_template_meta.py b/tests/general/test_template_meta.py
index 37dad66878..58d8757e29 100644
--- a/tests/general/test_template_meta.py
+++ b/tests/general/test_template_meta.py
@@ -18,7 +18,5 @@ def test_replace_system_preserves_non_string_elements():
     )
     # __post_init__ moves prefix to system_prefix and builds a no-system prefix
     # via _replace_system. The list element must survive.
-    assert any(isinstance(p, list) for p in meta.prefix), (
-        f'_replace_system dropped the bos_token_id list; '
-        f'meta.prefix={meta.prefix!r}'
-    )
+    assert any(isinstance(p, list) for p in meta.prefix), (f'_replace_system dropped the bos_token_id list; '
+                                                           f'meta.prefix={meta.prefix!r}')