diff --git a/swift/dataset/dataset/llm.py b/swift/dataset/dataset/llm.py
index a144964f22..55169b752e 100644
--- a/swift/dataset/dataset/llm.py
+++ b/swift/dataset/dataset/llm.py
@@ -93,6 +93,10 @@ def preprocess(self, row):
 def _repair_ms_bench(messages: str) -> Optional[List[Dict[str, str]]]:
     if isinstance(messages, str):
         messages = ast.literal_eval(messages)
+    if not messages:
+        # A row with no messages can't be repaired; skip it like the MOSS case
+        # below instead of crashing the whole dataset load on messages[0].
+        return None
     default_system = 'You are a helpful assistant.'
     messages: List[Dict[str, str]]
     if messages[0]['from'] == 'system' and messages[0]['value'] == default_system:
diff --git a/tests/general/test_repair_ms_bench.py b/tests/general/test_repair_ms_bench.py
new file mode 100644
index 0000000000..d9e10f490b
--- /dev/null
+++ b/tests/general/test_repair_ms_bench.py
@@ -0,0 +1,54 @@
+import unittest
+
+from swift.dataset.dataset.llm import _repair_ms_bench
+
+
+class TestRepairMsBench(unittest.TestCase):
+    """Pure unit tests for the ms_bench messages repair function (no network)."""
+
+    def test_empty_messages_returns_none(self):
+        """An empty row, given as a string or as a list, is skipped (None)."""
+        # An empty row can't be repaired; it must be skipped (None) like the MOSS
+        # case rather than crashing the whole dataset load on messages[0].
+        self.assertIsNone(_repair_ms_bench('[]'))
+        self.assertIsNone(_repair_ms_bench([]))
+
+    def test_strips_default_system_message(self):
+        """A leading default system message is dropped from the conversation."""
+        messages = [
+            {
+                'from': 'system',
+                'value': 'You are a helpful assistant.'
+            },
+            {
+                'from': 'user',
+                'value': 'hi'
+            },
+        ]
+        self.assertEqual(_repair_ms_bench(messages), [{'from': 'user', 'value': 'hi'}])
+
+    def test_keeps_a_normal_conversation(self):
+        """A conversation without a system message comes back unchanged."""
+        messages = [
+            {
+                'from': 'user',
+                'value': 'hi'
+            },
+            {
+                'from': 'assistant',
+                'value': 'hello'
+            },
+        ]
+        # Compare against an independently built list: the function may hand back
+        # the very list it was given, in which case comparing the result with
+        # `messages` itself would pass no matter what happened to its contents.
+        expected = [{'from': 'user', 'value': 'hi'}, {'from': 'assistant', 'value': 'hello'}]
+        self.assertEqual(_repair_ms_bench(messages), expected)
+
+    def test_skips_moss_rows(self):
+        """A row whose text mentions MOSS is expected to be skipped (None)."""
+        self.assertIsNone(_repair_ms_bench([{'from': 'user', 'value': 'moss reply'}]))
+
+
+if __name__ == '__main__':
+    unittest.main()