Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions swift/dataset/dataset/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,10 @@ def preprocess(self, row):
def _repair_ms_bench(messages: str) -> Optional[List[Dict[str, str]]]:
if isinstance(messages, str):
messages = ast.literal_eval(messages)
if not messages:
# A row with no messages can't be repaired; skip it like the MOSS case
# below instead of crashing the whole dataset load on messages[0].
return None
default_system = 'You are a helpful assistant.'
messages: List[Dict[str, str]]
if messages[0]['from'] == 'system' and messages[0]['value'] == default_system:
Expand Down
46 changes: 46 additions & 0 deletions tests/general/test_repair_ms_bench.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import unittest

from swift.dataset.dataset.llm import _repair_ms_bench


class TestRepairMsBench(unittest.TestCase):
    """Pure unit tests for the ms_bench messages repair function (no network).

    Expected values are written out as independent literals instead of being
    compared against the list that was handed to ``_repair_ms_bench``. If the
    function returns the very list it received, comparing the result with the
    argument compares an object with itself and can never fail, which would
    hide any accidental in-place modification.
    """

    def test_empty_messages_returns_none(self):
        # An empty row can't be repaired; it must be skipped (None) like the MOSS
        # case rather than crashing the whole dataset load on messages[0].
        # Both accepted input forms are covered: the serialized string and the
        # already-parsed list. subTest reports each form separately on failure.
        for empty in ('[]', []):
            with self.subTest(messages=empty):
                self.assertIsNone(_repair_ms_bench(empty))

    def test_strips_default_system_message(self):
        # A leading default system prompt is expected to be removed, leaving
        # only the remaining turns.
        messages = [
            {
                'from': 'system',
                'value': 'You are a helpful assistant.'
            },
            {
                'from': 'user',
                'value': 'hi'
            },
        ]
        self.assertEqual(_repair_ms_bench(messages), [{'from': 'user', 'value': 'hi'}])

    def test_keeps_a_normal_conversation(self):
        messages = [
            {
                'from': 'user',
                'value': 'hi'
            },
            {
                'from': 'assistant',
                'value': 'hello'
            },
        ]
        # Built separately from ``messages`` so the assertion still detects a
        # change even when the function mutates and returns its argument.
        expected = [
            {
                'from': 'user',
                'value': 'hi'
            },
            {
                'from': 'assistant',
                'value': 'hello'
            },
        ]
        self.assertEqual(_repair_ms_bench(messages), expected)

    def test_skips_moss_rows(self):
        # A row whose text mentions "moss" is expected to be dropped (None).
        self.assertIsNone(_repair_ms_bench([{'from': 'user', 'value': 'moss reply'}]))


# Allow running this test module directly as a script, in addition to being
# picked up by a test runner's discovery.
if __name__ == '__main__':
    unittest.main()