From d1af7690a8048b1a167666c0a254ff165e321769 Mon Sep 17 00:00:00 2001 From: Yufeng He <40085740+he-yufeng@users.noreply.github.com> Date: Mon, 22 Jun 2026 17:30:23 +0800 Subject: [PATCH] fix(dataset): propagate system message to rejected_messages in DPO preprocessing When a DPO dataset row has both a `system` key and `rejected_messages`, the recursive preprocess() call only forwarded `{'messages': row['rejected_messages']}`, dropping the system key. This meant system was prepended to chosen messages but never to rejected messages, producing asymmetric conversation prefixes and corrupting the DPO training signal. Pass `'system': row.get('system')` to the recursive call so both sides receive the same system prefix. Also adds a regression test. --- swift/dataset/preprocessor/core.py | 2 +- tests/general/test_data_preprocess.py | 33 ++++++++++++++++++++++++++- 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/swift/dataset/preprocessor/core.py b/swift/dataset/preprocessor/core.py index f8822e2fa4..5cc42e4db2 100644 --- a/swift/dataset/preprocessor/core.py +++ b/swift/dataset/preprocessor/core.py @@ -514,7 +514,7 @@ def _to_std_key(messages: List[Dict[str, str]], std_key: str, optional_keys: Lis def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]: if 'rejected_messages' in row: row['rejected_messages'] = MessagesPreprocessor.preprocess( - self, {'messages': row['rejected_messages']})['messages'] + self, {'messages': row['rejected_messages'], 'system': row.get('system')})['messages'] messages = row['messages'] if self.inner_key is not None: messages = messages[self.inner_key] diff --git a/tests/general/test_data_preprocess.py b/tests/general/test_data_preprocess.py index a8330fca20..4ba8adff18 100644 --- a/tests/general/test_data_preprocess.py +++ b/tests/general/test_data_preprocess.py @@ -1,6 +1,6 @@ import unittest -from swift.dataset import EncodePreprocessor, PackingDataset, load_dataset +from swift.dataset import EncodePreprocessor, MessagesPreprocessor, PackingDataset, load_dataset from swift.model import get_processor from swift.template import get_template @@ -147,5 +147,36 @@ def test_packing_dataset(self): self.assertIn('labels', packed[0]) +class TestMessagesPreprocessor(unittest.TestCase): + """Unit tests for MessagesPreprocessor (no model required).""" + + def test_system_propagated_to_rejected_messages(self): + """system message must be prepended to rejected_messages, not only to messages. + + Regression test for the bug where the recursive preprocess() call for + rejected_messages omitted the 'system' key, causing chosen and rejected + to have asymmetric conversation prefixes during DPO training. + """ + row = { + 'messages': [ + {'role': 'user', 'content': 'Q'}, + {'role': 'assistant', 'content': 'good'}, + ], + 'rejected_messages': [ + {'role': 'user', 'content': 'Q'}, + {'role': 'assistant', 'content': 'bad'}, + ], + 'system': 'You are helpful.', + } + result = MessagesPreprocessor().preprocess(row) + self.assertIsNotNone(result) + self.assertEqual(result['messages'][0]['role'], 'system', + 'system message should be first in chosen messages') + self.assertEqual(result['rejected_messages'][0]['role'], 'system', + 'system message should also be first in rejected_messages') + self.assertEqual(result['messages'][0]['content'], 'You are helpful.') + self.assertEqual(result['rejected_messages'][0]['content'], 'You are helpful.') + + if __name__ == '__main__': unittest.main()