From 31a589ecc241d2d37147a8fad0cd61a920908d0c Mon Sep 17 00:00:00 2001 From: tastelikefeet Date: Thu, 5 Mar 2026 11:20:35 +0800 Subject: [PATCH] fix video mm --- src/twinkle/processor/base.py | 10 +++++++--- src/twinkle/template/base.py | 2 +- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/twinkle/processor/base.py b/src/twinkle/processor/base.py index e7bce2d3..03493689 100644 --- a/src/twinkle/processor/base.py +++ b/src/twinkle/processor/base.py @@ -311,9 +311,13 @@ def to_transformers_dict(inputs: List[InputFeature], **kwargs) -> List[InputFeat for _input in inputs: output = {} _keys = [ - 'input_ids', 'input_embeddings', 'attention_mask', 'position_ids', 'labels', 'completion_mask', - 'pixel_values', 'image_grid_thw' - ] + 'input_ids', + 'input_embeddings', + 'attention_mask', + 'position_ids', + 'labels', + 'completion_mask', + ] + list(InputProcessor.VLM_CONCAT_FIELDS) for key in list(_input.keys()): if key in _keys: output[key] = np.array(_input[key]) if not isinstance(_input[key], torch.Tensor) else _input[key] diff --git a/src/twinkle/template/base.py b/src/twinkle/template/base.py index dfa04a58..33e970e8 100644 --- a/src/twinkle/template/base.py +++ b/src/twinkle/template/base.py @@ -226,7 +226,7 @@ def _build_mm_messages(self, trajectory: Trajectory) -> List[Trajectory]: message['images'] = self.preprocess_images(msg_images) assert len(message['images']) == content.count(self.image_placeholder) if msg_videos: - message['videos'] = self.preprocess_images(msg_videos) + message['videos'] = self.preprocess_videos(msg_videos) assert len(message['videos']) == content.count(self.video_placeholder) if msg_audios: message['audios'] = self.preprocess_audios(msg_audios)