diff --git a/src/memos/mem_reader/multi_modal_struct.py b/src/memos/mem_reader/multi_modal_struct.py index 62e8f2d75..0b3e19208 100644 --- a/src/memos/mem_reader/multi_modal_struct.py +++ b/src/memos/mem_reader/multi_modal_struct.py @@ -190,8 +190,16 @@ def _concat_multi_modal_memories( else: processed_items.append(item) - # If only one item after processing, return as-is + # If only one item after processing, compute embedding and return if len(processed_items) == 1: + single_item = processed_items[0] + if single_item and single_item.memory: + try: + single_item.metadata.embedding = self.embedder.embed([single_item.memory])[0] + except Exception as e: + logger.error( + f"[MultiModalStruct] Error computing embedding for single item: {e}" + ) return processed_items windows = [] @@ -289,7 +297,6 @@ def _build_window_from_items( # Collect all memory texts and sources memory_texts = [] all_sources = [] - seen_content = set() # Track seen source content to avoid duplicates roles = set() aggregated_file_ids: list[str] = [] @@ -303,18 +310,8 @@ def _build_window_from_items( item_sources = [item_sources] for source in item_sources: - # Get content from source for deduplication - source_content = None - if isinstance(source, dict): - source_content = source.get("content", "") - else: - source_content = getattr(source, "content", "") or "" - - # Only add if content is different (empty content is considered unique) - content_key = source_content if source_content else None - if content_key and content_key not in seen_content: - seen_content.add(content_key) - all_sources.append(source) + # Add source to all_sources + all_sources.append(source) # Extract role from source if hasattr(source, "role") and source.role: diff --git a/src/memos/mem_reader/read_multi_modal/image_parser.py b/src/memos/mem_reader/read_multi_modal/image_parser.py index 97400ca26..d66642edb 100644 --- a/src/memos/mem_reader/read_multi_modal/image_parser.py +++ b/src/memos/mem_reader/read_multi_modal/image_parser.py @@ -137,10 +137,10 @@ def parse_fine( # Get context items if available context_items = kwargs.get("context_items") - # Determine language: prioritize lang from source (passed via kwargs), - # fallback to detecting from context_items if lang not provided + # Determine language: prioritize lang from context_items, + # fallback to kwargs lang = kwargs.get("lang") - if lang is None and context_items: + if context_items: for item in context_items: if hasattr(item, "memory") and item.memory: lang = detect_lang(item.memory) diff --git a/src/memos/mem_reader/read_multi_modal/utils.py b/src/memos/mem_reader/read_multi_modal/utils.py index be82587bf..96918589b 100644 --- a/src/memos/mem_reader/read_multi_modal/utils.py +++ b/src/memos/mem_reader/read_multi_modal/utils.py @@ -341,13 +341,32 @@ def detect_lang(text): if not text or not isinstance(text, str): return "en" cleaned_text = text - # remove role and timestamp + # remove role and timestamp-like prefixes cleaned_text = re.sub( r"\b(user|assistant|query|answer)\s*:", "", cleaned_text, flags=re.IGNORECASE ) + # timestamps like [11:32 AM on 04 March, 2026] + cleaned_text = re.sub( + r"\[\s*\d{1,2}:\d{2}\s*(?:AM|PM)\s+on\s+\d{2}\s+[A-Za-z]+\s*,\s*\d{4}\s*\]", + "", + cleaned_text, + flags=re.IGNORECASE, + ) + # purely numeric timestamps like [2025-01-01 10:00] cleaned_text = re.sub(r"\[[\d\-:\s]+\]", "", cleaned_text) # remove URLs to prevent the dilution of Chinese characters cleaned_text = re.sub(r'https?://[^\s<>"{}|\\^`\[\]]+', "", cleaned_text) + # remove MessageType schema keywords (multimodal JSON noise) + cleaned_text = re.sub( + r"\b(text|type|image_url|imageurl|url)\b", "", cleaned_text, flags=re.IGNORECASE + ) + # remove schema keywords like text / type / image_url / url + cleaned_text = re.sub( + r"\b(text|type|image_url|imageurl|url|file|file_id)\b", + "", + cleaned_text, + flags=re.IGNORECASE, + ) # extract chinese characters chinese_pattern = r"[\u4e00-\u9fff\u3400-\u4dbf\U00020000-\U0002a6df\U0002a700-\U0002b73f\U0002b740-\U0002b81f\U0002b820-\U0002ceaf\uf900-\ufaff]" chinese_chars = re.findall(chinese_pattern, cleaned_text)