From d29e2f5fac9b185ea2ad1f090606f758655e0a89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B8=AD=E9=98=B3=E9=98=B3?= Date: Tue, 3 Mar 2026 19:51:19 +0800 Subject: [PATCH 1/4] fix: image url bug --- src/memos/mem_reader/multi_modal_struct.py | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/src/memos/mem_reader/multi_modal_struct.py b/src/memos/mem_reader/multi_modal_struct.py index 62e8f2d75..fe9ec40cc 100644 --- a/src/memos/mem_reader/multi_modal_struct.py +++ b/src/memos/mem_reader/multi_modal_struct.py @@ -289,7 +289,6 @@ def _build_window_from_items( # Collect all memory texts and sources memory_texts = [] all_sources = [] - seen_content = set() # Track seen source content to avoid duplicates roles = set() aggregated_file_ids: list[str] = [] @@ -303,18 +302,8 @@ def _build_window_from_items( item_sources = [item_sources] for source in item_sources: - # Get content from source for deduplication - source_content = None - if isinstance(source, dict): - source_content = source.get("content", "") - else: - source_content = getattr(source, "content", "") or "" - - # Only add if content is different (empty content is considered unique) - content_key = source_content if source_content else None - if content_key and content_key not in seen_content: - seen_content.add(content_key) - all_sources.append(source) + # Add source to all_sources + all_sources.append(source) # Extract role from source if hasattr(source, "role") and source.role: From 714d41577e768dd9a0186cc77fe350175b22a514 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B8=AD=E9=98=B3=E9=98=B3?= Date: Tue, 3 Mar 2026 21:16:23 +0800 Subject: [PATCH 2/4] fix: single item has no embedding bug --- src/memos/mem_reader/multi_modal_struct.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/memos/mem_reader/multi_modal_struct.py b/src/memos/mem_reader/multi_modal_struct.py index fe9ec40cc..0b3e19208 100644 --- a/src/memos/mem_reader/multi_modal_struct.py +++ 
b/src/memos/mem_reader/multi_modal_struct.py @@ -190,8 +190,16 @@ def _concat_multi_modal_memories( else: processed_items.append(item) - # If only one item after processing, return as-is + # If only one item after processing, compute embedding and return if len(processed_items) == 1: + single_item = processed_items[0] + if single_item and single_item.memory: + try: + single_item.metadata.embedding = self.embedder.embed([single_item.memory])[0] + except Exception as e: + logger.error( + f"[MultiModalStruct] Error computing embedding for single item: {e}" + ) return processed_items windows = [] From 9fef2289e271ce93e5fad4f2b44cfe0ba29d9f09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B8=AD=E9=98=B3=E9=98=B3?= Date: Wed, 4 Mar 2026 11:27:49 +0800 Subject: [PATCH 3/4] fix: image lang bug --- src/memos/mem_reader/read_multi_modal/image_parser.py | 6 +++--- src/memos/mem_reader/read_multi_modal/utils.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/memos/mem_reader/read_multi_modal/image_parser.py b/src/memos/mem_reader/read_multi_modal/image_parser.py index 97400ca26..d66642edb 100644 --- a/src/memos/mem_reader/read_multi_modal/image_parser.py +++ b/src/memos/mem_reader/read_multi_modal/image_parser.py @@ -137,10 +137,10 @@ def parse_fine( # Get context items if available context_items = kwargs.get("context_items") - # Determine language: prioritize lang from source (passed via kwargs), - # fallback to detecting from context_items if lang not provided + # Determine language: prioritize lang from context_items, + # fallback to kwargs lang = kwargs.get("lang") - if lang is None and context_items: + if context_items: for item in context_items: if hasattr(item, "memory") and item.memory: lang = detect_lang(item.memory) diff --git a/src/memos/mem_reader/read_multi_modal/utils.py b/src/memos/mem_reader/read_multi_modal/utils.py index be82587bf..209fe9b3c 100644 --- a/src/memos/mem_reader/read_multi_modal/utils.py +++ 
b/src/memos/mem_reader/read_multi_modal/utils.py
@@ -345,7 +345,7 @@ def detect_lang(text):
     cleaned_text = re.sub(
         r"\b(user|assistant|query|answer)\s*:", "", cleaned_text, flags=re.IGNORECASE
     )
-    cleaned_text = re.sub(r"\[[\d\-:\s]+\]", "", cleaned_text)
+    cleaned_text = re.sub(r"\[[^\]]+\]", "", cleaned_text)
     # remove URLs to prevent the dilution of Chinese characters
     cleaned_text = re.sub(r'https?://[^\s<>"{}|\\^`\[\]]+', "", cleaned_text)
     # extract chinese characters

From b411f1f87751cdd7cd837490f609787e6e2566c0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=B8=AD=E9=98=B3=E9=98=B3?=
Date: Wed, 4 Mar 2026 11:41:51 +0800
Subject: [PATCH 4/4] fix: image lang bug

---
 src/memos/mem_reader/read_multi_modal/utils.py | 19 +++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/src/memos/mem_reader/read_multi_modal/utils.py b/src/memos/mem_reader/read_multi_modal/utils.py
index 209fe9b3c..96918589b 100644
--- a/src/memos/mem_reader/read_multi_modal/utils.py
+++ b/src/memos/mem_reader/read_multi_modal/utils.py
@@ -341,13 +341,28 @@ def detect_lang(text):
     if not text or not isinstance(text, str):
         return "en"
     cleaned_text = text
-    # remove role and timestamp
+    # remove role and timestamp-like prefixes
     cleaned_text = re.sub(
         r"\b(user|assistant|query|answer)\s*:", "", cleaned_text, flags=re.IGNORECASE
     )
-    cleaned_text = re.sub(r"\[[^\]]+\]", "", cleaned_text)
+    # timestamps like [11:32 AM on 04 March, 2026]
+    cleaned_text = re.sub(
+        r"\[\s*\d{1,2}:\d{2}\s*(?:AM|PM)\s+on\s+\d{2}\s+[A-Za-z]+\s*,\s*\d{4}\s*\]",
+        "",
+        cleaned_text,
+        flags=re.IGNORECASE,
+    )
+    # purely numeric timestamps like [2025-01-01 10:00]
+    cleaned_text = re.sub(r"\[[\d\-:\s]+\]", "", cleaned_text)
     # remove URLs to prevent the dilution of Chinese characters
     cleaned_text = re.sub(r'https?://[^\s<>"{}|\\^`\[\]]+', "", cleaned_text)
+    # remove multimodal message schema keywords (JSON noise, e.g. image_url)
+    cleaned_text = re.sub(
+        r"\b(text|type|image_url|imageurl|url|file|file_id)\b",
+        "",
+        cleaned_text,
+        flags=re.IGNORECASE,
+    )
     # extract chinese characters
     chinese_pattern = r"[\u4e00-\u9fff\u3400-\u4dbf\U00020000-\U0002a6df\U0002a700-\U0002b73f\U0002b740-\U0002b81f\U0002b820-\U0002ceaf\uf900-\ufaff]"
     chinese_chars = re.findall(chinese_pattern, cleaned_text)