diff --git a/src/memos/mem_reader/multi_modal_struct.py b/src/memos/mem_reader/multi_modal_struct.py
index 62e8f2d75..0b3e19208 100644
--- a/src/memos/mem_reader/multi_modal_struct.py
+++ b/src/memos/mem_reader/multi_modal_struct.py
@@ -190,8 +190,16 @@ def _concat_multi_modal_memories(
                 else:
                     processed_items.append(item)
 
-        # If only one item after processing, return as-is
+        # If only one item after processing, compute embedding and return
         if len(processed_items) == 1:
+            single_item = processed_items[0]
+            if single_item and single_item.memory:
+                try:
+                    single_item.metadata.embedding = self.embedder.embed([single_item.memory])[0]
+                except Exception as e:
+                    logger.error(
+                        f"[MultiModalStruct] Error computing embedding for single item: {e}"
+                    )
             return processed_items
 
         windows = []
@@ -289,7 +297,6 @@ def _build_window_from_items(
         # Collect all memory texts and sources
         memory_texts = []
         all_sources = []
-        seen_content = set()  # Track seen source content to avoid duplicates
         roles = set()
         aggregated_file_ids: list[str] = []
 
@@ -303,18 +310,8 @@ def _build_window_from_items(
                 item_sources = [item_sources]
 
             for source in item_sources:
-                # Get content from source for deduplication
-                source_content = None
-                if isinstance(source, dict):
-                    source_content = source.get("content", "")
-                else:
-                    source_content = getattr(source, "content", "") or ""
-
-                # Only add if content is different (empty content is considered unique)
-                content_key = source_content if source_content else None
-                if content_key and content_key not in seen_content:
-                    seen_content.add(content_key)
-                    all_sources.append(source)
+                # Add source to all_sources
+                all_sources.append(source)
 
                 # Extract role from source
                 if hasattr(source, "role") and source.role:
diff --git a/src/memos/mem_reader/read_multi_modal/image_parser.py b/src/memos/mem_reader/read_multi_modal/image_parser.py
index 97400ca26..d66642edb 100644
--- a/src/memos/mem_reader/read_multi_modal/image_parser.py
+++ b/src/memos/mem_reader/read_multi_modal/image_parser.py
@@ -137,10 +137,10 @@ def parse_fine(
         # Get context items if available
         context_items = kwargs.get("context_items")
 
-        # Determine language: prioritize lang from source (passed via kwargs),
-        # fallback to detecting from context_items if lang not provided
+        # Determine language: prioritize lang from context_items,
+        # fallback to kwargs
         lang = kwargs.get("lang")
-        if lang is None and context_items:
+        if context_items:
             for item in context_items:
                 if hasattr(item, "memory") and item.memory:
                     lang = detect_lang(item.memory)
diff --git a/src/memos/mem_reader/read_multi_modal/utils.py b/src/memos/mem_reader/read_multi_modal/utils.py
index be82587bf..96918589b 100644
--- a/src/memos/mem_reader/read_multi_modal/utils.py
+++ b/src/memos/mem_reader/read_multi_modal/utils.py
@@ -341,13 +341,32 @@ def detect_lang(text):
         if not text or not isinstance(text, str):
             return "en"
         cleaned_text = text
-        # remove role and timestamp
+        # remove role and timestamp-like prefixes
         cleaned_text = re.sub(
             r"\b(user|assistant|query|answer)\s*:", "", cleaned_text, flags=re.IGNORECASE
         )
+        # timestamps like [11:32 AM on 04 March, 2026]
+        cleaned_text = re.sub(
+            r"\[\s*\d{1,2}:\d{2}\s*(?:AM|PM)\s+on\s+\d{2}\s+[A-Za-z]+\s*,\s*\d{4}\s*\]",
+            "",
+            cleaned_text,
+            flags=re.IGNORECASE,
+        )
+        # purely numeric timestamps like [2025-01-01 10:00]
         cleaned_text = re.sub(r"\[[\d\-:\s]+\]", "", cleaned_text)
         # remove URLs to prevent the dilution of Chinese characters
         cleaned_text = re.sub(r'https?://[^\s<>"{}|\\^`\[\]]+', "", cleaned_text)
+        # remove MessageType schema keywords (multimodal JSON noise)
+        cleaned_text = re.sub(
+            r"\b(text|type|image_url|imageurl|url)\b", "", cleaned_text, flags=re.IGNORECASE
+        )
+        # remove schema keywords like text / type / image_url / url
+        cleaned_text = re.sub(
+            r"\b(text|type|image_url|imageurl|url|file|file_id)\b",
+            "",
+            cleaned_text,
+            flags=re.IGNORECASE,
+        )
         # extract chinese characters
         chinese_pattern = r"[\u4e00-\u9fff\u3400-\u4dbf\U00020000-\U0002a6df\U0002a700-\U0002b73f\U0002b740-\U0002b81f\U0002b820-\U0002ceaf\uf900-\ufaff]"
         chinese_chars = re.findall(chinese_pattern, cleaned_text)