Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 70 additions & 47 deletions src/memos/mem_reader/read_multi_modal/file_content_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,9 @@
class FileContentParser(BaseMessageParser):
"""Parser for file content parts."""

def _get_doc_llm_response(self, chunk_text: str, custom_tags: list[str] | None = None) -> dict:
def _get_doc_llm_response(
self, chunk_text: str, custom_tags: list[str] | None = None
) -> dict | list:
"""
Call LLM to extract memory from document chunk.
Uses doc prompts from DOC_PROMPT_DICT.
Expand All @@ -60,7 +62,7 @@ def _get_doc_llm_response(self, chunk_text: str, custom_tags: list[str] | None =
custom_tags: Optional list of custom tags for LLM extraction

Returns:
Parsed JSON response from LLM or empty dict if failed
Parsed JSON response from LLM (dict or list) or empty dict if failed
"""
if not self.llm:
logger.warning("[FileContentParser] LLM not available for fine mode")
Expand Down Expand Up @@ -777,35 +779,49 @@ def _make_fallback(
return [_make_fallback(idx, text, "no_llm") for idx, text in valid_chunks]

# Process single chunk with LLM extraction (worker function)
def _process_chunk(chunk_idx: int, chunk_text: str) -> TextualMemoryItem:
"""Process chunk with LLM, fallback to raw on failure."""
def _process_chunk(chunk_idx: int, chunk_text: str) -> list[TextualMemoryItem]:
"""Process chunk with LLM, fallback to raw on failure. Returns list of memory items."""
try:
response_json = self._get_doc_llm_response(chunk_text, custom_tags)
if response_json:
value = response_json.get("value", "").strip()
if value:
tags = response_json.get("tags", [])
tags = tags if isinstance(tags, list) else []
tags.extend(["mode:fine", "multimodal:file"])

llm_mem_type = response_json.get("memory_type", memory_type)
if llm_mem_type not in ["LongTermMemory", "UserMemory"]:
llm_mem_type = memory_type

return _make_memory_item(
value=value,
mem_type=llm_mem_type,
tags=tags,
key=response_json.get("key"),
chunk_idx=chunk_idx,
chunk_content=chunk_text,
)
# Handle list format response
response_list = response_json.get("memory list", [])
memory_items = []
for item_data in response_list:
if not isinstance(item_data, dict):
continue

value = item_data.get("value", "").strip()
if value:
tags = item_data.get("tags", [])
tags = tags if isinstance(tags, list) else []
tags.extend(["mode:fine", "multimodal:file"])
key_str = item_data.get("key", "")

llm_mem_type = item_data.get("memory_type", memory_type)
if llm_mem_type not in ["LongTermMemory", "UserMemory"]:
llm_mem_type = memory_type

memory_item = _make_memory_item(
value=value,
mem_type=llm_mem_type,
tags=tags,
key=key_str,
chunk_idx=chunk_idx,
chunk_content=chunk_text,
)
memory_items.append(memory_item)

if memory_items:
return memory_items
else:
return [_make_fallback(chunk_idx, chunk_text)]
except Exception as e:
logger.error(f"[FileContentParser] LLM error for chunk {chunk_idx}: {e}")

# Fallback to raw chunk
logger.warning(f"[FileContentParser] Fallback to raw for chunk {chunk_idx}")
return _make_fallback(chunk_idx, chunk_text)
return [_make_fallback(chunk_idx, chunk_text)]

def _relate_chunks(items: list[TextualMemoryItem]) -> None:
"""
Expand Down Expand Up @@ -853,30 +869,37 @@ def get_chunk_idx(item: TextualMemoryItem) -> int:
):
chunk_idx = futures[future]
try:
node = future.result()
memory_items.append(node)

# Check if this node is a fallback by checking tags
is_fallback = any(tag.startswith("fallback:") for tag in node.metadata.tags)
if is_fallback:
fallback_count += 1

# save raw file
node_id = node.id
if node.memory != node.metadata.sources[0].content:
chunk_node = _make_memory_item(
value=node.metadata.sources[0].content,
mem_type="RawFileMemory",
tags=[
"mode:fine",
"multimodal:file",
f"chunk:{chunk_idx + 1}/{total_chunks}",
],
chunk_idx=chunk_idx,
chunk_content="",
)
chunk_node.metadata.summary_ids = [node_id]
memory_items.append(chunk_node)
nodes = future.result()
memory_items.extend(nodes)

# Check if any node is a fallback by checking tags
has_fallback = False
for node in nodes:
is_fallback = any(tag.startswith("fallback:") for tag in node.metadata.tags)
if is_fallback:
fallback_count += 1
has_fallback = True

# save raw file only if no fallback (all nodes are LLM-extracted)
if not has_fallback and nodes:
# Use first node's source info for raw file
first_node = nodes[0]
if first_node.metadata.sources and len(first_node.metadata.sources) > 0:
# Collect all node IDs for summary_ids
node_ids = [node.id for node in nodes]
chunk_node = _make_memory_item(
value=first_node.metadata.sources[0].content,
mem_type="RawFileMemory",
tags=[
"mode:fine",
"multimodal:file",
f"chunk:{chunk_idx + 1}/{total_chunks}",
],
chunk_idx=chunk_idx,
chunk_content="",
)
chunk_node.metadata.summary_ids = node_ids
memory_items.append(chunk_node)

except Exception as e:
tqdm.write(f"[ERROR] Chunk {chunk_idx} failed: {e}")
Expand Down
34 changes: 22 additions & 12 deletions src/memos/templates/mem_reader_prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,12 +244,17 @@

Return a single valid JSON object with the following structure:

Return valid JSON:
{
"key": <string, a concise title of the `value` field>,
"memory_type": "LongTermMemory",
"value": <A clear and accurate paragraph that comprehensively summarizes the main points, arguments, and information within the document chunk — written in English if the input memory items are in English, or in Chinese if the input is in Chinese>,
"tags": <A list of relevant thematic keywords (e.g., ["deadline", "team", "planning"])>
"memory list": [
{
"key": <string, a concise title of the `value` field>,
"memory_type": "LongTermMemory",
"value": <A clear and accurate paragraph that comprehensively summarizes the main points, arguments, and information within the document chunk — written in English if the input memory items are in English, or in Chinese if the input is in Chinese>,
"tags": <A list of relevant thematic keywords (e.g., ["deadline", "team", "planning"])>
}
...
],
"summary": <a concise summary of the document chunk>
}

Language rules:
Expand All @@ -264,7 +269,7 @@
Your Output:"""

SIMPLE_STRUCT_DOC_READER_PROMPT_ZH = """您是搜索与检索系统的文本分析专家。
您的任务是处理文档片段,并生成一个结构化的 JSON 对象
您的任务是处理文档片段,并生成一个结构化的 JSON 列表对象

请执行以下操作:
1. 识别反映文档中事实内容、见解、决策或含义的关键信息——包括任何显著的主题、结论或数据点,使读者无需阅读原文即可充分理解该片段的核心内容。
Expand All @@ -281,14 +286,19 @@
- 优先考虑完整性和保真度,而非简洁性。
- 不要泛化或跳过可能具有上下文意义的细节。

返回一个有效的 JSON 对象,结构如下
返回有效的 JSON 对象:

返回有效的 JSON:
{
"key": <字符串,`value` 字段的简洁标题>,
"memory_type": "LongTermMemory",
"value": <一段清晰准确的段落,全面总结文档片段中的主要观点、论据和信息——若输入摘要为英文,则用英文;若为中文,则用中文>,
"tags": <相关主题关键词列表(例如,["截止日期", "团队", "计划"])>
"memory list": [
{
"key": <字符串,`value` 字段的简洁标题>,
"memory_type": "LongTermMemory",
"value": <一段清晰准确的段落,全面总结文档片段中的主要观点、论据和信息——若输入摘要为英文,则用英文;若为中文,则用中文>,
"tags": <相关主题关键词列表(例如,["截止日期", "团队", "计划"])>
}
...
],
"summary": <简洁总结原文内容,与输入语言一致>
}

语言规则:
Expand Down