Inject a shared ImageParser into FileContentParser.

MultiModalParser now builds its ImageParser (backed by the dedicated
image_parser_llm) before FileContentParser and passes that instance in.
When no parser is injected, FileContentParser keeps its previous fallback:
an ImageParser built on `llm`, or None when `llm` is not set.

NOTE(review): indentation of the hunks below was reconstructed from a
whitespace-collapsed copy of this patch; use `git apply --ignore-whitespace`
if the context does not match. The `index` lines carry the blob hashes of
the original patch and were not recomputed.

diff --git a/src/memos/mem_reader/read_multi_modal/file_content_parser.py b/src/memos/mem_reader/read_multi_modal/file_content_parser.py
index b440a4aef..0629eda97 100644
--- a/src/memos/mem_reader/read_multi_modal/file_content_parser.py
+++ b/src/memos/mem_reader/read_multi_modal/file_content_parser.py
@@ -326,6 +326,7 @@ def __init__(
         llm: BaseLLM | None = None,
         parser: Any | None = None,
         direct_markdown_hostnames: list[str] | None = None,
+        image_parser: ImageParser | None = None,
     ):
         """
         Initialize FileContentParser.
@@ -341,7 +342,12 @@ def __init__(
         super().__init__(embedder, llm)
         self.parser = parser
         # Initialize ImageParser for processing images in markdown
-        self.image_parser = ImageParser(embedder, llm) if llm else None
+        # Prefer the injected parser (the caller may back it with a dedicated
+        # vision model); otherwise keep the old fallback: build one on `llm`,
+        # or leave it as None when no LLM is configured.
+        if image_parser is not None:
+            self.image_parser = image_parser
+        else:
+            self.image_parser = ImageParser(embedder, llm) if llm else None
 
         # Get inner markdown hostnames from config or environment
         if direct_markdown_hostnames is not None:
diff --git a/src/memos/mem_reader/read_multi_modal/multi_modal_parser.py b/src/memos/mem_reader/read_multi_modal/multi_modal_parser.py
index 528a409b6..a08aadc0c 100644
--- a/src/memos/mem_reader/read_multi_modal/multi_modal_parser.py
+++ b/src/memos/mem_reader/read_multi_modal/multi_modal_parser.py
@@ -67,11 +67,16 @@ def __init__(
         self.assistant_parser = AssistantParser(embedder, llm)
         self.tool_parser = ToolParser(embedder, llm)
         self.text_content_parser = TextContentParser(embedder, llm)
-        self.file_content_parser = FileContentParser(
-            embedder, llm, parser, direct_markdown_hostnames=direct_markdown_hostnames
-        )
         # Use dedicated image_parser_llm for image parsing (requires vision model)
         self.image_parser = ImageParser(embedder, self.image_parser_llm)
+        # Built after image_parser so the file parser can reuse the same instance
+        self.file_content_parser = FileContentParser(
+            embedder,
+            llm,
+            parser,
+            direct_markdown_hostnames=direct_markdown_hostnames,
+            image_parser=self.image_parser,
+        )
         self.audio_parser = None  # future
 
         self.role_parsers = {