diff --git a/src/memos/configs/mem_reader.py b/src/memos/configs/mem_reader.py
index 8ca02a42d..d4844d73f 100644
--- a/src/memos/configs/mem_reader.py
+++ b/src/memos/configs/mem_reader.py
@@ -34,7 +34,7 @@ def parse_datetime(cls, value):
     )
     image_parser_llm: LLMConfigFactory | None = Field(
         default=None,
-        description="Vision LLM for image parsing. Falls back to main llm if not set.",
+        description="Vision LLM for image parsing. Falls back to general_llm if not set.",
     )
     embedder: EmbedderConfigFactory = Field(
         ..., description="Embedder configuration for the MemReader"
diff --git a/src/memos/mem_reader/multi_modal_struct.py b/src/memos/mem_reader/multi_modal_struct.py
index a9c84e3cf..123df5698 100644
--- a/src/memos/mem_reader/multi_modal_struct.py
+++ b/src/memos/mem_reader/multi_modal_struct.py
@@ -59,13 +59,12 @@ def __init__(self, config: MultiModalStructMemReaderConfig):
         super().__init__(simple_config)
 
         # Image parser LLM (requires vision model)
-        # Falls back to main llm if not configured
+        # Falls back to general_llm if not configured (general_llm itself falls back to main llm)
         self.image_parser_llm = (
             LLMFactory.from_config(config.image_parser_llm)
             if config.image_parser_llm is not None
-            else self.llm
+            else self.general_llm
         )
-
         # Initialize MultiModalParser for routing to different parsers
         # Pass image_parser_llm for image parsing
         self.multi_modal_parser = MultiModalParser(
@@ -1105,7 +1104,7 @@ def _process_transfer_multi_modal_data(
         )
         # Add preference memory extraction
         future_pref = executor.submit(
-            process_preference_fine, raw_nodes, info, self.llm, self.embedder, **kwargs
+            process_preference_fine, raw_nodes, info, self.general_llm, self.embedder, **kwargs
         )
 
         # Collect results