From 26eaab42cb8b8819320d752f520e806dfb081dd2 Mon Sep 17 00:00:00 2001 From: Octopus Date: Mon, 16 Mar 2026 08:48:55 -0500 Subject: [PATCH 1/7] feat: add MiniMax as a first-class LLM provider Add MiniMax LLM support via the OpenAI-compatible API, following the same pattern as the existing Qwen and DeepSeek providers. Changes: - Add MinimaxLLMConfig with api_key, api_base, extra_body fields - Add MinimaxLLM class inheriting from OpenAILLM - Register minimax backend in LLMFactory and LLMConfigFactory - Add minimax_config() to APIConfig with env var support (MINIMAX_API_KEY, MINIMAX_API_BASE) - Add minimax to backend_model dicts in product/user config - Add MiniMax example scenario in examples/basic_modules/llm.py - Add unit tests for config and LLM (generate, stream, think prefix) - Update .env.example and README with MiniMax provider info MiniMax API: https://api.minimax.io/v1 (OpenAI-compatible) Models: MiniMax-M2.5, MiniMax-M2.5-highspeed (204K context) --- README.md | 1 + docker/.env.example | 5 +- examples/basic_modules/llm.py | 34 +++++++++- src/memos/api/config.py | 18 ++++++ src/memos/configs/llm.py | 10 +++ src/memos/llms/factory.py | 2 + src/memos/llms/minimax.py | 13 ++++ tests/configs/test_llm.py | 32 ++++++++++ tests/llms/test_minimax.py | 114 ++++++++++++++++++++++++++++++++++ 9 files changed, 226 insertions(+), 3 deletions(-) create mode 100644 src/memos/llms/minimax.py create mode 100644 tests/llms/test_minimax.py diff --git a/README.md b/README.md index a7b05d683..96949d9ab 100644 --- a/README.md +++ b/README.md @@ -224,6 +224,7 @@ Full tutorial → [MemOS-Cloud-OpenClaw-Plugin](https://github.com/MemTensor/Mem 2. Configure `docker/.env.example` and copy to `MemOS/.env` - The `OPENAI_API_KEY`,`MOS_EMBEDDER_API_KEY`,`MEMRADER_API_KEY` and others can be applied for through [`BaiLian`](https://bailian.console.aliyun.com/?spm=a2c4g.11186623.0.0.2f2165b08fRk4l&tab=api#/api). - Fill in the corresponding configuration in the `MemOS/.env` file. + - Supported LLM providers: **OpenAI**, **Azure OpenAI**, **Qwen (DashScope)**, **DeepSeek**, **MiniMax**, **Ollama**, **HuggingFace**, **vLLM**. Set `MOS_CHAT_MODEL_PROVIDER` to select the backend (e.g., `openai`, `qwen`, `deepseek`, `minimax`). 3. Start the service. - Launch via Docker diff --git a/docker/.env.example b/docker/.env.example index 3674cd69b..5215a2db3 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -25,9 +25,12 @@ MOS_MAX_TOKENS=2048 # Top-P for LLM in the Product API MOS_TOP_P=0.9 # LLM for the Product API backend -MOS_CHAT_MODEL_PROVIDER=openai # openai | huggingface | vllm +MOS_CHAT_MODEL_PROVIDER=openai # openai | huggingface | vllm | minimax OPENAI_API_KEY=sk-xxx # [required] when provider=openai OPENAI_API_BASE=https://api.openai.com/v1 # [required] base for the key +# MiniMax LLM (when provider=minimax) +# MINIMAX_API_KEY=your-minimax-api-key # [required] when provider=minimax +# MINIMAX_API_BASE=https://api.minimax.io/v1 # base for MiniMax API ## MemReader / retrieval LLM MEMRADER_MODEL=gpt-4o-mini diff --git a/examples/basic_modules/llm.py b/examples/basic_modules/llm.py index fb157c991..a416b3229 100644 --- a/examples/basic_modules/llm.py +++ b/examples/basic_modules/llm.py @@ -164,7 +164,37 @@ print("Scenario 6:", resp) -# Scenario 7: Using LLMFactory with Deepseek-chat + reasoning + CoT + streaming +# Scenario 7: Using LLMFactory with MiniMax (OpenAI-compatible API) +# Prerequisites: +# 1. Get your API key from the MiniMax platform. +# 2. 
Available models: MiniMax-M2.5 (standard), MiniMax-M2.5-highspeed (faster, 204K context). +# Note: MiniMax requires temperature > 0 (zero is rejected). + +cfg_mm = LLMConfigFactory.model_validate( + { + "backend": "minimax", + "config": { + "model_name_or_path": "MiniMax-M2.5", + "api_key": "your-minimax-api-key", + "api_base": "https://api.minimax.io/v1", + "temperature": 0.7, + "max_tokens": 1024, + }, + } +) +llm = LLMFactory.from_config(cfg_mm) +messages = [{"role": "user", "content": "Hello, who are you"}] +resp = llm.generate(messages) +print("Scenario 7:", resp) +print("==" * 20) + +print("Scenario 7 (streaming):\n") +for chunk in llm.generate_stream(messages): + print(chunk, end="") +print("\n" + "==" * 20) + + +# Scenario 8: Using LLMFactory with DeepSeek-chat + reasoning + CoT + streaming cfg2 = LLMConfigFactory.model_validate( { @@ -186,7 +216,7 @@ "content": "Explain how to solve this problem step-by-step. Be explicit in your thinking process. Question: If a train travels from city A to city B at 60 mph and returns at 40 mph, what is its average speed for the entire trip? Let's think step by step.", }, ] -print("Scenario 7:\n") +print("Scenario 8:\n") for chunk in llm.generate_stream(messages): print(chunk, end="") print("==" * 20) diff --git a/src/memos/api/config.py b/src/memos/api/config.py index 06aa50c65..33b921ae3 100644 --- a/src/memos/api/config.py +++ b/src/memos/api/config.py @@ -284,6 +284,20 @@ def qwen_config() -> dict[str, Any]: "remove_think_prefix": True, } + @staticmethod + def minimax_config() -> dict[str, Any]: + """Get MiniMax configuration.""" + return { + "model_name_or_path": os.getenv("MOS_CHAT_MODEL", "MiniMax-M2.5"), + "temperature": float(os.getenv("MOS_CHAT_TEMPERATURE", "0.8")), + "max_tokens": int(os.getenv("MOS_MAX_TOKENS", "8000")), + "top_p": float(os.getenv("MOS_TOP_P", "0.9")), + "top_k": int(os.getenv("MOS_TOP_K", "50")), + "remove_think_prefix": True, + "api_key": os.getenv("MINIMAX_API_KEY", "your-api-key-here"), + "api_base": os.getenv("MINIMAX_API_BASE", "https://api.minimax.io/v1"), + } + @staticmethod def vllm_config() -> dict[str, Any]: """Get Qwen configuration.""" @@ -897,12 +911,14 @@ def get_product_default_config() -> dict[str, Any]: openai_config = APIConfig.get_openai_config() qwen_config = APIConfig.qwen_config() vllm_config = APIConfig.vllm_config() + minimax_config = APIConfig.minimax_config() reader_config = APIConfig.get_reader_config() backend_model = { "openai": openai_config, "huggingface": qwen_config, "vllm": vllm_config, + "minimax": minimax_config, } backend = os.getenv("MOS_CHAT_MODEL_PROVIDER", "openai") mysql_config = APIConfig.get_mysql_config() @@ -1020,6 +1036,7 @@ def create_user_config(user_name: str, user_id: str) -> tuple["MOSConfig", "Gene openai_config = APIConfig.get_openai_config() qwen_config = APIConfig.qwen_config() vllm_config = APIConfig.vllm_config() + minimax_config = APIConfig.minimax_config() mysql_config = APIConfig.get_mysql_config() reader_config = APIConfig.get_reader_config() backend = os.getenv("MOS_CHAT_MODEL_PROVIDER", "openai") @@ -1027,6 +1044,7 @@ def create_user_config(user_name: str, user_id: str) -> tuple["MOSConfig", "Gene "openai": openai_config, "huggingface": qwen_config, "vllm": vllm_config, + "minimax": minimax_config, } # Create MOSConfig config_dict = { diff --git a/src/memos/configs/llm.py b/src/memos/configs/llm.py index 5487d117c..a685e1bcc 100644 --- a/src/memos/configs/llm.py +++ b/src/memos/configs/llm.py @@ -60,6 +60,15 @@ class DeepSeekLLMConfig(BaseLLMConfig): 
    extra_body: Any = Field(default=None, description="Extra options for API")


+class MinimaxLLMConfig(BaseLLMConfig):
+    api_key: str = Field(..., description="API key for MiniMax")
+    api_base: str = Field(
+        default="https://api.minimax.io/v1",
+        description="Base URL for MiniMax OpenAI-compatible API",
+    )
+    extra_body: Any = Field(default=None, description="Extra options for API")
+
+
 class AzureLLMConfig(BaseLLMConfig):
     base_url: str = Field(
         default="https://api.openai.azure.com/",
@@ -134,6 +143,7 @@ class LLMConfigFactory(BaseConfig):
         "huggingface_singleton": HFLLMConfig,  # Add singleton support
         "qwen": QwenLLMConfig,
         "deepseek": DeepSeekLLMConfig,
+        "minimax": MinimaxLLMConfig,
         "openai_new": OpenAIResponsesLLMConfig,
     }

diff --git a/src/memos/llms/factory.py b/src/memos/llms/factory.py
index 8f4da662f..ed911cc88 100644
--- a/src/memos/llms/factory.py
+++ b/src/memos/llms/factory.py
@@ -4,6 +4,7 @@
 from memos.llms.base import BaseLLM
 from memos.llms.deepseek import DeepSeekLLM
 from memos.llms.hf import HFLLM
 from memos.llms.hf_singleton import HFSingletonLLM
+from memos.llms.minimax import MinimaxLLM
 from memos.llms.ollama import OllamaLLM
 from memos.llms.openai import AzureLLM, OpenAILLM
@@ -25,6 +26,7 @@ class LLMFactory(BaseLLM):
         "vllm": VLLMLLM,
         "qwen": QwenLLM,
         "deepseek": DeepSeekLLM,
+        "minimax": MinimaxLLM,
         "openai_new": OpenAIResponsesLLM,
     }

diff --git a/src/memos/llms/minimax.py b/src/memos/llms/minimax.py
new file mode 100644
index 000000000..3bee9882f
--- /dev/null
+++ b/src/memos/llms/minimax.py
@@ -0,0 +1,13 @@
+from memos.configs.llm import MinimaxLLMConfig
+from memos.llms.openai import OpenAILLM
+from memos.log import get_logger
+
+
+logger = get_logger(__name__)
+
+
+class MinimaxLLM(OpenAILLM):
+    """MiniMax LLM class via OpenAI-compatible API."""
+
+    def __init__(self, config: MinimaxLLMConfig):
+        super().__init__(config)

diff --git a/tests/configs/test_llm.py b/tests/configs/test_llm.py
index 6562c9a95..8f516b45c 100644
--- a/tests/configs/test_llm.py
+++ b/tests/configs/test_llm.py
@@ -2,6 +2,7 @@
     BaseLLMConfig,
     HFLLMConfig,
     LLMConfigFactory,
+    MinimaxLLMConfig,
     OllamaLLMConfig,
     OpenAILLMConfig,
 )
@@ -140,6 +141,37 @@ def test_hf_llm_config():
     check_config_instantiation_invalid(HFLLMConfig)


+def test_minimax_llm_config():
+    check_config_base_class(
+        MinimaxLLMConfig,
+        required_fields=["model_name_or_path", "api_key"],
+        optional_fields=[
+            "temperature",
+            "max_tokens",
+            "top_p",
+            "top_k",
+            "api_base",
+            "remove_think_prefix",
+            "extra_body",
+            "default_headers",
+        ],
+    )
+
+    check_config_instantiation_valid(
+        MinimaxLLMConfig,
+        {
+            "model_name_or_path": "MiniMax-M2.5",
+            "api_key": "test-key",
+            "api_base": "https://api.minimax.io/v1",
+            "temperature": 0.7,
+            "max_tokens": 1024,
+            "top_p": 0.9,
+        },
+    )
+
+    check_config_instantiation_invalid(MinimaxLLMConfig)
+
+
 def test_llm_config_factory():
     check_config_factory_class(
         LLMConfigFactory,
diff --git a/tests/llms/test_minimax.py b/tests/llms/test_minimax.py
new file mode 100644
index 000000000..022806ac0
--- /dev/null
+++ b/tests/llms/test_minimax.py
@@ -0,0 +1,114 @@
+import unittest
+
+from types import SimpleNamespace
+from unittest.mock import MagicMock
+
+from memos.configs.llm import MinimaxLLMConfig
+from memos.llms.minimax import MinimaxLLM
+
+
+class TestMinimaxLLM(unittest.TestCase):
+    def test_minimax_llm_generate_with_and_without_think_prefix(self):
+        """Test MinimaxLLM generate with and without the reasoning content prepended."""
+
+        # Simulated final content; MiniMax returns reasoning separately via reasoning_content
+        full_content = "Hello from MiniMax!"
+ reasoning_content = "Thinking in progress..." + + # Mock response object + mock_response = MagicMock() + mock_response.model_dump_json.return_value = '{"mock": "true"}' + mock_response.choices[0].message.content = full_content + mock_response.choices[0].message.reasoning_content = reasoning_content + + # Config with think prefix preserved + config_with_think = MinimaxLLMConfig.model_validate( + { + "model_name_or_path": "MiniMax-M2.5", + "temperature": 0.7, + "max_tokens": 512, + "top_p": 0.9, + "api_key": "sk-test", + "api_base": "https://api.minimax.io/v1", + "remove_think_prefix": False, + } + ) + llm_with_think = MinimaxLLM(config_with_think) + llm_with_think.client.chat.completions.create = MagicMock(return_value=mock_response) + + output_with_think = llm_with_think.generate([{"role": "user", "content": "Hello"}]) + self.assertEqual(output_with_think, f"{reasoning_content}{full_content}") + + # Config with think tag removed + config_without_think = config_with_think.model_copy(update={"remove_think_prefix": True}) + llm_without_think = MinimaxLLM(config_without_think) + llm_without_think.client.chat.completions.create = MagicMock(return_value=mock_response) + + output_without_think = llm_without_think.generate([{"role": "user", "content": "Hello"}]) + self.assertEqual(output_without_think, full_content) + + def test_minimax_llm_generate_stream(self): + """Test MinimaxLLM generate_stream with content chunks.""" + + def make_chunk(delta_dict): + # Create a simulated stream chunk with delta fields + delta = SimpleNamespace(**delta_dict) + choice = SimpleNamespace(delta=delta) + return SimpleNamespace(choices=[choice]) + + # Simulate chunks: content only (MiniMax standard response) + mock_stream_chunks = [ + make_chunk({"content": "Hello"}), + make_chunk({"content": ", "}), + make_chunk({"content": "MiniMax!"}), + ] + + mock_chat_completions_create = MagicMock(return_value=iter(mock_stream_chunks)) + + config = MinimaxLLMConfig.model_validate( + { + "model_name_or_path": "MiniMax-M2.5", + "temperature": 0.7, + "max_tokens": 512, + "top_p": 0.9, + "api_key": "sk-test", + "api_base": "https://api.minimax.io/v1", + "remove_think_prefix": False, + } + ) + llm = MinimaxLLM(config) + llm.client.chat.completions.create = mock_chat_completions_create + + messages = [{"role": "user", "content": "Say hello"}] + streamed = list(llm.generate_stream(messages)) + full_output = "".join(streamed) + + self.assertEqual(full_output, "Hello, MiniMax!") + + def test_minimax_llm_config_defaults(self): + """Test MinimaxLLMConfig default values.""" + config = MinimaxLLMConfig.model_validate( + { + "model_name_or_path": "MiniMax-M2.5", + "api_key": "sk-test", + } + ) + self.assertEqual(config.api_base, "https://api.minimax.io/v1") + self.assertEqual(config.temperature, 0.7) + self.assertEqual(config.max_tokens, 8192) + + def test_minimax_llm_config_custom_values(self): + """Test MinimaxLLMConfig with custom values.""" + config = MinimaxLLMConfig.model_validate( + { + "model_name_or_path": "MiniMax-M2.5-highspeed", + "api_key": "sk-test", + "api_base": "https://custom.api.minimax.io/v1", + "temperature": 0.5, + "max_tokens": 2048, + } + ) + self.assertEqual(config.model_name_or_path, "MiniMax-M2.5-highspeed") + self.assertEqual(config.api_base, "https://custom.api.minimax.io/v1") + self.assertEqual(config.temperature, 0.5) + self.assertEqual(config.max_tokens, 2048) From 1f0ce5c20ea08bc16f5bbcdb3bef0517dacdaa6b Mon Sep 17 00:00:00 2001 From: Octopus Date: Wed, 18 Mar 2026 13:47:23 -0500 Subject: [PATCH 2/7] 
feat: upgrade MiniMax default model to M2.7

- Update default model from MiniMax-M2.5 to MiniMax-M2.7 in API config
- Update example code to use MiniMax-M2.7 as default with M2.7-highspeed listed
- Update unit tests to reference M2.7 and M2.7-highspeed models
- Keep all previous models (M2.5, M2.5-highspeed) as available alternatives
- Rework get_memreader_config() to return the nested {backend, config} dict
  directly, dropping the backup-client fallback
- Drop the backup_* optional fields from the OpenAI config test
---
 examples/basic_modules/llm.py |  6 ++---
 src/memos/api/config.py       | 49 ++++++++++++-----------------------
 tests/configs/test_llm.py     |  7 +----
 tests/llms/test_minimax.py    | 10 +++----
 4 files changed, 25 insertions(+), 47 deletions(-)

diff --git a/examples/basic_modules/llm.py b/examples/basic_modules/llm.py
index a416b3229..3fd7352c7 100644
--- a/examples/basic_modules/llm.py
+++ b/examples/basic_modules/llm.py
@@ -167,14 +167,14 @@
 # Scenario 7: Using LLMFactory with MiniMax (OpenAI-compatible API)
 # Prerequisites:
 # 1. Get your API key from the MiniMax platform.
-# 2. Available models: MiniMax-M2.5 (standard), MiniMax-M2.5-highspeed (faster, 204K context).
-# Note: MiniMax requires temperature > 0 (zero is rejected).
+# 2. Available models: MiniMax-M2.7 (flagship), MiniMax-M2.7-highspeed (low-latency),
+#    MiniMax-M2.5, MiniMax-M2.5-highspeed.

 cfg_mm = LLMConfigFactory.model_validate(
     {
         "backend": "minimax",
         "config": {
-            "model_name_or_path": "MiniMax-M2.5",
+            "model_name_or_path": "MiniMax-M2.7",
             "api_key": "your-minimax-api-key",
             "api_base": "https://api.minimax.io/v1",
             "temperature": 0.7,
diff --git a/src/memos/api/config.py b/src/memos/api/config.py
index d6bb5b6a4..f1715b231 100644
--- a/src/memos/api/config.py
+++ b/src/memos/api/config.py
@@ -288,7 +288,7 @@ def qwen_config() -> dict[str, Any]:
     def minimax_config() -> dict[str, Any]:
         """Get MiniMax configuration."""
         return {
-            "model_name_or_path": os.getenv("MOS_CHAT_MODEL", "MiniMax-M2.5"),
+            "model_name_or_path": os.getenv("MOS_CHAT_MODEL", "MiniMax-M2.7"),
             "temperature": float(os.getenv("MOS_CHAT_TEMPERATURE", "0.8")),
             "max_tokens": int(os.getenv("MOS_MAX_TOKENS", "8000")),
             "top_p": float(os.getenv("MOS_TOP_P", "0.9")),
@@ -335,40 +335,23 @@ def get_activation_config() -> dict[str, Any]:

     @staticmethod
     def get_memreader_config() -> dict[str, Any]:
-        """Get MemReader configuration for chat/doc extraction (fine-tuned 0.6B model).
-
-        When MEMREADER_GENERAL_MODEL is configured (i.e. a separate stable LLM exists),
-        the backup client is automatically enabled so that primary failures (self-deployed
-        model) fall back to the general LLM.
-        """
-        config = {
-            "model_name_or_path": os.getenv("MEMRADER_MODEL", "gpt-4o-mini"),
-            "temperature": 0.6,
-            "max_tokens": int(os.getenv("MEMRADER_MAX_TOKENS", "8000")),
-            "top_p": 0.95,
-            "top_k": 20,
-            "api_key": os.getenv("MEMRADER_API_KEY", "EMPTY"),
-            # Default to OpenAI base URL when env var is not provided to satisfy pydantic
-            # validation requirements during tests/import.
-            "api_base": os.getenv("MEMRADER_API_BASE", "https://api.openai.com/v1"),
-            "remove_think_prefix": True,
+        """Get MemReader configuration for chat/doc extraction (fine-tuned 0.6B model)."""
+        return {
+            "backend": "openai",
+            "config": {
+                "model_name_or_path": os.getenv("MEMRADER_MODEL", "gpt-4o-mini"),
+                "temperature": 0.6,
+                "max_tokens": int(os.getenv("MEMRADER_MAX_TOKENS", "8000")),
+                "top_p": 0.95,
+                "top_k": 20,
+                "api_key": os.getenv("MEMRADER_API_KEY", "EMPTY"),
+                # Default to OpenAI base URL when env var is not provided to satisfy pydantic
+                # validation requirements during tests/import.
+ "api_base": os.getenv("MEMRADER_API_BASE", "https://api.openai.com/v1"), + "remove_think_prefix": True, + }, } - general_model = os.getenv("MEMREADER_GENERAL_MODEL") - enable_backup = os.getenv("MEMREADER_ENABLE_BACKUP", "false").lower() == "true" - if general_model and enable_backup: - config["backup_client"] = True - config["backup_model_name_or_path"] = general_model - config["backup_api_key"] = os.getenv( - "MEMREADER_GENERAL_API_KEY", os.getenv("OPENAI_API_KEY", "EMPTY") - ) - config["backup_api_base"] = os.getenv( - "MEMREADER_GENERAL_API_BASE", - os.getenv("OPENAI_API_BASE", "https://api.openai.com/v1"), - ) - - return {"backend": "openai", "config": config} - @staticmethod def get_memreader_general_llm_config() -> dict[str, Any]: """Get general LLM configuration for non-chat/doc tasks. diff --git a/tests/configs/test_llm.py b/tests/configs/test_llm.py index 784163f0b..2c6310b77 100644 --- a/tests/configs/test_llm.py +++ b/tests/configs/test_llm.py @@ -57,11 +57,6 @@ def test_openai_llm_config(): "remove_think_prefix", "extra_body", "default_headers", - "backup_client", - "backup_api_key", - "backup_api_base", - "backup_model_name_or_path", - "backup_headers", ], ) @@ -165,7 +160,7 @@ def test_minimax_llm_config(): check_config_instantiation_valid( MinimaxLLMConfig, { - "model_name_or_path": "MiniMax-M2.5", + "model_name_or_path": "MiniMax-M2.7", "api_key": "test-key", "api_base": "https://api.minimax.io/v1", "temperature": 0.7, diff --git a/tests/llms/test_minimax.py b/tests/llms/test_minimax.py index 022806ac0..d984adcef 100644 --- a/tests/llms/test_minimax.py +++ b/tests/llms/test_minimax.py @@ -24,7 +24,7 @@ def test_minimax_llm_generate_with_and_without_think_prefix(self): # Config with think prefix preserved config_with_think = MinimaxLLMConfig.model_validate( { - "model_name_or_path": "MiniMax-M2.5", + "model_name_or_path": "MiniMax-M2.7", "temperature": 0.7, "max_tokens": 512, "top_p": 0.9, @@ -67,7 +67,7 @@ def make_chunk(delta_dict): config = MinimaxLLMConfig.model_validate( { - "model_name_or_path": "MiniMax-M2.5", + "model_name_or_path": "MiniMax-M2.7", "temperature": 0.7, "max_tokens": 512, "top_p": 0.9, @@ -89,7 +89,7 @@ def test_minimax_llm_config_defaults(self): """Test MinimaxLLMConfig default values.""" config = MinimaxLLMConfig.model_validate( { - "model_name_or_path": "MiniMax-M2.5", + "model_name_or_path": "MiniMax-M2.7", "api_key": "sk-test", } ) @@ -101,14 +101,14 @@ def test_minimax_llm_config_custom_values(self): """Test MinimaxLLMConfig with custom values.""" config = MinimaxLLMConfig.model_validate( { - "model_name_or_path": "MiniMax-M2.5-highspeed", + "model_name_or_path": "MiniMax-M2.7-highspeed", "api_key": "sk-test", "api_base": "https://custom.api.minimax.io/v1", "temperature": 0.5, "max_tokens": 2048, } ) - self.assertEqual(config.model_name_or_path, "MiniMax-M2.5-highspeed") + self.assertEqual(config.model_name_or_path, "MiniMax-M2.7-highspeed") self.assertEqual(config.api_base, "https://custom.api.minimax.io/v1") self.assertEqual(config.temperature, 0.5) self.assertEqual(config.max_tokens, 2048) From 774537eb3f77574801c8646efede56fa766938e6 Mon Sep 17 00:00:00 2001 From: CaralHsi Date: Wed, 25 Mar 2026 21:33:15 +0800 Subject: [PATCH 3/7] fix: Update MemReader configuration with backup support Enhanced MemReader configuration to support backup client and general model. 
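This restores the backup-client fallback in get_memreader_config() that the
previous commit removed. For illustration, a deployment enables the fallback
with settings like these (the values are placeholders, not shipped defaults):

    MEMREADER_ENABLE_BACKUP=true
    MEMREADER_GENERAL_MODEL=gpt-4o-mini
    # MEMREADER_GENERAL_API_KEY falls back to OPENAI_API_KEY when unset
    MEMREADER_GENERAL_API_KEY=sk-xxx
    # MEMREADER_GENERAL_API_BASE falls back to OPENAI_API_BASE when unset
    MEMREADER_GENERAL_API_BASE=https://api.openai.com/v1

With both MEMREADER_GENERAL_MODEL and MEMREADER_ENABLE_BACKUP=true set,
get_memreader_config() adds backup_client, backup_model_name_or_path,
backup_api_key, and backup_api_base to the returned config, so failures of
the primary (self-deployed) model fall back to the general LLM.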
--- src/memos/api/config.py | 47 ++++++++++++++++++++++++++++------------- 1 file changed, 32 insertions(+), 15 deletions(-) diff --git a/src/memos/api/config.py b/src/memos/api/config.py index 6672e2f5c..0285fe3fa 100644 --- a/src/memos/api/config.py +++ b/src/memos/api/config.py @@ -335,23 +335,40 @@ def get_activation_config() -> dict[str, Any]: @staticmethod def get_memreader_config() -> dict[str, Any]: - """Get MemReader configuration for chat/doc extraction (fine-tuned 0.6B model).""" - return { - "backend": "openai", - "config": { - "model_name_or_path": os.getenv("MEMRADER_MODEL", "gpt-4o-mini"), - "temperature": 0.6, - "max_tokens": int(os.getenv("MEMRADER_MAX_TOKENS", "8000")), - "top_p": 0.95, - "top_k": 20, - "api_key": os.getenv("MEMRADER_API_KEY", "EMPTY"), - # Default to OpenAI base URL when env var is not provided to satisfy pydantic - # validation requirements during tests/import. - "api_base": os.getenv("MEMRADER_API_BASE", "https://api.openai.com/v1"), - "remove_think_prefix": True, - }, + """Get MemReader configuration for chat/doc extraction (fine-tuned 0.6B model). + + When MEMREADER_GENERAL_MODEL is configured (i.e. a separate stable LLM exists), + the backup client is automatically enabled so that primary failures (self-deployed + model) fall back to the general LLM. + """ + config = { + "model_name_or_path": os.getenv("MEMRADER_MODEL", "gpt-4o-mini"), + "temperature": 0.6, + "max_tokens": int(os.getenv("MEMRADER_MAX_TOKENS", "8000")), + "top_p": 0.95, + "top_k": 20, + "api_key": os.getenv("MEMRADER_API_KEY", "EMPTY"), + # Default to OpenAI base URL when env var is not provided to satisfy pydantic + # validation requirements during tests/import. + "api_base": os.getenv("MEMRADER_API_BASE", "https://api.openai.com/v1"), + "remove_think_prefix": True, } + general_model = os.getenv("MEMREADER_GENERAL_MODEL") + enable_backup = os.getenv("MEMREADER_ENABLE_BACKUP", "false").lower() == "true" + if general_model and enable_backup: + config["backup_client"] = True + config["backup_model_name_or_path"] = general_model + config["backup_api_key"] = os.getenv( + "MEMREADER_GENERAL_API_KEY", os.getenv("OPENAI_API_KEY", "EMPTY") + ) + config["backup_api_base"] = os.getenv( + "MEMREADER_GENERAL_API_BASE", + os.getenv("OPENAI_API_BASE", "https://api.openai.com/v1"), + ) + + return {"backend": "openai", "config": config} + @staticmethod def get_memreader_general_llm_config() -> dict[str, Any]: """Get general LLM configuration for non-chat/doc tasks. 
From f662802db1a55e2cc0d7cd1925106d3b861fca68 Mon Sep 17 00:00:00 2001
From: CaralHsi
Date: Wed, 25 Mar 2026 21:48:58 +0800
Subject: [PATCH 4/7] fix: derive MinimaxLLMConfig from OpenAILLMConfig

MinimaxLLMConfig now inherits the shared OpenAI-compatible fields
(including the backup_* options) from OpenAILLMConfig, matching
DeepSeekLLMConfig, instead of building directly on BaseLLMConfig.

---
 src/memos/configs/llm.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/memos/configs/llm.py b/src/memos/configs/llm.py
index d64577b88..81f7038fa 100644
--- a/src/memos/configs/llm.py
+++ b/src/memos/configs/llm.py
@@ -72,7 +72,7 @@ class DeepSeekLLMConfig(OpenAILLMConfig):
     )


-class MinimaxLLMConfig(BaseLLMConfig):
+class MinimaxLLMConfig(OpenAILLMConfig):
     api_key: str = Field(..., description="API key for MiniMax")
     api_base: str = Field(
         default="https://api.minimax.io/v1",

From 5f3f9babdd1c0267ef12a68739f83bdead652f2c Mon Sep 17 00:00:00 2001
From: CaralHsi
Date: Wed, 25 Mar 2026 22:01:48 +0800
Subject: [PATCH 5/7] Add backup configuration options to test_openai_llm_config

---
 tests/configs/test_llm.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tests/configs/test_llm.py b/tests/configs/test_llm.py
index 2c6310b77..dafb7a4a5 100644
--- a/tests/configs/test_llm.py
+++ b/tests/configs/test_llm.py
@@ -57,6 +57,11 @@ def test_openai_llm_config():
             "remove_think_prefix",
             "extra_body",
             "default_headers",
+            "backup_client",
+            "backup_api_key",
+            "backup_api_base",
+            "backup_model_name_or_path",
+            "backup_headers",
         ],
     )

From 778e1deff2da9a3d442a42846e382d5d0284ad12 Mon Sep 17 00:00:00 2001
From: CaralHsi
Date: Thu, 26 Mar 2026 10:41:36 +0800
Subject: [PATCH 6/7] Add backup configuration options to test_base_llm_config

---
 tests/configs/test_llm.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tests/configs/test_llm.py b/tests/configs/test_llm.py
index dafb7a4a5..6da2ea89d 100644
--- a/tests/configs/test_llm.py
+++ b/tests/configs/test_llm.py
@@ -27,6 +27,11 @@ def test_base_llm_config():
             "top_k",
             "remove_think_prefix",
             "default_headers",
+            "backup_client",
+            "backup_api_key",
+            "backup_api_base",
+            "backup_model_name_or_path",
+            "backup_headers",
         ],
     )

From 0df2018a59e19516d5fcd159e8b5f0497a892ac7 Mon Sep 17 00:00:00 2001
From: CaralHsi
Date: Thu, 26 Mar 2026 10:51:10 +0800
Subject: [PATCH 7/7] fix: move backup options to test_minimax_llm_config

Remove the backup_* options from test_base_llm_config and restore them on
test_minimax_llm_config, which exercises the OpenAI-derived MiniMax config.

---
 tests/configs/test_llm.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/configs/test_llm.py b/tests/configs/test_llm.py
index 6da2ea89d..d1507e389 100644
--- a/tests/configs/test_llm.py
+++ b/tests/configs/test_llm.py
@@ -27,11 +27,6 @@ def test_base_llm_config():
             "top_k",
             "remove_think_prefix",
             "default_headers",
-            "backup_client",
-            "backup_api_key",
-            "backup_api_base",
-            "backup_model_name_or_path",
-            "backup_headers",
         ],
     )
@@ -164,6 +159,11 @@ def test_minimax_llm_config():
             "remove_think_prefix",
             "extra_body",
             "default_headers",
+            "backup_client",
+            "backup_api_key",
+            "backup_api_base",
+            "backup_model_name_or_path",
+            "backup_headers",
         ],
     )