MemTensor · CaralHsi · Nov 22, 2025 · Nov 21, 2025 · Nov 21, 2025 · Nov 21, 2025
diff --git a/src/memos/api/config.py b/src/memos/api/config.py
@@ -328,7 +328,9 @@ def get_memreader_config() -> dict[str, Any]:
                 "top_p": 0.95,
                 "top_k": 20,
                 "api_key": os.getenv("MEMRADER_API_KEY", "EMPTY"),
-                "api_base": os.getenv("MEMRADER_API_BASE"),
+                # Fall back to the OpenAI base URL when MEMRADER_API_BASE is unset so that
+                # pydantic validation still passes during tests and at import time.
+                "api_base": os.getenv("MEMRADER_API_BASE", "https://api.openai.com/v1"),
                 "remove_think_prefix": True,
                 "extra_body": {"chat_template_kwargs": {"enable_thinking": False}},
             },

diff --git a/src/memos/api/product_models.py b/src/memos/api/product_models.py
@@ -93,6 +93,9 @@ class ChatRequest(BaseRequest):
     temperature: float | None = Field(None, description="Temperature for sampling")
     top_p: float | None = Field(None, description="Top-p (nucleus) sampling parameter")
     add_message_on_answer: bool = Field(True, description="Add dialogs to memory after chat")
+    moscube: bool = Field(
+        False, description="(Deprecated) Whether to use legacy MemOSCube pipeline"
+    )
 
 
 class ChatCompleteRequest(BaseRequest):
@@ -116,6 +119,11 @@ class ChatCompleteRequest(BaseRequest):
     top_p: float | None = Field(None, description="Top-p (nucleus) sampling parameter")
     add_message_on_answer: bool = Field(True, description="Add dialogs to memory after chat")
 
+    base_prompt: str | None = Field(None, description="(Deprecated) Base prompt alias")
+    moscube: bool = Field(
+        False, description="(Deprecated) Whether to use legacy MemOSCube pipeline"
+    )
+
 
 class UserCreate(BaseRequest):
     user_name: str | None = Field(None, description="Name of the user")
@@ -204,49 +212,218 @@ class SearchRequest(BaseRequest):
 class APISearchRequest(BaseRequest):
     """Request model for searching memories."""
 
-    query: str = Field(..., description="Search query")
-    user_id: str = Field(None, description="User ID")
-    mem_cube_id: str | None = Field(None, description="Cube ID to search in")
+    # ==== Basic inputs ====
+    query: str = Field(
+        ...,
+        description=("User search query"),
+    )
+    user_id: str = Field(..., description="User ID")
+
+    # ==== Cube scoping ====
+    mem_cube_id: str | None = Field(
+        None,
+        description=(
+            "(Deprecated) Single cube ID to search in. "
+            "Prefer `readable_cube_ids` for multi-cube search."
+        ),
+    )
     readable_cube_ids: list[str] | None = Field(
-        None, description="List of cube IDs user can read for multi-cube search"
+        None,
+        description=(
+            "List of cube IDs that are readable for this request. "
+            "Required for algorithm-facing API; optional for developer-facing API."
+        ),
     )
-    mode: SearchMode = Field(SearchMode.FAST, description="search mode: fast, fine, or mixture")
-    internet_search: bool = Field(False, description="Whether to use internet search")
-    top_k: int = Field(10, description="Number of results to return")
-    chat_history: list[MessageDict] | None = Field(None, description="Chat history")
-    session_id: str | None = Field(None, description="Session ID for soft-filtering memories")
+
+    # ==== Search mode ====
+    mode: SearchMode = Field(
+        SearchMode.FAST,
+        description="Search mode: fast, fine, or mixture.",
+    )
+
+    session_id: str | None = Field(
+        None,
+        description=(
+            "Session ID used as a soft signal to prioritize more relevant memories. "
+            "Only used for weighting, not as a hard filter."
+        ),
+    )
+
+    # ==== Result control ====
+    top_k: int = Field(
+        10,
+        ge=1,
+        description="Number of textual memories to retrieve (top-K). Default: 10.",
+    )
+
+    pref_top_k: int = Field(
+        6,
+        ge=0,
+        description="Number of preference memories to retrieve (top-K). Default: 6.",
+    )
+
+    include_preference: bool = Field(
+        True,
+        description=(
+            "Whether to retrieve preference memories along with general memories. "
+            "If enabled, the system will automatically recall user preferences "
+            "relevant to the query. Default: True."
+        ),
+    )
+
+    # ==== Filter conditions ====
+    # TODO: maybe add detailed description later
+    filter: dict[str, Any] | None = Field(
+        None,
+        description=("Filter for the memory"),
+    )
+
+    # ==== Extended capabilities ====
+    internet_search: bool = Field(
+        False,
+        description=(
+            "Whether to enable internet search in addition to memory search. "
+            "Primarily used by internal algorithms. Default: False."
+        ),
+    )
+
+    # Internal use only; not supported in the public API yet.
+    threshold: float | None = Field(
+        None,
+        description=(
+            "Internal similarity threshold for searching plaintext memories. "
+            "If None, default thresholds will be applied."
+        ),
+    )
+
+    # ==== Context ====
+    chat_history: list[MessageDict] | None = Field(
+        None,
+        description=(
+            "Historical chat messages used internally by algorithms. "
+            "If None, internal stored history may be used; "
+            "if provided (even an empty list), this value will be used as-is."
+        ),
+    )
+
+    # ==== Backward compatibility ====
+    moscube: bool = Field(
+        False,
+        description="(Deprecated / internal) Whether to use legacy MemOSCube path.",
+    )
+
     operation: list[PermissionDict] | None = Field(
-        None, description="operation ids for multi cubes"
+        None,
+        description="(Internal) Operation definitions for multi-cube read permissions.",
     )
-    include_preference: bool = Field(True, description="Whether to handle preference memory")
-    pref_top_k: int = Field(6, description="Number of preference results to return")
-    filter: dict[str, Any] | None = Field(None, description="Filter for the memory")
 
 
 class APIADDRequest(BaseRequest):
     """Request model for creating memories."""
 
+    # ==== Basic identifiers ====
     user_id: str = Field(None, description="User ID")
-    mem_cube_id: str | None = Field(None, description="Cube ID")
+    session_id: str | None = Field(
+        None,
+        description="Session ID. If not provided, a default session will be used.",
+    )
+
+    # ==== Single-cube writing (Deprecated) ====
+    mem_cube_id: str | None = Field(
+        None,
+        description="(Deprecated) Target cube ID for this add request (optional for developer API).",
+    )
+
+    # ==== Multi-cube writing ====
     writable_cube_ids: list[str] | None = Field(
         None, description="List of cube IDs user can write for multi-cube add"
     )
-    messages: list[MessageDict] | None = Field(None, description="List of messages to store.")
-    memory_content: str | None = Field(None, description="Memory content to store")
-    doc_path: str | None = Field(None, description="Path to document to store")
-    source: str | None = Field(None, description="Source of the memory")
-    chat_history: list[MessageDict] | None = Field(None, description="Chat history")
-    session_id: str | None = Field(None, description="Session id")
-    operation: list[PermissionDict] | None = Field(
-        None, description="operation ids for multi cubes"
-    )
+
+    # ==== Async control ====
     async_mode: Literal["async", "sync"] = Field(
-        "async", description="Whether to add memory in async mode"
+        "async",
+        description=(
+            "Whether to add memory in async mode. "
+            "Use 'async' to enqueue background add (non-blocking), "
+            "or 'sync' to add memories in the current call. "
+            "Default: 'async'."
+        ),
     )
-    custom_tags: list[str] | None = Field(None, description="Custom tags for the memory")
-    info: dict[str, str] | None = Field(None, description="Additional information for the memory")
+
+    # ==== Business tags & info ====
+    custom_tags: list[str] | None = Field(
+        None,
+        description=(
+            "Custom tags for this add request, e.g. ['Travel', 'family']. "
+            "These tags can be used as filters in search."
+        ),
+    )
+
+    info: dict[str, str] | None = Field(
+        None,
+        description=(
+            "Additional metadata for the add request. "
+            "All keys can be used as filters in search. "
+            "Example: "
+            "{'agent_id': 'xxxxxx', "
+            "'app_id': 'xxxx', "
+            "'source_type': 'web', "
+            "'source_url': 'https://www.baidu.com', "
+            "'source_content': '西湖是杭州最著名的景点'}."
+        ),
+    )
+
+    # ==== Input content ====
+    messages: list[MessageDict] | None = Field(
+        None,
+        description=(
+            "List of messages to store. Supports: "
+            "- system / user / assistant messages with 'content' and 'chat_time'; "
+            "- tool messages including: "
+            "  * tool_description (name, description, parameters), "
+            "  * tool_input (call_id, name, argument), "
+            "  * raw tool messages where content is str or list[str], "
+            "  * tool_output with structured output items "
+            "    (input_text / input_image / input_file, etc.). "
+            "Also supports pure input items when there is no dialog."
+        ),
+    )
+
+    # ==== Chat history ====
+    chat_history: list[MessageDict] | None = Field(
+        None,
+        description=(
+            "Historical chat messages used internally by algorithms. "
+            "If None, internal stored history will be used; "
+            "if provided (even an empty list), this value will be used as-is."
+        ),
+    )
+
+    # ==== Feedback flag ====
     is_feedback: bool = Field(
-        False, description="Whether the user feedback in knowladge base service"
+        False,
+        description=("Whether this request represents user feedback. Default: False."),
+    )
+
+    # ==== Backward compatibility fields (will delete later) ====
+    memory_content: str | None = Field(
+        None,
+        description="(Deprecated) Plain memory content to store. Prefer using `messages`.",
+    )
+    doc_path: str | None = Field(
+        None,
+        description="(Deprecated / internal) Path to document to store.",
+    )
+    source: str | None = Field(
+        None,
+        description=(
+            "(Deprecated) Simple source tag of the memory. "
+            "Prefer using `info.source_type` / `info.source_url`."
+        ),
+    )
+    operation: list[PermissionDict] | None = Field(
+        None,
+        description="(Internal) Operation definitions for multi-cube write permissions.",
     )
 
 

diff --git a/src/memos/api/routers/product_router.py b/src/memos/api/routers/product_router.py
@@ -297,7 +297,8 @@ def chat_complete(chat_req: ChatCompleteRequest):
             history=chat_req.history,
             internet_search=chat_req.internet_search,
             moscube=chat_req.moscube,
-            base_prompt=chat_req.base_prompt,
+            base_prompt=chat_req.base_prompt or chat_req.system_prompt,
+            # NOTE: `base_prompt` (line above) is a deprecated alias and will be removed in the future.
             top_k=chat_req.top_k,
             threshold=chat_req.threshold,
             session_id=chat_req.session_id,