From 44da34d7ad599b0da947b7ce12547f1d27ccc23b Mon Sep 17 00:00:00 2001 From: HalfAnElephant <12142917@qq.com> Date: Sun, 29 Mar 2026 14:54:09 +0800 Subject: [PATCH 01/19] =?UTF-8?q?feat:=20=E6=B7=BB=E5=8A=A0LLM=E8=AE=BE?= =?UTF-8?q?=E7=BD=AE=E7=95=8C=E9=9D=A2=E5=92=8CDAG=E8=8A=82=E7=82=B9?= =?UTF-8?q?=E8=A7=86=E8=A7=89=E5=8D=87=E7=BA=A7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. 全局改进: - 创建会话时新增AI确认消息 - 新增设置模态框,支持选择LLM提供商 - 新增 /api/v1/settings/llm 接口 2. DAG编辑器优化: - 节点视觉升级为玻璃拟态风格 - 修复中文文字换行问题 - 使用LLM生成结构化研究问题树,解决层级关系错乱问题 3. 新增服务: - ResearchPlanGenerator: 用LLM生成结构化研究问题 - 重构MasterPlanner使用新生成器 Co-Authored-By: Claude Opus 4.6 --- backend/app/api/router.py | 2 + backend/app/api/routes/settings.py | 52 +++ backend/app/services/conversation_agent.py | 64 +++- backend/app/services/planner.py | 107 +++++- .../app/services/research_plan_generator.py | 340 ++++++++++++++++++ frontend/src/App.tsx | 20 ++ frontend/src/api.ts | 5 + frontend/src/components/DAGEditor.tsx | 120 +++++-- frontend/src/components/SettingsModal.tsx | 94 +++++ frontend/src/styles/components/dag-editor.css | 21 +- frontend/src/styles/components/settings.css | 225 ++++++++++++ frontend/src/styles/index.css | 5 +- frontend/src/types.ts | 15 + 13 files changed, 1013 insertions(+), 57 deletions(-) create mode 100644 backend/app/api/routes/settings.py create mode 100644 backend/app/services/research_plan_generator.py create mode 100644 frontend/src/components/SettingsModal.tsx create mode 100644 frontend/src/styles/components/settings.css diff --git a/backend/app/api/router.py b/backend/app/api/router.py index d82c916..f09788a 100644 --- a/backend/app/api/router.py +++ b/backend/app/api/router.py @@ -3,6 +3,7 @@ from app.api.routes.conversations import router as conversations_router from app.api.routes.evidence import router as evidence_router from app.api.routes.mcp import router as mcp_router +from app.api.routes.settings import router as settings_router 
from app.api.routes.tasks import router as tasks_router api_router = APIRouter() @@ -10,3 +11,4 @@ api_router.include_router(evidence_router) api_router.include_router(mcp_router) api_router.include_router(conversations_router) +api_router.include_router(settings_router) diff --git a/backend/app/api/routes/settings.py b/backend/app/api/routes/settings.py new file mode 100644 index 0000000..7f2ea55 --- /dev/null +++ b/backend/app/api/routes/settings.py @@ -0,0 +1,52 @@ +"""Settings API routes.""" +from __future__ import annotations + +from fastapi import APIRouter + +from app.core.config import settings +from app.models.schemas import LLMOption, LLMProvider, LLMSettingsResponse + +router = APIRouter(prefix="/api/v1/settings") + + +@router.get("/llm", response_model=LLMSettingsResponse) +def get_llm_settings() -> LLMSettingsResponse: + """Get available LLM configurations.""" + options: list[LLMOption] = [ + LLMOption( + provider=LLMProvider.OPENROUTER, + label="OpenRouter", + model=settings.openrouter_model, + configured=bool(settings.openrouter_api_key), + ), + LLMOption( + provider=LLMProvider.DEEPSEEK, + label="DeepSeek", + model=settings.deepseek_model, + configured=bool(settings.deepseek_api_key), + ), + LLMOption( + provider=LLMProvider.OPENAI, + label="OpenAI", + model=settings.openai_model, + configured=bool(settings.openai_api_key), + ), + ] + + # Determine default provider + default_provider = LLMProvider.OPENROUTER + provider_value = settings.default_llm_provider.lower().strip() + if provider_value == "deepseek": + default_provider = LLMProvider.DEEPSEEK + elif provider_value == "openai": + default_provider = LLMProvider.OPENAI + + return LLMSettingsResponse(defaultProvider=default_provider, options=options) + + +@router.patch("/llm", response_model=LLMSettingsResponse) +def update_llm_settings(payload: dict) -> LLMSettingsResponse: + """Update LLM settings (persisted to session/env).""" + # For now, this just returns current settings + # In a full 
implementation, this would persist the default provider + return get_llm_settings() \ No newline at end of file diff --git a/backend/app/services/conversation_agent.py b/backend/app/services/conversation_agent.py index e15a788..0497b87 100644 --- a/backend/app/services/conversation_agent.py +++ b/backend/app/services/conversation_agent.py @@ -14,6 +14,7 @@ ConversationDetail, ConversationMessage, ConversationStatus, + LLMProvider, MessageKind, MessageRole, NodeStatus, @@ -212,6 +213,15 @@ async def create_conversation(self, *, topic: str, config: TaskConfig | None = N content=topic, metadata={"stage": "CREATED"}, ) + # Add AI acknowledgment message before generating plan + self.repository.add_message( + conversation_id, + message_id=new_id(), + role=MessageRole.ASSISTANT, + kind=MessageKind.ASSISTANT_TEXT, + content=self._build_ack_message(topic=topic, config=selected_config), + metadata={"stage": "ACKNOWLEDGED"}, + ) markdown = await asyncio.to_thread(self._generate_initial_plan, topic=topic, config=selected_config) revision = self.repository.add_plan_revision( conversation_id, @@ -981,6 +991,13 @@ def _parse_plan(self, markdown: str, *, topic: str, base_config: TaskConfig) -> config_data["targetWordCount"] = self._int_or_default( value, base_config.targetWordCount, min_value=1000, max_value=50000) continue + if key == "llm_provider": + normalized = value.strip().strip('"').strip("'").lower() + if normalized in {item.value for item in LLMProvider}: + config_data["llmProvider"] = normalized + else: + warnings.append(f"llm_provider 无效,已忽略:{value}") + continue return ParsedPlan(title=parsed_title[:200], config=TaskConfig(**config_data), warnings=warnings) @@ -1004,7 +1021,7 @@ def _parse_sources(raw: str) -> list[str]: def _generate_initial_plan(self, *, topic: str, config: TaskConfig) -> str: prompt = ( "请为用户生成一个可执行的研究方案,输出必须是 Markdown,并且必须包含 front matter。\n" - "front matter 字段固定为:title, topic, max_depth, max_nodes, priority, search_sources, target_word_count。\n" + 
"front matter 字段固定为:title, topic, max_depth, max_nodes, priority, search_sources, target_word_count, llm_provider。\n" "正文至少包含:研究目标、研究问题拆解、方法与来源、执行步骤、风险与边界、交付标准。\n" "严禁输出解释性前言,直接返回完整 Markdown。" ) @@ -1012,11 +1029,11 @@ def _generate_initial_plan(self, *, topic: str, config: TaskConfig) -> str: f"主题:{topic}\n" f"配置建议:max_depth={config.maxDepth}, max_nodes={config.maxNodes}, " f"priority={config.priority}, search_sources={config.searchSources}, " - f"target_word_count={config.targetWordCount}\n" + f"target_word_count={config.targetWordCount}, llm_provider={config.llmProvider.value}\n" "输出语言:中文。" ) generated = self._chat_complete( - system_prompt=prompt, user_prompt=user_input) + system_prompt=prompt, user_prompt=user_input, provider=config.llmProvider) if generated: normalized = self._ensure_front_matter( generated, topic=topic, config=config) @@ -1043,10 +1060,10 @@ def _generate_revised_plan( f"当前方案如下:\n{current_plan}\n\n" f"保底配置:max_depth={config.maxDepth}, max_nodes={config.maxNodes}, " f"priority={config.priority}, search_sources={config.searchSources}, " - f"target_word_count={config.targetWordCount}" + f"target_word_count={config.targetWordCount}, llm_provider={config.llmProvider.value}" ) generated = self._chat_complete( - system_prompt=prompt, user_prompt=user_input) + system_prompt=prompt, user_prompt=user_input, provider=config.llmProvider) if generated: normalized = self._ensure_front_matter( generated, topic=topic, config=config) @@ -1054,10 +1071,16 @@ def _generate_revised_plan( return normalized return self._fallback_revision(current_plan=current_plan, instruction=instruction, topic=topic, config=config) - def _chat_complete(self, *, system_prompt: str, user_prompt: str) -> str: + def _chat_complete( + self, + *, + system_prompt: str, + user_prompt: str, + provider: LLMProvider | str | None = None, + ) -> str: if settings.use_mock_sources: return "" - base_url, api_key, model = self._resolve_provider() + base_url, api_key, model = 
self._resolve_provider(provider) if not base_url or not api_key: return "" try: @@ -1089,16 +1112,31 @@ def _chat_complete(self, *, system_prompt: str, user_prompt: str) -> str: return "" @staticmethod - def _resolve_provider() -> tuple[str, str, str]: - provider = settings.default_llm_provider.lower().strip() - if provider == "openrouter": + def _resolve_provider(provider: LLMProvider | str | None = None) -> tuple[str, str, str]: + selected = (provider.value if isinstance(provider, LLMProvider) else provider) or settings.default_llm_provider + provider_name = selected.lower().strip() + if provider_name == "openrouter": return settings.openrouter_base_url, settings.openrouter_api_key, settings.openrouter_model - if provider == "deepseek": + if provider_name == "deepseek": return settings.deepseek_base_url, settings.deepseek_api_key, settings.deepseek_model - if provider == "openai": + if provider_name == "openai": return settings.openai_base_url, settings.openai_api_key, settings.openai_model return "", "", "" + @staticmethod + def _build_ack_message(*, topic: str, config: TaskConfig) -> str: + provider_labels = { + LLMProvider.OPENROUTER: "OpenRouter", + LLMProvider.DEEPSEEK: "DeepSeek", + LLMProvider.OPENAI: "OpenAI", + } + provider_name = provider_labels.get(config.llmProvider, config.llmProvider.value) + return ( + f"已收到您的研究主题「{topic[:60]}」。" + f"我将使用 {provider_name} 为您构思研究方案," + f"预计将从 {config.maxDepth} 个层级展开 {config.maxNodes} 个研究节点。" + ) + def _abort_task_if_active(self, task_id: str | None) -> None: if not task_id: return @@ -1180,6 +1218,7 @@ def _ensure_front_matter(self, markdown: str, *, topic: str, config: TaskConfig) f"priority: {config.priority}\n" f"search_sources: [{', '.join(config.searchSources)}]\n" f"target_word_count: {config.targetWordCount}\n" + f"llm_provider: {config.llmProvider.value}\n" "---\n\n" f"{text}" ) @@ -1195,6 +1234,7 @@ def _fallback_plan(*, topic: str, config: TaskConfig) -> str: f"priority: {config.priority}\n" 
f"search_sources: [{', '.join(config.searchSources)}]\n" f"target_word_count: {config.targetWordCount}\n" + f"llm_provider: {config.llmProvider.value}\n" "---\n\n" "## 研究目标\n" "围绕主题建立可验证的结论链路,输出可执行决策建议。\n\n" diff --git a/backend/app/services/planner.py b/backend/app/services/planner.py index 762f085..4257304 100644 --- a/backend/app/services/planner.py +++ b/backend/app/services/planner.py @@ -5,13 +5,102 @@ from app.core.utils import new_id, now_iso from app.models.schemas import DAGGraph, DAGEdge, NodeStatus, TaskConfig, TaskMetadata, TaskNode, WritingSectionPlan +from app.services.research_plan_generator import research_plan_generator class MasterPlanner: - """Builds a bounded DAG with BFS + DFS expansion and simple pruning.""" + """Builds a bounded DAG with LLM-generated structured research questions.""" def build_dag(self, root_task_id: str, title: str, description: str, config: TaskConfig) -> DAGGraph: + """Build DAG using LLM-generated structured research plan.""" ts = now_iso() + + # Generate structured research plan using LLM + try: + plan = research_plan_generator.generate( + topic=title, + description=description, + config=config, + ) + return self._build_dag_from_plan(root_task_id, title, description, plan, config, ts) + except Exception: + # Fallback to template-based generation + return self._build_dag_fallback(root_task_id, title, description, config, ts) + + def _build_dag_from_plan( + self, + root_task_id: str, + title: str, + description: str, + plan, + config: TaskConfig, + ts: str, + ) -> DAGGraph: + """Convert structured research plan to DAG format.""" + nodes: list[TaskNode] = [] + edges: list[DAGEdge] = [] + question_to_task: dict[str, str] = {} + + # Map question IDs to task IDs + for question_id in plan.all_questions: + question_to_task[question_id] = new_id() + + # Root task ID mapping + root_question = plan.root_question + question_to_task[root_question.question_id] = root_task_id + + # Create TaskNode for each research question + for 
question_id, question in plan.all_questions.items(): + task_id = question_to_task[question_id] + + # Calculate priority based on level (deeper = lower priority) + priority = max(1, config.priority - question.level) + + # Determine status based on level + status = NodeStatus.PENDING + + node = TaskNode( + taskId=task_id, + parentTaskId=question_to_task.get(question.parent_id) if question.parent_id else None, + title=question.title, + description=question.description, + status=status, + priority=priority, + dependencies=[question_to_task[question.parent_id]] if question.parent_id else [], + children=[question_to_task[cid] for cid in question.children], + metadata=TaskMetadata( + estimatedTokenCost=800 + question.level * 200, + searchDepth=question.level, + infoGainScore=1.0 - (question.level * 0.15), + createdAt=ts, + updatedAt=ts, + ), + output=[], + ) + nodes.append(node) + + # Create edges based on parent-child relationships + for question_id, question in plan.all_questions.items(): + source_id = question_to_task[question_id] + for child_id in question.children: + target_id = question_to_task.get(child_id) + if target_id: + edges.append(DAGEdge.model_validate({ + "from": source_id, + "to": target_id, + })) + + return DAGGraph(nodes=nodes, edges=edges) + + def _build_dag_fallback( + self, + root_task_id: str, + title: str, + description: str, + config: TaskConfig, + ts: str, + ) -> DAGGraph: + """Fallback DAG generation when LLM fails.""" root = TaskNode( taskId=root_task_id, parentTaskId=None, @@ -35,7 +124,6 @@ def build_dag(self, root_task_id: str, title: str, description: str, config: Tas edges: list[DAGEdge] = [] q: deque[tuple[TaskNode, int]] = deque([(root, 0)]) total_nodes = 1 - low_gain_streak = 0 while q and total_nodes < config.maxNodes: parent, depth = q.popleft() @@ -47,25 +135,19 @@ def build_dag(self, root_task_id: str, title: str, description: str, config: Tas if total_nodes >= config.maxNodes: break node_id = new_id() - info_gain = 
self._estimate_info_gain(node_id, depth + 1) - status = NodeStatus.PRUNED if low_gain_streak >= 1 and info_gain < 0.2 else NodeStatus.PENDING - if info_gain < 0.2: - low_gain_streak += 1 - else: - low_gain_streak = 0 node = TaskNode( taskId=node_id, parentTaskId=parent.taskId, title=ctitle, description=f"{ctitle}: {description}", - status=status, + status=NodeStatus.PENDING, priority=max(1, config.priority - depth), dependencies=[parent.taskId], children=[], metadata=TaskMetadata( estimatedTokenCost=800 + depth * 200, searchDepth=depth + 1, - infoGainScore=info_gain, + infoGainScore=0.5, createdAt=ts, updatedAt=ts, ), @@ -305,8 +387,3 @@ def _expand_topic(parent_title: str, description: str, depth: int) -> list[str]: if item not in deduped: deduped.append(item) return deduped[:4] - - @staticmethod - def _estimate_info_gain(seed: str, depth: int) -> float: - value = (sum(ord(ch) for ch in seed) % 100) / 100.0 - return max(0.05, round(value * (1.0 / (depth + 0.5)), 2)) diff --git a/backend/app/services/research_plan_generator.py b/backend/app/services/research_plan_generator.py new file mode 100644 index 0000000..e02bbdf --- /dev/null +++ b/backend/app/services/research_plan_generator.py @@ -0,0 +1,340 @@ +"""Research Plan Generator - Generates structured research questions using LLM.""" +from __future__ import annotations + +import json +from dataclasses import dataclass +from typing import Any + +import httpx + +from app.core.config import settings +from app.core.utils import new_id +from app.models.schemas import LLMProvider, TaskConfig + + +@dataclass(frozen=True) +class ResearchQuestion: + """A structured research question node.""" + question_id: str + title: str + description: str + level: int + rank: int + parent_id: str | None + children: list[str] + + +@dataclass(frozen=True) +class StructuredResearchPlan: + """A complete structured research plan with hierarchical questions.""" + root_question: ResearchQuestion + all_questions: dict[str, ResearchQuestion] + 
total_nodes: int + max_depth: int + + +class ResearchPlanGenerator: + """Generates structured research questions using LLM.""" + + _SYSTEM_PROMPT = """你是一个研究规划专家。你的任务是将用户的研究主题分解为一个结构化的研究问题树。 + +输出要求: +1. 必须输出有效的 JSON 格式 +2. 每个问题节点包含:title(标题)、description(描述)、level(层级,从0开始)、rank(同级排序) +3. 问题树必须是一个合理的层级结构,根节点 level=0 +4. 每个层级最多 4 个子问题 +5. 每个问题应该独立、可研究 + +输出格式示例: +{ + "questions": [ + { + "title": "核心研究问题", + "description": "问题的详细描述", + "level": 0, + "rank": 0, + "children": [ + { + "title": "子问题1", + "description": "子问题描述", + "level": 1, + "rank": 0, + "children": [] + } + ] + } + ] +} + +注意: +- 不要输出任何额外的解释文本 +- 确保 JSON 格式正确 +- title 应简洁(不超过30字) +- description 应具体(50-150字)""" + + def generate( + self, + *, + topic: str, + description: str, + config: TaskConfig, + ) -> StructuredResearchPlan: + """Generate a structured research plan using LLM.""" + user_prompt = self._build_user_prompt(topic=topic, description=description, config=config) + + try: + response_text = self._call_llm( + system_prompt=self._SYSTEM_PROMPT, + user_prompt=user_prompt, + provider=config.llmProvider, + ) + return self._parse_response(response_text, topic=topic, description=description) + except Exception: + # Fallback to template-based generation if LLM fails + return self._fallback_plan(topic=topic, description=description, config=config) + + def _build_user_prompt( + self, + *, + topic: str, + description: str, + config: TaskConfig, + ) -> str: + max_nodes = min(config.maxNodes, 12) + max_depth = min(config.maxDepth, 3) + + return f"""研究主题:{topic} + +研究背景:{description[:500]} + +约束条件: +- 最大深度:{max_depth} 层 +- 最大节点数:{max_nodes} 个 +- 每个层级最多 4 个并列问题 + +请生成结构化的研究问题树。""" + + def _call_llm( + self, + *, + system_prompt: str, + user_prompt: str, + provider: LLMProvider | str | None = None, + ) -> str: + if settings.use_mock_sources: + raise ValueError("Mock sources enabled") + + base_url, api_key, model = self._resolve_provider(provider) + if not base_url or not api_key: + raise ValueError("LLM 
provider not configured") + + with httpx.Client(timeout=settings.llm_timeout_medium) as client: + response = client.post( + f"{base_url}/chat/completions", + headers={ + "Authorization": f"Bearer {api_key}", + "Content-Type": "application/json", + }, + json={ + "model": model, + "temperature": 0.3, + "messages": [ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_prompt}, + ], + }, + ) + response.raise_for_status() + payload = response.json() + + return ( + payload.get("choices", [{}])[0] + .get("message", {}) + .get("content", "") + .strip() + ) + + @staticmethod + def _resolve_provider(provider: LLMProvider | str | None = None) -> tuple[str, str, str]: + selected = (provider.value if isinstance(provider, LLMProvider) else provider) or settings.default_llm_provider + provider_name = selected.lower().strip() + if provider_name == "openrouter": + return settings.openrouter_base_url, settings.openrouter_api_key, settings.openrouter_model + if provider_name == "deepseek": + return settings.deepseek_base_url, settings.deepseek_api_key, settings.deepseek_model + if provider_name == "openai": + return settings.openai_base_url, settings.openai_api_key, settings.openai_model + return "", "", "" + + def _parse_response( + self, + response_text: str, + *, + topic: str, + description: str, + ) -> StructuredResearchPlan: + """Parse LLM response into a structured research plan.""" + # Extract JSON from response + json_text = self._extract_json(response_text) + + try: + data = json.loads(json_text) + except json.JSONDecodeError: + return self._fallback_plan(topic=topic, description=description, config=TaskConfig()) + + questions_data = data.get("questions", []) + if not questions_data: + return self._fallback_plan(topic=topic, description=description, config=TaskConfig()) + + all_questions: dict[str, ResearchQuestion] = {} + root_id = new_id() + + def process_node( + node_data: dict[str, Any], + level: int, + parent_id: str | None, + ) -> str: + 
question_id = new_id() + children_data = node_data.get("children", []) + children_ids: list[str] = [] + + # Process children first to get their IDs + for child_data in children_data: + child_id = process_node(child_data, level + 1, question_id) + children_ids.append(child_id) + + question = ResearchQuestion( + question_id=question_id, + title=node_data.get("title", "研究问题")[:60], + description=node_data.get("description", "")[:500], + level=level, + rank=node_data.get("rank", 0), + parent_id=parent_id, + children=children_ids, + ) + all_questions[question_id] = question + return question_id + + # Process root node(s) + root_questions = [] + for i, q_data in enumerate(questions_data): + if i == 0: + # First question becomes the root + root_id = process_node(q_data, 0, None) + root_questions.append(root_id) + else: + # Additional top-level questions become children of root + q_id = process_node(q_data, 1, root_id) + root_questions.append(q_id) + + if not all_questions: + return self._fallback_plan(topic=topic, description=description, config=TaskConfig()) + + root = all_questions.get(root_id) + if not root: + root = ResearchQuestion( + question_id=root_id, + title=topic[:60], + description=description[:500], + level=0, + rank=0, + parent_id=None, + children=[], + ) + all_questions[root_id] = root + + return StructuredResearchPlan( + root_question=root, + all_questions=all_questions, + total_nodes=len(all_questions), + max_depth=max(q.level for q in all_questions.values()) if all_questions else 0, + ) + + def _extract_json(self, text: str) -> str: + """Extract JSON from text that might contain markdown code blocks.""" + text = text.strip() + + # Try to find JSON in code blocks + if "```json" in text: + start = text.find("```json") + 7 + end = text.find("```", start) + if end > start: + return text[start:end].strip() + + if "```" in text: + start = text.find("```") + 3 + end = text.find("```", start) + if end > start: + return text[start:end].strip() + + # Try to find 
JSON object directly + start = text.find("{") + end = text.rfind("}") + if start >= 0 and end > start: + return text[start:end + 1] + + return text + + def _fallback_plan( + self, + *, + topic: str, + description: str, + config: TaskConfig, + ) -> StructuredResearchPlan: + """Generate a fallback plan when LLM fails.""" + root_id = new_id() + + questions: dict[str, ResearchQuestion] = { + root_id: ResearchQuestion( + question_id=root_id, + title=topic[:60], + description=description[:500], + level=0, + rank=0, + parent_id=None, + children=[], + ), + } + + # Generate sub-questions based on topic + sub_topics = [ + f"{topic}的核心问题", + f"{topic}的关键证据", + f"{topic}的争议与边界", + f"{topic}的落地条件", + ] + + for i, sub_title in enumerate(sub_topics[:4]): + sub_id = new_id() + questions[sub_id] = ResearchQuestion( + question_id=sub_id, + title=sub_title[:60], + description=f'围绕"{sub_title}"展开深入研究', + level=1, + rank=i, + parent_id=root_id, + children=[], + ) + # Update root's children + root = questions[root_id] + questions[root_id] = ResearchQuestion( + question_id=root_id, + title=root.title, + description=root.description, + level=root.level, + rank=root.rank, + parent_id=root.parent_id, + children=list(root.children) + [sub_id], + ) + + return StructuredResearchPlan( + root_question=questions[root_id], + all_questions=questions, + total_nodes=len(questions), + max_depth=1, + ) + + +# Singleton instance +research_plan_generator = ResearchPlanGenerator() \ No newline at end of file diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx index 4277c93..f5e26cf 100644 --- a/frontend/src/App.tsx +++ b/frontend/src/App.tsx @@ -7,6 +7,7 @@ import { deleteConversation, downloadConversationReport, getConversation, + getLLMSettings, listConversations, renameConversation, reviseConversationPlan, @@ -21,6 +22,7 @@ import { Dialog } from "./components/Dialog"; import { ExportModal } from "./components/ExportModal"; import { LibraryPage } from "./components/LibraryPage"; import { 
PlanEditorPane } from "./components/PlanEditorPane"; +import { SettingsModal } from "./components/SettingsModal"; import { APP_CONFIG, STATUS_LABEL } from "./constants"; import type { AgentState, @@ -29,6 +31,7 @@ import type { ConversationMessage, ConversationStatus, ConversationSummary, + LLMProvider, ProgressEvent, } from "./types"; @@ -229,6 +232,8 @@ export function App() { const [currentPhase, setCurrentPhase] = useState(null); const [streamClock, setStreamClock] = useState(() => Date.now()); const [showLibrary, setShowLibrary] = useState(false); + const [showSettings, setShowSettings] = useState(false); + const [selectedLLMProvider, setSelectedLLMProvider] = useState(null); const composerRef = useRef(null); const progressWsRef = useRef(null); @@ -1083,6 +1088,14 @@ export function App() { > 文献库 +

{activeSummary?.topic ?? (draftMode ? "新研究" : "Research Flow")}

@@ -1250,6 +1263,13 @@ export function App() {
)} + setShowSettings(false)} + selectedProvider={selectedLLMProvider} + onSelectProvider={setSelectedLLMProvider} + /> + {error &&
{error}
} diff --git a/frontend/src/api.ts b/frontend/src/api.ts index 9e40250..06da776 100644 --- a/frontend/src/api.ts +++ b/frontend/src/api.ts @@ -5,6 +5,7 @@ import type { ConversationDetail, ConversationSummary, Evidence, + LLMSettingsResponse, RevisePlanResponse, RunConversationResponse, TaskResponse @@ -387,3 +388,7 @@ export async function getLibraryKeywords(topN: number = 50): Promise { return json(`${API_BASE}/api/v1/library/summary`); } + +export async function getLLMSettings(): Promise { + return json(`${API_BASE}/api/v1/settings/llm`); +} diff --git a/frontend/src/components/DAGEditor.tsx b/frontend/src/components/DAGEditor.tsx index bf3689c..5bdf049 100644 --- a/frontend/src/components/DAGEditor.tsx +++ b/frontend/src/components/DAGEditor.tsx @@ -18,13 +18,14 @@ type DagreLayoutOptions = LayoutOptions & { }; const NODE_MIN_WIDTH = 220; -const NODE_MAX_WIDTH = 300; +const NODE_MAX_WIDTH = 320; const NODE_MIN_HEIGHT = 72; -const NODE_HORIZONTAL_PADDING = 36; +const NODE_HORIZONTAL_PADDING = 40; const NODE_VERTICAL_PADDING = 28; const NODE_FONT_SIZE = 14; -const NODE_LINE_HEIGHT = 18; -const NODE_CHARACTERS_PER_LINE = 14; +const NODE_LINE_HEIGHT = 20; +const AVG_CHAR_WIDTH = 9; +const CHINESE_CHAR_WIDTH_MULTIPLIER = 1.85; const FIT_PADDING = 36; const MIN_FIT_ZOOM = 0.62; const MAX_FIT_ZOOM = 1.05; @@ -81,21 +82,79 @@ function hexToRgba(hexColor: string, alpha: number): string { return `rgba(${red}, ${green}, ${blue}, ${alpha})`; } +/** + * Generate glassmorphism colors for a node based on status + */ +function getNodeGlassColors(status: TaskNodeStatus): { + backgroundColor: string; + borderColor: string; + shadowColor: string; + textColor: string; +} { + const baseColor = getNodeColor(status); + return { + backgroundColor: hexToRgba(baseColor, 0.18), + borderColor: hexToRgba(baseColor, 0.55), + shadowColor: hexToRgba(baseColor, 0.28), + textColor: "#1e293b", + }; +} + +const CJK_REGEX = /[\u4e00-\u9fff\u3040-\u30ff\uac00-\ud7af]/; + +/** + * Calculate visual 
width of a character + * CJK characters are approximately 1.85x wider than Latin characters + */ +function getCharWidth(char: string): number { + return CJK_REGEX.test(char) ? AVG_CHAR_WIDTH * CHINESE_CHAR_WIDTH_MULTIPLIER : AVG_CHAR_WIDTH; +} + +/** + * Calculate total visual width of a string + */ +function calculateTextWidth(text: string): number { + let width = 0; + for (const char of text) { + width += getCharWidth(char); + } + return width; +} + function measureNode(title: string) { const trimmedTitle = title.trim() || "Untitled"; - const textLength = Array.from(trimmedTitle).length; - const lineCount = Math.max(1, Math.ceil(textLength / NODE_CHARACTERS_PER_LINE)); - const widestLineChars = Math.min( - NODE_CHARACTERS_PER_LINE + 4, - Math.max(NODE_CHARACTERS_PER_LINE, textLength) - ); + const maxLineWidth = NODE_MAX_WIDTH - NODE_HORIZONTAL_PADDING; + + // Split text into lines based on visual width + const lines: string[] = []; + let currentLine = ""; + let currentLineWidth = 0; + + for (const char of trimmedTitle) { + const charWidth = getCharWidth(char); + if (currentLineWidth + charWidth > maxLineWidth && currentLine.length > 0) { + lines.push(currentLine); + currentLine = char; + currentLineWidth = charWidth; + } else { + currentLine += char; + currentLineWidth += charWidth; + } + } + if (currentLine) { + lines.push(currentLine); + } + + // Calculate the width of the widest line + const widestLineWidth = Math.max(...lines.map(line => calculateTextWidth(line))); + const width = Math.min( NODE_MAX_WIDTH, - Math.max(NODE_MIN_WIDTH, widestLineChars * NODE_FONT_SIZE + NODE_HORIZONTAL_PADDING) + Math.max(NODE_MIN_WIDTH, widestLineWidth + NODE_HORIZONTAL_PADDING) ); const height = Math.max( NODE_MIN_HEIGHT, - lineCount * NODE_LINE_HEIGHT + NODE_VERTICAL_PADDING + lines.length * NODE_LINE_HEIGHT + NODE_VERTICAL_PADDING ); return { @@ -214,29 +273,34 @@ export function DAGEditor({ width: "data(width)", height: "data(height)", shape: "roundrectangle", - 
"corner-radius": "24px", + "corner-radius": "16px", padding: "14px", "text-wrap": "wrap", "text-max-width": "data(labelMaxWidth)", "text-valign": "center", "text-halign": "center", "font-size": NODE_FONT_SIZE, - "line-height": 1.3, - color: "#0f172a", - "border-width": 1.75, + "line-height": 1.4, + color: "#1e293b", + "border-width": 1.5, "border-color": "data(borderColor)", + "border-opacity": 0.6, "text-outline-width": 0, "shadow-color": "data(shadowColor)", - "shadow-blur": 18, - "shadow-opacity": 0.4, - "shadow-offset-y": 6, + "shadow-blur": 24, + "shadow-opacity": 0.35, + "shadow-offset-x": 0, + "shadow-offset-y": 8, + "background-opacity": 0.85, }; const selectedNodeStyle: Record = { - "border-width": 3, - "border-color": "#111827", - "shadow-blur": 24, - "shadow-opacity": 0.58, + "border-width": 2.5, + "border-color": "#1e293b", + "border-opacity": 1, + "shadow-blur": 32, + "shadow-opacity": 0.5, + "shadow-offset-y": 12, }; /** @@ -448,7 +512,7 @@ export function DAGEditor({ // Map nodes to Cytoscape format const cyNodes = nodes.map((node) => { - const baseColor = getNodeColor(node.status); + const glassColors = getNodeGlassColors(node.status); const nodeSize = measureNode(node.title); const fallbackPosition = { x: averageX + ((nodes.findIndex((n) => n.nodeId === node.nodeId) % 4) - 1.5) * 92, @@ -459,10 +523,10 @@ export function DAGEditor({ data: { id: node.nodeId, label: node.title, - color: baseColor, - backgroundColor: hexToRgba(baseColor, 0.3), - borderColor: hexToRgba(baseColor, 0.85), - shadowColor: hexToRgba(baseColor, 0.42), + color: glassColors.textColor, + backgroundColor: glassColors.backgroundColor, + borderColor: glassColors.borderColor, + shadowColor: glassColors.shadowColor, width: nodeSize.width, height: nodeSize.height, labelMaxWidth: nodeSize.labelMaxWidth, diff --git a/frontend/src/components/SettingsModal.tsx b/frontend/src/components/SettingsModal.tsx new file mode 100644 index 0000000..7db36ac --- /dev/null +++ 
b/frontend/src/components/SettingsModal.tsx @@ -0,0 +1,94 @@ +import { useCallback, useEffect, useState } from "react"; +import { getLLMSettings } from "../api"; +import type { LLMOption, LLMProvider, LLMSettingsResponse } from "../types"; + +interface SettingsModalProps { + open: boolean; + onClose: () => void; + selectedProvider: LLMProvider | null; + onSelectProvider: (provider: LLMProvider) => void; +} + +export function SettingsModal({ + open, + onClose, + selectedProvider, + onSelectProvider, +}: SettingsModalProps) { + const [settings, setSettings] = useState(null); + const [loading, setLoading] = useState(false); + const [error, setError] = useState(""); + + useEffect(() => { + if (!open) return; + setLoading(true); + setError(""); + getLLMSettings() + .then(setSettings) + .catch((err) => setError(err instanceof Error ? err.message : String(err))) + .finally(() => setLoading(false)); + }, [open]); + + const handleSelect = useCallback( + (option: LLMOption) => { + if (!option.configured) return; + onSelectProvider(option.provider); + }, + [onSelectProvider] + ); + + if (!open) return null; + + return ( +
+
e.stopPropagation()}> +
+

设置

+ +
+ +
+
+

LLM 提供商

+

选择用于生成研究方案的 AI 模型

+ + {loading &&
加载中...
} + {error &&
{error}
} + + {settings && ( +
+ {settings.options.map((option) => ( + + ))} +
+ )} +
+
+ +
+ +
+
+
+ ); +} \ No newline at end of file diff --git a/frontend/src/styles/components/dag-editor.css b/frontend/src/styles/components/dag-editor.css index 84d34ed..04a3487 100644 --- a/frontend/src/styles/components/dag-editor.css +++ b/frontend/src/styles/components/dag-editor.css @@ -196,27 +196,46 @@ * 3. NODE STYLING * ============================================================================ * - * Color coding for different node statuses using CSS custom properties. + * Glassmorphism color coding for different node statuses. */ +:root { + --node-glass-bg: rgba(255, 255, 255, 0.25); + --node-glass-border: rgba(255, 255, 255, 0.4); + --node-glass-shadow: rgba(0, 0, 0, 0.1); +} + +@media (prefers-color-scheme: dark) { + :root { + --node-glass-bg: rgba(30, 41, 59, 0.65); + --node-glass-border: rgba(100, 116, 139, 0.35); + --node-glass-shadow: rgba(0, 0, 0, 0.25); + } +} + .dag-node-status-pending { --node-color: #4a90d9; + --node-glow: rgba(74, 144, 217, 0.3); } .dag-node-status-running { --node-color: #ffb84d; + --node-glow: rgba(255, 184, 77, 0.35); } .dag-node-status-completed { --node-color: #4caf50; + --node-glow: rgba(76, 175, 80, 0.3); } .dag-node-status-failed { --node-color: #ff6b6b; + --node-glow: rgba(255, 107, 107, 0.35); } .dag-node-status-pruned { --node-color: #9e9e9e; + --node-glow: rgba(158, 158, 158, 0.2); } /* Node base styles (applied by Cytoscape.js) */ diff --git a/frontend/src/styles/components/settings.css b/frontend/src/styles/components/settings.css new file mode 100644 index 0000000..15e827e --- /dev/null +++ b/frontend/src/styles/components/settings.css @@ -0,0 +1,225 @@ +/* Settings Modal Styles */ + +.settings-modal-overlay { + position: fixed; + inset: 0; + z-index: var(--z-modal); + background: rgba(0, 0, 0, 0.5); + backdrop-filter: blur(4px); + display: flex; + align-items: center; + justify-content: center; + animation: settings-fade-in var(--duration-normal) var(--ease-out); +} + +@keyframes settings-fade-in { + from { + opacity: 0; 
+ } + to { + opacity: 1; + } +} + +.settings-modal { + width: min(480px, 90vw); + max-height: 85vh; + background: color-mix(in oklab, var(--card) 92%, transparent); + border-radius: var(--radius-lg); + border: 1px solid color-mix(in oklab, var(--line) 72%, transparent); + box-shadow: var(--shadow-xl); + backdrop-filter: blur(20px) saturate(140%); + display: flex; + flex-direction: column; + animation: settings-slide-up var(--duration-normal) var(--ease-out); +} + +@keyframes settings-slide-up { + from { + opacity: 0; + transform: translateY(20px); + } + to { + opacity: 1; + transform: translateY(0); + } +} + +.settings-modal-header { + display: flex; + justify-content: space-between; + align-items: center; + padding: var(--space-4) var(--space-5); + border-bottom: 1px solid color-mix(in oklab, var(--line) 72%, transparent); +} + +.settings-modal-header h2 { + margin: 0; + font-size: var(--text-lg); + font-weight: var(--font-semibold); + color: var(--ink); +} + +.settings-close-btn { + width: 32px; + height: 32px; + border: none; + background: transparent; + color: var(--ink-secondary); + font-size: var(--text-xl); + cursor: pointer; + border-radius: var(--radius-sm); + display: flex; + align-items: center; + justify-content: center; + transition: all var(--transition-fast); +} + +.settings-close-btn:hover { + background: var(--bg-soft); + color: var(--ink); +} + +.settings-modal-body { + flex: 1; + overflow-y: auto; + padding: var(--space-5); +} + +.settings-section { + margin-bottom: var(--space-6); +} + +.settings-section:last-child { + margin-bottom: 0; +} + +.settings-section h3 { + margin: 0 0 var(--space-1); + font-size: var(--text-base); + font-weight: var(--font-semibold); + color: var(--ink); +} + +.settings-section-desc { + margin: 0 0 var(--space-4); + font-size: var(--text-sm); + color: var(--muted); +} + +.settings-loading, +.settings-error { + padding: var(--space-4); + text-align: center; + font-size: var(--text-sm); + border-radius: 
var(--radius-md); +} + +.settings-loading { + background: color-mix(in oklab, var(--bg-soft) 80%, transparent); + color: var(--ink-secondary); +} + +.settings-error { + background: color-mix(in oklab, var(--danger-light) 60%, transparent); + color: var(--danger); + border: 1px solid color-mix(in oklab, var(--danger) 40%, transparent); +} + +.llm-options { + display: flex; + flex-direction: column; + gap: var(--space-2); +} + +.llm-option { + padding: var(--space-3) var(--space-4); + border: 1px solid color-mix(in oklab, var(--line) 80%, transparent); + border-radius: var(--radius-md); + background: color-mix(in oklab, var(--bg-soft) 60%, transparent); + cursor: pointer; + text-align: left; + transition: all var(--transition-fast); +} + +.llm-option:hover:not(.disabled) { + border-color: color-mix(in oklab, var(--primary) 50%, var(--line)); + background: color-mix(in oklab, var(--primary-light) 30%, transparent); +} + +.llm-option.selected { + border-color: var(--primary); + background: color-mix(in oklab, var(--primary-light) 50%, transparent); + box-shadow: 0 0 0 1px var(--primary); +} + +.llm-option.disabled { + opacity: 0.5; + cursor: not-allowed; +} + +.llm-option-header { + display: flex; + justify-content: space-between; + align-items: center; + margin-bottom: var(--space-1); +} + +.llm-option-label { + font-size: var(--text-sm); + font-weight: var(--font-medium); + color: var(--ink); +} + +.llm-option-check { + width: 20px; + height: 20px; + background: var(--primary); + color: white; + border-radius: 50%; + display: flex; + align-items: center; + justify-content: center; + font-size: var(--text-xs); +} + +.llm-option-model { + font-size: var(--text-xs); + color: var(--muted); + font-family: var(--font-mono); +} + +.llm-option-unconfigured { + margin-top: var(--space-1); + font-size: var(--text-xs); + color: var(--warning); +} + +.settings-modal-footer { + padding: var(--space-4) var(--space-5); + border-top: 1px solid color-mix(in oklab, var(--line) 72%, 
transparent); + display: flex; + justify-content: flex-end; +} + +.settings-btn-primary { + padding: var(--space-2) var(--space-4); + background: var(--primary); + color: white; + border: none; + border-radius: var(--radius-md); + font-size: var(--text-sm); + font-weight: var(--font-medium); + cursor: pointer; + transition: all var(--transition-fast); +} + +.settings-btn-primary:hover { + background: var(--primary-hover); +} + +@media (prefers-color-scheme: dark) { + .settings-modal { + background: color-mix(in oklab, var(--card) 85%, black 15%); + } +} \ No newline at end of file diff --git a/frontend/src/styles/index.css b/frontend/src/styles/index.css index 0ad0659..56c4ad7 100644 --- a/frontend/src/styles/index.css +++ b/frontend/src/styles/index.css @@ -79,4 +79,7 @@ @import './components/new-components.css'; /* Feedback animations (success, error, copy toast, progress flash) */ -@import './components/feedback.css'; \ No newline at end of file +@import './components/feedback.css'; + +/* Settings modal styles */ +@import './components/settings.css'; \ No newline at end of file diff --git a/frontend/src/types.ts b/frontend/src/types.ts index d5df9f1..44747ab 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -195,3 +195,18 @@ export interface ResearchPlan { }>; createdAt: string; } + +// LLM Settings types +export type LLMProvider = "openrouter" | "deepseek" | "openai"; + +export interface LLMOption { + provider: LLMProvider; + label: string; + model: string; + configured: boolean; +} + +export interface LLMSettingsResponse { + defaultProvider: LLMProvider; + options: LLMOption[]; +} From cbd9c73dbd4952b5b2cec752cbefd9de0ace3120 Mon Sep 17 00:00:00 2001 From: HalfAnElephant <12142917@qq.com> Date: Sun, 29 Mar 2026 16:46:22 +0800 Subject: [PATCH 02/19] Implement P0 structured ideation foundation --- backend/app/core/database.py | 6 + backend/app/deps.py | 6 + backend/app/models/schemas.py | 122 +- backend/app/prompts/__init__.py | 2 + 
backend/app/prompts/ideation.py | 30 + backend/app/prompts/novelty.py | 21 + backend/app/prompts/plan_render.py | 18 + .../repositories/conversation_repository.py | 42 +- backend/app/services/conversation_agent.py | 184 ++- .../services/four_agents/ideation_agent.py | 21 +- .../services/four_agents/planning_agent.py | 27 +- backend/app/services/idea_service.py | 271 +++++ backend/app/services/novelty_gate.py | 241 ++++ backend/app/services/writer.py | 34 +- docs/AI_SCIENTIST_V2_ADVANTAGES.md | 596 ++++++++++ docs/AI_SCIENTIST_V2_LEARNING_PLAN.md | 1042 +++++++++++++++++ frontend/src/App.tsx | 1 + frontend/src/components/PlanEditorPane.tsx | 30 +- frontend/src/styles/components/editor.css | 73 +- frontend/src/types.ts | 31 + .../test_conversation_lifecycle.py | 6 + tests/unit/test_idea_service.py | 25 + tests/unit/test_novelty_gate.py | 52 + tests/unit/test_research_models.py | 44 + 24 files changed, 2879 insertions(+), 46 deletions(-) create mode 100644 backend/app/prompts/__init__.py create mode 100644 backend/app/prompts/ideation.py create mode 100644 backend/app/prompts/novelty.py create mode 100644 backend/app/prompts/plan_render.py create mode 100644 backend/app/services/idea_service.py create mode 100644 backend/app/services/novelty_gate.py create mode 100644 docs/AI_SCIENTIST_V2_ADVANTAGES.md create mode 100644 docs/AI_SCIENTIST_V2_LEARNING_PLAN.md create mode 100644 tests/unit/test_idea_service.py create mode 100644 tests/unit/test_novelty_gate.py create mode 100644 tests/unit/test_research_models.py diff --git a/backend/app/core/database.py b/backend/app/core/database.py index 5452112..2ec5e2a 100644 --- a/backend/app/core/database.py +++ b/backend/app/core/database.py @@ -82,6 +82,7 @@ topic TEXT NOT NULL, status TEXT NOT NULL, config_json TEXT NOT NULL, + current_ideas_json TEXT NOT NULL DEFAULT '[]', task_id TEXT, created_at TEXT NOT NULL, updated_at TEXT NOT NULL @@ -140,6 +141,11 @@ def init_db() -> None: conn.execute("ALTER TABLE task_nodes ADD 
COLUMN position_x REAL") if "position_y" not in task_node_columns: conn.execute("ALTER TABLE task_nodes ADD COLUMN position_y REAL") + + cursor = conn.execute("PRAGMA table_info(conversations)") + conversation_columns = [row[1] for row in cursor.fetchall()] + if "current_ideas_json" not in conversation_columns: + conn.execute("ALTER TABLE conversations ADD COLUMN current_ideas_json TEXT NOT NULL DEFAULT '[]'") conn.commit() conn.commit() diff --git a/backend/app/deps.py b/backend/app/deps.py index 7982a8a..2aa8e8a 100644 --- a/backend/app/deps.py +++ b/backend/app/deps.py @@ -9,7 +9,9 @@ from app.services.conversation_agent import ConversationAgent from app.services.execution_engine import ExecutionEngine from app.services.four_agents.checking.agent import CheckingAgent +from app.services.idea_service import IdeaService from app.services.mcp_executor import MCPExecutor +from app.services.novelty_gate import NoveltyGateService from app.services.planner import MasterPlanner from app.services.progress_hub import ProgressHub from app.services.retrieval import RetrievalService @@ -25,6 +27,8 @@ analyst_service = AnalystService() writer_service = WriterService() mcp_executor = MCPExecutor() +idea_service = IdeaService(retrieval_service=retrieval_service) +novelty_gate_service = NoveltyGateService() research_agent = ResearchAgent(retrieval_service=retrieval_service, mcp_executor=mcp_executor) # 创建四 Agent 架构实例 @@ -56,6 +60,8 @@ evidence_repository=evidence_repository, report_agent=report_agent, planner=planner, + idea_service=idea_service, + novelty_gate_service=novelty_gate_service, ) execution_engine.set_event_listener(conversation_agent.on_task_event) diff --git a/backend/app/models/schemas.py b/backend/app/models/schemas.py index 37722b5..a1e1b04 100644 --- a/backend/app/models/schemas.py +++ b/backend/app/models/schemas.py @@ -3,7 +3,7 @@ from enum import StrEnum from typing import Any -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, 
model_validator class TaskStatus(StrEnum): @@ -53,6 +53,25 @@ class AgentStatus(StrEnum): WAITING_INPUT = "WAITING_INPUT" +class LLMProvider(StrEnum): + OPENROUTER = "openrouter" + DEEPSEEK = "deepseek" + OPENAI = "openai" + + +class ResearchMode(StrEnum): + SURVEY = "survey" + EVIDENCE_REPORT = "evidence_report" + EXPERIMENTAL_RESEARCH = "experimental_research" + PAPER_WRITEUP = "paper_writeup" + + +class IdeaStatus(StrEnum): + CANDIDATE = "CANDIDATE" + SELECTED = "SELECTED" + REJECTED = "REJECTED" + + class TaskConfig(BaseModel): maxDepth: int = Field(default=3, ge=1, le=8) maxNodes: int = Field(default=50, ge=1, le=500) @@ -60,7 +79,31 @@ class TaskConfig(BaseModel): default_factory=lambda: ["Web Search", "arXiv", "Semantic Scholar", "OpenAlex"] ) priority: int = Field(default=3, ge=1, le=5) + researchMode: ResearchMode = Field(default=ResearchMode.EVIDENCE_REPORT) + numReflections: int = Field(default=2, ge=1, le=6) + numInitialIdeas: int = Field(default=3, ge=1, le=8) + requiresNoveltyCheck: bool = False targetWordCount: int = Field(default=5000, ge=1000, le=50000) + llmProvider: LLMProvider = Field(default=LLMProvider.OPENROUTER) + + @model_validator(mode="before") + @classmethod + def _apply_research_defaults(cls, data: Any) -> Any: + if not isinstance(data, dict): + return data + payload = dict(data) + raw_mode = payload.get("researchMode", ResearchMode.EVIDENCE_REPORT) + try: + mode = raw_mode if isinstance(raw_mode, ResearchMode) else ResearchMode(str(raw_mode)) + except Exception: + mode = ResearchMode.EVIDENCE_REPORT + payload["researchMode"] = mode + if payload.get("requiresNoveltyCheck") is None or "requiresNoveltyCheck" not in payload: + payload["requiresNoveltyCheck"] = mode in { + ResearchMode.EXPERIMENTAL_RESEARCH, + ResearchMode.PAPER_WRITEUP, + } + return payload class CreateTaskRequest(BaseModel): @@ -147,6 +190,7 @@ class MessageRole(StrEnum): class MessageKind(StrEnum): USER_TEXT = "USER_TEXT" + ASSISTANT_TEXT = "ASSISTANT_TEXT" PLAN_DRAFT = 
"PLAN_DRAFT" PLAN_EDITED = "PLAN_EDITED" PLAN_REVISION = "PLAN_REVISION" @@ -195,6 +239,69 @@ class AgentStateRecord(BaseModel): error: str | None = None +class RelatedWorkItem(BaseModel): + title: str = Field(min_length=1, max_length=300) + summary: str = Field(default="", max_length=1000) + url: str = "" + relevanceScore: float = Field(default=0.0, ge=0.0, le=1.0) + + +class NoveltyAssessment(BaseModel): + summary: str = "" + noveltyScore: float = Field(default=0.0, ge=0.0, le=1.0) + isNovel: bool = False + similarWork: list[str] = Field(default_factory=list) + differentiationNotes: list[str] = Field(default_factory=list) + + +class FeasibilityAssessment(BaseModel): + summary: str = "" + feasibilityScore: float = Field(default=0.0, ge=0.0, le=1.0) + isFeasible: bool = False + blockers: list[str] = Field(default_factory=list) + assumptions: list[str] = Field(default_factory=list) + + +class ExperimentProposal(BaseModel): + title: str = Field(min_length=1, max_length=200) + objective: str = Field(default="", max_length=1000) + method: str = Field(default="", max_length=2000) + metrics: list[str] = Field(default_factory=list) + expectedOutcome: str = Field(default="", max_length=1000) + + +class RiskAssessment(BaseModel): + risk: str = Field(min_length=1, max_length=500) + severity: str = Field(default="medium", pattern="^(low|medium|high)$") + mitigation: str = Field(default="", max_length=1000) + + +class ResearchScoreCard(BaseModel): + noveltyScore: float = Field(default=0.0, ge=0.0, le=1.0) + feasibilityScore: float = Field(default=0.0, ge=0.0, le=1.0) + evidenceStrengthScore: float = Field(default=0.0, ge=0.0, le=1.0) + writeupReadinessScore: float = Field(default=0.0, ge=0.0, le=1.0) + overallScore: float = Field(default=0.0, ge=0.0, le=1.0) + + +class ResearchIdea(BaseModel): + ideaId: str + title: str = Field(min_length=1, max_length=200) + problemStatement: str = Field(default="", max_length=2000) + shortHypothesis: str = Field(default="", max_length=1000) 
+ abstract: str = Field(default="", max_length=3000) + relatedWork: list[RelatedWorkItem] = Field(default_factory=list) + differentiators: list[str] = Field(default_factory=list) + noveltyAssessment: NoveltyAssessment = Field(default_factory=NoveltyAssessment) + feasibilityAssessment: FeasibilityAssessment = Field(default_factory=FeasibilityAssessment) + experimentProposals: list[ExperimentProposal] = Field(default_factory=list) + riskFactors: list[RiskAssessment] = Field(default_factory=list) + limitations: list[str] = Field(default_factory=list) + scoreCard: ResearchScoreCard = Field(default_factory=ResearchScoreCard) + sourceEvidenceIds: list[str] = Field(default_factory=list) + status: IdeaStatus = Field(default=IdeaStatus.CANDIDATE) + + class ResearchHypothesis(BaseModel): """研究假设模型。""" hypothesisId: str @@ -218,6 +325,7 @@ class ConversationDetail(ConversationSummary): messages: list[ConversationMessage] = Field(default_factory=list) agentStates: list[AgentStateRecord] = Field(default_factory=list) currentHypothesis: ResearchHypothesis | None = None + currentIdeas: list[ResearchIdea] = Field(default_factory=list) class CreateConversationRequest(BaseModel): @@ -258,6 +366,18 @@ class ConversationDeleteResponse(BaseModel): deleted: bool +class LLMOption(BaseModel): + provider: LLMProvider + label: str + model: str + configured: bool + + +class LLMSettingsResponse(BaseModel): + defaultProvider: LLMProvider + options: list[LLMOption] + + class ConversationBulkDeleteResponse(BaseModel): deleted: bool deletedCount: int diff --git a/backend/app/prompts/__init__.py b/backend/app/prompts/__init__.py new file mode 100644 index 0000000..b161e4d --- /dev/null +++ b/backend/app/prompts/__init__.py @@ -0,0 +1,2 @@ +"""Prompt builders for research ideation and plan rendering.""" + diff --git a/backend/app/prompts/ideation.py b/backend/app/prompts/ideation.py new file mode 100644 index 0000000..9d22c83 --- /dev/null +++ b/backend/app/prompts/ideation.py @@ -0,0 +1,30 @@ 
+from __future__ import annotations + + +def build_ideation_system_prompt(*, num_ideas: int, num_reflections: int) -> str: + return ( + "你是一名研究构思 Agent。请围绕用户主题提出结构化研究想法," + "输出必须是 JSON,不要输出 Markdown 或解释。\n" + f"请至少生成 {num_ideas} 个候选 idea,并在内部做 {num_reflections} 轮自我检查。\n" + "每个 idea 必须包含:title, problemStatement, shortHypothesis, abstract, " + "relatedWork, differentiators, experimentProposals, riskFactors, limitations。\n" + "relatedWork 必须是数组;experimentProposals 必须包含 title/objective/method/metrics/expectedOutcome;" + "riskFactors 必须包含 risk/severity/mitigation。\n" + '最终 JSON 形状固定为:{"ideas":[...]}。' + ) + + +def build_ideation_user_prompt( + *, + topic: str, + search_sources: list[str], + evidence_snippets: str, +) -> str: + return ( + f"研究主题:{topic}\n" + f"可用检索源:{', '.join(search_sources) or '无'}\n" + "请基于主题和已有线索提出多个具有区分度的候选研究 idea。\n" + "每个 idea 需要说明与现有工作的差异、核心假设、实验或验证方式,以及主要风险。\n" + f"首轮参考证据:\n{evidence_snippets or '- 暂无外部证据,需基于主题做保守构思。'}" + ) + diff --git a/backend/app/prompts/novelty.py b/backend/app/prompts/novelty.py new file mode 100644 index 0000000..23aa9f4 --- /dev/null +++ b/backend/app/prompts/novelty.py @@ -0,0 +1,21 @@ +from __future__ import annotations + + +def build_novelty_system_prompt() -> str: + return ( + "你是一名研究新颖性评审 Agent。请评估 idea 是否足够新颖且可执行。" + "输出必须是 JSON,不要输出解释。" + 'JSON 形状固定为:{"summary":"","noveltyScore":0.0,"isNovel":true,' + '"similarWork":[],"differentiationNotes":[],"feasibilitySummary":"","feasibilityScore":0.0,' + '"isFeasible":true,"blockers":[],"assumptions":[]}.' 
+ ) + + +def build_novelty_user_prompt(*, topic: str, idea_json: str, evidence_snippets: str) -> str: + return ( + f"主题:{topic}\n" + f"候选 idea JSON:\n{idea_json}\n\n" + f"相关证据与工作线索:\n{evidence_snippets or '- 暂无'}\n\n" + "请识别该 idea 与已有工作的重叠、差异、不新颖风险,以及可执行性阻碍。" + ) + diff --git a/backend/app/prompts/plan_render.py b/backend/app/prompts/plan_render.py new file mode 100644 index 0000000..d1bfe09 --- /dev/null +++ b/backend/app/prompts/plan_render.py @@ -0,0 +1,18 @@ +from __future__ import annotations + + +def build_plan_render_system_prompt() -> str: + return ( + "你是研究计划渲染 Agent。请把结构化研究 idea 渲染成可执行 Markdown 研究方案。" + "输出必须是完整 Markdown,并包含 front matter。不要输出额外解释。" + ) + + +def build_plan_render_user_prompt(*, topic: str, config_summary: str, idea_json: str) -> str: + return ( + f"主题:{topic}\n" + f"配置:{config_summary}\n" + f"结构化 idea:\n{idea_json}\n\n" + "请输出包含 front matter 的完整研究计划,正文至少覆盖:研究目标、研究问题拆解、" + "方法与来源、执行步骤、风险与边界、交付标准。" + ) diff --git a/backend/app/repositories/conversation_repository.py b/backend/app/repositories/conversation_repository.py index 78310b6..db4eb31 100644 --- a/backend/app/repositories/conversation_repository.py +++ b/backend/app/repositories/conversation_repository.py @@ -15,6 +15,7 @@ MessageKind, MessageRole, PlanRevision, + ResearchIdea, TaskConfig, ) @@ -48,11 +49,11 @@ def create_conversation( conn.execute( """ INSERT INTO conversations( - conversation_id, topic, status, config_json, task_id, created_at, updated_at - ) VALUES(?, ?, ?, ?, ?, ?, ?) + conversation_id, topic, status, config_json, current_ideas_json, task_id, created_at, updated_at + ) VALUES(?, ?, ?, ?, ?, ?, ?, ?) 
""", (conversation_id, topic, status.value, - config.model_dump_json(), None, ts, ts), + config.model_dump_json(), "[]", None, ts, ts), ) conn.commit() return self.get_summary(conversation_id) @@ -132,6 +133,40 @@ def set_task_id(self, conversation_id: str, task_id: str) -> None: if conn.total_changes == 0: raise KeyError(conversation_id) + def set_current_ideas(self, conversation_id: str, ideas: list[ResearchIdea]) -> None: + self.get_summary(conversation_id) + with get_connection() as conn: + conn.execute( + """ + UPDATE conversations + SET current_ideas_json = ?, updated_at = ? + WHERE conversation_id = ? + """, + ( + json.dumps([idea.model_dump(mode="json") for idea in ideas], ensure_ascii=False), + now_iso(), + conversation_id, + ), + ) + conn.commit() + + def get_current_ideas(self, conversation_id: str) -> list[ResearchIdea]: + with get_connection() as conn: + row = conn.execute( + "SELECT current_ideas_json FROM conversations WHERE conversation_id = ?", + (conversation_id,), + ).fetchone() + if row is None: + raise KeyError(conversation_id) + raw_items = json.loads(row["current_ideas_json"] or "[]") + if not isinstance(raw_items, list): + return [] + return [ + ResearchIdea.model_validate(item) + for item in raw_items + if isinstance(item, dict) + ] + def find_by_task_id(self, task_id: str) -> ConversationSummary | None: with get_connection() as conn: row = conn.execute( @@ -581,4 +616,5 @@ def get_detail(self, conversation_id: str) -> ConversationDetail: currentPlan=self.get_current_plan(conversation_id), messages=messages, agentStates=self._derive_agent_states_from_messages(messages), + currentIdeas=self.get_current_ideas(conversation_id), ) diff --git a/backend/app/services/conversation_agent.py b/backend/app/services/conversation_agent.py index 0497b87..9a26b62 100644 --- a/backend/app/services/conversation_agent.py +++ b/backend/app/services/conversation_agent.py @@ -19,15 +19,20 @@ MessageRole, NodeStatus, PlanRevision, + ResearchIdea, + ResearchMode, 
RunConversationResponse, TaskConfig, TaskStatus, ) +from app.prompts.plan_render import build_plan_render_system_prompt, build_plan_render_user_prompt from app.repositories.conversation_repository import ConversationRepository from app.repositories.evidence_repository import EvidenceRepository from app.repositories.task_repository import TaskRepository from app.services.agents import ReportAgent from app.services.execution_engine import ExecutionEngine +from app.services.idea_service import IdeaService +from app.services.novelty_gate import NoveltyGateService from app.services.planner import MasterPlanner @@ -106,6 +111,8 @@ def __init__( evidence_repository: EvidenceRepository | None = None, report_agent: ReportAgent | None = None, planner: MasterPlanner | None = None, + idea_service: IdeaService | None = None, + novelty_gate_service: NoveltyGateService | None = None, ) -> None: self.repository = repository self.task_repository = task_repository @@ -113,6 +120,8 @@ def __init__( self.evidence_repository = evidence_repository self.report_agent = report_agent self.planner = planner + self.idea_service = idea_service or IdeaService() + self.novelty_gate_service = novelty_gate_service or NoveltyGateService() def _build_task_payload( self, @@ -222,7 +231,12 @@ async def create_conversation(self, *, topic: str, config: TaskConfig | None = N content=self._build_ack_message(topic=topic, config=selected_config), metadata={"stage": "ACKNOWLEDGED"}, ) - markdown = await asyncio.to_thread(self._generate_initial_plan, topic=topic, config=selected_config) + ideas, markdown = await asyncio.to_thread( + self._prepare_initial_conversation_assets, + topic=topic, + config=selected_config, + ) + self.repository.set_current_ideas(conversation_id, ideas) revision = self.repository.add_plan_revision( conversation_id, author=MessageRole.ASSISTANT, @@ -441,12 +455,14 @@ async def _apply_plan_revision( content=instruction, ) config = self.repository.get_config(conversation_id) + 
selected_idea = self._selected_idea_for_conversation(conversation_id) revised = await asyncio.to_thread( self._generate_revised_plan, topic=topic, config=config, current_plan=current_plan, instruction=instruction, + selected_idea=selected_idea, ) revision = self.repository.add_plan_revision( conversation_id, @@ -1019,27 +1035,11 @@ def _parse_sources(raw: str) -> list[str]: return [part for part in parts if part] def _generate_initial_plan(self, *, topic: str, config: TaskConfig) -> str: - prompt = ( - "请为用户生成一个可执行的研究方案,输出必须是 Markdown,并且必须包含 front matter。\n" - "front matter 字段固定为:title, topic, max_depth, max_nodes, priority, search_sources, target_word_count, llm_provider。\n" - "正文至少包含:研究目标、研究问题拆解、方法与来源、执行步骤、风险与边界、交付标准。\n" - "严禁输出解释性前言,直接返回完整 Markdown。" - ) - user_input = ( - f"主题:{topic}\n" - f"配置建议:max_depth={config.maxDepth}, max_nodes={config.maxNodes}, " - f"priority={config.priority}, search_sources={config.searchSources}, " - f"target_word_count={config.targetWordCount}, llm_provider={config.llmProvider.value}\n" - "输出语言:中文。" - ) - generated = self._chat_complete( - system_prompt=prompt, user_prompt=user_input, provider=config.llmProvider) - if generated: - normalized = self._ensure_front_matter( - generated, topic=topic, config=config) - if normalized: - return normalized - return self._fallback_plan(topic=topic, config=config) + ideas = self._generate_initial_ideas(topic=topic, config=config) + selected = self._select_primary_idea(ideas) + if selected is None: + return self._fallback_plan(topic=topic, config=config) + return self._render_plan_from_idea(topic=topic, config=config, idea=selected) def _generate_revised_plan( self, @@ -1048,16 +1048,22 @@ def _generate_revised_plan( config: TaskConfig, current_plan: str, instruction: str, + selected_idea: ResearchIdea | None = None, ) -> str: prompt = ( "你是研究计划修订 Agent。请根据用户指令修订「当前研究方案」。\n" "输出必须是完整 Markdown,且必须包含完整 front matter。\n" "不要解释你做了什么,不要输出多余文本,只返回最终方案。" ) + selected_idea_text = ( + f"\n\n当前选中的结构化 
idea:\n{selected_idea.model_dump_json(indent=2)}" + if selected_idea is not None + else "" + ) user_input = ( f"主题:{topic}\n" f"用户指令:{instruction}\n\n" - f"当前方案如下:\n{current_plan}\n\n" + f"当前方案如下:\n{current_plan}{selected_idea_text}\n\n" f"保底配置:max_depth={config.maxDepth}, max_nodes={config.maxNodes}, " f"priority={config.priority}, search_sources={config.searchSources}, " f"target_word_count={config.targetWordCount}, llm_provider={config.llmProvider.value}" @@ -1069,8 +1075,90 @@ def _generate_revised_plan( generated, topic=topic, config=config) if normalized: return normalized + if selected_idea is not None: + rendered = self._render_plan_from_idea(topic=topic, config=config, idea=selected_idea) + if rendered.strip(): + return rendered return self._fallback_revision(current_plan=current_plan, instruction=instruction, topic=topic, config=config) + def _prepare_initial_conversation_assets( + self, + *, + topic: str, + config: TaskConfig, + ) -> tuple[list[ResearchIdea], str]: + ideas = self._generate_initial_ideas(topic=topic, config=config) + selected = self._select_primary_idea(ideas) + markdown = self._render_plan_from_idea( + topic=topic, + config=config, + idea=selected, + ) if selected is not None else self._fallback_plan(topic=topic, config=config) + return ideas, markdown + + def _generate_initial_ideas(self, *, topic: str, config: TaskConfig) -> list[ResearchIdea]: + ideas, evidences = self.idea_service.generate_ideas(topic=topic, config=config) + if not ideas: + ideas = [] + evaluated = self.novelty_gate_service.evaluate_ideas( + topic=topic, + ideas=ideas, + evidences=evidences, + llm_provider=config.llmProvider, + enforce_thresholds=config.requiresNoveltyCheck, + ) + return evaluated or ideas + + def _render_plan_from_idea( + self, + *, + topic: str, + config: TaskConfig, + idea: ResearchIdea | None, + ) -> str: + if idea is None: + return self._fallback_plan(topic=topic, config=config) + if config.researchMode in {ResearchMode.SURVEY, 
ResearchMode.EVIDENCE_REPORT}: + return self._fallback_plan_from_idea(topic=topic, config=config, idea=idea) + config_summary = ( + f"max_depth={config.maxDepth}, max_nodes={config.maxNodes}, priority={config.priority}, " + f"research_mode={config.researchMode.value}, search_sources={config.searchSources}, " + f"target_word_count={config.targetWordCount}, llm_provider={config.llmProvider.value}" + ) + generated = self._chat_complete( + system_prompt=build_plan_render_system_prompt(), + user_prompt=build_plan_render_user_prompt( + topic=topic, + config_summary=config_summary, + idea_json=idea.model_dump_json(indent=2), + ), + provider=config.llmProvider, + ) + if generated: + normalized = self._ensure_front_matter( + generated, + topic=idea.title or topic, + config=config, + ) + if normalized: + return normalized + return self._fallback_plan_from_idea(topic=topic, config=config, idea=idea) + + def _select_primary_idea(self, ideas: list[ResearchIdea]) -> ResearchIdea | None: + if not ideas: + return None + selected = [idea for idea in ideas if idea.status.value == "SELECTED"] + if selected: + return selected[0] + return max(ideas, key=lambda idea: idea.scoreCard.overallScore) + + def _selected_idea_for_conversation(self, conversation_id: str) -> ResearchIdea | None: + try: + ideas = self.repository.get_current_ideas(conversation_id) + except KeyError: + return None + return self._select_primary_idea(ideas) + def _chat_complete( self, *, @@ -1258,6 +1346,56 @@ def _fallback_plan(*, topic: str, config: TaskConfig) -> str: "- 关键结论标注证据引用并给出行动建议。\n" ) + @staticmethod + def _fallback_plan_from_idea(*, topic: str, config: TaskConfig, idea: ResearchIdea) -> str: + related_work = "\n".join( + f"- {item.title}:{item.summary or '已有工作线索。'}" + for item in idea.relatedWork[:3] + ) or "- 首轮尚未形成稳定的相关工作清单,后续需补充检索。" + differentiators = "\n".join( + f"- {item}" for item in idea.differentiators[:4] + ) or "- 需要在执行阶段补充与已有工作的差异化说明。" + experiments = "\n".join( + f"{index + 1}. 
{proposal.title}:{proposal.objective or proposal.method or '补充验证方案。'}" + for index, proposal in enumerate(idea.experimentProposals[:4]) + ) or "1. 围绕核心假设补充验证路径与评估指标。" + risks = "\n".join( + f"- {item.risk}({item.severity}):{item.mitigation}" + for item in idea.riskFactors[:4] + ) or "- 需进一步确认相关工作重叠度与证据充分性。" + limitations = "\n".join( + f"- {item}" for item in idea.limitations[:4] + ) or "- 当前为首轮结构化 idea,后续还需补充更多证据。" + + return ( + "---\n" + f"title: {idea.title or topic}\n" + f"topic: {topic}\n" + f"max_depth: {config.maxDepth}\n" + f"max_nodes: {config.maxNodes}\n" + f"priority: {config.priority}\n" + f"search_sources: [{', '.join(config.searchSources)}]\n" + f"target_word_count: {config.targetWordCount}\n" + f"llm_provider: {config.llmProvider.value}\n" + "---\n\n" + "## 研究目标\n" + f"{idea.problemStatement or f'围绕“{topic}”形成结构化研究入口。'}\n\n" + "## 研究问题拆解\n" + f"1. 核心假设:{idea.shortHypothesis or f'识别“{topic}”的关键问题。'}\n" + "2. 该主题与已有工作的重叠和差异在哪里。\n" + "3. 应如何设计验证路径并明确结论边界。\n\n" + "## 方法与来源\n" + f"{related_work}\n\n" + "## 执行步骤\n" + f"{experiments}\n\n" + "## 风险与边界\n" + f"{risks}\n" + f"{limitations}\n\n" + "## 交付标准\n" + f"{differentiators}\n" + "- 输出兼容当前执行引擎的 Markdown 研究方案与最终报告。\n" + ) + def _fallback_revision(self, *, current_plan: str, instruction: str, topic: str, config: TaskConfig) -> str: normalized = self._ensure_front_matter( current_plan, topic=topic, config=config) diff --git a/backend/app/services/four_agents/ideation_agent.py b/backend/app/services/four_agents/ideation_agent.py index d9552af..d8c2156 100644 --- a/backend/app/services/four_agents/ideation_agent.py +++ b/backend/app/services/four_agents/ideation_agent.py @@ -1,9 +1,10 @@ -"""构思智能体 - 负责文献调研、研究方向分析和研究假设生成。""" +"""构思智能体 - 负责文献调研、研究方向分析和候选 idea 生成。""" from __future__ import annotations -from app.models.schemas import AgentType, ResearchHypothesis +from app.models.schemas import AgentType, ResearchHypothesis, TaskConfig from app.services.four_agents.base import AgentContext, AgentResult, BaseAgent 
+from app.services.idea_service import IdeaService from app.services.retrieval import RetrievalService @@ -25,6 +26,7 @@ def __init__( ) -> None: super().__init__(on_progress) self.retrieval = retrieval_service + self.idea_service = IdeaService(retrieval_service) async def run(self, context: AgentContext) -> AgentResult: """执行构思阶段任务。 @@ -49,15 +51,24 @@ async def run(self, context: AgentContext) -> AgentResult: directions = await self._analyze_directions(context.topic, evidences) self._set_progress(70, f"识别到 {len(directions)} 个研究方向") - # 3. 生成研究假设 - self._set_progress(80, "生成研究假设") + # 3. 生成候选 idea 与兼容性假设 + self._set_progress(80, "生成候选研究 idea") + raw_task_config = context.config.get("taskConfig") + config = TaskConfig.model_validate( + raw_task_config if isinstance(raw_task_config, dict) else context.config + ) + ideas, _ = self.idea_service.generate_ideas( + topic=context.topic, + config=config, + ) hypothesis = await self._generate_hypothesis(context, evidences, directions) - self._set_progress(95, "假设生成完成") + self._set_progress(95, f"已生成 {len(ideas)} 个 idea") return AgentResult( success=True, output={ "hypothesis": hypothesis.model_dump(), + "ideas": [idea.model_dump(mode="json") for idea in ideas], "evidence_count": len(evidences), "directions": directions } diff --git a/backend/app/services/four_agents/planning_agent.py b/backend/app/services/four_agents/planning_agent.py index cf586f9..ef34bec 100644 --- a/backend/app/services/four_agents/planning_agent.py +++ b/backend/app/services/four_agents/planning_agent.py @@ -2,7 +2,7 @@ from __future__ import annotations -from app.models.schemas import AgentType, ResearchHypothesis, ResearchPlan +from app.models.schemas import AgentType, ResearchHypothesis, ResearchIdea, ResearchPlan from app.services.four_agents.base import AgentContext, AgentResult, BaseAgent @@ -30,6 +30,7 @@ async def run(self, context: AgentContext) -> AgentResult: 包含 ResearchPlan 的执行结果。 """ hypothesis_data = context.config.get("hypothesis") + 
ideas_data = context.config.get("ideas", []) if not hypothesis_data: return AgentResult( success=False, @@ -47,6 +48,8 @@ async def run(self, context: AgentContext) -> AgentResult: feasibility = self._assess_feasibility(hypothesis, context) self._set_progress(50, "生成研究方案") + selected_idea = self._select_idea(ideas_data) + # 生成方案 plan = await self._create_plan(hypothesis, context, feasibility) self._set_progress(90, "方案生成完成") @@ -55,7 +58,8 @@ async def run(self, context: AgentContext) -> AgentResult: success=True, output={ "plan": plan.model_dump(), - "feasibility": feasibility + "feasibility": feasibility, + "selectedIdea": selected_idea.model_dump(mode="json") if selected_idea else None, } ) @@ -124,4 +128,21 @@ async def _create_plan( hypothesisId=hypothesis.hypothesisId, steps=steps, createdAt=datetime.utcnow().isoformat() - ) \ No newline at end of file + ) + + @staticmethod + def _select_idea(ideas_data: list[dict]) -> ResearchIdea | None: + ideas: list[ResearchIdea] = [] + for item in ideas_data: + if not isinstance(item, dict): + continue + try: + ideas.append(ResearchIdea.model_validate(item)) + except Exception: + continue + if not ideas: + return None + selected = [idea for idea in ideas if idea.status.value == "SELECTED"] + if selected: + return selected[0] + return max(ideas, key=lambda idea: idea.scoreCard.overallScore) diff --git a/backend/app/services/idea_service.py b/backend/app/services/idea_service.py new file mode 100644 index 0000000..a018e05 --- /dev/null +++ b/backend/app/services/idea_service.py @@ -0,0 +1,271 @@ +from __future__ import annotations + +import asyncio +import json +from typing import Any + +import httpx + +from app.core.config import settings +from app.core.utils import new_id +from app.models.schemas import ( + ExperimentProposal, + Evidence, + IdeaStatus, + LLMProvider, + RelatedWorkItem, + ResearchIdea, + ResearchMode, + ResearchScoreCard, + RiskAssessment, + TaskConfig, +) +from app.prompts.ideation import 
build_ideation_system_prompt, build_ideation_user_prompt +from app.services.retrieval import RetrievalService + + +class IdeaService: + def __init__(self, retrieval_service: RetrievalService | None = None) -> None: + self.retrieval_service = retrieval_service + + def generate_ideas(self, *, topic: str, config: TaskConfig) -> tuple[list[ResearchIdea], list[Evidence]]: + evidences = self._collect_seed_evidence(topic=topic, config=config) + llm_ideas: list[ResearchIdea] = [] + if config.researchMode in {ResearchMode.EXPERIMENTAL_RESEARCH, ResearchMode.PAPER_WRITEUP}: + llm_ideas = self._generate_with_llm(topic=topic, config=config, evidences=evidences) + ideas = llm_ideas or self._fallback_ideas(topic=topic, config=config, evidences=evidences) + return ideas[: config.numInitialIdeas], evidences + + def _collect_seed_evidence(self, *, topic: str, config: TaskConfig) -> list[Evidence]: + if self.retrieval_service is None: + return [] + try: + asyncio.get_running_loop() + return [] + except RuntimeError: + pass + try: + results = asyncio.run( + self.retrieval_service.retrieve( + task_id="conversation-idea", + node_id="conversation-idea", + query=topic, + sources=config.searchSources, + ) + ) + return results[:6] + except Exception: + return [] + + def _generate_with_llm( + self, + *, + topic: str, + config: TaskConfig, + evidences: list[Evidence], + ) -> list[ResearchIdea]: + if settings.use_mock_sources: + return [] + base_url, api_key, model = self._resolve_provider(config.llmProvider) + if not base_url or not api_key: + return [] + + evidence_snippets = self._evidence_snippets(evidences) + system_prompt = build_ideation_system_prompt( + num_ideas=config.numInitialIdeas, + num_reflections=config.numReflections, + ) + user_prompt = build_ideation_user_prompt( + topic=topic, + search_sources=config.searchSources, + evidence_snippets=evidence_snippets, + ) + try: + with httpx.Client(timeout=settings.llm_timeout_medium) as client: + response = client.post( + 
f"{base_url}/chat/completions", + headers={ + "Authorization": f"Bearer {api_key}", + "Content-Type": "application/json", + }, + json={ + "model": model, + "temperature": 0.4, + "messages": [ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_prompt}, + ], + }, + ) + response.raise_for_status() + content = ( + response.json().get("choices", [{}])[0] + .get("message", {}) + .get("content", "") + .strip() + ) + except Exception: + return [] + + payload = self._extract_json_payload(content) + ideas_raw = payload.get("ideas") if isinstance(payload, dict) else None + if not isinstance(ideas_raw, list): + return [] + normalized: list[ResearchIdea] = [] + for index, item in enumerate(ideas_raw[: config.numInitialIdeas]): + if not isinstance(item, dict): + continue + normalized.append( + self._normalize_idea_payload( + payload=item, + topic=topic, + fallback_index=index, + evidences=evidences, + ) + ) + return normalized + + def _fallback_ideas( + self, + *, + topic: str, + config: TaskConfig, + evidences: list[Evidence], + ) -> list[ResearchIdea]: + evidence_ids = [ev.id for ev in evidences[:4]] + evidence_titles = [ev.metadata.title for ev in evidences[:3] if ev.metadata.title] + directions = [ + "现状与瓶颈", + "方法机制与改进空间", + "评估体系与风险边界", + "落地条件与组织影响", + "与现有方案的差异化价值", + ] + ideas: list[ResearchIdea] = [] + for index in range(config.numInitialIdeas): + direction = directions[index % len(directions)] + title = f"{topic}:{direction}" + related = [ + RelatedWorkItem( + title=evidence_titles[i] if i < len(evidence_titles) else f"{topic} 相关工作线索 {i + 1}", + summary=f"与“{title}”相关的已有研究线索,用于后续 novelty 对照。", + relevanceScore=round(max(0.35, 0.72 - i * 0.1), 2), + ) + for i in range(min(2, max(1, len(evidence_titles) or 1))) + ] + ideas.append( + ResearchIdea( + ideaId=new_id(), + title=title, + problemStatement=f"围绕“{topic}”识别{direction},建立可验证的问题定义与分析边界。", + shortHypothesis=f"如果围绕“{direction}”组织研究链路,可以更系统地回答“{topic}”的核心问题。", + abstract=( + f"该 idea 
聚焦“{topic}”的{direction},通过相关工作对照、证据归纳和实验/验证设想," + "形成可执行的研究入口。" + ), + relatedWork=related, + differentiators=[ + f"强调“{direction}”作为独立切入点,而不是泛泛罗列资料。", + "要求把差异点、风险和验证方法前置到研究入口。", + ], + experimentProposals=[ + ExperimentProposal( + title=f"{direction}验证方案", + objective=f"验证“{topic}”在“{direction}”上的关键判断是否成立。", + method="整理代表性案例、提炼对照维度,并用统一指标比较优劣。", + metrics=["结论一致性", "证据覆盖度", "实施复杂度"], + expectedOutcome="得到可写入研究方案的核心判断和待验证假设。", + ) + ], + riskFactors=[ + RiskAssessment( + risk="相关工作重叠度可能偏高", + severity="medium", + mitigation="在 novelty gate 中补充与已有工作的差异化对照。", + ) + ], + limitations=[ + "首轮 idea 仍基于有限证据线索,后续需补充更系统的相关工作检查。" + ], + scoreCard=ResearchScoreCard(), + sourceEvidenceIds=evidence_ids, + status=IdeaStatus.CANDIDATE, + ) + ) + return ideas + + def _normalize_idea_payload( + self, + *, + payload: dict[str, Any], + topic: str, + fallback_index: int, + evidences: list[Evidence], + ) -> ResearchIdea: + related_raw = payload.get("relatedWork") + experiments_raw = payload.get("experimentProposals") + risks_raw = payload.get("riskFactors") + related = [ + RelatedWorkItem.model_validate(item) + for item in related_raw + if isinstance(item, dict) + ] if isinstance(related_raw, list) else [] + experiments = [ + ExperimentProposal.model_validate(item) + for item in experiments_raw + if isinstance(item, dict) + ] if isinstance(experiments_raw, list) else [] + risks = [ + RiskAssessment.model_validate(item) + for item in risks_raw + if isinstance(item, dict) + ] if isinstance(risks_raw, list) else [] + + return ResearchIdea( + ideaId=str(payload.get("ideaId") or new_id()), + title=str(payload.get("title") or f"{topic} 候选方案 {fallback_index + 1}")[:200], + problemStatement=str(payload.get("problemStatement") or f"围绕“{topic}”构建问题定义。")[:2000], + shortHypothesis=str(payload.get("shortHypothesis") or f"该 idea 用于回答“{topic}”的关键研究问题。")[:1000], + abstract=str(payload.get("abstract") or f"围绕“{topic}”形成结构化研究构想。")[:3000], + relatedWork=related, + differentiators=[str(item)[:300] for 
item in payload.get("differentiators", []) if str(item).strip()], + experimentProposals=experiments, + riskFactors=risks, + limitations=[str(item)[:300] for item in payload.get("limitations", []) if str(item).strip()], + sourceEvidenceIds=[ev.id for ev in evidences[:4]], + status=IdeaStatus.CANDIDATE, + ) + + @staticmethod + def _extract_json_payload(content: str) -> dict[str, Any]: + text = content.strip() + if text.startswith("```"): + lines = text.splitlines() + if len(lines) >= 3: + text = "\n".join(lines[1:-1]).strip() + try: + payload = json.loads(text) + return payload if isinstance(payload, dict) else {} + except Exception: + return {} + + @staticmethod + def _evidence_snippets(evidences: list[Evidence]) -> str: + if not evidences: + return "" + return "\n".join( + f"- [{ev.id}] {ev.metadata.title} | {ev.metadata.publishDate or '未知'} | {ev.metadata.abstract[:180]}" + for ev in evidences[:4] + ) + + @staticmethod + def _resolve_provider(provider: LLMProvider | str | None = None) -> tuple[str, str, str]: + selected = (provider.value if isinstance(provider, LLMProvider) else provider) or settings.default_llm_provider + provider_name = selected.lower().strip() + if provider_name == "openrouter": + return settings.openrouter_base_url, settings.openrouter_api_key, settings.openrouter_model + if provider_name == "deepseek": + return settings.deepseek_base_url, settings.deepseek_api_key, settings.deepseek_model + if provider_name == "openai": + return settings.openai_base_url, settings.openai_api_key, settings.openai_model + return "", "", "" diff --git a/backend/app/services/novelty_gate.py b/backend/app/services/novelty_gate.py new file mode 100644 index 0000000..c1a4cec --- /dev/null +++ b/backend/app/services/novelty_gate.py @@ -0,0 +1,241 @@ +from __future__ import annotations + +import json + +import httpx + +from app.core.config import settings +from app.models.schemas import ( + Evidence, + FeasibilityAssessment, + IdeaStatus, + LLMProvider, + 
NoveltyAssessment, + ResearchIdea, + ResearchScoreCard, +) +from app.prompts.novelty import build_novelty_system_prompt, build_novelty_user_prompt + + +class NoveltyGateService: + NOVELTY_THRESHOLD = 0.55 + FEASIBILITY_THRESHOLD = 0.5 + + def evaluate_ideas( + self, + *, + topic: str, + ideas: list[ResearchIdea], + evidences: list[Evidence], + llm_provider: LLMProvider | str | None = None, + enforce_thresholds: bool = True, + ) -> list[ResearchIdea]: + evaluated = [ + self._evaluate_single_idea( + topic=topic, + idea=idea, + evidences=evidences, + llm_provider=llm_provider, + enforce_thresholds=enforce_thresholds, + ) + for idea in ideas + ] + if not evaluated: + return [] + + best = max(evaluated, key=lambda idea: idea.scoreCard.overallScore) + selected_id = best.ideaId + normalized: list[ResearchIdea] = [] + for idea in evaluated: + if idea.ideaId == selected_id: + normalized.append(idea.model_copy(update={"status": IdeaStatus.SELECTED})) + continue + status = IdeaStatus.REJECTED if enforce_thresholds and not ( + idea.noveltyAssessment.noveltyScore >= self.NOVELTY_THRESHOLD + and idea.feasibilityAssessment.feasibilityScore >= self.FEASIBILITY_THRESHOLD + ) else IdeaStatus.CANDIDATE + normalized.append(idea.model_copy(update={"status": status})) + return normalized + + def _evaluate_single_idea( + self, + *, + topic: str, + idea: ResearchIdea, + evidences: list[Evidence], + llm_provider: LLMProvider | str | None, + enforce_thresholds: bool, + ) -> ResearchIdea: + llm_assessment = self._evaluate_with_llm( + topic=topic, + idea=idea, + evidences=evidences, + llm_provider=llm_provider, + ) + + evidence_strength = min(1.0, 0.3 + 0.12 * min(len(idea.sourceEvidenceIds or evidences), 4)) + writeup_readiness = min( + 1.0, + 0.35 + + (0.15 if idea.abstract.strip() else 0.0) + + (0.15 if idea.problemStatement.strip() else 0.0) + + (0.15 if idea.shortHypothesis.strip() else 0.0) + + 0.05 * min(len(idea.limitations), 2), + ) + + novelty = 
llm_assessment["novelty"].noveltyScore if llm_assessment else min( + 1.0, + 0.34 + + 0.08 * min(len(idea.differentiators), 3) + + 0.05 * min(len(idea.experimentProposals), 2) + + self._title_variation_boost(idea.title), + ) + feasibility = llm_assessment["feasibility"].feasibilityScore if llm_assessment else min( + 1.0, + 0.32 + + 0.1 * min(len(idea.experimentProposals), 3) + + 0.06 * min(len(idea.limitations), 2) + + 0.08 * min(len(idea.riskFactors), 2) + + 0.08 * evidence_strength, + ) + novelty_assessment = llm_assessment["novelty"] if llm_assessment else NoveltyAssessment( + summary="基于差异点、相关工作和验证方案做了启发式 novelty 评估。", + noveltyScore=round(novelty, 4), + isNovel=novelty >= self.NOVELTY_THRESHOLD if enforce_thresholds else True, + similarWork=[item.title for item in idea.relatedWork[:2]], + differentiationNotes=idea.differentiators[:3], + ) + feasibility_assessment = llm_assessment["feasibility"] if llm_assessment else FeasibilityAssessment( + summary="基于验证方案、限制与风险因素做了启发式 feasibility 评估。", + feasibilityScore=round(feasibility, 4), + isFeasible=feasibility >= self.FEASIBILITY_THRESHOLD if enforce_thresholds else True, + blockers=[], + assumptions=["后续仍需补充更多相关工作和证据。"], + ) + overall = round( + novelty_assessment.noveltyScore * 0.35 + + feasibility_assessment.feasibilityScore * 0.3 + + evidence_strength * 0.2 + + writeup_readiness * 0.15, + 4, + ) + status = IdeaStatus.CANDIDATE + if enforce_thresholds and ( + novelty_assessment.noveltyScore < self.NOVELTY_THRESHOLD + or feasibility_assessment.feasibilityScore < self.FEASIBILITY_THRESHOLD + ): + status = IdeaStatus.REJECTED + + return idea.model_copy( + update={ + "noveltyAssessment": novelty_assessment, + "feasibilityAssessment": feasibility_assessment, + "scoreCard": ResearchScoreCard( + noveltyScore=round(novelty_assessment.noveltyScore, 4), + feasibilityScore=round(feasibility_assessment.feasibilityScore, 4), + evidenceStrengthScore=round(evidence_strength, 4), + writeupReadinessScore=round(writeup_readiness, 
4), + overallScore=overall, + ), + "status": status, + } + ) + + def _evaluate_with_llm( + self, + *, + topic: str, + idea: ResearchIdea, + evidences: list[Evidence], + llm_provider: LLMProvider | str | None, + ) -> dict[str, NoveltyAssessment | FeasibilityAssessment] | None: + if settings.use_mock_sources: + return None + base_url, api_key, model = self._resolve_provider(llm_provider) + if not base_url or not api_key: + return None + try: + with httpx.Client(timeout=settings.llm_timeout_short) as client: + response = client.post( + f"{base_url}/chat/completions", + headers={ + "Authorization": f"Bearer {api_key}", + "Content-Type": "application/json", + }, + json={ + "model": model, + "temperature": 0.2, + "messages": [ + {"role": "system", "content": build_novelty_system_prompt()}, + { + "role": "user", + "content": build_novelty_user_prompt( + topic=topic, + idea_json=idea.model_dump_json(indent=2), + evidence_snippets=self._evidence_snippets(evidences), + ), + }, + ], + }, + ) + response.raise_for_status() + content = ( + response.json().get("choices", [{}])[0] + .get("message", {}) + .get("content", "") + .strip() + ) + payload = self._extract_json(content) + novelty = NoveltyAssessment( + summary=str(payload.get("summary") or ""), + noveltyScore=float(payload.get("noveltyScore") or 0.0), + isNovel=bool(payload.get("isNovel")), + similarWork=[str(item) for item in payload.get("similarWork", []) if str(item).strip()], + differentiationNotes=[str(item) for item in payload.get("differentiationNotes", []) if str(item).strip()], + ) + feasibility = FeasibilityAssessment( + summary=str(payload.get("feasibilitySummary") or payload.get("summary") or ""), + feasibilityScore=float(payload.get("feasibilityScore") or 0.0), + isFeasible=bool(payload.get("isFeasible")), + blockers=[str(item) for item in payload.get("blockers", []) if str(item).strip()], + assumptions=[str(item) for item in payload.get("assumptions", []) if str(item).strip()], + ) + return {"novelty": 
novelty, "feasibility": feasibility} + except Exception: + return None + + @staticmethod + def _extract_json(content: str) -> dict: + text = content.strip() + if text.startswith("```"): + lines = text.splitlines() + if len(lines) >= 3: + text = "\n".join(lines[1:-1]).strip() + try: + payload = json.loads(text) + return payload if isinstance(payload, dict) else {} + except Exception: + return {} + + @staticmethod + def _title_variation_boost(title: str) -> float: + return (sum(ord(ch) for ch in title[:24]) % 12) / 100 + + @staticmethod + def _evidence_snippets(evidences: list[Evidence]) -> str: + return "\n".join( + f"- {ev.metadata.title} | {ev.metadata.publishDate or '未知'}" + for ev in evidences[:4] + ) + + @staticmethod + def _resolve_provider(provider: LLMProvider | str | None = None) -> tuple[str, str, str]: + selected = (provider.value if isinstance(provider, LLMProvider) else provider) or settings.default_llm_provider + provider_name = selected.lower().strip() + if provider_name == "openrouter": + return settings.openrouter_base_url, settings.openrouter_api_key, settings.openrouter_model + if provider_name == "deepseek": + return settings.deepseek_base_url, settings.deepseek_api_key, settings.deepseek_model + if provider_name == "openai": + return settings.openai_base_url, settings.openai_api_key, settings.openai_model + return "", "", "" diff --git a/backend/app/services/writer.py b/backend/app/services/writer.py index cee22b5..4644403 100644 --- a/backend/app/services/writer.py +++ b/backend/app/services/writer.py @@ -10,7 +10,7 @@ import httpx from app.core.config import settings -from app.models.schemas import Citation, Evidence, ReportDraft, SectionDraft, WritingSectionPlan +from app.models.schemas import Citation, Evidence, LLMProvider, ReportDraft, SectionDraft, WritingSectionPlan logger = logging.getLogger(__name__) @@ -95,6 +95,7 @@ def write_report( locked_sections: set[str] | None = None, blueprint: ReportBlueprint | None = None, report_body: str | 
None = None, + llm_provider: LLMProvider | str | None = None, ) -> tuple[str, str, dict[str, Citation]]: """生成研究文章和引用列表两个独立的文件。 @@ -116,6 +117,7 @@ def write_report( sections=sections, evidences=evidences, blueprint=blueprint, + llm_provider=llm_provider, ) else: generated_body = report_body @@ -181,6 +183,7 @@ def generate_body( evidences: list[Evidence], blueprint: ReportBlueprint | None = None, writing_plan: list[WritingSectionPlan] | None = None, + llm_provider: LLMProvider | str | None = None, ) -> str: selected_blueprint = blueprint or self._default_blueprint() draft = self.generate_draft( @@ -190,6 +193,7 @@ def generate_body( evidences=evidences, blueprint=selected_blueprint, writing_plan=writing_plan, + llm_provider=llm_provider, ) return draft.body @@ -202,6 +206,7 @@ def generate_draft( evidences: list[Evidence], blueprint: ReportBlueprint | None = None, writing_plan: list[WritingSectionPlan] | None = None, + llm_provider: LLMProvider | str | None = None, ) -> ReportDraft: selected_blueprint = blueprint or self._default_blueprint() return self._generate_body( @@ -211,6 +216,7 @@ def generate_draft( evidences=evidences, blueprint=selected_blueprint, writing_plan=writing_plan, + llm_provider=llm_provider, ) def rewrite_body( @@ -225,6 +231,7 @@ def rewrite_body( feedback_issues: list[str], targeted_sections: list[str] | None = None, writing_plan: list[WritingSectionPlan] | None = None, + llm_provider: LLMProvider | str | None = None, ) -> str: draft = self.rewrite_draft( task_title=task_title, @@ -236,6 +243,7 @@ def rewrite_body( feedback_issues=feedback_issues, targeted_sections=targeted_sections, writing_plan=writing_plan, + llm_provider=llm_provider, ) return draft.body @@ -251,6 +259,7 @@ def rewrite_draft( feedback_issues: list[str], targeted_sections: list[str] | None = None, writing_plan: list[WritingSectionPlan] | None = None, + llm_provider: LLMProvider | str | None = None, ) -> ReportDraft: outlines = self._build_section_outlines( 
task_title=task_title, @@ -304,6 +313,7 @@ def rewrite_draft( blueprint=blueprint, rewrite_context=draft_body, feedback_issues=feedback_issues, + llm_provider=llm_provider, ) if not section_body.strip(): section_body = existing_sections.get( @@ -344,12 +354,13 @@ def generate_title( task_description: str, body: str, evidences: list[Evidence], + llm_provider: LLMProvider | str | None = None, ) -> str: body = self._normalize_text(body) if settings.use_mock_sources: return self._derive_title_from_text(task_title=task_title, body=body) - base_url, api_key, model = self._resolve_provider() + base_url, api_key, model = self._resolve_provider(llm_provider) if not base_url or not api_key: return self._derive_title_from_text(task_title=task_title, body=body) @@ -407,6 +418,7 @@ def _generate_body( evidences: list[Evidence], blueprint: ReportBlueprint, writing_plan: list[WritingSectionPlan] | None = None, + llm_provider: LLMProvider | str | None = None, ) -> ReportDraft: """生成文章正文内容,不含内部标记。""" outlines = self._build_section_outlines( @@ -433,6 +445,7 @@ def _generate_body( evidences=evidences, blueprint=blueprint, outlines=outlines, + llm_provider=llm_provider, ) return self._finalize_report_draft( drafts=section_drafts, @@ -450,8 +463,9 @@ def _generate_with_llm( evidences: list[Evidence], blueprint: ReportBlueprint, outlines: list[SectionOutline], + llm_provider: LLMProvider | str | None = None, ) -> list[SectionDraft]: - base_url, api_key, model = self._resolve_provider() + base_url, api_key, model = self._resolve_provider(llm_provider) if not base_url or not api_key: return [] @@ -543,13 +557,14 @@ def _generate_with_llm( return article_sections - def _resolve_provider(self) -> tuple[str, str, str]: - provider = settings.default_llm_provider.lower().strip() - if provider == "openrouter": + def _resolve_provider(self, provider: LLMProvider | str | None = None) -> tuple[str, str, str]: + selected = (provider.value if isinstance(provider, LLMProvider) else provider) or 
settings.default_llm_provider + provider_name = selected.lower().strip() + if provider_name == "openrouter": return settings.openrouter_base_url, settings.openrouter_api_key, settings.openrouter_model - if provider == "deepseek": + if provider_name == "deepseek": return settings.deepseek_base_url, settings.deepseek_api_key, settings.deepseek_model - if provider == "openai": + if provider_name == "openai": return settings.openai_base_url, settings.openai_api_key, settings.openai_model return "", "", "" @@ -826,12 +841,13 @@ def _generate_single_section_with_llm( system_prompt: str | None = None, rewrite_context: str = "", feedback_issues: list[str] | None = None, + llm_provider: LLMProvider | str | None = None, ) -> str: resolved_base_url = base_url resolved_api_key = api_key resolved_model = model if not resolved_base_url or not resolved_api_key or not resolved_model: - resolved_base_url, resolved_api_key, resolved_model = self._resolve_provider() + resolved_base_url, resolved_api_key, resolved_model = self._resolve_provider(llm_provider) if not resolved_base_url or not resolved_api_key or not resolved_model: return "" diff --git a/docs/AI_SCIENTIST_V2_ADVANTAGES.md b/docs/AI_SCIENTIST_V2_ADVANTAGES.md new file mode 100644 index 0000000..0ac2a39 --- /dev/null +++ b/docs/AI_SCIENTIST_V2_ADVANTAGES.md @@ -0,0 +1,596 @@ +# AI-Scientist-v2 相对本项目的优势清单 + +本文目标不是做中性综述,而是尽量穷举当前公开可见实现下,`SakanaAI/AI-Scientist-v2` 相对本项目的优势点,尤其聚焦: + +- prompt 设计 +- 生成机制 +- 路线探索 + +对比对象: + +- 对方项目:[`SakanaAI/AI-Scientist-v2`](https://github.com/SakanaAI/AI-Scientist-v2) +- 本项目:当前仓库 `Deep-Research` + +说明: + +- “明确优势”表示可以从公开 README / 公开代码 / 论文摘要直接确认。 +- “推断性优势”表示结合公开配置和实现入口,可以较高置信度推断,但细节未在当前对比中完全展开。 +- 由于 GitHub 网页抓取对部分长文件有折叠,以下结论以公开 README、公开原始文件入口、论文摘要,以及本项目现有代码实现为依据。 + +## 1. 总体判断 + +如果把本项目定义为“对话驱动的深度研究与报告生成系统”,那么 `AI-Scientist-v2` 的核心优势不是 UI 或通用信息整理,而是它更像一个面向机器学习科研产出的“自主实验型研究系统”。 + +它的强项集中在三件事: + +1. 它把“研究”定义为可执行实验搜索,而不是主要定义为文献检索加报告写作。 +2. 它把“生成”做成了多阶段、可回退、可并行、可 debug 的搜索过程,而不是单次规划后顺序执行。 +3. 
它把“路线探索”做成了显式的 tree search 和 manager-driven exploration,而不是当前本项目这种受 `maxDepth/maxNodes` 约束的启发式 DAG 展开。 + +因此,如果用户目标是“产出一篇有实验、有图表、有消融、有论文格式的科研稿件”,`AI-Scientist-v2` 的方法论明显更强;如果用户目标是“围绕任意主题做资料研究、证据整理和中文报告生成”,本项目的交互性和产品形态更友好,但研究内核的开放式探索能力不如对方。 + +## 2. Prompt 设计方面的优势 + +### 2.1 Prompt 目标定义更高阶,不只是格式约束 + +`AI-Scientist-v2` 的 ideation prompt 不是简单要求“输出一个方案”,而是把模型角色直接设定为: + +- 提出高影响力研究想法 +- 类 grant proposal +- 必须新颖 +- 要与现有文献清楚区分 +- 资源约束必须落在学术实验室可承受范围 +- 目标是顶会可发表 + +这比本项目当前的方案生成 prompt 更强。 + +本项目的初始规划 prompt 在 [`backend/app/services/conversation_agent.py`](/Users/xcy/Program/SH-Program/Deep-Research/backend/app/services/conversation_agent.py#L1022) 附近,本质上是: + +- 生成一个可执行研究方案 +- 必须是 Markdown +- 必须带 front matter +- 正文包含固定章节 + +这个 prompt 的优势是稳定、可控、易解析,但问题是它主要在约束“输出格式”,而不是约束“研究质量上限”。对比之下,`AI-Scientist-v2` 的 prompt 从一开始就在约束: + +- novelty +- feasibility +- publishability +- distinction from prior work + +这使得它更像“科研提案生成 prompt”,而本项目更像“研究计划文档生成 prompt”。 + +### 2.2 Prompt 内建工具使用要求,形成“先检索再定稿”的硬约束 + +`perform_ideation_temp_free.py` 中的系统 prompt 明确要求至少做一次文献搜索后才能 finalize idea,并且暴露了 `SemanticScholarSearchTool` 与 `FinalizeIdea` 两类动作。 + +这比本项目当前的规划阶段强在: + +- 对方不是“先写计划,再进检索” +- 而是“在想法形成阶段就把检索与 novelty check 绑定” + +本项目当前的初始计划生成并没有把“先做 novelty / related work 检查再确认计划”做成 prompt 级硬约束,而是先产出计划,再由执行阶段去检索。这会导致计划更容易出现: + +- 选题重复 +- 问题过宽 +- 方法设定先验不足 +- 与现有工作区分度不够 + +### 2.3 Prompt 采用动作协议,而不是纯自然语言大段输出 + +`AI-Scientist-v2` 的 ideation prompt 要求模型按 `ACTION:` / `ARGUMENTS:` 格式输出,并在 finalize 时输出结构化 IDEA JSON。 + +这个设计的优点: + +- 更接近 agent protocol +- 更利于中间轮调用工具 +- 便于做反思轮次中的状态延续 +- 便于失败时定位是“动作错”还是“内容错” + +本项目当前的计划生成和计划修订仍然是标准 chat completion: + +- system prompt +- user prompt +- 返回整段 Markdown + +这种方式更轻,但在复杂研究任务上更脆弱,因为: + +- 中间思考过程不可见 +- 工具调用不是 prompt 原生协议的一部分 +- 失败恢复只能靠 fallback 或重试整段文本 + +### 2.4 Prompt 自带 reflection loop,研究想法不是一次性吐出 + +`AI-Scientist-v2` 的 ideation 有 `num_reflections` 参数,且反思 prompt 明确要求模型评估: + +- quality +- novelty +- feasibility +- clarity +- concise +- JSON correctness + 
+这个机制比本项目当前的方案修订强很多。本项目虽支持用户继续“改方案”,但默认系统不会主动在内部进行多轮自我批判和 refinement。也就是说: + +- 本项目的修订是“用户驱动” +- 对方的修订是“系统内生” + +在开放式科研任务里,后者对质量上限更有帮助。 + +### 2.5 Prompt 对实验细节的要求更具体 + +`FinalizeIdea` 要求输出内容包含: + +- `Short Hypothesis` +- `Related Work` +- `Abstract` +- `Experiments` +- `Risk Factors and Limitations` + +且实验部分要求: + +- simple and feasible +- specific +- exactly how to test the hypothesis +- precise algorithmic changes +- evaluation metrics + +相比之下,本项目当前方案 prompt 更偏“研究任务拆解”,不是“科研实验设计”。因此在科研场景下,对方 prompt 在以下方面更强: + +- 假设表达更明确 +- 实验可证伪性更强 +- 评价指标前置 +- 风险和局限是原生字段而不是补充段落 + +### 2.6 写作 prompt 与研究 prompt 分工更清晰 + +`AI-Scientist-v2` 把 ideation、experimentation、writeup、review 分开配置,且 README 中明确不同阶段允许使用不同模型: + +- experiment/code +- writeup +- citation +- review +- plot aggregation + +本项目虽然也有 planning / retrieval / writing / checking 等模块,但 prompt 层面的职责隔离没有对方那么强。比如本项目 [`backend/app/services/writer.py`](/Users/xcy/Program/SH-Program/Deep-Research/backend/app/services/writer.py#L519) 的 system prompt 主要聚焦中文学术写作质量,而不是把“写作”“引文收集”“审稿”“图表审美检查”拆成多个独立 prompt 闭环。 + +对方的好处是: + +- 每个 prompt 只解决一种问题 +- 每种模型调用的目标函数更单一 +- 更容易做阶段性替换和针对性调参 + +## 3. 生成机制方面的优势 + +### 3.1 从“生成文档”升级为“生成研究过程” + +本项目的主链路更接近: + +1. 生成方案 +2. 构建 DAG +3. 检索证据 +4. 分析冲突 +5. 写报告 + +`AI-Scientist-v2` 更接近: + +1. 生成研究 idea +2. 将 idea 转成实验工作区 +3. 运行 agentic tree search 做实验 +4. 汇总图表和结果 +5. 自动写 paper +6. 
自动 review + +它的核心优势是:生成对象不是“报告文本”,而是“从假设到实验再到论文的完整科研过程”。这让它天然更适合: + +- 实证型研究 +- 模型改进研究 +- 需要代码试验和结果反馈的任务 + +### 3.2 真正把代码执行纳入生成闭环 + +README 明确写了: + +- 会执行 LLM-written code +- 有 experiment manager agent +- 有 debug depth +- 有 tree visualization + +这比本项目强在:生成内容不是停留在语言层,而是进入“代码生成 -> 执行 -> 结果反馈 -> 再生成”的闭环。 + +本项目虽然有执行引擎 [`backend/app/services/execution_engine.py`](/Users/xcy/Program/SH-Program/Deep-Research/backend/app/services/execution_engine.py),但当前主要执行的是: + +- 检索 +- 证据分析 +- 报告生成 + +不是广义的实验代码探索。换句话说,本项目是“知识工作流执行器”,对方是“科研实验工作流执行器”。 + +### 3.3 多模型分工更成熟 + +`launch_scientist_bfts.py` 公开暴露了独立模型参数: + +- `model_writeup` +- `model_citation` +- `model_review` +- `model_agg_plots` +- 配置文件中的 `code` / `feedback` / `vlm_feedback` + +这说明对方把生成机制拆成了多目标优化: + +- 代码生成模型 +- 文本写作模型 +- 审稿模型 +- 图像/图表反馈模型 + +本项目虽支持不同 provider,但主链路上仍更接近“单一 LLM 路由 + 少量 specialized prompt”。对方的优点是: + +- 能针对阶段选最适合模型 +- 降低单模型包打天下的失配 +- 成本和质量更容易阶段化权衡 + +### 3.4 原生支持失败恢复,而不是主要依赖 fallback 文本 + +本项目在 plan 生成和 section 生成里都有 fallback 逻辑,这是实用的,但说明主机制在失败时常回退到“启发式保底文本”。 + +对方的生成机制更偏: + +- 多 draft +- 多 worker +- 多 stage +- debug 重试 +- writeup retries + +这类设计的优势是:失败恢复仍然尝试保持在“真实搜索空间”内部,而不是快速退化成模板化结果。 + +### 3.5 中间产物更科研化 + +对方的公开输出包括: + +- idea JSON +- timestamped experiment folder +- tree visualization HTML +- experiment_results +- plots aggregation +- PDF paper +- text review +- image/caption/reference review +- token tracking + +本项目输出更偏: + +- Markdown 报告 +- references / bib +- DAG +- evidence / conflicts + +对方优势在于,它的中间产物更适合科研过程审计与复现实验,而不仅是阅读最终结论。 + +### 3.6 写作不是直接拼章节,而是建立在实验结果之后 + +本项目写作阶段虽然有章节规划、章节证据选择、审校与重写,但整体仍然是“围绕检索证据写一篇文章”。 + +`AI-Scientist-v2` 的写作建立在以下更强的基础上: + +- 已执行实验 +- 已产出图表 +- 已进行 citation gathering +- 已有 review / VLM review + +所以它的 writeup 不是单纯叙述型生成,而是实验结果驱动的 manuscript generation。这一点在科研场景中是本质优势。 + +### 3.7 将图表质量纳入生成链路 + +从 README 和论文摘要可见,对方引入了 VLM feedback loop,用于改进 figures 的内容与美观性。 + +这比本项目强很多。本项目当前没有真正把: + +- 图表审美 +- 图表和正文一致性 +- caption-ref 对齐 + +作为原生生成闭环的一部分。对方在论文式交付上明显更完整。 + +## 4. 
路线探索方面的优势 + +这是 `AI-Scientist-v2` 相对本项目最核心、最明显的优势。 + +### 4.1 显式 tree search,探索结构比本项目 DAG 扩展更强 + +本项目的 [`MasterPlanner.build_dag()`](/Users/xcy/Program/SH-Program/Deep-Research/backend/app/services/planner.py#L12) 是一个有界 DAG 生成器,本质特征是: + +- BFS + DFS 混合扩展 +- 基于 `_seed_topics` / `_expand_topic` +- 用启发式 `infoGainScore` 做简单剪枝 +- 受 `maxDepth` / `maxNodes` 强约束 + +这套机制足够做“研究任务拆解”,但还不是“探索算法”。 + +对方则明确采用: + +- progressive agentic tree search +- best-first tree search 配置 +- `num_workers` +- `num_drafts` +- `max_debug_depth` +- `debug_prob` + +它的优势在于: + +- 搜索空间是实验路线,而不仅是话题子节点 +- 节点扩展质量由执行反馈决定,而不只是启发式标题展开 +- 可以并行探索多个候选研究方向 +- 可以在失败节点上继续 debug,而不是单纯 prune + +### 4.2 路线探索依赖真实反馈,不只依赖先验启发式 + +本项目的路线扩展更多依赖: + +- 标题与描述分解 +- 关键词 seed +- 预设优先级 +- 启发式信息增益 + +对方的路线探索依赖: + +- 代码执行结果 +- 实验成败 +- debug 可修复性 +- 阶段性目标 + +这意味着它做的是“闭环搜索”,而本项目做的是“静态规划后执行”。 + +闭环搜索的优势非常明显: + +- 能及时放弃无效方向 +- 能把资源集中到有效分支 +- 能从失败里获得局部改进 +- 更适合 open-ended research + +### 4.3 原生并行探索多个分支 + +`bfts_config.yaml` 中有 `num_workers` 和 `num_drafts`。这意味着对方不是只生成一个方案再局部修补,而是天然支持: + +- 多个起始草稿 +- 多条并行探索路径 +- 多个 worker 同时推进 + +本项目当前虽然也能执行多节点 DAG,但其节点多来自单一计划展开,不是“多候选研究路线并行竞争”。对方的优势是: + +- 起始多样性更强 +- 更不容易被第一版计划锁死 +- 有更大机会发现意外但有效的方向 + +### 4.4 将 debug 视为搜索动作的一部分 + +对方配置里有 `max_debug_depth` 和 `debug_prob`。这背后的设计非常重要: + +- 失败节点不是立即丢弃 +- 系统允许为一个失败分支投入调试预算 +- debug 本身被纳入 tree expansion 策略 + +本项目没有把 debug 作为研究路线探索的一等公民。当前系统更像: + +- 规划 +- 检索 +- 生成 +- 失败则 fallback 或结束 + +而不是: + +- 失败 +- 诊断 +- 局部修改 +- 再试 +- 选择是否继续保留该分支 + +对于真正复杂的科研探索,这是一项关键能力差异。 + +### 4.5 阶段化探索比单层规划更成熟 + +公开资料显示对方 tree search 是 progressive / staged 的,并带有 experiment manager agent。 + +这意味着它不是一口气把所有自由度同时打开,而是分阶段推进,例如: + +- 初步调查 +- 调参 +- 研究议程推进 +- 消融 + +即使不展开每个内部实现,单从设计哲学上也明显优于本项目当前的统一式 DAG。 + +本项目 DAG 更像“一次性把任务拆好”;对方更像“研究管理器根据阶段切换搜索策略”。其优势是: + +- 早期广搜,后期精搜更自然 +- 不同阶段可使用不同节点类型和评估标准 +- 研究路线不容易在初期就过度承诺 + +### 4.6 可视化树结构提升可审计性 + +对方公开产物里有 `unified_tree_viz.html`。这说明其路线探索不是黑箱串行日志,而是可视化的搜索树。 + +这比本项目当前 DAG 可视化更有价值的地方在于: + +- 用户能看到被探索过哪些假设和实验分支 +- 能看到哪些路径被放弃、为何放弃 +- 
能看到最终论文是从哪条搜索路径收敛出来的 + +本项目虽然有 DAG editor / plan editor,但更多是“计划视图”,不是“实验探索历史视图”。 + +## 5. 研究质量控制方面的优势 + +### 5.1 novelty checking 更前置 + +从 README 可见,Semantic Scholar 在 ideation 阶段就用于 novelty 相关检查。 + +本项目当前检索强在资料获取,但“与既有工作是否重复”没有前置成强机制。对方的优势是: + +- 更少走重复路线 +- 更少出现已有论文已经覆盖的问题 +- 能在想法形成前就做 literature-grounded filtering + +### 5.2 review 是原生阶段,不是附属检查 + +对方写完后还有: + +- text review +- image/caption/reference review + +本项目虽然也有 review / checking,但当前重心仍然是清洗输出、避免 prompt 泄漏、保证章节质量。对方的 review 更接近“模拟论文审稿场景”,并且交付目标是 workshop-level paper,因此标准更接近论文投稿而不是普通研究报告。 + +### 5.3 引文收集是单独阶段 + +`launch_scientist_bfts.py` 暴露了: + +- `model_citation` +- `num_cite_rounds` + +说明 citation gathering 是独立预算、独立模型、独立回合数的任务。 + +本项目当前引用生成主要来自证据集合和文章末尾参考文献组织,缺少独立的“引文检索迭代阶段”。对方优势是: + +- 引文链更可能完整 +- citation 质量不被正文写作阶段吞掉 +- 参考文献可以持续补全 + +### 5.4 结果优先于叙事 + +对方整个系统的收敛目标是“实验结果能否支撑论文”,而不是“能否生成一篇像样的报告”。 + +这会带来一个很重要的优势: + +- 写作受结果约束,而不是受文风约束 + +本项目当前在中文写作质量、结构完整性、去污染方面做得不错,但从研究质量控制的角度,对方更强调“结果是否成立”。 + +## 6. 工程与运行机制方面的优势 + +### 6.1 工作区隔离更适合高自治实验 + +对方明确: + +- 用独立 workspace +- 会复制数据到 workspace +- 会执行代码 +- 强烈建议在受控沙箱运行 + +这表明它在工程上默认面对的是高风险、高自治执行。相比之下,本项目执行引擎更偏应用内任务编排,风险面较窄。对方在自治实验系统工程上更成熟。 + +### 6.2 成本、阶段、重试参数暴露得更完整 + +公开 CLI 和配置可直接调: + +- generation 数 +- reflection 数 +- writeup retries +- cite rounds +- workers +- stage iters +- debug depth + +这说明系统的搜索预算、生成预算、审查预算都是显式参数,而不是隐藏在代码内部。其优点是: + +- 更易做 ablation +- 更易做成本控制 +- 更易做大规模批量实验 + +### 6.3 token tracking 更系统 + +`launch_scientist_bfts.py` 里会把 token tracker summary 和 interactions 存盘。 + +本项目当前没有形成同等粒度的跨阶段 token 审计。对方的优势是: + +- 成本归因更容易 +- 能分析哪一阶段最贵 +- 方便后续优化策略 + +### 6.4 交付物天然适合论文工作流 + +对方的最终交付直接面向: + +- PDF manuscript +- review outputs +- figures +- citations + +本项目当前更偏: + +- Markdown 报告 +- 对话与计划编辑 + +对于科研产线,对方交付形态更接近真实学术工作流。 + +## 7. 
逐项列出对方相对本项目的优势 + +下面给出尽量穷举的扁平列表,便于后续转成 roadmap。 + +- 更强的目标函数:不是“写一个研究方案”,而是“生成可发表的研究产出”。 +- 更强的 novelty 导向:在 ideation 阶段就要求至少一次文献搜索。 +- 更强的相关工作约束:prompt 显式要求与现有文献区分。 +- 更强的实验可执行性约束:要求给出具体实验、具体算法变化、评估指标。 +- 更强的 reflection 机制:一个 idea 会经历多轮自评和 refinement。 +- 更强的动作协议:`ACTION/ARGUMENTS` 比单段 Markdown 输出更 agentic。 +- 更强的结构化中间产物:idea JSON 比自由文本方案更利于后续自动处理。 +- 更强的多模型分工:code / feedback / writeup / citation / review / VLM review 分离。 +- 更强的失败恢复:debug、retry、multi-draft,而不是主要依赖 fallback 文本。 +- 更强的路线探索:真正做 tree search,不是启发式 DAG 扩展。 +- 更强的并行性:多个 workers 和多个 drafts 并行探索。 +- 更强的分阶段研究管理:progressive stages 明显优于单层统一规划。 +- 更强的分支保留策略:失败分支可继续 debug,而不是直接终止。 +- 更强的反馈闭环:实验结果反过来影响后续路线。 +- 更强的科研真实性:代码执行、实验结果、图表和论文连成闭环。 +- 更强的图表治理:VLM 参与 figure quality 改进。 +- 更强的论文式 review:不仅评正文,还评图像、caption、reference。 +- 更强的 citation 流水线:引文收集是独立阶段并有独立轮数。 +- 更强的实验审计:tree visualization、experiment folder、token tracker 更完整。 +- 更强的 reproducibility 倾向:工作区、日志、结果目录分层更适合复现。 +- 更强的预算控制:很多关键探索超参数在配置中显式暴露。 +- 更强的开放式研究能力:更适合发现新方向,而不只是整理已有资料。 +- 更强的 domain generalization 目标:明确强调移除 human-authored templates。 +- 更强的结果导向:系统以“论文能否成立”为目标,而不是“文本是否顺畅”为目标。 + +## 8. 对本项目最值得优先借鉴的点 + +如果只挑最值得抄的 10 个点,优先级如下: + +1. 把“novelty / related work check”前移到计划生成前。 +2. 把当前 plan prompt 从“格式约束”升级为“研究质量约束 + 结构化输出”。 +3. 引入 `reflection rounds`,让方案在系统内部先做 2 到 5 轮自我修订。 +4. 把计划输出改成结构化 schema,而不是 Markdown 为唯一主载体。 +5. 把 DAG 扩展升级成“多 draft + 多 worker + 明确评分”的探索机制。 +6. 把失败处理从 fallback 文本改成“局部 debug / regenerate / branch repair”。 +7. 增加显式 novelty score / differentiation score / feasibility score。 +8. 将 citation gathering 独立成单独阶段。 +9. 将 figure / table / appendix 生成纳入原生交付链路。 +10. 为研究过程增加搜索树或分支历史可视化,而不是只展示静态计划。 + +## 9. 
结论 + +一句话概括: + +`AI-Scientist-v2` 的优势不在于“文案更好”,而在于它把 prompt、生成机制和路线探索都建立在“科研搜索系统”而不是“报告生成系统”的范式上。 + +相对本项目,它最强的三个优势分别是: + +- prompt 更像科研提案与实验设计器,而不是格式化计划生成器; +- 生成机制更像多阶段实验闭环,而不是检索后写作; +- 路线探索更像真实 tree search + debug 搜索,而不是启发式 DAG 拆分。 + +如果本项目后续要向“更强研究 agent”演化,最应该补的不是 UI,而是: + +- 前置 novelty/related-work 检查 +- reflection + structured proposal generation +- branch-based exploration and repair + +## 10. 依据来源 + +- `AI-Scientist-v2` README: [GitHub README](https://github.com/SakanaAI/AI-Scientist-v2) +- `AI-Scientist-v2` ideation 入口: [perform_ideation_temp_free.py](https://raw.githubusercontent.com/SakanaAI/AI-Scientist-v2/main/ai_scientist/perform_ideation_temp_free.py) +- `AI-Scientist-v2` 启动入口: [launch_scientist_bfts.py](https://raw.githubusercontent.com/SakanaAI/AI-Scientist-v2/main/launch_scientist_bfts.py) +- `AI-Scientist-v2` 搜索配置: [bfts_config.yaml](https://raw.githubusercontent.com/SakanaAI/AI-Scientist-v2/main/bfts_config.yaml) +- `AI-Scientist-v2` 论文摘要页: [Hugging Face Papers](https://huggingface.co/papers/2504.08066) +- 本项目计划生成: [`backend/app/services/conversation_agent.py`](/Users/xcy/Program/SH-Program/Deep-Research/backend/app/services/conversation_agent.py) +- 本项目 DAG 规划: [`backend/app/services/planner.py`](/Users/xcy/Program/SH-Program/Deep-Research/backend/app/services/planner.py) +- 本项目执行引擎: [`backend/app/services/execution_engine.py`](/Users/xcy/Program/SH-Program/Deep-Research/backend/app/services/execution_engine.py) +- 本项目写作服务: [`backend/app/services/writer.py`](/Users/xcy/Program/SH-Program/Deep-Research/backend/app/services/writer.py) + diff --git a/docs/AI_SCIENTIST_V2_LEARNING_PLAN.md b/docs/AI_SCIENTIST_V2_LEARNING_PLAN.md new file mode 100644 index 0000000..c58faaa --- /dev/null +++ b/docs/AI_SCIENTIST_V2_LEARNING_PLAN.md @@ -0,0 +1,1042 @@ +# 全面向 AI-Scientist-v2 学习的改进方案列表 + +本文不是“可以考虑”的建议集合,而是一份按落地顺序组织的系统改造清单。目标只有一个: + +- 把本项目从“会话驱动的研究报告系统”,升级为“具备自主探索、路线竞争、实验反馈和论文级交付能力的研究 Agent 平台”。 + +配套背景文档: + +- 
[`docs/AI_SCIENTIST_V2_ADVANTAGES.md`](/Users/xcy/Program/SH-Program/Deep-Research/docs/AI_SCIENTIST_V2_ADVANTAGES.md) + +--- + +## 1. 北极星目标 + +我们要学的不是某几个 prompt,而是整套范式。 + +目标架构要从当前模式: + +- 用户给题目 +- 系统生成 Markdown 方案 +- DAG 检索资料 +- 写一篇中文报告 + +升级为目标模式: + +- 用户给研究方向 +- 系统先做 novelty / related-work / feasibility 检查 +- 系统生成多个候选研究路线 +- 系统在分支上并行探索、执行、debug、淘汰 +- 系统沉淀结构化研究资产 +- 系统产出报告 / 论文 / 图表 / 审稿意见 / 过程树 + +一句话: + +- 从“生成答案”升级为“搜索研究空间并收敛到答案”。 + +--- + +## 2. 先定三条总原则 + +### 2.1 先改数据结构,再改 prompt + +现在本项目很多能力做不出来,不是 prompt 不够长,而是中间对象太弱。只要核心对象还是: + +- plan markdown +- dag node +- evidence +- report + +那么再怎么调 prompt,也很难做出 `AI-Scientist-v2` 那种多轮探索和分支竞争。 + +所以第一优先级不是“换一个更猛的 prompt”,而是补齐: + +- idea schema +- branch schema +- experiment schema +- review schema +- search tree schema +- run artifact schema + +### 2.2 让系统围绕“分支”工作,而不是围绕“单方案”工作 + +当前系统有一个明显短板: + +- 起点通常只有一个方案 +- 后续多是围绕这一个方案修修补补 + +要全面向对方学习,就必须把“多个候选研究路线并行竞争”变成一等公民。 + +### 2.3 失败不能直接 fallback 成模板文本,必须先进入 repair loop + +现在本项目很多失败路径最终会退化成: + +- fallback plan +- fallback section +- fallback wording + +这个对 demo 友好,但对研究系统伤害很大。后续要改成: + +- failed -> diagnose -> repair -> retry -> score -> decide prune + +而不是: + +- failed -> 生成保底文本 -> 继续往下走 + +--- + +## 3. 总体改造路线图 + +建议拆成 4 个阶段。 + +### Phase 1: 把“研究计划系统”升级成“结构化研究提案系统” + +目标: + +- 不再以 Markdown 方案为唯一核心对象 +- 引入 idea / novelty / feasibility / experiment 等结构化 schema + +### Phase 2: 把“静态 DAG 执行”升级成“多分支探索引擎” + +目标: + +- 不只展开节点 +- 开始展开候选路线、候选实验和候选修复 + +### Phase 3: 把“文献报告生成”升级成“实验反馈驱动的研究生成” + +目标: + +- 支持 workspace、代码执行、实验结果回流、debug loop + +### Phase 4: 把“最终报告”升级成“论文级科研交付物” + +目标: + +- 引文、图表、审稿、过程树、token 统计、论文式导出全部补齐 + +--- + +## 4. 全面改进方案列表 + +下面按工作流拆解,每一项都给出: + +- 要学什么 +- 为什么必须做 +- 具体怎么改 +- 验收标准 + +--- + +## 5. 
工作流一:目标函数重写 + +### 5.1 把系统目标从“写报告”改成“完成研究任务” + +要学什么: + +- `AI-Scientist-v2` 的目标不是生成顺滑文本,而是收敛到可成立的研究产出。 + +怎么改: + +- 在产品定义和后端状态机层面新增 `research objective` 概念。 +- `TaskConfig` 增加 `researchMode`,至少支持: + - `survey` + - `evidence_report` + - `experimental_research` + - `paper_writeup` +- 会话创建时先判断研究类型,再决定后续链路。 + +建议新增字段: + +- `researchMode` +- `deliverableTypes` +- `requiresNoveltyCheck` +- `requiresExperimentLoop` +- `requiresPeerReview` + +验收标准: + +- 新建任务时,系统能根据模式走不同执行链路。 +- `experimental_research` 不再直接复用“检索 -> 写报告”的默认流水线。 + +### 5.2 把“成功”定义成多维评分,而不是是否生成了文件 + +怎么改: + +- 增加统一的 `ResearchScoreCard`: + - novelty_score + - feasibility_score + - evidence_strength_score + - execution_success_score + - writeup_score + - review_score +- 每轮运行结束都生成 scorecard。 + +验收标准: + +- 系统不再仅凭 `reportPath` 存在就视为完成。 +- 任务详情页可看到多维得分。 + +--- + +## 6. 工作流二:核心数据结构升级 + +### 6.1 新增 `ResearchIdea` schema + +要学什么: + +- 对方把 ideation 输出成结构化 idea,而不是自由文本方案。 + +怎么改: + +- 在 [`backend/app/models/schemas.py`](/Users/xcy/Program/SH-Program/Deep-Research/backend/app/models/schemas.py) 新增: + - `ResearchIdea` + - `NoveltyAssessment` + - `FeasibilityAssessment` + - `RelatedWorkItem` + - `ExperimentProposal` + - `RiskAssessment` + +建议字段: + +- `ideaId` +- `title` +- `problemStatement` +- `shortHypothesis` +- `abstract` +- `relatedWork` +- `differentiators` +- `noveltyAssessment` +- `feasibilityAssessment` +- `experimentProposals` +- `riskFactors` +- `limitations` +- `score` +- `sourceEvidenceIds` + +验收标准: + +- 首轮 ideation 结果能以 JSON 落库存储。 +- UI 和 API 能读取结构化 idea,而不是只读 Markdown。 + +### 6.2 新增 `SearchBranch` / `SearchTree` schema + +怎么改: + +- 为路线探索引擎补结构: + - `SearchTree` + - `SearchBranch` + - `BranchAction` + - `BranchEvaluation` + - `BranchFailure` + - `BranchRepairAttempt` + +关键字段: + +- branch_id +- parent_branch_id +- branch_type +- branch_goal +- action_type +- action_input +- action_output +- score_before +- score_after +- status +- prune_reason +- debug_depth +- worker_id + +验收标准: + +- 系统能持久化“探索过哪些分支,为什么保留/剪掉”。 
+- 后端 API 可以返回 branch tree,而不仅是 DAG。 + +### 6.3 新增 `ExperimentRun` 和 `Artifact` schema + +怎么改: + +- 增加实验型对象: + - `ExperimentWorkspace` + - `ExperimentRun` + - `ExperimentMetric` + - `ExperimentArtifact` + - `FigureArtifact` + - `ReviewArtifact` + +验收标准: + +- 一次实验运行可以挂多个 artifact,而不是只产出 evidence 和 report。 + +--- + +## 7. 工作流三:Prompt 体系重构 + +### 7.1 彻底拆分 prompt 职责 + +当前问题: + +- 计划 prompt、写作 prompt 比较强,但缺少真正的研究探索 prompt 体系。 + +改法: + +- 新建独立 prompt 模块目录,例如: + - `backend/app/prompts/ideation.py` + - `backend/app/prompts/novelty.py` + - `backend/app/prompts/branching.py` + - `backend/app/prompts/repair.py` + - `backend/app/prompts/experiment.py` + - `backend/app/prompts/writeup.py` + - `backend/app/prompts/review.py` + - `backend/app/prompts/figure_review.py` + +原则: + +- 一个 prompt 只负责一个动作。 +- 不再让一个 prompt 同时负责“想法、计划、写作、修订”。 + +验收标准: + +- prompt 文件按职责拆分完成。 +- 每个阶段可以独立替换模型和参数。 + +### 7.2 把计划生成从 Markdown 优先改成 JSON 优先 + +怎么改: + +- 方案生成时先输出结构化 JSON: + - idea + - evaluation + - branches + - experiments +- Markdown 计划变成衍生视图,而不是源数据。 + +验收标准: + +- `ConversationAgent._generate_initial_plan()` 不再直接依赖 Markdown 作为主事实来源。 +- front matter 只用于显示和兼容,不再承担系统事实存储职责。 + +### 7.3 引入动作协议 + +要学什么: + +- 对方 ideation 是 action-based,而不是一次吐全文。 + +怎么改: + +- 定义内部动作协议: + - `SEARCH_LITERATURE` + - `ASSESS_NOVELTY` + - `PROPOSE_IDEA` + - `REFINE_IDEA` + - `SPAWN_BRANCH` + - `RUN_EXPERIMENT` + - `REPAIR_BRANCH` + - `FINALIZE_WRITEUP` +- 让 agent 输出结构化 action,而不是直接写长文。 + +验收标准: + +- ideation / branching / repair 阶段都能以 action 协议驱动。 + +### 7.4 引入 reflection rounds + +怎么改: + +- 所有关键阶段支持 `num_reflections`。 +- 至少覆盖: + - idea 生成 + - 分支评分 + - 实验失败诊断 + - 写作审校 + +建议: + +- 默认 2 轮 +- 高质量模式 4 到 6 轮 + +验收标准: + +- 每个关键输出都能看到初稿、反思、修正版。 + +--- + +## 8. 
工作流四:Novelty 与 Related Work 前置 + +### 8.1 在生成研究路线前增加 novelty gate + +怎么改: + +- 新增 `NoveltyGateService`。 +- 执行顺序改为: + - 检索相关工作 + - 归纳已有方法 + - 识别空白点 + - 生成候选 idea + - 给出区分点 + +验收标准: + +- 没过 novelty gate 的 idea 不进入主搜索树。 + +### 8.2 新增“相似工作对照表” + +怎么改: + +- 每个 idea 自动生成 related-work diff: + - prior_work + - overlap + - difference + - expected_gain + - uncertainty + +验收标准: + +- 每个候选 idea 都能说明“和已有工作相比到底新在哪”。 + +### 8.3 让 plan 里出现“反对理由” + +怎么改: + +- ideation 输出除了支持理由,还必须产出: + - why_this_may_fail + - why_this_may_not_be_novel + - missing_evidence + +验收标准: + +- 每个 idea 至少有 3 条自我反驳。 + +--- + +## 9. 工作流五:从 DAG 扩展器升级为 Tree Search Engine + +### 9.1 保留 DAG,但让 DAG 退居“执行图” + +当前问题: + +- [`MasterPlanner.build_dag()`](/Users/xcy/Program/SH-Program/Deep-Research/backend/app/services/planner.py) 现在既承担拆题,又承担路线探索,职责混在一起。 + +改法: + +- DAG 继续用于“执行依赖关系”。 +- 新增 Tree Search Engine 专门负责“候选路线探索”。 + +目标分工: + +- Search Tree:找哪条路线值得做 +- Execution DAG:把选中的路线执行出来 + +验收标准: + +- 路线选择和任务执行分离成两个子系统。 + +### 9.2 引入 best-first / beam search / progressive widening + +怎么改: + +- 新增 `SearchStrategy` 抽象: + - `best_first` + - `beam_search` + - `staged_search` +- 初期可以先做 best-first,后续再加 progressive widening。 + +新增配置: + +- `searchStrategy` +- `numDrafts` +- `numWorkers` +- `beamWidth` +- `branchBudget` +- `maxDebugDepth` +- `debugProbability` + +验收标准: + +- 系统不再只按 BFS/启发式展开 topic。 +- 可通过配置选择搜索策略。 + +### 9.3 明确 branch scoring 机制 + +怎么改: + +- 每个 branch 用统一评分函数: + - novelty + - feasibility + - expected_info_gain + - execution_cost + - evidence_availability + - risk + +示例公式: + +- `branch_score = novelty*0.25 + feasibility*0.2 + info_gain*0.2 + evidence*0.15 - cost*0.1 - risk*0.1` + +验收标准: + +- 保留/剪枝有可解释打分,而不是低信息增益 streak 这类简单启发式。 + +### 9.4 支持多 draft 起点 + +怎么改: + +- 初始 ideation 一次至少生成 3 到 5 个候选 idea。 +- 每个 idea 进入独立 branch。 + +验收标准: + +- 首轮运行默认不是一个方案,而是多个候选方案竞争。 + +### 9.5 支持 branch repair,而不是只 prune + +怎么改: + +- 当 branch 失败时先做: + - diagnose + - repair proposal + - retry + - rescore +- 失败若可修复则保留,不可修复再 prune。 + +验收标准: + 
+- 分支失败后至少支持一次 repair 回合。 + +--- + +## 10. 工作流六:引入 Experiment Manager + +### 10.1 增加实验管理层 + +要学什么: + +- 对方有 manager agent,不只是任务节点。 + +怎么改: + +- 新增 `ExperimentManagerService`,职责包括: + - 选择要跑哪些实验 + - 分配 worker + - 监控实验状态 + - 汇总失败原因 + - 决定是否继续 debug + +验收标准: + +- 实验执行有统一 manager,而不是直接从 planner 跳到 execution。 + +### 10.2 区分“研究分支”和“实验运行” + +怎么改: + +- 一个研究分支下可包含多个 experiment runs。 +- experiment run 结果回写 branch score。 + +验收标准: + +- 一个 branch 可以有 baseline、variant、ablation 多次运行记录。 + +### 10.3 增加实验预算控制 + +怎么改: + +- 配置项增加: + - `maxExperimentRuns` + - `maxTokensPerBranch` + - `maxRuntimePerBranch` + - `maxFailedRunsBeforePrune` + +验收标准: + +- 系统能按 branch 控制预算,而不是全局粗放执行。 + +--- + +## 11. 工作流七:Workspace 和代码执行闭环 + +### 11.1 为实验型任务引入隔离 workspace + +怎么改: + +- 每个 experimental task 建立独立目录: + - `runs/{task_id}/{branch_id}/` +- 存放: + - generated code + - configs + - logs + - metrics + - plots + - review notes + +验收标准: + +- 同一次任务的不同 branch 有独立工作区,互不污染。 + +### 11.2 引入 `CodeExecutionService` + +怎么改: + +- 新增代码执行服务: + - 写入实验代码 + - 运行命令 + - 收集 stdout/stderr + - 采集指标 + - 解析失败信号 + +验收标准: + +- 实验型任务支持真正的运行反馈,不是只靠语言判断。 + +### 11.3 引入失败诊断器 + +怎么改: + +- 新增 `ExecutionFailureAnalyzer`,提取: + - syntax error + - dependency error + - runtime error + - timeout + - metric regression + +验收标准: + +- repair loop 能基于失败类型走不同策略。 + +--- + +## 12. 
工作流八:多 Agent 体系重做 + +### 12.1 当前四 Agent 需要从“展示型”变成“实战型” + +当前问题: + +- `four_agents` 目录里已有 ideation / planning / writing / checking,但整体还偏简化实现,没真正成为主引擎。 + +改法: + +- 重新定义 agent 职责: + - `IdeationAgent`: 生成多候选 idea + novelty check + - `PlanningAgent`: 将 idea 转成 branch plan 和 experiment plan + - `ExecutionAgent`: 跑实验、收集反馈 + - `RepairAgent`: 对失败分支做诊断和修复 + - `WritingAgent`: 生成论文式写作草稿 + - `ReviewAgent`: 文本审稿 + - `FigureReviewAgent`: 图表审稿 + - `CitationAgent`: 引文补全 + +验收标准: + +- 现有四 Agent 体系升级为真实工作流,而不是演示式流水线。 + +### 12.2 增加 Agent Manager + +怎么改: + +- 新增 `AgentManager` 统一调度各 agent。 +- 支持: + - round-based orchestration + - branch assignment + - retry policy + - handoff payload + +验收标准: + +- agent 之间的协作由 manager 协调,不再靠执行引擎硬编码串接。 + +--- + +## 13. 工作流九:模型路由升级 + +### 13.1 不再默认一个模型包打天下 + +怎么改: + +- `TaskConfig` 增加独立模型路由: + - `modelIdeation` + - `modelNovelty` + - `modelPlanning` + - `modelExecution` + - `modelRepair` + - `modelWriteup` + - `modelCitation` + - `modelReview` + - `modelFigureReview` + +验收标准: + +- 每阶段模型可独立配置。 + +### 13.2 为不同阶段设不同 temperature / timeout / budget + +怎么改: + +- 配置项拆分: + - `temperatureIdeation` + - `temperatureRepair` + - `timeoutExecution` + - `timeoutReview` + +验收标准: + +- 阶段参数不再被一个通用 `_chat_complete()` 吃掉。 + +--- + +## 14. 工作流十:写作链路升级成论文式写作链路 + +### 14.1 从“章节生成”升级成“manuscript assembly” + +怎么改: + +- 写作阶段新增文稿对象: + - abstract + - introduction + - related work + - method + - experiments + - results + - limitations + - conclusion + +验收标准: + +- `paper_writeup` 模式下不再复用通用中文报告结构。 + +### 14.2 引文补全变成独立阶段 + +怎么改: + +- 新增 `CitationAgent`: + - 扫描正文 claim + - 找缺失引用 + - 补足 citation candidates + - 生成 citation confidence + +验收标准: + +- 写作完成后会单独跑 citation pass。 + +### 14.3 引入章节级 rewrite loop + +怎么改: + +- 每章都能经历: + - draft + - review + - rewrite + - accept + +验收标准: + +- 章节不是一次生成后直接拼装,而是通过小闭环迭代。 + +--- + +## 15. 
工作流十一:图表与视觉资产纳入主链路 + +### 15.1 新增 Figure / Table Planner + +怎么改: + +- 系统根据实验结果自动规划: + - 哪些表格该出现 + - 哪些图该出现 + - 每张图的目的是什么 + +验收标准: + +- 报告或论文包含结构化 figure plan。 + +### 15.2 新增 Figure Review Agent + +怎么改: + +- 审核: + - 信息量是否足够 + - 配色和标注是否清晰 + - caption 是否和正文一致 + +验收标准: + +- 图表不是附件,而是有独立 review 分数。 + +--- + +## 16. 工作流十二:Review 体系升级 + +### 16.1 把 checking 从“污染检测”升级成“审稿系统” + +当前问题: + +- 现有 checking 更偏 prompt leakage、机械措辞、脏输出治理。 + +改法: + +- 扩展 review 维度: + - novelty + - clarity + - methodology soundness + - evidence sufficiency + - citation quality + - figure quality + - reproducibility + +验收标准: + +- review 输出是结构化审稿意见,不只是“需不需要重写”。 + +### 16.2 新增 reviewer personas + +怎么改: + +- 支持多个 reviewer 视角: + - harsh reviewer + - method reviewer + - writing reviewer + - reproducibility reviewer + +验收标准: + +- 一次 writeup 至少经过 2 个独立 reviewer 视角。 + +--- + +## 17. 工作流十三:可视化与产品层补强 + +### 17.1 除计划视图外新增搜索树视图 + +怎么改: + +- 前端新增: + - search tree explorer + - branch detail panel + - branch score diff + - prune reason badge + +验收标准: + +- 用户能看到系统探索过哪些候选路线,而不是只看到最终计划。 + +### 17.2 新增实验资产视图 + +怎么改: + +- 前端展示: + - branch runs + - logs + - metrics + - plots + - review notes + +验收标准: + +- 用户能审查中间实验产物。 + +### 17.3 新增研究轨迹时间线 + +怎么改: + +- 时间线不只显示 progress group,还显示: + - idea accepted/rejected + - branch spawned/pruned + - experiment failed/repaired + - review passed/failed + +验收标准: + +- 时间线真正体现研究收敛过程。 + +--- + +## 18. 工作流十四:观测、成本和评测体系 + +### 18.1 增加 token / latency / success-rate 追踪 + +怎么改: + +- 每个阶段记录: + - token_in + - token_out + - latency_ms + - retry_count + - success/failure + +验收标准: + +- 任务详情页可以看到成本和耗时分布。 + +### 18.2 建立研究任务评测集 + +怎么改: + +- 建一个 benchmark 目录,覆盖: + - 综述型任务 + - 证据冲突型任务 + - novelty-sensitive 任务 + - experimental_research 任务 + - paper_writeup 任务 + +验收标准: + +- 每次重大改造后可批量回归。 + +### 18.3 建立 A/B 验证机制 + +怎么改: + +- 对比: + - 单方案 vs 多分支 + - 无 novelty gate vs 有 novelty gate + - 无 reflection vs 3 轮 reflection + +验收标准: + +- 关键机制改造有量化证据支撑。 + +--- + +## 19. 
Phase-by-Phase 落地清单 + +下面是推荐实施顺序,不然会陷入到处改、处处半成品。 + +### P0:两周内必须做完的基础改造 + +- 新增 `ResearchIdea`、`NoveltyAssessment`、`ExperimentProposal` schema。 +- 将当前 plan 生成改为“结构化 JSON 为主,Markdown 为视图”。 +- 新增 `NoveltyGateService`。 +- 初始 ideation 改为一次生成 3 个候选 idea。 +- 增加 `num_reflections` 配置。 +- 为每个候选 idea 生成 scorecard。 + +交付结果: + +- 系统第一次具备“多候选研究路线”的能力。 + +### P1:一个月内完成的搜索引擎升级 + +- 新增 `SearchTree` / `SearchBranch` 模型。 +- 新增 `SearchStrategy` 和 `BranchScorer`。 +- 将 `MasterPlanner` 从路线探索中剥离,只负责执行 DAG。 +- 支持 best-first + branch prune + repair。 +- 前端新增 branch tree 基础视图。 + +交付结果: + +- 系统第一次具备显式路线探索能力。 + +### P2:两个月内完成的实验闭环升级 + +- 新增 `ExperimentManagerService`。 +- 引入 workspace 和 `CodeExecutionService`。 +- 引入 `ExecutionFailureAnalyzer`。 +- 支持 baseline / variant / ablation runs。 +- experiment run 结果回写 branch score。 + +交付结果: + +- 系统第一次具备“代码执行反馈驱动的研究收敛能力”。 + +### P3:论文级交付能力补齐 + +- 写作模式拆成 `report` 和 `paper_writeup`。 +- 新增 citation pass。 +- 新增 figure planner / figure review。 +- 新增 multi-reviewer pass。 +- 支持 PDF / appendix / review note 导出。 + +交付结果: + +- 系统第一次具备“科研稿件交付能力”。 + +--- + +## 20. 优先级排序:什么最该先抄 + +如果资源有限,不要平均用力。最值钱的是下面 12 项。 + +### Top 1-4:先补研究核心 + +- 前置 novelty gate +- 多候选 idea 生成 +- reflection loop +- SearchTree + branch scorer + +### Top 5-8:再补探索闭环 + +- branch repair loop +- ExperimentManager +- workspace + code execution +- failure analyzer + +### Top 9-12:最后补论文交付 + +- citation pass +- review personas +- figure review +- search tree visualization + +--- + +## 21. 明确哪些不要抄偏 + +全面向对方学习,不等于机械照搬。下面三类不要先做。 + +### 21.1 不要先卷 UI 皮肤 + +真正差距在研究内核,不在界面样式。 + +### 21.2 不要先把 prompt 越写越长 + +如果 schema、状态机、分支结构没变,prompt 再长也只是高成本模板生成。 + +### 21.3 不要先把所有模式都变成实验型任务 + +本项目有自己的优势: + +- 对话驱动 +- 通用主题研究 +- 中文报告体验 + +正确做法是双轨: + +- `survey/evidence_report` 继续保留当前强项 +- `experimental_research/paper_writeup` 走新链路 + +--- + +## 22. 
建议直接开工的代码切入点 + +第一批建议改这些位置: + +- [`backend/app/models/schemas.py`](/Users/xcy/Program/SH-Program/Deep-Research/backend/app/models/schemas.py) +- [`backend/app/services/conversation_agent.py`](/Users/xcy/Program/SH-Program/Deep-Research/backend/app/services/conversation_agent.py) +- [`backend/app/services/planner.py`](/Users/xcy/Program/SH-Program/Deep-Research/backend/app/services/planner.py) +- [`backend/app/services/execution_engine.py`](/Users/xcy/Program/SH-Program/Deep-Research/backend/app/services/execution_engine.py) +- [`backend/app/services/four_agents/ideation_agent.py`](/Users/xcy/Program/SH-Program/Deep-Research/backend/app/services/four_agents/ideation_agent.py) +- [`backend/app/services/four_agents/planning_agent.py`](/Users/xcy/Program/SH-Program/Deep-Research/backend/app/services/four_agents/planning_agent.py) +- [`backend/app/services/writer.py`](/Users/xcy/Program/SH-Program/Deep-Research/backend/app/services/writer.py) + +建议新增目录: + +- `backend/app/prompts/` +- `backend/app/services/search_tree/` +- `backend/app/services/experiments/` +- `backend/app/services/review/` +- `backend/app/services/citation/` + +--- + +## 23. 最后的结论 + +真正全面向 `AI-Scientist-v2` 学习,不是“把 prompt 写得更像它”,而是做三次范式切换: + +1. 从 Markdown 计划范式,切到结构化研究对象范式。 +2. 从单方案执行范式,切到多分支搜索范式。 +3. 从文献报告范式,切到实验反馈驱动的科研产出范式。 + +如果只允许我给一句最核心的执行建议,那就是: + +- 先把 `novelty gate + multi-idea + search tree + repair loop` 做出来。 + +这是所有后续能力的底座。没有这四个,本项目很难真正追上 `AI-Scientist-v2` 的研究内核。 + diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx index f5e26cf..7f113be 100644 --- a/frontend/src/App.tsx +++ b/frontend/src/App.tsx @@ -1151,6 +1151,7 @@ export function App() { starting={starting} downloading={downloading} status={activeStatus} + currentIdeas={activeDetail?.currentIdeas ?? 
[]} onRequestCloseMobile={() => setMobileEditorOpen(false)} onChange={(value) => { setPlanDraft(value); diff --git a/frontend/src/components/PlanEditorPane.tsx b/frontend/src/components/PlanEditorPane.tsx index 4439bc3..bd76bd5 100644 --- a/frontend/src/components/PlanEditorPane.tsx +++ b/frontend/src/components/PlanEditorPane.tsx @@ -1,5 +1,5 @@ import { memo } from "react"; -import type { ConversationStatus } from "../types"; +import type { ConversationStatus, ResearchIdea } from "../types"; import { PlanConfigForm } from "./PlanConfigForm"; import { parseYamlFrontmatter, serializeYamlFrontmatter } from "../utils/yamlFrontmatter"; @@ -11,6 +11,7 @@ export interface PlanEditorPaneProps { starting: boolean; downloading: boolean; status: ConversationStatus | null; + currentIdeas?: ResearchIdea[]; onRequestCloseMobile: () => void; onChange: (value: string) => void; onReset: () => void; @@ -57,6 +58,7 @@ function PlanEditorPaneBase(props: PlanEditorPaneProps) { starting, downloading, status, + currentIdeas, onRequestCloseMobile, onChange, onReset, @@ -158,6 +160,32 @@ function PlanEditorPaneBase(props: PlanEditorPaneProps) { showResetButton={false} /> + {Array.isArray(currentIdeas) && currentIdeas.length > 0 && ( +
+
+

候选 Ideas

+

当前展示首轮结构化研究想法,已按分数筛出主路线。

+
+
+ {currentIdeas.map((idea) => ( +
+
+ {idea.title} + {idea.status} +
+

{idea.shortHypothesis || idea.problemStatement}

+
+ overall {idea.scoreCard?.overallScore?.toFixed?.(2) ?? "0.00"} + novelty {idea.scoreCard?.noveltyScore?.toFixed?.(2) ?? "0.00"} +
+
+ ))} +
+
+ )}