diff --git a/config/manager.yaml b/config/manager.yaml new file mode 100644 index 0000000..e3010bf --- /dev/null +++ b/config/manager.yaml @@ -0,0 +1,32 @@ +# NeuriCo Interactive Manager Configuration +# +# These settings control the behavior of the interactive mode manager. +# Override via environment variables (NEURICO_MANAGER_*) or .env file. + +manager: + # LLM backend for manager reasoning + # Options: "cli" (uses claude -p, no extra keys), "anthropic_api", "openrouter" + # Can also be set via NEURICO_MANAGER_BACKEND env var + llm_backend: cli + + # Model to use for manager reasoning (null = default for backend) + # For cli: uses whatever claude version is installed + # For anthropic_api: e.g., "claude-sonnet-4-20250514" + # For openrouter: e.g., "anthropic/claude-sonnet-4" + # Can also be set via NEURICO_MANAGER_MODEL env var + llm_model: null + + # Engagement level: how often the manager stops to ask the user + # "hands_off": only critical decisions (direction changes, failures) + # "balanced": stage transitions + significant choices (default) + # "hands_on": frequent check-ins, present options at every step + engagement: balanced + + # How often to check agent status during long runs (seconds) + poll_interval: 60 + + # How often to proactively show progress summaries during long runs (seconds) + engagement_interval: 1800 # 30 minutes + + # Default AI provider for research agents + default_provider: claude diff --git a/docker/run.sh b/docker/run.sh index 7f81dab..3da0bd9 100755 --- a/docker/run.sh +++ b/docker/run.sh @@ -1652,6 +1652,77 @@ cmd_config() { # ----------------------------------------------------------------------------- # Show help # ----------------------------------------------------------------------------- +# ----------------------------------------------------------------------------- +# Interactive mode: run a single agent inside Docker (internal command) +# Used by the interactive manager to invoke individual agents +# 
----------------------------------------------------------------------------- +cmd__run_agent() { + if [ -z "$1" ]; then + echo -e "${RED}Usage: $0 _run-agent --workspace --provider --run-id --idea-file ${NC}" + echo "Agents: resource_finder, experiment_runner, paper_writer, comment_handler" + exit 1 + fi + + ensure_directories + check_env_file + + local gpu_flags=$(get_gpu_flags) + local user_flags=$(get_user_flags) + local credential_mounts=$(get_cli_credential_mounts) + local workspace_dir=$(get_workspace_dir) + + echo -e "${BLUE}Running agent: $1${NC}" + echo -e "${BLUE}Workspace:${NC} $workspace_dir -> /workspaces" + + eval "docker run --rm \ + $gpu_flags \ + $user_flags \ + --env-file \"$PROJECT_ROOT/.env\" \ + -e NEURICO_WORKSPACE=/workspaces \ + -v \"$workspace_dir:/workspaces\" \ + -v \"$PROJECT_ROOT/ideas:/app/ideas\" \ + -v \"$PROJECT_ROOT/logs:/app/logs\" \ + -v \"$PROJECT_ROOT/config:/app/config:ro\" \ + -v \"$PROJECT_ROOT/templates:/app/templates:ro\" \ + $credential_mounts \ + -w /app \ + \"$IMAGE_NAME\" \ + python /app/src/core/agent_runner.py $@" +} + +# ----------------------------------------------------------------------------- +# Interactive mode: launch the manager on the host +# The manager runs outside Docker and uses _run-agent to invoke agents +# ----------------------------------------------------------------------------- +cmd_interactive() { + if [ -z "$1" ]; then + echo -e "${RED}Usage: $0 interactive [--provider claude] [--engagement balanced]${NC}" + exit 1 + fi + + # The manager runs on the HOST, not inside Docker + # It needs Python available on the host + if ! command -v python3 &> /dev/null && ! command -v python &> /dev/null; then + echo -e "${RED}Python 3 is required on the host for interactive mode.${NC}" + echo "Install Python 3.10+ and try again." + exit 1 + fi + + local python_cmd="python3" + if ! 
command -v python3 &> /dev/null; then + python_cmd="python" + fi + + echo -e "${BLUE}Starting NeuriCo Interactive Mode...${NC}" + echo "" + + # Pass all arguments to the manager + # The manager will handle idea loading, Docker invocations, and user interaction + NEURICO_PROJECT_ROOT="$PROJECT_ROOT" \ + NEURICO_WORKSPACE_DIR="$(get_workspace_dir)" \ + $python_cmd "$PROJECT_ROOT/src/interactive/manager.py" "$@" +} + cmd_help() { show_banner show_status @@ -1669,6 +1740,7 @@ cmd_help() { echo " fetch [--submit] Fetch idea from IdeaHub" echo " submit Submit a research idea" echo " run [options] Run research exploration" + echo " interactive Interactive mode with human-in-the-loop" echo " update-tools Update Claude/Codex/Gemini to latest versions" echo " bump-version Bump version across all files (e.g., 0.3.0)" echo " up Start container in background (compose)" @@ -1683,6 +1755,7 @@ cmd_help() { echo "Daily usage:" echo " $0 fetch https://ideahub.example.com/idea/123 --submit --run --provider claude --full-permissions" echo " $0 run my-idea-id --provider claude --full-permissions" + echo " $0 interactive my-idea-id --provider claude" echo " $0 shell" echo "" } @@ -1728,6 +1801,12 @@ case "$ACTION" in run) cmd_run "$@" ;; + _run-agent) + cmd__run_agent "$@" + ;; + interactive) + cmd_interactive "$@" + ;; update-tools) cmd_update_tools ;; diff --git a/src/core/agent_runner.py b/src/core/agent_runner.py new file mode 100644 index 0000000..6e7d63b --- /dev/null +++ b/src/core/agent_runner.py @@ -0,0 +1,523 @@ +""" +Standalone Agent Runner + +Runs a single research agent (resource_finder, experiment_runner, paper_writer, +comment_handler) in a workspace directory WITHOUT managing idea lifecycle. + +This is used by the interactive mode manager to invoke individual agents. 
+Unlike runner.py which handles the full pipeline + idea state transitions, +this module: +- Takes a workspace path + idea spec + agent name +- Runs just that one agent +- Tracks invocation status via .neurico/runs// +- Does NOT move idea files between folders +- Does NOT manage GitHub integration +- Does NOT impose timeouts (the caller handles that) + +Usage (inside Docker): + python src/core/agent_runner.py --workspace /path --provider claude --run-id rf_001 --idea-file /path/to/idea.yaml + +Supported agents: resource_finder, experiment_runner, paper_writer, comment_handler +""" + +from pathlib import Path +from typing import Dict, Any, Optional +import argparse +import json +import os +import shlex +import subprocess +import sys +import time +import traceback +from datetime import datetime + +# Add parent directory to path for imports +sys.path.insert(0, str(Path(__file__).parent.parent)) + +from core.security import sanitize_text + + +# CLI commands for different providers +CLI_COMMANDS = { + 'claude': 'claude -p', + 'codex': 'codex exec', + 'gemini': 'gemini' +} + +# CLI flags for verbose/structured transcript output +TRANSCRIPT_FLAGS = { + 'claude': '--verbose --output-format stream-json', + 'codex': '--json', + 'gemini': '--output-format stream-json' +} + + +class RunTracker: + """ + Tracks a single agent invocation via .neurico/runs//. + + Provides robust status tracking so the manager never has to guess + whether an agent is running, succeeded, or failed. 
+ """ + + def __init__(self, work_dir: Path, run_id: str, agent_name: str): + self.run_dir = work_dir / ".neurico" / "runs" / run_id + self.run_dir.mkdir(parents=True, exist_ok=True) + self.status_file = self.run_dir / "status.json" + self.result_file = self.run_dir / "result.json" + self.error_file = self.run_dir / "error.json" + self.run_id = run_id + self.agent_name = agent_name + + def mark_running(self, pid: int): + """Mark this run as started.""" + self._write_status({ + "run_id": self.run_id, + "agent": self.agent_name, + "status": "running", + "pid": pid, + "started_at": datetime.now().isoformat(), + "completed_at": None, + "exit_code": None + }) + + def mark_completed(self, exit_code: int, result: Dict[str, Any]): + """Mark this run as successfully completed.""" + status = self._read_status() + status["status"] = "completed" + status["completed_at"] = datetime.now().isoformat() + status["exit_code"] = exit_code + self._write_status(status) + + with open(self.result_file, 'w') as f: + json.dump(result, f, indent=2) + + def mark_failed(self, exit_code: Optional[int], error_msg: str, tb: Optional[str] = None): + """Mark this run as failed.""" + status = self._read_status() + status["status"] = "failed" + status["completed_at"] = datetime.now().isoformat() + status["exit_code"] = exit_code + self._write_status(status) + + error_info = { + "error": error_msg, + "traceback": tb, + "timestamp": datetime.now().isoformat() + } + with open(self.error_file, 'w') as f: + json.dump(error_info, f, indent=2) + + def mark_stopped(self): + """Mark this run as stopped (by user/manager).""" + status = self._read_status() + status["status"] = "stopped" + status["completed_at"] = datetime.now().isoformat() + self._write_status(status) + + def _read_status(self) -> Dict[str, Any]: + if self.status_file.exists(): + with open(self.status_file) as f: + return json.load(f) + return {} + + def _write_status(self, status: Dict[str, Any]): + with open(self.status_file, 'w') as f: + 
json.dump(status, f, indent=2) + + +def _build_agent_command(provider: str, full_permissions: bool = True, + use_scribe: bool = False) -> str: + """Build the CLI command for launching an agent.""" + if use_scribe: + cmd = f"scribe {provider}" + else: + cmd = CLI_COMMANDS[provider] + + # Add permission flags + if full_permissions: + if provider == "codex": + cmd += " --yolo" + elif provider == "claude": + cmd += " --dangerously-skip-permissions" + elif provider == "gemini": + cmd += " --yolo" + + # Add transcript/JSON output flags + transcript_flag = TRANSCRIPT_FLAGS.get(provider, '') + if transcript_flag: + cmd += f" {transcript_flag}" + + return cmd + + +def _run_cli_agent(cmd: str, prompt: str, work_dir: Path, + log_file: Path, transcript_file: Path, + tracker: RunTracker) -> Dict[str, Any]: + """ + Execute a CLI agent with streaming output capture. + + This is the common execution pattern shared by all agents. + """ + env = os.environ.copy() + env['PYTHONUNBUFFERED'] = '1' + + # Disable IDE integration for Gemini CLI + if 'gemini' in cmd: + env['GEMINI_CLI_IDE_DISABLE'] = '1' + + log_file.parent.mkdir(parents=True, exist_ok=True) + + start_time = time.time() + + with open(log_file, 'w') as log_f, open(transcript_file, 'w') as transcript_f: + process = subprocess.Popen( + shlex.split(cmd), + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + env=env, + text=True, + bufsize=1, + cwd=str(work_dir) + ) + + tracker.mark_running(process.pid) + + # Send prompt via stdin + process.stdin.write(prompt) + process.stdin.close() + + # Stream output + for line in iter(process.stdout.readline, ''): + if line: + sanitized_line = sanitize_text(line) + print(sanitized_line, end='') + log_f.write(sanitized_line) + transcript_f.write(sanitized_line) + + return_code = process.wait() + + elapsed = time.time() - start_time + success = return_code == 0 + + return { + 'success': success, + 'return_code': return_code, + 'elapsed_time': elapsed, + 'log_file': 
str(log_file), + 'transcript_file': str(transcript_file) + } + + +def run_resource_finder(idea: Dict[str, Any], work_dir: Path, provider: str, + tracker: RunTracker, full_permissions: bool = True, + templates_dir: Optional[Path] = None) -> Dict[str, Any]: + """Run the resource finder agent.""" + from agents.resource_finder import generate_resource_finder_prompt + + if templates_dir is None: + templates_dir = Path(__file__).parent.parent.parent / "templates" + + print(f"๐Ÿ” Starting Resource Finder Agent (run: {tracker.run_id})") + print(f" Provider: {provider}") + print(f" Work dir: {work_dir}") + + # Generate prompt + prompt = generate_resource_finder_prompt(idea, templates_dir) + + # Save prompt for reference + prompt_file = work_dir / "logs" / "resource_finder_prompt.txt" + prompt_file.parent.mkdir(parents=True, exist_ok=True) + with open(prompt_file, 'w', encoding='utf-8') as f: + f.write(prompt) + + # Build and run command + cmd = _build_agent_command(provider, full_permissions) + log_file = work_dir / "logs" / f"resource_finder_{provider}.log" + transcript_file = work_dir / "logs" / f"resource_finder_{provider}_transcript.jsonl" + + result = _run_cli_agent(cmd, prompt, work_dir, log_file, transcript_file, tracker) + + # Check for outputs + outputs = {} + output_paths = { + 'literature_review': work_dir / "literature_review.md", + 'resources_catalog': work_dir / "resources.md", + 'papers_dir': work_dir / "papers", + 'datasets_dir': work_dir / "datasets", + 'code_dir': work_dir / "code" + } + for name, path in output_paths.items(): + if path.exists(): + outputs[name] = str(path) + + result['outputs'] = outputs + return result + + +def run_experiment_runner(idea: Dict[str, Any], work_dir: Path, provider: str, + tracker: RunTracker, full_permissions: bool = True, + use_scribe: bool = False, + templates_dir: Optional[Path] = None) -> Dict[str, Any]: + """ + Run the experiment runner agent. + + Extracted from pipeline_orchestrator.py to be callable standalone. 
+ """ + from templates.prompt_generator import PromptGenerator + from templates.research_agent_instructions import generate_instructions + + if templates_dir is None: + templates_dir = Path(__file__).parent.parent.parent / "templates" + + print(f"๐Ÿงช Starting Experiment Runner Agent (run: {tracker.run_id})") + print(f" Provider: {provider}") + print(f" Work dir: {work_dir}") + + # Generate research prompt + prompt_generator = PromptGenerator(templates_dir) + prompt = prompt_generator.generate_research_prompt(idea, root_dir=work_dir) + + # Save prompt + prompt_file = work_dir / "logs" / "research_prompt.txt" + prompt_file.parent.mkdir(parents=True, exist_ok=True) + with open(prompt_file, 'w', encoding='utf-8') as f: + f.write(prompt) + + # Generate session instructions + domain = idea.get('idea', {}).get('domain', 'general') + session_instructions = generate_instructions( + prompt=prompt, + work_dir=str(work_dir), + use_scribe=use_scribe, + domain=domain + ) + + # Save session instructions + session_file = work_dir / "logs" / "session_instructions.txt" + with open(session_file, 'w', encoding='utf-8') as f: + f.write(session_instructions) + + # Build and run command + cmd = _build_agent_command(provider, full_permissions, use_scribe) + if use_scribe: + env_extra = {'SCRIBE_RUN_DIR': str(work_dir)} + os.environ.update(env_extra) + + log_file = work_dir / "logs" / f"execution_{provider}.log" + transcript_file = work_dir / "logs" / f"execution_{provider}_transcript.jsonl" + + # Experiment runner uses session_instructions (not raw prompt) as input + result = _run_cli_agent(cmd, session_instructions, work_dir, log_file, transcript_file, tracker) + + return result + + +def run_paper_writer(idea: Dict[str, Any], work_dir: Path, provider: str, + tracker: RunTracker, full_permissions: bool = True, + paper_style: str = "neurips", + templates_dir: Optional[Path] = None) -> Dict[str, Any]: + """Run the paper writer agent.""" + from agents.paper_writer import run_paper_writer as 
_run_paper_writer + + print(f"๐Ÿ“ Starting Paper Writer Agent (run: {tracker.run_id})") + print(f" Provider: {provider}") + print(f" Style: {paper_style}") + print(f" Work dir: {work_dir}") + + domain = idea.get('idea', {}).get('domain', 'general') + + # Delegate to existing paper_writer module (it handles prompt generation, + # style file copying, and CLI execution) + result = _run_paper_writer( + work_dir=work_dir, + provider=provider, + style=paper_style, + timeout=None, # No timeout in interactive mode + full_permissions=full_permissions, + domain=domain + ) + + return result + + +def run_comment_handler(idea: Dict[str, Any], work_dir: Path, provider: str, + tracker: RunTracker, full_permissions: bool = True, + templates_dir: Optional[Path] = None) -> Dict[str, Any]: + """Run the comment handler agent for targeted improvements.""" + from agents.comment_handler import generate_comment_prompt + + if templates_dir is None: + templates_dir = Path(__file__).parent.parent.parent / "templates" + + print(f"๐Ÿ’ฌ Starting Comment Handler Agent (run: {tracker.run_id})") + print(f" Provider: {provider}") + print(f" Work dir: {work_dir}") + + # Generate prompt from comments in the idea file + comments = idea.get('idea', {}).get('comments', []) + if not comments: + return {'success': False, 'error': 'No comments found in idea file'} + + prompt = generate_comment_prompt(idea, work_dir, templates_dir) + + # Build and run command + cmd = _build_agent_command(provider, full_permissions) + log_file = work_dir / "logs" / f"comment_handler_{provider}.log" + transcript_file = work_dir / "logs" / f"comment_handler_{provider}_transcript.jsonl" + + result = _run_cli_agent(cmd, prompt, work_dir, log_file, transcript_file, tracker) + + return result + + +# Agent dispatch table +AGENTS = { + 'resource_finder': run_resource_finder, + 'experiment_runner': run_experiment_runner, + 'paper_writer': run_paper_writer, + 'comment_handler': run_comment_handler, +} + + +def run_agent(agent_name: 
str, idea: Dict[str, Any], work_dir: Path, + provider: str, run_id: str, **kwargs) -> Dict[str, Any]: + """ + Run a single agent with full run tracking. + + This is the main entry point. It wraps the agent execution in a + try/finally to ensure status is always updated. + + Args: + agent_name: One of: resource_finder, experiment_runner, paper_writer, comment_handler + idea: Full idea specification (parsed YAML dict) + work_dir: Workspace directory for the research + provider: AI provider (claude, codex, gemini) + run_id: Unique identifier for this invocation + **kwargs: Additional agent-specific arguments (paper_style, use_scribe, etc.) + + Returns: + Result dictionary from the agent + """ + if agent_name not in AGENTS: + raise ValueError(f"Unknown agent: {agent_name}. Choose from: {list(AGENTS.keys())}") + + tracker = RunTracker(work_dir, run_id, agent_name) + agent_fn = AGENTS[agent_name] + + try: + result = agent_fn( + idea=idea, + work_dir=work_dir, + provider=provider, + tracker=tracker, + **kwargs + ) + + exit_code = result.get('return_code', 0 if result.get('success') else 1) + if result.get('success', False): + tracker.mark_completed(exit_code, result) + else: + tracker.mark_failed(exit_code, result.get('error', 'Agent returned unsuccessful')) + + return result + + except Exception as e: + tracker.mark_failed( + exit_code=1, + error_msg=str(e), + tb=traceback.format_exc() + ) + raise + + +def main(): + """CLI entry point for running agents inside Docker.""" + parser = argparse.ArgumentParser( + description="Run a single research agent (used by interactive mode)" + ) + parser.add_argument( + "agent", + choices=list(AGENTS.keys()), + help="Agent to run" + ) + parser.add_argument( + "--workspace", + required=True, + help="Workspace directory path" + ) + parser.add_argument( + "--provider", + default="claude", + choices=["claude", "codex", "gemini"], + help="AI provider (default: claude)" + ) + parser.add_argument( + "--run-id", + required=True, + help="Unique 
identifier for this invocation" + ) + parser.add_argument( + "--idea-file", + required=True, + help="Path to the idea YAML file" + ) + parser.add_argument( + "--full-permissions", + action=argparse.BooleanOptionalAction, + default=True, + help="Allow full permissions to CLI agents (default: True)" + ) + parser.add_argument( + "--paper-style", + default="neurips", + choices=["neurips", "icml", "acl", "ams"], + help="Paper style template (for paper_writer agent)" + ) + parser.add_argument( + "--use-scribe", + action="store_true", + help="Use scribe for notebook integration (for experiment_runner agent)" + ) + + args = parser.parse_args() + + # Load idea spec + import yaml + with open(args.idea_file, 'r') as f: + idea = yaml.safe_load(f) + + work_dir = Path(args.workspace) + + # Build kwargs based on agent type + kwargs = { + 'full_permissions': args.full_permissions, + } + if args.agent == 'paper_writer': + kwargs['paper_style'] = args.paper_style + if args.agent == 'experiment_runner': + kwargs['use_scribe'] = args.use_scribe + + # Run the agent + result = run_agent( + agent_name=args.agent, + idea=idea, + work_dir=work_dir, + provider=args.provider, + run_id=args.run_id, + **kwargs + ) + + # Print final status + if result.get('success'): + print(f"\nโœ… Agent {args.agent} completed successfully (run: {args.run_id})") + else: + print(f"\nโš ๏ธ Agent {args.agent} finished with issues (run: {args.run_id})") + + sys.exit(0 if result.get('success') else 1) + + +if __name__ == "__main__": + main() diff --git a/src/interactive/__init__.py b/src/interactive/__init__.py new file mode 100644 index 0000000..18e31e4 --- /dev/null +++ b/src/interactive/__init__.py @@ -0,0 +1,6 @@ +""" +NeuriCo Interactive Mode + +LLM-driven manager that orchestrates research agents dynamically, +engaging the human researcher at critical decision points. 
+""" diff --git a/src/interactive/llm_backend.py b/src/interactive/llm_backend.py new file mode 100644 index 0000000..d4ed1de --- /dev/null +++ b/src/interactive/llm_backend.py @@ -0,0 +1,384 @@ +""" +LLM Backend Abstraction + +Provides a unified interface for calling LLMs, whether via CLI (claude -p) +or API (Anthropic SDK / OpenRouter). The backend is configured by the user +in config/manager.yaml or .env. +""" + +from pathlib import Path +from typing import Dict, Any, List, Optional +from dataclasses import dataclass, field +import json +import os +import shlex +import subprocess + + +@dataclass +class ToolCall: + """A tool call parsed from the LLM response.""" + id: str + name: str + arguments: Dict[str, Any] + + +@dataclass +class LLMResponse: + """Parsed response from the LLM.""" + text: str + tool_calls: List[ToolCall] = field(default_factory=list) + raw: Any = None + + +class LLMBackend: + """ + Unified LLM interface. Calls the configured backend and returns + parsed responses with tool calls. + """ + + def __init__(self, backend: str = "cli", model: Optional[str] = None): + """ + Args: + backend: "cli", "anthropic_api", or "openrouter" + model: Model name override (None = default for backend) + """ + self.backend = backend + self.model = model + + def send(self, messages: List[Dict[str, Any]], + tools: Optional[List[Dict[str, Any]]] = None) -> LLMResponse: + """ + Send messages to the LLM and return the response. + + Args: + messages: Conversation messages in OpenAI-style format + [{"role": "system", "content": "..."}, {"role": "user", "content": "..."}, ...] 
+ tools: Optional tool definitions (for API backends with native tool support) + + Returns: + LLMResponse with text content and any tool calls + """ + if self.backend == "cli": + return self._send_cli(messages, tools) + elif self.backend == "anthropic_api": + return self._send_anthropic_api(messages, tools) + elif self.backend == "openrouter": + return self._send_openrouter(messages, tools) + else: + raise ValueError(f"Unknown backend: {self.backend}") + + def _send_cli(self, messages: List[Dict[str, Any]], + tools: Optional[List[Dict[str, Any]]] = None) -> LLMResponse: + """ + Send via `claude -p` CLI. Constructs a single prompt from all messages + and parses the streaming JSON response for tool_use blocks. + """ + # Build prompt from messages + prompt = self._messages_to_prompt(messages, tools) + + # Build command + cmd = "claude -p --verbose --output-format stream-json" + if self.model: + cmd += f" --model {self.model}" + + process = subprocess.Popen( + shlex.split(cmd), + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + bufsize=1 + ) + + stdout, stderr = process.communicate(input=prompt) + + if process.returncode != 0: + # Try to extract useful error info + error_msg = stderr.strip() if stderr else f"claude -p exited with code {process.returncode}" + raise RuntimeError(f"CLI backend error: {error_msg}") + + return self._parse_cli_response(stdout) + + def _messages_to_prompt(self, messages: List[Dict[str, Any]], + tools: Optional[List[Dict[str, Any]]] = None) -> str: + """ + Convert structured messages into a single text prompt for CLI mode. + Includes tool definitions in the prompt text. 
+ """ + parts = [] + + for msg in messages: + role = msg["role"] + content = msg.get("content", "") + + if role == "system": + parts.append(content) + elif role == "user": + parts.append(f"\n\n{content}\n") + elif role == "assistant": + parts.append(f"\n\n{content}\n") + elif role == "tool_result": + tool_call_id = msg.get("tool_call_id", "") + parts.append(f"\n\n{content}\n") + + # Append tool definitions if provided + if tools: + parts.append("\n") + for tool in tools: + parts.append(f"\n") + parts.append(f"Description: {tool.get('description', '')}") + if 'parameters' in tool: + parts.append(f"Parameters: {json.dumps(tool['parameters'], indent=2)}") + parts.append("") + parts.append("\n") + + parts.append( + "\n\nTo use a tool, respond with a block like this:" + '\n' + "\n{\"param1\": \"value1\", \"param2\": \"value2\"}" + "\n" + "\n\nYou can include text before or after tool calls. " + "You can make multiple tool calls in one response." + ) + + return "\n".join(parts) + + def _parse_cli_response(self, stdout: str) -> LLMResponse: + """ + Parse the streaming JSON output from `claude -p --output-format stream-json`. + Extracts text content and tool_use blocks. 
+ """ + text_parts = [] + tool_calls = [] + raw_events = [] + + for line in stdout.strip().split('\n'): + line = line.strip() + if not line: + continue + + try: + event = json.loads(line) + raw_events.append(event) + except json.JSONDecodeError: + # Non-JSON output โ€” treat as text + text_parts.append(line) + continue + + event_type = event.get("type", "") + + # Handle different streaming event types + if event_type == "assistant" and "message" in event: + # Final assistant message with content blocks + for block in event.get("message", {}).get("content", []): + if block.get("type") == "text": + text_parts.append(block["text"]) + elif block.get("type") == "tool_use": + tool_calls.append(ToolCall( + id=block.get("id", ""), + name=block["name"], + arguments=block.get("input", {}) + )) + + elif event_type == "content_block_delta": + delta = event.get("delta", {}) + if delta.get("type") == "text_delta": + text_parts.append(delta.get("text", "")) + + elif event_type == "result": + # Claude Code result format + result_text = event.get("result", "") + if result_text and not text_parts: + text_parts.append(result_text) + + # Also try parsing text for tool_call XML blocks (fallback for CLI mode) + full_text = "".join(text_parts) + if "]*>.*?', '', full_text, flags=re.DOTALL).strip() + + return LLMResponse( + text=full_text, + tool_calls=tool_calls, + raw=raw_events + ) + + def _parse_xml_tool_calls(self, text: str) -> List[ToolCall]: + """Parse XML blocks from text output.""" + import re + tool_calls = [] + pattern = r'\s*(.*?)\s*' + for match in re.finditer(pattern, text, re.DOTALL): + name = match.group(1) + args_str = match.group(2).strip() + try: + arguments = json.loads(args_str) + except json.JSONDecodeError: + arguments = {"raw": args_str} + tool_calls.append(ToolCall( + id=f"call_{name}_{len(tool_calls)}", + name=name, + arguments=arguments + )) + return tool_calls + + def _send_anthropic_api(self, messages: List[Dict[str, Any]], + tools: Optional[List[Dict[str, 
Any]]] = None) -> LLMResponse: + """Send via Anthropic Python SDK. Requires ANTHROPIC_API_KEY.""" + try: + import anthropic + except ImportError: + raise ImportError( + "anthropic package required for API backend. " + "Install with: pip install anthropic" + ) + + api_key = os.environ.get("ANTHROPIC_API_KEY") + if not api_key: + raise ValueError("ANTHROPIC_API_KEY environment variable required for anthropic_api backend") + + client = anthropic.Anthropic(api_key=api_key) + + # Separate system message from conversation + system_msg = "" + api_messages = [] + for msg in messages: + if msg["role"] == "system": + system_msg = msg["content"] + elif msg["role"] == "tool_result": + api_messages.append({ + "role": "user", + "content": [{"type": "tool_result", + "tool_use_id": msg.get("tool_call_id", ""), + "content": msg["content"]}] + }) + else: + api_messages.append({"role": msg["role"], "content": msg["content"]}) + + # Build API tool definitions + api_tools = None + if tools: + api_tools = [] + for tool in tools: + api_tools.append({ + "name": tool["name"], + "description": tool.get("description", ""), + "input_schema": tool.get("parameters", {"type": "object", "properties": {}}) + }) + + model = self.model or "claude-sonnet-4-20250514" + + kwargs = { + "model": model, + "max_tokens": 4096, + "messages": api_messages, + } + if system_msg: + kwargs["system"] = system_msg + if api_tools: + kwargs["tools"] = api_tools + + response = client.messages.create(**kwargs) + + # Parse response + text_parts = [] + tool_calls = [] + for block in response.content: + if block.type == "text": + text_parts.append(block.text) + elif block.type == "tool_use": + tool_calls.append(ToolCall( + id=block.id, + name=block.name, + arguments=block.input + )) + + return LLMResponse( + text="\n".join(text_parts), + tool_calls=tool_calls, + raw=response + ) + + def _send_openrouter(self, messages: List[Dict[str, Any]], + tools: Optional[List[Dict[str, Any]]] = None) -> LLMResponse: + """Send via 
OpenRouter API. Requires OPENROUTER_API_KEY.""" + try: + import httpx + except ImportError: + raise ImportError( + "httpx package required for OpenRouter backend. " + "Install with: pip install httpx" + ) + + api_key = os.environ.get("OPENROUTER_API_KEY") + if not api_key: + raise ValueError("OPENROUTER_API_KEY environment variable required for openrouter backend") + + model = self.model or "anthropic/claude-sonnet-4" + + payload = { + "model": model, + "messages": [{"role": m["role"], "content": m["content"]} for m in messages], + "max_tokens": 4096, + } + + if tools: + payload["tools"] = [{ + "type": "function", + "function": { + "name": t["name"], + "description": t.get("description", ""), + "parameters": t.get("parameters", {}) + } + } for t in tools] + + response = httpx.post( + "https://openrouter.ai/api/v1/chat/completions", + headers={ + "Authorization": f"Bearer {api_key}", + "Content-Type": "application/json", + }, + json=payload, + timeout=120 + ) + response.raise_for_status() + data = response.json() + + # Parse OpenAI-compatible response + choice = data["choices"][0]["message"] + text = choice.get("content", "") or "" + tool_calls = [] + + for tc in choice.get("tool_calls", []): + func = tc.get("function", {}) + args = func.get("arguments", "{}") + if isinstance(args, str): + try: + args = json.loads(args) + except json.JSONDecodeError: + args = {"raw": args} + tool_calls.append(ToolCall( + id=tc.get("id", ""), + name=func.get("name", ""), + arguments=args + )) + + return LLMResponse(text=text, tool_calls=tool_calls, raw=data) + + +def create_backend(config: Dict[str, Any]) -> LLMBackend: + """ + Create an LLM backend from configuration. + + Config can come from config/manager.yaml or environment variables. + Environment variables take precedence. 
+ """ + backend = os.environ.get("NEURICO_MANAGER_BACKEND", + config.get("manager", {}).get("llm_backend", "cli")) + model = os.environ.get("NEURICO_MANAGER_MODEL", + config.get("manager", {}).get("llm_model")) or None + + return LLMBackend(backend=backend, model=model) diff --git a/src/interactive/manager.py b/src/interactive/manager.py new file mode 100644 index 0000000..fe5f113 --- /dev/null +++ b/src/interactive/manager.py @@ -0,0 +1,437 @@ +""" +NeuriCo Interactive Manager + +The main agent loop for interactive mode. Orchestrates research agents +dynamically using LLM reasoning, engages the human at critical points, +and maintains session state across interactions. + +Usage: + ./neurico interactive [--provider claude] [--engagement balanced] + +Or directly: + NEURICO_PROJECT_ROOT=/path/to/NeuriCo python src/interactive/manager.py +""" + +import argparse +import json +import os +import signal +import sys +import time +from pathlib import Path +from typing import Dict, Any, List, Optional + +import yaml + +# Resolve project root and add to path +PROJECT_ROOT = Path(os.environ.get("NEURICO_PROJECT_ROOT", Path(__file__).parent.parent.parent)) +sys.path.insert(0, str(PROJECT_ROOT / "src")) + +from interactive.session_state import SessionState +from interactive.llm_backend import LLMBackend, LLMResponse, create_backend +from interactive.tools import ToolExecutor + + +def load_config() -> Dict[str, Any]: + """Load manager configuration from config/manager.yaml.""" + config_file = PROJECT_ROOT / "config" / "manager.yaml" + if config_file.exists(): + with open(config_file) as f: + return yaml.safe_load(f) or {} + return {} + + +def load_tool_definitions() -> List[Dict[str, Any]]: + """Load tool definitions from templates/manager/tools.yaml.""" + tools_file = PROJECT_ROOT / "templates" / "manager" / "tools.yaml" + if not tools_file.exists(): + raise FileNotFoundError(f"Tool definitions not found: {tools_file}") + + with open(tools_file) as f: + data = yaml.safe_load(f) 
+ + return data.get("tools", []) + + +def load_system_prompt(idea: Dict[str, Any], workspace: Path, + provider: str, config: Dict[str, Any]) -> str: + """Load and render the system prompt template.""" + prompt_file = PROJECT_ROOT / "templates" / "manager" / "system_prompt.txt" + if not prompt_file.exists(): + raise FileNotFoundError(f"System prompt not found: {prompt_file}") + + template = prompt_file.read_text() + + # Build engagement instructions based on config + engagement = config.get("manager", {}).get("engagement", "balanced") + engagement_map = { + "hands_off": ( + "You are in HANDS-OFF mode. Only engage the user for critical decisions: " + "major direction changes, complete failures, or when explicitly asked. " + "Proceed autonomously for routine decisions." + ), + "balanced": ( + "You are in BALANCED mode. Engage the user at stage transitions and " + "significant choices. Proceed autonomously for routine decisions." + ), + "hands_on": ( + "You are in HANDS-ON mode. Frequently check in with the user. " + "Present options at every meaningful decision point. " + "The user wants to be closely involved in the research process." 
+ ), + } + engagement_instructions = engagement_map.get(engagement, engagement_map["balanced"]) + + idea_content = idea.get("idea", {}) + + # Simple template rendering (avoid Jinja2 dependency on host) + rendered = template.replace("{{ idea_title }}", idea_content.get("title", "Unknown")) + rendered = rendered.replace("{{ hypothesis }}", idea_content.get("hypothesis", "Not specified")) + rendered = rendered.replace("{{ domain }}", idea_content.get("domain", "general")) + rendered = rendered.replace("{{ workspace_path }}", str(workspace)) + rendered = rendered.replace("{{ provider }}", provider) + rendered = rendered.replace("{{ engagement_instructions }}", engagement_instructions) + + return rendered + + +def find_idea(idea_id: str) -> Optional[Dict[str, Any]]: + """Find an idea by ID across submitted/in_progress/completed folders.""" + ideas_dir = PROJECT_ROOT / "ideas" + for folder in ["submitted", "in_progress", "completed"]: + folder_path = ideas_dir / folder + if not folder_path.exists(): + continue + for yaml_file in folder_path.glob("*.yaml"): + with open(yaml_file) as f: + idea = yaml.safe_load(f) + if idea and idea.get("idea", {}).get("metadata", {}).get("idea_id") == idea_id: + return idea, yaml_file + return None, None + + +def find_workspace(idea_id: str) -> Optional[Path]: + """Find the workspace directory for an idea.""" + workspace_base = Path(os.environ.get("NEURICO_WORKSPACE_DIR", + PROJECT_ROOT / "workspaces")) + if not workspace_base.exists(): + return None + + # Workspace dirs are named: {slug}_{provider}_{timestamp} or {slug}_{hash}_{provider} + # Just find any directory containing the idea_id slug + slug = idea_id.replace("-", "_").lower() + + candidates = [] + for d in workspace_base.iterdir(): + if d.is_dir() and slug in d.name.lower().replace("-", "_"): + candidates.append(d) + + if candidates: + # Return the most recently modified one + return max(candidates, key=lambda d: d.stat().st_mtime) + + # If no match, create a new workspace + 
workspace = workspace_base / f"{idea_id}_interactive" + workspace.mkdir(parents=True, exist_ok=True) + return workspace + + +class InteractiveManager: + """ + The main interactive manager agent loop. + + Implements a tool-use agent loop: + 1. Build messages from system prompt + conversation history + 2. Send to LLM + 3. Parse response for tool calls + 4. Execute tools, collect results + 5. If no tool calls, display to user and wait for input + 6. Repeat + """ + + def __init__(self, idea: Dict[str, Any], idea_file: Path, + workspace: Path, provider: str, config: Dict[str, Any]): + self.idea = idea + self.idea_file = idea_file + self.workspace = workspace + self.provider = provider + self.config = config + + idea_content = idea.get("idea", {}) + idea_id = idea_content.get("metadata", {}).get("idea_id", "unknown") + idea_title = idea_content.get("title", "Unknown") + + # Initialize components + self.session = SessionState(workspace, idea_id, idea_title, provider) + self.backend = create_backend(config) + self.tools = ToolExecutor(workspace, self.session, idea_file, provider, PROJECT_ROOT) + self.tool_definitions = load_tool_definitions() + self.system_prompt = load_system_prompt(idea, workspace, provider, config) + + # Conversation history (in-memory, backed by session) + self.messages: List[Dict[str, Any]] = [] + + # Polling config + manager_config = config.get("manager", {}) + self.poll_interval = manager_config.get("poll_interval", 60) + self.engagement_interval = manager_config.get("engagement_interval", 1800) + + self._shutdown = False + + def run(self): + """Main agent loop.""" + # Setup signal handler for graceful shutdown + signal.signal(signal.SIGINT, self._handle_sigint) + + print() + print("=" * 70) + print(" NeuriCo Interactive Mode") + print("=" * 70) + idea_content = self.idea.get("idea", {}) + print(f" Idea: {idea_content.get('title', 'Unknown')}") + print(f" Provider: {self.provider}") + print(f" Workspace: {self.workspace}") + print(f" Backend: 
{self.backend.backend}") + if self.session.is_resuming: + print(f" Resuming previous session: {self.session.session_id}") + print("=" * 70) + print() + + # Build initial messages + self.messages = [{"role": "system", "content": self.system_prompt}] + + # If resuming, add resume context + if self.session.is_resuming: + resume_ctx = self.session.get_resume_context() + # Load recent conversation history + history = self.session.load_conversation(max_messages=20) + for msg in history: + self.messages.append(msg) + self.messages.append({ + "role": "user", + "content": f"[Session resumed]\n{resume_ctx}\n\nPlease review the state and continue." + }) + else: + # First turn: present the idea + idea_yaml = yaml.dump(idea_content, default_flow_style=False) + self.messages.append({ + "role": "user", + "content": ( + f"Here is the research idea to work on:\n\n```yaml\n{idea_yaml}```\n\n" + "Please analyze this idea and propose how to proceed. " + "What should we investigate first?" + ) + }) + + # Agent loop + while not self._shutdown: + try: + self._agent_step() + except KeyboardInterrupt: + self._handle_sigint(None, None) + except Exception as e: + print(f"\n[Manager Error] {e}") + print("The session state has been saved. 
You can resume with the same command.") + break + + def _agent_step(self): + """Execute one step of the agent loop.""" + # Call LLM + print("\n[Manager thinking...]") + response = self.backend.send(self.messages, self.tool_definitions) + + # Handle tool calls + if response.tool_calls: + # Show what the manager is doing + assistant_msg = {"role": "assistant", "content": response.text, "tool_calls": [ + {"id": tc.id, "name": tc.name, "arguments": tc.arguments} + for tc in response.tool_calls + ]} + self.messages.append(assistant_msg) + self.session.append_message(assistant_msg) + + if response.text: + print(f"\n{response.text}") + + for tc in response.tool_calls: + print(f"\n[Executing: {tc.name}({json.dumps(tc.arguments, indent=2)})]") + + result = self.tools.execute(tc.name, tc.arguments) + + # If running an agent, enter polling mode + if tc.name == "run_agent" and self.tools.has_running_agents: + result = self._wait_for_agent_with_polling(result, tc.arguments.get("agent", "")) + + # Add tool result to conversation + tool_result_msg = { + "role": "tool_result", + "tool_call_id": tc.id, + "content": result + } + self.messages.append(tool_result_msg) + self.session.append_message(tool_result_msg) + + else: + # No tool calls โ€” display to user and wait for input + assistant_msg = {"role": "assistant", "content": response.text} + self.messages.append(assistant_msg) + self.session.append_message(assistant_msg) + + print(f"\n{response.text}") + + # Wait for user input + print() + try: + user_input = input("[You] ").strip() + except EOFError: + self._shutdown = True + return + + if not user_input: + return + + if user_input.lower() in ("exit", "quit", "done"): + self._handle_exit() + return + + user_msg = {"role": "user", "content": user_input} + self.messages.append(user_msg) + self.session.append_message(user_msg) + + def _wait_for_agent_with_polling(self, initial_result: str, agent_name: str) -> str: + """ + Wait for a running agent to complete, polling periodically. 
+ User can interrupt with Ctrl+C to interact. + """ + print(f"\n[Agent '{agent_name}' running. Press Ctrl+C to interact while it runs.]") + + last_engagement = time.time() + final_result = initial_result + + while self.tools.has_running_agents and not self._shutdown: + try: + time.sleep(self.poll_interval) + except KeyboardInterrupt: + # User wants to interact โ€” return control to the agent loop + print("\n[Pausing to interact. The agent is still running in the background.]") + return initial_result + "\n[Agent still running. User requested interaction.]" + + # Check for completed agents + completed = self.tools.check_running_agents() + if completed: + results = [] + for c in completed: + status = "successfully" if c["success"] else f"with exit code {c['exit_code']}" + results.append(f"Agent run {c['run_id']} completed {status}.") + final_result = initial_result + "\n" + "\n".join(results) + break + + # Periodic engagement + elapsed = time.time() - last_engagement + if elapsed >= self.engagement_interval: + print(f"\n[Agent still running ({int(elapsed/60)} min)...]") + last_engagement = time.time() + + return final_result + + def _handle_sigint(self, signum, frame): + """Handle Ctrl+C gracefully.""" + if self.tools.has_running_agents: + print("\n\n[Ctrl+C detected. Agents are still running in Docker.]") + print("[Type 'exit' to stop, or press Enter to continue interacting.]") + else: + print("\n\n[Ctrl+C detected. 
Saving session...]") + self._handle_exit() + + def _handle_exit(self): + """Save session and exit.""" + self._shutdown = True + print("\n[Saving session state...]") + # Session is auto-saved on each state change + print(f"[Session saved to {self.session.session_file}]") + print("[You can resume with: ./neurico interactive ]") + + +def main(): + parser = argparse.ArgumentParser( + description="NeuriCo Interactive Research Manager" + ) + parser.add_argument( + "idea_id", + help="ID of the research idea" + ) + parser.add_argument( + "--provider", + default=None, + choices=["claude", "codex", "gemini"], + help="AI provider for research agents (default: from config)" + ) + parser.add_argument( + "--engagement", + default=None, + choices=["hands_off", "balanced", "hands_on"], + help="Engagement level (default: from config)" + ) + parser.add_argument( + "--backend", + default=None, + choices=["cli", "anthropic_api", "openrouter"], + help="LLM backend for manager reasoning (default: from config)" + ) + + args = parser.parse_args() + + # Load config + config = load_config() + manager_config = config.setdefault("manager", {}) + + # Apply CLI overrides + if args.provider: + manager_config["default_provider"] = args.provider + if args.engagement: + manager_config["engagement"] = args.engagement + if args.backend: + manager_config["llm_backend"] = args.backend + os.environ["NEURICO_MANAGER_BACKEND"] = args.backend + + provider = manager_config.get("default_provider", "claude") + + # Find idea + idea, idea_file = find_idea(args.idea_id) + if idea is None: + print(f"Error: Idea '{args.idea_id}' not found in ideas/submitted/, ideas/in_progress/, or ideas/completed/") + print("Submit an idea first with: ./neurico submit ") + sys.exit(1) + + # Find or create workspace + workspace = find_workspace(args.idea_id) + if workspace is None: + print(f"Error: Could not find or create workspace for idea '{args.idea_id}'") + sys.exit(1) + + # Move idea to in_progress if it's in submitted + 
idea_status = idea.get("idea", {}).get("metadata", {}).get("status", "submitted") + if idea_status == "submitted": + # Import idea_manager for state transition + try: + sys.path.insert(0, str(PROJECT_ROOT / "src")) + from core.idea_manager import IdeaManager + im = IdeaManager(PROJECT_ROOT / "ideas") + im.update_status(args.idea_id, "in_progress") + print(f"[Idea moved to in_progress]") + except Exception as e: + print(f"[Warning: Could not update idea status: {e}]") + + # Launch manager + manager = InteractiveManager( + idea=idea, + idea_file=idea_file, + workspace=workspace, + provider=provider, + config=config + ) + manager.run() + + +if __name__ == "__main__": + main() diff --git a/src/interactive/session_state.py b/src/interactive/session_state.py new file mode 100644 index 0000000..651818f --- /dev/null +++ b/src/interactive/session_state.py @@ -0,0 +1,184 @@ +""" +Session State Management + +Handles persistent session state and conversation history for the +interactive manager. Enables resume, error recovery, and context compaction. +""" + +from pathlib import Path +from typing import Dict, Any, List, Optional +import json +import uuid +from datetime import datetime + + +class SessionState: + """ + Manages persistent session state stored in .neurico/manager_session.json + and conversation history in .neurico/manager_conversation.jsonl. 
+ """ + + def __init__(self, work_dir: Path, idea_id: str, idea_title: str, provider: str): + self.work_dir = Path(work_dir) + self.neurico_dir = self.work_dir / ".neurico" + self.neurico_dir.mkdir(parents=True, exist_ok=True) + + self.session_file = self.neurico_dir / "manager_session.json" + self.conversation_file = self.neurico_dir / "manager_conversation.jsonl" + + # Load or create session + if self.session_file.exists(): + with open(self.session_file) as f: + self.state = json.load(f) + else: + self.state = { + "session_id": str(uuid.uuid4()), + "started_at": datetime.now().isoformat(), + "idea_id": idea_id, + "idea_title": idea_title, + "provider": provider, + "status": "active", + "agents_run": [], + "conversation_summary": "", + "key_findings": [], + "open_questions": [], + "phase": "starting", + "user_preferences": {} + } + self._save_state() + + def _save_state(self): + """Write session state to disk.""" + with open(self.session_file, 'w') as f: + json.dump(self.state, f, indent=2) + + @property + def session_id(self) -> str: + return self.state["session_id"] + + @property + def is_resuming(self) -> bool: + """True if this session was loaded from an existing file.""" + return len(self.state.get("agents_run", [])) > 0 + + def record_agent_start(self, agent_name: str, run_id: str): + """Record that an agent has been started.""" + self.state["agents_run"].append({ + "agent": agent_name, + "run_id": run_id, + "started": datetime.now().isoformat(), + "completed": None, + "success": None, + "exit_code": None + }) + self._save_state() + + def record_agent_complete(self, run_id: str, success: bool, exit_code: Optional[int] = None): + """Record that an agent has completed.""" + for entry in self.state["agents_run"]: + if entry["run_id"] == run_id: + entry["completed"] = datetime.now().isoformat() + entry["success"] = success + entry["exit_code"] = exit_code + break + self._save_state() + + def update_findings(self, key_findings: Optional[List[str]] = None, + 
open_questions: Optional[List[str]] = None, + phase: Optional[str] = None): + """Update session findings and questions.""" + if key_findings: + # Append new findings (deduplicate) + existing = set(self.state["key_findings"]) + for f in key_findings: + if f not in existing: + self.state["key_findings"].append(f) + if open_questions is not None: + self.state["open_questions"] = open_questions + if phase: + self.state["phase"] = phase + self._save_state() + + def update_conversation_summary(self, summary: str): + """Update the compacted conversation summary.""" + self.state["conversation_summary"] = summary + self._save_state() + + def mark_completed(self): + """Mark the session as completed.""" + self.state["status"] = "completed" + self.state["completed_at"] = datetime.now().isoformat() + self._save_state() + + # --- Conversation history --- + + def append_message(self, message: Dict[str, Any]): + """Append a message to the conversation history.""" + with open(self.conversation_file, 'a') as f: + f.write(json.dumps(message) + '\n') + + def load_conversation(self, max_messages: Optional[int] = None) -> List[Dict[str, Any]]: + """Load conversation history from disk.""" + if not self.conversation_file.exists(): + return [] + + messages = [] + with open(self.conversation_file) as f: + for line in f: + line = line.strip() + if line: + messages.append(json.loads(line)) + + if max_messages and len(messages) > max_messages: + return messages[-max_messages:] + return messages + + def rewrite_conversation(self, messages: List[Dict[str, Any]]): + """Rewrite the conversation history (used during compaction).""" + with open(self.conversation_file, 'w') as f: + for msg in messages: + f.write(json.dumps(msg) + '\n') + + def get_resume_context(self) -> str: + """Build a context string for resuming a session.""" + summary = self.state.get("conversation_summary", "") + findings = self.state.get("key_findings", []) + questions = self.state.get("open_questions", []) + agents = 
self.state.get("agents_run", []) + phase = self.state.get("phase", "unknown") + + parts = [f"Resuming session (phase: {phase})."] + + if summary: + parts.append(f"\nPrevious session summary:\n{summary}") + + if findings: + parts.append("\nKey findings so far:") + for f in findings: + parts.append(f"- {f}") + + if questions: + parts.append("\nOpen questions:") + for q in questions: + parts.append(f"- {q}") + + if agents: + parts.append("\nAgents run in this session:") + for a in agents: + status = "completed" if a.get("success") else ("failed" if a.get("success") is False else "in progress") + parts.append(f"- {a['agent']} ({a['run_id']}): {status}") + + return "\n".join(parts) + + def generate_run_id(self, agent_name: str) -> str: + """Generate a unique run_id for an agent invocation.""" + # Count existing runs of this agent type + prefix_map = { + 'resource_finder': 'rf', + 'experiment_runner': 'er', + 'paper_writer': 'pw', + 'comment_handler': 'ch' + } + prefix = prefix_map.get(agent_name, agent_name[:2]) + count = sum(1 for a in self.state["agents_run"] if a["agent"] == agent_name) + return f"{prefix}_{count + 1:03d}" diff --git a/src/interactive/tools.py b/src/interactive/tools.py new file mode 100644 index 0000000..3dd7764 --- /dev/null +++ b/src/interactive/tools.py @@ -0,0 +1,332 @@ +""" +Tool Implementations for the Interactive Manager + +Each tool corresponds to an action the manager LLM can take. +Tools are executed by the manager's agent loop when the LLM +returns a tool call. +""" + +from pathlib import Path +from typing import Dict, Any, Optional, List +import json +import os +import subprocess +import shlex +import time +from datetime import datetime + +from interactive.session_state import SessionState + + +class ToolExecutor: + """ + Executes tools called by the manager LLM. + + Holds references to the workspace, session state, and Docker bridge + so that individual tool implementations can access them. 
+ """ + + def __init__(self, work_dir: Path, session: SessionState, + idea_file: Path, provider: str, project_root: Path): + self.work_dir = Path(work_dir) + self.session = session + self.idea_file = idea_file + self.provider = provider + self.project_root = project_root + + # Track running agent processes + self._running_agents: Dict[str, subprocess.Popen] = {} + + def execute(self, tool_name: str, arguments: Dict[str, Any]) -> str: + """ + Execute a tool and return the result as a string. + + Args: + tool_name: Name of the tool to execute + arguments: Tool arguments from the LLM + + Returns: + Result string to feed back to the LLM + """ + handlers = { + "run_agent": self._run_agent, + "check_workspace": self._check_workspace, + "read_agent_logs": self._read_agent_logs, + "ask_user": self._ask_user, + "update_session": self._update_session, + } + + handler = handlers.get(tool_name) + if not handler: + return f"Error: Unknown tool '{tool_name}'. Available: {list(handlers.keys())}" + + try: + return handler(arguments) + except Exception as e: + return f"Error executing {tool_name}: {e}" + + def _run_agent(self, args: Dict[str, Any]) -> str: + """Launch a research agent inside Docker.""" + agent_name = args.get("agent") + if not agent_name: + return "Error: 'agent' parameter is required" + + valid_agents = ["resource_finder", "experiment_runner", "paper_writer", "comment_handler"] + if agent_name not in valid_agents: + return f"Error: Unknown agent '{agent_name}'. 
Choose from: {valid_agents}" + + provider = args.get("provider", self.provider) + run_id = self.session.generate_run_id(agent_name) + + # Build the Docker command via ./neurico _run-agent + neurico_cmd = str(self.project_root / "neurico") + cmd_parts = [ + neurico_cmd, "_run-agent", agent_name, + "--workspace", str(self.work_dir), + "--provider", provider, + "--run-id", run_id, + "--idea-file", str(self.idea_file), + ] + + # Agent-specific args + if agent_name == "paper_writer" and args.get("paper_style"): + cmd_parts.extend(["--paper-style", args["paper_style"]]) + if agent_name == "experiment_runner" and args.get("use_scribe"): + cmd_parts.append("--use-scribe") + + # Record in session + self.session.record_agent_start(agent_name, run_id) + + # Launch as background subprocess + log_path = self.work_dir / ".neurico" / "runs" / run_id / "manager_stdout.log" + log_path.parent.mkdir(parents=True, exist_ok=True) + + with open(log_path, 'w') as log_f: + process = subprocess.Popen( + cmd_parts, + stdout=log_f, + stderr=subprocess.STDOUT, + text=True + ) + + self._running_agents[run_id] = process + + return ( + f"Agent '{agent_name}' started with run_id '{run_id}' (pid: {process.pid}).\n" + f"Use read_agent_logs with run_id='{run_id}' to check progress.\n" + f"Use check_workspace to inspect outputs when complete." 
+ ) + + def _check_workspace(self, args: Dict[str, Any]) -> str: + """Read files from the workspace.""" + action = args.get("action", "list") + rel_path = args.get("path", ".") + max_lines = args.get("max_lines", 200) + + target = self.work_dir / rel_path + + if not target.exists(): + return f"Path does not exist: {rel_path}" + + # Security: ensure we stay within the workspace + try: + target.resolve().relative_to(self.work_dir.resolve()) + except ValueError: + return f"Error: Path '{rel_path}' is outside the workspace" + + if action == "list": + if target.is_file(): + return f"{rel_path} is a file ({target.stat().st_size} bytes)" + + items = [] + for item in sorted(target.iterdir()): + if item.is_dir(): + # Count items in directory + try: + count = sum(1 for _ in item.iterdir()) + except PermissionError: + count = "?" + items.append(f" {item.name}/ ({count} items)") + else: + size = item.stat().st_size + items.append(f" {item.name} ({size} bytes)") + + if not items: + return f"Directory '{rel_path}' is empty" + + return f"Contents of {rel_path}:\n" + "\n".join(items) + + elif action == "read": + if target.is_dir(): + return f"Error: '{rel_path}' is a directory. Use action='list' instead." + + try: + lines = target.read_text(encoding='utf-8', errors='replace').split('\n') + except Exception as e: + return f"Error reading {rel_path}: {e}" + + if len(lines) > max_lines: + content = '\n'.join(lines[:max_lines]) + return f"[Showing first {max_lines} of {len(lines)} lines]\n{content}\n[... truncated]" + + return '\n'.join(lines) + + else: + return f"Error: Unknown action '{action}'. Use 'list' or 'read'." 
+ + def _read_agent_logs(self, args: Dict[str, Any]) -> str: + """Read logs and status for an agent run.""" + run_id = args.get("run_id") + if not run_id: + return "Error: 'run_id' parameter is required" + + tail_lines = args.get("tail_lines", 100) + run_dir = self.work_dir / ".neurico" / "runs" / run_id + + if not run_dir.exists(): + return f"No run found with id '{run_id}'" + + parts = [] + + # Check process status + process = self._running_agents.get(run_id) + if process: + poll_result = process.poll() + if poll_result is None: + parts.append(f"Status: RUNNING (pid: {process.pid})") + else: + parts.append(f"Status: EXITED (code: {poll_result})") + # Update session + self.session.record_agent_complete(run_id, poll_result == 0, poll_result) + del self._running_agents[run_id] + else: + # Check status.json + status_file = run_dir / "status.json" + if status_file.exists(): + with open(status_file) as f: + status = json.load(f) + parts.append(f"Status: {status.get('status', 'unknown').upper()}") + if status.get("exit_code") is not None: + parts.append(f"Exit code: {status['exit_code']}") + else: + parts.append("Status: UNKNOWN (no status file)") + + # Check for result or error files + result_file = run_dir / "result.json" + error_file = run_dir / "error.json" + + if result_file.exists(): + with open(result_file) as f: + result = json.load(f) + parts.append(f"\nResult: {json.dumps(result, indent=2)}") + + if error_file.exists(): + with open(error_file) as f: + error = json.load(f) + parts.append(f"\nError: {error.get('error', 'Unknown error')}") + if error.get("traceback"): + parts.append(f"Traceback:\n{error['traceback']}") + + # Read log tail + # Try the agent's actual log first, then the manager stdout capture + log_candidates = [ + run_dir / "manager_stdout.log", + ] + # Also check the workspace logs directory for agent-specific logs + for log_file in self.work_dir.glob("logs/*.log"): + log_candidates.append(log_file) + + for log_file in log_candidates: + if 
log_file.exists() and log_file.stat().st_size > 0: + try: + lines = log_file.read_text(errors='replace').split('\n') + tail = lines[-tail_lines:] if len(lines) > tail_lines else lines + parts.append(f"\nLog ({log_file.name}, last {len(tail)} lines):") + parts.append('\n'.join(tail)) + break # Only show one log + except Exception: + continue + + return '\n'.join(parts) + + def _ask_user(self, args: Dict[str, Any]) -> str: + """Present a message to the user and collect their response.""" + message = args.get("message", "") + options = args.get("options", []) + + print() + print("=" * 70) + print(message) + print("=" * 70) + + if options: + print() + for i, opt in enumerate(options, 1): + print(f" [{i}] {opt}") + print() + + while True: + response = input("Your choice (number or type your response): ").strip() + + # Check if they entered a number + try: + idx = int(response) - 1 + if 0 <= idx < len(options): + response = options[idx] + break + except ValueError: + pass + + # Accept free-form text too + if response: + break + print("Please enter a response.") + else: + print() + response = input("Your response: ").strip() + + return response + + def _update_session(self, args: Dict[str, Any]) -> str: + """Update session state.""" + key_findings = args.get("key_findings") + open_questions = args.get("open_questions") + phase = args.get("phase") + + self.session.update_findings( + key_findings=key_findings, + open_questions=open_questions, + phase=phase + ) + + updates = [] + if key_findings: + updates.append(f"Added {len(key_findings)} key finding(s)") + if open_questions is not None: + updates.append(f"Updated open questions ({len(open_questions)} items)") + if phase: + updates.append(f"Phase set to '{phase}'") + + return "Session updated: " + ", ".join(updates) if updates else "No changes" + + def check_running_agents(self) -> List[Dict[str, Any]]: + """Check status of all running agents. 
Returns list of completed ones.""" + completed = [] + for run_id, process in list(self._running_agents.items()): + poll_result = process.poll() + if poll_result is not None: + self.session.record_agent_complete(run_id, poll_result == 0, poll_result) + completed.append({ + "run_id": run_id, + "exit_code": poll_result, + "success": poll_result == 0 + }) + del self._running_agents[run_id] + return completed + + @property + def has_running_agents(self) -> bool: + """True if any agents are currently running.""" + # Clean up finished ones first + self.check_running_agents() + return len(self._running_agents) > 0 diff --git a/templates/manager/compact_context.txt b/templates/manager/compact_context.txt new file mode 100644 index 0000000..41aff99 --- /dev/null +++ b/templates/manager/compact_context.txt @@ -0,0 +1,19 @@ +You are compacting the conversation history for a NeuriCo interactive research session. + +The conversation has grown too long and needs to be summarized. Create a concise summary that preserves: + +1. What agents have been run and their key results +2. Important decisions made by the human researcher +3. Current research state (what has been done, what remains) +4. Key findings and open questions +5. Any user preferences or instructions expressed during the conversation + +Do NOT include: +- Raw log output or detailed tool results +- Routine back-and-forth that doesn't affect the research direction +- Technical details about Docker or execution mechanics + +Format as a structured summary with sections. Keep it under 500 words. + +## Conversation to summarize: +{{ conversation }} diff --git a/templates/manager/progress_summary.txt b/templates/manager/progress_summary.txt new file mode 100644 index 0000000..e4b0568 --- /dev/null +++ b/templates/manager/progress_summary.txt @@ -0,0 +1,7 @@ +Summarize the following agent log output for a human researcher. Be concise (2-3 sentences). 
Focus on what the agent is currently doing, any notable progress, and any issues. + +Agent: {{ agent_name }} +Running for: {{ elapsed_time }} + +Recent log output: +{{ log_tail }} diff --git a/templates/manager/system_prompt.txt b/templates/manager/system_prompt.txt new file mode 100644 index 0000000..9e082d2 --- /dev/null +++ b/templates/manager/system_prompt.txt @@ -0,0 +1,73 @@ +You are the NeuriCo Interactive Research Manager. You help a human researcher conduct AI-assisted research by orchestrating specialized research agents and maintaining a productive dialogue. + +## Your Role + +You are a research collaborator, not an autonomous pipeline. You: +- Analyze research ideas and propose investigation strategies +- Invoke specialized agents (resource finder, experiment runner, paper writer) as needed +- Monitor agent progress and summarize findings concisely +- Engage the human at critical decision points +- Adapt the research direction based on results and human feedback + +The human provides domain expertise, judgment, and research taste. You provide execution capability, thoroughness, and synthesis. + +## How You Work + +You have access to tools that let you run research agents inside a Docker container, inspect the workspace, and communicate with the human. The research agents do the heavy lifting (literature search, coding experiments, writing papers). Your job is to orchestrate them intelligently. + +Research is non-linear. You may need to: +- Run the resource finder, review results, then run it again with different focus +- Start experiments, discover issues, go back to find more resources +- Write a paper draft, get feedback, run additional experiments, revise + +Do not follow a rigid pipeline. Adapt to what the research needs. 
+ +## When to Engage the Human + +Use the `ask_user` tool when: +- An agent has completed and you need to decide whether to proceed, iterate, or pivot +- You encounter an unexpected result or failure that could go multiple ways +- The research direction would benefit from the human's domain expertise +- You are about to start a long-running or resource-intensive operation +- You have been running agents for a while without checking in + +Do NOT engage the human for: +- Routine status updates (just note them in your reasoning) +- Decisions that have a clear correct answer +- Low-level technical details they would not care about + +When you do engage, be concise and interesting: +- Lead with the most surprising or important finding +- Provide 2-3 specific options when asking for a decision +- Suggest follow-up questions the human might want to explore +- Never dump raw logs or thousands of lines of text + +## Communication Style + +Keep summaries to 2-4 paragraphs maximum. Be concrete: reference specific papers, numbers, methods, or results. Highlight what is surprising, concerning, or promising. + +If the human asks "what's happening?" during a long agent run, read the latest logs and give a brief status update. + +## Session Awareness + +You are maintaining a research session that may span multiple interactions. The session state is tracked in `.neurico/manager_session.json`. Use the `update_session` tool to record key findings and open questions as you discover them. + +If you are resuming a previous session, review the session state and conversation history to understand where things left off before taking any action. + +## Research Quality + +The goal is to produce work on par with human experts. This means: +- Do not settle for mediocre results. If an experiment fails or produces weak results, investigate why and try harder. +- If the resource finder misses important papers, run it again with refined queries. 
+- If experiments have bugs, acknowledge them and fix them rather than presenting flawed results. +- Push for rigor: proper baselines, statistical significance, reproducibility. + +{{ engagement_instructions }} + +## Available Information + +Research idea: {{ idea_title }} +Hypothesis: {{ hypothesis }} +Domain: {{ domain }} +Workspace: {{ workspace_path }} +Provider: {{ provider }} diff --git a/templates/manager/tools.yaml b/templates/manager/tools.yaml new file mode 100644 index 0000000..9ec4785 --- /dev/null +++ b/templates/manager/tools.yaml @@ -0,0 +1,119 @@ +# Tool definitions for the NeuriCo Interactive Manager +# These are loaded into the system prompt for the manager LLM +# and also used to generate API tool schemas when using API backends. + +tools: + - name: run_agent + description: | + Launch a research agent inside the Docker container. The agent runs + in the background and you can check its status later. Each invocation + gets a unique run_id for tracking. + parameters: + agent: + type: string + enum: [resource_finder, experiment_runner, paper_writer, comment_handler] + description: Which agent to run + required: true + provider: + type: string + enum: [claude, codex, gemini] + description: AI provider for the agent (default uses session provider) + required: false + paper_style: + type: string + enum: [neurips, icml, acl, ams] + description: Paper style template (only for paper_writer agent) + required: false + use_scribe: + type: boolean + description: Use Jupyter notebook integration (only for experiment_runner) + required: false + returns: | + A run_id string and immediate status. Use check_workspace or + read_agent_logs to monitor progress. The agent result will be + available at .neurico/runs/<run_id>/result.json when complete. + + - name: check_workspace + description: | + Read files from the research workspace to understand current state. + Can list directory contents or read specific files. 
+ parameters: + action: + type: string + enum: [list, read] + description: "list: show directory contents. read: show file content." + required: true + path: + type: string + description: | + Relative path within the workspace. Use "." for root. + Examples: ".", "literature_review.md", "results/", "logs/resource_finder_claude.log" + required: true + max_lines: + type: integer + description: Maximum lines to return when reading a file (default 200) + required: false + returns: Directory listing or file content as text. + + - name: read_agent_logs + description: | + Read recent log output from a running or completed agent. + Useful for understanding what an agent is doing or why it failed. + parameters: + run_id: + type: string + description: The run_id of the agent invocation to check + required: true + tail_lines: + type: integer + description: Number of lines from the end of the log to return (default 100) + required: false + returns: | + Agent status (running/completed/failed) and recent log lines. + If the agent wrote a result.json or error.json, those are included too. + + - name: ask_user + description: | + Present a message, question, or decision to the human researcher + and wait for their response. Use this for critical decision points, + not routine updates. + parameters: + message: + type: string + description: | + The message to show the user. Should be concise (2-4 paragraphs max). + Lead with findings, end with a question or options. + required: true + options: + type: array + items: + type: string + description: | + Optional list of specific options to present. If provided, + the user will be prompted to choose. If not, free-form input. + required: false + returns: The user's response as a string. + + - name: update_session + description: | + Update the session state with key findings, open questions, or + other metadata. This persists across session restarts and helps + maintain context over long research sessions. 
+ parameters: + key_findings: + type: array + items: + type: string + description: Key findings to add to the session (append, not replace) + required: false + open_questions: + type: array + items: + type: string + description: Open questions to track (replaces existing list) + required: false + phase: + type: string + description: Current research phase label (e.g., "exploring", "experimenting", "writing") + required: false + returns: Confirmation of the update.