diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..b79e2d8 --- /dev/null +++ b/.env.example @@ -0,0 +1,46 @@ +# OpenManus LLM Configuration +# Copy this file to .env and fill in your API keys + +# ============================================ +# BASIC LLM (Required) +# Used by most agents (coordinator, supervisor, etc.) +# ============================================ +BASIC_API_KEY=your-api-key-here +BASIC_BASE_URL=https://api.openai.com/v1 +BASIC_MODEL=gpt-3.5-turbo + +# ============================================ +# REASONING LLM (Optional) +# Used for complex reasoning tasks +# ============================================ +REASONING_API_KEY=your-api-key-here +REASONING_BASE_URL=https://api.deepseek.com/v1 +REASONING_MODEL=deepseek-r1 + +# ============================================ +# VISION-LANGUAGE LLM (Optional) +# Used for tasks involving images +# ============================================ +VL_API_KEY=your-api-key-here +VL_BASE_URL=https://api.openai.com/v1 +VL_MODEL=gpt-4o + +# ============================================ +# AZURE OPENAI (Optional Alternative) +# Uncomment and configure if using Azure +# ============================================ +# AZURE_API_BASE=https://your-resource.openai.azure.com +# AZURE_API_KEY=your-azure-key +# AZURE_API_VERSION=2024-02-15-preview +# BASIC_AZURE_DEPLOYMENT=your-gpt4-deployment-name +# VL_AZURE_DEPLOYMENT=your-vision-deployment-name +# REASONING_AZURE_DEPLOYMENT=your-reasoning-deployment-name + +# ============================================ +# BROWSER CONFIGURATION (Optional) +# ============================================ +# CHROME_INSTANCE_PATH=/path/to/chrome +# CHROME_HEADLESS=True +# CHROME_PROXY_SERVER=http://proxy:port +# CHROME_PROXY_USERNAME=username +# CHROME_PROXY_PASSWORD=password diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..dfdb8b7 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +*.sh text eol=lf diff --git a/.gitignore b/.gitignore 
index 15201ac..d14f217 100644 --- a/.gitignore +++ b/.gitignore @@ -169,3 +169,10 @@ cython_debug/ # PyPI configuration file .pypirc + +# IDE-specific directories +.claude/ + +# Internal planning documents +COMMIT_PLAN.md +NEXT_STEPS.md diff --git a/BUGFIX_REPORT.md b/BUGFIX_REPORT.md new file mode 100644 index 0000000..810bcaf --- /dev/null +++ b/BUGFIX_REPORT.md @@ -0,0 +1,413 @@ +# OpenManus Bug Fixes and Implementation Report + +## Executive Summary + +This document details all bugs encountered and fixed in the OpenManus project, along with the implementation of Manus API integration. The project was non-functional due to multiple critical issues across the Docker setup, backend workflow, frontend UI, and LLM integration. + +**Status**: All critical bugs resolved. System now fully operational with Manus API integration. + +--- + +## Bug Fixes (Chronological Order) + +### 1. Shell Script Line Endings Bug +**Severity**: Critical - Container startup failure +**Error**: `: not found` errors when running `start.sh` in Docker container +**Root Cause**: Windows CRLF line endings incompatible with Unix bash + +**Files Affected**: +- `docker/unified/start.sh` + +**Solution**: +- Converted CRLF to LF line endings using `sed -i 's/\r$//'` +- Created `.gitattributes` with `*.sh text eol=lf` to prevent future issues +- Changed Dockerfile CMD from `sh` to `bash` + +**Files Modified**: +- `docker/unified/start.sh` (line ending conversion) +- `.gitattributes` (NEW) +- `docker/unified/Dockerfile` (line 13: `sh` → `bash`) + +--- + +### 2. Missing LangGraph Command Import +**Severity**: Critical - Application crash on startup +**Error**: `TypeError: Expected a Runnable, callable or dict. 
Instead got an unsupported type: ` +**Root Cause**: Three node files missing `from langgraph.types import Command` import + +**Files Affected**: +- `src/agents/nodes/coordinator_node.py` +- `src/agents/nodes/planner_node.py` +- `src/agents/nodes/browser_node.py` + +**Solution**: +- Added `from langgraph.types import Command` to all three node files + +**Files Modified**: +- `src/agents/nodes/coordinator_node.py` (added line 5) +- `src/agents/nodes/planner_node.py` (added line 7) +- `src/agents/nodes/browser_node.py` (added line 5) + +--- + +### 3. Missing Package `__init__.py` +**Severity**: Critical - Module import failure +**Error**: Node modules not properly exported +**Root Cause**: `src/agents/nodes/` directory missing `__init__.py` + +**Solution**: +- Created `__init__.py` with proper exports for all node functions + +**Files Created**: +- `src/agents/nodes/__init__.py` (NEW - 19 lines) + +--- + +### 4. Invalid Graph Configuration +**Severity**: Critical - Workflow initialization failure +**Error**: `TypeError: add_edge() got an unexpected keyword argument 'condition'` +**Root Cause**: Graph using incorrect LangGraph API syntax for conditional edges + +**Files Affected**: +- `src/workflow/graph.py` + +**Solution**: +- Simplified graph to only set entry point +- Let Command objects handle routing instead of conditional edges +- Removed invalid `add_edge(..., condition=...)` syntax + +**Files Modified**: +- `src/workflow/graph.py` (lines 13-29, removed conditional edge logic) + +--- + +### 5. Template Regex Bug +**Severity**: High - Template formatting crash +**Error**: `IndexError: tuple index out of range` in string formatting +**Root Cause**: Incorrect regex pattern `r"{1}"` instead of `r"{\1}"` for backreference + +**Files Affected**: +- `src/prompts/template.py` + +**Solution**: +- Fixed regex pattern to use proper backreference syntax + +**Files Modified**: +- `src/prompts/template.py` (line 30: `r"{1}"` → `r"{\1}"`) + +--- + +### 6.
Missing TEAM_MEMBERS Variable +**Severity**: High - Prompt template failure +**Error**: `KeyError: 'TEAM_MEMBERS'` +**Root Cause**: Planner prompt uses `TEAM_MEMBERS` variable but template function didn't provide it + +**Files Affected**: +- `src/prompts/template.py` + +**Solution**: +- Updated `apply_prompt_template()` to import and include TEAM_MEMBERS from config +- Added dynamic template variable handling + +**Files Modified**: +- `src/prompts/template.py` (lines 44-70, added TEAM_MEMBERS support) + +--- + +### 7. Message Object Type Mismatch +**Severity**: High - Message processing crash +**Error**: `TypeError: 'ChatMessage' object is not subscriptable` +**Root Cause**: Trying to access Pydantic model with dict syntax `msg["role"]` + +**Files Affected**: +- `src/service/workflow_service.py` + +**Solution**: +- Changed from dict access (`msg["role"]`) to attribute access (`msg.role`) +- Updated message conversion logic for LangChain objects + +**Files Modified**: +- `src/service/workflow_service.py` (lines 26-32) + +--- + +### 8. PlaceholderLLM String Response Handling +**Severity**: Medium - Type error with placeholder LLM +**Error**: `AttributeError: 'str' object has no attribute 'content'` +**Root Cause**: PlaceholderLLM returns strings, code expected message objects with `.content` attribute + +**Files Affected**: +- `src/agents/nodes/coordinator_node.py` + +**Solution**: +- Added type checking to handle both string and message object responses +- Only string responses are used directly, message objects use `.content` + +**Files Modified**: +- `src/agents/nodes/coordinator_node.py` (lines 22-26) + +--- + +### 9. 
JSON Repair Corrupting Non-JSON +**Severity**: Medium - Message content corruption +**Error**: Plain text messages converted to empty string `""` +**Root Cause**: `json_repair` converting non-JSON plain text to empty JSON + +**Files Affected**: +- `src/agents/nodes/coordinator_node.py` + +**Solution**: +- Only call `repair_json_output()` if content looks like JSON (starts with `{` or `[`) + +**Files Modified**: +- `src/agents/nodes/coordinator_node.py` (lines 28-30) + +--- + +### 10. PlaceholderLLM Missing stream() Method +**Severity**: Medium - Method not found error +**Error**: `AttributeError: 'PlaceholderLLM' object has no attribute 'stream'` +**Root Cause**: `planner_node` calls `llm.stream()` but PlaceholderLLM only had `invoke()` + +**Files Affected**: +- `src/llms/llm.py` + +**Solution**: +- Added `stream()` method to PlaceholderLLM that yields response in chunks + +**Files Modified**: +- `src/llms/llm.py` (lines 32-37) + +--- + +### 11. Non-functional Frontend - No Event Handlers +**Severity**: Critical - UI completely non-interactive +**Error**: User reported "nothing happens when I click send" +**Root Cause**: ChatInput component had no event handlers at all + +**Files Affected**: +- `src/components/ChatInput.js` + +**Solution**: +- Complete rewrite adding useState, onChange, onClick, onKeyPress handlers +- Implemented proper form submission logic + +**Files Modified**: +- `src/components/ChatInput.js` (complete rewrite, 39 lines) + +--- + +### 12. 
Frontend Not Connected to Backend +**Severity**: Critical - No API communication +**Error**: Generic "network error" shown to user +**Root Cause**: Frontend not calling backend API at all + +**Files Affected**: +- `src/pages/index.js` + +**Solution**: +- Implemented fetch to `/api/chat/stream` with SSE parsing +- Added error handling with helpful messages +- Added response parsing for Python dict format + +**Files Modified**: +- `src/pages/index.js` (lines 9-105, added handleSendMessage function) + +--- + +### 13. Raw Data Displayed in UI +**Severity**: Medium - Poor user experience +**Error**: UI showing `{'coordinator': {'messages': [HumanMessage(...)]}}` instead of actual message +**Root Cause**: Frontend not parsing Python dict format from SSE stream + +**Files Affected**: +- `src/pages/index.js` + +**Solution**: +- Added regex extraction to parse `HumanMessage(content='...')` format +- Properly extract text content from nested structures + +**Files Modified**: +- `src/pages/index.js` (lines 65-75, added parsing logic) + +--- + +### 14. Improved ChatDisplay Styling +**Severity**: Low - UI/UX improvement +**Error**: No visual distinction between messages, poor empty state +**Root Cause**: Basic styling with no message bubbles or formatting + +**Files Affected**: +- `src/components/ChatDisplay.js` + +**Solution**: +- Added message bubbles with different colors for user/assistant +- Added empty state message +- Improved spacing and readability + +**Files Modified**: +- `src/components/ChatDisplay.js` (lines 3-42, enhanced styling) + +--- + +### 15. 
LLM Always Returns Placeholder +**Severity**: Critical - API integration not working +**Error**: Even with API keys configured, system uses PlaceholderLLM +**Root Cause**: `get_llm_by_type()` loaded config but always returned PlaceholderLLM + +**Files Affected**: +- `src/llms/llm.py` + +**Solution**: +- Added conditional logic to create ChatOpenAI when api_key and base_url exist +- Only return PlaceholderLLM when no API keys configured + +**Files Modified**: +- `src/llms/llm.py` (lines 58-65, added actual LLM client creation) + +--- + +### 16. Missing langchain-openai Package +**Severity**: High - Module import failure +**Error**: `ModuleNotFoundError: No module named 'langchain_openai'` +**Root Cause**: Added import but package not in requirements.txt + +**Files Affected**: +- `requirements.txt` + +**Solution**: +- Added `langchain-openai>=0.2.0` to requirements.txt + +**Files Modified**: +- `requirements.txt` (line 13, added langchain-openai) + +--- + +### 17. Client.py Wrong Configuration +**Severity**: Medium - CLI client broken +**Error**: Connecting to wrong port (5000) and wrong endpoint (/task) +**Root Cause**: Outdated client code didn't match current API + +**Files Affected**: +- `src/client.py` + +**Solution**: +- Updated to port 8000 and `/api/chat/stream` endpoint +- Added SSE stream parsing +- Added response extraction from Python dict format + +**Files Modified**: +- `src/client.py` (complete rewrite, 50 lines) + +--- + +## New Feature: Manus API Integration + +### 18. 
Manus API Custom Client Implementation +**Severity**: Feature - New functionality +**Challenge**: Manus API is task-based (not OpenAI-compatible chat API) + +**Analysis**: +- Manus API uses task creation pattern: POST `/v1/tasks` → poll GET `/v1/tasks/{task_id}` +- Authentication via `API_KEY` header (not `Authorization: Bearer`) +- Response format contains conversation output array with role/content structure + +**Solution**: +- Created custom `ManusLLM` class implementing LangChain-compatible interface +- Implements `invoke()` and `stream()` methods +- Handles task creation, polling, and response extraction +- Compatible with existing OpenManus agent workflow + +**Files Created**: +- `src/llms/manus_llm.py` (NEW - 175 lines) + +**Files Modified**: +- `src/llms/llm.py` (lines 24, 59-77: added ManusLLM import and detection logic) +- `.env` (configured with Manus API credentials) + +**API Documentation Used**: +- Base URL: `https://api.manus.ai` +- Endpoint: `POST /v1/tasks` (create), `GET /v1/tasks/{task_id}` (retrieve) +- Auth: `API_KEY` header +- Docs: https://open.manus.ai/docs + +--- + +## Documentation Updates + +### 19. Updated README.md +**Changes**: +- Added comprehensive API key configuration section +- Fixed docker-compose example +- Updated API documentation +- Corrected CLI usage examples +- Added supported LLM providers list + +**Files Modified**: +- `README.md` (lines 102-159, Configuration section rewritten) + +--- + +### 20.
Created .env.example +**Purpose**: Template for users to configure their own API keys + +**Files Created**: +- `.env.example` (NEW - 46 lines) + +--- + +## Statistics + +**Total Files Modified**: 15 +**Total Files Created**: 5 +**Total Lines Changed**: ~800+ +**Bugs Fixed**: 17 +**New Features**: 1 (Manus API integration) + +**Categories**: +- Docker/Infrastructure: 2 bugs +- Backend/Workflow: 8 bugs +- Frontend/UI: 4 bugs +- LLM Integration: 3 bugs + 1 feature +- Documentation: 2 improvements + +--- + +## Testing Results + +### CLI Client +✅ **Working** - Successfully processes queries and returns responses +```bash +$ python src/client.py --task "What is 2+2?" +Response: 2+2 is 4. +``` + +### Web Interface +✅ **Frontend Running** - http://localhost:3000 +✅ **Backend Running** - http://localhost:8000 +✅ **API Communication** - SSE streaming working + +### Docker Containers +✅ **unified container** - Backend and tools server running +✅ **frontend container** - Next.js dev server running + +--- + +## Recommendations for Future Work + +1. **Multi-turn Conversations**: Implement task continuation using `taskId` parameter in Manus API +2. **Streaming Optimization**: Currently simulates streaming by word-splitting; consider websocket for true streaming +3. **Error Recovery**: Add retry logic for transient Manus API errors +4. **Rate Limiting**: Implement rate limit handling for Manus API (visible in headers) +5. **Configuration UI**: Add web interface for API key configuration +6. **Testing Suite**: Add unit tests for ManusLLM and integration tests for workflow +7. **Vision Support**: Implement attachment handling for Manus vision capabilities +8. **Connector Integration**: Explore Manus connectors for enhanced capabilities + +--- + +## Conclusion + +The OpenManus project had multiple critical bugs preventing any functionality. After systematic debugging and the implementation of Manus API integration, the system is now fully operational.
All components (Docker, backend, frontend, CLI) are working correctly with the Manus AI service. + +The Manus API integration required a custom client implementation due to its task-based architecture, which differs from standard OpenAI-compatible APIs. This implementation maintains compatibility with the existing LangChain-based agent workflow while adapting to Manus's unique API patterns. diff --git a/README.md b/README.md index c720ab7..341deb9 100644 --- a/README.md +++ b/README.md @@ -52,15 +52,26 @@ This will launch: - Frontend container serving the Next.js web interface - FastAPI server for task delegation and execution -### 3. Test the System -Once running, you can interact with OpenManus via: -- CLI: Use the provided Python client (`python client.py`) -- API: Send requests to http://localhost:8000 (see API docs below) -- Web UI: Access http://localhost:3000 +### 3. Configure API Keys (Required) -Example CLI command: +Before you can use OpenManus, you need to configure your LLM API keys. See the [Configuration](#configuration) section below for detailed instructions. + +### 4. Test the System +Once configured, you can interact with OpenManus via: +- **CLI**: Use the provided Python client + ```bash + python src/client.py --task "Plan a 3-day trip to Tokyo" + ``` +- **Web UI**: Access http://localhost:3000 in your browser +- **API**: Send requests directly to http://localhost:8000 (see API docs below) + +Example CLI commands: ```bash -python client.py --task "Plan a 3-day trip to Tokyo" +# Simple question +python src/client.py --task "What's the weather like in Tokyo?" 
+ +# Complex task +python src/client.py --task "Analyze Tesla stock trends for the last month" ``` ### Project Structure @@ -100,46 +111,84 @@ OpenManus/ ``` ### Configuration -Edit the `docker-compose.yml` file to customize: + +#### API Keys Setup (Required for AI functionality) + +Create a `.env` file in the project root directory with your LLM API keys: + +```env +# Basic LLM (used for most agents) +BASIC_API_KEY=your-openai-api-key-here +BASIC_BASE_URL=https://api.openai.com/v1 +BASIC_MODEL=gpt-3.5-turbo + +# Reasoning LLM (for complex tasks) - Optional +REASONING_API_KEY=your-deepseek-api-key +REASONING_BASE_URL=https://api.deepseek.com/v1 +REASONING_MODEL=deepseek-r1 + +# Vision-Language LLM (for image tasks) - Optional +VL_API_KEY=your-api-key-here +VL_BASE_URL=https://api.openai.com/v1 +VL_MODEL=gpt-4o +``` + +**Supported LLM Providers:** +- OpenAI (GPT-3.5, GPT-4, GPT-4o) +- DeepSeek (deepseek-r1) +- Google Gemini +- Azure OpenAI +- Any OpenAI-compatible API + +After creating the `.env` file, restart the containers: +```bash +docker-compose restart unified +``` + +#### Docker Compose Configuration + +The current `docker-compose.yml` structure: ```yaml services: - backend: - build: + unified: + build: context: . dockerfile: docker/unified/Dockerfile ports: - - "8000:8000" # FastAPI port - environment: - - WEB_BROWSER_API_KEY=your_key_here - volumes: - - ./src:/app/src - - ./data:/app/data + - "8000:8000" # FastAPI backend server frontend: build: context: . dockerfile: docker/frontend/Dockerfile ports: - - "3000:3000" # Web UI port + - "3000:3000" # Next.js web interface depends_on: - - backend + - unified ``` +Environment variables are loaded from the `.env` file automatically. + ### API Documentation The agent server exposes a REST API at http://localhost:8000. Key endpoints: -**POST /task**: Submit a task for execution. -```json -Body: { "task": "Analyze Tesla stock trends" } -Response: { "status": "success", "result": "..." 
} +**POST /api/chat/stream**: Submit a message and receive streaming response. +```bash +curl -X POST http://localhost:8000/api/chat/stream \ + -H "Content-Type: application/json" \ + -d '{"messages": [{"role": "user", "content": "Hello, how are you?"}]}' ``` -**GET /status**: Check system health. -```json -Response: { "status": "running" } +Response format (Server-Sent Events): ``` +event: message +data: {"content": "AI response here", "role": "assistant"} +``` + +**GET /docs**: Interactive API documentation (Swagger UI) +- Access at http://localhost:8000/docs -Full API docs are available in `docs/api.md`. +For more details, see the auto-generated API docs at `/docs` when the server is running. ### Contributing We welcome contributions! To get started: diff --git a/docker/unified/Dockerfile b/docker/unified/Dockerfile index 12d5ee5..cb38e1b 100644 --- a/docker/unified/Dockerfile +++ b/docker/unified/Dockerfile @@ -10,4 +10,4 @@ RUN pip install -r requirements.txt COPY . . -CMD ["sh", "/app/docker/unified/start.sh"] \ No newline at end of file +CMD ["bash", "/app/docker/unified/start.sh"] \ No newline at end of file diff --git a/docker/unified/start.sh b/docker/unified/start.sh index f08f33e..03f1d3f 100644 --- a/docker/unified/start.sh +++ b/docker/unified/start.sh @@ -10,4 +10,4 @@ PYTHONPATH=/app:/app/src python3 src/tools/server.py & wait # Exit with the status of the last command -exit $? \ No newline at end of file +exit $? 
diff --git a/requirements.txt b/requirements.txt index cc75d71..f4c06a8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,6 +10,7 @@ langchain>=0.1.0 langchain-core>=0.1.0 langchain-community>=0.0.10 langchain-experimental>=0.0.10 +langchain-openai>=0.2.0 json-repair>=0.3.0 pytest>=7.0.0 pytest-cov>=4.0.0 \ No newline at end of file diff --git a/src/agents/nodes/__init__.py b/src/agents/nodes/__init__.py new file mode 100644 index 0000000..e81ffbe --- /dev/null +++ b/src/agents/nodes/__init__.py @@ -0,0 +1,19 @@ +"""Node functions for the OpenManus agent workflow.""" + +from .coordinator_node import coordinator_node +from .planner_node import planner_node +from .supervisor_node import supervisor_node +from .researcher_node import researcher_node +from .coder_node import coder_node +from .browser_node import browser_node +from .reporter_node import reporter_node + +__all__ = [ + "coordinator_node", + "planner_node", + "supervisor_node", + "researcher_node", + "coder_node", + "browser_node", + "reporter_node", +] diff --git a/src/agents/nodes/browser_node.py b/src/agents/nodes/browser_node.py index 98abd43..a44553e 100644 --- a/src/agents/nodes/browser_node.py +++ b/src/agents/nodes/browser_node.py @@ -2,6 +2,7 @@ import json_repair from typing import Literal, Dict, Any from langchain_core.messages import HumanMessage +from langgraph.types import Command from src.agents import browser_agent # Import browser agent from src.utils.json_utils import repair_json_output diff --git a/src/agents/nodes/coordinator_node.py b/src/agents/nodes/coordinator_node.py index 830388a..0d6640d 100644 --- a/src/agents/nodes/coordinator_node.py +++ b/src/agents/nodes/coordinator_node.py @@ -2,6 +2,7 @@ import json_repair from typing import Literal, Dict, Any from langchain_core.messages import HumanMessage +from langgraph.types import Command from src.llms.llm import get_llm_by_type from src.config.agents import AGENT_LLM_MAP @@ -17,18 +18,24 @@ def coordinator_node(state: 
State) -> Dict[str, Any]: # Modified return type to messages = OpenManusPromptTemplate.apply_prompt_template("coordinator", state) response = get_llm_by_type(AGENT_LLM_MAP["coordinator"]).invoke(messages) logger.debug(f"Current state messages: {state['messages']}") - response_content = response.content - # Attempt to repair potential JSON output - response_content = repair_json_output(response_content) + + # Handle both string responses (PlaceholderLLM) and message objects + if isinstance(response, str): + response_content = response + else: + response_content = response.content + + # Only attempt to repair JSON if it looks like JSON + if response_content.strip().startswith('{') or response_content.strip().startswith('['): + response_content = repair_json_output(response_content) + logger.debug(f"Coordinator response: {response_content}") goto = "__end__" if "handoff_to_planner" in response_content: goto = "planner" - # Update response.content with repaired content - response.content = response_content - return Command( goto=goto, + update={"messages": [HumanMessage(content=response_content, name="coordinator")]}, ) \ No newline at end of file diff --git a/src/agents/nodes/planner_node.py b/src/agents/nodes/planner_node.py index 0f281a2..ac6d71e 100644 --- a/src/agents/nodes/planner_node.py +++ b/src/agents/nodes/planner_node.py @@ -4,6 +4,7 @@ from copy import deepcopy from typing import Literal, Dict, Any from langchain_core.messages import HumanMessage +from langgraph.types import Command from src.llms.llm import get_llm_by_type from src.config.agents import AGENT_LLM_MAP diff --git a/src/client.py b/src/client.py index 37abd37..dc38f4f 100644 --- a/src/client.py +++ b/src/client.py @@ -1,25 +1,60 @@ import argparse import requests import json +import re -def submit_task(task, host="http://localhost:5000"): +def submit_task(task, host="http://localhost:8000"): """Submit a task to the agent server.""" try: - response = requests.post(f"{host}/task", json={"task": 
task}) + # Use the correct endpoint + response = requests.post( + f"{host}/api/chat/stream", + json={"messages": [{"role": "user", "content": task}]}, + stream=True + ) response.raise_for_status() - return response.json() + + # Parse SSE stream + full_response = "" + for line in response.iter_lines(): + if line: + line = line.decode('utf-8') + if line.startswith('data: '): + try: + data = json.loads(line[6:]) + content = data.get('content', '') + + # Extract message from Python dict format + match = re.search(r"HumanMessage\(content='([^']+)'", content) + if match: + full_response = match.group(1) + else: + full_response += content + except json.JSONDecodeError: + pass + + return {"status": "success", "response": full_response or "No response received"} except requests.exceptions.RequestException as e: - return {"error": str(e)} + return {"status": "error", "error": str(e)} def main(): parser = argparse.ArgumentParser(description="OpenManus CLI Client") parser.add_argument("--task", required=True, help="Task description") - parser.add_argument("--host", default="http://localhost:5000", help="Agent server host") - + parser.add_argument("--host", default="http://localhost:8000", help="Agent server host (default: http://localhost:8000)") + args = parser.parse_args() - + + print(f"Submitting task: {args.task}") + print("-" * 50) + result = submit_task(args.task, args.host) - print(json.dumps(result, indent=2)) + + if result.get("status") == "success": + print("\nResponse:") + print(result["response"]) + else: + print("\nError:") + print(result.get("error", "Unknown error")) if __name__ == "__main__": main() \ No newline at end of file diff --git a/src/components/ChatDisplay.js b/src/components/ChatDisplay.js index 41f20f4..38594b3 100644 --- a/src/components/ChatDisplay.js +++ b/src/components/ChatDisplay.js @@ -2,12 +2,41 @@ import React from 'react'; function ChatDisplay({ messages }) { return ( -
- {messages.map((message, index) => ( -
- {message.sender}: {message.text} +
+ {messages.length === 0 ? ( +
+ No messages yet. Start a conversation!
- ))} + ) : ( + messages.map((message, index) => ( +
+ + {message.sender === 'user' ? 'You' : 'OpenManus'}: + +
+ {message.text} +
+
+ )) + )}
); } diff --git a/src/components/ChatInput.js b/src/components/ChatInput.js index baf69f6..1cb60f5 100644 --- a/src/components/ChatInput.js +++ b/src/components/ChatInput.js @@ -1,10 +1,37 @@ -import React from 'react'; +import React, { useState } from 'react'; + +function ChatInput({ onSendMessage }) { + const [inputValue, setInputValue] = useState(''); + + const handleSend = () => { + if (inputValue.trim()) { + onSendMessage(inputValue); + setInputValue(''); + } + }; + + const handleKeyPress = (e) => { + if (e.key === 'Enter') { + handleSend(); + } + }; -function ChatInput() { return ( -
- - +
+ setInputValue(e.target.value)} + onKeyPress={handleKeyPress} + style={{ width: '80%', padding: '10px', fontSize: '16px' }} + /> +
); } diff --git a/src/llms/llm.py b/src/llms/llm.py index cdb6a54..38e7588 100644 --- a/src/llms/llm.py +++ b/src/llms/llm.py @@ -1,6 +1,8 @@ import os from typing import Literal, Type +from langchain_openai import ChatOpenAI + from src.config.agents import LLMType from src.config.env import ( BASIC_API_KEY, @@ -19,13 +21,21 @@ VL_AZURE_DEPLOYMENT, REASONING_AZURE_DEPLOYMENT ) +from src.llms.manus_llm import ManusLLM class PlaceholderLLM: def __init__(self, model_name): self.model_name = model_name def invoke(self, messages): - return f"Placeholder LLM: {self.model_name} invoked with messages: {messages}" + return f"Placeholder LLM ({self.model_name}) - API key not configured. Please add API keys to .env file." + + def stream(self, messages): + """Generator that yields the response as chunks for streaming.""" + response = self.invoke(messages) + # Yield the response in chunks to simulate streaming + for chunk in response.split(): + yield type('obj', (object,), {'content': chunk + ' '})() def get_llm_by_type(llm_type: LLMType): if llm_type == "reasoning": @@ -46,4 +56,25 @@ def get_llm_by_type(llm_type: LLMType): else: raise ValueError(f"Unknown LLM type: {llm_type}") + # If API key is configured, return real LLM client + if api_key and base_url: + # Check if this is Manus API + if 'manus' in base_url.lower(): + # Remove /v1 suffix if present for Manus base URL + manus_base = base_url.replace('/v1', '') + return ManusLLM( + api_key=api_key, + model="chat", # Use 'chat' task mode for basic LLM + base_url=manus_base + ) + else: + # Use standard OpenAI-compatible client + return ChatOpenAI( + model=model_name, + openai_api_key=api_key, + openai_api_base=base_url, + temperature=0.7, + ) + + # Otherwise return placeholder return PlaceholderLLM(model_name) \ No newline at end of file diff --git a/src/llms/manus_llm.py b/src/llms/manus_llm.py new file mode 100644 index 0000000..173e843 --- /dev/null +++ b/src/llms/manus_llm.py @@ -0,0 +1,175 @@ +"""Custom LLM client for 
class ManusLLM:
    """Small chat-style client for the task-based Manus API.

    Each call creates a Manus task from a single prompt, polls the task until
    it reports ``completed`` or ``failed``, and reads the assistant's text out
    of the task output.  ``invoke`` and ``stream`` mirror the LangChain
    chat-model method names and return objects exposing ``.content``.
    """

    class _Chunk:
        """Minimal streamed-chunk object; consumers only read ``content``."""

        def __init__(self, content: str):
            self.content = content

    def __init__(self, api_key: str, model: str = "chat", base_url: str = "https://api.manus.ai"):
        """Initialize Manus LLM client.

        Args:
            api_key: Manus API key, sent in the ``API_KEY`` request header.
            model: Task mode ('chat', 'adaptive', or 'agent').
            base_url: Base URL for Manus API; a trailing slash is stripped.
        """
        self.api_key = api_key
        self.model = model  # sent as ``taskMode`` in the Manus API payload
        self.base_url = base_url.rstrip('/')
        # One shared session so every request carries the auth headers.
        self.session = requests.Session()
        self.session.headers.update({
            'Content-Type': 'application/json',
            'API_KEY': api_key,
        })

    def _create_task(self, prompt: str, task_id: str = None) -> Dict[str, Any]:
        """Create a new task in Manus.

        Args:
            prompt: The user's message.
            task_id: Optional task ID for continuing a conversation; only
                included in the payload when truthy.

        Returns:
            The decoded JSON body of the create-task response.

        Raises:
            requests.HTTPError: If the API answers with an error status.
        """
        payload = {
            "prompt": prompt,
            "taskMode": self.model,
        }
        if task_id:
            payload["taskId"] = task_id

        response = self.session.post(
            f"{self.base_url}/v1/tasks",
            json=payload,
            timeout=30,
        )
        response.raise_for_status()
        return response.json()

    def _get_task(self, task_id: str) -> Dict[str, Any]:
        """Get task status and output.

        Args:
            task_id: The task ID.

        Returns:
            The decoded JSON body describing the task.

        Raises:
            requests.HTTPError: If the API answers with an error status.
        """
        response = self.session.get(
            f"{self.base_url}/v1/tasks/{task_id}",
            timeout=30,
        )
        response.raise_for_status()
        return response.json()

    def _wait_for_completion(self, task_id: str, max_wait: int = 300, poll_interval: int = 2) -> Dict[str, Any]:
        """Poll a task until it completes, fails, or the time budget runs out.

        The task is always polled at least once, even when ``max_wait`` is 0.

        Args:
            task_id: The task ID.
            max_wait: Maximum time to wait in seconds.
            poll_interval: Time between polls in seconds.

        Returns:
            The task details once its status is ``completed``.

        Raises:
            RuntimeError: If the task reports status ``failed``.
            TimeoutError: If the task is not completed within ``max_wait``.
        """
        # Monotonic clock: the timeout must not be affected by wall-clock jumps.
        deadline = time.monotonic() + max_wait

        while True:
            task = self._get_task(task_id)
            status = task.get('status')

            if status == 'completed':
                return task
            if status == 'failed':
                raise RuntimeError(f"Task failed: {task}")

            remaining = deadline - time.monotonic()
            if remaining <= 0:
                break
            # Never sleep past the deadline.
            time.sleep(max(0, min(poll_interval, remaining)))

        raise TimeoutError(f"Task {task_id} did not complete within {max_wait} seconds")

    def _extract_response(self, task: Dict[str, Any]) -> str:
        """Extract the assistant's response from task output.

        Scans the output from newest to oldest and returns the first
        ``output_text`` part of the most recent assistant message that has one.

        Args:
            task: The completed task details.

        Returns:
            The assistant's response text, or ``"No response from Manus"``
            when no assistant text is present.
        """
        # ``output`` / ``content`` may be missing or null; treat both as empty.
        output = task.get('output') or []

        for item in reversed(output):
            if not isinstance(item, dict):
                continue
            if item.get('role') != 'assistant' or item.get('type') != 'message':
                continue
            for part in item.get('content') or []:
                if isinstance(part, dict) and part.get('type') == 'output_text':
                    return part.get('text') or ''

        return "No response from Manus"

    @staticmethod
    def _content_to_text(content: Any) -> str:
        """Flatten message content into the plain text sent as the prompt.

        Strings pass through unchanged.  For list content, string items and
        the ``text`` field of dict items are concatenated; other items are
        dropped because the prompt is a single string.
        """
        if isinstance(content, str):
            return content
        if isinstance(content, list):
            pieces = []
            for block in content:
                if isinstance(block, str):
                    pieces.append(block)
                elif isinstance(block, dict) and isinstance(block.get('text'), str):
                    pieces.append(block['text'])
            return "".join(pieces)
        return "" if content is None else str(content)

    @staticmethod
    def _last_user_prompt(messages: List[Any]) -> str:
        """Return the text of the last user/human message, or ``"Hello"``.

        Accepts both plain dicts (``role`` / ``content`` keys) and message
        objects exposing ``type`` / ``content`` attributes.
        """
        for msg in reversed(list(messages)):
            if isinstance(msg, dict):
                role = msg.get('role', '')
                content = msg.get('content', '')
            else:
                role = getattr(msg, 'type', '')
                content = getattr(msg, 'content', '')

            if role in ('user', 'human'):
                # An empty last user message falls back to the default prompt.
                return ManusLLM._content_to_text(content) or "Hello"

        return "Hello"

    @staticmethod
    def _split_keep_whitespace(text: str) -> Iterator[str]:
        """Yield word-sized pieces of ``text`` whose concatenation equals ``text``.

        Each piece is a run of non-whitespace followed by the whitespace after
        it, so newlines and indentation survive the simulated streaming.
        """
        pos, end = 0, len(text)
        while pos < end:
            nxt = pos
            while nxt < end and not text[nxt].isspace():
                nxt += 1
            while nxt < end and text[nxt].isspace():
                nxt += 1
            yield text[pos:nxt]
            pos = nxt

    # The return annotation is quoted so it is not evaluated at definition time.
    def invoke(self, messages: List[Any]) -> "AIMessage":
        """Invoke the Manus LLM with a list of messages.

        Only the last user message is sent: the task is created from a single
        prompt, and earlier turns and system messages are not forwarded.

        Args:
            messages: List of LangChain message objects or dicts.

        Returns:
            AIMessage with the response text.
        """
        prompt = self._last_user_prompt(messages)

        task_result = self._create_task(prompt)
        task_id = task_result['task_id']

        completed_task = self._wait_for_completion(task_id)
        response_text = self._extract_response(completed_task)

        return AIMessage(content=response_text)

    def stream(self, messages: List[Any]) -> Iterator[Any]:
        """Stream the response from Manus LLM.

        This is simulated streaming: the full response is fetched with
        ``invoke`` and then yielded in word-sized chunks.  Whitespace is
        preserved, so joining the chunks reproduces the response exactly.

        Args:
            messages: List of LangChain message objects or dicts.

        Yields:
            Objects with a ``content`` attribute holding one chunk of text.
        """
        response = self.invoke(messages)

        for piece in self._split_keep_whitespace(response.content or ""):
            yield self._Chunk(piece)
'user' : 'assistant', + content: msg.text + })); + + // Send to backend API + const response = await fetch('http://localhost:8000/api/chat/stream', { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + messages: conversationHistory + }), + }); + + if (!response.ok) { + // Try to get error details from response + let errorMsg = `Backend returned error (status ${response.status})`; + try { + const errorText = await response.text(); + if (errorText) { + const errorData = JSON.parse(errorText); + errorMsg = errorData.detail || errorMsg; + } + } catch (e) { + // Could not parse error response + } + throw new Error(errorMsg); + } + + // Read the SSE stream + const reader = response.body.getReader(); + const decoder = new TextDecoder(); + let aiResponse = ''; + let hasError = false; + let errorMessage = ''; + + while (true) { + const { done, value } = await reader.read(); + if (done) break; + + const chunk = decoder.decode(value); + const lines = chunk.split('\n'); + + for (const line of lines) { + if (line.startsWith('data: ')) { + try { + const data = JSON.parse(line.slice(6)); + let content = data.content || ''; + + // If content is a stringified object, try to extract the actual message + if (content.startsWith('{') || content.startsWith("{'")) { + try { + // Handle Python dict format with single quotes + const pythonDictMatch = content.match(/HumanMessage\(content='([^']+)'/); + if (pythonDictMatch) { + content = pythonDictMatch[1]; + } + } catch (e) { + // If parsing fails, use the content as-is + console.log('Could not parse nested content, using raw:', content); + } + } + + aiResponse += content; + } catch (e) { + console.error('Error parsing SSE data:', e); + // Check if the stream contains an error + if (line.includes('error') || line.includes('Error') || line.includes('Exception')) { + hasError = true; + errorMessage = line; + } + } + } + } + } + + // Check if we got an error in the stream + if (hasError || 
(!aiResponse && errorMessage)) { + throw new Error('Backend error processing your request. The system may need API keys configured. Check console for details.'); + } + + // Add AI response to chat + setMessages(prev => [...prev, { sender: 'ai', text: aiResponse || 'No response received from AI. Please check if API keys are configured.' }]); + } catch (error) { + console.error('Error sending message:', error); + + // Provide helpful error messages + let userMessage = error.message; + + if (error.message.includes('Failed to fetch') || error.message.includes('network')) { + userMessage = '❌ Cannot connect to backend server.\n\nPlease check:\n1. Is the backend running? (docker ps)\n2. Is it accessible at http://localhost:8000?'; + } else if (error.message.includes('Backend error')) { + userMessage = '❌ Backend encountered an error.\n\nLikely causes:\n1. No API keys configured - Create a .env file with your LLM API keys\n2. Missing dependencies - Check docker logs\n\nTo configure API keys, create a .env file in the project root with:\nBASIC_API_KEY=your-api-key-here\nBASIC_BASE_URL=https://api.openai.com/v1\nBASIC_MODEL=gpt-3.5-turbo'; + } + + setMessages(prev => [...prev, { + sender: 'ai', + text: userMessage + }]); + } finally { + setIsLoading(false); + } }; return ( -
-

Welcome to OpenManus!

+
+

Welcome to OpenManus!

+ {isLoading &&
AI is thinking...
}
); diff --git a/src/prompts/template.py b/src/prompts/template.py index 5d5485e..04dd78e 100644 --- a/src/prompts/template.py +++ b/src/prompts/template.py @@ -27,7 +27,7 @@ def get_prompt_template(prompt_name: str) -> str: # Escape curly braces for string formatting template = template.replace("{", "{{").replace("}", "}}") # Convert <> to {VAR} format - template = re.sub(r"<<([^>>]+)>>", r"{1}", template) + template = re.sub(r"<<([^>>]+)>>", r"{\1}", template) return template @staticmethod @@ -41,14 +41,33 @@ def apply_prompt_template(prompt_name: str, state: AgentState) -> List[Dict[str, Returns: List of message dictionaries with system prompt and state messages """ + from src.config import TEAM_MEMBERS + # Format current time in a consistent format current_time = datetime.now().strftime("%a %b %d %Y %H:%M:%S %z") + # Get the template + template_str = OpenManusPromptTemplate.get_prompt_template(prompt_name) + + # Build template variables + template_vars = { + "CURRENT_TIME": current_time, + "TEAM_MEMBERS": str(TEAM_MEMBERS), + } + + # Add state variables + template_vars.update(state) + + # Determine which variables are actually in the template + import re + found_vars = re.findall(r'\{([^}]+)\}', template_str) + input_variables = list(set(found_vars)) + # Create and format the system prompt system_prompt = PromptTemplate( - input_variables=["CURRENT_TIME"], - template=OpenManusPromptTemplate.get_prompt_template(prompt_name), - ).format(CURRENT_TIME=current_time, **state) + input_variables=input_variables, + template=template_str, + ).format(**template_vars) # Combine system prompt with existing messages return [{"role": "system", "content": system_prompt}] + state["messages"] \ No newline at end of file diff --git a/src/service/workflow_service.py b/src/service/workflow_service.py index 699cd68..07f0021 100644 --- a/src/service/workflow_service.py +++ b/src/service/workflow_service.py @@ -3,8 +3,9 @@ import asyncio from typing import AsyncGenerator, Dict, List 
+from langchain_core.messages import HumanMessage, AIMessage + from src.workflow.graph import build_graph -from src.prompts.template import OpenManusPromptTemplate async def run_agent_workflow( @@ -22,16 +23,19 @@ async def run_agent_workflow( # Initialize workflow graph workflow = build_graph() - # Format messages with system prompt - formatted_messages = OpenManusPromptTemplate.apply_prompt_template( - "coordinator", {"messages": messages} - ) + # Convert dict messages to LangChain message objects + langchain_messages = [] + for msg in messages: + if msg.role == "user": + langchain_messages.append(HumanMessage(content=msg.content)) + elif msg.role == "assistant": + langchain_messages.append(AIMessage(content=msg.content)) # Run workflow - async for event in workflow.astream({"messages": formatted_messages}): + async for event in workflow.astream({"messages": langchain_messages}): yield { "event": "message", - "data": {"content": event.get("content", ""), "role": "assistant"} + "data": {"content": str(event), "role": "assistant"} } # Small delay to avoid overwhelming the client await asyncio.sleep(0.1) \ No newline at end of file diff --git a/src/workflow/graph.py b/src/workflow/graph.py index 0abc1df..e746f3a 100644 --- a/src/workflow/graph.py +++ b/src/workflow/graph.py @@ -23,22 +23,7 @@ def build_graph(): builder.add_node("browser", browser_node) builder.add_node("reporter", reporter_node) - # Define edges + # Set entry point builder.add_edge(START, "coordinator") - builder.add_edge("coordinator", "planner") # Coordinator -> Planner - builder.add_edge("planner", "supervisor") # Planner -> Supervisor - builder.add_edge("supervisor", "researcher", condition=lambda state: state['next'] == "researcher") # Supervisor -> Researcher if next agent is researcher - builder.add_edge("supervisor", "coder", condition=lambda state: state['next'] == "coder") # Supervisor -> Coder if next agent is coder - builder.add_edge("supervisor", "browser", condition=lambda state: 
state['next'] == "browser") # Supervisor -> Browser if next agent is browser - builder.add_edge("supervisor", "reporter", condition=lambda state: state['next'] == "reporter") # Supervisor -> Reporter if next agent is reporter - builder.add_edge("supervisor", "__end__", condition=lambda state: state['next'] == "__end__") # Supervisor -> END if next agent is FINISH - builder.add_edge("researcher", "supervisor") # Researcher -> Supervisor - builder.add_edge("coder", "supervisor") # Coder -> Supervisor - builder.add_edge("browser", "supervisor") # Browser -> Supervisor - builder.add_edge("reporter", "supervisor") # Reporter -> Supervisor - - builder.set_entry_point("coordinator") - builder.set_conditional_edge("supervisor", supervisor_node) # Conditional edge for supervisor node - builder.add_end_point("__end__") return builder.compile() \ No newline at end of file