Merged · 19 commits · Changes from all commits
5 changes: 2 additions & 3 deletions .secrets.baseline
@@ -3,7 +3,7 @@
"files": "(.*pnpm-lock.*|.*js.*|node_modules|.venv|.*jinja2.*|.*woff2.*)|^.secrets.baseline$|^.env$",
"lines": null
},
"generated_at": "2026-04-07T13:10:00Z",
"generated_at": "2026-04-12T15:30:49Z",
"plugins_used": [
{
"name": "AWSKeyDetector"
@@ -190,7 +190,7 @@
"hashed_secret": "e7d9ef2cdcdf94254667dc49ca85306258b571b5",
"is_secret": false,
"is_verified": false,
"line_number": 661,
"line_number": 668,
"type": "Secret Keyword",
"verified_result": null
}
@@ -602,7 +602,6 @@
},
{
"hashed_secret": "37478776645d473cd2ae4155f597d0fef53dcb58",
"is_secret": false,
"is_verified": false,
"line_number": 394,
"type": "Secret Keyword",
108 changes: 107 additions & 1 deletion README.md
@@ -870,7 +870,7 @@ instruction_set = "default" # or any instruction set above
<summary><em style="color: #666;"> 📹 Optional: Run with memory</em></summary>

1. Install memory dependencies `uv sync --extra memory`
1. Change `enable_memory = true` in `setting.toml`
1. Change `enable_memory = true` in `settings.toml`
2. Run `cuga start memory`

Watch CUGA with Memory enabled
@@ -889,6 +889,112 @@ Watch CUGA with Memory enabled

</details>

<details>
<summary><em style="color: #666;"> 🧠 Optional: Use Evolve with CugaLite</em></summary>

Evolve can now be used with **CugaLite** to bring task-specific guidance into the prompt before execution and save completed trajectories after the run.

This flow is:

- **Opt-in** - disabled by default
- **Non-blocking** - Evolve failures do not fail the task
- **CugaLite-focused** - enabled for lite mode by default
- **Optional integration** - install `cuga[evolve]` if you want the upstream Evolve package available locally, or let `uvx` fetch it on demand

### Setup Steps:

1. Choose how Evolve will be started.

Recommended for normal CUGA usage: let the CUGA MCP registry launch Evolve for you.

In the manager UI, add an MCP tool with:

- Name: `evolve`
- Connection type: `Command (stdio)`
- Command: `uvx`
- Args:

```text
--from
altk-evolve
--with
setuptools<70
evolve-mcp
```

Then set the tool environment values in the UI. Recommended defaults:

```text
EVOLVE_MODEL_NAME=Azure/gpt-4o
OPENAI_API_KEY=env://OPENAI_API_KEY # pragma: allowlist secret
OPENAI_BASE_URL=env://OPENAI_BASE_URL # pragma: allowlist secret
```

Notes:
- Use a model your gateway/team is actually allowed to access. Replace `Azure/gpt-4o` with the exact allowed model if needed.
- `OPENAI_API_KEY=env://OPENAI_API_KEY` means "read the real value from the CUGA process environment at runtime". <!-- pragma: allowlist secret -->
- `OPENAI_BASE_URL=env://OPENAI_BASE_URL` means "read the LiteLLM/OpenAI-compatible base URL from the CUGA process environment at runtime". <!-- pragma: allowlist secret -->
- `setuptools<70` is included because `milvus-lite` still imports `pkg_resources`.

Important: this command starts Evolve in `stdio` mode through the upstream Evolve package. It is intended to be launched by the CUGA registry, not run manually in a separate terminal.

Alternative for standalone/manual debugging: run Evolve yourself as an SSE server:

```bash
uvx --from altk-evolve --with 'setuptools<70' evolve-mcp --transport sse --port 8201
```
2. Edit `./src/cuga/settings.toml` and enable lite mode plus Evolve:

```toml
[advanced_features]
lite_mode = true

[evolve]
enabled = true
url = "http://127.0.0.1:8201/sse"
mode = "auto"
app_name = "evolve"
lite_mode_only = true
save_on_success = true
save_on_failure = true
async_save = true
timeout = 30.0
```

If you use the recommended registry-managed setup above, keep `mode = "auto"` or set `mode = "registry"`.

If you run Evolve manually as a standalone SSE server, keep `url = "http://127.0.0.1:8201/sse"` and set `mode = "direct"` if you want to skip registry lookup entirely.

If you use Evolve tip generation, make sure the environment for the Evolve MCP server includes the required Evolve model settings. Otherwise `save_trajectory` may fail later with a LiteLLM/OpenAI model access error even when the MCP connection itself works.
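Because `env://…` values are resolved from the CUGA process environment at runtime, the real credentials must be exported before CUGA starts. A minimal sketch, with placeholder values you must replace with your own gateway credentials:

```shell
# Illustrative only: the variable names match the registry tool environment
# shown above; the key and URL values here are placeholders.
export OPENAI_API_KEY="sk-your-real-key"                      # pragma: allowlist secret
export OPENAI_BASE_URL="https://your-litellm-gateway.example.com/v1"
```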

3. Start CUGA normally:

```bash
cuga start demo
```

4. Run a task that routes through CugaLite.

### What happens during a run?

1. CUGA derives the task description from the current sub-task or first user message
2. CugaLite asks Evolve for relevant guidelines
3. Returned guidelines are appended to the system prompt under an `Evolve Guidelines` section
4. The task executes normally
5. The user / assistant trajectory is saved back to Evolve after completion
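Steps 2–4 above can be sketched as a small async function. `get_guidelines()` here is a hypothetical stand-in for the Evolve MCP call (the real integration lives in `cuga.backend.evolve.integration.EvolveIntegration`); only the prompt-assembly shape is the point:

```python
import asyncio

async def get_guidelines(task: str) -> str:
    # Stand-in for the Evolve MCP call; pretend the server
    # returned one guideline relevant to this task.
    return "- Prefer read-only tools before mutating ones."

async def build_system_prompt(base_prompt: str, task: str) -> str:
    guidelines = await get_guidelines(task)
    if not guidelines:
        # Evolve unavailable or empty: the task runs with the base prompt.
        return base_prompt
    # Guidelines are appended under a dedicated section, as in step 3.
    return f"{base_prompt}\n\n## Evolve Guidelines\n{guidelines}"

prompt = asyncio.run(build_system_prompt("You are CugaLite.", "book a flight"))
print(prompt)
```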

### Notes

- `async_save = true` saves trajectories in the background and avoids blocking the response
- `save_on_success` and `save_on_failure` let you control which runs are recorded
- `mode = "auto"` lets CUGA use a registry-managed Evolve MCP server when available and fall back to the direct SSE URL otherwise
- `mode = "registry"` is best when you want Evolve to be fully managed as a normal CUGA MCP tool
- `mode = "direct"` is best when you are manually running an SSE Evolve server outside CUGA
- If Evolve is unavailable, times out, or returns no guidance, CUGA continues normally
- This integration is separate from the older `cuga start memory` namespace / tip workflow
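The `async_save = true` behavior can be sketched as a fire-and-forget task that keeps a strong reference so the save is not garbage-collected mid-flight (mirroring `self._background_tasks` in `cuga_lite_node.py`); `save_trajectory()` here is a local stand-in for the real Evolve call:

```python
import asyncio

background_tasks: set = set()
saved: list = []

async def save_trajectory(task_id: str, success: bool) -> None:
    await asyncio.sleep(0)  # simulate I/O to the Evolve server
    saved.append((task_id, success))

def schedule_save(task_id: str, success: bool) -> None:
    # Returns immediately, so the user-facing response is never blocked.
    task = asyncio.create_task(save_trajectory(task_id, success))
    background_tasks.add(task)  # strong reference keeps the task alive
    task.add_done_callback(background_tasks.discard)

async def main() -> None:
    schedule_save("task_1", True)
    await asyncio.gather(*background_tasks)

asyncio.run(main())
```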

</details>

## 🔧 Advanced Usage

<details>
1 change: 1 addition & 0 deletions pyproject.toml
@@ -58,6 +58,7 @@ e2b = ["e2b-code-interpreter>=2.4.1"]
memory = ["mem0ai[extras]"]
knowledge = [] # knowledge is now built-in, no extra dependencies needed
health = ["cuga-oak-health; python_version>='3.12'"]
evolve = ["altk-evolve>=1.0.6; python_version>='3.12'"]
# OpenLit (PyPI) pins openai<2; litellm 1.83.x needs openai 2.x — cannot both be core deps for pip/uvx.
# Note: When using observability extra, ensure python-dotenv>=1.1.0 is installed to avoid conflicts
observability = ["openlit>=1.40.3"]
Empty file added readme_cuga_lite_kaizen.md
Empty file.
26 changes: 24 additions & 2 deletions src/cuga/backend/cuga_graph/nodes/cuga_lite/cuga_lite_graph.py
@@ -785,6 +785,29 @@ async def prepare_tools_and_apps(
else:
logger.warning(f"Tool '{tool.name}' has no callable function, skipping")

# Fetch Evolve guidelines if enabled
from cuga.backend.evolve.integration import EvolveIntegration

special_instructions_final = base_special_instructions
if EvolveIntegration.is_enabled():
task_description = ""
if state.sub_task:
task_description = state.sub_task
elif state.chat_messages:
for msg in state.chat_messages:
if isinstance(msg, HumanMessage):
task_description = msg.content
break
if task_description:
evolve_guidelines = await EvolveIntegration.get_guidelines(task_description)
if evolve_guidelines:
evolve_section = f"\n\n## Evolve Guidelines\n{evolve_guidelines}"
special_instructions_final = (special_instructions_final or "") + evolve_section
logger.info("Evolve: Injected guidelines into system prompt")
logger.debug(
f"Evolve: Full special_instructions with guidelines:\n{special_instructions_final}"
)

cfg = config.get("configurable", {}) if config else {}
_thread_id = cfg.get("thread_id") or ""
_knowledge_engine = cfg.get("knowledge_engine")
@@ -979,7 +1002,6 @@ async def _wrapped(*args, **kwargs):
logger.info(f"Knowledge awareness injected: {len(knowledge_block)} chars")
except Exception as e:
logger.debug(f"Knowledge awareness injection skipped: {e}")

# Create prompt dynamically
dynamic_prompt = prompt

@@ -995,7 +1017,7 @@
or is_autonomous_subtask,
prompt_template=selected_prompt_template,
enable_find_tools=enable_find_tools,
special_instructions=base_special_instructions,
special_instructions=special_instructions_final,
has_knowledge=has_knowledge_tools,
)

20 changes: 20 additions & 0 deletions src/cuga/backend/cuga_graph/nodes/cuga_lite/cuga_lite_node.py
@@ -121,6 +121,7 @@ def __init__(self, langfuse_handler: Optional[Any] = None, prompt_template: Opti
self.prompt_template = load_one_prompt(prompt_filename)
self.langfuse_handler = langfuse_handler
self.hitl_handler = CugaLiteHumanInTheLoopHandler()
self._background_tasks: set = set()

@staticmethod
async def read_text_file(file_path: str) -> Optional[str]:
@@ -388,6 +389,25 @@ async def callback_node(
goto=NodeNames.SUGGEST_HUMAN_ACTIONS,
)

# Save trajectory to Evolve if enabled
from cuga.backend.evolve.integration import EvolveIntegration

if EvolveIntegration.is_enabled() and state.chat_messages:
import asyncio as _asyncio

task_id = state.sub_task or tracker.task_id or "unknown"
state_error = getattr(state, "error", None)
success = not (self._has_error(state.final_answer or "") or bool(state_error))
messages_snapshot = list(state.chat_messages)
if settings.evolve.async_save:
task = _asyncio.create_task(
EvolveIntegration.save_trajectory(messages_snapshot, task_id, success)
)
self._background_tasks.add(task)
task.add_done_callback(self._background_tasks.discard)
else:
await EvolveIntegration.save_trajectory(messages_snapshot, task_id, success)

# Get metadata from state
metadata = state.cuga_lite_metadata or {}
initial_var_names = metadata.get("initial_var_names", [])
@@ -0,0 +1,79 @@
import asyncio
from unittest.mock import AsyncMock, patch

import pytest
from langchain_core.messages import HumanMessage
from langgraph.types import Command

from cuga.backend.cuga_graph.nodes.cuga_lite.cuga_lite_node import CugaLiteNode
from cuga.backend.cuga_graph.state.agent_state import AgentState


@pytest.mark.asyncio
async def test_callback_node_does_not_require_agent_state_error_field():
state = AgentState(
input="test request",
url="https://example.com",
chat_messages=[HumanMessage(content="hello")],
final_answer="Task completed successfully.",
sub_task="task_1",
)
node = CugaLiteNode()

with (
patch("cuga.backend.evolve.integration.EvolveIntegration.is_enabled", return_value=True),
patch(
"cuga.backend.evolve.integration.EvolveIntegration.save_trajectory",
new_callable=AsyncMock,
) as mock_save_trajectory,
patch.object(node, "_process_results", new_callable=AsyncMock) as mock_process_results,
patch("cuga.backend.cuga_graph.nodes.cuga_lite.cuga_lite_node.settings.evolve.async_save", False),
):
mock_process_results.return_value = Command(update={}, goto="FinalAnswerAgent")

result = await node.callback_node(state)

assert result.goto == "FinalAnswerAgent"
mock_save_trajectory.assert_awaited_once()
saved_messages, task_id, success = mock_save_trajectory.await_args.args
assert saved_messages is not state.chat_messages
assert [message.content for message in saved_messages] == ["hello"]
assert task_id == "task_1"
assert success is True
mock_process_results.assert_awaited_once()


@pytest.mark.asyncio
async def test_callback_node_async_save_uses_chat_message_snapshot():
state = AgentState(
input="test request",
url="https://example.com",
chat_messages=[HumanMessage(content="hello")],
final_answer="Task completed successfully.",
sub_task="task_1",
)
node = CugaLiteNode()

with (
patch("cuga.backend.evolve.integration.EvolveIntegration.is_enabled", return_value=True),
patch(
"cuga.backend.evolve.integration.EvolveIntegration.save_trajectory",
new_callable=AsyncMock,
) as mock_save_trajectory,
patch.object(node, "_process_results", new_callable=AsyncMock) as mock_process_results,
patch("cuga.backend.cuga_graph.nodes.cuga_lite.cuga_lite_node.settings.evolve.async_save", True),
):
mock_process_results.return_value = Command(update={}, goto="FinalAnswerAgent")

result = await node.callback_node(state)
state.chat_messages.append(HumanMessage(content="mutated later"))
await asyncio.sleep(0)

assert result.goto == "FinalAnswerAgent"
mock_save_trajectory.assert_awaited_once()
saved_messages, task_id, success = mock_save_trajectory.await_args.args
assert saved_messages is not state.chat_messages
assert [message.content for message in saved_messages] == ["hello"]
assert task_id == "task_1"
assert success is True
mock_process_results.assert_awaited_once()
1 change: 1 addition & 0 deletions src/cuga/backend/evolve/__init__.py
@@ -0,0 +1 @@
"""Evolve integration support for CUGA."""