Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,5 +31,13 @@ jobs:
- name: Lint with ruff
run: uv run ruff check .

- name: Run tests
- name: Run tests with coverage
run: uv run pytest tests/ -q --tb=short

- name: Upload coverage report
if: always()
uses: actions/upload-artifact@v4
with:
name: coverage-report-py${{ matrix.python-version }}
path: coverage.xml
retention-days: 30
3 changes: 3 additions & 0 deletions operator_use/computer/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,11 @@ def unregister_tools(self, registry: "ToolRegistry") -> None:
def register_hooks(self, hooks: "Hooks") -> None:
self._hooks = hooks
if self._enabled:
hooks.register(HookEvent.BEFORE_LLM_CALL, self._state_hook)
hooks.register(HookEvent.AFTER_TOOL_CALL, self._wait_for_ui_hook)

def unregister_hooks(self, hooks: "Hooks") -> None:
hooks.unregister(HookEvent.BEFORE_LLM_CALL, self._state_hook)
hooks.unregister(HookEvent.AFTER_TOOL_CALL, self._wait_for_ui_hook)

def attach_prompt(self, context: "Context") -> None:
Expand Down Expand Up @@ -133,6 +135,7 @@ async def enable(self) -> None:
"""Dynamically enable computer_use at runtime."""
self._enabled = True
if self._hooks is not None:
self._hooks.register(HookEvent.BEFORE_LLM_CALL, self._state_hook)
self._hooks.register(HookEvent.AFTER_TOOL_CALL, self._wait_for_ui_hook)
if self._registry is not None:
for tool in self.get_tools():
Expand Down
7 changes: 5 additions & 2 deletions operator_use/web/plugin.py
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I removed the browser and computer plugins because they shared the desktop/browser state with the main agent, which caused context pollution. Each plugin now has a dedicated agent inside it, and those agents have access to that state, so nothing leaks into the main agent.

Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from typing import TYPE_CHECKING

from operator_use.plugins.base import Plugin
from operator_use.agent.hooks.events import HookEvent

if TYPE_CHECKING:
from operator_use.agent.hooks import Hooks
Expand Down Expand Up @@ -96,9 +97,11 @@ def unregister_tools(self, registry: "ToolRegistry") -> None:

def register_hooks(self, hooks: "Hooks") -> None:
self._hooks = hooks
if self._enabled:
hooks.register(HookEvent.BEFORE_LLM_CALL, self._state_hook)

def unregister_hooks(self, hooks: "Hooks") -> None:
pass
hooks.unregister(HookEvent.BEFORE_LLM_CALL, self._state_hook)

def attach_prompt(self, context: "Context") -> None:
self._context = context
Expand All @@ -125,7 +128,7 @@ async def enable(self) -> None:
"""Dynamically enable browser_use at runtime."""
self._enabled = True
if self._hooks is not None:
pass
self._hooks.register(HookEvent.BEFORE_LLM_CALL, self._state_hook)
if self._registry is not None:
if self.browser is not None:
self._registry.set_extension("browser", self.browser)
Expand Down
18 changes: 18 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ dev = [
"pytest>=8.0.0",
"pytest-asyncio>=0.24.0",
"pytest-benchmark>=4.0.0",
"pytest-cov>=5.0.0",
"ruff>=0.9.0",
]
fal = [
Expand All @@ -68,6 +69,23 @@ tavily = [

[tool.pytest.ini_options]
asyncio_mode = "strict"
addopts = "--cov=operator_use --cov-report=xml --cov-report=term-missing --cov-fail-under=25"

[tool.coverage.run]
source = ["operator_use"]
omit = [
"operator_use/web/cdp/*",
"operator_use/computer/macos/ax/*",
"operator_use/computer/windows/uia/*",
"operator_use/computer/windows/vdm/*",
]

[tool.coverage.report]
exclude_lines = [
"pragma: no cover",
"if TYPE_CHECKING:",
"raise NotImplementedError",
]

[tool.ruff]
line-length = 100
Expand Down
2 changes: 1 addition & 1 deletion tests/test_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ async def test_agent_run_with_tool_call_then_text(tmp_path):

# Register a simple echo tool
from pydantic import BaseModel
from operator_use.tools.service import Tool
from operator_use.agent.tools.service import Tool

class EchoParams(BaseModel):
message: str
Expand Down
2 changes: 1 addition & 1 deletion tests/test_control_center.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import pytest
from unittest.mock import AsyncMock, MagicMock, patch

from operator_use.agent.tools.builtin.control_center import (
from operator_use.tools.control_center import (
control_center,
_set_plugin_enabled,
_get_plugin_enabled,
Expand Down
2 changes: 1 addition & 1 deletion tests/test_local_agents.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import pytest

from operator_use.agent.tools.builtin.local_agents import LOCAL_AGENT_DELEGATION_CHAIN, localagents
from operator_use.tools.local_agents import LOCAL_AGENT_DELEGATION_CHAIN, localagents
from operator_use.messages.service import AIMessage


Expand Down
79 changes: 40 additions & 39 deletions tests/test_mcp_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,18 +110,18 @@ async def test_connect_first_agent_opens_connection(self, manager):
agent_id = "agent_a"
server_name = "server_1"

# Mock the _open_session to avoid actual connection
mock_session = AsyncMock()
# Mock the _open_client to avoid actual connection
mock_client = AsyncMock()
mock_tool = MagicMock()
mock_tool.name = "test_tool"
mock_tool.description = "Test"
mock_tool.inputSchema = {"type": "object"}

mock_session.initialize = AsyncMock()
mock_session.list_tools = AsyncMock(return_value=MagicMock(tools=[mock_tool]))
# fastmcp Client.list_tools() returns a list directly
mock_client.list_tools = AsyncMock(return_value=[mock_tool])

with patch.object(
manager, "_open_session", new_callable=AsyncMock, return_value=mock_session
MCPManager, "_open_client", new_callable=AsyncMock, return_value=mock_client
):
# Initially count is 0
assert manager._connection_count.get(server_name, 0) == 0
Expand All @@ -133,37 +133,37 @@ async def test_connect_first_agent_opens_connection(self, manager):
assert manager._connection_count[server_name] == 1
assert manager.is_connected(agent_id, server_name)
assert len(tools) == 1
assert server_name in manager._stacks
assert server_name in manager._clients

@pytest.mark.asyncio
async def test_connect_second_agent_reuses_connection(self, manager):
"""Test that second agent reuses the connection."""
server_name = "server_1"

# Set up first agent's connection
mock_session = AsyncMock()
mock_client = AsyncMock()
mock_tool = MagicMock()
mock_tool.name = "test_tool"
mock_tool.description = "Test"
mock_tool.inputSchema = {"type": "object"}

mock_session.initialize = AsyncMock()
mock_session.list_tools = AsyncMock(return_value=MagicMock(tools=[mock_tool]))
# fastmcp Client.list_tools() returns a list directly
mock_client.list_tools = AsyncMock(return_value=[mock_tool])

with patch.object(
manager, "_open_session", new_callable=AsyncMock, return_value=mock_session
MCPManager, "_open_client", new_callable=AsyncMock, return_value=mock_client
):
# Agent A connects
await manager.connect("agent_a", server_name)
assert manager._connection_count[server_name] == 1
stack_count_1 = len(manager._stacks)
client_count_1 = len(manager._clients)

# Agent B connects to same server
await manager.connect("agent_b", server_name)

# Should reuse connection (no new stack opened)
# Should reuse connection (no new client opened)
assert manager._connection_count[server_name] == 2
assert len(manager._stacks) == stack_count_1 # Same number of stacks
assert len(manager._clients) == client_count_1 # Same number of clients
assert manager.is_connected("agent_a", server_name)
assert manager.is_connected("agent_b", server_name)

Expand All @@ -179,10 +179,10 @@ async def test_disconnect_second_agent_keeps_server_alive(self, manager):

manager._tools[server_name] = [MagicMock(name="tool")]

# Mock stack to avoid actual closing
mock_stack = MagicMock()
mock_stack.aclose = AsyncMock()
manager._stacks[server_name] = mock_stack
# Mock client to avoid actual closing
mock_client = AsyncMock()
mock_client.__aexit__ = AsyncMock(return_value=None)
manager._clients[server_name] = mock_client

# Agent A disconnects
await manager.disconnect("agent_a", server_name)
Expand All @@ -191,8 +191,8 @@ async def test_disconnect_second_agent_keeps_server_alive(self, manager):
assert manager._connection_count[server_name] == 1
assert not manager.is_connected("agent_a", server_name)
assert manager.is_connected("agent_b", server_name)
assert server_name in manager._stacks # Still there!
mock_stack.aclose.assert_not_called() # Not closed
assert server_name in manager._clients # Still there!
mock_client.__aexit__.assert_not_called() # Not closed

@pytest.mark.asyncio
async def test_disconnect_last_agent_kills_server(self, manager):
Expand All @@ -204,27 +204,27 @@ async def test_disconnect_last_agent_kills_server(self, manager):
manager._agent_connections["agent_a"] = {server_name}
manager._tools[server_name] = [MagicMock(name="tool")]

# Mock stack to track if it's closed
mock_stack = MagicMock()
mock_stack.aclose = AsyncMock()
manager._stacks[server_name] = mock_stack
# Mock client to track if it's closed
mock_client = AsyncMock()
mock_client.__aexit__ = AsyncMock(return_value=None)
manager._clients[server_name] = mock_client

# Agent A disconnects
await manager.disconnect("agent_a", server_name)

# Server should be dead
assert manager._connection_count[server_name] == 0
assert not manager.is_server_connected(server_name)
assert server_name not in manager._stacks # Removed!
mock_stack.aclose.assert_called_once() # Was closed
assert server_name not in manager._clients # Removed!
mock_client.__aexit__.assert_called_once() # Was closed


class TestMCPTool:
"""Test MCPTool schema generation."""

def test_json_schema_returns_mcp_schema(self):
"""Test that json_schema returns MCP's inputSchema directly."""
mock_session = MagicMock()
mock_client = MagicMock()
input_schema = {
"type": "object",
"properties": {"path": {"type": "string", "description": "File path"}},
Expand All @@ -236,7 +236,7 @@ def test_json_schema_returns_mcp_schema(self):
mcp_tool_name="read_file",
description="Read a file",
input_schema=input_schema,
session=mock_session,
client=mock_client,
)

schema = tool.json_schema
Expand All @@ -246,32 +246,33 @@ def test_json_schema_returns_mcp_schema(self):

def test_tool_name_namespacing(self):
"""Test that tool names are properly namespaced."""
mock_session = MagicMock()
mock_client = MagicMock()

tool = MCPTool(
server_name="github",
mcp_tool_name="create_issue",
description="Create a GitHub issue",
input_schema={"type": "object"},
session=mock_session,
client=mock_client,
)

assert tool.name == "mcp_github_create_issue"

@pytest.mark.asyncio
async def test_ainvoke_strips_extensions(self):
"""Test that ainvoke strips extension kwargs before calling tool."""
mock_session = AsyncMock()
mock_session.call_tool = AsyncMock(
return_value=MagicMock(content=[MagicMock(text="result")])
)
mock_client = AsyncMock()
# fastmcp Client.call_tool() returns list[Content] directly
mock_content = MagicMock()
mock_content.text = "result"
mock_client.call_tool = AsyncMock(return_value=[mock_content])

tool = MCPTool(
server_name="test",
mcp_tool_name="tool",
description="Test",
input_schema={"type": "object"},
session=mock_session,
client=mock_client,
)

# Call with extensions + real params
Expand All @@ -282,23 +283,23 @@ async def test_ainvoke_strips_extensions(self):
_mcp_manager=MagicMock(),
)

# Should only pass param1 to session.call_tool
mock_session.call_tool.assert_called_once_with("tool", {"param1": "value1"})
# Should only pass param1 to client.call_tool
mock_client.call_tool.assert_called_once_with("tool", {"param1": "value1"})
assert result.success
assert result.output == "result"

@pytest.mark.asyncio
async def test_ainvoke_handles_error(self):
"""Test that ainvoke catches errors."""
mock_session = AsyncMock()
mock_session.call_tool = AsyncMock(side_effect=Exception("Connection lost"))
mock_client = AsyncMock()
mock_client.call_tool = AsyncMock(side_effect=Exception("Connection lost"))

tool = MCPTool(
server_name="test",
mcp_tool_name="tool",
description="Test",
input_schema={"type": "object"},
session=mock_session,
client=mock_client,
)

result = await tool.ainvoke(param="value")
Expand Down
2 changes: 1 addition & 1 deletion tests/test_plugins.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from operator_use.agent.tools.registry import ToolRegistry
from operator_use.agent.hooks.service import Hooks
from operator_use.agent.hooks.events import HookEvent
from operator_use.tools.service import Tool
from operator_use.agent.tools.service import Tool
from pydantic import BaseModel


Expand Down
Loading
Loading