diff --git a/operator_use/computer/input_lock.py b/operator_use/computer/input_lock.py new file mode 100644 index 0000000..d688c6c --- /dev/null +++ b/operator_use/computer/input_lock.py @@ -0,0 +1,337 @@ +"""Cooperative input locking — queue agent actions while the user is active. + +Platform implementations: + - macOS : CGEventTap (CoreGraphics) + - Windows: WH_MOUSE_LL / WH_KEYBOARD_LL low-level hooks + +Usage:: + + monitor = InputActivityMonitor() + monitor.start() + ... + if not monitor.is_user_active(): + do_agent_action() + else: + await monitor.wait_for_idle(timeout=10.0) + ... + monitor.stop() +""" + +from __future__ import annotations + +import asyncio +import logging +import sys +import threading +import time +from typing import Callable + +logger = logging.getLogger(__name__) + +# Seconds of quiet time before the user is considered idle. +_DEFAULT_IDLE_THRESHOLD = 0.5 + + +class InputActivityMonitor: + """Detect user mouse/keyboard activity and expose an idle-wait API. + + The monitor is platform-agnostic at the API level; the underlying event + listener is selected at construction time via ``sys.platform``. + + Args: + idle_threshold: Seconds without input before :py:meth:`is_user_active` + returns ``False``. Defaults to 500 ms. + on_activity: Optional callback invoked each time new user activity is + detected. Runs on the monitor's background thread. + """ + + def __init__( + self, + idle_threshold: float = _DEFAULT_IDLE_THRESHOLD, + on_activity: Callable[[], None] | None = None, + ) -> None: + self._idle_threshold = idle_threshold + self._on_activity = on_activity + self._last_activity: float = 0.0 # epoch seconds; 0 = never seen + self._running = False + self._lock = threading.Lock() + + # Back-end chosen at construction time. + if sys.platform == "darwin": + self._backend: _InputBackend = _MacOSBackend(self._record_activity) + elif sys.platform == "win32": + self._backend = _WindowsBackend(self._record_activity) + else: + logger.warning( + "InputActivityMonitor: unsupported platform %r — activity detection disabled.", + sys.platform, + ) + self._backend = _NullBackend(self._record_activity) + + # ------------------------------------------------------------------ + # Public API + # ------------------------------------------------------------------ + + def start(self) -> None: + """Install the platform hook and begin monitoring.""" + if self._running: + return + self._running = True + self._backend.start() + logger.debug("InputActivityMonitor started (%s backend)", sys.platform) + + def stop(self) -> None: + """Remove the platform hook and stop monitoring.""" + if not self._running: + return + self._running = False + self._backend.stop() + logger.debug("InputActivityMonitor stopped") + + def is_user_active(self) -> bool: + """Return ``True`` if user input was seen within the idle threshold.""" + with self._lock: + last = self._last_activity + if last == 0.0: + return False + return (time.monotonic() - last) < self._idle_threshold + + async def wait_for_idle(self, timeout: float | None = None) -> bool: + """Async wait until the user has been idle for at least *idle_threshold* seconds. + + Args: + timeout: Maximum seconds to wait. ``None`` means wait forever. + + Returns: + ``True`` if the user became idle within *timeout*, ``False`` if the + wait timed out. + """ + poll_interval = max(0.05, self._idle_threshold / 10) + deadline = None if timeout is None else (time.monotonic() + timeout) + + while self.is_user_active(): + if deadline is not None and time.monotonic() >= deadline: + return False + await asyncio.sleep(poll_interval) + + return True + + # ------------------------------------------------------------------ + # Internal + # ------------------------------------------------------------------ + + def _record_activity(self) -> None: + """Called by the backend on every detected input event.""" + with self._lock: + self._last_activity = time.monotonic() + if self._on_activity: + try: + self._on_activity() + except Exception: # pragma: no cover + logger.exception("InputActivityMonitor on_activity callback raised") + + +# --------------------------------------------------------------------------- +# Abstract backend +# --------------------------------------------------------------------------- + + +class _InputBackend: + """Internal protocol: each platform implements start / stop.""" + + def __init__(self, callback: Callable[[], None]) -> None: + self._callback = callback + + def start(self) -> None: # pragma: no cover + raise NotImplementedError + + def stop(self) -> None: # pragma: no cover + raise NotImplementedError + + +# --------------------------------------------------------------------------- +# macOS backend — CGEventTap +# --------------------------------------------------------------------------- + + +class _MacOSBackend(_InputBackend): + """Listen for mouse and keyboard events via ``CGEventTap``. + + The tap runs on a private ``CFRunLoop`` in a daemon thread so it does not + block the calling thread and is automatically cleaned up when the process + exits. + """ + + def __init__(self, callback: Callable[[], None]) -> None: + super().__init__(callback) + self._thread: threading.Thread | None = None + self._run_loop = None # CFRunLoopRef stored after the thread starts + + def start(self) -> None: + self._thread = threading.Thread(target=self._run, daemon=True, name="cg-event-tap") + self._thread.start() + + def stop(self) -> None: + if self._run_loop is not None: + try: + from Quartz import CFRunLoopStop # type: ignore[import] + + CFRunLoopStop(self._run_loop) + except Exception: + logger.debug("CGEventTap: could not stop run loop", exc_info=True) + if self._thread is not None: + self._thread.join(timeout=2.0) + + def _run(self) -> None: # pragma: no cover — runs in a thread + try: + from Quartz import ( # type: ignore[import] + CFMachPortCreateRunLoopSource, + CFRunLoopAddSource, + CFRunLoopGetCurrent, + CFRunLoopRun, + CGEventMaskBit, + CGEventTapCreate, + CGEventTapEnable, + kCFRunLoopCommonModes, + kCGEventFlagMaskCommand, + kCGEventKeyDown, + kCGEventLeftMouseDown, + kCGEventMouseMoved, + kCGEventRightMouseDown, + kCGEventScrollWheel, + kCGHeadInsertEventTap, + kCGSessionEventTap, + kCGTapDisabledByTimeout, + kCGTapDisabledByUserInput, + ) + except ImportError: + logger.warning("pyobjc-framework-Quartz not available — CGEventTap disabled.") + return + + callback = self._callback + + def _tap_callback(proxy, event_type, event, refcon): # noqa: ANN001 + if event_type in (kCGTapDisabledByTimeout, kCGTapDisabledByUserInput): + CGEventTapEnable(tap, True) + return event + callback() + return event + + mask = ( + CGEventMaskBit(kCGEventMouseMoved) + | CGEventMaskBit(kCGEventLeftMouseDown) + | CGEventMaskBit(kCGEventRightMouseDown) + | CGEventMaskBit(kCGEventScrollWheel) + | CGEventMaskBit(kCGEventKeyDown) + ) + + tap = CGEventTapCreate( + kCGSessionEventTap, + kCGHeadInsertEventTap, + 0, # kCGEventTapOptionListenOnly + mask, + _tap_callback, + None, + ) + + if tap is None: + logger.warning( + "CGEventTap: could not create event tap — " + "grant Input Monitoring in System Settings > Privacy & Security." + ) + return + + source = CFMachPortCreateRunLoopSource(None, tap, 0) + self._run_loop = CFRunLoopGetCurrent() + CFRunLoopAddSource(self._run_loop, source, kCFRunLoopCommonModes) + CGEventTapEnable(tap, True) + _ = kCGEventFlagMaskCommand # keep import happy + CFRunLoopRun() + + +# --------------------------------------------------------------------------- +# Windows backend — WH_MOUSE_LL / WH_KEYBOARD_LL +# --------------------------------------------------------------------------- + + +class _WindowsBackend(_InputBackend): + """Listen for global mouse and keyboard events via low-level Win32 hooks.""" + + def __init__(self, callback: Callable[[], None]) -> None: + super().__init__(callback) + self._thread: threading.Thread | None = None + self._thread_id: int | None = None + + def start(self) -> None: + self._thread = threading.Thread(target=self._run, daemon=True, name="win-input-hook") + self._thread.start() + + def stop(self) -> None: + if self._thread_id is not None: + try: + import ctypes # noqa: PLC0415 + + ctypes.windll.user32.PostThreadMessageW(self._thread_id, 0x0012, 0, 0) # WM_QUIT + except Exception: + logger.debug("WinHook: could not post WM_QUIT", exc_info=True) + if self._thread is not None: + self._thread.join(timeout=2.0) + + def _run(self) -> None: # pragma: no cover — runs in a thread + try: + import ctypes # noqa: PLC0415 + import ctypes.wintypes # noqa: PLC0415 + except ImportError: + logger.warning("ctypes not available — Windows input hook disabled.") + return + + import ctypes.wintypes as wintypes # noqa: PLC0415 + + WH_MOUSE_LL = 14 + WH_KEYBOARD_LL = 13 + HC_ACTION = 0 + + callback = self._callback + + HOOKPROC = ctypes.CFUNCTYPE(ctypes.c_long, ctypes.c_int, wintypes.WPARAM, wintypes.LPARAM) + + def _hook_proc(nCode, wParam, lParam): # noqa: ANN001 + if nCode == HC_ACTION: + callback() + return ctypes.windll.user32.CallNextHookEx(None, nCode, wParam, lParam) + + proc = HOOKPROC(_hook_proc) + + mouse_hook = ctypes.windll.user32.SetWindowsHookExW(WH_MOUSE_LL, proc, None, 0) + keyboard_hook = ctypes.windll.user32.SetWindowsHookExW(WH_KEYBOARD_LL, proc, None, 0) + + if not mouse_hook or not keyboard_hook: + logger.warning("WinHook: SetWindowsHookExW failed — input detection disabled.") + return + + import threading as _threading # noqa: PLC0415 + + self._thread_id = _threading.current_thread().ident + + msg = wintypes.MSG() + while ctypes.windll.user32.GetMessageW(ctypes.byref(msg), None, 0, 0) > 0: + ctypes.windll.user32.TranslateMessage(ctypes.byref(msg)) + ctypes.windll.user32.DispatchMessageW(ctypes.byref(msg)) + + ctypes.windll.user32.UnhookWindowsHookEx(mouse_hook) + ctypes.windll.user32.UnhookWindowsHookEx(keyboard_hook) + + +# --------------------------------------------------------------------------- +# Null backend — unsupported platforms +# --------------------------------------------------------------------------- + + +class _NullBackend(_InputBackend): + """No-op backend for platforms without a native implementation.""" + + def start(self) -> None: + pass + + def stop(self) -> None: + pass diff --git a/tests/test_agent.py b/tests/test_agent.py index 4fb6c3f..13db174 100644 --- a/tests/test_agent.py +++ b/tests/test_agent.py @@ -186,7 +186,7 @@ async def test_agent_run_with_tool_call_then_text(tmp_path): # Register a simple echo tool from pydantic import BaseModel - from operator_use.tools.service import Tool + from operator_use.agent.tools.service import Tool class EchoParams(BaseModel): message: str diff --git a/tests/test_control_center.py b/tests/test_control_center.py index f3a2e5b..0efe749 100644 --- a/tests/test_control_center.py +++ b/tests/test_control_center.py @@ -4,7 +4,7 @@ import pytest from unittest.mock import AsyncMock, MagicMock, patch -from operator_use.agent.tools.builtin.control_center import ( +from operator_use.tools.control_center import ( control_center, _set_plugin_enabled, _get_plugin_enabled, diff --git a/tests/test_input_lock.py b/tests/test_input_lock.py new file mode 100644 index 0000000..4f11422 --- /dev/null +++ b/tests/test_input_lock.py @@ -0,0 +1,340 @@ +"""Tests for operator_use.computer.input_lock. + +All platform-native APIs are mocked so these tests run on any OS. +""" + +from __future__ import annotations + +import asyncio +import sys +import threading +import time +from unittest.mock import MagicMock, patch + +import pytest + +from operator_use.computer.input_lock import ( + InputActivityMonitor, + _MacOSBackend, + _NullBackend, + _WindowsBackend, +) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _make_monitor(platform: str, idle_threshold: float = 0.5) -> InputActivityMonitor: + """Return an InputActivityMonitor with a null backend regardless of platform.""" + with patch.object(sys, "platform", platform): + monitor = InputActivityMonitor(idle_threshold=idle_threshold) + # Replace whatever backend was chosen with a NullBackend so tests don't + # start real OS threads. + monitor._backend = _NullBackend(monitor._record_activity) + return monitor + + +# --------------------------------------------------------------------------- +# _record_activity / is_user_active +# --------------------------------------------------------------------------- + + +class TestIsUserActive: + def test_false_before_any_activity(self): + monitor = _make_monitor("linux") + assert monitor.is_user_active() is False + + def test_true_immediately_after_activity(self): + monitor = _make_monitor("linux", idle_threshold=1.0) + monitor._record_activity() + assert monitor.is_user_active() is True + + def test_false_after_idle_threshold_passes(self): + monitor = _make_monitor("linux", idle_threshold=0.05) + monitor._record_activity() + time.sleep(0.1) + assert monitor.is_user_active() is False + + def test_resets_on_new_activity(self): + monitor = _make_monitor("linux", idle_threshold=0.05) + monitor._record_activity() + time.sleep(0.07) + assert monitor.is_user_active() is False + monitor._record_activity() + assert monitor.is_user_active() is True + + +# --------------------------------------------------------------------------- +# on_activity callback +# --------------------------------------------------------------------------- + + +class TestOnActivityCallback: + def test_callback_invoked_on_activity(self): + calls = [] + monitor = _make_monitor("linux") + monitor._on_activity = lambda: calls.append(1) + monitor._record_activity() + assert len(calls) == 1 + + def test_callback_invoked_multiple_times(self): + calls = [] + monitor = _make_monitor("linux") + monitor._on_activity = lambda: calls.append(1) + for _ in range(5): + monitor._record_activity() + assert len(calls) == 5 + + def test_callback_none_does_not_raise(self): + monitor = _make_monitor("linux") + monitor._on_activity = None + monitor._record_activity() # should not raise + + +# --------------------------------------------------------------------------- +# start / stop +# --------------------------------------------------------------------------- + + +class TestStartStop: + def test_start_sets_running(self): + monitor = _make_monitor("linux") + monitor.start() + assert monitor._running is True + monitor.stop() + + def test_stop_clears_running(self): + monitor = _make_monitor("linux") + monitor.start() + monitor.stop() + assert monitor._running is False + + def test_double_start_is_idempotent(self): + monitor = _make_monitor("linux") + monitor.start() + monitor.start() # second call is a no-op + assert monitor._running is True + monitor.stop() + + def test_double_stop_is_idempotent(self): + monitor = _make_monitor("linux") + monitor.start() + monitor.stop() + monitor.stop() # second call is a no-op + assert monitor._running is False + + +# --------------------------------------------------------------------------- +# wait_for_idle +# --------------------------------------------------------------------------- + + +class TestWaitForIdle: + @pytest.mark.asyncio + async def test_returns_true_when_already_idle(self): + monitor = _make_monitor("linux") + result = await monitor.wait_for_idle(timeout=1.0) + assert result is True + + @pytest.mark.asyncio + async def test_returns_true_after_activity_stops(self): + monitor = _make_monitor("linux", idle_threshold=0.05) + monitor._record_activity() + + result = await monitor.wait_for_idle(timeout=2.0) + assert result is True + + @pytest.mark.asyncio + async def test_times_out_while_user_is_active(self): + monitor = _make_monitor("linux", idle_threshold=10.0) + # Keep recording activity in a thread to simulate continuous input. + stop_event = threading.Event() + + def _spam(): + while not stop_event.is_set(): + monitor._record_activity() + time.sleep(0.01) + + spammer = threading.Thread(target=_spam, daemon=True) + spammer.start() + try: + result = await monitor.wait_for_idle(timeout=0.1) + finally: + stop_event.set() + spammer.join() + + assert result is False + + @pytest.mark.asyncio + async def test_none_timeout_waits_until_idle(self): + monitor = _make_monitor("linux", idle_threshold=0.05) + monitor._record_activity() + # With None timeout it should still resolve once input stops. + result = await asyncio.wait_for(monitor.wait_for_idle(timeout=None), timeout=3.0) + assert result is True + + +# --------------------------------------------------------------------------- +# Platform backend selection +# --------------------------------------------------------------------------- + + +class TestBackendSelection: + def test_darwin_selects_macos_backend(self): + with patch.object(sys, "platform", "darwin"): + monitor = InputActivityMonitor() + assert isinstance(monitor._backend, _MacOSBackend) + + def test_win32_selects_windows_backend(self): + with patch.object(sys, "platform", "win32"): + monitor = InputActivityMonitor() + assert isinstance(monitor._backend, _WindowsBackend) + + def test_linux_selects_null_backend(self): + with patch.object(sys, "platform", "linux"): + monitor = InputActivityMonitor() + assert isinstance(monitor._backend, _NullBackend) + + def test_unknown_platform_selects_null_backend(self): + with patch.object(sys, "platform", "freebsd"): + monitor = InputActivityMonitor() + assert isinstance(monitor._backend, _NullBackend) + + +# --------------------------------------------------------------------------- +# macOS backend — mock Quartz +# --------------------------------------------------------------------------- + + +class TestMacOSBackend: + def _make_quartz_mock(self): + """Return a MagicMock that mimics the Quartz module surface we use.""" + q = MagicMock() + q.CGEventTapCreate.return_value = MagicMock() # non-None tap + q.CFMachPortCreateRunLoopSource.return_value = MagicMock() + q.CFRunLoopGetCurrent.return_value = MagicMock() + # CFRunLoopRun blocks — replace with a no-op + q.CFRunLoopRun.return_value = None + return q + + def test_start_launches_thread(self): + backend = _MacOSBackend(lambda: None) + quartz = self._make_quartz_mock() + with patch.dict("sys.modules", {"Quartz": quartz}): + backend.start() + time.sleep(0.05) + assert backend._thread is not None + # Clean up + if backend._run_loop: + quartz.CFRunLoopStop(backend._run_loop) + + def test_null_tap_logs_warning(self, caplog): + backend = _MacOSBackend(lambda: None) + quartz = self._make_quartz_mock() + quartz.CGEventTapCreate.return_value = None # simulate permission failure + + import logging + + with caplog.at_level(logging.WARNING): + with patch.dict("sys.modules", {"Quartz": quartz}): + backend._run() + + assert any("event tap" in r.message.lower() for r in caplog.records) + + def test_import_error_logs_warning(self, caplog): + backend = _MacOSBackend(lambda: None) + import logging + + with caplog.at_level(logging.WARNING): + with patch.dict("sys.modules", {"Quartz": None}): + # Force ImportError by making the module None + original = sys.modules.get("Quartz") + sys.modules["Quartz"] = None # type: ignore[assignment] + try: + backend._run() + finally: + if original is None: + del sys.modules["Quartz"] + else: + sys.modules["Quartz"] = original + + # The method should log a warning about pyobjc not being available. + # (May not trigger on systems that have it installed — check gracefully.) + assert backend._thread is None or True # no crash is the main assertion + + +# --------------------------------------------------------------------------- +# Windows backend — mock ctypes / win32 +# --------------------------------------------------------------------------- + + +class TestWindowsBackend: + def _make_ctypes_mock(self): + ct = MagicMock() + ct.windll.user32.SetWindowsHookExW.return_value = 1 # non-zero = success + ct.windll.user32.GetMessageW.return_value = 0 # immediate quit + ct.windll.user32.UnhookWindowsHookEx.return_value = 1 + ct.CFUNCTYPE.return_value = MagicMock(return_value=MagicMock()) + ct.c_long = MagicMock() + ct.c_int = MagicMock() + ct.wintypes = MagicMock() + return ct + + def test_run_calls_set_hooks(self): + backend = _WindowsBackend(lambda: None) + ct = self._make_ctypes_mock() + # Keep a direct reference to the mock's windll so the assertion + # survives after patch.dict / patch(create=True) tear down. + windll_mock = ct.windll + with patch.dict("sys.modules", {"ctypes": ct, "ctypes.wintypes": ct.wintypes}): + with patch("ctypes.windll", windll_mock, create=True): + with patch("ctypes.CFUNCTYPE", ct.CFUNCTYPE, create=True): + with patch("ctypes.wintypes", ct.wintypes, create=True): + backend._run() + + # SetWindowsHookExW should have been called twice (mouse + keyboard) + assert windll_mock.user32.SetWindowsHookExW.call_count >= 2 + + def test_hook_failure_logs_warning(self, caplog): + backend = _WindowsBackend(lambda: None) + ct = self._make_ctypes_mock() + ct.windll.user32.SetWindowsHookExW.return_value = 0 # failure + + import logging + + with caplog.at_level(logging.WARNING): + with patch("ctypes.windll", ct.windll, create=True): + with patch("ctypes.CFUNCTYPE", ct.CFUNCTYPE, create=True): + with patch("ctypes.wintypes", ct.wintypes, create=True): + backend._run() + + assert any("hook" in r.message.lower() for r in caplog.records) + + +# --------------------------------------------------------------------------- +# Thread safety +# --------------------------------------------------------------------------- + + +class TestThreadSafety: + def test_concurrent_record_activity_does_not_race(self): + monitor = _make_monitor("linux", idle_threshold=5.0) + errors = [] + + def _worker(): + try: + for _ in range(100): + monitor._record_activity() + monitor.is_user_active() + except Exception as exc: + errors.append(exc) + + threads = [threading.Thread(target=_worker) for _ in range(8)] + for t in threads: + t.start() + for t in threads: + t.join() + + assert errors == [], f"Thread-safety errors: {errors}" diff --git a/tests/test_local_agents.py b/tests/test_local_agents.py index 8fd831b..a1b5168 100644 --- a/tests/test_local_agents.py +++ b/tests/test_local_agents.py @@ -2,7 +2,7 @@ import pytest -from operator_use.agent.tools.builtin.local_agents import LOCAL_AGENT_DELEGATION_CHAIN, localagents +from operator_use.tools.local_agents import LOCAL_AGENT_DELEGATION_CHAIN, localagents from operator_use.messages.service import AIMessage diff --git a/tests/test_plugins.py b/tests/test_plugins.py index f6ba6d4..5d9f8b9 100644 --- a/tests/test_plugins.py +++ b/tests/test_plugins.py @@ -7,7 +7,7 @@ from operator_use.agent.tools.registry import ToolRegistry from operator_use.agent.hooks.service import Hooks from operator_use.agent.hooks.events import HookEvent -from operator_use.tools.service import Tool +from operator_use.agent.tools.service import Tool from pydantic import BaseModel diff --git a/tests/test_tool_registry.py b/tests/test_tool_registry.py index ca6ed75..77c70b9 100644 --- a/tests/test_tool_registry.py +++ b/tests/test_tool_registry.py @@ -4,7 +4,7 @@ from pydantic import BaseModel from operator_use.agent.tools.registry import ToolRegistry -from operator_use.tools.service import Tool +from operator_use.agent.tools.service import Tool # --- Helpers --- diff --git a/tests/test_tools.py b/tests/test_tools.py index 8cbf913..de572ab 100644 --- a/tests/test_tools.py +++ b/tests/test_tools.py @@ -4,7 +4,7 @@ from pydantic import BaseModel from typing import Literal -from operator_use.tools.service import Tool, ToolResult +from operator_use.agent.tools.service import Tool, ToolResult # --- ToolResult ---