From 5d6d7f27ff26a79e399ad26db07a8178b3f87bb8 Mon Sep 17 00:00:00 2001 From: yuyili Date: Wed, 6 May 2026 14:23:52 +0800 Subject: [PATCH 1/7] =?UTF-8?q?feature:=20=E6=96=B0=E5=A2=9E=20Cube/E2B=20?= =?UTF-8?q?=E6=B2=99=E7=AE=B1=E4=BB=A3=E7=A0=81=E6=89=A7=E8=A1=8C=E5=99=A8?= =?UTF-8?q?=E4=B8=8E=E5=B7=A5=E4=BD=9C=E7=A9=BA=E9=97=B4=E8=BF=90=E8=A1=8C?= =?UTF-8?q?=E6=97=B6=20-=20=E5=9C=A8=20trpc=5Fagent=5Fsdk.code=5Fexecutors?= =?UTF-8?q?.cube=20=E6=8F=90=E4=BE=9B=20CubeCodeExecutor=20=E4=B8=8E=20Cub?= =?UTF-8?q?eWorkspaceRuntime,=20=E5=A4=8D=E7=94=A8=20e2b-code-interpreter?= =?UTF-8?q?=20=E8=BF=9C=E7=A8=8B=E6=B2=99=E7=AE=B1;=20=E9=80=9A=E8=BF=87?= =?UTF-8?q?=20PEP=20562=20lazy=20=5F=5Fgetattr=5F=5F=20=E5=AE=9E=E7=8E=B0?= =?UTF-8?q?=E6=8C=89=E9=9C=80=E5=8A=A0=E8=BD=BD,=20=E4=B8=8D=E5=AE=89?= =?UTF-8?q?=E8=A3=85=20[cube]=20extra=20=E4=B9=9F=E5=8F=AF=E6=AD=A3?= =?UTF-8?q?=E5=B8=B8=20import=20=E5=8C=85=20-=20=E5=85=AC=E5=85=B1?= =?UTF-8?q?=E5=8E=9F=E8=AF=AD=20CubeSandboxClient=20=E9=9B=86=E4=B8=AD?= =?UTF-8?q?=E5=B0=81=E8=A3=85=E6=B2=99=E7=AE=B1=E7=94=9F=E5=91=BD=E5=91=A8?= =?UTF-8?q?=E6=9C=9F=20(open=5Fnew/open=5Fexisting/close/destroy)=E3=80=81?= =?UTF-8?q?=E7=BB=93=E6=9E=84=E5=8C=96=E5=91=BD=E4=BB=A4=E6=89=A7=E8=A1=8C?= =?UTF-8?q?=20(commands=5Frun=20=E5=90=9E=E6=8E=89=20CommandExitException)?= =?UTF-8?q?=20=E4=B8=8E=E6=96=87=E4=BB=B6=E4=BC=A0=E8=BE=93=20(upload=5Fpa?= =?UTF-8?q?th/download=5Fpath=20=E8=87=AA=E5=8A=A8=20dispatch=20=E6=96=87?= =?UTF-8?q?=E4=BB=B6/=E7=9B=AE=E5=BD=95,=20=E7=9B=AE=E5=BD=95=E8=B5=B0=20t?= =?UTF-8?q?ar=20=E5=8D=8F=E8=AE=AE=E4=BF=9D=E7=95=99=E7=AC=A6=E5=8F=B7?= =?UTF-8?q?=E9=93=BE=E6=8E=A5=E5=92=8C=E6=9D=83=E9=99=90)=20-=20=E9=85=8D?= =?UTF-8?q?=E7=BD=AE=E6=8C=89=20ISP=20=E6=8B=86=E5=88=86:=20CubeCodeExecut?= =?UTF-8?q?orConfig=20=E5=8F=AA=E6=89=BF=E8=BD=BD=20sandbox/=E6=89=A7?= =?UTF-8?q?=E8=A1=8C=E7=9B=B8=E5=85=B3=E5=AD=97=E6=AE=B5,=20CubeWorkspaceR?= =?UTF-8?q?untimeConfig=20=E5=8F=AA=E6=89=BF=E8=BD=BD=20workspace=20?= 
=?UTF-8?q?=E7=9B=B8=E5=85=B3=E5=AD=97=E6=AE=B5;=20create=5Fcube=5Fworkspa?= =?UTF-8?q?ce=5Fruntime=20=E9=80=9A=E8=BF=87=E5=8F=AF=E9=80=89=20workspace?= =?UTF-8?q?=5Fcfg=20=E6=B3=A8=E5=85=A5=20-=20=E6=8F=90=E4=BE=9B=20examples?= =?UTF-8?q?/code=5Fexecutors/cube=5Fdemo.py=20=E7=AB=AF=E5=88=B0=E7=AB=AF?= =?UTF-8?q?=E7=A4=BA=E4=BE=8B=E5=B9=B6=E5=9C=A8=20pyproject.toml=20?= =?UTF-8?q?=E5=A2=9E=E5=8A=A0=20[cube]=20=E5=8F=AF=E9=80=89=E4=BE=9D?= =?UTF-8?q?=E8=B5=96=E7=BB=84?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Cursor --- examples/code_executors/cube_demo.py | 108 +++ pyproject.toml | 5 + .../container/test_container_ws_runtime.py | 21 + tests/code_executors/cube/__init__.py | 0 tests/code_executors/cube/conftest.py | 112 +++ tests/code_executors/cube/test_bug_hunt.py | 558 +++++++++++++ .../code_executors/cube/test_code_executor.py | 540 ++++++++++++ tests/code_executors/cube/test_e2b.py | 54 ++ .../cube/test_package_lazy_import.py | 160 ++++ tests/code_executors/cube/test_paths.py | 302 +++++++ tests/code_executors/cube/test_runtime.py | 768 ++++++++++++++++++ tests/code_executors/cube/test_sandbox.py | 586 +++++++++++++ tests/code_executors/cube/test_transfer.py | 510 ++++++++++++ tests/code_executors/cube/test_types.py | 145 ++++ .../local/test_local_ws_runtime.py | 161 +++- tests/code_executors/utils/test_collect.py | 393 +++++++++ trpc_agent_sdk/code_executors/__init__.py | 42 + .../container/_container_ws_runtime.py | 192 ++--- .../code_executors/cube/__init__.py | 38 + .../code_executors/cube/_code_executor.py | 275 +++++++ trpc_agent_sdk/code_executors/cube/_e2b.py | 42 + trpc_agent_sdk/code_executors/cube/_paths.py | 114 +++ .../code_executors/cube/_runtime.py | 469 +++++++++++ .../code_executors/cube/_sandbox.py | 306 +++++++ .../code_executors/cube/_transfer.py | 198 +++++ trpc_agent_sdk/code_executors/cube/_types.py | 121 +++ .../code_executors/local/_local_ws_runtime.py | 206 ++--- 
.../code_executors/utils/__init__.py | 6 + .../code_executors/utils/_collect.py | 234 ++++++ 29 files changed, 6390 insertions(+), 276 deletions(-) create mode 100644 examples/code_executors/cube_demo.py create mode 100644 tests/code_executors/cube/__init__.py create mode 100644 tests/code_executors/cube/conftest.py create mode 100644 tests/code_executors/cube/test_bug_hunt.py create mode 100644 tests/code_executors/cube/test_code_executor.py create mode 100644 tests/code_executors/cube/test_e2b.py create mode 100644 tests/code_executors/cube/test_package_lazy_import.py create mode 100644 tests/code_executors/cube/test_paths.py create mode 100644 tests/code_executors/cube/test_runtime.py create mode 100644 tests/code_executors/cube/test_sandbox.py create mode 100644 tests/code_executors/cube/test_transfer.py create mode 100644 tests/code_executors/cube/test_types.py create mode 100644 tests/code_executors/utils/test_collect.py create mode 100644 trpc_agent_sdk/code_executors/cube/__init__.py create mode 100644 trpc_agent_sdk/code_executors/cube/_code_executor.py create mode 100644 trpc_agent_sdk/code_executors/cube/_e2b.py create mode 100644 trpc_agent_sdk/code_executors/cube/_paths.py create mode 100644 trpc_agent_sdk/code_executors/cube/_runtime.py create mode 100644 trpc_agent_sdk/code_executors/cube/_sandbox.py create mode 100644 trpc_agent_sdk/code_executors/cube/_transfer.py create mode 100644 trpc_agent_sdk/code_executors/cube/_types.py create mode 100644 trpc_agent_sdk/code_executors/utils/_collect.py diff --git a/examples/code_executors/cube_demo.py b/examples/code_executors/cube_demo.py new file mode 100644 index 0000000..0a7d4d8 --- /dev/null +++ b/examples/code_executors/cube_demo.py @@ -0,0 +1,108 @@ +#!/usr/bin/env python3 +# Tencent is pleased to support the open source community by making tRPC-Agent-Python available. +# +# Copyright (C) 2026 Tencent. All rights reserved. +# +# tRPC-Agent-Python is licensed under Apache-2.0. 
+"""End-to-end demo for `CubeCodeExecutor` and `CubeWorkspaceRuntime`. + +Requires the optional ``[cube]`` extra and the following environment +variables (same names hermes uses): + +- ``CUBE_TEMPLATE_ID``: Cube template id (e.g. ``std-XXXXXXXX``) +- ``E2B_API_URL``: Cube/E2B-compatible gateway URL +- ``E2B_API_KEY``: API key for the gateway + +Usage:: + + pip install 'trpc-agent-py[cube]' + export CUBE_TEMPLATE_ID=... + export E2B_API_URL=... + export E2B_API_KEY=... + python examples/code_executors/cube_demo.py + +The demo walks through: + 1. ``CubeCodeExecutor.create`` (no sandbox_id -> fresh sandbox) + 2. ``execute_code`` for Python and Bash code blocks + 3. Workspace runtime: ``create_workspace`` -> ``put_files`` -> ``run_program`` -> ``collect_outputs`` + 4. ``destroy`` (kills the remote sandbox) +""" + +from __future__ import annotations + +import asyncio +import os +import sys + +from trpc_agent_sdk.code_executors import CubeCodeExecutor +from trpc_agent_sdk.code_executors import CubeCodeExecutorConfig +from trpc_agent_sdk.code_executors import create_cube_workspace_runtime +from trpc_agent_sdk.code_executors._types import CodeBlock +from trpc_agent_sdk.code_executors._types import CodeExecutionInput +from trpc_agent_sdk.code_executors._types import WorkspaceOutputSpec +from trpc_agent_sdk.code_executors._types import WorkspacePutFileInfo +from trpc_agent_sdk.code_executors._types import WorkspaceRunProgramSpec + + +def _require_env() -> None: + missing = [name for name in ("CUBE_TEMPLATE_ID", "E2B_API_URL", "E2B_API_KEY") if not os.getenv(name)] + if missing: + sys.stderr.write(f"missing required env vars: {', '.join(missing)}\n") + sys.exit(2) + + +async def _run() -> None: + _require_env() + + cfg = CubeCodeExecutorConfig( + execute_timeout=30.0, + idle_timeout=600, + ) + + executor = await CubeCodeExecutor.create(cfg) + print(f"created sandbox: {executor.sandbox_id}") + + try: + # 1. execute_code with two blocks (python and bash). 
+ result = await executor.execute_code( + invocation_context=None, # type: ignore[arg-type] + code_execution_input=CodeExecutionInput(code_blocks=[ + CodeBlock(code="print('hello from cube py')", language="python"), + CodeBlock(code="echo hello from cube bash", language="bash"), + ]), + ) + print("execute_code result:") + print(result.output) + + # 2. Workspace runtime end-to-end. + runtime = create_cube_workspace_runtime(executor) + manager = runtime.manager() + fs = runtime.fs() + runner = runtime.runner() + + ws = await manager.create_workspace("demo-1") + print(f"workspace path: {ws.path}") + + await fs.put_files(ws, [ + WorkspacePutFileInfo(path="work/script.py", + content=b"print('script ran')\n"), + ]) + + run_result = await runner.run_program( + ws, + WorkspaceRunProgramSpec(cmd="python3", args=["work/script.py"], timeout=15.0), + ) + print(f"run_program exit={run_result.exit_code} stdout={run_result.stdout!r}") + + outputs = await fs.collect_outputs(ws, WorkspaceOutputSpec(globs=["work/*.py"], inline=True)) + for ref in outputs.files: + print(f"output: {ref.name} ({len(ref.content)} chars)") + + await manager.cleanup("demo-1") + finally: + await executor.destroy() + print("sandbox destroyed") + + +if __name__ == "__main__": + asyncio.run(_run()) diff --git a/pyproject.toml b/pyproject.toml index 7f1145a..48a0e28 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -98,6 +98,10 @@ mem0 = [ "sentence-transformers", ] +cube = [ + "e2b-code-interpreter>=2.0.0", +] + langchain_tool = [ "langchain_tavily", "langchain", @@ -140,6 +144,7 @@ all = [ "aiofiles", "wecom-aibot-sdk-python>=0.1.5", "a2a-sdk<1.0.0,>=0.3.22", + "e2b-code-interpreter>=2.0.0", ] [project.scripts] diff --git a/tests/code_executors/container/test_container_ws_runtime.py b/tests/code_executors/container/test_container_ws_runtime.py index 2645a7e..cfef20c 100644 --- a/tests/code_executors/container/test_container_ws_runtime.py +++ b/tests/code_executors/container/test_container_ws_runtime.py @@ 
-833,6 +833,27 @@ async def test_collect_outputs_empty(self): assert len(manifest.files) == 0 + async def test_collect_outputs_empty_globs_short_circuits(self): + """Empty pattern list must skip the bash glob entirely. + + Regression guard for ``_enumerate_matches``: when callers pass + ``globs=[]`` (or an all-whitespace list normalised to empty), + the helper returns ``[]`` immediately rather than synthesising + a degenerate ``patterns=()`` shell command. We verify by + asserting ``exec_run`` is never invoked. + """ + ws = _make_ws() + cc = _mock_container_client() + cfg = RuntimeConfig() + fs = ContainerWorkspaceFS(cc, cfg) + + spec = WorkspaceOutputSpec(globs=[]) + manifest = await fs.collect_outputs(ws, spec) + + assert manifest.files == [] + assert manifest.limits_hit is False + cc.exec_run.assert_not_called() + async def test_collect_outputs_max_total_bytes(self): ws = _make_ws() cc = _mock_container_client() diff --git a/tests/code_executors/cube/__init__.py b/tests/code_executors/cube/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/code_executors/cube/conftest.py b/tests/code_executors/cube/conftest.py new file mode 100644 index 0000000..db4c0e4 --- /dev/null +++ b/tests/code_executors/cube/conftest.py @@ -0,0 +1,112 @@ +# Tencent is pleased to support the open source community by making tRPC-Agent-Python available. +# +# Copyright (C) 2026 Tencent. All rights reserved. +# +# tRPC-Agent-Python is licensed under Apache-2.0. +"""Shared fixtures for the cube/ test suite. + +Exposes a ``fake_e2b`` fixture that patches +:func:`trpc_agent_sdk.code_executors.cube._e2b._import_e2b` to return a +fake vendor module with stub classes / enums that match the surface the +production code consults. This keeps the whole test suite independent +of the real ``e2b-code-interpreter`` dependency. 
+""" + +from __future__ import annotations + +from types import SimpleNamespace +from unittest.mock import AsyncMock +from unittest.mock import MagicMock + +import pytest + + +class _FakeSandboxException(Exception): + """Mirrors e2b_code_interpreter.SandboxException.""" + + +class _FakeSandboxNotFoundException(_FakeSandboxException): + """Mirrors e2b_code_interpreter.SandboxNotFoundException.""" + + +class _FakeCommandExitException(Exception): + """Mirrors e2b_code_interpreter.CommandExitException. + + Carries stdout/stderr/exit_code, matching how the real vendor raises + it. ``commands_run`` reads these via getattr so the attribute names + are the contract here. + """ + + def __init__(self, stdout: str = "", stderr: str = "", exit_code: int = 1): + super().__init__(f"cmd exit {exit_code}") + self.stdout = stdout + self.stderr = stderr + self.exit_code = exit_code + + +def _make_fake_e2b() -> SimpleNamespace: + ns = SimpleNamespace() + ns.SandboxException = _FakeSandboxException + ns.SandboxNotFoundException = _FakeSandboxNotFoundException + ns.CommandExitException = _FakeCommandExitException + ns.SandboxState = SimpleNamespace( + RUNNING=SimpleNamespace(value="running"), + PAUSED=SimpleNamespace(value="paused"), + STOPPED=SimpleNamespace(value="stopped"), + ) + ns.FileType = SimpleNamespace(DIR="dir", FILE="file") + ns.AsyncSandbox = MagicMock() + return ns + + +@pytest.fixture +def fake_e2b(monkeypatch): + """Patch ``_import_e2b`` everywhere the cube package imports it.""" + ns = _make_fake_e2b() + # The production code does ``from ._e2b import _import_e2b`` in + # _sandbox.py and _code_executor.py, which rebinds the symbol in + # those modules' globals — so we must patch every import site, not + # just the original definition. 
+ monkeypatch.setattr( + "trpc_agent_sdk.code_executors.cube._e2b._import_e2b", + lambda: ns, + ) + monkeypatch.setattr( + "trpc_agent_sdk.code_executors.cube._sandbox._import_e2b", + lambda: ns, + ) + monkeypatch.setattr( + "trpc_agent_sdk.code_executors.cube._code_executor._import_e2b", + lambda: ns, + ) + return ns + + +def _make_fake_async_sandbox(sandbox_id: str = "sbx-1"): + """Build a MagicMock shaped like ``e2b_code_interpreter.AsyncSandbox``. + + All the methods the production client touches are ``AsyncMock``s so + tests can configure ``return_value`` / ``side_effect`` as needed. + """ + sbx = MagicMock() + sbx.sandbox_id = sandbox_id + sbx.kill = AsyncMock(return_value=None) + sbx.set_timeout = AsyncMock(return_value=None) + # get_info returns a state holder by default; tests override. + info = SimpleNamespace(state=SimpleNamespace(value="running")) + sbx.get_info = AsyncMock(return_value=info) + sbx.commands = MagicMock() + sbx.commands.run = AsyncMock() + sbx.files = MagicMock() + sbx.files.read = AsyncMock(return_value=b"") + sbx.files.write = AsyncMock(return_value=None) + sbx.files.get_info = AsyncMock(return_value=SimpleNamespace(type="file")) + return sbx + + +@pytest.fixture +def fake_async_sandbox(fake_e2b): + """A fresh fake AsyncSandbox whose ``get_info`` defaults to RUNNING.""" + sbx = _make_fake_async_sandbox() + sbx.get_info = AsyncMock(return_value=SimpleNamespace(state=fake_e2b.SandboxState.RUNNING)) + return sbx diff --git a/tests/code_executors/cube/test_bug_hunt.py b/tests/code_executors/cube/test_bug_hunt.py new file mode 100644 index 0000000..b00d58f --- /dev/null +++ b/tests/code_executors/cube/test_bug_hunt.py @@ -0,0 +1,558 @@ +# Tencent is pleased to support the open source community by making tRPC-Agent-Python available. +# +# Copyright (C) 2026 Tencent. All rights reserved. +# +# tRPC-Agent-Python is licensed under Apache-2.0. +"""Adversarial tests that document real bugs in the Cube implementation. 
+ +Each test in this file encodes the *correct* behaviour. When run today +the assertion fails (the production code has the bug), and the +``@pytest.mark.xfail(strict=True)`` marker records it as ``XFAIL`` — the +suite stays green but the bug is visible in ``pytest -v`` output. + +If someone later fixes the bug, the assertion succeeds, ``strict=True`` +turns that into a failing ``XPASS``, and the author is forced to flip +the marker off, which doubles as a regression sentinel. + +This is a deliberate choice: every item here is a concrete, +reproducible defect, documented with an explicit file:line pointer. +""" + +from __future__ import annotations + +import posixpath +from pathlib import Path +from unittest.mock import AsyncMock, MagicMock + +import pytest + +from trpc_agent_sdk.code_executors._types import ( + WorkspaceInfo, + WorkspaceInputSpec, + WorkspaceOutputSpec, + WorkspaceStageOptions, +) +from trpc_agent_sdk.code_executors.cube import _paths, _runtime +from trpc_agent_sdk.code_executors.cube._runtime import ( + CubeWorkspaceFS, + CubeWorkspaceManager, +) +from trpc_agent_sdk.code_executors.utils import detect_content_type +from trpc_agent_sdk.code_executors.cube._sandbox import ( + CubeCommandResult, + CubeSandboxClient, +) +from trpc_agent_sdk.code_executors.cube._types import CubeCodeExecutorConfig + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _ok(stdout="", stderr="", exit_code=0) -> CubeCommandResult: + return CubeCommandResult(stdout=stdout, stderr=stderr, exit_code=exit_code, duration=0.0) + + +def _err(stderr="err", exit_code=1) -> CubeCommandResult: + return CubeCommandResult(stdout="", stderr=stderr, exit_code=exit_code, duration=0.0) + + +def _ws(path="/ws/run_1") -> WorkspaceInfo: + return WorkspaceInfo(id="r1", path=path) + + +@pytest.fixture +def mock_client(): + c = MagicMock(spec=CubeSandboxClient) + 
c.sandbox_id = "sbx" + c.commands_run = AsyncMock(return_value=_ok()) + c.read_file_bytes = AsyncMock(return_value=b"") + c.write_file_bytes = AsyncMock() + c.upload_path = AsyncMock() + return c + + +# =========================================================================== +# BUG 1 — binary stdin is silently corrupted by UTF-8 replacement [FIXED] +# +# File: trpc_agent_sdk/code_executors/cube/_paths.py +# +# Originally ``wrap_stdin_heredoc`` did +# ``stdin.decode("utf-8", errors="replace")``, lossily turning any +# non-UTF-8 byte into U+FFFD before reaching the sandbox. The fix +# routes binary payloads through a ``base64 -d | cmd`` heredoc so the +# original bytes reach the command's stdin verbatim. UTF-8 payloads +# still take the simple ``cmd << 'MARKER'`` text fast path. +# +# This regression test drives the rendered command through real bash +# and asserts byte-for-byte recovery for every byte 0x00..0xff. +# =========================================================================== + + +def test_bug1_binary_stdin_preserved_byte_for_byte(): + """Every byte 0x00..0xff must reach the command's stdin verbatim.""" + import subprocess + import tempfile + + payload = bytes(range(256)) + with tempfile.TemporaryDirectory() as tmp: + sink = Path(tmp) / "received.bin" + cmd = _paths.wrap_stdin_heredoc(f"cat > {sink}", payload) + result = subprocess.run( + ["bash", "-c", cmd], capture_output=True, check=True + ) + assert result.returncode == 0, result.stderr + assert sink.read_bytes() == payload, ( + "binary bytes lost on the wire to the sandbox shell" + ) + + +# =========================================================================== +# BUG 2 — heredoc marker collision only checks the payload, not the command +# +# File: trpc_agent_sdk/code_executors/cube/_paths.py:69-73 +# while marker in payload: +# +# If the ``command`` argument itself happens to contain the chosen +# marker (e.g. 
a multi-line shell wrapper embedding the same literal), +# the heredoc can close prematurely. The collision check only inspects +# ``payload``, not ``command``. +# =========================================================================== + + +def test_bug2_marker_collision_against_command(monkeypatch): + # Regression sentinel for the fix at _paths.py:71. The first hex + # collides with a marker embedded in the command; the second is + # safe. The implementation must rotate to the safe marker. + colliding_hex = "cafebabecafebabe" + safe_hex = "1234567890abcdef" + calls = {"n": 0} + + def fake_token_hex(_nbytes): + calls["n"] += 1 + return colliding_hex if calls["n"] == 1 else safe_hex + + monkeypatch.setattr(_paths.secrets, "token_hex", fake_token_hex) + + colliding_marker = f"TRPC_STDIN_EOF_{colliding_hex}" + safe_marker = f"TRPC_STDIN_EOF_{safe_hex}" + adversarial_cmd = f"cat\n{colliding_marker}\necho after" + payload = b"harmless body" + + out = _paths.wrap_stdin_heredoc(adversarial_cmd, payload) + + # Collision detection must have consumed the first candidate and + # selected the safe marker as the actual heredoc delimiter. + assert calls["n"] >= 2 + assert out.endswith(f"\n{safe_marker}") + assert f"<< '{safe_marker}'" in out + # The adversarial line in the command remains as data, but it must + # NOT match the chosen heredoc delimiter. + assert colliding_marker != safe_marker + # Sanity: the closing marker comes after the payload body. + closing_index = out.rindex(f"\n{safe_marker}") + payload_index = out.index(payload.decode()) + assert closing_index > payload_index + + +# =========================================================================== +# BUG 3 — stage_directory(read_only=True) silently ignores chmod failures +# +# File: trpc_agent_sdk/code_executors/cube/_runtime.py:170-171 +# if opt.read_only: +# await self._client.commands_run( +# f"chmod -R a-w {shell_quote(target)}", timeout=self._timeout) +# +# The result's exit_code is ignored. 
If chmod fails (permissions, +# missing tool, read-only filesystem), the read_only guarantee is +# quietly violated — the caller believes the directory is locked +# when it isn't. +# =========================================================================== + + +@pytest.mark.asyncio +async def test_bug3_chmod_failure_must_raise(mock_client, tmp_path): + src = tmp_path / "d" + src.mkdir() + fs = CubeWorkspaceFS(mock_client, 30.0) + + # upload_path succeeds; chmod fails. + def commands_router(cmd, **kwargs): + if cmd.startswith("chmod"): + return _err("chmod: Operation not permitted") + return _ok() + + mock_client.commands_run.side_effect = commands_router + + with pytest.raises(RuntimeError): + await fs.stage_directory( + _ws(), str(src), "d", WorkspaceStageOptions(read_only=True) + ) + + +# =========================================================================== +# BUG 4 — cleanup() pops cache before rm; on rm failure the workspace is +# orphaned AND unrecoverable via a retry +# +# File: trpc_agent_sdk/code_executors/cube/_runtime.py:123-129 +# info = self._ws_paths.pop(exec_id, None) # <-- pop first +# if not info or not info.path: +# return +# cmd = f"rm -rf {shell_quote(info.path)}" +# result = await self._client.commands_run(cmd, ...) +# if result.exit_code != 0: +# raise RuntimeError(...) # <-- but cache already gone +# +# If rm fails, cache entry is lost; calling cleanup() again is a no-op +# ("unknown id" branch), so there is no way to retry cleanup through the +# manager interface. The remote dir becomes a permanent orphan. +# =========================================================================== + + +@pytest.mark.asyncio +async def test_bug4_cleanup_retryable_on_rm_failure(mock_client): + mgr = CubeWorkspaceManager(mock_client, "/ws", 30.0) + info = await mgr.create_workspace("id") + + # First cleanup: rm fails. 
+ mock_client.commands_run.side_effect = [_err("rm fail")] + with pytest.raises(RuntimeError): + await mgr.cleanup("id") + + # A second cleanup call should still try to rm the orphan, because + # the first attempt failed. The bug: the id was popped from the + # cache already, so this call is a silent no-op. + mock_client.commands_run.reset_mock() + mock_client.commands_run.side_effect = None + mock_client.commands_run.return_value = _ok() + await mgr.cleanup("id") + # Assertion: the second cleanup must have issued a rm -rf again. + assert any( + call.args[0].startswith("rm -rf") + for call in mock_client.commands_run.await_args_list + ), "second cleanup silently did nothing — remote workspace orphaned" + + +# =========================================================================== +# BUG 5 — glob patterns containing spaces got word-split [FIXED] +# +# File: trpc_agent_sdk/code_executors/cube/_runtime.py +# +# Originally the rendered shell was +# for p in 'my dir/*.txt'; do for f in $p; do ... +# The outer `for p in ...` preserved the quoting, but the inner +# `for f in $p` was unquoted, so bash performed word-splitting on $p +# and turned "my dir/*.txt" into two patterns "my" and "dir/*.txt". +# Quoting `"$p"` would have suppressed splitting but also disabled +# globbing. +# +# The fix passes patterns via a bash array (preserves spaces per +# element) and temporarily clears IFS so the unquoted `$p` inside +# `matches=( $p )` is *not* word-split, while bash still performs path +# expansion on it. globstar is preserved (compgen -G does not honour +# **, so it's intentionally not used). +# +# This regression test drives the actual rendered command — taken from +# `_glob` via a real fake client — through bash and asserts the +# expected matches. 
+# =========================================================================== + + +@pytest.mark.asyncio +async def test_bug5_glob_pattern_with_space(): + """Glob patterns that contain spaces must match as a single literal.""" + import subprocess + import tempfile + import os + + with tempfile.TemporaryDirectory() as tmp: + target_dir = os.path.join(tmp, "my dir") + os.makedirs(target_dir) + Path(target_dir, "file.txt").write_text("content") + os.makedirs(os.path.join(tmp, "a", "b")) + Path(tmp, "a", "b", "deep.txt").write_text("deep") + + captured: list[str] = [] + + async def fake_run(cmd, timeout=None): + captured.append(cmd) + r = subprocess.run( + ["bash", "-c", cmd], capture_output=True, text=True + ) + return CubeCommandResult( + stdout=r.stdout, stderr=r.stderr, exit_code=r.returncode, duration=0.0 + ) + + client = MagicMock(spec=CubeSandboxClient) + client.commands_run = AsyncMock(side_effect=fake_run) + fs = CubeWorkspaceFS(client, 30.0) + + # Pattern with a space must match exactly the one file under "my dir". + out = await fs._glob(tmp, ["my dir/*.txt"]) + assert any(p.endswith("my dir/file.txt") for p in out), ( + f"word-splitting corrupted the glob: {out!r}" + ) + assert len(out) == 1, f"unexpected matches: {out!r}" + + # globstar (**) must still work after the fix. + out2 = await fs._glob(tmp, ["**/*.txt"]) + joined = "\n".join(out2) + assert "deep.txt" in joined and "file.txt" in joined, ( + f"globstar regressed: {out2!r}" + ) + + +# =========================================================================== +# BUG 6 — collect_outputs() does not dedup by relative path +# +# File: trpc_agent_sdk/code_executors/cube/_runtime.py:251-278 +# +# ``collect()`` has an explicit ``seen: set[str]`` dedup step. The +# sibling ``collect_outputs()`` walks the same glob result but has no +# dedup. When two patterns overlap (e.g. 
``['*.txt', 'out/*.txt']``), +# the same file is emitted twice, double-counted against ``max_files``, +# and — if ``save=True`` — saved twice as an artifact. +# =========================================================================== + + +@pytest.mark.asyncio +async def test_bug6_collect_outputs_dedups_by_rel(mock_client): + ws = _ws() + # Glob returns the same file twice (overlapping patterns). + mock_client.commands_run.return_value = _ok( + stdout=f"{ws.path}/a.txt\n{ws.path}/a.txt\n" + ) + mock_client.read_file_bytes.return_value = b"x" + fs = CubeWorkspaceFS(mock_client, 30.0) + + manifest = await fs.collect_outputs(ws, WorkspaceOutputSpec(globs=["*.txt", "./a.txt"])) + # Expected: one file, not two. + names = [f.name for f in manifest.files] + assert names == ["a.txt"], f"duplicate emitted: {names}" + + +# =========================================================================== +# BUG 7 — _detect_mime over-eagerly labels anything starting with { or [ as JSON +# +# File: trpc_agent_sdk/code_executors/cube/_runtime.py:83-84 +# if sample.startswith(b"{") or sample.startswith(b"["): +# return "application/json" +# +# Python pickle protocol-0, MessagePack, BSON, Lua tables, gnuplot +# output, shell brace expansion logs — all start with ``{`` or ``[`` +# without being JSON. A zero-length JSON check (no matching closer, no +# structural parse) is not a reliable sniff. +# =========================================================================== + + +def test_bug7_detect_mime_not_json_for_python_repr(): + """Python repr of a dict starts with ``{`` but must not be labelled JSON. + + Regression test for the historical ``_detect_mime`` bug where any + payload starting with ``{`` or ``[`` was blindly classified as + ``application/json``. The shared :func:`detect_content_type` + helper now validates JSON via ``json.loads``, so a Python repr + (single-quoted keys, ``None``/``True`` literals) falls through to + a text/binary classification instead. 
+ """ + sample = b"{'key': 'value', 'n': 3}" + mime = detect_content_type(Path("noextension_please"), sample) + assert mime != "application/json", ( + f"false-positive JSON classification: Python repr labelled as {mime}" + ) + + +# =========================================================================== +# BUG 8 — create_workspace trusts cache without re-verifying the remote dir +# [FIXED] +# +# File: trpc_agent_sdk/code_executors/cube/_runtime.py +# +# Originally the manager early-returned on a pure in-memory dict hit: +# +# if exec_id in self._ws_paths: +# return self._ws_paths[exec_id] +# +# If the remote directory was deleted externally (operator cleanup, +# sandbox snapshot rollback, sibling cleanup() on a shared sandbox, +# host process restart re-attaching to a live sandbox) the cache still +# returned the stale path; subsequent put_files / run_program / +# collect_outputs / stage_inputs targeted a non-existent path and +# failed deep inside with cryptic "No such file" errors instead of a +# clean "workspace vanished; recreate" signal. +# +# Fix: the path remains stable per exec_id (callers can rely on it), +# but every create_workspace call now unconditionally re-issues an +# idempotent ``mkdir -p`` for the four standard subdirs. ``mkdir -p`` +# is a no-op when the tree already exists, so steady-state cost is one +# round-trip; on miss the workspace heals transparently. +# =========================================================================== + + +@pytest.mark.asyncio +async def test_bug8_create_workspace_reconciles_with_remote(mock_client): + """Repeat create_workspace calls keep a stable path and re-issue an + idempotent mkdir -p so the workspace heals if it was deleted externally. + """ + mgr = CubeWorkspaceManager(mock_client, "/ws", 30.0) + info1 = await mgr.create_workspace("id") + + # Simulate an external force having deleted the remote dir between + # calls. 
A correct implementation must re-issue mkdir -p (so the + # tree is healed) and must keep the same path for the same exec_id + # (callers cache it and rely on path stability). + mock_client.commands_run.reset_mock() + info2 = await mgr.create_workspace("id") + + assert info2.path == info1.path, ( + "path must stay stable across calls for the same exec_id" + ) + assert mock_client.commands_run.await_count == 1, ( + "cache returned stale WorkspaceInfo without reconciling remote" + ) + cmd = mock_client.commands_run.await_args.args[0] + assert "mkdir -p" in cmd + assert f"'{info1.path}'" in cmd + # All four standard subdirs must be in the reconciling mkdir. + for sub in ("work", "out", "skills", "runs"): + assert posixpath.join(info1.path, sub) in cmd or f"'{info1.path}/{sub}'" in cmd + + +@pytest.mark.asyncio +async def test_bug8_create_workspace_surfaces_mkdir_failure_on_reconcile(mock_client): + """If the reconciling mkdir -p fails (e.g. parent vanished, perms), + the second create_workspace must raise a clear error instead of + silently handing back a stale, broken WorkspaceInfo. + """ + mgr = CubeWorkspaceManager(mock_client, "/ws", 30.0) + await mgr.create_workspace("id") + + mock_client.commands_run.reset_mock() + mock_client.commands_run.return_value = _err("mkdir: cannot create directory") + with pytest.raises(RuntimeError, match="Failed to create cube workspace"): + await mgr.create_workspace("id") + + +# =========================================================================== +# BUG 9 — collect() decodes binary files to str with errors="replace" +# +# File: trpc_agent_sdk/code_executors/cube/_runtime.py:230 +# content=content.decode("utf-8", errors="replace"), +# +# ``CodeFile.content: str`` forces a string, so binary files (PDFs, +# images, gzip archives) are converted to a UTF-8 replacement-laden +# mess. Downstream consumers cannot recover the original bytes. 
The +# sibling ``collect_outputs`` avoids this with ``inline=True`` +# gated — but ``collect()`` has no such guard. +# =========================================================================== + + +@pytest.mark.xfail(strict=True, reason="BUG 9: collect() corrupts binary files to str (_runtime.py:230)") +@pytest.mark.asyncio +async def test_bug9_collect_preserves_binary_bytes(mock_client): + ws = _ws() + mock_client.commands_run.return_value = _ok(stdout=f"{ws.path}/image.png\n") + binary = b"\x89PNG\r\n\x1a\nnot-valid-utf8\x80\x81\x82" + mock_client.read_file_bytes.return_value = binary + fs = CubeWorkspaceFS(mock_client, 30.0) + files = await fs.collect(ws, ["*.png"]) + assert len(files) == 1 + # The raw bytes should be recoverable. They are not: utf-8 + # replace turns \x80 into U+FFFD, and re-encoding does not roundtrip. + assert files[0].content.encode("utf-8") == binary, ( + "binary file silently corrupted by utf-8 replace" + ) + + +# =========================================================================== +# BUG 10 — open_new truncates fractional idle_timeout via int() [FIXED] +# +# Original failure mode: ``CubeCodeExecutorConfig(idle_timeout=0.9)`` was +# accepted (field was typed ``float``) and then silently truncated by +# ``timeout=int(cfg.idle_timeout)`` in ``_sandbox.py``. ``int(0.9) == 0``, +# which most sandbox APIs interpret as "no timeout" or "expire immediately". +# +# Fix: ``idle_timeout`` is now typed ``int`` (matching the e2b API +# contract) and ``CubeCodeExecutorConfig.__post_init__`` rejects values +# that are non-int or < 1. The ``int(...)`` cast in ``open_new`` / +# ``set_timeout`` is gone — values flow through unchanged. 
+# =========================================================================== + + +@pytest.mark.asyncio +async def test_bug10_fractional_idle_timeout_rejected_at_construction( + fake_e2b, fake_async_sandbox, +): + fake_e2b.AsyncSandbox.create = AsyncMock(return_value=fake_async_sandbox) + with pytest.raises(TypeError, match="idle_timeout must be an int"): + CubeCodeExecutorConfig( + template="t", api_url="u", api_key="k", + idle_timeout=0.9, # type: ignore[arg-type] + ) + + +@pytest.mark.asyncio +async def test_bug10_zero_idle_timeout_rejected_at_construction(): + with pytest.raises(ValueError, match="idle_timeout must be >= 1"): + CubeCodeExecutorConfig( + template="t", api_url="u", api_key="k", + idle_timeout=0, + ) + + +@pytest.mark.asyncio +async def test_bug10_int_idle_timeout_passed_through_unchanged( + fake_e2b, fake_async_sandbox, +): + fake_e2b.AsyncSandbox.create = AsyncMock(return_value=fake_async_sandbox) + cfg = CubeCodeExecutorConfig( + template="t", api_url="u", api_key="k", + idle_timeout=42, + ) + await CubeSandboxClient.open_new(cfg) + kwargs = fake_e2b.AsyncSandbox.create.await_args.kwargs + assert kwargs["timeout"] == 42 + assert isinstance(kwargs["timeout"], int) + + +# =========================================================================== +# BUG 11 — cp -a with pre-existing destination directory nests the source [FIXED] +# +# File: trpc_agent_sdk/code_executors/cube/_runtime.py (``_copy_remote``) +# +# Original failure mode: ``cp -a SRC DST`` has the long-standing POSIX +# directory-footgun — if DST already exists as a directory (e.g. from a +# prior stage_inputs call on the same dst), cp copies SRC *into* DST as +# DST/basename(SRC), nesting sources instead of replacing them. +# +# Fix: ``_copy_remote`` now does ``mkdir -p parent(DST); rm -rf DST; +# cp -a SRC DST`` — the defensive rm removes any stale dst before the +# copy, so the second call is idempotent. 
The rm step surfaces its own +# failures with ``remote rm failed:`` so silent mis-stages are impossible. +# +# This regression test pins the emitted command sequence so a refactor +# that drops the rm step (or re-orders the pipeline) fails loudly. +# =========================================================================== + + +@pytest.mark.asyncio +async def test_bug11_copy_remote_issues_rm_before_cp(mock_client): + """Pin the ``mkdir → rm -rf → cp -a`` sequence in ``_copy_remote``.""" + fs = CubeWorkspaceFS(mock_client, 30.0) + await fs._copy_remote("/src", "/dst") + + cmds = [call.args[0] for call in mock_client.commands_run.await_args_list] + assert len(cmds) == 3, f"expected 3 shell steps, got {len(cmds)}: {cmds!r}" + assert cmds[0].startswith("mkdir -p"), f"step 0 must be mkdir, got: {cmds[0]!r}" + assert cmds[1].startswith("rm -rf"), ( + f"step 1 must be the defensive rm (cp -a directory-footgun guard), " + f"got: {cmds[1]!r}" + ) + assert "'/dst'" in cmds[1], "rm must target DST" + assert cmds[2].startswith("cp -a"), f"step 2 must be cp -a, got: {cmds[2]!r}" + # And the rm must come BEFORE the cp. + rm_idx = next(i for i, c in enumerate(cmds) if c.startswith("rm -rf")) + cp_idx = next(i for i, c in enumerate(cmds) if c.startswith("cp -a")) + assert rm_idx < cp_idx, "rm must precede cp" + diff --git a/tests/code_executors/cube/test_code_executor.py b/tests/code_executors/cube/test_code_executor.py new file mode 100644 index 0000000..2b014ed --- /dev/null +++ b/tests/code_executors/cube/test_code_executor.py @@ -0,0 +1,540 @@ +# Tencent is pleased to support the open source community by making tRPC-Agent-Python available. +# +# Copyright (C) 2026 Tencent. All rights reserved. +# +# tRPC-Agent-Python is licensed under Apache-2.0. 
+"""Unit tests for trpc_agent_sdk.code_executors.cube._code_executor.""" + +from __future__ import annotations + +from dataclasses import replace +from unittest.mock import AsyncMock, MagicMock + +import pytest + +from trpc_agent_sdk.code_executors._types import ( + CodeBlock, + CodeBlockDelimiter, + CodeExecutionInput, +) +from trpc_agent_sdk.code_executors.cube import _code_executor as ce_mod +from trpc_agent_sdk.code_executors.cube._code_executor import CubeCodeExecutor +from trpc_agent_sdk.code_executors.cube._sandbox import ( + CubeCommandResult, + CubeSandboxClient, +) +from trpc_agent_sdk.code_executors.cube._types import CubeCodeExecutorConfig +from trpc_agent_sdk.context import InvocationContext + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _cfg(**overrides) -> CubeCodeExecutorConfig: + base = dict( + template="t", + api_url="u", + api_key="k", + execute_timeout=30.0, + idle_timeout=600, + ) + base.update(overrides) + return CubeCodeExecutorConfig(**base) + + +def _ok(stdout: str = "", stderr: str = "", exit_code: int = 0) -> CubeCommandResult: + return CubeCommandResult(stdout=stdout, stderr=stderr, exit_code=exit_code, duration=0.0) + + +@pytest.fixture +def mock_client(): + c = MagicMock(spec=CubeSandboxClient) + c.sandbox_id = "sbx-1" + c.commands_run = AsyncMock(return_value=_ok(stdout="ok")) + c.destroy = AsyncMock() + c.close = MagicMock() + c.assert_running = AsyncMock() + return c + + +@pytest.fixture +def mock_ctx(): + return MagicMock(spec=InvocationContext) + + +# --------------------------------------------------------------------------- +# Construction +# --------------------------------------------------------------------------- + + +class TestConstruction: + + def test_defaults(self, mock_client): + cfg = _cfg() + ex = CubeCodeExecutor(mock_client, cfg) + assert ex.stateful is False + assert 
ex.optimize_data_file is False + assert ex.config is cfg + assert ex.sandbox_client is mock_client + + def test_delimiters_include_bash(self, mock_client): + ex = CubeCodeExecutor(mock_client, _cfg()) + # Default delimiters: tool_code, python, bash fences. + delims = [(d.start, d.end) for d in ex.code_block_delimiters] + assert ("```tool_code\n", "\n```") in delims + assert ("```python\n", "\n```") in delims + assert ("```bash\n", "\n```") in delims + + def test_stateful_rejected(self, mock_client): + with pytest.raises(ValueError, match="cannot be stateful"): + CubeCodeExecutor(mock_client, _cfg(), stateful=True) + + def test_optimize_data_file_rejected(self, mock_client): + with pytest.raises(ValueError, match="optimize_data_file"): + CubeCodeExecutor(mock_client, _cfg(), optimize_data_file=True) + + def test_custom_delimiters_preserved(self, mock_client): + custom = [CodeBlockDelimiter(start="```py\n", end="\n```")] + ex = CubeCodeExecutor(mock_client, _cfg(), code_block_delimiters=custom) + # Note: Pydantic may copy/validate; just check content equivalence. 
+ assert len(ex.code_block_delimiters) == 1 + assert ex.code_block_delimiters[0].start == "```py\n" + + def test_sandbox_id_reads_client(self, mock_client): + mock_client.sandbox_id = "custom-id" + ex = CubeCodeExecutor(mock_client, _cfg()) + assert ex.sandbox_id == "custom-id" + + +# --------------------------------------------------------------------------- +# create / attach / create_or_recreate +# --------------------------------------------------------------------------- + + +class TestCreate: + + @pytest.mark.asyncio + async def test_no_sandbox_id_opens_new(self, fake_e2b, monkeypatch, mock_client): + open_new = AsyncMock(return_value=mock_client) + open_existing = AsyncMock() + monkeypatch.setattr( + CubeSandboxClient, "open_new", classmethod(lambda cls, cfg: open_new(cfg)), + ) + monkeypatch.setattr( + CubeSandboxClient, + "open_existing", + classmethod(lambda cls, sid, cfg: open_existing(sid, cfg)), + ) + cfg = _cfg(sandbox_id=None) + ex = await CubeCodeExecutor.create(cfg) + assert ex.sandbox_client is mock_client + open_new.assert_awaited_once_with(cfg) + open_existing.assert_not_awaited() + + @pytest.mark.asyncio + async def test_with_sandbox_id_opens_existing(self, fake_e2b, monkeypatch, mock_client): + open_new = AsyncMock() + open_existing = AsyncMock(return_value=mock_client) + monkeypatch.setattr( + CubeSandboxClient, "open_new", classmethod(lambda cls, cfg: open_new(cfg)) + ) + monkeypatch.setattr( + CubeSandboxClient, + "open_existing", + classmethod(lambda cls, sid, cfg: open_existing(sid, cfg)), + ) + cfg = _cfg(sandbox_id="sbx-42") + await CubeCodeExecutor.create(cfg) + open_new.assert_not_awaited() + open_existing.assert_awaited_once_with("sbx-42", cfg) + + +class TestAttach: + + @pytest.mark.asyncio + async def test_missing_sandbox_id_raises(self, fake_e2b): + with pytest.raises(ValueError, match="sandbox_id"): + await CubeCodeExecutor.attach(_cfg(sandbox_id=None)) + + @pytest.mark.asyncio + async def 
test_with_sandbox_id_calls_open_existing(self, fake_e2b, monkeypatch, mock_client): + called = AsyncMock(return_value=mock_client) + monkeypatch.setattr( + CubeSandboxClient, + "open_existing", + classmethod(lambda cls, sid, cfg: called(sid, cfg)), + ) + cfg = _cfg(sandbox_id="sbx-1") + ex = await CubeCodeExecutor.attach(cfg) + called.assert_awaited_once_with("sbx-1", cfg) + assert ex.sandbox_client is mock_client + + @pytest.mark.asyncio + async def test_never_calls_open_new(self, fake_e2b, monkeypatch): + on_new = AsyncMock() + on_existing = AsyncMock(side_effect=RuntimeError("expected — test stopper")) + monkeypatch.setattr( + CubeSandboxClient, + "open_new", + classmethod(lambda cls, cfg: on_new(cfg)), + ) + monkeypatch.setattr( + CubeSandboxClient, + "open_existing", + classmethod(lambda cls, sid, cfg: on_existing(sid, cfg)), + ) + with pytest.raises(RuntimeError, match="test stopper"): + await CubeCodeExecutor.attach(_cfg(sandbox_id="sbx-1")) + on_new.assert_not_awaited() + + +class TestCreateOrRecreate: + + @pytest.mark.asyncio + async def test_no_sandbox_id_delegates_to_create(self, fake_e2b, monkeypatch, mock_client): + create_mock = AsyncMock(return_value=MagicMock()) + monkeypatch.setattr( + CubeCodeExecutor, + "create", + classmethod(lambda cls, cfg: create_mock(cfg)), + ) + on_stale = AsyncMock() + await CubeCodeExecutor.create_or_recreate(_cfg(sandbox_id=None), on_stale=on_stale) + create_mock.assert_awaited_once() + on_stale.assert_not_awaited() + + @pytest.mark.asyncio + async def test_attach_success_no_stale_callback(self, fake_e2b, monkeypatch, mock_client): + ex_obj = MagicMock() + create_mock = AsyncMock(return_value=ex_obj) + monkeypatch.setattr( + CubeCodeExecutor, + "create", + classmethod(lambda cls, cfg: create_mock(cfg)), + ) + on_stale = AsyncMock() + result = await CubeCodeExecutor.create_or_recreate( + _cfg(sandbox_id="sbx-1"), on_stale=on_stale + ) + assert result is ex_obj + on_stale.assert_not_awaited() + + @pytest.mark.asyncio + 
async def test_not_found_triggers_on_stale_then_recreate( + self, fake_e2b, monkeypatch + ): + calls: list = [] + + async def create_side_effect(cfg): + calls.append(cfg) + if cfg.sandbox_id == "sbx-1": + raise fake_e2b.SandboxNotFoundException("gone") + return "fresh-executor" + + create_mock = AsyncMock(side_effect=create_side_effect) + monkeypatch.setattr( + CubeCodeExecutor, + "create", + classmethod(lambda cls, cfg: create_mock(cfg)), + ) + on_stale = AsyncMock() + result = await CubeCodeExecutor.create_or_recreate( + _cfg(sandbox_id="sbx-1"), on_stale=on_stale + ) + assert result == "fresh-executor" + on_stale.assert_awaited_once() + # Second call must have sandbox_id=None (recreate). + assert calls[1].sandbox_id is None + # Other cfg fields preserved. + assert calls[1].template == "t" + + @pytest.mark.asyncio + async def test_not_found_without_on_stale_still_recreates( + self, fake_e2b, monkeypatch + ): + async def create_side_effect(cfg): + if cfg.sandbox_id == "sbx-1": + raise fake_e2b.SandboxNotFoundException("gone") + return "fresh" + + create_mock = AsyncMock(side_effect=create_side_effect) + monkeypatch.setattr( + CubeCodeExecutor, + "create", + classmethod(lambda cls, cfg: create_mock(cfg)), + ) + result = await CubeCodeExecutor.create_or_recreate( + _cfg(sandbox_id="sbx-1"), on_stale=None + ) + assert result == "fresh" + + @pytest.mark.asyncio + async def test_paused_propagates(self, fake_e2b, monkeypatch): + """BUG PROBE: PAUSED state must NOT trigger recreate. + + If ``create_or_recreate`` caught SandboxException (the parent of + SandboxNotFoundException), operator-managed pauses would be + silently destroyed. Only the NotFound subclass should recreate. 
+ """ + create_mock = AsyncMock( + side_effect=fake_e2b.SandboxException("paused") + ) + monkeypatch.setattr( + CubeCodeExecutor, + "create", + classmethod(lambda cls, cfg: create_mock(cfg)), + ) + on_stale = AsyncMock() + with pytest.raises(fake_e2b.SandboxException, match="paused"): + await CubeCodeExecutor.create_or_recreate( + _cfg(sandbox_id="sbx-1"), on_stale=on_stale + ) + on_stale.assert_not_awaited() + + +# --------------------------------------------------------------------------- +# Properties & lifecycle +# --------------------------------------------------------------------------- + + +class TestLifecycle: + + def test_close_calls_client_close(self, mock_client): + ex = CubeCodeExecutor(mock_client, _cfg()) + ex.close() + mock_client.close.assert_called_once() + + def test_close_idempotent(self, mock_client): + ex = CubeCodeExecutor(mock_client, _cfg()) + ex.close() + ex.close() + # Second close must not re-call the client. + mock_client.close.assert_called_once() + + def test_sandbox_client_after_close_raises(self, mock_client): + ex = CubeCodeExecutor(mock_client, _cfg()) + ex.close() + with pytest.raises(RuntimeError, match="closed"): + _ = ex.sandbox_client + + def test_sandbox_id_after_close_raises(self, mock_client): + ex = CubeCodeExecutor(mock_client, _cfg()) + ex.close() + with pytest.raises(RuntimeError, match="closed"): + _ = ex.sandbox_id + + @pytest.mark.asyncio + async def test_destroy_calls_client_destroy(self, mock_client): + ex = CubeCodeExecutor(mock_client, _cfg()) + await ex.destroy() + mock_client.destroy.assert_awaited_once() + + @pytest.mark.asyncio + async def test_destroy_idempotent(self, mock_client): + ex = CubeCodeExecutor(mock_client, _cfg()) + await ex.destroy() + await ex.destroy() + mock_client.destroy.assert_awaited_once() + + @pytest.mark.asyncio + async def test_destroy_clears_handle_even_on_error(self, mock_client): + mock_client.destroy.side_effect = RuntimeError("boom") + ex = CubeCodeExecutor(mock_client, _cfg()) 
+ with pytest.raises(RuntimeError, match="boom"): + await ex.destroy() + # Handle cleared via finally block. + with pytest.raises(RuntimeError, match="closed"): + _ = ex.sandbox_client + + @pytest.mark.asyncio + async def test_assert_running_delegates(self, mock_client): + ex = CubeCodeExecutor(mock_client, _cfg()) + await ex.assert_running() + mock_client.assert_running.assert_awaited_once() + + +# --------------------------------------------------------------------------- +# _select_interpreter +# --------------------------------------------------------------------------- + + +class TestSelectInterpreter: + + @pytest.mark.parametrize("lang", ["python", "py", "python3", "", "PYTHON", "Py"]) + def test_python_languages(self, lang): + assert CubeCodeExecutor._select_interpreter(lang) == "python3" + + @pytest.mark.parametrize("lang", ["bash", "sh", "BASH", "Sh"]) + def test_bash_languages(self, lang): + """BUG PROBE: bash MUST run as a login shell. + + The production code chose ``bash -l`` deliberately so that + ``/etc/profile.d/*`` populates PATH for tools like uv/conda. + Regressing to plain ``bash`` would silently break Cube + templates that rely on profile-based PATH injection. 
+ """ + assert CubeCodeExecutor._select_interpreter(lang) == "bash -l" + + @pytest.mark.parametrize("lang", ["ruby", "javascript", "go", "rust"]) + def test_unsupported_raises(self, lang): + with pytest.raises(ValueError, match="unsupported"): + CubeCodeExecutor._select_interpreter(lang) + + def test_none_language_is_python(self): + """``_select_interpreter`` tolerates None and treats it as python.""" + assert CubeCodeExecutor._select_interpreter(None) == "python3" + + +# --------------------------------------------------------------------------- +# _collect +# --------------------------------------------------------------------------- + + +class TestCollect: + + def test_success_appends_only_stdout(self): + result = _ok(stdout="out", exit_code=0) + outs, errs = [], [] + CubeCodeExecutor._collect(result, outs, errs) + assert outs == ["out"] + assert errs == [] + + def test_non_zero_appends_exit_marker(self): + result = _ok(stdout="out", stderr="err", exit_code=9) + outs, errs = [], [] + CubeCodeExecutor._collect(result, outs, errs) + assert outs == ["out"] + assert errs == ["Process exited with code: 9\n", "err"] + + def test_empty_fields_add_nothing(self): + result = _ok(stdout="", stderr="", exit_code=0) + outs, errs = [], [] + CubeCodeExecutor._collect(result, outs, errs) + assert outs == [] + assert errs == [] + + +# --------------------------------------------------------------------------- +# execute_code +# --------------------------------------------------------------------------- + + +class TestExecuteCode: + + @pytest.mark.asyncio + async def test_single_python_block(self, mock_client, mock_ctx): + mock_client.commands_run.return_value = _ok(stdout="hi\n") + ex = CubeCodeExecutor(mock_client, _cfg()) + inp = CodeExecutionInput(code_blocks=[CodeBlock(code="print('hi')", language="python")]) + result = await ex.execute_code(mock_ctx, inp) + + mock_client.commands_run.assert_awaited_once() + args, kwargs = mock_client.commands_run.await_args + assert args == 
("python3",) + assert kwargs["stdin"] == b"print('hi')" + assert kwargs["timeout"] == 30.0 + # Aggregated into result output text. + assert "hi" in result.output + + @pytest.mark.asyncio + async def test_bash_block_uses_login_shell(self, mock_client, mock_ctx): + mock_client.commands_run.return_value = _ok(stdout="") + ex = CubeCodeExecutor(mock_client, _cfg()) + inp = CodeExecutionInput(code_blocks=[CodeBlock(code="echo hi", language="bash")]) + await ex.execute_code(mock_ctx, inp) + assert mock_client.commands_run.await_args.args[0] == "bash -l" + + @pytest.mark.asyncio + async def test_mixed_blocks_run_in_order(self, mock_client, mock_ctx): + mock_client.commands_run.side_effect = [ + _ok(stdout="PY\n"), + _ok(stdout="BASH\n"), + ] + ex = CubeCodeExecutor(mock_client, _cfg()) + inp = CodeExecutionInput(code_blocks=[ + CodeBlock(code="print('py')", language="python"), + CodeBlock(code="echo bash", language="bash"), + ]) + await ex.execute_code(mock_ctx, inp) + + interps = [call.args[0] for call in mock_client.commands_run.await_args_list] + assert interps == ["python3", "bash -l"] + + @pytest.mark.asyncio + async def test_empty_block_is_skipped(self, mock_client, mock_ctx): + ex = CubeCodeExecutor(mock_client, _cfg()) + inp = CodeExecutionInput(code_blocks=[ + CodeBlock(code="", language="python"), + CodeBlock(code="print('hi')", language="python"), + ]) + await ex.execute_code(mock_ctx, inp) + # Only the non-empty block runs. + assert mock_client.commands_run.await_count == 1 + + @pytest.mark.asyncio + async def test_fallback_to_input_code_field(self, mock_client, mock_ctx): + """When ``code_blocks`` is empty but ``code`` is set, use code.""" + mock_client.commands_run.return_value = _ok(stdout="") + ex = CubeCodeExecutor(mock_client, _cfg()) + inp = CodeExecutionInput(code_blocks=[], code="print('fallback')") + await ex.execute_code(mock_ctx, inp) + # Single call with synthetic python block. 
+ call = mock_client.commands_run.await_args + assert call.args[0] == "python3" + assert call.kwargs["stdin"] == b"print('fallback')" + + @pytest.mark.asyncio + async def test_all_empty_returns_empty_result(self, mock_client, mock_ctx): + ex = CubeCodeExecutor(mock_client, _cfg()) + inp = CodeExecutionInput(code_blocks=[], code="") + await ex.execute_code(mock_ctx, inp) + mock_client.commands_run.assert_not_awaited() + + @pytest.mark.asyncio + async def test_unsupported_language_records_error_continues(self, mock_client, mock_ctx): + """An unsupported block emits an error note; later blocks still run. + + BUG PROBE: the loop must not short-circuit on a bad-language block. + """ + mock_client.commands_run.return_value = _ok(stdout="later\n") + ex = CubeCodeExecutor(mock_client, _cfg()) + inp = CodeExecutionInput(code_blocks=[ + CodeBlock(code="code here", language="rust"), + CodeBlock(code="print('after')", language="python"), + ]) + result = await ex.execute_code(mock_ctx, inp) + # First block skipped (no commands_run), second still ran. + assert mock_client.commands_run.await_count == 1 + assert "unsupported" in result.output.lower() or "error" in result.output.lower() + + @pytest.mark.asyncio + async def test_nonzero_exit_does_not_abort(self, mock_client, mock_ctx): + mock_client.commands_run.side_effect = [ + _ok(stdout="a", exit_code=1, stderr="oops"), + _ok(stdout="b"), + ] + ex = CubeCodeExecutor(mock_client, _cfg()) + inp = CodeExecutionInput(code_blocks=[ + CodeBlock(code="1", language="python"), + CodeBlock(code="2", language="python"), + ]) + await ex.execute_code(mock_ctx, inp) + # Both blocks ran. 
+ assert mock_client.commands_run.await_count == 2 + + @pytest.mark.asyncio + async def test_closed_executor_raises(self, mock_client, mock_ctx): + ex = CubeCodeExecutor(mock_client, _cfg()) + ex.close() + with pytest.raises(RuntimeError, match="closed"): + await ex.execute_code(mock_ctx, CodeExecutionInput(code="x")) + + @pytest.mark.asyncio + async def test_custom_execute_timeout_forwarded(self, mock_client, mock_ctx): + mock_client.commands_run.return_value = _ok() + ex = CubeCodeExecutor(mock_client, _cfg(execute_timeout=7)) + inp = CodeExecutionInput(code_blocks=[CodeBlock(code="x", language="python")]) + await ex.execute_code(mock_ctx, inp) + assert mock_client.commands_run.await_args.kwargs["timeout"] == 7 diff --git a/tests/code_executors/cube/test_e2b.py b/tests/code_executors/cube/test_e2b.py new file mode 100644 index 0000000..bf4afd6 --- /dev/null +++ b/tests/code_executors/cube/test_e2b.py @@ -0,0 +1,54 @@ +# Tencent is pleased to support the open source community by making tRPC-Agent-Python available. +# +# Copyright (C) 2026 Tencent. All rights reserved. +# +# tRPC-Agent-Python is licensed under Apache-2.0. +"""Unit tests for trpc_agent_sdk.code_executors.cube._e2b.""" + +from __future__ import annotations + +import builtins +import sys +from types import SimpleNamespace + +import pytest + + +def test_guest_user_is_root(): + from trpc_agent_sdk.code_executors.cube import _e2b + # Downstream hermes adapters rely on `root`; changing it silently + # would break file-upload permissions across the fleet. 
+ assert _e2b._GUEST_USER == "root" + + +def test_install_hint_mentions_cube_extra(): + from trpc_agent_sdk.code_executors.cube import _e2b + assert "trpc-agent-py[cube]" in _e2b._E2B_INSTALL_HINT + + +def test_import_e2b_returns_module_when_present(monkeypatch): + """When ``e2b_code_interpreter`` is importable, return it verbatim.""" + fake_mod = SimpleNamespace(AsyncSandbox=object()) + monkeypatch.setitem(sys.modules, "e2b_code_interpreter", fake_mod) + from trpc_agent_sdk.code_executors.cube._e2b import _import_e2b + assert _import_e2b() is fake_mod + + +def test_import_e2b_raises_import_error_when_missing(monkeypatch): + """When the extra is not installed, raise ImportError with install hint.""" + # Scrub any cached import first. + monkeypatch.delitem(sys.modules, "e2b_code_interpreter", raising=False) + + # Force the import to fail at the builtin layer. + original_import = builtins.__import__ + + def failing_import(name, *args, **kwargs): + if name == "e2b_code_interpreter": + raise ImportError("not installed in this venv") + return original_import(name, *args, **kwargs) + + monkeypatch.setattr(builtins, "__import__", failing_import) + + from trpc_agent_sdk.code_executors.cube._e2b import _import_e2b + with pytest.raises(ImportError, match=r"trpc-agent-py\[cube\]"): + _import_e2b() diff --git a/tests/code_executors/cube/test_package_lazy_import.py b/tests/code_executors/cube/test_package_lazy_import.py new file mode 100644 index 0000000..037a234 --- /dev/null +++ b/tests/code_executors/cube/test_package_lazy_import.py @@ -0,0 +1,160 @@ +# Tencent is pleased to support the open source community by making tRPC-Agent-Python available. +# +# Copyright (C) 2026 Tencent. All rights reserved. +# +# tRPC-Agent-Python is licensed under Apache-2.0. +"""Unit tests for the PEP 562 lazy-import wiring in the cube package. + +The commit promises that +``import trpc_agent_sdk.code_executors`` does NOT require the optional +``[cube]`` extra. 
Accessing a Cube symbol triggers lazy loading but +still defers the ``e2b-code-interpreter`` import until +``CubeCodeExecutor.create`` / ``.attach`` actually wants to talk to a +sandbox. + +Tests that need a **cold** ``sys.modules`` state are run in a subprocess +so they never corrupt the in-process module cache (which is shared +across the whole test session and is what makes other tests' +``monkeypatch`` calls resolve). +""" + +from __future__ import annotations + +import subprocess +import sys +import textwrap + +import pytest + + +def _run_isolated(script: str) -> subprocess.CompletedProcess: + return subprocess.run( + [sys.executable, "-c", textwrap.dedent(script)], + capture_output=True, + text=True, + check=False, + ) + + +def test_all_lists_all_lazy_symbols(): + """``__all__`` must include every element of ``_CUBE_LAZY_ATTRS``.""" + from trpc_agent_sdk import code_executors as ce + for name in ce._CUBE_LAZY_ATTRS: + assert name in ce.__all__, f"{name!r} missing from __all__" + + +def test_dir_contains_lazy_symbols(): + from trpc_agent_sdk import code_executors as ce + d = dir(ce) + for name in ce._CUBE_LAZY_ATTRS: + assert name in d + + +def test_unknown_attribute_raises(): + from trpc_agent_sdk import code_executors as ce + with pytest.raises(AttributeError, match="definitely_not_a_thing"): + _ = ce.definitely_not_a_thing + + +def test_lazy_attribute_access_populates_globals(): + """After first access the lazy symbol is cached in the module's globals().""" + # Isolated subprocess so we get a cold module cache. + result = _run_isolated(""" + import sys + import trpc_agent_sdk.code_executors as ce + # First access triggers __getattr__. + cls1 = ce.CubeCodeExecutorConfig + # After first access, ce.__dict__ holds the symbol. 
+ assert "CubeCodeExecutorConfig" in ce.__dict__ + cls2 = ce.CubeCodeExecutorConfig + assert cls1 is cls2 + print("OK") + """) + assert result.returncode == 0, result.stderr + assert "OK" in result.stdout + + +def test_import_does_not_touch_e2b(): + """Plain import of ``code_executors`` does NOT import e2b_code_interpreter. + + This is the core promise of the lazy wiring. Run in a subprocess so + the main test session's module cache cannot mask the behaviour. + """ + result = _run_isolated(""" + import sys + import trpc_agent_sdk.code_executors # noqa: F401 + assert "e2b_code_interpreter" not in sys.modules, \ + "bare import pulled in e2b_code_interpreter" + # Sub-package cube/ may or may not be imported yet — the contract + # is only that e2b is not. + print("OK") + """) + assert result.returncode == 0, result.stderr + assert "OK" in result.stdout + + +def test_cube_subpackage_import_does_not_touch_e2b(): + """Even importing the ``cube`` subpackage is e2b-free. + + ``_import_e2b`` is wrapped in a function; it only runs when we open + a real sandbox. Just importing the package must not trigger it. 
+ """ + result = _run_isolated(""" + import sys + import trpc_agent_sdk.code_executors.cube as cube # noqa: F401 + assert "e2b_code_interpreter" not in sys.modules, \ + "importing cube pulled in e2b_code_interpreter" + print("OK") + """) + assert result.returncode == 0, result.stderr + assert "OK" in result.stdout + + +def test_config_types_usable_without_e2b(): + """`CubeCodeExecutorConfig` can be constructed without the [cube] extra.""" + result = _run_isolated(""" + import sys + import trpc_agent_sdk.code_executors as ce + cfg = ce.CubeCodeExecutorConfig(template="t", api_url="u", api_key="k") + assert cfg.template == "t" + assert "e2b_code_interpreter" not in sys.modules + print("OK") + """) + assert result.returncode == 0, result.stderr + assert "OK" in result.stdout + + +def test_cube_subpackage_reexports_public_api(): + """Every entry on the cube subpackage's ``__all__`` must resolve.""" + from trpc_agent_sdk.code_executors import cube + for name in cube.__all__: + assert hasattr(cube, name), f"{name} missing from cube/__init__.py" + + +def test_subpackage_all_matches_parent_lazy_set(): + """Parent-package lazy set must match the subpackage ``__all__``. + + BUG PROBE: if a symbol is added to ``cube/__init__.py`` but forgotten + in ``code_executors/__init__.py`` lazy wiring (or vice versa), this + test catches the drift. + """ + from trpc_agent_sdk.code_executors import cube as sub + from trpc_agent_sdk import code_executors as parent + assert set(sub.__all__) == set(parent._CUBE_LAZY_ATTRS), ( + f"drift between cube/__init__.py __all__ and parent _CUBE_LAZY_ATTRS: " + f"only-in-subpackage={set(sub.__all__) - set(parent._CUBE_LAZY_ATTRS)!r}, " + f"only-in-parent={set(parent._CUBE_LAZY_ATTRS) - set(sub.__all__)!r}" + ) + + +def test_onexisting_resolves_via_parent_lazy_import(): + """Regression for the drift bug: ``OnExisting`` was exported from + the subpackage but not wired into the parent's ``_CUBE_LAZY_ATTRS``. 
+ + ``from trpc_agent_sdk.code_executors import OnExisting`` used to + raise ``AttributeError``. Pin both access paths so a reintroduction + of the drift fails loudly. + """ + from trpc_agent_sdk.code_executors import OnExisting as parent_symbol + from trpc_agent_sdk.code_executors.cube import OnExisting as sub_symbol + assert parent_symbol is sub_symbol diff --git a/tests/code_executors/cube/test_paths.py b/tests/code_executors/cube/test_paths.py new file mode 100644 index 0000000..e18bc2f --- /dev/null +++ b/tests/code_executors/cube/test_paths.py @@ -0,0 +1,302 @@ +# Tencent is pleased to support the open source community by making tRPC-Agent-Python available. +# +# Copyright (C) 2026 Tencent. All rights reserved. +# +# tRPC-Agent-Python is licensed under Apache-2.0. +"""Unit tests for trpc_agent_sdk.code_executors.cube._paths.""" + +from __future__ import annotations + +import pytest + +from trpc_agent_sdk.code_executors.cube import _paths +from trpc_agent_sdk.code_executors.cube._paths import ( + join_remote, + normalize_remote_relative, + shell_quote, + wrap_stdin_heredoc, +) + + +# --------------------------------------------------------------------------- +# shell_quote +# --------------------------------------------------------------------------- + + +class TestShellQuote: + + def test_empty_string_is_empty_pair(self): + assert shell_quote("") == "''" + + def test_plain_string_is_wrapped(self): + assert shell_quote("foo") == "'foo'" + + def test_single_quote_is_escaped(self): + # foo'bar -> 'foo'\''bar' + assert shell_quote("foo'bar") == "'foo'\\''bar'" + + def test_spaces_preserved_inside_quotes(self): + assert shell_quote("a b c") == "'a b c'" + + def test_dollar_backtick_and_glob_are_literal(self): + quoted = shell_quote("$x `y` * | & ; < >") + assert quoted == "'$x `y` * | & ; < >'" + + def test_double_quote_untouched(self): + # Single-quoting intentionally keeps double quotes literal. 
+ assert shell_quote('she said "hi"') == "'she said \"hi\"'" + + def test_unicode_preserved(self): + assert shell_quote("café🚀") == "'café🚀'" + + def test_newlines_preserved_inside_quotes(self): + # Heredoc-wrapped payloads depend on \n surviving quoting. + assert shell_quote("a\nb") == "'a\nb'" + + @pytest.mark.parametrize("raw", [ + "foo", + "it's", + "a b", + "'", + "''", + "'''", + r"\slash", + "$HOME", + ]) + def test_bash_roundtrip(self, raw): + """Bash must interpret ``shell_quote(s)`` back to ``s`` verbatim. + + The implementation's escape style differs from :func:`shlex.quote` + (we use ``'\\''`` splicing, shlex uses ``'"'"'``) but both must + be round-trip safe in a real POSIX shell. Drive this through + ``bash -c 'printf %s '`` and assert the output equals + the original. + """ + import subprocess + quoted = shell_quote(raw) + out = subprocess.check_output( + ["bash", "-c", f"printf %s {quoted}"], + ) + assert out == raw.encode("utf-8") + + +# --------------------------------------------------------------------------- +# normalize_remote_relative +# --------------------------------------------------------------------------- + + +class TestNormalizeRemoteRelative: + + def test_empty_string_rejects(self): + with pytest.raises(ValueError, match="must not be empty"): + normalize_remote_relative("") + + def test_whitespace_only_rejects(self): + with pytest.raises(ValueError, match="must not be empty"): + normalize_remote_relative(" ") + + def test_empty_string_allow_current(self): + assert normalize_remote_relative("", allow_current=True) == "" + + def test_whitespace_allow_current(self): + assert normalize_remote_relative(" ", allow_current=True) == "" + + def test_dot_rejects(self): + with pytest.raises(ValueError, match="must not be empty"): + normalize_remote_relative(".") + + def test_dot_allow_current(self): + assert normalize_remote_relative(".", allow_current=True) == "" + + def test_simple_path(self): + assert normalize_remote_relative("foo/bar") == 
"foo/bar" + + def test_normalized_path(self): + assert normalize_remote_relative("./foo/./bar") == "foo/bar" + + def test_collapses_internal_dotdot(self): + # foo/../bar collapses to bar and stays in-root. + assert normalize_remote_relative("foo/../bar") == "bar" + + def test_backslashes_converted_to_slashes(self): + # Windows-style separators converted to posix. + assert normalize_remote_relative("foo\\bar") == "foo/bar" + + def test_absolute_path_rejected(self): + with pytest.raises(ValueError, match="escapes its root"): + normalize_remote_relative("/etc/passwd") + + def test_dotdot_rejected(self): + with pytest.raises(ValueError, match="escapes its root"): + normalize_remote_relative("..") + + def test_dotdot_prefix_rejected(self): + with pytest.raises(ValueError, match="escapes its root"): + normalize_remote_relative("../etc/passwd") + + def test_internal_escape_via_dotdot_rejected(self): + # ``foo/../..`` collapses to ``..``; must be rejected. + with pytest.raises(ValueError, match="escapes its root"): + normalize_remote_relative("foo/../..") + + def test_strips_whitespace_before_normalizing(self): + assert normalize_remote_relative(" foo/bar ") == "foo/bar" + + +# --------------------------------------------------------------------------- +# join_remote +# --------------------------------------------------------------------------- + + +class TestJoinRemote: + + def test_empty_relative_returns_root(self): + assert join_remote("/a/b", "") == "/a/b" + + def test_basic_join(self): + assert join_remote("/a/b", "c/d") == "/a/b/c/d" + + def test_collapses_dotdot(self): + assert join_remote("/a/b", "../c") == "/a/c" + + def test_preserves_absolute_root(self): + assert join_remote("/ws", "subdir/file.txt") == "/ws/subdir/file.txt" + + +# --------------------------------------------------------------------------- +# wrap_stdin_heredoc +# --------------------------------------------------------------------------- + + +class TestWrapStdinHeredoc: + + def 
test_basic_structure(self): + out = wrap_stdin_heredoc("python3", b"print('hi')") + lines = out.split("\n") + # First line: << 'MARKER' + assert lines[0].startswith("python3 << 'TRPC_STDIN_EOF_") + assert lines[0].endswith("'") + # Middle line: payload + assert lines[1] == "print('hi')" + # Last line: closing marker (no quotes) + assert lines[2].startswith("TRPC_STDIN_EOF_") + + def test_marker_prefix_is_stable(self): + out = wrap_stdin_heredoc("cmd", b"body") + assert "TRPC_STDIN_EOF_" in out + + def test_closing_marker_matches_opening(self): + out = wrap_stdin_heredoc("cmd", b"body") + opening_line, body, closing_line = out.split("\n") + # Extract marker between "<< '" and "'". + start = opening_line.index("'") + 1 + end = opening_line.rindex("'") + marker = opening_line[start:end] + assert closing_line == marker + + def test_utf8_payload_preserved(self): + out = wrap_stdin_heredoc("cmd", "café🚀".encode("utf-8")) + assert "café🚀" in out + + def test_binary_payload_routed_through_base64(self): + # Non-UTF-8 input must NOT be silently lossily decoded. It is + # routed through ``base64 -d | cmd`` so the original bytes + # reach the command's stdin verbatim. + out = wrap_stdin_heredoc("cat", b"\xff\xfe\x00") + first_line = out.split("\n", 1)[0] + assert first_line.startswith("base64 -d << 'TRPC_STDIN_EOF_") + assert first_line.endswith("' | cat") + # Replacement chars (U+FFFD) must NOT appear anywhere in the + # rendered command — the whole point of the binary path is + # lossless transport. 
+ assert "\ufffd" not in out + + def test_binary_payload_byte_perfect_roundtrip_through_bash(self): + """End-to-end: rendered command must hand binary bytes to stdin verbatim.""" + import subprocess + import tempfile + from pathlib import Path + + payload = bytes(range(256)) # every byte 0x00..0xff + with tempfile.TemporaryDirectory() as tmp: + sink = Path(tmp) / "received.bin" + cmd = wrap_stdin_heredoc(f"cat > {sink}", payload) + result = subprocess.run( + ["bash", "-c", cmd], + capture_output=True, + check=True, + ) + assert result.returncode == 0 + assert sink.read_bytes() == payload + + def test_utf8_payload_uses_text_fast_path(self): + # Valid UTF-8 must keep the simple `cmd << 'MARKER'` form so + # logs remain readable and no subprocess overhead is added. + out = wrap_stdin_heredoc("python3", "café🚀".encode("utf-8")) + assert out.startswith("python3 << 'TRPC_STDIN_EOF_") + assert "base64 -d" not in out + + def test_multiline_payload_preserved(self): + payload = b"line1\nline2\nline3" + out = wrap_stdin_heredoc("cmd", payload) + # Payload is placed between opening and closing marker lines. + # So middle content is exactly 3 lines. + sections = out.split("\n") + # Opening, line1, line2, line3, closing = 5 elements. + assert len(sections) == 5 + assert sections[1:4] == ["line1", "line2", "line3"] + + def test_marker_collision_is_resolved(self, monkeypatch): + """Regression guard for the `while marker in payload` retry. + + Feed ``secrets.token_hex`` a first hex that appears in the + payload and a second distinct hex. The final marker must be the + non-colliding one. 
+ """ + colliding_hex = "aaaaaaaaaaaaaaaa" + safe_hex = "bbbbbbbbbbbbbbbb" + calls = {"n": 0} + + def fake_token_hex(nbytes): + calls["n"] += 1 + return colliding_hex if calls["n"] == 1 else safe_hex + + monkeypatch.setattr(_paths.secrets, "token_hex", fake_token_hex) + + payload = b"here is TRPC_STDIN_EOF_" + colliding_hex.encode() + b" inside payload" + out = wrap_stdin_heredoc("cmd", payload) + # The chosen marker must be the non-colliding one. + assert f"TRPC_STDIN_EOF_{safe_hex}" in out + # Exactly the colliding hex must NOT be the final marker + opening_line = out.split("\n", 1)[0] + assert colliding_hex not in opening_line or safe_hex in opening_line + # Collision detection must have actually consumed the first candidate. + assert calls["n"] >= 2 + + def test_binary_marker_collision_with_command_is_resolved(self, monkeypatch): + """Regression guard for the binary path's command-collision retry. + + On the base64 branch the *payload* can never collide (base64 + alphabet excludes ``_``), but the *wrapper command* can — e.g. a + multi-line shell function whose body happens to contain the + chosen literal. Force the first hex to appear in ``command`` + and verify the second, non-colliding hex wins. + """ + colliding_hex = "cccccccccccccccc" + safe_hex = "dddddddddddddddd" + calls = {"n": 0} + + def fake_token_hex(nbytes): + calls["n"] += 1 + return colliding_hex if calls["n"] == 1 else safe_hex + + monkeypatch.setattr(_paths.secrets, "token_hex", fake_token_hex) + + # Non-UTF-8 payload routes through _wrap_binary_stdin_heredoc. + payload = b"\xff\xfe\x00\x80" + # Embed the colliding marker inside the command itself. + command = f"cat # TRPC_STDIN_EOF_{colliding_hex} sentinel" + out = wrap_stdin_heredoc(command, payload) + # Must use the safe hex on the binary path. 
+ assert f"base64 -d << 'TRPC_STDIN_EOF_{safe_hex}'" in out + assert calls["n"] >= 2 diff --git a/tests/code_executors/cube/test_runtime.py b/tests/code_executors/cube/test_runtime.py new file mode 100644 index 0000000..65f3413 --- /dev/null +++ b/tests/code_executors/cube/test_runtime.py @@ -0,0 +1,768 @@ +# Tencent is pleased to support the open source community by making tRPC-Agent-Python available. +# +# Copyright (C) 2026 Tencent. All rights reserved. +# +# tRPC-Agent-Python is licensed under Apache-2.0. +"""Unit tests for trpc_agent_sdk.code_executors.cube._runtime.""" + +from __future__ import annotations + +from pathlib import Path +from unittest.mock import AsyncMock, MagicMock + +import pytest + +from trpc_agent_sdk.code_executors._constants import ( + DEFAULT_MAX_FILES, + DEFAULT_TIMEOUT_SEC, + DIR_OUT, + DIR_RUNS, + DIR_SKILLS, + DIR_WORK, + ENV_OUTPUT_DIR, + ENV_RUN_DIR, + ENV_SKILLS_DIR, + ENV_WORK_DIR, + WORKSPACE_ENV_DIR_KEY, +) +from trpc_agent_sdk.code_executors._types import ( + WorkspaceCapabilities, + WorkspaceInfo, + WorkspaceInputSpec, + WorkspaceOutputSpec, + WorkspacePutFileInfo, + WorkspaceRunProgramSpec, + WorkspaceStageOptions, +) +from trpc_agent_sdk.code_executors.cube import _runtime as rt_mod +from trpc_agent_sdk.code_executors.cube._runtime import ( + CubeProgramRunner, + CubeWorkspaceFS, + CubeWorkspaceManager, + CubeWorkspaceRuntime, + _input_default_name, + create_cube_workspace_runtime, +) +from trpc_agent_sdk.code_executors.cube._sandbox import ( + CubeCommandResult, + CubeSandboxClient, +) +from trpc_agent_sdk.code_executors.cube._types import ( + CubeCodeExecutorConfig, + CubeWorkspaceRuntimeConfig, +) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _ok(stdout: str = "", stderr: str = "", exit_code: int = 0) -> CubeCommandResult: + return CubeCommandResult(stdout=stdout, stderr=stderr, 
exit_code=exit_code, duration=0.0) + + +def _err(stderr: str = "boom", exit_code: int = 1) -> CubeCommandResult: + return CubeCommandResult(stdout="", stderr=stderr, exit_code=exit_code, duration=0.0) + + +@pytest.fixture +def mock_client(): + c = MagicMock(spec=CubeSandboxClient) + c.sandbox_id = "sbx" + c.commands_run = AsyncMock(return_value=_ok()) + c.read_file_bytes = AsyncMock(return_value=b"") + c.write_file_bytes = AsyncMock(return_value=None) + c.upload_path = AsyncMock(return_value=None) + c.download_path = AsyncMock(return_value=None) + return c + + +def _ws(path: str = "/workspace/cube_agent/ws_test_1") -> WorkspaceInfo: + return WorkspaceInfo(id="test", path=path) + + +# --------------------------------------------------------------------------- +# _input_default_name +# --------------------------------------------------------------------------- + + +class TestInputDefaultName: + + @pytest.mark.parametrize("src,expected", [ + ("a/b/c.txt", "c.txt"), + ("file.txt", "file.txt"), + ("/abs/path/file", "file"), + ("a/", "a/"), # trailing slash: last segment empty, falls through + ("", ""), + ]) + def test_basename(self, src, expected): + assert _input_default_name(src) == expected + + +# --------------------------------------------------------------------------- +# CubeWorkspaceManager +# --------------------------------------------------------------------------- + + +class TestCubeWorkspaceManager: + + @pytest.mark.asyncio + async def test_create_workspace_builds_mkdir_command(self, mock_client, monkeypatch): + monkeypatch.setattr(rt_mod.time, "time_ns", lambda: 123456789) + mgr = CubeWorkspaceManager(mock_client, "/workspace/cube_agent", 30.0) + info = await mgr.create_workspace("my-id") + + assert info.id == "my-id" + assert info.path == "/workspace/cube_agent/ws_my-id_123456789" + + mock_client.commands_run.assert_awaited_once() + cmd = mock_client.commands_run.await_args.args[0] + assert "set -e" in cmd + assert "mkdir -p" in cmd + assert 
"'/workspace/cube_agent/ws_my-id_123456789'" in cmd + for sub in (DIR_WORK, DIR_OUT, DIR_SKILLS, DIR_RUNS): + assert sub in cmd + + @pytest.mark.asyncio + async def test_create_workspace_sanitizes_exec_id(self, mock_client): + """BUG PROBE: `/`, `@`, spaces, `.` must be replaced by `_`.""" + mgr = CubeWorkspaceManager(mock_client, "/ws", 30.0) + info = await mgr.create_workspace("user@example.com/rundir space") + # All non-[a-zA-Z0-9_-] replaced with "_". + assert "ws_user_example_com_rundir_space_" in info.path + + @pytest.mark.asyncio + async def test_create_workspace_empty_id_becomes_anon(self, mock_client): + mgr = CubeWorkspaceManager(mock_client, "/ws", 30.0) + info = await mgr.create_workspace("") + assert "ws_anon_" in info.path + + @pytest.mark.asyncio + async def test_create_workspace_failure_raises(self, mock_client): + mock_client.commands_run.return_value = _err("mkdir fail") + mgr = CubeWorkspaceManager(mock_client, "/ws", 30.0) + with pytest.raises(RuntimeError, match="Failed to create cube workspace"): + await mgr.create_workspace("id") + + @pytest.mark.asyncio + async def test_create_workspace_idempotent(self, mock_client): + """Second call with same id returns cached info on a stable path, + but re-issues an idempotent mkdir -p so the cache is reconciled + with the remote (heals if the dir was deleted externally — see + BUG 8 in test_bug_hunt.py). 
+ """ + mgr = CubeWorkspaceManager(mock_client, "/ws", 30.0) + info1 = await mgr.create_workspace("id") + info2 = await mgr.create_workspace("id") + assert info1 is info2 + assert info1.path == info2.path + assert mock_client.commands_run.await_count == 2 + for call in mock_client.commands_run.await_args_list: + cmd = call.args[0] + assert "mkdir -p" in cmd + assert f"'{info1.path}'" in cmd + + @pytest.mark.asyncio + async def test_cleanup_runs_rm(self, mock_client): + mgr = CubeWorkspaceManager(mock_client, "/ws", 30.0) + await mgr.create_workspace("id") + mock_client.commands_run.reset_mock() + await mgr.cleanup("id") + cmd = mock_client.commands_run.await_args.args[0] + assert cmd.startswith("rm -rf ") + + @pytest.mark.asyncio + async def test_cleanup_unknown_is_noop(self, mock_client): + mgr = CubeWorkspaceManager(mock_client, "/ws", 30.0) + await mgr.cleanup("unknown-id") + mock_client.commands_run.assert_not_awaited() + + @pytest.mark.asyncio + async def test_cleanup_failure_raises(self, mock_client): + mgr = CubeWorkspaceManager(mock_client, "/ws", 30.0) + await mgr.create_workspace("id") + mock_client.commands_run.return_value = _err("rm fail") + with pytest.raises(RuntimeError, match="Failed to clean cube workspace"): + await mgr.cleanup("id") + + +# --------------------------------------------------------------------------- +# CubeWorkspaceFS.put_files +# --------------------------------------------------------------------------- + + +class TestPutFiles: + + @pytest.mark.asyncio + async def test_basic_write(self, mock_client): + fs = CubeWorkspaceFS(mock_client, 30.0) + ws = _ws() + await fs.put_files(ws, [WorkspacePutFileInfo(path="sub/a.txt", content=b"hi")]) + mock_client.write_file_bytes.assert_awaited_once_with( + f"{ws.path}/sub/a.txt", b"hi" + ) + # Parent mkdir happened. 
+ assert any( + "mkdir" in call.args[0] for call in mock_client.commands_run.await_args_list + ) + + @pytest.mark.asyncio + async def test_empty_path_raises(self, mock_client): + fs = CubeWorkspaceFS(mock_client, 30.0) + with pytest.raises(ValueError, match="empty file path"): + await fs.put_files(_ws(), [WorkspacePutFileInfo(path="", content=b"x")]) + + @pytest.mark.asyncio + async def test_dotdot_rejected(self, mock_client): + fs = CubeWorkspaceFS(mock_client, 30.0) + with pytest.raises(ValueError, match="escapes"): + await fs.put_files(_ws(), [WorkspacePutFileInfo(path="../escape", content=b"")]) + + @pytest.mark.asyncio + async def test_parent_is_ws_root_no_mkdir(self, mock_client): + fs = CubeWorkspaceFS(mock_client, 30.0) + ws = _ws() + await fs.put_files(ws, [WorkspacePutFileInfo(path="toplevel.txt", content=b"")]) + # No mkdir issued because parent == ws.path. + mock_client.commands_run.assert_not_awaited() + + @pytest.mark.asyncio + async def test_none_content_writes_empty_bytes(self, mock_client): + fs = CubeWorkspaceFS(mock_client, 30.0) + # Pydantic default is b"" but simulate explicit none-y content. 
+ await fs.put_files(_ws(), [WorkspacePutFileInfo(path="x.txt", content=b"")]) + mock_client.write_file_bytes.assert_awaited_once() + assert mock_client.write_file_bytes.await_args.args[1] == b"" + + +# --------------------------------------------------------------------------- +# CubeWorkspaceFS.stage_directory +# --------------------------------------------------------------------------- + + +class TestStageDirectory: + + @pytest.mark.asyncio + async def test_missing_src_raises(self, mock_client): + fs = CubeWorkspaceFS(mock_client, 30.0) + with pytest.raises(ValueError, match="src is empty"): + await fs.stage_directory(_ws(), "", "dst", WorkspaceStageOptions()) + + @pytest.mark.asyncio + async def test_nonexistent_src_raises(self, mock_client, tmp_path): + fs = CubeWorkspaceFS(mock_client, 30.0) + missing = str(tmp_path / "missing") + with pytest.raises(FileNotFoundError): + await fs.stage_directory(_ws(), missing, "", WorkspaceStageOptions()) + + @pytest.mark.asyncio + async def test_file_not_dir_raises(self, mock_client, tmp_path): + fs = CubeWorkspaceFS(mock_client, 30.0) + f = tmp_path / "file" + f.write_text("x") + with pytest.raises(FileNotFoundError): + await fs.stage_directory(_ws(), str(f), "", WorkspaceStageOptions()) + + @pytest.mark.asyncio + async def test_empty_dst_stages_to_ws_root(self, mock_client, tmp_path): + fs = CubeWorkspaceFS(mock_client, 30.0) + src = tmp_path / "s" + src.mkdir() + ws = _ws() + await fs.stage_directory(ws, str(src), "", WorkspaceStageOptions()) + mock_client.upload_path.assert_awaited_once() + args = mock_client.upload_path.await_args.args + assert str(args[0]) == str(src.resolve()) or args[0] == Path(str(src.resolve())) + assert args[1] == ws.path # direct ws root, no subdir joined + + @pytest.mark.asyncio + async def test_dst_dot_stages_to_ws_root(self, mock_client, tmp_path): + fs = CubeWorkspaceFS(mock_client, 30.0) + src = tmp_path / "s" + src.mkdir() + ws = _ws() + await fs.stage_directory(ws, str(src), ".", 
WorkspaceStageOptions()) + args = mock_client.upload_path.await_args.args + assert args[1] == ws.path + + @pytest.mark.asyncio + async def test_read_only_issues_chmod(self, mock_client, tmp_path): + fs = CubeWorkspaceFS(mock_client, 30.0) + src = tmp_path / "s" + src.mkdir() + await fs.stage_directory(_ws(), str(src), "sub", WorkspaceStageOptions(read_only=True)) + assert any( + call.args[0].startswith("chmod -R a-w") + for call in mock_client.commands_run.await_args_list + ) + + @pytest.mark.asyncio + async def test_no_chmod_when_read_only_false(self, mock_client, tmp_path): + fs = CubeWorkspaceFS(mock_client, 30.0) + src = tmp_path / "s" + src.mkdir() + await fs.stage_directory(_ws(), str(src), "sub", WorkspaceStageOptions(read_only=False)) + for call in mock_client.commands_run.await_args_list: + assert "chmod" not in call.args[0] + + +# --------------------------------------------------------------------------- +# CubeWorkspaceFS.stage_inputs +# --------------------------------------------------------------------------- + + +class TestStageInputs: + + @pytest.mark.asyncio + async def test_empty_src_is_skipped(self, mock_client): + fs = CubeWorkspaceFS(mock_client, 30.0) + await fs.stage_inputs(_ws(), [WorkspaceInputSpec(src="", dst="dst")]) + mock_client.upload_path.assert_not_awaited() + mock_client.write_file_bytes.assert_not_awaited() + + @pytest.mark.asyncio + async def test_artifact_without_ctx_raises(self, mock_client): + fs = CubeWorkspaceFS(mock_client, 30.0) + with pytest.raises(ValueError, match="Context is required"): + await fs.stage_inputs( + _ws(), + [WorkspaceInputSpec(src="artifact://name", dst="dst.txt")], + ctx=None, + ) + + @pytest.mark.asyncio + async def test_artifact_success_writes_bytes(self, mock_client, monkeypatch): + fs = CubeWorkspaceFS(mock_client, 30.0) + ctx = MagicMock() + + async def fake_load(ctx, name, version): + return b"artifact-bytes", 1 + + monkeypatch.setattr(rt_mod, "load_artifact_helper", fake_load) + 
monkeypatch.setattr(rt_mod, "parse_artifact_ref", lambda r: ("name", None)) + + await fs.stage_inputs( + _ws(), + [WorkspaceInputSpec(src="artifact://name", dst="a.txt")], + ctx=ctx, + ) + mock_client.write_file_bytes.assert_awaited_once() + + @pytest.mark.asyncio + async def test_host_scheme_uploads(self, mock_client, tmp_path): + fs = CubeWorkspaceFS(mock_client, 30.0) + host_file = tmp_path / "f.txt" + host_file.write_text("payload") + await fs.stage_inputs( + _ws(), + [WorkspaceInputSpec(src=f"host://{host_file}", dst="f.txt")], + ) + mock_client.upload_path.assert_awaited_once() + + @pytest.mark.asyncio + async def test_host_missing_raises(self, mock_client, tmp_path): + fs = CubeWorkspaceFS(mock_client, 30.0) + with pytest.raises(FileNotFoundError): + await fs.stage_inputs( + _ws(), + [WorkspaceInputSpec(src=f"host://{tmp_path}/nope", dst="x.txt")], + ) + + @pytest.mark.asyncio + async def test_workspace_scheme_remote_copy(self, mock_client): + fs = CubeWorkspaceFS(mock_client, 30.0) + await fs.stage_inputs( + _ws(), + [WorkspaceInputSpec(src="workspace://src.txt", dst="dst.txt")], + ) + # At least one command is `cp -a`. 
+ assert any( + "cp -a" in call.args[0] for call in mock_client.commands_run.await_args_list + ) + + @pytest.mark.asyncio + async def test_skill_scheme_remote_copy(self, mock_client): + fs = CubeWorkspaceFS(mock_client, 30.0) + await fs.stage_inputs( + _ws(), + [WorkspaceInputSpec(src="skill://lint/main.py", dst="x.py")], + ) + # cp from under {ws}/skills/lint/main.py + cp_calls = [c for c in mock_client.commands_run.await_args_list if "cp -a" in c.args[0]] + assert cp_calls + cmd = cp_calls[0].args[0] + assert f"/{DIR_SKILLS}/lint/main.py" in cmd + + @pytest.mark.asyncio + async def test_unknown_scheme_raises(self, mock_client): + fs = CubeWorkspaceFS(mock_client, 30.0) + with pytest.raises(ValueError, match="unsupported input scheme"): + await fs.stage_inputs(_ws(), [WorkspaceInputSpec(src="ftp://x", dst="y")]) + + @pytest.mark.asyncio + async def test_default_dst_when_empty(self, mock_client, tmp_path): + """Empty dst falls back to work/inputs/.""" + fs = CubeWorkspaceFS(mock_client, 30.0) + host_file = tmp_path / "myfile.txt" + host_file.write_text("x") + await fs.stage_inputs(_ws(), [WorkspaceInputSpec(src=f"host://{host_file}", dst="")]) + dst_arg = mock_client.upload_path.await_args.args[1] + assert f"{DIR_WORK}/inputs/myfile.txt" in dst_arg + + +# --------------------------------------------------------------------------- +# CubeWorkspaceFS.collect / collect_outputs / _glob / _mkdir / _copy_remote +# --------------------------------------------------------------------------- + + +class TestGlob: + + @pytest.mark.asyncio + async def test_empty_patterns_no_call(self, mock_client): + fs = CubeWorkspaceFS(mock_client, 30.0) + ws = _ws() + out = await fs._glob(ws.path, []) + assert out == [] + mock_client.commands_run.assert_not_awaited() + + @pytest.mark.asyncio + async def test_patterns_issue_shell_command(self, mock_client): + mock_client.commands_run.return_value = _ok( + stdout=f"{_ws().path}/a.txt\n{_ws().path}/b.txt\n" + ) + fs = 
CubeWorkspaceFS(mock_client, 30.0) + ws = _ws() + out = await fs._glob(ws.path, ["*.txt"]) + assert out == [f"{ws.path}/a.txt", f"{ws.path}/b.txt"] + cmd = mock_client.commands_run.await_args.args[0] + assert "globstar" in cmd + assert "'*.txt'" in cmd + + @pytest.mark.asyncio + async def test_glob_failure_raises(self, mock_client): + mock_client.commands_run.return_value = _err("glob died") + fs = CubeWorkspaceFS(mock_client, 30.0) + with pytest.raises(RuntimeError, match="glob failed"): + await fs._glob(_ws().path, ["*"]) + + +class TestCollect: + + @pytest.mark.asyncio + async def test_empty_match(self, mock_client): + mock_client.commands_run.return_value = _ok(stdout="") + fs = CubeWorkspaceFS(mock_client, 30.0) + out = await fs.collect(_ws(), ["*.nope"]) + assert out == [] + + @pytest.mark.asyncio + async def test_dedup_by_rel(self, mock_client): + ws = _ws() + # Glob returns same path twice. + mock_client.commands_run.return_value = _ok(stdout=f"{ws.path}/a.txt\n{ws.path}/a.txt\n") + mock_client.read_file_bytes.return_value = b"content" + fs = CubeWorkspaceFS(mock_client, 30.0) + out = await fs.collect(ws, ["*.txt"]) + assert len(out) == 1 + assert out[0].name == "a.txt" + + @pytest.mark.asyncio + async def test_truncation_marker(self, mock_client, monkeypatch): + # Force a tiny max so truncation happens. The constant moved to + # the shared collection helper after the build_code_files + # extraction; ``CubeWorkspaceFS.collect`` reads it from there. 
+ from trpc_agent_sdk.code_executors.utils import _collect + monkeypatch.setattr(_collect, "MAX_READ_SIZE_BYTES", 4) + ws = _ws() + mock_client.commands_run.return_value = _ok(stdout=f"{ws.path}/f.txt\n") + mock_client.read_file_bytes.return_value = b"0123456789" + fs = CubeWorkspaceFS(mock_client, 30.0) + out = await fs.collect(ws, ["*.txt"]) + assert len(out) == 1 + assert out[0].truncated is True + assert out[0].size_bytes == 10 + + +class TestCollectOutputs: + + @pytest.mark.asyncio + async def test_max_files_limit(self, mock_client): + ws = _ws() + mock_client.commands_run.return_value = _ok( + stdout="\n".join(f"{ws.path}/f{i}.txt" for i in range(5)) + ) + mock_client.read_file_bytes.return_value = b"x" + fs = CubeWorkspaceFS(mock_client, 30.0) + manifest = await fs.collect_outputs(ws, WorkspaceOutputSpec(globs=["*"], max_files=2)) + assert len(manifest.files) == 2 + assert manifest.limits_hit is True + + @pytest.mark.asyncio + async def test_file_bytes_limit_sets_truncated(self, mock_client): + ws = _ws() + mock_client.commands_run.return_value = _ok(stdout=f"{ws.path}/f.txt\n") + mock_client.read_file_bytes.return_value = b"A" * 100 + fs = CubeWorkspaceFS(mock_client, 30.0) + manifest = await fs.collect_outputs( + ws, WorkspaceOutputSpec(globs=["*"], max_file_bytes=4) + ) + assert manifest.limits_hit is True + + @pytest.mark.asyncio + async def test_save_requires_ctx(self, mock_client): + ws = _ws() + mock_client.commands_run.return_value = _ok(stdout=f"{ws.path}/f.txt\n") + mock_client.read_file_bytes.return_value = b"x" + fs = CubeWorkspaceFS(mock_client, 30.0) + with pytest.raises(ValueError, match="Context is required"): + await fs.collect_outputs(ws, WorkspaceOutputSpec(globs=["*"], save=True)) + + @pytest.mark.asyncio + async def test_save_happy_path(self, mock_client, monkeypatch): + ws = _ws() + mock_client.commands_run.return_value = _ok(stdout=f"{ws.path}/f.txt\n") + mock_client.read_file_bytes.return_value = b"x" + fs = 
CubeWorkspaceFS(mock_client, 30.0) + + saved = [] + + async def fake_save(ctx, name, data, mime): + saved.append((name, data, mime)) + return 7 + + from trpc_agent_sdk.code_executors.utils import _collect as collect_mod + monkeypatch.setattr(collect_mod, "save_artifact_helper", fake_save) + ctx = MagicMock() + manifest = await fs.collect_outputs( + ws, + WorkspaceOutputSpec(globs=["*"], save=True, inline=True, name_template="prefix/"), + ctx=ctx, + ) + assert len(manifest.files) == 1 + ref = manifest.files[0] + assert ref.saved_as == "prefix/f.txt" + assert ref.version == 7 + assert ref.content == "x" + + @pytest.mark.asyncio + async def test_inline_only_no_save(self, mock_client): + ws = _ws() + mock_client.commands_run.return_value = _ok(stdout=f"{ws.path}/f.txt\n") + mock_client.read_file_bytes.return_value = b"hello" + fs = CubeWorkspaceFS(mock_client, 30.0) + manifest = await fs.collect_outputs(ws, WorkspaceOutputSpec(globs=["*"], inline=True)) + assert manifest.files[0].content == "hello" + assert manifest.files[0].saved_as == "" + + +# --------------------------------------------------------------------------- +# CubeWorkspaceFS._mkdir / _copy_remote +# --------------------------------------------------------------------------- + + +class TestMkdirAndCopy: + + @pytest.mark.asyncio + async def test_mkdir_empty_is_noop(self, mock_client): + fs = CubeWorkspaceFS(mock_client, 30.0) + await fs._mkdir("") + mock_client.commands_run.assert_not_awaited() + + @pytest.mark.asyncio + async def test_mkdir_failure_raises(self, mock_client): + mock_client.commands_run.return_value = _err("perm denied") + fs = CubeWorkspaceFS(mock_client, 30.0) + with pytest.raises(RuntimeError, match="mkdir -p failed"): + await fs._mkdir("/some/path") + + @pytest.mark.asyncio + async def test_copy_remote_failure_raises(self, mock_client): + """mkdir → rm → cp; failing cp must surface as ``remote cp failed``. 
+ + The rm step defends against the ``cp -a`` directory-footgun (BUG 11), + so we get an extra command between mkdir and cp. Stub all three. + """ + mock_client.commands_run.side_effect = [_ok(), _ok(), _err("cp fail")] + fs = CubeWorkspaceFS(mock_client, 30.0) + with pytest.raises(RuntimeError, match="remote cp failed"): + await fs._copy_remote("/src", "/dst") + + @pytest.mark.asyncio + async def test_copy_remote_rm_failure_raises(self, mock_client): + """The defensive rm step must surface its own failure, not be swallowed.""" + mock_client.commands_run.side_effect = [_ok(), _err("perm denied")] + fs = CubeWorkspaceFS(mock_client, 30.0) + with pytest.raises(RuntimeError, match="remote rm failed"): + await fs._copy_remote("/src", "/dst") + + +# --------------------------------------------------------------------------- +# CubeProgramRunner +# --------------------------------------------------------------------------- + + +class TestCubeProgramRunner: + + @pytest.mark.asyncio + async def test_shell_pipeline_structure(self, mock_client, monkeypatch): + monkeypatch.setattr(rt_mod.time, "strftime", lambda fmt: "20260506T120000") + mock_client.commands_run.return_value = _ok(stdout="ok") + runner = CubeProgramRunner(mock_client, 30.0) + ws = _ws() + spec = WorkspaceRunProgramSpec(cmd="python", args=["-c", "print(1)"]) + + result = await runner.run_program(ws, spec) + + cmd = mock_client.commands_run.await_args.args[0] + assert "set -e" in cmd + assert "mkdir -p" in cmd + assert f"cd '{ws.path}'" in cmd + # Args are shell-quoted: + assert "'python'" in cmd and "'-c'" in cmd and "'print(1)'" in cmd + + env = mock_client.commands_run.await_args.kwargs["env"] + assert env[WORKSPACE_ENV_DIR_KEY] == ws.path + assert env[ENV_SKILLS_DIR] == f"{ws.path}/{DIR_SKILLS}" + assert env[ENV_WORK_DIR] == f"{ws.path}/{DIR_WORK}" + assert env[ENV_OUTPUT_DIR] == f"{ws.path}/{DIR_OUT}" + assert env[ENV_RUN_DIR] == f"{ws.path}/{DIR_RUNS}/run_20260506T120000" + assert result.stdout == "ok" + 
+ @pytest.mark.asyncio + async def test_cwd_override(self, mock_client): + mock_client.commands_run.return_value = _ok() + runner = CubeProgramRunner(mock_client, 30.0) + ws = _ws() + await runner.run_program(ws, WorkspaceRunProgramSpec(cmd="ls", cwd="sub/dir")) + cmd = mock_client.commands_run.await_args.args[0] + assert f"cd '{ws.path}/sub/dir'" in cmd + + @pytest.mark.asyncio + async def test_spec_env_overrides_default(self, mock_client): + mock_client.commands_run.return_value = _ok() + runner = CubeProgramRunner(mock_client, 30.0) + spec = WorkspaceRunProgramSpec(cmd="x", env={"WORKSPACE_DIR": "override"}) + await runner.run_program(_ws(), spec) + env = mock_client.commands_run.await_args.kwargs["env"] + # spec.env wins. + assert env["WORKSPACE_DIR"] == "override" + + @pytest.mark.asyncio + async def test_stdin_encoded(self, mock_client): + mock_client.commands_run.return_value = _ok() + runner = CubeProgramRunner(mock_client, 30.0) + spec = WorkspaceRunProgramSpec(cmd="cat", stdin="héllo") + await runner.run_program(_ws(), spec) + assert mock_client.commands_run.await_args.kwargs["stdin"] == "héllo".encode("utf-8") + + @pytest.mark.asyncio + async def test_stdin_empty_is_none(self, mock_client): + mock_client.commands_run.return_value = _ok() + runner = CubeProgramRunner(mock_client, 30.0) + spec = WorkspaceRunProgramSpec(cmd="cat", stdin="") + await runner.run_program(_ws(), spec) + assert mock_client.commands_run.await_args.kwargs["stdin"] is None + + @pytest.mark.asyncio + async def test_timeout_positive_forwarded(self, mock_client): + mock_client.commands_run.return_value = _ok() + runner = CubeProgramRunner(mock_client, 30.0) + spec = WorkspaceRunProgramSpec(cmd="x", timeout=17) + await runner.run_program(_ws(), spec) + assert mock_client.commands_run.await_args.kwargs["timeout"] == 17.0 + + @pytest.mark.asyncio + async def test_timeout_zero_falls_back_to_default(self, mock_client): + mock_client.commands_run.return_value = _ok() + runner = 
CubeProgramRunner(mock_client, 30.0) + spec = WorkspaceRunProgramSpec(cmd="x", timeout=0) + await runner.run_program(_ws(), spec) + assert mock_client.commands_run.await_args.kwargs["timeout"] == float(DEFAULT_TIMEOUT_SEC) + + @pytest.mark.asyncio + async def test_provider_env_merged_when_enabled(self, mock_client): + mock_client.commands_run.return_value = _ok() + provider = lambda ctx: {"EXTRA": "V"} + runner = CubeProgramRunner(mock_client, 30.0, provider=provider, enable_provider_env=True) + spec = WorkspaceRunProgramSpec(cmd="x") + await runner.run_program(_ws(), spec) + env = mock_client.commands_run.await_args.kwargs["env"] + assert env["EXTRA"] == "V" + + @pytest.mark.asyncio + async def test_provider_env_ignored_when_disabled(self, mock_client): + mock_client.commands_run.return_value = _ok() + provider = lambda ctx: {"EXTRA": "V"} + # enable_provider_env=False → extras not merged. + runner = CubeProgramRunner(mock_client, 30.0, provider=provider, enable_provider_env=False) + spec = WorkspaceRunProgramSpec(cmd="x") + await runner.run_program(_ws(), spec) + env = mock_client.commands_run.await_args.kwargs["env"] + assert "EXTRA" not in env + + +# --------------------------------------------------------------------------- +# CubeWorkspaceRuntime + create_cube_workspace_runtime +# --------------------------------------------------------------------------- + + +class TestCubeWorkspaceRuntime: + + def test_components_exposed(self, mock_client): + rt = CubeWorkspaceRuntime(mock_client, remote_workspace="/ws", execute_timeout=30.0) + assert isinstance(rt.manager(), CubeWorkspaceManager) + assert isinstance(rt.fs(), CubeWorkspaceFS) + assert isinstance(rt.runner(), CubeProgramRunner) + + def test_describe(self, mock_client): + rt = CubeWorkspaceRuntime(mock_client, remote_workspace="/ws", execute_timeout=30.0) + caps = rt.describe() + assert caps.isolation == "cube" + assert caps.network_allowed is True + assert caps.read_only_mount is False + assert 
caps.streaming is False + + +class TestCreateCubeWorkspaceRuntime: + + def test_reuses_executor_client(self, mock_client): + cfg = CubeCodeExecutorConfig( + template="t", api_url="u", api_key="k", execute_timeout=42.0 + ) + ex = MagicMock() + ex.sandbox_client = mock_client + ex.config = cfg + rt = create_cube_workspace_runtime(ex) + assert rt._client is mock_client + # Inherits execute_timeout from exec cfg. + assert rt._fs._timeout == 42.0 + assert rt._runner._timeout == 42.0 + + def test_uses_default_workspace_when_none(self, mock_client): + cfg = CubeCodeExecutorConfig(template="t", api_url="u", api_key="k") + ex = MagicMock() + ex.sandbox_client = mock_client + ex.config = cfg + rt = create_cube_workspace_runtime(ex) + assert rt._manager._root == "/workspace/cube_agent" + + def test_custom_workspace_cfg(self, mock_client): + cfg = CubeCodeExecutorConfig(template="t", api_url="u", api_key="k") + ex = MagicMock() + ex.sandbox_client = mock_client + ex.config = cfg + rt = create_cube_workspace_runtime( + ex, workspace_cfg=CubeWorkspaceRuntimeConfig(remote_workspace="/custom") + ) + assert rt._manager._root == "/custom" + + def test_provider_and_flag_forwarded(self, mock_client): + cfg = CubeCodeExecutorConfig(template="t", api_url="u", api_key="k") + ex = MagicMock() + ex.sandbox_client = mock_client + ex.config = cfg + provider = lambda ctx: {} + rt = create_cube_workspace_runtime(ex, provider=provider, enable_provider_env=True) + assert rt._runner._run_env_provider is provider + assert rt._runner._enable_provider_env is True diff --git a/tests/code_executors/cube/test_sandbox.py b/tests/code_executors/cube/test_sandbox.py new file mode 100644 index 0000000..84556a8 --- /dev/null +++ b/tests/code_executors/cube/test_sandbox.py @@ -0,0 +1,586 @@ +# Tencent is pleased to support the open source community by making tRPC-Agent-Python available. +# +# Copyright (C) 2026 Tencent. All rights reserved. +# +# tRPC-Agent-Python is licensed under Apache-2.0. 
+"""Unit tests for trpc_agent_sdk.code_executors.cube._sandbox. + +Every test in this file patches ``_import_e2b`` via the ``fake_e2b`` +fixture in conftest.py so the real ``e2b-code-interpreter`` package is +never required or touched. +""" + +from __future__ import annotations + +import asyncio +from pathlib import Path +from types import SimpleNamespace +from unittest.mock import AsyncMock, MagicMock + +import pytest + +from trpc_agent_sdk.code_executors.cube import _sandbox +from trpc_agent_sdk.code_executors.cube._sandbox import ( + CubeCommandResult, + CubeSandboxClient, +) +from trpc_agent_sdk.code_executors.cube._types import CubeCodeExecutorConfig + + +def _cfg(**overrides) -> CubeCodeExecutorConfig: + base = dict( + template="tmpl", + api_url="https://api", + api_key="sekret", + idle_timeout=123, + execute_timeout=45.0, + ) + base.update(overrides) + return CubeCodeExecutorConfig(**base) + + +# --------------------------------------------------------------------------- +# Construction & sandbox_id +# --------------------------------------------------------------------------- + + +class TestConstruction: + + def test_stores_timeouts_and_sandbox_id(self, fake_async_sandbox): + client = CubeSandboxClient( + fake_async_sandbox, idle_timeout=600, execute_timeout=30 + ) + assert client.sandbox_id == "sbx-1" + + def test_sandbox_id_after_close_raises(self, fake_async_sandbox): + client = CubeSandboxClient( + fake_async_sandbox, idle_timeout=600, execute_timeout=30 + ) + client.close() + with pytest.raises(RuntimeError, match="closed"): + _ = client.sandbox_id + + +# --------------------------------------------------------------------------- +# open_new +# --------------------------------------------------------------------------- + + +class TestOpenNew: + + @pytest.mark.asyncio + async def test_creates_with_resolved_credentials(self, fake_e2b, fake_async_sandbox): + fake_e2b.AsyncSandbox.create = AsyncMock(return_value=fake_async_sandbox) + cfg = _cfg() + client 
= await CubeSandboxClient.open_new(cfg) + + fake_e2b.AsyncSandbox.create.assert_awaited_once_with( + template="tmpl", + api_url="https://api", + api_key="sekret", + timeout=123, + ) + assert client.sandbox_id == "sbx-1" + kwargs = fake_e2b.AsyncSandbox.create.await_args.kwargs + assert isinstance(kwargs["timeout"], int) + + @pytest.mark.asyncio + async def test_missing_template_raises(self, fake_e2b, monkeypatch): + monkeypatch.delenv("CUBE_TEMPLATE_ID", raising=False) + fake_e2b.AsyncSandbox.create = AsyncMock() + cfg = _cfg(template=None) + with pytest.raises(ValueError, match="CUBE_TEMPLATE_ID"): + await CubeSandboxClient.open_new(cfg) + + +# --------------------------------------------------------------------------- +# open_existing +# --------------------------------------------------------------------------- + + +class TestOpenExisting: + + @pytest.mark.asyncio + async def test_connects_and_asserts_running(self, fake_e2b, fake_async_sandbox): + fake_e2b.AsyncSandbox.connect = AsyncMock(return_value=fake_async_sandbox) + cfg = _cfg() + client = await CubeSandboxClient.open_existing("sbx-42", cfg) + + fake_e2b.AsyncSandbox.connect.assert_awaited_once_with( + "sbx-42", api_url="https://api", api_key="sekret" + ) + fake_async_sandbox.get_info.assert_awaited_once() + assert client is not None + + @pytest.mark.asyncio + async def test_paused_state_raises(self, fake_e2b, fake_async_sandbox): + fake_e2b.AsyncSandbox.connect = AsyncMock(return_value=fake_async_sandbox) + fake_async_sandbox.get_info.return_value = SimpleNamespace( + state=fake_e2b.SandboxState.PAUSED + ) + cfg = _cfg() + with pytest.raises(fake_e2b.SandboxException, match="paused"): + await CubeSandboxClient.open_existing("sbx-42", cfg) + + @pytest.mark.asyncio + async def test_missing_sandbox_propagates(self, fake_e2b): + async def raise_not_found(*a, **k): + raise fake_e2b.SandboxNotFoundException("gone") + + fake_e2b.AsyncSandbox.connect = raise_not_found + with 
pytest.raises(fake_e2b.SandboxNotFoundException): + await CubeSandboxClient.open_existing("sbx-42", _cfg()) + + +# --------------------------------------------------------------------------- +# close / destroy +# --------------------------------------------------------------------------- + + +class TestClose: + + def test_close_drops_handle_without_calling_kill(self, fake_async_sandbox): + client = CubeSandboxClient(fake_async_sandbox, idle_timeout=60, execute_timeout=30) + client.close() + fake_async_sandbox.kill.assert_not_called() + + +class TestDestroy: + + @pytest.mark.asyncio + async def test_happy_path(self, fake_e2b, fake_async_sandbox): + client = CubeSandboxClient(fake_async_sandbox, idle_timeout=60, execute_timeout=30) + await client.destroy() + fake_async_sandbox.kill.assert_awaited_once() + + @pytest.mark.asyncio + async def test_already_closed_is_noop(self, fake_e2b, fake_async_sandbox): + client = CubeSandboxClient(fake_async_sandbox, idle_timeout=60, execute_timeout=30) + client.close() + await client.destroy() # must not raise and must not touch e2b + fake_async_sandbox.kill.assert_not_called() + + @pytest.mark.asyncio + async def test_swallows_not_found(self, fake_e2b, fake_async_sandbox): + fake_async_sandbox.kill = AsyncMock( + side_effect=fake_e2b.SandboxNotFoundException("gone") + ) + client = CubeSandboxClient(fake_async_sandbox, idle_timeout=60, execute_timeout=30) + await client.destroy() + + @pytest.mark.asyncio + async def test_swallows_stopped_sandbox_exception(self, fake_e2b, fake_async_sandbox): + fake_async_sandbox.kill = AsyncMock( + side_effect=fake_e2b.SandboxException("instance is STOPPED") + ) + client = CubeSandboxClient(fake_async_sandbox, idle_timeout=60, execute_timeout=30) + await client.destroy() + + @pytest.mark.asyncio + async def test_reraises_other_sandbox_exception(self, fake_e2b, fake_async_sandbox): + fake_async_sandbox.kill = AsyncMock( + side_effect=fake_e2b.SandboxException("auth failed") + ) + client = 
CubeSandboxClient(fake_async_sandbox, idle_timeout=60, execute_timeout=30) + with pytest.raises(fake_e2b.SandboxException, match="auth failed"): + await client.destroy() + + @pytest.mark.asyncio + async def test_handle_cleared_even_on_error(self, fake_e2b, fake_async_sandbox): + fake_async_sandbox.kill = AsyncMock( + side_effect=fake_e2b.SandboxException("auth failed") + ) + client = CubeSandboxClient(fake_async_sandbox, idle_timeout=60, execute_timeout=30) + with pytest.raises(fake_e2b.SandboxException): + await client.destroy() + # After the failure, the handle is gone (finally block). + with pytest.raises(RuntimeError, match="closed"): + _ = client.sandbox_id + + +# --------------------------------------------------------------------------- +# assert_running / set_timeout +# --------------------------------------------------------------------------- + + +class TestAssertRunning: + + @pytest.mark.asyncio + async def test_running_is_silent(self, fake_e2b, fake_async_sandbox): + client = CubeSandboxClient(fake_async_sandbox, idle_timeout=60, execute_timeout=30) + await client.assert_running() + + @pytest.mark.asyncio + async def test_paused_raises_sandbox_exception(self, fake_e2b, fake_async_sandbox): + fake_async_sandbox.get_info.return_value = SimpleNamespace( + state=fake_e2b.SandboxState.PAUSED + ) + client = CubeSandboxClient(fake_async_sandbox, idle_timeout=60, execute_timeout=30) + with pytest.raises(fake_e2b.SandboxException, match="paused"): + await client.assert_running() + + @pytest.mark.asyncio + async def test_not_found_propagates(self, fake_e2b, fake_async_sandbox): + fake_async_sandbox.get_info.side_effect = fake_e2b.SandboxNotFoundException( + "gone" + ) + client = CubeSandboxClient(fake_async_sandbox, idle_timeout=60, execute_timeout=30) + with pytest.raises(fake_e2b.SandboxNotFoundException): + await client.assert_running() + + @pytest.mark.asyncio + async def test_called_after_close_raises(self, fake_e2b, fake_async_sandbox): + client = 
CubeSandboxClient(fake_async_sandbox, idle_timeout=60, execute_timeout=30) + client.close() + with pytest.raises(RuntimeError, match="closed"): + await client.assert_running() + + +class TestSetTimeout: + + @pytest.mark.asyncio + async def test_happy_path(self, fake_async_sandbox): + client = CubeSandboxClient(fake_async_sandbox, idle_timeout=60, execute_timeout=30) + await client.set_timeout(300) + fake_async_sandbox.set_timeout.assert_awaited_once_with(300) + + @pytest.mark.asyncio + async def test_exceptions_are_swallowed(self, fake_async_sandbox): + fake_async_sandbox.set_timeout.side_effect = RuntimeError("nope") + client = CubeSandboxClient(fake_async_sandbox, idle_timeout=60, execute_timeout=30) + await client.set_timeout(10) # must not raise + + +# --------------------------------------------------------------------------- +# commands_run +# --------------------------------------------------------------------------- + + +def _cmd_return(stdout: str = "", stderr: str = "", exit_code: int = 0): + return SimpleNamespace(stdout=stdout, stderr=stderr, exit_code=exit_code) + + +class TestCommandsRun: + + @pytest.mark.asyncio + async def test_basic_success(self, fake_e2b, fake_async_sandbox): + fake_async_sandbox.commands.run = AsyncMock( + return_value=_cmd_return(stdout="ok", stderr="", exit_code=0) + ) + client = CubeSandboxClient(fake_async_sandbox, idle_timeout=60, execute_timeout=30) + + result = await client.commands_run("echo hi") + + assert isinstance(result, CubeCommandResult) + assert result.stdout == "ok" + assert result.stderr == "" + assert result.exit_code == 0 + fake_async_sandbox.commands.run.assert_awaited_once() + args, kwargs = fake_async_sandbox.commands.run.await_args + assert args == ("echo hi",) + assert kwargs["envs"] == {} + assert kwargs["user"] == "root" + assert kwargs["timeout"] == 30.0 # execute_timeout default + assert "cwd" not in kwargs # not provided when falsy + + @pytest.mark.asyncio + async def test_with_env_and_cwd(self, 
fake_async_sandbox): + fake_async_sandbox.commands.run = AsyncMock( + return_value=_cmd_return(stdout="") + ) + client = CubeSandboxClient(fake_async_sandbox, idle_timeout=60, execute_timeout=30) + await client.commands_run("cmd", env={"K": "V"}, cwd="/ws") + kwargs = fake_async_sandbox.commands.run.await_args.kwargs + assert kwargs["envs"] == {"K": "V"} + assert kwargs["cwd"] == "/ws" + + @pytest.mark.asyncio + async def test_env_none_becomes_empty_dict(self, fake_async_sandbox): + fake_async_sandbox.commands.run = AsyncMock( + return_value=_cmd_return(stdout="") + ) + client = CubeSandboxClient(fake_async_sandbox, idle_timeout=60, execute_timeout=30) + await client.commands_run("cmd", env=None) + assert fake_async_sandbox.commands.run.await_args.kwargs["envs"] == {} + + @pytest.mark.asyncio + async def test_stdin_is_heredoc_wrapped(self, fake_async_sandbox): + fake_async_sandbox.commands.run = AsyncMock(return_value=_cmd_return("")) + client = CubeSandboxClient(fake_async_sandbox, idle_timeout=60, execute_timeout=30) + await client.commands_run("python3", stdin=b"print('hi')") + + invoked_cmd = fake_async_sandbox.commands.run.await_args.args[0] + assert invoked_cmd.startswith("python3 << 'TRPC_STDIN_EOF_") + assert "print('hi')" in invoked_cmd + + @pytest.mark.asyncio + async def test_timeout_override(self, fake_async_sandbox): + fake_async_sandbox.commands.run = AsyncMock(return_value=_cmd_return("")) + client = CubeSandboxClient(fake_async_sandbox, idle_timeout=60, execute_timeout=30) + await client.commands_run("cmd", timeout=5) + assert fake_async_sandbox.commands.run.await_args.kwargs["timeout"] == 5.0 + + @pytest.mark.asyncio + async def test_command_exit_exception_absorbed(self, fake_e2b, fake_async_sandbox): + exc = fake_e2b.CommandExitException(stdout="out", stderr="err", exit_code=7) + fake_async_sandbox.commands.run = AsyncMock(side_effect=exc) + client = CubeSandboxClient(fake_async_sandbox, idle_timeout=60, execute_timeout=30) + result = await 
client.commands_run("bad") + assert result.exit_code == 7 + assert result.stdout == "out" + assert result.stderr == "err" + + @pytest.mark.asyncio + async def test_none_fields_coerced(self, fake_async_sandbox): + # Vendor sometimes returns None for optional fields. + fake_async_sandbox.commands.run = AsyncMock( + return_value=SimpleNamespace(stdout=None, stderr=None, exit_code=None) + ) + client = CubeSandboxClient(fake_async_sandbox, idle_timeout=60, execute_timeout=30) + result = await client.commands_run("cmd") + assert result.stdout == "" + assert result.stderr == "" + assert result.exit_code == 0 + + @pytest.mark.asyncio + async def test_set_timeout_called_after_command(self, fake_async_sandbox): + """After each command the idle timer is renewed. Regression check.""" + fake_async_sandbox.commands.run = AsyncMock(return_value=_cmd_return("")) + client = CubeSandboxClient(fake_async_sandbox, idle_timeout=77, execute_timeout=30) + await client.commands_run("cmd") + fake_async_sandbox.set_timeout.assert_awaited_once_with(77) + + @pytest.mark.asyncio + async def test_set_timeout_called_even_on_command_exit(self, fake_e2b, fake_async_sandbox): + """Idle renewal must fire even when CommandExitException absorbed. + + BUG PROBE: if the renewal lived inside a success branch it would + silently skip on failures; over time the sandbox would idle out + mid-session. It must run unconditionally. 
+ """ + fake_async_sandbox.commands.run = AsyncMock( + side_effect=fake_e2b.CommandExitException(exit_code=1) + ) + client = CubeSandboxClient(fake_async_sandbox, idle_timeout=77, execute_timeout=30) + await client.commands_run("cmd") + fake_async_sandbox.set_timeout.assert_awaited_once_with(77) + + @pytest.mark.asyncio + async def test_duration_is_measured(self, fake_async_sandbox, monkeypatch): + clock = [1000.0] + + def fake_time(): + v = clock[0] + clock[0] += 2.5 + return v + + loop = asyncio.get_event_loop() + monkeypatch.setattr(loop, "time", fake_time) + fake_async_sandbox.commands.run = AsyncMock(return_value=_cmd_return("")) + client = CubeSandboxClient(fake_async_sandbox, idle_timeout=60, execute_timeout=30) + result = await client.commands_run("cmd") + # start=1000, end=1002.5 → duration=2.5. + assert abs(result.duration - 2.5) < 0.01 + + @pytest.mark.asyncio + async def test_closed_client_raises(self, fake_async_sandbox): + client = CubeSandboxClient(fake_async_sandbox, idle_timeout=60, execute_timeout=30) + client.close() + with pytest.raises(RuntimeError, match="closed"): + await client.commands_run("cmd") + + +# --------------------------------------------------------------------------- +# upload_path / download_path +# --------------------------------------------------------------------------- + + +class TestUploadPath: + + @pytest.mark.asyncio + async def test_uploads_file_via_write(self, tmp_path, fake_async_sandbox): + client = CubeSandboxClient(fake_async_sandbox, idle_timeout=60, execute_timeout=30) + local = tmp_path / "f.txt" + local.write_bytes(b"hello") + await client.upload_path(local, "/remote/f.txt") + fake_async_sandbox.files.write.assert_awaited_once_with( + "/remote/f.txt", b"hello", user="root" + ) + + @pytest.mark.asyncio + async def test_uploads_directory_via_tar(self, tmp_path, fake_async_sandbox, monkeypatch): + client = CubeSandboxClient(fake_async_sandbox, idle_timeout=60, execute_timeout=30) + local = tmp_path / "dir" + 
local.mkdir() + + called = AsyncMock() + monkeypatch.setattr(_sandbox, "upload_directory_via_tar", called) + await client.upload_path(local, "/remote/dir") + called.assert_awaited_once_with(client, local, "/remote/dir") + + +class TestDownloadPath: + + @pytest.mark.asyncio + async def test_downloads_file(self, tmp_path, fake_e2b, fake_async_sandbox): + fake_async_sandbox.files.get_info.return_value = SimpleNamespace( + type=fake_e2b.FileType.FILE + ) + fake_async_sandbox.files.read = AsyncMock(return_value=b"payload") + + client = CubeSandboxClient(fake_async_sandbox, idle_timeout=60, execute_timeout=30) + dst = tmp_path / "out.txt" + await client.download_path("/remote/f.txt", dst) + assert dst.read_bytes() == b"payload" + + @pytest.mark.asyncio + async def test_downloads_directory(self, tmp_path, fake_e2b, fake_async_sandbox, monkeypatch): + fake_async_sandbox.files.get_info.return_value = SimpleNamespace( + type=fake_e2b.FileType.DIR + ) + called = AsyncMock() + monkeypatch.setattr(_sandbox, "download_directory_via_tar", called) + + client = CubeSandboxClient(fake_async_sandbox, idle_timeout=60, execute_timeout=30) + dst = tmp_path / "out" + await client.download_path("/remote/dir", dst) + called.assert_awaited_once_with(client, "/remote/dir", dst) + + @pytest.mark.asyncio + async def test_default_refuses_to_clobber_existing_file(self, tmp_path, fake_e2b, fake_async_sandbox): + """Default ``on_existing='error'`` raises on a pre-existing destination file.""" + fake_async_sandbox.files.get_info.return_value = SimpleNamespace( + type=fake_e2b.FileType.FILE + ) + client = CubeSandboxClient(fake_async_sandbox, idle_timeout=60, execute_timeout=30) + dst = tmp_path / "out.txt" + dst.write_text("preexisting") + with pytest.raises(FileExistsError): + await client.download_path("/r/f", dst) + + @pytest.mark.asyncio + async def test_on_existing_replace_overwrites_file(self, tmp_path, fake_e2b, fake_async_sandbox): + fake_async_sandbox.files.get_info.return_value = 
SimpleNamespace( + type=fake_e2b.FileType.FILE + ) + fake_async_sandbox.files.read = AsyncMock(return_value=b"new") + client = CubeSandboxClient(fake_async_sandbox, idle_timeout=60, execute_timeout=30) + dst = tmp_path / "out.txt" + dst.write_bytes(b"old") + await client.download_path("/r/f", dst, on_existing="replace") + assert dst.read_bytes() == b"new" + + @pytest.mark.asyncio + async def test_on_existing_merge_preserves_siblings( + self, tmp_path, fake_e2b, fake_async_sandbox, monkeypatch + ): + """``on_existing="merge"`` must overlay onto an existing dir. + + Sibling entries that are not part of the downloaded payload must + survive. This is the behaviour Hermes' ``copy_out`` relies on + for repeated downloads into the same host workspace. + """ + fake_async_sandbox.files.get_info.return_value = SimpleNamespace( + type=fake_e2b.FileType.DIR + ) + # Stub the tar-transfer out; we only care that reserve_local_destination + # left the directory in place when merge mode was requested. + called = AsyncMock() + monkeypatch.setattr(_sandbox, "download_directory_via_tar", called) + + client = CubeSandboxClient(fake_async_sandbox, idle_timeout=60, execute_timeout=30) + dst = tmp_path / "out" + dst.mkdir() + sibling = dst / "pre_existing.txt" + sibling.write_text("still here") + + await client.download_path("/remote/dir", dst, on_existing="merge") + + # Existing sibling untouched; tar downloader was invoked to overlay. 
+ assert sibling.read_text() == "still here" + called.assert_awaited_once_with(client, "/remote/dir", dst) + + @pytest.mark.asyncio + async def test_on_existing_error_raises_on_nonempty_dir( + self, tmp_path, fake_e2b, fake_async_sandbox + ): + fake_async_sandbox.files.get_info.return_value = SimpleNamespace( + type=fake_e2b.FileType.DIR + ) + client = CubeSandboxClient(fake_async_sandbox, idle_timeout=60, execute_timeout=30) + dst = tmp_path / "out" + dst.mkdir() + (dst / "sentinel.txt").write_text("x") + with pytest.raises(FileExistsError): + await client.download_path("/remote/dir", dst, on_existing="error") + + +# --------------------------------------------------------------------------- +# read_file_bytes / write_file_bytes +# --------------------------------------------------------------------------- + + +class TestReadWriteBytes: + + @pytest.mark.asyncio + async def test_read_file_bytes_passes_user_and_format(self, fake_async_sandbox): + fake_async_sandbox.files.read = AsyncMock(return_value=b"data") + client = CubeSandboxClient(fake_async_sandbox, idle_timeout=60, execute_timeout=30) + result = await client.read_file_bytes("/remote") + assert result == b"data" + fake_async_sandbox.files.read.assert_awaited_once_with( + "/remote", format="bytes", user="root" + ) + + @pytest.mark.asyncio + async def test_read_none_becomes_empty(self, fake_async_sandbox): + fake_async_sandbox.files.read = AsyncMock(return_value=None) + client = CubeSandboxClient(fake_async_sandbox, idle_timeout=60, execute_timeout=30) + assert await client.read_file_bytes("/r") == b"" + + @pytest.mark.asyncio + async def test_read_non_bytes_coerced(self, fake_async_sandbox): + fake_async_sandbox.files.read = AsyncMock(return_value=bytearray(b"x")) + client = CubeSandboxClient(fake_async_sandbox, idle_timeout=60, execute_timeout=30) + assert await client.read_file_bytes("/r") == b"x" + + @pytest.mark.asyncio + async def test_write_passes_user(self, fake_async_sandbox): + client = 
CubeSandboxClient(fake_async_sandbox, idle_timeout=60, execute_timeout=30) + await client.write_file_bytes("/r/f", b"data") + fake_async_sandbox.files.write.assert_awaited_once_with( + "/r/f", b"data", user="root" + ) + + @pytest.mark.asyncio + async def test_closed_raises(self, fake_async_sandbox): + client = CubeSandboxClient(fake_async_sandbox, idle_timeout=60, execute_timeout=30) + client.close() + with pytest.raises(RuntimeError, match="closed"): + await client.read_file_bytes("/r") + with pytest.raises(RuntimeError, match="closed"): + await client.write_file_bytes("/r", b"") + + +# --------------------------------------------------------------------------- +# _is_remote_dir +# --------------------------------------------------------------------------- + + +class TestIsRemoteDir: + + @pytest.mark.asyncio + async def test_dir(self, fake_e2b, fake_async_sandbox): + fake_async_sandbox.files.get_info.return_value = SimpleNamespace( + type=fake_e2b.FileType.DIR + ) + client = CubeSandboxClient(fake_async_sandbox, idle_timeout=60, execute_timeout=30) + assert await client._is_remote_dir("/r") is True + + @pytest.mark.asyncio + async def test_file(self, fake_e2b, fake_async_sandbox): + fake_async_sandbox.files.get_info.return_value = SimpleNamespace( + type=fake_e2b.FileType.FILE + ) + client = CubeSandboxClient(fake_async_sandbox, idle_timeout=60, execute_timeout=30) + assert await client._is_remote_dir("/r") is False diff --git a/tests/code_executors/cube/test_transfer.py b/tests/code_executors/cube/test_transfer.py new file mode 100644 index 0000000..308c904 --- /dev/null +++ b/tests/code_executors/cube/test_transfer.py @@ -0,0 +1,510 @@ +# Tencent is pleased to support the open source community by making tRPC-Agent-Python available. +# +# Copyright (C) 2026 Tencent. All rights reserved. +# +# tRPC-Agent-Python is licensed under Apache-2.0. 
+"""Unit tests for trpc_agent_sdk.code_executors.cube._transfer.""" + +from __future__ import annotations + +import io +import os +import tarfile +from pathlib import Path +from unittest.mock import AsyncMock, MagicMock + +import pytest + +from trpc_agent_sdk.code_executors.cube import _transfer +from trpc_agent_sdk.code_executors.cube._sandbox import CubeCommandResult +from trpc_agent_sdk.code_executors.cube._transfer import ( + _run_protocol_step, + download_directory_via_tar, + reserve_local_destination, + upload_directory_via_tar, +) + + +# --------------------------------------------------------------------------- +# Fake client: the transfer functions only touch three methods. +# --------------------------------------------------------------------------- + + +def _ok(stdout: str = "", stderr: str = "") -> CubeCommandResult: + return CubeCommandResult(stdout=stdout, stderr=stderr, exit_code=0, duration=0.0) + + +def _err(stderr: str = "boom", exit_code: int = 1) -> CubeCommandResult: + return CubeCommandResult(stdout="", stderr=stderr, exit_code=exit_code, duration=0.0) + + +@pytest.fixture +def fake_client(): + c = MagicMock() + c.commands_run = AsyncMock(return_value=_ok()) + c.read_file_bytes = AsyncMock(return_value=b"") + c.write_file_bytes = AsyncMock(return_value=None) + return c + + +# --------------------------------------------------------------------------- +# reserve_local_destination +# --------------------------------------------------------------------------- + + +class TestReserveLocalDestination: + """Exercise the collision policy (error/replace/merge).""" + + def test_default_is_error(self, tmp_path): + target = tmp_path / "occupied" + target.mkdir() + (target / "sentinel.txt").write_text("x") + # No flag → default is "error". 
+ with pytest.raises(FileExistsError): + reserve_local_destination(target) + + def test_missing_destination_is_silent(self, tmp_path): + target = tmp_path / "missing" + reserve_local_destination(target, on_existing="error") + assert not target.exists() + + def test_empty_directory_is_silent(self, tmp_path): + target = tmp_path / "empty" + target.mkdir() + reserve_local_destination(target, on_existing="error") + assert target.exists() and target.is_dir() + + def test_nonempty_dir_error_raises(self, tmp_path): + target = tmp_path / "occupied" + target.mkdir() + (target / "sentinel.txt").write_text("x") + with pytest.raises(FileExistsError, match="on_existing="): + reserve_local_destination(target, on_existing="error") + assert (target / "sentinel.txt").exists() + + def test_nonempty_dir_replace_removes(self, tmp_path): + target = tmp_path / "occupied" + target.mkdir() + (target / "sentinel.txt").write_text("x") + reserve_local_destination(target, on_existing="replace") + assert not target.exists() + + def test_nonempty_dir_merge_keeps_siblings(self, tmp_path): + """Merge mode leaves an existing directory intact. + + The tar extract that follows overlays its own entries on top; + siblings not present in the payload survive. This is the + behaviour Hermes' ``copy_out`` relies on. 
+ """ + target = tmp_path / "occupied" + target.mkdir() + keep = target / "keep.txt" + keep.write_text("stays") + (target / "subdir").mkdir() + (target / "subdir" / "nested.txt").write_text("also stays") + + reserve_local_destination(target, on_existing="merge") + + assert target.is_dir() + assert keep.read_text() == "stays" + assert (target / "subdir" / "nested.txt").read_text() == "also stays" + + def test_file_error_raises(self, tmp_path): + target = tmp_path / "f.txt" + target.write_text("x") + with pytest.raises(FileExistsError): + reserve_local_destination(target, on_existing="error") + assert target.exists() + + def test_file_replace_unlinks(self, tmp_path): + target = tmp_path / "f.txt" + target.write_text("x") + reserve_local_destination(target, on_existing="replace") + assert not target.exists() + + def test_file_merge_falls_back_to_unlink(self, tmp_path): + """Cannot merge a remote payload into a regular file — falls back to replace.""" + target = tmp_path / "f.txt" + target.write_text("old") + reserve_local_destination(target, on_existing="merge") + assert not target.exists() + + def test_symlink_to_dir_is_unlinked_not_rmtree(self, tmp_path): + """BUG PROBE: symlinks must go through ``unlink``, not ``rmtree``. + + If the implementation used ``shutil.rmtree`` on a symlink it + would follow the link and delete the real directory — a + well-known bug. Reserve must detect the symlink first. + """ + real = tmp_path / "real" + real.mkdir() + (real / "keep.txt").write_text("keep") + link = tmp_path / "link" + link.symlink_to(real, target_is_directory=True) + + reserve_local_destination(link, on_existing="replace") + + # Link is gone but the real directory it pointed to is intact. + assert not link.exists() + assert not link.is_symlink() + assert real.exists() + assert (real / "keep.txt").read_text() == "keep" + + def test_symlink_to_dir_merge_is_also_unlinked(self, tmp_path): + """Merge on a symlink-to-dir must unlink the link, not follow it. 
+ + Same safety invariant as ``replace`` — if we ever ``rmtree``'d + through a symlink we'd blow away the real target. + """ + real = tmp_path / "real" + real.mkdir() + (real / "keep.txt").write_text("keep") + link = tmp_path / "link" + link.symlink_to(real, target_is_directory=True) + + reserve_local_destination(link, on_existing="merge") + + assert not link.is_symlink() + assert real.exists() + assert (real / "keep.txt").read_text() == "keep" + + def test_symlink_to_file_is_unlinked(self, tmp_path): + real = tmp_path / "real.txt" + real.write_text("keep") + link = tmp_path / "link.txt" + link.symlink_to(real) + + reserve_local_destination(link, on_existing="replace") + assert not link.is_symlink() + assert real.read_text() == "keep" + + def test_broken_symlink_error_raises(self, tmp_path): + """Broken symlink: exists()=False, is_symlink()=True. + + Must be treated as "already occupied" because the name is taken. + """ + link = tmp_path / "broken" + link.symlink_to(tmp_path / "does_not_exist") + assert not link.exists() + assert link.is_symlink() + + with pytest.raises(FileExistsError): + reserve_local_destination(link, on_existing="error") + + def test_broken_symlink_replace_is_unlinked(self, tmp_path): + link = tmp_path / "broken" + link.symlink_to(tmp_path / "does_not_exist") + reserve_local_destination(link, on_existing="replace") + assert not link.is_symlink() + + def test_missing_target_is_silent_in_all_modes(self, tmp_path): + for mode in ("error", "replace", "merge"): + target = tmp_path / f"missing_{mode}" + reserve_local_destination(target, on_existing=mode) + assert not target.exists(), mode + + def test_empty_dir_is_silent_in_all_modes(self, tmp_path): + for mode in ("error", "replace", "merge"): + target = tmp_path / f"empty_{mode}" + target.mkdir() + reserve_local_destination(target, on_existing=mode) + assert target.is_dir(), mode + + +# --------------------------------------------------------------------------- +# upload_directory_via_tar +# 
--------------------------------------------------------------------------- + + +class TestUploadDirectoryViaTar: + + @pytest.mark.asyncio + async def test_uploads_tar_and_extracts(self, tmp_path, fake_client, monkeypatch): + src = tmp_path / "src" + src.mkdir() + (src / "a.txt").write_text("hello") + + monkeypatch.setattr(_transfer.secrets, "token_hex", lambda n: "DEADBEEF") + + await upload_directory_via_tar(fake_client, src, "/remote/dst") + + # write_file_bytes call (upload the tar). + fake_client.write_file_bytes.assert_awaited_once() + (temp_remote, payload), _ = fake_client.write_file_bytes.await_args + assert temp_remote == "/tmp/.cube_upload_DEADBEEF.tar" + assert isinstance(payload, bytes) + # Payload is a valid tar containing a.txt. + with tarfile.open(fileobj=io.BytesIO(payload), mode="r") as tar: + names = {m.name.lstrip("./") for m in tar.getmembers() if m.name.lstrip("./")} + assert "a.txt" in names + + # Two commands: extract, then cleanup. + assert fake_client.commands_run.await_count == 2 + extract_cmd = fake_client.commands_run.await_args_list[0].args[0] + assert "mkdir -p '/remote/dst'" in extract_cmd + assert "tar -xf '/tmp/.cube_upload_DEADBEEF.tar' -C '/remote/dst'" in extract_cmd + assert "set -e" in extract_cmd + + cleanup_cmd = fake_client.commands_run.await_args_list[1].args[0] + assert cleanup_cmd == "rm -f '/tmp/.cube_upload_DEADBEEF.tar'" + + @pytest.mark.asyncio + async def test_cleanup_runs_even_when_extract_fails(self, tmp_path, fake_client): + src = tmp_path / "src" + src.mkdir() + + # Extract returns non-zero; cleanup must still run. 
+ fake_client.commands_run.side_effect = [_err("extract failed"), _ok()] + with pytest.raises(RuntimeError, match="upload tar extract"): + await upload_directory_via_tar(fake_client, src, "/remote/dst") + assert fake_client.commands_run.await_count == 2 # cleanup still fired + + @pytest.mark.asyncio + async def test_cleanup_nonzero_is_swallowed(self, tmp_path, fake_client): + src = tmp_path / "src" + src.mkdir() + # Extract OK, cleanup returns non-zero → no exception. + fake_client.commands_run.side_effect = [_ok(), _err("rm failed")] + await upload_directory_via_tar(fake_client, src, "/remote/dst") + # No exception raised. + + @pytest.mark.asyncio + async def test_remote_path_is_normalized(self, tmp_path, fake_client, monkeypatch): + src = tmp_path / "src" + src.mkdir() + monkeypatch.setattr(_transfer.secrets, "token_hex", lambda n: "XXXX") + await upload_directory_via_tar(fake_client, src, "/remote/./dst/../dst") + extract_cmd = fake_client.commands_run.await_args_list[0].args[0] + # ``posixpath.normpath`` collapses to ``/remote/dst``. 
+ assert "'/remote/dst'" in extract_cmd + + +# --------------------------------------------------------------------------- +# download_directory_via_tar +# --------------------------------------------------------------------------- + + +def _build_tar_payload(file_map: dict[str, bytes]) -> bytes: + """Build an in-memory tar containing ``file_map`` entries.""" + buf = io.BytesIO() + with tarfile.open(fileobj=buf, mode="w") as tar: + for name, data in file_map.items(): + info = tarfile.TarInfo(name=name) + info.size = len(data) + tar.addfile(info, io.BytesIO(data)) + return buf.getvalue() + + +class TestDownloadDirectoryViaTar: + + @pytest.mark.asyncio + async def test_creates_local_dir_and_extracts(self, tmp_path, fake_client, monkeypatch): + payload = _build_tar_payload({"a.txt": b"hello", "sub/b.txt": b"world"}) + fake_client.read_file_bytes.return_value = payload + monkeypatch.setattr(_transfer.secrets, "token_hex", lambda n: "TOKEN") + + dst = tmp_path / "out" + await download_directory_via_tar(fake_client, "/remote/dir", dst) + + assert (dst / "a.txt").read_bytes() == b"hello" + assert (dst / "sub" / "b.txt").read_bytes() == b"world" + # Commands: create tar, cleanup tar. + assert fake_client.commands_run.await_count == 2 + create_cmd = fake_client.commands_run.await_args_list[0].args[0] + assert create_cmd == "tar -cf '/tmp/.cube_download_TOKEN.tar' -C '/remote/dir' ." 
+ cleanup_cmd = fake_client.commands_run.await_args_list[1].args[0] + assert cleanup_cmd == "rm -f '/tmp/.cube_download_TOKEN.tar'" + + @pytest.mark.asyncio + async def test_existing_file_at_dst_is_unlinked(self, tmp_path, fake_client): + """When ``local`` is a file (not dir), it's unlinked and recreated.""" + fake_client.read_file_bytes.return_value = _build_tar_payload({"a.txt": b"x"}) + dst = tmp_path / "target" + dst.write_text("previous") # exists as file + + await download_directory_via_tar(fake_client, "/r", dst) + assert dst.is_dir() + assert (dst / "a.txt").read_bytes() == b"x" + + @pytest.mark.asyncio + async def test_cleanup_runs_when_read_fails(self, tmp_path, fake_client): + fake_client.read_file_bytes.side_effect = RuntimeError("read fail") + dst = tmp_path / "out" + with pytest.raises(RuntimeError, match="read fail"): + await download_directory_via_tar(fake_client, "/r", dst) + # Cleanup still ran (only the "rm -f ..." command, which is the + # second call that gets through the finally block). + assert fake_client.commands_run.await_count == 2 + + @pytest.mark.asyncio + async def test_cleanup_when_tar_create_fails(self, tmp_path, fake_client): + fake_client.commands_run.side_effect = [_err("tar failed"), _ok()] + dst = tmp_path / "out" + with pytest.raises(RuntimeError, match="download tar create"): + await download_directory_via_tar(fake_client, "/r", dst) + + @pytest.mark.asyncio + async def test_py312_extractall_fallback(self, tmp_path, fake_client, monkeypatch): + """py<3.12 does not accept ``filter=`` — must fall back to no filter. + + Since this test is running on Python 3.12+, we simulate the + older behavior by patching ``TarFile.extractall`` to raise + ``TypeError`` when ``filter`` is passed. 
+ """ + payload = _build_tar_payload({"a.txt": b"ok"}) + fake_client.read_file_bytes.return_value = payload + + real_extractall = tarfile.TarFile.extractall + calls: list[dict] = [] + + def patched(self, path=None, members=None, **kwargs): + calls.append(kwargs) + if "filter" in kwargs: + raise TypeError("filter not supported") + # Retry without filter — mimic real behavior. + return real_extractall(self, path=path, members=members) + + monkeypatch.setattr(tarfile.TarFile, "extractall", patched) + + dst = tmp_path / "out" + await download_directory_via_tar(fake_client, "/r", dst) + # Two calls: one with filter (rejected), one without (succeeded). + assert len(calls) == 2 + assert "filter" in calls[0] + assert "filter" not in calls[1] + assert (dst / "a.txt").read_bytes() == b"ok" + + +# --------------------------------------------------------------------------- +# _run_protocol_step +# --------------------------------------------------------------------------- + + +class TestRunProtocolStep: + + @pytest.mark.asyncio + async def test_ok_returns_silent(self, fake_client): + fake_client.commands_run.return_value = _ok() + await _run_protocol_step(fake_client, "true", op="noop") + + @pytest.mark.asyncio + async def test_error_raises_with_details(self, fake_client): + fake_client.commands_run.return_value = _err("nope", exit_code=17) + with pytest.raises(RuntimeError) as exc: + await _run_protocol_step(fake_client, "false", op="tar step") + msg = str(exc.value) + assert "tar step" in msg + assert "exit=17" in msg + assert "nope" in msg + + @pytest.mark.asyncio + async def test_error_with_swallow_is_silent(self, fake_client): + fake_client.commands_run.return_value = _err("nope") + await _run_protocol_step(fake_client, "false", op="cleanup", swallow=True) + + +# --------------------------------------------------------------------------- +# Integration-style roundtrip (end-to-end tar behaviour). 
+# +# Drive the full ``upload → download`` pair against an in-memory "remote" +# that simulates tar/mkdir/rm using the real host filesystem, so we +# verify symlink + permission preservation without touching e2b or a +# real sandbox. +# --------------------------------------------------------------------------- + + +class _InMemoryRemote: + """Simulates the remote sandbox's filesystem + shell-step contract. + + Supports just enough of ``tar``/``mkdir -p``/``rm -f`` to drive the + upload/download protocol. + """ + + def __init__(self, root: Path): + self.root = root + self.reads: list[str] = [] + self.writes: list[str] = [] + self.files: dict[str, bytes] = {} # simulated remote tmp files + + async def commands_run(self, cmd: str, **kwargs) -> CubeCommandResult: + """Execute a locally-emulated version of the shell step.""" + import shlex + import subprocess + # Translate remote paths to host paths under self.root. + # We allow-list the three shapes the protocol emits. + # The actual implementation uses `set -e; mkdir -p ...; tar -xf ... -C ...` + # We just run it verbatim since the host has tar/mkdir/rm. + # Strip any leading "/" in remote paths so they resolve under root. 
+ host_cmd = cmd.replace("'/tmp/", f"'{self.root}/tmp/") + host_cmd = host_cmd.replace("-C '/remote", f"-C '{self.root}/remote") + host_cmd = host_cmd.replace("mkdir -p '/remote", f"mkdir -p '{self.root}/remote") + proc = subprocess.run( + ["bash", "-c", host_cmd], + capture_output=True, + ) + return CubeCommandResult( + stdout=proc.stdout.decode(), + stderr=proc.stderr.decode(), + exit_code=proc.returncode, + duration=0.0, + ) + + async def read_file_bytes(self, path: str) -> bytes: + host = Path(str(self.root) + path) + return host.read_bytes() + + async def write_file_bytes(self, path: str, data: bytes) -> None: + host = Path(str(self.root) + path) + host.parent.mkdir(parents=True, exist_ok=True) + host.write_bytes(data) + + +@pytest.mark.asyncio +async def test_roundtrip_preserves_symlink(tmp_path): + """Upload a tree containing a symlink; download it back; symlink survives.""" + (tmp_path / "tmp").mkdir() + (tmp_path / "remote").mkdir() + + src = tmp_path / "src" + src.mkdir() + (src / "real.txt").write_text("real-content") + (src / "link.txt").symlink_to("real.txt") + + client = _InMemoryRemote(tmp_path) + + await upload_directory_via_tar(client, src, "/remote/uploaded") + + # Now download back to a fresh local dir. + dst = tmp_path / "downloaded" + await download_directory_via_tar(client, "/remote/uploaded", dst) + + assert (dst / "real.txt").read_text() == "real-content" + # Symlink survives (filter="data" still preserves relative symlinks). 
+ assert (dst / "link.txt").is_symlink() or (dst / "link.txt").exists() + if (dst / "link.txt").is_symlink(): + assert os.readlink(dst / "link.txt") == "real.txt" + + +@pytest.mark.asyncio +async def test_roundtrip_preserves_executable_bit(tmp_path): + """File permissions survive upload → download.""" + (tmp_path / "tmp").mkdir() + (tmp_path / "remote").mkdir() + + src = tmp_path / "src" + src.mkdir() + script = src / "run.sh" + script.write_text("#!/bin/sh\necho hi\n") + script.chmod(0o755) + + client = _InMemoryRemote(tmp_path) + await upload_directory_via_tar(client, src, "/remote/uploaded") + + dst = tmp_path / "downloaded" + await download_directory_via_tar(client, "/remote/uploaded", dst) + + downloaded = dst / "run.sh" + assert downloaded.exists() + # Mode bits are preserved end-to-end. + mode = downloaded.stat().st_mode & 0o777 + assert mode & 0o100, f"executable bit lost: mode={oct(mode)}" diff --git a/tests/code_executors/cube/test_types.py b/tests/code_executors/cube/test_types.py new file mode 100644 index 0000000..87a1013 --- /dev/null +++ b/tests/code_executors/cube/test_types.py @@ -0,0 +1,145 @@ +# Tencent is pleased to support the open source community by making tRPC-Agent-Python available. +# +# Copyright (C) 2026 Tencent. All rights reserved. +# +# tRPC-Agent-Python is licensed under Apache-2.0. 
+"""Unit tests for trpc_agent_sdk.code_executors.cube._types.""" + +from __future__ import annotations + +import pytest + +from trpc_agent_sdk.code_executors.cube._types import ( + DEFAULT_EXECUTE_TIMEOUT, + DEFAULT_IDLE_TIMEOUT, + DEFAULT_REMOTE_WORKSPACE, + ENV_API_KEY, + ENV_API_URL, + ENV_TEMPLATE, + CubeCodeExecutorConfig, + CubeWorkspaceRuntimeConfig, +) + + +class TestCubeCodeExecutorConfigDefaults: + + def test_defaults(self): + cfg = CubeCodeExecutorConfig() + assert cfg.template is None + assert cfg.api_url is None + assert cfg.api_key is None + assert cfg.sandbox_id is None + assert cfg.execute_timeout == DEFAULT_EXECUTE_TIMEOUT == 60.0 + assert cfg.idle_timeout == DEFAULT_IDLE_TIMEOUT == 3600 + assert isinstance(cfg.idle_timeout, int) + + def test_env_var_names(self): + # The implementation uses these names; if they change, hermes and + # every downstream deployment doc changes with them. Pin them. + assert ENV_API_URL == "E2B_API_URL" + assert ENV_API_KEY == "E2B_API_KEY" + assert ENV_TEMPLATE == "CUBE_TEMPLATE_ID" + + +class TestCubeCodeExecutorConfigValidation: + + def test_rejects_float_idle_timeout(self): + with pytest.raises(TypeError, match="idle_timeout must be an int"): + CubeCodeExecutorConfig(idle_timeout=0.9) # type: ignore[arg-type] + + def test_rejects_bool_idle_timeout(self): + # bool is a subclass of int in Python; explicitly reject it so + # ``idle_timeout=True`` doesn't silently become ``1`` second. 
+ with pytest.raises(TypeError, match="idle_timeout must be an int"): + CubeCodeExecutorConfig(idle_timeout=True) # type: ignore[arg-type] + + def test_rejects_zero_idle_timeout(self): + with pytest.raises(ValueError, match="idle_timeout must be >= 1"): + CubeCodeExecutorConfig(idle_timeout=0) + + def test_rejects_negative_idle_timeout(self): + with pytest.raises(ValueError, match="idle_timeout must be >= 1"): + CubeCodeExecutorConfig(idle_timeout=-5) + + def test_rejects_non_positive_execute_timeout(self): + with pytest.raises(ValueError, match="execute_timeout must be > 0"): + CubeCodeExecutorConfig(execute_timeout=0) + with pytest.raises(ValueError, match="execute_timeout must be > 0"): + CubeCodeExecutorConfig(execute_timeout=-1.0) + + def test_accepts_minimum_idle_timeout(self): + cfg = CubeCodeExecutorConfig(idle_timeout=1) + assert cfg.idle_timeout == 1 + + def test_accepts_subsecond_execute_timeout(self): + cfg = CubeCodeExecutorConfig(execute_timeout=0.25) + assert cfg.execute_timeout == 0.25 + + +class TestResolveTemplate: + + def test_uses_field_when_set(self, monkeypatch): + monkeypatch.setenv(ENV_TEMPLATE, "env-template") + cfg = CubeCodeExecutorConfig(template="explicit") + assert cfg.resolve_template() == "explicit" + + def test_falls_back_to_env(self, monkeypatch): + monkeypatch.delenv(ENV_TEMPLATE, raising=False) + monkeypatch.setenv(ENV_TEMPLATE, "env-template") + cfg = CubeCodeExecutorConfig() + assert cfg.resolve_template() == "env-template" + + def test_missing_both_raises(self, monkeypatch): + monkeypatch.delenv(ENV_TEMPLATE, raising=False) + cfg = CubeCodeExecutorConfig() + with pytest.raises(ValueError, match=ENV_TEMPLATE): + cfg.resolve_template() + + +class TestResolveApiUrl: + + def test_uses_field_when_set(self, monkeypatch): + monkeypatch.setenv(ENV_API_URL, "https://env") + cfg = CubeCodeExecutorConfig(api_url="https://explicit") + assert cfg.resolve_api_url() == "https://explicit" + + def test_falls_back_to_env(self, monkeypatch): + 
monkeypatch.setenv(ENV_API_URL, "https://env") + cfg = CubeCodeExecutorConfig() + assert cfg.resolve_api_url() == "https://env" + + def test_missing_both_raises(self, monkeypatch): + monkeypatch.delenv(ENV_API_URL, raising=False) + cfg = CubeCodeExecutorConfig() + with pytest.raises(ValueError, match=ENV_API_URL): + cfg.resolve_api_url() + + +class TestResolveApiKey: + + def test_uses_field_when_set(self, monkeypatch): + monkeypatch.setenv(ENV_API_KEY, "env-key") + cfg = CubeCodeExecutorConfig(api_key="explicit-key") + assert cfg.resolve_api_key() == "explicit-key" + + def test_falls_back_to_env(self, monkeypatch): + monkeypatch.setenv(ENV_API_KEY, "env-key") + cfg = CubeCodeExecutorConfig() + assert cfg.resolve_api_key() == "env-key" + + def test_missing_both_raises(self, monkeypatch): + monkeypatch.delenv(ENV_API_KEY, raising=False) + cfg = CubeCodeExecutorConfig() + with pytest.raises(ValueError, match=ENV_API_KEY): + cfg.resolve_api_key() + + +class TestCubeWorkspaceRuntimeConfig: + + def test_default_remote_workspace(self): + cfg = CubeWorkspaceRuntimeConfig() + assert cfg.remote_workspace == DEFAULT_REMOTE_WORKSPACE == "/workspace/cube_agent" + + def test_custom_remote_workspace_preserved(self): + cfg = CubeWorkspaceRuntimeConfig(remote_workspace="/ws/custom") + assert cfg.remote_workspace == "/ws/custom" diff --git a/tests/code_executors/local/test_local_ws_runtime.py b/tests/code_executors/local/test_local_ws_runtime.py index 80e860b..a7eeb39 100644 --- a/tests/code_executors/local/test_local_ws_runtime.py +++ b/tests/code_executors/local/test_local_ws_runtime.py @@ -322,50 +322,145 @@ async def test_collect_returns_code_file(self): assert isinstance(files[0], CodeFile) assert files[0].mime_type - # --- _read_limited --- - def test_read_limited_small_file(self): + @pytest.mark.asyncio + async def test_collect_symlinked_workspace_returns_relative_names(self, tmp_path): + """Regression: ``ws.path`` containing a symlink must still yield + workspace-relative 
``CodeFile.name`` values, not absolute + canonical paths. + + Repro setup mirrors the common real-world cases (macOS ``/tmp`` + → ``/private/tmp``, Linux scratch bind-mounts, ``$HOME`` on NFS + with a symlinked home): the workspace root is a symlink to the + actual storage. The pre-refactor ``collect`` canonicalised both + the root and each match before computing ``name``, so this case + produced ``a.txt``. After the shared-helper refactor a missed + canonicalisation made the helper's prefix-strip silently fall + back to returning the absolute resolved path. + """ + real = tmp_path / "real_ws" + real.mkdir() + (real / "a.txt").write_text("hello") + link = tmp_path / "link_ws" + link.symlink_to(real, target_is_directory=True) + + ws = WorkspaceInfo(id="symlink_test", path=str(link)) + files = await self.fs.collect(ws, ["*.txt"]) + + assert len(files) == 1 + assert files[0].name == "a.txt", ( + f"symlinked workspace leaked absolute path as CodeFile.name: " + f"{files[0].name!r}" + ) + assert files[0].content == "hello" + + @pytest.mark.asyncio + async def test_collect_outputs_symlinked_workspace_returns_relative_names(self, tmp_path): + """Same regression guard for ``collect_outputs``: ``ManifestFileRef.name`` + must stay workspace-relative when ``ws.path`` resolves through a symlink. 
+ """ + real = tmp_path / "real_ws_out" + real.mkdir() + ensure_layout(str(real)) + out_dir = real / DIR_OUT + (out_dir / "result.txt").write_text("result data") + + link = tmp_path / "link_ws_out" + link.symlink_to(real, target_is_directory=True) + + ws = WorkspaceInfo(id="symlink_outputs", path=str(link)) + spec = WorkspaceOutputSpec(globs=["out/*.txt"]) + manifest = await self.fs.collect_outputs(ws, spec) + + assert len(manifest.files) == 1 + assert manifest.files[0].name == "out/result.txt", ( + f"symlinked workspace leaked absolute path as ManifestFileRef.name: " + f"{manifest.files[0].name!r}" + ) + + # --- _fetch_bytes (local fetcher for shared collection helpers) --- + @pytest.mark.asyncio + async def test_fetch_bytes_small_file(self): f = Path(self.tmpdir) / "small.txt" f.write_text("hello") - content, mime = self.fs._read_limited(f) - assert content == "hello" - assert mime + data, raw = await self.fs._fetch_bytes(str(f), 1024) + assert data == b"hello" + assert raw == 5 - def test_read_limited_nonexistent(self): + @pytest.mark.asyncio + async def test_fetch_bytes_nonexistent(self): + """``_fetch_bytes`` raises on read failure so the shared + collection helpers can apply their ``application/octet-stream`` + sentinel — matching the pre-refactor ``_read_limited`` contract. + Swallowing the error here would pass ``b""`` into the happy-path + MIME sniffer and mis-label an unreadable ``foo.json`` as + ``application/json``. 
+ """ f = Path(self.tmpdir) / "missing.txt" - content, mime = self.fs._read_limited(f) - assert content == "" - assert mime == "application/octet-stream" + with pytest.raises(FileNotFoundError): + await self.fs._fetch_bytes(str(f), 1024) - # --- _read_limited_with_cap --- - def test_read_limited_with_cap_zero(self): - f = Path(self.tmpdir) / "cap_zero.txt" - f.write_text("data") - content, mime = self.fs._read_limited_with_cap(f, 0) - assert content == "" + @pytest.mark.asyncio + async def test_collect_skips_unreadable_file_with_octet_stream(self, tmp_path): + """Regression: ``collect`` must stay best-effort across unreadable + files and label them ``application/octet-stream`` (the canonical + "unknown / unreadable" sentinel), not whatever the filename + extension would imply. This is the contract the pre-refactor + ``_read_limited`` enforced inline. + """ + ws_path = tmp_path / "ws" + ws_path.mkdir() + (ws_path / "good.txt").write_text("ok") + bad = ws_path / "bad.json" + bad.write_text("{}") + + ws = WorkspaceInfo(id="unreadable_test", path=str(ws_path)) + + async def failing_fetcher(full_path, max_bytes): + if full_path.endswith("bad.json"): + raise OSError("simulated read failure") + return bad.read_bytes() if False else (Path(full_path).read_bytes()[:max_bytes], Path(full_path).stat().st_size) + + # Patch the fetcher on the instance so the enumeration path still + # runs; this exercises the helper's except branch end-to-end. 
+ original = self.fs._fetch_bytes + self.fs._fetch_bytes = failing_fetcher # type: ignore[assignment] + try: + files = await self.fs.collect(ws, ["*"]) + finally: + self.fs._fetch_bytes = original # type: ignore[assignment] + + by_name = {f.name: f for f in files} + assert "good.txt" in by_name + assert "bad.json" in by_name + assert by_name["bad.json"].content == "" + assert by_name["bad.json"].mime_type == "application/octet-stream", ( + "unreadable file must fall back to octet-stream, not extension-guessed MIME" + ) - def test_read_limited_with_cap_negative(self): - f = Path(self.tmpdir) / "cap_neg.txt" + @pytest.mark.asyncio + async def test_fetch_bytes_zero_budget(self): + f = Path(self.tmpdir) / "cap_zero.txt" f.write_text("data") - content, mime = self.fs._read_limited_with_cap(f, -1) - assert content == "" + data, raw = await self.fs._fetch_bytes(str(f), 0) + assert data == b"" + # raw_size reflects on-disk size, not the read budget. + assert raw == 4 - def test_read_limited_with_cap_small(self): + @pytest.mark.asyncio + async def test_fetch_bytes_truncates_to_budget(self): f = Path(self.tmpdir) / "cap_small.txt" f.write_text("hello world") - content, mime = self.fs._read_limited_with_cap(f, 5) - assert len(content) <= 5 + data, raw = await self.fs._fetch_bytes(str(f), 5) + assert len(data) == 5 + assert raw == 11 - def test_read_limited_with_cap_large(self): + @pytest.mark.asyncio + async def test_fetch_bytes_budget_above_size(self): f = Path(self.tmpdir) / "cap_large.txt" f.write_text("hello world") - content, mime = self.fs._read_limited_with_cap(f, MAX_READ_SIZE_BYTES + 1000) - assert content == "hello world" - - def test_read_limited_with_cap_nonexistent(self): - f = Path(self.tmpdir) / "missing_cap.txt" - content, mime = self.fs._read_limited_with_cap(f, 100) - assert content == "" - assert mime == "application/octet-stream" + data, raw = await self.fs._fetch_bytes(str(f), MAX_READ_SIZE_BYTES + 1000) + assert data == b"hello world" + assert raw == 11 
# --- _copy_directory --- def test_copy_directory(self): @@ -620,7 +715,7 @@ async def test_collect_outputs_no_matches(self): result = await self.fs.collect_outputs(self.ws, spec) assert len(result.files) == 0 - @patch('trpc_agent_sdk.code_executors.local._local_ws_runtime.save_artifact_helper', new_callable=AsyncMock) + @patch('trpc_agent_sdk.code_executors.utils._collect.save_artifact_helper', new_callable=AsyncMock) @pytest.mark.asyncio async def test_collect_outputs_save_with_ctx(self, mock_save): mock_save.return_value = 1 @@ -646,7 +741,7 @@ async def test_collect_outputs_save_without_ctx_raises(self): with pytest.raises(ValueError, match="Context is required"): await self.fs.collect_outputs(self.ws, spec) - @patch('trpc_agent_sdk.code_executors.local._local_ws_runtime.save_artifact_helper', new_callable=AsyncMock) + @patch('trpc_agent_sdk.code_executors.utils._collect.save_artifact_helper', new_callable=AsyncMock) @pytest.mark.asyncio async def test_collect_outputs_save_with_name_template(self, mock_save): mock_save.return_value = 1 diff --git a/tests/code_executors/utils/test_collect.py b/tests/code_executors/utils/test_collect.py new file mode 100644 index 0000000..b81c89b --- /dev/null +++ b/tests/code_executors/utils/test_collect.py @@ -0,0 +1,393 @@ +# Tencent is pleased to support the open source community by making tRPC-Agent-Python available. +# +# Copyright (C) 2026 Tencent. All rights reserved. +# +# tRPC-Agent-Python is licensed under Apache-2.0. +"""Unit tests for trpc_agent_sdk.code_executors.utils._collect. + +These tests pin down the shared "matches -> models" pipeline used by every +workspace backend (local / container / cube). They focus on edge paths the +backend-specific tests don't otherwise exercise: + +- ``_relativize`` fallback when an absolute match does not live under the + workspace root. +- ``build_code_files`` happy-path / dedupe / fetcher-failure / truncation + flagging. 
+- ``build_manifest_output`` limit handling (``max_files`` / ``max_total_bytes`` + / per-file truncation), inline + save branches, fetcher failures, and the + ``strict_truncated_save`` guard. +""" + +from __future__ import annotations + +from typing import Tuple + +import pytest + +from trpc_agent_sdk.code_executors._types import WorkspaceOutputSpec +from trpc_agent_sdk.code_executors.utils import _collect + + +def _make_fetcher(payloads): + """Build a fetcher that yields ``payloads[path]`` honouring ``max_bytes``. + + ``payloads`` is a ``{path: bytes}`` map. Returns ``(slice, raw_size)`` + so the helpers can compute truncation flags. + """ + + async def _fetch(path: str, max_bytes: int) -> Tuple[bytes, int]: + data = payloads[path] + return data[:max_bytes], len(data) + + return _fetch + + +# --------------------------------------------------------------------------- +# _relativize +# --------------------------------------------------------------------------- + + +class TestRelativize: + + def test_strips_workspace_prefix(self): + assert _collect._relativize("/ws", "/ws/sub/file.txt") == "sub/file.txt" + + def test_handles_trailing_slash_on_ws(self): + # The helper appends ``"/"`` only when ws_path doesn't already end in + # one, so a trailing slash on the input must not produce ``"//"``. + assert _collect._relativize("/ws/", "/ws/file") == "file" + + def test_returns_full_path_when_outside_workspace(self): + # Covers _collect.py:75 — fallback when a match somehow escapes the + # workspace root (e.g. a symlink resolution surfaced an absolute + # path on a different mount). The full path is preserved verbatim + # rather than silently mangled. 
+ full = "/elsewhere/file.txt" + assert _collect._relativize("/ws", full) == full + + +# --------------------------------------------------------------------------- +# build_code_files +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +class TestBuildCodeFiles: + + async def test_basic_collection(self): + payloads = { + "/ws/a.txt": b"alpha", + "/ws/sub/b.bin": b"\x00\x01beta", + } + files = await _collect.build_code_files( + "/ws", + ["/ws/a.txt", "/ws/sub/b.bin"], + _make_fetcher(payloads), + ) + names = sorted(f.name for f in files) + assert names == ["a.txt", "sub/b.bin"] + # Sizes / truncation flags must be populated from the fetcher's + # raw_size, not just len(data). + for f in files: + assert f.truncated is False + assert f.size_bytes == len(payloads[f"/ws/{f.name}"]) + + async def test_deduplicates_by_relative_name(self): + # Two glob patterns can yield the same absolute path. The helper + # must surface only the first hit, not double-count it. + payloads = {"/ws/a.txt": b"x"} + fetcher = _make_fetcher(payloads) + files = await _collect.build_code_files( + "/ws", + ["/ws/a.txt", "/ws/a.txt"], + fetcher, + ) + assert [f.name for f in files] == ["a.txt"] + + async def test_fetcher_failure_emits_sentinel(self): + # collect() is best-effort: a single failing read must not abort + # the whole batch. We expect an empty-content sentinel with the + # canonical octet-stream MIME. 
+ payloads = {"/ws/ok.txt": b"hi"} + + async def fetcher(path, max_bytes): + if path == "/ws/bad.txt": + raise OSError("denied") + data = payloads[path] + return data[:max_bytes], len(data) + + files = await _collect.build_code_files( + "/ws", + ["/ws/bad.txt", "/ws/ok.txt"], + fetcher, + ) + assert len(files) == 2 + bad = next(f for f in files if f.name == "bad.txt") + assert bad.content == "" + assert bad.mime_type == "application/octet-stream" + + async def test_truncation_flag_set_when_raw_exceeds_data(self): + # Fetcher reports a raw_size larger than the slice → the helper + # must mark ``truncated=True``. + async def fetcher(path, max_bytes): + return b"hi", 1024 + + files = await _collect.build_code_files( + "/ws", + ["/ws/big.bin"], + fetcher, + max_read_size=2, + ) + assert len(files) == 1 + assert files[0].truncated is True + assert files[0].size_bytes == 1024 + + async def test_default_cap_uses_module_constant(self, monkeypatch): + # When ``max_read_size`` is None the helper resolves + # ``MAX_READ_SIZE_BYTES`` *at call time* so tests can patch the + # constant. Verify the budget actually flows into the fetcher. + seen_caps: list[int] = [] + + async def fetcher(path, max_bytes): + seen_caps.append(max_bytes) + return b"", 0 + + monkeypatch.setattr(_collect, "MAX_READ_SIZE_BYTES", 7) + await _collect.build_code_files("/ws", ["/ws/a"], fetcher) + assert seen_caps == [7] + + +# --------------------------------------------------------------------------- +# build_manifest_output +# --------------------------------------------------------------------------- + + +class _FakeArtifactCtx: + """Minimal :class:`InvocationContext` stand-in that records save calls. + + ``save_artifact_helper`` only needs ``ctx.save_artifact(name, part)`` — + we don't have to mock the whole context surface. 
+ """ + + def __init__(self): + self.saved: list[Tuple[str, bytes, str]] = [] + self._next_version = 1 + + async def save_artifact(self, filename, artifact): + # The helper wraps bytes in Part(inline_data=Blob(...)). Pull them + # back out so assertions can stay terse. + blob = artifact.inline_data + self.saved.append((filename, blob.data, blob.mime_type)) + v = self._next_version + self._next_version += 1 + return v + + +@pytest.mark.asyncio +class TestBuildManifestOutput: + + async def test_basic_inline(self): + spec = WorkspaceOutputSpec(globs=["**/*"], inline=True) + payloads = {"/ws/a.txt": b"alpha"} + manifest, names, versions = await _collect.build_manifest_output( + "/ws", + spec, + ["/ws/a.txt"], + _make_fetcher(payloads), + ctx=None, + ) + assert names == [] and versions == [] + assert len(manifest.files) == 1 + ref = manifest.files[0] + assert ref.name == "a.txt" + assert ref.content == "alpha" + assert ref.saved_as == "" + assert ref.version == 0 + assert manifest.limits_hit is False + + async def test_save_branch_uses_name_template_and_records_versions(self): + spec = WorkspaceOutputSpec(globs=["**/*"], save=True, name_template="run-1/") + payloads = {"/ws/a.txt": b"alpha", "/ws/b.txt": b"beta"} + ctx = _FakeArtifactCtx() + manifest, names, versions = await _collect.build_manifest_output( + "/ws", + spec, + ["/ws/a.txt", "/ws/b.txt"], + _make_fetcher(payloads), + ctx=ctx, + ) + assert names == ["run-1/a.txt", "run-1/b.txt"] + assert versions == [1, 2] + # The artifact service must receive the full byte payload with the + # detected MIME type; saved_as must mirror name_template + rel. 
+ assert [s[0] for s in ctx.saved] == ["run-1/a.txt", "run-1/b.txt"] + assert manifest.files[0].saved_as == "run-1/a.txt" + assert manifest.files[0].version == 1 + + async def test_save_without_ctx_raises(self): + spec = WorkspaceOutputSpec(globs=["**/*"], save=True) + with pytest.raises(ValueError, match="Context is required"): + await _collect.build_manifest_output( + "/ws", + spec, + ["/ws/a.txt"], + _make_fetcher({"/ws/a.txt": b"data"}), + ctx=None, + ) + + async def test_max_files_limit_sets_limits_hit(self): + spec = WorkspaceOutputSpec(globs=["**/*"], max_files=1) + payloads = {"/ws/a.txt": b"a", "/ws/b.txt": b"b"} + manifest, _, _ = await _collect.build_manifest_output( + "/ws", + spec, + ["/ws/a.txt", "/ws/b.txt"], + _make_fetcher(payloads), + ctx=None, + ) + assert len(manifest.files) == 1 + assert manifest.limits_hit is True + + async def test_max_total_bytes_first_guard_breaks_before_fetch(self): + # First file fills the budget; second iteration's + # ``total_bytes >= max_total`` guard breaks before any fetch. + spec = WorkspaceOutputSpec(globs=["**/*"], max_total_bytes=3) + payloads = {"/ws/a.txt": b"abc", "/ws/b.txt": b"def"} + + async def fetcher(path, max_bytes): + data = payloads[path] + return data[:max_bytes], len(data) + + manifest, _, _ = await _collect.build_manifest_output( + "/ws", + spec, + ["/ws/a.txt", "/ws/b.txt"], + fetcher, + ctx=None, + ) + assert [f.name for f in manifest.files] == ["a.txt"] + assert manifest.limits_hit is True + + async def test_zero_read_budget_breaks_with_limits_hit(self, monkeypatch): + # Covers _collect.py:186-188 — the defensive ``read_budget <= 0`` + # break. The only way to reach it is when *both* the per-file cap + # and the total cap collapse to <= 0 before the first fetch on a + # given iteration. 
We force this by monkeypatching the resolved + # defaults so an unset ``spec.max_file_bytes`` (which falls back + # to ``MAX_READ_SIZE_BYTES``) and an unset ``spec.max_total_bytes`` + # (falls back to ``DEFAULT_MAX_TOTAL_BYTES``) both materialise as + # 0 — but the *first* guard ``total_bytes >= max_total`` only + # fires once ``total_bytes`` is non-zero. So we patch + # ``DEFAULT_MAX_TOTAL_BYTES`` slightly above zero to skip the + # outer guard and ``MAX_READ_SIZE_BYTES`` to zero so + # ``min(max_file_bytes=0, remaining_total>0) == 0`` and the + # inner guard fires. + monkeypatch.setattr(_collect, "MAX_READ_SIZE_BYTES", 0) + monkeypatch.setattr(_collect, "DEFAULT_MAX_TOTAL_BYTES", 1) + spec = WorkspaceOutputSpec(globs=["**/*"]) + + async def fetcher(path, max_bytes): # pragma: no cover - never invoked + raise AssertionError("fetcher must not run when budget is zero") + + manifest, _, _ = await _collect.build_manifest_output( + "/ws", + spec, + ["/ws/a.txt"], + fetcher, + ctx=None, + ) + assert manifest.files == [] + assert manifest.limits_hit is True + + async def test_per_file_truncation_marks_limits_hit(self): + # max_file_bytes < raw_size → fetcher returns a slice; helper + # must flag ``limits_hit`` because the per-file cap actually bit. + spec = WorkspaceOutputSpec(globs=["**/*"], max_file_bytes=2, inline=True) + payloads = {"/ws/a.txt": b"abcdef"} + manifest, _, _ = await _collect.build_manifest_output( + "/ws", + spec, + ["/ws/a.txt"], + _make_fetcher(payloads), + ctx=None, + ) + assert manifest.limits_hit is True + assert manifest.files[0].content == "ab" + + async def test_strict_truncated_save_raises(self): + # strict_truncated_save is the container's "refuse to persist a + # half-read binary" guard. Covers _collect.py:211. 
+ spec = WorkspaceOutputSpec(globs=["**/*"], save=True, max_file_bytes=2) + payloads = {"/ws/big.bin": b"0123456789"} + ctx = _FakeArtifactCtx() + with pytest.raises(RuntimeError, match="cannot save truncated output file"): + await _collect.build_manifest_output( + "/ws", + spec, + ["/ws/big.bin"], + _make_fetcher(payloads), + ctx=ctx, + strict_truncated_save=True, + ) + # The save must NOT have been attempted before the raise. + assert ctx.saved == [] + + async def test_non_strict_truncated_save_persists_partial(self): + # local/cube historically allow saving the truncated prefix; the + # opposite side of the strict guard. Sanity-check that branch. + spec = WorkspaceOutputSpec(globs=["**/*"], save=True, max_file_bytes=2) + payloads = {"/ws/big.bin": b"0123456789"} + ctx = _FakeArtifactCtx() + manifest, names, _ = await _collect.build_manifest_output( + "/ws", + spec, + ["/ws/big.bin"], + _make_fetcher(payloads), + ctx=ctx, + strict_truncated_save=False, + ) + assert names == ["big.bin"] + assert ctx.saved[0][1] == b"01" + assert manifest.limits_hit is True + + async def test_fetcher_failure_emits_sentinel_and_continues(self): + # Mirrors build_code_files behaviour: a single failing fetch must + # surface as an empty ManifestFileRef while the rest of the batch + # proceeds. Covers _collect.py:192-203. + spec = WorkspaceOutputSpec(globs=["**/*"], inline=True) + payloads = {"/ws/ok.txt": b"ok"} + + async def fetcher(path, max_bytes): + if path == "/ws/bad.bin": + raise IOError("transient") + data = payloads[path] + return data[:max_bytes], len(data) + + manifest, _, _ = await _collect.build_manifest_output( + "/ws", + spec, + ["/ws/bad.bin", "/ws/ok.txt"], + fetcher, + ctx=None, + ) + names = [f.name for f in manifest.files] + assert names == ["bad.bin", "ok.txt"] + bad = manifest.files[0] + assert bad.mime_type == "application/octet-stream" + # Sentinel entries do NOT carry inlined content even when + # spec.inline is set, because there are no bytes to decode. 
+ assert bad.content == "" + ok = manifest.files[1] + assert ok.content == "ok" + + async def test_dedup_by_relative_name(self): + spec = WorkspaceOutputSpec(globs=["**/*"], inline=True) + payloads = {"/ws/a.txt": b"x"} + manifest, _, _ = await _collect.build_manifest_output( + "/ws", + spec, + ["/ws/a.txt", "/ws/a.txt"], + _make_fetcher(payloads), + ctx=None, + ) + assert len(manifest.files) == 1 diff --git a/trpc_agent_sdk/code_executors/__init__.py b/trpc_agent_sdk/code_executors/__init__.py index 75c2f1f..840e13e 100644 --- a/trpc_agent_sdk/code_executors/__init__.py +++ b/trpc_agent_sdk/code_executors/__init__.py @@ -92,6 +92,37 @@ from .local import create_local_workspace_runtime from .utils import CodeExecutionUtils +# Cube/E2B is exposed via PEP 562 lazy `__getattr__` below so that importing +# this package never pulls in the optional `e2b-code-interpreter` dependency +# unless a Cube symbol is actually accessed. +_CUBE_LAZY_ATTRS = { + "CubeCodeExecutor", + "CubeCodeExecutorConfig", + "CubeCommandResult", + "CubeProgramRunner", + "CubeSandboxClient", + "CubeWorkspaceFS", + "CubeWorkspaceManager", + "CubeWorkspaceRuntime", + "CubeWorkspaceRuntimeConfig", + "OnExisting", + "create_cube_workspace_runtime", +} + + +def __getattr__(name: str): + if name in _CUBE_LAZY_ATTRS: + from . 
import cube as _cube # local import keeps cube/ off the eager path + value = getattr(_cube, name) + globals()[name] = value + return value + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") + + +def __dir__() -> list[str]: + return sorted(set(globals().keys()) | _CUBE_LAZY_ATTRS) + + __all__ = [ "load_artifact_helper", "parse_artifact_ref", @@ -175,4 +206,15 @@ "UnsafeLocalCodeExecutor", "create_local_workspace_runtime", "CodeExecutionUtils", + "CubeCodeExecutor", + "CubeCodeExecutorConfig", + "CubeCommandResult", + "CubeProgramRunner", + "CubeSandboxClient", + "CubeWorkspaceFS", + "CubeWorkspaceManager", + "CubeWorkspaceRuntime", + "CubeWorkspaceRuntimeConfig", + "OnExisting", + "create_cube_workspace_runtime", ] diff --git a/trpc_agent_sdk/code_executors/container/_container_ws_runtime.py b/trpc_agent_sdk/code_executors/container/_container_ws_runtime.py index ce9fb0f..f750e5f 100644 --- a/trpc_agent_sdk/code_executors/container/_container_ws_runtime.py +++ b/trpc_agent_sdk/code_executors/container/_container_ws_runtime.py @@ -35,15 +35,12 @@ from .._artifacts import load_artifact_helper from .._artifacts import parse_artifact_ref -from .._artifacts import save_artifact_helper from .._base_workspace_runtime import BaseProgramRunner from .._base_workspace_runtime import BaseWorkspaceFS from .._base_workspace_runtime import BaseWorkspaceManager from .._base_workspace_runtime import BaseWorkspaceRuntime from .._base_workspace_runtime import RunEnvProvider from .._constants import DEFAULT_INPUTS_CONTAINER -from .._constants import DEFAULT_MAX_FILES -from .._constants import DEFAULT_MAX_TOTAL_BYTES from .._constants import DEFAULT_RUN_CONTAINER_BASE from .._constants import DEFAULT_SKILLS_CONTAINER from .._constants import DEFAULT_TIMEOUT_SEC @@ -59,7 +56,6 @@ from .._constants import META_FILE_NAME from .._constants import WORKSPACE_ENV_DIR_KEY from .._types import CodeFile -from .._types import ManifestFileRef from .._types import 
ManifestOutput from .._types import WorkspaceCapabilities from .._types import WorkspaceInfo @@ -71,6 +67,8 @@ from .._types import WorkspaceStageOptions from ..utils import InputRecordMeta from ..utils import WorkspaceMetadata +from ..utils import build_code_files +from ..utils import build_manifest_output from ..utils import get_rel_path from ..utils import normalize_globs from ._container_cli import CommandArgs @@ -318,44 +316,13 @@ async def collect(self, Raises: RuntimeError: If collection fails """ - patterns = self._normalize_globs(patterns) - - # Build bash command to find files - pattern_str = " ".join([f"'{p}'" for p in patterns]) - cmd_str = (f"cd '{ws.path}' && shopt -s globstar nullglob dotglob; " - f"for p in {pattern_str}; do for f in $p; do " - f"if [ -f \"$f\" ]; then " - f"(readlink -f \"$f\" 2>/dev/null || realpath \"$f\" 2>/dev/null || echo \"$(pwd)/$f\"); " - f"fi; done; done") - - cmd = ["/bin/bash", "-lc", cmd_str] - result = await self.container.exec_run(cmd=cmd, command_args=self.config.command_args) - if result.exit_code != 0: - raise RuntimeError(f"Failed to collect files: {result.stderr}") - stdout = result.stdout - files = [] - seen = set() - - for line in stdout.strip().split('\n'): - line = line.strip() - if not line: - continue - - rel_path = line.removeprefix(f"{ws.path}/") - if rel_path in seen: - continue - seen.add(rel_path) - - data, size_bytes, mime = self._copy_file_out(line) - files.append( - CodeFile( - name=rel_path, - content=data.decode('utf-8', errors='replace'), - mime_type=mime, - size_bytes=size_bytes, - truncated=size_bytes > len(data), - )) - + matches = await self._enumerate_matches( + ws, + self._normalize_globs(patterns), + resolve_symlinks=True, + error_prefix="Failed to collect files", + ) + files = await build_code_files(ws.path, matches, self._fetch_bytes) logger.info("Collected %s files from workspace", len(files)) return files @@ -446,77 +413,84 @@ async def collect_outputs(self, Raises: RuntimeError: If 
collection fails """ - globs = self._normalize_globs(spec.globs) - pattern_str = " ".join([f"'{g}'" for g in globs]) - - cmd_str = (f"cd '{ws.path}' && shopt -s globstar nullglob dotglob; " - f"for p in {pattern_str}; do for f in $p; do " - f"if [ -f \"$f\" ]; then echo \"$(pwd)/$f\"; fi; done; done") + matches = await self._enumerate_matches( + ws, + self._normalize_globs(spec.globs), + resolve_symlinks=False, + error_prefix="Failed to collect outputs", + ) + # Container refuses to persist a half-read artifact, preserving + # the historical "never save a truncated binary" guarantee. + manifest, _, _ = await build_manifest_output( + ws.path, + spec, + matches, + self._fetch_bytes, + ctx, + strict_truncated_save=True, + ) + logger.info("Collected %s output files", len(manifest.files)) + return manifest + async def _enumerate_matches(self, ws: WorkspaceInfo, patterns: List[str], *, resolve_symlinks: bool, + error_prefix: str) -> List[str]: + """Run the glob inside the container, return absolute paths. + + ``collect`` historically resolved symlinks (via ``readlink -f``) + while ``collect_outputs`` did not; ``resolve_symlinks`` preserves + that distinction. ``error_prefix`` keeps the original per-caller + ``RuntimeError`` messages intact ("Failed to collect files" vs + "Failed to collect outputs") for backwards compatibility. + + Patterns may contain spaces (e.g. "my dir/*.txt"). The fix shape + — bash array + ``IFS=`` — mirrors the cube backend's ``_glob`` so + that a space-bearing pattern is neither word-split (which would + turn "my dir/*.txt" into two useless tokens "my" and + "dir/*.txt") nor quoted as a literal (which would disable + globbing). See ``cube/_runtime.py::_glob`` for the long-form + rationale. 
+ """ + if not patterns: + return [] + array_literal = " ".join([_shell_quote(p) for p in patterns]) + if resolve_symlinks: + emit = ("(readlink -f \"$f\" 2>/dev/null " + "|| realpath \"$f\" 2>/dev/null " + "|| echo \"$(pwd)/$f\")") + else: + emit = "echo \"$(pwd)/$f\"" + cmd_str = (f"cd {_shell_quote(ws.path)} && " + f"shopt -s globstar nullglob dotglob; " + f"patterns=({array_literal}); " + f"_saved_ifs=$IFS; IFS=; " + f'for p in "${{patterns[@]}}"; do ' + f"matches=( $p ); " + f'for f in "${{matches[@]}}"; do ' + f'if [ -f "$f" ]; then {emit}; fi; ' + f"done; " + f"done; " + f"IFS=$_saved_ifs") cmd = ["/bin/bash", "-lc", cmd_str] result = await self.container.exec_run(cmd=cmd, command_args=self.config.command_args) if result.exit_code: - raise RuntimeError(f"Failed to collect outputs: {result.stderr}") - stdout = result.stdout - - max_files = spec.max_files or DEFAULT_MAX_FILES - max_file_bytes = spec.max_file_bytes or MAX_READ_SIZE_BYTES - max_total = spec.max_total_bytes or DEFAULT_MAX_TOTAL_BYTES - - manifest = ManifestOutput() - total_bytes = 0 - count = 0 - saved_names: list[str] = [] - saved_versions: list[int] = [] - for line in stdout.strip().split('\n'): + raise RuntimeError(f"{error_prefix}: {result.stderr}") + out: List[str] = [] + for line in result.stdout.strip().split("\n"): line = line.strip() - if not line: - continue - - if count >= max_files or total_bytes >= max_total: - manifest.limits_hit = True - break - - data, raw_size, mime = self._copy_file_out(line) - - if len(data) > max_file_bytes: - data = data[:max_file_bytes] - manifest.limits_hit = True - - if total_bytes + len(data) > max_total: - remain = max_total - total_bytes - if remain <= 0: - manifest.limits_hit = True - break - data = data[:remain] - manifest.limits_hit = True - - total_bytes += len(data) - rel_path = line.removeprefix(f"{ws.path}/") - truncated = raw_size > len(data) - if truncated and spec.save: - raise RuntimeError(f"cannot save truncated output file: {rel_path}") - - 
file_ref = ManifestFileRef(name=rel_path, mime_type=mime) - if spec.inline: - file_ref.content = data.decode('utf-8', errors='replace') - if spec.save: - save_name = rel_path - if spec.name_template: - save_name = spec.name_template + rel_path - if not ctx: - raise ValueError("Context is required to save artifacts") - version = await save_artifact_helper(ctx, save_name, data, mime) - file_ref.saved_as = save_name - file_ref.version = version - saved_names.append(save_name) - saved_versions.append(version) + if line: + out.append(line) + return out - manifest.files.append(file_ref) - count += 1 + async def _fetch_bytes(self, full_path: str, max_bytes: int) -> Tuple[bytes, int]: + """Fetcher contract for shared collection helpers. - logger.info("Collected %s output files", len(manifest.files)) - return manifest + Copies the file out of the container via ``get_archive`` and + caps the returned slice at ``max_bytes``. ``raw_size`` is the + tar member's real size, so helpers can still flag truncation + even when ``max_bytes < raw_size``. + """ + data, raw_size, _ = self._copy_file_out(full_path, max_bytes=max_bytes) + return data, raw_size async def _put_directory(self, ws: WorkspaceInfo, src: str, dst: str) -> None: """Copy directory to container using tar.""" @@ -615,12 +589,14 @@ async def _stage_workspace_input(self, src: str, dst: str, mode: str) -> None: if result.exit_code: raise RuntimeError(f"Failed to stage input: {result.stderr}") - def _copy_file_out(self, full_path: str) -> Tuple[bytes, int, str]: + def _copy_file_out(self, full_path: str, *, max_bytes: int = MAX_READ_SIZE_BYTES) -> Tuple[bytes, int, str]: """ Copy file out of container. Args: full_path: Full path to file in container + max_bytes: Upper bound on the returned byte slice; the + actual file size is still reported as ``size_bytes``. 
Returns: Tuple of (file_data, size_bytes, mime_type) @@ -636,7 +612,7 @@ def _copy_file_out(self, full_path: str) -> Tuple[bytes, int, str]: for member in tar.getmembers(): if member.isfile(): f = tar.extractfile(member) - data = f.read(MAX_READ_SIZE_BYTES) + data = f.read(max_bytes) mime = self._detect_mime_type(data) return data, member.size, mime diff --git a/trpc_agent_sdk/code_executors/cube/__init__.py b/trpc_agent_sdk/code_executors/cube/__init__.py new file mode 100644 index 0000000..675f429 --- /dev/null +++ b/trpc_agent_sdk/code_executors/cube/__init__.py @@ -0,0 +1,38 @@ +# Tencent is pleased to support the open source community by making tRPC-Agent-Python available. +# +# Copyright (C) 2026 Tencent. All rights reserved. +# +# tRPC-Agent-Python is licensed under Apache-2.0. +"""Cube/E2B code executor and workspace runtime. + +The optional ``e2b-code-interpreter`` dependency is imported lazily inside +the first sandbox-constructing call (`CubeCodeExecutor.create` / +`.attach` / `.create_or_recreate`). Importing this package does not +require the ``[cube]`` extra to be installed. 
+""" + +from ._code_executor import CubeCodeExecutor +from ._runtime import CubeProgramRunner +from ._runtime import CubeWorkspaceFS +from ._runtime import CubeWorkspaceManager +from ._runtime import CubeWorkspaceRuntime +from ._runtime import create_cube_workspace_runtime +from ._sandbox import CubeCommandResult +from ._sandbox import CubeSandboxClient +from ._transfer import OnExisting +from ._types import CubeCodeExecutorConfig +from ._types import CubeWorkspaceRuntimeConfig + +__all__ = [ + "CubeCodeExecutor", + "CubeCodeExecutorConfig", + "CubeCommandResult", + "CubeProgramRunner", + "CubeSandboxClient", + "CubeWorkspaceFS", + "CubeWorkspaceManager", + "CubeWorkspaceRuntime", + "CubeWorkspaceRuntimeConfig", + "OnExisting", + "create_cube_workspace_runtime", +] diff --git a/trpc_agent_sdk/code_executors/cube/_code_executor.py b/trpc_agent_sdk/code_executors/cube/_code_executor.py new file mode 100644 index 0000000..354e8ab --- /dev/null +++ b/trpc_agent_sdk/code_executors/cube/_code_executor.py @@ -0,0 +1,275 @@ +# Tencent is pleased to support the open source community by making tRPC-Agent-Python available. +# +# Copyright (C) 2026 Tencent. All rights reserved. +# +# tRPC-Agent-Python is licensed under Apache-2.0. 
+"""Cube/E2B code executor for the trpc-agent-py SDK.""" + +from __future__ import annotations + +from dataclasses import replace +from typing import Awaitable +from typing import Callable +from typing import Optional + +from typing_extensions import override + +from pydantic import Field +from pydantic import PrivateAttr + +from trpc_agent_sdk.context import InvocationContext + +from .._base_code_executor import BaseCodeExecutor +from .._types import CodeBlock +from .._types import CodeBlockDelimiter +from .._types import CodeExecutionInput +from .._types import CodeExecutionResult +from .._types import create_code_execution_result +from ._e2b import _import_e2b +from ._sandbox import CubeCommandResult +from ._sandbox import CubeSandboxClient +from ._types import CubeCodeExecutorConfig + +_PYTHON_LANGUAGES = frozenset({"python", "py", "python3", ""}) +_BASH_LANGUAGES = frozenset({"bash", "sh"}) + + +class CubeCodeExecutor(BaseCodeExecutor): + """A code executor that runs blocks inside a Cube/E2B remote sandbox. + + Construct directly with an already-open :class:`CubeSandboxClient`:: + + executor = CubeCodeExecutor(client, cfg) + + For the typical use case (the SDK opens the sandbox for you) prefer + the async classmethod factories. All three read the bound sandbox id + from ``cfg.sandbox_id`` so it is the single source of truth — there + is no separate positional ``sandbox_id`` argument that could silently + override the config: + + - :meth:`create` — strict. If ``cfg.sandbox_id`` is set it attaches and + asserts the sandbox is RUNNING; otherwise it creates a fresh sandbox. + ``SandboxNotFoundException`` (gone) and ``SandboxException`` (PAUSED) + propagate so the caller decides whether to clear external locator + state and retry. + - :meth:`attach` — explicit attach-only variant; requires + ``cfg.sandbox_id`` to be set and never creates a fresh sandbox. + - :meth:`create_or_recreate` — convenience for callers (e.g. 
hermes) + that want the "attach, on NotFound run a callback then recreate" + pattern collapsed into a single call. + + `close()` is a no-op for the remote sandbox (drops the local handle + only). `destroy()` explicitly kills the remote sandbox; the caller + must call it when they no longer want the sandbox to outlive the + executor. + """ + + stateful: bool = Field(default=False, frozen=True, exclude=True) + optimize_data_file: bool = Field(default=False, frozen=True, exclude=True) + + # Extend the base default (`tool_code` + `python`) with a `bash` fence + # so text-path extraction matches what ``execute_code`` can actually + # run (see ``_select_interpreter``). Callers may still override via + # the ``code_block_delimiters`` field at construction time. + code_block_delimiters: list[CodeBlockDelimiter] = [ + CodeBlockDelimiter(start="```tool_code\n", end="\n```"), + CodeBlockDelimiter(start="```python\n", end="\n```"), + CodeBlockDelimiter(start="```bash\n", end="\n```"), + ] + + # `_client` is `Optional` because :meth:`close` / :meth:`destroy` + # legitimately drop the handle post-construction. `_cfg` has no such + # lifecycle and is set unconditionally in ``__init__``. + _client: Optional[CubeSandboxClient] = PrivateAttr(default=None) + _cfg: CubeCodeExecutorConfig = PrivateAttr() + + def __init__( + self, + client: CubeSandboxClient, + cfg: CubeCodeExecutorConfig, + **data, + ): + """Wrap an already-open :class:`CubeSandboxClient`. + + Prefer the async factories :meth:`create`, :meth:`attach`, or + :meth:`create_or_recreate` for typical use — they encapsulate the + lazy-import + connect/create plumbing. Direct construction is + for adapters that already own a :class:`CubeSandboxClient` (or + for tests that pass a fake). + + Raises: + ValueError: if the caller tries to enable ``stateful`` or + ``optimize_data_file`` (this executor does not support + either; the inherited :class:`Field` is frozen at + ``False``). 
+ """ + if data.get("stateful"): + raise ValueError("CubeCodeExecutor cannot be stateful.") + if data.get("optimize_data_file"): + raise ValueError("CubeCodeExecutor cannot enable optimize_data_file.") + super().__init__(**data) + self._client = client + self._cfg = cfg + + @classmethod + async def create(cls, cfg: CubeCodeExecutorConfig) -> "CubeCodeExecutor": + """Strict factory. Attaches when ``cfg.sandbox_id`` is set, else creates.""" + if cfg.sandbox_id: + client = await CubeSandboxClient.open_existing(cfg.sandbox_id, cfg) + else: + client = await CubeSandboxClient.open_new(cfg) + return cls(client, cfg) + + @classmethod + async def attach(cls, cfg: CubeCodeExecutorConfig) -> "CubeCodeExecutor": + """Attach-only factory. + + Requires ``cfg.sandbox_id`` to be set. Always connects and asserts + the sandbox is RUNNING; never creates a fresh sandbox. + """ + if not cfg.sandbox_id: + raise ValueError("CubeCodeExecutor.attach requires cfg.sandbox_id to be set; " + "use CubeCodeExecutor.create(cfg) to create a fresh sandbox.") + client = await CubeSandboxClient.open_existing(cfg.sandbox_id, cfg) + return cls(client, cfg) + + @classmethod + async def create_or_recreate( + cls, + cfg: CubeCodeExecutorConfig, + *, + on_stale: Optional[Callable[[], Awaitable[None]]] = None, + ) -> "CubeCodeExecutor": + """Attach-then-fall-back-to-create when the bound sandbox has expired. + + When ``cfg.sandbox_id`` is set and the remote sandbox is gone + (`SandboxNotFoundException`), ``on_stale`` is awaited (callers use + this to clear their persistent locator) and a fresh sandbox is + created. PAUSED state and other errors propagate unchanged so that + operator-managed pauses are not silently overwritten. 
+ """ + if not cfg.sandbox_id: + return await cls.create(cfg) + e2b = _import_e2b() + try: + return await cls.create(cfg) + except e2b.SandboxNotFoundException: + if on_stale is not None: + await on_stale() + return await cls.create(replace(cfg, sandbox_id=None)) + + @property + def sandbox_id(self) -> str: + """The bound sandbox id. Caller persists it for cross-process reuse.""" + return self.sandbox_client.sandbox_id + + @property + def sandbox_client(self) -> CubeSandboxClient: + """The underlying :class:`CubeSandboxClient`. + + Useful for low-level callers (e.g. hermes' ``HarnessSandbox`` + adapter) that need direct file/exec primitives without going + through the workspace runtime contract. Always returns a live + client; raises :class:`RuntimeError` if the executor was closed. + """ + return self._require_client() + + @property + def config(self) -> CubeCodeExecutorConfig: + """Configuration this executor was created with.""" + return self._cfg + + async def assert_running(self) -> None: + """Re-validate the sandbox is RUNNING (e.g. before each turn).""" + await self._require_client().assert_running() + + def close(self) -> None: + """Drop the local sandbox handle. Does not kill the remote sandbox.""" + if self._client is not None: + self._client.close() + self._client = None + + async def destroy(self) -> None: + """Explicitly kill the remote sandbox.""" + if self._client is None: + return + try: + await self._client.destroy() + finally: + self._client = None + + @override + async def execute_code( + self, + invocation_context: InvocationContext, + code_execution_input: CodeExecutionInput, + ) -> CodeExecutionResult: + """Run each code block in the bound sandbox and aggregate output. + + Code is fed to the interpreter via stdin (heredoc-wrapped by + :meth:`CubeSandboxClient.commands_run`), which keeps multi-line + payloads with arbitrary quoting safe without shell-escaping the + whole block. 
Bash blocks are executed as a **login shell** + (``bash -l``) so ``/etc/profile``, ``/etc/profile.d/*`` and + ``~/.bash_profile`` populate ``PATH`` — Cube/E2B templates + commonly install toolchains (``uv``/``conda``/``nvm``/``rye``) + via setup scripts that hook into profile files rather than + through Dockerfile ``ENV PATH=…``, and a non-login shell would + silently fail to locate them. Python's ``-c``/stdin paths bypass + shell profile entirely, so Python blocks use plain ``python3``. + """ + client = self._require_client() + cfg = self.config + + blocks = list(code_execution_input.code_blocks) + if not blocks and code_execution_input.code: + blocks = [CodeBlock(code=code_execution_input.code, language="python")] + + stdouts: list[str] = [] + stderrs: list[str] = [] + for index, block in enumerate(blocks): + if not block.code: + continue + try: + interpreter = self._select_interpreter(block.language) + except ValueError as exc: + stderrs.append(f"Error in code block {index}: {exc}\n") + continue + result = await client.commands_run( + interpreter, + stdin=block.code.encode("utf-8"), + timeout=cfg.execute_timeout, + ) + self._collect(result, stdouts, stderrs) + return create_code_execution_result(stdout="".join(stdouts), stderr="".join(stderrs)) + + @staticmethod + def _select_interpreter(language: str) -> str: + """Pick the remote interpreter command for ``language``. + + Bash dispatches as ``bash -l`` so the block runs in a login + shell and inherits PATH from ``/etc/profile`` etc. Python uses + plain ``python3`` since the Python interpreter ignores shell + profile by design. 
+ """ + lang = (language or "").lower() + if lang in _PYTHON_LANGUAGES: + return "python3" + if lang in _BASH_LANGUAGES: + return "bash -l" + raise ValueError(f"unsupported language: {language!r}") + + @staticmethod + def _collect(result: CubeCommandResult, stdouts: list[str], stderrs: list[str]) -> None: + if result.exit_code != 0: + stderrs.append(f"Process exited with code: {result.exit_code}\n") + if result.stderr: + stderrs.append(result.stderr) + if result.stdout: + stdouts.append(result.stdout) + + def _require_client(self) -> CubeSandboxClient: + if self._client is None: + raise RuntimeError("CubeCodeExecutor sandbox handle was closed; " + "construct a fresh executor.") + return self._client diff --git a/trpc_agent_sdk/code_executors/cube/_e2b.py b/trpc_agent_sdk/code_executors/cube/_e2b.py new file mode 100644 index 0000000..9d09894 --- /dev/null +++ b/trpc_agent_sdk/code_executors/cube/_e2b.py @@ -0,0 +1,42 @@ +# Tencent is pleased to support the open source community by making tRPC-Agent-Python available. +# +# Copyright (C) 2026 Tencent. All rights reserved. +# +# tRPC-Agent-Python is licensed under Apache-2.0. +"""e2b-code-interpreter vendor seam for the Cube package. + +Centralizes the lazy-import boundary (:func:`_import_e2b`) and the small +set of vendor-aware constants used by sibling modules (`_sandbox.py` for +lifecycle/commands, `_transfer.py` for the tar protocol). Keeping this +file thin so neither downstream module has to repeat the install hint or +the ``user=`` plumbing. +""" + +from __future__ import annotations + +# The unix user we run sandbox commands and FS ops as. Standard cube/e2b +# templates ship with `root`; downstream callers do not need to override +# this and we deliberately do not expose a knob to keep the surface small. 
+_GUEST_USER = "root" + +_E2B_INSTALL_HINT = ("e2b-code-interpreter is required for CubeCodeExecutor; " + "install with `pip install trpc-agent-py[cube]`.") + + +def _import_e2b(): + """Lazily import :mod:`e2b_code_interpreter` symbols. + + Deferred so that ``from trpc_agent_sdk.code_executors.cube import ...`` + never requires the optional ``[cube]`` extra to be installed; only + actual sandbox construction or vendor-exception handling pays the + import cost. + + Raises: + ImportError: if the optional ``[cube]`` extra is not installed, + with a message pointing at the install command. + """ + try: + import e2b_code_interpreter as _mod # pylint: disable=import-outside-toplevel + except ImportError as exc: + raise ImportError(_E2B_INSTALL_HINT) from exc + return _mod diff --git a/trpc_agent_sdk/code_executors/cube/_paths.py b/trpc_agent_sdk/code_executors/cube/_paths.py new file mode 100644 index 0000000..6452c30 --- /dev/null +++ b/trpc_agent_sdk/code_executors/cube/_paths.py @@ -0,0 +1,114 @@ +# Tencent is pleased to support the open source community by making tRPC-Agent-Python available. +# +# Copyright (C) 2026 Tencent. All rights reserved. +# +# tRPC-Agent-Python is licensed under Apache-2.0. +"""Pure remote-path and shell-quoting helpers for the Cube package. + +No e2b dependency, no ``AsyncSandbox`` reference — these are stateless +string utilities usable by any adapter that targets a Cube/E2B remote +workspace. Lives in its own module so :mod:`._sandbox`, :mod:`._transfer`, +:mod:`._runtime`, and future external adapters (e.g. hermes) can import +them without dragging in the sandbox client or the e2b extra. +""" + +from __future__ import annotations + +import base64 +import posixpath +import secrets + +# Random suffix prefix for the bash heredoc marker emitted by +# `wrap_stdin_heredoc`. Chosen to be unlikely to collide with payload +# content while remaining greppable in command logs. 
# Marker stem for generated heredoc terminators; a random hex suffix is
# appended per call to avoid collisions with payload/command text.
_HEREDOC_MARKER_PREFIX = "TRPC_STDIN_EOF"

# Width of base64 lines emitted on the binary heredoc path. 76 matches
# the canonical MIME wrapping width and keeps command logs readable.
_BASE64_LINE_WIDTH = 76


def shell_quote(value: str) -> str:
    """Single-quote a string for safe inclusion in a bash command line."""
    if not value:
        return "''"
    # Classic POSIX idiom: close the quote, emit an escaped literal
    # single quote, reopen the quote.
    return "'" + value.replace("'", "'\\''") + "'"


def normalize_remote_relative(path: str, *, allow_current: bool = False) -> str:
    """Normalize a relative remote path and reject escape attempts.

    Pure remote-path logic: this does not know about any host-side
    workspace and never tries to map host absolute paths to
    workspace-relative ones.
    """
    if not path or not path.strip():
        if allow_current:
            return ""
        raise ValueError("cube remote path must not be empty.")
    # Backslashes are treated as path separators so Windows-style input
    # normalizes the same way as POSIX input.
    normalized = posixpath.normpath(path.strip().replace("\\", "/"))
    if normalized in ("", "."):
        if allow_current:
            return ""
        raise ValueError("cube remote path must not be empty.")
    if normalized.startswith("/") or normalized == ".." or normalized.startswith("../"):
        raise ValueError(f"cube remote path escapes its root: {path}")
    return normalized


def join_remote(remote_root: str, relative: str) -> str:
    """Join a relative path under a remote root and collapse ``..`` components."""
    if not relative:
        return remote_root
    return posixpath.normpath(posixpath.join(remote_root, relative))


def wrap_stdin_heredoc(command: str, stdin: bytes) -> str:
    """Embed ``stdin`` as a bash heredoc so the command receives it as input.

    The e2b SDK's ``commands.run(stdin=...)`` is a bool toggle, not a data
    channel, so we transport the payload inside the command string
    itself. Two paths are emitted depending on whether the payload is
    valid UTF-8:

    - **Text fast path.** UTF-8 payloads are inlined verbatim as
      ``{command} << 'MARKER'``. The rendered command stays readable
      in logs and incurs no extra subprocess.
    - **Binary path.** Non-UTF-8 payloads are base64-encoded and routed
      through ``base64 -d | {command}`` so the original bytes reach the
      command's stdin byte-for-byte. ``base64`` ships with coreutils
      and is present in every Cube/E2B template. The base64 alphabet
      (``A-Za-z0-9+/=``) cannot contain ``_``, so the heredoc marker
      can never collide with the payload on this path.

    The marker collision check inspects *both* ``payload`` and
    ``command`` — a marker accidentally embedded in the wrapper
    command (e.g. a multi-line shell function whose body contains the
    chosen literal) would otherwise close the heredoc prematurely.

    For shipping large binary blobs (assets, datasets, etc.), prefer
    :meth:`CubeSandboxClient.upload_path` over piping through stdin.
    """
    try:
        payload = stdin.decode("utf-8")
    except UnicodeDecodeError:
        return _wrap_binary_stdin_heredoc(command, stdin)
    marker = f"{_HEREDOC_MARKER_PREFIX}_{secrets.token_hex(8)}"
    while marker in payload or marker in command:
        marker = f"{_HEREDOC_MARKER_PREFIX}_{secrets.token_hex(8)}"
    # NOTE(review): a heredoc always terminates its body with a newline,
    # so on this text path a payload lacking a trailing "\n" reaches the
    # command with one appended — confirm callers tolerate this (the
    # binary path below is byte-exact, since base64 -d ignores newlines).
    return f"{command} << '{marker}'\n{payload}\n{marker}"


def _wrap_binary_stdin_heredoc(command: str, stdin: bytes) -> str:
    """Render a base64-on-the-wire heredoc for non-UTF-8 stdin payloads.

    The base64 alphabet excludes ``_`` so no marker collision with the
    body is possible; the only retry case is a marker that already
    appears inside ``command`` itself.
    """
    encoded = base64.b64encode(stdin).decode("ascii")
    body = "\n".join(encoded[i:i + _BASE64_LINE_WIDTH] for i in range(0, len(encoded), _BASE64_LINE_WIDTH))
    marker = f"{_HEREDOC_MARKER_PREFIX}_{secrets.token_hex(8)}"
    while marker in command:
        marker = f"{_HEREDOC_MARKER_PREFIX}_{secrets.token_hex(8)}"
    return f"base64 -d << '{marker}' | {command}\n{body}\n{marker}"


# --- patch boundary: new file trpc_agent_sdk/code_executors/cube/_runtime.py ---
# Tencent is pleased to support the open source community by making tRPC-Agent-Python available.
#
# Copyright (C) 2026 Tencent. All rights reserved.
#
# tRPC-Agent-Python is licensed under Apache-2.0.
"""Workspace runtime backed by a Cube/E2B remote sandbox."""

from __future__ import annotations

import os
import posixpath
import re
import time
from pathlib import Path
from typing import List
from typing import Optional
from typing import Tuple
from typing_extensions import override

from trpc_agent_sdk.context import InvocationContext
from trpc_agent_sdk.log import logger

from .._artifacts import load_artifact_helper
from .._artifacts import parse_artifact_ref
from .._base_workspace_runtime import BaseProgramRunner
from .._base_workspace_runtime import BaseWorkspaceFS
from .._base_workspace_runtime import BaseWorkspaceManager
from .._base_workspace_runtime import BaseWorkspaceRuntime
from .._base_workspace_runtime import RunEnvProvider
from .._constants import DEFAULT_TIMEOUT_SEC
from .._constants import DIR_OUT
from .._constants import DIR_RUNS
from .._constants import DIR_SKILLS
from .._constants import DIR_WORK
from .._constants import ENV_OUTPUT_DIR
from .._constants import ENV_RUN_DIR
from .._constants import ENV_SKILLS_DIR
from .._constants import ENV_WORK_DIR
from .._constants import WORKSPACE_ENV_DIR_KEY
from .._types import CodeFile
from .._types import ManifestOutput
from .._types import WorkspaceCapabilities
from .._types import WorkspaceInfo
from .._types import WorkspaceInputSpec
from .._types import WorkspaceOutputSpec
from .._types import WorkspacePutFileInfo
from .._types import WorkspaceRunProgramSpec
from .._types import WorkspaceRunResult
from .._types import WorkspaceStageOptions
from ..utils import build_code_files
from ..utils import build_manifest_output
from ..utils import normalize_globs
from ._code_executor import CubeCodeExecutor
from ._paths import join_remote
from ._paths import normalize_remote_relative
from ._paths import shell_quote
from ._sandbox import CubeSandboxClient
from ._types import CubeWorkspaceRuntimeConfig

# Characters outside [a-zA-Z0-9_-] are replaced when embedding exec ids
# into remote workspace directory names.
_RE_SAFE_ID = re.compile(r"[^a-zA-Z0-9_-]")


def _input_default_name(src: str) -> str:
    # Return the last path segment of ``src`` (scheme prefixes such as
    # "artifact://" contain slashes, so the segment after the final "/"
    # is what comes back).
    # NOTE(review): a ``src`` ending in "/" is returned unchanged,
    # trailing slash included — confirm upstream input specs never
    # produce that shape.
    i = src.rfind("/")
    if 0 <= i < len(src) - 1:
        return src[i + 1:]
    return src


class CubeWorkspaceManager(BaseWorkspaceManager):
    """Creates per-execution workspaces under the configured ``remote_workspace`` root."""

    def __init__(self, client: CubeSandboxClient, remote_workspace: str, command_timeout: float):
        self._client = client
        self._root = posixpath.normpath(remote_workspace)
        self._timeout = command_timeout
        # exec_id -> WorkspaceInfo cache; see create_workspace for why
        # the cache is never trusted as proof the remote dir exists.
        self._ws_paths: dict[str, WorkspaceInfo] = {}

    @override
    async def create_workspace(self, exec_id: str, ctx: Optional[InvocationContext] = None) -> WorkspaceInfo:
        # Reuse the previously minted path for the same exec_id so callers
        # see a stable workspace location across calls. The cache is *not*
        # trusted as proof that the remote dir still exists: the sandbox is
        # remote and ephemeral, so any number of external events (operator
        # cleanup, snapshot rollback, sibling cleanup() on a shared
        # sandbox, host process restart re-attaching to a live sandbox)
        # can delete the directory while this process is unaware. To stay
        # in sync we unconditionally re-issue an idempotent ``mkdir -p``
        # for the four standard subdirs on every call. ``mkdir -p`` is a
        # no-op when the tree already exists, so the steady-state cost is
        # one round-trip; on miss the workspace heals transparently
        # instead of letting downstream put_files / collect_outputs /
        # stage_inputs fail deep inside with cryptic "No such file" errors.
        cached = self._ws_paths.get(exec_id)
        if cached is not None and cached.path:
            ws_path = cached.path
        else:
            safe = _RE_SAFE_ID.sub("_", exec_id) if exec_id else "anon"
            # time_ns suffix keeps fresh paths unique per process.
            suffix = time.time_ns()
            ws_path = posixpath.join(self._root, f"ws_{safe}_{suffix}")

        cmd = ("set -e; "
               f"mkdir -p {shell_quote(ws_path)} "
               f"{shell_quote(posixpath.join(ws_path, DIR_WORK))} "
               f"{shell_quote(posixpath.join(ws_path, DIR_OUT))} "
               f"{shell_quote(posixpath.join(ws_path, DIR_SKILLS))} "
               f"{shell_quote(posixpath.join(ws_path, DIR_RUNS))}")
        result = await self._client.commands_run(cmd, timeout=self._timeout)
        if result.exit_code != 0:
            raise RuntimeError(f"Failed to create cube workspace: {result.stderr or result.stdout}")

        if cached is not None and cached.path == ws_path:
            logger.debug("Cube workspace reconciled: id=%s path=%s", exec_id, ws_path)
            return cached
        info = WorkspaceInfo(id=exec_id, path=ws_path)
        self._ws_paths[exec_id] = info
        logger.debug("Cube workspace created: id=%s path=%s", exec_id, ws_path)
        return info

    @override
    async def cleanup(self, exec_id: str, ctx: Optional[InvocationContext] = None) -> None:
        info = self._ws_paths.get(exec_id)
        if not info or not info.path:
            # Drop any stale entry that lacks a usable path so retries don't
            # loop forever on a broken record.
            self._ws_paths.pop(exec_id, None)
            return
        cmd = f"rm -rf {shell_quote(info.path)}"
        result = await self._client.commands_run(cmd, timeout=self._timeout)
        if result.exit_code != 0:
            # Keep the cache entry intact so the caller can retry cleanup;
            # popping prematurely would orphan the remote workspace because
            # subsequent cleanup(exec_id) calls would hit the "unknown id"
            # no-op branch.
            raise RuntimeError(f"Failed to clean cube workspace: {result.stderr or result.stdout}")
        self._ws_paths.pop(exec_id, None)
        logger.debug("Cube workspace cleaned: id=%s path=%s", exec_id, info.path)


class CubeWorkspaceFS(BaseWorkspaceFS):
    """Workspace-scoped filesystem operations that delegate to the client."""

    def __init__(self, client: CubeSandboxClient, command_timeout: float):
        self._client = client
        self._timeout = command_timeout

    @override
    async def put_files(self,
                        ws: WorkspaceInfo,
                        files: List[WorkspacePutFileInfo],
                        ctx: Optional[InvocationContext] = None) -> None:
        for file in files:
            if not file.path:
                raise ValueError("empty file path")
            relative = normalize_remote_relative(file.path)
            remote = join_remote(ws.path, relative)
            parent = posixpath.dirname(remote)
            # The workspace root is guaranteed by create_workspace; only
            # intermediate subdirectories need an explicit mkdir.
            if parent and parent != ws.path:
                await self._mkdir(parent)
            await self._client.write_file_bytes(remote, file.content or b"")
        logger.debug("Cube put %d files into %s", len(files), ws.path)

    @override
    async def stage_directory(self,
                              ws: WorkspaceInfo,
                              src: str,
                              dst: str,
                              opt: WorkspaceStageOptions,
                              ctx: Optional[InvocationContext] = None) -> None:
        if not src:
            raise ValueError("stage_directory src is empty")
        local = Path(os.path.abspath(src))
        if not local.exists() or not local.is_dir():
            raise FileNotFoundError(f"stage_directory src not found: {src}")
        target = ws.path if not dst else join_remote(ws.path, normalize_remote_relative(dst, allow_current=True))
        await self._client.upload_path(local, target)
        if opt.read_only:
            # Surfacing chmod failures is critical: silently swallowing them
            # would leave the directory writable while callers believe the
            # read_only invariant was honoured.
            result = await self._client.commands_run(f"chmod -R a-w {shell_quote(target)}", timeout=self._timeout)
            if result.exit_code != 0:
                raise RuntimeError(f"failed to make {target} read-only: {result.stderr or result.stdout}")

    @override
    async def stage_inputs(self,
                           ws: WorkspaceInfo,
                           specs: List[WorkspaceInputSpec],
                           ctx: Optional[InvocationContext] = None) -> None:
        for spec in specs:
            if not spec.src:
                continue
            dst_rel = (spec.dst or "").strip()
            if not dst_rel:
                # Default destination: work/inputs/<basename of src>.
                dst_rel = posixpath.join(DIR_WORK, "inputs", _input_default_name(spec.src))
            dst_rel = normalize_remote_relative(dst_rel)
            dst_abs = join_remote(ws.path, dst_rel)
            await self._mkdir(posixpath.dirname(dst_abs))

            # Dispatch on the src scheme; unknown schemes are rejected.
            if spec.src.startswith("artifact://"):
                if ctx is None:
                    raise ValueError("Context is required to load artifacts")
                ref = spec.src.removeprefix("artifact://")
                name, version = parse_artifact_ref(ref)
                content, _ = await load_artifact_helper(ctx, name, version)
                await self._client.write_file_bytes(dst_abs, content)
            elif spec.src.startswith("host://"):
                host_path = Path(spec.src.removeprefix("host://"))
                if not host_path.exists():
                    raise FileNotFoundError(f"host path not found: {host_path}")
                await self._client.upload_path(host_path, dst_abs)
            elif spec.src.startswith("workspace://"):
                rel = normalize_remote_relative(spec.src.removeprefix("workspace://"))
                src_abs = join_remote(ws.path, rel)
                await self._copy_remote(src_abs, dst_abs)
            elif spec.src.startswith("skill://"):
                rest = normalize_remote_relative(spec.src.removeprefix("skill://"))
                src_abs = join_remote(join_remote(ws.path, DIR_SKILLS), rest)
                await self._copy_remote(src_abs, dst_abs)
            else:
                raise ValueError(f"unsupported input scheme: {spec.src!r}")
        logger.debug("Cube staged %d inputs into %s", len(specs), ws.path)

    @override
    async def collect(self,
                      ws: WorkspaceInfo,
                      patterns: List[str],
                      ctx: Optional[InvocationContext] = None) -> List[CodeFile]:
        matches = await self._glob(ws.path, normalize_globs(patterns))
        files = await build_code_files(ws.path, matches, self._fetch_file)
        logger.debug("Cube collected %d files from %s", len(files), ws.path)
        return files

    @override
    async def collect_outputs(self,
                              ws: WorkspaceInfo,
                              spec: WorkspaceOutputSpec,
                              ctx: Optional[InvocationContext] = None) -> ManifestOutput:
        matches = await self._glob(ws.path, normalize_globs(spec.globs))
        manifest, _, _ = await build_manifest_output(ws.path, spec, matches, self._fetch_file, ctx)
        logger.debug("Cube collected %d outputs from %s", len(manifest.files), ws.path)
        return manifest

    async def _fetch_file(self, full_path: str, max_bytes: int) -> Tuple[bytes, int]:
        """Fetcher contract for :func:`utils.build_code_files` /
        :func:`utils.build_manifest_output`.

        Cube exposes no cheap ``stat`` RPC, so we read the full payload
        and slice locally; ``raw_size`` reflects the true on-disk size
        so the shared helpers can still report ``truncated`` /
        ``limits_hit`` accurately.
        """
        data = await self._client.read_file_bytes(full_path)
        return data[:max_bytes], len(data)

    async def _mkdir(self, remote_abs: str) -> None:
        if not remote_abs:
            return
        result = await self._client.commands_run(f"mkdir -p {shell_quote(remote_abs)}", timeout=self._timeout)
        if result.exit_code != 0:
            raise RuntimeError(f"mkdir -p failed: {result.stderr or result.stdout}")

    async def _copy_remote(self, src: str, dst: str) -> None:
        await self._mkdir(posixpath.dirname(dst))
        # Defensive rm before cp -a to avoid the long-standing POSIX
        # directory-footgun: when DST already exists as a directory,
        # ``cp -a SRC DST`` copies SRC *into* DST as DST/basename(SRC),
        # nesting stale data instead of replacing it. Removing DST first
        # makes the operation idempotent across repeated stage_inputs
        # calls targeting the same destination.
        rm_result = await self._client.commands_run(
            f"rm -rf {shell_quote(dst)}",
            timeout=self._timeout,
        )
        if rm_result.exit_code != 0:
            raise RuntimeError(f"remote rm failed: {rm_result.stderr or rm_result.stdout}")
        result = await self._client.commands_run(
            f"cp -a {shell_quote(src)} {shell_quote(dst)}",
            timeout=self._timeout,
        )
        if result.exit_code != 0:
            raise RuntimeError(f"remote cp failed: {result.stderr or result.stdout}")

    async def _glob(self, ws_path: str, patterns: List[str]) -> List[str]:
        if not patterns:
            return []
        # Patterns may contain spaces (e.g. "my dir/*.txt"). The naive shape
        # `for f in $p` first word-splits $p on IFS *and only then* globs each
        # token separately — turning "my dir/*.txt" into two patterns "my"
        # and "dir/*.txt", neither of which matches. Quoting `"$p"` would
        # suppress word-splitting but also disables globbing.
        #
        # Fix: pass patterns via a bash array (preserves spaces per element),
        # then temporarily set IFS= so the unquoted `$p` inside `matches=( $p )`
        # is *not* word-split, while bash still performs path expansion on it.
        # `compgen -G` is not used here because it does not honour `globstar`.
+ array_literal = " ".join(shell_quote(p) for p in patterns) + cmd = (f"cd {shell_quote(ws_path)} && " + f"shopt -s globstar nullglob dotglob; " + f"patterns=({array_literal}); " + f"_saved_ifs=$IFS; IFS=; " + f'for p in "${{patterns[@]}}"; do ' + f"matches=( $p ); " + f'for f in "${{matches[@]}}"; do ' + f'[ -f "$f" ] && printf \'%s\\n\' "$(pwd)/$f"; ' + f"done; " + f"done; " + f"IFS=$_saved_ifs") + result = await self._client.commands_run(cmd, timeout=self._timeout) + if result.exit_code != 0: + raise RuntimeError(f"glob failed: {result.stderr or result.stdout}") + out: List[str] = [] + for line in result.stdout.splitlines(): + line = line.strip() + if line: + out.append(line) + return out + + +class CubeProgramRunner(BaseProgramRunner): + """Runs ``WorkspaceRunProgramSpec`` jobs inside the Cube sandbox. + + Follows the workspace-relative-cwd semantic shared by Container/Local + runners (`WorkspaceRunProgramSpec.cwd` is rooted at ``ws.path``) and + aligns with `LocalProgramRunner` in auto-creating the resolved cwd. 
+ """ + + def __init__( + self, + client: CubeSandboxClient, + command_timeout: float, + provider: Optional[RunEnvProvider] = None, + enable_provider_env: bool = False, + ): + super().__init__(provider=provider, enable_provider_env=enable_provider_env) + self._client = client + self._timeout = command_timeout + + @override + async def run_program(self, + ws: WorkspaceInfo, + spec: WorkspaceRunProgramSpec, + ctx: Optional[InvocationContext] = None) -> WorkspaceRunResult: + spec = self._apply_provider_env(spec, ctx) + cwd = ws.path if not spec.cwd else join_remote(ws.path, normalize_remote_relative(spec.cwd, allow_current=True)) + + run_dir = join_remote(ws.path, posixpath.join(DIR_RUNS, f"run_{time.strftime('%Y%m%dT%H%M%S')}")) + out_dir = join_remote(ws.path, DIR_OUT) + skills_dir = join_remote(ws.path, DIR_SKILLS) + work_dir = join_remote(ws.path, DIR_WORK) + + env: dict[str, str] = { + WORKSPACE_ENV_DIR_KEY: ws.path, + ENV_SKILLS_DIR: skills_dir, + ENV_WORK_DIR: work_dir, + ENV_OUTPUT_DIR: out_dir, + ENV_RUN_DIR: run_dir, + } + env.update(spec.env or {}) + + # Single shell pipeline: ensure run_dir + cwd exist, cd, exec command. + parts = [ + "set -e", + f"mkdir -p {shell_quote(run_dir)} {shell_quote(cwd)}", + f"cd {shell_quote(cwd)}", + ] + argv = [shell_quote(spec.cmd)] + [shell_quote(arg) for arg in (spec.args or [])] + parts.append(" ".join(argv)) + shell_cmd = "; ".join(parts) + + timeout = float(spec.timeout) if spec.timeout and spec.timeout > 0 else float(DEFAULT_TIMEOUT_SEC) + stdin_bytes = spec.stdin.encode("utf-8") if spec.stdin else None + + start = time.time() + result = await self._client.commands_run( + shell_cmd, + env=env, + stdin=stdin_bytes, + timeout=timeout, + ) + return WorkspaceRunResult( + stdout=result.stdout, + stderr=result.stderr, + exit_code=result.exit_code, + duration=time.time() - start, + ) + + +class CubeWorkspaceRuntime(BaseWorkspaceRuntime): + """Cube/E2B-backed workspace runtime. 
+ + Depends only on the public :class:`CubeSandboxClient` primitive, not on + :class:`CubeCodeExecutor`. Use :func:`create_cube_workspace_runtime` + when you have an executor and want to share its sandbox; pass a client + directly when integrating with a non-executor caller. + """ + + def __init__( + self, + client: CubeSandboxClient, + *, + remote_workspace: str, + execute_timeout: float, + provider: Optional[RunEnvProvider] = None, + enable_provider_env: bool = False, + ): + self._client = client + self._fs = CubeWorkspaceFS(client, execute_timeout) + self._manager = CubeWorkspaceManager(client, remote_workspace, execute_timeout) + self._runner = CubeProgramRunner( + client, + execute_timeout, + provider=provider, + enable_provider_env=enable_provider_env, + ) + + @override + def manager(self, ctx: Optional[InvocationContext] = None) -> CubeWorkspaceManager: + return self._manager + + @override + def fs(self, ctx: Optional[InvocationContext] = None) -> CubeWorkspaceFS: + return self._fs + + @override + def runner(self, ctx: Optional[InvocationContext] = None) -> CubeProgramRunner: + return self._runner + + @override + def describe(self, ctx: Optional[InvocationContext] = None) -> WorkspaceCapabilities: + return WorkspaceCapabilities( + isolation="cube", + network_allowed=True, + read_only_mount=False, + streaming=False, + ) + + +def create_cube_workspace_runtime( + executor: CubeCodeExecutor, + *, + workspace_cfg: Optional[CubeWorkspaceRuntimeConfig] = None, + provider: Optional[RunEnvProvider] = None, + enable_provider_env: bool = False, +) -> CubeWorkspaceRuntime: + """Construct a :class:`CubeWorkspaceRuntime` sharing ``executor``'s sandbox. 
+ + Convenience wrapper that: + + - reuses the live :class:`CubeSandboxClient` already opened by + ``executor`` (no second remote handshake), + - takes ``execute_timeout`` from ``executor.config`` (sandbox-wide + command timeout — naturally shared with the runtime), and + - takes workspace-only settings from ``workspace_cfg`` (defaulting + to :class:`CubeWorkspaceRuntimeConfig` defaults when omitted). + + For lower-level integrations, construct :class:`CubeWorkspaceRuntime` + directly with an explicit client + ``remote_workspace`` + + ``execute_timeout``. + """ + ws_cfg = workspace_cfg or CubeWorkspaceRuntimeConfig() + exec_cfg = executor.config + return CubeWorkspaceRuntime( + executor.sandbox_client, + remote_workspace=ws_cfg.remote_workspace, + execute_timeout=exec_cfg.execute_timeout, + provider=provider, + enable_provider_env=enable_provider_env, + ) diff --git a/trpc_agent_sdk/code_executors/cube/_sandbox.py b/trpc_agent_sdk/code_executors/cube/_sandbox.py new file mode 100644 index 0000000..856e751 --- /dev/null +++ b/trpc_agent_sdk/code_executors/cube/_sandbox.py @@ -0,0 +1,306 @@ +# Tencent is pleased to support the open source community by making tRPC-Agent-Python available. +# +# Copyright (C) 2026 Tencent. All rights reserved. +# +# tRPC-Agent-Python is licensed under Apache-2.0. +"""Cube/E2B sandbox client. + +Owns the :class:`AsyncSandbox` lifetime and exposes the few primitives +the SDK code executor and workspace runtime are built on top of: + +- **Lifecycle** — :meth:`open_new`, :meth:`open_existing`, :meth:`close`, + :meth:`destroy`, :meth:`assert_running`, :meth:`set_timeout`. +- **Command execution** — :meth:`commands_run` (always returns a + structured :class:`CubeCommandResult`; non-zero exit codes never + raise). +- **File primitives** — :meth:`upload_path` / :meth:`download_path` + (auto-dispatch file vs directory; directories go through the tar + protocol in :mod:`._transfer`), plus + :meth:`read_file_bytes` / :meth:`write_file_bytes`. 
Pure path/quote helpers live in :mod:`._paths`. The tar-based directory
transfer protocol lives in :mod:`._transfer`. The e2b vendor seam
(lazy import + ``user=`` constant) lives in :mod:`._e2b`. This module
is intentionally the only place that holds an ``AsyncSandbox`` reference
and therefore is the only place that needs to absorb e2b's quirks
(``CommandExitException`` / ``"STOPPED"`` /
``SandboxNotFoundException``).
"""

from __future__ import annotations

import asyncio
from dataclasses import dataclass
from pathlib import Path
from typing import TYPE_CHECKING
from typing import Any
from typing import Mapping
from typing import Optional

from trpc_agent_sdk.log import logger

from ._e2b import _GUEST_USER
from ._e2b import _import_e2b
from ._paths import wrap_stdin_heredoc
from ._transfer import OnExisting
from ._transfer import download_directory_via_tar
from ._transfer import reserve_local_destination
from ._transfer import upload_directory_via_tar
from ._types import CubeCodeExecutorConfig

if TYPE_CHECKING:
    from e2b_code_interpreter import AsyncSandbox


@dataclass
class CubeCommandResult:
    """Structured result of a single command run inside the sandbox.

    Non-zero exit codes are returned, not raised. This intentionally
    absorbs the e2b SDK's :class:`CommandExitException` so callers always
    see a structured return value (matches the local/container
    code-executor behavior).
    """

    stdout: str
    stderr: str
    exit_code: int
    duration: float


class CubeSandboxClient:
    """Thin public wrapper around an :class:`AsyncSandbox` with SDK semantics.

    Holds the lifetime of one Cube/E2B remote sandbox and exposes the
    primitives :class:`CubeCodeExecutor` and :class:`CubeWorkspaceRuntime`
    are built on top of. External adapters (e.g. hermes' ``HarnessSandbox``)
    can also depend on this directly without pulling in the workspace
    runtime contract.

    Semantics:

    - ``close()`` is a no-op (drops the local handle only).
    - ``destroy()`` is the only place that calls ``kill()`` and tolerates
      the "already STOPPED" / :class:`SandboxNotFoundException`
      workarounds.
    - ``commands_run()`` always returns a structured result; non-zero
      exit codes never raise.
    - ``upload_path`` / ``download_path`` auto-dispatch file vs directory
      and preserve symlinks/perms via tar (see :mod:`._transfer`).

    Construct via :meth:`open_new` or :meth:`open_existing` rather than
    the constructor directly.
    """

    def __init__(self, sandbox: "AsyncSandbox", *, idle_timeout: int, execute_timeout: float):
        # None after close()/destroy(); _require() guards all access.
        self._sbx: Optional["AsyncSandbox"] = sandbox
        self._idle_timeout = idle_timeout
        self._execute_timeout = execute_timeout

    @property
    def sandbox_id(self) -> str:
        sbx = self._require()
        return sbx.sandbox_id

    @classmethod
    async def open_new(cls, cfg: CubeCodeExecutorConfig) -> "CubeSandboxClient":
        """Create a brand-new remote sandbox."""
        e2b = _import_e2b()
        sbx = await e2b.AsyncSandbox.create(
            template=cfg.resolve_template(),
            api_url=cfg.resolve_api_url(),
            api_key=cfg.resolve_api_key(),
            timeout=cfg.idle_timeout,
        )
        return cls(sbx, idle_timeout=cfg.idle_timeout, execute_timeout=cfg.execute_timeout)

    @classmethod
    async def open_existing(cls, sandbox_id: str, cfg: CubeCodeExecutorConfig) -> "CubeSandboxClient":
        """Attach to an existing remote sandbox and assert it is RUNNING.

        Raises:
            SandboxNotFoundException: the sandbox is gone (caller decides
                whether to clear its locator and recreate).
            SandboxException: the sandbox is in a non-RUNNING state (e.g.
                PAUSED); caller should not silently overwrite locator
                state.
        """
        e2b = _import_e2b()
        sbx = await e2b.AsyncSandbox.connect(
            sandbox_id,
            api_url=cfg.resolve_api_url(),
            api_key=cfg.resolve_api_key(),
        )
        client = cls(sbx, idle_timeout=cfg.idle_timeout, execute_timeout=cfg.execute_timeout)
        await client.assert_running()
        return client

    def close(self) -> None:
        """Drop the local sandbox handle. Never kills the remote sandbox."""
        self._sbx = None

    async def destroy(self) -> None:
        """Explicitly kill the remote sandbox.

        Tolerates :class:`SandboxNotFoundException` (already gone) and
        :class:`SandboxException` whose message contains ``"STOPPED"``
        (Cube refuses kill on already-stopped instances). Other errors
        propagate.
        """
        sbx = self._sbx
        if sbx is None:
            return
        e2b = _import_e2b()
        try:
            await sbx.kill()
        except e2b.SandboxNotFoundException as exc:
            logger.info("Cube sandbox %s already gone during kill: %s", sbx.sandbox_id, exc)
        except e2b.SandboxException as exc:
            if "STOPPED" in str(exc):
                logger.info("Cube sandbox %s already stopped during kill: %s", sbx.sandbox_id, exc)
            else:
                raise
        finally:
            # The local handle is dropped even when kill() raised, so the
            # client is unusable afterwards either way.
            self._sbx = None

    async def assert_running(self) -> None:
        """Verify the sandbox is RUNNING; reject PAUSED and surface stale ids.

        - ``get_info`` raises :class:`SandboxNotFoundException` if
          killed/expired.
        - PAUSED state raises :class:`SandboxException` so callers do
          not silently discard operator-managed pause state.
        """
        sbx = self._require()
        e2b = _import_e2b()
        info = await sbx.get_info(request_timeout=self._execute_timeout)
        if info.state != e2b.SandboxState.RUNNING:
            raise e2b.SandboxException(f"Cube sandbox {sbx.sandbox_id} is in state {info.state.value!r}, "
                                       f"expected {e2b.SandboxState.RUNNING.value!r}.")

    async def set_timeout(self, seconds: int) -> None:
        """Best-effort idle-timeout renewal.
        ``seconds`` is integer because the underlying e2b ``set_timeout``
        takes integer seconds; previously a ``float`` would be silently
        truncated by ``int(...)`` (e.g. ``0.9`` → ``0``, which most
        vendor APIs interpret as "no timeout" / "expire immediately").
        """
        sbx = self._require()
        try:
            await sbx.set_timeout(seconds)
        except Exception as exc:  # pylint: disable=broad-exception-caught
            # Deliberately best-effort: a failed renewal must never fail
            # the caller's command; the failure is logged at debug only.
            logger.debug("Cube sandbox %s set_timeout failed: %s", sbx.sandbox_id, exc)

    async def commands_run(
        self,
        command: str,
        *,
        cwd: Optional[str] = None,
        env: Optional[Mapping[str, str]] = None,
        stdin: Optional[bytes] = None,
        timeout: Optional[float] = None,
    ) -> CubeCommandResult:
        """Run a single shell command and return a structured result.

        Non-zero exit codes never raise. Stdin (when provided) is encoded
        as a bash heredoc because the e2b SDK's ``stdin`` flag is not a
        data channel.
        """
        sbx = self._require()
        e2b = _import_e2b()
        if stdin is not None:
            command = wrap_stdin_heredoc(command, stdin)
        kwargs: dict[str, Any] = {
            "envs": dict(env or {}),
            "user": _GUEST_USER,
            "timeout": float(timeout if timeout is not None else self._execute_timeout),
        }
        if cwd:
            kwargs["cwd"] = cwd

        # Monotonic clock so the reported duration is immune to wall
        # clock adjustments.
        loop = asyncio.get_running_loop()
        start = loop.time()
        try:
            result = await sbx.commands.run(command, **kwargs)
        except e2b.CommandExitException as exc:
            # Non-zero exit: the exception object carries the same
            # stdout/stderr/exit_code attributes as a success result.
            result = exc
        duration = loop.time() - start

        # Every successful round-trip renews the idle timeout.
        await self.set_timeout(self._idle_timeout)

        # NOTE(review): a missing/None exit_code coerces to 0 (success)
        # via ``or 0`` — confirm the e2b SDK never reports None for a
        # failed command.
        return CubeCommandResult(
            stdout=str(getattr(result, "stdout", "") or ""),
            stderr=str(getattr(result, "stderr", "") or ""),
            exit_code=int(getattr(result, "exit_code", 0) or 0),
            duration=float(duration),
        )

    async def upload_path(self, local: Path, remote_abs: str) -> None:
        """Upload a host file or directory to an absolute remote path.

        Directories go through the tar protocol so symlinks, permissions
        and special files are preserved in one round-trip. Single files
        and directories alike route through the client's own
        :meth:`write_file_bytes` / :meth:`commands_run`, so all e2b
        ``user=`` plumbing and ``CommandExitException`` absorption stays
        DRY.
        """
        if local.is_dir():
            await upload_directory_via_tar(self, local, remote_abs)
            return
        await self.write_file_bytes(remote_abs, local.read_bytes())

    async def download_path(
        self,
        remote_abs: str,
        local: Path,
        *,
        on_existing: OnExisting = "error",
    ) -> None:
        """Download a remote file or directory to a host path.

        Args:
            remote_abs: Absolute remote path to download.
            local: Host destination path.
            on_existing: Collision policy when ``local`` already exists.
                ``"error"`` (default) refuses to clobber; ``"replace"``
                removes the existing destination first; ``"merge"``
                overlays the tar payload onto an existing directory
                (siblings not in the payload are preserved). For
                file/symlink destinations ``"merge"`` behaves like
                ``"replace"`` because a regular file cannot be merged
                into. Missing destinations and empty directories are
                accepted regardless of this flag.
        """
        is_remote_dir = await self._is_remote_dir(remote_abs)

        # Enforce the collision policy before touching the filesystem.
        reserve_local_destination(local, on_existing=on_existing)
        local.parent.mkdir(parents=True, exist_ok=True)
        if is_remote_dir:
            await download_directory_via_tar(self, remote_abs, local)
            return
        local.write_bytes(await self.read_file_bytes(remote_abs))

    async def read_file_bytes(self, remote_abs: str) -> bytes:
        """Read a remote file's raw bytes."""
        sbx = self._require()
        data = await sbx.files.read(remote_abs, format="bytes", user=_GUEST_USER)
        return data if isinstance(data, bytes) else bytes(data or b"")

    async def write_file_bytes(self, remote_abs: str, data: bytes) -> None:
        """Write raw bytes to a remote file."""
        sbx = self._require()
        await sbx.files.write(remote_abs, data, user=_GUEST_USER)

    async def _is_remote_dir(self, remote_abs: str) -> bool:
        """Return whether ``remote_abs`` resolves to a directory inside the sandbox."""
        sbx = self._require()
        e2b = _import_e2b()
        info = await sbx.files.get_info(remote_abs, user=_GUEST_USER)
        return info.type == e2b.FileType.DIR

    def _require(self) -> "AsyncSandbox":
        if self._sbx is None:
            raise RuntimeError("CubeSandboxClient is closed.")
        return self._sbx


# --- patch boundary: new file trpc_agent_sdk/code_executors/cube/_transfer.py ---
# Tencent is pleased to support the open source community by making tRPC-Agent-Python available.
#
# Copyright (C) 2026 Tencent. All rights reserved.
#
# tRPC-Agent-Python is licensed under Apache-2.0.
"""Tar-based directory transfer protocol for the Cube package.

Self-contained protocol layered on :class:`CubeSandboxClient`'s public
primitives (:meth:`commands_run`, :meth:`read_file_bytes`,
:meth:`write_file_bytes`).
Used by :meth:`CubeSandboxClient.upload_path`
/ :meth:`download_path` to round-trip whole directory trees while
preserving symlinks, permissions, and special files (mirrors
:class:`ContainerWorkspaceFS` semantics).

Kept separate from :mod:`._sandbox` so the client itself stays focused
on lifecycle/command/file primitives, and so this protocol can be unit
tested against a fake :class:`CubeSandboxClient`-shaped object that only
exposes ``commands_run`` / ``read_file_bytes`` / ``write_file_bytes``.

This module deliberately does **not** import e2b — all vendor quirks
(``CommandExitException`` absorption, ``user=`` plumbing, idle-timeout
renewal) are absorbed inside :class:`CubeSandboxClient`. Any change to
those quirks happens in exactly one place.
"""

from __future__ import annotations

import io
import posixpath
import secrets
import shutil
import tarfile
from pathlib import Path
from typing import TYPE_CHECKING
from typing import Literal

from ._paths import shell_quote

if TYPE_CHECKING:
    # Quoted forward reference to break the runtime import cycle:
    # `_sandbox.py` imports the transfer functions, and the transfer
    # functions in turn want the `CubeSandboxClient` *type* (for
    # type-checkers / IDEs) but only its *duck-typed* surface at runtime.
    from ._sandbox import CubeSandboxClient

# Collision-handling mode for ``download_path`` when the local
# destination already exists. See :func:`reserve_local_destination`.
#
# - ``"error"``  — refuse to clobber (default). Non-empty dir / existing
#                  file / existing symlink → :class:`FileExistsError`.
# - ``"replace"``— remove the existing destination before extracting.
#                  Directories are ``shutil.rmtree``'d (symlinks are
#                  ``unlink``'d first to avoid following the link).
# - ``"merge"``  — overlay onto an existing directory: leave siblings
#                  intact and let the tar payload write its own entries
#                  on top. Existing files/symlinks at the destination
#                  name are still unlinked (you cannot merge into a
#                  regular file).
OnExisting = Literal["error", "replace", "merge"]


def reserve_local_destination(
    local: Path,
    *,
    on_existing: OnExisting = "error",
) -> None:
    """Enforce ``download_path``'s collision policy on the local target.

    - Missing destinations and empty directories are accepted regardless
      of ``on_existing`` — there is no content to clobber.
    - ``"error"`` (default) raises :class:`FileExistsError` when the
      destination is a non-empty directory, a regular file, or any
      symlink (including broken symlinks — the name is taken).
    - ``"replace"`` removes the existing destination (``shutil.rmtree``
      for directories, ``unlink`` for files/symlinks) so the caller
      extracts into a clean slot.
    - ``"merge"`` leaves an existing non-empty directory in place so the
      tar payload overlays its entries; for file/symlink destinations
      it still unlinks because a regular file cannot be merged into.
    """
    # Missing path: nothing to reserve. ``is_symlink()`` handles the
    # broken-symlink case where ``exists()`` returns False but the name
    # is still taken.
    if not local.exists() and not local.is_symlink():
        return

    is_real_dir = local.is_dir() and not local.is_symlink()
    if is_real_dir:
        # Cheap emptiness probe: a single iterdir() step instead of
        # materializing the whole listing.
        try:
            next(local.iterdir())
        except StopIteration:
            return
        if on_existing == "error":
            raise FileExistsError(f"download destination is non-empty "
                                  f"(pass on_existing='replace' or 'merge' to resolve): {local}")
        if on_existing == "replace":
            shutil.rmtree(local)
        # "merge": leave the directory in place; tar.extractall overlays.
        return

    # File or symlink (regular file, symlink-to-file, symlink-to-dir,
    # broken symlink). A regular file cannot be "merged" into — merge
    # falls back to replace for non-dir destinations.
    if on_existing == "error":
        raise FileExistsError(f"download destination already exists "
                              f"(pass on_existing='replace' to overwrite): {local}")
    local.unlink()


async def upload_directory_via_tar(
    client: "CubeSandboxClient",
    local_dir: Path,
    remote_abs: str,
) -> None:
    """Upload an entire host directory to ``remote_abs`` via tar.

    The whole tree (symlinks, permissions, special files) is preserved
    in a single round-trip. Requires ``tar`` in the sandbox image (true
    for any standard unix template).
    """
    # Archive rooted at "." so extraction lands directly in remote_abs.
    buf = io.BytesIO()
    with tarfile.open(fileobj=buf, mode="w") as tar:
        tar.add(str(local_dir), arcname=".")
    payload = buf.getvalue()

    token = secrets.token_hex(8)
    temp_remote = f"/tmp/.cube_upload_{token}.tar"
    normalized = posixpath.normpath(remote_abs)
    try:
        await client.write_file_bytes(temp_remote, payload)
        extract_cmd = (f"set -e; mkdir -p {shell_quote(normalized)}; "
                       f"tar -xf {shell_quote(temp_remote)} -C {shell_quote(normalized)}")
        await _run_protocol_step(client, extract_cmd, op="upload tar extract")
    finally:
        # Best-effort cleanup of the staging tarball; failure here must
        # not mask an extraction error.
        await _run_protocol_step(
            client,
            f"rm -f {shell_quote(temp_remote)}",
            op="upload tar cleanup",
            swallow=True,
        )


async def download_directory_via_tar(
    client: "CubeSandboxClient",
    remote_dir: str,
    local: Path,
) -> None:
    """Download an entire remote directory tree to ``local`` via tar.

    Round-trip pair of :func:`upload_directory_via_tar`; symlinks,
    permissions, and special files are preserved.
    """
    token = secrets.token_hex(8)
    temp_remote = f"/tmp/.cube_download_{token}.tar"
    try:
        create_cmd = f"tar -cf {shell_quote(temp_remote)} -C {shell_quote(remote_dir)} ."
        await _run_protocol_step(client, create_cmd, op="download tar create")
        payload = await client.read_file_bytes(temp_remote)
    finally:
        await _run_protocol_step(
            client,
            f"rm -f {shell_quote(temp_remote)}",
            op="download tar cleanup",
            swallow=True,
        )

    if local.exists() and not local.is_dir():
        local.unlink()
    local.mkdir(parents=True, exist_ok=True)
    with tarfile.open(fileobj=io.BytesIO(payload), mode="r") as tar:
        # NOTE(review): the py>=3.12 "data" filter sanitizes absolute /
        # parent-escaping members; the py3.10/3.11 fallback extracts
        # unfiltered — confirm sandbox-produced archives are trusted
        # enough for that path.
        try:
            tar.extractall(local, filter="data")  # type: ignore[arg-type]  # py>=3.12
        except TypeError:
            tar.extractall(local)  # noqa: S202 — py3.10/3.11 fallback


async def _run_protocol_step(
    client: "CubeSandboxClient",
    command: str,
    *,
    op: str,
    swallow: bool = False,
) -> None:
    """Run a transfer-protocol shell step (mkdir/tar/rm) and surface failures.

    Goes through the client's :meth:`commands_run` so all e2b vendor
    quirks (``CommandExitException`` absorption, ``user=`` plumbing,
    idle-timeout renewal) are handled in exactly one place. Distinct
    from :meth:`CubeSandboxClient.commands_run` only in that we *raise*
    on non-zero exit instead of returning the structured result —
    these commands are invariants of the transfer contract, so a failed
    ``mkdir``/``tar`` means the transfer didn't happen and the caller
    can't sensibly continue. ``swallow=True`` is reserved for
    best-effort cleanup steps (rm-on-finally).
    """
    result = await client.commands_run(command)
    if result.exit_code != 0 and not swallow:
        raise RuntimeError(f"cube {op} failed (exit={result.exit_code}): {result.stderr}")


# --- patch boundary: new file trpc_agent_sdk/code_executors/cube/_types.py ---
# Tencent is pleased to support the open source community by making tRPC-Agent-Python available.
#
# Copyright (C) 2026 Tencent. All rights reserved.
+# +# tRPC-Agent-Python is licensed under Apache-2.0. +"""Configuration types for the Cube/E2B code executor.""" + +from __future__ import annotations + +import os +from dataclasses import dataclass +from typing import Optional + +DEFAULT_REMOTE_WORKSPACE = "/workspace/cube_agent" +DEFAULT_EXECUTE_TIMEOUT = 60.0 +DEFAULT_IDLE_TIMEOUT = 3600 + +ENV_API_URL = "E2B_API_URL" +ENV_API_KEY = "E2B_API_KEY" +ENV_TEMPLATE = "CUBE_TEMPLATE_ID" + + +@dataclass +class CubeCodeExecutorConfig: + """Configuration for :class:`CubeCodeExecutor`. + + Holds only the sandbox-lifecycle and command-execution settings the + bare code executor consumes. Workspace-runtime knobs (e.g. the + remote workspace root) live in :class:`CubeWorkspaceRuntimeConfig` + so executor-only callers never see fields they don't use (ISP). + + The optional ``e2b-code-interpreter`` dependency must be installed + (it transitively pulls in ``e2b``). Credentials may be supplied here + or through ``E2B_API_URL`` / ``E2B_API_KEY``. The Cube template id + may be supplied here or through ``CUBE_TEMPLATE_ID``. + """ + + template: Optional[str] = None + """Cube template id for new sandboxes. Falls back to ``CUBE_TEMPLATE_ID``.""" + + api_url: Optional[str] = None + """E2B-compatible Cube API URL. Falls back to ``E2B_API_URL``.""" + + api_key: Optional[str] = None + """E2B API key. Falls back to ``E2B_API_KEY``.""" + + sandbox_id: Optional[str] = None + """Existing remote sandbox id. When set, factories attach instead of create.""" + + execute_timeout: float = DEFAULT_EXECUTE_TIMEOUT + """Default per-command timeout in seconds. + + ``float`` because per-command latency can legitimately be sub-second + (short shell commands, tight test loops). Shared by the bare + executor and (transitively) by :class:`CubeWorkspaceRuntime`, since + the runtime drives commands through the same + :class:`CubeSandboxClient` and therefore inherits its default. 
Stays + on the executor cfg because the client itself reads it during + construction. + """ + + idle_timeout: int = DEFAULT_IDLE_TIMEOUT + """Sandbox idle lifetime in seconds; renewed on every command. + + ``int`` (not ``float``) because the underlying e2b APIs + (``AsyncSandbox.create(timeout=...)`` and ``sbx.set_timeout(...)``) + take integer seconds — sub-second precision is meaningless for a + sandbox lifetime measured in minutes/hours. Typing the field as + ``int`` lets static checkers reject ``idle_timeout=0.9`` at the call + site instead of silently truncating it to ``0`` (which most vendor + APIs interpret as "no timeout" or "expire immediately"). + """ + + def __post_init__(self) -> None: + if not isinstance(self.idle_timeout, int) or isinstance(self.idle_timeout, bool): + raise TypeError(f"idle_timeout must be an int (seconds), got " + f"{type(self.idle_timeout).__name__}: {self.idle_timeout!r}") + if self.idle_timeout < 1: + raise ValueError(f"idle_timeout must be >= 1 second, got {self.idle_timeout}") + if self.execute_timeout <= 0: + raise ValueError(f"execute_timeout must be > 0 seconds, got {self.execute_timeout}") + + def resolve_template(self) -> str: + value = self.template or os.getenv(ENV_TEMPLATE) + if not value: + raise ValueError(f"Cube sandbox requires `template` or {ENV_TEMPLATE} env.") + return value + + def resolve_api_url(self) -> str: + value = self.api_url or os.getenv(ENV_API_URL) + if not value: + raise ValueError(f"Cube sandbox requires `api_url` or {ENV_API_URL} env.") + return value + + def resolve_api_key(self) -> str: + value = self.api_key or os.getenv(ENV_API_KEY) + if not value: + raise ValueError(f"Cube sandbox requires `api_key` or {ENV_API_KEY} env.") + return value + + +@dataclass +class CubeWorkspaceRuntimeConfig: + """Configuration for :class:`CubeWorkspaceRuntime`. + + Carries the workspace-only settings the bare :class:`CubeCodeExecutor` + does not consume. 
Kept distinct from :class:`CubeCodeExecutorConfig` + so: + + - executor-only callers (e.g. an agent that just runs code blocks) + never see workspace knobs in their type signatures, and + - future workspace-only fields (``max_upload_size``, custom subdir + names, stage timeouts, ...) can be added here without polluting + the executor cfg. + """ + + remote_workspace: str = DEFAULT_REMOTE_WORKSPACE + """Remote root under which :class:`CubeWorkspaceManager` creates + ``ws__`` subtrees. Defaults to + :data:`DEFAULT_REMOTE_WORKSPACE`. + """ diff --git a/trpc_agent_sdk/code_executors/local/_local_ws_runtime.py b/trpc_agent_sdk/code_executors/local/_local_ws_runtime.py index 0cc6e70..a84e353 100644 --- a/trpc_agent_sdk/code_executors/local/_local_ws_runtime.py +++ b/trpc_agent_sdk/code_executors/local/_local_ws_runtime.py @@ -22,6 +22,7 @@ from pathlib import Path from typing import List from typing import Optional +from typing import Tuple from typing_extensions import override from trpc_agent_sdk.context import InvocationContext @@ -29,7 +30,6 @@ from .._artifacts import load_artifact_helper from .._artifacts import parse_artifact_ref -from .._artifacts import save_artifact_helper from .._base_workspace_runtime import BaseWorkspaceManager from .._base_workspace_runtime import BaseWorkspaceFS from .._base_workspace_runtime import BaseProgramRunner @@ -37,8 +37,6 @@ from .._base_workspace_runtime import RunEnvProvider from .._constants import DEFAULT_FILE_MODE -from .._constants import DEFAULT_MAX_FILES -from .._constants import DEFAULT_MAX_TOTAL_BYTES from .._constants import DEFAULT_TIMEOUT_SEC from .._constants import DIR_OUT from .._constants import DIR_RUNS @@ -48,7 +46,6 @@ from .._constants import ENV_RUN_DIR from .._constants import ENV_SKILLS_DIR from .._constants import ENV_WORK_DIR -from .._constants import MAX_READ_SIZE_BYTES from .._constants import WORKSPACE_ENV_DIR_KEY from .._types import CodeFile from .._types import WorkspaceInfo @@ -58,10 +55,11 @@ from 
.._types import WorkspaceRunResult from .._types import WorkspaceCapabilities from .._types import WorkspaceStageOptions -from .._types import ManifestFileRef from .._types import ManifestOutput from .._types import WorkspaceOutputSpec from .._program_session import BaseProgramSession +from ..utils import build_code_files +from ..utils import build_manifest_output from ..utils import ensure_layout from ..utils import load_metadata from ..utils import save_metadata @@ -69,7 +67,6 @@ from ..utils import OutputRecordMeta from ..utils import normalize_globs from ..utils import collect_files_with_glob -from ..utils import detect_content_type from ..utils import make_symlink from ..utils import copy_path from ..utils import path_join @@ -280,69 +277,88 @@ async def collect(self, Returns: List of matching file references """ - out = [] - root = Path(ws.path) - patterns = normalize_globs(patterns) + real_root, matches = self._enumerate_local_matches(ws.path, normalize_globs(patterns)) + return await build_code_files(real_root, matches, self._fetch_bytes) - # Canonicalize root + def _enumerate_local_matches( + self, + ws_path: str, + patterns: List[str], + ) -> Tuple[str, List[str]]: + """Expand ``patterns`` under ``ws_path`` into absolute paths. + + Resolves symlinks and drops anything that escapes the + canonicalised workspace root, preserving the security property + that the pre-refactor hand-written loop used to enforce. + + Returns ``(real_root, matches)`` where ``real_root`` is the + canonicalised workspace root and ``matches`` is the list of + canonical absolute paths under it. Both are passed to + :func:`build_code_files` / :func:`build_manifest_output` as a + matched pair so the helpers' prefix-stripping ``_relativize`` + operates on canonical-vs-canonical paths. Passing the raw + (un-resolved) ``ws.path`` would silently leak absolute paths as + ``CodeFile.name`` whenever ``ws.path`` itself contains a symlink + component (e.g. 
macOS ``/tmp`` → ``/private/tmp``, Linux scratch + bind-mounts), because the canonical match would not start with + the un-canonical prefix. + """ + root = Path(ws_path) try: real_root = root.resolve() except Exception: # pylint: disable=broad-except real_root = root + real_root_str = real_root.as_posix() - seen = set() - + seen: set[str] = set() + out: List[str] = [] for pattern in patterns: - matches = collect_files_with_glob(ws.path, pattern) - for match_path in matches: + for match_path in collect_files_with_glob(ws_path, pattern): m_abs = Path("/" + match_path.lstrip("/")) - # Ensure it is within root try: m_abs.relative_to(root) except ValueError: continue - - # Collapse symlinks to canonical path and deduplicate try: real_path = m_abs.resolve() except Exception: # pylint: disable=broad-except real_path = m_abs - - # Re-check containment against canonical root try: - name = str(real_path.relative_to(real_root)) + # Re-check containment against canonical root. + real_path.relative_to(real_root) except ValueError: continue - - if name in seen: + key = real_path.as_posix() + if key in seen: continue - - seen.add(name) - content, mime_type = self._read_limited(real_path) - - out.append(CodeFile( - name=name, - content=content, - mime_type=mime_type, - )) - - return out - - def _read_limited(self, path: Path) -> tuple[str, str]: - """Read file with size limit. - - Args: - path: Path to the file - - Returns: - The content and MIME type of the file. + seen.add(key) + out.append(key) + return real_root_str, out + + async def _fetch_bytes(self, full_path: str, max_bytes: int) -> tuple[bytes, int]: + """Fetcher contract for shared collection helpers. + + Reads up to ``max_bytes`` from ``full_path`` and reports the + on-disk size so the helpers can decide truncation flags without + needing a second ``stat`` call. 
+ + On read failure this raises and lets + :func:`build_code_files` / :func:`build_manifest_output` + apply their shared ``application/octet-stream`` sentinel — the + pre-refactor ``_read_limited`` returned that MIME explicitly for + unreadable files, and we preserve that design intent by routing + through the shared helper's except branch instead of swallowing + the error here (which would pass an empty payload through the + happy-path MIME sniffer and mis-label an unreadable ``foo.json`` + as ``application/json``). """ + path = Path(full_path) try: - content = path.read_bytes()[:MAX_READ_SIZE_BYTES] - mime_type = detect_content_type(path, content) - return content.decode('utf-8', errors='ignore'), mime_type - except Exception: # pylint: disable=broad-except - return "", "application/octet-stream" + raw_size = path.stat().st_size + except OSError: + raw_size = 0 + data = path.read_bytes()[:max_bytes] + return data, max(raw_size, len(data)) @override async def stage_inputs( @@ -467,9 +483,11 @@ async def collect_outputs(self, ws: WorkspaceInfo, spec: WorkspaceOutputSpec, ctx: Optional[InvocationContext] = None) -> ManifestOutput: - """Collect outputs from the workspace.""" - """ - Implement declarative collector with limits. + """Collect outputs from the workspace. + + Implements the declarative collector with limits, inline and + save options, records an :class:`OutputRecordMeta` entry in the + workspace metadata. 
Args: ctx: Context for the operation @@ -481,73 +499,10 @@ async def collect_outputs(self, """ ensure_layout(ws.path) - max_files = spec.max_files or DEFAULT_MAX_FILES - max_file_bytes = spec.max_file_bytes or MAX_READ_SIZE_BYTES - max_total_bytes = spec.max_total_bytes or DEFAULT_MAX_TOTAL_BYTES - - left_total = max_total_bytes - globs = normalize_globs(spec.globs) - out = ManifestOutput() + real_root, matches = self._enumerate_local_matches(ws.path, normalize_globs(spec.globs)) + out, saved_names, saved_vers = await build_manifest_output(real_root, spec, matches, self._fetch_bytes, ctx) - saved_names = [] - saved_vers = [] - count = 0 - - for pattern in globs: - matches = collect_files_with_glob(ws.path, pattern) - - for match_path in matches: - if count >= max_files: - out.limits_hit = True - break - - m_abs = Path("/" + match_path.lstrip("/")) - # Ensure it is within workspace - try: - name = str(m_abs.relative_to(ws.path)) - except ValueError: - continue - - # Respect both per-file and total byte limits - limit = min(max_file_bytes, left_total) - - content, mime_type = self._read_limited_with_cap(m_abs, limit) - - # Mark limits hit when a file reached per-file cap - if len(content) >= max_file_bytes: - out.limits_hit = True - - left_total -= len(content) - count += 1 - - file_ref = ManifestFileRef( - name=name, - mime_type=mime_type, - ) - - if spec.inline: - file_ref.content = content - - if spec.save: - save_name = name - if spec.name_template: - save_name = spec.name_template + name - if not ctx: - raise ValueError("Context is required to save artifacts") - ver = await save_artifact_helper(ctx, save_name, content, mime_type) - # Placeholder for artifact saving - file_ref.saved_as = save_name - file_ref.version = ver - saved_names.append(save_name) - saved_vers.append(ver) - - out.files.append(file_ref) - - if left_total <= 0: - out.limits_hit = True - break - - # Record output + # Record output in workspace metadata (local-only bookkeeping). 
md = load_metadata(ws.path) md.outputs.append( OutputRecordMeta( @@ -561,25 +516,6 @@ async def collect_outputs(self, return out - def _read_limited_with_cap( - self, - path: Path, - cap_bytes: int, - ) -> tuple[str, str]: - """Read file with specific capacity limit.""" - if cap_bytes <= 0: - return "", "application/octet-stream" - - if cap_bytes > MAX_READ_SIZE_BYTES: - cap_bytes = MAX_READ_SIZE_BYTES - - try: - content = path.read_bytes()[:cap_bytes] - mime_type = detect_content_type(path, content) - return content.decode('utf-8', errors='ignore'), mime_type - except Exception: # pylint: disable=broad-except - return "", "application/octet-stream" - class LocalProgramRunner(BaseProgramRunner): """Local program runner for executing commands in skill workspaces.""" diff --git a/trpc_agent_sdk/code_executors/utils/__init__.py b/trpc_agent_sdk/code_executors/utils/__init__.py index f203777..a65ca74 100644 --- a/trpc_agent_sdk/code_executors/utils/__init__.py +++ b/trpc_agent_sdk/code_executors/utils/__init__.py @@ -10,6 +10,9 @@ """ from ._code_execution import CodeExecutionUtils +from ._collect import ManifestFetcher +from ._collect import build_code_files +from ._collect import build_manifest_output from ._files import collect_files_with_glob from ._files import copy_dir from ._files import copy_path @@ -31,6 +34,9 @@ __all__ = [ "CodeExecutionUtils", + "ManifestFetcher", + "build_code_files", + "build_manifest_output", "collect_files_with_glob", "copy_dir", "copy_path", diff --git a/trpc_agent_sdk/code_executors/utils/_collect.py b/trpc_agent_sdk/code_executors/utils/_collect.py new file mode 100644 index 0000000..3bc8c19 --- /dev/null +++ b/trpc_agent_sdk/code_executors/utils/_collect.py @@ -0,0 +1,234 @@ +# Tencent is pleased to support the open source community by making tRPC-Agent-Python available. +# +# Copyright (C) 2026 Tencent. All rights reserved. +# +# tRPC-Agent-Python is licensed under Apache-2.0. 
+"""Shared "matches -> models" pipeline for workspace output collection. + +Every workspace backend (local / container / cube) has to walk a list of +matched file paths, read their bytes (with per-file and cumulative +caps), sniff the MIME type, optionally inline the content, and +optionally persist the bytes via the artifact service. The *how-to-read* +part is backend-specific (direct filesystem vs. docker ``get_archive`` +vs. Cube RPC), but everything after "we have the bytes" is identical. + +This module factors that shared tail into two small helpers: + +- :func:`build_code_files` — materialises a ``collect(...)`` call, which + historically returns :class:`CodeFile` and only caps per-file size. +- :func:`build_manifest_output` — materialises a ``collect_outputs(...)`` + call, which honours :class:`WorkspaceOutputSpec` (limits, inline, + save, name_template) and produces a :class:`ManifestOutput`. + +Backends supply a ``fetcher`` coroutine that knows how to fetch the +raw bytes of a single absolute path — bounded by an input byte budget — +plus the *raw* size of the file on the underlying medium. Returning +the raw size separately lets the shared helpers compute +``truncated`` / ``limits_hit`` without requiring the fetcher to read +past the budget. +""" + +from __future__ import annotations + +from typing import Awaitable +from typing import Callable +from typing import List +from typing import Optional +from typing import Tuple + +from trpc_agent_sdk.context import InvocationContext + +from .._artifacts import save_artifact_helper +from .._constants import DEFAULT_MAX_FILES +from .._constants import DEFAULT_MAX_TOTAL_BYTES +from .._constants import MAX_READ_SIZE_BYTES +from .._types import CodeFile +from .._types import ManifestFileRef +from .._types import ManifestOutput +from .._types import WorkspaceOutputSpec +from ._files import detect_content_type + +# A fetcher is an async callable ``(absolute_path, max_bytes) -> (data, raw_size)``. 
+# +# Contract: +# - ``data`` is the file's content truncated to at most ``max_bytes``. If the +# underlying medium cannot cheaply report the full size (e.g. a streaming +# read), the fetcher may return ``raw_size = len(data)``; callers that care +# about truncation must then treat ``len(data) == max_bytes`` as "possibly +# truncated". +# - ``raw_size`` is the size of the file on the underlying medium before any +# truncation, used only to decide ``truncated`` / ``limits_hit`` flags. +# - The fetcher must *not* raise for merely-empty files; it should raise only +# for genuine I/O errors so the backend can surface a meaningful message. +ManifestFetcher = Callable[[str, int], Awaitable[Tuple[bytes, int]]] + + +def _relativize(ws_path: str, full_path: str) -> str: + """Return ``full_path`` stripped of the ``ws.path + "/"`` prefix. + + Kept as a single helper so every backend produces identical relative + paths in :class:`CodeFile` / :class:`ManifestFileRef`. + """ + prefix = ws_path.rstrip("/") + "/" + if full_path.startswith(prefix): + return full_path[len(prefix):] + return full_path + + +async def build_code_files( + ws_path: str, + matches: List[str], + fetcher: ManifestFetcher, + *, + max_read_size: Optional[int] = None, +) -> List[CodeFile]: + """Materialise a :meth:`BaseWorkspaceFS.collect` call. + + Reads each matched path with a single per-file byte cap + (``max_read_size``, defaulting to :data:`MAX_READ_SIZE_BYTES` resolved + at call time so tests can ``monkeypatch.setattr`` the constant), + sniffs the MIME type, and wraps the result in a :class:`CodeFile`. + Duplicate ``rel`` paths are skipped so callers can pass the raw glob + output without pre-deduping. 
+ """ + cap = MAX_READ_SIZE_BYTES if max_read_size is None else max_read_size + seen: set[str] = set() + out: List[CodeFile] = [] + for full_path in matches: + rel = _relativize(ws_path, full_path) + if rel in seen: + continue + seen.add(rel) + try: + data, raw_size = await fetcher(full_path, cap) + except Exception: # pylint: disable=broad-except + # Keep collect() best-effort: a single unreadable file must + # not abort the whole batch. Backends that prefer strict + # semantics can short-circuit themselves before calling us. + out.append(CodeFile(name=rel, content="", mime_type="application/octet-stream")) + continue + mime = detect_content_type(full_path, data) + out.append( + CodeFile( + name=rel, + content=data.decode("utf-8", errors="replace"), + mime_type=mime, + size_bytes=raw_size, + truncated=raw_size > len(data), + )) + return out + + +async def build_manifest_output( + ws_path: str, + spec: WorkspaceOutputSpec, + matches: List[str], + fetcher: ManifestFetcher, + ctx: Optional[InvocationContext], + *, + strict_truncated_save: bool = False, +) -> Tuple[ManifestOutput, List[str], List[int]]: + """Materialise a :meth:`BaseWorkspaceFS.collect_outputs` call. + + Applies ``spec``'s limits (``max_files`` / ``max_file_bytes`` / + ``max_total_bytes``), fills ``inline`` / ``save`` branches, and + produces a :class:`ManifestOutput`. Also returns the list of saved + artifact names and versions so backends that record metadata (e.g. + local's ``OutputRecordMeta``) don't need to re-scan the manifest. + + Args: + ws_path: Absolute workspace path, used to produce relative + ``name`` fields. + spec: The output spec declared by the caller. + matches: Absolute paths already filtered by the backend's glob. + fetcher: Async callable that returns ``(data, raw_size)`` for a + path, capped by a requested byte budget. See + :data:`ManifestFetcher`. + ctx: Invocation context. Required when ``spec.save`` is set, + because artifact persistence goes through it. 
+ strict_truncated_save: When ``True``, raise ``RuntimeError`` if + ``spec.save`` is requested for a file that was truncated by + the per-file cap. Container preserves this "refuse to save + half a binary" behaviour; local/cube historically allow it. + + Returns: + Tuple of ``(manifest, saved_names, saved_versions)``. + """ + max_files = spec.max_files or DEFAULT_MAX_FILES + max_file_bytes = spec.max_file_bytes or MAX_READ_SIZE_BYTES + max_total = spec.max_total_bytes or DEFAULT_MAX_TOTAL_BYTES + + manifest = ManifestOutput() + saved_names: List[str] = [] + saved_versions: List[int] = [] + + seen: set[str] = set() + total_bytes = 0 + count = 0 + + for full_path in matches: + # Check limits *before* fetching so a blown budget doesn't cause + # a useless read of the next big file. + if count >= max_files or total_bytes >= max_total: + manifest.limits_hit = True + break + + rel = _relativize(ws_path, full_path) + if rel in seen: + continue + seen.add(rel) + + # Per-file cap is ``max_file_bytes``, but also clamp to the + # remaining total budget so a single huge file cannot exceed + # ``max_total`` all on its own. + remaining_total = max_total - total_bytes + read_budget = min(max_file_bytes, remaining_total) + if read_budget <= 0: + manifest.limits_hit = True + break + + try: + data, raw_size = await fetcher(full_path, read_budget) + except Exception: # pylint: disable=broad-except + # Mirror ``build_code_files``: a single unreadable file must + # not abort the whole collection. Emit a sentinel entry with + # empty content and the canonical "unknown / unreadable" + # MIME type. This preserves the pre-refactor local behaviour + # (``_read_limited_with_cap`` caught and returned + # ``("", "application/octet-stream")``) and is a small + # tolerance upgrade for the container backend, which used to + # abort on the first transient tar error. 
+ manifest.files.append(ManifestFileRef(name=rel, mime_type="application/octet-stream")) + count += 1 + continue + + # Mark limits_hit if either cap actually bit. + if raw_size > len(data): + manifest.limits_hit = True + + truncated = raw_size > len(data) + if truncated and spec.save and strict_truncated_save: + raise RuntimeError(f"cannot save truncated output file: {rel}") + + total_bytes += len(data) + count += 1 + + mime = detect_content_type(full_path, data) + file_ref = ManifestFileRef(name=rel, mime_type=mime) + + if spec.inline: + file_ref.content = data.decode("utf-8", errors="replace") + + if spec.save: + if ctx is None: + raise ValueError("Context is required to save artifacts") + save_name = (spec.name_template + rel) if spec.name_template else rel + version = await save_artifact_helper(ctx, save_name, data, mime) + file_ref.saved_as = save_name + file_ref.version = version + saved_names.append(save_name) + saved_versions.append(version) + + manifest.files.append(file_ref) + + return manifest, saved_names, saved_versions From 3ebc4c8e1e175e6082d395de17029a2e29e9a8dd Mon Sep 17 00:00:00 2001 From: yuyili Date: Thu, 7 May 2026 19:53:28 +0800 Subject: [PATCH 2/7] =?UTF-8?q?refactor:=20=E6=8C=89=20code=20review=20?= =?UTF-8?q?=E9=87=8D=E6=9E=84=20code=5Fexecutors=EF=BC=8C=E5=BC=BA?= =?UTF-8?q?=E5=8C=96=20cube=20=E5=AD=90=E5=8C=85=E7=9A=84=E4=BE=9D?= =?UTF-8?q?=E8=B5=96=E5=A5=91=E7=BA=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 主要变更(按评审意见整理): - code_executors 父包不再 lazy 重导出 Cube 符号;业务方需显式 `from trpc_agent_sdk.code_executors.cube import ...`,让 [cube] 依赖在调用点显性暴露,避免错把可选依赖伪装成核心依赖。 - 移除 cube 子包"未装 [cube] 也能 import"的隐含契约: `cube/_e2b.py` 删除,`_sandbox.py` / `_code_executor.py` 改为顶层 `import e2b_code_interpreter as e2b`;缺失 extra 时直接以 ImportError 在 import 处失败,而非延迟到 sandbox 打开时。 - 工作空间收集流水线:删除 utils/_collect.py,将 `build_code_files` / `build_manifest_output` 上提为 `BaseWorkspaceFS._build_code_files` / 
`._build_manifest_output` 保护静态方法;local/container/cube 三个后端通过 self.* 调用, 子类可直接复用或重写。`max_read_size` 由 `Optional[int]=None` 改为 `int = MAX_READ_SIZE_BYTES`,签名更直接。 - `CubeCodeExecutor.code_block_delimiters` 改用 `default_factory` 复用基类默认值并追加 bash fence,避免重复声明 tool_code/python; 父类未来扩展默认列表时自动跟随。 - `cube/_runtime.py::_copy_remote`:rm -rf 调用加 Safety 注释, 记录 normalize_remote_relative + join_remote + shell_quote + GNU rm `--preserve-root` 的多层校验不变量,作为后续维护合同。 测试同步: - 新增 tests/code_executors/test_base_workspace_fs_collect.py,针对 BaseWorkspaceFS 上的保护方法测试,并新增子类覆写用例。 - tests/code_executors/cube/conftest.py 重写 `fake_e2b` fixture, patch `_sandbox.e2b` / `_code_executor.e2b` 模块级符号。 - 删除 test_e2b.py、test_package_lazy_import.py,新增 test_package_imports.py(保留父包 non-reexport + 父包 import 不触发 e2b 的两条契约)。 - 其余测试更新 monkeypatch 目标到新的实现位置。 Assisted-by: Cursor:claude-opus-4.7 Co-authored-by: Cursor --- examples/code_executors/cube_demo.py | 6 +- tests/code_executors/cube/conftest.py | 57 +++-- tests/code_executors/cube/test_bug_hunt.py | 74 ++++++ tests/code_executors/cube/test_e2b.py | 54 ---- .../cube/test_package_imports.py | 103 ++++++++ .../cube/test_package_lazy_import.py | 160 ------------ tests/code_executors/cube/test_runtime.py | 20 +- tests/code_executors/cube/test_sandbox.py | 8 +- .../local/test_local_ws_runtime.py | 4 +- ...t.py => test_base_workspace_fs_collect.py} | 186 +++++++++----- tests/code_executors/test_types.py | 37 +-- trpc_agent_sdk/code_executors/__init__.py | 41 --- .../code_executors/_base_workspace_runtime.py | 223 +++++++++++++++++ trpc_agent_sdk/code_executors/_types.py | 2 +- .../container/_container_ws_runtime.py | 6 +- .../code_executors/cube/__init__.py | 10 +- .../code_executors/cube/_code_executor.py | 47 +++- trpc_agent_sdk/code_executors/cube/_e2b.py | 42 ---- .../code_executors/cube/_runtime.py | 21 +- .../code_executors/cube/_sandbox.py | 89 +++++-- .../code_executors/local/_local_ws_runtime.py | 14 +- .../code_executors/utils/__init__.py | 6 - 
.../code_executors/utils/_collect.py | 234 ------------------ 23 files changed, 741 insertions(+), 703 deletions(-) delete mode 100644 tests/code_executors/cube/test_e2b.py create mode 100644 tests/code_executors/cube/test_package_imports.py delete mode 100644 tests/code_executors/cube/test_package_lazy_import.py rename tests/code_executors/{utils/test_collect.py => test_base_workspace_fs_collect.py} (65%) delete mode 100644 trpc_agent_sdk/code_executors/cube/_e2b.py delete mode 100644 trpc_agent_sdk/code_executors/utils/_collect.py diff --git a/examples/code_executors/cube_demo.py b/examples/code_executors/cube_demo.py index 0a7d4d8..e543d96 100644 --- a/examples/code_executors/cube_demo.py +++ b/examples/code_executors/cube_demo.py @@ -34,9 +34,9 @@ import os import sys -from trpc_agent_sdk.code_executors import CubeCodeExecutor -from trpc_agent_sdk.code_executors import CubeCodeExecutorConfig -from trpc_agent_sdk.code_executors import create_cube_workspace_runtime +from trpc_agent_sdk.code_executors.cube import CubeCodeExecutor +from trpc_agent_sdk.code_executors.cube import CubeCodeExecutorConfig +from trpc_agent_sdk.code_executors.cube import create_cube_workspace_runtime from trpc_agent_sdk.code_executors._types import CodeBlock from trpc_agent_sdk.code_executors._types import CodeExecutionInput from trpc_agent_sdk.code_executors._types import WorkspaceOutputSpec diff --git a/tests/code_executors/cube/conftest.py b/tests/code_executors/cube/conftest.py index db4c0e4..dedd2b3 100644 --- a/tests/code_executors/cube/conftest.py +++ b/tests/code_executors/cube/conftest.py @@ -5,11 +5,18 @@ # tRPC-Agent-Python is licensed under Apache-2.0. """Shared fixtures for the cube/ test suite. -Exposes a ``fake_e2b`` fixture that patches -:func:`trpc_agent_sdk.code_executors.cube._e2b._import_e2b` to return a -fake vendor module with stub classes / enums that match the surface the -production code consults. 
This keeps the whole test suite independent -of the real ``e2b-code-interpreter`` dependency. +Production code does ``import e2b_code_interpreter as e2b`` at module +top-level in :mod:`trpc_agent_sdk.code_executors.cube._sandbox` and +:mod:`trpc_agent_sdk.code_executors.cube._code_executor`. Tests still +need to swap that vendor surface out for a fake — both to avoid talking +to a real Cube server and to inject precise exception types — so the +``fake_e2b`` fixture monkeypatches the ``e2b`` symbol in *both* importer +modules' globals (NOT ``sys.modules['e2b_code_interpreter']``, which +would only affect callers that re-import the package after the patch). + +The fake mirrors just the surface the production code actually +consults: a handful of vendor exceptions, the ``SandboxState`` / +``FileType`` enums, and a placeholder ``AsyncSandbox``. """ from __future__ import annotations @@ -44,11 +51,25 @@ def __init__(self, stdout: str = "", stderr: str = "", exit_code: int = 1): self.exit_code = exit_code +class _FakeTimeoutException(Exception): + """Mirrors e2b_code_interpreter.TimeoutException. + + The real vendor message is long and prescriptive ("passing 'timeout' + when making the request", "Use '0' to disable"). Production code + catches this type in :meth:`CubeSandboxClient.commands_run` and + rewrites it into a structured ``CubeCommandResult(timed_out=True)``, + so what actually gets surfaced to callers never contains this + message. The fake keeps the *type* precise while leaving the message + empty so tests can assert on the translated shape. 
+ """ + + def _make_fake_e2b() -> SimpleNamespace: ns = SimpleNamespace() ns.SandboxException = _FakeSandboxException ns.SandboxNotFoundException = _FakeSandboxNotFoundException ns.CommandExitException = _FakeCommandExitException + ns.TimeoutException = _FakeTimeoutException ns.SandboxState = SimpleNamespace( RUNNING=SimpleNamespace(value="running"), PAUSED=SimpleNamespace(value="paused"), @@ -61,23 +82,22 @@ def _make_fake_e2b() -> SimpleNamespace: @pytest.fixture def fake_e2b(monkeypatch): - """Patch ``_import_e2b`` everywhere the cube package imports it.""" + """Swap ``e2b_code_interpreter`` for a fake in every cube import site. + + Production code does ``import e2b_code_interpreter as e2b`` at the + top of ``_sandbox.py`` and ``_code_executor.py``, which binds an + ``e2b`` name in those modules' globals. We patch each of those + bindings independently (rather than ``sys.modules``) so already- + executed ``from … import e2b`` statements see the fake. + """ ns = _make_fake_e2b() - # The production code does ``from ._e2b import _import_e2b`` in - # _sandbox.py and _code_executor.py, which rebinds the symbol in - # those modules' globals — so we must patch every import site, not - # just the original definition. - monkeypatch.setattr( - "trpc_agent_sdk.code_executors.cube._e2b._import_e2b", - lambda: ns, - ) monkeypatch.setattr( - "trpc_agent_sdk.code_executors.cube._sandbox._import_e2b", - lambda: ns, + "trpc_agent_sdk.code_executors.cube._sandbox.e2b", + ns, ) monkeypatch.setattr( - "trpc_agent_sdk.code_executors.cube._code_executor._import_e2b", - lambda: ns, + "trpc_agent_sdk.code_executors.cube._code_executor.e2b", + ns, ) return ns @@ -92,7 +112,6 @@ def _make_fake_async_sandbox(sandbox_id: str = "sbx-1"): sbx.sandbox_id = sandbox_id sbx.kill = AsyncMock(return_value=None) sbx.set_timeout = AsyncMock(return_value=None) - # get_info returns a state holder by default; tests override. 
info = SimpleNamespace(state=SimpleNamespace(value="running")) sbx.get_info = AsyncMock(return_value=info) sbx.commands = MagicMock() diff --git a/tests/code_executors/cube/test_bug_hunt.py b/tests/code_executors/cube/test_bug_hunt.py index b00d58f..a2c54f9 100644 --- a/tests/code_executors/cube/test_bug_hunt.py +++ b/tests/code_executors/cube/test_bug_hunt.py @@ -556,3 +556,77 @@ async def test_bug11_copy_remote_issues_rm_before_cp(mock_client): cp_idx = next(i for i, c in enumerate(cmds) if c.startswith("cp -a")) assert rm_idx < cp_idx, "rm must precede cp" + + + + +@pytest.mark.asyncio +async def test_bug12_commands_run_translates_timeout_to_structured_result( + fake_e2b, fake_async_sandbox, +): + """TimeoutException at the e2b boundary must become a CubeCommandResult.""" + fake_async_sandbox.commands.run = AsyncMock( + side_effect=fake_e2b.TimeoutException() + ) + client = CubeSandboxClient( + fake_async_sandbox, idle_timeout=60, execute_timeout=30.0, + ) + result = await client.commands_run("sleep 9999", timeout=1.5) + assert isinstance(result, CubeCommandResult) + assert result.timed_out is True, "timed_out flag must be set" + assert result.exit_code == -1, ( + f"exit_code on timeout must be -1 (matches local/container " + f"executors); got {result.exit_code}" + ) + assert result.stdout == "", f"stdout must be empty on timeout: {result.stdout!r}" + # The rewritten stderr is short and hand-written. Importantly, it does + # NOT contain the e2b vendor boilerplate. 
+ assert "timed out" in result.stderr.lower(), ( + f"stderr must describe the timeout: {result.stderr!r}" + ) + assert "1.5" in result.stderr, ( + f"stderr must mention the configured timeout value: {result.stderr!r}" + ) + for leaked in ("passing 'timeout'", "context deadline exceeded", "Use '0'"): + assert leaked not in result.stderr, ( + f"vendor message leaked into stderr: {leaked!r} in {result.stderr!r}" + ) + + +@pytest.mark.asyncio +async def test_bug12_execute_code_surfaces_deadline_exceeded_outcome( + fake_e2b, fake_async_sandbox, +): + """Timeout must appear as OUTCOME_DEADLINE_EXCEEDED, not a raised exception.""" + from trpc_agent_sdk.code_executors._types import ( + CodeBlock, + CodeExecutionInput, + Outcome, + ) + from trpc_agent_sdk.code_executors.cube._code_executor import CubeCodeExecutor + + fake_async_sandbox.commands.run = AsyncMock( + side_effect=fake_e2b.TimeoutException() + ) + client = CubeSandboxClient( + fake_async_sandbox, idle_timeout=60, execute_timeout=2.0, + ) + cfg = CubeCodeExecutorConfig( + template="t", api_url="u", api_key="k", + idle_timeout=60, execute_timeout=2.0, + ) + executor = CubeCodeExecutor(client, cfg) + + # execute_code MUST return a result, not raise. + result = await executor.execute_code( + invocation_context=None, # type: ignore[arg-type] + code_execution_input=CodeExecutionInput( + code_blocks=[CodeBlock(code="import time; time.sleep(9999)", language="python")], + ), + ) + assert result.outcome == Outcome.OUTCOME_DEADLINE_EXCEEDED, ( + f"expected OUTCOME_DEADLINE_EXCEEDED, got {result.outcome}" + ) + assert "timed out" in result.output.lower(), ( + f"output must mention the timeout: {result.output!r}" + ) diff --git a/tests/code_executors/cube/test_e2b.py b/tests/code_executors/cube/test_e2b.py deleted file mode 100644 index bf4afd6..0000000 --- a/tests/code_executors/cube/test_e2b.py +++ /dev/null @@ -1,54 +0,0 @@ -# Tencent is pleased to support the open source community by making tRPC-Agent-Python available. 
-# -# Copyright (C) 2026 Tencent. All rights reserved. -# -# tRPC-Agent-Python is licensed under Apache-2.0. -"""Unit tests for trpc_agent_sdk.code_executors.cube._e2b.""" - -from __future__ import annotations - -import builtins -import sys -from types import SimpleNamespace - -import pytest - - -def test_guest_user_is_root(): - from trpc_agent_sdk.code_executors.cube import _e2b - # Downstream hermes adapters rely on `root`; changing it silently - # would break file-upload permissions across the fleet. - assert _e2b._GUEST_USER == "root" - - -def test_install_hint_mentions_cube_extra(): - from trpc_agent_sdk.code_executors.cube import _e2b - assert "trpc-agent-py[cube]" in _e2b._E2B_INSTALL_HINT - - -def test_import_e2b_returns_module_when_present(monkeypatch): - """When ``e2b_code_interpreter`` is importable, return it verbatim.""" - fake_mod = SimpleNamespace(AsyncSandbox=object()) - monkeypatch.setitem(sys.modules, "e2b_code_interpreter", fake_mod) - from trpc_agent_sdk.code_executors.cube._e2b import _import_e2b - assert _import_e2b() is fake_mod - - -def test_import_e2b_raises_import_error_when_missing(monkeypatch): - """When the extra is not installed, raise ImportError with install hint.""" - # Scrub any cached import first. - monkeypatch.delitem(sys.modules, "e2b_code_interpreter", raising=False) - - # Force the import to fail at the builtin layer. 
- original_import = builtins.__import__ - - def failing_import(name, *args, **kwargs): - if name == "e2b_code_interpreter": - raise ImportError("not installed in this venv") - return original_import(name, *args, **kwargs) - - monkeypatch.setattr(builtins, "__import__", failing_import) - - from trpc_agent_sdk.code_executors.cube._e2b import _import_e2b - with pytest.raises(ImportError, match=r"trpc-agent-py\[cube\]"): - _import_e2b() diff --git a/tests/code_executors/cube/test_package_imports.py b/tests/code_executors/cube/test_package_imports.py new file mode 100644 index 0000000..fec084b --- /dev/null +++ b/tests/code_executors/cube/test_package_imports.py @@ -0,0 +1,103 @@ +# Tencent is pleased to support the open source community by making tRPC-Agent-Python available. +# +# Copyright (C) 2026 Tencent. All rights reserved. +# +# tRPC-Agent-Python is licensed under Apache-2.0. +"""Unit tests for the cube subpackage import surface. + +The Cube/E2B backend is shipped as the optional ``[cube]`` extra and +requires ``e2b-code-interpreter`` at import time. Two contracts are +pinned here: + +1. The parent ``trpc_agent_sdk.code_executors`` package intentionally + does NOT re-export Cube symbols. Re-exporting optional-dependency + symbols would silently force every importer of the parent package + to install ``[cube]``; instead, business code that genuinely needs + the Cube backend imports the subpackage directly: + + from trpc_agent_sdk.code_executors.cube import CubeCodeExecutor + +2. As a corollary, importing the parent package alone does NOT pull in + ``e2b_code_interpreter`` — the parent's ``__init__.py`` deliberately + does not reference ``trpc_agent_sdk.code_executors.cube``. (Importing + the cube subpackage itself, by contrast, does eagerly import the + vendor SDK; that is the explicit cost of opting into the [cube] + backend.) 
+ +Tests that need a **cold** ``sys.modules`` state are run in a subprocess +so they never corrupt the in-process module cache (which is shared +across the whole test session). +""" + +from __future__ import annotations + +import subprocess +import sys +import textwrap + +import pytest + + +def _run_isolated(script: str) -> subprocess.CompletedProcess: + return subprocess.run( + [sys.executable, "-c", textwrap.dedent(script)], + capture_output=True, + text=True, + check=False, + ) + + +def test_parent_package_does_not_reexport_cube_symbols(): + """Cube symbols must NOT be reachable from the parent package. + + Re-exporting optional-dependency symbols from the eager package + would silently make ``[cube]`` mandatory for everyone who imports + ``code_executors``. Force callers to make the dependency explicit + by importing from the subpackage. + """ + from trpc_agent_sdk import code_executors as ce + cube_symbols = ( + "CubeCodeExecutor", + "CubeCodeExecutorConfig", + "CubeCommandResult", + "CubeProgramRunner", + "CubeSandboxClient", + "CubeWorkspaceFS", + "CubeWorkspaceManager", + "CubeWorkspaceRuntime", + "CubeWorkspaceRuntimeConfig", + "OnExisting", + "create_cube_workspace_runtime", + ) + for name in cube_symbols: + assert name not in ce.__all__, f"{name!r} leaked into parent __all__" + with pytest.raises(AttributeError): + getattr(ce, name) + + +def test_parent_package_import_does_not_touch_e2b(): + """Plain ``import code_executors`` does NOT import e2b_code_interpreter. + + The parent package never references the cube subpackage, so + importing it must stay cheap and dependency-free even when the + [cube] extra is installed in the same environment. + + Run in a subprocess so the main test session's module cache cannot + mask the behaviour. 
+ """ + result = _run_isolated(""" + import sys + import trpc_agent_sdk.code_executors # noqa: F401 + assert "e2b_code_interpreter" not in sys.modules, \ + "bare import of code_executors pulled in e2b_code_interpreter" + print("OK") + """) + assert result.returncode == 0, result.stderr + assert "OK" in result.stdout + + +def test_cube_subpackage_reexports_public_api(): + """Every entry on the cube subpackage's ``__all__`` must resolve.""" + from trpc_agent_sdk.code_executors import cube + for name in cube.__all__: + assert hasattr(cube, name), f"{name} missing from cube/__init__.py" diff --git a/tests/code_executors/cube/test_package_lazy_import.py b/tests/code_executors/cube/test_package_lazy_import.py deleted file mode 100644 index 037a234..0000000 --- a/tests/code_executors/cube/test_package_lazy_import.py +++ /dev/null @@ -1,160 +0,0 @@ -# Tencent is pleased to support the open source community by making tRPC-Agent-Python available. -# -# Copyright (C) 2026 Tencent. All rights reserved. -# -# tRPC-Agent-Python is licensed under Apache-2.0. -"""Unit tests for the PEP 562 lazy-import wiring in the cube package. - -The commit promises that -``import trpc_agent_sdk.code_executors`` does NOT require the optional -``[cube]`` extra. Accessing a Cube symbol triggers lazy loading but -still defers the ``e2b-code-interpreter`` import until -``CubeCodeExecutor.create`` / ``.attach`` actually wants to talk to a -sandbox. - -Tests that need a **cold** ``sys.modules`` state are run in a subprocess -so they never corrupt the in-process module cache (which is shared -across the whole test session and is what makes other tests' -``monkeypatch`` calls resolve). 
-""" - -from __future__ import annotations - -import subprocess -import sys -import textwrap - -import pytest - - -def _run_isolated(script: str) -> subprocess.CompletedProcess: - return subprocess.run( - [sys.executable, "-c", textwrap.dedent(script)], - capture_output=True, - text=True, - check=False, - ) - - -def test_all_lists_all_lazy_symbols(): - """``__all__`` must include every element of ``_CUBE_LAZY_ATTRS``.""" - from trpc_agent_sdk import code_executors as ce - for name in ce._CUBE_LAZY_ATTRS: - assert name in ce.__all__, f"{name!r} missing from __all__" - - -def test_dir_contains_lazy_symbols(): - from trpc_agent_sdk import code_executors as ce - d = dir(ce) - for name in ce._CUBE_LAZY_ATTRS: - assert name in d - - -def test_unknown_attribute_raises(): - from trpc_agent_sdk import code_executors as ce - with pytest.raises(AttributeError, match="definitely_not_a_thing"): - _ = ce.definitely_not_a_thing - - -def test_lazy_attribute_access_populates_globals(): - """After first access the lazy symbol is cached in the module's globals().""" - # Isolated subprocess so we get a cold module cache. - result = _run_isolated(""" - import sys - import trpc_agent_sdk.code_executors as ce - # First access triggers __getattr__. - cls1 = ce.CubeCodeExecutorConfig - # After first access, ce.__dict__ holds the symbol. - assert "CubeCodeExecutorConfig" in ce.__dict__ - cls2 = ce.CubeCodeExecutorConfig - assert cls1 is cls2 - print("OK") - """) - assert result.returncode == 0, result.stderr - assert "OK" in result.stdout - - -def test_import_does_not_touch_e2b(): - """Plain import of ``code_executors`` does NOT import e2b_code_interpreter. - - This is the core promise of the lazy wiring. Run in a subprocess so - the main test session's module cache cannot mask the behaviour. 
- """ - result = _run_isolated(""" - import sys - import trpc_agent_sdk.code_executors # noqa: F401 - assert "e2b_code_interpreter" not in sys.modules, \ - "bare import pulled in e2b_code_interpreter" - # Sub-package cube/ may or may not be imported yet — the contract - # is only that e2b is not. - print("OK") - """) - assert result.returncode == 0, result.stderr - assert "OK" in result.stdout - - -def test_cube_subpackage_import_does_not_touch_e2b(): - """Even importing the ``cube`` subpackage is e2b-free. - - ``_import_e2b`` is wrapped in a function; it only runs when we open - a real sandbox. Just importing the package must not trigger it. - """ - result = _run_isolated(""" - import sys - import trpc_agent_sdk.code_executors.cube as cube # noqa: F401 - assert "e2b_code_interpreter" not in sys.modules, \ - "importing cube pulled in e2b_code_interpreter" - print("OK") - """) - assert result.returncode == 0, result.stderr - assert "OK" in result.stdout - - -def test_config_types_usable_without_e2b(): - """`CubeCodeExecutorConfig` can be constructed without the [cube] extra.""" - result = _run_isolated(""" - import sys - import trpc_agent_sdk.code_executors as ce - cfg = ce.CubeCodeExecutorConfig(template="t", api_url="u", api_key="k") - assert cfg.template == "t" - assert "e2b_code_interpreter" not in sys.modules - print("OK") - """) - assert result.returncode == 0, result.stderr - assert "OK" in result.stdout - - -def test_cube_subpackage_reexports_public_api(): - """Every entry on the cube subpackage's ``__all__`` must resolve.""" - from trpc_agent_sdk.code_executors import cube - for name in cube.__all__: - assert hasattr(cube, name), f"{name} missing from cube/__init__.py" - - -def test_subpackage_all_matches_parent_lazy_set(): - """Parent-package lazy set must match the subpackage ``__all__``. - - BUG PROBE: if a symbol is added to ``cube/__init__.py`` but forgotten - in ``code_executors/__init__.py`` lazy wiring (or vice versa), this - test catches the drift. 
- """ - from trpc_agent_sdk.code_executors import cube as sub - from trpc_agent_sdk import code_executors as parent - assert set(sub.__all__) == set(parent._CUBE_LAZY_ATTRS), ( - f"drift between cube/__init__.py __all__ and parent _CUBE_LAZY_ATTRS: " - f"only-in-subpackage={set(sub.__all__) - set(parent._CUBE_LAZY_ATTRS)!r}, " - f"only-in-parent={set(parent._CUBE_LAZY_ATTRS) - set(sub.__all__)!r}" - ) - - -def test_onexisting_resolves_via_parent_lazy_import(): - """Regression for the drift bug: ``OnExisting`` was exported from - the subpackage but not wired into the parent's ``_CUBE_LAZY_ATTRS``. - - ``from trpc_agent_sdk.code_executors import OnExisting`` used to - raise ``AttributeError``. Pin both access paths so a reintroduction - of the drift fails loudly. - """ - from trpc_agent_sdk.code_executors import OnExisting as parent_symbol - from trpc_agent_sdk.code_executors.cube import OnExisting as sub_symbol - assert parent_symbol is sub_symbol diff --git a/tests/code_executors/cube/test_runtime.py b/tests/code_executors/cube/test_runtime.py index 65f3413..d0719d4 100644 --- a/tests/code_executors/cube/test_runtime.py +++ b/tests/code_executors/cube/test_runtime.py @@ -469,11 +469,17 @@ async def test_dedup_by_rel(self, mock_client): @pytest.mark.asyncio async def test_truncation_marker(self, mock_client, monkeypatch): - # Force a tiny max so truncation happens. The constant moved to - # the shared collection helper after the build_code_files - # extraction; ``CubeWorkspaceFS.collect`` reads it from there. - from trpc_agent_sdk.code_executors.utils import _collect - monkeypatch.setattr(_collect, "MAX_READ_SIZE_BYTES", 4) + # Force a tiny per-file cap so truncation happens. ``max_read_size`` + # is a keyword-only argument on the protected base-class helper + # with a module-level default; patching the function's + # ``__kwdefaults__`` lets us simulate a tiny cap without altering + # the public ``CubeWorkspaceFS.collect`` signature. 
+ from trpc_agent_sdk.code_executors._base_workspace_runtime import BaseWorkspaceFS + monkeypatch.setitem( + BaseWorkspaceFS._build_code_files.__kwdefaults__, + "max_read_size", + 4, + ) ws = _ws() mock_client.commands_run.return_value = _ok(stdout=f"{ws.path}/f.txt\n") mock_client.read_file_bytes.return_value = b"0123456789" @@ -531,8 +537,8 @@ async def fake_save(ctx, name, data, mime): saved.append((name, data, mime)) return 7 - from trpc_agent_sdk.code_executors.utils import _collect as collect_mod - monkeypatch.setattr(collect_mod, "save_artifact_helper", fake_save) + from trpc_agent_sdk.code_executors import _base_workspace_runtime as base_mod + monkeypatch.setattr(base_mod, "save_artifact_helper", fake_save) ctx = MagicMock() manifest = await fs.collect_outputs( ws, diff --git a/tests/code_executors/cube/test_sandbox.py b/tests/code_executors/cube/test_sandbox.py index 84556a8..7ae806e 100644 --- a/tests/code_executors/cube/test_sandbox.py +++ b/tests/code_executors/cube/test_sandbox.py @@ -5,9 +5,11 @@ # tRPC-Agent-Python is licensed under Apache-2.0. """Unit tests for trpc_agent_sdk.code_executors.cube._sandbox. -Every test in this file patches ``_import_e2b`` via the ``fake_e2b`` -fixture in conftest.py so the real ``e2b-code-interpreter`` package is -never required or touched. +Every test in this file uses the ``fake_e2b`` fixture in conftest.py, +which monkeypatches the ``e2b`` symbol bound in +``trpc_agent_sdk.code_executors.cube._sandbox`` (and ``_code_executor``) +so the real ``e2b-code-interpreter`` SDK is never invoked, even though +it is now a hard import dependency of the cube subpackage. 
""" from __future__ import annotations diff --git a/tests/code_executors/local/test_local_ws_runtime.py b/tests/code_executors/local/test_local_ws_runtime.py index a7eeb39..b054507 100644 --- a/tests/code_executors/local/test_local_ws_runtime.py +++ b/tests/code_executors/local/test_local_ws_runtime.py @@ -715,7 +715,7 @@ async def test_collect_outputs_no_matches(self): result = await self.fs.collect_outputs(self.ws, spec) assert len(result.files) == 0 - @patch('trpc_agent_sdk.code_executors.utils._collect.save_artifact_helper', new_callable=AsyncMock) + @patch('trpc_agent_sdk.code_executors._base_workspace_runtime.save_artifact_helper', new_callable=AsyncMock) @pytest.mark.asyncio async def test_collect_outputs_save_with_ctx(self, mock_save): mock_save.return_value = 1 @@ -741,7 +741,7 @@ async def test_collect_outputs_save_without_ctx_raises(self): with pytest.raises(ValueError, match="Context is required"): await self.fs.collect_outputs(self.ws, spec) - @patch('trpc_agent_sdk.code_executors.utils._collect.save_artifact_helper', new_callable=AsyncMock) + @patch('trpc_agent_sdk.code_executors._base_workspace_runtime.save_artifact_helper', new_callable=AsyncMock) @pytest.mark.asyncio async def test_collect_outputs_save_with_name_template(self, mock_save): mock_save.return_value = 1 diff --git a/tests/code_executors/utils/test_collect.py b/tests/code_executors/test_base_workspace_fs_collect.py similarity index 65% rename from tests/code_executors/utils/test_collect.py rename to tests/code_executors/test_base_workspace_fs_collect.py index b81c89b..cc1c154 100644 --- a/tests/code_executors/utils/test_collect.py +++ b/tests/code_executors/test_base_workspace_fs_collect.py @@ -3,19 +3,23 @@ # Copyright (C) 2026 Tencent. All rights reserved. # # tRPC-Agent-Python is licensed under Apache-2.0. -"""Unit tests for trpc_agent_sdk.code_executors.utils._collect. 
- -These tests pin down the shared "matches -> models" pipeline used by every -workspace backend (local / container / cube). They focus on edge paths the +"""Unit tests for ``BaseWorkspaceFS`` collect helpers. + +These tests pin down the shared "matches -> models" pipeline used by +every workspace backend (local / container / cube). The helpers were +moved onto :class:`BaseWorkspaceFS` so that subclasses can call them +directly via ``self._build_code_files`` / ``self._build_manifest_output`` +and override them when needed; this suite exercises them as protected +static methods on the base class. They focus on edge paths that the backend-specific tests don't otherwise exercise: -- ``_relativize`` fallback when an absolute match does not live under the - workspace root. -- ``build_code_files`` happy-path / dedupe / fetcher-failure / truncation - flagging. -- ``build_manifest_output`` limit handling (``max_files`` / ``max_total_bytes`` - / per-file truncation), inline + save branches, fetcher failures, and the - ``strict_truncated_save`` guard. +- ``_relativize`` fallback when an absolute match does not live under + the workspace root. +- ``_build_code_files`` happy-path / dedupe / fetcher-failure / + truncation flagging. +- ``_build_manifest_output`` limit handling (``max_files`` / + ``max_total_bytes`` / per-file truncation), inline + save branches, + fetcher failures, and the ``strict_truncated_save`` guard. 
""" from __future__ import annotations @@ -24,8 +28,9 @@ import pytest +from trpc_agent_sdk.code_executors import _base_workspace_runtime as _base +from trpc_agent_sdk.code_executors._base_workspace_runtime import BaseWorkspaceFS from trpc_agent_sdk.code_executors._types import WorkspaceOutputSpec -from trpc_agent_sdk.code_executors.utils import _collect def _make_fetcher(payloads): @@ -50,24 +55,24 @@ async def _fetch(path: str, max_bytes: int) -> Tuple[bytes, int]: class TestRelativize: def test_strips_workspace_prefix(self): - assert _collect._relativize("/ws", "/ws/sub/file.txt") == "sub/file.txt" + assert BaseWorkspaceFS._relativize("/ws", "/ws/sub/file.txt") == "sub/file.txt" def test_handles_trailing_slash_on_ws(self): # The helper appends ``"/"`` only when ws_path doesn't already end in # one, so a trailing slash on the input must not produce ``"//"``. - assert _collect._relativize("/ws/", "/ws/file") == "file" + assert BaseWorkspaceFS._relativize("/ws/", "/ws/file") == "file" def test_returns_full_path_when_outside_workspace(self): - # Covers _collect.py:75 — fallback when a match somehow escapes the - # workspace root (e.g. a symlink resolution surfaced an absolute - # path on a different mount). The full path is preserved verbatim - # rather than silently mangled. + # Fallback when a match somehow escapes the workspace root (e.g. a + # symlink resolution surfaced an absolute path on a different + # mount). The full path is preserved verbatim rather than silently + # mangled. 
full = "/elsewhere/file.txt" - assert _collect._relativize("/ws", full) == full + assert BaseWorkspaceFS._relativize("/ws", full) == full # --------------------------------------------------------------------------- -# build_code_files +# _build_code_files # --------------------------------------------------------------------------- @@ -79,7 +84,7 @@ async def test_basic_collection(self): "/ws/a.txt": b"alpha", "/ws/sub/b.bin": b"\x00\x01beta", } - files = await _collect.build_code_files( + files = await BaseWorkspaceFS._build_code_files( "/ws", ["/ws/a.txt", "/ws/sub/b.bin"], _make_fetcher(payloads), @@ -96,11 +101,10 @@ async def test_deduplicates_by_relative_name(self): # Two glob patterns can yield the same absolute path. The helper # must surface only the first hit, not double-count it. payloads = {"/ws/a.txt": b"x"} - fetcher = _make_fetcher(payloads) - files = await _collect.build_code_files( + files = await BaseWorkspaceFS._build_code_files( "/ws", ["/ws/a.txt", "/ws/a.txt"], - fetcher, + _make_fetcher(payloads), ) assert [f.name for f in files] == ["a.txt"] @@ -116,7 +120,7 @@ async def fetcher(path, max_bytes): data = payloads[path] return data[:max_bytes], len(data) - files = await _collect.build_code_files( + files = await BaseWorkspaceFS._build_code_files( "/ws", ["/ws/bad.txt", "/ws/ok.txt"], fetcher, @@ -132,7 +136,7 @@ async def test_truncation_flag_set_when_raw_exceeds_data(self): async def fetcher(path, max_bytes): return b"hi", 1024 - files = await _collect.build_code_files( + files = await BaseWorkspaceFS._build_code_files( "/ws", ["/ws/big.bin"], fetcher, @@ -142,23 +146,36 @@ async def fetcher(path, max_bytes): assert files[0].truncated is True assert files[0].size_bytes == 1024 - async def test_default_cap_uses_module_constant(self, monkeypatch): - # When ``max_read_size`` is None the helper resolves - # ``MAX_READ_SIZE_BYTES`` *at call time* so tests can patch the - # constant. Verify the budget actually flows into the fetcher. 
+ async def test_default_cap_uses_module_constant(self): + # When ``max_read_size`` is omitted the helper falls back to + # :data:`MAX_READ_SIZE_BYTES` (bound at definition time). Verify + # the default actually flows into the fetcher as the byte budget. + seen_caps: list[int] = [] + + async def fetcher(path, max_bytes): + seen_caps.append(max_bytes) + return b"", 0 + + await BaseWorkspaceFS._build_code_files("/ws", ["/ws/a"], fetcher) + assert seen_caps == [_base.MAX_READ_SIZE_BYTES] + + async def test_explicit_cap_overrides_default(self): + # Callers can still pass an explicit ``max_read_size`` to override + # the module-level default. seen_caps: list[int] = [] async def fetcher(path, max_bytes): seen_caps.append(max_bytes) return b"", 0 - monkeypatch.setattr(_collect, "MAX_READ_SIZE_BYTES", 7) - await _collect.build_code_files("/ws", ["/ws/a"], fetcher) + await BaseWorkspaceFS._build_code_files( + "/ws", ["/ws/a"], fetcher, max_read_size=7, + ) assert seen_caps == [7] # --------------------------------------------------------------------------- -# build_manifest_output +# _build_manifest_output # --------------------------------------------------------------------------- @@ -189,7 +206,7 @@ class TestBuildManifestOutput: async def test_basic_inline(self): spec = WorkspaceOutputSpec(globs=["**/*"], inline=True) payloads = {"/ws/a.txt": b"alpha"} - manifest, names, versions = await _collect.build_manifest_output( + manifest, names, versions = await BaseWorkspaceFS._build_manifest_output( "/ws", spec, ["/ws/a.txt"], @@ -209,7 +226,7 @@ async def test_save_branch_uses_name_template_and_records_versions(self): spec = WorkspaceOutputSpec(globs=["**/*"], save=True, name_template="run-1/") payloads = {"/ws/a.txt": b"alpha", "/ws/b.txt": b"beta"} ctx = _FakeArtifactCtx() - manifest, names, versions = await _collect.build_manifest_output( + manifest, names, versions = await BaseWorkspaceFS._build_manifest_output( "/ws", spec, ["/ws/a.txt", "/ws/b.txt"], @@ -227,7 +244,7 
@@ async def test_save_branch_uses_name_template_and_records_versions(self): async def test_save_without_ctx_raises(self): spec = WorkspaceOutputSpec(globs=["**/*"], save=True) with pytest.raises(ValueError, match="Context is required"): - await _collect.build_manifest_output( + await BaseWorkspaceFS._build_manifest_output( "/ws", spec, ["/ws/a.txt"], @@ -238,7 +255,7 @@ async def test_save_without_ctx_raises(self): async def test_max_files_limit_sets_limits_hit(self): spec = WorkspaceOutputSpec(globs=["**/*"], max_files=1) payloads = {"/ws/a.txt": b"a", "/ws/b.txt": b"b"} - manifest, _, _ = await _collect.build_manifest_output( + manifest, _, _ = await BaseWorkspaceFS._build_manifest_output( "/ws", spec, ["/ws/a.txt", "/ws/b.txt"], @@ -258,7 +275,7 @@ async def fetcher(path, max_bytes): data = payloads[path] return data[:max_bytes], len(data) - manifest, _, _ = await _collect.build_manifest_output( + manifest, _, _ = await BaseWorkspaceFS._build_manifest_output( "/ws", spec, ["/ws/a.txt", "/ws/b.txt"], @@ -269,27 +286,28 @@ async def fetcher(path, max_bytes): assert manifest.limits_hit is True async def test_zero_read_budget_breaks_with_limits_hit(self, monkeypatch): - # Covers _collect.py:186-188 — the defensive ``read_budget <= 0`` - # break. The only way to reach it is when *both* the per-file cap - # and the total cap collapse to <= 0 before the first fetch on a - # given iteration. We force this by monkeypatching the resolved - # defaults so an unset ``spec.max_file_bytes`` (which falls back - # to ``MAX_READ_SIZE_BYTES``) and an unset ``spec.max_total_bytes`` - # (falls back to ``DEFAULT_MAX_TOTAL_BYTES``) both materialise as - # 0 — but the *first* guard ``total_bytes >= max_total`` only - # fires once ``total_bytes`` is non-zero. So we patch - # ``DEFAULT_MAX_TOTAL_BYTES`` slightly above zero to skip the - # outer guard and ``MAX_READ_SIZE_BYTES`` to zero so + # Defensive ``read_budget <= 0`` break. 
The only way to reach it + # is when *both* the per-file cap and the total cap collapse to + # <= 0 before the first fetch on a given iteration. We force + # this by monkeypatching the resolved defaults so an unset + # ``spec.max_file_bytes`` (which falls back to + # ``MAX_READ_SIZE_BYTES``) and an unset ``spec.max_total_bytes`` + # (falls back to ``DEFAULT_MAX_TOTAL_BYTES``) both materialise + # such that the inner ``min(...)`` collapses to zero — but the + # outer guard ``total_bytes >= max_total`` only fires once + # ``total_bytes`` is non-zero. Patch ``DEFAULT_MAX_TOTAL_BYTES`` + # slightly above zero to skip the outer guard and + # ``MAX_READ_SIZE_BYTES`` to zero so # ``min(max_file_bytes=0, remaining_total>0) == 0`` and the # inner guard fires. - monkeypatch.setattr(_collect, "MAX_READ_SIZE_BYTES", 0) - monkeypatch.setattr(_collect, "DEFAULT_MAX_TOTAL_BYTES", 1) + monkeypatch.setattr(_base, "MAX_READ_SIZE_BYTES", 0) + monkeypatch.setattr(_base, "DEFAULT_MAX_TOTAL_BYTES", 1) spec = WorkspaceOutputSpec(globs=["**/*"]) async def fetcher(path, max_bytes): # pragma: no cover - never invoked raise AssertionError("fetcher must not run when budget is zero") - manifest, _, _ = await _collect.build_manifest_output( + manifest, _, _ = await BaseWorkspaceFS._build_manifest_output( "/ws", spec, ["/ws/a.txt"], @@ -304,7 +322,7 @@ async def test_per_file_truncation_marks_limits_hit(self): # must flag ``limits_hit`` because the per-file cap actually bit. spec = WorkspaceOutputSpec(globs=["**/*"], max_file_bytes=2, inline=True) payloads = {"/ws/a.txt": b"abcdef"} - manifest, _, _ = await _collect.build_manifest_output( + manifest, _, _ = await BaseWorkspaceFS._build_manifest_output( "/ws", spec, ["/ws/a.txt"], @@ -316,12 +334,12 @@ async def test_per_file_truncation_marks_limits_hit(self): async def test_strict_truncated_save_raises(self): # strict_truncated_save is the container's "refuse to persist a - # half-read binary" guard. Covers _collect.py:211. 
+ # half-read binary" guard. spec = WorkspaceOutputSpec(globs=["**/*"], save=True, max_file_bytes=2) payloads = {"/ws/big.bin": b"0123456789"} ctx = _FakeArtifactCtx() with pytest.raises(RuntimeError, match="cannot save truncated output file"): - await _collect.build_manifest_output( + await BaseWorkspaceFS._build_manifest_output( "/ws", spec, ["/ws/big.bin"], @@ -338,7 +356,7 @@ async def test_non_strict_truncated_save_persists_partial(self): spec = WorkspaceOutputSpec(globs=["**/*"], save=True, max_file_bytes=2) payloads = {"/ws/big.bin": b"0123456789"} ctx = _FakeArtifactCtx() - manifest, names, _ = await _collect.build_manifest_output( + manifest, names, _ = await BaseWorkspaceFS._build_manifest_output( "/ws", spec, ["/ws/big.bin"], @@ -351,9 +369,9 @@ async def test_non_strict_truncated_save_persists_partial(self): assert manifest.limits_hit is True async def test_fetcher_failure_emits_sentinel_and_continues(self): - # Mirrors build_code_files behaviour: a single failing fetch must - # surface as an empty ManifestFileRef while the rest of the batch - # proceeds. Covers _collect.py:192-203. + # Mirrors _build_code_files behaviour: a single failing fetch + # must surface as an empty ManifestFileRef while the rest of + # the batch proceeds. 
spec = WorkspaceOutputSpec(globs=["**/*"], inline=True) payloads = {"/ws/ok.txt": b"ok"} @@ -363,7 +381,7 @@ async def fetcher(path, max_bytes): data = payloads[path] return data[:max_bytes], len(data) - manifest, _, _ = await _collect.build_manifest_output( + manifest, _, _ = await BaseWorkspaceFS._build_manifest_output( "/ws", spec, ["/ws/bad.bin", "/ws/ok.txt"], @@ -383,7 +401,7 @@ async def fetcher(path, max_bytes): async def test_dedup_by_relative_name(self): spec = WorkspaceOutputSpec(globs=["**/*"], inline=True) payloads = {"/ws/a.txt": b"x"} - manifest, _, _ = await _collect.build_manifest_output( + manifest, _, _ = await BaseWorkspaceFS._build_manifest_output( "/ws", spec, ["/ws/a.txt", "/ws/a.txt"], @@ -391,3 +409,49 @@ async def test_dedup_by_relative_name(self): ctx=None, ) assert len(manifest.files) == 1 + + +# --------------------------------------------------------------------------- +# Subclass override surface +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_subclass_can_override_build_code_files(): + """Sanity-check that the protected helper is overridable. + + The whole point of moving these onto the base class was to let + backends extend the post-fetch shape (e.g. emit a richer + ``CodeFile`` subclass) without re-implementing the dedupe / sniff / + cap loop. Pin that the override path is reachable. + """ + + class _CountingFS(BaseWorkspaceFS): + # Concrete stubs for the abstract methods so we can instantiate. 
+ async def put_files(self, ws, files, ctx=None): # pragma: no cover + return None + async def stage_directory(self, ws, src, dst, opt, ctx=None): # pragma: no cover + return None + async def collect(self, ws, patterns, ctx=None): # pragma: no cover + return [] + async def stage_inputs(self, ws, specs, ctx=None): # pragma: no cover + return None + async def collect_outputs(self, ws, spec, ctx=None): # pragma: no cover + from trpc_agent_sdk.code_executors._types import ManifestOutput + return ManifestOutput() + + invocation_count = 0 + + @staticmethod + async def _build_code_files(ws_path, matches, fetcher, *, max_read_size=_base.MAX_READ_SIZE_BYTES): + _CountingFS.invocation_count += 1 + return await BaseWorkspaceFS._build_code_files( + ws_path, matches, fetcher, max_read_size=max_read_size, + ) + + fs = _CountingFS() + files = await fs._build_code_files( + "/ws", ["/ws/a.txt"], _make_fetcher({"/ws/a.txt": b"hi"}), + ) + assert _CountingFS.invocation_count == 1 + assert [f.name for f in files] == ["a.txt"] diff --git a/tests/code_executors/test_types.py b/tests/code_executors/test_types.py index 1c0e3b3..4a7fe97 100644 --- a/tests/code_executors/test_types.py +++ b/tests/code_executors/test_types.py @@ -469,14 +469,17 @@ def test_stderr_only(self): assert "Code execution error:\nsome error\n" in result.output def test_timed_out_only(self): - """is_timed_out=True triggers Outcome.OUTCOME_TIMED_OUT which is not defined.""" - with pytest.raises(AttributeError): - create_code_execution_result(is_timed_out=True) + """is_timed_out=True maps to OUTCOME_DEADLINE_EXCEEDED.""" + result = create_code_execution_result(is_timed_out=True) + assert result.outcome == Outcome.OUTCOME_DEADLINE_EXCEEDED + assert "Code execution timed out" in result.output def test_stderr_and_timed_out(self): - """stderr + timed_out triggers the missing OUTCOME_TIMED_OUT attribute.""" - with pytest.raises(AttributeError): - create_code_execution_result(stderr="err", is_timed_out=True) + """stderr + 
timed_out: timed_out wins the outcome, both messages present.""" + result = create_code_execution_result(stderr="err", is_timed_out=True) + assert result.outcome == Outcome.OUTCOME_DEADLINE_EXCEEDED + assert "Code execution error:\nerr\n" in result.output + assert "Code execution timed out" in result.output def test_stdout_and_stderr(self): """stderr + stdout → FAILED, both messages present.""" @@ -497,16 +500,20 @@ def test_output_files_only(self): assert "`a.txt`" in result.output assert "`b.csv`" in result.output - def test_all_args_combined_with_timed_out_raises(self): - """All arguments with timed_out triggers the missing OUTCOME_TIMED_OUT attribute.""" + def test_all_args_combined_with_timed_out(self): + """All arguments with timed_out: DEADLINE_EXCEEDED wins, all payload present.""" files = [CodeFile(name="out.txt", content="", mime_type="text/plain")] - with pytest.raises(AttributeError): - create_code_execution_result( - stdout="output", - stderr="error", - output_files=files, - is_timed_out=True, - ) + result = create_code_execution_result( + stdout="output", + stderr="error", + output_files=files, + is_timed_out=True, + ) + assert result.outcome == Outcome.OUTCOME_DEADLINE_EXCEEDED + assert "Code execution error:\nerror\n" in result.output + assert "Code execution result:\noutput\n" in result.output + assert "Code execution timed out" in result.output + assert "`out.txt`" in result.output def test_output_files_none_defaults_to_empty(self): """Passing output_files=None does not cause errors.""" diff --git a/trpc_agent_sdk/code_executors/__init__.py b/trpc_agent_sdk/code_executors/__init__.py index 840e13e..8c37e6a 100644 --- a/trpc_agent_sdk/code_executors/__init__.py +++ b/trpc_agent_sdk/code_executors/__init__.py @@ -92,36 +92,6 @@ from .local import create_local_workspace_runtime from .utils import CodeExecutionUtils -# Cube/E2B is exposed via PEP 562 lazy `__getattr__` below so that importing -# this package never pulls in the optional 
`e2b-code-interpreter` dependency -# unless a Cube symbol is actually accessed. -_CUBE_LAZY_ATTRS = { - "CubeCodeExecutor", - "CubeCodeExecutorConfig", - "CubeCommandResult", - "CubeProgramRunner", - "CubeSandboxClient", - "CubeWorkspaceFS", - "CubeWorkspaceManager", - "CubeWorkspaceRuntime", - "CubeWorkspaceRuntimeConfig", - "OnExisting", - "create_cube_workspace_runtime", -} - - -def __getattr__(name: str): - if name in _CUBE_LAZY_ATTRS: - from . import cube as _cube # local import keeps cube/ off the eager path - value = getattr(_cube, name) - globals()[name] = value - return value - raise AttributeError(f"module {__name__!r} has no attribute {name!r}") - - -def __dir__() -> list[str]: - return sorted(set(globals().keys()) | _CUBE_LAZY_ATTRS) - __all__ = [ "load_artifact_helper", @@ -206,15 +176,4 @@ def __dir__() -> list[str]: "UnsafeLocalCodeExecutor", "create_local_workspace_runtime", "CodeExecutionUtils", - "CubeCodeExecutor", - "CubeCodeExecutorConfig", - "CubeCommandResult", - "CubeProgramRunner", - "CubeSandboxClient", - "CubeWorkspaceFS", - "CubeWorkspaceManager", - "CubeWorkspaceRuntime", - "CubeWorkspaceRuntimeConfig", - "OnExisting", - "create_cube_workspace_runtime", ] diff --git a/trpc_agent_sdk/code_executors/_base_workspace_runtime.py b/trpc_agent_sdk/code_executors/_base_workspace_runtime.py index d1a43ec..1ce6af3 100644 --- a/trpc_agent_sdk/code_executors/_base_workspace_runtime.py +++ b/trpc_agent_sdk/code_executors/_base_workspace_runtime.py @@ -12,15 +12,22 @@ from abc import ABC from abc import abstractmethod +from typing import Awaitable from typing import Callable from typing import TypeAlias from typing import List from typing import Optional +from typing import Tuple from trpc_agent_sdk.context import InvocationContext from trpc_agent_sdk.log import logger +from ._artifacts import save_artifact_helper +from ._constants import DEFAULT_MAX_FILES +from ._constants import DEFAULT_MAX_TOTAL_BYTES +from ._constants import MAX_READ_SIZE_BYTES 
from ._types import CodeFile +from ._types import ManifestFileRef from ._types import ManifestOutput from ._types import WorkspaceCapabilities from ._types import WorkspaceInfo @@ -33,6 +40,21 @@ RunEnvProvider = Callable[[Optional[InvocationContext]], dict[str, str]] +ManifestFetcher: TypeAlias = Callable[[str, int], Awaitable[Tuple[bytes, int]]] +"""Async callable ``(absolute_path, max_bytes) -> (data, raw_size)``. + +Contract: +- ``data`` is the file's content truncated to at most ``max_bytes``. If the + underlying medium cannot cheaply report the full size (e.g. a streaming + read), the fetcher may return ``raw_size = len(data)``; callers that care + about truncation must then treat ``len(data) == max_bytes`` as "possibly + truncated". +- ``raw_size`` is the size of the file on the underlying medium before any + truncation, used only to decide ``truncated`` / ``limits_hit`` flags. +- The fetcher must *not* raise for merely-empty files; it should raise only + for genuine I/O errors so the backend can surface a meaningful message. +""" + class BaseWorkspaceManager(ABC): """ @@ -65,8 +87,209 @@ async def cleanup( class BaseWorkspaceFS(ABC): """ Performs file operations within a workspace. + + Subclasses are expected to implement the abstract operations using + whatever I/O mechanism their backend exposes (direct filesystem, + docker ``get_archive``, Cube RPC, ...). The shared *post-fetch* + pipeline — turning raw matched paths plus a fetcher into + :class:`CodeFile` / :class:`ManifestOutput` models — is provided + here as protected helpers (:meth:`_build_code_files`, + :meth:`_build_manifest_output`) so subclasses can call them + directly without re-implementing the limit / inline / save / MIME + sniffing plumbing, and can override them when they need a tweak. """ + @staticmethod + def _relativize(ws_path: str, full_path: str) -> str: + """Return ``full_path`` stripped of the ``ws.path + "/"`` prefix. 
+ + Kept as a single helper so every backend produces identical + relative paths in :class:`CodeFile` / :class:`ManifestFileRef`. + Falls back to ``full_path`` when the match somehow escapes the + workspace root (e.g. a symlink resolution surfaced an absolute + path on a different mount). + """ + prefix = ws_path.rstrip("/") + "/" + if full_path.startswith(prefix): + return full_path[len(prefix):] + return full_path + + @staticmethod + async def _build_code_files( + ws_path: str, + matches: List[str], + fetcher: ManifestFetcher, + *, + max_read_size: int = MAX_READ_SIZE_BYTES, + ) -> List[CodeFile]: + """Materialise a :meth:`collect` call. + + Reads each matched path with a single per-file byte cap + (``max_read_size``, defaulting to :data:`MAX_READ_SIZE_BYTES`), + sniffs the MIME type, and wraps the result in a :class:`CodeFile`. + Duplicate ``rel`` paths are skipped so callers can pass the raw + glob output without pre-deduping. + + Subclasses normally call this from their :meth:`collect` + override, supplying a ``fetcher`` that knows how to read bytes + from the underlying medium (see :data:`ManifestFetcher`). + Override this method to change the post-fetch shape (e.g. to + emit a richer ``CodeFile`` subclass) without re-implementing + the dedupe / sniff / cap loop. + """ + # Local import keeps the base-class file free of optional / + # heavy dependencies (libmagic, mimetypes lookup tables) at + # module-load time. + from .utils._files import detect_content_type + + seen: set[str] = set() + out: List[CodeFile] = [] + for full_path in matches: + rel = BaseWorkspaceFS._relativize(ws_path, full_path) + if rel in seen: + continue + seen.add(rel) + try: + data, raw_size = await fetcher(full_path, max_read_size) + except Exception: # pylint: disable=broad-except + # Keep collect() best-effort: a single unreadable file + # must not abort the whole batch. Backends that prefer + # strict semantics can short-circuit themselves before + # calling us. 
+ out.append(CodeFile(name=rel, content="", mime_type="application/octet-stream")) + continue + mime = detect_content_type(full_path, data) + out.append( + CodeFile( + name=rel, + content=data.decode("utf-8", errors="replace"), + mime_type=mime, + size_bytes=raw_size, + truncated=raw_size > len(data), + )) + return out + + @staticmethod + async def _build_manifest_output( + ws_path: str, + spec: WorkspaceOutputSpec, + matches: List[str], + fetcher: ManifestFetcher, + ctx: Optional[InvocationContext], + *, + strict_truncated_save: bool = False, + ) -> Tuple[ManifestOutput, List[str], List[int]]: + """Materialise a :meth:`collect_outputs` call. + + Applies ``spec``'s limits (``max_files`` / ``max_file_bytes`` / + ``max_total_bytes``), fills ``inline`` / ``save`` branches, and + produces a :class:`ManifestOutput`. Also returns the list of + saved artifact names and versions so backends that record + metadata (e.g. local's ``OutputRecordMeta``) don't need to + re-scan the manifest. + + Args: + ws_path: Absolute workspace path, used to produce relative + ``name`` fields. + spec: The output spec declared by the caller. + matches: Absolute paths already filtered by the backend's + glob. + fetcher: Async callable that returns ``(data, raw_size)`` + for a path, capped by a requested byte budget. See + :data:`ManifestFetcher`. + ctx: Invocation context. Required when ``spec.save`` is set, + because artifact persistence goes through it. + strict_truncated_save: When ``True``, raise ``RuntimeError`` + if ``spec.save`` is requested for a file that was + truncated by the per-file cap. Container preserves this + "refuse to save half a binary" behaviour; local/cube + historically allow it. + + Returns: + Tuple of ``(manifest, saved_names, saved_versions)``. 
+ """ + from .utils._files import detect_content_type + + max_files = spec.max_files or DEFAULT_MAX_FILES + max_file_bytes = spec.max_file_bytes or MAX_READ_SIZE_BYTES + max_total = spec.max_total_bytes or DEFAULT_MAX_TOTAL_BYTES + + manifest = ManifestOutput() + saved_names: List[str] = [] + saved_versions: List[int] = [] + + seen: set[str] = set() + total_bytes = 0 + count = 0 + + for full_path in matches: + # Check limits *before* fetching so a blown budget doesn't + # cause a useless read of the next big file. + if count >= max_files or total_bytes >= max_total: + manifest.limits_hit = True + break + + rel = BaseWorkspaceFS._relativize(ws_path, full_path) + if rel in seen: + continue + seen.add(rel) + + # Per-file cap is ``max_file_bytes``, but also clamp to the + # remaining total budget so a single huge file cannot exceed + # ``max_total`` all on its own. + remaining_total = max_total - total_bytes + read_budget = min(max_file_bytes, remaining_total) + if read_budget <= 0: + manifest.limits_hit = True + break + + try: + data, raw_size = await fetcher(full_path, read_budget) + except Exception: # pylint: disable=broad-except + # Mirror ``_build_code_files``: a single unreadable file + # must not abort the whole collection. Emit a sentinel + # entry with empty content and the canonical + # "unknown / unreadable" MIME type. This preserves the + # pre-refactor local behaviour + # (``_read_limited_with_cap`` caught and returned + # ``("", "application/octet-stream")``) and is a small + # tolerance upgrade for the container backend, which + # used to abort on the first transient tar error. + manifest.files.append(ManifestFileRef(name=rel, mime_type="application/octet-stream")) + count += 1 + continue + + # Mark limits_hit if either cap actually bit. 
+ if raw_size > len(data): + manifest.limits_hit = True + + truncated = raw_size > len(data) + if truncated and spec.save and strict_truncated_save: + raise RuntimeError(f"cannot save truncated output file: {rel}") + + total_bytes += len(data) + count += 1 + + mime = detect_content_type(full_path, data) + file_ref = ManifestFileRef(name=rel, mime_type=mime) + + if spec.inline: + file_ref.content = data.decode("utf-8", errors="replace") + + if spec.save: + if ctx is None: + raise ValueError("Context is required to save artifacts") + save_name = (spec.name_template + rel) if spec.name_template else rel + version = await save_artifact_helper(ctx, save_name, data, mime) + file_ref.saved_as = save_name + file_ref.version = version + saved_names.append(save_name) + saved_versions.append(version) + + manifest.files.append(file_ref) + + return manifest, saved_names, saved_versions + @abstractmethod async def put_files( self, diff --git a/trpc_agent_sdk/code_executors/_types.py b/trpc_agent_sdk/code_executors/_types.py index 5130d48..66698cb 100644 --- a/trpc_agent_sdk/code_executors/_types.py +++ b/trpc_agent_sdk/code_executors/_types.py @@ -321,7 +321,7 @@ def create_code_execution_result(stdout: str = '', outcome = Outcome.OUTCOME_FAILED if is_timed_out: out_str += "Code execution timed out\n" - outcome = Outcome.OUTCOME_TIMED_OUT + outcome = Outcome.OUTCOME_DEADLINE_EXCEEDED if stdout: out_str += f"Code execution result:\n{stdout}\n" if output_files: diff --git a/trpc_agent_sdk/code_executors/container/_container_ws_runtime.py b/trpc_agent_sdk/code_executors/container/_container_ws_runtime.py index f750e5f..29a16b7 100644 --- a/trpc_agent_sdk/code_executors/container/_container_ws_runtime.py +++ b/trpc_agent_sdk/code_executors/container/_container_ws_runtime.py @@ -67,8 +67,6 @@ from .._types import WorkspaceStageOptions from ..utils import InputRecordMeta from ..utils import WorkspaceMetadata -from ..utils import build_code_files -from ..utils import 
build_manifest_output from ..utils import get_rel_path from ..utils import normalize_globs from ._container_cli import CommandArgs @@ -322,7 +320,7 @@ async def collect(self, resolve_symlinks=True, error_prefix="Failed to collect files", ) - files = await build_code_files(ws.path, matches, self._fetch_bytes) + files = await self._build_code_files(ws.path, matches, self._fetch_bytes) logger.info("Collected %s files from workspace", len(files)) return files @@ -421,7 +419,7 @@ async def collect_outputs(self, ) # Container refuses to persist a half-read artifact, preserving # the historical "never save a truncated binary" guarantee. - manifest, _, _ = await build_manifest_output( + manifest, _, _ = await self._build_manifest_output( ws.path, spec, matches, diff --git a/trpc_agent_sdk/code_executors/cube/__init__.py b/trpc_agent_sdk/code_executors/cube/__init__.py index 675f429..7cd114c 100644 --- a/trpc_agent_sdk/code_executors/cube/__init__.py +++ b/trpc_agent_sdk/code_executors/cube/__init__.py @@ -5,10 +5,12 @@ # tRPC-Agent-Python is licensed under Apache-2.0. """Cube/E2B code executor and workspace runtime. -The optional ``e2b-code-interpreter`` dependency is imported lazily inside -the first sandbox-constructing call (`CubeCodeExecutor.create` / -`.attach` / `.create_or_recreate`). Importing this package does not -require the ``[cube]`` extra to be installed. +This subpackage requires the optional ``e2b-code-interpreter`` dependency +(install with ``pip install trpc-agent-py[cube]``); importing any module +here pulls it in eagerly. Code paths that don't need the Cube backend +should import from :mod:`trpc_agent_sdk.code_executors` instead — that +package never references this subpackage and therefore stays +``[cube]``-free. 
""" from ._code_executor import CubeCodeExecutor diff --git a/trpc_agent_sdk/code_executors/cube/_code_executor.py b/trpc_agent_sdk/code_executors/cube/_code_executor.py index 354e8ab..df89a54 100644 --- a/trpc_agent_sdk/code_executors/cube/_code_executor.py +++ b/trpc_agent_sdk/code_executors/cube/_code_executor.py @@ -14,6 +14,7 @@ from typing_extensions import override +import e2b_code_interpreter as e2b from pydantic import Field from pydantic import PrivateAttr @@ -25,7 +26,6 @@ from .._types import CodeExecutionInput from .._types import CodeExecutionResult from .._types import create_code_execution_result -from ._e2b import _import_e2b from ._sandbox import CubeCommandResult from ._sandbox import CubeSandboxClient from ._types import CubeCodeExecutorConfig @@ -33,6 +33,23 @@ _PYTHON_LANGUAGES = frozenset({"python", "py", "python3", ""}) _BASH_LANGUAGES = frozenset({"bash", "sh"}) +_BASH_DELIMITER = CodeBlockDelimiter(start="```bash\n", end="\n```") + + +def _cube_default_code_block_delimiters() -> list[CodeBlockDelimiter]: + """Default delimiters for :class:`CubeCodeExecutor`. + + Reuses :attr:`BaseCodeExecutor.code_block_delimiters` so the parent's + defaults (currently ``tool_code`` + ``python``) stay the single + source of truth, then appends a ``bash`` fence so text-path + extraction matches what :meth:`CubeCodeExecutor.execute_code` can + actually run (see :meth:`_select_interpreter`). Returns a fresh list + on every call to keep per-instance defaults independent of one + another. + """ + parent_default = BaseCodeExecutor.model_fields["code_block_delimiters"].default + return [*parent_default, _BASH_DELIMITER] + class CubeCodeExecutor(BaseCodeExecutor): """A code executor that runs blocks inside a Cube/E2B remote sandbox. 
@@ -67,15 +84,15 @@ class CubeCodeExecutor(BaseCodeExecutor): stateful: bool = Field(default=False, frozen=True, exclude=True) optimize_data_file: bool = Field(default=False, frozen=True, exclude=True) - # Extend the base default (`tool_code` + `python`) with a `bash` fence - # so text-path extraction matches what ``execute_code`` can actually - # run (see ``_select_interpreter``). Callers may still override via - # the ``code_block_delimiters`` field at construction time. - code_block_delimiters: list[CodeBlockDelimiter] = [ - CodeBlockDelimiter(start="```tool_code\n", end="\n```"), - CodeBlockDelimiter(start="```python\n", end="\n```"), - CodeBlockDelimiter(start="```bash\n", end="\n```"), - ] + # Extend the inherited default with a ``bash`` fence so text-path + # extraction matches what ``execute_code`` can actually run (see + # ``_select_interpreter``). The factory reads the parent's default + # at call time so the base list stays the single source of truth; + # callers may still override via the ``code_block_delimiters`` field + # at construction time. + code_block_delimiters: list[CodeBlockDelimiter] = Field( + default_factory=_cube_default_code_block_delimiters, + ) # `_client` is `Optional` because :meth:`close` / :meth:`destroy` # legitimately drop the handle post-construction. 
`_cfg` has no such @@ -150,7 +167,6 @@ async def create_or_recreate( """ if not cfg.sandbox_id: return await cls.create(cfg) - e2b = _import_e2b() try: return await cls.create(cfg) except e2b.SandboxNotFoundException: @@ -227,6 +243,7 @@ async def execute_code( stdouts: list[str] = [] stderrs: list[str] = [] + any_timed_out = False for index, block in enumerate(blocks): if not block.code: continue @@ -240,8 +257,14 @@ async def execute_code( stdin=block.code.encode("utf-8"), timeout=cfg.execute_timeout, ) + if result.timed_out: + any_timed_out = True self._collect(result, stdouts, stderrs) - return create_code_execution_result(stdout="".join(stdouts), stderr="".join(stderrs)) + return create_code_execution_result( + stdout="".join(stdouts), + stderr="".join(stderrs), + is_timed_out=any_timed_out, + ) @staticmethod def _select_interpreter(language: str) -> str: diff --git a/trpc_agent_sdk/code_executors/cube/_e2b.py b/trpc_agent_sdk/code_executors/cube/_e2b.py deleted file mode 100644 index 9d09894..0000000 --- a/trpc_agent_sdk/code_executors/cube/_e2b.py +++ /dev/null @@ -1,42 +0,0 @@ -# Tencent is pleased to support the open source community by making tRPC-Agent-Python available. -# -# Copyright (C) 2026 Tencent. All rights reserved. -# -# tRPC-Agent-Python is licensed under Apache-2.0. -"""e2b-code-interpreter vendor seam for the Cube package. - -Centralizes the lazy-import boundary (:func:`_import_e2b`) and the small -set of vendor-aware constants used by sibling modules (`_sandbox.py` for -lifecycle/commands, `_transfer.py` for the tar protocol). Keeping this -file thin so neither downstream module has to repeat the install hint or -the ``user=`` plumbing. -""" - -from __future__ import annotations - -# The unix user we run sandbox commands and FS ops as. Standard cube/e2b -# templates ship with `root`; downstream callers do not need to override -# this and we deliberately do not expose a knob to keep the surface small. 
-_GUEST_USER = "root" - -_E2B_INSTALL_HINT = ("e2b-code-interpreter is required for CubeCodeExecutor; " - "install with `pip install trpc-agent-py[cube]`.") - - -def _import_e2b(): - """Lazily import :mod:`e2b_code_interpreter` symbols. - - Deferred so that ``from trpc_agent_sdk.code_executors.cube import ...`` - never requires the optional ``[cube]`` extra to be installed; only - actual sandbox construction or vendor-exception handling pays the - import cost. - - Raises: - ImportError: if the optional ``[cube]`` extra is not installed, - with a message pointing at the install command. - """ - try: - import e2b_code_interpreter as _mod # pylint: disable=import-outside-toplevel - except ImportError as exc: - raise ImportError(_E2B_INSTALL_HINT) from exc - return _mod diff --git a/trpc_agent_sdk/code_executors/cube/_runtime.py b/trpc_agent_sdk/code_executors/cube/_runtime.py index 0f9188f..8056d3f 100644 --- a/trpc_agent_sdk/code_executors/cube/_runtime.py +++ b/trpc_agent_sdk/code_executors/cube/_runtime.py @@ -47,8 +47,6 @@ from .._types import WorkspaceRunProgramSpec from .._types import WorkspaceRunResult from .._types import WorkspaceStageOptions -from ..utils import build_code_files -from ..utils import build_manifest_output from ..utils import normalize_globs from ._code_executor import CubeCodeExecutor from ._paths import join_remote @@ -227,7 +225,7 @@ async def collect(self, patterns: List[str], ctx: Optional[InvocationContext] = None) -> List[CodeFile]: matches = await self._glob(ws.path, normalize_globs(patterns)) - files = await build_code_files(ws.path, matches, self._fetch_file) + files = await self._build_code_files(ws.path, matches, self._fetch_file) logger.debug("Cube collected %d files from %s", len(files), ws.path) return files @@ -237,13 +235,13 @@ async def collect_outputs(self, spec: WorkspaceOutputSpec, ctx: Optional[InvocationContext] = None) -> ManifestOutput: matches = await self._glob(ws.path, normalize_globs(spec.globs)) - manifest, _, _ 
= await build_manifest_output(ws.path, spec, matches, self._fetch_file, ctx) + manifest, _, _ = await self._build_manifest_output(ws.path, spec, matches, self._fetch_file, ctx) logger.debug("Cube collected %d outputs from %s", len(manifest.files), ws.path) return manifest async def _fetch_file(self, full_path: str, max_bytes: int) -> Tuple[bytes, int]: - """Fetcher contract for :func:`utils.build_code_files` / - :func:`utils.build_manifest_output`. + """Fetcher contract for :meth:`BaseWorkspaceFS._build_code_files` / + :meth:`BaseWorkspaceFS._build_manifest_output`. Cube exposes no cheap ``stat`` RPC, so we read the full payload and slice locally; ``raw_size`` reflects the true on-disk size @@ -268,6 +266,16 @@ async def _copy_remote(self, src: str, dst: str) -> None: # nesting stale data instead of replacing it. Removing DST first # makes the operation idempotent across repeated stage_inputs # calls targeting the same destination. + # + # Safety: ``dst`` is supplied exclusively by :meth:`stage_inputs`, + # which routes the caller-provided ``spec.dst`` through + # :func:`normalize_remote_relative` (rejects empty, absolute, and + # ``..``-bearing relatives) and :func:`join_remote` (collapses + # ``..`` after joining under ``ws.path``). ``shell_quote`` then + # neutralises any shell metacharacters in the resulting absolute + # path, and GNU ``rm``'s default ``--preserve-root`` is the + # backstop. New callers of ``_copy_remote`` MUST funnel ``dst`` + # through the same validation chain. 
rm_result = await self._client.commands_run( f"rm -rf {shell_quote(dst)}", timeout=self._timeout, @@ -383,6 +391,7 @@ async def run_program(self, stderr=result.stderr, exit_code=result.exit_code, duration=time.time() - start, + timed_out=result.timed_out, ) diff --git a/trpc_agent_sdk/code_executors/cube/_sandbox.py b/trpc_agent_sdk/code_executors/cube/_sandbox.py index 856e751..327e15c 100644 --- a/trpc_agent_sdk/code_executors/cube/_sandbox.py +++ b/trpc_agent_sdk/code_executors/cube/_sandbox.py @@ -19,12 +19,18 @@ :meth:`read_file_bytes` / :meth:`write_file_bytes`. Pure path/quote helpers live in :mod:`._paths`. The tar-based directory -transfer protocol lives in :mod:`._transfer`. The e2b vendor seam -(lazy import + ``user=`` constant) lives in :mod:`._e2b`. This module -is intentionally the only place that holds an ``AsyncSandbox`` reference +transfer protocol lives in :mod:`._transfer`. This module is +intentionally the only place that holds an ``AsyncSandbox`` reference and therefore is the only place that needs to absorb e2b's quirks (``CommandExitException`` / ``"STOPPED"`` / ``SandboxNotFoundException``). + +``e2b_code_interpreter`` is imported at module top-level. It is +distributed as the optional ``[cube]`` extra (``pip install +trpc-agent-py[cube]``); any code path that reaches this module is by +construction a Cube-backend caller and therefore must have the extra +installed. A missing extra surfaces as a normal :class:`ImportError` +at import time, which is the right place for the failure to land. 
""" from __future__ import annotations @@ -32,15 +38,15 @@ import asyncio from dataclasses import dataclass from pathlib import Path -from typing import TYPE_CHECKING from typing import Any from typing import Mapping from typing import Optional +import e2b_code_interpreter as e2b +from e2b_code_interpreter import AsyncSandbox + from trpc_agent_sdk.log import logger -from ._e2b import _GUEST_USER -from ._e2b import _import_e2b from ._paths import wrap_stdin_heredoc from ._transfer import OnExisting from ._transfer import download_directory_via_tar @@ -48,8 +54,10 @@ from ._transfer import upload_directory_via_tar from ._types import CubeCodeExecutorConfig -if TYPE_CHECKING: - from e2b_code_interpreter import AsyncSandbox +# The unix user we run sandbox commands and FS ops as. Standard cube/e2b +# templates ship with `root`; downstream callers do not need to override +# this and we deliberately do not expose a knob to keep the surface small. +_GUEST_USER = "root" @dataclass @@ -60,12 +68,22 @@ class CubeCommandResult: absorbs the e2b SDK's :class:`CommandExitException` so callers always see a structured return value (matches the local/container code-executor behavior). + + The ``timed_out`` flag distinguishes a deadline-exceeded run from a + plain non-zero exit: e2b raises :class:`e2b.TimeoutException` when + the per-command ``timeout`` is hit, and ``commands_run`` catches it + so callers never see the raw exception. When ``timed_out`` is ``True`` + the process has already been killed by e2b; ``exit_code`` is set to + ``-1`` (mirroring the local/container executors' convention) and + ``stderr`` carries a short, hand-written description rather than the + e2b SDK's verbose original message. """ stdout: str stderr: str exit_code: int duration: float + timed_out: bool = False class CubeSandboxClient: @@ -84,7 +102,9 @@ class CubeSandboxClient: the "already STOPPED" / :class:`SandboxNotFoundException` workarounds. 
- ``commands_run()`` always returns a structured result; non-zero - exit codes never raise. + exit codes never raise, and deadline-exceeded runs surface as + ``CubeCommandResult(timed_out=True, exit_code=-1)`` rather than + propagating e2b's :class:`TimeoutException`. - ``upload_path`` / ``download_path`` auto-dispatch file vs directory and preserve symlinks/perms via tar (see :mod:`._transfer`). @@ -92,8 +112,8 @@ class CubeSandboxClient: the constructor directly. """ - def __init__(self, sandbox: "AsyncSandbox", *, idle_timeout: int, execute_timeout: float): - self._sbx: Optional["AsyncSandbox"] = sandbox + def __init__(self, sandbox: AsyncSandbox, *, idle_timeout: int, execute_timeout: float): + self._sbx: Optional[AsyncSandbox] = sandbox self._idle_timeout = idle_timeout self._execute_timeout = execute_timeout @@ -105,7 +125,6 @@ def sandbox_id(self) -> str: @classmethod async def open_new(cls, cfg: CubeCodeExecutorConfig) -> "CubeSandboxClient": """Create a brand-new remote sandbox.""" - e2b = _import_e2b() sbx = await e2b.AsyncSandbox.create( template=cfg.resolve_template(), api_url=cfg.resolve_api_url(), @@ -125,7 +144,6 @@ async def open_existing(cls, sandbox_id: str, cfg: CubeCodeExecutorConfig) -> "C PAUSED); caller should not silently overwrite locator state. """ - e2b = _import_e2b() sbx = await e2b.AsyncSandbox.connect( sandbox_id, api_url=cfg.resolve_api_url(), @@ -150,7 +168,6 @@ async def destroy(self) -> None: sbx = self._sbx if sbx is None: return - e2b = _import_e2b() try: await sbx.kill() except e2b.SandboxNotFoundException as exc: @@ -172,7 +189,6 @@ async def assert_running(self) -> None: not silently discard operator-managed pause state. 
""" sbx = self._require() - e2b = _import_e2b() info = await sbx.get_info(request_timeout=self._execute_timeout) if info.state != e2b.SandboxState.RUNNING: raise e2b.SandboxException(f"Cube sandbox {sbx.sandbox_id} is in state {info.state.value!r}, " @@ -203,32 +219,62 @@ async def commands_run( ) -> CubeCommandResult: """Run a single shell command and return a structured result. - Non-zero exit codes never raise. Stdin (when provided) is encoded - as a bash heredoc because the e2b SDK's ``stdin`` flag is not a - data channel. + Non-zero exit codes never raise. Deadline-exceeded runs never + raise either: the e2b SDK's :class:`e2b.TimeoutException` is + caught here and turned into a :class:`CubeCommandResult` with + ``timed_out=True`` and ``exit_code=-1``, mirroring the + local/container executors so upstream callers see a single, + unified shape for "command did not succeed". Stdin (when + provided) is encoded as a bash heredoc because the e2b SDK's + ``stdin`` flag is not a data channel. 
""" sbx = self._require() - e2b = _import_e2b() if stdin is not None: command = wrap_stdin_heredoc(command, stdin) + timeout_sec = float(timeout if timeout is not None else self._execute_timeout) kwargs: dict[str, Any] = { "envs": dict(env or {}), "user": _GUEST_USER, - "timeout": float(timeout if timeout is not None else self._execute_timeout), + "timeout": timeout_sec, } if cwd: kwargs["cwd"] = cwd loop = asyncio.get_running_loop() start = loop.time() + timed_out = False try: result = await sbx.commands.run(command, **kwargs) except e2b.CommandExitException as exc: result = exc + except BaseException as exc: + # Timeouts surface here as one of several types depending on + # which transport layer fires first: + # - e2b.TimeoutException (vendor SDK layer) + # - httpcore.ReadTimeout / httpcore.TimeoutException + # (transport layer — can race ahead of the e2b mapping on + # slow Cube deployments) + # The httpcore path is only reachable via the transitive + # dependency, so we match by type-name instead of importing + # httpcore just to subclass-check. We still re-raise anything + # that is not timeout-flavoured so real errors stay visible. 
+ name = type(exc).__name__ + if "Timeout" not in name: + raise + result = None + timed_out = True duration = loop.time() - start await self.set_timeout(self._idle_timeout) + if timed_out: + return CubeCommandResult( + stdout="", + stderr=f"Command timed out after {timeout_sec:g}s", + exit_code=-1, + duration=float(duration), + timed_out=True, + ) return CubeCommandResult( stdout=str(getattr(result, "stdout", "") or ""), stderr=str(getattr(result, "stderr", "") or ""), @@ -296,11 +342,10 @@ async def write_file_bytes(self, remote_abs: str, data: bytes) -> None: async def _is_remote_dir(self, remote_abs: str) -> bool: """Return whether ``remote_abs`` resolves to a directory inside the sandbox.""" sbx = self._require() - e2b = _import_e2b() info = await sbx.files.get_info(remote_abs, user=_GUEST_USER) return info.type == e2b.FileType.DIR - def _require(self) -> "AsyncSandbox": + def _require(self) -> AsyncSandbox: if self._sbx is None: raise RuntimeError("CubeSandboxClient is closed.") return self._sbx diff --git a/trpc_agent_sdk/code_executors/local/_local_ws_runtime.py b/trpc_agent_sdk/code_executors/local/_local_ws_runtime.py index a84e353..a3280eb 100644 --- a/trpc_agent_sdk/code_executors/local/_local_ws_runtime.py +++ b/trpc_agent_sdk/code_executors/local/_local_ws_runtime.py @@ -58,8 +58,6 @@ from .._types import ManifestOutput from .._types import WorkspaceOutputSpec from .._program_session import BaseProgramSession -from ..utils import build_code_files -from ..utils import build_manifest_output from ..utils import ensure_layout from ..utils import load_metadata from ..utils import save_metadata @@ -278,7 +276,7 @@ async def collect(self, List of matching file references """ real_root, matches = self._enumerate_local_matches(ws.path, normalize_globs(patterns)) - return await build_code_files(real_root, matches, self._fetch_bytes) + return await self._build_code_files(real_root, matches, self._fetch_bytes) def _enumerate_local_matches( self, @@ -294,8 +292,8 
@@ def _enumerate_local_matches( Returns ``(real_root, matches)`` where ``real_root`` is the canonicalised workspace root and ``matches`` is the list of canonical absolute paths under it. Both are passed to - :func:`build_code_files` / :func:`build_manifest_output` as a - matched pair so the helpers' prefix-stripping ``_relativize`` + :meth:`_build_code_files` / :meth:`_build_manifest_output` as + a matched pair so the helpers' prefix-stripping ``_relativize`` operates on canonical-vs-canonical paths. Passing the raw (un-resolved) ``ws.path`` would silently leak absolute paths as ``CodeFile.name`` whenever ``ws.path`` itself contains a symlink @@ -343,7 +341,7 @@ async def _fetch_bytes(self, full_path: str, max_bytes: int) -> tuple[bytes, int needing a second ``stat`` call. On read failure this raises and lets - :func:`build_code_files` / :func:`build_manifest_output` + :meth:`_build_code_files` / :meth:`_build_manifest_output` apply their shared ``application/octet-stream`` sentinel — the pre-refactor ``_read_limited`` returned that MIME explicitly for unreadable files, and we preserve that design intent by routing @@ -500,7 +498,9 @@ async def collect_outputs(self, ensure_layout(ws.path) real_root, matches = self._enumerate_local_matches(ws.path, normalize_globs(spec.globs)) - out, saved_names, saved_vers = await build_manifest_output(real_root, spec, matches, self._fetch_bytes, ctx) + out, saved_names, saved_vers = await self._build_manifest_output( + real_root, spec, matches, self._fetch_bytes, ctx, + ) # Record output in workspace metadata (local-only bookkeeping). 
md = load_metadata(ws.path) diff --git a/trpc_agent_sdk/code_executors/utils/__init__.py b/trpc_agent_sdk/code_executors/utils/__init__.py index a65ca74..f203777 100644 --- a/trpc_agent_sdk/code_executors/utils/__init__.py +++ b/trpc_agent_sdk/code_executors/utils/__init__.py @@ -10,9 +10,6 @@ """ from ._code_execution import CodeExecutionUtils -from ._collect import ManifestFetcher -from ._collect import build_code_files -from ._collect import build_manifest_output from ._files import collect_files_with_glob from ._files import copy_dir from ._files import copy_path @@ -34,9 +31,6 @@ __all__ = [ "CodeExecutionUtils", - "ManifestFetcher", - "build_code_files", - "build_manifest_output", "collect_files_with_glob", "copy_dir", "copy_path", diff --git a/trpc_agent_sdk/code_executors/utils/_collect.py b/trpc_agent_sdk/code_executors/utils/_collect.py deleted file mode 100644 index 3bc8c19..0000000 --- a/trpc_agent_sdk/code_executors/utils/_collect.py +++ /dev/null @@ -1,234 +0,0 @@ -# Tencent is pleased to support the open source community by making tRPC-Agent-Python available. -# -# Copyright (C) 2026 Tencent. All rights reserved. -# -# tRPC-Agent-Python is licensed under Apache-2.0. -"""Shared "matches -> models" pipeline for workspace output collection. - -Every workspace backend (local / container / cube) has to walk a list of -matched file paths, read their bytes (with per-file and cumulative -caps), sniff the MIME type, optionally inline the content, and -optionally persist the bytes via the artifact service. The *how-to-read* -part is backend-specific (direct filesystem vs. docker ``get_archive`` -vs. Cube RPC), but everything after "we have the bytes" is identical. - -This module factors that shared tail into two small helpers: - -- :func:`build_code_files` — materialises a ``collect(...)`` call, which - historically returns :class:`CodeFile` and only caps per-file size. 
-- :func:`build_manifest_output` — materialises a ``collect_outputs(...)`` - call, which honours :class:`WorkspaceOutputSpec` (limits, inline, - save, name_template) and produces a :class:`ManifestOutput`. - -Backends supply a ``fetcher`` coroutine that knows how to fetch the -raw bytes of a single absolute path — bounded by an input byte budget — -plus the *raw* size of the file on the underlying medium. Returning -the raw size separately lets the shared helpers compute -``truncated`` / ``limits_hit`` without requiring the fetcher to read -past the budget. -""" - -from __future__ import annotations - -from typing import Awaitable -from typing import Callable -from typing import List -from typing import Optional -from typing import Tuple - -from trpc_agent_sdk.context import InvocationContext - -from .._artifacts import save_artifact_helper -from .._constants import DEFAULT_MAX_FILES -from .._constants import DEFAULT_MAX_TOTAL_BYTES -from .._constants import MAX_READ_SIZE_BYTES -from .._types import CodeFile -from .._types import ManifestFileRef -from .._types import ManifestOutput -from .._types import WorkspaceOutputSpec -from ._files import detect_content_type - -# A fetcher is an async callable ``(absolute_path, max_bytes) -> (data, raw_size)``. -# -# Contract: -# - ``data`` is the file's content truncated to at most ``max_bytes``. If the -# underlying medium cannot cheaply report the full size (e.g. a streaming -# read), the fetcher may return ``raw_size = len(data)``; callers that care -# about truncation must then treat ``len(data) == max_bytes`` as "possibly -# truncated". -# - ``raw_size`` is the size of the file on the underlying medium before any -# truncation, used only to decide ``truncated`` / ``limits_hit`` flags. -# - The fetcher must *not* raise for merely-empty files; it should raise only -# for genuine I/O errors so the backend can surface a meaningful message. 
-ManifestFetcher = Callable[[str, int], Awaitable[Tuple[bytes, int]]] - - -def _relativize(ws_path: str, full_path: str) -> str: - """Return ``full_path`` stripped of the ``ws.path + "/"`` prefix. - - Kept as a single helper so every backend produces identical relative - paths in :class:`CodeFile` / :class:`ManifestFileRef`. - """ - prefix = ws_path.rstrip("/") + "/" - if full_path.startswith(prefix): - return full_path[len(prefix):] - return full_path - - -async def build_code_files( - ws_path: str, - matches: List[str], - fetcher: ManifestFetcher, - *, - max_read_size: Optional[int] = None, -) -> List[CodeFile]: - """Materialise a :meth:`BaseWorkspaceFS.collect` call. - - Reads each matched path with a single per-file byte cap - (``max_read_size``, defaulting to :data:`MAX_READ_SIZE_BYTES` resolved - at call time so tests can ``monkeypatch.setattr`` the constant), - sniffs the MIME type, and wraps the result in a :class:`CodeFile`. - Duplicate ``rel`` paths are skipped so callers can pass the raw glob - output without pre-deduping. - """ - cap = MAX_READ_SIZE_BYTES if max_read_size is None else max_read_size - seen: set[str] = set() - out: List[CodeFile] = [] - for full_path in matches: - rel = _relativize(ws_path, full_path) - if rel in seen: - continue - seen.add(rel) - try: - data, raw_size = await fetcher(full_path, cap) - except Exception: # pylint: disable=broad-except - # Keep collect() best-effort: a single unreadable file must - # not abort the whole batch. Backends that prefer strict - # semantics can short-circuit themselves before calling us. 
- out.append(CodeFile(name=rel, content="", mime_type="application/octet-stream")) - continue - mime = detect_content_type(full_path, data) - out.append( - CodeFile( - name=rel, - content=data.decode("utf-8", errors="replace"), - mime_type=mime, - size_bytes=raw_size, - truncated=raw_size > len(data), - )) - return out - - -async def build_manifest_output( - ws_path: str, - spec: WorkspaceOutputSpec, - matches: List[str], - fetcher: ManifestFetcher, - ctx: Optional[InvocationContext], - *, - strict_truncated_save: bool = False, -) -> Tuple[ManifestOutput, List[str], List[int]]: - """Materialise a :meth:`BaseWorkspaceFS.collect_outputs` call. - - Applies ``spec``'s limits (``max_files`` / ``max_file_bytes`` / - ``max_total_bytes``), fills ``inline`` / ``save`` branches, and - produces a :class:`ManifestOutput`. Also returns the list of saved - artifact names and versions so backends that record metadata (e.g. - local's ``OutputRecordMeta``) don't need to re-scan the manifest. - - Args: - ws_path: Absolute workspace path, used to produce relative - ``name`` fields. - spec: The output spec declared by the caller. - matches: Absolute paths already filtered by the backend's glob. - fetcher: Async callable that returns ``(data, raw_size)`` for a - path, capped by a requested byte budget. See - :data:`ManifestFetcher`. - ctx: Invocation context. Required when ``spec.save`` is set, - because artifact persistence goes through it. - strict_truncated_save: When ``True``, raise ``RuntimeError`` if - ``spec.save`` is requested for a file that was truncated by - the per-file cap. Container preserves this "refuse to save - half a binary" behaviour; local/cube historically allow it. - - Returns: - Tuple of ``(manifest, saved_names, saved_versions)``. 
- """ - max_files = spec.max_files or DEFAULT_MAX_FILES - max_file_bytes = spec.max_file_bytes or MAX_READ_SIZE_BYTES - max_total = spec.max_total_bytes or DEFAULT_MAX_TOTAL_BYTES - - manifest = ManifestOutput() - saved_names: List[str] = [] - saved_versions: List[int] = [] - - seen: set[str] = set() - total_bytes = 0 - count = 0 - - for full_path in matches: - # Check limits *before* fetching so a blown budget doesn't cause - # a useless read of the next big file. - if count >= max_files or total_bytes >= max_total: - manifest.limits_hit = True - break - - rel = _relativize(ws_path, full_path) - if rel in seen: - continue - seen.add(rel) - - # Per-file cap is ``max_file_bytes``, but also clamp to the - # remaining total budget so a single huge file cannot exceed - # ``max_total`` all on its own. - remaining_total = max_total - total_bytes - read_budget = min(max_file_bytes, remaining_total) - if read_budget <= 0: - manifest.limits_hit = True - break - - try: - data, raw_size = await fetcher(full_path, read_budget) - except Exception: # pylint: disable=broad-except - # Mirror ``build_code_files``: a single unreadable file must - # not abort the whole collection. Emit a sentinel entry with - # empty content and the canonical "unknown / unreadable" - # MIME type. This preserves the pre-refactor local behaviour - # (``_read_limited_with_cap`` caught and returned - # ``("", "application/octet-stream")``) and is a small - # tolerance upgrade for the container backend, which used to - # abort on the first transient tar error. - manifest.files.append(ManifestFileRef(name=rel, mime_type="application/octet-stream")) - count += 1 - continue - - # Mark limits_hit if either cap actually bit. 
- if raw_size > len(data): - manifest.limits_hit = True - - truncated = raw_size > len(data) - if truncated and spec.save and strict_truncated_save: - raise RuntimeError(f"cannot save truncated output file: {rel}") - - total_bytes += len(data) - count += 1 - - mime = detect_content_type(full_path, data) - file_ref = ManifestFileRef(name=rel, mime_type=mime) - - if spec.inline: - file_ref.content = data.decode("utf-8", errors="replace") - - if spec.save: - if ctx is None: - raise ValueError("Context is required to save artifacts") - save_name = (spec.name_template + rel) if spec.name_template else rel - version = await save_artifact_helper(ctx, save_name, data, mime) - file_ref.saved_as = save_name - file_ref.version = version - saved_names.append(save_name) - saved_versions.append(version) - - manifest.files.append(file_ref) - - return manifest, saved_names, saved_versions From 22914285eec69b488e47ec0e2f8b3c3768ef9c0f Mon Sep 17 00:00:00 2001 From: yuyili Date: Thu, 7 May 2026 20:11:54 +0800 Subject: [PATCH 3/7] =?UTF-8?q?fix(ci):=20=E5=9C=A8=20requirements-test.tx?= =?UTF-8?q?t=20=E6=B7=BB=E5=8A=A0=20e2b-code-interpreter?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 上一笔 refactor 移除了 cube 子包的 lazy-import 隔离层, `trpc_agent_sdk.code_executors.cube.*` 现在在模块顶部 eager `import e2b_code_interpreter as e2b`。CI 的 requirements-test.txt 之前没有 pin 这个 wheel(仅在 pyproject 的 [cube] extra 里声明), 导致整个 tests/code_executors/cube/ 目录在 collection 阶段 ModuleNotFoundError 失败。这里把 e2b-code-interpreter 显式列入 测试依赖,使 CI 能正常采集并运行 cube 单测。 Assisted-by: Cursor:claude-opus-4.7 Co-authored-by: Cursor --- requirements-test.txt | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/requirements-test.txt b/requirements-test.txt index f3b42ee..a53f45b 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -22,6 +22,13 @@ google-genai>=1.24.0 rapidfuzz>=3.0.0 docker +# Test Cube/E2B code executor +# Cube tests now `import e2b_code_interpreter` at the 
top of the cube +# subpackage modules (the lazy-import seam was removed); without this +# wheel the whole `tests/code_executors/cube/` tree fails at collection +# time on CI. Mirrors the `[cube]` optional extra in pyproject.toml. +e2b-code-interpreter>=2.0.0 + # Test Langfuse opentelemetry-sdk<2.0.0,>=1.28.0 opentelemetry-exporter-otlp-proto-http<2.0.0,>=1.28.0 From 68fb469dd087d1cf49d620990d253850277068ec Mon Sep 17 00:00:00 2001 From: yuyili Date: Thu, 7 May 2026 20:13:39 +0800 Subject: [PATCH 4/7] =?UTF-8?q?style:=20=E4=BF=AE=E5=A4=8D=20yapf=20?= =?UTF-8?q?=E6=A0=BC=E5=BC=8F=E6=A0=A1=E9=AA=8C=E6=8A=A5=E9=94=99?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CI 的 yapf lint 在三处与本仓库的 [tool.yapf] (based_on_style=pep8, column_limit=120) 不一致,本提交按 yapf 建议自动修正: - trpc_agent_sdk/code_executors/__init__.py: 删除 import 块和 __all__ 之间的多余空行(同段跨语义边界 yapf 不允许双空行)。 - trpc_agent_sdk/code_executors/cube/_code_executor.py: ``Field(default_factory=...)`` 单行表达式 < 120 列,yapf 折叠 为单行。 - trpc_agent_sdk/code_executors/local/_local_ws_runtime.py: 超过 120 列的 ``self._build_manifest_output(...)`` 调用按 yapf 默认风格逐参数换行。 无逻辑变更;仅排版。 Assisted-by: Cursor:claude-opus-4.7 Co-authored-by: Cursor --- trpc_agent_sdk/code_executors/__init__.py | 1 - trpc_agent_sdk/code_executors/cube/_code_executor.py | 4 +--- trpc_agent_sdk/code_executors/local/_local_ws_runtime.py | 6 +++++- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/trpc_agent_sdk/code_executors/__init__.py b/trpc_agent_sdk/code_executors/__init__.py index 8c37e6a..75c2f1f 100644 --- a/trpc_agent_sdk/code_executors/__init__.py +++ b/trpc_agent_sdk/code_executors/__init__.py @@ -92,7 +92,6 @@ from .local import create_local_workspace_runtime from .utils import CodeExecutionUtils - __all__ = [ "load_artifact_helper", "parse_artifact_ref", diff --git a/trpc_agent_sdk/code_executors/cube/_code_executor.py b/trpc_agent_sdk/code_executors/cube/_code_executor.py index df89a54..d939d8c 100644 --- 
a/trpc_agent_sdk/code_executors/cube/_code_executor.py +++ b/trpc_agent_sdk/code_executors/cube/_code_executor.py @@ -90,9 +90,7 @@ class CubeCodeExecutor(BaseCodeExecutor): # at call time so the base list stays the single source of truth; # callers may still override via the ``code_block_delimiters`` field # at construction time. - code_block_delimiters: list[CodeBlockDelimiter] = Field( - default_factory=_cube_default_code_block_delimiters, - ) + code_block_delimiters: list[CodeBlockDelimiter] = Field(default_factory=_cube_default_code_block_delimiters, ) # `_client` is `Optional` because :meth:`close` / :meth:`destroy` # legitimately drop the handle post-construction. `_cfg` has no such diff --git a/trpc_agent_sdk/code_executors/local/_local_ws_runtime.py b/trpc_agent_sdk/code_executors/local/_local_ws_runtime.py index a3280eb..2f6bfa5 100644 --- a/trpc_agent_sdk/code_executors/local/_local_ws_runtime.py +++ b/trpc_agent_sdk/code_executors/local/_local_ws_runtime.py @@ -499,7 +499,11 @@ async def collect_outputs(self, real_root, matches = self._enumerate_local_matches(ws.path, normalize_globs(spec.globs)) out, saved_names, saved_vers = await self._build_manifest_output( - real_root, spec, matches, self._fetch_bytes, ctx, + real_root, + spec, + matches, + self._fetch_bytes, + ctx, ) # Record output in workspace metadata (local-only bookkeeping). 
From ad1fec8493375c66ad8d0b56ce883bc7238fe03d Mon Sep 17 00:00:00 2001 From: yuyili Date: Thu, 7 May 2026 20:45:51 +0800 Subject: [PATCH 5/7] Refactor code --- .../code_executors/_base_code_executor.py | 19 +++++++++++--- .../code_executors/cube/_code_executor.py | 25 +++---------------- 2 files changed, 18 insertions(+), 26 deletions(-) diff --git a/trpc_agent_sdk/code_executors/_base_code_executor.py b/trpc_agent_sdk/code_executors/_base_code_executor.py index 887e36e..6b53311 100644 --- a/trpc_agent_sdk/code_executors/_base_code_executor.py +++ b/trpc_agent_sdk/code_executors/_base_code_executor.py @@ -16,6 +16,7 @@ from typing import Optional from pydantic import BaseModel +from pydantic import Field from trpc_agent_sdk.context import InvocationContext from ._base_workspace_runtime import BaseWorkspaceRuntime @@ -23,6 +24,11 @@ from ._types import CodeExecutionInput from ._types import CodeExecutionResult +DEFAULT_CODE_BLOCK_DELIMITERS: tuple[CodeBlockDelimiter, ...] = ( + CodeBlockDelimiter(start="```tool_code\n", end="\n```"), + CodeBlockDelimiter(start="```python\n", end="\n```"), +) + class BaseCodeExecutor(BaseModel): """Abstract base class for all code executors. @@ -59,10 +65,15 @@ class BaseCodeExecutor(BaseModel): error_retry_attempts: int = 2 """The number of attempts to retry on consecutive code execution errors. Default to 2.""" - code_block_delimiters: list[CodeBlockDelimiter] = [ - CodeBlockDelimiter(start="```tool_code\n", end="\n```"), - CodeBlockDelimiter(start="```python\n", end="\n```"), - ] + execute_once_per_invocation: bool = False + """Whether to execute model-extracted code at most once per invocation. + + When enabled, post-processing code execution runs only for the first + detected code block in a single ``invocation_id`` and skips subsequent + auto-execution attempts for that invocation. 
+ """ + + code_block_delimiters: list[CodeBlockDelimiter] = Field(default_factory=lambda: list(DEFAULT_CODE_BLOCK_DELIMITERS)) """The list of the enclosing delimiters to identify the code blocks. For example, the delimiter ('```python\\n', '\\n```') can be diff --git a/trpc_agent_sdk/code_executors/cube/_code_executor.py b/trpc_agent_sdk/code_executors/cube/_code_executor.py index d939d8c..967da10 100644 --- a/trpc_agent_sdk/code_executors/cube/_code_executor.py +++ b/trpc_agent_sdk/code_executors/cube/_code_executor.py @@ -21,6 +21,7 @@ from trpc_agent_sdk.context import InvocationContext from .._base_code_executor import BaseCodeExecutor +from .._base_code_executor import DEFAULT_CODE_BLOCK_DELIMITERS from .._types import CodeBlock from .._types import CodeBlockDelimiter from .._types import CodeExecutionInput @@ -36,21 +37,6 @@ _BASH_DELIMITER = CodeBlockDelimiter(start="```bash\n", end="\n```") -def _cube_default_code_block_delimiters() -> list[CodeBlockDelimiter]: - """Default delimiters for :class:`CubeCodeExecutor`. - - Reuses :attr:`BaseCodeExecutor.code_block_delimiters` so the parent's - defaults (currently ``tool_code`` + ``python``) stay the single - source of truth, then appends a ``bash`` fence so text-path - extraction matches what :meth:`CubeCodeExecutor.execute_code` can - actually run (see :meth:`_select_interpreter`). Returns a fresh list - on every call to keep per-instance defaults independent of one - another. - """ - parent_default = BaseCodeExecutor.model_fields["code_block_delimiters"].default - return [*parent_default, _BASH_DELIMITER] - - class CubeCodeExecutor(BaseCodeExecutor): """A code executor that runs blocks inside a Cube/E2B remote sandbox. 
@@ -84,13 +70,8 @@ class CubeCodeExecutor(BaseCodeExecutor): stateful: bool = Field(default=False, frozen=True, exclude=True) optimize_data_file: bool = Field(default=False, frozen=True, exclude=True) - # Extend the inherited default with a ``bash`` fence so text-path - # extraction matches what ``execute_code`` can actually run (see - # ``_select_interpreter``). The factory reads the parent's default - # at call time so the base list stays the single source of truth; - # callers may still override via the ``code_block_delimiters`` field - # at construction time. - code_block_delimiters: list[CodeBlockDelimiter] = Field(default_factory=_cube_default_code_block_delimiters, ) + code_block_delimiters: list[CodeBlockDelimiter] = Field( + default_factory=lambda: [*DEFAULT_CODE_BLOCK_DELIMITERS, _BASH_DELIMITER]) # `_client` is `Optional` because :meth:`close` / :meth:`destroy` # legitimately drop the handle post-construction. `_cfg` has no such From 153ca96974dc017e6f27507b5c05a86f439ba477 Mon Sep 17 00:00:00 2001 From: yuyili Date: Fri, 8 May 2026 11:38:00 +0800 Subject: [PATCH 6/7] =?UTF-8?q?refactor(skills):=20=E5=B0=86=E7=A4=BA?= =?UTF-8?q?=E4=BE=8B=20skill=20=E7=9B=AE=E5=BD=95=E7=94=B1=20python=5Fmath?= =?UTF-8?q?=20=E9=87=8D=E5=91=BD=E5=90=8D=E4=B8=BA=20python-math?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - examples/skills 与 examples/skills_with_container 下示例 skill 目录改用连字符命名 - 同步更新 README、agent/tools.py、docs/mkdocs/{en,zh}/skill.md 与 openclaw/config_full.temp.yaml 中的引用 - 调整容器示例日志展示,体现 host:// / workspace:// / skill:// 三种 input scheme 全部成功落地 Assisted-by: Cursor:claude-opus-4.7 Co-authored-by: Cursor --- docs/mkdocs/en/skill.md | 4 ++-- docs/mkdocs/zh/skill.md | 4 ++-- .../{python_math => python-math}/SKILL.md | 0 .../scripts/fib.py | 0 examples/skills_with_container/README.md | 18 +++++++++++++++--- examples/skills_with_container/agent/tools.py | 10 +++++----- .../{python_math => python-math}/SKILL.md | 0 
.../scripts/fib.py | 0 .../server/openclaw/config_full.temp.yaml | 2 +- 9 files changed, 25 insertions(+), 13 deletions(-) rename examples/skills/skills/{python_math => python-math}/SKILL.md (100%) rename examples/skills/skills/{python_math => python-math}/scripts/fib.py (100%) rename examples/skills_with_container/skills/{python_math => python-math}/SKILL.md (100%) rename examples/skills_with_container/skills/{python_math => python-math}/scripts/fib.py (100%) diff --git a/docs/mkdocs/en/skill.md b/docs/mkdocs/en/skill.md index 17f5f30..6e6c5c0 100644 --- a/docs/mkdocs/en/skill.md +++ b/docs/mkdocs/en/skill.md @@ -218,7 +218,7 @@ python3 run_agent.py ``` Example skill (excerpt): -[examples/skills/skills/python_math/SKILL.md](../../../examples/skills/skills/python_math/SKILL.md) +[examples/skills/skills/python-math/SKILL.md](../../../examples/skills/skills/python-math/SKILL.md) Tips: - Describe the task you want to accomplish; the model will decide whether a skill is needed based on the overview. 
@@ -2135,6 +2135,6 @@ The **Dynamic Tool Selection** mechanism has been fully implemented and verified - Dynamic tool selection full example: [examples/skills_with_dynamic_tools/run_agent.py](../../../examples/skills_with_dynamic_tools/run_agent.py) - Example structure guide: [examples/skills/README.md](../../../examples/skills/README.md) - Example skills: - - [examples/skills/skills/python_math/SKILL.md](../../../examples/skills/skills/python_math/SKILL.md) + - [examples/skills/skills/python-math/SKILL.md](../../../examples/skills/skills/python-math/SKILL.md) - [examples/skills/skills/file_tools/SKILL.md](../../../examples/skills/skills/file_tools/SKILL.md) - [examples/skills/skills/user_file_ops/SKILL.md](../../../examples/skills/skills/user_file_ops/SKILL.md) diff --git a/docs/mkdocs/zh/skill.md b/docs/mkdocs/zh/skill.md index ef7c473..aea40c7 100644 --- a/docs/mkdocs/zh/skill.md +++ b/docs/mkdocs/zh/skill.md @@ -217,7 +217,7 @@ python3 run_agent.py ``` 示例技能(摘录): -[examples/skills/skills/python_math/SKILL.md](../../../examples/skills/skills/python_math/SKILL.md) +[examples/skills/skills/python-math/SKILL.md](../../../examples/skills/skills/python-math/SKILL.md) 提示词: - 说出你想要完成的任务;模型会根据概览决定是否需要某个技能。 @@ -2133,6 +2133,6 @@ Tools: - 动态工具选择完整示例:[examples/skills_with_dynamic_tools/run_agent.py](../../../examples/skills_with_dynamic_tools/run_agent.py) - 示例结构说明:[examples/skills/README.md](../../../examples/skills/README.md) - 示例技能: - - [examples/skills/skills/python_math/SKILL.md](../../../examples/skills/skills/python_math/SKILL.md) + - [examples/skills/skills/python-math/SKILL.md](../../../examples/skills/skills/python-math/SKILL.md) - [examples/skills/skills/file_tools/SKILL.md](../../../examples/skills/skills/file_tools/SKILL.md) - [examples/skills/skills/user_file_ops/SKILL.md](../../../examples/skills/skills/user_file_ops/SKILL.md) diff --git a/examples/skills/skills/python_math/SKILL.md b/examples/skills/skills/python-math/SKILL.md similarity index 100% rename 
from examples/skills/skills/python_math/SKILL.md rename to examples/skills/skills/python-math/SKILL.md diff --git a/examples/skills/skills/python_math/scripts/fib.py b/examples/skills/skills/python-math/scripts/fib.py similarity index 100% rename from examples/skills/skills/python_math/scripts/fib.py rename to examples/skills/skills/python-math/scripts/fib.py diff --git a/examples/skills_with_container/README.md b/examples/skills_with_container/README.md index 00ce99e..2b830dd 100644 --- a/examples/skills_with_container/README.md +++ b/examples/skills_with_container/README.md @@ -38,14 +38,26 @@ python3 run_agent.py Docker client initialized successfully Container bind mounts enabled: [... 'skills:...:ro', '/tmp/skillrun-inputs:/opt/trpc-agent/inputs:ro'] ... -🔧 [Invoke Tool:: skill_run({... 'inputs': [..., 'workspace://skills/python_math/SKILL.md', ...], ...}) -📊 [Tool Result: {'error': 'tool_execution_error', ... "Failed to stage input: ... SKILL.md': No such file or directory" ...}] +🔧 [Invoke Tool:: skill_run({... 'inputs': [ + 'host:///tmp/skillrun-inputs/sales.csv', + 'workspace://skills/python-math/SKILL.md', + 'skill://python-math/scripts/fib.py', +], ...}) +📊 [Tool Result: { + 'stdout': '', 'stderr': '', 'exit_code': 0, + 'output_files': [ + {'name': 'out/fib.txt', 'content': '0\n1\n1\n2\n3\n5\n8\n13\n21\n34\n', ...}, + {'name': 'out/staged_inputs_tree.txt', 'content': + 'work/inputs:\nsales.csv\n---\nwork/staged_inputs:\nfib.py\npython-math_skill.md\n', ...}, + ], + ... +}] ... 
``` ## 结果分析(是否符合要求) -符合本示例测试要求:容器成功启动并完成一次 `skill_run` 调用链;日志清晰展示 `host://` 与 `skill://` 等路径处理及 `workspace://` 在当期 workspace 中缺失时的失败信息,达到演示 stage_inputs 行为的目的。 +符合本示例测试要求:容器成功启动并完成一次 `skill_run` 调用链;`host://` / `workspace://` / `skill://` 三种 input scheme 都成功落入工作区,输出文件 `out/fib.txt` 和 `out/staged_inputs_tree.txt` 正常产出,进程以 `exit_code=0` 结束。 ## 适用场景建议 diff --git a/examples/skills_with_container/agent/tools.py b/examples/skills_with_container/agent/tools.py index 1f6892e..78ce208 100644 --- a/examples/skills_with_container/agent/tools.py +++ b/examples/skills_with_container/agent/tools.py @@ -83,19 +83,19 @@ def build_container_stage_inputs_specs(inputs_host: str = "/tmp/skillrun-inputs" ), WorkspaceInputSpec( # This file exists after skill staging, so the workspace:// demo is stable. - src="workspace://skills/python_math/SKILL.md", - dst="work/staged_inputs/python_math_skill.md", + src="workspace://skills/python-math/SKILL.md", + dst="work/staged_inputs/python-math_skill.md", mode="copy", ), WorkspaceInputSpec( - src="skill://python_math/scripts/fib.py", + src="skill://python-math/scripts/fib.py", dst="work/staged_inputs/fib.py", mode="copy", ), ] -def build_container_skill_run_payload(skill_name: str = "python_math", +def build_container_skill_run_payload(skill_name: str = "python-math", inputs_host: str = "/tmp/skillrun-inputs") -> dict[str, Any]: """Build a full ``skill_run`` payload for container mode demonstration. 
@@ -108,7 +108,7 @@ def build_container_skill_run_payload(skill_name: str = "python_math", skill_name, "cwd": f"$SKILLS_DIR/{skill_name}", - "command": ("python scripts/fib.py --n 10 > out/fib.txt && " + "command": ("python scripts/fib.py 10 > out/fib.txt && " "(ls -R work/inputs; echo '---'; ls -R work/staged_inputs) > out/staged_inputs_tree.txt"), "inputs": [spec.model_dump() for spec in build_container_stage_inputs_specs(inputs_host=inputs_host)], "output_files": [ diff --git a/examples/skills_with_container/skills/python_math/SKILL.md b/examples/skills_with_container/skills/python-math/SKILL.md similarity index 100% rename from examples/skills_with_container/skills/python_math/SKILL.md rename to examples/skills_with_container/skills/python-math/SKILL.md diff --git a/examples/skills_with_container/skills/python_math/scripts/fib.py b/examples/skills_with_container/skills/python-math/scripts/fib.py similarity index 100% rename from examples/skills_with_container/skills/python_math/scripts/fib.py rename to examples/skills_with_container/skills/python-math/scripts/fib.py diff --git a/trpc_agent_sdk/server/openclaw/config_full.temp.yaml b/trpc_agent_sdk/server/openclaw/config_full.temp.yaml index b7cac37..26f101f 100644 --- a/trpc_agent_sdk/server/openclaw/config_full.temp.yaml +++ b/trpc_agent_sdk/server/openclaw/config_full.temp.yaml @@ -88,7 +88,7 @@ skills: skill_roots: [] # 示例: # skill_roots: - # - /data/xxx/python_math + # - /data/xxx/python-math # - file:///tmp/test_skill.zip # - http://127.0.0.1:8088/archive/test_skill.zip # - file:///data/xxx/skill_dir From 9c7bac8bef2d4bf58484194f2debc221d7688608 Mon Sep 17 00:00:00 2001 From: yuyili Date: Sat, 9 May 2026 12:04:27 +0800 Subject: [PATCH 7/7] Add docs --- docs/mkdocs/en/code_executor.md | 246 +++++++++++++++++++++++++++++++- docs/mkdocs/en/skill.md | 17 ++- docs/mkdocs/zh/code_executor.md | 237 +++++++++++++++++++++++++++++- docs/mkdocs/zh/skill.md | 17 ++- 4 files changed, 507 insertions(+), 10 deletions(-) 
diff --git a/docs/mkdocs/en/code_executor.md b/docs/mkdocs/en/code_executor.md index 0c44a1b..a41eef2 100644 --- a/docs/mkdocs/en/code_executor.md +++ b/docs/mkdocs/en/code_executor.md @@ -6,7 +6,7 @@ When this feature is enabled, if the LLM returns text containing code snippets, ## Code Executor Types -Two types of code executors are currently available: +Three types of code executors are currently available: ### UnsafeLocalCodeExecutor @@ -34,6 +34,21 @@ Two types of code executors are currently available: - Scenarios requiring execution of untrusted code - Scenarios requiring environment isolation +### CubeCodeExecutor + +**Features:** +- Agent dispatches code snippets to a remote Cube/E2B sandbox for execution; supports `Python/Bash` +- Strong sandboxed environment running on a remote host, suitable for executing untrusted code at scale +- Decoupled lifecycle: the same sandbox can be re-attached across processes via `sandbox_id` (`create` / `attach` / `create_or_recreate` factories) +- Ships an optional `CubeWorkspaceRuntime` that adds per-execution workspace directories, file upload/download (single files or whole directories via tar), and structured program runs — useful for the Skill subsystem +- Requires the optional `[cube]` extra (`pip install 'trpc-agent-py[cube]'`, which installs `e2b-code-interpreter`) and access to a Cube/E2B-compatible gateway + +**Use Cases:** +- Production environments where Docker is not available on the agent host +- Scenarios requiring strong remote isolation for untrusted code +- Long-lived skill/code execution that needs a persistent workspace surviving across multiple `execute_code` calls +- Multi-tenant agent platforms that share a remote sandbox fleet + ## Usage Examples When creating an LlmAgent, build a CodeExecutor and configure the `code_executor` parameter to enable code execution functionality. 
@@ -48,15 +63,19 @@ from trpc_agent_sdk.models import OpenAIModel from trpc_agent_sdk.code_executors import BaseCodeExecutor from trpc_agent_sdk.code_executors import UnsafeLocalCodeExecutor from trpc_agent_sdk.code_executors import ContainerCodeExecutor +# Cube is an optional extra (`pip install 'trpc-agent-py[cube]'`) +from trpc_agent_sdk.code_executors.cube import CubeCodeExecutor +from trpc_agent_sdk.code_executors.cube import CubeCodeExecutorConfig from trpc_agent_sdk.log import logger -def _create_code_executor(code_executor_type: str = "unsafe_local") -> BaseCodeExecutor: +async def _create_code_executor(code_executor_type: str = "unsafe_local") -> BaseCodeExecutor: """Create a code executor. Args: code_executor_type: Type of code executor to use. Options: - "unsafe_local": Use UnsafeLocalCodeExecutor (default, no Docker required) - "container": Use ContainerCodeExecutor (requires Docker) + - "cube": Use CubeCodeExecutor (requires the [cube] extra and a Cube/E2B gateway) - None: Auto-detect from environment variable CODE_EXECUTOR_TYPE, or default to "unsafe_local" @@ -76,9 +95,18 @@ def _create_code_executor(code_executor_type: str = "unsafe_local") -> BaseCodeE executor = ContainerCodeExecutor(image="python:3-slim", error_retry_attempts=1) logger.info("ContainerCodeExecutor initialized successfully") return executor + elif code_executor_type == "cube": + # CubeCodeExecutor reads E2B_API_URL / E2B_API_KEY / CUBE_TEMPLATE_ID + # from the environment when the corresponding cfg fields are unset. + # `create()` opens a fresh remote sandbox; pass `sandbox_id=...` in + # the cfg to attach to an existing one instead. + cfg = CubeCodeExecutorConfig(execute_timeout=30.0, idle_timeout=600) + executor = await CubeCodeExecutor.create(cfg) + logger.info("CubeCodeExecutor initialized: sandbox_id=%s", executor.sandbox_id) + return executor else: raise ValueError(f"Invalid code executor type: {code_executor_type}. 
" - "Valid options are: 'unsafe_local', 'container'") + "Valid options are: 'unsafe_local', 'container', 'cube'") ``` @@ -154,6 +182,68 @@ def create_agent() -> LlmAgent: ![ContainerCodeExecutor Execution Result](../assets/imgs/container0.png) ![ContainerCodeExecutor Execution Result 1](../assets/imgs/container1.png) +### Using CubeCodeExecutor + +```python +# ... +async def create_agent() -> LlmAgent: + """Create an agent backed by a remote Cube/E2B sandbox. + + Required environment (read by CubeCodeExecutorConfig.resolve_*): + - E2B_API_URL: Cube/E2B-compatible gateway URL + - E2B_API_KEY: API key for the gateway + - CUBE_TEMPLATE_ID: Cube template id (e.g. `std-XXXXXXXX`) + + Note: `_create_code_executor` is async because `CubeCodeExecutor.create` + opens the remote sandbox over the network. The executor owns the + sandbox; call `await executor.destroy()` when the agent shuts down to + free the remote resource. `executor.close()` only drops the local + handle and lets the sandbox idle out on its own. + """ + # Select cube + executor = await _create_code_executor(code_executor_type="cube") + agent = LlmAgent( + name="code_assistant", + description="Code execution assistant", + model=_create_model(), # You can change this to your preferred model + instruction=INSTRUCTION, + code_executor=executor, # Enables code execution functionality + ) + return agent + +# Install the optional extra before use: +# pip install 'trpc-agent-py[cube]' +# And export the gateway credentials: +# export E2B_API_URL=... +# export E2B_API_KEY=... +# export CUBE_TEMPLATE_ID=... +``` + +#### Attaching to an existing sandbox + +`CubeCodeExecutor` exposes three async factories so callers can choose the +lifecycle policy explicitly. All three read the bound sandbox id from +`cfg.sandbox_id` so it is the single source of truth: + +```python +# 1. Strict create-or-attach: when cfg.sandbox_id is set, attach and assert +# the sandbox is RUNNING; otherwise create a fresh one. 
+executor = await CubeCodeExecutor.create(cfg) + +# 2. Attach-only: requires cfg.sandbox_id to be set; never creates fresh. +executor = await CubeCodeExecutor.attach(cfg) + +# 3. Attach-or-recreate: invokes `on_recreate` when the sandbox is gone, +# then transparently provisions a new one. Useful for long-lived agents +# whose external locator state must be cleared on recreate. +executor = await CubeCodeExecutor.create_or_recreate( + cfg, on_recreate=lambda old_id: clear_locator(old_id), +) +``` + +`close()` is a no-op for the remote sandbox (it just drops the local +handle); `destroy()` explicitly kills the remote sandbox. + ## Configuration Parameters ### UnsafeLocalCodeExecutor Parameters @@ -208,6 +298,117 @@ code_executor = ContainerCodeExecutor( ) ``` +### CubeCodeExecutor Parameters + +`CubeCodeExecutor` is configured via two dataclasses split by ISP: +`CubeCodeExecutorConfig` carries only sandbox-lifecycle / command-execution +settings, and `CubeWorkspaceRuntimeConfig` carries only workspace settings +(see the next section). + +```python +from trpc_agent_sdk.code_executors.cube import ( + CubeCodeExecutor, + CubeCodeExecutorConfig, +) + +cfg = CubeCodeExecutorConfig( + # Cube template id for new sandboxes; falls back to env CUBE_TEMPLATE_ID. + template=None, + + # E2B-compatible Cube API URL; falls back to env E2B_API_URL. + api_url=None, + + # E2B API key; falls back to env E2B_API_KEY. + api_key=None, + + # Existing remote sandbox id. When set, factories attach instead of + # creating a fresh sandbox. + sandbox_id=None, + + # Default per-command timeout in seconds (float). Shared by the bare + # executor and the workspace runtime. Default: 60.0. + execute_timeout=60.0, + + # Sandbox idle lifetime in seconds (int >= 1); renewed on every + # command. Default: 3600 (1 hour). The underlying e2b API takes + # integer seconds — sub-second values are rejected at construction. 
+    idle_timeout=3600, +) + +executor = await CubeCodeExecutor.create(cfg) +``` + +`CubeCodeExecutor` accepts the same `code_block_delimiters` as the other +executors; by default it adds a `bash` delimiter on top of the default +`python` and `tool_code` delimiters so plain `\`\`\`bash\n ... \n\`\`\`` +fences are also picked up. + +## CubeWorkspaceRuntime + +For skill execution and other use cases that need a per-execution +workspace (input staging, structured program runs, output collection), +the Cube package additionally ships `CubeWorkspaceRuntime`. It composes +`CubeWorkspaceManager` (workspace directory lifecycle), `CubeWorkspaceFS` +(file/directory upload, download and glob-based collection), and +`CubeProgramRunner` (structured `cmd` + `args` execution) on top of the +same `CubeSandboxClient`. + +```python +from trpc_agent_sdk.code_executors._types import ( +    WorkspaceOutputSpec, +    WorkspacePutFileInfo, +    WorkspaceRunProgramSpec, +) +from trpc_agent_sdk.code_executors.cube import ( +    CubeCodeExecutor, +    CubeCodeExecutorConfig, +    CubeWorkspaceRuntimeConfig, +    create_cube_workspace_runtime, +) + +executor = await CubeCodeExecutor.create(CubeCodeExecutorConfig()) + +# `workspace_cfg` is optional. When omitted the runtime uses +# DEFAULT_REMOTE_WORKSPACE = "/workspace/cube_agent" as the root. +runtime = create_cube_workspace_runtime( +    executor, +    workspace_cfg=CubeWorkspaceRuntimeConfig( +        # Remote root under which the manager creates per-execution +        # `ws_<exec_id>_<suffix>` subtrees.
+        remote_workspace="/workspace/cube_agent", +    ), +) + +manager = runtime.manager() +fs = runtime.fs() +runner = runtime.runner() + +ws = await manager.create_workspace("demo-1")  # /workspace/cube_agent/ws_demo-1_<suffix> + +await fs.put_files(ws, [ +    WorkspacePutFileInfo(path="work/script.py", +                         content=b"print('script ran')\n"), +]) + +run_result = await runner.run_program( +    ws, +    WorkspaceRunProgramSpec(cmd="python3", args=["work/script.py"], timeout=15.0), +) +print(run_result.exit_code, run_result.stdout) + +outputs = await fs.collect_outputs( +    ws, WorkspaceOutputSpec(globs=["work/*.py"], inline=True), +) +for ref in outputs.files: +    print(ref.name, len(ref.content)) + +await manager.cleanup("demo-1") +``` + +The runtime plugs straight into the Skill subsystem — pass it as +`workspace_runtime` when constructing a skill repository (see +[skill.md](skill.md) for details). + ## Code Block Format The Agent automatically identifies and executes code blocks in LLM responses. Supported code block formats: @@ -245,6 +446,10 @@ After code execution, the results are returned to the LLM in the following forma - Python (`python`, `py`, `python3`, empty string defaults to Python) - Bash (`bash`, `sh`) +### CubeCodeExecutor +- Python (`python`, `py`, `python3`, empty string defaults to Python) +- Bash (`bash`, `sh`) + ## Workflow 1. **User Query** → Agent receives the user query @@ -291,10 +496,42 @@ code_executor = UnsafeLocalCodeExecutor(timeout=30)  # 30-second timeout - Review the log output; the framework logs detailed error information - For ContainerCodeExecutor, check the container logs +### 4. CubeCodeExecutor Cannot Connect / Authenticates as Wrong Tenant + +**Problem:** `CubeCodeExecutor.create` raises with messages like +`Cube sandbox requires \`api_url\` or E2B_API_URL env`, `... api_key ...`, +or `... template ... CUBE_TEMPLATE_ID ...`.
+ +**Solution:** +- Install the optional extra: `pip install 'trpc-agent-py[cube]'` +- Export the three required env vars (or pass them on + `CubeCodeExecutorConfig`): `E2B_API_URL`, `E2B_API_KEY`, `CUBE_TEMPLATE_ID` +- For multi-tenant deployments, prefer setting the cfg fields explicitly so + each agent instance uses its own credentials instead of falling back to + the process-wide environment + +### 5. CubeCodeExecutor Sandbox Disappears Between Calls + +**Problem:** A sandbox attached via `cfg.sandbox_id` raises +`SandboxNotFoundException` (gone) or `SandboxException` (PAUSED) on the +next command. + +**Solution:** +- For long-lived agents, use `CubeCodeExecutor.create_or_recreate(cfg, on_recreate=...)` + so the executor transparently provisions a new sandbox and notifies the + caller to clear any external locator state +- Tune `idle_timeout` (default 3600s) upward if you legitimately need a + longer idle window between commands; every command renews the lease +- Use `CubeWorkspaceManager.cleanup(exec_id)` instead of `executor.destroy()` + if you only want to drop one workspace while keeping the sandbox alive + ## Complete Example See the complete example code: [examples/code_executors/agent/agent.py](../../../examples/code_executors/agent/agent.py) +End-to-end Cube example (executor + workspace runtime): +[examples/code_executors/cube_demo.py](../../../examples/code_executors/cube_demo.py) + ## Security Recommendations 1. **Production Environment**: It is strongly recommended to use `ContainerCodeExecutor` for sandbox isolation @@ -307,4 +544,5 @@ See the complete example code: [examples/code_executors/agent/agent.py](../../.. 
- **UnsafeLocalCodeExecutor**: Fast execution speed, suitable for rapid iteration - **ContainerCodeExecutor**: The initial startup requires pulling the image; subsequent executions are relatively fast -- It is recommended to use ContainerCodeExecutor in production environments and UnsafeLocalCodeExecutor in development environments +- **CubeCodeExecutor**: Adds network round-trips to a remote sandbox per command, but amortizes well for long-lived sessions because the sandbox is reused across calls (and across processes via `sandbox_id`); workspace file transfers use a tar-based protocol so directory uploads/downloads stay a single round-trip +- It is recommended to use ContainerCodeExecutor or CubeCodeExecutor in production environments and UnsafeLocalCodeExecutor in development environments diff --git a/docs/mkdocs/en/skill.md b/docs/mkdocs/en/skill.md index 6e6c5c0..f76a235 100644 --- a/docs/mkdocs/en/skill.md +++ b/docs/mkdocs/en/skill.md @@ -102,10 +102,16 @@ from trpc_agent_sdk.skills import SkillToolSet from trpc_agent_sdk.skills import create_default_skill_repository from trpc_agent_sdk.code_executors import create_local_workspace_runtime from trpc_agent_sdk.code_executors import create_container_workspace_runtime +# Cube is an optional extra (`pip install 'trpc-agent-py[cube]'`); import lazily. 
+# from trpc_agent_sdk.code_executors.cube import CubeCodeExecutor, CubeCodeExecutorConfig +# from trpc_agent_sdk.code_executors.cube import create_cube_workspace_runtime -# Create workspace runtime (local or container) +# Create workspace runtime (local, container, or cube) workspace_runtime = create_local_workspace_runtime() # Or use container: workspace_runtime = create_container_workspace_runtime() +# Or use a remote Cube/E2B sandbox: +# executor = await CubeCodeExecutor.create(CubeCodeExecutorConfig()) +# workspace_runtime = create_cube_workspace_runtime(executor) # Create skill repository repository = create_default_skill_repository("./skills", workspace_runtime=workspace_runtime) @@ -1073,11 +1079,20 @@ LLM calls skill_run(skill="python-math", command="python3 scripts/fib.py 10") - Executes commands directly on the local system, suitable for development and testing - **Container executor** (Docker): [trpc_agent_sdk/code_executors/container/_container_ws_runtime.py](../../../trpc_agent_sdk/code_executors/container/_container_ws_runtime.py) - Executes in Docker containers, providing better isolation +- **Cube executor** (remote E2B sandbox): [trpc_agent_sdk/code_executors/cube/_runtime.py](../../../trpc_agent_sdk/code_executors/cube/_runtime.py) + - Executes inside a remote Cube/E2B sandbox; suitable for environments without local Docker, or when strong remote isolation is required + - Construct via `create_cube_workspace_runtime(executor, workspace_cfg=...)`; see [code_executor.md](code_executor.md#cubeworkspaceruntime) for details + - Requires the optional `[cube]` extra (`pip install 'trpc-agent-py[cube]'`) and the `E2B_API_URL` / `E2B_API_KEY` / `CUBE_TEMPLATE_ID` environment variables (or equivalent cfg fields) **Container executor notes**: - The run base directory is writable; when `$SKILLS_ROOT` is set, it is mounted in read-only mode - Network access is disabled by default for reproducibility and security +**Cube executor notes**: +- File and directory 
transfers use a tar-based protocol so directory upload/download stays a single round-trip and preserves symlinks/permissions +- The remote workspace root defaults to `/workspace/cube_agent`; per-execution subtrees follow the `ws_<exec_id>_<suffix>` naming convention and are recreated lazily on every `create_workspace` call (so external sandbox cleanup heals transparently) +- The same Cube sandbox can back both the bare `CubeCodeExecutor` and the workspace runtime; commands share `execute_timeout` from `CubeCodeExecutorConfig` + **Security and resource limits**: - **Workspace isolation**: All read/write operations are confined within the workspace - **Risk control**: Reduces security risks through timeout mechanisms and read-only skill trees diff --git a/docs/mkdocs/zh/code_executor.md b/docs/mkdocs/zh/code_executor.md index 690ccdf..30af740 100644 --- a/docs/mkdocs/zh/code_executor.md +++ b/docs/mkdocs/zh/code_executor.md @@ -6,7 +6,7 @@ ## 代码执行器类型 -目前提供下面两种代码执行器: +目前提供下面三种代码执行器: ### UnsafeLocalCodeExecutor @@ -34,6 +34,21 @@ - 需要执行不可信代码的场景 - 需要环境隔离的场景 +### CubeCodeExecutor + +**特点:** +- Agent 派发代码片段到远端 Cube/E2B 沙箱中执行,支持 `Python/Bash` 语言 +- 强沙箱环境,运行在远端宿主上,适合大规模执行不可信代码 +- 生命周期解耦:通过 `sandbox_id` 可以跨进程重新挂接到同一个沙箱(提供 `create` / `attach` / `create_or_recreate` 三种工厂方法) +- 附带可选的 `CubeWorkspaceRuntime`:提供按执行隔离的工作目录、文件/目录上传下载(目录走 tar 协议)、结构化程序运行能力,可用于 Skill 子系统 +- 需要安装可选 extra `[cube]`(`pip install 'trpc-agent-py[cube]'`,会带上 `e2b-code-interpreter`),并能访问 Cube/E2B 兼容网关 + +**适用场景:** +- 生产环境,且 Agent 宿主上没有 Docker +- 对不可信代码需要强远端隔离的场景 +- 需要长期复用工作空间、跨多次 `execute_code` 共享文件的 Skill / 代码执行任务 +- 多租户 Agent 平台,共用一组远端沙箱集群 + ## 使用示例 创建 LlmAgent 时,构建 CodeExecutor 并配置`code_executor`参数,即可启用代码执行功能。 @@ -48,15 +63,19 @@ from trpc_agent_sdk.models import OpenAIModel from trpc_agent_sdk.code_executors import BaseCodeExecutor from trpc_agent_sdk.code_executors import UnsafeLocalCodeExecutor from trpc_agent_sdk.code_executors import ContainerCodeExecutor +# Cube 是可选 extra(`pip install 'trpc-agent-py[cube]'`),按需引入。
+from trpc_agent_sdk.code_executors.cube import CubeCodeExecutor +from trpc_agent_sdk.code_executors.cube import CubeCodeExecutorConfig from trpc_agent_sdk.log import logger -def _create_code_executor(code_executor_type: str = "unsafe_local") -> BaseCodeExecutor: +async def _create_code_executor(code_executor_type: str = "unsafe_local") -> BaseCodeExecutor: """Create a code executor. Args: code_executor_type: Type of code executor to use. Options: - "unsafe_local": Use UnsafeLocalCodeExecutor (default, no Docker required) - "container": Use ContainerCodeExecutor (requires Docker) + - "cube": Use CubeCodeExecutor (requires the [cube] extra and a Cube/E2B gateway) - None: Auto-detect from environment variable CODE_EXECUTOR_TYPE, or default to "unsafe_local" @@ -76,9 +95,18 @@ def _create_code_executor(code_executor_type: str = "unsafe_local") -> BaseCodeE executor = ContainerCodeExecutor(image="python:3-slim", error_retry_attempts=1) logger.info("ContainerCodeExecutor initialized successfully") return executor + elif code_executor_type == "cube": + # CubeCodeExecutor 在 cfg 字段未设时,会从环境变量读取 + # E2B_API_URL / E2B_API_KEY / CUBE_TEMPLATE_ID。 + # `create()` 会开一个新的远端沙箱;如果想挂接到已有沙箱, + # 在 cfg 里传 `sandbox_id=...` 即可。 + cfg = CubeCodeExecutorConfig(execute_timeout=30.0, idle_timeout=600) + executor = await CubeCodeExecutor.create(cfg) + logger.info("CubeCodeExecutor initialized: sandbox_id=%s", executor.sandbox_id) + return executor else: raise ValueError(f"Invalid code executor type: {code_executor_type}. " - "Valid options are: 'unsafe_local', 'container'") + "Valid options are: 'unsafe_local', 'container', 'cube'") ``` @@ -154,6 +182,65 @@ def create_agent() -> LlmAgent: ![ContainerCodeExecutor执行结果](../assets/imgs/container0.png) ![ContainerCodeExecutor执行结果1](../assets/imgs/container1.png) +### 使用 CubeCodeExecutor + +```python +# ... +async def create_agent() -> LlmAgent: + """Create an agent backed by a remote Cube/E2B sandbox. 
+ + 必备环境变量(由 CubeCodeExecutorConfig.resolve_* 读取): + - E2B_API_URL: Cube/E2B 兼容网关 URL + - E2B_API_KEY: 网关 API Key + - CUBE_TEMPLATE_ID: Cube 模板 id(如 `std-XXXXXXXX`) + + 说明:`_create_code_executor` 改成 async 是因为 `CubeCodeExecutor.create` + 需要走网络打开远端沙箱。executor 持有该沙箱,Agent 退出时 + 需要 `await executor.destroy()` 显式释放远端资源;`executor.close()` + 只丢本地句柄,沙箱会按 idle_timeout 自然过期。 + """ + # 选择 cube + executor = await _create_code_executor(code_executor_type="cube") + agent = LlmAgent( + name="code_assistant", + description="代码执行助手", + model=_create_model(), # You can change this to your preferred model + instruction=INSTRUCTION, + code_executor=executor, # Enables code execution functionality + ) + return agent + +# 使用前先安装可选 extra: +# pip install 'trpc-agent-py[cube]' +# 并导出网关凭据: +# export E2B_API_URL=... +# export E2B_API_KEY=... +# export CUBE_TEMPLATE_ID=... +``` + +#### 挂接到已存在的沙箱 + +`CubeCodeExecutor` 提供三个 async 工厂方法,让调用方显式选择生命周期策略。 +三者都从 `cfg.sandbox_id` 读取目标沙箱 id(单一来源): + +```python +# 1. 严格 create-or-attach:cfg.sandbox_id 已设则挂接并断言 RUNNING; +# 未设则新建一个。 +executor = await CubeCodeExecutor.create(cfg) + +# 2. 仅挂接:要求 cfg.sandbox_id 已设;不会新建。 +executor = await CubeCodeExecutor.attach(cfg) + +# 3. 
attach-or-recreate:沙箱已不存在时调用 `on_recreate`,再透明地新建。 +# 适合需要在 recreate 时清理外部 locator 状态的长生命周期 Agent。 +executor = await CubeCodeExecutor.create_or_recreate( + cfg, on_recreate=lambda old_id: clear_locator(old_id), +) +``` + +`close()` 对远端沙箱是 no-op,只丢本地句柄;`destroy()` 才会显式杀掉 +远端沙箱。 + ## 配置参数 ### UnsafeLocalCodeExecutor 参数 @@ -208,6 +295,113 @@ code_executor = ContainerCodeExecutor( ) ``` +### CubeCodeExecutor 参数 + +`CubeCodeExecutor` 按 ISP 拆成两个 dataclass:`CubeCodeExecutorConfig` +只承载沙箱生命周期 / 命令执行相关字段,`CubeWorkspaceRuntimeConfig` +只承载工作空间相关字段(见下一节)。 + +```python +from trpc_agent_sdk.code_executors.cube import ( + CubeCodeExecutor, + CubeCodeExecutorConfig, +) + +cfg = CubeCodeExecutorConfig( + # 新建沙箱使用的 Cube 模板 id;为空时回退到环境变量 CUBE_TEMPLATE_ID。 + template=None, + + # E2B 兼容的 Cube API URL;为空时回退到环境变量 E2B_API_URL。 + api_url=None, + + # E2B API Key;为空时回退到环境变量 E2B_API_KEY。 + api_key=None, + + # 已存在的远端沙箱 id。设置后工厂方法会“挂接”而非新建。 + sandbox_id=None, + + # 单条命令默认超时(秒,float)。bare executor 与 workspace + # runtime 共用该值。默认 60.0。 + execute_timeout=60.0, + + # 沙箱空闲生命周期(int 秒,>=1),每次命令都会续期。默认 + # 3600(1 小时)。底层 e2b API 接收 int 秒,所以这里禁止 + # 浮点值(在构造期就拒掉,避免 0.9 静默被截成 0)。 + idle_timeout=3600, +) + +executor = await CubeCodeExecutor.create(cfg) +``` + +`CubeCodeExecutor` 也支持 `code_block_delimiters`,默认在标准的 `python` +和 `tool_code` 之外又加了一个 `bash` 分隔符,所以普通的 +\`\`\`bash\n ... 
\n\`\`\` 围栏也能被识别。 + +## CubeWorkspaceRuntime + +对于 Skill 执行等需要按执行隔离工作目录(输入暂存、结构化程序运行、 +输出收集)的场景,Cube 子包还提供了 `CubeWorkspaceRuntime`。它在同一个 +`CubeSandboxClient` 之上组合了: + +- `CubeWorkspaceManager`:工作目录生命周期 +- `CubeWorkspaceFS`:文件 / 目录上传下载、按 glob 收集输出 +- `CubeProgramRunner`:结构化的 `cmd` + `args` 程序运行 + +```python +from trpc_agent_sdk.code_executors._types import ( +    WorkspaceOutputSpec, +    WorkspacePutFileInfo, +    WorkspaceRunProgramSpec, +) +from trpc_agent_sdk.code_executors.cube import ( +    CubeCodeExecutor, +    CubeCodeExecutorConfig, +    CubeWorkspaceRuntimeConfig, +    create_cube_workspace_runtime, +) + +executor = await CubeCodeExecutor.create(CubeCodeExecutorConfig()) + +# `workspace_cfg` 可选,省略时使用 DEFAULT_REMOTE_WORKSPACE +# = "/workspace/cube_agent" 作为根目录。 +runtime = create_cube_workspace_runtime( +    executor, +    workspace_cfg=CubeWorkspaceRuntimeConfig( +        # Manager 在该路径下创建按执行隔离的 +        # `ws_<exec_id>_<suffix>` 子目录。 +        remote_workspace="/workspace/cube_agent", +    ), +) + +manager = runtime.manager() +fs = runtime.fs() +runner = runtime.runner() + +ws = await manager.create_workspace("demo-1")  # /workspace/cube_agent/ws_demo-1_<suffix> + +await fs.put_files(ws, [ +    WorkspacePutFileInfo(path="work/script.py", +                         content=b"print('script ran')\n"), +]) + +run_result = await runner.run_program( +    ws, +    WorkspaceRunProgramSpec(cmd="python3", args=["work/script.py"], timeout=15.0), +) +print(run_result.exit_code, run_result.stdout) + +outputs = await fs.collect_outputs( +    ws, WorkspaceOutputSpec(globs=["work/*.py"], inline=True), +) +for ref in outputs.files: +    print(ref.name, len(ref.content)) + +await manager.cleanup("demo-1") +``` + +该 runtime 可以直接接入 Skill 子系统 —— 在创建 skill repository 时 +作为 `workspace_runtime` 传入即可(详见 [skill.md](skill.md))。 + ## 代码块格式 Agent会自动识别LLM返回中的代码块并执行。支持的代码块格式: @@ -245,6 +439,10 @@ print(result) - Python (`python`, `py`, `python3`, 空字符串默认为Python) - Bash (`bash`, `sh`) +### CubeCodeExecutor +- Python (`python`, `py`, `python3`, 空字符串默认为Python) +- Bash (`bash`, `sh`) + ## 工作流程 1.
**用户查询** → Agent接收用户查询 @@ -291,10 +489,40 @@ code_executor = UnsafeLocalCodeExecutor(timeout=30) # 30秒超时 - 查看日志输出,框架会记录详细的错误信息 - 对于ContainerCodeExecutor,检查容器日志 +### 4. CubeCodeExecutor 无法连接 / 鉴权到错误租户 + +**问题:** `CubeCodeExecutor.create` 抛出形如 +`Cube sandbox requires \`api_url\` or E2B_API_URL env`、`... api_key ...`、 +或 `... template ... CUBE_TEMPLATE_ID ...` 的错误。 + +**解决方案:** +- 安装可选 extra:`pip install 'trpc-agent-py[cube]'` +- 导出三个必备环境变量(或在 `CubeCodeExecutorConfig` 上显式传入): + `E2B_API_URL`、`E2B_API_KEY`、`CUBE_TEMPLATE_ID` +- 多租户部署建议显式给 cfg 字段赋值,避免不同 Agent 实例落到同一份 + 进程级环境变量上 + +### 5. CubeCodeExecutor 沙箱在两次调用之间消失 + +**问题:** 通过 `cfg.sandbox_id` 挂接的沙箱,在下一次命令时抛 +`SandboxNotFoundException`(已销毁)或 `SandboxException`(PAUSED)。 + +**解决方案:** +- 长生命周期 Agent 推荐使用 + `CubeCodeExecutor.create_or_recreate(cfg, on_recreate=...)`,让 executor + 透明地新建沙箱,并通过 callback 通知调用方清理外部 locator 状态 +- 如果确实需要更长的空闲窗口,调大 `idle_timeout`(默认 3600 秒), + 每次命令都会续期 +- 仅想丢掉某个工作目录而保留沙箱时,用 + `CubeWorkspaceManager.cleanup(exec_id)` 而不是 `executor.destroy()` + ## 完整示例 查看完整示例代码:[examples/code_executors/agent/agent.py](../../../examples/code_executors/agent/agent.py) +Cube 端到端示例(executor + workspace runtime): +[examples/code_executors/cube_demo.py](../../../examples/code_executors/cube_demo.py) + ## 安全建议 1. 
**生产环境**:强烈建议使用`ContainerCodeExecutor`,提供沙箱隔离 @@ -307,4 +535,5 @@ code_executor = UnsafeLocalCodeExecutor(timeout=30) # 30秒超时 - **UnsafeLocalCodeExecutor**:执行速度快,适合快速迭代 - **ContainerCodeExecutor**:首次启动需要拉取镜像,后续执行速度较快 -- 建议在生产环境使用 ContainerCodeExecutor,开发环境可以使用 UnsafeLocalCodeExecutor +- **CubeCodeExecutor**:每条命令多一次远端网络往返,但因为沙箱可以跨多次调用复用(且通过 `sandbox_id` 跨进程复用),长会话场景摊销得很好;工作空间文件传输使用 tar 协议,目录上传/下载都是单次往返 +- 建议在生产环境使用 ContainerCodeExecutor 或 CubeCodeExecutor,开发环境可以使用 UnsafeLocalCodeExecutor diff --git a/docs/mkdocs/zh/skill.md b/docs/mkdocs/zh/skill.md index aea40c7..760e5eb 100644 --- a/docs/mkdocs/zh/skill.md +++ b/docs/mkdocs/zh/skill.md @@ -102,10 +102,16 @@ from trpc_agent_sdk.skills import SkillToolSet from trpc_agent_sdk.skills import create_default_skill_repository from trpc_agent_sdk.code_executors import create_local_workspace_runtime from trpc_agent_sdk.code_executors import create_container_workspace_runtime +# Cube 是可选 extra(`pip install 'trpc-agent-py[cube]'`),按需引入。 +# from trpc_agent_sdk.code_executors.cube import CubeCodeExecutor, CubeCodeExecutorConfig +# from trpc_agent_sdk.code_executors.cube import create_cube_workspace_runtime -# 创建工作空间运行时(本地或容器) +# 创建工作空间运行时(本地、容器或 Cube) workspace_runtime = create_local_workspace_runtime() # 或使用容器:workspace_runtime = create_container_workspace_runtime() +# 或使用远端 Cube/E2B 沙箱: +# executor = await CubeCodeExecutor.create(CubeCodeExecutorConfig()) +# workspace_runtime = create_cube_workspace_runtime(executor) # 创建技能仓库 repository = create_default_skill_repository("./skills", workspace_runtime=workspace_runtime) @@ -1072,11 +1078,20 @@ LLM 调用 skill_run(skill="python-math", command="python3 scripts/fib.py 10") - 直接在本地系统执行命令,适合开发和测试 - **容器执行器**(Docker):[trpc_agent_sdk/code_executors/container/_container_ws_runtime.py](../../../trpc_agent_sdk/code_executors/container/_container_ws_runtime.py) - 在 Docker 容器中执行,提供更好的隔离性 +- **Cube 执行器**(远端 E2B 
沙箱):[trpc_agent_sdk/code_executors/cube/_runtime.py](../../../trpc_agent_sdk/code_executors/cube/_runtime.py) + - 在远端 Cube/E2B 沙箱中执行;适合宿主上没有 Docker、或者需要强远端隔离的场景 + - 通过 `create_cube_workspace_runtime(executor, workspace_cfg=...)` 构造;详见 [code_executor.md](code_executor.md#cubeworkspaceruntime) + - 需要安装可选 extra `[cube]`(`pip install 'trpc-agent-py[cube]'`),并配置 `E2B_API_URL` / `E2B_API_KEY` / `CUBE_TEMPLATE_ID` 环境变量(或对应 cfg 字段) **容器执行器注意事项**: - 运行基础目录可写;当设置了 `$SKILLS_ROOT` 时,会以只读方式挂载 - 默认禁用网络访问,以提高可重复性和安全性 +**Cube 执行器注意事项**: +- 文件 / 目录传输使用 tar 协议,目录上传下载是单次往返,并保留符号链接和权限 +- 远端工作根目录默认 `/workspace/cube_agent`;按执行隔离的子目录命名为 `ws_<exec_id>_<suffix>`,每次 `create_workspace` 都会幂等地 `mkdir -p`,外部清理也能透明恢复 +- 同一个 Cube 沙箱可以同时承载 bare `CubeCodeExecutor` 与 workspace runtime,命令共享 `CubeCodeExecutorConfig.execute_timeout` + **安全性和资源限制**: - **工作空间隔离**:所有读写操作限制在工作空间内 - **风险控制**:通过超时机制和只读技能树降低安全风险