test(state-store-s3): live S3 round-trip suite — provisioned + verified

kapaleshreyas · claude · kapaleshreyas · commit c3eaf15e5523 · 2026-05-18T08:14:34.000-04:00
scripts/test-sandboxes-s3.py — 9 assertions across 4 tests, run against real
AWS S3 (bucket: clawagent-sandbox-snapshots-test in us-east-2):

- s3_list_endpoint: GET /snapshots?stateStore=s3 returns 200
- s3_snapshot_round_trip: attachment-seed → snapshot → S3 PUT verified by
  `aws s3 ls` (meta.json + workdir.tar.gz) → restore into new sandbox → agent
  reads file (STARFISH-7 recovered through the full pipeline)
- s3_autosave_on_dispose: autoSave wired to s3 → DELETE → snapshot in bucket
- s3_delete_snapshot: DELETE /snapshots/:id removes both S3 objects

9/9 passed end-to-end against https://api.clawagent.sh.

Resources provisioned (account 182399701833):

- S3 bucket: clawagent-sandbox-snapshots-test (us-east-2), public access
  blocked + SSE-S3 default encryption
- IAM user: clawagent-sandbox-snapshots, inline policy scoped to ListBucket /
  Get|Put|DeleteObject on this single bucket — denied for ListAllMyBuckets etc.
- Access key: live, wired into EC2 systemd EnvironmentFile

EC2 service env now resolves defaultStateStore: s3 (registered: memory, s3).

Imports the shared helpers from test-sandboxes.py rather than duplicating
HTTP/SSE/sandbox-create code.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
diff --git a/scripts/test-sandboxes-s3.py b/scripts/test-sandboxes-s3.py
@@ -0,0 +1,214 @@
+#!/usr/bin/env python3
+"""
+Wedge 1.13 — S3-backed StateStore round-trip tests.
+
+Hits the live ComputerAgentServer (default https://api.clawagent.sh) with
+stateStore: { kind: "s3", options: { bucket: <env S3_BUCKET> } }.
+
+Requires:
+  - S3_BUCKET    — the bucket the server was configured with; may also be
+                   passed as --bucket (if it does not match the server's
+                   bucket, the server lists a different bucket and the tests
+                   fail)
+  - AWS creds    — optional; used only for the cross-checks done via the aws
+                   cli. The tests still run without them as long as the
+                   server has its own creds wired via env / instance role.
+
+Run:
+  S3_BUCKET=clawagent-sandbox-snapshots-test \
+    python3 scripts/test-sandboxes-s3.py --base https://api.clawagent.sh
+"""
+
+import argparse, json, os, subprocess, sys, time, threading
+from contextlib import contextmanager
+from concurrent.futures import ThreadPoolExecutor
+
+# Share helpers with the main suite by importing it as a module. Keeps
+# wire/SSE/sandbox-create code in one place.
+#
+# The script's own directory is put first on sys.path so the sibling file is
+# found regardless of the current working directory. The module name contains
+# a hyphen, which a plain `import` statement cannot express, hence the
+# import_module() call with the name given as a string.
+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
+from importlib import import_module
+ts = import_module("test-sandboxes")
+
+# Mirror its globals + helpers we use directly, so the tests below can call
+# them unqualified (jget, jpost, jdelete, sse_chat, ...).
+http = ts.http
+jget = ts.jget
+jpost = ts.jpost
+jdelete = ts.jdelete
+sse_chat = ts.sse_chat
+Report = ts.Report
+sandbox = ts.sandbox
+DEFAULT_BODY = ts.DEFAULT_BODY
+
+
+def t_s3_list_endpoint(base, r, bucket):
+    """Smoke-test the snapshot listing endpoint against the S3 state store.
+
+    Issues a single GET /snapshots for ``bucket`` and records one assertion on
+    ``r``: the response must be a 200 whose body contains a "snapshots" key.
+    """
+    url = f"{base}/snapshots?stateStore=s3&bucket={bucket}"
+    status, payload = jget(url)
+    listed_ok = status == 200 and "snapshots" in payload
+    r.add(
+        "GET /snapshots?stateStore=s3 returns 200 + snapshots array",
+        listed_ok,
+        f"status={status} keys={list(payload.keys())}",
+    )
+
+
+def t_s3_snapshot_round_trip(base, r, bucket):
+    """Seed file via attachment, snapshot to real S3, restore into new sandbox, read back.
+
+    Assertions recorded on ``r``, in order:
+      1. the snapshot call returns 200 with a ``snap_``-prefixed id,
+      2. the snapshot reports a non-zero byte count and file count,
+      3. (best-effort, aws cli) meta.json + workdir.tar.gz exist under the
+         snapshot's prefix in ``bucket``,
+      4. the restore call returns 201 with a ``sbx_``-prefixed sandbox id,
+      5. the agent in the restored sandbox can read the seeded marker file.
+
+    The test stops at the first failed snapshot/restore step: later steps
+    would only run against an empty id and report misleading failures.
+    Both sandboxes and the snapshot are cleaned up in ``finally``.
+    """
+    s3_store = {"kind": "s3", "options": {"bucket": bucket}}
+    body = {
+        **DEFAULT_BODY,
+        "attachments": [
+            {"path": "test-s3-marker.txt", "content": "STARFISH-7", "encoding": "utf8"},
+        ],
+    }
+    s, doc = jpost(f"{base}/sandboxes", body)
+    if s != 201:
+        r.add("create with s3 attachments", False, f"got {s}: {doc}")
+        return
+    sid = doc["sandboxId"]
+    snapshot_id = None
+    new_sid = None
+    try:
+        sse_chat(base, sid, "Reply with: OK")
+
+        sn_status, sn_doc = jpost(f"{base}/sandboxes/{sid}/snapshot",
+                                  {"stateStore": s3_store})
+        # `or ""` also covers an explicit null id, which .get()'s default would not.
+        snapshot_id = sn_doc.get("snapshotId") or ""
+        snap_ok = sn_status == 200 and snapshot_id.startswith("snap_")
+        r.add("S3 snapshot returns 200 + snapshotId", snap_ok, str(sn_doc))
+        if not snap_ok:
+            # Nothing usable to cross-check or restore; `finally` still cleans up.
+            return
+        size = sn_doc.get("sizeBytes", 0)
+        files = sn_doc.get("fileCount", 0)
+        r.add("S3 snapshot reports byte count + file count",
+              size > 0 and files > 0,
+              f"size={size}B files={files}")
+
+        # Independently verify the objects landed in S3 via the AWS CLI.
+        # (Best-effort — only counts if the test host has a working CLI + creds.)
+        try:
+            meta = subprocess.run(
+                ["aws", "s3", "ls", f"s3://{bucket}/sandboxes/snapshots/{snapshot_id}/"],
+                capture_output=True, text=True, timeout=10,
+            )
+        except (FileNotFoundError, subprocess.TimeoutExpired) as e:
+            r.skip("S3 objects cross-check (aws cli)", repr(e))
+        else:
+            if meta.returncode not in (0, 1):
+                # Exit 0 = listed, 1 = nothing under the prefix. Any other
+                # code means the CLI itself could not do the listing (missing
+                # credentials, access denied, bad config), so the cross-check
+                # is unavailable rather than failed.
+                r.skip("S3 objects cross-check (aws cli)",
+                       f"aws exited {meta.returncode}: {meta.stderr.strip()!r}")
+            else:
+                ok = "meta.json" in meta.stdout and "workdir.tar.gz" in meta.stdout
+                r.add("S3 objects (meta.json + workdir.tar.gz) actually written",
+                      ok, meta.stdout.strip().replace("\n", " ; ") if ok else f"out={meta.stdout!r} err={meta.stderr!r}")
+
+        rr_status, rr_doc = jpost(f"{base}/sandboxes/restore", {
+            "snapshotId": snapshot_id,
+            "stateStore": s3_store,
+            "target": "new",
+        })
+        # Captured before the assertion so `finally` can delete the sandbox
+        # even when the restore response is otherwise unsatisfactory.
+        new_sid = rr_doc.get("sandboxId") or ""
+        restored_ok = (rr_status == 201
+                       and new_sid.startswith("sbx_")
+                       and bool(rr_doc.get("restored")))
+        r.add("S3 restore returns 201 + new sandboxId", restored_ok, str(rr_doc))
+        if not restored_ok:
+            return
+
+        _, _, txt, _ = sse_chat(base, new_sid, "Use the Read tool to read test-s3-marker.txt and reply with only its content.")
+        r.add("restored workdir from S3 contains seed file (read by agent)",
+              "STARFISH-7" in (txt or ""),
+              f"got {txt!r}")
+    finally:
+        if new_sid: jdelete(f"{base}/sandboxes/{new_sid}")
+        jdelete(f"{base}/sandboxes/{sid}")
+        # Clean up the test snapshot in S3 so the bucket doesn't grow on each run.
+        if snapshot_id:
+            jdelete(f"{base}/snapshots/{snapshot_id}?stateStore=s3&bucket={bucket}")
+
+
+def t_s3_autosave_on_dispose(base, r, bucket):
+    """autoSave wired to s3 lands a snapshot in the bucket on DELETE.
+
+    Creates a sandbox whose ``autoSave`` targets the S3 store, deletes it, and
+    records two assertions on ``r``: the DELETE response reports
+    ``autoSaved: true``, and a snapshot whose ``sourceSandboxId`` is this
+    sandbox shows up in the bucket listing. Every matching snapshot is
+    deleted afterwards so repeated runs do not grow the bucket.
+    """
+    body = {
+        **DEFAULT_BODY,
+        "idleTtlMs": 120000,
+        "ttlMs": 240000,
+        "autoSave": {"stateStore": {"kind": "s3", "options": {"bucket": bucket}}},
+    }
+    s, doc = jpost(f"{base}/sandboxes", body)
+    if s != 201:
+        r.add("autoSave-s3 create", False, f"got {s}: {doc}")
+        return
+    sid = doc["sandboxId"]
+    try:
+        sse_chat(base, sid, "Reply with: OK")
+        d_s, d_doc = jdelete(f"{base}/sandboxes/{sid}")
+        r.add("DELETE returns autoSaved:true (s3)",
+              d_s == 200 and d_doc.get("autoSaved") is True,
+              str(d_doc))
+        # Wait for S3 consistency (us-east-2 is strong now but be polite).
+        time.sleep(1)
+        l_s, l_doc = jget(f"{base}/snapshots?stateStore=s3&bucket={bucket}")
+        listed = l_doc.get("snapshots", [])
+        matches = [x for x in listed if x.get("sourceSandboxId") == sid]
+        r.add("autoSave-s3 produced a discoverable snapshot in the bucket",
+              l_s == 200 and len(matches) >= 1,
+              f"status={l_s} matches={len(matches)} all={[x.get('snapshotId') for x in listed][:5]}")
+        # Cleanup every autosave snapshot from this sandbox, not just the
+        # first, so nothing is left behind in the bucket.
+        for snap in matches:
+            snap_id = snap.get("snapshotId")
+            if snap_id:
+                jdelete(f"{base}/snapshots/{snap_id}?stateStore=s3&bucket={bucket}")
+    finally:
+        # Safety net: the sandbox is normally gone already (deleted above), but
+        # this still disposes it when an earlier step raised.
+        jdelete(f"{base}/sandboxes/{sid}")
+
+
+def t_s3_delete_snapshot(base, r, bucket):
+    """DELETE /snapshots/:id removes both objects from S3.
+
+    Creates a throwaway sandbox, snapshots it to ``bucket``, deletes the
+    snapshot through the API and records on ``r`` that (1) the DELETE answers
+    200 with ``ok: true`` and (2), best-effort via the aws cli, nothing is
+    left under the snapshot's prefix. The cross-check is skipped, never
+    passed, when the CLI is missing, times out, or cannot perform the listing.
+    """
+    # Create a small snapshot first.
+    body = {**DEFAULT_BODY, "attachments": [{"path": "ephemeral.txt", "content": "z", "encoding": "utf8"}]}
+    s, doc = jpost(f"{base}/sandboxes", body)
+    if s != 201:
+        r.add("delete snapshot create", False, str(doc))
+        return
+    sid = doc["sandboxId"]
+    try:
+        sse_chat(base, sid, "Reply with: OK")
+        sn_status, sn_doc = jpost(f"{base}/sandboxes/{sid}/snapshot",
+                                  {"stateStore": {"kind": "s3", "options": {"bucket": bucket}}})
+        if sn_status != 200:
+            r.add("delete snapshot precondition", False, str(sn_doc))
+            return
+        snap_id = sn_doc["snapshotId"]
+        d_s, d_doc = jdelete(f"{base}/snapshots/{snap_id}?stateStore=s3&bucket={bucket}")
+        r.add("DELETE /snapshots/:id returns ok:true",
+              d_s == 200 and d_doc.get("ok") is True,
+              str(d_doc))
+        # Verify S3 actually emptied.
+        try:
+            ls = subprocess.run(
+                ["aws", "s3", "ls", f"s3://{bucket}/sandboxes/snapshots/{snap_id}/"],
+                capture_output=True, text=True, timeout=10,
+            )
+        except (FileNotFoundError, subprocess.TimeoutExpired) as e:
+            r.skip("S3 cleanup cross-check (aws cli)", repr(e))
+        else:
+            if ls.returncode not in (0, 1):
+                # Exit 0 = listed, 1 = nothing under the prefix. Any other
+                # code means the CLI could not do the listing at all (missing
+                # credentials, access denied, ...). stdout is empty in that
+                # case too, so it must not be read as "objects gone".
+                r.skip("S3 cleanup cross-check (aws cli)",
+                       f"aws exited {ls.returncode}: {ls.stderr.strip()!r}")
+            else:
+                r.add("S3 objects gone after DELETE",
+                      ls.stdout.strip() == "",
+                      f"stdout={ls.stdout!r}")
+    finally:
+        jdelete(f"{base}/sandboxes/{sid}")
+
+
+# Registry of (name, function) pairs, run by main() in this order. The name is
+# what --only matches against; each function is called as fn(base, report, bucket).
+TESTS = [
+    ("s3_list_endpoint",          t_s3_list_endpoint),
+    ("s3_snapshot_round_trip",    t_s3_snapshot_round_trip),
+    ("s3_autosave_on_dispose",    t_s3_autosave_on_dispose),
+    ("s3_delete_snapshot",        t_s3_delete_snapshot),
+]
+
+
+def main():
+    """Parse CLI arguments, run the selected S3 tests, and exit with the suite status.
+
+    Options:
+      --base    server base URL (default https://api.clawagent.sh)
+      --bucket  S3 bucket to test against (default: $S3_BUCKET)
+      --only    comma-separated subset of names from TESTS
+
+    Exit status: 0 when ``Report.summary()`` is truthy, 1 otherwise, and 2 for
+    usage errors (no bucket given, or --only naming no test / an unknown
+    test). Rejecting unknown names keeps a typo in --only from silently
+    selecting zero tests.
+    """
+    p = argparse.ArgumentParser()
+    p.add_argument("--base", default="https://api.clawagent.sh")
+    p.add_argument("--bucket", default=os.environ.get("S3_BUCKET"))
+    p.add_argument("--only", help="comma-separated subset of test names")
+    args = p.parse_args()
+    if not args.bucket:
+        print("ERROR: pass --bucket or set S3_BUCKET", file=sys.stderr)
+        sys.exit(2)
+    only = None
+    if args.only:
+        # Empty fragments (e.g. from a trailing comma) are ignored.
+        only = {part.strip() for part in args.only.split(",") if part.strip()}
+        known = [name for name, _ in TESTS]
+        unknown = sorted(only.difference(known))
+        if unknown or not only:
+            # ArgumentParser.error prints usage and exits with status 2.
+            p.error(f"--only: unknown test name(s) {unknown}; choose from {known}")
+    print(f"\nTesting {args.base} against bucket {args.bucket}\n")
+    r = Report()
+    for name, fn in TESTS:
+        if only is not None and name not in only:
+            continue
+        print(f"\n── {name} ─────────────────")
+        try:
+            fn(args.base, r, args.bucket)
+        except Exception as e:
+            # Top-level boundary: one crashing test is recorded as a failure
+            # and must not prevent the remaining tests from running.
+            r.add(name + " (uncaught)", False, repr(e))
+    ok = r.summary()
+    sys.exit(0 if ok else 1)
+
+
+# Run the suite only when executed as a script, not when imported.
+if __name__ == "__main__":
+    main()