|
| 1 | +#!/usr/bin/env python3 |
| 2 | +""" |
| 3 | +Wedge 1.13 — S3-backed StateStore round-trip tests. |
| 4 | +
|
| 5 | +Hits the live ComputerAgentServer (default https://api.clawagent.sh) with |
| 6 | +stateStore: { kind: "s3", options: { bucket: <env S3_BUCKET> } }. |
| 7 | +
|
| 8 | +Requires: |
| 9 | + - S3_BUCKET (or --bucket) — the bucket the server was configured with |
| 10 | + (otherwise it lists from a different bucket and tests fail) |
| 11 | + - AWS creds (for cross-check via aws cli) optional; tests still run if the |
| 12 | + server has its own creds wired via env / instance role. |
| 13 | +
|
| 14 | +Run: |
| 15 | + S3_BUCKET=clawagent-sandbox-snapshots-test \ |
| 16 | + python3 scripts/test-sandboxes-s3.py --base https://api.clawagent.sh |
| 17 | +""" |
| 18 | + |
| 19 | +import argparse, json, os, subprocess, sys, time, threading |
| 20 | +from contextlib import contextmanager |
| 21 | +from concurrent.futures import ThreadPoolExecutor |
| 22 | + |
| 23 | +# Share helpers with the main suite by importing it as a module. Keeps |
| 24 | +# wire/SSE/sandbox-create code in one place. |
| 25 | +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) |
| 26 | +from importlib import import_module |
| 27 | +ts = import_module("test-sandboxes") |
| 28 | + |
# Local aliases for the main suite's helpers, so the tests below can call
# them unqualified (the module name contains a hyphen, so a plain
# `from test-sandboxes import ...` is not valid syntax).

# Request helpers.
http, jget, jpost, jdelete = ts.http, ts.jget, ts.jpost, ts.jdelete

# Chat helper, result collector, sandbox helper and the default create body.
sse_chat, Report, sandbox, DEFAULT_BODY = (
    ts.sse_chat,
    ts.Report,
    ts.sandbox,
    ts.DEFAULT_BODY,
)
| 38 | + |
| 39 | + |
def t_s3_list_endpoint(base, r, bucket):
    """Cheapest possible health check: list snapshots against real S3.

    Issues ``GET {base}/snapshots?stateStore=s3&bucket={bucket}`` and records
    a single result on ``r``. The check passes when the status is 200 and the
    decoded body is an object whose ``snapshots`` entry is a list.

    Args:
        base: Server base URL (no trailing slash).
        r: Result collector; only ``r.add(name, ok, detail)`` is used here.
        bucket: Bucket name forwarded to the server as a query parameter.
    """
    s, doc = jget(f"{base}/snapshots?stateStore=s3&bucket={bucket}")

    # An error response is not guaranteed to decode to an object. Build the
    # detail string defensively so a bad body is reported as an ordinary FAIL
    # instead of raising AttributeError while formatting the message.
    if isinstance(doc, dict):
        ok = s == 200 and isinstance(doc.get("snapshots"), list)
        detail = f"status={s} keys={list(doc.keys())}"
    else:
        ok = False
        detail = f"status={s} body={doc!r}"

    r.add("GET /snapshots?stateStore=s3 returns 200 + snapshots array", ok, detail)
| 46 | + |
| 47 | + |
def t_s3_snapshot_round_trip(base, r, bucket):
    """Seed file via attachment, snapshot to real S3, restore into new sandbox, read back.

    Steps (each recorded on ``r``):
      1. Create a sandbox with a marker file attached.
      2. Snapshot it to the S3 state store and check the reported id/size/count.
      3. Best-effort: list the snapshot prefix with the AWS CLI to confirm the
         objects exist. Skipped when the CLI is missing, times out, or itself
         errors (for example, no credentials on the test host).
      4. Restore the snapshot into a new sandbox and ask the agent to read the
         marker file back.

    If the snapshot or restore step does not yield a usable id, the test stops
    there: continuing with an empty id would only add misleading follow-on
    failures. Cleanup of both sandboxes and the snapshot always runs.

    Args:
        base: Server base URL (no trailing slash).
        r: Result collector providing ``add(name, ok, detail)`` and
            ``skip(name, detail)``.
        bucket: S3 bucket used for the state store.
    """
    store = {"kind": "s3", "options": {"bucket": bucket}}
    body = {
        **DEFAULT_BODY,
        "attachments": [
            {"path": "test-s3-marker.txt", "content": "STARFISH-7", "encoding": "utf8"},
        ],
    }
    s, doc = jpost(f"{base}/sandboxes", body)
    if s != 201:
        r.add("create with s3 attachments", False, f"got {s}: {doc}")
        return
    sid = doc["sandboxId"]
    snapshot_id = None
    new_sid = None
    try:
        sse_chat(base, sid, "Reply with: OK")

        sn_status, sn_doc = jpost(f"{base}/sandboxes/{sid}/snapshot", {"stateStore": store})
        sn_fields = sn_doc if isinstance(sn_doc, dict) else {}
        snapshot_id = sn_fields.get("snapshotId") or None
        r.add("S3 snapshot returns 200 + snapshotId",
              sn_status == 200
              and isinstance(snapshot_id, str)
              and snapshot_id.startswith("snap_"),
              str(sn_doc))
        if sn_status != 200 or not snapshot_id:
            # Nothing to verify or restore; the failure above is the root cause.
            return

        size = sn_fields.get("sizeBytes") or 0
        files = sn_fields.get("fileCount") or 0
        r.add("S3 snapshot reports byte count + file count",
              size > 0 and files > 0,
              f"size={size}B files={files}")

        # Independently verify the objects landed in S3 via the AWS CLI.
        # (Best-effort — only counts when the CLI could actually query S3.)
        try:
            meta = subprocess.run(
                ["aws", "s3", "ls", f"s3://{bucket}/sandboxes/snapshots/{snapshot_id}/"],
                capture_output=True, text=True, timeout=10,
            )
        except (FileNotFoundError, subprocess.TimeoutExpired) as e:
            r.skip("S3 objects cross-check (aws cli)", repr(e))
        else:
            if meta.returncode != 0 and meta.stderr.strip():
                # The CLI reported an error of its own (credentials, access,
                # configuration). That says nothing about the server, so skip.
                r.skip("S3 objects cross-check (aws cli)",
                       f"rc={meta.returncode} err={meta.stderr.strip()!r}")
            else:
                ok = "meta.json" in meta.stdout and "workdir.tar.gz" in meta.stdout
                r.add("S3 objects (meta.json + workdir.tar.gz) actually written",
                      ok,
                      meta.stdout.strip().replace("\n", " ; ") if ok
                      else f"out={meta.stdout!r} err={meta.stderr!r}")

        rr_status, rr_doc = jpost(f"{base}/sandboxes/restore", {
            "snapshotId": snapshot_id,
            "stateStore": store,
            "target": "new",
        })
        rr_fields = rr_doc if isinstance(rr_doc, dict) else {}
        new_sid = rr_fields.get("sandboxId") or None
        r.add("S3 restore returns 201 + new sandboxId",
              rr_status == 201
              and isinstance(new_sid, str)
              and new_sid.startswith("sbx_")
              and bool(rr_fields.get("restored")),
              str(rr_doc))
        if rr_status != 201 or not new_sid:
            return

        _, _, txt, _ = sse_chat(
            base, new_sid,
            "Use the Read tool to read test-s3-marker.txt and reply with only its content.")
        r.add("restored workdir from S3 contains seed file (read by agent)",
              "STARFISH-7" in (txt or ""),
              f"got {txt!r}")
    finally:
        # Attempt every cleanup step even if an earlier one raises, so one
        # failed DELETE cannot leak the remaining sandbox or the snapshot.
        cleanup_urls = []
        if new_sid:
            cleanup_urls.append(f"{base}/sandboxes/{new_sid}")
        cleanup_urls.append(f"{base}/sandboxes/{sid}")
        if snapshot_id:
            # Remove the test snapshot so the bucket doesn't grow on each run.
            cleanup_urls.append(f"{base}/snapshots/{snapshot_id}?stateStore=s3&bucket={bucket}")

        first_error = None
        for url in cleanup_urls:
            try:
                jdelete(url)
            except Exception as e:
                if first_error is None:
                    first_error = e
        if first_error is not None:
            # Still surface the cleanup failure to the caller, as before.
            raise first_error
| 111 | + |
| 112 | + |
def t_s3_autosave_on_dispose(base, r, bucket):
    """autoSave wired to s3 lands a snapshot in the bucket on DELETE.

    Creates a sandbox whose ``autoSave`` targets the S3 state store, disposes
    it, and checks that (a) the DELETE response reports ``autoSaved: true``
    and (b) the snapshot listing contains an entry whose ``sourceSandboxId``
    is this sandbox. Every matching snapshot is deleted afterwards so repeated
    runs do not accumulate objects in the bucket.

    Args:
        base: Server base URL (no trailing slash).
        r: Result collector; only ``r.add(name, ok, detail)`` is used here.
        bucket: S3 bucket used for the autoSave state store.
    """
    body = {
        **DEFAULT_BODY,
        "idleTtlMs": 120000,
        "ttlMs": 240000,
        "autoSave": {"stateStore": {"kind": "s3", "options": {"bucket": bucket}}},
    }
    s, doc = jpost(f"{base}/sandboxes", body)
    if s != 201:
        r.add("autoSave-s3 create", False, f"got {s}: {doc}")
        return
    sid = doc["sandboxId"]
    disposed = False
    try:
        sse_chat(base, sid, "Reply with: OK")
        d_s, d_doc = jdelete(f"{base}/sandboxes/{sid}")
        disposed = d_s == 200
        r.add("DELETE returns autoSaved:true (s3)",
              d_s == 200 and isinstance(d_doc, dict) and d_doc.get("autoSaved") is True,
              str(d_doc))
        # Wait for S3 consistency (us-east-2 is strong now but be polite).
        time.sleep(1)
        l_s, l_doc = jget(f"{base}/snapshots?stateStore=s3&bucket={bucket}")
        # Tolerate a non-object body so the check fails cleanly instead of raising.
        listed = l_doc.get("snapshots", []) if isinstance(l_doc, dict) else []
        matches = [x for x in listed if x.get("sourceSandboxId") == sid]
        r.add("autoSave-s3 produced a discoverable snapshot in the bucket",
              len(matches) >= 1,
              f"matches={len(matches)} all={[x.get('snapshotId') for x in listed][:5]}")
        # Clean up every autosave snapshot from this sandbox, not just the
        # first: the sandbox id is unique to this test, so all are ours.
        for snap in matches:
            snap_id = snap.get("snapshotId")
            if snap_id:
                jdelete(f"{base}/snapshots/{snap_id}?stateStore=s3&bucket={bucket}")
    finally:
        # Safety net only: skip the second DELETE when the first one succeeded.
        if not disposed:
            jdelete(f"{base}/sandboxes/{sid}")
| 144 | + |
| 145 | + |
def t_s3_delete_snapshot(base, r, bucket):
    """DELETE /snapshots/:id removes both objects from S3.

    Creates a small sandbox, snapshots it to S3, deletes the snapshot through
    the API and then, best-effort, lists the snapshot prefix with the AWS CLI
    to confirm nothing is left. The CLI check is skipped (not passed) when the
    CLI is missing, times out, or reports an error of its own, because an
    empty stdout from a failed CLI call proves nothing about the bucket.

    Args:
        base: Server base URL (no trailing slash).
        r: Result collector providing ``add(name, ok, detail)`` and
            ``skip(name, detail)``.
        bucket: S3 bucket used for the state store.
    """
    # Create a small snapshot first.
    body = {
        **DEFAULT_BODY,
        "attachments": [{"path": "ephemeral.txt", "content": "z", "encoding": "utf8"}],
    }
    s, doc = jpost(f"{base}/sandboxes", body)
    if s != 201:
        r.add("delete snapshot create", False, str(doc))
        return
    sid = doc["sandboxId"]
    snap_id = None
    snapshot_deleted = False
    try:
        sse_chat(base, sid, "Reply with: OK")
        sn_status, sn_doc = jpost(f"{base}/sandboxes/{sid}/snapshot",
                                  {"stateStore": {"kind": "s3", "options": {"bucket": bucket}}})
        snap_id = sn_doc.get("snapshotId") if isinstance(sn_doc, dict) else None
        if sn_status != 200 or not snap_id:
            # Covers both a failed snapshot call and a 200 without an id,
            # which previously surfaced as an uncaught KeyError.
            r.add("delete snapshot precondition", False, str(sn_doc))
            return

        d_s, d_doc = jdelete(f"{base}/snapshots/{snap_id}?stateStore=s3&bucket={bucket}")
        snapshot_deleted = d_s == 200
        r.add("DELETE /snapshots/:id returns ok:true",
              d_s == 200 and isinstance(d_doc, dict) and d_doc.get("ok") is True,
              str(d_doc))

        # Verify S3 actually emptied.
        try:
            ls = subprocess.run(
                ["aws", "s3", "ls", f"s3://{bucket}/sandboxes/snapshots/{snap_id}/"],
                capture_output=True, text=True, timeout=10,
            )
        except (FileNotFoundError, subprocess.TimeoutExpired) as e:
            r.skip("S3 cleanup cross-check (aws cli)", repr(e))
        else:
            if ls.returncode != 0 and ls.stderr.strip():
                # CLI error (credentials, access, config): stdout is empty but
                # that is not evidence the objects are gone. Skip, don't pass.
                r.skip("S3 cleanup cross-check (aws cli)",
                       f"rc={ls.returncode} err={ls.stderr.strip()!r}")
            else:
                r.add("S3 objects gone after DELETE",
                      ls.stdout.strip() == "",
                      f"stdout={ls.stdout!r}")
    finally:
        try:
            # Don't leave the snapshot behind if the test stopped before (or
            # failed at) the DELETE under test.
            if snap_id and not snapshot_deleted:
                jdelete(f"{base}/snapshots/{snap_id}?stateStore=s3&bucket={bucket}")
        finally:
            jdelete(f"{base}/sandboxes/{sid}")
| 180 | + |
| 181 | + |
# Registry consumed by main(): (name, callable) pairs in execution order.
# The public name is the function name without its "t_" prefix, which is
# what --only matches against.
TESTS = [
    (fn.__name__[len("t_"):], fn)
    for fn in (
        t_s3_list_endpoint,
        t_s3_snapshot_round_trip,
        t_s3_autosave_on_dispose,
        t_s3_delete_snapshot,
    )
]
| 188 | + |
| 189 | + |
def main():
    """Parse CLI arguments, run the selected S3 tests, and exit with a status.

    Options:
        --base    Server base URL (default https://api.clawagent.sh). A
                  trailing slash is stripped so request paths never contain
                  a doubled "/".
        --bucket  S3 bucket to test against (default: $S3_BUCKET).
        --only    Comma-separated subset of test names from TESTS.

    Exit status:
        0  the report's summary() returned a truthy value
        1  summary() returned a falsy value
        2  usage error (no bucket, or --only names an unknown/empty test set)
    """
    p = argparse.ArgumentParser()
    p.add_argument("--base", default="https://api.clawagent.sh")
    p.add_argument("--bucket", default=os.environ.get("S3_BUCKET"))
    p.add_argument("--only", help="comma-separated subset of test names")
    args = p.parse_args()
    if not args.bucket:
        print("ERROR: pass --bucket or set S3_BUCKET", file=sys.stderr)
        sys.exit(2)

    only = None
    if args.only:
        only = {name.strip() for name in args.only.split(",") if name.strip()}
        known = [name for name, _ in TESTS]
        unknown = sorted(only.difference(known))
        if unknown or not only:
            # A typo here would otherwise select zero tests and the run would
            # look like a clean pass. argparse's error() exits with status 2.
            p.error(
                f"--only: unknown test name(s): {', '.join(unknown) or '(none given)'}; "
                f"choose from: {', '.join(known)}"
            )

    base = args.base.rstrip("/")
    print(f"\nTesting {base} against bucket {args.bucket}\n")
    r = Report()
    for name, fn in TESTS:
        if only and name not in only:
            continue
        print(f"\n── {name} ─────────────────")
        try:
            fn(base, r, args.bucket)
        except Exception as e:
            # Top-level boundary: record the crash as a failed check and keep
            # running the remaining tests.
            r.add(name + " (uncaught)", False, repr(e))
    ok = r.summary()
    sys.exit(0 if ok else 1)


if __name__ == "__main__":
    main()
0 commit comments