Skip to content

Commit c3eaf15

Browse files
kapaleshreyasclaude
andcommitted
test(state-store-s3): live S3 round-trip suite — provisioned + verified
scripts/test-sandboxes-s3.py — 9 assertions across 4 tests, run against real AWS S3 (bucket: clawagent-sandbox-snapshots-test in us-east-2): - s3_list_endpoint GET /snapshots?stateStore=s3 returns 200 - s3_snapshot_round_trip attachment-seed → snapshot → S3 PUT verified by `aws s3 ls` (meta.json + workdir.tar.gz) → restore into new sandbox → agent reads file (STARFISH-7 recovered through the full pipeline) - s3_autosave_on_dispose autoSave wired to s3 → DELETE → snapshot in bucket - s3_delete_snapshot DELETE /snapshots/:id removes both S3 objects 9/9 passed end-to-end against https://api.clawagent.sh. Resources provisioned (account 182399701833): - S3 bucket: clawagent-sandbox-snapshots-test (us-east-2) public access blocked + SSE-S3 default encryption - IAM user: clawagent-sandbox-snapshots inline policy scoped to ListBucket / Get|Put|DeleteObject on this single bucket — denied for ListAllMyBuckets etc. - Access key: live, wired into EC2 systemd EnvironmentFile EC2 service env now resolves defaultStateStore: s3 (registered: memory, s3). Imports the shared helpers from test-sandboxes.py rather than duplicating HTTP/SSE/sandbox-create code. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
1 parent 47f4188 commit c3eaf15

1 file changed

Lines changed: 214 additions & 0 deletions

File tree

scripts/test-sandboxes-s3.py

Lines changed: 214 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,214 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Wedge 1.13 — S3-backed StateStore round-trip tests.
4+
5+
Hits the live ComputerAgentServer (default https://api.clawagent.sh) with
6+
stateStore: { kind: "s3", options: { bucket: <env S3_BUCKET> } }.
7+
8+
Requires:
9+
- S3_BUCKET — the bucket the server was configured with
10+
(otherwise it lists from a different bucket and tests fail)
11+
- AWS creds (for cross-check via aws cli) optional; tests still run if the
12+
server has its own creds wired via env / instance role.
13+
14+
Run:
15+
S3_BUCKET=clawagent-sandbox-snapshots-test \
16+
python3 scripts/test-sandboxes-s3.py --base https://api.clawagent.sh
17+
"""
18+
19+
import argparse, json, os, subprocess, sys, time, threading
20+
from contextlib import contextmanager
21+
from concurrent.futures import ThreadPoolExecutor
22+
23+
# Reuse the main suite's wire/SSE/sandbox-create helpers rather than copying
# them here. The sibling module's file name contains a hyphen, so it cannot be
# loaded with a plain ``import`` statement: put this script's directory on
# sys.path and load it by name instead.
from importlib import import_module
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
ts = import_module("test-sandboxes")

# Short local aliases for the helpers and globals this file uses directly.
http, jget, jpost, jdelete = ts.http, ts.jget, ts.jpost, ts.jdelete
sse_chat, Report, sandbox = ts.sse_chat, ts.Report, ts.sandbox
DEFAULT_BODY = ts.DEFAULT_BODY
38+
39+
40+
def t_s3_list_endpoint(base, r, bucket):
    """Smoke-test the S3-backed snapshot listing endpoint.

    Sends one GET to ``/snapshots`` with ``stateStore=s3`` and the given
    bucket, then records a single result on ``r``: the status is 200 and the
    response document contains a ``snapshots`` key.
    """
    url = f"{base}/snapshots?stateStore=s3&bucket={bucket}"
    status, payload = jget(url)
    passed = status == 200 and "snapshots" in payload
    detail = f"status={status} keys={list(payload.keys())}"
    r.add("GET /snapshots?stateStore=s3 returns 200 + snapshots array", passed, detail)
46+
47+
48+
def t_s3_snapshot_round_trip(base, r, bucket):
    """Seed a file, snapshot it to S3, restore into a new sandbox, read it back.

    Steps (each records one or more results on ``r``):
      1. Create a sandbox with ``test-s3-marker.txt`` attached.
      2. POST ``/sandboxes/{id}/snapshot`` targeting the S3 state store.
      3. Best-effort cross-check via ``aws s3 ls`` that ``meta.json`` and
         ``workdir.tar.gz`` exist under the snapshot's prefix.
      4. POST ``/sandboxes/restore`` with ``target: "new"``.
      5. Ask the agent in the restored sandbox to read the marker file.

    If step 2 yields no snapshot id, or step 4 yields no sandbox id, the
    dependent steps are not attempted (the failure has already been recorded).
    Both sandboxes and the snapshot are deleted in ``finally``.
    """
    store = {"kind": "s3", "options": {"bucket": bucket}}
    body = {
        **DEFAULT_BODY,
        "attachments": [
            {"path": "test-s3-marker.txt", "content": "STARFISH-7", "encoding": "utf8"},
        ],
    }
    s, doc = jpost(f"{base}/sandboxes", body)
    if s != 201:
        r.add("create with s3 attachments", False, f"got {s}: {doc}")
        return
    sid = doc["sandboxId"]
    snapshot_id = None
    new_sid = None
    try:
        sse_chat(base, sid, "Reply with: OK")

        sn_status, sn_doc = jpost(f"{base}/sandboxes/{sid}/snapshot", {"stateStore": store})
        # Capture the id before reporting so the ``finally`` cleanup still
        # knows about it if reporting raises; ``or ""`` also tolerates an
        # explicit null in the response.
        snapshot_id = sn_doc.get("snapshotId") or ""
        r.add("S3 snapshot returns 200 + snapshotId",
              sn_status == 200 and snapshot_id.startswith("snap_"),
              str(sn_doc))
        size = sn_doc.get("sizeBytes", 0)
        files = sn_doc.get("fileCount", 0)
        r.add("S3 snapshot reports byte count + file count",
              size > 0 and files > 0,
              f"size={size}B files={files}")
        if not snapshot_id:
            # Nothing to cross-check or restore from.
            return

        # Independently verify the objects landed in S3 via the AWS CLI.
        # Best-effort: skipped when the CLI is missing, times out, or cannot
        # run the listing at all (e.g. the test host has no credentials).
        try:
            ls = subprocess.run(
                ["aws", "s3", "ls", f"s3://{bucket}/sandboxes/snapshots/{snapshot_id}/"],
                capture_output=True, text=True, timeout=10,
            )
        except (FileNotFoundError, subprocess.TimeoutExpired) as e:
            r.skip("S3 objects cross-check (aws cli)", repr(e))
        else:
            # NOTE(review): relies on `aws s3 ls` exiting 0 for a listing and
            # 1 for an empty prefix, with other codes meaning the CLI itself
            # failed — confirm against the CLI version on the test host.
            if ls.returncode not in (0, 1):
                r.skip("S3 objects cross-check (aws cli)",
                       f"aws exited {ls.returncode}: {ls.stderr.strip()!r}")
            else:
                ok = "meta.json" in ls.stdout and "workdir.tar.gz" in ls.stdout
                r.add("S3 objects (meta.json + workdir.tar.gz) actually written",
                      ok,
                      ls.stdout.strip().replace("\n", " ; ") if ok
                      else f"out={ls.stdout!r} err={ls.stderr!r}")

        rr_status, rr_doc = jpost(f"{base}/sandboxes/restore", {
            "snapshotId": snapshot_id,
            "stateStore": store,
            "target": "new",
        })
        # Same ordering as above: remember the id first so it gets cleaned up.
        new_sid = rr_doc.get("sandboxId") or ""
        r.add("S3 restore returns 201 + new sandboxId",
              rr_status == 201 and new_sid.startswith("sbx_") and rr_doc.get("restored"),
              str(rr_doc))
        if not new_sid:
            # No restored sandbox to talk to.
            return

        _, _, txt, _ = sse_chat(
            base, new_sid,
            "Use the Read tool to read test-s3-marker.txt and reply with only its content.")
        r.add("restored workdir from S3 contains seed file (read by agent)",
              "STARFISH-7" in (txt or ""),
              f"got {txt!r}")
    finally:
        if new_sid:
            jdelete(f"{base}/sandboxes/{new_sid}")
        jdelete(f"{base}/sandboxes/{sid}")
        # Clean up the test snapshot in S3 so the bucket doesn't grow on each run.
        if snapshot_id:
            jdelete(f"{base}/snapshots/{snapshot_id}?stateStore=s3&bucket={bucket}")
111+
112+
113+
def t_s3_autosave_on_dispose(base, r, bucket):
    """Check that disposing an autoSave-to-S3 sandbox leaves a snapshot behind.

    Creates a sandbox whose ``autoSave`` targets the S3 state store, sends one
    chat turn, DELETEs the sandbox, then lists ``/snapshots`` looking for
    entries whose ``sourceSandboxId`` is this sandbox. Every matching snapshot
    is removed afterwards so repeated runs do not grow the bucket.
    """
    body = {
        **DEFAULT_BODY,
        "idleTtlMs": 120000,
        "ttlMs": 240000,
        "autoSave": {"stateStore": {"kind": "s3", "options": {"bucket": bucket}}},
    }
    s, doc = jpost(f"{base}/sandboxes", body)
    if s != 201:
        r.add("autoSave-s3 create", False, f"got {s}: {doc}")
        return
    sid = doc["sandboxId"]
    disposed = False
    try:
        sse_chat(base, sid, "Reply with: OK")
        d_s, d_doc = jdelete(f"{base}/sandboxes/{sid}")
        disposed = d_s == 200
        r.add("DELETE returns autoSaved:true (s3)",
              d_s == 200 and d_doc.get("autoSaved") is True,
              str(d_doc))
        # Wait for S3 consistency (us-east-2 is strong now but be polite).
        time.sleep(1)
        l_s, l_doc = jget(f"{base}/snapshots?stateStore=s3&bucket={bucket}")
        snapshots = l_doc.get("snapshots", [])
        matches = [x for x in snapshots if x.get("sourceSandboxId") == sid]
        r.add("autoSave-s3 produced a discoverable snapshot in the bucket",
              len(matches) >= 1,
              f"matches={len(matches)} all={[x.get('snapshotId') for x in snapshots][:5]}")
        # Remove every snapshot attributed to this sandbox, not just the
        # first, so nothing created by this test is left in the bucket.
        for snap in matches:
            snap_id = snap.get("snapshotId")
            if snap_id:
                jdelete(f"{base}/snapshots/{snap_id}?stateStore=s3&bucket={bucket}")
    finally:
        # Only retry the dispose when the DELETE above did not succeed
        # (or was never reached).
        if not disposed:
            jdelete(f"{base}/sandboxes/{sid}")
144+
145+
146+
def t_s3_delete_snapshot(base, r, bucket):
    """Check that DELETE ``/snapshots/:id`` removes the snapshot's S3 objects.

    Creates a sandbox with a one-byte attachment, snapshots it to the S3 state
    store, deletes that snapshot through the API, and then (best-effort) uses
    ``aws s3 ls`` to confirm nothing is left under the snapshot's prefix. The
    sandbox is deleted in ``finally``.
    """
    # Create a small snapshot first.
    body = {**DEFAULT_BODY,
            "attachments": [{"path": "ephemeral.txt", "content": "z", "encoding": "utf8"}]}
    s, doc = jpost(f"{base}/sandboxes", body)
    if s != 201:
        r.add("delete snapshot create", False, str(doc))
        return
    sid = doc["sandboxId"]
    try:
        sse_chat(base, sid, "Reply with: OK")
        sn_status, sn_doc = jpost(f"{base}/sandboxes/{sid}/snapshot",
                                  {"stateStore": {"kind": "s3", "options": {"bucket": bucket}}})
        if sn_status != 200:
            r.add("delete snapshot precondition", False, str(sn_doc))
            return
        snap_id = sn_doc["snapshotId"]
        d_s, d_doc = jdelete(f"{base}/snapshots/{snap_id}?stateStore=s3&bucket={bucket}")
        r.add("DELETE /snapshots/:id returns ok:true",
              d_s == 200 and d_doc.get("ok") is True,
              str(d_doc))
        # Verify S3 actually emptied.
        try:
            ls = subprocess.run(
                ["aws", "s3", "ls", f"s3://{bucket}/sandboxes/snapshots/{snap_id}/"],
                capture_output=True, text=True, timeout=10,
            )
        except (FileNotFoundError, subprocess.TimeoutExpired) as e:
            r.skip("S3 cleanup cross-check (aws cli)", repr(e))
        else:
            # A failed CLI invocation (no credentials, access denied, ...)
            # also prints nothing on stdout, so empty stdout alone would
            # report a pass without having checked anything.
            # NOTE(review): relies on `aws s3 ls` exiting 0 for a listing and
            # 1 for an empty prefix, with other codes meaning the CLI itself
            # failed — confirm against the CLI version on the test host.
            if ls.returncode not in (0, 1):
                r.skip("S3 cleanup cross-check (aws cli)",
                       f"aws exited {ls.returncode}: {ls.stderr.strip()!r}")
            else:
                r.add("S3 objects gone after DELETE",
                      ls.stdout.strip() == "",
                      f"stdout={ls.stdout!r}")
    finally:
        jdelete(f"{base}/sandboxes/{sid}")
180+
181+
182+
# Registry of (name, test function) pairs, in execution order. Each name is
# the function's own name with the leading "t_" removed.
TESTS = [
    (fn.__name__[len("t_"):], fn)
    for fn in (
        t_s3_list_endpoint,
        t_s3_snapshot_round_trip,
        t_s3_autosave_on_dispose,
        t_s3_delete_snapshot,
    )
]
188+
189+
190+
def main():
    """Parse CLI arguments, run the selected tests, and exit with a status.

    Options:
      --base    server base URL (default https://api.clawagent.sh)
      --bucket  S3 bucket name (default: the S3_BUCKET environment variable)
      --only    comma-separated subset of names from ``TESTS``

    Exits 0 when ``Report.summary()`` is truthy and 1 otherwise. Exits 2 on a
    usage error: no bucket given, or ``--only`` names no known test.
    """
    p = argparse.ArgumentParser()
    p.add_argument("--base", default="https://api.clawagent.sh")
    p.add_argument("--bucket", default=os.environ.get("S3_BUCKET"))
    p.add_argument("--only", help="comma-separated subset of test names")
    args = p.parse_args()
    if not args.bucket:
        print("ERROR: pass --bucket or set S3_BUCKET", file=sys.stderr)
        sys.exit(2)

    only = None
    if args.only:
        only = {name.strip() for name in args.only.split(",") if name.strip()}
        known = [name for name, _ in TESTS]
        unknown = sorted(only.difference(known))
        # Reject typos up front: otherwise a misspelled name silently selects
        # nothing and the run can finish without having tested anything.
        if unknown or not only:
            p.error(f"--only: unknown test name(s) {unknown}; choose from {known}")

    print(f"\nTesting {args.base} against bucket {args.bucket}\n")
    r = Report()
    for name, fn in TESTS:
        if only is not None and name not in only:
            continue
        print(f"\n── {name} ─────────────────")
        try:
            fn(args.base, r, args.bucket)
        except Exception as e:
            # Top-level boundary: one crashing test is recorded as a failure
            # so the remaining tests still run.
            r.add(name + " (uncaught)", False, repr(e))
    ok = r.summary()
    sys.exit(0 if ok else 1)


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)