
Commit 28ba04c

update benchmarking code

1 parent cc4855c

6 files changed
Lines changed: 1030 additions & 129 deletions


scratch/merged_context_registry_2025-09-04T08:32:08.486247.json

Lines changed: 845 additions & 0 deletions
Large diffs are not rendered by default.

scratch/notebooks/collect_perf_commits.ipynb

Lines changed: 23 additions & 28 deletions
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 2,
    "id": "13626f75",
    "metadata": {},
    "outputs": [
@@ -12,6 +12,13 @@
      "text": [
       "/mnt/sdd1/atharvas/formulacode/datasmith\n"
      ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "03:16:55 WARNING simple_useragent.core: Falling back to historic user agent.\n"
+     ]
     }
    ],
    "source": [
@@ -28,7 +35,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 3,
    "id": "b4179f19",
    "metadata": {},
    "outputs": [],
@@ -48,7 +55,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 4,
    "id": "6624689c",
    "metadata": {},
    "outputs": [
@@ -218,7 +225,7 @@
       "4 numpy/numpy-financial "
      ]
     },
-    "execution_count": 6,
+    "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -231,7 +238,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 5,
    "id": "79905eb5",
    "metadata": {},
    "outputs": [
@@ -291,17 +298,7 @@
    "execution_count": null,
    "id": "567cdaa5",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "10:28:37 INFO datasmith.docker.context: Context registry saved to scratch/artifacts/pipeflush/chunk_0/context_registry.json\n",
-      "10:28:41 INFO datasmith.docker.context: Context registry saved to scratch/artifacts/pipeflush/chunk_1/context_registry.json\n",
-      "10:28:53 INFO datasmith.docker.context: Context registry saved to scratch/artifacts/pipeflush/chunk_2/context_registry.json\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# break parquet into three chunks with fixed ratios.\n",
     "ratios = [64, 56, 127]\n",
@@ -319,31 +316,29 @@
     "\n",
     "chunks = [df1, df2, df3]\n",
     "cmds = []\n",
-    "for i, (chunk, ratio) in enumerate(zip(chunks, ratios)):\n",
+    "for i, (_, ratio) in enumerate(zip(chunks, ratios)):\n",
     "    pth = Path(f\"scratch/artifacts/pipeflush/chunk_{i}/commits_perfonly.parquet\")\n",
     "    pth.parent.mkdir(parents=True, exist_ok=True)\n",
-    "    chunk.to_parquet(pth)\n",
+    "    # chunk.to_parquet(pth)\n",
     "    # Make a new context registry:\n",
-    "    cr.save_to_file(pth.parent / \"context_registry.json\")\n",
+    "    # cr.save_to_file(pth.parent / \"context_registry.json\")\n",
     "    cmd_i = cmd.format(output_dir=pth.parent, ncpus=(ratio // 2))\n",
     "    cmds.append(cmd_i)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": null,
    "id": "3fafdd1c",
    "metadata": {},
    "outputs": [
    {
-    "ename": "NameError",
-    "evalue": "name 'cmds' is not defined",
-    "output_type": "error",
-    "traceback": [
-     "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
-     "\u001b[31mNameError\u001b[39m Traceback (most recent call last)",
-     "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[8]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m \u001b[38;5;28mprint\u001b[39m(\u001b[33m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[33m\"\u001b[39m.join(\u001b[43mcmds\u001b[49m).replace(\u001b[33m\"\u001b[39m\u001b[33m \u001b[39m\u001b[33m\"\u001b[39m, \u001b[33m\"\u001b[39m\u001b[33m\"\u001b[39m))\n",
-     "\u001b[31mNameError\u001b[39m: name 'cmds' is not defined"
+    "name": "stdout",
+    "output_type": "stream",
+    "text": [
+     "python scratch/scripts/synthesize_contexts.py --commits scratch/artifacts/pipeflush/chunk_0/commits_perfonly.parquet --output-dir scratch/artifacts/pipeflush/chunk_0/results_synthesis/ --context-registry scratch/artifacts/pipeflush/chunk_0/context_registry.json --max-workers 32 --limit-per-repo 2 --max-attempts 3 --max-steps 10\n",
+     "python scratch/scripts/synthesize_contexts.py --commits scratch/artifacts/pipeflush/chunk_1/commits_perfonly.parquet --output-dir scratch/artifacts/pipeflush/chunk_1/results_synthesis/ --context-registry scratch/artifacts/pipeflush/chunk_1/context_registry.json --max-workers 28 --limit-per-repo 2 --max-attempts 3 --max-steps 10\n",
+     "python scratch/scripts/synthesize_contexts.py --commits scratch/artifacts/pipeflush/chunk_2/commits_perfonly.parquet --output-dir scratch/artifacts/pipeflush/chunk_2/results_synthesis/ --context-registry scratch/artifacts/pipeflush/chunk_2/context_registry.json --max-workers 63 --limit-per-repo 2 --max-attempts 3 --max-steps 10\n"
    ]
   }
  ],
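Note: the `df1`/`df2`/`df3` chunks and the `cmd` template referenced in this cell are defined in cells outside the hunk. A minimal sketch of a proportional three-way split, with a hypothetical `df` standing in for the commits frame (the notebook's actual splitting cell is not shown):

import pandas as pd

# Hypothetical stand-in for the commits_perfonly frame loaded earlier.
df = pd.DataFrame({"repo_name": ["octo/demo"] * 247, "sha": [f"{i:07x}" for i in range(247)]})

# Split proportionally to the per-chunk CPU budgets used above.
ratios = [64, 56, 127]
total = sum(ratios)
cuts = [round(len(df) * sum(ratios[: i + 1]) / total) for i in range(len(ratios) - 1)]
df1, df2, df3 = df.iloc[: cuts[0]], df.iloc[cuts[0] : cuts[1]], df.iloc[cuts[1] :]
assert len(df1) + len(df2) + len(df3) == len(df)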

scratch/scripts/benchmark_commits.py

Lines changed: 60 additions & 45 deletions
@@ -8,25 +8,24 @@
 import os
 import pickle
 import shutil
-from concurrent.futures import ThreadPoolExecutor, as_completed
+from collections import defaultdict
 from pathlib import Path
 
 import asv
 import pandas as pd
 
 from datasmith.benchmark.collection import BenchmarkCollection
-from datasmith.docker.context import ContextRegistry
+from datasmith.docker.context import ContextRegistry, DockerContext, Task
 from datasmith.docker.orchestrator import (
-    build_repo_sha_image,
     get_docker_client,
     orchestrate,
 )
-from datasmith.docker.validation import BuildResult, Task
+from datasmith.execution.collect_commits_offline import find_parent_releases
 from datasmith.logging_config import configure_logging
 from datasmith.scrape.utils import _parse_commit_url
 
-# logger = configure_logging(level=logging.DEBUG, stream=open(Path(__file__).with_suffix(".log"), "w"))
-logger = configure_logging(level=logging.DEBUG)
+logger = configure_logging(level=logging.DEBUG, stream=open(Path(__file__).with_suffix(".log"), "w"))  # noqa: SIM115
+# logger = configure_logging(level=logging.DEBUG)
 
 
 def parse_args() -> argparse.Namespace:
@@ -107,11 +106,13 @@ def process_inputs(args: argparse.Namespace) -> dict[tuple[str, str], set[tuple[
             else:
                 all_states[(owner, repo)].add((sha, 0.0))
     elif args.commits:
-        commits = pd.read_json(args.commits, lines=True)
+        commits = (
+            pd.read_json(args.commits, lines=True) if args.commits.suffix == ".jsonl" else pd.read_parquet(args.commits)
+        )
         all_states = {}
         for _, row in commits.iterrows():
             repo_name = row["repo_name"]
-            sha = row["commit_sha"]
+            sha = row["sha"]
             has_asv = row.get("has_asv", True)
             if not has_asv:
                 logger.debug(f"Skipping {repo_name} commit {sha} as it does not have ASV benchmarks.")
@@ -135,16 +136,30 @@ def main(args: argparse.Namespace) -> None:
     context_registry = ContextRegistry.load_from_file(path=args.context_registry)
 
     # Prepare tasks
-    tasks: list[Task] = []
+    tasks: list[tuple[Task, DockerContext]] = []
+    repo_commit_pairs = defaultdict(list)
     for (owner, repo), uniq in all_states.items():
         limited = list(uniq)[: max(0, args.limit_per_repo)] if args.limit_per_repo > 0 else list(uniq)
         for sha, date in limited:
             task = Task(owner, repo, sha, commit_date=date)
             if task in context_registry:
-                tasks.append(task)
+                tasks.append((task, context_registry.get(task)))
+                repo_commit_pairs[f"{owner}/{repo}"].append(task)
+                # also add the parent commit.
             else:
                 logger.debug(f"main: skipping {task} as not in context registry")
 
+    # get all parent commits and add them as tasks as well.
+    for repo_name, tsks in repo_commit_pairs.items():
+        owner, repo = repo_name.split("/")
+        shas = [t.sha for t in tsks]
+        parent_commits = find_parent_releases(repo_name, shas, add_first=True, incl_datetime=True)
+        for i, (parent_sha, date) in enumerate(parent_commits):
+            parent_task = Task(owner=owner, repo=repo, sha=parent_sha, commit_date=date)  # pyright: ignore[reportArgumentType]
+            # use the child context.
+            ctx = context_registry.get(tsks[i])
+            tasks.append((parent_task, ctx))
+
     max_concurrency = (
         args.max_concurrency if args.max_concurrency != -1 else max(4, math.floor(0.5 * (os.cpu_count() or 1)))
     )
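The parent-release pass pairs each parent commit with the Docker context of the child it was derived from, which is only sound if `find_parent_releases` returns one `(sha, date)` tuple per input sha, in input order. A toy illustration of that index alignment (all names below are stand-ins, not the datasmith API):

# Stand-ins: child shas and their (parent_sha, date) results, index-aligned.
child_shas = ["c1", "c2", "c3"]
parent_commits = [("p1", 1690000000.0), ("p2", 1691000000.0), ("p3", 1692000000.0)]
child_context = {"c1": "ctx-a", "c2": "ctx-b", "c3": "ctx-c"}  # sha -> build context

tasks = []
for i, (parent_sha, date) in enumerate(parent_commits):
    # The parent's benchmark run reuses the child commit's container context.
    tasks.append((parent_sha, child_context[child_shas[i]]))

assert tasks[0] == ("p1", "ctx-a")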
@@ -170,44 +185,44 @@ def main(args: argparse.Namespace) -> None:
     }
     logger.debug("main: machine_defaults keys=%d", len(machine_defaults))
 
-    builds: list[BuildResult] = []
-    if args.max_concurrency < 1:
-        for t in tasks:
-            build_res: BuildResult = build_repo_sha_image(
-                client=client,
-                context_registry=context_registry,
-                task=t,
-                force=args.force_rebuild,
-            )
-            builds.append(build_res)
-    else:
-        with ThreadPoolExecutor(max_workers=args.max_concurrency) as pool:
-            futures = [
-                pool.submit(
-                    build_repo_sha_image,
-                    client,
-                    context_registry,
-                    task,
-                    args.force_rebuild,
-                )
-                for task in tasks
-            ]
-            for fut in as_completed(futures):
-                builds.append(fut.result())
-
-    successful_builds = [b for b in builds if b.rc != 1]
-
-    logger.info("Running benchmarks for %d images", len(successful_builds))
-    logger.info("Failed builds for %d images", len(builds) - len(successful_builds))
-    for b in builds:
-        if b.rc == 1:
-            logger.warning("Build failed for %s", b.image_name)
+    # builds: list[BuildResult] = []
+    # if args.max_concurrency < 1:
+    #     for t in tasks:
+    #         build_res: BuildResult = build_repo_sha_image(
+    #             client=client,
+    #             context_registry=context_registry,
+    #             task=t,
+    #             force=args.force_rebuild,
+    #         )
+    #         builds.append(build_res)
+    # else:
+    #     with ThreadPoolExecutor(max_workers=args.max_concurrency) as pool:
+    #         futures = [
+    #             pool.submit(
+    #                 build_repo_sha_image,
+    #                 client,
+    #                 context_registry,
+    #                 task,
+    #                 args.force_rebuild,
+    #             )
+    #             for task in tasks
+    #         ]
+    #         for fut in as_completed(futures):
+    #             builds.append(fut.result())
+
+    # successful_builds = [b for b in builds if b.rc != 1]
+
+    # logger.info("Running benchmarks for %d images", len(successful_builds))
+    # logger.info("Failed builds for %d images", len(builds) - len(successful_builds))
+    # for b in builds:
+    #     if b.rc == 1:
+    #         logger.warning("Build failed for %s", b.image_name)
 
     machine_args: dict[str, str] = asv.machine.Machine.get_defaults()  # pyright: ignore[reportAttributeAccessIssue]
     machine_args["num_cpu"] = str(args.num_cores)
-    files_by_image: dict[str, dict[str, str]] = asyncio.run(
+    files_by_image: dict[Task, dict[str, str]] = asyncio.run(
         orchestrate(
-            docker_image_names=[b.image_name for b in successful_builds],
+            contexts=tasks,
             asv_args=asv_args,
             machine_args=machine_args,
             max_concurrency=max_concurrency,
@@ -217,7 +232,7 @@ def main(args: argparse.Namespace) -> None:
         )
     )
     # save the files by image as a pickle file.
-    with open(output_dir / "files_by_image.pkl", "wb") as f:
+    with open(output_dir / "files_by_image.json", "wb") as f:
         pickle.dump(files_by_image, f)
 
     # save the files by image as a JSON file
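One caveat worth flagging: the output filename now ends in .json, but the payload is still written with pickle.dump (the trailing comment suggests a JSON copy is saved separately). If a true JSON artifact is the intent, the dict keys (now Task objects per the new annotation) would need string conversion first. A hedged sketch of both serializations; the paths and data below are illustrative only:

import json
import pickle
from pathlib import Path

output_dir = Path("scratch/artifacts/demo")  # illustrative path
output_dir.mkdir(parents=True, exist_ok=True)

# Toy payload: keys imitate Task-like tuples, values are per-image file maps.
files_by_image = {("numpy", "numpy-financial", "abc1234"): {"results.json": "..."}}

# Pickle round-trips arbitrary keys but is not JSON; a .pkl suffix avoids confusion.
with open(output_dir / "files_by_image.pkl", "wb") as f:
    pickle.dump(files_by_image, f)

# A JSON copy needs string keys.
with open(output_dir / "files_by_image.json", "w") as f:
    json.dump({"/".join(k): v for k, v in files_by_image.items()}, f, indent=2)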

src/datasmith/agents/context_synthesis.py

Lines changed: 25 additions & 27 deletions
@@ -15,6 +15,7 @@
 
 from datasmith.agents.tool_executor import ContainerToolExecutor
 from datasmith.docker.context import BuildResult, ContextRegistry, DockerContext
+from datasmith.docker.orchestrator import gen_run_labels
 from datasmith.docker.validation import Task
 
 logger = logging.getLogger(__name__)
@@ -23,6 +24,7 @@
 
 def remove_containers_by_label(client: docker.DockerClient, run_id: str) -> None:
     with contextlib.suppress(Exception):
         for c in client.containers.list(all=True, filters={"label": f"datasmith.run={run_id}"}):
+            logger.debug("Removing container %s", c.name)
             c.remove(force=True)
 
 
@@ -32,6 +34,7 @@ def remove_images_by_label(client: docker.DockerClient, run_id: str) -> None:
     imgs = client.images.list(filters={"label": f"datasmith.run={run_id}"})
     for img in imgs:
         try:
+            logger.debug("Removing image %s (%s)", img.tags, img.id)
             client.images.remove(img.id, force=True, noprune=False)
         except (ImageNotFound, NotFound):
             pass
@@ -42,14 +45,6 @@ def remove_images_by_label(client: docker.DockerClient, run_id: str) -> None:
             pass
 
 
-def gen_run_labels(t: Task, runid: str) -> dict[str, str]:
-    return {
-        "datasmith.run": runid,
-        "datasmith.task": f"{t.owner}/{t.repo}",
-        "datasmith.sha": t.sha if t.sha else "unknown",
-    }
-
-
 def _preview(s: str, n: int = 160) -> str:
     s = s or ""
     s = s.replace("\n", "\\n")
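`gen_run_labels` now lives in `datasmith.docker.orchestrator` (see the import added above); the label scheme it emits is unchanged. A minimal sketch of how such labels drive bulk cleanup with the Docker SDK, mirroring `remove_containers_by_label` (the run id and label values here are stand-ins):

import docker

run_id = "run-1234"  # stand-in run identifier
labels = {
    "datasmith.run": run_id,
    "datasmith.task": "numpy/numpy-financial",
    "datasmith.sha": "abc1234",
}

client = docker.from_env()
# Containers started with labels=labels can later be found and removed in bulk.
for c in client.containers.list(all=True, filters={"label": f"datasmith.run={run_id}"}):
    c.remove(force=True)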
@@ -645,24 +640,27 @@ def agent_build_and_validate(  # noqa: C901
         with contextlib.suppress(Exception):
             tool_exec.shutdown()
 
-        run_id = run_labels.get("datasmith.run", "unknown")
-        remove_containers_by_label(client, run_id)
-        for name in [
-            task.with_tag("env").get_container_name(),
-            task.with_tag("pkg").get_container_name(),
-            f"{task.with_tag('env').get_container_name()}-{run_id[:8]}",
-            f"{task.with_tag('pkg').get_container_name()}-{run_id[:8]}",
-        ]:
-            with contextlib.suppress(Exception, NotFound):
-                c = client.containers.get(name)
-                c.remove(force=True)
-
-        remove_images_by_label(client, run_id)
-        for tag in [task.with_tag("env").get_image_name(), task.with_tag("pkg").get_image_name()]:
-            with contextlib.suppress(NotFound, ImageNotFound):
-                client.images.remove(tag, force=True, noprune=False)
-
         try:
-            client.images.prune(filters={"dangling": True})
+            run_id = run_labels.get("datasmith.run", "unknown")
+            remove_containers_by_label(client, run_id)
+            for name in [
+                task.with_tag("env").get_container_name(),
+                task.with_tag("pkg").get_container_name(),
+                f"{task.with_tag('env').get_container_name()}-{run_id[:8]}",
+                f"{task.with_tag('pkg').get_container_name()}-{run_id[:8]}",
+            ]:
+                with contextlib.suppress(Exception, NotFound):
+                    c = client.containers.get(name)
+                    c.remove(force=True)
+
+            remove_images_by_label(client, run_id)
+            for tag in [task.with_tag("env").get_image_name(), task.with_tag("pkg").get_image_name()]:
+                with contextlib.suppress(NotFound, ImageNotFound):
+                    client.images.remove(tag, force=True, noprune=False)
+
+            try:
+                client.images.prune(filters={"dangling": True})
+            except Exception:
+                logger.exception("image prune failed")
         except Exception:
-            logger.exception("image prune failed")
+            logger.exception("agent_build_and_validate: cleanup error")
