Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 0 additions & 47 deletions scratch/context_registry.json

This file was deleted.

365 changes: 349 additions & 16 deletions scratch/notebooks/collect_perf_commits.ipynb

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions scratch/notebooks/compare_benchmarks.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"id": "7d96c171",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -631,7 +631,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.5"
"version": "3.12.11"
}
},
"nbformat": 4,
Expand Down
150 changes: 87 additions & 63 deletions scratch/scripts/initialize_context_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
# ROOT_PATH=${ROOT_PATH:-$PWD} # Usually /workspace/repo
# REPO_ROOT="$ROOT_PATH"
# TARGET_VERSIONS="${PY_VERSION:-${ASV_PY_VERSIONS:-}}"
# EXTRAS="${ALL_EXTRAS:+[$ALL_EXTRAS]}"
# if [[ -z "${TARGET_VERSIONS}" ]]; then
# echo "Error: No PY_VERSION set and ASV_PY_VERSIONS not found." >&2
# exit 1
Expand Down Expand Up @@ -109,8 +110,11 @@
# micromamba run -n "$ENV_NAME" pip install -e . scipy matplotlib

# # Editable install (no build isolation preferably). Toolchain lives in the env already.
# # $EXTRAS is an optional argument to install all discovered extra dependencies.
# # It will be empty if pyproject.toml does not exist or has no [project.optional-dependencies].
# # In case setup.py is used, no need to append $EXTRAS.
# log "Editable install with --no-build-isolation"
# PIP_NO_BUILD_ISOLATION=1 micromamba run -n "$ENV_NAME" python -m pip install --no-build-isolation -v -e "$REPO_ROOT"
# PIP_NO_BUILD_ISOLATION=1 micromamba run -n "$ENV_NAME" python -m pip install --no-build-isolation -v -e "$REPO_ROOT"$EXTRAS

# # Health checks (import + compiled extension probe; optional pytest smoke with RUN_PYTEST_SMOKE=1)
# log "Running smoke checks"
Expand Down Expand Up @@ -141,6 +145,7 @@
ROOT_PATH=${ROOT_PATH:-$PWD} # Usually /workspace/repo
REPO_ROOT="$ROOT_PATH"
TARGET_VERSIONS="${PY_VERSION:-${ASV_PY_VERSIONS:-}}"
EXTRAS="${ALL_EXTRAS:+[$ALL_EXTRAS]}"
if [[ -z "${TARGET_VERSIONS}" ]]; then
echo "Error: No PY_VERSION set and ASV_PY_VERSIONS not found." >&2
exit 1
Expand Down Expand Up @@ -226,8 +231,11 @@
micromamba run -n "$ENV_NAME" pip install meson-python cython

# Editable install (no build isolation preferably). Toolchain lives in the env already.
# $EXTRAS is an optional argument to install all discovered extra dependencies.
# It will be empty if pyproject.toml does not exist or has no [project.optional-dependencies].
# In case setup.py is used, no need to append $EXTRAS.
log "Editable install with --no-build-isolation"
PIP_NO_BUILD_ISOLATION=1 micromamba run -n "$ENV_NAME" python -m pip install --no-build-isolation -v -e "$REPO_ROOT"
PIP_NO_BUILD_ISOLATION=1 micromamba run -n "$ENV_NAME" python -m pip install --no-build-isolation -v -e "$REPO_ROOT"$EXTRAS

# Health checks (import + compiled extension probe; optional pytest smoke with RUN_PYTEST_SMOKE=1)
log "Running smoke checks"
Expand Down Expand Up @@ -258,6 +266,7 @@
ROOT_PATH=${ROOT_PATH:-$PWD} # Usually /workspace/repo
REPO_ROOT="$ROOT_PATH"
TARGET_VERSIONS="${PY_VERSION:-${ASV_PY_VERSIONS:-}}"
EXTRAS="${ALL_EXTRAS:+[$ALL_EXTRAS]}"
if [[ -z "${TARGET_VERSIONS}" ]]; then
echo "Error: No PY_VERSION set and ASV_PY_VERSIONS not found." >&2
exit 1
Expand Down Expand Up @@ -344,8 +353,11 @@
export CFLAGS="${CFLAGS:-} -Wno-error=incompatible-pointer-types"

# Editable install (no build isolation preferably). Toolchain lives in the env already.
# $EXTRAS is an optional argument to install all discovered extra dependencies.
# It will be empty if pyproject.toml does not exist or has no [project.optional-dependencies].
# In case setup.py is used, no need to append $EXTRAS.
log "Editable install with --no-build-isolation"
PIP_NO_BUILD_ISOLATION=1 micromamba run -n "$ENV_NAME" python -m pip install --no-build-isolation -v -e "$REPO_ROOT"
PIP_NO_BUILD_ISOLATION=1 micromamba run -n "$ENV_NAME" python -m pip install --no-build-isolation -v -e "$REPO_ROOT"$EXTRAS

# Health checks (import + compiled extension probe; optional pytest smoke with RUN_PYTEST_SMOKE=1)
log "Running smoke checks"
Expand Down Expand Up @@ -377,6 +389,7 @@
ROOT_PATH=${ROOT_PATH:-$PWD} # Usually /workspace/repo
REPO_ROOT="$ROOT_PATH"
TARGET_VERSIONS="${PY_VERSION:-${ASV_PY_VERSIONS:-}}"
EXTRAS="${ALL_EXTRAS:+[$ALL_EXTRAS]}"
if [[ -z "${TARGET_VERSIONS}" ]]; then
echo "Error: No PY_VERSION set and ASV_PY_VERSIONS not found." >&2
exit 1
Expand Down Expand Up @@ -469,8 +482,11 @@
micromamba run -n "$ENV_NAME" python "${ROOT_PATH}/build_lib.py"

# Editable install (no build isolation preferably). Toolchain lives in the env already.
# $EXTRAS is an optional argument to install all discovered extra dependencies.
# It will be empty if pyproject.toml does not exist or has no [project.optional-dependencies].
# In case setup.py is used, no need to append $EXTRAS.
log "Editable install with --no-build-isolation"
PIP_NO_BUILD_ISOLATION=1 micromamba run -n "$ENV_NAME" python -m pip install --no-build-isolation -v -e "$REPO_ROOT"
PIP_NO_BUILD_ISOLATION=1 micromamba run -n "$ENV_NAME" python -m pip install --no-build-isolation -v -e "$REPO_ROOT"$EXTRAS

# Health checks (import + compiled extension probe; optional pytest smoke with RUN_PYTEST_SMOKE=1)
log "Running smoke checks"
Expand Down Expand Up @@ -503,6 +519,7 @@
ROOT_PATH=${ROOT_PATH:-$PWD} # Usually /workspace/repo
REPO_ROOT="$ROOT_PATH"
TARGET_VERSIONS="${PY_VERSION:-${ASV_PY_VERSIONS:-}}"
EXTRAS="${ALL_EXTRAS:+[$ALL_EXTRAS]}"
if [[ -z "${TARGET_VERSIONS}" ]]; then
echo "Error: No PY_VERSION set and ASV_PY_VERSIONS not found." >&2
exit 1
Expand Down Expand Up @@ -590,8 +607,11 @@
fi

# Editable install (no build isolation preferably). Toolchain lives in the env already.
# $EXTRAS is an optional argument to install all discovered extra dependencies.
# It will be empty if pyproject.toml does not exist or has no [project.optional-dependencies].
# In case setup.py is used, no need to append $EXTRAS.
log "Editable install with --no-build-isolation"
PIP_NO_BUILD_ISOLATION=1 micromamba run -n "$ENV_NAME" python -m pip install --no-build-isolation -v -e "$REPO_ROOT"
PIP_NO_BUILD_ISOLATION=1 micromamba run -n "$ENV_NAME" python -m pip install --no-build-isolation -v -e "$REPO_ROOT"$EXTRAS

# Health checks (import + compiled extension probe; optional pytest smoke with RUN_PYTEST_SMOKE=1)
log "Running smoke checks"
Expand Down Expand Up @@ -624,6 +644,7 @@
# ROOT_PATH=${ROOT_PATH:-$PWD} # Usually /workspace/repo
# REPO_ROOT="$ROOT_PATH"
# TARGET_VERSIONS="${PY_VERSION:-${ASV_PY_VERSIONS:-}}"
# EXTRAS="${ALL_EXTRAS:+[$ALL_EXTRAS]}"
# if [[ -z "${TARGET_VERSIONS}" ]]; then
# echo "Error: No PY_VERSION set and ASV_PY_VERSIONS not found." >&2
# exit 1
Expand Down Expand Up @@ -715,8 +736,11 @@
# cd "$working_dir" || exit 1
# else
# # Editable install (no build isolation preferably). Toolchain lives in the env already.
# # $EXTRAS is an optional argument to install all discovered extra dependencies.
# # It will be empty if pyproject.toml does not exist or has no [project.optional-dependencies].
# # In case setup.py is used, no need to append $EXTRAS.
# log "Editable install with --no-build-isolation"
# PIP_NO_BUILD_ISOLATION=1 micromamba run -n "$ENV_NAME" python -m pip install --no-build-isolation -v -e "$REPO_ROOT"
# PIP_NO_BUILD_ISOLATION=1 micromamba run -n "$ENV_NAME" python -m pip install --no-build-isolation -v -e "$REPO_ROOT"$EXTRAS
# fi


Expand All @@ -734,60 +758,60 @@
# )


# if __name__ == "__main__":
# from pathlib import Path

# CONTEXT_REGISTRY.save_to_file(Path("scratch/context_registry_init.json"))
# # for each context, build an image with the context.
# import docker

# from datasmith.docker.context import ContextRegistry, DockerContext, Task

# client = docker.from_env()

# import concurrent.futures

# fails = dict()

# def build_context(task_context):
# task, context = task_context
# if not task.sha:
# import requests

# resp = requests.get(f"https://api.github.com/repos/{task.owner}/{task.repo}")
# resp.raise_for_status()
# commit_sha = resp.json().get("default_branch", "main")
# task = Task(owner=task.owner, repo=task.repo, sha=commit_sha, tag=task.tag)
# print(f"Building image for {task.get_image_name()} at {task.sha}")
# res = context.build_container_streaming(
# client=client,
# image_name=task.get_image_name(),
# build_args={
# "REPO_URL": f"https://www.github.com/{task.owner}/{task.repo}",
# "COMMIT_SHA": task.sha, # pyright: ignore[reportArgumentType]
# },
# force=True,
# timeout_s=1200,
# pull=True,
# )
# return (task, res)

# with concurrent.futures.ProcessPoolExecutor() as executor:
# futures = {executor.submit(build_context, item): item[0] for item in CONTEXT_REGISTRY.registry.items()}
# for future in concurrent.futures.as_completed(futures):
# task = futures[future]
# try:
# task, res = future.result()
# if res.ok:
# print(f"Built image {task.get_image_name()} successfully")
# else:
# print(f"Failed to build image {task.get_image_name()}")
# fails[task] = res
# except Exception as exc:
# print(f"Exception building image {task.get_image_name()}: {exc}")
# fails[task] = exc

# if fails:
# import IPython

# IPython.embed()
if __name__ == "__main__":
from pathlib import Path

CONTEXT_REGISTRY.save_to_file(Path("scratch/context_registry_init.json"))
# # for each context, build an image with the context.
# import docker

# from datasmith.docker.context import ContextRegistry, DockerContext, Task

# client = docker.from_env()

# import concurrent.futures

# fails = dict()

# def build_context(task_context):
# task, context = task_context
# if not task.sha:
# import requests

# resp = requests.get(f"https://api.github.com/repos/{task.owner}/{task.repo}")
# resp.raise_for_status()
# commit_sha = resp.json().get("default_branch", "main")
# task = Task(owner=task.owner, repo=task.repo, sha=commit_sha, tag=task.tag)
# print(f"Building image for {task.get_image_name()} at {task.sha}")
# res = context.build_container_streaming(
# client=client,
# image_name=task.get_image_name(),
# build_args={
# "REPO_URL": f"https://www.github.com/{task.owner}/{task.repo}",
# "COMMIT_SHA": task.sha, # pyright: ignore[reportArgumentType]
# },
# force=True,
# timeout_s=1200,
# pull=True,
# )
# return (task, res)

# with concurrent.futures.ProcessPoolExecutor() as executor:
# futures = {executor.submit(build_context, item): item[0] for item in CONTEXT_REGISTRY.registry.items()}
# for future in concurrent.futures.as_completed(futures):
# task = futures[future]
# try:
# task, res = future.result()
# if res.ok:
# print(f"Built image {task.get_image_name()} successfully")
# else:
# print(f"Failed to build image {task.get_image_name()}")
# fails[task] = res
# except Exception as exc:
# print(f"Exception building image {task.get_image_name()}: {exc}")
# fails[task] = exc

# if fails:
# import IPython

# IPython.embed()
8 changes: 4 additions & 4 deletions scratch/scripts/synthesize_contexts.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
configure_agent_backends(PORTKEY_MODEL_NAME="@anthropic/claude-3-5-sonnet-latest")

logger = configure_logging(level=10)
# logger = configure_logging(level=10, stream=open(Path(__file__).with_suffix(".log"), "w"))
# logger = configure_logging(level=10, stream=open(Path(__file__).with_suffix(".tiny.log"), "w"))


def parse_args() -> argparse.Namespace:
Expand Down Expand Up @@ -178,13 +178,13 @@ def main(args: argparse.Namespace) -> None:
for fut in as_completed(futures):
res = fut.result()
results.append(res)
with _err_lock, open(args.output_dir / "results.jsonl", "a") as jf:
jf.write(json.dumps(res) + "\n")

if int(res["rc"]) != 1:
logger.info("main: SUCCESS %s/%s@%s", res["owner"], res["repo"], res["sha"])
context_registry.save_to_file(path=args.context_registry)

with _err_lock, open(args.output_dir / "results.jsonl", "a") as jf:
jf.write(json.dumps(res) + "\n")

# Rollup (minimal, quick to read)
rollup = {
r["image_name"]: {
Expand Down
Loading
Loading