From 9cd53e91c2cd188b0ef7e9019d22ea1355c7f1b5 Mon Sep 17 00:00:00 2001 From: Atharva Sehgal Date: Wed, 3 Sep 2025 09:21:47 +0000 Subject: [PATCH] updating Docker containers. Synthesis now working on tiny set. starting large scale runs. --- scratch/context_registry.json | 47 --- scratch/notebooks/collect_perf_commits.ipynb | 365 +++++++++++++++++- scratch/notebooks/compare_benchmarks.ipynb | 4 +- .../scripts/initialize_context_registry.py | 150 ++++--- scratch/scripts/synthesize_contexts.py | 8 +- src/datasmith/agents/context_synthesis.py | 112 ++++-- src/datasmith/docker/context.py | 4 +- src/datasmith/docker/docker_build_env.sh | 153 +++++++- src/datasmith/docker/docker_build_pkg.sh | 13 +- src/datasmith/docker/validation.py | 1 + 10 files changed, 677 insertions(+), 180 deletions(-) delete mode 100644 scratch/context_registry.json diff --git a/scratch/context_registry.json b/scratch/context_registry.json deleted file mode 100644 index e947916..0000000 --- a/scratch/context_registry.json +++ /dev/null @@ -1,47 +0,0 @@ -{ - "contexts": { - "Task(owner='astropy', repo='astropy', sha=None, commit_date=0.0, tag='pkg')": { - "building_data": "#!/usr/bin/env bash\n# Purpose: Build/install the repo (editable) in one or more ASV micromamba envs, then run health checks.\nset -euo pipefail\n\n###### SETUP CODE (NOT TO BE MODIFIED) ######\n# Loads micromamba, common helpers, and persisted variables from the env stage.\nsource /etc/profile.d/asv_utils.sh || true\nsource /etc/profile.d/asv_build_vars.sh || true\neval \"$(micromamba shell hook --shell=bash)\"\n\nROOT_PATH=${ROOT_PATH:-$PWD} # Usually /workspace/repo\nREPO_ROOT=\"$ROOT_PATH\"\nTARGET_VERSIONS=\"${PY_VERSION:-${ASV_PY_VERSIONS:-}}\"\nif [[ -z \"${TARGET_VERSIONS}\" ]]; then\n echo \"Error: No PY_VERSION set and ASV_PY_VERSIONS not found.\" >&2\n exit 1\nfi\n###### END SETUP CODE ######\n\n# -----------------------------\n# Agent guidance (read-first)\n# -----------------------------\n# GOAL: For each Python version below, install the project in EDITABLE mode into env asv_{version},\n# with NO build isolation, then run health checks.\n#\n# Below this comment, you should do whatever is necessary to build the project without errors. 
Including (but not limited to):\n# - Add extra conda/pip dependencies needed to build this project.\n# - Run repo-specific pre-steps (e.g., submodules, generating Cython, env vars).\n# - Run arbitrary micromamba/pip commands in the target env.\n# - Set CFLAGS/CXXFLAGS/LDFLAGS if needed for this repo.\n# - Change files in the repo if needed (e.g., fix a missing #include).\n# - Anything else needed to get a successful editable install.\n#\n# MUST:\n# - Keep this script idempotent.\n# - Use: `pip install --no-build-isolation -v -e .` or `pip install -e .` or equivalent.\n# - Do not modify the SETUP CODE or helper functions below.\n#\n# DO NOT:\n# - Change env names or Python versions outside MODEL EDIT AREA.\n# - Use build isolation unless absolutely necessary.\n\n# -----------------------------\n# Helpers (do not modify)\n# -----------------------------\nlog() { printf \"\u001b[1;34m[build]\u001b[0m %s\n\" \"$*\"; }\nwarn() { printf \"\u001b[1;33m[warn]\u001b[0m %s\n\" \"$*\" >&2; }\ndie() { printf \"\u001b[1;31m[fail]\u001b[0m %s\n\" \"$*\" >&2; exit 1; }\n\n# Conservative default parallelism (override if the repo benefits)\nexport CMAKE_BUILD_PARALLEL_LEVEL=\"${CMAKE_BUILD_PARALLEL_LEVEL:-2}\"\nexport NPY_NUM_BUILD_JOBS=\"${NPY_NUM_BUILD_JOBS:-2}\"\n\n# -----------------------------\n# Build & test across envs\n# -----------------------------\nfor version in $TARGET_VERSIONS; do\n ENV_NAME=\"asv_${version}\"\n log \"==> Building in env: $ENV_NAME (python=$version)\"\n\n if ! micromamba env list | awk '{print $1}' | grep -qx \"$ENV_NAME\"; then\n die \"Env $ENV_NAME not found. Did docker_build_env.sh run?\"\n fi\n\n # Import name resolution (kept simple for the agent)\n IMP=\"${IMPORT_NAME:-}\"\n if [[ -z \"$IMP\" ]]; then\n if ! IMP=\"$(asv_detect_import_name --repo-root \"$REPO_ROOT\" 2>/dev/null)\"; then\n die \"Could not determine import name. Set IMPORT_NAME in /etc/profile.d/asv_build_vars.sh\"\n fi\n fi\n log \"Using import name: $IMP\"\n\n # -----------------------------\n # MODEL EDIT AREA: repo-specific tweaks (optional)\n # -----------------------------\n # Examples (uncomment if needed for this repo):\n #\n # log \"Updating submodules\"\n # git -C \"$REPO_ROOT\" submodule update --init --recursive\n #\n # log \"Installing extra system libs via conda-forge\"\n # micromamba install -y -n \"$ENV_NAME\" -c conda-forge 'openblas' 'blas=*=openblas' 'libopenmp'\n #\n # log \"Pre-generating Cython sources\"\n # micromamba run -n \"$ENV_NAME\" python -m cython --version\n #\n # export CFLAGS=\"${CFLAGS:-}\"\n # export CXXFLAGS=\"${CXXFLAGS:-}\"\n # export LDFLAGS=\"${LDFLAGS:-}\"\n # -----------------------------\n\n # Install some basic micromamba packages.\n micromamba install -y -n \"$ENV_NAME\" -c conda-forge git conda mamba \"libmambapy<=1.9.9\"\n\n export CFLAGS=\"${CFLAGS:-} -Wno-error=incompatible-pointer-types\"\n micromamba run -n \"$ENV_NAME\" pip install -e . scipy matplotlib\n\n # Editable install (no build isolation preferrably). 
Toolchain lives in the env already.\n log \"Editable install with --no-build-isolation\"\n PIP_NO_BUILD_ISOLATION=1 micromamba run -n \"$ENV_NAME\" python -m pip install --no-build-isolation -v -e \"$REPO_ROOT\"\n\n # Health checks (import + compiled extension probe; optional pytest smoke with RUN_PYTEST_SMOKE=1)\n log \"Running smoke checks\"\n micromamba run -n \"$ENV_NAME\" asv_smokecheck.py --import-name \"$IMP\" --repo-root \"$REPO_ROOT\" ${RUN_PYTEST_SMOKE:+--pytest-smoke}\n\n # Machine-readable markers (useful in logs)\n echo \"::import_name=${IMP}::env=${ENV_NAME}\"\ndone\n\nlog \"All builds complete \u2705\"", - "dockerfile_data": "# syntax=docker/dockerfile:1.7\n\nFROM buildpack-deps:jammy AS base\n\nARG REPO_URL\nARG COMMIT_SHA\n\nRUN apt-get update && \\\n apt-get install -y --no-install-recommends \\\n curl git build-essential jq cmake ninja-build && \\\n rm -rf /var/lib/apt/lists/*\n\nRUN curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest \\\n | tar -xvj -C /usr/local/bin --strip-components=1 bin/micromamba\n\nENV MAMBA_ROOT_PREFIX=/opt/conda \\\n PATH=/opt/conda/bin:$PATH \\\n MAMBA_DOCKERFILE_ACTIVATE=1 \\\n OPENBLAS_NUM_THREADS=1 \\\n MKL_NUM_THREADS=1 \\\n OMP_NUM_THREADS=1\n\nRUN micromamba install -y -p $MAMBA_ROOT_PREFIX -c conda-forge \\\n python=3.10 \\\n git asv pyperf mamba conda libmambapy jq && \\\n micromamba clean --all --yes\n\nRUN mkdir -p /workspace /output\nWORKDIR /workspace\n\nFROM base AS env\nARG REPO_URL\nARG COMMIT_SHA\n\n# Entrypoint is inherited by pkg\nCOPY entrypoint.sh /entrypoint.sh\nRUN chmod +x /entrypoint.sh\nENTRYPOINT [\"/entrypoint.sh\"]\n\nRUN git clone \"$REPO_URL\" /workspace/repo\nWORKDIR /workspace/repo\nRUN git checkout \"$COMMIT_SHA\"\nLABEL vcs.url=\"$REPO_URL\" vcs.ref=\"$COMMIT_SHA\"\n\n# ---- ENV STAGE: create envs, persist vars, install smoke tool ----\nCOPY docker_build_env.sh /workspace/repo/docker_build_env.sh\nRUN chmod +x /workspace/repo/docker_build_env.sh && \\\n /workspace/repo/docker_build_env.sh\n\n\nFROM env AS pkg\n\n# ---- PKG STAGE: build+test the package for each ASV Python ----\nCOPY docker_build_pkg.sh /workspace/repo/docker_build_pkg.sh\nRUN chmod +x /workspace/repo/docker_build_pkg.sh && \\\n /workspace/repo/docker_build_pkg.sh\n# If you want to restrict to one version at build time, replace with:\n# RUN PY_VERSION=3.10 /workspace/repo/docker_build_pkg.sh\n", - "entrypoint_data": "#!/usr/bin/env bash\n# set -euo pipefail\nset -x\n: \"${ASV_ARGS:?Need to set ASV_ARGS}\"\n: \"${ASV_MACHINE:=?Need to set ASV_MACHINE}\"\n: \"${ASV_OS:=?Need to set ASV_OS}\"\n: \"${ASV_NUM_CPU:=?Need to set ASV_NUM_CPU}\"\n: \"${ASV_ARCH:=?Need to set ASV_ARCH}\"\n: \"${ASV_CPU:=?Need to set ASV_CPU}\"\n: \"${ASV_RAM:=?Need to set ASV_RAM}\"\n\n\ncd_asv_json_dir() {\n local match\n match=$(find . -type f -name \"asv.*.json\" | head -n 1)\n\n if [[ -n \"$match\" ]]; then\n local dir\n dir=$(dirname \"$match\")\n cd \"$dir\" || echo \"Failed to change directory to $dir\"\n else\n echo \"No 'asv.*.json' file found in current directory or subdirectories.\"\n fi\n}\n\neval \"$(micromamba shell hook --shell=bash)\"\n\nmicromamba activate base\nROOT_PATH=${PWD}\ncd_asv_json_dir || exit 1\n\n# the conf name is one of \"asv.conf.json\" or \"asv.ci.conf.json\" or \"asv.*.json\"\nCONF_NAME=$(basename \"$(find . 
-type f -name \"asv.*.json\" | head -n 1)\")\nif [[ -z \"$CONF_NAME\" ]]; then\n echo \"No 'asv.*.json' file found in current directory or subdirectories.\"\n exit 1\nfi\n\n# Read the python versions from the asv.conf.json\npython_versions=$(python -c \"import asv; pythons = asv.config.Config.load('$CONF_NAME').pythons; print(' '.join(pythons))\")\n# change the \"results_dir\" in asv.conf.json to \"/output/{COMMIT_SHA}/\"\nfor version in $python_versions; do\n # Create per\u2011Python env and install ASV\n python -c \"import asv, os, pathlib\npath = pathlib.Path('/output/'\\\"$COMMIT_SHA\\\"'/''\\\"$version\\\"')\npath.mkdir(parents=True, exist_ok=True)\n\nconfig = asv.config.Config.load('$CONF_NAME')\nconfig.results_dir = str(path / 'results')\nconfig.html_dir = str(path / 'html')\nconfig.branches = ['HEAD']\n\nasv.util.write_json('$CONF_NAME', config.__dict__, api_version=config.api_version)\nasv.util.write_json(path / '$CONF_NAME', config.__dict__, api_version=config.api_version)\n\"\n micromamba run -n \"asv_${version}\" asv machine --yes --config $CONF_NAME --machine \"$ASV_MACHINE\" --os \"$ASV_OS\" --num_cpu \"$ASV_NUM_CPU\" --arch \"$ASV_ARCH\" --cpu \"$ASV_CPU\" --ram \"$ASV_RAM\"\n micromamba run -n \"asv_${version}\" asv run --show-stderr ${ASV_ARGS} --config $CONF_NAME\ndone\n\necho \"Benchmarks complete.\"\n", - "env_building_data": "#!/usr/bin/env bash\nset -euo pipefail\n\n# -------- Helpers installed for all shells --------\ninstall_profile_helpers() {\n cat >/etc/profile.d/asv_utils.sh <<'EOF'\n# asv_utils.sh \u2014 login/interactive shell helpers for ASV builds\nexport MAMBA_ROOT_PREFIX=\"${MAMBA_ROOT_PREFIX:-/opt/conda}\"\n\n# Initialize micromamba for bash shells (no-op if not present)\nif command -v micromamba >/dev/null 2>&1; then\n eval \"$(micromamba shell hook --shell=bash)\"\nfi\n\n# Find and cd into the first directory that contains an asv.*.json\ncd_asv_json_dir() {\n local match\n match=$(find . -type f -name \"asv.*.json\" | head -n 1)\n if [[ -n \"$match\" ]]; then\n cd \"$(dirname \"$match\")\" || echo \"Failed to change directory\"\n else\n echo \"No 'asv.*.json' file found in current directory or subdirectories.\"\n return 1\n fi\n}\n\n# Return just the conf filename (e.g., asv.conf.json)\nasv_conf_name() {\n local f\n f=$(find . 
-type f -name \"asv.*.json\" | head -n 1)\n [[ -n \"$f\" ]] && basename \"$f\" || return 1\n}\n\n# Build performance knobs (overridable)\nexport MAKEFLAGS=\"${MAKEFLAGS:--j$(nproc)}\"\nexport CMAKE_BUILD_PARALLEL_LEVEL=\"${CMAKE_BUILD_PARALLEL_LEVEL:-$(nproc)}\"\nexport NPY_NUM_BUILD_JOBS=\"${NPY_NUM_BUILD_JOBS:-$(nproc)}\"\n\n# Shared pip cache to speed repeated editable builds\nexport PIP_CACHE_DIR=\"${PIP_CACHE_DIR:-/opt/pipcache}\"\nmkdir -p \"$PIP_CACHE_DIR\"\nEOF\n}\n\n# -------- Persisted build variables --------\nwrite_build_vars() {\n local py_versions=\"$1\"\n local import_name=\"$2\"\n\n mkdir -p /etc/asv_env\n echo \"$py_versions\" > /etc/asv_env/py_versions\n echo \"$import_name\" > /etc/asv_env/import_name\n\n # Exported for every future shell (pkg script, interactive, etc.)\n cat >/etc/profile.d/asv_build_vars.sh </usr/local/bin/asv_smokecheck.py <<'PY'\n#!/usr/bin/env python\nimport argparse, importlib, pathlib, sys\n\ndef import_and_version(name: str):\n m = importlib.import_module(name)\n ver = getattr(m, \"__version__\", \"unknown\")\n print(f\"{name} imported ok; __version__={ver}\")\n\ndef probe_compiled(name: str, max_ext: int = 10):\n m = importlib.import_module(name)\n if not hasattr(m, \"__path__\"):\n print(\"No package __path__ (likely a single-module dist); skipping compiled probe.\")\n return\n pkg_path = pathlib.Path(list(m.__path__)[0])\n so_like = list(pkg_path.rglob(\"*.so\")) + list(pkg_path.rglob(\"*.pyd\"))\n failed = []\n for ext in so_like[:max_ext]:\n rel = ext.relative_to(pkg_path).with_suffix(\"\")\n dotted = \".\".join([name] + list(rel.parts))\n try:\n importlib.import_module(dotted)\n except Exception as e:\n failed.append((dotted, str(e)))\n if failed:\n print(\"Some compiled submodules failed to import:\")\n for d, err in failed:\n print(\" -\", d, \"->\", err)\n sys.exit(1)\n else:\n print(\"Compiled submodules (if any) import ok\")\n\ndef main():\n p = argparse.ArgumentParser()\n p.add_argument(\"--import-name\", required=True)\n p.add_argument(\"--repo-root\", default=\".\")\n p.add_argument(\"--pytest-smoke\", action=\"store_true\",\n help=\"Run a quick pytest smoke: -k 'not slow' --maxfail=1\")\n p.add_argument(\"--max-ext\", type=int, default=10)\n args = p.parse_args()\n\n import_and_version(args.import_name)\n probe_compiled(args.import_name, max_ext=args.max_ext)\n\n if args.pytest_smoke:\n import subprocess, os\n if any((pathlib.Path(args.repo_root)/p).exists() for p in (\"tests\", \"pytest.ini\", \"pyproject.toml\")):\n print(\"Running pytest smoke...\")\n rc = subprocess.call([sys.executable, \"-m\", \"pytest\", \"-q\", \"-k\", \"not slow\", \"--maxfail=1\"], cwd=args.repo_root)\n if rc != 0:\n sys.exit(rc)\n else:\n print(\"No tests detected; skipping pytest smoke.\")\n print(\"Smokecheck OK \u2705\")\n\nif __name__ == \"__main__\":\n main()\nPY\n chmod +x /usr/local/bin/asv_smokecheck.py\n}\ninstall_smokecheck\n\n# -------- Install an import-name detector CLI --------\ninstall_detect_import_name() {\n cat >/usr/local/bin/detect_import_name <<'PY'\n#!/usr/bin/env python\nimport argparse, pathlib, re, sys, subprocess, configparser, json\n\n# --- optional TOML loader (py3.11+: tomllib; else tomli if available) ---\ntry:\n import tomllib as toml\nexcept Exception:\n try:\n import tomli as toml\n except Exception:\n toml = None\n\nEXCEPTIONS = {\n # common dist\u2192import mismatches\n \"scikit-learn\": \"sklearn\",\n \"opencv-python\": \"cv2\",\n \"pyyaml\": \"yaml\",\n \"beautifulsoup4\": \"bs4\",\n \"pillow\": \"PIL\",\n 
\"mysqlclient\": \"MySQLdb\",\n \"psycopg2-binary\": \"psycopg2\",\n \"opencv-contrib-python\": \"cv2\",\n \"protobuf\": \"google\", # top-level package\n \"apache-beam\": \"apache_beam\",\n}\n\n# All the package names we typically query.\nEXCEPTIONS.update({\n # --- core scientific stack ---\n \"scikit-learn\": \"sklearn\",\n \"numpy\": \"numpy\",\n \"pandas\": \"pandas\",\n \"scipy\": \"scipy\",\n \"scikit-image\": \"skimage\",\n \"pywt\": \"pywt\",\n \"xarray\": \"xarray\",\n \"bottleneck\": \"bottleneck\",\n \"h5py\": \"h5py\",\n \"networkx\": \"networkx\",\n \"shapely\": \"shapely\",\n\n # --- ML / stats / optimization / viz ---\n \"optuna\": \"optuna\",\n \"arviz\": \"arviz\",\n \"pymc\": \"pymc\",\n \"kedro\": \"kedro\",\n \"modin\": \"modin\",\n \"napari\": \"napari\",\n \"deepchecks\": \"deepchecks\",\n \"voyager\": \"voyager\", # spotify/voyager\n \"warp\": \"warp\", # NVIDIA/warp\n \"newton\": \"newton\", # newton-physics/newton\n\n # --- domain / ecosystem libs ---\n \"geopandas\": \"geopandas\",\n \"cartopy\": \"cartopy\",\n \"iris\": \"iris\",\n \"anndata\": \"anndata\",\n \"scanpy\": \"scanpy\",\n \"sunpy\": \"sunpy\",\n \"pvlib-python\": \"pvlib\",\n \"PyBaMM\": \"pybamm\",\n \"momepy\": \"momepy\",\n \"satpy\": \"satpy\",\n \"pydicom\": \"pydicom\",\n \"pynetdicom\": \"pynetdicom\",\n\n # --- file formats / IO / infra ---\n \"asdf\": \"asdf\",\n \"arrow\": \"pyarrow\", # apache/arrow\n \"ArcticDB\": \"arcticdb\",\n \"arctic\": \"arctic\",\n\n # --- web / frameworks / utils ---\n \"django-components\": \"django_components\",\n \"h11\": \"h11\",\n \"tqdm\": \"tqdm\",\n \"rich\": \"rich\",\n \"posthog\": \"posthog\",\n \"datalad\": \"datalad\",\n \"ipyparallel\": \"ipyparallel\",\n\n # --- numerical / symbolic / control ---\n \"autograd\": \"autograd\",\n \"python-control\": \"control\",\n \"loopy\": \"loopy\",\n \"thermo\": \"thermo\",\n \"chempy\": \"chempy\",\n \"adaptive\": \"adaptive\",\n\n # --- scientific image / signal ---\n \"metric-learn\": \"metric_learn\",\n\n # --- quantum / physics ---\n \"Cirq\": \"cirq\",\n \"memray\": \"memray\",\n \"devito\": \"devito\",\n\n # --- bio / chem / data ---\n \"sourmash\": \"sourmash\",\n \"dipy\": \"dipy\",\n\n # --- protocol buffers / codegen / outlines ---\n \"python-betterproto\": \"betterproto\",\n \"outlines\": \"outlines\",\n\n # --- DS viz / raster ---\n \"datashader\": \"datashader\",\n \"xarray-spatial\": \"xarray_spatial\",\n\n # --- misc ---\n \"enlighten\": \"enlighten\",\n \"xorbits\": \"xorbits\",\n \"geopandas\": \"geopandas\",\n \"lmfit-py\": \"lmfit\",\n \"mdanalysis\": \"MDAnalysis\",\n \"nilearn\": \"nilearn\",\n})\n\n\nEXCLUDE_DIRS = {\n \".git\", \".hg\", \".svn\", \".tox\", \".nox\", \".venv\", \"venv\",\n \"build\", \"dist\", \"__pycache__\", \".mypy_cache\", \".pytest_cache\",\n \"docs\", \"doc\", \"site\", \"examples\", \"benchmarks\", \"tests\", \"testing\",\n}\n\ndef _norm(s: str) -> str:\n return re.sub(r\"[-_.]+\", \"\", s).lower()\n\ndef read_pyproject(root: pathlib.Path):\n cfg = {}\n p = root / \"pyproject.toml\"\n if toml and p.exists():\n try:\n cfg = toml.loads(p.read_text(encoding=\"utf-8\"))\n except Exception:\n pass\n return cfg\n\ndef read_setup_cfg(root: pathlib.Path):\n p = root / \"setup.cfg\"\n cp = configparser.ConfigParser()\n if p.exists():\n try:\n cp.read(p, encoding=\"utf-8\")\n except Exception:\n pass\n return cp\n\ndef dist_name_from_config(pyproject, setup_cfg):\n # PEP 621 name\n name = (pyproject.get(\"project\", {}) or {}).get(\"name\")\n if not name:\n # setup.cfg 
[metadata] name\n if setup_cfg.has_section(\"metadata\"):\n name = setup_cfg.get(\"metadata\", \"name\", fallback=None)\n # setup.py --name as last resort\n return name\n\ndef package_roots_from_config(root, pyproject, setup_cfg):\n roots = set([root])\n # setuptools package-dir mapping\n # pyproject: [tool.setuptools.package-dir] \"\" = \"src\"\n pkgdir = ((pyproject.get(\"tool\", {}) or {}).get(\"setuptools\", {}) or {}).get(\"package-dir\", {})\n if isinstance(pkgdir, dict):\n if \"\" in pkgdir:\n roots.add((root / pkgdir[\"\"]).resolve())\n for _, d in pkgdir.items():\n try:\n roots.add((root / d).resolve())\n except Exception:\n pass\n # setup.cfg [options] package_dir\n if setup_cfg.has_section(\"options\"):\n raw = setup_cfg.get(\"options\", \"package_dir\", fallback=None)\n if raw:\n # can be \"=\\nsrc\" or mapping lines\n lines = [l.strip() for l in raw.splitlines() if l.strip()]\n # accept simple \"=src\" or \"\" = \"src\"\n for ln in lines:\n m = re.match(r'^(\"?\\'?)*\\s*=?\\s*(\"?\\'?)*\\s*(?P[^#;]+)$', ln)\n if m:\n roots.add((root / m.group(\"path\").strip()).resolve())\n # setup.cfg [options.packages.find] where\n if setup_cfg.has_section(\"options.packages.find\"):\n where = setup_cfg.get(\"options.packages.find\", \"where\", fallback=None)\n if where:\n for w in re.split(r\"[,\\s]+\", where):\n if w:\n roots.add((root / w).resolve())\n return [r for r in roots if r.exists()]\n\ndef explicit_modules_from_config(pyproject, setup_cfg):\n mods = set()\n # pyproject (tool.setuptools) py-modules / packages\n st = ((pyproject.get(\"tool\", {}) or {}).get(\"setuptools\", {}) or {})\n for key in (\"py-modules\", \"packages\"):\n val = st.get(key)\n if isinstance(val, list):\n mods.update(val)\n # setup.cfg [options] py_modules / packages\n if setup_cfg.has_section(\"options\"):\n for key in (\"py_modules\", \"packages\"):\n raw = setup_cfg.get(\"options\", key, fallback=None)\n if raw:\n for tok in re.split(r\"[\\s,]+\", raw.strip()):\n if tok and tok != \"find:\":\n mods.add(tok)\n return sorted(mods)\n\ndef read_top_level_from_egg_info(root):\n # editable installs often leave ./.egg-info/top_level.txt\n for ei in root.rglob(\"*.egg-info\"):\n tl = ei / \"top_level.txt\"\n if tl.exists():\n try:\n names = [l.strip() for l in tl.read_text(encoding=\"utf-8\").splitlines() if l.strip()]\n if names:\n return names\n except Exception:\n pass\n # also consider dist-info during local builds\n for di in root.rglob(\"*.dist-info\"):\n tl = di / \"top_level.txt\"\n if tl.exists():\n try:\n names = [l.strip() for l in tl.read_text(encoding=\"utf-8\").splitlines() if l.strip()]\n if names:\n return names\n except Exception:\n pass\n return None\n\ndef walk_candidates(roots):\n \"\"\"Return set of plausible top-level import names under candidate roots.\"\"\"\n cands = set()\n for r in roots:\n for path in r.rglob(\"__init__.py\"):\n try:\n pkg_dir = path.parent\n # skip excluded dirs anywhere in the path\n if any(part in EXCLUDE_DIRS for part in pkg_dir.parts):\n continue\n # Construct package name relative to the nearest search root\n try:\n rel = pkg_dir.relative_to(r)\n except Exception:\n continue\n if not rel.parts:\n continue\n top = rel.parts[0]\n if top.startswith(\"_\"):\n # usually private tooling\n continue\n cands.add(top)\n except Exception:\n pass\n # standalone modules at top-level of roots (py_modules case)\n for mod in r.glob(\"*.py\"):\n if mod.stem not in (\"setup\",):\n cands.add(mod.stem)\n return sorted(cands)\n\ndef score_candidates(cands, dist_name):\n \"\"\"Assign a 
score preferring names that match the dist name.\"\"\"\n scores = {}\n n_dist = _norm(dist_name) if dist_name else None\n prefer = None\n if dist_name and dist_name.lower() in EXCEPTIONS:\n prefer = EXCEPTIONS[dist_name.lower()]\n # also try normalized exception keys (e.g. capitalization)\n for k, v in EXCEPTIONS.items():\n if _norm(k) == _norm(dist_name or \"\"):\n prefer = v\n\n for c in cands:\n s = 0\n if prefer and _norm(c) == _norm(prefer):\n s += 100\n if n_dist and _norm(c) == n_dist:\n s += 80\n if n_dist and (_norm(c).startswith(n_dist) or n_dist.startswith(_norm(c))):\n s += 20\n # shorter, simpler names get a slight bump\n s += max(0, 10 - len(c))\n scores[c] = s\n return sorted(cands, key=lambda x: (-scores.get(x, 0), x)), scores\n\ndef detect(root: str, return_all=False):\n root = pathlib.Path(root).resolve()\n\n pyproject = read_pyproject(root)\n setup_cfg = read_setup_cfg(root)\n dist_name = dist_name_from_config(pyproject, setup_cfg)\n\n # 1) top_level.txt (best signal if present)\n top = read_top_level_from_egg_info(root)\n if top:\n if return_all:\n return top\n # If multiple, score them\n ordered, _ = score_candidates(top, dist_name or \"\")\n return [ordered[0]]\n\n # 2) explicit declarations (py_modules / packages lists)\n explicit = explicit_modules_from_config(pyproject, setup_cfg)\n\n # 3) find correct search roots (src layout, package_dir, etc.)\n roots = package_roots_from_config(root, pyproject, setup_cfg)\n\n # 4) walk code to infer candidates\n walked = walk_candidates(roots)\n\n # merge explicit + walked\n cands = list(dict.fromkeys(explicit + walked)) # keep order & de-dup\n\n # 5) fallback from dist name heuristics/exceptions if still empty\n if not cands and dist_name:\n # exception or simple normalization\n guess = EXCEPTIONS.get(dist_name.lower()) or re.sub(r\"[-\\.]+\", \"_\", dist_name)\n cands = [guess]\n\n if not cands:\n return []\n\n if return_all:\n # return ordered list\n ordered, _ = score_candidates(cands, dist_name or \"\")\n return ordered\n else:\n ordered, _ = score_candidates(cands, dist_name or \"\")\n return [ordered[0]]\n\ndef main():\n ap = argparse.ArgumentParser(description=\"Detect the top-level Python import name for a repo.\")\n ap.add_argument(\"--repo-root\", default=\".\", help=\"Path to repository root\")\n ap.add_argument(\"--all\", action=\"store_true\", help=\"Print all plausible names (JSON list)\")\n args = ap.parse_args()\n\n names = detect(args.repo_root, return_all=args.all)\n if not names:\n sys.exit(1)\n if args.all:\n print(json.dumps(names))\n else:\n print(names[0])\n\nif __name__ == \"__main__\":\n main()\nPY\n chmod +x /usr/local/bin/detect_import_name\n}\n\ninstall_detect_import_name\n\n# -------- Script body --------\n\ninstall_profile_helpers\n# shellcheck disable=SC1091\nsource /etc/profile.d/asv_utils.sh\n\n# Ensure base micromamba is active for introspecting ASV config\nmicromamba activate base\n\ninstall_detect_import_name\ninstall_smokecheck\n\nIMPORT_NAME=\"$(detect_import_name || true)\"\nif [[ -z \"$IMPORT_NAME\" ]]; then\n echo \"WARN: Could not determine import name; the pkg stage will fall back to local detection.\"\nfi\n\n\n# Move into the directory that contains asv.*.json\ncd_asv_json_dir || { echo \"No 'asv.*.json' file found.\" >&2; exit 1; }\n\nCONF_NAME=\"$(asv_conf_name || true)\"\nif [[ -z \"${CONF_NAME:-}\" ]]; then\n echo \"No 'asv.*.json' file found.\" >&2\n exit 1\nfi\n\n# Make sure tomli is available in base for pyproject parsing\nmicromamba install -y -n base -c conda-forge tomli 
>/dev/null\n\n# Read python versions from the ASV config\nPY_VERSIONS=$(python - <&2\n exit 1\nfi\n###### END SETUP CODE ######\n\n# -----------------------------\n# Agent guidance (read-first)\n# -----------------------------\n# GOAL: For each Python version below, install the project in EDITABLE mode into env asv_{version},\n# with NO build isolation, then run health checks.\n#\n# Below this comment, you should do whatever is necessary to build the project without errors. Including (but not limited to):\n# - Add extra conda/pip dependencies needed to build this project.\n# - Run repo-specific pre-steps (e.g., submodules, generating Cython, env vars).\n# - Run arbitrary micromamba/pip commands in the target env.\n# - Set CFLAGS/CXXFLAGS/LDFLAGS if needed for this repo.\n# - Change files in the repo if needed (e.g., fix a missing #include).\n# - Anything else needed to get a successful editable install.\n#\n# MUST:\n# - Keep this script idempotent.\n# - Use: `pip install --no-build-isolation -v -e .` or `pip install -e .` or equivalent.\n# - Do not modify the SETUP CODE or helper functions below.\n#\n# DO NOT:\n# - Change env names or Python versions outside MODEL EDIT AREA.\n# - Use build isolation unless absolutely necessary.\n\n# -----------------------------\n# Helpers (do not modify)\n# -----------------------------\nlog() { printf \"\\033[1;34m[build]\\033[0m %s\\n\" \"$*\"; }\nwarn() { printf \"\\033[1;33m[warn]\\033[0m %s\\n\" \"$*\" >&2; }\ndie() { printf \"\\033[1;31m[fail]\\033[0m %s\\n\" \"$*\" >&2; exit 1; }\n\n# Conservative default parallelism (override if the repo benefits)\nexport CMAKE_BUILD_PARALLEL_LEVEL=\"${CMAKE_BUILD_PARALLEL_LEVEL:-2}\"\nexport NPY_NUM_BUILD_JOBS=\"${NPY_NUM_BUILD_JOBS:-2}\"\n\n# -----------------------------\n# Build & test across envs\n# -----------------------------\nfor version in $TARGET_VERSIONS; do\n ENV_NAME=\"asv_${version}\"\n log \"==> Building in env: $ENV_NAME (python=$version)\"\n\n if ! micromamba env list | awk '{print $1}' | grep -qx \"$ENV_NAME\"; then\n die \"Env $ENV_NAME not found. Did docker_build_env.sh run?\"\n fi\n\n # Import name resolution (kept simple for the agent)\n IMP=\"${IMPORT_NAME:-}\"\n if [[ -z \"$IMP\" ]]; then\n if ! IMP=\"$(detect_import_name --repo-root \"$REPO_ROOT\" 2>/dev/null)\"; then\n die \"Could not determine import name. Set IMPORT_NAME in /etc/profile.d/asv_build_vars.sh\"\n fi\n fi\n log \"Using import name: $IMP\"\n\n # -----------------------------\n # MODEL EDIT AREA: repo-specific tweaks (optional)\n # -----------------------------\n # Examples (uncomment if needed for this repo):\n #\n # log \"Updating submodules\"\n # git -C \"$REPO_ROOT\" submodule update --init --recursive\n #\n # log \"Installing extra system libs via conda-forge\"\n # micromamba install -y -n \"$ENV_NAME\" -c conda-forge 'openblas' 'blas=*=openblas' 'libopenmp'\n #\n # log \"Pre-generating Cython sources\"\n # micromamba run -n \"$ENV_NAME\" python -m cython --version\n #\n # export CFLAGS=\"${CFLAGS:-}\"\n # export CXXFLAGS=\"${CXXFLAGS:-}\"\n # export LDFLAGS=\"${LDFLAGS:-}\"\n # -----------------------------\n\n # Install some generic packages needed for building/testing.\n micromamba install -y -n \"$ENV_NAME\" -c conda-forge pip git conda mamba libmambapy \\\n numpy scipy cython joblib threadpoolctl pytest \\\n compilers meson-python cmake ninja pkg-config tomli\n\n # Editable install (no build isolation preferrably). 
Toolchain lives in the env already.\n log \"Editable install with --no-build-isolation\"\n PIP_NO_BUILD_ISOLATION=1 micromamba run -n \"$ENV_NAME\" python -m pip install --no-build-isolation -v -e \"$REPO_ROOT\"\n\n # Health checks (import + compiled extension probe; optional pytest smoke with RUN_PYTEST_SMOKE=1)\n log \"Running smoke checks\"\n micromamba run -n \"$ENV_NAME\" asv_smokecheck.py --import-name \\\"$IMP\\\" --repo-root \\\"$REPO_ROOT\\\" ${RUN_PYTEST_SMOKE:+--pytest-smoke}\n\n echo \"::import_name=${IMP}::env=${ENV_NAME}\"\ndone\n\nlog \"All builds complete \u2705\"\n", - "dockerfile_data": "# syntax=docker/dockerfile:1.7\n\nFROM buildpack-deps:jammy AS base\n\nARG REPO_URL\nARG COMMIT_SHA\n\nRUN apt-get update && \\\n apt-get install -y --no-install-recommends \\\n curl git build-essential jq cmake ninja-build && \\\n rm -rf /var/lib/apt/lists/*\n\nRUN curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest \\\n | tar -xvj -C /usr/local/bin --strip-components=1 bin/micromamba\n\nENV MAMBA_ROOT_PREFIX=/opt/conda \\\n PATH=/opt/conda/bin:$PATH \\\n MAMBA_DOCKERFILE_ACTIVATE=1 \\\n OPENBLAS_NUM_THREADS=1 \\\n MKL_NUM_THREADS=1 \\\n OMP_NUM_THREADS=1\n\nRUN micromamba install -y -p $MAMBA_ROOT_PREFIX -c conda-forge \\\n python=3.10 \\\n git asv pyperf mamba conda libmambapy jq && \\\n micromamba clean --all --yes\n\nRUN mkdir -p /workspace /output\nWORKDIR /workspace\n\nFROM base AS env\nARG REPO_URL\nARG COMMIT_SHA\n\n# Entrypoint is inherited by pkg\nCOPY entrypoint.sh /entrypoint.sh\nRUN chmod +x /entrypoint.sh\nENTRYPOINT [\"/entrypoint.sh\"]\n\nRUN git clone \"$REPO_URL\" /workspace/repo\nWORKDIR /workspace/repo\nRUN git checkout \"$COMMIT_SHA\"\nLABEL vcs.url=\"$REPO_URL\" vcs.ref=\"$COMMIT_SHA\"\n\n# ---- ENV STAGE: create envs, persist vars, install smoke tool ----\nCOPY docker_build_env.sh /workspace/repo/docker_build_env.sh\nRUN chmod +x /workspace/repo/docker_build_env.sh && \\\n /workspace/repo/docker_build_env.sh\n\n\nFROM env AS pkg\n\n# ---- PKG STAGE: build+test the package for each ASV Python ----\nCOPY docker_build_pkg.sh /workspace/repo/docker_build_pkg.sh\nRUN chmod +x /workspace/repo/docker_build_pkg.sh && \\\n /workspace/repo/docker_build_pkg.sh\n# If you want to restrict to one version at build time, replace with:\n# RUN PY_VERSION=3.10 /workspace/repo/docker_build_pkg.sh\n", - "entrypoint_data": "#!/usr/bin/env bash\n# set -euo pipefail\nset -x\n: \"${ASV_ARGS:?Need to set ASV_ARGS}\"\n: \"${ASV_MACHINE:=?Need to set ASV_MACHINE}\"\n: \"${ASV_OS:=?Need to set ASV_OS}\"\n: \"${ASV_NUM_CPU:=?Need to set ASV_NUM_CPU}\"\n: \"${ASV_ARCH:=?Need to set ASV_ARCH}\"\n: \"${ASV_CPU:=?Need to set ASV_CPU}\"\n: \"${ASV_RAM:=?Need to set ASV_RAM}\"\n\n\ncd_asv_json_dir() {\n local match\n match=$(find . -type f -name \"asv.*.json\" | head -n 1)\n\n if [[ -n \"$match\" ]]; then\n local dir\n dir=$(dirname \"$match\")\n cd \"$dir\" || echo \"Failed to change directory to $dir\"\n else\n echo \"No 'asv.*.json' file found in current directory or subdirectories.\"\n fi\n}\n\neval \"$(micromamba shell hook --shell=bash)\"\n\nmicromamba activate base\nROOT_PATH=${PWD}\ncd_asv_json_dir || exit 1\n\n# the conf name is one of \"asv.conf.json\" or \"asv.ci.conf.json\" or \"asv.*.json\"\nCONF_NAME=$(basename \"$(find . 
-type f -name \"asv.*.json\" | head -n 1)\")\nif [[ -z \"$CONF_NAME\" ]]; then\n echo \"No 'asv.*.json' file found in current directory or subdirectories.\"\n exit 1\nfi\n\n# Read the python versions from the asv.conf.json\npython_versions=$(python -c \"import asv; pythons = asv.config.Config.load('$CONF_NAME').pythons; print(' '.join(pythons))\")\n# change the \"results_dir\" in asv.conf.json to \"/output/{COMMIT_SHA}/\"\nfor version in $python_versions; do\n # Create per\u2011Python env and install ASV\n python -c \"import asv, os, pathlib\npath = pathlib.Path('/output/'\\\"$COMMIT_SHA\\\"'/''\\\"$version\\\"')\npath.mkdir(parents=True, exist_ok=True)\n\nconfig = asv.config.Config.load('$CONF_NAME')\nconfig.results_dir = str(path / 'results')\nconfig.html_dir = str(path / 'html')\nconfig.branches = ['HEAD']\n\nasv.util.write_json('$CONF_NAME', config.__dict__, api_version=config.api_version)\nasv.util.write_json(path / '$CONF_NAME', config.__dict__, api_version=config.api_version)\n\"\n micromamba run -n \"asv_${version}\" asv machine --yes --config $CONF_NAME --machine \"$ASV_MACHINE\" --os \"$ASV_OS\" --num_cpu \"$ASV_NUM_CPU\" --arch \"$ASV_ARCH\" --cpu \"$ASV_CPU\" --ram \"$ASV_RAM\"\n micromamba run -n \"asv_${version}\" asv run --show-stderr ${ASV_ARGS} --config $CONF_NAME\ndone\n\necho \"Benchmarks complete.\"\n", - "env_building_data": "#!/usr/bin/env bash\nset -euo pipefail\n\n# -------- Helpers installed for all shells --------\ninstall_profile_helpers() {\n cat >/etc/profile.d/asv_utils.sh <<'EOF'\n# asv_utils.sh \u2014 login/interactive shell helpers for ASV builds\nexport MAMBA_ROOT_PREFIX=\"${MAMBA_ROOT_PREFIX:-/opt/conda}\"\n\n# Initialize micromamba for bash shells (no-op if not present)\nif command -v micromamba >/dev/null 2>&1; then\n eval \"$(micromamba shell hook --shell=bash)\"\nfi\n\n# Find and cd into the first directory that contains an asv.*.json\ncd_asv_json_dir() {\n local match\n match=$(find . -type f -name \"asv.*.json\" | head -n 1)\n if [[ -n \"$match\" ]]; then\n cd \"$(dirname \"$match\")\" || echo \"Failed to change directory\"\n else\n echo \"No 'asv.*.json' file found in current directory or subdirectories.\"\n return 1\n fi\n}\n\n# Return just the conf filename (e.g., asv.conf.json)\nasv_conf_name() {\n local f\n f=$(find . 
-type f -name \"asv.*.json\" | head -n 1)\n [[ -n \"$f\" ]] && basename \"$f\" || return 1\n}\n\n# Build performance knobs (overridable)\nexport MAKEFLAGS=\"${MAKEFLAGS:--j$(nproc)}\"\nexport CMAKE_BUILD_PARALLEL_LEVEL=\"${CMAKE_BUILD_PARALLEL_LEVEL:-$(nproc)}\"\nexport NPY_NUM_BUILD_JOBS=\"${NPY_NUM_BUILD_JOBS:-$(nproc)}\"\n\n# Shared pip cache to speed repeated editable builds\nexport PIP_CACHE_DIR=\"${PIP_CACHE_DIR:-/opt/pipcache}\"\nmkdir -p \"$PIP_CACHE_DIR\"\nEOF\n}\n\n# -------- Persisted build variables --------\nwrite_build_vars() {\n local py_versions=\"$1\"\n local import_name=\"$2\"\n\n mkdir -p /etc/asv_env\n echo \"$py_versions\" > /etc/asv_env/py_versions\n echo \"$import_name\" > /etc/asv_env/import_name\n\n # Exported for every future shell (pkg script, interactive, etc.)\n cat >/etc/profile.d/asv_build_vars.sh </usr/local/bin/asv_smokecheck.py <<'PY'\n#!/usr/bin/env python\nimport argparse, importlib, pathlib, sys\n\ndef import_and_version(name: str):\n m = importlib.import_module(name)\n ver = getattr(m, \"__version__\", \"unknown\")\n print(f\"{name} imported ok; __version__={ver}\")\n\ndef probe_compiled(name: str, max_ext: int = 10):\n m = importlib.import_module(name)\n if not hasattr(m, \"__path__\"):\n print(\"No package __path__ (likely a single-module dist); skipping compiled probe.\")\n return\n pkg_path = pathlib.Path(list(m.__path__)[0])\n so_like = list(pkg_path.rglob(\"*.so\")) + list(pkg_path.rglob(\"*.pyd\"))\n failed = []\n for ext in so_like[:max_ext]:\n rel = ext.relative_to(pkg_path).with_suffix(\"\")\n dotted = \".\".join([name] + list(rel.parts))\n try:\n importlib.import_module(dotted)\n except Exception as e:\n failed.append((dotted, str(e)))\n if failed:\n print(\"Some compiled submodules failed to import:\")\n for d, err in failed:\n print(\" -\", d, \"->\", err)\n sys.exit(1)\n else:\n print(\"Compiled submodules (if any) import ok\")\n\ndef main():\n p = argparse.ArgumentParser()\n p.add_argument(\"--import-name\", required=True)\n p.add_argument(\"--repo-root\", default=\".\")\n p.add_argument(\"--pytest-smoke\", action=\"store_true\",\n help=\"Run a quick pytest smoke: -k 'not slow' --maxfail=1\")\n p.add_argument(\"--max-ext\", type=int, default=10)\n args = p.parse_args()\n\n import_and_version(args.import_name)\n probe_compiled(args.import_name, max_ext=args.max_ext)\n\n if args.pytest_smoke:\n import subprocess, os\n if any((pathlib.Path(args.repo_root)/p).exists() for p in (\"tests\", \"pytest.ini\", \"pyproject.toml\")):\n print(\"Running pytest smoke...\")\n rc = subprocess.call([sys.executable, \"-m\", \"pytest\", \"-q\", \"-k\", \"not slow\", \"--maxfail=1\"], cwd=args.repo_root)\n if rc != 0:\n sys.exit(rc)\n else:\n print(\"No tests detected; skipping pytest smoke.\")\n print(\"Smokecheck OK \u2705\")\n\nif __name__ == \"__main__\":\n main()\nPY\n chmod +x /usr/local/bin/asv_smokecheck.py\n}\ninstall_smokecheck\n\n# -------- Install an import-name detector CLI --------\ninstall_detect_import_name() {\n cat >/usr/local/bin/detect_import_name <<'PY'\n#!/usr/bin/env python\nimport argparse, pathlib, re, sys, subprocess, configparser, json\n\n# --- optional TOML loader (py3.11+: tomllib; else tomli if available) ---\ntry:\n import tomllib as toml\nexcept Exception:\n try:\n import tomli as toml\n except Exception:\n toml = None\n\nEXCEPTIONS = {\n # common dist\u2192import mismatches\n \"scikit-learn\": \"sklearn\",\n \"opencv-python\": \"cv2\",\n \"pyyaml\": \"yaml\",\n \"beautifulsoup4\": \"bs4\",\n \"pillow\": \"PIL\",\n 
\"mysqlclient\": \"MySQLdb\",\n \"psycopg2-binary\": \"psycopg2\",\n \"opencv-contrib-python\": \"cv2\",\n \"protobuf\": \"google\", # top-level package\n \"apache-beam\": \"apache_beam\",\n}\n\n# All the package names we typically query.\nEXCEPTIONS.update({\n # --- core scientific stack ---\n \"scikit-learn\": \"sklearn\",\n \"numpy\": \"numpy\",\n \"pandas\": \"pandas\",\n \"scipy\": \"scipy\",\n \"scikit-image\": \"skimage\",\n \"pywt\": \"pywt\",\n \"xarray\": \"xarray\",\n \"bottleneck\": \"bottleneck\",\n \"h5py\": \"h5py\",\n \"networkx\": \"networkx\",\n \"shapely\": \"shapely\",\n\n # --- ML / stats / optimization / viz ---\n \"optuna\": \"optuna\",\n \"arviz\": \"arviz\",\n \"pymc\": \"pymc\",\n \"kedro\": \"kedro\",\n \"modin\": \"modin\",\n \"napari\": \"napari\",\n \"deepchecks\": \"deepchecks\",\n \"voyager\": \"voyager\", # spotify/voyager\n \"warp\": \"warp\", # NVIDIA/warp\n \"newton\": \"newton\", # newton-physics/newton\n\n # --- domain / ecosystem libs ---\n \"geopandas\": \"geopandas\",\n \"cartopy\": \"cartopy\",\n \"iris\": \"iris\",\n \"anndata\": \"anndata\",\n \"scanpy\": \"scanpy\",\n \"sunpy\": \"sunpy\",\n \"pvlib-python\": \"pvlib\",\n \"PyBaMM\": \"pybamm\",\n \"momepy\": \"momepy\",\n \"satpy\": \"satpy\",\n \"pydicom\": \"pydicom\",\n \"pynetdicom\": \"pynetdicom\",\n\n # --- file formats / IO / infra ---\n \"asdf\": \"asdf\",\n \"arrow\": \"pyarrow\", # apache/arrow\n \"ArcticDB\": \"arcticdb\",\n \"arctic\": \"arctic\",\n\n # --- web / frameworks / utils ---\n \"django-components\": \"django_components\",\n \"h11\": \"h11\",\n \"tqdm\": \"tqdm\",\n \"rich\": \"rich\",\n \"posthog\": \"posthog\",\n \"datalad\": \"datalad\",\n \"ipyparallel\": \"ipyparallel\",\n\n # --- numerical / symbolic / control ---\n \"autograd\": \"autograd\",\n \"python-control\": \"control\",\n \"loopy\": \"loopy\",\n \"thermo\": \"thermo\",\n \"chempy\": \"chempy\",\n \"adaptive\": \"adaptive\",\n\n # --- scientific image / signal ---\n \"metric-learn\": \"metric_learn\",\n\n # --- quantum / physics ---\n \"Cirq\": \"cirq\",\n \"memray\": \"memray\",\n \"devito\": \"devito\",\n\n # --- bio / chem / data ---\n \"sourmash\": \"sourmash\",\n \"dipy\": \"dipy\",\n\n # --- protocol buffers / codegen / outlines ---\n \"python-betterproto\": \"betterproto\",\n \"outlines\": \"outlines\",\n\n # --- DS viz / raster ---\n \"datashader\": \"datashader\",\n \"xarray-spatial\": \"xarray_spatial\",\n\n # --- misc ---\n \"enlighten\": \"enlighten\",\n \"xorbits\": \"xorbits\",\n \"geopandas\": \"geopandas\",\n \"lmfit-py\": \"lmfit\",\n \"mdanalysis\": \"MDAnalysis\",\n \"nilearn\": \"nilearn\",\n})\n\n\nEXCLUDE_DIRS = {\n \".git\", \".hg\", \".svn\", \".tox\", \".nox\", \".venv\", \"venv\",\n \"build\", \"dist\", \"__pycache__\", \".mypy_cache\", \".pytest_cache\",\n \"docs\", \"doc\", \"site\", \"examples\", \"benchmarks\", \"tests\", \"testing\",\n}\n\ndef _norm(s: str) -> str:\n return re.sub(r\"[-_.]+\", \"\", s).lower()\n\ndef read_pyproject(root: pathlib.Path):\n cfg = {}\n p = root / \"pyproject.toml\"\n if toml and p.exists():\n try:\n cfg = toml.loads(p.read_text(encoding=\"utf-8\"))\n except Exception:\n pass\n return cfg\n\ndef read_setup_cfg(root: pathlib.Path):\n p = root / \"setup.cfg\"\n cp = configparser.ConfigParser()\n if p.exists():\n try:\n cp.read(p, encoding=\"utf-8\")\n except Exception:\n pass\n return cp\n\ndef dist_name_from_config(pyproject, setup_cfg):\n # PEP 621 name\n name = (pyproject.get(\"project\", {}) or {}).get(\"name\")\n if not name:\n # setup.cfg 
[metadata] name\n if setup_cfg.has_section(\"metadata\"):\n name = setup_cfg.get(\"metadata\", \"name\", fallback=None)\n # setup.py --name as last resort\n return name\n\ndef package_roots_from_config(root, pyproject, setup_cfg):\n roots = set([root])\n # setuptools package-dir mapping\n # pyproject: [tool.setuptools.package-dir] \"\" = \"src\"\n pkgdir = ((pyproject.get(\"tool\", {}) or {}).get(\"setuptools\", {}) or {}).get(\"package-dir\", {})\n if isinstance(pkgdir, dict):\n if \"\" in pkgdir:\n roots.add((root / pkgdir[\"\"]).resolve())\n for _, d in pkgdir.items():\n try:\n roots.add((root / d).resolve())\n except Exception:\n pass\n # setup.cfg [options] package_dir\n if setup_cfg.has_section(\"options\"):\n raw = setup_cfg.get(\"options\", \"package_dir\", fallback=None)\n if raw:\n # can be \"=\\nsrc\" or mapping lines\n lines = [l.strip() for l in raw.splitlines() if l.strip()]\n # accept simple \"=src\" or \"\" = \"src\"\n for ln in lines:\n m = re.match(r'^(\"?\\'?)*\\s*=?\\s*(\"?\\'?)*\\s*(?P[^#;]+)$', ln)\n if m:\n roots.add((root / m.group(\"path\").strip()).resolve())\n # setup.cfg [options.packages.find] where\n if setup_cfg.has_section(\"options.packages.find\"):\n where = setup_cfg.get(\"options.packages.find\", \"where\", fallback=None)\n if where:\n for w in re.split(r\"[,\\s]+\", where):\n if w:\n roots.add((root / w).resolve())\n return [r for r in roots if r.exists()]\n\ndef explicit_modules_from_config(pyproject, setup_cfg):\n mods = set()\n # pyproject (tool.setuptools) py-modules / packages\n st = ((pyproject.get(\"tool\", {}) or {}).get(\"setuptools\", {}) or {})\n for key in (\"py-modules\", \"packages\"):\n val = st.get(key)\n if isinstance(val, list):\n mods.update(val)\n # setup.cfg [options] py_modules / packages\n if setup_cfg.has_section(\"options\"):\n for key in (\"py_modules\", \"packages\"):\n raw = setup_cfg.get(\"options\", key, fallback=None)\n if raw:\n for tok in re.split(r\"[\\s,]+\", raw.strip()):\n if tok and tok != \"find:\":\n mods.add(tok)\n return sorted(mods)\n\ndef read_top_level_from_egg_info(root):\n # editable installs often leave ./.egg-info/top_level.txt\n for ei in root.rglob(\"*.egg-info\"):\n tl = ei / \"top_level.txt\"\n if tl.exists():\n try:\n names = [l.strip() for l in tl.read_text(encoding=\"utf-8\").splitlines() if l.strip()]\n if names:\n return names\n except Exception:\n pass\n # also consider dist-info during local builds\n for di in root.rglob(\"*.dist-info\"):\n tl = di / \"top_level.txt\"\n if tl.exists():\n try:\n names = [l.strip() for l in tl.read_text(encoding=\"utf-8\").splitlines() if l.strip()]\n if names:\n return names\n except Exception:\n pass\n return None\n\ndef walk_candidates(roots):\n \"\"\"Return set of plausible top-level import names under candidate roots.\"\"\"\n cands = set()\n for r in roots:\n for path in r.rglob(\"__init__.py\"):\n try:\n pkg_dir = path.parent\n # skip excluded dirs anywhere in the path\n if any(part in EXCLUDE_DIRS for part in pkg_dir.parts):\n continue\n # Construct package name relative to the nearest search root\n try:\n rel = pkg_dir.relative_to(r)\n except Exception:\n continue\n if not rel.parts:\n continue\n top = rel.parts[0]\n if top.startswith(\"_\"):\n # usually private tooling\n continue\n cands.add(top)\n except Exception:\n pass\n # standalone modules at top-level of roots (py_modules case)\n for mod in r.glob(\"*.py\"):\n if mod.stem not in (\"setup\",):\n cands.add(mod.stem)\n return sorted(cands)\n\ndef score_candidates(cands, dist_name):\n \"\"\"Assign a 
score preferring names that match the dist name.\"\"\"\n scores = {}\n n_dist = _norm(dist_name) if dist_name else None\n prefer = None\n if dist_name and dist_name.lower() in EXCEPTIONS:\n prefer = EXCEPTIONS[dist_name.lower()]\n # also try normalized exception keys (e.g. capitalization)\n for k, v in EXCEPTIONS.items():\n if _norm(k) == _norm(dist_name or \"\"):\n prefer = v\n\n for c in cands:\n s = 0\n if prefer and _norm(c) == _norm(prefer):\n s += 100\n if n_dist and _norm(c) == n_dist:\n s += 80\n if n_dist and (_norm(c).startswith(n_dist) or n_dist.startswith(_norm(c))):\n s += 20\n # shorter, simpler names get a slight bump\n s += max(0, 10 - len(c))\n scores[c] = s\n return sorted(cands, key=lambda x: (-scores.get(x, 0), x)), scores\n\ndef detect(root: str, return_all=False):\n root = pathlib.Path(root).resolve()\n\n pyproject = read_pyproject(root)\n setup_cfg = read_setup_cfg(root)\n dist_name = dist_name_from_config(pyproject, setup_cfg)\n\n # 1) top_level.txt (best signal if present)\n top = read_top_level_from_egg_info(root)\n if top:\n if return_all:\n return top\n # If multiple, score them\n ordered, _ = score_candidates(top, dist_name or \"\")\n return [ordered[0]]\n\n # 2) explicit declarations (py_modules / packages lists)\n explicit = explicit_modules_from_config(pyproject, setup_cfg)\n\n # 3) find correct search roots (src layout, package_dir, etc.)\n roots = package_roots_from_config(root, pyproject, setup_cfg)\n\n # 4) walk code to infer candidates\n walked = walk_candidates(roots)\n\n # merge explicit + walked\n cands = list(dict.fromkeys(explicit + walked)) # keep order & de-dup\n\n # 5) fallback from dist name heuristics/exceptions if still empty\n if not cands and dist_name:\n # exception or simple normalization\n guess = EXCEPTIONS.get(dist_name.lower()) or re.sub(r\"[-\\.]+\", \"_\", dist_name)\n cands = [guess]\n\n if not cands:\n return []\n\n if return_all:\n # return ordered list\n ordered, _ = score_candidates(cands, dist_name or \"\")\n return ordered\n else:\n ordered, _ = score_candidates(cands, dist_name or \"\")\n return [ordered[0]]\n\ndef main():\n ap = argparse.ArgumentParser(description=\"Detect the top-level Python import name for a repo.\")\n ap.add_argument(\"--repo-root\", default=\".\", help=\"Path to repository root\")\n ap.add_argument(\"--all\", action=\"store_true\", help=\"Print all plausible names (JSON list)\")\n args = ap.parse_args()\n\n names = detect(args.repo_root, return_all=args.all)\n if not names:\n sys.exit(1)\n if args.all:\n print(json.dumps(names))\n else:\n print(names[0])\n\nif __name__ == \"__main__\":\n main()\nPY\n chmod +x /usr/local/bin/detect_import_name\n}\n\ninstall_detect_import_name\n\n# -------- Script body --------\n\ninstall_profile_helpers\n# shellcheck disable=SC1091\nsource /etc/profile.d/asv_utils.sh\n\n# Ensure base micromamba is active for introspecting ASV config\nmicromamba activate base\n\ninstall_detect_import_name\ninstall_smokecheck\n\nIMPORT_NAME=\"$(detect_import_name || true)\"\nif [[ -z \"$IMPORT_NAME\" ]]; then\n echo \"WARN: Could not determine import name; the pkg stage will fall back to local detection.\"\nfi\n\n\n# Move into the directory that contains asv.*.json\ncd_asv_json_dir || { echo \"No 'asv.*.json' file found.\" >&2; exit 1; }\n\nCONF_NAME=\"$(asv_conf_name || true)\"\nif [[ -z \"${CONF_NAME:-}\" ]]; then\n echo \"No 'asv.*.json' file found.\" >&2\n exit 1\nfi\n\n# Make sure tomli is available in base for pyproject parsing\nmicromamba install -y -n base -c conda-forge tomli 
>/dev/null\n\n# Read python versions from the ASV config\nPY_VERSIONS=$(python - <&2\n exit 1\nfi\n###### END SETUP CODE ######\n\n# -----------------------------\n# Agent guidance (read-first)\n# -----------------------------\n# GOAL: For each Python version below, install the project in EDITABLE mode into env asv_{version},\n# with NO build isolation, then run health checks.\n#\n# Below this comment, you should do whatever is necessary to build the project without errors. Including (but not limited to):\n# - Add extra conda/pip dependencies needed to build this project.\n# - Run repo-specific pre-steps (e.g., submodules, generating Cython, env vars).\n# - Run arbitrary micromamba/pip commands in the target env.\n# - Set CFLAGS/CXXFLAGS/LDFLAGS if needed for this repo.\n# - Change files in the repo if needed (e.g., fix a missing #include).\n# - Anything else needed to get a successful editable install.\n#\n# MUST:\n# - Keep this script idempotent.\n# - Use: `pip install --no-build-isolation -v -e .` or `pip install -e .` or equivalent.\n# - Do not modify the SETUP CODE or helper functions below.\n#\n# DO NOT:\n# - Change env names or Python versions outside MODEL EDIT AREA.\n# - Use build isolation unless absolutely necessary.\n\n# -----------------------------\n# Helpers (do not modify)\n# -----------------------------\nlog() { printf \"\u001b[1;34m[build]\u001b[0m %s\n\" \"$*\"; }\nwarn() { printf \"\u001b[1;33m[warn]\u001b[0m %s\n\" \"$*\" >&2; }\ndie() { printf \"\u001b[1;31m[fail]\u001b[0m %s\n\" \"$*\" >&2; exit 1; }\n\n# Conservative default parallelism (override if the repo benefits)\nexport CMAKE_BUILD_PARALLEL_LEVEL=\"${CMAKE_BUILD_PARALLEL_LEVEL:-2}\"\nexport NPY_NUM_BUILD_JOBS=\"${NPY_NUM_BUILD_JOBS:-2}\"\n\n# -----------------------------\n# Build & test across envs\n# -----------------------------\nfor version in $TARGET_VERSIONS; do\n ENV_NAME=\"asv_${version}\"\n log \"==> Building in env: $ENV_NAME (python=$version)\"\n\n if ! micromamba env list | awk '{print $1}' | grep -qx \"$ENV_NAME\"; then\n die \"Env $ENV_NAME not found. Did docker_build_env.sh run?\"\n fi\n\n # Import name resolution (kept simple for the agent)\n IMP=\"${IMPORT_NAME:-}\"\n if [[ -z \"$IMP\" ]]; then\n if ! IMP=\"$(asv_detect_import_name --repo-root \"$REPO_ROOT\" 2>/dev/null)\"; then\n die \"Could not determine import name. 
Set IMPORT_NAME in /etc/profile.d/asv_build_vars.sh\"\n fi\n fi\n log \"Using import name: $IMP\"\n\n # -----------------------------\n # MODEL EDIT AREA: repo-specific tweaks (optional)\n # -----------------------------\n # Examples (uncomment if needed for this repo):\n #\n # log \"Updating submodules\"\n # git -C \"$REPO_ROOT\" submodule update --init --recursive\n #\n # log \"Installing extra system libs via conda-forge\"\n # micromamba install -y -n \"$ENV_NAME\" -c conda-forge 'openblas' 'blas=*=openblas' 'libopenmp'\n #\n # log \"Pre-generating Cython sources\"\n # micromamba run -n \"$ENV_NAME\" python -m cython --version\n #\n # export CFLAGS=\"${CFLAGS:-}\"\n # export CXXFLAGS=\"${CXXFLAGS:-}\"\n # export LDFLAGS=\"${LDFLAGS:-}\"\n # -----------------------------\n\n # Install some basic micromamba packages.\n\n micromamba install -y -n \"$ENV_NAME\" -c conda-forge git conda mamba \"libmambapy<=1.9.9\" numpy scipy \"cython<3\" joblib threadpoolctl pytest compilers meson-python\n # if maintainer/install_all.sh exists run it with develop\n if [[ -f \"maintainer/install_all.sh\" ]]; then\n micromamba activate \"$ENV_NAME\"\n working_dir=$(pwd)\n cd \"$ROOT_PATH\" || exit 1\n bash maintainer/install_all.sh develop\n cd \"$working_dir\" || exit 1\n else\n # Editable install (no build isolation preferrably). Toolchain lives in the env already.\n log \"Editable install with --no-build-isolation\"\n PIP_NO_BUILD_ISOLATION=1 micromamba run -n \"$ENV_NAME\" python -m pip install --no-build-isolation -v -e \"$REPO_ROOT\"\n fi\n\n\n # Health checks (import + compiled extension probe; optional pytest smoke with RUN_PYTEST_SMOKE=1)\n log \"Running smoke checks\"\n micromamba run -n \"$ENV_NAME\" asv_smokecheck.py --import-name \"$IMP\" --repo-root \"$REPO_ROOT\" ${RUN_PYTEST_SMOKE:+--pytest-smoke}\n\n # Machine-readable markers (useful in logs)\n echo \"::import_name=${IMP}::env=${ENV_NAME}\"\ndone\n\nlog \"All builds complete \u2705\"", - "dockerfile_data": "# syntax=docker/dockerfile:1.7\n\nFROM buildpack-deps:jammy AS base\n\nARG REPO_URL\nARG COMMIT_SHA\n\nRUN apt-get update && \\\n apt-get install -y --no-install-recommends \\\n curl git build-essential jq cmake ninja-build && \\\n rm -rf /var/lib/apt/lists/*\n\nRUN curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest \\\n | tar -xvj -C /usr/local/bin --strip-components=1 bin/micromamba\n\nENV MAMBA_ROOT_PREFIX=/opt/conda \\\n PATH=/opt/conda/bin:$PATH \\\n MAMBA_DOCKERFILE_ACTIVATE=1 \\\n OPENBLAS_NUM_THREADS=1 \\\n MKL_NUM_THREADS=1 \\\n OMP_NUM_THREADS=1\n\nRUN micromamba install -y -p $MAMBA_ROOT_PREFIX -c conda-forge \\\n python=3.10 \\\n git asv pyperf mamba conda libmambapy jq && \\\n micromamba clean --all --yes\n\nRUN mkdir -p /workspace /output\nWORKDIR /workspace\n\nFROM base AS env\nARG REPO_URL\nARG COMMIT_SHA\n\n# Entrypoint is inherited by pkg\nCOPY entrypoint.sh /entrypoint.sh\nRUN chmod +x /entrypoint.sh\nENTRYPOINT [\"/entrypoint.sh\"]\n\nRUN git clone \"$REPO_URL\" /workspace/repo\nWORKDIR /workspace/repo\nRUN git checkout \"$COMMIT_SHA\"\nLABEL vcs.url=\"$REPO_URL\" vcs.ref=\"$COMMIT_SHA\"\n\n# ---- ENV STAGE: create envs, persist vars, install smoke tool ----\nCOPY docker_build_env.sh /workspace/repo/docker_build_env.sh\nRUN chmod +x /workspace/repo/docker_build_env.sh && \\\n /workspace/repo/docker_build_env.sh\n\n\nFROM env AS pkg\n\n# ---- PKG STAGE: build+test the package for each ASV Python ----\nCOPY docker_build_pkg.sh /workspace/repo/docker_build_pkg.sh\nRUN chmod +x 
/workspace/repo/docker_build_pkg.sh && \\\n /workspace/repo/docker_build_pkg.sh\n# If you want to restrict to one version at build time, replace with:\n# RUN PY_VERSION=3.10 /workspace/repo/docker_build_pkg.sh\n", - "entrypoint_data": "#!/usr/bin/env bash\n# set -euo pipefail\nset -x\n: \"${ASV_ARGS:?Need to set ASV_ARGS}\"\n: \"${ASV_MACHINE:=?Need to set ASV_MACHINE}\"\n: \"${ASV_OS:=?Need to set ASV_OS}\"\n: \"${ASV_NUM_CPU:=?Need to set ASV_NUM_CPU}\"\n: \"${ASV_ARCH:=?Need to set ASV_ARCH}\"\n: \"${ASV_CPU:=?Need to set ASV_CPU}\"\n: \"${ASV_RAM:=?Need to set ASV_RAM}\"\n\n\ncd_asv_json_dir() {\n local match\n match=$(find . -type f -name \"asv.*.json\" | head -n 1)\n\n if [[ -n \"$match\" ]]; then\n local dir\n dir=$(dirname \"$match\")\n cd \"$dir\" || echo \"Failed to change directory to $dir\"\n else\n echo \"No 'asv.*.json' file found in current directory or subdirectories.\"\n fi\n}\n\neval \"$(micromamba shell hook --shell=bash)\"\n\nmicromamba activate base\nROOT_PATH=${PWD}\ncd_asv_json_dir || exit 1\n\n# the conf name is one of \"asv.conf.json\" or \"asv.ci.conf.json\" or \"asv.*.json\"\nCONF_NAME=$(basename \"$(find . -type f -name \"asv.*.json\" | head -n 1)\")\nif [[ -z \"$CONF_NAME\" ]]; then\n echo \"No 'asv.*.json' file found in current directory or subdirectories.\"\n exit 1\nfi\n\n# Read the python versions from the asv.conf.json\npython_versions=$(python -c \"import asv; pythons = asv.config.Config.load('$CONF_NAME').pythons; print(' '.join(pythons))\")\n# change the \"results_dir\" in asv.conf.json to \"/output/{COMMIT_SHA}/\"\nfor version in $python_versions; do\n # Create per\u2011Python env and install ASV\n python -c \"import asv, os, pathlib\npath = pathlib.Path('/output/'\\\"$COMMIT_SHA\\\"'/''\\\"$version\\\"')\npath.mkdir(parents=True, exist_ok=True)\n\nconfig = asv.config.Config.load('$CONF_NAME')\nconfig.results_dir = str(path / 'results')\nconfig.html_dir = str(path / 'html')\nconfig.branches = ['HEAD']\n\nasv.util.write_json('$CONF_NAME', config.__dict__, api_version=config.api_version)\nasv.util.write_json(path / '$CONF_NAME', config.__dict__, api_version=config.api_version)\n\"\n micromamba run -n \"asv_${version}\" asv machine --yes --config $CONF_NAME --machine \"$ASV_MACHINE\" --os \"$ASV_OS\" --num_cpu \"$ASV_NUM_CPU\" --arch \"$ASV_ARCH\" --cpu \"$ASV_CPU\" --ram \"$ASV_RAM\"\n micromamba run -n \"asv_${version}\" asv run --show-stderr ${ASV_ARGS} --config $CONF_NAME\ndone\n\necho \"Benchmarks complete.\"\n", - "env_building_data": "#!/usr/bin/env bash\nset -euo pipefail\n\n# -------- Helpers installed for all shells --------\ninstall_profile_helpers() {\n cat >/etc/profile.d/asv_utils.sh <<'EOF'\n# asv_utils.sh \u2014 login/interactive shell helpers for ASV builds\nexport MAMBA_ROOT_PREFIX=\"${MAMBA_ROOT_PREFIX:-/opt/conda}\"\n\n# Initialize micromamba for bash shells (no-op if not present)\nif command -v micromamba >/dev/null 2>&1; then\n eval \"$(micromamba shell hook --shell=bash)\"\nfi\n\n# Find and cd into the first directory that contains an asv.*.json\ncd_asv_json_dir() {\n local match\n match=$(find . -type f -name \"asv.*.json\" | head -n 1)\n if [[ -n \"$match\" ]]; then\n cd \"$(dirname \"$match\")\" || echo \"Failed to change directory\"\n else\n echo \"No 'asv.*.json' file found in current directory or subdirectories.\"\n return 1\n fi\n}\n\n# Return just the conf filename (e.g., asv.conf.json)\nasv_conf_name() {\n local f\n f=$(find . 
-type f -name \"asv.*.json\" | head -n 1)\n [[ -n \"$f\" ]] && basename \"$f\" || return 1\n}\n\n# Build performance knobs (overridable)\nexport MAKEFLAGS=\"${MAKEFLAGS:--j$(nproc)}\"\nexport CMAKE_BUILD_PARALLEL_LEVEL=\"${CMAKE_BUILD_PARALLEL_LEVEL:-$(nproc)}\"\nexport NPY_NUM_BUILD_JOBS=\"${NPY_NUM_BUILD_JOBS:-$(nproc)}\"\n\n# Shared pip cache to speed repeated editable builds\nexport PIP_CACHE_DIR=\"${PIP_CACHE_DIR:-/opt/pipcache}\"\nmkdir -p \"$PIP_CACHE_DIR\"\nEOF\n}\n\n# -------- Persisted build variables --------\nwrite_build_vars() {\n local py_versions=\"$1\"\n local import_name=\"$2\"\n\n mkdir -p /etc/asv_env\n echo \"$py_versions\" > /etc/asv_env/py_versions\n echo \"$import_name\" > /etc/asv_env/import_name\n\n # Exported for every future shell (pkg script, interactive, etc.)\n cat >/etc/profile.d/asv_build_vars.sh </usr/local/bin/asv_smokecheck.py <<'PY'\n#!/usr/bin/env python\nimport argparse, importlib, pathlib, sys\n\ndef import_and_version(name: str):\n m = importlib.import_module(name)\n ver = getattr(m, \"__version__\", \"unknown\")\n print(f\"{name} imported ok; __version__={ver}\")\n\ndef probe_compiled(name: str, max_ext: int = 10):\n m = importlib.import_module(name)\n if not hasattr(m, \"__path__\"):\n print(\"No package __path__ (likely a single-module dist); skipping compiled probe.\")\n return\n pkg_path = pathlib.Path(list(m.__path__)[0])\n so_like = list(pkg_path.rglob(\"*.so\")) + list(pkg_path.rglob(\"*.pyd\"))\n failed = []\n for ext in so_like[:max_ext]:\n rel = ext.relative_to(pkg_path).with_suffix(\"\")\n dotted = \".\".join([name] + list(rel.parts))\n try:\n importlib.import_module(dotted)\n except Exception as e:\n failed.append((dotted, str(e)))\n if failed:\n print(\"Some compiled submodules failed to import:\")\n for d, err in failed:\n print(\" -\", d, \"->\", err)\n sys.exit(1)\n else:\n print(\"Compiled submodules (if any) import ok\")\n\ndef main():\n p = argparse.ArgumentParser()\n p.add_argument(\"--import-name\", required=True)\n p.add_argument(\"--repo-root\", default=\".\")\n p.add_argument(\"--pytest-smoke\", action=\"store_true\",\n help=\"Run a quick pytest smoke: -k 'not slow' --maxfail=1\")\n p.add_argument(\"--max-ext\", type=int, default=10)\n args = p.parse_args()\n\n import_and_version(args.import_name)\n probe_compiled(args.import_name, max_ext=args.max_ext)\n\n if args.pytest_smoke:\n import subprocess, os\n if any((pathlib.Path(args.repo_root)/p).exists() for p in (\"tests\", \"pytest.ini\", \"pyproject.toml\")):\n print(\"Running pytest smoke...\")\n rc = subprocess.call([sys.executable, \"-m\", \"pytest\", \"-q\", \"-k\", \"not slow\", \"--maxfail=1\"], cwd=args.repo_root)\n if rc != 0:\n sys.exit(rc)\n else:\n print(\"No tests detected; skipping pytest smoke.\")\n print(\"Smokecheck OK \u2705\")\n\nif __name__ == \"__main__\":\n main()\nPY\n chmod +x /usr/local/bin/asv_smokecheck.py\n}\ninstall_smokecheck\n\n# -------- Install an import-name detector CLI --------\ninstall_detect_import_name() {\n cat >/usr/local/bin/detect_import_name <<'PY'\n#!/usr/bin/env python\nimport argparse, pathlib, re, sys, subprocess, configparser, json\n\n# --- optional TOML loader (py3.11+: tomllib; else tomli if available) ---\ntry:\n import tomllib as toml\nexcept Exception:\n try:\n import tomli as toml\n except Exception:\n toml = None\n\nEXCEPTIONS = {\n # common dist\u2192import mismatches\n \"scikit-learn\": \"sklearn\",\n \"opencv-python\": \"cv2\",\n \"pyyaml\": \"yaml\",\n \"beautifulsoup4\": \"bs4\",\n \"pillow\": \"PIL\",\n 
\"mysqlclient\": \"MySQLdb\",\n \"psycopg2-binary\": \"psycopg2\",\n \"opencv-contrib-python\": \"cv2\",\n \"protobuf\": \"google\", # top-level package\n \"apache-beam\": \"apache_beam\",\n}\n\n# All the package names we typically query.\nEXCEPTIONS.update({\n # --- core scientific stack ---\n \"scikit-learn\": \"sklearn\",\n \"numpy\": \"numpy\",\n \"pandas\": \"pandas\",\n \"scipy\": \"scipy\",\n \"scikit-image\": \"skimage\",\n \"pywt\": \"pywt\",\n \"xarray\": \"xarray\",\n \"bottleneck\": \"bottleneck\",\n \"h5py\": \"h5py\",\n \"networkx\": \"networkx\",\n \"shapely\": \"shapely\",\n\n # --- ML / stats / optimization / viz ---\n \"optuna\": \"optuna\",\n \"arviz\": \"arviz\",\n \"pymc\": \"pymc\",\n \"kedro\": \"kedro\",\n \"modin\": \"modin\",\n \"napari\": \"napari\",\n \"deepchecks\": \"deepchecks\",\n \"voyager\": \"voyager\", # spotify/voyager\n \"warp\": \"warp\", # NVIDIA/warp\n \"newton\": \"newton\", # newton-physics/newton\n\n # --- domain / ecosystem libs ---\n \"geopandas\": \"geopandas\",\n \"cartopy\": \"cartopy\",\n \"iris\": \"iris\",\n \"anndata\": \"anndata\",\n \"scanpy\": \"scanpy\",\n \"sunpy\": \"sunpy\",\n \"pvlib-python\": \"pvlib\",\n \"PyBaMM\": \"pybamm\",\n \"momepy\": \"momepy\",\n \"satpy\": \"satpy\",\n \"pydicom\": \"pydicom\",\n \"pynetdicom\": \"pynetdicom\",\n\n # --- file formats / IO / infra ---\n \"asdf\": \"asdf\",\n \"arrow\": \"pyarrow\", # apache/arrow\n \"ArcticDB\": \"arcticdb\",\n \"arctic\": \"arctic\",\n\n # --- web / frameworks / utils ---\n \"django-components\": \"django_components\",\n \"h11\": \"h11\",\n \"tqdm\": \"tqdm\",\n \"rich\": \"rich\",\n \"posthog\": \"posthog\",\n \"datalad\": \"datalad\",\n \"ipyparallel\": \"ipyparallel\",\n\n # --- numerical / symbolic / control ---\n \"autograd\": \"autograd\",\n \"python-control\": \"control\",\n \"loopy\": \"loopy\",\n \"thermo\": \"thermo\",\n \"chempy\": \"chempy\",\n \"adaptive\": \"adaptive\",\n\n # --- scientific image / signal ---\n \"metric-learn\": \"metric_learn\",\n\n # --- quantum / physics ---\n \"Cirq\": \"cirq\",\n \"memray\": \"memray\",\n \"devito\": \"devito\",\n\n # --- bio / chem / data ---\n \"sourmash\": \"sourmash\",\n \"dipy\": \"dipy\",\n\n # --- protocol buffers / codegen / outlines ---\n \"python-betterproto\": \"betterproto\",\n \"outlines\": \"outlines\",\n\n # --- DS viz / raster ---\n \"datashader\": \"datashader\",\n \"xarray-spatial\": \"xarray_spatial\",\n\n # --- misc ---\n \"enlighten\": \"enlighten\",\n \"xorbits\": \"xorbits\",\n \"geopandas\": \"geopandas\",\n \"lmfit-py\": \"lmfit\",\n \"mdanalysis\": \"MDAnalysis\",\n \"nilearn\": \"nilearn\",\n})\n\n\nEXCLUDE_DIRS = {\n \".git\", \".hg\", \".svn\", \".tox\", \".nox\", \".venv\", \"venv\",\n \"build\", \"dist\", \"__pycache__\", \".mypy_cache\", \".pytest_cache\",\n \"docs\", \"doc\", \"site\", \"examples\", \"benchmarks\", \"tests\", \"testing\",\n}\n\ndef _norm(s: str) -> str:\n return re.sub(r\"[-_.]+\", \"\", s).lower()\n\ndef read_pyproject(root: pathlib.Path):\n cfg = {}\n p = root / \"pyproject.toml\"\n if toml and p.exists():\n try:\n cfg = toml.loads(p.read_text(encoding=\"utf-8\"))\n except Exception:\n pass\n return cfg\n\ndef read_setup_cfg(root: pathlib.Path):\n p = root / \"setup.cfg\"\n cp = configparser.ConfigParser()\n if p.exists():\n try:\n cp.read(p, encoding=\"utf-8\")\n except Exception:\n pass\n return cp\n\ndef dist_name_from_config(pyproject, setup_cfg):\n # PEP 621 name\n name = (pyproject.get(\"project\", {}) or {}).get(\"name\")\n if not name:\n # setup.cfg 
[metadata] name\n if setup_cfg.has_section(\"metadata\"):\n name = setup_cfg.get(\"metadata\", \"name\", fallback=None)\n # setup.py --name as last resort\n return name\n\ndef package_roots_from_config(root, pyproject, setup_cfg):\n roots = set([root])\n # setuptools package-dir mapping\n # pyproject: [tool.setuptools.package-dir] \"\" = \"src\"\n pkgdir = ((pyproject.get(\"tool\", {}) or {}).get(\"setuptools\", {}) or {}).get(\"package-dir\", {})\n if isinstance(pkgdir, dict):\n if \"\" in pkgdir:\n roots.add((root / pkgdir[\"\"]).resolve())\n for _, d in pkgdir.items():\n try:\n roots.add((root / d).resolve())\n except Exception:\n pass\n # setup.cfg [options] package_dir\n if setup_cfg.has_section(\"options\"):\n raw = setup_cfg.get(\"options\", \"package_dir\", fallback=None)\n if raw:\n # can be \"=\\nsrc\" or mapping lines\n lines = [l.strip() for l in raw.splitlines() if l.strip()]\n # accept simple \"=src\" or \"\" = \"src\"\n for ln in lines:\n m = re.match(r'^(\"?\\'?)*\\s*=?\\s*(\"?\\'?)*\\s*(?P[^#;]+)$', ln)\n if m:\n roots.add((root / m.group(\"path\").strip()).resolve())\n # setup.cfg [options.packages.find] where\n if setup_cfg.has_section(\"options.packages.find\"):\n where = setup_cfg.get(\"options.packages.find\", \"where\", fallback=None)\n if where:\n for w in re.split(r\"[,\\s]+\", where):\n if w:\n roots.add((root / w).resolve())\n return [r for r in roots if r.exists()]\n\ndef explicit_modules_from_config(pyproject, setup_cfg):\n mods = set()\n # pyproject (tool.setuptools) py-modules / packages\n st = ((pyproject.get(\"tool\", {}) or {}).get(\"setuptools\", {}) or {})\n for key in (\"py-modules\", \"packages\"):\n val = st.get(key)\n if isinstance(val, list):\n mods.update(val)\n # setup.cfg [options] py_modules / packages\n if setup_cfg.has_section(\"options\"):\n for key in (\"py_modules\", \"packages\"):\n raw = setup_cfg.get(\"options\", key, fallback=None)\n if raw:\n for tok in re.split(r\"[\\s,]+\", raw.strip()):\n if tok and tok != \"find:\":\n mods.add(tok)\n return sorted(mods)\n\ndef read_top_level_from_egg_info(root):\n # editable installs often leave ./.egg-info/top_level.txt\n for ei in root.rglob(\"*.egg-info\"):\n tl = ei / \"top_level.txt\"\n if tl.exists():\n try:\n names = [l.strip() for l in tl.read_text(encoding=\"utf-8\").splitlines() if l.strip()]\n if names:\n return names\n except Exception:\n pass\n # also consider dist-info during local builds\n for di in root.rglob(\"*.dist-info\"):\n tl = di / \"top_level.txt\"\n if tl.exists():\n try:\n names = [l.strip() for l in tl.read_text(encoding=\"utf-8\").splitlines() if l.strip()]\n if names:\n return names\n except Exception:\n pass\n return None\n\ndef walk_candidates(roots):\n \"\"\"Return set of plausible top-level import names under candidate roots.\"\"\"\n cands = set()\n for r in roots:\n for path in r.rglob(\"__init__.py\"):\n try:\n pkg_dir = path.parent\n # skip excluded dirs anywhere in the path\n if any(part in EXCLUDE_DIRS for part in pkg_dir.parts):\n continue\n # Construct package name relative to the nearest search root\n try:\n rel = pkg_dir.relative_to(r)\n except Exception:\n continue\n if not rel.parts:\n continue\n top = rel.parts[0]\n if top.startswith(\"_\"):\n # usually private tooling\n continue\n cands.add(top)\n except Exception:\n pass\n # standalone modules at top-level of roots (py_modules case)\n for mod in r.glob(\"*.py\"):\n if mod.stem not in (\"setup\",):\n cands.add(mod.stem)\n return sorted(cands)\n\ndef score_candidates(cands, dist_name):\n \"\"\"Assign a 
score preferring names that match the dist name.\"\"\"\n scores = {}\n n_dist = _norm(dist_name) if dist_name else None\n prefer = None\n if dist_name and dist_name.lower() in EXCEPTIONS:\n prefer = EXCEPTIONS[dist_name.lower()]\n # also try normalized exception keys (e.g. capitalization)\n for k, v in EXCEPTIONS.items():\n if _norm(k) == _norm(dist_name or \"\"):\n prefer = v\n\n for c in cands:\n s = 0\n if prefer and _norm(c) == _norm(prefer):\n s += 100\n if n_dist and _norm(c) == n_dist:\n s += 80\n if n_dist and (_norm(c).startswith(n_dist) or n_dist.startswith(_norm(c))):\n s += 20\n # shorter, simpler names get a slight bump\n s += max(0, 10 - len(c))\n scores[c] = s\n return sorted(cands, key=lambda x: (-scores.get(x, 0), x)), scores\n\ndef detect(root: str, return_all=False):\n root = pathlib.Path(root).resolve()\n\n pyproject = read_pyproject(root)\n setup_cfg = read_setup_cfg(root)\n dist_name = dist_name_from_config(pyproject, setup_cfg)\n\n # 1) top_level.txt (best signal if present)\n top = read_top_level_from_egg_info(root)\n if top:\n if return_all:\n return top\n # If multiple, score them\n ordered, _ = score_candidates(top, dist_name or \"\")\n return [ordered[0]]\n\n # 2) explicit declarations (py_modules / packages lists)\n explicit = explicit_modules_from_config(pyproject, setup_cfg)\n\n # 3) find correct search roots (src layout, package_dir, etc.)\n roots = package_roots_from_config(root, pyproject, setup_cfg)\n\n # 4) walk code to infer candidates\n walked = walk_candidates(roots)\n\n # merge explicit + walked\n cands = list(dict.fromkeys(explicit + walked)) # keep order & de-dup\n\n # 5) fallback from dist name heuristics/exceptions if still empty\n if not cands and dist_name:\n # exception or simple normalization\n guess = EXCEPTIONS.get(dist_name.lower()) or re.sub(r\"[-\\.]+\", \"_\", dist_name)\n cands = [guess]\n\n if not cands:\n return []\n\n if return_all:\n # return ordered list\n ordered, _ = score_candidates(cands, dist_name or \"\")\n return ordered\n else:\n ordered, _ = score_candidates(cands, dist_name or \"\")\n return [ordered[0]]\n\ndef main():\n ap = argparse.ArgumentParser(description=\"Detect the top-level Python import name for a repo.\")\n ap.add_argument(\"--repo-root\", default=\".\", help=\"Path to repository root\")\n ap.add_argument(\"--all\", action=\"store_true\", help=\"Print all plausible names (JSON list)\")\n args = ap.parse_args()\n\n names = detect(args.repo_root, return_all=args.all)\n if not names:\n sys.exit(1)\n if args.all:\n print(json.dumps(names))\n else:\n print(names[0])\n\nif __name__ == \"__main__\":\n main()\nPY\n chmod +x /usr/local/bin/detect_import_name\n}\n\ninstall_detect_import_name\n\n# -------- Script body --------\n\ninstall_profile_helpers\n# shellcheck disable=SC1091\nsource /etc/profile.d/asv_utils.sh\n\n# Ensure base micromamba is active for introspecting ASV config\nmicromamba activate base\n\ninstall_detect_import_name\ninstall_smokecheck\n\nIMPORT_NAME=\"$(detect_import_name || true)\"\nif [[ -z \"$IMPORT_NAME\" ]]; then\n echo \"WARN: Could not determine import name; the pkg stage will fall back to local detection.\"\nfi\n\n\n# Move into the directory that contains asv.*.json\ncd_asv_json_dir || { echo \"No 'asv.*.json' file found.\" >&2; exit 1; }\n\nCONF_NAME=\"$(asv_conf_name || true)\"\nif [[ -z \"${CONF_NAME:-}\" ]]; then\n echo \"No 'asv.*.json' file found.\" >&2\n exit 1\nfi\n\n# Make sure tomli is available in base for pyproject parsing\nmicromamba install -y -n base -c conda-forge tomli 
>/dev/null\n\n# Read python versions from the ASV config\nPY_VERSIONS=$(python - <&2\n exit 1\nfi\n###### END SETUP CODE ######\n\n# -----------------------------\n# Agent guidance (read-first)\n# -----------------------------\n# GOAL: For each Python version below, install the project in EDITABLE mode into env asv_{version},\n# with NO build isolation, then run health checks.\n#\n# Below this comment, you should do whatever is necessary to build the project without errors. Including (but not limited to):\n# - Add extra conda/pip dependencies needed to build this project.\n# - Run repo-specific pre-steps (e.g., submodules, generating Cython, env vars).\n# - Run arbitrary micromamba/pip commands in the target env.\n# - Set CFLAGS/CXXFLAGS/LDFLAGS if needed for this repo.\n# - Change files in the repo if needed (e.g., fix a missing #include).\n# - Anything else needed to get a successful editable install.\n#\n# MUST:\n# - Keep this script idempotent.\n# - Use: `pip install --no-build-isolation -v -e .` or `pip install -e .` or equivalent.\n# - Do not modify the SETUP CODE or helper functions below.\n#\n# DO NOT:\n# - Change env names or Python versions outside MODEL EDIT AREA.\n# - Use build isolation unless absolutely necessary.\n\n# -----------------------------\n# Helpers (do not modify)\n# -----------------------------\nlog() { printf \"\u001b[1;34m[build]\u001b[0m %s\n\" \"$*\"; }\nwarn() { printf \"\u001b[1;33m[warn]\u001b[0m %s\n\" \"$*\" >&2; }\ndie() { printf \"\u001b[1;31m[fail]\u001b[0m %s\n\" \"$*\" >&2; exit 1; }\n\n# Conservative default parallelism (override if the repo benefits)\nexport CMAKE_BUILD_PARALLEL_LEVEL=\"${CMAKE_BUILD_PARALLEL_LEVEL:-2}\"\nexport NPY_NUM_BUILD_JOBS=\"${NPY_NUM_BUILD_JOBS:-2}\"\n\n# only run the below if condition if bvh.cpp is present\ngrep -q '^#include ' \"${ROOT_PATH}/warp/native/bvh.cpp\" || sed -i 's|#include |#include \n#include |' \"${ROOT_PATH}/warp/native/bvh.cpp\"\n\n \n# -----------------------------\n# Build & test across envs\n# -----------------------------\nfor version in $TARGET_VERSIONS; do\n ENV_NAME=\"asv_${version}\"\n log \"==> Building in env: $ENV_NAME (python=$version)\"\n\n if ! micromamba env list | awk '{print $1}' | grep -qx \"$ENV_NAME\"; then\n die \"Env $ENV_NAME not found. Did docker_build_env.sh run?\"\n fi\n\n # Import name resolution (kept simple for the agent)\n IMP=\"${IMPORT_NAME:-}\"\n if [[ -z \"$IMP\" ]]; then\n if ! IMP=\"$(asv_detect_import_name --repo-root \"$REPO_ROOT\" 2>/dev/null)\"; then\n die \"Could not determine import name. 
Set IMPORT_NAME in /etc/profile.d/asv_build_vars.sh\"\n fi\n fi\n log \"Using import name: $IMP\"\n\n # -----------------------------\n # MODEL EDIT AREA: repo-specific tweaks (optional)\n # -----------------------------\n # Examples (uncomment if needed for this repo):\n #\n # log \"Updating submodules\"\n # git -C \"$REPO_ROOT\" submodule update --init --recursive\n #\n # log \"Installing extra system libs via conda-forge\"\n # micromamba install -y -n \"$ENV_NAME\" -c conda-forge 'openblas' 'blas=*=openblas' 'libopenmp'\n #\n # log \"Pre-generating Cython sources\"\n # micromamba run -n \"$ENV_NAME\" python -m cython --version\n #\n # export CFLAGS=\"${CFLAGS:-}\"\n # export CXXFLAGS=\"${CXXFLAGS:-}\"\n # export LDFLAGS=\"${LDFLAGS:-}\"\n # -----------------------------\n\n # Install some basic micromamba packages.\n\n micromamba install -y -n \"$ENV_NAME\" -c conda-forge git conda mamba \"libmambapy<=1.9.9\" numpy scipy cython joblib threadpoolctl pytest compilers\n micromamba run -n \"$ENV_NAME\" pip install meson-python cython\n export CFLAGS=\"${CFLAGS:-} -Wno-error=incompatible-pointer-types\"\n micromamba run -n \"$ENV_NAME\" python \"${ROOT_PATH}/build_lib.py\"\n\n # Editable install (no build isolation preferrably). Toolchain lives in the env already.\n log \"Editable install with --no-build-isolation\"\n PIP_NO_BUILD_ISOLATION=1 micromamba run -n \"$ENV_NAME\" python -m pip install --no-build-isolation -v -e \"$REPO_ROOT\"\n\n # Health checks (import + compiled extension probe; optional pytest smoke with RUN_PYTEST_SMOKE=1)\n log \"Running smoke checks\"\n micromamba run -n \"$ENV_NAME\" asv_smokecheck.py --import-name \"$IMP\" --repo-root \"$REPO_ROOT\" ${RUN_PYTEST_SMOKE:+--pytest-smoke}\n\n # Machine-readable markers (useful in logs)\n echo \"::import_name=${IMP}::env=${ENV_NAME}\"\ndone\n\nlog \"All builds complete \u2705\"", - "dockerfile_data": "# syntax=docker/dockerfile:1.7\n\nFROM buildpack-deps:jammy AS base\n\nARG REPO_URL\nARG COMMIT_SHA\n\nRUN apt-get update && \\\n apt-get install -y --no-install-recommends \\\n curl git build-essential jq cmake ninja-build && \\\n rm -rf /var/lib/apt/lists/*\n\nRUN curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest \\\n | tar -xvj -C /usr/local/bin --strip-components=1 bin/micromamba\n\nENV MAMBA_ROOT_PREFIX=/opt/conda \\\n PATH=/opt/conda/bin:$PATH \\\n MAMBA_DOCKERFILE_ACTIVATE=1 \\\n OPENBLAS_NUM_THREADS=1 \\\n MKL_NUM_THREADS=1 \\\n OMP_NUM_THREADS=1\n\nRUN micromamba install -y -p $MAMBA_ROOT_PREFIX -c conda-forge \\\n python=3.10 \\\n git asv pyperf mamba conda libmambapy jq && \\\n micromamba clean --all --yes\n\nRUN mkdir -p /workspace /output\nWORKDIR /workspace\n\nFROM base AS env\nARG REPO_URL\nARG COMMIT_SHA\n\n# Entrypoint is inherited by pkg\nCOPY entrypoint.sh /entrypoint.sh\nRUN chmod +x /entrypoint.sh\nENTRYPOINT [\"/entrypoint.sh\"]\n\nRUN git clone \"$REPO_URL\" /workspace/repo\nWORKDIR /workspace/repo\nRUN git checkout \"$COMMIT_SHA\"\nLABEL vcs.url=\"$REPO_URL\" vcs.ref=\"$COMMIT_SHA\"\n\n# ---- ENV STAGE: create envs, persist vars, install smoke tool ----\nCOPY docker_build_env.sh /workspace/repo/docker_build_env.sh\nRUN chmod +x /workspace/repo/docker_build_env.sh && \\\n /workspace/repo/docker_build_env.sh\n\n\nFROM env AS pkg\n\n# ---- PKG STAGE: build+test the package for each ASV Python ----\nCOPY docker_build_pkg.sh /workspace/repo/docker_build_pkg.sh\nRUN chmod +x /workspace/repo/docker_build_pkg.sh && \\\n /workspace/repo/docker_build_pkg.sh\n# If you want to restrict to one version at 
build time, replace with:\n# RUN PY_VERSION=3.10 /workspace/repo/docker_build_pkg.sh\n", - "entrypoint_data": "#!/usr/bin/env bash\n# set -euo pipefail\nset -x\n: \"${ASV_ARGS:?Need to set ASV_ARGS}\"\n: \"${ASV_MACHINE:=?Need to set ASV_MACHINE}\"\n: \"${ASV_OS:=?Need to set ASV_OS}\"\n: \"${ASV_NUM_CPU:=?Need to set ASV_NUM_CPU}\"\n: \"${ASV_ARCH:=?Need to set ASV_ARCH}\"\n: \"${ASV_CPU:=?Need to set ASV_CPU}\"\n: \"${ASV_RAM:=?Need to set ASV_RAM}\"\n\n\ncd_asv_json_dir() {\n local match\n match=$(find . -type f -name \"asv.*.json\" | head -n 1)\n\n if [[ -n \"$match\" ]]; then\n local dir\n dir=$(dirname \"$match\")\n cd \"$dir\" || echo \"Failed to change directory to $dir\"\n else\n echo \"No 'asv.*.json' file found in current directory or subdirectories.\"\n fi\n}\n\neval \"$(micromamba shell hook --shell=bash)\"\n\nmicromamba activate base\nROOT_PATH=${PWD}\ncd_asv_json_dir || exit 1\n\n# the conf name is one of \"asv.conf.json\" or \"asv.ci.conf.json\" or \"asv.*.json\"\nCONF_NAME=$(basename \"$(find . -type f -name \"asv.*.json\" | head -n 1)\")\nif [[ -z \"$CONF_NAME\" ]]; then\n echo \"No 'asv.*.json' file found in current directory or subdirectories.\"\n exit 1\nfi\n\n# Read the python versions from the asv.conf.json\npython_versions=$(python -c \"import asv; pythons = asv.config.Config.load('$CONF_NAME').pythons; print(' '.join(pythons))\")\n# change the \"results_dir\" in asv.conf.json to \"/output/{COMMIT_SHA}/\"\nfor version in $python_versions; do\n # Create per\u2011Python env and install ASV\n python -c \"import asv, os, pathlib\npath = pathlib.Path('/output/'\\\"$COMMIT_SHA\\\"'/''\\\"$version\\\"')\npath.mkdir(parents=True, exist_ok=True)\n\nconfig = asv.config.Config.load('$CONF_NAME')\nconfig.results_dir = str(path / 'results')\nconfig.html_dir = str(path / 'html')\nconfig.branches = ['HEAD']\n\nasv.util.write_json('$CONF_NAME', config.__dict__, api_version=config.api_version)\nasv.util.write_json(path / '$CONF_NAME', config.__dict__, api_version=config.api_version)\n\"\n micromamba run -n \"asv_${version}\" asv machine --yes --config $CONF_NAME --machine \"$ASV_MACHINE\" --os \"$ASV_OS\" --num_cpu \"$ASV_NUM_CPU\" --arch \"$ASV_ARCH\" --cpu \"$ASV_CPU\" --ram \"$ASV_RAM\"\n micromamba run -n \"asv_${version}\" asv run --show-stderr ${ASV_ARGS} --config $CONF_NAME\ndone\n\necho \"Benchmarks complete.\"\n", - "env_building_data": "#!/usr/bin/env bash\nset -euo pipefail\n\n# -------- Helpers installed for all shells --------\ninstall_profile_helpers() {\n cat >/etc/profile.d/asv_utils.sh <<'EOF'\n# asv_utils.sh \u2014 login/interactive shell helpers for ASV builds\nexport MAMBA_ROOT_PREFIX=\"${MAMBA_ROOT_PREFIX:-/opt/conda}\"\n\n# Initialize micromamba for bash shells (no-op if not present)\nif command -v micromamba >/dev/null 2>&1; then\n eval \"$(micromamba shell hook --shell=bash)\"\nfi\n\n# Find and cd into the first directory that contains an asv.*.json\ncd_asv_json_dir() {\n local match\n match=$(find . -type f -name \"asv.*.json\" | head -n 1)\n if [[ -n \"$match\" ]]; then\n cd \"$(dirname \"$match\")\" || echo \"Failed to change directory\"\n else\n echo \"No 'asv.*.json' file found in current directory or subdirectories.\"\n return 1\n fi\n}\n\n# Return just the conf filename (e.g., asv.conf.json)\nasv_conf_name() {\n local f\n f=$(find . 
-type f -name \"asv.*.json\" | head -n 1)\n [[ -n \"$f\" ]] && basename \"$f\" || return 1\n}\n\n# Build performance knobs (overridable)\nexport MAKEFLAGS=\"${MAKEFLAGS:--j$(nproc)}\"\nexport CMAKE_BUILD_PARALLEL_LEVEL=\"${CMAKE_BUILD_PARALLEL_LEVEL:-$(nproc)}\"\nexport NPY_NUM_BUILD_JOBS=\"${NPY_NUM_BUILD_JOBS:-$(nproc)}\"\n\n# Shared pip cache to speed repeated editable builds\nexport PIP_CACHE_DIR=\"${PIP_CACHE_DIR:-/opt/pipcache}\"\nmkdir -p \"$PIP_CACHE_DIR\"\nEOF\n}\n\n# -------- Persisted build variables --------\nwrite_build_vars() {\n local py_versions=\"$1\"\n local import_name=\"$2\"\n\n mkdir -p /etc/asv_env\n echo \"$py_versions\" > /etc/asv_env/py_versions\n echo \"$import_name\" > /etc/asv_env/import_name\n\n # Exported for every future shell (pkg script, interactive, etc.)\n cat >/etc/profile.d/asv_build_vars.sh </usr/local/bin/asv_smokecheck.py <<'PY'\n#!/usr/bin/env python\nimport argparse, importlib, pathlib, sys\n\ndef import_and_version(name: str):\n m = importlib.import_module(name)\n ver = getattr(m, \"__version__\", \"unknown\")\n print(f\"{name} imported ok; __version__={ver}\")\n\ndef probe_compiled(name: str, max_ext: int = 10):\n m = importlib.import_module(name)\n if not hasattr(m, \"__path__\"):\n print(\"No package __path__ (likely a single-module dist); skipping compiled probe.\")\n return\n pkg_path = pathlib.Path(list(m.__path__)[0])\n so_like = list(pkg_path.rglob(\"*.so\")) + list(pkg_path.rglob(\"*.pyd\"))\n failed = []\n for ext in so_like[:max_ext]:\n rel = ext.relative_to(pkg_path).with_suffix(\"\")\n dotted = \".\".join([name] + list(rel.parts))\n try:\n importlib.import_module(dotted)\n except Exception as e:\n failed.append((dotted, str(e)))\n if failed:\n print(\"Some compiled submodules failed to import:\")\n for d, err in failed:\n print(\" -\", d, \"->\", err)\n sys.exit(1)\n else:\n print(\"Compiled submodules (if any) import ok\")\n\ndef main():\n p = argparse.ArgumentParser()\n p.add_argument(\"--import-name\", required=True)\n p.add_argument(\"--repo-root\", default=\".\")\n p.add_argument(\"--pytest-smoke\", action=\"store_true\",\n help=\"Run a quick pytest smoke: -k 'not slow' --maxfail=1\")\n p.add_argument(\"--max-ext\", type=int, default=10)\n args = p.parse_args()\n\n import_and_version(args.import_name)\n probe_compiled(args.import_name, max_ext=args.max_ext)\n\n if args.pytest_smoke:\n import subprocess, os\n if any((pathlib.Path(args.repo_root)/p).exists() for p in (\"tests\", \"pytest.ini\", \"pyproject.toml\")):\n print(\"Running pytest smoke...\")\n rc = subprocess.call([sys.executable, \"-m\", \"pytest\", \"-q\", \"-k\", \"not slow\", \"--maxfail=1\"], cwd=args.repo_root)\n if rc != 0:\n sys.exit(rc)\n else:\n print(\"No tests detected; skipping pytest smoke.\")\n print(\"Smokecheck OK \u2705\")\n\nif __name__ == \"__main__\":\n main()\nPY\n chmod +x /usr/local/bin/asv_smokecheck.py\n}\ninstall_smokecheck\n\n# -------- Install an import-name detector CLI --------\ninstall_detect_import_name() {\n cat >/usr/local/bin/detect_import_name <<'PY'\n#!/usr/bin/env python\nimport argparse, pathlib, re, sys, subprocess, configparser, json\n\n# --- optional TOML loader (py3.11+: tomllib; else tomli if available) ---\ntry:\n import tomllib as toml\nexcept Exception:\n try:\n import tomli as toml\n except Exception:\n toml = None\n\nEXCEPTIONS = {\n # common dist\u2192import mismatches\n \"scikit-learn\": \"sklearn\",\n \"opencv-python\": \"cv2\",\n \"pyyaml\": \"yaml\",\n \"beautifulsoup4\": \"bs4\",\n \"pillow\": \"PIL\",\n 
\"mysqlclient\": \"MySQLdb\",\n \"psycopg2-binary\": \"psycopg2\",\n \"opencv-contrib-python\": \"cv2\",\n \"protobuf\": \"google\", # top-level package\n \"apache-beam\": \"apache_beam\",\n}\n\n# All the package names we typically query.\nEXCEPTIONS.update({\n # --- core scientific stack ---\n \"scikit-learn\": \"sklearn\",\n \"numpy\": \"numpy\",\n \"pandas\": \"pandas\",\n \"scipy\": \"scipy\",\n \"scikit-image\": \"skimage\",\n \"pywt\": \"pywt\",\n \"xarray\": \"xarray\",\n \"bottleneck\": \"bottleneck\",\n \"h5py\": \"h5py\",\n \"networkx\": \"networkx\",\n \"shapely\": \"shapely\",\n\n # --- ML / stats / optimization / viz ---\n \"optuna\": \"optuna\",\n \"arviz\": \"arviz\",\n \"pymc\": \"pymc\",\n \"kedro\": \"kedro\",\n \"modin\": \"modin\",\n \"napari\": \"napari\",\n \"deepchecks\": \"deepchecks\",\n \"voyager\": \"voyager\", # spotify/voyager\n \"warp\": \"warp\", # NVIDIA/warp\n \"newton\": \"newton\", # newton-physics/newton\n\n # --- domain / ecosystem libs ---\n \"geopandas\": \"geopandas\",\n \"cartopy\": \"cartopy\",\n \"iris\": \"iris\",\n \"anndata\": \"anndata\",\n \"scanpy\": \"scanpy\",\n \"sunpy\": \"sunpy\",\n \"pvlib-python\": \"pvlib\",\n \"PyBaMM\": \"pybamm\",\n \"momepy\": \"momepy\",\n \"satpy\": \"satpy\",\n \"pydicom\": \"pydicom\",\n \"pynetdicom\": \"pynetdicom\",\n\n # --- file formats / IO / infra ---\n \"asdf\": \"asdf\",\n \"arrow\": \"pyarrow\", # apache/arrow\n \"ArcticDB\": \"arcticdb\",\n \"arctic\": \"arctic\",\n\n # --- web / frameworks / utils ---\n \"django-components\": \"django_components\",\n \"h11\": \"h11\",\n \"tqdm\": \"tqdm\",\n \"rich\": \"rich\",\n \"posthog\": \"posthog\",\n \"datalad\": \"datalad\",\n \"ipyparallel\": \"ipyparallel\",\n\n # --- numerical / symbolic / control ---\n \"autograd\": \"autograd\",\n \"python-control\": \"control\",\n \"loopy\": \"loopy\",\n \"thermo\": \"thermo\",\n \"chempy\": \"chempy\",\n \"adaptive\": \"adaptive\",\n\n # --- scientific image / signal ---\n \"metric-learn\": \"metric_learn\",\n\n # --- quantum / physics ---\n \"Cirq\": \"cirq\",\n \"memray\": \"memray\",\n \"devito\": \"devito\",\n\n # --- bio / chem / data ---\n \"sourmash\": \"sourmash\",\n \"dipy\": \"dipy\",\n\n # --- protocol buffers / codegen / outlines ---\n \"python-betterproto\": \"betterproto\",\n \"outlines\": \"outlines\",\n\n # --- DS viz / raster ---\n \"datashader\": \"datashader\",\n \"xarray-spatial\": \"xarray_spatial\",\n\n # --- misc ---\n \"enlighten\": \"enlighten\",\n \"xorbits\": \"xorbits\",\n \"geopandas\": \"geopandas\",\n \"lmfit-py\": \"lmfit\",\n \"mdanalysis\": \"MDAnalysis\",\n \"nilearn\": \"nilearn\",\n})\n\n\nEXCLUDE_DIRS = {\n \".git\", \".hg\", \".svn\", \".tox\", \".nox\", \".venv\", \"venv\",\n \"build\", \"dist\", \"__pycache__\", \".mypy_cache\", \".pytest_cache\",\n \"docs\", \"doc\", \"site\", \"examples\", \"benchmarks\", \"tests\", \"testing\",\n}\n\ndef _norm(s: str) -> str:\n return re.sub(r\"[-_.]+\", \"\", s).lower()\n\ndef read_pyproject(root: pathlib.Path):\n cfg = {}\n p = root / \"pyproject.toml\"\n if toml and p.exists():\n try:\n cfg = toml.loads(p.read_text(encoding=\"utf-8\"))\n except Exception:\n pass\n return cfg\n\ndef read_setup_cfg(root: pathlib.Path):\n p = root / \"setup.cfg\"\n cp = configparser.ConfigParser()\n if p.exists():\n try:\n cp.read(p, encoding=\"utf-8\")\n except Exception:\n pass\n return cp\n\ndef dist_name_from_config(pyproject, setup_cfg):\n # PEP 621 name\n name = (pyproject.get(\"project\", {}) or {}).get(\"name\")\n if not name:\n # setup.cfg 
[metadata] name\n if setup_cfg.has_section(\"metadata\"):\n name = setup_cfg.get(\"metadata\", \"name\", fallback=None)\n # setup.py --name as last resort\n return name\n\ndef package_roots_from_config(root, pyproject, setup_cfg):\n roots = set([root])\n # setuptools package-dir mapping\n # pyproject: [tool.setuptools.package-dir] \"\" = \"src\"\n pkgdir = ((pyproject.get(\"tool\", {}) or {}).get(\"setuptools\", {}) or {}).get(\"package-dir\", {})\n if isinstance(pkgdir, dict):\n if \"\" in pkgdir:\n roots.add((root / pkgdir[\"\"]).resolve())\n for _, d in pkgdir.items():\n try:\n roots.add((root / d).resolve())\n except Exception:\n pass\n # setup.cfg [options] package_dir\n if setup_cfg.has_section(\"options\"):\n raw = setup_cfg.get(\"options\", \"package_dir\", fallback=None)\n if raw:\n # can be \"=\\nsrc\" or mapping lines\n lines = [l.strip() for l in raw.splitlines() if l.strip()]\n # accept simple \"=src\" or \"\" = \"src\"\n for ln in lines:\n m = re.match(r'^(\"?\\'?)*\\s*=?\\s*(\"?\\'?)*\\s*(?P[^#;]+)$', ln)\n if m:\n roots.add((root / m.group(\"path\").strip()).resolve())\n # setup.cfg [options.packages.find] where\n if setup_cfg.has_section(\"options.packages.find\"):\n where = setup_cfg.get(\"options.packages.find\", \"where\", fallback=None)\n if where:\n for w in re.split(r\"[,\\s]+\", where):\n if w:\n roots.add((root / w).resolve())\n return [r for r in roots if r.exists()]\n\ndef explicit_modules_from_config(pyproject, setup_cfg):\n mods = set()\n # pyproject (tool.setuptools) py-modules / packages\n st = ((pyproject.get(\"tool\", {}) or {}).get(\"setuptools\", {}) or {})\n for key in (\"py-modules\", \"packages\"):\n val = st.get(key)\n if isinstance(val, list):\n mods.update(val)\n # setup.cfg [options] py_modules / packages\n if setup_cfg.has_section(\"options\"):\n for key in (\"py_modules\", \"packages\"):\n raw = setup_cfg.get(\"options\", key, fallback=None)\n if raw:\n for tok in re.split(r\"[\\s,]+\", raw.strip()):\n if tok and tok != \"find:\":\n mods.add(tok)\n return sorted(mods)\n\ndef read_top_level_from_egg_info(root):\n # editable installs often leave ./.egg-info/top_level.txt\n for ei in root.rglob(\"*.egg-info\"):\n tl = ei / \"top_level.txt\"\n if tl.exists():\n try:\n names = [l.strip() for l in tl.read_text(encoding=\"utf-8\").splitlines() if l.strip()]\n if names:\n return names\n except Exception:\n pass\n # also consider dist-info during local builds\n for di in root.rglob(\"*.dist-info\"):\n tl = di / \"top_level.txt\"\n if tl.exists():\n try:\n names = [l.strip() for l in tl.read_text(encoding=\"utf-8\").splitlines() if l.strip()]\n if names:\n return names\n except Exception:\n pass\n return None\n\ndef walk_candidates(roots):\n \"\"\"Return set of plausible top-level import names under candidate roots.\"\"\"\n cands = set()\n for r in roots:\n for path in r.rglob(\"__init__.py\"):\n try:\n pkg_dir = path.parent\n # skip excluded dirs anywhere in the path\n if any(part in EXCLUDE_DIRS for part in pkg_dir.parts):\n continue\n # Construct package name relative to the nearest search root\n try:\n rel = pkg_dir.relative_to(r)\n except Exception:\n continue\n if not rel.parts:\n continue\n top = rel.parts[0]\n if top.startswith(\"_\"):\n # usually private tooling\n continue\n cands.add(top)\n except Exception:\n pass\n # standalone modules at top-level of roots (py_modules case)\n for mod in r.glob(\"*.py\"):\n if mod.stem not in (\"setup\",):\n cands.add(mod.stem)\n return sorted(cands)\n\ndef score_candidates(cands, dist_name):\n \"\"\"Assign a 
score preferring names that match the dist name.\"\"\"\n scores = {}\n n_dist = _norm(dist_name) if dist_name else None\n prefer = None\n if dist_name and dist_name.lower() in EXCEPTIONS:\n prefer = EXCEPTIONS[dist_name.lower()]\n # also try normalized exception keys (e.g. capitalization)\n for k, v in EXCEPTIONS.items():\n if _norm(k) == _norm(dist_name or \"\"):\n prefer = v\n\n for c in cands:\n s = 0\n if prefer and _norm(c) == _norm(prefer):\n s += 100\n if n_dist and _norm(c) == n_dist:\n s += 80\n if n_dist and (_norm(c).startswith(n_dist) or n_dist.startswith(_norm(c))):\n s += 20\n # shorter, simpler names get a slight bump\n s += max(0, 10 - len(c))\n scores[c] = s\n return sorted(cands, key=lambda x: (-scores.get(x, 0), x)), scores\n\ndef detect(root: str, return_all=False):\n root = pathlib.Path(root).resolve()\n\n pyproject = read_pyproject(root)\n setup_cfg = read_setup_cfg(root)\n dist_name = dist_name_from_config(pyproject, setup_cfg)\n\n # 1) top_level.txt (best signal if present)\n top = read_top_level_from_egg_info(root)\n if top:\n if return_all:\n return top\n # If multiple, score them\n ordered, _ = score_candidates(top, dist_name or \"\")\n return [ordered[0]]\n\n # 2) explicit declarations (py_modules / packages lists)\n explicit = explicit_modules_from_config(pyproject, setup_cfg)\n\n # 3) find correct search roots (src layout, package_dir, etc.)\n roots = package_roots_from_config(root, pyproject, setup_cfg)\n\n # 4) walk code to infer candidates\n walked = walk_candidates(roots)\n\n # merge explicit + walked\n cands = list(dict.fromkeys(explicit + walked)) # keep order & de-dup\n\n # 5) fallback from dist name heuristics/exceptions if still empty\n if not cands and dist_name:\n # exception or simple normalization\n guess = EXCEPTIONS.get(dist_name.lower()) or re.sub(r\"[-\\.]+\", \"_\", dist_name)\n cands = [guess]\n\n if not cands:\n return []\n\n if return_all:\n # return ordered list\n ordered, _ = score_candidates(cands, dist_name or \"\")\n return ordered\n else:\n ordered, _ = score_candidates(cands, dist_name or \"\")\n return [ordered[0]]\n\ndef main():\n ap = argparse.ArgumentParser(description=\"Detect the top-level Python import name for a repo.\")\n ap.add_argument(\"--repo-root\", default=\".\", help=\"Path to repository root\")\n ap.add_argument(\"--all\", action=\"store_true\", help=\"Print all plausible names (JSON list)\")\n args = ap.parse_args()\n\n names = detect(args.repo_root, return_all=args.all)\n if not names:\n sys.exit(1)\n if args.all:\n print(json.dumps(names))\n else:\n print(names[0])\n\nif __name__ == \"__main__\":\n main()\nPY\n chmod +x /usr/local/bin/detect_import_name\n}\n\ninstall_detect_import_name\n\n# -------- Script body --------\n\ninstall_profile_helpers\n# shellcheck disable=SC1091\nsource /etc/profile.d/asv_utils.sh\n\n# Ensure base micromamba is active for introspecting ASV config\nmicromamba activate base\n\ninstall_detect_import_name\ninstall_smokecheck\n\nIMPORT_NAME=\"$(detect_import_name || true)\"\nif [[ -z \"$IMPORT_NAME\" ]]; then\n echo \"WARN: Could not determine import name; the pkg stage will fall back to local detection.\"\nfi\n\n\n# Move into the directory that contains asv.*.json\ncd_asv_json_dir || { echo \"No 'asv.*.json' file found.\" >&2; exit 1; }\n\nCONF_NAME=\"$(asv_conf_name || true)\"\nif [[ -z \"${CONF_NAME:-}\" ]]; then\n echo \"No 'asv.*.json' file found.\" >&2\n exit 1\nfi\n\n# Make sure tomli is available in base for pyproject parsing\nmicromamba install -y -n base -c conda-forge tomli 
>/dev/null\n\n# Read python versions from the ASV config\nPY_VERSIONS=$(python - <&2\n exit 1\nfi\n###### END SETUP CODE ######\n\n# -----------------------------\n# Agent guidance (read-first)\n# -----------------------------\n# GOAL: For each Python version below, install the project in EDITABLE mode into env asv_{version},\n# with NO build isolation, then run health checks.\n#\n# Below this comment, you should do whatever is necessary to build the project without errors. Including (but not limited to):\n# - Add extra conda/pip dependencies needed to build this project.\n# - Run repo-specific pre-steps (e.g., submodules, generating Cython, env vars).\n# - Run arbitrary micromamba/pip commands in the target env.\n# - Set CFLAGS/CXXFLAGS/LDFLAGS if needed for this repo.\n# - Change files in the repo if needed (e.g., fix a missing #include).\n# - Anything else needed to get a successful editable install.\n#\n# MUST:\n# - Keep this script idempotent.\n# - Use: `pip install --no-build-isolation -v -e .` or `pip install -e .` or equivalent.\n# - Do not modify the SETUP CODE or helper functions below.\n#\n# DO NOT:\n# - Change env names or Python versions outside MODEL EDIT AREA.\n# - Use build isolation unless absolutely necessary.\n\n# -----------------------------\n# Helpers (do not modify)\n# -----------------------------\nlog() { printf \"\u001b[1;34m[build]\u001b[0m %s\n\" \"$*\"; }\nwarn() { printf \"\u001b[1;33m[warn]\u001b[0m %s\n\" \"$*\" >&2; }\ndie() { printf \"\u001b[1;31m[fail]\u001b[0m %s\n\" \"$*\" >&2; exit 1; }\n\n# Conservative default parallelism (override if the repo benefits)\nexport CMAKE_BUILD_PARALLEL_LEVEL=\"${CMAKE_BUILD_PARALLEL_LEVEL:-2}\"\nexport NPY_NUM_BUILD_JOBS=\"${NPY_NUM_BUILD_JOBS:-2}\"\n\n# -----------------------------\n# Build & test across envs\n# -----------------------------\nfor version in $TARGET_VERSIONS; do\n ENV_NAME=\"asv_${version}\"\n log \"==> Building in env: $ENV_NAME (python=$version)\"\n\n if ! micromamba env list | awk '{print $1}' | grep -qx \"$ENV_NAME\"; then\n die \"Env $ENV_NAME not found. Did docker_build_env.sh run?\"\n fi\n\n # Import name resolution (kept simple for the agent)\n IMP=\"${IMPORT_NAME:-}\"\n if [[ -z \"$IMP\" ]]; then\n if ! IMP=\"$(asv_detect_import_name --repo-root \"$REPO_ROOT\" 2>/dev/null)\"; then\n die \"Could not determine import name. Set IMPORT_NAME in /etc/profile.d/asv_build_vars.sh\"\n fi\n fi\n log \"Using import name: $IMP\"\n\n # -----------------------------\n # MODEL EDIT AREA: repo-specific tweaks (optional)\n # -----------------------------\n # Examples (uncomment if needed for this repo):\n #\n # log \"Updating submodules\"\n # git -C \"$REPO_ROOT\" submodule update --init --recursive\n #\n # log \"Installing extra system libs via conda-forge\"\n # micromamba install -y -n \"$ENV_NAME\" -c conda-forge 'openblas' 'blas=*=openblas' 'libopenmp'\n #\n # log \"Pre-generating Cython sources\"\n # micromamba run -n \"$ENV_NAME\" python -m cython --version\n #\n # export CFLAGS=\"${CFLAGS:-}\"\n # export CXXFLAGS=\"${CXXFLAGS:-}\"\n # export LDFLAGS=\"${LDFLAGS:-}\"\n # -----------------------------\n\n # Install some basic micromamba packages.\n\n micromamba install -y -n \"$ENV_NAME\" -c conda-forge git conda mamba \"libmambapy<=1.9.9\" numpy scipy cython joblib threadpoolctl pytest compilers\n if [[ -f \"${ROOT_PATH}/make_version.py\" ]]; then\n micromamba run -n \"$ENV_NAME\" python \"${ROOT_PATH}/make_version.py\"\n fi\n\n # Editable install (no build isolation preferrably). 
Toolchain lives in the env already.\n log \"Editable install with --no-build-isolation\"\n PIP_NO_BUILD_ISOLATION=1 micromamba run -n \"$ENV_NAME\" python -m pip install --no-build-isolation -v -e \"$REPO_ROOT\"\n\n # Health checks (import + compiled extension probe; optional pytest smoke with RUN_PYTEST_SMOKE=1)\n log \"Running smoke checks\"\n micromamba run -n \"$ENV_NAME\" asv_smokecheck.py --import-name \"$IMP\" --repo-root \"$REPO_ROOT\" ${RUN_PYTEST_SMOKE:+--pytest-smoke}\n\n # Machine-readable markers (useful in logs)\n echo \"::import_name=${IMP}::env=${ENV_NAME}\"\ndone\n\nlog \"All builds complete \u2705\"", - "dockerfile_data": "# syntax=docker/dockerfile:1.7\n\nFROM buildpack-deps:jammy AS base\n\nARG REPO_URL\nARG COMMIT_SHA\n\nRUN apt-get update && \\\n apt-get install -y --no-install-recommends \\\n curl git build-essential jq cmake ninja-build && \\\n rm -rf /var/lib/apt/lists/*\n\nRUN curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest \\\n | tar -xvj -C /usr/local/bin --strip-components=1 bin/micromamba\n\nENV MAMBA_ROOT_PREFIX=/opt/conda \\\n PATH=/opt/conda/bin:$PATH \\\n MAMBA_DOCKERFILE_ACTIVATE=1 \\\n OPENBLAS_NUM_THREADS=1 \\\n MKL_NUM_THREADS=1 \\\n OMP_NUM_THREADS=1\n\nRUN micromamba install -y -p $MAMBA_ROOT_PREFIX -c conda-forge \\\n python=3.10 \\\n git asv pyperf mamba conda libmambapy jq && \\\n micromamba clean --all --yes\n\nRUN mkdir -p /workspace /output\nWORKDIR /workspace\n\nFROM base AS env\nARG REPO_URL\nARG COMMIT_SHA\n\n# Entrypoint is inherited by pkg\nCOPY entrypoint.sh /entrypoint.sh\nRUN chmod +x /entrypoint.sh\nENTRYPOINT [\"/entrypoint.sh\"]\n\nRUN git clone \"$REPO_URL\" /workspace/repo\nWORKDIR /workspace/repo\nRUN git checkout \"$COMMIT_SHA\"\nLABEL vcs.url=\"$REPO_URL\" vcs.ref=\"$COMMIT_SHA\"\n\n# ---- ENV STAGE: create envs, persist vars, install smoke tool ----\nCOPY docker_build_env.sh /workspace/repo/docker_build_env.sh\nRUN chmod +x /workspace/repo/docker_build_env.sh && \\\n /workspace/repo/docker_build_env.sh\n\n\nFROM env AS pkg\n\n# ---- PKG STAGE: build+test the package for each ASV Python ----\nCOPY docker_build_pkg.sh /workspace/repo/docker_build_pkg.sh\nRUN chmod +x /workspace/repo/docker_build_pkg.sh && \\\n /workspace/repo/docker_build_pkg.sh\n# If you want to restrict to one version at build time, replace with:\n# RUN PY_VERSION=3.10 /workspace/repo/docker_build_pkg.sh\n", - "entrypoint_data": "#!/usr/bin/env bash\n# set -euo pipefail\nset -x\n: \"${ASV_ARGS:?Need to set ASV_ARGS}\"\n: \"${ASV_MACHINE:=?Need to set ASV_MACHINE}\"\n: \"${ASV_OS:=?Need to set ASV_OS}\"\n: \"${ASV_NUM_CPU:=?Need to set ASV_NUM_CPU}\"\n: \"${ASV_ARCH:=?Need to set ASV_ARCH}\"\n: \"${ASV_CPU:=?Need to set ASV_CPU}\"\n: \"${ASV_RAM:=?Need to set ASV_RAM}\"\n\n\ncd_asv_json_dir() {\n local match\n match=$(find . -type f -name \"asv.*.json\" | head -n 1)\n\n if [[ -n \"$match\" ]]; then\n local dir\n dir=$(dirname \"$match\")\n cd \"$dir\" || echo \"Failed to change directory to $dir\"\n else\n echo \"No 'asv.*.json' file found in current directory or subdirectories.\"\n fi\n}\n\neval \"$(micromamba shell hook --shell=bash)\"\n\nmicromamba activate base\nROOT_PATH=${PWD}\ncd_asv_json_dir || exit 1\n\n# the conf name is one of \"asv.conf.json\" or \"asv.ci.conf.json\" or \"asv.*.json\"\nCONF_NAME=$(basename \"$(find . 
-type f -name \"asv.*.json\" | head -n 1)\")\nif [[ -z \"$CONF_NAME\" ]]; then\n echo \"No 'asv.*.json' file found in current directory or subdirectories.\"\n exit 1\nfi\n\n# Read the python versions from the asv.conf.json\npython_versions=$(python -c \"import asv; pythons = asv.config.Config.load('$CONF_NAME').pythons; print(' '.join(pythons))\")\n# change the \"results_dir\" in asv.conf.json to \"/output/{COMMIT_SHA}/\"\nfor version in $python_versions; do\n # Create per\u2011Python env and install ASV\n python -c \"import asv, os, pathlib\npath = pathlib.Path('/output/'\\\"$COMMIT_SHA\\\"'/''\\\"$version\\\"')\npath.mkdir(parents=True, exist_ok=True)\n\nconfig = asv.config.Config.load('$CONF_NAME')\nconfig.results_dir = str(path / 'results')\nconfig.html_dir = str(path / 'html')\nconfig.branches = ['HEAD']\n\nasv.util.write_json('$CONF_NAME', config.__dict__, api_version=config.api_version)\nasv.util.write_json(path / '$CONF_NAME', config.__dict__, api_version=config.api_version)\n\"\n micromamba run -n \"asv_${version}\" asv machine --yes --config $CONF_NAME --machine \"$ASV_MACHINE\" --os \"$ASV_OS\" --num_cpu \"$ASV_NUM_CPU\" --arch \"$ASV_ARCH\" --cpu \"$ASV_CPU\" --ram \"$ASV_RAM\"\n micromamba run -n \"asv_${version}\" asv run --show-stderr ${ASV_ARGS} --config $CONF_NAME\ndone\n\necho \"Benchmarks complete.\"\n", - "env_building_data": "#!/usr/bin/env bash\nset -euo pipefail\n\n# -------- Helpers installed for all shells --------\ninstall_profile_helpers() {\n cat >/etc/profile.d/asv_utils.sh <<'EOF'\n# asv_utils.sh \u2014 login/interactive shell helpers for ASV builds\nexport MAMBA_ROOT_PREFIX=\"${MAMBA_ROOT_PREFIX:-/opt/conda}\"\n\n# Initialize micromamba for bash shells (no-op if not present)\nif command -v micromamba >/dev/null 2>&1; then\n eval \"$(micromamba shell hook --shell=bash)\"\nfi\n\n# Find and cd into the first directory that contains an asv.*.json\ncd_asv_json_dir() {\n local match\n match=$(find . -type f -name \"asv.*.json\" | head -n 1)\n if [[ -n \"$match\" ]]; then\n cd \"$(dirname \"$match\")\" || echo \"Failed to change directory\"\n else\n echo \"No 'asv.*.json' file found in current directory or subdirectories.\"\n return 1\n fi\n}\n\n# Return just the conf filename (e.g., asv.conf.json)\nasv_conf_name() {\n local f\n f=$(find . 
-type f -name \"asv.*.json\" | head -n 1)\n [[ -n \"$f\" ]] && basename \"$f\" || return 1\n}\n\n# Build performance knobs (overridable)\nexport MAKEFLAGS=\"${MAKEFLAGS:--j$(nproc)}\"\nexport CMAKE_BUILD_PARALLEL_LEVEL=\"${CMAKE_BUILD_PARALLEL_LEVEL:-$(nproc)}\"\nexport NPY_NUM_BUILD_JOBS=\"${NPY_NUM_BUILD_JOBS:-$(nproc)}\"\n\n# Shared pip cache to speed repeated editable builds\nexport PIP_CACHE_DIR=\"${PIP_CACHE_DIR:-/opt/pipcache}\"\nmkdir -p \"$PIP_CACHE_DIR\"\nEOF\n}\n\n# -------- Persisted build variables --------\nwrite_build_vars() {\n local py_versions=\"$1\"\n local import_name=\"$2\"\n\n mkdir -p /etc/asv_env\n echo \"$py_versions\" > /etc/asv_env/py_versions\n echo \"$import_name\" > /etc/asv_env/import_name\n\n # Exported for every future shell (pkg script, interactive, etc.)\n cat >/etc/profile.d/asv_build_vars.sh </usr/local/bin/asv_smokecheck.py <<'PY'\n#!/usr/bin/env python\nimport argparse, importlib, pathlib, sys\n\ndef import_and_version(name: str):\n m = importlib.import_module(name)\n ver = getattr(m, \"__version__\", \"unknown\")\n print(f\"{name} imported ok; __version__={ver}\")\n\ndef probe_compiled(name: str, max_ext: int = 10):\n m = importlib.import_module(name)\n if not hasattr(m, \"__path__\"):\n print(\"No package __path__ (likely a single-module dist); skipping compiled probe.\")\n return\n pkg_path = pathlib.Path(list(m.__path__)[0])\n so_like = list(pkg_path.rglob(\"*.so\")) + list(pkg_path.rglob(\"*.pyd\"))\n failed = []\n for ext in so_like[:max_ext]:\n rel = ext.relative_to(pkg_path).with_suffix(\"\")\n dotted = \".\".join([name] + list(rel.parts))\n try:\n importlib.import_module(dotted)\n except Exception as e:\n failed.append((dotted, str(e)))\n if failed:\n print(\"Some compiled submodules failed to import:\")\n for d, err in failed:\n print(\" -\", d, \"->\", err)\n sys.exit(1)\n else:\n print(\"Compiled submodules (if any) import ok\")\n\ndef main():\n p = argparse.ArgumentParser()\n p.add_argument(\"--import-name\", required=True)\n p.add_argument(\"--repo-root\", default=\".\")\n p.add_argument(\"--pytest-smoke\", action=\"store_true\",\n help=\"Run a quick pytest smoke: -k 'not slow' --maxfail=1\")\n p.add_argument(\"--max-ext\", type=int, default=10)\n args = p.parse_args()\n\n import_and_version(args.import_name)\n probe_compiled(args.import_name, max_ext=args.max_ext)\n\n if args.pytest_smoke:\n import subprocess, os\n if any((pathlib.Path(args.repo_root)/p).exists() for p in (\"tests\", \"pytest.ini\", \"pyproject.toml\")):\n print(\"Running pytest smoke...\")\n rc = subprocess.call([sys.executable, \"-m\", \"pytest\", \"-q\", \"-k\", \"not slow\", \"--maxfail=1\"], cwd=args.repo_root)\n if rc != 0:\n sys.exit(rc)\n else:\n print(\"No tests detected; skipping pytest smoke.\")\n print(\"Smokecheck OK \u2705\")\n\nif __name__ == \"__main__\":\n main()\nPY\n chmod +x /usr/local/bin/asv_smokecheck.py\n}\ninstall_smokecheck\n\n# -------- Install an import-name detector CLI --------\ninstall_detect_import_name() {\n cat >/usr/local/bin/detect_import_name <<'PY'\n#!/usr/bin/env python\nimport argparse, pathlib, re, sys, subprocess, configparser, json\n\n# --- optional TOML loader (py3.11+: tomllib; else tomli if available) ---\ntry:\n import tomllib as toml\nexcept Exception:\n try:\n import tomli as toml\n except Exception:\n toml = None\n\nEXCEPTIONS = {\n # common dist\u2192import mismatches\n \"scikit-learn\": \"sklearn\",\n \"opencv-python\": \"cv2\",\n \"pyyaml\": \"yaml\",\n \"beautifulsoup4\": \"bs4\",\n \"pillow\": \"PIL\",\n 
\"mysqlclient\": \"MySQLdb\",\n \"psycopg2-binary\": \"psycopg2\",\n \"opencv-contrib-python\": \"cv2\",\n \"protobuf\": \"google\", # top-level package\n \"apache-beam\": \"apache_beam\",\n}\n\n# All the package names we typically query.\nEXCEPTIONS.update({\n # --- core scientific stack ---\n \"scikit-learn\": \"sklearn\",\n \"numpy\": \"numpy\",\n \"pandas\": \"pandas\",\n \"scipy\": \"scipy\",\n \"scikit-image\": \"skimage\",\n \"pywt\": \"pywt\",\n \"xarray\": \"xarray\",\n \"bottleneck\": \"bottleneck\",\n \"h5py\": \"h5py\",\n \"networkx\": \"networkx\",\n \"shapely\": \"shapely\",\n\n # --- ML / stats / optimization / viz ---\n \"optuna\": \"optuna\",\n \"arviz\": \"arviz\",\n \"pymc\": \"pymc\",\n \"kedro\": \"kedro\",\n \"modin\": \"modin\",\n \"napari\": \"napari\",\n \"deepchecks\": \"deepchecks\",\n \"voyager\": \"voyager\", # spotify/voyager\n \"warp\": \"warp\", # NVIDIA/warp\n \"newton\": \"newton\", # newton-physics/newton\n\n # --- domain / ecosystem libs ---\n \"geopandas\": \"geopandas\",\n \"cartopy\": \"cartopy\",\n \"iris\": \"iris\",\n \"anndata\": \"anndata\",\n \"scanpy\": \"scanpy\",\n \"sunpy\": \"sunpy\",\n \"pvlib-python\": \"pvlib\",\n \"PyBaMM\": \"pybamm\",\n \"momepy\": \"momepy\",\n \"satpy\": \"satpy\",\n \"pydicom\": \"pydicom\",\n \"pynetdicom\": \"pynetdicom\",\n\n # --- file formats / IO / infra ---\n \"asdf\": \"asdf\",\n \"arrow\": \"pyarrow\", # apache/arrow\n \"ArcticDB\": \"arcticdb\",\n \"arctic\": \"arctic\",\n\n # --- web / frameworks / utils ---\n \"django-components\": \"django_components\",\n \"h11\": \"h11\",\n \"tqdm\": \"tqdm\",\n \"rich\": \"rich\",\n \"posthog\": \"posthog\",\n \"datalad\": \"datalad\",\n \"ipyparallel\": \"ipyparallel\",\n\n # --- numerical / symbolic / control ---\n \"autograd\": \"autograd\",\n \"python-control\": \"control\",\n \"loopy\": \"loopy\",\n \"thermo\": \"thermo\",\n \"chempy\": \"chempy\",\n \"adaptive\": \"adaptive\",\n\n # --- scientific image / signal ---\n \"metric-learn\": \"metric_learn\",\n\n # --- quantum / physics ---\n \"Cirq\": \"cirq\",\n \"memray\": \"memray\",\n \"devito\": \"devito\",\n\n # --- bio / chem / data ---\n \"sourmash\": \"sourmash\",\n \"dipy\": \"dipy\",\n\n # --- protocol buffers / codegen / outlines ---\n \"python-betterproto\": \"betterproto\",\n \"outlines\": \"outlines\",\n\n # --- DS viz / raster ---\n \"datashader\": \"datashader\",\n \"xarray-spatial\": \"xarray_spatial\",\n\n # --- misc ---\n \"enlighten\": \"enlighten\",\n \"xorbits\": \"xorbits\",\n \"geopandas\": \"geopandas\",\n \"lmfit-py\": \"lmfit\",\n \"mdanalysis\": \"MDAnalysis\",\n \"nilearn\": \"nilearn\",\n})\n\n\nEXCLUDE_DIRS = {\n \".git\", \".hg\", \".svn\", \".tox\", \".nox\", \".venv\", \"venv\",\n \"build\", \"dist\", \"__pycache__\", \".mypy_cache\", \".pytest_cache\",\n \"docs\", \"doc\", \"site\", \"examples\", \"benchmarks\", \"tests\", \"testing\",\n}\n\ndef _norm(s: str) -> str:\n return re.sub(r\"[-_.]+\", \"\", s).lower()\n\ndef read_pyproject(root: pathlib.Path):\n cfg = {}\n p = root / \"pyproject.toml\"\n if toml and p.exists():\n try:\n cfg = toml.loads(p.read_text(encoding=\"utf-8\"))\n except Exception:\n pass\n return cfg\n\ndef read_setup_cfg(root: pathlib.Path):\n p = root / \"setup.cfg\"\n cp = configparser.ConfigParser()\n if p.exists():\n try:\n cp.read(p, encoding=\"utf-8\")\n except Exception:\n pass\n return cp\n\ndef dist_name_from_config(pyproject, setup_cfg):\n # PEP 621 name\n name = (pyproject.get(\"project\", {}) or {}).get(\"name\")\n if not name:\n # setup.cfg 
[metadata] name\n if setup_cfg.has_section(\"metadata\"):\n name = setup_cfg.get(\"metadata\", \"name\", fallback=None)\n # setup.py --name as last resort\n return name\n\ndef package_roots_from_config(root, pyproject, setup_cfg):\n roots = set([root])\n # setuptools package-dir mapping\n # pyproject: [tool.setuptools.package-dir] \"\" = \"src\"\n pkgdir = ((pyproject.get(\"tool\", {}) or {}).get(\"setuptools\", {}) or {}).get(\"package-dir\", {})\n if isinstance(pkgdir, dict):\n if \"\" in pkgdir:\n roots.add((root / pkgdir[\"\"]).resolve())\n for _, d in pkgdir.items():\n try:\n roots.add((root / d).resolve())\n except Exception:\n pass\n # setup.cfg [options] package_dir\n if setup_cfg.has_section(\"options\"):\n raw = setup_cfg.get(\"options\", \"package_dir\", fallback=None)\n if raw:\n # can be \"=\\nsrc\" or mapping lines\n lines = [l.strip() for l in raw.splitlines() if l.strip()]\n # accept simple \"=src\" or \"\" = \"src\"\n for ln in lines:\n m = re.match(r'^(\"?\\'?)*\\s*=?\\s*(\"?\\'?)*\\s*(?P[^#;]+)$', ln)\n if m:\n roots.add((root / m.group(\"path\").strip()).resolve())\n # setup.cfg [options.packages.find] where\n if setup_cfg.has_section(\"options.packages.find\"):\n where = setup_cfg.get(\"options.packages.find\", \"where\", fallback=None)\n if where:\n for w in re.split(r\"[,\\s]+\", where):\n if w:\n roots.add((root / w).resolve())\n return [r for r in roots if r.exists()]\n\ndef explicit_modules_from_config(pyproject, setup_cfg):\n mods = set()\n # pyproject (tool.setuptools) py-modules / packages\n st = ((pyproject.get(\"tool\", {}) or {}).get(\"setuptools\", {}) or {})\n for key in (\"py-modules\", \"packages\"):\n val = st.get(key)\n if isinstance(val, list):\n mods.update(val)\n # setup.cfg [options] py_modules / packages\n if setup_cfg.has_section(\"options\"):\n for key in (\"py_modules\", \"packages\"):\n raw = setup_cfg.get(\"options\", key, fallback=None)\n if raw:\n for tok in re.split(r\"[\\s,]+\", raw.strip()):\n if tok and tok != \"find:\":\n mods.add(tok)\n return sorted(mods)\n\ndef read_top_level_from_egg_info(root):\n # editable installs often leave ./.egg-info/top_level.txt\n for ei in root.rglob(\"*.egg-info\"):\n tl = ei / \"top_level.txt\"\n if tl.exists():\n try:\n names = [l.strip() for l in tl.read_text(encoding=\"utf-8\").splitlines() if l.strip()]\n if names:\n return names\n except Exception:\n pass\n # also consider dist-info during local builds\n for di in root.rglob(\"*.dist-info\"):\n tl = di / \"top_level.txt\"\n if tl.exists():\n try:\n names = [l.strip() for l in tl.read_text(encoding=\"utf-8\").splitlines() if l.strip()]\n if names:\n return names\n except Exception:\n pass\n return None\n\ndef walk_candidates(roots):\n \"\"\"Return set of plausible top-level import names under candidate roots.\"\"\"\n cands = set()\n for r in roots:\n for path in r.rglob(\"__init__.py\"):\n try:\n pkg_dir = path.parent\n # skip excluded dirs anywhere in the path\n if any(part in EXCLUDE_DIRS for part in pkg_dir.parts):\n continue\n # Construct package name relative to the nearest search root\n try:\n rel = pkg_dir.relative_to(r)\n except Exception:\n continue\n if not rel.parts:\n continue\n top = rel.parts[0]\n if top.startswith(\"_\"):\n # usually private tooling\n continue\n cands.add(top)\n except Exception:\n pass\n # standalone modules at top-level of roots (py_modules case)\n for mod in r.glob(\"*.py\"):\n if mod.stem not in (\"setup\",):\n cands.add(mod.stem)\n return sorted(cands)\n\ndef score_candidates(cands, dist_name):\n \"\"\"Assign a 
score preferring names that match the dist name.\"\"\"\n scores = {}\n n_dist = _norm(dist_name) if dist_name else None\n prefer = None\n if dist_name and dist_name.lower() in EXCEPTIONS:\n prefer = EXCEPTIONS[dist_name.lower()]\n # also try normalized exception keys (e.g. capitalization)\n for k, v in EXCEPTIONS.items():\n if _norm(k) == _norm(dist_name or \"\"):\n prefer = v\n\n for c in cands:\n s = 0\n if prefer and _norm(c) == _norm(prefer):\n s += 100\n if n_dist and _norm(c) == n_dist:\n s += 80\n if n_dist and (_norm(c).startswith(n_dist) or n_dist.startswith(_norm(c))):\n s += 20\n # shorter, simpler names get a slight bump\n s += max(0, 10 - len(c))\n scores[c] = s\n return sorted(cands, key=lambda x: (-scores.get(x, 0), x)), scores\n\ndef detect(root: str, return_all=False):\n root = pathlib.Path(root).resolve()\n\n pyproject = read_pyproject(root)\n setup_cfg = read_setup_cfg(root)\n dist_name = dist_name_from_config(pyproject, setup_cfg)\n\n # 1) top_level.txt (best signal if present)\n top = read_top_level_from_egg_info(root)\n if top:\n if return_all:\n return top\n # If multiple, score them\n ordered, _ = score_candidates(top, dist_name or \"\")\n return [ordered[0]]\n\n # 2) explicit declarations (py_modules / packages lists)\n explicit = explicit_modules_from_config(pyproject, setup_cfg)\n\n # 3) find correct search roots (src layout, package_dir, etc.)\n roots = package_roots_from_config(root, pyproject, setup_cfg)\n\n # 4) walk code to infer candidates\n walked = walk_candidates(roots)\n\n # merge explicit + walked\n cands = list(dict.fromkeys(explicit + walked)) # keep order & de-dup\n\n # 5) fallback from dist name heuristics/exceptions if still empty\n if not cands and dist_name:\n # exception or simple normalization\n guess = EXCEPTIONS.get(dist_name.lower()) or re.sub(r\"[-\\.]+\", \"_\", dist_name)\n cands = [guess]\n\n if not cands:\n return []\n\n if return_all:\n # return ordered list\n ordered, _ = score_candidates(cands, dist_name or \"\")\n return ordered\n else:\n ordered, _ = score_candidates(cands, dist_name or \"\")\n return [ordered[0]]\n\ndef main():\n ap = argparse.ArgumentParser(description=\"Detect the top-level Python import name for a repo.\")\n ap.add_argument(\"--repo-root\", default=\".\", help=\"Path to repository root\")\n ap.add_argument(\"--all\", action=\"store_true\", help=\"Print all plausible names (JSON list)\")\n args = ap.parse_args()\n\n names = detect(args.repo_root, return_all=args.all)\n if not names:\n sys.exit(1)\n if args.all:\n print(json.dumps(names))\n else:\n print(names[0])\n\nif __name__ == \"__main__\":\n main()\nPY\n chmod +x /usr/local/bin/detect_import_name\n}\n\ninstall_detect_import_name\n\n# -------- Script body --------\n\ninstall_profile_helpers\n# shellcheck disable=SC1091\nsource /etc/profile.d/asv_utils.sh\n\n# Ensure base micromamba is active for introspecting ASV config\nmicromamba activate base\n\ninstall_detect_import_name\ninstall_smokecheck\n\nIMPORT_NAME=\"$(detect_import_name || true)\"\nif [[ -z \"$IMPORT_NAME\" ]]; then\n echo \"WARN: Could not determine import name; the pkg stage will fall back to local detection.\"\nfi\n\n\n# Move into the directory that contains asv.*.json\ncd_asv_json_dir || { echo \"No 'asv.*.json' file found.\" >&2; exit 1; }\n\nCONF_NAME=\"$(asv_conf_name || true)\"\nif [[ -z \"${CONF_NAME:-}\" ]]; then\n echo \"No 'asv.*.json' file found.\" >&2\n exit 1\nfi\n\n# Make sure tomli is available in base for pyproject parsing\nmicromamba install -y -n base -c conda-forge tomli 
>/dev/null\n\n# Read python versions from the ASV config\nPY_VERSIONS=$(python - <&2\n exit 1\nfi\n###### END SETUP CODE ######\n\n# -----------------------------\n# Agent guidance (read-first)\n# -----------------------------\n# GOAL: For each Python version below, install the project in EDITABLE mode into env asv_{version},\n# with NO build isolation, then run health checks.\n#\n# Below this comment, you should do whatever is necessary to build the project without errors. Including (but not limited to):\n# - Add extra conda/pip dependencies needed to build this project.\n# - Run repo-specific pre-steps (e.g., submodules, generating Cython, env vars).\n# - Run arbitrary micromamba/pip commands in the target env.\n# - Set CFLAGS/CXXFLAGS/LDFLAGS if needed for this repo.\n# - Change files in the repo if needed (e.g., fix a missing #include).\n# - Anything else needed to get a successful editable install.\n#\n# MUST:\n# - Keep this script idempotent.\n# - Use: `pip install --no-build-isolation -v -e .` or `pip install -e .` or equivalent.\n# - Do not modify the SETUP CODE or helper functions below.\n#\n# DO NOT:\n# - Change env names or Python versions outside MODEL EDIT AREA.\n# - Use build isolation unless absolutely necessary.\n\n# -----------------------------\n# Helpers (do not modify)\n# -----------------------------\nlog() { printf \"\u001b[1;34m[build]\u001b[0m %s\n\" \"$*\"; }\nwarn() { printf \"\u001b[1;33m[warn]\u001b[0m %s\n\" \"$*\" >&2; }\ndie() { printf \"\u001b[1;31m[fail]\u001b[0m %s\n\" \"$*\" >&2; exit 1; }\n\n# Conservative default parallelism (override if the repo benefits)\nexport CMAKE_BUILD_PARALLEL_LEVEL=\"${CMAKE_BUILD_PARALLEL_LEVEL:-2}\"\nexport NPY_NUM_BUILD_JOBS=\"${NPY_NUM_BUILD_JOBS:-2}\"\n\n# -----------------------------\n# Build & test across envs\n# -----------------------------\nfor version in $TARGET_VERSIONS; do\n ENV_NAME=\"asv_${version}\"\n log \"==> Building in env: $ENV_NAME (python=$version)\"\n\n if ! micromamba env list | awk '{print $1}' | grep -qx \"$ENV_NAME\"; then\n die \"Env $ENV_NAME not found. Did docker_build_env.sh run?\"\n fi\n\n # Import name resolution (kept simple for the agent)\n IMP=\"${IMPORT_NAME:-}\"\n if [[ -z \"$IMP\" ]]; then\n if ! IMP=\"$(asv_detect_import_name --repo-root \"$REPO_ROOT\" 2>/dev/null)\"; then\n die \"Could not determine import name. 
Set IMPORT_NAME in /etc/profile.d/asv_build_vars.sh\"\n fi\n fi\n log \"Using import name: $IMP\"\n\n # -----------------------------\n # MODEL EDIT AREA: repo-specific tweaks (optional)\n # -----------------------------\n # Examples (uncomment if needed for this repo):\n #\n # log \"Updating submodules\"\n # git -C \"$REPO_ROOT\" submodule update --init --recursive\n #\n # log \"Installing extra system libs via conda-forge\"\n # micromamba install -y -n \"$ENV_NAME\" -c conda-forge 'openblas' 'blas=*=openblas' 'libopenmp'\n #\n # log \"Pre-generating Cython sources\"\n # micromamba run -n \"$ENV_NAME\" python -m cython --version\n #\n # export CFLAGS=\"${CFLAGS:-}\"\n # export CXXFLAGS=\"${CXXFLAGS:-}\"\n # export LDFLAGS=\"${LDFLAGS:-}\"\n # -----------------------------\n\n # Install some basic micromamba packages.\n\n micromamba install -y -n \"$ENV_NAME\" -c conda-forge git conda mamba \"libmambapy<=1.9.9\" numpy scipy cython joblib threadpoolctl pytest compilers\n micromamba run -n \"$ENV_NAME\" pip install -U meson-python \"cython<3\" \"numpy<2\" \"setuptools==60\" \"scipy<1.14\"\n export CFLAGS=\"${CFLAGS:-} -Wno-error=incompatible-pointer-types\"\n\n # Editable install (no build isolation preferrably). Toolchain lives in the env already.\n log \"Editable install with --no-build-isolation\"\n PIP_NO_BUILD_ISOLATION=1 micromamba run -n \"$ENV_NAME\" python -m pip install --no-build-isolation -v -e \"$REPO_ROOT\"\n\n # Health checks (import + compiled extension probe; optional pytest smoke with RUN_PYTEST_SMOKE=1)\n log \"Running smoke checks\"\n micromamba run -n \"$ENV_NAME\" asv_smokecheck.py --import-name \"$IMP\" --repo-root \"$REPO_ROOT\" ${RUN_PYTEST_SMOKE:+--pytest-smoke}\n\n # Machine-readable markers (useful in logs)\n echo \"::import_name=${IMP}::env=${ENV_NAME}\"\ndone\n\nlog \"All builds complete \u2705\"", - "dockerfile_data": "# syntax=docker/dockerfile:1.7\n\nFROM buildpack-deps:jammy AS base\n\nARG REPO_URL\nARG COMMIT_SHA\n\nRUN apt-get update && \\\n apt-get install -y --no-install-recommends \\\n curl git build-essential jq cmake ninja-build && \\\n rm -rf /var/lib/apt/lists/*\n\nRUN curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest \\\n | tar -xvj -C /usr/local/bin --strip-components=1 bin/micromamba\n\nENV MAMBA_ROOT_PREFIX=/opt/conda \\\n PATH=/opt/conda/bin:$PATH \\\n MAMBA_DOCKERFILE_ACTIVATE=1 \\\n OPENBLAS_NUM_THREADS=1 \\\n MKL_NUM_THREADS=1 \\\n OMP_NUM_THREADS=1\n\nRUN micromamba install -y -p $MAMBA_ROOT_PREFIX -c conda-forge \\\n python=3.10 \\\n git asv pyperf mamba conda libmambapy jq && \\\n micromamba clean --all --yes\n\nRUN mkdir -p /workspace /output\nWORKDIR /workspace\n\nFROM base AS env\nARG REPO_URL\nARG COMMIT_SHA\n\n# Entrypoint is inherited by pkg\nCOPY entrypoint.sh /entrypoint.sh\nRUN chmod +x /entrypoint.sh\nENTRYPOINT [\"/entrypoint.sh\"]\n\nRUN git clone \"$REPO_URL\" /workspace/repo\nWORKDIR /workspace/repo\nRUN git checkout \"$COMMIT_SHA\"\nLABEL vcs.url=\"$REPO_URL\" vcs.ref=\"$COMMIT_SHA\"\n\n# ---- ENV STAGE: create envs, persist vars, install smoke tool ----\nCOPY docker_build_env.sh /workspace/repo/docker_build_env.sh\nRUN chmod +x /workspace/repo/docker_build_env.sh && \\\n /workspace/repo/docker_build_env.sh\n\n\nFROM env AS pkg\n\n# ---- PKG STAGE: build+test the package for each ASV Python ----\nCOPY docker_build_pkg.sh /workspace/repo/docker_build_pkg.sh\nRUN chmod +x /workspace/repo/docker_build_pkg.sh && \\\n /workspace/repo/docker_build_pkg.sh\n# If you want to restrict to one version at build time, 
replace with:\n# RUN PY_VERSION=3.10 /workspace/repo/docker_build_pkg.sh\n", - "entrypoint_data": "#!/usr/bin/env bash\n# set -euo pipefail\nset -x\n: \"${ASV_ARGS:?Need to set ASV_ARGS}\"\n: \"${ASV_MACHINE:=?Need to set ASV_MACHINE}\"\n: \"${ASV_OS:=?Need to set ASV_OS}\"\n: \"${ASV_NUM_CPU:=?Need to set ASV_NUM_CPU}\"\n: \"${ASV_ARCH:=?Need to set ASV_ARCH}\"\n: \"${ASV_CPU:=?Need to set ASV_CPU}\"\n: \"${ASV_RAM:=?Need to set ASV_RAM}\"\n\n\ncd_asv_json_dir() {\n local match\n match=$(find . -type f -name \"asv.*.json\" | head -n 1)\n\n if [[ -n \"$match\" ]]; then\n local dir\n dir=$(dirname \"$match\")\n cd \"$dir\" || echo \"Failed to change directory to $dir\"\n else\n echo \"No 'asv.*.json' file found in current directory or subdirectories.\"\n fi\n}\n\neval \"$(micromamba shell hook --shell=bash)\"\n\nmicromamba activate base\nROOT_PATH=${PWD}\ncd_asv_json_dir || exit 1\n\n# the conf name is one of \"asv.conf.json\" or \"asv.ci.conf.json\" or \"asv.*.json\"\nCONF_NAME=$(basename \"$(find . -type f -name \"asv.*.json\" | head -n 1)\")\nif [[ -z \"$CONF_NAME\" ]]; then\n echo \"No 'asv.*.json' file found in current directory or subdirectories.\"\n exit 1\nfi\n\n# Read the python versions from the asv.conf.json\npython_versions=$(python -c \"import asv; pythons = asv.config.Config.load('$CONF_NAME').pythons; print(' '.join(pythons))\")\n# change the \"results_dir\" in asv.conf.json to \"/output/{COMMIT_SHA}/\"\nfor version in $python_versions; do\n # Create per\u2011Python env and install ASV\n python -c \"import asv, os, pathlib\npath = pathlib.Path('/output/'\\\"$COMMIT_SHA\\\"'/''\\\"$version\\\"')\npath.mkdir(parents=True, exist_ok=True)\n\nconfig = asv.config.Config.load('$CONF_NAME')\nconfig.results_dir = str(path / 'results')\nconfig.html_dir = str(path / 'html')\nconfig.branches = ['HEAD']\n\nasv.util.write_json('$CONF_NAME', config.__dict__, api_version=config.api_version)\nasv.util.write_json(path / '$CONF_NAME', config.__dict__, api_version=config.api_version)\n\"\n micromamba run -n \"asv_${version}\" asv machine --yes --config $CONF_NAME --machine \"$ASV_MACHINE\" --os \"$ASV_OS\" --num_cpu \"$ASV_NUM_CPU\" --arch \"$ASV_ARCH\" --cpu \"$ASV_CPU\" --ram \"$ASV_RAM\"\n micromamba run -n \"asv_${version}\" asv run --show-stderr ${ASV_ARGS} --config $CONF_NAME\ndone\n\necho \"Benchmarks complete.\"\n", - "env_building_data": "#!/usr/bin/env bash\nset -euo pipefail\n\n# -------- Helpers installed for all shells --------\ninstall_profile_helpers() {\n cat >/etc/profile.d/asv_utils.sh <<'EOF'\n# asv_utils.sh \u2014 login/interactive shell helpers for ASV builds\nexport MAMBA_ROOT_PREFIX=\"${MAMBA_ROOT_PREFIX:-/opt/conda}\"\n\n# Initialize micromamba for bash shells (no-op if not present)\nif command -v micromamba >/dev/null 2>&1; then\n eval \"$(micromamba shell hook --shell=bash)\"\nfi\n\n# Find and cd into the first directory that contains an asv.*.json\ncd_asv_json_dir() {\n local match\n match=$(find . -type f -name \"asv.*.json\" | head -n 1)\n if [[ -n \"$match\" ]]; then\n cd \"$(dirname \"$match\")\" || echo \"Failed to change directory\"\n else\n echo \"No 'asv.*.json' file found in current directory or subdirectories.\"\n return 1\n fi\n}\n\n# Return just the conf filename (e.g., asv.conf.json)\nasv_conf_name() {\n local f\n f=$(find . 
-type f -name \"asv.*.json\" | head -n 1)\n [[ -n \"$f\" ]] && basename \"$f\" || return 1\n}\n\n# Build performance knobs (overridable)\nexport MAKEFLAGS=\"${MAKEFLAGS:--j$(nproc)}\"\nexport CMAKE_BUILD_PARALLEL_LEVEL=\"${CMAKE_BUILD_PARALLEL_LEVEL:-$(nproc)}\"\nexport NPY_NUM_BUILD_JOBS=\"${NPY_NUM_BUILD_JOBS:-$(nproc)}\"\n\n# Shared pip cache to speed repeated editable builds\nexport PIP_CACHE_DIR=\"${PIP_CACHE_DIR:-/opt/pipcache}\"\nmkdir -p \"$PIP_CACHE_DIR\"\nEOF\n}\n\n# -------- Persisted build variables --------\nwrite_build_vars() {\n local py_versions=\"$1\"\n local import_name=\"$2\"\n\n mkdir -p /etc/asv_env\n echo \"$py_versions\" > /etc/asv_env/py_versions\n echo \"$import_name\" > /etc/asv_env/import_name\n\n # Exported for every future shell (pkg script, interactive, etc.)\n cat >/etc/profile.d/asv_build_vars.sh </usr/local/bin/asv_smokecheck.py <<'PY'\n#!/usr/bin/env python\nimport argparse, importlib, pathlib, sys\n\ndef import_and_version(name: str):\n m = importlib.import_module(name)\n ver = getattr(m, \"__version__\", \"unknown\")\n print(f\"{name} imported ok; __version__={ver}\")\n\ndef probe_compiled(name: str, max_ext: int = 10):\n m = importlib.import_module(name)\n if not hasattr(m, \"__path__\"):\n print(\"No package __path__ (likely a single-module dist); skipping compiled probe.\")\n return\n pkg_path = pathlib.Path(list(m.__path__)[0])\n so_like = list(pkg_path.rglob(\"*.so\")) + list(pkg_path.rglob(\"*.pyd\"))\n failed = []\n for ext in so_like[:max_ext]:\n rel = ext.relative_to(pkg_path).with_suffix(\"\")\n dotted = \".\".join([name] + list(rel.parts))\n try:\n importlib.import_module(dotted)\n except Exception as e:\n failed.append((dotted, str(e)))\n if failed:\n print(\"Some compiled submodules failed to import:\")\n for d, err in failed:\n print(\" -\", d, \"->\", err)\n sys.exit(1)\n else:\n print(\"Compiled submodules (if any) import ok\")\n\ndef main():\n p = argparse.ArgumentParser()\n p.add_argument(\"--import-name\", required=True)\n p.add_argument(\"--repo-root\", default=\".\")\n p.add_argument(\"--pytest-smoke\", action=\"store_true\",\n help=\"Run a quick pytest smoke: -k 'not slow' --maxfail=1\")\n p.add_argument(\"--max-ext\", type=int, default=10)\n args = p.parse_args()\n\n import_and_version(args.import_name)\n probe_compiled(args.import_name, max_ext=args.max_ext)\n\n if args.pytest_smoke:\n import subprocess, os\n if any((pathlib.Path(args.repo_root)/p).exists() for p in (\"tests\", \"pytest.ini\", \"pyproject.toml\")):\n print(\"Running pytest smoke...\")\n rc = subprocess.call([sys.executable, \"-m\", \"pytest\", \"-q\", \"-k\", \"not slow\", \"--maxfail=1\"], cwd=args.repo_root)\n if rc != 0:\n sys.exit(rc)\n else:\n print(\"No tests detected; skipping pytest smoke.\")\n print(\"Smokecheck OK \u2705\")\n\nif __name__ == \"__main__\":\n main()\nPY\n chmod +x /usr/local/bin/asv_smokecheck.py\n}\ninstall_smokecheck\n\n# -------- Install an import-name detector CLI --------\ninstall_detect_import_name() {\n cat >/usr/local/bin/detect_import_name <<'PY'\n#!/usr/bin/env python\nimport argparse, pathlib, re, sys, subprocess, configparser, json\n\n# --- optional TOML loader (py3.11+: tomllib; else tomli if available) ---\ntry:\n import tomllib as toml\nexcept Exception:\n try:\n import tomli as toml\n except Exception:\n toml = None\n\nEXCEPTIONS = {\n # common dist\u2192import mismatches\n \"scikit-learn\": \"sklearn\",\n \"opencv-python\": \"cv2\",\n \"pyyaml\": \"yaml\",\n \"beautifulsoup4\": \"bs4\",\n \"pillow\": \"PIL\",\n 
\"mysqlclient\": \"MySQLdb\",\n \"psycopg2-binary\": \"psycopg2\",\n \"opencv-contrib-python\": \"cv2\",\n \"protobuf\": \"google\", # top-level package\n \"apache-beam\": \"apache_beam\",\n}\n\n# All the package names we typically query.\nEXCEPTIONS.update({\n # --- core scientific stack ---\n \"scikit-learn\": \"sklearn\",\n \"numpy\": \"numpy\",\n \"pandas\": \"pandas\",\n \"scipy\": \"scipy\",\n \"scikit-image\": \"skimage\",\n \"pywt\": \"pywt\",\n \"xarray\": \"xarray\",\n \"bottleneck\": \"bottleneck\",\n \"h5py\": \"h5py\",\n \"networkx\": \"networkx\",\n \"shapely\": \"shapely\",\n\n # --- ML / stats / optimization / viz ---\n \"optuna\": \"optuna\",\n \"arviz\": \"arviz\",\n \"pymc\": \"pymc\",\n \"kedro\": \"kedro\",\n \"modin\": \"modin\",\n \"napari\": \"napari\",\n \"deepchecks\": \"deepchecks\",\n \"voyager\": \"voyager\", # spotify/voyager\n \"warp\": \"warp\", # NVIDIA/warp\n \"newton\": \"newton\", # newton-physics/newton\n\n # --- domain / ecosystem libs ---\n \"geopandas\": \"geopandas\",\n \"cartopy\": \"cartopy\",\n \"iris\": \"iris\",\n \"anndata\": \"anndata\",\n \"scanpy\": \"scanpy\",\n \"sunpy\": \"sunpy\",\n \"pvlib-python\": \"pvlib\",\n \"PyBaMM\": \"pybamm\",\n \"momepy\": \"momepy\",\n \"satpy\": \"satpy\",\n \"pydicom\": \"pydicom\",\n \"pynetdicom\": \"pynetdicom\",\n\n # --- file formats / IO / infra ---\n \"asdf\": \"asdf\",\n \"arrow\": \"pyarrow\", # apache/arrow\n \"ArcticDB\": \"arcticdb\",\n \"arctic\": \"arctic\",\n\n # --- web / frameworks / utils ---\n \"django-components\": \"django_components\",\n \"h11\": \"h11\",\n \"tqdm\": \"tqdm\",\n \"rich\": \"rich\",\n \"posthog\": \"posthog\",\n \"datalad\": \"datalad\",\n \"ipyparallel\": \"ipyparallel\",\n\n # --- numerical / symbolic / control ---\n \"autograd\": \"autograd\",\n \"python-control\": \"control\",\n \"loopy\": \"loopy\",\n \"thermo\": \"thermo\",\n \"chempy\": \"chempy\",\n \"adaptive\": \"adaptive\",\n\n # --- scientific image / signal ---\n \"metric-learn\": \"metric_learn\",\n\n # --- quantum / physics ---\n \"Cirq\": \"cirq\",\n \"memray\": \"memray\",\n \"devito\": \"devito\",\n\n # --- bio / chem / data ---\n \"sourmash\": \"sourmash\",\n \"dipy\": \"dipy\",\n\n # --- protocol buffers / codegen / outlines ---\n \"python-betterproto\": \"betterproto\",\n \"outlines\": \"outlines\",\n\n # --- DS viz / raster ---\n \"datashader\": \"datashader\",\n \"xarray-spatial\": \"xarray_spatial\",\n\n # --- misc ---\n \"enlighten\": \"enlighten\",\n \"xorbits\": \"xorbits\",\n \"geopandas\": \"geopandas\",\n \"lmfit-py\": \"lmfit\",\n \"mdanalysis\": \"MDAnalysis\",\n \"nilearn\": \"nilearn\",\n})\n\n\nEXCLUDE_DIRS = {\n \".git\", \".hg\", \".svn\", \".tox\", \".nox\", \".venv\", \"venv\",\n \"build\", \"dist\", \"__pycache__\", \".mypy_cache\", \".pytest_cache\",\n \"docs\", \"doc\", \"site\", \"examples\", \"benchmarks\", \"tests\", \"testing\",\n}\n\ndef _norm(s: str) -> str:\n return re.sub(r\"[-_.]+\", \"\", s).lower()\n\ndef read_pyproject(root: pathlib.Path):\n cfg = {}\n p = root / \"pyproject.toml\"\n if toml and p.exists():\n try:\n cfg = toml.loads(p.read_text(encoding=\"utf-8\"))\n except Exception:\n pass\n return cfg\n\ndef read_setup_cfg(root: pathlib.Path):\n p = root / \"setup.cfg\"\n cp = configparser.ConfigParser()\n if p.exists():\n try:\n cp.read(p, encoding=\"utf-8\")\n except Exception:\n pass\n return cp\n\ndef dist_name_from_config(pyproject, setup_cfg):\n # PEP 621 name\n name = (pyproject.get(\"project\", {}) or {}).get(\"name\")\n if not name:\n # setup.cfg 
[metadata] name\n if setup_cfg.has_section(\"metadata\"):\n name = setup_cfg.get(\"metadata\", \"name\", fallback=None)\n # setup.py --name as last resort\n return name\n\ndef package_roots_from_config(root, pyproject, setup_cfg):\n roots = set([root])\n # setuptools package-dir mapping\n # pyproject: [tool.setuptools.package-dir] \"\" = \"src\"\n pkgdir = ((pyproject.get(\"tool\", {}) or {}).get(\"setuptools\", {}) or {}).get(\"package-dir\", {})\n if isinstance(pkgdir, dict):\n if \"\" in pkgdir:\n roots.add((root / pkgdir[\"\"]).resolve())\n for _, d in pkgdir.items():\n try:\n roots.add((root / d).resolve())\n except Exception:\n pass\n # setup.cfg [options] package_dir\n if setup_cfg.has_section(\"options\"):\n raw = setup_cfg.get(\"options\", \"package_dir\", fallback=None)\n if raw:\n # can be \"=\\nsrc\" or mapping lines\n lines = [l.strip() for l in raw.splitlines() if l.strip()]\n # accept simple \"=src\" or \"\" = \"src\"\n for ln in lines:\n m = re.match(r'^(\"?\\'?)*\\s*=?\\s*(\"?\\'?)*\\s*(?P[^#;]+)$', ln)\n if m:\n roots.add((root / m.group(\"path\").strip()).resolve())\n # setup.cfg [options.packages.find] where\n if setup_cfg.has_section(\"options.packages.find\"):\n where = setup_cfg.get(\"options.packages.find\", \"where\", fallback=None)\n if where:\n for w in re.split(r\"[,\\s]+\", where):\n if w:\n roots.add((root / w).resolve())\n return [r for r in roots if r.exists()]\n\ndef explicit_modules_from_config(pyproject, setup_cfg):\n mods = set()\n # pyproject (tool.setuptools) py-modules / packages\n st = ((pyproject.get(\"tool\", {}) or {}).get(\"setuptools\", {}) or {})\n for key in (\"py-modules\", \"packages\"):\n val = st.get(key)\n if isinstance(val, list):\n mods.update(val)\n # setup.cfg [options] py_modules / packages\n if setup_cfg.has_section(\"options\"):\n for key in (\"py_modules\", \"packages\"):\n raw = setup_cfg.get(\"options\", key, fallback=None)\n if raw:\n for tok in re.split(r\"[\\s,]+\", raw.strip()):\n if tok and tok != \"find:\":\n mods.add(tok)\n return sorted(mods)\n\ndef read_top_level_from_egg_info(root):\n # editable installs often leave ./.egg-info/top_level.txt\n for ei in root.rglob(\"*.egg-info\"):\n tl = ei / \"top_level.txt\"\n if tl.exists():\n try:\n names = [l.strip() for l in tl.read_text(encoding=\"utf-8\").splitlines() if l.strip()]\n if names:\n return names\n except Exception:\n pass\n # also consider dist-info during local builds\n for di in root.rglob(\"*.dist-info\"):\n tl = di / \"top_level.txt\"\n if tl.exists():\n try:\n names = [l.strip() for l in tl.read_text(encoding=\"utf-8\").splitlines() if l.strip()]\n if names:\n return names\n except Exception:\n pass\n return None\n\ndef walk_candidates(roots):\n \"\"\"Return set of plausible top-level import names under candidate roots.\"\"\"\n cands = set()\n for r in roots:\n for path in r.rglob(\"__init__.py\"):\n try:\n pkg_dir = path.parent\n # skip excluded dirs anywhere in the path\n if any(part in EXCLUDE_DIRS for part in pkg_dir.parts):\n continue\n # Construct package name relative to the nearest search root\n try:\n rel = pkg_dir.relative_to(r)\n except Exception:\n continue\n if not rel.parts:\n continue\n top = rel.parts[0]\n if top.startswith(\"_\"):\n # usually private tooling\n continue\n cands.add(top)\n except Exception:\n pass\n # standalone modules at top-level of roots (py_modules case)\n for mod in r.glob(\"*.py\"):\n if mod.stem not in (\"setup\",):\n cands.add(mod.stem)\n return sorted(cands)\n\ndef score_candidates(cands, dist_name):\n \"\"\"Assign a 
score preferring names that match the dist name.\"\"\"\n scores = {}\n n_dist = _norm(dist_name) if dist_name else None\n prefer = None\n if dist_name and dist_name.lower() in EXCEPTIONS:\n prefer = EXCEPTIONS[dist_name.lower()]\n # also try normalized exception keys (e.g. capitalization)\n for k, v in EXCEPTIONS.items():\n if _norm(k) == _norm(dist_name or \"\"):\n prefer = v\n\n for c in cands:\n s = 0\n if prefer and _norm(c) == _norm(prefer):\n s += 100\n if n_dist and _norm(c) == n_dist:\n s += 80\n if n_dist and (_norm(c).startswith(n_dist) or n_dist.startswith(_norm(c))):\n s += 20\n # shorter, simpler names get a slight bump\n s += max(0, 10 - len(c))\n scores[c] = s\n return sorted(cands, key=lambda x: (-scores.get(x, 0), x)), scores\n\ndef detect(root: str, return_all=False):\n root = pathlib.Path(root).resolve()\n\n pyproject = read_pyproject(root)\n setup_cfg = read_setup_cfg(root)\n dist_name = dist_name_from_config(pyproject, setup_cfg)\n\n # 1) top_level.txt (best signal if present)\n top = read_top_level_from_egg_info(root)\n if top:\n if return_all:\n return top\n # If multiple, score them\n ordered, _ = score_candidates(top, dist_name or \"\")\n return [ordered[0]]\n\n # 2) explicit declarations (py_modules / packages lists)\n explicit = explicit_modules_from_config(pyproject, setup_cfg)\n\n # 3) find correct search roots (src layout, package_dir, etc.)\n roots = package_roots_from_config(root, pyproject, setup_cfg)\n\n # 4) walk code to infer candidates\n walked = walk_candidates(roots)\n\n # merge explicit + walked\n cands = list(dict.fromkeys(explicit + walked)) # keep order & de-dup\n\n # 5) fallback from dist name heuristics/exceptions if still empty\n if not cands and dist_name:\n # exception or simple normalization\n guess = EXCEPTIONS.get(dist_name.lower()) or re.sub(r\"[-\\.]+\", \"_\", dist_name)\n cands = [guess]\n\n if not cands:\n return []\n\n if return_all:\n # return ordered list\n ordered, _ = score_candidates(cands, dist_name or \"\")\n return ordered\n else:\n ordered, _ = score_candidates(cands, dist_name or \"\")\n return [ordered[0]]\n\ndef main():\n ap = argparse.ArgumentParser(description=\"Detect the top-level Python import name for a repo.\")\n ap.add_argument(\"--repo-root\", default=\".\", help=\"Path to repository root\")\n ap.add_argument(\"--all\", action=\"store_true\", help=\"Print all plausible names (JSON list)\")\n args = ap.parse_args()\n\n names = detect(args.repo_root, return_all=args.all)\n if not names:\n sys.exit(1)\n if args.all:\n print(json.dumps(names))\n else:\n print(names[0])\n\nif __name__ == \"__main__\":\n main()\nPY\n chmod +x /usr/local/bin/detect_import_name\n}\n\ninstall_detect_import_name\n\n# -------- Script body --------\n\ninstall_profile_helpers\n# shellcheck disable=SC1091\nsource /etc/profile.d/asv_utils.sh\n\n# Ensure base micromamba is active for introspecting ASV config\nmicromamba activate base\n\ninstall_detect_import_name\ninstall_smokecheck\n\nIMPORT_NAME=\"$(detect_import_name || true)\"\nif [[ -z \"$IMPORT_NAME\" ]]; then\n echo \"WARN: Could not determine import name; the pkg stage will fall back to local detection.\"\nfi\n\n\n# Move into the directory that contains asv.*.json\ncd_asv_json_dir || { echo \"No 'asv.*.json' file found.\" >&2; exit 1; }\n\nCONF_NAME=\"$(asv_conf_name || true)\"\nif [[ -z \"${CONF_NAME:-}\" ]]; then\n echo \"No 'asv.*.json' file found.\" >&2\n exit 1\nfi\n\n# Make sure tomli is available in base for pyproject parsing\nmicromamba install -y -n base -c conda-forge tomli 
>/dev/null\n\n# Read python versions from the ASV config\nPY_VERSIONS=$(python - <&2\n exit 1\nfi\n###### END SETUP CODE ######\n\n# -----------------------------\n# Agent guidance (read-first)\n# -----------------------------\n# GOAL: For each Python version below, install the project in EDITABLE mode into env asv_{version},\n# with NO build isolation, then run health checks.\n#\n# Below this comment, you should do whatever is necessary to build the project without errors. Including (but not limited to):\n# - Add extra conda/pip dependencies needed to build this project.\n# - Run repo-specific pre-steps (e.g., submodules, generating Cython, env vars).\n# - Run arbitrary micromamba/pip commands in the target env.\n# - Set CFLAGS/CXXFLAGS/LDFLAGS if needed for this repo.\n# - Change files in the repo if needed (e.g., fix a missing #include).\n# - Anything else needed to get a successful editable install.\n#\n# MUST:\n# - Keep this script idempotent.\n# - Use: `pip install --no-build-isolation -v -e .` or `pip install -e .` or equivalent.\n# - Do not modify the SETUP CODE or helper functions below.\n#\n# DO NOT:\n# - Change env names or Python versions outside MODEL EDIT AREA.\n# - Use build isolation unless absolutely necessary.\n\n# -----------------------------\n# Helpers (do not modify)\n# -----------------------------\nlog() { printf \"\u001b[1;34m[build]\u001b[0m %s\n\" \"$*\"; }\nwarn() { printf \"\u001b[1;33m[warn]\u001b[0m %s\n\" \"$*\" >&2; }\ndie() { printf \"\u001b[1;31m[fail]\u001b[0m %s\n\" \"$*\" >&2; exit 1; }\n\n# Conservative default parallelism (override if the repo benefits)\nexport CMAKE_BUILD_PARALLEL_LEVEL=\"${CMAKE_BUILD_PARALLEL_LEVEL:-2}\"\nexport NPY_NUM_BUILD_JOBS=\"${NPY_NUM_BUILD_JOBS:-2}\"\n\n# -----------------------------\n# Build & test across envs\n# -----------------------------\nfor version in $TARGET_VERSIONS; do\n ENV_NAME=\"asv_${version}\"\n log \"==> Building in env: $ENV_NAME (python=$version)\"\n\n if ! micromamba env list | awk '{print $1}' | grep -qx \"$ENV_NAME\"; then\n die \"Env $ENV_NAME not found. Did docker_build_env.sh run?\"\n fi\n\n # Import name resolution (kept simple for the agent)\n IMP=\"${IMPORT_NAME:-}\"\n if [[ -z \"$IMP\" ]]; then\n if ! IMP=\"$(asv_detect_import_name --repo-root \"$REPO_ROOT\" 2>/dev/null)\"; then\n die \"Could not determine import name. Set IMPORT_NAME in /etc/profile.d/asv_build_vars.sh\"\n fi\n fi\n log \"Using import name: $IMP\"\n\n # -----------------------------\n # MODEL EDIT AREA: repo-specific tweaks (optional)\n # -----------------------------\n # Examples (uncomment if needed for this repo):\n #\n # log \"Updating submodules\"\n # git -C \"$REPO_ROOT\" submodule update --init --recursive\n #\n # log \"Installing extra system libs via conda-forge\"\n # micromamba install -y -n \"$ENV_NAME\" -c conda-forge 'openblas' 'blas=*=openblas' 'libopenmp'\n #\n # log \"Pre-generating Cython sources\"\n # micromamba run -n \"$ENV_NAME\" python -m cython --version\n #\n # export CFLAGS=\"${CFLAGS:-}\"\n # export CXXFLAGS=\"${CXXFLAGS:-}\"\n # export LDFLAGS=\"${LDFLAGS:-}\"\n # -----------------------------\n\n # Install some basic micromamba packages.\n\n micromamba install -y -n \"$ENV_NAME\" -c conda-forge git conda mamba \"libmambapy<=1.9.9\" numpy scipy cython joblib threadpoolctl pytest compilers\n micromamba run -n \"$ENV_NAME\" pip install meson-python cython\n\n # Editable install (no build isolation preferrably). 
Toolchain lives in the env already.\n log \"Editable install with --no-build-isolation\"\n PIP_NO_BUILD_ISOLATION=1 micromamba run -n \"$ENV_NAME\" python -m pip install --no-build-isolation -v -e \"$REPO_ROOT\"\n\n # Health checks (import + compiled extension probe; optional pytest smoke with RUN_PYTEST_SMOKE=1)\n log \"Running smoke checks\"\n micromamba run -n \"$ENV_NAME\" asv_smokecheck.py --import-name \"$IMP\" --repo-root \"$REPO_ROOT\" ${RUN_PYTEST_SMOKE:+--pytest-smoke}\n\n # Machine-readable markers (useful in logs)\n echo \"::import_name=${IMP}::env=${ENV_NAME}\"\ndone\n\nlog \"All builds complete \u2705\"", - "dockerfile_data": "# syntax=docker/dockerfile:1.7\n\nFROM buildpack-deps:jammy AS base\n\nARG REPO_URL\nARG COMMIT_SHA\n\nRUN apt-get update && \\\n apt-get install -y --no-install-recommends \\\n curl git build-essential jq cmake ninja-build && \\\n rm -rf /var/lib/apt/lists/*\n\nRUN curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest \\\n | tar -xvj -C /usr/local/bin --strip-components=1 bin/micromamba\n\nENV MAMBA_ROOT_PREFIX=/opt/conda \\\n PATH=/opt/conda/bin:$PATH \\\n MAMBA_DOCKERFILE_ACTIVATE=1 \\\n OPENBLAS_NUM_THREADS=1 \\\n MKL_NUM_THREADS=1 \\\n OMP_NUM_THREADS=1\n\nRUN micromamba install -y -p $MAMBA_ROOT_PREFIX -c conda-forge \\\n python=3.10 \\\n git asv pyperf mamba conda libmambapy jq && \\\n micromamba clean --all --yes\n\nRUN mkdir -p /workspace /output\nWORKDIR /workspace\n\nFROM base AS env\nARG REPO_URL\nARG COMMIT_SHA\n\n# Entrypoint is inherited by pkg\nCOPY entrypoint.sh /entrypoint.sh\nRUN chmod +x /entrypoint.sh\nENTRYPOINT [\"/entrypoint.sh\"]\n\nRUN git clone \"$REPO_URL\" /workspace/repo\nWORKDIR /workspace/repo\nRUN git checkout \"$COMMIT_SHA\"\nLABEL vcs.url=\"$REPO_URL\" vcs.ref=\"$COMMIT_SHA\"\n\n# ---- ENV STAGE: create envs, persist vars, install smoke tool ----\nCOPY docker_build_env.sh /workspace/repo/docker_build_env.sh\nRUN chmod +x /workspace/repo/docker_build_env.sh && \\\n /workspace/repo/docker_build_env.sh\n\n\nFROM env AS pkg\n\n# ---- PKG STAGE: build+test the package for each ASV Python ----\nCOPY docker_build_pkg.sh /workspace/repo/docker_build_pkg.sh\nRUN chmod +x /workspace/repo/docker_build_pkg.sh && \\\n /workspace/repo/docker_build_pkg.sh\n# If you want to restrict to one version at build time, replace with:\n# RUN PY_VERSION=3.10 /workspace/repo/docker_build_pkg.sh\n", - "entrypoint_data": "#!/usr/bin/env bash\n# set -euo pipefail\nset -x\n: \"${ASV_ARGS:?Need to set ASV_ARGS}\"\n: \"${ASV_MACHINE:=?Need to set ASV_MACHINE}\"\n: \"${ASV_OS:=?Need to set ASV_OS}\"\n: \"${ASV_NUM_CPU:=?Need to set ASV_NUM_CPU}\"\n: \"${ASV_ARCH:=?Need to set ASV_ARCH}\"\n: \"${ASV_CPU:=?Need to set ASV_CPU}\"\n: \"${ASV_RAM:=?Need to set ASV_RAM}\"\n\n\ncd_asv_json_dir() {\n local match\n match=$(find . -type f -name \"asv.*.json\" | head -n 1)\n\n if [[ -n \"$match\" ]]; then\n local dir\n dir=$(dirname \"$match\")\n cd \"$dir\" || echo \"Failed to change directory to $dir\"\n else\n echo \"No 'asv.*.json' file found in current directory or subdirectories.\"\n fi\n}\n\neval \"$(micromamba shell hook --shell=bash)\"\n\nmicromamba activate base\nROOT_PATH=${PWD}\ncd_asv_json_dir || exit 1\n\n# the conf name is one of \"asv.conf.json\" or \"asv.ci.conf.json\" or \"asv.*.json\"\nCONF_NAME=$(basename \"$(find . 
-type f -name \"asv.*.json\" | head -n 1)\")\nif [[ -z \"$CONF_NAME\" ]]; then\n echo \"No 'asv.*.json' file found in current directory or subdirectories.\"\n exit 1\nfi\n\n# Read the python versions from the asv.conf.json\npython_versions=$(python -c \"import asv; pythons = asv.config.Config.load('$CONF_NAME').pythons; print(' '.join(pythons))\")\n# change the \"results_dir\" in asv.conf.json to \"/output/{COMMIT_SHA}/\"\nfor version in $python_versions; do\n # Create per\u2011Python env and install ASV\n python -c \"import asv, os, pathlib\npath = pathlib.Path('/output/'\\\"$COMMIT_SHA\\\"'/''\\\"$version\\\"')\npath.mkdir(parents=True, exist_ok=True)\n\nconfig = asv.config.Config.load('$CONF_NAME')\nconfig.results_dir = str(path / 'results')\nconfig.html_dir = str(path / 'html')\nconfig.branches = ['HEAD']\n\nasv.util.write_json('$CONF_NAME', config.__dict__, api_version=config.api_version)\nasv.util.write_json(path / '$CONF_NAME', config.__dict__, api_version=config.api_version)\n\"\n micromamba run -n \"asv_${version}\" asv machine --yes --config $CONF_NAME --machine \"$ASV_MACHINE\" --os \"$ASV_OS\" --num_cpu \"$ASV_NUM_CPU\" --arch \"$ASV_ARCH\" --cpu \"$ASV_CPU\" --ram \"$ASV_RAM\"\n micromamba run -n \"asv_${version}\" asv run --show-stderr ${ASV_ARGS} --config $CONF_NAME\ndone\n\necho \"Benchmarks complete.\"\n", - "env_building_data": "#!/usr/bin/env bash\nset -euo pipefail\n\n# -------- Helpers installed for all shells --------\ninstall_profile_helpers() {\n cat >/etc/profile.d/asv_utils.sh <<'EOF'\n# asv_utils.sh \u2014 login/interactive shell helpers for ASV builds\nexport MAMBA_ROOT_PREFIX=\"${MAMBA_ROOT_PREFIX:-/opt/conda}\"\n\n# Initialize micromamba for bash shells (no-op if not present)\nif command -v micromamba >/dev/null 2>&1; then\n eval \"$(micromamba shell hook --shell=bash)\"\nfi\n\n# Find and cd into the first directory that contains an asv.*.json\ncd_asv_json_dir() {\n local match\n match=$(find . -type f -name \"asv.*.json\" | head -n 1)\n if [[ -n \"$match\" ]]; then\n cd \"$(dirname \"$match\")\" || echo \"Failed to change directory\"\n else\n echo \"No 'asv.*.json' file found in current directory or subdirectories.\"\n return 1\n fi\n}\n\n# Return just the conf filename (e.g., asv.conf.json)\nasv_conf_name() {\n local f\n f=$(find . 
-type f -name \"asv.*.json\" | head -n 1)\n [[ -n \"$f\" ]] && basename \"$f\" || return 1\n}\n\n# Build performance knobs (overridable)\nexport MAKEFLAGS=\"${MAKEFLAGS:--j$(nproc)}\"\nexport CMAKE_BUILD_PARALLEL_LEVEL=\"${CMAKE_BUILD_PARALLEL_LEVEL:-$(nproc)}\"\nexport NPY_NUM_BUILD_JOBS=\"${NPY_NUM_BUILD_JOBS:-$(nproc)}\"\n\n# Shared pip cache to speed repeated editable builds\nexport PIP_CACHE_DIR=\"${PIP_CACHE_DIR:-/opt/pipcache}\"\nmkdir -p \"$PIP_CACHE_DIR\"\nEOF\n}\n\n# -------- Persisted build variables --------\nwrite_build_vars() {\n local py_versions=\"$1\"\n local import_name=\"$2\"\n\n mkdir -p /etc/asv_env\n echo \"$py_versions\" > /etc/asv_env/py_versions\n echo \"$import_name\" > /etc/asv_env/import_name\n\n # Exported for every future shell (pkg script, interactive, etc.)\n cat >/etc/profile.d/asv_build_vars.sh </usr/local/bin/asv_smokecheck.py <<'PY'\n#!/usr/bin/env python\nimport argparse, importlib, pathlib, sys\n\ndef import_and_version(name: str):\n m = importlib.import_module(name)\n ver = getattr(m, \"__version__\", \"unknown\")\n print(f\"{name} imported ok; __version__={ver}\")\n\ndef probe_compiled(name: str, max_ext: int = 10):\n m = importlib.import_module(name)\n if not hasattr(m, \"__path__\"):\n print(\"No package __path__ (likely a single-module dist); skipping compiled probe.\")\n return\n pkg_path = pathlib.Path(list(m.__path__)[0])\n so_like = list(pkg_path.rglob(\"*.so\")) + list(pkg_path.rglob(\"*.pyd\"))\n failed = []\n for ext in so_like[:max_ext]:\n rel = ext.relative_to(pkg_path).with_suffix(\"\")\n dotted = \".\".join([name] + list(rel.parts))\n try:\n importlib.import_module(dotted)\n except Exception as e:\n failed.append((dotted, str(e)))\n if failed:\n print(\"Some compiled submodules failed to import:\")\n for d, err in failed:\n print(\" -\", d, \"->\", err)\n sys.exit(1)\n else:\n print(\"Compiled submodules (if any) import ok\")\n\ndef main():\n p = argparse.ArgumentParser()\n p.add_argument(\"--import-name\", required=True)\n p.add_argument(\"--repo-root\", default=\".\")\n p.add_argument(\"--pytest-smoke\", action=\"store_true\",\n help=\"Run a quick pytest smoke: -k 'not slow' --maxfail=1\")\n p.add_argument(\"--max-ext\", type=int, default=10)\n args = p.parse_args()\n\n import_and_version(args.import_name)\n probe_compiled(args.import_name, max_ext=args.max_ext)\n\n if args.pytest_smoke:\n import subprocess, os\n if any((pathlib.Path(args.repo_root)/p).exists() for p in (\"tests\", \"pytest.ini\", \"pyproject.toml\")):\n print(\"Running pytest smoke...\")\n rc = subprocess.call([sys.executable, \"-m\", \"pytest\", \"-q\", \"-k\", \"not slow\", \"--maxfail=1\"], cwd=args.repo_root)\n if rc != 0:\n sys.exit(rc)\n else:\n print(\"No tests detected; skipping pytest smoke.\")\n print(\"Smokecheck OK \u2705\")\n\nif __name__ == \"__main__\":\n main()\nPY\n chmod +x /usr/local/bin/asv_smokecheck.py\n}\ninstall_smokecheck\n\n# -------- Install an import-name detector CLI --------\ninstall_detect_import_name() {\n cat >/usr/local/bin/detect_import_name <<'PY'\n#!/usr/bin/env python\nimport argparse, pathlib, re, sys, subprocess, configparser, json\n\n# --- optional TOML loader (py3.11+: tomllib; else tomli if available) ---\ntry:\n import tomllib as toml\nexcept Exception:\n try:\n import tomli as toml\n except Exception:\n toml = None\n\nEXCEPTIONS = {\n # common dist\u2192import mismatches\n \"scikit-learn\": \"sklearn\",\n \"opencv-python\": \"cv2\",\n \"pyyaml\": \"yaml\",\n \"beautifulsoup4\": \"bs4\",\n \"pillow\": \"PIL\",\n 
\"mysqlclient\": \"MySQLdb\",\n \"psycopg2-binary\": \"psycopg2\",\n \"opencv-contrib-python\": \"cv2\",\n \"protobuf\": \"google\", # top-level package\n \"apache-beam\": \"apache_beam\",\n}\n\n# All the package names we typically query.\nEXCEPTIONS.update({\n # --- core scientific stack ---\n \"scikit-learn\": \"sklearn\",\n \"numpy\": \"numpy\",\n \"pandas\": \"pandas\",\n \"scipy\": \"scipy\",\n \"scikit-image\": \"skimage\",\n \"pywt\": \"pywt\",\n \"xarray\": \"xarray\",\n \"bottleneck\": \"bottleneck\",\n \"h5py\": \"h5py\",\n \"networkx\": \"networkx\",\n \"shapely\": \"shapely\",\n\n # --- ML / stats / optimization / viz ---\n \"optuna\": \"optuna\",\n \"arviz\": \"arviz\",\n \"pymc\": \"pymc\",\n \"kedro\": \"kedro\",\n \"modin\": \"modin\",\n \"napari\": \"napari\",\n \"deepchecks\": \"deepchecks\",\n \"voyager\": \"voyager\", # spotify/voyager\n \"warp\": \"warp\", # NVIDIA/warp\n \"newton\": \"newton\", # newton-physics/newton\n\n # --- domain / ecosystem libs ---\n \"geopandas\": \"geopandas\",\n \"cartopy\": \"cartopy\",\n \"iris\": \"iris\",\n \"anndata\": \"anndata\",\n \"scanpy\": \"scanpy\",\n \"sunpy\": \"sunpy\",\n \"pvlib-python\": \"pvlib\",\n \"PyBaMM\": \"pybamm\",\n \"momepy\": \"momepy\",\n \"satpy\": \"satpy\",\n \"pydicom\": \"pydicom\",\n \"pynetdicom\": \"pynetdicom\",\n\n # --- file formats / IO / infra ---\n \"asdf\": \"asdf\",\n \"arrow\": \"pyarrow\", # apache/arrow\n \"ArcticDB\": \"arcticdb\",\n \"arctic\": \"arctic\",\n\n # --- web / frameworks / utils ---\n \"django-components\": \"django_components\",\n \"h11\": \"h11\",\n \"tqdm\": \"tqdm\",\n \"rich\": \"rich\",\n \"posthog\": \"posthog\",\n \"datalad\": \"datalad\",\n \"ipyparallel\": \"ipyparallel\",\n\n # --- numerical / symbolic / control ---\n \"autograd\": \"autograd\",\n \"python-control\": \"control\",\n \"loopy\": \"loopy\",\n \"thermo\": \"thermo\",\n \"chempy\": \"chempy\",\n \"adaptive\": \"adaptive\",\n\n # --- scientific image / signal ---\n \"metric-learn\": \"metric_learn\",\n\n # --- quantum / physics ---\n \"Cirq\": \"cirq\",\n \"memray\": \"memray\",\n \"devito\": \"devito\",\n\n # --- bio / chem / data ---\n \"sourmash\": \"sourmash\",\n \"dipy\": \"dipy\",\n\n # --- protocol buffers / codegen / outlines ---\n \"python-betterproto\": \"betterproto\",\n \"outlines\": \"outlines\",\n\n # --- DS viz / raster ---\n \"datashader\": \"datashader\",\n \"xarray-spatial\": \"xarray_spatial\",\n\n # --- misc ---\n \"enlighten\": \"enlighten\",\n \"xorbits\": \"xorbits\",\n \"geopandas\": \"geopandas\",\n \"lmfit-py\": \"lmfit\",\n \"mdanalysis\": \"MDAnalysis\",\n \"nilearn\": \"nilearn\",\n})\n\n\nEXCLUDE_DIRS = {\n \".git\", \".hg\", \".svn\", \".tox\", \".nox\", \".venv\", \"venv\",\n \"build\", \"dist\", \"__pycache__\", \".mypy_cache\", \".pytest_cache\",\n \"docs\", \"doc\", \"site\", \"examples\", \"benchmarks\", \"tests\", \"testing\",\n}\n\ndef _norm(s: str) -> str:\n return re.sub(r\"[-_.]+\", \"\", s).lower()\n\ndef read_pyproject(root: pathlib.Path):\n cfg = {}\n p = root / \"pyproject.toml\"\n if toml and p.exists():\n try:\n cfg = toml.loads(p.read_text(encoding=\"utf-8\"))\n except Exception:\n pass\n return cfg\n\ndef read_setup_cfg(root: pathlib.Path):\n p = root / \"setup.cfg\"\n cp = configparser.ConfigParser()\n if p.exists():\n try:\n cp.read(p, encoding=\"utf-8\")\n except Exception:\n pass\n return cp\n\ndef dist_name_from_config(pyproject, setup_cfg):\n # PEP 621 name\n name = (pyproject.get(\"project\", {}) or {}).get(\"name\")\n if not name:\n # setup.cfg 
[metadata] name\n if setup_cfg.has_section(\"metadata\"):\n name = setup_cfg.get(\"metadata\", \"name\", fallback=None)\n # setup.py --name as last resort\n return name\n\ndef package_roots_from_config(root, pyproject, setup_cfg):\n roots = set([root])\n # setuptools package-dir mapping\n # pyproject: [tool.setuptools.package-dir] \"\" = \"src\"\n pkgdir = ((pyproject.get(\"tool\", {}) or {}).get(\"setuptools\", {}) or {}).get(\"package-dir\", {})\n if isinstance(pkgdir, dict):\n if \"\" in pkgdir:\n roots.add((root / pkgdir[\"\"]).resolve())\n for _, d in pkgdir.items():\n try:\n roots.add((root / d).resolve())\n except Exception:\n pass\n # setup.cfg [options] package_dir\n if setup_cfg.has_section(\"options\"):\n raw = setup_cfg.get(\"options\", \"package_dir\", fallback=None)\n if raw:\n # can be \"=\\nsrc\" or mapping lines\n lines = [l.strip() for l in raw.splitlines() if l.strip()]\n # accept simple \"=src\" or \"\" = \"src\"\n for ln in lines:\n m = re.match(r'^(\"?\\'?)*\\s*=?\\s*(\"?\\'?)*\\s*(?P[^#;]+)$', ln)\n if m:\n roots.add((root / m.group(\"path\").strip()).resolve())\n # setup.cfg [options.packages.find] where\n if setup_cfg.has_section(\"options.packages.find\"):\n where = setup_cfg.get(\"options.packages.find\", \"where\", fallback=None)\n if where:\n for w in re.split(r\"[,\\s]+\", where):\n if w:\n roots.add((root / w).resolve())\n return [r for r in roots if r.exists()]\n\ndef explicit_modules_from_config(pyproject, setup_cfg):\n mods = set()\n # pyproject (tool.setuptools) py-modules / packages\n st = ((pyproject.get(\"tool\", {}) or {}).get(\"setuptools\", {}) or {})\n for key in (\"py-modules\", \"packages\"):\n val = st.get(key)\n if isinstance(val, list):\n mods.update(val)\n # setup.cfg [options] py_modules / packages\n if setup_cfg.has_section(\"options\"):\n for key in (\"py_modules\", \"packages\"):\n raw = setup_cfg.get(\"options\", key, fallback=None)\n if raw:\n for tok in re.split(r\"[\\s,]+\", raw.strip()):\n if tok and tok != \"find:\":\n mods.add(tok)\n return sorted(mods)\n\ndef read_top_level_from_egg_info(root):\n # editable installs often leave ./.egg-info/top_level.txt\n for ei in root.rglob(\"*.egg-info\"):\n tl = ei / \"top_level.txt\"\n if tl.exists():\n try:\n names = [l.strip() for l in tl.read_text(encoding=\"utf-8\").splitlines() if l.strip()]\n if names:\n return names\n except Exception:\n pass\n # also consider dist-info during local builds\n for di in root.rglob(\"*.dist-info\"):\n tl = di / \"top_level.txt\"\n if tl.exists():\n try:\n names = [l.strip() for l in tl.read_text(encoding=\"utf-8\").splitlines() if l.strip()]\n if names:\n return names\n except Exception:\n pass\n return None\n\ndef walk_candidates(roots):\n \"\"\"Return set of plausible top-level import names under candidate roots.\"\"\"\n cands = set()\n for r in roots:\n for path in r.rglob(\"__init__.py\"):\n try:\n pkg_dir = path.parent\n # skip excluded dirs anywhere in the path\n if any(part in EXCLUDE_DIRS for part in pkg_dir.parts):\n continue\n # Construct package name relative to the nearest search root\n try:\n rel = pkg_dir.relative_to(r)\n except Exception:\n continue\n if not rel.parts:\n continue\n top = rel.parts[0]\n if top.startswith(\"_\"):\n # usually private tooling\n continue\n cands.add(top)\n except Exception:\n pass\n # standalone modules at top-level of roots (py_modules case)\n for mod in r.glob(\"*.py\"):\n if mod.stem not in (\"setup\",):\n cands.add(mod.stem)\n return sorted(cands)\n\ndef score_candidates(cands, dist_name):\n \"\"\"Assign a 
score preferring names that match the dist name.\"\"\"\n scores = {}\n n_dist = _norm(dist_name) if dist_name else None\n prefer = None\n if dist_name and dist_name.lower() in EXCEPTIONS:\n prefer = EXCEPTIONS[dist_name.lower()]\n # also try normalized exception keys (e.g. capitalization)\n for k, v in EXCEPTIONS.items():\n if _norm(k) == _norm(dist_name or \"\"):\n prefer = v\n\n for c in cands:\n s = 0\n if prefer and _norm(c) == _norm(prefer):\n s += 100\n if n_dist and _norm(c) == n_dist:\n s += 80\n if n_dist and (_norm(c).startswith(n_dist) or n_dist.startswith(_norm(c))):\n s += 20\n # shorter, simpler names get a slight bump\n s += max(0, 10 - len(c))\n scores[c] = s\n return sorted(cands, key=lambda x: (-scores.get(x, 0), x)), scores\n\ndef detect(root: str, return_all=False):\n root = pathlib.Path(root).resolve()\n\n pyproject = read_pyproject(root)\n setup_cfg = read_setup_cfg(root)\n dist_name = dist_name_from_config(pyproject, setup_cfg)\n\n # 1) top_level.txt (best signal if present)\n top = read_top_level_from_egg_info(root)\n if top:\n if return_all:\n return top\n # If multiple, score them\n ordered, _ = score_candidates(top, dist_name or \"\")\n return [ordered[0]]\n\n # 2) explicit declarations (py_modules / packages lists)\n explicit = explicit_modules_from_config(pyproject, setup_cfg)\n\n # 3) find correct search roots (src layout, package_dir, etc.)\n roots = package_roots_from_config(root, pyproject, setup_cfg)\n\n # 4) walk code to infer candidates\n walked = walk_candidates(roots)\n\n # merge explicit + walked\n cands = list(dict.fromkeys(explicit + walked)) # keep order & de-dup\n\n # 5) fallback from dist name heuristics/exceptions if still empty\n if not cands and dist_name:\n # exception or simple normalization\n guess = EXCEPTIONS.get(dist_name.lower()) or re.sub(r\"[-\\.]+\", \"_\", dist_name)\n cands = [guess]\n\n if not cands:\n return []\n\n if return_all:\n # return ordered list\n ordered, _ = score_candidates(cands, dist_name or \"\")\n return ordered\n else:\n ordered, _ = score_candidates(cands, dist_name or \"\")\n return [ordered[0]]\n\ndef main():\n ap = argparse.ArgumentParser(description=\"Detect the top-level Python import name for a repo.\")\n ap.add_argument(\"--repo-root\", default=\".\", help=\"Path to repository root\")\n ap.add_argument(\"--all\", action=\"store_true\", help=\"Print all plausible names (JSON list)\")\n args = ap.parse_args()\n\n names = detect(args.repo_root, return_all=args.all)\n if not names:\n sys.exit(1)\n if args.all:\n print(json.dumps(names))\n else:\n print(names[0])\n\nif __name__ == \"__main__\":\n main()\nPY\n chmod +x /usr/local/bin/detect_import_name\n}\n\ninstall_detect_import_name\n\n# -------- Script body --------\n\ninstall_profile_helpers\n# shellcheck disable=SC1091\nsource /etc/profile.d/asv_utils.sh\n\n# Ensure base micromamba is active for introspecting ASV config\nmicromamba activate base\n\ninstall_detect_import_name\ninstall_smokecheck\n\nIMPORT_NAME=\"$(detect_import_name || true)\"\nif [[ -z \"$IMPORT_NAME\" ]]; then\n echo \"WARN: Could not determine import name; the pkg stage will fall back to local detection.\"\nfi\n\n\n# Move into the directory that contains asv.*.json\ncd_asv_json_dir || { echo \"No 'asv.*.json' file found.\" >&2; exit 1; }\n\nCONF_NAME=\"$(asv_conf_name || true)\"\nif [[ -z \"${CONF_NAME:-}\" ]]; then\n echo \"No 'asv.*.json' file found.\" >&2\n exit 1\nfi\n\n# Make sure tomli is available in base for pyproject parsing\nmicromamba install -y -n base -c conda-forge tomli 
>/dev/null\n\n# Read python versions from the ASV config\nPY_VERSIONS=$(python - <\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
[text/html output: rendered DataFrame preview of commit_df.head() — 5 rows × 12 columns (sha, date, message, total_additions, total_deletions, total_files_changed, files_changed, patch, has_asv, file_change_summary, kind, repo_name). The HTML table markup was stripped when this patch was captured; the same data appears verbatim in the text/plain output below.]
\n", + "" + ], + "text/plain": [ + " sha date \\\n", + "0 3263e718a6cc2d10ae4e3e4ba4d4c7ed41ee12e8 2024-07-06T09:38:32+08:00 \n", + "1 3f67c275e1e575c902027ca07586b9d35f38033a 2024-05-07T15:04:23+10:00 \n", + "2 5c66fb06ec95192d4b427b4de171b6ab9e1528a6 2024-05-04T11:03:28+10:00 \n", + "3 6c40b8efb727eacf8a865789afbe65ee2d4bb5c0 2024-04-04T14:13:19+11:00 \n", + "4 858358697fce8fb96530f9c299d285286e5192e5 2024-04-04T10:36:54+11:00 \n", + "\n", + " message total_additions \\\n", + "0 Merge pull request #125 from Kai-Striega/broad... 133 \n", + "1 Merge pull request #122 from Eugenia-Mazur/irr... 62 \n", + "2 Merge pull request #124 from Kai-Striega/confi... 8 \n", + "3 Merge pull request #120 from Kai-Striega/enh/n... 6 \n", + "4 Merge pull request #118 from Kai-Striega/enh/n... 95 \n", + "\n", + " total_deletions total_files_changed \\\n", + "0 66 3 \n", + "1 47 1 \n", + "2 18 3 \n", + "3 2 1 \n", + "4 29 3 \n", + "\n", + " files_changed \\\n", + "0 numpy_financial/_financial.py\\nnumpy_financial... \n", + "1 numpy_financial/_financial.py \n", + "2 asv.conf.json\\ndoc/source/dev/running_the_benc... \n", + "3 numpy_financial/_cfinancial.pyx \n", + "4 numpy_financial/_cfinancial.pyx\\nnumpy_financi... \n", + "\n", + " patch has_asv \\\n", + "0 From a00ab5f0443d2f1c52875b70f19f334c73a17729 ... True \n", + "1 From a00ab5f0443d2f1c52875b70f19f334c73a17729 ... True \n", + "2 From 646f292a26089dc212e4315f0939c183f660ccea ... True \n", + "3 From 5b134ac31419fea11db1dda25315d1bd192d8430 ... True \n", + "4 From 6b6f7b5ba1a50a1199c408b99538c397ef54d0ba ... True \n", + "\n", + " file_change_summary kind \\\n", + "0 | File | Lines Added | Lines Removed | Total C... commit \n", + "1 | File | Lines Added | Lines Removed | Total C... commit \n", + "2 | File | Lines Added | Lines Removed | Total C... commit \n", + "3 | File | Lines Added | Lines Removed | Total C... commit \n", + "4 | File | Lines Added | Lines Removed | Total C... commit \n", + "\n", + " repo_name \n", + "0 numpy/numpy-financial \n", + "1 numpy/numpy-financial \n", + "2 numpy/numpy-financial \n", + "3 numpy/numpy-financial \n", + "4 numpy/numpy-financial " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cr = ContextRegistry.load_from_file(Path(\"scratch/artifacts/pipeflush/context_registry.json\"))\n", + "commit_pth = Path(\"scratch/artifacts/pipeflush/commits_perfonly.parquet\")\n", + "commit_df = pd.read_parquet(commit_pth)\n", + "commit_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "79905eb5", + "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Optuna commits: 412, Other commits: 52\n" + " repo_name sha\n", + "0 numpy/numpy-financial 3263e718a6cc2d10ae4e3e4ba4d4c7ed41ee12e8\n", + "1 numpy/numpy-financial 3f67c275e1e575c902027ca07586b9d35f38033a\n", + "2 numpy/numpy-financial 5c66fb06ec95192d4b427b4de171b6ab9e1528a6\n", + "3 numpy/numpy-financial 6c40b8efb727eacf8a865789afbe65ee2d4bb5c0\n", + "4 numpy/numpy-financial 858358697fce8fb96530f9c299d285286e5192e5\n", + "... ... 
...\n", + "46947 PostHog/posthog f079a1efe07a3dd062967ba4bc72078177149953\n", + "46948 PostHog/posthog f832d5783e20180261f490e04d40d329af819bdd\n", + "46949 PostHog/posthog f9dd7ef5d6823cb9a6dbf1e91fe51fb3873375ee\n", + "46950 PostHog/posthog fe12a8b9945b8a6c14252003ad983327c662fe7b\n", + "46951 PostHog/posthog fe82d214b66041c0a6e6026351a93da52e153163\n", + "\n", + "[46952 rows x 2 columns]\n" ] } ], "source": [ - "optuna_jsonl = \"scratch/artifacts/pipeflush/commits_perfonly_optuna.jsonl\"\n", - "other_jsonl = \"scratch/artifacts/pipeflush/commits_perfonly.jsonl\"\n", - "\n", - "df1 = pd.read_json(optuna_jsonl, lines=True)\n", - "df2 = pd.read_json(other_jsonl, lines=True)\n", - "print(f\"Optuna commits: {len(df1)}, Other commits: {len(df2)}\")\n", - "df = pd.concat([df1, df2], ignore_index=True)\n", - "df.drop_duplicates(subset=[\"sha\"], inplace=True)" + "print(commit_df[[\"repo_name\", \"sha\"]])" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "id": "02ecc321", "metadata": {}, "outputs": [], "source": [ - "commits_parquet = \"scratch/artifacts/pipeflush/commits_filtered.parquet\"\n", - "pd.read_parquet(commits_parquet)" + "# # Break parquet file into 6 chunks. Try to put equal number of repos in each chunk.\n", + "# n_chunks = 6\n", + "# # (1036, 7)\n", + "# chunk_size = commit_df.shape[0] // n_chunks\n", + "# # shuffle rows and split into chunks\n", + "# commit_df = commit_df.sample(frac=1, random_state=42, replace=False).reset_index(drop=True)\n", + "# chunks = [commit_df.iloc[i * chunk_size : (i + 1) * chunk_size] for i in range(n_chunks - 1)]\n", + "# chunks.append(commit_df.iloc[(n_chunks - 1) * chunk_size :]) # last chunk gets the remainder\n", + "# cmds = []\n", + "# for i, chunk in enumerate(chunks):\n", + "# pth = Path(f\"scratch/artifacts/pipeflush/chunk_{i}/commits_perfonly.parquet\")\n", + "# pth.parent.mkdir(parents=True, exist_ok=True)\n", + "# chunk.to_parquet(pth)\n", + "# # Make a new context registry:\n", + "# cr.save_to_file(pth.parent / \"context_registry.json\")\n", + "# cmd_i = cmd.format(output_dir=pth.parent)\n", + "# cmds.append(cmd_i)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "567cdaa5", "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "09:17:16 INFO datasmith.docker.context: Context registry saved to scratch/artifacts/pipeflush/chunk_0/context_registry.json\n", + "09:17:36 INFO datasmith.docker.context: Context registry saved to scratch/artifacts/pipeflush/chunk_1/context_registry.json\n", + "09:17:58 INFO datasmith.docker.context: Context registry saved to scratch/artifacts/pipeflush/chunk_2/context_registry.json\n" + ] + } + ], + "source": [ + "# break parquet into three chunks with fixed ratios.\n", + "ratios = [64, 56, 127]\n", + "total = sum(ratios)\n", + "# Compute split sizes\n", + "sizes = [int(commit_df.shape[0] * r / total) for r in ratios]\n", + "\n", + "# Adjust last size to cover remainder (to avoid row loss due to rounding)\n", + "sizes[-1] = commit_df.shape[0] - sum(sizes[:-1])\n", + "\n", + "# Split dataframe\n", + "df1 = commit_df.iloc[: sizes[0]]\n", + "df2 = commit_df.iloc[sizes[0] : sizes[0] + sizes[1]]\n", + "df3 = commit_df.iloc[sizes[0] + sizes[1] :]\n", + "\n", + "chunks = [df1, df2, df3]\n", + "cmds = []\n", + "for i, (chunk, ratio) in enumerate(zip(chunks, ratios)):\n", + " pth = Path(f\"scratch/artifacts/pipeflush/chunk_{i}/commits_perfonly.parquet\")\n", + " pth.parent.mkdir(parents=True, exist_ok=True)\n", + 
" chunk.to_parquet(pth)\n", + " # Make a new context registry:\n", + " cr.save_to_file(pth.parent / \"context_registry.json\")\n", + " cmd_i = cmd.format(output_dir=pth.parent, ncpus=(ratio // 2))\n", + " cmds.append(cmd_i)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "3fafdd1c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "python scratch/scripts/synthesize_contexts.py --commits scratch/artifacts/pipeflush/chunk_0/commits_perfonly.parquet --output-dir scratch/artifacts/pipeflush/chunk_0/results_synthesis/ --context-registry scratch/artifacts/pipeflush/chunk_0/context_registry.json --max-workers 32 --limit-per-repo 2 --max-attempts 3 --max-steps 10\n", + "python scratch/scripts/synthesize_contexts.py --commits scratch/artifacts/pipeflush/chunk_1/commits_perfonly.parquet --output-dir scratch/artifacts/pipeflush/chunk_1/results_synthesis/ --context-registry scratch/artifacts/pipeflush/chunk_1/context_registry.json --max-workers 28 --limit-per-repo 2 --max-attempts 3 --max-steps 10\n", + "python scratch/scripts/synthesize_contexts.py --commits scratch/artifacts/pipeflush/chunk_2/commits_perfonly.parquet --output-dir scratch/artifacts/pipeflush/chunk_2/results_synthesis/ --context-registry scratch/artifacts/pipeflush/chunk_2/context_registry.json --max-workers 63 --limit-per-repo 2 --max-attempts 3 --max-steps 10\n" + ] + } + ], + "source": [ + "print(\"\\n\".join(cmds).replace(\" \", \"\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4d5d4ac2", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "07:25:05 INFO datasmith.docker.context: Context registry saved to scratch/artifacts/pipeflush/tiny/context_registry.json\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "python scratch/scripts/synthesize_contexts.py --commits scratch/artifacts/pipeflush/tiny/commits_perfonly.parquet --output-dir scratch/artifacts/pipeflush/tiny/results_synthesis/ --context-registry scratch/artifacts/pipeflush/tiny/context_registry.json --max-workers 2 --limit-per-repo 2 --max-attempts 3 --max-steps 10\n" + ] + } + ], + "source": [ + "# # make a tiny task with just two commits for testing\n", + "# tiny = commit_df.sample(n=2, random_state=141, replace=False).reset_index(drop=True)\n", + "# tiny_pth = Path(\"scratch/artifacts/pipeflush/tiny/commits_perfonly.parquet\")\n", + "# tiny_pth.parent.mkdir(parents=True, exist_ok=True)\n", + "# # tiny.to_parquet(tiny_pth)\n", + "# cr.save_to_file(tiny_pth.parent / \"context_registry.json\")\n", + "# cmd_tiny = cmd.format(output_dir=tiny_pth.parent, ncpus=2)\n", + "# print(cmd_tiny)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5133158", + "metadata": {}, "outputs": [], "source": [] } diff --git a/scratch/notebooks/compare_benchmarks.ipynb b/scratch/notebooks/compare_benchmarks.ipynb index 749cb2e..ba08ece 100644 --- a/scratch/notebooks/compare_benchmarks.ipynb +++ b/scratch/notebooks/compare_benchmarks.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "7d96c171", "metadata": {}, "outputs": [ @@ -631,7 +631,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.13.5" + "version": "3.12.11" } }, "nbformat": 4, diff --git a/scratch/scripts/initialize_context_registry.py b/scratch/scripts/initialize_context_registry.py index e98e752..b5f362e 100644 --- 
a/scratch/scripts/initialize_context_registry.py +++ b/scratch/scripts/initialize_context_registry.py @@ -23,6 +23,7 @@ # ROOT_PATH=${ROOT_PATH:-$PWD} # Usually /workspace/repo # REPO_ROOT="$ROOT_PATH" # TARGET_VERSIONS="${PY_VERSION:-${ASV_PY_VERSIONS:-}}" +# EXTRAS="${ALL_EXTRAS:+[$ALL_EXTRAS]}" # if [[ -z "${TARGET_VERSIONS}" ]]; then # echo "Error: No PY_VERSION set and ASV_PY_VERSIONS not found." >&2 # exit 1 @@ -109,8 +110,11 @@ # micromamba run -n "$ENV_NAME" pip install -e . scipy matplotlib # # Editable install (no build isolation preferrably). Toolchain lives in the env already. +# # $EXTRAS is an optional argument to install all discovered extra dependencies. +# # It will be empty if pyproject.toml does not exist or has no [project.optional-dependencies]. +# # In case setup.py is used, no need to append $EXTRAS. # log "Editable install with --no-build-isolation" -# PIP_NO_BUILD_ISOLATION=1 micromamba run -n "$ENV_NAME" python -m pip install --no-build-isolation -v -e "$REPO_ROOT" +# PIP_NO_BUILD_ISOLATION=1 micromamba run -n "$ENV_NAME" python -m pip install --no-build-isolation -v -e "$REPO_ROOT"$EXTRAS # # Health checks (import + compiled extension probe; optional pytest smoke with RUN_PYTEST_SMOKE=1) # log "Running smoke checks" @@ -141,6 +145,7 @@ ROOT_PATH=${ROOT_PATH:-$PWD} # Usually /workspace/repo REPO_ROOT="$ROOT_PATH" TARGET_VERSIONS="${PY_VERSION:-${ASV_PY_VERSIONS:-}}" +EXTRAS="${ALL_EXTRAS:+[$ALL_EXTRAS]}" if [[ -z "${TARGET_VERSIONS}" ]]; then echo "Error: No PY_VERSION set and ASV_PY_VERSIONS not found." >&2 exit 1 @@ -226,8 +231,11 @@ micromamba run -n "$ENV_NAME" pip install meson-python cython # Editable install (no build isolation preferrably). Toolchain lives in the env already. + # $EXTRAS is an optional argument to install all discovered extra dependencies. + # It will be empty if pyproject.toml does not exist or has no [project.optional-dependencies]. + # In case setup.py is used, no need to append $EXTRAS. log "Editable install with --no-build-isolation" - PIP_NO_BUILD_ISOLATION=1 micromamba run -n "$ENV_NAME" python -m pip install --no-build-isolation -v -e "$REPO_ROOT" + PIP_NO_BUILD_ISOLATION=1 micromamba run -n "$ENV_NAME" python -m pip install --no-build-isolation -v -e "$REPO_ROOT"$EXTRAS # Health checks (import + compiled extension probe; optional pytest smoke with RUN_PYTEST_SMOKE=1) log "Running smoke checks" @@ -258,6 +266,7 @@ ROOT_PATH=${ROOT_PATH:-$PWD} # Usually /workspace/repo REPO_ROOT="$ROOT_PATH" TARGET_VERSIONS="${PY_VERSION:-${ASV_PY_VERSIONS:-}}" +EXTRAS="${ALL_EXTRAS:+[$ALL_EXTRAS]}" if [[ -z "${TARGET_VERSIONS}" ]]; then echo "Error: No PY_VERSION set and ASV_PY_VERSIONS not found." >&2 exit 1 @@ -344,8 +353,11 @@ export CFLAGS="${CFLAGS:-} -Wno-error=incompatible-pointer-types" # Editable install (no build isolation preferrably). Toolchain lives in the env already. + # $EXTRAS is an optional argument to install all discovered extra dependencies. + # It will be empty if pyproject.toml does not exist or has no [project.optional-dependencies]. + # In case setup.py is used, no need to append $EXTRAS. 
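+    # Illustrative example (editor's sketch, not part of the generated script): with
+    #   ALL_EXTRAS="dev,test"
+    # the expansion EXTRAS="${ALL_EXTRAS:+[$ALL_EXTRAS]}" yields EXTRAS="[dev,test]", so the
+    # install below effectively runs
+    #   pip install --no-build-isolation -v -e /workspace/repo[dev,test]
+    # while an unset or empty ALL_EXTRAS leaves EXTRAS empty and the command unchanged.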
log "Editable install with --no-build-isolation" - PIP_NO_BUILD_ISOLATION=1 micromamba run -n "$ENV_NAME" python -m pip install --no-build-isolation -v -e "$REPO_ROOT" + PIP_NO_BUILD_ISOLATION=1 micromamba run -n "$ENV_NAME" python -m pip install --no-build-isolation -v -e "$REPO_ROOT"$EXTRAS # Health checks (import + compiled extension probe; optional pytest smoke with RUN_PYTEST_SMOKE=1) log "Running smoke checks" @@ -377,6 +389,7 @@ ROOT_PATH=${ROOT_PATH:-$PWD} # Usually /workspace/repo REPO_ROOT="$ROOT_PATH" TARGET_VERSIONS="${PY_VERSION:-${ASV_PY_VERSIONS:-}}" +EXTRAS="${ALL_EXTRAS:+[$ALL_EXTRAS]}" if [[ -z "${TARGET_VERSIONS}" ]]; then echo "Error: No PY_VERSION set and ASV_PY_VERSIONS not found." >&2 exit 1 @@ -469,8 +482,11 @@ micromamba run -n "$ENV_NAME" python "${ROOT_PATH}/build_lib.py" # Editable install (no build isolation preferrably). Toolchain lives in the env already. + # $EXTRAS is an optional argument to install all discovered extra dependencies. + # It will be empty if pyproject.toml does not exist or has no [project.optional-dependencies]. + # In case setup.py is used, no need to append $EXTRAS. log "Editable install with --no-build-isolation" - PIP_NO_BUILD_ISOLATION=1 micromamba run -n "$ENV_NAME" python -m pip install --no-build-isolation -v -e "$REPO_ROOT" + PIP_NO_BUILD_ISOLATION=1 micromamba run -n "$ENV_NAME" python -m pip install --no-build-isolation -v -e "$REPO_ROOT"$EXTRAS # Health checks (import + compiled extension probe; optional pytest smoke with RUN_PYTEST_SMOKE=1) log "Running smoke checks" @@ -503,6 +519,7 @@ ROOT_PATH=${ROOT_PATH:-$PWD} # Usually /workspace/repo REPO_ROOT="$ROOT_PATH" TARGET_VERSIONS="${PY_VERSION:-${ASV_PY_VERSIONS:-}}" +EXTRAS="${ALL_EXTRAS:+[$ALL_EXTRAS]}" if [[ -z "${TARGET_VERSIONS}" ]]; then echo "Error: No PY_VERSION set and ASV_PY_VERSIONS not found." >&2 exit 1 @@ -590,8 +607,11 @@ fi # Editable install (no build isolation preferrably). Toolchain lives in the env already. + # $EXTRAS is an optional argument to install all discovered extra dependencies. + # It will be empty if pyproject.toml does not exist or has no [project.optional-dependencies]. + # In case setup.py is used, no need to append $EXTRAS. log "Editable install with --no-build-isolation" - PIP_NO_BUILD_ISOLATION=1 micromamba run -n "$ENV_NAME" python -m pip install --no-build-isolation -v -e "$REPO_ROOT" + PIP_NO_BUILD_ISOLATION=1 micromamba run -n "$ENV_NAME" python -m pip install --no-build-isolation -v -e "$REPO_ROOT"$EXTRAS # Health checks (import + compiled extension probe; optional pytest smoke with RUN_PYTEST_SMOKE=1) log "Running smoke checks" @@ -624,6 +644,7 @@ # ROOT_PATH=${ROOT_PATH:-$PWD} # Usually /workspace/repo # REPO_ROOT="$ROOT_PATH" # TARGET_VERSIONS="${PY_VERSION:-${ASV_PY_VERSIONS:-}}" +# EXTRAS="${ALL_EXTRAS:+[$ALL_EXTRAS]}" # if [[ -z "${TARGET_VERSIONS}" ]]; then # echo "Error: No PY_VERSION set and ASV_PY_VERSIONS not found." >&2 # exit 1 @@ -715,8 +736,11 @@ # cd "$working_dir" || exit 1 # else # # Editable install (no build isolation preferrably). Toolchain lives in the env already. +# # $EXTRAS is an optional argument to install all discovered extra dependencies. +# # It will be empty if pyproject.toml does not exist or has no [project.optional-dependencies]. +# # In case setup.py is used, no need to append $EXTRAS. 
# log "Editable install with --no-build-isolation" -# PIP_NO_BUILD_ISOLATION=1 micromamba run -n "$ENV_NAME" python -m pip install --no-build-isolation -v -e "$REPO_ROOT" +# PIP_NO_BUILD_ISOLATION=1 micromamba run -n "$ENV_NAME" python -m pip install --no-build-isolation -v -e "$REPO_ROOT"$EXTRAS # fi @@ -734,60 +758,60 @@ # ) -# if __name__ == "__main__": -# from pathlib import Path - -# CONTEXT_REGISTRY.save_to_file(Path("scratch/context_registry_init.json")) -# # for each context, build an image with the context. -# import docker - -# from datasmith.docker.context import ContextRegistry, DockerContext, Task - -# client = docker.from_env() - -# import concurrent.futures - -# fails = dict() - -# def build_context(task_context): -# task, context = task_context -# if not task.sha: -# import requests - -# resp = requests.get(f"https://api.github.com/repos/{task.owner}/{task.repo}") -# resp.raise_for_status() -# commit_sha = resp.json().get("default_branch", "main") -# task = Task(owner=task.owner, repo=task.repo, sha=commit_sha, tag=task.tag) -# print(f"Building image for {task.get_image_name()} at {task.sha}") -# res = context.build_container_streaming( -# client=client, -# image_name=task.get_image_name(), -# build_args={ -# "REPO_URL": f"https://www.github.com/{task.owner}/{task.repo}", -# "COMMIT_SHA": task.sha, # pyright: ignore[reportArgumentType] -# }, -# force=True, -# timeout_s=1200, -# pull=True, -# ) -# return (task, res) - -# with concurrent.futures.ProcessPoolExecutor() as executor: -# futures = {executor.submit(build_context, item): item[0] for item in CONTEXT_REGISTRY.registry.items()} -# for future in concurrent.futures.as_completed(futures): -# task = futures[future] -# try: -# task, res = future.result() -# if res.ok: -# print(f"Built image {task.get_image_name()} successfully") -# else: -# print(f"Failed to build image {task.get_image_name()}") -# fails[task] = res -# except Exception as exc: -# print(f"Exception building image {task.get_image_name()}: {exc}") -# fails[task] = exc - -# if fails: -# import IPython - -# IPython.embed() +if __name__ == "__main__": + from pathlib import Path + + CONTEXT_REGISTRY.save_to_file(Path("scratch/context_registry_init.json")) + # # for each context, build an image with the context. 
+ # import docker + + # from datasmith.docker.context import ContextRegistry, DockerContext, Task + + # client = docker.from_env() + + # import concurrent.futures + + # fails = dict() + + # def build_context(task_context): + # task, context = task_context + # if not task.sha: + # import requests + + # resp = requests.get(f"https://api.github.com/repos/{task.owner}/{task.repo}") + # resp.raise_for_status() + # commit_sha = resp.json().get("default_branch", "main") + # task = Task(owner=task.owner, repo=task.repo, sha=commit_sha, tag=task.tag) + # print(f"Building image for {task.get_image_name()} at {task.sha}") + # res = context.build_container_streaming( + # client=client, + # image_name=task.get_image_name(), + # build_args={ + # "REPO_URL": f"https://www.github.com/{task.owner}/{task.repo}", + # "COMMIT_SHA": task.sha, # pyright: ignore[reportArgumentType] + # }, + # force=True, + # timeout_s=1200, + # pull=True, + # ) + # return (task, res) + + # with concurrent.futures.ProcessPoolExecutor() as executor: + # futures = {executor.submit(build_context, item): item[0] for item in CONTEXT_REGISTRY.registry.items()} + # for future in concurrent.futures.as_completed(futures): + # task = futures[future] + # try: + # task, res = future.result() + # if res.ok: + # print(f"Built image {task.get_image_name()} successfully") + # else: + # print(f"Failed to build image {task.get_image_name()}") + # fails[task] = res + # except Exception as exc: + # print(f"Exception building image {task.get_image_name()}: {exc}") + # fails[task] = exc + + # if fails: + # import IPython + + # IPython.embed() diff --git a/scratch/scripts/synthesize_contexts.py b/scratch/scripts/synthesize_contexts.py index 5b1a771..87bef3d 100644 --- a/scratch/scripts/synthesize_contexts.py +++ b/scratch/scripts/synthesize_contexts.py @@ -21,7 +21,7 @@ configure_agent_backends(PORTKEY_MODEL_NAME="@anthropic/claude-3-5-sonnet-latest") logger = configure_logging(level=10) -# logger = configure_logging(level=10, stream=open(Path(__file__).with_suffix(".log"), "w")) +# logger = configure_logging(level=10, stream=open(Path(__file__).with_suffix(".tiny.log"), "w")) def parse_args() -> argparse.Namespace: @@ -178,13 +178,13 @@ def main(args: argparse.Namespace) -> None: for fut in as_completed(futures): res = fut.result() results.append(res) - with _err_lock, open(args.output_dir / "results.jsonl", "a") as jf: - jf.write(json.dumps(res) + "\n") - if int(res["rc"]) != 1: logger.info("main: SUCCESS %s/%s@%s", res["owner"], res["repo"], res["sha"]) context_registry.save_to_file(path=args.context_registry) + with _err_lock, open(args.output_dir / "results.jsonl", "a") as jf: + jf.write(json.dumps(res) + "\n") + # Rollup (minimal, quick to read) rollup = { r["image_name"]: { diff --git a/src/datasmith/agents/context_synthesis.py b/src/datasmith/agents/context_synthesis.py index a0885a0..668062b 100644 --- a/src/datasmith/agents/context_synthesis.py +++ b/src/datasmith/agents/context_synthesis.py @@ -12,7 +12,7 @@ from datasmith.agents.tool_executor import ContainerToolExecutor from datasmith.docker.context import BuildResult, ContextRegistry, DockerContext -from datasmith.docker.validation import Task, validate_one +from datasmith.docker.validation import Task logger = logging.getLogger(__name__) @@ -344,7 +344,7 @@ def build_once_with_context( return res -def agent_build_and_validate( +def agent_build_and_validate( # noqa: C901 task: Task, args: argparse.Namespace, client: docker.DockerClient, @@ -395,11 +395,25 @@ def agent_build_and_validate( 
tail_chars=args.tail_chars, probe=True, pull=True, - force=False, # If the env is already present, don't rebuild (saves time) + force=True, # If the env is already present, don't rebuild (saves time) ) if not env_res.ok: logger.warning("agent_build_and_validate: probe build failed; something is wrong with Dockerfile") - raise RuntimeError("probe build failed; check Dockerfile.") + # raise RuntimeError("probe build failed; check Dockerfile.") + return { + "ok": False, + "rc": env_res.rc, + "stage": "probe", + "owner": task.owner, + "repo": task.repo, + "sha": task.sha, + "image_name": task.with_tag("pkg").get_image_name(), + "duration_s": env_res.duration_s, + "stderr_tail": env_res.stderr_tail, + "stdout_tail": env_res.stdout_tail, + "attempts": [], + "context_pickle": None, + } tool_exec = ContainerToolExecutor( docker_client=client, @@ -481,56 +495,77 @@ def agent_build_and_validate( if build_res.ok: with context_registry.get_lock(): context_registry.register(task.with_tag("pkg"), ctx) - # import_works = False - # import_check_res = None - # try: - # import_check_res = tool_exec.import_check(cmd_python="python") - # logger.info( - # "agent_build_and_validate: import_check ok=%s candidates=%s", - # import_check_res.get("ok"), - # import_check_res.get("candidates"), - # ) - # except Exception as e: - # logger.warning("agent_build_and_validate: import_check error: %s", e, exc_info=True) - - # import_works = import_check_res.get("ok") if import_check_res else False - # if not import_works and import_check_res: - # # modify build_res to include import_check result - # build_res.stderr_tail = ( - # (build_res.stderr_tail or "") + "\n" + (import_check_res.get("stderr_tail") or "") - # ) - # build_res.stdout_tail = ( - # (build_res.stdout_tail or "") + "\n" + (import_check_res.get("stdout_tail") or "") - # ) - # elif not import_works: - # build_res.stderr_tail = ( - # (build_res.stderr_tail or "") + "\n" + "[import_check] failed with unknown error" - # ) # Save final pickle and then run full validation using your pipeline final_pickle = args.output_dir / f"{task.owner}-{task.repo}-{task.sha}-final.pkl" _save_pickle(ctx, final_pickle) - logger.info("agent_build_and_validate: build succeeded; starting validation run") - result = validate_one(task.with_tag("pkg"), args, client, context_registry, machine_defaults) + logger.info("agent_build_and_validate: build succeeded") + result = attempts[-1].build_result + if result is None: + raise RuntimeError("Unexpected: result is None after successful build") + result_dict = { + "owner": task.owner, + "repo": task.repo, + "sha": task.sha, + "image_name": task.with_tag("pkg").get_image_name(), + "ok": result.ok, + "rc": result.rc, + "duration_s": result.duration_s, + "stderr_tail": result.stderr_tail, + "stdout_tail": result.stdout_tail, + "stage": "build", + } + # result = validate_one(task.with_tag("pkg"), args, client, context_registry, machine_defaults) logger.info( "agent_build_and_validate: validation stage=%s ok=%s rc=%s", - result.get("stage"), - result.get("ok"), - result.get("rc"), + result_dict.get("stage"), + result_dict.get("ok"), + result_dict.get("rc"), ) - result["attempts"] = [ + result_dict["attempts"] = [ { "attempt": a.attempt_idx, "ok": (a.build_result.ok if a.build_result else False), "rc": (a.build_result.rc if a.build_result else None), "stderr_tail": (a.build_result.stderr_tail if a.build_result else ""), "stdout_tail": (a.build_result.stdout_tail if a.build_result else ""), + "building_data": a.building_data, } for a in attempts ] - 
result["context_pickle"] = str(final_pickle) - return result + result_dict["context_pickle"] = str(final_pickle) + return result_dict + + # If the stderr stream doesn't mention docker_build_pkg.sh at all, then iteration is futile + if build_res.stderr_tail and "docker_build_pkg.sh" not in build_res.stderr_tail: + logger.error( + "agent_build_and_validate: build failed without mentioning docker_build_pkg.sh; not worth iterating" + ) + return { + "ok": False, + "rc": build_res.rc, + "stage": "build", + "owner": task.owner, + "repo": task.repo, + "sha": task.sha, + "image_name": task.with_tag("pkg").get_image_name(), + "duration_s": build_res.duration_s, + "stderr_tail": build_res.stderr_tail, + "stdout_tail": build_res.stdout_tail, + "attempts": [ + { + "attempt": a.attempt_idx, + "ok": (a.build_result.ok if a.build_result else False), + "rc": (a.build_result.rc if a.build_result else None), + "stderr_tail": (a.build_result.stderr_tail if a.build_result else ""), + "stdout_tail": (a.build_result.stdout_tail if a.build_result else ""), + "building_data": a.building_data, + } + for a in attempts + ], + "context_pickle": None, + } # otherwise iterate with new logs logger.warning( @@ -559,6 +594,9 @@ def agent_build_and_validate( "attempt": a.attempt_idx, "ok": (a.build_result.ok if a.build_result else False), "rc": (a.build_result.rc if a.build_result else None), + "stderr_tail": (a.build_result.stderr_tail if a.build_result else ""), + "stdout_tail": (a.build_result.stdout_tail if a.build_result else ""), + "building_data": a.building_data, } for a in attempts ], diff --git a/src/datasmith/docker/context.py b/src/datasmith/docker/context.py index ddb07aa..fd6c6e3 100644 --- a/src/datasmith/docker/context.py +++ b/src/datasmith/docker/context.py @@ -208,7 +208,7 @@ def build_container( fileobj=self.build_tarball_stream(probe=probe), custom_context=True, tag=image_name, - buildargs=build_args, + buildargs={**build_args, "BUILDKIT_INLINE_CACHE": "1"}, target=target, ) except DockerException: @@ -278,7 +278,7 @@ def build_container_streaming( # noqa: C901 fileobj=tar_stream, custom_context=True, tag=image_name, - buildargs=build_args, + buildargs={**build_args, "BUILDKIT_INLINE_CACHE": "1"}, decode=True, rm=True, pull=pull, diff --git a/src/datasmith/docker/docker_build_env.sh b/src/datasmith/docker/docker_build_env.sh index fe2b379..f641d0b 100644 --- a/src/datasmith/docker/docker_build_env.sh +++ b/src/datasmith/docker/docker_build_env.sh @@ -59,6 +59,22 @@ export IMPORT_NAME="${import_name}" EOF } +# Append install-related variables (extras/specs) so the follow-up script can use them. +append_install_vars() { + local extras_all="$1" + local setuppy_cmd="$2" + + mkdir -p /etc/asv_env + printf "%s\n" "$extras_all" > /etc/asv_env/extras_all + printf "%s\n" "$setuppy_cmd" > /etc/asv_env/setuppy_cmd + + # Export for future shells + cat >>/etc/profile.d/asv_build_vars.sh </usr/local/bin/asv_smokecheck.py <<'PY' @@ -493,6 +509,120 @@ PY install_detect_import_name +install_detect_extras() { + cat >/usr/local/bin/detect_extras <<'PY' +#!/usr/bin/env python +""" +Emit space-separated extras discovered in a repo. 
+Sources: + - pyproject.toml -> [project.optional-dependencies] / [tool.poetry.extras] + - setup.cfg -> [options.extras_require] + - setup.py -> via `egg_info` then parse *.egg-info/{PKG-INFO,requires.txt} +""" +import argparse, pathlib, sys, subprocess, configparser, re +try: + import tomllib as toml +except Exception: + try: + import tomli as toml + except Exception: + toml = None + +def read_pyproject(root: pathlib.Path): + p = root / "pyproject.toml" + if toml and p.exists(): + try: + return toml.loads(p.read_text(encoding="utf-8")) + except Exception: + pass + return {} + +def read_setup_cfg(root: pathlib.Path): + p = root / "setup.cfg" + cp = configparser.ConfigParser() + if p.exists(): + try: + cp.read(p, encoding="utf-8") + except Exception: + pass + return cp + +def extras_from_pyproject(pyproject): + names = set() + proj = (pyproject.get("project", {}) or {}) + opt = proj.get("optional-dependencies", {}) or {} + names.update(opt.keys()) + poetry = ((pyproject.get("tool", {}) or {}).get("poetry", {}) or {}).get("extras", {}) or {} + names.update(poetry.keys()) + return names + +def extras_from_setup_cfg(setup_cfg): + names = set() + sec = "options.extras_require" + if setup_cfg.has_section(sec): + names.update(setup_cfg.options(sec)) + return names + +def ensure_egg_info(root: pathlib.Path): + if (root / "setup.py").exists(): + try: + subprocess.run([sys.executable, "setup.py", "-q", "egg_info"], + cwd=root, check=False, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + except Exception: + pass + +def extras_from_egg_info(root: pathlib.Path): + names=set() + for ei in root.glob("*.egg-info"): + pkgi = ei / "PKG-INFO" + if pkgi.exists(): + try: + for line in pkgi.read_text(encoding="utf-8", errors="ignore").splitlines(): + if line.startswith("Provides-Extra:"): + names.add(line.split(":",1)[1].strip()) + except Exception: + pass + req = ei / "requires.txt" + if req.exists(): + try: + for line in req.read_text(encoding="utf-8", errors="ignore").splitlines(): + m = re.match(r"^\[(.+)\]$", line.strip()) + if m: + names.add(m.group(1).strip()) + except Exception: + pass + return names + +def main(): + ap = argparse.ArgumentParser() + ap.add_argument("--repo-root", default=".") + args = ap.parse_args() + root = pathlib.Path(args.repo_root).resolve() + + pyproject = read_pyproject(root) + setup_cfg = read_setup_cfg(root) + + names = set() + names |= extras_from_pyproject(pyproject) + names |= extras_from_setup_cfg(setup_cfg) + + if (root / "setup.py").exists(): + ensure_egg_info(root) + names |= extras_from_egg_info(root) + + # Print space-separated (sorted) list; empty output if none + if names: + print(" ".join(sorted(names))) + else: + print("", end="") + +if __name__ == "__main__": + main() +PY + chmod +x /usr/local/bin/detect_extras +} +install_detect_extras + # -------- Script body -------- install_profile_helpers @@ -502,15 +632,14 @@ source /etc/profile.d/asv_utils.sh # Ensure base micromamba is active for introspecting ASV config micromamba activate base -install_detect_import_name -install_smokecheck +# Minimal tools in base to parse metadata (pyproject & egg-info) +micromamba install -y -n base -c conda-forge python tomli setuptools >/dev/null IMPORT_NAME="$(detect_import_name || true)" if [[ -z "$IMPORT_NAME" ]]; then echo "WARN: Could not determine import name; the pkg stage will fall back to local detection." fi - # Move into the directory that contains asv.*.json cd_asv_json_dir || { echo "No 'asv.*.json' file found." 
>&2; exit 1; } @@ -527,10 +656,19 @@ micromamba install -y -n base -c conda-forge tomli >/dev/null PY_VERSIONS=$(python - <= (3,7)] print(" ".join(cfg.pythons)) PY ) +# If none found, throw a noticeable error and exit +if [[ -z "$PY_VERSIONS" ]]; then + echo "No Satisfying PY_VERSIONS found in $CONF_NAME" >&2 + # echo asv config for debugging + cat "$CONF_NAME" >&2 + exit 1 +fi # Create the per-version envs with common build deps & ASV @@ -543,7 +681,7 @@ for version in $PY_VERSIONS; do # Generic toolchain useful for many compiled projects (installed once here) micromamba install -y -n "$ENV_NAME" -c conda-forge \ - pip git conda mamba libmambapy \ + pip git conda mamba "libmambapy<2" \ numpy scipy cython joblib threadpoolctl pytest \ compilers meson-python cmake ninja pkg-config tomli @@ -552,8 +690,13 @@ for version in $PY_VERSIONS; do micromamba run -n "$ENV_NAME" bash -lc "pip install --no-cache-dir git+https://github.com/airspeed-velocity/asv" done -# Persist variables +# Persist base variables write_build_vars "$PY_VERSIONS" "${IMPORT_NAME:-}" +REPO_ROOT="$(git rev-parse --show-toplevel 2>/dev/null || pwd)" +ALL_EXTRAS="$(detect_extras --repo-root "$REPO_ROOT" 2>/dev/null | tr -s ' ' | tr ' ' ',')" + +SETUPPY_CMD="develop" +append_install_vars "${ALL_EXTRAS}" "${SETUPPY_CMD}" echo "Environment setup complete." diff --git a/src/datasmith/docker/docker_build_pkg.sh b/src/datasmith/docker/docker_build_pkg.sh index b9ebd95..1f49be2 100644 --- a/src/datasmith/docker/docker_build_pkg.sh +++ b/src/datasmith/docker/docker_build_pkg.sh @@ -10,6 +10,7 @@ source /etc/profile.d/asv_build_vars.sh || true ROOT_PATH=${ROOT_PATH:-$PWD} # Usually /workspace/repo REPO_ROOT="$ROOT_PATH" TARGET_VERSIONS="${PY_VERSION:-${ASV_PY_VERSIONS:-}}" +EXTRAS="${ALL_EXTRAS:+[$ALL_EXTRAS]}" if [[ -z "${TARGET_VERSIONS}" ]]; then echo "Error: No PY_VERSION set and ASV_PY_VERSIONS not found." >&2 exit 1 @@ -90,17 +91,21 @@ for version in $TARGET_VERSIONS; do # ----------------------------- # Install some generic packages needed for building/testing. - micromamba install -y -n "$ENV_NAME" -c conda-forge pip git conda mamba libmambapy \ - numpy scipy cython joblib threadpoolctl pytest \ + # Libmambapy must be < 2 avoid issues running airspeed-velocity. + micromamba install -y -n "$ENV_NAME" -c conda-forge pip git conda mamba "libmambapy<=1.9.9" \ + numpy scipy cython joblib fakeredis threadpoolctl pytest \ compilers meson-python cmake ninja pkg-config tomli # Editable install (no build isolation preferrably). Toolchain lives in the env already. + # $EXTRAS is an optional argument to install all discovered extra dependencies. + # It will be empty if pyproject.toml does not exist or has no [project.optional-dependencies]. + # In case setup.py is used, no need to append $EXTRAS. 
log "Editable install with --no-build-isolation" - PIP_NO_BUILD_ISOLATION=1 micromamba run -n "$ENV_NAME" python -m pip install --no-build-isolation -v -e "$REPO_ROOT" + PIP_NO_BUILD_ISOLATION=1 micromamba run -n "$ENV_NAME" python -m pip install --no-build-isolation -v -e "$REPO_ROOT"$EXTRAS # Health checks (import + compiled extension probe; optional pytest smoke with RUN_PYTEST_SMOKE=1) log "Running smoke checks" - micromamba run -n "$ENV_NAME" asv_smokecheck.py --import-name $IMP --repo-root $REPO_ROOT ${RUN_PYTEST_SMOKE:+--pytest-smoke} + micromamba run -n "$ENV_NAME" asv_smokecheck.py --import-name "$IMP" --repo-root "$REPO_ROOT" ${RUN_PYTEST_SMOKE:+--pytest-smoke} echo "::import_name=${IMP}::env=${ENV_NAME}" done diff --git a/src/datasmith/docker/validation.py b/src/datasmith/docker/validation.py index 37c6851..834b95d 100644 --- a/src/datasmith/docker/validation.py +++ b/src/datasmith/docker/validation.py @@ -219,6 +219,7 @@ def validate_one( # noqa: C901 container = None files = {} try: + logger.debug("validate_one: running container %s", task.get_container_name()) container = client.containers.run( image=task.get_image_name(), detach=True,