|
| 1 | +import json |
| 2 | +import shutil |
| 3 | +from pathlib import Path |
| 4 | +from typing import Optional |
| 5 | + |
| 6 | +import pandas as pd |
| 7 | +from asv.commands.publish import Publish # type: ignore[import-untyped] |
| 8 | +from asv.config import Config # type: ignore[import-untyped] |
| 9 | +from asv.util import write_json # type: ignore[import-untyped] |
| 10 | +from git import Repo |
| 11 | + |
| 12 | + |
| 13 | +def _get_all_commits_dict(all_commits_df: pd.DataFrame) -> dict: |
| 14 | + """Return a dict mapping commit_sha to metadata.""" |
| 15 | + return all_commits_df.set_index("commit_sha", inplace=False).to_dict(orient="index") |
| 16 | + |
| 17 | + |
| 18 | +def _update_dict(pth: Path, new_data: dict) -> None: |
| 19 | + """ |
| 20 | + Update a JSON file at the given path with new data. |
| 21 | +
|
| 22 | + Args: |
| 23 | + path (Path): Path to the JSON file. |
| 24 | + new_data (dict): New data to update the JSON file with. |
| 25 | + """ |
| 26 | + if not pth.exists(): |
| 27 | + pth.parent.mkdir(parents=True, exist_ok=True) |
| 28 | + with open(pth, "w", encoding="utf-8") as f: |
| 29 | + json.dump(new_data, f) |
| 30 | + else: |
| 31 | + with open(pth, "r+", encoding="utf-8") as f: |
| 32 | + try: |
| 33 | + saved_benchmarks = json.load(f) |
| 34 | + except json.JSONDecodeError: |
| 35 | + saved_benchmarks = {} |
| 36 | + saved_benchmarks.update(new_data) |
| 37 | + |
| 38 | + with open(pth, "w", encoding="utf-8") as f: |
| 39 | + json.dump(saved_benchmarks, f) |
| 40 | + |
| 41 | + |
def _update_json(src_path: Path, dest_path: Path) -> None:
    """Read the JSON document at *src_path* and merge it into *dest_path*."""
    loaded = json.loads(src_path.read_text(encoding="utf-8"))
    _update_dict(dest_path, loaded)
| 47 | + |
| 48 | + |
| 49 | +def _update_machine_jsons(runid: Path, runid_newpath: Path, default_machine_name: str) -> Optional[dict]: |
| 50 | + """Update machine name in machine.json and params['machine'] in other json files.""" |
| 51 | + machine_data = None |
| 52 | + old_file_names = [f.name for f in runid.iterdir()] |
| 53 | + for fname in old_file_names: |
| 54 | + src_file = runid / fname |
| 55 | + dest_file = runid_newpath / fname |
| 56 | + if fname == "machine.json": |
| 57 | + with open(src_file, encoding="utf-8") as f: |
| 58 | + machine_data = json.load(f) |
| 59 | + machine_data["machine"] = default_machine_name |
| 60 | + with open(dest_file, "w", encoding="utf-8") as f: |
| 61 | + json.dump(machine_data, f) |
| 62 | + elif fname.endswith(".json"): |
| 63 | + with open(src_file, encoding="utf-8") as f: |
| 64 | + run_data = json.load(f) |
| 65 | + if "params" in run_data and "machine" in run_data["params"]: |
| 66 | + run_data["params"]["machine"] = default_machine_name |
| 67 | + with open(dest_file, "w", encoding="utf-8") as f: |
| 68 | + json.dump(run_data, f) |
| 69 | + return machine_data |
| 70 | + |
| 71 | + |
def _process_runid_folder(runid: Path, runid_newpath: Path, default_machine_name: Optional[str]) -> Optional[dict]:
    """
    Copy *runid* into *runid_newpath*, rewriting machine names when requested.

    Returns the rewritten ``machine.json`` contents when
    *default_machine_name* is given and found, otherwise ``None``.
    """
    # Any stale destination from a previous aggregation is discarded first.
    if runid_newpath.exists():
        shutil.rmtree(runid_newpath)
    if default_machine_name is None:
        shutil.copytree(runid, runid_newpath)
        return None
    runid_newpath.mkdir(parents=True, exist_ok=True)
    return _update_machine_jsons(runid, runid_newpath, default_machine_name)
| 85 | + |
| 86 | + |
def aggregate_benchmark_runs(
    all_commits_df: pd.DataFrame, results_dir: Path, output_dir: Path, default_machine_name: Optional[str] = None
) -> list[dict]:
    """
    Aggregates benchmark runs from the specified results directory and saves them to the output directory.

    Expects a ``<commit_sha>/<version>/results/`` layout under *results_dir*,
    where ``<version>`` looks like a Python version (e.g. ``3.11``).  Commits
    not present in *all_commits_df* are skipped.

    Args:
        all_commits_df (pd.DataFrame): DataFrame containing commit metadata
            (must provide ``commit_sha`` and ``repo_name`` columns).
        results_dir (Path): Path to the directory containing benchmark results.
        output_dir (Path): Path to the directory where merged benchmarks will be saved.
        default_machine_name (Optional[str]): When given, every run directory is
            rewritten to use this machine name.

    Returns:
        list[dict]: One entry per aggregated commit with its repo path,
        metadata, commit sha, and number of run directories found.
    """
    stats = []
    all_commits_dict = _get_all_commits_dict(all_commits_df)

    # FIX: the previous pattern ('*/"[0-9].[0-9]*"/results/') contained
    # literal double-quote characters, so ordinary version directories such
    # as "3.11" could never match and nothing was aggregated.
    for commit_pth in results_dir.glob("*/[0-9].[0-9]*/results/"):
        commit_id = commit_pth.parent.parent.name
        if commit_id not in all_commits_dict:
            continue
        commit_metadata = all_commits_dict[commit_id]
        # Flatten "org/repo" into a single filesystem-safe directory name.
        repo_path = (commit_metadata["repo_name"]).replace("/", "_")
        repo_out_dir = output_dir / repo_path
        repo_out_dir.mkdir(parents=True, exist_ok=True)

        benchmarks_path = commit_pth / "benchmarks.json"
        if benchmarks_path.exists():
            _update_json(benchmarks_path, repo_out_dir / "benchmarks.json")

        asv_conf_path = commit_pth.parent / "asv.conf.json"
        if asv_conf_path.exists():
            _update_json(asv_conf_path, repo_out_dir / "asv.conf.json")

        n_runids = 0
        machine_data = None
        for runid in commit_pth.iterdir():
            if not runid.is_dir():
                continue
            n_runids += 1
            name = default_machine_name if default_machine_name is not None else runid.name
            runid_newpath = output_dir / repo_path / name
            # Keep the last non-None machine.json seen across run folders.
            machine_data = _process_runid_folder(runid, runid_newpath, default_machine_name) or machine_data

        if default_machine_name is not None and machine_data is not None:
            saved_machine_path = output_dir / repo_path / "machine.json"
            with open(saved_machine_path, "w", encoding="utf-8") as f:
                json.dump(machine_data, f)

        stats.append({
            "repo_path": repo_path,
            "metadata": commit_metadata,
            "commit_sha": commit_id,
            "n_runids": n_runids,
        })
    return stats
| 139 | + |
| 140 | + |
def publish_repo(
    repo_url: str,
    repo_local_dir: Path,
    asv_conf_path: Path,
    results_dir: Path,
    html_dir: Path,
    *,
    skip_if_present: bool = True,
) -> None:
    """
    Clone (or reuse) *repo_url* at *repo_local_dir*, point the ASV config at
    *asv_conf_path* to the supplied directories, then publish the results.

    Parameters
    ----------
    repo_url : str
        Full Git URL, e.g. ``"https://github.com/pandas-dev/pandas.git"``.
    repo_local_dir : pathlib.Path
        Where the repo should live locally.
    asv_conf_path : pathlib.Path
        Path to the repository's ``asv.conf.json`` on disk.
    results_dir : pathlib.Path
        Directory for ASV's benchmark result files.
    html_dir : pathlib.Path
        Directory where ASV should write its HTML report.
    skip_if_present : bool, default True
        If *repo_local_dir* already exists, skip cloning and simply use it.
        Set to ``False`` to force a fresh clone each time.

    Raises
    ------
    FileNotFoundError
        If *asv_conf_path* does not exist.
    """
    # Decide whether an existing checkout can be reused as-is.
    reuse_existing = repo_local_dir.exists() and skip_if_present
    if reuse_existing:
        print(f"Repository {repo_local_dir} already exists - reusing.")
    else:
        if repo_local_dir.exists():
            print(f"Removing {repo_local_dir} for a fresh clone…")
            shutil.rmtree(repo_local_dir)
        Repo.clone_from(repo_url, repo_local_dir)

    # Load asv.conf.json and patch the three directory settings.
    asv_conf = json.loads(asv_conf_path.read_text(encoding="utf-8"))
    asv_conf["repo"] = str(repo_local_dir.resolve())
    asv_conf["results_dir"] = str(results_dir.resolve())
    asv_conf["html_dir"] = str(html_dir.resolve())

    cfg = Config.from_json(asv_conf)
    write_json(path=asv_conf_path, data=cfg.__dict__, api_version=1)

    # Publish the results
    Publish.run(cfg)
0 commit comments