diff --git a/.zenodo.json b/.zenodo.json index 3f641a4..6e1f5a9 100644 --- a/.zenodo.json +++ b/.zenodo.json @@ -1,5 +1,5 @@ { - "title": "CADET-RDM: Version 1.1.0", + "title": "CADET-RDM: Version 1.1.1", "upload_type": "software", "creators": [ { @@ -32,7 +32,7 @@ "keywords": [ "research data management", "simulation", "reproducibility", "containerization" ], - "version": "1.1.0", + "version": "1.1.1", "access_right": "open", "communities": [ { "identifier": "open-source" } diff --git a/cadetrdm/__init__.py b/cadetrdm/__init__.py index d706e4b..b9898b5 100644 --- a/cadetrdm/__init__.py +++ b/cadetrdm/__init__.py @@ -1,4 +1,4 @@ -__version__ = "1.1.0" +__version__ = "1.1.1" from cadetrdm.conda_env_utils import prepare_conda_env from cadetrdm.options import Options diff --git a/cadetrdm/logging.py b/cadetrdm/logging.py index 3dc71e2..45c2d99 100644 --- a/cadetrdm/logging.py +++ b/cadetrdm/logging.py @@ -15,7 +15,7 @@ def __init__( output_repo_commit_hash: str, project_repo_branch: str, project_repo_commit_hash: str, - project_repo_folder_name: str, + project_repo_directory_name: str, project_repo_remotes: str, python_sys_args: str, tags: str, @@ -28,7 +28,7 @@ def __init__( self.output_repo_commit_hash = output_repo_commit_hash self.project_repo_branch = project_repo_branch self.project_repo_commit_hash = project_repo_commit_hash - self.project_repo_folder_name = project_repo_folder_name + self.project_repo_directory_name = project_repo_directory_name self.project_repo_remotes = project_repo_remotes self.python_sys_args = python_sys_args self.tags = tags diff --git a/cadetrdm/repositories.py b/cadetrdm/repositories.py index 1d97e88..99eb310 100644 --- a/cadetrdm/repositories.py +++ b/cadetrdm/repositories.py @@ -21,7 +21,7 @@ from typing import List, Optional, Any from urllib.request import urlretrieve -from semantic_version import Version, SimpleSpec +from semantic_version import Version import cadetrdm from cadetrdm import Options @@ -83,7 +83,7 @@ def 
__init__(self, path=None, search_parent_directories=True, *args, **kwargs): self.add = self._git.add - def __enter__(self) -> "Repo": + def __enter__(self) -> git.Repo: return self def __exit__(self, *args: Any) -> None: @@ -541,7 +541,13 @@ def __init__(self, path=None, search_parent_directories=True, *args, **kwargs): super().__init__(path, search_parent_directories, *args, **kwargs) self._metadata = self.load_metadata() - def load_metadata(self): + @property + def metadata(self) -> dict: + """Return metadata information about CADET-RDM repository.""" + return self._metadata + + def load_metadata(self) -> dict: + """Load metadata from file.""" with open(self.data_json_path, "r", encoding="utf-8") as handle: metadata = json.load(handle) if "output_remotes" not in metadata and metadata["is_project_repo"]: @@ -552,6 +558,11 @@ def load_metadata(self): metadata["output_remotes"] = output_remotes return metadata + def save_metadata(self) -> None: + """Save metadata to file.""" + with open(self.data_json_path, "w", encoding="utf-8") as f: + json.dump(self.metadata, f, indent=2) + def add_remote(self, remote_url, remote_name=None): """ Add a remote to the repository. @@ -563,7 +574,7 @@ def add_remote(self, remote_url, remote_name=None): if remote_name is None: remote_name = "origin" self._git_repo.create_remote(remote_name, url=remote_url) - if self._metadata["is_project_repo"]: + if self.metadata["is_project_repo"]: # This directory is a project repository. Use a project repo class to easily access the output repo. output_repo = ProjectRepo(self.path).output_repo @@ -574,7 +585,7 @@ def add_remote(self, remote_url, remote_name=None): output_repo.add_list_of_remotes_in_readme_file("Link to Project Repository", self.remote_urls) output_repo.add("README.md") output_repo.commit("Add remote for project repo", verbosity=0, add_all=False) - if self._metadata["is_output_repo"]: + if self.metadata["is_output_repo"]: # This directory is an output repository. 
project_repo = ProjectRepo(self.path.parent) project_repo.update_output_remotes_json() @@ -817,22 +828,23 @@ def __init__( test_for_lfs() if output_directory is not None: - print("Deprecation Warning. Setting the output directory manually during repo instantiation is deprecated" - " and will be removed in a future update.") + print( + "Deprecation Warning. Setting the output directory manually during " + "repo instantiation is deprecated and will be removed in a future " + " update." + ) if not self.data_json_path.exists(): raise RuntimeError(f"Directory {self.path} does not appear to be a CADET-RDM repository.") - self._project_uuid = self._metadata["project_uuid"] - self._output_uuid = self._metadata["output_uuid"] - self._output_directory = self._metadata["output_remotes"]["output_directory_name"] - self._update_version() + changes_were_made = self._update_version() - if not (self.path / self._output_directory).exists(): + if not (self.path / self.output_directory).exists(): print("Output repository was missing, cloning now.") self._clone_output_repo() + self.output_repo = OutputRepo( - self.path / self._output_directory, + self.path / self.output_directory, self, ) @@ -844,6 +856,36 @@ def __init__( self._package_dir = package_dir + if changes_were_made: + cadetrdm_version = Version(cadetrdm.__version__) + print( + f"Repo version {self.metadata['cadet_rdm_version']} was outdated. " + f"Current CADET-RDM version is {cadetrdm.__version__}.\n" + "Repo has been updated." 
@property
def project_uuid(self) -> str:
    """Return the project UUID stored in the repository metadata.

    ``self.metadata`` is the dict loaded from the repository's JSON
    metadata file, so the value has to be read by key; attribute access
    on a dict would raise ``AttributeError``.
    """
    return self.metadata["project_uuid"]

@property
def output_uuid(self) -> str:
    """Return the output UUID stored in the repository metadata."""
    return self.metadata["output_uuid"]

@property
def output_directory(self) -> str:
    """Return the output directory name recorded under ``output_remotes``."""
    return self.metadata["output_remotes"]["output_directory_name"]
- ) - metadata["cadet_rdm_version"] = str(cadetrdm_version) - with open(self.data_json_path, "w", encoding="utf-8") as f: - json.dump(metadata, f, indent=2) - self.add(self.data_json_path) - self.commit( - f"Update CADET-RDM version to {cadetrdm_version}", - add_all=False - ) + if current_version < Version("1.1.1"): + changes_were_made = True + output_remotes = self.metadata.get("output_remotes") + if isinstance(output_remotes, dict): + if "output_folder_name" in output_remotes: + output_remotes["output_directory_name"] = output_remotes.pop( + "output_folder_name" + ) + + return changes_were_made def _clone_output_repo(self, multi_options: List[str] = None): - metadata = self.load_metadata() - output_remotes = metadata["output_remotes"] + output_remotes = self.metadata["output_remotes"] output_path = self.path / output_remotes["output_directory_name"] ssh_remotes = list(output_remotes["output_remotes"].values()) if len(ssh_remotes) == 0: @@ -1061,7 +1097,7 @@ def update_output_main_logs( output_repo_commit_hash=output_repo_hash, project_repo_branch=str(self.active_branch), project_repo_commit_hash=str(self.head.commit), - project_repo_folder_name=self.path.name, + project_repo_directory_name=self.path.name, project_repo_remotes=self.remote_urls, python_sys_args=str(sys.argv), tags=", ".join(self.tags), @@ -1139,18 +1175,15 @@ def check(self, commit=True): if commit: self.output_repo.commit(message="Update remote links", add_all=False, verbosity=1) - def update_output_remotes_json(self): + def update_output_remotes_json(self, load_metadata=True): output_repo_remotes = self.output_repo.remote_urls self.add_list_of_remotes_in_readme_file("Link to Output Repository", output_repo_remotes) - with open(self.data_json_path, "r", encoding="utf-8") as file_handle: - metadata = json.load(file_handle) - + metadata = self.load_metadata() if load_metadata else self.metadata remotes_dict = {remote.name: str(remote.url) for remote in self.output_repo.remotes} - 
metadata["output_remotes"] = {"output_directory_name": self._output_directory, "output_remotes": remotes_dict} - - with open(self.data_json_path, "w", encoding="utf-8") as file_handle: - json.dump(metadata, file_handle, indent=2) + metadata["output_remotes"] = {"output_directory_name": self.output_directory, "output_remotes": remotes_dict} + self._metadata = metadata + self.save_metadata() self.add(self.data_json_path) @@ -1200,8 +1233,8 @@ def remove_cached_files(self): """ Delete all previously cached results. """ - if (self.path / (self._output_directory + "_cached")).exists(): - delete_path(self.path / (self._output_directory + "_cached")) + if (self.path / (self.output_directory + "_cached")).exists(): + delete_path(self.path / (self.output_directory + "_cached")) def import_static_data(self, source_path: Path | str, commit_message): """ @@ -1322,7 +1355,7 @@ def cache_folder_for_branch(self, branch_name=None): branch_name_path = branch_name.replace("/", "_") # Define the target directory - cache_folder = self.path / f"{self._output_directory}_cached" / str(branch_name_path) + cache_folder = self.path / f"{self.output_directory}_cached" / str(branch_name_path) return cache_folder def copy_data_to_cache(self, branch_name=None, target_folder=None): @@ -1431,7 +1464,7 @@ def _commit_output_data( commit_return = self.output_repo._git.commit("-m", message) self.copy_data_to_cache() self.update_output_main_logs(output_dict, options) - main_cach_path = self.path / (self._output_directory + "_cached") / self.output_repo.main_branch + main_cach_path = self.path / (self.output_directory + "_cached") / self.output_repo.main_branch if main_cach_path.exists(): delete_path(main_cach_path) self.copy_data_to_cache(self.output_repo.main_branch) @@ -1588,7 +1621,7 @@ def add_filetype_to_lfs(self, file_type): def _update_version(self) -> None: """Update output repo to latest CADET-RDM specs.""" - metadata = self._metadata + metadata = self.metadata cadetrdm_version = 
def _rename_project_repo_folder_to_directory_in_log(self) -> None:
    """Rename the log column ``project_repo_folder_name`` to ``project_repo_directory_name``.

    Checks out ``self.main_branch``, rewrites the tab-separated log at
    ``self.output_log_file_path`` with the renamed column (position kept),
    then stages and commits the file. If the log has no header or the old
    column is absent, nothing is written, staged or committed.
    """
    old_key = "project_repo_folder_name"
    new_key = "project_repo_directory_name"

    # The log is read from and committed to the main branch.
    self.checkout(self.main_branch)

    # newline="" leaves line-ending handling to the csv module.
    with open(self.output_log_file_path, "r", newline="") as f:
        reader = csv.DictReader(f, delimiter="\t")
        # Take the header from the reader, not from the first data row, so a
        # log that only contains the header line is still migrated.
        fieldnames = list(reader.fieldnames or [])
        rows = list(reader)

    # Nothing to do if the old column does not exist (or the file is empty).
    if old_key not in fieldnames:
        return

    # Rename the key in the header (keeping its position) and in every row.
    fieldnames[fieldnames.index(old_key)] = new_key
    for row in rows:
        row[new_key] = row.pop(old_key)

    # Write updated data back to file.
    with open(self.output_log_file_path, "w", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames, delimiter="\t")
        writer.writeheader()
        writer.writerows(rows)

    # Stage the file through its path attribute instead of a hard-coded name.
    self.add(self.output_log_file_path)
    self.commit(
        message="Rename 'project_repo_folder_name' to 'project_repo_directory_name' in log.tsv",
        add_all=False,
    )