From 90fa8ee3c9ca65982f0050fded55670344cb6381 Mon Sep 17 00:00:00 2001 From: Hannah Lanzrath Date: Tue, 3 Feb 2026 16:38:58 +0100 Subject: [PATCH 1/8] Change attribute names from "folder" to "directory" in LogEntry and ProjectRepo classes Change attribute name in ProjectRepo --- cadetrdm/logging.py | 4 ++-- cadetrdm/repositories.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cadetrdm/logging.py b/cadetrdm/logging.py index 3dc71e2..45c2d99 100644 --- a/cadetrdm/logging.py +++ b/cadetrdm/logging.py @@ -15,7 +15,7 @@ def __init__( output_repo_commit_hash: str, project_repo_branch: str, project_repo_commit_hash: str, - project_repo_folder_name: str, + project_repo_directory_name: str, project_repo_remotes: str, python_sys_args: str, tags: str, @@ -28,7 +28,7 @@ def __init__( self.output_repo_commit_hash = output_repo_commit_hash self.project_repo_branch = project_repo_branch self.project_repo_commit_hash = project_repo_commit_hash - self.project_repo_folder_name = project_repo_folder_name + self.project_repo_directory_name = project_repo_directory_name self.project_repo_remotes = project_repo_remotes self.python_sys_args = python_sys_args self.tags = tags diff --git a/cadetrdm/repositories.py b/cadetrdm/repositories.py index 1d97e88..424f6e6 100644 --- a/cadetrdm/repositories.py +++ b/cadetrdm/repositories.py @@ -1061,7 +1061,7 @@ def update_output_main_logs( output_repo_commit_hash=output_repo_hash, project_repo_branch=str(self.active_branch), project_repo_commit_hash=str(self.head.commit), - project_repo_folder_name=self.path.name, + project_repo_directory_name=self.path.name, project_repo_remotes=self.remote_urls, python_sys_args=str(sys.argv), tags=", ".join(self.tags), From 694ab58016d7d205f7156156a1eced039a26cedb Mon Sep 17 00:00:00 2001 From: Hannah Lanzrath Date: Tue, 3 Feb 2026 16:41:09 +0100 Subject: [PATCH 2/8] Add "directory" naming backwards compatibility to ProjectRepo and OutputRepo Add "directory" naming backwards compatibility 
to OutputRepo --- cadetrdm/repositories.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/cadetrdm/repositories.py b/cadetrdm/repositories.py index 424f6e6..8e2bf20 100644 --- a/cadetrdm/repositories.py +++ b/cadetrdm/repositories.py @@ -886,6 +886,13 @@ def _update_version(self) -> None: output_remotes_path = self.path / "output_remotes.json" delete_path(output_remotes_path) self.add(output_remotes_path) + if current_version < Version("1.1.1"): + output_remotes = metadata.get("output_remotes") + if isinstance(output_remotes, dict): + if "output_folder_name" in output_remotes: + output_remotes["output_directory_name"] = output_remotes.pop( + "output_folder_name" + ) if changes_were_made: print( f"Repo version {metadata['cadet_rdm_version']} was outdated. " @@ -1598,6 +1605,14 @@ def _update_version(self) -> None: changes_were_made = False + if current_version < Version("1.1.1"): + changes_were_made = True + if self.output_log_file_path.exists(): + warnings.warn( + "Repo version has outdated log column names. " + "Renaming 'project_repo_folder_name' column in output log.tsv." 
+ ) + self._rename_project_repo_folder_to_directory_in_log() if current_version < Version("0.0.9"): changes_were_made = True self._convert_csv_to_tsv_if_necessary() From c0c49928887e75b798739129f05e38e8f501c935 Mon Sep 17 00:00:00 2001 From: Hannah Lanzrath Date: Tue, 3 Feb 2026 16:43:06 +0100 Subject: [PATCH 3/8] Add function to update "folder" to "directory" in metadata --- cadetrdm/repositories.py | 43 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/cadetrdm/repositories.py b/cadetrdm/repositories.py index 8e2bf20..888d886 100644 --- a/cadetrdm/repositories.py +++ b/cadetrdm/repositories.py @@ -1779,6 +1779,49 @@ def _update_log_hashes(self): self.add(self.output_log_file_path) self.commit(message="Updated log hashes", add_all=False) + def _rename_project_repo_folder_to_directory_in_log(self) -> None: + """ + Rename the TSV column header 'project_repo_folder_name' + to 'project_repo_directory_name'. + """ + self.checkout(self.main_branch) + + with open(self.output_log_file_path, "r") as f: + reader = csv.DictReader(f, delimiter="\t") + rows = list(reader) + + if not rows: + return + + old_key = "project_repo_folder_name" + new_key = "project_repo_directory_name" + + # Nothing to do if the old column does not exist + if old_key not in rows[0]: + return + + fieldnames = list(rows[0].keys()) + + # Rename key in rows + for row in rows: + row[new_key] = row.pop(old_key) + + # Rename key in header, keep position + idx = fieldnames.index(old_key) + fieldnames[idx] = new_key + + # Write updated data back to file + with open(self.output_log_file_path, "w", newline="") as f: + writer = csv.DictWriter(f, fieldnames=fieldnames, delimiter="\t") + writer.writeheader() + writer.writerows(rows) + + self.add("log.tsv") + self.commit( + message="Rename 'project_repo_folder_name' to 'project_repo_directory_name' in log.tsv", + add_all=False, + ) + def _add_branch_name_to_log(self) -> None: """ Update the TSV file by adding a 
'project_repo_branch' column. From 91b168a5aaece5308e9e2cb1acc52d0c6db8a699 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Schm=C3=B6lder?= Date: Wed, 4 Feb 2026 19:00:38 +0100 Subject: [PATCH 4/8] chore: Cleanup --- cadetrdm/repositories.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/cadetrdm/repositories.py b/cadetrdm/repositories.py index 888d886..b953580 100644 --- a/cadetrdm/repositories.py +++ b/cadetrdm/repositories.py @@ -21,7 +21,7 @@ from typing import List, Optional, Any from urllib.request import urlretrieve -from semantic_version import Version, SimpleSpec +from semantic_version import Version import cadetrdm from cadetrdm import Options @@ -83,7 +83,7 @@ def __init__(self, path=None, search_parent_directories=True, *args, **kwargs): self.add = self._git.add - def __enter__(self) -> "Repo": + def __enter__(self) -> git.Repo: return self def __exit__(self, *args: Any) -> None: @@ -541,7 +541,8 @@ def __init__(self, path=None, search_parent_directories=True, *args, **kwargs): super().__init__(path, search_parent_directories, *args, **kwargs) self._metadata = self.load_metadata() - def load_metadata(self): + def load_metadata(self) -> dict: + """Load metadata from file.""" with open(self.data_json_path, "r", encoding="utf-8") as handle: metadata = json.load(handle) if "output_remotes" not in metadata and metadata["is_project_repo"]: @@ -817,8 +818,11 @@ def __init__( test_for_lfs() if output_directory is not None: - print("Deprecation Warning. Setting the output directory manually during repo instantiation is deprecated" - " and will be removed in a future update.") + print( + "Deprecation Warning. Setting the output directory manually during " + "repo instantiation is deprecated and will be removed in a future " + " update." 
+ ) if not self.data_json_path.exists(): raise RuntimeError(f"Directory {self.path} does not appear to be a CADET-RDM repository.") @@ -1780,10 +1784,7 @@ def _update_log_hashes(self): self.commit(message="Updated log hashes", add_all=False) def _rename_project_repo_folder_to_directory_in_log(self) -> None: - """ - Rename the TSV column header 'project_repo_folder_name' - to 'project_repo_directory_name'. - """ + """Rename the TSV column header from folder to directory.""" self.checkout(self.main_branch) with open(self.output_log_file_path, "r") as f: From 9ff94fc6e79110a7bb3fc5f93088a633a9cb488e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Schm=C3=B6lder?= Date: Thu, 5 Feb 2026 15:32:24 +0100 Subject: [PATCH 5/8] Add properties for metadata, uuids, and output directory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Also move version update before the metadata is checked Co-authored-by: Johannes Schmölder Co-authored-by: Hannah Lanzrath --- cadetrdm/repositories.py | 52 ++++++++++++++++++++++++++-------------- 1 file changed, 34 insertions(+), 18 deletions(-) diff --git a/cadetrdm/repositories.py b/cadetrdm/repositories.py index b953580..3ef923c 100644 --- a/cadetrdm/repositories.py +++ b/cadetrdm/repositories.py @@ -541,6 +541,11 @@ def __init__(self, path=None, search_parent_directories=True, *args, **kwargs): super().__init__(path, search_parent_directories, *args, **kwargs) self._metadata = self.load_metadata() + @property + def metadata(self) -> dict: + """Return metadata information about CADET-RDM repository.""" + return self._metadata + def load_metadata(self) -> dict: """Load metadata from file.""" with open(self.data_json_path, "r", encoding="utf-8") as handle: @@ -564,7 +569,7 @@ def add_remote(self, remote_url, remote_name=None): if remote_name is None: remote_name = "origin" self._git_repo.create_remote(remote_name, url=remote_url) - if self._metadata["is_project_repo"]: + if 
self.metadata["is_project_repo"]: # This directory is a project repository. Use a project repo class to easily access the output repo. output_repo = ProjectRepo(self.path).output_repo @@ -575,7 +580,7 @@ def add_remote(self, remote_url, remote_name=None): output_repo.add_list_of_remotes_in_readme_file("Link to Project Repository", self.remote_urls) output_repo.add("README.md") output_repo.commit("Add remote for project repo", verbosity=0, add_all=False) - if self._metadata["is_output_repo"]: + if self.metadata["is_output_repo"]: # This directory is an output repository. project_repo = ProjectRepo(self.path.parent) project_repo.update_output_remotes_json() @@ -827,16 +832,14 @@ def __init__( if not self.data_json_path.exists(): raise RuntimeError(f"Directory {self.path} does not appear to be a CADET-RDM repository.") - self._project_uuid = self._metadata["project_uuid"] - self._output_uuid = self._metadata["output_uuid"] - self._output_directory = self._metadata["output_remotes"]["output_directory_name"] self._update_version() - if not (self.path / self._output_directory).exists(): + if not (self.path / self.output_directory).exists(): print("Output repository was missing, cloning now.") self._clone_output_repo() + self.output_repo = OutputRepo( - self.path / self._output_directory, + self.path / self.output_directory, self, ) @@ -848,6 +851,21 @@ def __init__( self._package_dir = package_dir + @property + def project_uuid(self) -> str: + """Return Project UUID.""" + return self.metadata["project_uuid"] + + @property + def output_uuid(self) -> str: + """Return Output UUID.""" + return self.metadata["output_uuid"] + + @property + def output_directory(self) -> str: + """Return output directory.""" + return self.metadata["output_remotes"]["output_directory_name"] + @property def name(self): return self.path.parts[-1] @@ -872,9 +890,8 @@ def module(self) -> ModuleType: def _update_version(self) -> None: """Update project repo to latest CADET-RDM specs.""" - metadata = 
self._metadata cadetrdm_version = Version(cadetrdm.__version__) - current_version = Version(metadata["cadet_rdm_version"]) + current_version = Version(self.metadata["cadet_rdm_version"]) # Skip if versions match if cadetrdm_version == current_version: @@ -891,7 +908,7 @@ def _update_version(self) -> None: delete_path(output_remotes_path) self.add(output_remotes_path) if current_version < Version("1.1.1"): - output_remotes = metadata.get("output_remotes") + output_remotes = self.metadata.get("output_remotes") if isinstance(output_remotes, dict): if "output_folder_name" in output_remotes: output_remotes["output_directory_name"] = output_remotes.pop( @@ -913,8 +930,7 @@ def _update_version(self) -> None: ) def _clone_output_repo(self, multi_options: List[str] = None): - metadata = self.load_metadata() - output_remotes = metadata["output_remotes"] + output_remotes = self.metadata["output_remotes"] output_path = self.path / output_remotes["output_directory_name"] ssh_remotes = list(output_remotes["output_remotes"].values()) if len(ssh_remotes) == 0: @@ -1158,7 +1174,7 @@ def update_output_remotes_json(self): metadata = json.load(file_handle) remotes_dict = {remote.name: str(remote.url) for remote in self.output_repo.remotes} - metadata["output_remotes"] = {"output_directory_name": self._output_directory, "output_remotes": remotes_dict} + metadata["output_remotes"] = {"output_directory_name": self.output_directory, "output_remotes": remotes_dict} with open(self.data_json_path, "w", encoding="utf-8") as file_handle: json.dump(metadata, file_handle, indent=2) @@ -1211,8 +1227,8 @@ def remove_cached_files(self): """ Delete all previously cached results. 
""" - if (self.path / (self._output_directory + "_cached")).exists(): - delete_path(self.path / (self._output_directory + "_cached")) + if (self.path / (self.output_directory + "_cached")).exists(): + delete_path(self.path / (self.output_directory + "_cached")) def import_static_data(self, source_path: Path | str, commit_message): """ @@ -1333,7 +1349,7 @@ def cache_folder_for_branch(self, branch_name=None): branch_name_path = branch_name.replace("/", "_") # Define the target directory - cache_folder = self.path / f"{self._output_directory}_cached" / str(branch_name_path) + cache_folder = self.path / f"{self.output_directory}_cached" / str(branch_name_path) return cache_folder def copy_data_to_cache(self, branch_name=None, target_folder=None): @@ -1442,7 +1458,7 @@ def _commit_output_data( commit_return = self.output_repo._git.commit("-m", message) self.copy_data_to_cache() self.update_output_main_logs(output_dict, options) - main_cach_path = self.path / (self._output_directory + "_cached") / self.output_repo.main_branch + main_cach_path = self.path / (self.output_directory + "_cached") / self.output_repo.main_branch if main_cach_path.exists(): delete_path(main_cach_path) self.copy_data_to_cache(self.output_repo.main_branch) @@ -1599,7 +1615,7 @@ def add_filetype_to_lfs(self, file_type): def _update_version(self) -> None: """Update output repo to latest CADET-RDM specs.""" - metadata = self._metadata + metadata = self.metadata cadetrdm_version = Version(cadetrdm.__version__) current_version = Version(metadata["cadet_rdm_version"]) From 2fbcce998160363fa9ad12e1cb9ba43ffd093d2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Schm=C3=B6lder?= Date: Wed, 4 Feb 2026 19:17:04 +0100 Subject: [PATCH 6/8] Commit repo version update after initializing output repo --- cadetrdm/repositories.py | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/cadetrdm/repositories.py b/cadetrdm/repositories.py index 3ef923c..4464cd7 
100644 --- a/cadetrdm/repositories.py +++ b/cadetrdm/repositories.py @@ -832,7 +832,7 @@ def __init__( if not self.data_json_path.exists(): raise RuntimeError(f"Directory {self.path} does not appear to be a CADET-RDM repository.") - self._update_version() + changes_were_made = self._update_version() if not (self.path / self.output_directory).exists(): print("Output repository was missing, cloning now.") @@ -851,6 +851,21 @@ def __init__( self._package_dir = package_dir + if changes_were_made: + cadetrdm_version = Version(cadetrdm.__version__) + print( + f"Repo version {self.metadata['cadet_rdm_version']} was outdated. " + f"Current CADET-RDM version is {cadetrdm.__version__}.\n" + "Repo has been updated." + ) + self.metadata["cadet_rdm_version"] = str(cadetrdm_version) + self.save_metadata() + self.add(self.data_json_path) + self.commit( + f"Update CADET-RDM version to {cadetrdm_version}", + add_all=False + ) + @property def project_uuid(self) -> str: """Return Project UUID.""" @@ -908,26 +923,15 @@ def _update_version(self) -> None: delete_path(output_remotes_path) self.add(output_remotes_path) if current_version < Version("1.1.1"): + changes_were_made = True output_remotes = self.metadata.get("output_remotes") if isinstance(output_remotes, dict): if "output_folder_name" in output_remotes: output_remotes["output_directory_name"] = output_remotes.pop( "output_folder_name" ) - if changes_were_made: - print( - f"Repo version {metadata['cadet_rdm_version']} was outdated. " - f"Current CADET-RDM version is {cadetrdm.__version__}.\n" - "Repo has been updated." 
- ) - metadata["cadet_rdm_version"] = str(cadetrdm_version) - with open(self.data_json_path, "w", encoding="utf-8") as f: - json.dump(metadata, f, indent=2) - self.add(self.data_json_path) - self.commit( - f"Update CADET-RDM version to {cadetrdm_version}", - add_all=False - ) + + return changes_were_made def _clone_output_repo(self, multi_options: List[str] = None): output_remotes = self.metadata["output_remotes"] From 9d57f4299d2e2e2912532c5bc18fcefa47c16fe7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Schm=C3=B6lder?= Date: Wed, 4 Feb 2026 19:11:12 +0100 Subject: [PATCH 7/8] Add method to save metadata --- cadetrdm/repositories.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/cadetrdm/repositories.py b/cadetrdm/repositories.py index 4464cd7..99eb310 100644 --- a/cadetrdm/repositories.py +++ b/cadetrdm/repositories.py @@ -558,6 +558,11 @@ def load_metadata(self) -> dict: metadata["output_remotes"] = output_remotes return metadata + def save_metadata(self) -> None: + """Save metadata to file.""" + with open(self.data_json_path, "w", encoding="utf-8") as f: + json.dump(self.metadata, f, indent=2) + def add_remote(self, remote_url, remote_name=None): """ Add a remote to the repository. 
@@ -1170,18 +1175,15 @@ def check(self, commit=True): if commit: self.output_repo.commit(message="Update remote links", add_all=False, verbosity=1) - def update_output_remotes_json(self): + def update_output_remotes_json(self, load_metadata=True): output_repo_remotes = self.output_repo.remote_urls self.add_list_of_remotes_in_readme_file("Link to Output Repository", output_repo_remotes) - with open(self.data_json_path, "r", encoding="utf-8") as file_handle: - metadata = json.load(file_handle) - + metadata = self.load_metadata() if load_metadata else self.metadata remotes_dict = {remote.name: str(remote.url) for remote in self.output_repo.remotes} metadata["output_remotes"] = {"output_directory_name": self.output_directory, "output_remotes": remotes_dict} - - with open(self.data_json_path, "w", encoding="utf-8") as file_handle: - json.dump(metadata, file_handle, indent=2) + self._metadata = metadata + self.save_metadata() self.add(self.data_json_path) From a0226306cf021f06bfb8de53a5747ca046c649dd Mon Sep 17 00:00:00 2001 From: Hannah Lanzrath Date: Tue, 3 Feb 2026 16:43:36 +0100 Subject: [PATCH 8/8] chore: Version Bump to 1.1.1 and update Zenodo.json Update Zenodo.json --- .zenodo.json | 4 ++-- cadetrdm/__init__.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.zenodo.json b/.zenodo.json index 3f641a4..6e1f5a9 100644 --- a/.zenodo.json +++ b/.zenodo.json @@ -1,5 +1,5 @@ { - "title": "CADET-RDM: Version 1.1.0", + "title": "CADET-RDM: Version 1.1.1", "upload_type": "software", "creators": [ { @@ -32,7 +32,7 @@ "keywords": [ "research data management", "simulation", "reproducibility", "containerization" ], - "version": "1.1.0", + "version": "1.1.1", "access_right": "open", "communities": [ { "identifier": "open-source" } diff --git a/cadetrdm/__init__.py b/cadetrdm/__init__.py index d706e4b..b9898b5 100644 --- a/cadetrdm/__init__.py +++ b/cadetrdm/__init__.py @@ -1,4 +1,4 @@ -__version__ = "1.1.0" +__version__ = "1.1.1" from 
cadetrdm.conda_env_utils import prepare_conda_env from cadetrdm.options import Options