Skip to content
4 changes: 2 additions & 2 deletions .zenodo.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"title": "CADET-RDM: Version 1.1.0",
"title": "CADET-RDM: Version 1.1.1",
"upload_type": "software",
"creators": [
{
Expand Down Expand Up @@ -32,7 +32,7 @@
"keywords": [
"research data management", "simulation", "reproducibility", "containerization"
],
"version": "1.1.0",
"version": "1.1.1",
"access_right": "open",
"communities": [
{ "identifier": "open-source" }
Expand Down
2 changes: 1 addition & 1 deletion cadetrdm/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = "1.1.0"
__version__ = "1.1.1"

from cadetrdm.conda_env_utils import prepare_conda_env
from cadetrdm.options import Options
Expand Down
4 changes: 2 additions & 2 deletions cadetrdm/logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def __init__(
output_repo_commit_hash: str,
project_repo_branch: str,
project_repo_commit_hash: str,
project_repo_folder_name: str,
project_repo_directory_name: str,
project_repo_remotes: str,
python_sys_args: str,
tags: str,
Expand All @@ -28,7 +28,7 @@ def __init__(
self.output_repo_commit_hash = output_repo_commit_hash
self.project_repo_branch = project_repo_branch
self.project_repo_commit_hash = project_repo_commit_hash
self.project_repo_folder_name = project_repo_folder_name
self.project_repo_directory_name = project_repo_directory_name
self.project_repo_remotes = project_repo_remotes
self.python_sys_args = python_sys_args
self.tags = tags
Expand Down
171 changes: 126 additions & 45 deletions cadetrdm/repositories.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from typing import List, Optional, Any
from urllib.request import urlretrieve

from semantic_version import Version, SimpleSpec
from semantic_version import Version

import cadetrdm
from cadetrdm import Options
Expand Down Expand Up @@ -83,7 +83,7 @@ def __init__(self, path=None, search_parent_directories=True, *args, **kwargs):

self.add = self._git.add

def __enter__(self) -> "Repo":
def __enter__(self) -> git.Repo:
return self

def __exit__(self, *args: Any) -> None:
Expand Down Expand Up @@ -541,7 +541,13 @@ def __init__(self, path=None, search_parent_directories=True, *args, **kwargs):
super().__init__(path, search_parent_directories, *args, **kwargs)
self._metadata = self.load_metadata()

def load_metadata(self):
@property
def metadata(self) -> dict:
    """Return the metadata dictionary currently held by this repository object."""
    current_metadata = self._metadata
    return current_metadata

def load_metadata(self) -> dict:
"""Load metadata from file."""
with open(self.data_json_path, "r", encoding="utf-8") as handle:
metadata = json.load(handle)
if "output_remotes" not in metadata and metadata["is_project_repo"]:
Expand All @@ -552,6 +558,11 @@ def load_metadata(self):
metadata["output_remotes"] = output_remotes
return metadata

def save_metadata(self) -> None:
    """Write the current metadata to ``self.data_json_path`` as indented JSON."""
    target_path = self.data_json_path
    with open(target_path, "w", encoding="utf-8") as handle:
        json.dump(self.metadata, fp=handle, indent=2)

def add_remote(self, remote_url, remote_name=None):
"""
Add a remote to the repository.
Expand All @@ -563,7 +574,7 @@ def add_remote(self, remote_url, remote_name=None):
if remote_name is None:
remote_name = "origin"
self._git_repo.create_remote(remote_name, url=remote_url)
if self._metadata["is_project_repo"]:
if self.metadata["is_project_repo"]:
# This directory is a project repository. Use a project repo class to easily access the output repo.
output_repo = ProjectRepo(self.path).output_repo

Expand All @@ -574,7 +585,7 @@ def add_remote(self, remote_url, remote_name=None):
output_repo.add_list_of_remotes_in_readme_file("Link to Project Repository", self.remote_urls)
output_repo.add("README.md")
output_repo.commit("Add remote for project repo", verbosity=0, add_all=False)
if self._metadata["is_output_repo"]:
if self.metadata["is_output_repo"]:
# This directory is an output repository.
project_repo = ProjectRepo(self.path.parent)
project_repo.update_output_remotes_json()
Expand Down Expand Up @@ -817,22 +828,23 @@ def __init__(
test_for_lfs()

if output_directory is not None:
print("Deprecation Warning. Setting the output directory manually during repo instantiation is deprecated"
" and will be removed in a future update.")
print(
"Deprecation Warning. Setting the output directory manually during "
"repo instantiation is deprecated and will be removed in a future "
" update."
)

if not self.data_json_path.exists():
raise RuntimeError(f"Directory {self.path} does not appear to be a CADET-RDM repository.")

self._project_uuid = self._metadata["project_uuid"]
self._output_uuid = self._metadata["output_uuid"]
self._output_directory = self._metadata["output_remotes"]["output_directory_name"]
self._update_version()
changes_were_made = self._update_version()

if not (self.path / self._output_directory).exists():
if not (self.path / self.output_directory).exists():
print("Output repository was missing, cloning now.")
self._clone_output_repo()

self.output_repo = OutputRepo(
self.path / self._output_directory,
self.path / self.output_directory,
self,
)

Expand All @@ -844,6 +856,36 @@ def __init__(

self._package_dir = package_dir

if changes_were_made:
cadetrdm_version = Version(cadetrdm.__version__)
print(
f"Repo version {self.metadata['cadet_rdm_version']} was outdated. "
f"Current CADET-RDM version is {cadetrdm.__version__}.\n"
"Repo has been updated."
)
self.metadata["cadet_rdm_version"] = str(cadetrdm_version)
self.save_metadata()
self.add(self.data_json_path)
self.commit(
f"Update CADET-RDM version to {cadetrdm_version}",
add_all=False
)

@property
def project_uuid(self) -> str:
    """Return the project UUID recorded in the repository metadata.

    Raises
    ------
    KeyError
        If the metadata has no ``"project_uuid"`` entry.
    """
    # The metadata is a plain dict (loaded via json), so it must be indexed
    # by key; attribute access would raise AttributeError.
    return self.metadata["project_uuid"]

@property
def output_uuid(self) -> str:
    """Return the output UUID recorded in the repository metadata."""
    repo_metadata = self.metadata
    return repo_metadata["output_uuid"]

@property
def output_directory(self) -> str:
    """Return the output directory name stored under ``output_remotes`` in the metadata."""
    remotes_entry = self.metadata["output_remotes"]
    return remotes_entry["output_directory_name"]

@property
def name(self):
    """Return the last component of the repository path."""
    path_components = self.path.parts
    return path_components[-1]
Expand All @@ -868,9 +910,8 @@ def module(self) -> ModuleType:

def _update_version(self) -> None:
"""Update project repo to latest CADET-RDM specs."""
metadata = self._metadata
cadetrdm_version = Version(cadetrdm.__version__)
current_version = Version(metadata["cadet_rdm_version"])
current_version = Version(self.metadata["cadet_rdm_version"])

# Skip if versions match
if cadetrdm_version == current_version:
Expand All @@ -886,24 +927,19 @@ def _update_version(self) -> None:
output_remotes_path = self.path / "output_remotes.json"
delete_path(output_remotes_path)
self.add(output_remotes_path)
if changes_were_made:
print(
f"Repo version {metadata['cadet_rdm_version']} was outdated. "
f"Current CADET-RDM version is {cadetrdm.__version__}.\n"
"Repo has been updated."
)
metadata["cadet_rdm_version"] = str(cadetrdm_version)
with open(self.data_json_path, "w", encoding="utf-8") as f:
json.dump(metadata, f, indent=2)
self.add(self.data_json_path)
self.commit(
f"Update CADET-RDM version to {cadetrdm_version}",
add_all=False
)
if current_version < Version("1.1.1"):
changes_were_made = True
output_remotes = self.metadata.get("output_remotes")
if isinstance(output_remotes, dict):
if "output_folder_name" in output_remotes:
output_remotes["output_directory_name"] = output_remotes.pop(
"output_folder_name"
)

return changes_were_made

def _clone_output_repo(self, multi_options: List[str] = None):
metadata = self.load_metadata()
output_remotes = metadata["output_remotes"]
output_remotes = self.metadata["output_remotes"]
output_path = self.path / output_remotes["output_directory_name"]
ssh_remotes = list(output_remotes["output_remotes"].values())
if len(ssh_remotes) == 0:
Expand Down Expand Up @@ -1061,7 +1097,7 @@ def update_output_main_logs(
output_repo_commit_hash=output_repo_hash,
project_repo_branch=str(self.active_branch),
project_repo_commit_hash=str(self.head.commit),
project_repo_folder_name=self.path.name,
project_repo_directory_name=self.path.name,
project_repo_remotes=self.remote_urls,
python_sys_args=str(sys.argv),
tags=", ".join(self.tags),
Expand Down Expand Up @@ -1139,18 +1175,15 @@ def check(self, commit=True):
if commit:
self.output_repo.commit(message="Update remote links", add_all=False, verbosity=1)

def update_output_remotes_json(self):
def update_output_remotes_json(self, load_metadata=True):
output_repo_remotes = self.output_repo.remote_urls
self.add_list_of_remotes_in_readme_file("Link to Output Repository", output_repo_remotes)

with open(self.data_json_path, "r", encoding="utf-8") as file_handle:
metadata = json.load(file_handle)

metadata = self.load_metadata() if load_metadata else self.metadata
remotes_dict = {remote.name: str(remote.url) for remote in self.output_repo.remotes}
metadata["output_remotes"] = {"output_directory_name": self._output_directory, "output_remotes": remotes_dict}

with open(self.data_json_path, "w", encoding="utf-8") as file_handle:
json.dump(metadata, file_handle, indent=2)
metadata["output_remotes"] = {"output_directory_name": self.output_directory, "output_remotes": remotes_dict}
self._metadata = metadata
self.save_metadata()

self.add(self.data_json_path)

Expand Down Expand Up @@ -1200,8 +1233,8 @@ def remove_cached_files(self):
"""
Delete all previously cached results.
"""
if (self.path / (self._output_directory + "_cached")).exists():
delete_path(self.path / (self._output_directory + "_cached"))
if (self.path / (self.output_directory + "_cached")).exists():
delete_path(self.path / (self.output_directory + "_cached"))

def import_static_data(self, source_path: Path | str, commit_message):
"""
Expand Down Expand Up @@ -1322,7 +1355,7 @@ def cache_folder_for_branch(self, branch_name=None):
branch_name_path = branch_name.replace("/", "_")

# Define the target directory
cache_folder = self.path / f"{self._output_directory}_cached" / str(branch_name_path)
cache_folder = self.path / f"{self.output_directory}_cached" / str(branch_name_path)
return cache_folder

def copy_data_to_cache(self, branch_name=None, target_folder=None):
Expand Down Expand Up @@ -1431,7 +1464,7 @@ def _commit_output_data(
commit_return = self.output_repo._git.commit("-m", message)
self.copy_data_to_cache()
self.update_output_main_logs(output_dict, options)
main_cach_path = self.path / (self._output_directory + "_cached") / self.output_repo.main_branch
main_cach_path = self.path / (self.output_directory + "_cached") / self.output_repo.main_branch
if main_cach_path.exists():
delete_path(main_cach_path)
self.copy_data_to_cache(self.output_repo.main_branch)
Expand Down Expand Up @@ -1588,7 +1621,7 @@ def add_filetype_to_lfs(self, file_type):

def _update_version(self) -> None:
"""Update output repo to latest CADET-RDM specs."""
metadata = self._metadata
metadata = self.metadata
cadetrdm_version = Version(cadetrdm.__version__)
current_version = Version(metadata["cadet_rdm_version"])

Expand All @@ -1598,6 +1631,14 @@ def _update_version(self) -> None:

changes_were_made = False

if current_version < Version("1.1.1"):
changes_were_made = True
if self.output_log_file_path.exists():
warnings.warn(
"Repo version has outdated options hashes. "
"Updating option hashes in output log.tsv."
)
self._rename_project_repo_folder_to_directory_in_log()
if current_version < Version("0.0.9"):
changes_were_made = True
self._convert_csv_to_tsv_if_necessary()
Expand Down Expand Up @@ -1764,6 +1805,46 @@ def _update_log_hashes(self):
self.add(self.output_log_file_path)
self.commit(message="Updated log hashes", add_all=False)

def _rename_project_repo_folder_to_directory_in_log(self) -> None:
    """Rename the log TSV column ``project_repo_folder_name`` to
    ``project_repo_directory_name``.

    Checks out the main branch, rewrites the log file at
    ``self.output_log_file_path`` with the renamed column (keeping the
    column position), then stages and commits the file. If the old column
    is not present in the header (including an empty file), nothing is
    written or committed.
    """
    old_key = "project_repo_folder_name"
    new_key = "project_repo_directory_name"

    self.checkout(self.main_branch)

    # newline="" lets the csv module handle line endings itself, as the
    # csv documentation recommends for both reading and writing.
    with open(self.output_log_file_path, "r", newline="") as f:
        reader = csv.DictReader(f, delimiter="\t")
        # Take the column names from the header rather than from the first
        # data row, so a log containing only a header is migrated as well.
        fieldnames = list(reader.fieldnames or [])
        rows = list(reader)

    # Nothing to do if the old column does not exist
    if old_key not in fieldnames:
        return

    # Rename key in header, keep position
    fieldnames[fieldnames.index(old_key)] = new_key

    # Rename key in rows
    for row in rows:
        row[new_key] = row.pop(old_key)

    # Write updated data back to file
    with open(self.output_log_file_path, "w", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames, delimiter="\t")
        writer.writeheader()
        writer.writerows(rows)

    # Stage the file that was actually rewritten instead of a hard-coded name.
    self.add(self.output_log_file_path)
    self.commit(
        message="Rename 'project_repo_folder_name' to 'project_repo_directory_name' in log.tsv",
        add_all=False,
    )

def _add_branch_name_to_log(self) -> None:
"""
Update the TSV file by adding a 'project_repo_branch' column.
Expand Down