diff --git a/.gitignore b/.gitignore index 881fb92..f27c67d 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,4 @@ dist tmp .ipynb_checkpoints /tests/non_rdm_repo/ +.vscode diff --git a/AUTHORS.md b/AUTHORS.md new file mode 100644 index 0000000..9cd3a2e --- /dev/null +++ b/AUTHORS.md @@ -0,0 +1,8 @@ +# Contributors + +* [Ronald Jäpel](https://github.com/ronald-jaepel) (Forschungszentrum Juelich GmbH, IBG-1: Biotechnology, Juelich, Germany) +* [Johannes Schmölder](https://github.com/schmoelder) (Forschungszentrum Juelich GmbH, IBG-1: Biotechnology, Juelich, Germany) +* [Hannah Lanzrath](https://github.com/hannahlanzrath) (Forschungszentrum Juelich GmbH, IBG-1: Biotechnology, Juelich, Germany) +* [Katharina Paul](https://github.com/katharinapaul2403) (Forschungszentrum Juelich GmbH, IBG-1: Biotechnology, Juelich, Germany) +* [Daniel Klauß](https://github.com/daklauss) (Forschungszentrum Juelich GmbH, IBG-1: Biotechnology, Juelich, Germany) +* [Eric von Lieres](https://github.com/lieres) (Forschungszentrum Juelich GmbH, IBG-1: Biotechnology, Juelich, Germany) \ No newline at end of file diff --git a/CITATION.bib b/CITATION.bib new file mode 100644 index 0000000..9c21297 --- /dev/null +++ b/CITATION.bib @@ -0,0 +1,2 @@ +% As an open-source project, CADET-RDM relies on the support and recognition from users and researchers to thrive. +% Therefore, we kindly ask that any publications or projects leveraging the capabilities of CADET-RDM acknowledge its creators and their contributions by citing an adequate selection of our publications. 
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..81bf3d9 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,128 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +We as members, contributors, and leaders pledge to make participation in our +community a harassment-free experience for everyone, regardless of age, body +size, visible or invisible disability, ethnicity, sex characteristics, gender +identity and expression, level of experience, education, socio-economic status, +nationality, personal appearance, race, religion, or sexual identity +and orientation. + +We pledge to act and interact in ways that contribute to an open, welcoming, +diverse, inclusive, and healthy community. + +## Our Standards + +Examples of behavior that contributes to a positive environment for our +community include: + +* Demonstrating empathy and kindness toward other people +* Being respectful of differing opinions, viewpoints, and experiences +* Giving and gracefully accepting constructive feedback +* Accepting responsibility and apologizing to those affected by our mistakes, + and learning from the experience +* Focusing on what is best not just for us as individuals, but for the + overall community + +Examples of unacceptable behavior include: + +* The use of sexualized language or imagery, and sexual attention or + advances of any kind +* Trolling, insulting or derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or email + address, without their explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Enforcement Responsibilities + +Community leaders are responsible for clarifying and enforcing our standards of +acceptable behavior and will take appropriate and fair corrective action in +response to any behavior that they deem inappropriate, threatening, offensive, +or 
harmful. + +Community leaders have the right and responsibility to remove, edit, or reject +comments, commits, code, wiki edits, issues, and other contributions that are +not aligned to this Code of Conduct, and will communicate reasons for moderation +decisions when appropriate. + +## Scope + +This Code of Conduct applies within all community spaces, and also applies when +an individual is officially representing the community in public spaces. +Examples of representing our community include using an official e-mail address, +posting via an official social media account, or acting as an appointed +representative at an online or offline event. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported to the community leaders responsible for enforcement at +cadet@fz-juelich.de. +All complaints will be reviewed and investigated promptly and fairly. + +All community leaders are obligated to respect the privacy and security of the +reporter of any incident. + +## Enforcement Guidelines + +Community leaders will follow these Community Impact Guidelines in determining +the consequences for any action they deem in violation of this Code of Conduct: + +### 1. Correction + +**Community Impact**: Use of inappropriate language or other behavior deemed +unprofessional or unwelcome in the community. + +**Consequence**: A private, written warning from community leaders, providing +clarity around the nature of the violation and an explanation of why the +behavior was inappropriate. A public apology may be requested. + +### 2. Warning + +**Community Impact**: A violation through a single incident or series +of actions. + +**Consequence**: A warning with consequences for continued behavior. No +interaction with the people involved, including unsolicited interaction with +those enforcing the Code of Conduct, for a specified period of time. 
This +includes avoiding interactions in community spaces as well as external channels +like social media. Violating these terms may lead to a temporary or +permanent ban. + +### 3. Temporary Ban + +**Community Impact**: A serious violation of community standards, including +sustained inappropriate behavior. + +**Consequence**: A temporary ban from any sort of interaction or public +communication with the community for a specified period of time. No public or +private interaction with the people involved, including unsolicited interaction +with those enforcing the Code of Conduct, is allowed during this period. +Violating these terms may lead to a permanent ban. + +### 4. Permanent Ban + +**Community Impact**: Demonstrating a pattern of violation of community +standards, including sustained inappropriate behavior, harassment of an +individual, or aggression toward or disparagement of classes of individuals. + +**Consequence**: A permanent ban from any sort of public interaction within +the community. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 2.0, available at +https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. + +Community Impact Guidelines were inspired by [Mozilla's code of conduct +enforcement ladder](https://github.com/mozilla/diversity). + +[homepage]: https://www.contributor-covenant.org + +For answers to common questions about this code of conduct, see the FAQ at +https://www.contributor-covenant.org/faq. Translations are available at +https://www.contributor-covenant.org/translations. diff --git a/README.md b/README.md index 4a0d94d..41023c9 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ Welcome to CADET-RDM, a project by the Forschungszentrum Jülich. 
-This tool aims to help track and version control: +This toolbox aims to help track and version control: - input data - code - software versions @@ -12,4 +12,5 @@ and allow for easy sharing, integration, and reproduction of generated results. ## Documentation -You can read the documentation [here](https://cadet-rdm.readthedocs.io). \ No newline at end of file +The documentation contains a user guide with helpful information on how to install CADET-RDM, how to quickly start working with it and a more detailed explanation of its tools. +The documentation can be found [here](https://cadet-rdm.readthedocs.io). \ No newline at end of file diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..3fa3663 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,17 @@ +# Security Policy + +## 1. No Network Dependencies + +CADET operates entirely offline and does not require any network access, nor does it handle sensitive information such as personal identifiers. This eliminates common attack vectors like network exploits or data breaches. + +## 2. Code Integrity and Contribution Reviews + +While there are no significant security risks associated with using CADET, we maintain strict controls over our source code. Any external contributions to the project are thoroughly reviewed and must meet our contribution guidelines. All pull requests are checked to ensure they do not introduce vulnerabilities. + +## 3. Dependency Management + +CADET does not rely on third-party libraries that introduce network or verification components. We carefully manage dependencies to ensure they are up-to-date and secure. + +## 4. Reporting Vulnerabilities + +Though CADET does not involve typical security risks, we encourage users and contributors to report any unexpected behavior or potential vulnerabilities they may discover. Please contact us via cadet@fz-juelich.de if you believe you've found a security issue. 
diff --git a/cadetrdm/cli_integration.py b/cadetrdm/cli_integration.py index fc0abd4..edf9105 100644 --- a/cadetrdm/cli_integration.py +++ b/cadetrdm/cli_integration.py @@ -13,8 +13,8 @@ def cli(): @cli.command(help="Create an empty CADET-RDM repository or initialize over an existing git repo.") -@click.option('--output_repo_name', default="output", - help='Name of the folder where the tracked output should be stored. Optional. Default: "output".') +@click.option('--output_directory_name', default="output", + help='Name of the directory where the tracked output should be stored. Optional. Default: "output".') @click.option('--gitignore', default=None, help='List of files to be added to the gitignore file. Optional.') @click.option('--gitattributes', default=None, @@ -22,21 +22,21 @@ def cli(): @click.option('--cookiecutter', default=None, help='URL or path to cookiecutter template. Optional.') @click.argument('path_to_repo', required=False) -def init(path_to_repo: str = None, output_repo_name: (str | bool) = "output", gitignore: list = None, +def init(path_to_repo: str = None, output_directory_name: (str | bool) = "output", gitignore: list = None, gitattributes: list = None, cookiecutter: str = None, output_repo_kwargs: dict = None): if path_to_repo is None: path_to_repo = "." 
from cadetrdm.initialize_repo import initialize_repo as initialize_git_repo_implementation - initialize_git_repo_implementation(path_to_repo, output_repo_name, gitignore, + initialize_git_repo_implementation(path_to_repo, output_directory_name, gitignore, gitattributes, output_repo_kwargs, cookiecutter) -@cli.command(help="Clone a repository into a new d^irectory.") +@cli.command(help="Clone a repository into a new empty directory.") @click.argument('project_url') -@click.argument('dest', required=False) -def clone(project_url, dest: str = None): +@click.argument('directory', required=False) +def clone(project_url, directory: str = None): from cadetrdm import ProjectRepo - repo = ProjectRepo.clone(url=project_url, to_path=dest) + repo = ProjectRepo.clone(url=project_url, to_path=directory) del repo diff --git a/cadetrdm/container/ApptainerAdapter.py b/cadetrdm/container/ApptainerAdapter.py deleted file mode 100644 index ae5b3bd..0000000 --- a/cadetrdm/container/ApptainerAdapter.py +++ /dev/null @@ -1,167 +0,0 @@ -# import os -# import subprocess -# import tempfile -# from pathlib import Path -# import yaml -# -# from cadetrdm.container import ContainerAdapter -# from cadetrdm.batch_running import Study, Case, Options -# from cadetrdm import Environment -# -# - -""" - -This is a placeholder for the upcoming Apptainer Adapters - -""" -# class ApptainerAdapter(ContainerAdapter): -# -# def __init__(self): -# pass -# -# def run(self, yml_path): -# with open(yml_path, "r") as stream: -# instructions = yaml.safe_load(stream) -# -# instructions = {key.lower(): value for key, value in instructions.items()} -# -# study = Study(**instructions["study"], suppress_lfs_warning=True) -# options = Options(**instructions["options"]) -# environment = Environment(**instructions["environment"]) -# case = Case(study, options, environment) -# -# return self.run_case(case, command=instructions["command"]) -# -# def run_case(self, case: Case, command: str = None): -# if self.image is None: 
-# image = self._build_image(case) -# else: -# image = self.image -# -# container_tmp_filename = "/tmp/options.json" -# options_tmp_filename = self._dump_options(case) -# -# full_command = self._prepare_case_command( -# case=case, -# command=command, -# container_tmp_filename=container_tmp_filename -# ) -# -# log, return_code = self._run_command( -# container_tmp_filename=container_tmp_filename, -# full_command=full_command, -# image=image, -# options_tmp_filename=options_tmp_filename -# ) -# -# return log, return_code -# -# def _run_command(self, container_tmp_filename, full_command, image, options_tmp_filename): -# -# ssh_location = Path.home() / ".ssh" -# if not ssh_location.exists(): -# raise FileNotFoundError("No ssh folder found. Please report this on GitHub/CADET/CADET-RDM") -# -# container = self.client.containers.run_yml( -# image=image, -# command=full_command, -# volumes={ -# f"{Path.home()}/.ssh": {'bind': "/root/.ssh_host_os", 'mode': "ro"}, -# options_tmp_filename.absolute().as_posix(): {'bind': container_tmp_filename, 'mode': 'ro'} -# }, -# detach=True, -# remove=False -# ) -# -# full_log = [] -# # Step 2: Attach to the container's logs -# for log in container.logs(stream=True): -# full_log.append(log.decode("utf-8")) -# print(log.decode("utf-8"), end="") -# -# # Wait for the container to finish execution -# result = container.wait() -# exit_code = result["StatusCode"] -# -# container.remove() -# -# return full_log, exit_code -# -# def _dump_options(self, case): -# tmp_filename = Path("tmp/" + next(tempfile._get_candidate_names()) + ".json") -# case.options.dump_json_file(tmp_filename) -# return tmp_filename -# -# def _build_image(self, case): -# cwd = os.getcwd() -# with open(case.project_repo.path / "Dockerfile", "rb") as dockerfile: -# os.chdir(case.project_repo.path.as_posix()) -# -# image, logs = self.client.images.build( -# path=case.project_repo.path.as_posix(), -# # fileobj=dockerfile, # A file object to use as the Dockerfile. 
-# tag=case.project_repo.name + ":" + case.name[:10], # A tag to add to the final image -# quiet=False, # Whether to return the status -# pull=True, # Downloads any updates to the FROM image in Dockerfiles -# -# ) -# if case.options.debug: -# for log in logs: -# print(log) -# os.chdir(cwd) -# return image -# -# def pull_image(self, repository, tag=None, all_tags=False, **kwargs): -# self.image = self.client.images.pull( -# repository=repository, -# tag=tag, -# all_tags=all_tags, -# **kwargs -# ) -# -# def _push_image(self, repository, tag=None, **kwargs): -# self.client.images.push( -# repository=repository, -# tag=tag, -# **kwargs -# ) -# -# def _tag_image(self, image, repository, tag=None, **kwargs): -# """ -# Tag this image into a repository. Similar to the ``docker tag`` -# command. -# -# Args: -# repository (str): The repository to set for the tag -# tag (str): The tag name -# force (bool): Force -# -# Raises: -# :py:class:`docker.errors.APIError` -# If the server returns an error. 
-# -# Returns: -# (bool): ``True`` if successful -# """ -# image.tag(repository=repository, tag=tag, **kwargs) -# return image -# -# def build_and_push_image(self, case, repository, tag=None, **kwargs): -# image = self._build_image(case) -# image = self._tag_image(image, repository, tag, **kwargs) -# self._push_image(repository, tag, **kwargs) -# -# def _update_Dockerfile_with_env_reqs(self, case): -# case.project_repo._reset_hard_to_head(force_entry=True) -# -# dockerfile = Path(case.project_repo.path) / "Dockerfile" -# install_command = case.environment.prepare_install_instructions() -# if install_command is None: -# return -# -# with open(dockerfile, "a") as handle: -# handle.write(f"\n{install_command}\n") -# -# def __del__(self): -# self.client.close() diff --git a/cadetrdm/container/containerAdapter.py b/cadetrdm/container/containerAdapter.py index 34536a9..b3858f8 100644 --- a/cadetrdm/container/containerAdapter.py +++ b/cadetrdm/container/containerAdapter.py @@ -45,7 +45,7 @@ def _prepare_case_command(case, command, container_options_filename): if command_install is not None: commands.append(command_install) - # pull the study from the URL into a "study" folder + # pull the study from the URL into a "study" repository command_pull = f"rdm clone {case.project_repo.url} study" # cd into the "study" folder command_cd = "cd study" diff --git a/cadetrdm/container/dockerAdapter.py b/cadetrdm/container/dockerAdapter.py index 10e9d8c..6faeea2 100644 --- a/cadetrdm/container/dockerAdapter.py +++ b/cadetrdm/container/dockerAdapter.py @@ -72,7 +72,7 @@ def _run_command(self, full_command, image, mounts=None): ssh_location = Path.home() / ".ssh" if not ssh_location.exists(): - raise FileNotFoundError("No ssh folder found. Please report this on GitHub/CADET/CADET-RDM") + raise FileNotFoundError("No ssh directory found. 
Please report this on GitHub/CADET/CADET-RDM") volumes = { f"{Path.home()}/.ssh": {'bind': "/root/.ssh_host_os", 'mode': "ro"}, diff --git a/cadetrdm/container/podmanAdapter.py b/cadetrdm/container/podmanAdapter.py index 8927bf9..2a6f479 100644 --- a/cadetrdm/container/podmanAdapter.py +++ b/cadetrdm/container/podmanAdapter.py @@ -69,7 +69,7 @@ def _run_command(self, full_command, image, mounts=None): ssh_location = Path.home() / ".ssh" if not ssh_location.exists(): - raise FileNotFoundError("No ssh folder found. Please report this on GitHub/CADET/CADET-RDM") + raise FileNotFoundError("No ssh directory found. Please report this on GitHub/CADET/CADET-RDM") full_command = full_command.replace('"', "'") @@ -82,7 +82,7 @@ def _run_command(self, full_command, image, mounts=None): podman_command = ( f'podman run ' '--rm ' # remove container after run_yml (to keep space usage low) - f'-v {ssh_location}:/root/.ssh_host_os:ro ' # mount ssh folder for the container to access + f'-v {ssh_location}:/root/.ssh_host_os:ro ' # mount ssh directory for the container to access f'{volume_mounts}' # mount options file f'{image} ' # specify image name f'bash -c "{full_command}"' # run_yml command in bash shell diff --git a/cadetrdm/initialize_repo.py b/cadetrdm/initialize_repo.py index 17727a1..683c26f 100644 --- a/cadetrdm/initialize_repo.py +++ b/cadetrdm/initialize_repo.py @@ -18,22 +18,24 @@ from cadetrdm.io_utils import write_lines_to_file, wait_for_user, init_lfs, test_for_lfs import cadetrdm.templates.dockerfile_template as dockerfile_template -def initialize_repo(path_to_repo: str | Path, output_folder_name: (str | bool) = "output", gitignore: list = None, +def initialize_repo(path_to_repo: str | Path, output_directory_name: (str | bool) = "output", gitignore: list = None, gitattributes: list = None, output_repo_kwargs: dict = None, cookiecutter_template: str = None): """ Initialize a git repository at the given path with an optional included output results repository. 
:param path_to_repo: - Path to main repository. - :param output_folder_name: + Path to main repository. If set to ".", the repository is initialized in the root directory without creating a new directory. If given as a relative path (e.g. 'repository_name'), a new directory with that name is created inside the root directory. If given as an absolute path (e.g. "C:\\User\\name\\project"), a new directory is created at the specified location. + :param output_directory_name: Name for the output repository. :param gitignore: List of files to be added to the gitignore file. :param gitattributes: List of lines to be added to the gitattributes file :param output_repo_kwargs: - kwargs to be given to the creation of the output repo initalization function. + kwargs to be given to the creation of the output repository initialization function. Include gitignore, gitattributes, and lfs_filetypes kwargs. + :param cookiecutter_template: + Path to cookiecutter template to include files created by cookiecutter at repository initialization. 
""" test_for_lfs() @@ -43,8 +45,8 @@ def initialize_repo(path_to_repo: str | Path, output_folder_name: (str | bool) = if gitignore is None: gitignore = get_default_gitignore() + ["*.ipynb", "*.h5"] - gitignore.append(f"/{output_folder_name}/") - gitignore.append(f"/{output_folder_name}_cached/") + gitignore.append(f"/{output_directory_name}/") + gitignore.append(f"/{output_directory_name}_cached/") if gitattributes is not None: write_lines_to_file(path=".gitattributes", lines=gitattributes, open_type="a") @@ -74,7 +76,7 @@ def initialize_repo(path_to_repo: str | Path, output_folder_name: (str | bool) = "is_project_repo": True, "is_output_repo": False, "project_uuid": project_repo_uuid, "output_uuid": output_repo_uuid, "cadet_rdm_version": cadetrdm.__version__, - "output_remotes": {"output_folder_name": output_folder_name, "output_remotes": {}} + "output_remotes": {"output_directory_name": output_directory_name, "output_remotes": {}} } with open(".cadet-rdm-data.json", "w") as f: json.dump(rdm_data, f, indent=2) @@ -87,7 +89,7 @@ def initialize_repo(path_to_repo: str | Path, output_folder_name: (str | bool) = "commit_hash": "6e3c26527999036e9490d2d86251258fe81d46dc" }}, f, indent=2) - initialize_output_repo(output_folder_name, project_repo_uuid=project_repo_uuid, + initialize_output_repo(output_directory_name, project_repo_uuid=project_repo_uuid, output_repo_uuid=output_repo_uuid, **output_repo_kwargs) repo = ProjectRepo(".") @@ -115,12 +117,14 @@ def initialize_repo(path_to_repo: str | Path, output_folder_name: (str | bool) = def init_cookiecutter(cookiecutter_template, path_to_repo): """ - Initialize from cookiecutter template. Because cookiecutter can only create the files in a sub-directory - but cadet-rdm init can be called from within a folder with "path_to_repo" == ".", we copy the files from the - generated_dir folder into the path_to_repo folder afterwards. + Initialize from cookiecutter template. 
Because cookiecutter can only create the file structure in a sub-directory + but cadet-rdm init can also be called from within the target directory itself with "path_to_repo" == ".", we copy the files from the + generated_dir directory into the path_to_repo directory afterwards. This means that only the internal contents, the directory layout and files are copied into the path_to_repo. The surrounding top-level directory itself is not carried over. :param cookiecutter_template: + str, Path to cookiecutter template which creates a file structure that is copied into the "output_dir". :param path_to_repo: + str, Path to main repository. If set to ".", the repository will be initialized in the current directory without creating an additional subdirectory. """ generated_dir = cookiecutter(cookiecutter_template, output_dir=path_to_repo) file_names = os.listdir(generated_dir) @@ -138,10 +142,10 @@ def init_cookiecutter(cookiecutter_template, path_to_repo): # # repo = ProjectRepo(".") # -# if Path(repo._output_folder).exists(): -# raise RuntimeError(f"Output repo at {repo._output_folder} already exists.") +# if Path(repo._output_directory).exists(): +# raise RuntimeError(f"Output repo at {repo._output_directory} already exists.") # -# initialize_output_repo(repo._output_folder, project_repo_uuid=repo._project_uuid, +# initialize_output_repo(repo._output_directory, project_repo_uuid=repo._project_uuid, # output_repo_uuid=repo._output_uuid, **output_repo_kwargs) # # os.chdir(starting_directory) @@ -154,8 +158,8 @@ def initialize_git(folder="."): try: repo = git.Repo(".") - proceed = wait_for_user('The target directory already contains a git repo.\n' - 'Please commit or stash all changes to the repo before continuing.\n' + proceed = wait_for_user('The target directory already contains a git repository.\n' + 'Please commit or stash all changes to the repository before continuing.\n' 'Proceed?') if not proceed: raise KeyboardInterrupt @@ 
-174,13 +178,13 @@ def get_default_lfs_filetypes(): return ["*.jpg", "*.png", "*.xlsx", "*.h5", "*.ipynb", "*.pdf", "*.docx", "*.zip", "*.html", "*.csv"] -def initialize_output_repo(output_folder_name, gitignore: list = None, +def initialize_output_repo(output_directory_name, gitignore: list = None, gitattributes: list = None, lfs_filetypes: list = None, project_repo_uuid: str = None, output_repo_uuid: str = None): """ Initialize a git repository at the given path with an optional included output results repository. - :param output_folder_name: + :param output_directory_name: Name for the output repository. :param gitignore: List of files to be added to the gitignore file. @@ -190,8 +194,8 @@ def initialize_output_repo(output_folder_name, gitignore: list = None, List of filetypes to be handled by git lfs. """ starting_directory = os.getcwd() - os.makedirs(output_folder_name, exist_ok=True) - os.chdir(output_folder_name) + os.makedirs(output_directory_name, exist_ok=True) + os.chdir(output_directory_name) if gitignore is None: gitignore = get_default_gitignore() @@ -259,7 +263,7 @@ def create_output_readme(): "`CADET-RDM` automatically tracks all simulations that are started by running `main.py` from the corresponding project repository.", "", "Each simulation run creates a dedicated branch in this output repository. " - "The results are saved within the `src` folder of the respective branch. " + "The results are saved within the `src` directory of the respective branch. 
" "Additionally, a `log.tsv` file in the main branch records metadata for all runs, uniquely linking each output branch to its originating run in the project repository.", "", "## Project Repository", diff --git a/cadetrdm/repositories.py b/cadetrdm/repositories.py index 37ae17f..0d4220a 100644 --- a/cadetrdm/repositories.py +++ b/cadetrdm/repositories.py @@ -564,7 +564,7 @@ def add_remote(self, remote_url, remote_name=None): remote_name = "origin" self._git_repo.create_remote(remote_name, url=remote_url) if self._metadata["is_project_repo"]: - # This folder is a project repo. Use a project repo class to easily access the output repo. + # This directory is a project repository. Use a project repo class to easily access the output repo. output_repo = ProjectRepo(self.path).output_repo if output_repo.active_branch != output_repo.main_branch: @@ -575,7 +575,7 @@ def add_remote(self, remote_url, remote_name=None): output_repo.add("README.md") output_repo.commit("Add remote for project repo", verbosity=0, add_all=False) if self._metadata["is_output_repo"]: - # This folder is an output repo + # This directory is an output repository. project_repo = ProjectRepo(self.path.parent) project_repo.update_output_remotes_json() project_repo.add_list_of_remotes_in_readme_file("Link to Output Repository", self.remote_urls) @@ -596,7 +596,7 @@ def import_remote_repo(self, source_repo_location, source_repo_branch, target_re Branch of the source repo to check out. :param target_repo_location: - Place to store the repo. If None, the external_cache folder is used. + Place to store the repo. If None, the external_cache directory is used. 
:return: Path to the cloned repository @@ -770,7 +770,7 @@ class ProjectRepo(BaseRepo): def __init__( self, path: os.PathLike = None, - output_folder = None, + output_directory = None, search_parent_directories: bool = True, suppress_lfs_warning: bool = False, url: str = None, @@ -785,7 +785,7 @@ def __init__( :param path: Path to the root of the git repository. - :param output_folder: + :param output_directory: Deprecated: Path to the root of the output repository. :param search_parent_directories: if True, all parent directories will be searched for a valid repo as well. @@ -816,24 +816,23 @@ def __init__( if not suppress_lfs_warning: test_for_lfs() - if output_folder is not None: - print("Deprecation Warning. Setting the outputfolder manually during repo instantiation is deprecated" + if output_directory is not None: + print("Deprecation Warning. Setting the output directory manually during repo instantiation is deprecated" " and will be removed in a future update.") if not self.data_json_path.exists(): - raise RuntimeError(f"Folder {self.path} does not appear to be a CADET-RDM repository.") + raise RuntimeError(f"Directory {self.path} does not appear to be a CADET-RDM repository.") self._project_uuid = self._metadata["project_uuid"] self._output_uuid = self._metadata["output_uuid"] - self._output_folder = self._metadata["output_remotes"]["output_folder_name"] - + self._output_directory = self._metadata["output_remotes"]["output_directory_name"] self._update_version() - if not (self.path / self._output_folder).exists(): + if not (self.path / self._output_directory).exists(): print("Output repository was missing, cloning now.") self._clone_output_repo() self.output_repo = OutputRepo( - self.path / self._output_folder, + self.path / self._output_directory, self, ) @@ -905,7 +904,7 @@ def _update_version(self) -> None: def _clone_output_repo(self, multi_options: List[str] = None): metadata = self.load_metadata() output_remotes = metadata["output_remotes"] - 
output_path = self.path / output_remotes["output_folder_name"] + output_path = self.path / output_remotes["output_directory_name"] ssh_remotes = list(output_remotes["output_remotes"].values()) if len(ssh_remotes) == 0: warnings.warn("No output remotes configured in .cadet-rdm-data.json") @@ -1148,7 +1147,7 @@ def update_output_remotes_json(self): metadata = json.load(file_handle) remotes_dict = {remote.name: str(remote.url) for remote in self.output_repo.remotes} - metadata["output_remotes"] = {"output_folder_name": self._output_folder, "output_remotes": remotes_dict} + metadata["output_remotes"] = {"output_directory_name": self._output_directory, "output_remotes": remotes_dict} with open(self.data_json_path, "w", encoding="utf-8") as file_handle: json.dump(metadata, file_handle, indent=2) @@ -1171,11 +1170,11 @@ def download_file(self, url, file_path): def input_data(self, branch_name: str) -> Path: """ Load previously generated results to iterate upon. Copies entire branch of output repo - to the output_cached / branch_name folder. + to the output_cached / branch_name directory. :param branch_name: Name of the branch of the output repository in which the results are stored. :return: - Absolute path to the newly copied folder. + Absolute path to the newly copied directory. """ cached_branch_path = self.copy_data_to_cache(branch_name) @@ -1201,8 +1200,8 @@ def remove_cached_files(self): """ Delete all previously cached results. 
""" - if (self.path / (self._output_folder + "_cached")).exists(): - delete_path(self.path / (self._output_folder + "_cached")) + if (self.path / (self._output_directory + "_cached")).exists(): + delete_path(self.path / (self._output_directory + "_cached")) def import_static_data(self, source_path: Path | str, commit_message): """ @@ -1311,32 +1310,32 @@ def _get_new_output_branch( def cache_folder_for_branch(self, branch_name=None): """ - Returns the path to the cache folder for the given branch + Returns the path to the cache directory for the given branch :param branch_name: optional branch name, if None, current branch is used. :return Path: - Path to folder in cache + Path to directory in cache """ branch_name_path = branch_name.replace("/", "_") - # Define the target folder - cache_folder = self.path / f"{self._output_folder}_cached" / str(branch_name_path) + # Define the target directory + cache_folder = self.path / f"{self._output_directory}_cached" / str(branch_name_path) return cache_folder def copy_data_to_cache(self, branch_name=None, target_folder=None): """ - Copy all existing output results into a cached folder and make it read-only. + Copy all existing output results into a cached directory and make it read-only. :param branch_name: optional branch name, if None, current branch is used. :param target_folder: - optional target directory, if None, default cache folder is used. + optional target directory, if None, default cache directory is used. 
:return Path: - Path to folder in cache + Path to directory in cache """ # Determine the branch name if not provided if branch_name is None: @@ -1353,7 +1352,7 @@ def copy_data_to_cache(self, branch_name=None, target_folder=None): branch_name, f"origin/{branch_name}" ) - # Create the target folder if it doesn't exist + # Create the target directory if it doesn't exist if not target_folder.exists(): target_folder.mkdir(parents=True, exist_ok=True) @@ -1432,7 +1431,7 @@ def _commit_output_data( commit_return = self.output_repo._git.commit("-m", message) self.copy_data_to_cache() self.update_output_main_logs(output_dict, options) - main_cach_path = self.path / (self._output_folder + "_cached") / self.output_repo.main_branch + main_cach_path = self.path / (self._output_directory + "_cached") / self.output_repo.main_branch if main_cach_path.exists(): delete_path(main_cach_path) self.copy_data_to_cache(self.output_repo.main_branch) diff --git a/docs/README.md b/docs/README.md index 71c4d02..8681d57 100644 --- a/docs/README.md +++ b/docs/README.md @@ -7,7 +7,7 @@ pip install -e .[docs] ``` from the CADET-RDM root directory. 
-Then, in the `docs` folder run: +Then, in the `docs` directory run: ``` sphinx-build -b html source build diff --git a/pyproject.toml b/pyproject.toml index 6dc452c..2f8ae52 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -86,5 +86,13 @@ markers = [ "container: marks tests as using containerization interfaces such as Docker or Apptainer" ] +[tool.setuptools] +package-dir = {"" = "."} + +[tool.setuptools.packages.find] +include = ["cadetrdm*"] +exclude = ["tmp*", "batch_repos*"] + + [tool.setuptools.dynamic] version = { attr = "cadetrdm.__version__" } diff --git a/tests/case.yml b/tests/case.yml index 7f597b9..3636853 100644 --- a/tests/case.yml +++ b/tests/case.yml @@ -1,6 +1,6 @@ ProjectRepo: path: tmp/template - url: git@github.com:ronald-jaepel/rdm_testing_template.git + url: git@github.com:cadet/RDM-Testing-Template.git branch: main Options: commit_message: Trying out new things diff --git a/tests/test_container_docker.py b/tests/test_container_docker.py index 484ca51..8624200 100644 --- a/tests/test_container_docker.py +++ b/tests/test_container_docker.py @@ -12,7 +12,7 @@ def test_run_dockered(): rdm_example = ProjectRepo( path=WORK_DIR / 'template', - url="git@github.com:ronald-jaepel/rdm_testing_template.git", + url="git@github.com:cadet/RDM-Testing-Template.git", suppress_lfs_warning=True ) diff --git a/tests/test_container_podman.py b/tests/test_container_podman.py index 65d5c19..31d3afc 100644 --- a/tests/test_container_podman.py +++ b/tests/test_container_podman.py @@ -10,14 +10,14 @@ @pytest.mark.container def test_run_in_podman(): - # You need to install passt on your system and add it to the path + # passt needs to be installed and added to the path # os.environ["PATH"] += os.pathsep + "/home/bin/passt" WORK_DIR = Path.cwd() / "tmp" WORK_DIR.mkdir(parents=True, exist_ok=True) rdm_example = ProjectRepo( path=WORK_DIR / 'template', - url="git@github.com:ronald-jaepel/rdm_testing_template.git", + url="git@github.com:cadet/RDM-Testing-Template.git", 
branch="main", suppress_lfs_warning=True ) @@ -69,7 +69,7 @@ def test_run_in_podman(): @pytest.mark.slow @pytest.mark.container def test_pytest_in_podman(): - # You need to install passt on your system and add it to the path + # Passt needs to be installed and added to the path # os.environ["PATH"] += os.pathsep + "/home/bin/passt" WORK_DIR = Path.cwd() / "tmp" WORK_DIR.mkdir(parents=True, exist_ok=True) diff --git a/tests/test_git_adapter.py b/tests/test_git_adapter.py index d407e95..9984338 100644 --- a/tests/test_git_adapter.py +++ b/tests/test_git_adapter.py @@ -34,9 +34,9 @@ def count_commit_number(repo): return current_commit_number -def example_generate_results_array(path_to_repo, output_folder): +def example_generate_results_array(path_to_repo, output_directory): results_array = np.random.random((500, 3)) - np.savetxt(path_to_repo / output_folder / "result.csv", results_array, delimiter=",") + np.savetxt(path_to_repo / output_directory / "result.csv", results_array, delimiter=",") return results_array @@ -86,7 +86,7 @@ def try_commit_results_data(path_to_repo): repo = ProjectRepo(path_to_repo) current_commit_number = count_commit_number(repo.output_repo) with repo.track_results(results_commit_message="Add array") as output_branch: - example_generate_results_array(path_to_repo, output_folder=repo.output_path) + example_generate_results_array(path_to_repo, output_directory=repo.output_path) updated_commit_number = count_commit_number(repo.output_repo) assert current_commit_number <= updated_commit_number assert str(repo.output_repo.active_branch) == output_branch @@ -129,7 +129,7 @@ def try_commit_results_with_uncommitted_code_changes(path_to_repo): modify_code(path_to_repo) with pytest.raises(Exception): with repo.track_results(results_commit_message="Add array"): - example_generate_results_array(path_to_repo, output_folder=repo.output_path) + example_generate_results_array(path_to_repo, output_directory=repo.output_path) repo.commit("add code to print 
random number", add_all=True) @@ -158,7 +158,7 @@ def try_initialize_from_remote(): if Path("test_repo_from_remote").exists(): delete_path("test_repo_from_remote") ProjectRepo.clone( - url="git@github.com:ronald-jaepel/rdm_testing_template.git", + url="git@github.com:cadet/RDM-Testing-Template.git", to_path="test_repo_from_remote" ) assert try_init_gitpython_repo("test_repo_from_remote") @@ -408,7 +408,7 @@ def test_with_detached_head(): # repo.import_remote_repo(source_repo_location="../test_repo/results", source_repo_branch=branch_name) # repo.import_remote_repo(source_repo_location="../test_repo/results", source_repo_branch=branch_name, # target_repo_location="foo/bar/repo") -# # delete folder and reload +# # delete directory and reload # delete_path("foo/bar/repo") # # with pytest.raises(Exception):