diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 43ec044e..025e9335 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,16 +1,21 @@ -Unreleased -========== +Release 0.13.0 (unreleased) +==================================== * Add archive (``vcs: archive``) support for fetching dependencies from ``.tar.gz``, ``.tgz``, ``.tar.bz2``, ``.tar.xz`` and ``.zip`` files via HTTP, HTTPS or file URLs (#1058) * Fix path-traversal check using character-based prefix comparison instead of path-component comparison (#1058) * Fix directory hash being non-deterministic across filesystem traversal orders, causing false local-change detection (#1058) * Fix ``dfetch freeze`` not capturing branch information for SVN projects when only the revision matched (#1058) +* Rename child-manifests to sub-manifests in documentation and code (#1027) +* Fetch git submodules in git subproject at pinned revision (#1013) +* Add nested projects in subprojects to project report (#1017) +* Make ``dfetch report`` output more yaml-like (#1017) +* Don't break when importing submodules with space in path (#1017) +* Warn when ``src:`` glob pattern matches multiple directories (#1017) Release 0.12.1 (released 2026-02-24) ==================================== * Fix missing unicode data in standalone binaries (#1014) -* Rename child-manifests to sub-manifests in documentation and code (#1027) Release 0.12.0 (released 2026-02-21) ==================================== diff --git a/dfetch/commands/update.py b/dfetch/commands/update.py index 6e44ca35..6b6ec505 100644 --- a/dfetch/commands/update.py +++ b/dfetch/commands/update.py @@ -29,6 +29,26 @@ .. scenario-include:: ../features/updated-project-has-dependencies.feature +Git submodules +~~~~~~~~~~~~~~ + +When a git dependency itself contains git submodules, *Dfetch* fetches and resolves +them automatically, no extra manifest entries or ``git submodule`` commands are needed. + +Each submodule is checked out at the exact revision pinned by the parent repository. 
+*Dfetch* reports every resolved submodule in the update output:: + + Dfetch (0.12.1) + my-project: + > Found & fetched submodule "./ext/vendor-lib" (https://github.com/example/vendor-lib @ master - 79698c9…) + > Fetched master - e1fda19… + +Nested submodules (submodules of submodules) are resolved recursively. The pinned +details for each submodule are recorded in the ``.dfetch_data.yaml`` metadata file +and are visible in :ref:`Report`. + +.. scenario-include:: ../features/fetch-git-repo-with-submodule.feature + """ import argparse diff --git a/dfetch/log.py b/dfetch/log.py index 0025fe25..585c1de1 100644 --- a/dfetch/log.py +++ b/dfetch/log.py @@ -67,8 +67,9 @@ def print_info_line(self, name: str, info: str) -> None: self.info(f" [bold][bright_green]{safe_name}:[/bright_green][/bold]") DLogger._printed_projects.add(name) - line = markup_escape(info).replace("\n", "\n ") - self.info(f" [bold blue]> {line}[/bold blue]") + if info: + line = markup_escape(info).replace("\n", "\n ") + self.info(f" [bold blue]> {line}[/bold blue]") def print_warning_line(self, name: str, info: str) -> None: """Print a warning line: green name, yellow value.""" diff --git a/dfetch/manifest/parse.py b/dfetch/manifest/parse.py index 1c0bb4e3..6aa18b79 100644 --- a/dfetch/manifest/parse.py +++ b/dfetch/manifest/parse.py @@ -107,6 +107,11 @@ def get_submanifests(skip: list[str] | None = None) -> list[Manifest]: with prefix_runtime_exceptions( pathlib.Path(path).relative_to(os.path.dirname(os.getcwd())).as_posix() ): - submanifests += [parse(path)] + try: + submanifests += [parse(path)] + except FileNotFoundError: + logger.warning( + f"Sub-manifest {path} was found but no longer exists" + ) return submanifests diff --git a/dfetch/project/archivesubproject.py b/dfetch/project/archivesubproject.py index bd702ebf..f941716f 100644 --- a/dfetch/project/archivesubproject.py +++ b/dfetch/project/archivesubproject.py @@ -49,6 +49,7 @@ from dfetch.log import get_logger from dfetch.manifest.project 
import ProjectEntry from dfetch.manifest.version import Version +from dfetch.project.metadata import Dependency from dfetch.project.subproject import SubProject from dfetch.vcs.archive import ( ARCHIVE_EXTENSIONS, @@ -166,7 +167,7 @@ def wanted_version(self) -> Version: return Version(revision=self._project_entry.hash) return Version(revision=self.remote) - def _fetch_impl(self, version: Version) -> Version: + def _fetch_impl(self, version: Version) -> tuple[Version, list[Dependency]]: """Download and extract the archive to the local destination. 1. Download the archive to a temporary file. @@ -211,7 +212,7 @@ def _fetch_impl(self, version: Version) -> Version: except OSError: pass - return version + return version, [] def freeze_project(self, project: ProjectEntry) -> str | None: """Pin *project* to a cryptographic hash of the archive. diff --git a/dfetch/project/gitsubproject.py b/dfetch/project/gitsubproject.py index db98ba3d..757027b4 100644 --- a/dfetch/project/gitsubproject.py +++ b/dfetch/project/gitsubproject.py @@ -1,14 +1,14 @@ """Git specific implementation.""" -import os import pathlib from functools import lru_cache from dfetch.log import get_logger from dfetch.manifest.project import ProjectEntry from dfetch.manifest.version import Version +from dfetch.project.metadata import Dependency from dfetch.project.subproject import SubProject -from dfetch.util.util import LICENSE_GLOBS, safe_rmtree +from dfetch.util.util import LICENSE_GLOBS, safe_rm from dfetch.vcs.git import GitLocalRepo, GitRemote, get_git_version logger = get_logger(__name__) @@ -57,7 +57,7 @@ def list_tool_info() -> None: ) SubProject._log_tool("git", "") - def _fetch_impl(self, version: Version) -> Version: + def _fetch_impl(self, version: Version) -> tuple[Version, list[Dependency]]: """Get the revision of the remote and place it at the local path.""" rev_or_branch_or_tag = self._determine_what_to_fetch(version) @@ -69,17 +69,38 @@ def _fetch_impl(self, version: Version) -> Version: ] 
local_repo = GitLocalRepo(self.local_path) - fetched_sha = local_repo.checkout_version( + fetched_sha, submodules = local_repo.checkout_version( remote=self.remote, version=rev_or_branch_or_tag, src=self.source, - must_keeps=license_globs, + must_keeps=license_globs + [".gitmodules"], ignore=self.ignore, ) - safe_rmtree(os.path.join(self.local_path, local_repo.METADATA_DIR)) + vcs_deps = [] + for submodule in submodules: + self._log_project( + f'Found & fetched submodule "./{submodule.path}" ' + f" ({submodule.url} @ {Version(tag=submodule.tag, branch=submodule.branch, revision=submodule.sha)})", + ) + vcs_deps.append( + Dependency( + remote_url=submodule.url, + destination=submodule.path, + branch=submodule.branch, + tag=submodule.tag, + revision=submodule.sha, + source_type="git-submodule", + ) + ) + + targets = {local_repo.METADATA_DIR, local_repo.GIT_MODULES_FILE} + + for path in pathlib.Path(self.local_path).rglob("*"): + if path.name in targets: + safe_rm(path) - return self._determine_fetched_version(version, fetched_sha) + return self._determine_fetched_version(version, fetched_sha), vcs_deps def _determine_what_to_fetch(self, version: Version) -> str: """Based on asked version, target to fetch.""" diff --git a/dfetch/project/metadata.py b/dfetch/project/metadata.py index 0f611c81..2b3eba47 100644 --- a/dfetch/project/metadata.py +++ b/dfetch/project/metadata.py @@ -16,6 +16,17 @@ """ +class Dependency(TypedDict): + """Argument types for dependency class construction.""" + + branch: str + tag: str + revision: str + remote_url: str + destination: str + source_type: str + + class Options(TypedDict): # pylint: disable=too-many-ancestors """Argument types for Metadata class construction.""" @@ -27,6 +38,7 @@ class Options(TypedDict): # pylint: disable=too-many-ancestors destination: str hash: str patch: str | list[str] + dependencies: list["Dependency"] class Metadata: @@ -54,6 +66,8 @@ def __init__(self, kwargs: Options) -> None: # Historically only a single 
patch was allowed self._patch: list[str] = always_str_list(kwargs.get("patch", [])) + self._dependencies: list[Dependency] = kwargs.get("dependencies", []) + @classmethod def from_project_entry(cls, project: ProjectEntry) -> "Metadata": """Create a metadata object from a project entry.""" @@ -66,6 +80,7 @@ def from_project_entry(cls, project: ProjectEntry) -> "Metadata": "last_fetch": datetime.datetime(2000, 1, 1, 0, 0, 0), "hash": "", "patch": project.patch, + "dependencies": [], } return cls(data) @@ -77,13 +92,18 @@ def from_file(cls, path: str) -> "Metadata": return cls(data) def fetched( - self, version: Version, hash_: str = "", patch_: list[str] | None = None + self, + version: Version, + hash_: str = "", + patch_: list[str] | None = None, + dependencies: list[Dependency] | None = None, ) -> None: """Update metadata.""" self._last_fetch = datetime.datetime.now() self._version = version self._hash = hash_ self._patch = patch_ or [] + self._dependencies = dependencies or [] @property def version(self) -> Version: @@ -129,6 +149,11 @@ def patch(self) -> list[str]: """The list of applied patches as stored in the metadata.""" return self._patch + @property + def dependencies(self) -> list[Dependency]: + """The list of dependency projects as stored in the metadata.""" + return self._dependencies + @property def path(self) -> str: """Path to metadata file.""" @@ -152,12 +177,13 @@ def __eq__(self, other: object) -> bool: other._version.revision == self._version.revision, other.hash == self.hash, other.patch == self.patch, + other.dependencies == self.dependencies, ] ) def dump(self) -> None: """Dump metadata file to correct path.""" - metadata = { + metadata: dict[str, dict[str, str | list[str] | list[Dependency]]] = { "dfetch": { "remote_url": self.remote_url, "branch": self._version.branch, @@ -169,6 +195,9 @@ def dump(self) -> None: } } + if self.dependencies: + metadata["dfetch"]["dependencies"] = self.dependencies + with open(self.path, "w+", encoding="utf-8") 
as metadata_file: metadata_file.write(DONT_EDIT_WARNING) yaml.dump(metadata, metadata_file) diff --git a/dfetch/project/subproject.py b/dfetch/project/subproject.py index 5649b92c..36ccff3e 100644 --- a/dfetch/project/subproject.py +++ b/dfetch/project/subproject.py @@ -9,7 +9,7 @@ from dfetch.manifest.project import ProjectEntry from dfetch.manifest.version import Version from dfetch.project.abstract_check_reporter import AbstractCheckReporter -from dfetch.project.metadata import Metadata +from dfetch.project.metadata import Dependency, Metadata from dfetch.util.util import hash_directory, safe_rm from dfetch.util.versions import latest_tag_from_list from dfetch.vcs.patch import Patch @@ -129,7 +129,7 @@ def update( f"Fetching {to_fetch}", enabled=self._show_animations, ): - actually_fetched = self._fetch_impl(to_fetch) + actually_fetched, dependency = self._fetch_impl(to_fetch) self._log_project(f"Fetched {actually_fetched}") applied_patches = self._apply_patches(patch_count) @@ -145,6 +145,7 @@ def update( skiplist=[self.__metadata.FILENAME] + post_fetch_ignored, ), patch_=applied_patches, + dependencies=list(dependency), ) logger.debug(f"Writing repo metadata to: {self.__metadata.path}") @@ -392,7 +393,7 @@ def _are_there_local_changes(self, files_to_ignore: Sequence[str]) -> bool: ) @abstractmethod - def _fetch_impl(self, version: Version) -> Version: + def _fetch_impl(self, version: Version) -> tuple[Version, list[Dependency]]: """Fetch the given version of the subproject, should be implemented by the child class.""" @abstractmethod diff --git a/dfetch/project/svnsubproject.py b/dfetch/project/svnsubproject.py index 85c28c22..6333856e 100644 --- a/dfetch/project/svnsubproject.py +++ b/dfetch/project/svnsubproject.py @@ -7,6 +7,7 @@ from dfetch.log import get_logger from dfetch.manifest.project import ProjectEntry from dfetch.manifest.version import Version +from dfetch.project.metadata import Dependency from dfetch.project.subproject import SubProject from 
dfetch.util.util import ( find_matching_files, @@ -107,7 +108,7 @@ def _remove_ignored_files(self) -> None: if not (file_or_dir.is_file() and is_license_file(file_or_dir.name)): safe_rm(file_or_dir) - def _fetch_impl(self, version: Version) -> Version: + def _fetch_impl(self, version: Version) -> tuple[Version, list[Dependency]]: """Get the revision of the remote and place it at the local path.""" branch, branch_path, revision = self._determine_what_to_fetch(version) rev_arg = f"--revision {revision}" if revision else "" @@ -148,7 +149,7 @@ def _fetch_impl(self, version: Version) -> Version: if self.ignore: self._remove_ignored_files() - return Version(tag=version.tag, branch=branch, revision=revision) + return Version(tag=version.tag, branch=branch, revision=revision), [] @staticmethod def _parse_file_pattern(complete_path: str) -> tuple[str, str]: diff --git a/dfetch/reporting/stdout_reporter.py b/dfetch/reporting/stdout_reporter.py index 4982088c..87b57215 100644 --- a/dfetch/reporting/stdout_reporter.py +++ b/dfetch/reporting/stdout_reporter.py @@ -1,7 +1,71 @@ -"""*Dfetch* can generate an report on stdout. +"""*Dfetch* can generate a report on stdout. -Depending on the state of the projects it will show as much information -from the manifest or the metadata (``.dfetch_data.yaml``). +The stdout report prints one block per project. Fields are drawn from the +manifest where possible and fall back to the ``.dfetch_data.yaml`` metadata +written by :ref:`Update` when the project has been fetched at least once. + +Output format +~~~~~~~~~~~~~ + +A typical block looks like this: + +.. code-block:: console + + my-project: + - remote : + remote url : https://github.com/example/my-project + branch : main + tag : + last fetch : 01/01/2025, 12:00:00 + revision : e1fda19a… + patch : + licenses : MIT + +The fields are: + +- **remote**: named :ref:`Remotes` entry from the manifest (``<none>`` when + the URL is given directly via ``url:``).
+- **remote url**: full URL of the upstream repository (derived from ``url:`` + or the ``url-base`` of the :ref:`Remotes` entry). +- **branch** / **tag** / **revision**: version as recorded at fetch time; + see :ref:`Revision/Branch/Tag`. +- **last fetch**: timestamp of the last successful ``dfetch update``. +- **patch**: patch file(s) applied after fetching (``<none>`` if unused); + see :ref:`Patch`. +- **licenses**: license(s) auto-detected in the fetched directory. + +If a project has never been fetched, the metadata file is absent and only +``last fetch: never`` is shown. + +Dependencies +~~~~~~~~~~~~ + +When a fetched git project contains submodules, *Dfetch* records each one as a +dependency inside the project's ``.dfetch_data.yaml`` metadata file. The +stdout report surfaces these under a ``dependencies`` block: + +.. code-block:: console + + my-project: + - remote : + ... + dependencies : + - path : ext/vendor-lib + url : https://github.com/example/vendor-lib + branch : master + tag : + revision : 79698c99… + source-type : git-submodule + +Each dependency entry contains: + +- **path**: location of the submodule inside the fetched project. +- **url**: upstream URL of the submodule repository. +- **branch** / **tag** / **revision**: version information pinned by the parent. +- **source-type**: origin of the dependency (e.g. ``git-submodule``). + +.. 
scenario-include:: ../features/fetch-git-repo-with-submodule.feature + :scenario: Submodule changes are reported in the project report """ from dfetch.log import get_logger @@ -26,22 +90,36 @@ def add_project( ) -> None: """Add a project to the report.""" del version - logger.print_info_field("project", project.name) - logger.print_info_field(" remote", project.remote) + logger.print_info_line(project.name, "") + logger.print_info_field("- remote", project.remote) try: metadata = Metadata.from_file(Metadata.from_project_entry(project).path) - logger.print_info_field(" remote url", metadata.remote_url) - logger.print_info_field(" branch", metadata.branch) - logger.print_info_field(" tag", metadata.tag) - logger.print_info_field(" last fetch", str(metadata.last_fetch)) - logger.print_info_field(" revision", metadata.revision) - logger.print_info_field(" patch", ", ".join(metadata.patch)) + logger.print_info_field(" remote url", metadata.remote_url) + logger.print_info_field(" branch", metadata.branch) + logger.print_info_field(" tag", metadata.tag) + logger.print_info_field(" last fetch", str(metadata.last_fetch)) + logger.print_info_field(" revision", metadata.revision) + logger.print_info_field(" patch", ", ".join(metadata.patch)) logger.print_info_field( - " licenses", ",".join(license.name for license in licenses) + " licenses", ",".join(license.name for license in licenses) ) + if metadata.dependencies: + logger.info("") + logger.print_report_line(" dependencies", "") + for dependency in metadata.dependencies: + logger.print_info_field(" - path", dependency.get("destination", "")) + logger.print_info_field(" url", dependency.get("remote_url", "")) + logger.print_info_field(" branch", dependency.get("branch", "")) + logger.print_info_field(" tag", dependency.get("tag", "")) + logger.print_info_field(" revision", dependency.get("revision", "")) + logger.print_info_field( + " source-type", dependency.get("source_type", "") + ) + logger.info("") + except 
FileNotFoundError: - logger.print_info_field(" last fetch", "never") + logger.print_info_field(" last fetch", "never") def dump_to_file(self, outfile: str) -> bool: """Do nothing.""" diff --git a/dfetch/util/util.py b/dfetch/util/util.py index 24fdeceb..01452655 100644 --- a/dfetch/util/util.py +++ b/dfetch/util/util.py @@ -7,7 +7,7 @@ import stat from collections.abc import Generator, Iterator, Sequence from contextlib import contextmanager -from pathlib import Path +from pathlib import Path, PurePath from typing import Any from _hashlib import HASH @@ -102,7 +102,7 @@ def prune_files_by_pattern(directory: str, patterns: Sequence[str]) -> None: if os.path.lexists(str(file_or_dir)) and not ( file_or_dir.is_file() and is_license_file(file_or_dir.name) ): - safe_rm(file_or_dir) + safe_rm(file_or_dir, within=directory) def _remove_readonly(func: Any, path: str, _: Any) -> None: @@ -134,15 +134,29 @@ def find_matching_files(directory: str, patterns: Sequence[str]) -> Iterator[Pat yield Path(path) -def safe_rm(path: str | Path) -> None: - """Delete an file or directory safely.""" - if os.path.isdir(path): - safe_rmtree(str(path)) - else: - os.remove(path) - - -def safe_rmtree(path: str) -> None: +def safe_rm( + paths: str | Path | Sequence[str | Path], + within: str | Path = ".", +) -> None: + """Delete a file, directory or list of files/directories safely.""" + base = Path(within).resolve() + paths_to_remove = ( + [paths] if isinstance(paths, str) or not isinstance(paths, Sequence) else paths + ) + for path in paths_to_remove: + if os.path.lexists(path): + if os.path.islink(path): + check_no_path_traversal(Path(path).parent, base) + os.unlink(path) + else: + check_no_path_traversal(path, base) + if os.path.isdir(path): + _safe_rmtree(str(path)) + else: + os.remove(path) + + +def _safe_rmtree(path: str) -> None: """Delete an entire directory and all its subfolders and files.""" try: shutil.rmtree( # pylint: disable=deprecated-argument @@ -276,6 +290,51 @@ def 
check_no_path_traversal(path: str | Path, root: str | Path) -> None: raise RuntimeError(f"{str(path)!r} is outside root {str(root)!r}") +def strip_glob_prefix(path: str, pattern: str) -> str: + """Return *path* with its leading glob-pattern prefix stripped. + + When the first ``len(pattern.parts)`` components of *path* match *pattern* + (using :func:`fnmatch.fnmatch`), those components are removed and the + remainder is returned. If *path* does not match, or *path* has no + components beyond the matched prefix, *path* is returned unchanged. + + This is useful after a glob-matched directory has been "promoted" to the + root: it computes where a nested entry ends up relative to the new root. + + Args: + path: The original path whose prefix should be stripped. + pattern: A glob pattern (may contain ``*`` wildcards) whose matched + portion forms the prefix to remove. + + Returns: + The path with the matched prefix stripped, or *path* unchanged when no + match is found. + + Examples:: + + >>> strip_glob_prefix("some_dir_a/ext/lib", "some_dir_*") + 'ext/lib' + >>> strip_glob_prefix("SomeFolder/SomeSubFolder/file.c", "SomeFolder/Some*") + 'file.c' + >>> strip_glob_prefix("pkg/sub/module", "pkg") + 'sub/module' + >>> strip_glob_prefix("unrelated/path", "pkg") + 'unrelated/path' + """ + src_parts = PurePath(pattern).parts + sub_parts = PurePath(path).parts + depth = len(src_parts) + + if len(sub_parts) <= depth: + return path + + candidate = str(PurePath(*sub_parts[:depth])) + if fnmatch.fnmatch(candidate, pattern): + return str(Path(*sub_parts[depth:])) + + return path + + def resolve_absolute_path(path: str | Path) -> Path: """Return a guaranteed absolute Path, resolving symlinks. 
diff --git a/dfetch/vcs/git.py b/dfetch/vcs/git.py index 01315732..d803cc62 100644 --- a/dfetch/vcs/git.py +++ b/dfetch/vcs/git.py @@ -1,23 +1,25 @@ """Git specific implementation.""" import functools +import glob import os import re import shutil import tempfile from collections.abc import Generator, Sequence +from dataclasses import dataclass from pathlib import Path, PurePath -from typing import NamedTuple from dfetch.log import get_logger from dfetch.util.cmdline import SubprocessCommandError, run_on_cmdline -from dfetch.util.util import in_directory, safe_rmtree +from dfetch.util.util import in_directory, is_license_file, safe_rm, strip_glob_prefix from dfetch.vcs.patch import Patch, PatchType logger = get_logger(__name__) -class Submodule(NamedTuple): +@dataclass +class Submodule: """Information about a submodule.""" name: str @@ -224,7 +226,7 @@ def check_version_exists( except SubprocessCommandError as exc: if exc.returncode != 128: raise - safe_rmtree(temp_dir) + safe_rm(temp_dir, within=Path(temp_dir).parent) return exists @@ -233,6 +235,7 @@ class GitLocalRepo: """A git repository.""" METADATA_DIR = ".git" + GIT_MODULES_FILE = ".gitmodules" def __init__(self, path: str | Path = ".") -> None: """Create a local git repo.""" @@ -250,6 +253,23 @@ def is_git(self) -> bool: except (SubprocessCommandError, RuntimeError): return False + def _configure_sparse_checkout( + self, + src: str | None, + keeps: Sequence[str], + ignore: Sequence[str] | None = None, + ) -> None: + run_on_cmdline(logger, ["git", "config", "core.sparsecheckout", "true"]) + + with open(".git/info/sparse-checkout", "a", encoding="utf-8") as f: + patterns = list(keeps or []) + patterns.append(f"/{src or '*'}") + + if ignore: + patterns += self._determine_ignore_paths(src, ignore) + + f.write("\n".join(map(str, patterns)) + "\n") + def checkout_version( # pylint: disable=too-many-arguments self, *, @@ -258,7 +278,7 @@ def checkout_version( # pylint: disable=too-many-arguments src: str | None = 
None, must_keeps: list[str] | None = None, ignore: Sequence[str] | None = None, - ) -> str: + ) -> tuple[str, list[Submodule]]: """Checkout a specific version from a given remote. Args: @@ -274,19 +294,7 @@ def checkout_version( # pylint: disable=too-many-arguments run_on_cmdline(logger, ["git", "checkout", "-b", "dfetch-local-branch"]) if src or ignore: - run_on_cmdline(logger, ["git", "config", "core.sparsecheckout", "true"]) - with open( - ".git/info/sparse-checkout", "a", encoding="utf-8" - ) as sparse_checkout_file: - sparse_checkout_file.write( - "\n".join(list((must_keeps or []) + [f"/{src or '*'}"])) - ) - - if ignore: - ignore_abs_paths = self._determine_ignore_paths(src, ignore) - - sparse_checkout_file.write("\n") - sparse_checkout_file.write("\n".join(ignore_abs_paths)) + self._configure_sparse_checkout(src, must_keeps or [], ignore) run_on_cmdline( logger, @@ -295,40 +303,89 @@ def checkout_version( # pylint: disable=too-many-arguments ) run_on_cmdline(logger, ["git", "reset", "--hard", "FETCH_HEAD"]) + run_on_cmdline( + logger, + ["git", "submodule", "update", "--init", "--recursive"], + env=_extend_env_for_non_interactive_mode(), + ) + + submodules = self.submodules() + current_sha = ( run_on_cmdline(logger, ["git", "rev-parse", "HEAD"]) .stdout.decode() .strip() ) - if src: - self.move_src_folder_up(remote, src) + submodules = self._apply_src_and_ignore(remote, src, ignore, submodules) + + return str(current_sha), submodules + + def _apply_src_and_ignore( + self, + remote: str, + src: str | None, + ignore: Sequence[str] | None, + submodules: list[Submodule], + ) -> list[Submodule]: + """Apply src filter and ignore patterns, returning surviving submodules.""" + if src: + for submodule in submodules: + submodule.path = strip_glob_prefix(submodule.path, src) + self._move_src_folder_up(remote, src) + + for ignore_path in ignore or []: + paths = [ + p + for p in glob.glob(ignore_path) + if not (os.path.isfile(p) and is_license_file(os.path.basename(p))) 
+ ] + safe_rm(paths, within=".") - return str(current_sha) + return [s for s in submodules if os.path.exists(s.path)] - def move_src_folder_up(self, remote: str, src: str) -> None: + @staticmethod + def _move_src_folder_up(remote: str, src: str) -> None: """Move the files from the src folder into the root of the project. Args: remote (str): Name of the root src (str): Src folder to move up """ - full_src = src - if not os.path.isdir(src): - src = os.path.dirname(src) + matched_paths = sorted(glob.glob(src)) - if not src: + if not matched_paths: + logger.warning( + f"The 'src:' filter '{src}' didn't match any files from '{remote}'" + ) return - try: - for file_to_copy in os.listdir(src): - shutil.move(src + "/" + file_to_copy, ".") - safe_rmtree(PurePath(src).parts[0]) - except FileNotFoundError: + dirs = [] + for src_dir_path in matched_paths: + if os.path.isdir(src_dir_path): + dirs.append(src_dir_path) + else: + if dir_path := os.path.dirname(src_dir_path): + dirs.append(dir_path) + + unique_dirs = list(dict.fromkeys(dirs)) + + if len(unique_dirs) > 1: logger.warning( - f"The 'src:' filter '{full_src}' didn't match any files from '{remote}'" + f"The 'src:' filter '{src}' matches multiple directories from '{remote}'. " + f"Only considering files in '{unique_dirs[0]}'." 
) - return + + for src_dir_path in unique_dirs[:1]: + try: + for file_to_copy in os.listdir(src_dir_path): + shutil.move(src_dir_path + "/" + file_to_copy, ".") + safe_rm(PurePath(src_dir_path).parts[0]) + except FileNotFoundError: + logger.warning( + f"The 'src:' filter '{src_dir_path}' didn't match any files from '{remote}'" + ) + continue @staticmethod def _determine_ignore_paths( @@ -490,7 +547,7 @@ def submodules() -> list[Submodule]: "submodule", "foreach", "--quiet", - "echo $name $sm_path $sha1 $toplevel", + 'printf "%s\\0%s\\0%s\\0%s\n" "$name" "$sm_path" "$sha1" "$toplevel"', ], ) @@ -498,7 +555,7 @@ def submodules() -> list[Submodule]: urls: dict[str, str] = {} for line in result.stdout.decode().split("\n"): if line: - name, sm_path, sha, toplevel = line.split(" ") + name, sm_path, sha, toplevel = line.split("\0") urls = urls or GitLocalRepo._get_submodule_urls(toplevel) url = urls[name] branch, tag = GitRemote(url).find_branch_tip_or_tag_from_sha(sha) diff --git a/doc/internal.rst b/doc/internal.rst index 814205f5..19a61ec3 100644 --- a/doc/internal.rst +++ b/doc/internal.rst @@ -31,8 +31,6 @@ Metadata A file created by *DFetch* to store some relevant information about a subproject. -.. _architecture: - Architecture ------------ These diagrams are based on `Simon Brown's C4-model`_. 
diff --git a/example/dfetch.yaml b/example/dfetch.yaml index 4ee1e746..88641d90 100644 --- a/example/dfetch.yaml +++ b/example/dfetch.yaml @@ -52,3 +52,16 @@ manifest: - .github integrity: hash: sha256:7be7992439339017edb551d8e7d2315f9bb57c402da50c2cee9cd0e2724600a1 + + - name: TF-PSA-Crypto + url: https://github.com/Mbed-TLS/TF-PSA-Crypto.git + tag: v1.0.0 + dst: ext/TF-PSA-Crypto + ignore: + - tests + - scripts + - programs + - drivers + - doxygen + - docs + - framework/tests diff --git a/features/environment.py b/features/environment.py index 10d083e9..296a93d2 100644 --- a/features/environment.py +++ b/features/environment.py @@ -2,11 +2,12 @@ import os import tempfile +from pathlib import Path from behave import fixture, use_fixture from rich.console import Console -from dfetch.util.util import safe_rmtree +from dfetch.util.util import safe_rm @fixture @@ -22,7 +23,7 @@ def tmpdir(context): yield context.tmpdir # -- CLEANUP-FIXTURE PART: os.chdir(context.orig_cwd) - safe_rmtree(context.tmpdir) + safe_rm(context.tmpdir, within=Path(context.tmpdir).parent) def before_scenario(context, _): @@ -42,3 +43,5 @@ def before_all(context): context.config.logging_format = "%(message)s" context.remotes_dir = "some-remote-server" + + os.environ["GIT_ALLOW_PROTOCOL"] = "file:http:https:ssh" diff --git a/features/fetch-file-pattern-git.feature b/features/fetch-file-pattern-git.feature index b7fb036b..7be43bae 100644 --- a/features/fetch-file-pattern-git.feature +++ b/features/fetch-file-pattern-git.feature @@ -19,6 +19,8 @@ Feature: Fetch file pattern from git repo | SomeFolder/SomeSubFolder/SomeFile.txt | | SomeFolder/SomeSubFolder/OtherFile.txt | | SomeFolder/SomeSubFolder/SomeFile.md | + | SomeFolder/Unrelated.txt | + | AlsoUnrelated.txt | When I run "dfetch update" Then the output shows """ @@ -35,3 +37,35 @@ Feature: Fetch file pattern from git repo SomeFile.txt dfetch.yaml """ + + Scenario: A file pattern matches two files in different subfolders + Given the manifest 
'dfetch.yaml' in MyProject + """ + manifest: + version: 0.0 + projects: + - name: SomeProjectWithAnInterestingFile + url: some-remote-server/SomeProjectWithAnInterestingFile.git + src: SomeFolder/Some* + tag: v1 + """ + And a git-repository "SomeProjectWithAnInterestingFile.git" with the files + | path | + | SomeFolder/SomeSubFolder/SomeFile.txt | + | SomeFolder/SomeOtherSubFolder/SomeFile.txt | + When I run "dfetch update" + Then the output shows + """ + Dfetch (0.12.1) + The 'src:' filter 'SomeFolder/Some*' matches multiple directories from 'some-remote-server/SomeProjectWithAnInterestingFile.git'. Only considering files in 'SomeFolder/SomeOtherSubFolder'. + SomeProjectWithAnInterestingFile: + > Fetched v1 + """ + Then 'MyProject' looks like: + """ + MyProject/ + SomeProjectWithAnInterestingFile/ + .dfetch_data.yaml + SomeFile.txt + dfetch.yaml + """ diff --git a/features/fetch-git-repo-with-submodule.feature b/features/fetch-git-repo-with-submodule.feature new file mode 100644 index 00000000..7978e215 --- /dev/null +++ b/features/fetch-git-repo-with-submodule.feature @@ -0,0 +1,155 @@ +Feature: Fetch projects with nested VCS dependencies + + Some projects include nested version control dependencies + such as Git submodules or other externals. + These dependencies must be fetched at the exact revision + pinned by the parent repository to ensure reproducibility. + + Background: + Given a git repository "TestRepo.git" + And a git-repository "SomeInterestingProject.git" with the following submodules + | path | url | revision | + | ext/test-repo1 | some-remote-server/TestRepo.git | master | + | ext/test-repo2 | some-remote-server/TestRepo.git | v1 | + + Scenario: A project with a git submodule is fetched at the pinned revision + Given the manifest 'dfetch.yaml' in MyProject + """ + manifest: + version: 0.0 + projects: + - name: my-project-with-submodules + url: some-remote-server/SomeInterestingProject.git + """ + When I run "dfetch update" + Then the output shows + 
""" + Dfetch (0.12.1) + my-project-with-submodules: + > Found & fetched submodule "./ext/test-repo1" (some-remote-server/TestRepo.git @ master - 79698c99152e4a4b7b759c9def50a130bc91a2ff) + > Found & fetched submodule "./ext/test-repo2" (some-remote-server/TestRepo.git @ master - 79698c99152e4a4b7b759c9def50a130bc91a2ff) + > Fetched master - e1fda19a57b873eb8e6ae37780594cbb77b70f1a + """ + Then 'MyProject' looks like: + """ + MyProject/ + dfetch.yaml + my-project-with-submodules/ + .dfetch_data.yaml + README.md + ext/ + test-repo1/ + README.md + test-repo2/ + README.md + """ + + Scenario: A project with a git submodule that itself has a nested submodule is fetched at the pinned revision + Given a git repository "LeafProject.git" + And a git-repository "MiddleProject.git" with the following submodules + | path | url | revision | + | ext/leaf | some-remote-server/LeafProject.git | master | + And a git-repository "OuterProject.git" with the following submodules + | path | url | revision | + | ext/middle | some-remote-server/MiddleProject.git | master | + Given the manifest 'dfetch.yaml' in MyProject + """ + manifest: + version: 0.0 + projects: + - name: outer-project + url: some-remote-server/OuterProject.git + """ + When I run "dfetch update" + Then the output shows + """ + Dfetch (0.12.1) + outer-project: + > Found & fetched submodule "./ext/middle" (some-remote-server/MiddleProject.git @ master - 79698c99152e4a4b7b759c9def50a130bc91a2ff) + > Fetched master - e1fda19a57b873eb8e6ae37780594cbb77b70f1a + """ + Then 'MyProject' looks like: + """ + MyProject/ + dfetch.yaml + outer-project/ + .dfetch_data.yaml + README.md + ext/ + middle/ + README.md + ext/ + leaf/ + README.md + """ + + Scenario: Submodule changes are reported in the project report + Given a fetched and committed MyProject with the manifest + """ + manifest: + version: 0.0 + projects: + - name: my-project-with-submodules + url: some-remote-server/SomeInterestingProject.git + """ + When I run "dfetch 
report" in MyProject + Then the output shows + """ + Dfetch (0.12.1) + my-project-with-submodules: + - remote : + remote url : some-remote-server/SomeInterestingProject.git + branch : master + tag : + last fetch : 26/02/2026, 20:28:24 + revision : 79698c99152e4a4b7b759c9def50a130bc91a2ff + patch : + licenses : + + dependencies : + - path : ext/test-repo1 + url : some-remote-server/TestRepo.git + branch : master + tag : + revision : e1fda19a57b873eb8e6ae37780594cbb77b70f1a + source-type : git-submodule + + - path : ext/test-repo2 + url : some-remote-server/TestRepo.git + branch : master + tag : + revision : 8df389d0524863b85f484f15a91c5f2c40aefda1 + source-type : git-submodule + """ + + Scenario: Subfolder matched through a glob is fetched and its submodules are resolved + Given a git-repository "GlobProject.git" with the following submodules + | path | url | revision | + | some_dir_a/ext/test-repo | some-remote-server/TestRepo.git | master | + Given the manifest 'dfetch.yaml' in MyProject + """ + manifest: + version: 0.0 + projects: + - name: glob-project + url: some-remote-server/GlobProject.git + src: some_dir_* + """ + When I run "dfetch update" + Then the output shows + """ + Dfetch (0.12.1) + glob-project: + > Found & fetched submodule "./ext/test-repo" (some-remote-server/TestRepo.git @ master - 79698c99152e4a4b7b759c9def50a130bc91a2ff) + > Fetched master - e1fda19a57b873eb8e6ae37780594cbb77b70f1a + """ + Then 'MyProject' looks like: + """ + MyProject/ + dfetch.yaml + glob-project/ + .dfetch_data.yaml + ext/ + test-repo/ + README.md + """ diff --git a/features/list-projects.feature b/features/list-projects.feature index f000de6c..79eb9f86 100644 --- a/features/list-projects.feature +++ b/features/list-projects.feature @@ -28,24 +28,24 @@ Feature: List dependencies Then the output shows """ Dfetch (0.12.1) - project : ext/test-repo-tag - remote : - remote url : https://github.com/dfetch-org/test-repo - branch : main - tag : - last fetch : 02/07/2021, 20:25:56 -
revision : e1fda19a57b873eb8e6ae37780594cbb77b70f1a - patch : - licenses : MIT License - project : ext/test-rev-and-branch - remote : github-com-dfetch-org - remote url : https://github.com/dfetch-org/test-repo - branch : main - tag : v1 - last fetch : 02/07/2021, 20:25:56 - revision : - patch : - licenses : MIT License + ext/test-repo-tag: + - remote : + remote url : https://github.com/dfetch-org/test-repo + branch : main + tag : + last fetch : 02/07/2021, 20:25:56 + revision : e1fda19a57b873eb8e6ae37780594cbb77b70f1a + patch : + licenses : MIT License + ext/test-rev-and-branch: + - remote : github-com-dfetch-org + remote url : https://github.com/dfetch-org/test-repo + branch : main + tag : v1 + last fetch : 02/07/2021, 20:25:56 + revision : + patch : + licenses : MIT License """ @remote-svn @@ -68,15 +68,15 @@ Feature: List dependencies Then the output shows """ Dfetch (0.12.1) - project : cutter-svn-tag - remote : - remote url : svn://svn.code.sf.net/p/cutter/svn/cutter - branch : - tag : 1.1.7 - last fetch : 29/12/2024, 20:09:21 - revision : 4007 - patch : - licenses : + cutter-svn-tag: + - remote : + remote url : svn://svn.code.sf.net/p/cutter/svn/cutter + branch : + tag : 1.1.7 + last fetch : 29/12/2024, 20:09:21 + revision : 4007 + patch : + licenses : """ Scenario: Git repo with applied patches @@ -85,13 +85,13 @@ Feature: List dependencies Then the output shows """ Dfetch (0.12.1) - project : ext/test-repo-tag - remote : github-com-dfetch-org - remote url : https://github.com/dfetch-org/test-repo - branch : main - tag : v2.0 - last fetch : 02/07/2021, 20:25:56 - revision : - patch : 001-diff.patch, 002-diff.patch - licenses : MIT License + ext/test-repo-tag: + - remote : github-com-dfetch-org + remote url : https://github.com/dfetch-org/test-repo + branch : main + tag : v2.0 + last fetch : 02/07/2021, 20:25:56 + revision : + patch : 001-diff.patch, 002-diff.patch + licenses : MIT License """ diff --git a/features/steps/generic_steps.py 
b/features/steps/generic_steps.py index 0f646dd7..dfdadda1 100644 --- a/features/steps/generic_steps.py +++ b/features/steps/generic_steps.py @@ -406,6 +406,11 @@ def step_impl(context, name): check_file(name, context.text) +@then("'{name}' exists") +def step_impl(_, name): + assert os.path.exists(name), f"Expected {name} to exist, but it didn't!" + + @then("the '{name}' json file includes") def step_impl(context, name): """Partial JSON match - the expected JSON must be a *subset* of the actual file.""" diff --git a/features/steps/git_steps.py b/features/steps/git_steps.py index 73d61318..b0ea4a27 100644 --- a/features/steps/git_steps.py +++ b/features/steps/git_steps.py @@ -10,7 +10,12 @@ from behave import given, when # pylint: disable=no-name-in-module from dfetch.util.util import in_directory -from features.steps.generic_steps import call_command, extend_file, generate_file +from features.steps.generic_steps import ( + call_command, + extend_file, + generate_file, + remote_server_path, +) from features.steps.manifest_steps import generate_manifest @@ -39,18 +44,28 @@ def tag(name: str): subprocess.check_call(["git", "tag", "-a", name, "-m", "'Some tag'"]) +@given('a git-repository "{name}" with the following submodules') @given("a git repo with the following submodules") -def step_impl(context): - create_repo() +def step_impl(context, name=None): - for submodule in context.table: - subprocess.check_call( - ["git", "submodule", "add", submodule["url"], submodule["path"]] - ) + path = os.getcwd() + if name: + path = os.path.join(context.remotes_dir, name) + os.makedirs(path, exist_ok=True) + + with in_directory(path): + create_repo() + generate_file("README.md", "some content") + + for submodule in context.table: + url = submodule["url"].replace( + "some-remote-server", f"file:///{remote_server_path(context)}" + ) + subprocess.check_call(["git", "submodule", "add", url, submodule["path"]]) - with in_directory(submodule["path"]): - subprocess.check_call(["git", 
"checkout", submodule["revision"]]) - commit_all("Added submodules") + with in_directory(submodule["path"]): + subprocess.check_call(["git", "checkout", submodule["revision"]]) + commit_all("Added submodules") @given('a new tag "{tagname}" is added to git-repository "{name}"') diff --git a/pyproject.toml b/pyproject.toml index 3b71049b..46b8f35e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -122,6 +122,7 @@ version_scheme = "guess-next-dev" [tool.black] extend-exclude = "sphinxcontrib_asciinema" +target-version = ["py313"] [tool.isort] profile = "black" diff --git a/tests/manifest_mock.py b/tests/manifest_mock.py index f523a270..7df1afea 100644 --- a/tests/manifest_mock.py +++ b/tests/manifest_mock.py @@ -15,6 +15,7 @@ def mock_manifest(projects, path: str = "/some/path") -> MagicMock: mock_project = Mock(spec=ProjectEntry) mock_project.name = project["name"] mock_project.destination = "some_dest" + mock_project.remote = "" project_mocks += [mock_project] mocked_manifest = MagicMock(spec=Manifest, projects=project_mocks, path=path) diff --git a/tests/test_report.py b/tests/test_report.py index ef210c89..55d0cbe1 100644 --- a/tests/test_report.py +++ b/tests/test_report.py @@ -36,12 +36,12 @@ def test_report(name, projects): with patch( "dfetch.commands.report.create_super_project", return_value=fake_superproject ): - with patch("dfetch.log.DLogger.print_report_line") as mocked_print_report_line: + with patch("dfetch.log.DLogger.print_info_line") as mocked_print_info_line: report(DEFAULT_ARGS) if projects: for project in projects: - mocked_print_report_line.assert_any_call("project", project["name"]) + mocked_print_info_line.assert_any_call(project["name"], "") else: - mocked_print_report_line.assert_not_called() + mocked_print_info_line.assert_not_called() diff --git a/tests/test_subproject.py b/tests/test_subproject.py index 462086dc..fcf645d6 100644 --- a/tests/test_subproject.py +++ b/tests/test_subproject.py @@ -10,14 +10,15 @@ from 
dfetch.manifest.project import ProjectEntry from dfetch.manifest.version import Version +from dfetch.project.metadata import Dependency from dfetch.project.subproject import SubProject class ConcreteSubProject(SubProject): _wanted_version: Version - def _fetch_impl(self, version: Version) -> Version: - return Version() + def _fetch_impl(self, version: Version) -> tuple[Version, list[Dependency]]: + return Version(), [] def _latest_revision_on_branch(self, branch): return "latest" diff --git a/tests/test_util.py b/tests/test_util.py index 5e7010c3..60073e8a 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -5,7 +5,12 @@ import pytest -from dfetch.util.util import copy_src_subset, hash_directory, prune_files_by_pattern +from dfetch.util.util import ( + copy_src_subset, + hash_directory, + prune_files_by_pattern, + strip_glob_prefix, +) # --------------------------------------------------------------------------- # copy_src_subset – path-traversal protection @@ -155,3 +160,27 @@ def test_prune_skips_already_removed_paths(tmp_path): assert not parent.exists() assert unrelated.exists() + + +# --------------------------------------------------------------------------- +# strip_glob_prefix +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + "path, pattern, expected", + [ + # Wildcard prefix stripped + ("some_dir_a/ext/lib", "some_dir_*", "ext/lib"), + # Multi-level pattern with wildcard + ("SomeFolder/SomeSubFolder/file.c", "SomeFolder/Some*", "file.c"), + # Exact (no-wildcard) prefix stripped + ("pkg/sub/module", "pkg", "sub/module"), + # Path does not match pattern — returned unchanged + ("unrelated/path", "pkg", "unrelated/path"), + # Path too shallow to have anything beyond the pattern — returned unchanged + ("some_dir_a", "some_dir_*", "some_dir_a"), + ], +) +def test_strip_glob_prefix(path, pattern, expected): + assert strip_glob_prefix(path, pattern) == expected