From ee346f205b1d28297ebe1495b494dd4baf0ef28d Mon Sep 17 00:00:00 2001 From: Ben Date: Wed, 18 Mar 2026 19:22:03 +0000 Subject: [PATCH 01/15] Add to example --- example/dfetch.yaml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/example/dfetch.yaml b/example/dfetch.yaml index 4ee1e746..4ceb394d 100644 --- a/example/dfetch.yaml +++ b/example/dfetch.yaml @@ -52,3 +52,15 @@ manifest: - .github integrity: hash: sha256:7be7992439339017edb551d8e7d2315f9bb57c402da50c2cee9cd0e2724600a1 + + - name: TF-PSA-Crypto + url: https://github.com/Mbed-TLS/TF-PSA-Crypto.git + tag: v1.0.0 + dst: ext/TF-PSA-Crypto + ignore: + - tests + - scripts + - programs + - drivers + - doxygen + - docs From b704079fe5d1b3c377a58d120db0b97484fdbcc9 Mon Sep 17 00:00:00 2001 From: Ben Date: Wed, 18 Mar 2026 19:22:03 +0000 Subject: [PATCH 02/15] Fetching any submodule in a subproject with submodules Fixes #1013 --- CHANGELOG.rst | 9 +- dfetch/log.py | 5 +- dfetch/project/gitsubproject.py | 30 +++++-- dfetch/project/metadata.py | 33 ++++++- dfetch/project/subproject.py | 30 ++++++- dfetch/project/svnsubproject.py | 6 +- dfetch/reporting/stdout_reporter.py | 34 ++++--- dfetch/util/util.py | 11 +-- dfetch/vcs/git.py | 21 ++++- .../fetch-git-repo-with-submodule.feature | 89 +++++++++++++++++++ features/list-projects.feature | 72 +++++++-------- features/steps/generic_steps.py | 5 ++ features/steps/git_steps.py | 27 ++++-- tests/test_report.py | 6 +- tests/test_subproject.py | 6 +- 15 files changed, 295 insertions(+), 89 deletions(-) create mode 100644 features/fetch-git-repo-with-submodule.feature diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 43ec044e..ef15e5e9 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,16 +1,19 @@ -Unreleased -========== +Release 0.13.0 (unreleased) +==================================== * Add archive (``vcs: archive``) support for fetching dependencies from ``.tar.gz``, ``.tgz``, ``.tar.bz2``, ``.tar.xz`` and ``.zip`` files via HTTP, HTTPS or file URLs 
(#1058) * Fix path-traversal check using character-based prefix comparison instead of path-component comparison (#1058) * Fix directory hash being non-deterministic across filesystem traversal orders, causing false local-change detection (#1058) * Fix ``dfetch freeze`` not capturing branch information for SVN projects when only the revision matched (#1058) +* Rename child-manifests to sub-manifests in documentation and code (#1027) +* Fetch git submodules in git subproject at pinned revision (#1013) +* Add nested projects in subprojects to project report (#1017) +* Make `dfetch report` output more yaml-like (#1017) Release 0.12.1 (released 2026-02-24) ==================================== * Fix missing unicode data in standalone binaries (#1014) -* Rename child-manifests to sub-manifests in documentation and code (#1027) Release 0.12.0 (released 2026-02-21) ==================================== diff --git a/dfetch/log.py b/dfetch/log.py index 0025fe25..585c1de1 100644 --- a/dfetch/log.py +++ b/dfetch/log.py @@ -67,8 +67,9 @@ def print_info_line(self, name: str, info: str) -> None: self.info(f" [bold][bright_green]{safe_name}:[/bright_green][/bold]") DLogger._printed_projects.add(name) - line = markup_escape(info).replace("\n", "\n ") - self.info(f" [bold blue]> {line}[/bold blue]") + if info: + line = markup_escape(info).replace("\n", "\n ") + self.info(f" [bold blue]> {line}[/bold blue]") def print_warning_line(self, name: str, info: str) -> None: """Print a warning line: green name, yellow value.""" diff --git a/dfetch/project/gitsubproject.py b/dfetch/project/gitsubproject.py index db98ba3d..8f3e297e 100644 --- a/dfetch/project/gitsubproject.py +++ b/dfetch/project/gitsubproject.py @@ -7,8 +7,8 @@ from dfetch.log import get_logger from dfetch.manifest.project import ProjectEntry from dfetch.manifest.version import Version -from dfetch.project.subproject import SubProject -from dfetch.util.util import LICENSE_GLOBS, safe_rmtree +from dfetch.project.subproject 
import SubProject, VcsDependency +from dfetch.util.util import LICENSE_GLOBS, safe_rm, safe_rmtree from dfetch.vcs.git import GitLocalRepo, GitRemote, get_git_version logger = get_logger(__name__) @@ -57,7 +57,7 @@ def list_tool_info() -> None: ) SubProject._log_tool("git", "") - def _fetch_impl(self, version: Version) -> Version: + def _fetch_impl(self, version: Version) -> tuple[Version, list[VcsDependency]]: """Get the revision of the remote and place it at the local path.""" rev_or_branch_or_tag = self._determine_what_to_fetch(version) @@ -69,17 +69,35 @@ def _fetch_impl(self, version: Version) -> Version: ] local_repo = GitLocalRepo(self.local_path) - fetched_sha = local_repo.checkout_version( + fetched_sha, submodules = local_repo.checkout_version( remote=self.remote, version=rev_or_branch_or_tag, src=self.source, - must_keeps=license_globs, + must_keeps=license_globs + [".gitmodules"], ignore=self.ignore, ) + vcs_deps = [] + for submodule in submodules: + self._log_project( + f'Found & fetched submodule "./{submodule.path}" ' + f" ({submodule.url} @ {Version(tag=submodule.tag, branch=submodule.branch, revision=submodule.sha)})", + ) + vcs_deps.append( + VcsDependency( + remote_url=submodule.url, + destination=submodule.path, + branch=submodule.branch, + tag=submodule.tag, + revision=submodule.sha, + source_type="git-submodule", + ) + ) + safe_rmtree(os.path.join(self.local_path, local_repo.METADATA_DIR)) + safe_rm(os.path.join(self.local_path, local_repo.GIT_MODULES_FILE)) - return self._determine_fetched_version(version, fetched_sha) + return self._determine_fetched_version(version, fetched_sha), vcs_deps def _determine_what_to_fetch(self, version: Version) -> str: """Based on asked version, target to fetch.""" diff --git a/dfetch/project/metadata.py b/dfetch/project/metadata.py index 0f611c81..2b3eba47 100644 --- a/dfetch/project/metadata.py +++ b/dfetch/project/metadata.py @@ -16,6 +16,17 @@ """ +class Dependency(TypedDict): + """Argument types for 
dependency class construction.""" + + branch: str + tag: str + revision: str + remote_url: str + destination: str + source_type: str + + class Options(TypedDict): # pylint: disable=too-many-ancestors """Argument types for Metadata class construction.""" @@ -27,6 +38,7 @@ class Options(TypedDict): # pylint: disable=too-many-ancestors destination: str hash: str patch: str | list[str] + dependencies: list["Dependency"] class Metadata: @@ -54,6 +66,8 @@ def __init__(self, kwargs: Options) -> None: # Historically only a single patch was allowed self._patch: list[str] = always_str_list(kwargs.get("patch", [])) + self._dependencies: list[Dependency] = kwargs.get("dependencies", []) + @classmethod def from_project_entry(cls, project: ProjectEntry) -> "Metadata": """Create a metadata object from a project entry.""" @@ -66,6 +80,7 @@ def from_project_entry(cls, project: ProjectEntry) -> "Metadata": "last_fetch": datetime.datetime(2000, 1, 1, 0, 0, 0), "hash": "", "patch": project.patch, + "dependencies": [], } return cls(data) @@ -77,13 +92,18 @@ def from_file(cls, path: str) -> "Metadata": return cls(data) def fetched( - self, version: Version, hash_: str = "", patch_: list[str] | None = None + self, + version: Version, + hash_: str = "", + patch_: list[str] | None = None, + dependencies: list[Dependency] | None = None, ) -> None: """Update metadata.""" self._last_fetch = datetime.datetime.now() self._version = version self._hash = hash_ self._patch = patch_ or [] + self._dependencies = dependencies or [] @property def version(self) -> Version: @@ -129,6 +149,11 @@ def patch(self) -> list[str]: """The list of applied patches as stored in the metadata.""" return self._patch + @property + def dependencies(self) -> list[Dependency]: + """The list of dependency projects as stored in the metadata.""" + return self._dependencies + @property def path(self) -> str: """Path to metadata file.""" @@ -152,12 +177,13 @@ def __eq__(self, other: object) -> bool: other._version.revision == 
self._version.revision, other.hash == self.hash, other.patch == self.patch, + other.dependencies == self.dependencies, ] ) def dump(self) -> None: """Dump metadata file to correct path.""" - metadata = { + metadata: dict[str, dict[str, str | list[str] | list[Dependency]]] = { "dfetch": { "remote_url": self.remote_url, "branch": self._version.branch, @@ -169,6 +195,9 @@ def dump(self) -> None: } } + if self.dependencies: + metadata["dfetch"]["dependencies"] = self.dependencies + with open(self.path, "w+", encoding="utf-8") as metadata_file: metadata_file.write(DONT_EDIT_WARNING) yaml.dump(metadata, metadata_file) diff --git a/dfetch/project/subproject.py b/dfetch/project/subproject.py index 5649b92c..c4365649 100644 --- a/dfetch/project/subproject.py +++ b/dfetch/project/subproject.py @@ -4,12 +4,13 @@ import pathlib from abc import ABC, abstractmethod from collections.abc import Callable, Sequence +from typing import NamedTuple from dfetch.log import get_logger from dfetch.manifest.project import ProjectEntry from dfetch.manifest.version import Version from dfetch.project.abstract_check_reporter import AbstractCheckReporter -from dfetch.project.metadata import Metadata +from dfetch.project.metadata import Dependency, Metadata from dfetch.util.util import hash_directory, safe_rm from dfetch.util.versions import latest_tag_from_list from dfetch.vcs.patch import Patch @@ -17,6 +18,28 @@ logger = get_logger(__name__) +class VcsDependency(NamedTuple): + """Information about a vcs dependency.""" + + destination: str + remote_url: str + branch: str + tag: str + revision: str + source_type: str + + def to_dependency(self) -> Dependency: + """Convert this vcs dependency to a Dependency object.""" + return Dependency( + destination=self.destination, + remote_url=self.remote_url, + branch=self.branch, + tag=self.tag, + revision=self.revision, + source_type=self.source_type, + ) + + class SubProject(ABC): """Abstract SubProject object. 
@@ -129,7 +152,7 @@ def update( f"Fetching {to_fetch}", enabled=self._show_animations, ): - actually_fetched = self._fetch_impl(to_fetch) + actually_fetched, dependency = self._fetch_impl(to_fetch) self._log_project(f"Fetched {actually_fetched}") applied_patches = self._apply_patches(patch_count) @@ -145,6 +168,7 @@ def update( skiplist=[self.__metadata.FILENAME] + post_fetch_ignored, ), patch_=applied_patches, + dependencies=[dependency.to_dependency() for dependency in dependency], ) logger.debug(f"Writing repo metadata to: {self.__metadata.path}") @@ -392,7 +416,7 @@ def _are_there_local_changes(self, files_to_ignore: Sequence[str]) -> bool: ) @abstractmethod - def _fetch_impl(self, version: Version) -> Version: + def _fetch_impl(self, version: Version) -> tuple[Version, list[VcsDependency]]: """Fetch the given version of the subproject, should be implemented by the child class.""" @abstractmethod diff --git a/dfetch/project/svnsubproject.py b/dfetch/project/svnsubproject.py index 85c28c22..8b5e546d 100644 --- a/dfetch/project/svnsubproject.py +++ b/dfetch/project/svnsubproject.py @@ -7,7 +7,7 @@ from dfetch.log import get_logger from dfetch.manifest.project import ProjectEntry from dfetch.manifest.version import Version -from dfetch.project.subproject import SubProject +from dfetch.project.subproject import SubProject, VcsDependency from dfetch.util.util import ( find_matching_files, find_non_matching_files, @@ -107,7 +107,7 @@ def _remove_ignored_files(self) -> None: if not (file_or_dir.is_file() and is_license_file(file_or_dir.name)): safe_rm(file_or_dir) - def _fetch_impl(self, version: Version) -> Version: + def _fetch_impl(self, version: Version) -> tuple[Version, list[VcsDependency]]: """Get the revision of the remote and place it at the local path.""" branch, branch_path, revision = self._determine_what_to_fetch(version) rev_arg = f"--revision {revision}" if revision else "" @@ -148,7 +148,7 @@ def _fetch_impl(self, version: Version) -> Version: if 
self.ignore: self._remove_ignored_files() - return Version(tag=version.tag, branch=branch, revision=revision) + return Version(tag=version.tag, branch=branch, revision=revision), [] @staticmethod def _parse_file_pattern(complete_path: str) -> tuple[str, str]: diff --git a/dfetch/reporting/stdout_reporter.py b/dfetch/reporting/stdout_reporter.py index 4982088c..4c4ac521 100644 --- a/dfetch/reporting/stdout_reporter.py +++ b/dfetch/reporting/stdout_reporter.py @@ -26,22 +26,36 @@ def add_project( ) -> None: """Add a project to the report.""" del version - logger.print_info_field("project", project.name) - logger.print_info_field(" remote", project.remote) + logger.print_info_line(project.name, "") + logger.print_info_field("- remote", project.remote) try: metadata = Metadata.from_file(Metadata.from_project_entry(project).path) - logger.print_info_field(" remote url", metadata.remote_url) - logger.print_info_field(" branch", metadata.branch) - logger.print_info_field(" tag", metadata.tag) - logger.print_info_field(" last fetch", str(metadata.last_fetch)) - logger.print_info_field(" revision", metadata.revision) - logger.print_info_field(" patch", ", ".join(metadata.patch)) + logger.print_info_field(" remote url", metadata.remote_url) + logger.print_info_field(" branch", metadata.branch) + logger.print_info_field(" tag", metadata.tag) + logger.print_info_field(" last fetch", str(metadata.last_fetch)) + logger.print_info_field(" revision", metadata.revision) + logger.print_info_field(" patch", ", ".join(metadata.patch)) logger.print_info_field( - " licenses", ",".join(license.name for license in licenses) + " licenses", ",".join(license.name for license in licenses) ) + if metadata.dependencies: + logger.info("") + logger.print_report_line(" dependencies", "") + for dependency in metadata.dependencies: + logger.print_info_field(" - path", dependency.get("destination", "")) + logger.print_info_field(" url", dependency.get("remote_url", "")) + logger.print_info_field(" 
branch", dependency.get("branch", "")) + logger.print_info_field(" tag", dependency.get("tag", "")) + logger.print_info_field(" revision", dependency.get("revision", "")) + logger.print_info_field( + " source-type", dependency.get("source_type", "") + ) + logger.info("") + except FileNotFoundError: - logger.print_info_field(" last fetch", "never") + logger.print_info_field(" last fetch", "never") def dump_to_file(self, outfile: str) -> bool: """Do nothing.""" diff --git a/dfetch/util/util.py b/dfetch/util/util.py index 24fdeceb..dbf5b2d7 100644 --- a/dfetch/util/util.py +++ b/dfetch/util/util.py @@ -135,11 +135,12 @@ def find_matching_files(directory: str, patterns: Sequence[str]) -> Iterator[Pat def safe_rm(path: str | Path) -> None: - """Delete an file or directory safely.""" - if os.path.isdir(path): - safe_rmtree(str(path)) - else: - os.remove(path) + """Delete a file or directory safely.""" + if os.path.lexists(path): + if os.path.isdir(path): + safe_rmtree(str(path)) + else: + os.remove(path) def safe_rmtree(path: str) -> None: diff --git a/dfetch/vcs/git.py b/dfetch/vcs/git.py index 01315732..dcd8d3c3 100644 --- a/dfetch/vcs/git.py +++ b/dfetch/vcs/git.py @@ -6,8 +6,8 @@ import shutil import tempfile from collections.abc import Generator, Sequence +from dataclasses import dataclass from pathlib import Path, PurePath -from typing import NamedTuple from dfetch.log import get_logger from dfetch.util.cmdline import SubprocessCommandError, run_on_cmdline @@ -17,7 +17,8 @@ logger = get_logger(__name__) -class Submodule(NamedTuple): +@dataclass +class Submodule: """Information about a submodule.""" name: str @@ -233,6 +234,7 @@ class GitLocalRepo: """A git repository.""" METADATA_DIR = ".git" + GIT_MODULES_FILE = ".gitmodules" def __init__(self, path: str | Path = ".") -> None: """Create a local git repo.""" @@ -258,7 +260,7 @@ def checkout_version( # pylint: disable=too-many-arguments src: str | None = None, must_keeps: list[str] | None = None, ignore: 
Sequence[str] | None = None, - ) -> str: + ) -> tuple[str, list[Submodule]]: """Checkout a specific version from a given remote. Args: @@ -295,6 +297,14 @@ def checkout_version( # pylint: disable=too-many-arguments ) run_on_cmdline(logger, ["git", "reset", "--hard", "FETCH_HEAD"]) + run_on_cmdline( + logger, + ["git", "submodule", "update", "--init", "--recursive"], + env=_extend_env_for_non_interactive_mode(), + ) + + submodules = self.submodules() + current_sha = ( run_on_cmdline(logger, ["git", "rev-parse", "HEAD"]) .stdout.decode() @@ -302,9 +312,12 @@ def checkout_version( # pylint: disable=too-many-arguments ) if src: + for submodule in submodules: + submodule.path = str(Path(submodule.path).relative_to(Path(src))) + self.move_src_folder_up(remote, src) - return str(current_sha) + return str(current_sha), submodules def move_src_folder_up(self, remote: str, src: str) -> None: """Move the files from the src folder into the root of the project. diff --git a/features/fetch-git-repo-with-submodule.feature b/features/fetch-git-repo-with-submodule.feature new file mode 100644 index 00000000..75555274 --- /dev/null +++ b/features/fetch-git-repo-with-submodule.feature @@ -0,0 +1,89 @@ +Feature: Fetch projects with nested VCS dependencies + + Some projects include nested version control dependencies + such as Git submodules or other externals + These dependencies must be fetched at the exact revision + pinned by the parent repository to ensure reproducibility + + Background: + Given a git-repository "SomeInterestingProject.git" with the following submodules + | path | url | revision | + | ext/test-repo1 | https://github.com/dfetch-org/test-repo | e1fda19a57b873eb8e6ae37780594cbb77b70f1a | + | ext/test-repo2 | https://github.com/dfetch-org/test-repo | 8df389d0524863b85f484f15a91c5f2c40aefda1 | + + Scenario: A project with a git submodule is fetched at the pinned revision + Given the manifest 'dfetch.yaml' in MyProject + """ + manifest: + version: 0.0 + projects: + - 
name: my-project-with-submodules + url: some-remote-server/SomeInterestingProject.git + """ + When I run "dfetch update" + Then the output shows + """ + Dfetch (0.12.1) + my-project-with-submodules: + > Found & fetched submodule "./ext/test-repo1" (https://github.com/dfetch-org/test-repo @ main - e1fda19a57b873eb8e6ae37780594cbb77b70f1a) + > Found & fetched submodule "./ext/test-repo2" (https://github.com/dfetch-org/test-repo @ v1) + > Fetched master - 79698c99152e4a4b7b759c9def50a130bc91a2ff + """ + Then 'MyProject' looks like: + """ + MyProject/ + dfetch.yaml + my-project-with-submodules/ + .dfetch_data.yaml + README.md + ext/ + test-repo1/ + .git/ + .gitignore + LICENSE + README.md + test-repo2/ + .git/ + .gitignore + LICENSE + README.md + """ + + Scenario: Submodule changes are reported in the project report + Given a fetched and committed MyProject with the manifest + """ + manifest: + version: 0.0 + projects: + - name: my-project-with-submodules + url: some-remote-server/SomeInterestingProject.git + """ + When I run "dfetch report" in MyProject + Then the output shows + """ + Dfetch (0.12.1) + my-project-with-submodules: + - remote : + remote url : some-remote-server/SomeInterestingProject.git + branch : master + tag : + last fetch : 26/02/2026, 20:28:24 + revision : 79698c99152e4a4b7b759c9def50a130bc91a2ff + patch : + licenses : + + dependencies : + - path : ext/test-repo1 + url : https://github.com/dfetch-org/test-repo + branch : main + tag : + revision : e1fda19a57b873eb8e6ae37780594cbb77b70f1a + source-type : git-submodule + + - path : ext/test-repo2 + url : https://github.com/dfetch-org/test-repo + branch : + tag : v1 + revision : 8df389d0524863b85f484f15a91c5f2c40aefda1 + source-type : git-submodule + """ diff --git a/features/list-projects.feature b/features/list-projects.feature index f000de6c..79eb9f86 100644 --- a/features/list-projects.feature +++ b/features/list-projects.feature @@ -28,24 +28,24 @@ Feature: List dependencies Then the output shows 
""" Dfetch (0.12.1) - project : ext/test-repo-tag - remote : - remote url : https://github.com/dfetch-org/test-repo - branch : main - tag : - last fetch : 02/07/2021, 20:25:56 - revision : e1fda19a57b873eb8e6ae37780594cbb77b70f1a - patch : - licenses : MIT License - project : ext/test-rev-and-branch - remote : github-com-dfetch-org - remote url : https://github.com/dfetch-org/test-repo - branch : main - tag : v1 - last fetch : 02/07/2021, 20:25:56 - revision : - patch : - licenses : MIT License + ext/test-repo-tag: + - remote : + remote url : https://github.com/dfetch-org/test-repo + branch : main + tag : + last fetch : 02/07/2021, 20:25:56 + revision : e1fda19a57b873eb8e6ae37780594cbb77b70f1a + patch : + licenses : MIT License + ext/test-rev-and-branch: + - remote : github-com-dfetch-org + remote url : https://github.com/dfetch-org/test-repo + branch : main + tag : v1 + last fetch : 02/07/2021, 20:25:56 + revision : + patch : + licenses : MIT License """ @remote-svn @@ -68,15 +68,15 @@ Feature: List dependencies Then the output shows """ Dfetch (0.12.1) - project : cutter-svn-tag - remote : - remote url : svn://svn.code.sf.net/p/cutter/svn/cutter - branch : - tag : 1.1.7 - last fetch : 29/12/2024, 20:09:21 - revision : 4007 - patch : - licenses : + cutter-svn-tag: + - remote : + remote url : svn://svn.code.sf.net/p/cutter/svn/cutter + branch : + tag : 1.1.7 + last fetch : 29/12/2024, 20:09:21 + revision : 4007 + patch : + licenses : """ Scenario: Git repo with applied patches @@ -85,13 +85,13 @@ Feature: List dependencies Then the output shows """ Dfetch (0.12.1) - project : ext/test-repo-tag - remote : github-com-dfetch-org - remote url : https://github.com/dfetch-org/test-repo - branch : main - tag : v2.0 - last fetch : 02/07/2021, 20:25:56 - revision : - patch : 001-diff.patch, 002-diff.patch - licenses : MIT License + ext/test-repo-tag: + - remote : github-com-dfetch-org + remote url : https://github.com/dfetch-org/test-repo + branch : main + tag : v2.0 + last 
fetch : 02/07/2021, 20:25:56 + revision : + patch : 001-diff.patch, 002-diff.patch + licenses : MIT License """ diff --git a/features/steps/generic_steps.py b/features/steps/generic_steps.py index 0f646dd7..dfdadda1 100644 --- a/features/steps/generic_steps.py +++ b/features/steps/generic_steps.py @@ -406,6 +406,11 @@ def step_impl(context, name): check_file(name, context.text) +@then("'{name}' exists") +def step_impl(_, name): + assert os.path.exists(name), f"Expected {name} to exist, but it didn't!" + + @then("the '{name}' json file includes") def step_impl(context, name): """Partial JSON match - the expected JSON must be a *subset* of the actual file.""" diff --git a/features/steps/git_steps.py b/features/steps/git_steps.py index 73d61318..ec3359a2 100644 --- a/features/steps/git_steps.py +++ b/features/steps/git_steps.py @@ -39,18 +39,27 @@ def tag(name: str): subprocess.check_call(["git", "tag", "-a", name, "-m", "'Some tag'"]) +@given('a git-repository "{name}" with the following submodules') @given("a git repo with the following submodules") -def step_impl(context): - create_repo() +def step_impl(context, name=None): - for submodule in context.table: - subprocess.check_call( - ["git", "submodule", "add", submodule["url"], submodule["path"]] - ) + path = os.getcwd() + if name: + path = os.path.join(context.remotes_dir, name) + os.makedirs(path, exist_ok=True) + + with in_directory(path): + create_repo() + generate_file("README.md", "some content") + + for submodule in context.table: + subprocess.check_call( + ["git", "submodule", "add", submodule["url"], submodule["path"]] + ) - with in_directory(submodule["path"]): - subprocess.check_call(["git", "checkout", submodule["revision"]]) - commit_all("Added submodules") + with in_directory(submodule["path"]): + subprocess.check_call(["git", "checkout", submodule["revision"]]) + commit_all("Added submodules") @given('a new tag "{tagname}" is added to git-repository "{name}"') diff --git a/tests/test_report.py 
b/tests/test_report.py index ef210c89..55d0cbe1 100644 --- a/tests/test_report.py +++ b/tests/test_report.py @@ -36,12 +36,12 @@ def test_report(name, projects): with patch( "dfetch.commands.report.create_super_project", return_value=fake_superproject ): - with patch("dfetch.log.DLogger.print_report_line") as mocked_print_report_line: + with patch("dfetch.log.DLogger.print_info_line") as mocked_print_info_line: report(DEFAULT_ARGS) if projects: for project in projects: - mocked_print_report_line.assert_any_call("project", project["name"]) + mocked_print_info_line.assert_any_call(project["name"], "") else: - mocked_print_report_line.assert_not_called() + mocked_print_info_line.assert_not_called() diff --git a/tests/test_subproject.py b/tests/test_subproject.py index 462086dc..55b1f2b4 100644 --- a/tests/test_subproject.py +++ b/tests/test_subproject.py @@ -10,14 +10,14 @@ from dfetch.manifest.project import ProjectEntry from dfetch.manifest.version import Version -from dfetch.project.subproject import SubProject +from dfetch.project.subproject import SubProject, VcsDependency class ConcreteSubProject(SubProject): _wanted_version: Version - def _fetch_impl(self, version: Version) -> Version: - return Version() + def _fetch_impl(self, version: Version) -> tuple[Version, list[VcsDependency]]: + return Version(), [] def _latest_revision_on_branch(self, branch): return "latest" From 67c827dd6b03b4f72f8f76ae7f9cba2435b9286d Mon Sep 17 00:00:00 2001 From: Ben Date: Wed, 18 Mar 2026 19:22:03 +0000 Subject: [PATCH 03/15] Remove .git in submodules --- dfetch/project/gitsubproject.py | 10 ++++++---- features/fetch-git-repo-with-submodule.feature | 2 -- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/dfetch/project/gitsubproject.py b/dfetch/project/gitsubproject.py index 8f3e297e..a06ff075 100644 --- a/dfetch/project/gitsubproject.py +++ b/dfetch/project/gitsubproject.py @@ -1,6 +1,5 @@ """Git specific implementation.""" -import os import pathlib from functools 
import lru_cache @@ -8,7 +7,7 @@ from dfetch.manifest.project import ProjectEntry from dfetch.manifest.version import Version from dfetch.project.subproject import SubProject, VcsDependency -from dfetch.util.util import LICENSE_GLOBS, safe_rm, safe_rmtree +from dfetch.util.util import LICENSE_GLOBS, safe_rm from dfetch.vcs.git import GitLocalRepo, GitRemote, get_git_version logger = get_logger(__name__) @@ -94,8 +93,11 @@ def _fetch_impl(self, version: Version) -> tuple[Version, list[VcsDependency]]: ) ) - safe_rmtree(os.path.join(self.local_path, local_repo.METADATA_DIR)) - safe_rm(os.path.join(self.local_path, local_repo.GIT_MODULES_FILE)) + targets = {local_repo.METADATA_DIR, local_repo.GIT_MODULES_FILE} + + for path in pathlib.Path(self.local_path).rglob("*"): + if path.name in targets: + safe_rm(path) return self._determine_fetched_version(version, fetched_sha), vcs_deps diff --git a/features/fetch-git-repo-with-submodule.feature b/features/fetch-git-repo-with-submodule.feature index 75555274..f2039a6a 100644 --- a/features/fetch-git-repo-with-submodule.feature +++ b/features/fetch-git-repo-with-submodule.feature @@ -38,12 +38,10 @@ Feature: Fetch projects with nested VCS dependencies README.md ext/ test-repo1/ - .git/ .gitignore LICENSE README.md test-repo2/ - .git/ .gitignore LICENSE README.md From 5656ee70a7d774d5ddd84fec370bf22f5e7d9728 Mon Sep 17 00:00:00 2001 From: Ben Date: Wed, 18 Mar 2026 19:22:03 +0000 Subject: [PATCH 04/15] Add test for nested submodules --- features/environment.py | 2 + .../fetch-git-repo-with-submodule.feature | 39 +++++++++++++++++++ features/steps/git_steps.py | 12 ++++-- 3 files changed, 50 insertions(+), 3 deletions(-) diff --git a/features/environment.py b/features/environment.py index 10d083e9..ea858996 100644 --- a/features/environment.py +++ b/features/environment.py @@ -42,3 +42,5 @@ def before_all(context): context.config.logging_format = "%(message)s" context.remotes_dir = "some-remote-server" + + 
os.environ["GIT_ALLOW_PROTOCOL"] = "file:http:https:ssh" diff --git a/features/fetch-git-repo-with-submodule.feature b/features/fetch-git-repo-with-submodule.feature index f2039a6a..f78e1b95 100644 --- a/features/fetch-git-repo-with-submodule.feature +++ b/features/fetch-git-repo-with-submodule.feature @@ -47,6 +47,45 @@ Feature: Fetch projects with nested VCS dependencies README.md """ + Scenario: A project with a git submodule that itself has a nested submodule is fetched at the pinned revision + Given a git repository "LeafProject.git" + And a git-repository "MiddleProject.git" with the following submodules + | path | url | revision | + | ext/leaf | some-remote-server/LeafProject.git | master | + And a git-repository "OuterProject.git" with the following submodules + | path | url | revision | + | ext/middle | some-remote-server/MiddleProject.git | master | + Given the manifest 'dfetch.yaml' in MyProject + """ + manifest: + version: 0.0 + projects: + - name: outer-project + url: some-remote-server/OuterProject.git + """ + When I run "dfetch update" + Then the output shows + """ + Dfetch (0.12.1) + outer-project: + > Found & fetched submodule "./ext/middle" (some-remote-server/MiddleProject.git @ master - [commit-hash]) + > Fetched master - [commit-hash] + """ + Then 'MyProject' looks like: + """ + MyProject/ + dfetch.yaml + outer-project/ + .dfetch_data.yaml + README.md + ext/ + middle/ + README.md + ext/ + leaf/ + README.md + """ + Scenario: Submodule changes are reported in the project report Given a fetched and committed MyProject with the manifest """ diff --git a/features/steps/git_steps.py b/features/steps/git_steps.py index ec3359a2..b0ea4a27 100644 --- a/features/steps/git_steps.py +++ b/features/steps/git_steps.py @@ -10,7 +10,12 @@ from behave import given, when # pylint: disable=no-name-in-module from dfetch.util.util import in_directory -from features.steps.generic_steps import call_command, extend_file, generate_file +from 
features.steps.generic_steps import ( + call_command, + extend_file, + generate_file, + remote_server_path, +) from features.steps.manifest_steps import generate_manifest @@ -53,9 +58,10 @@ def step_impl(context, name=None): generate_file("README.md", "some content") for submodule in context.table: - subprocess.check_call( - ["git", "submodule", "add", submodule["url"], submodule["path"]] + url = submodule["url"].replace( + "some-remote-server", f"file:///{remote_server_path(context)}" ) + subprocess.check_call(["git", "submodule", "add", url, submodule["path"]]) with in_directory(submodule["path"]): subprocess.check_call(["git", "checkout", submodule["revision"]]) From 7460860283af81da5e9ba49afe4fb951a06ed82c Mon Sep 17 00:00:00 2001 From: Ben Date: Wed, 18 Mar 2026 19:22:03 +0000 Subject: [PATCH 05/15] Only move up if it is directory --- dfetch/vcs/git.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dfetch/vcs/git.py b/dfetch/vcs/git.py index dcd8d3c3..6e678219 100644 --- a/dfetch/vcs/git.py +++ b/dfetch/vcs/git.py @@ -312,8 +312,9 @@ def checkout_version( # pylint: disable=too-many-arguments ) if src: - for submodule in submodules: - submodule.path = str(Path(submodule.path).relative_to(Path(src))) + if os.path.isdir(src): + for submodule in submodules: + submodule.path = str(Path(submodule.path).relative_to(src)) self.move_src_folder_up(remote, src) From dcc03b175a6a5d78beb6826aed702e24c7c69f47 Mon Sep 17 00:00:00 2001 From: Ben Date: Wed, 18 Mar 2026 19:22:03 +0000 Subject: [PATCH 06/15] Fix src: moving logic for multiple matching files/dirs --- dfetch/vcs/git.py | 68 ++++++++++++++----- .../fetch-git-repo-with-submodule.feature | 65 +++++++++++++----- 2 files changed, 97 insertions(+), 36 deletions(-) diff --git a/dfetch/vcs/git.py b/dfetch/vcs/git.py index 6e678219..1a8018a4 100644 --- a/dfetch/vcs/git.py +++ b/dfetch/vcs/git.py @@ -1,6 +1,7 @@ """Git specific implementation.""" import functools +import glob import os import re 
import shutil @@ -312,37 +313,68 @@ def checkout_version( # pylint: disable=too-many-arguments ) if src: - if os.path.isdir(src): - for submodule in submodules: - submodule.path = str(Path(submodule.path).relative_to(src)) + for submodule in submodules: + submodule.path = self._rewrite_path(src, submodule.path) - self.move_src_folder_up(remote, src) + self._move_src_folder_up(remote, src) return str(current_sha), submodules - def move_src_folder_up(self, remote: str, src: str) -> None: + @staticmethod + def _rewrite_path(src: str, existing_path: str) -> str: + """Rewrites existing_path relative to src pattern. + + Handles wildcards (*) and nested directories. + """ + src_path = PurePath(src) + sub_path = PurePath(existing_path) + + if sub_path.match(str(src_path)): + # Count fixed prefix parts (before any wildcard) + prefix_len = 0 + for part in src_path.parts: + if "*" in part: + break + prefix_len += 1 + # Return path relative to fixed prefix + return str(Path(*sub_path.parts[prefix_len:])) + + # Return unchanged if no match + return existing_path + + @staticmethod + def _move_src_folder_up(remote: str, src: str) -> None: """Move the files from the src folder into the root of the project. 
Args: remote (str): Name of the root src (str): Src folder to move up """ - full_src = src - if not os.path.isdir(src): - src = os.path.dirname(src) - - if not src: - return + matched_paths = glob.glob(src) or [src] - try: - for file_to_copy in os.listdir(src): - shutil.move(src + "/" + file_to_copy, ".") - safe_rmtree(PurePath(src).parts[0]) - except FileNotFoundError: + if not matched_paths: logger.warning( - f"The 'src:' filter '{full_src}' didn't match any files from '{remote}'" + f"The 'src:' filter '{src}' didn't match any files from '{remote}'" ) - return + + processed_dirs = set() + for src_path in matched_paths: + if not os.path.isdir(src_path): + src_path = os.path.dirname(src_path) + + if not src_path or src_path in processed_dirs: + continue + + try: + for file_to_copy in os.listdir(src_path): + shutil.move(src_path + "/" + file_to_copy, ".") + safe_rmtree(PurePath(src_path).parts[0]) + processed_dirs.add(src_path) + except FileNotFoundError: + logger.warning( + f"The 'src:' filter '{src_path}' didn't match any files from '{remote}'" + ) + continue @staticmethod def _determine_ignore_paths( diff --git a/features/fetch-git-repo-with-submodule.feature b/features/fetch-git-repo-with-submodule.feature index f78e1b95..bdaec33e 100644 --- a/features/fetch-git-repo-with-submodule.feature +++ b/features/fetch-git-repo-with-submodule.feature @@ -6,10 +6,11 @@ Feature: Fetch projects with nested VCS dependencies pinned by the parent repository to ensure reproducibility Background: - Given a git-repository "SomeInterestingProject.git" with the following submodules - | path | url | revision | - | ext/test-repo1 | https://github.com/dfetch-org/test-repo | e1fda19a57b873eb8e6ae37780594cbb77b70f1a | - | ext/test-repo2 | https://github.com/dfetch-org/test-repo | 8df389d0524863b85f484f15a91c5f2c40aefda1 | + Given a git repository "TestRepo.git" + And a git-repository "SomeInterestingProject.git" with the following submodules + | path | url | revision | + | 
ext/test-repo1 | some-remote-server/TestRepo.git | master | + | ext/test-repo2 | some-remote-server/TestRepo.git | v1 | Scenario: A project with a git submodule is fetched at the pinned revision Given the manifest 'dfetch.yaml' in MyProject @@ -25,9 +26,9 @@ Feature: Fetch projects with nested VCS dependencies """ Dfetch (0.12.1) my-project-with-submodules: - > Found & fetched submodule "./ext/test-repo1" (https://github.com/dfetch-org/test-repo @ main - e1fda19a57b873eb8e6ae37780594cbb77b70f1a) - > Found & fetched submodule "./ext/test-repo2" (https://github.com/dfetch-org/test-repo @ v1) - > Fetched master - 79698c99152e4a4b7b759c9def50a130bc91a2ff + > Found & fetched submodule "./ext/test-repo1" (some-remote-server/TestRepo.git @ master - 79698c99152e4a4b7b759c9def50a130bc91a2ff) + > Found & fetched submodule "./ext/test-repo2" (some-remote-server/TestRepo.git @ master - 79698c99152e4a4b7b759c9def50a130bc91a2ff) + > Fetched master - e1fda19a57b873eb8e6ae37780594cbb77b70f1a """ Then 'MyProject' looks like: """ @@ -38,12 +39,8 @@ Feature: Fetch projects with nested VCS dependencies README.md ext/ test-repo1/ - .gitignore - LICENSE README.md test-repo2/ - .gitignore - LICENSE README.md """ @@ -68,8 +65,8 @@ Feature: Fetch projects with nested VCS dependencies """ Dfetch (0.12.1) outer-project: - > Found & fetched submodule "./ext/middle" (some-remote-server/MiddleProject.git @ master - [commit-hash]) - > Fetched master - [commit-hash] + > Found & fetched submodule "./ext/middle" (some-remote-server/MiddleProject.git @ master - 79698c99152e4a4b7b759c9def50a130bc91a2ff) + > Fetched master - e1fda19a57b873eb8e6ae37780594cbb77b70f1a """ Then 'MyProject' looks like: """ @@ -111,16 +108,48 @@ Feature: Fetch projects with nested VCS dependencies dependencies : - path : ext/test-repo1 - url : https://github.com/dfetch-org/test-repo - branch : main + url : some-remote-server/TestRepo.git + branch : master tag : revision : e1fda19a57b873eb8e6ae37780594cbb77b70f1a source-type 
: git-submodule - path : ext/test-repo2 - url : https://github.com/dfetch-org/test-repo - branch : - tag : v1 + url : some-remote-server/TestRepo.git + branch : master + tag : revision : 8df389d0524863b85f484f15a91c5f2c40aefda1 source-type : git-submodule """ + + Scenario: Subfolder is matched through a glob is fetched and submodules are resolved + Given a git-repository "GlobProject.git" with the following submodules + | path | url | revision | + | some_dir_a/ext/test-repo | some-remote-server/TestRepo.git | master | + Given the manifest 'dfetch.yaml' in MyProject + """ + manifest: + version: 0.0 + projects: + - name: glob-project + url: some-remote-server/GlobProject.git + src: some_dir_* + """ + When I run "dfetch update" + Then the output shows + """ + Dfetch (0.12.1) + glob-project: + > Found & fetched submodule "./some_dir_a/ext/test-repo" (some-remote-server/TestRepo.git @ master - 79698c99152e4a4b7b759c9def50a130bc91a2ff) + > Fetched master - e1fda19a57b873eb8e6ae37780594cbb77b70f1a + """ + Then 'MyProject' looks like: + """ + MyProject/ + dfetch.yaml + glob-project/ + .dfetch_data.yaml + ext/ + test-repo/ + README.md + """ From 37814f40f9497ba92c0a90873dc1bf7a26415ee4 Mon Sep 17 00:00:00 2001 From: Ben Date: Wed, 18 Mar 2026 19:22:03 +0000 Subject: [PATCH 07/15] Don't break when importing submodules with space in path --- CHANGELOG.rst | 1 + dfetch/vcs/git.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index ef15e5e9..75498213 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -9,6 +9,7 @@ Release 0.13.0 (unreleased) * Fetch git submodules in git subproject at pinned revision (#1013) * Add nested projects in subprojects to project report (#1017) * Make `dfetch report` output more yaml-like (#1017) +* Don't break when importing submodules with space in path (#1017) Release 0.12.1 (released 2026-02-24) ==================================== diff --git a/dfetch/vcs/git.py b/dfetch/vcs/git.py index 
1a8018a4..9725d2ab 100644 --- a/dfetch/vcs/git.py +++ b/dfetch/vcs/git.py @@ -536,7 +536,7 @@ def submodules() -> list[Submodule]: "submodule", "foreach", "--quiet", - "echo $name $sm_path $sha1 $toplevel", + 'printf "%s\\0%s\\0%s\\0%s\n" "$name" "$sm_path" "$sha1" "$toplevel"', ], ) @@ -544,7 +544,7 @@ def submodules() -> list[Submodule]: urls: dict[str, str] = {} for line in result.stdout.decode().split("\n"): if line: - name, sm_path, sha, toplevel = line.split(" ") + name, sm_path, sha, toplevel = line.split("\0") urls = urls or GitLocalRepo._get_submodule_urls(toplevel) url = urls[name] branch, tag = GitRemote(url).find_branch_tip_or_tag_from_sha(sha) From 909703ac29f450783393f177e8bde9fa5f137a50 Mon Sep 17 00:00:00 2001 From: Ben Date: Wed, 18 Mar 2026 19:22:03 +0000 Subject: [PATCH 08/15] Warn when src: glob pattern matches multiple directories --- CHANGELOG.rst | 3 ++- dfetch/vcs/git.py | 33 +++++++++++++++--------- features/fetch-file-pattern-git.feature | 34 +++++++++++++++++++++++++ 3 files changed, 57 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 75498213..025e9335 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -8,8 +8,9 @@ Release 0.13.0 (unreleased) * Rename child-manifests to sub-manifests in documentation and code (#1027) * Fetch git submodules in git subproject at pinned revision (#1013) * Add nested projects in subprojects to project report (#1017) -* Make `dfetch report` output more yaml-like (#1017) +* Make ``dfetch report`` output more yaml-like (#1017) * Don't break when importing submodules with space in path (#1017) +* Warn when ``src:`` glob pattern matches multiple directories (#1017) Release 0.12.1 (released 2026-02-24) ==================================== diff --git a/dfetch/vcs/git.py b/dfetch/vcs/git.py index 9725d2ab..de05c9fb 100644 --- a/dfetch/vcs/git.py +++ b/dfetch/vcs/git.py @@ -350,29 +350,38 @@ def _move_src_folder_up(remote: str, src: str) -> None: remote (str): Name of the root src (str): 
Src folder to move up """ - matched_paths = glob.glob(src) or [src] + matched_paths = glob.glob(src) if not matched_paths: logger.warning( f"The 'src:' filter '{src}' didn't match any files from '{remote}'" ) + return - processed_dirs = set() - for src_path in matched_paths: - if not os.path.isdir(src_path): - src_path = os.path.dirname(src_path) + dirs = [] + for src_dir_path in matched_paths: + if os.path.isdir(src_dir_path): + dirs.append(src_dir_path) + else: + if dir_path := os.path.dirname(src_dir_path): + dirs.append(dir_path) - if not src_path or src_path in processed_dirs: - continue + unique_dirs = list(dict.fromkeys(dirs)) + if len(unique_dirs) > 1: + logger.warning( + f"The 'src:' filter '{src}' matches multiple directories from '{remote}'. " + f"Only considering files in '{unique_dirs[0]}'." + ) + + for src_dir_path in unique_dirs[:1]: try: - for file_to_copy in os.listdir(src_path): - shutil.move(src_path + "/" + file_to_copy, ".") - safe_rmtree(PurePath(src_path).parts[0]) - processed_dirs.add(src_path) + for file_to_copy in os.listdir(src_dir_path): + shutil.move(src_dir_path + "/" + file_to_copy, ".") + safe_rmtree(PurePath(src_dir_path).parts[0]) except FileNotFoundError: logger.warning( - f"The 'src:' filter '{src_path}' didn't match any files from '{remote}'" + f"The 'src:' filter '{src_dir_path}' didn't match any files from '{remote}'" ) continue diff --git a/features/fetch-file-pattern-git.feature b/features/fetch-file-pattern-git.feature index b7fb036b..a78540d9 100644 --- a/features/fetch-file-pattern-git.feature +++ b/features/fetch-file-pattern-git.feature @@ -19,6 +19,8 @@ Feature: Fetch file pattern from git repo | SomeFolder/SomeSubFolder/SomeFile.txt | | SomeFolder/SomeSubFolder/OtherFile.txt | | SomeFolder/SomeSubFolder/SomeFile.md | + | SomeFolder/Unrelated.txt | + | AlsoUnrelated.txt | When I run "dfetch update" Then the output shows """ @@ -35,3 +37,35 @@ Feature: Fetch file pattern from git repo SomeFile.txt dfetch.yaml """ + + 
Scenario: A file pattern matches two files in different subfolders + Given the manifest 'dfetch.yaml' in MyProject + """ + manifest: + version: 0.0 + projects: + - name: SomeProjectWithAnInterestingFile + url: some-remote-server/SomeProjectWithAnInterestingFile.git + src: SomeFolder/Some* + tag: v1 + """ + And a git-repository "SomeProjectWithAnInterestingFile.git" with the files + | path | + | SomeFolder/SomeSubFolder/SomeFile.txt | + | SomeFolder/SomeOtherSubFolder/SomeFile.txt | + When I run "dfetch update" + Then the output shows + """ + Dfetch (0.12.1) + The 'src:' filter 'SomeFolder/Some*' matches multiple directories from 'some-remote-server/SomeProjectWithAnInterestingFile.git'. Only considering files in 'SomeFolder/SomeSubFolder'. + SomeProjectWithAnInterestingFile: + > Fetched v1 + """ + Then 'MyProject' looks like: + """ + MyProject/ + SomeProjectWithAnInterestingFile/ + .dfetch_data.yaml + SomeFile.txt + dfetch.yaml + """ From beeeefe9f60428ae66c7c28b4ee0dfba5ca8ae69 Mon Sep 17 00:00:00 2001 From: Ben Date: Wed, 18 Mar 2026 19:22:03 +0000 Subject: [PATCH 09/15] Respect ignore within submodules --- dfetch/manifest/parse.py | 7 ++++++- dfetch/util/util.py | 20 +++++++++++------- dfetch/vcs/git.py | 44 +++++++++++++++++++++++++++------------- example/dfetch.yaml | 1 + pyproject.toml | 1 + 5 files changed, 51 insertions(+), 22 deletions(-) diff --git a/dfetch/manifest/parse.py b/dfetch/manifest/parse.py index 1c0bb4e3..6aa18b79 100644 --- a/dfetch/manifest/parse.py +++ b/dfetch/manifest/parse.py @@ -107,6 +107,11 @@ def get_submanifests(skip: list[str] | None = None) -> list[Manifest]: with prefix_runtime_exceptions( pathlib.Path(path).relative_to(os.path.dirname(os.getcwd())).as_posix() ): - submanifests += [parse(path)] + try: + submanifests += [parse(path)] + except FileNotFoundError: + logger.warning( + f"Sub-manifest {path} was found but no longer exists" + ) return submanifests diff --git a/dfetch/util/util.py b/dfetch/util/util.py index 
dbf5b2d7..0dd74b14 100644 --- a/dfetch/util/util.py +++ b/dfetch/util/util.py @@ -134,13 +134,19 @@ def find_matching_files(directory: str, patterns: Sequence[str]) -> Iterator[Pat yield Path(path) -def safe_rm(path: str | Path) -> None: - """Delete a file or directory safely.""" - if os.path.lexists(path): - if os.path.isdir(path): - safe_rmtree(str(path)) - else: - os.remove(path) +def safe_rm(paths: str | Path | Sequence[str | Path]) -> None: + """Delete a file, directory or list of files/directories safely.""" + paths_to_remove = ( + [paths] if isinstance(paths, str) or not isinstance(paths, Sequence) else paths + ) + for path in paths_to_remove: + if os.path.lexists(path): + if not Path(path).is_relative_to("."): + raise RuntimeError(f"Trying to delete '{path}' outside cwd!") + if os.path.isdir(path): + safe_rmtree(str(path)) + else: + os.remove(path) def safe_rmtree(path: str) -> None: diff --git a/dfetch/vcs/git.py b/dfetch/vcs/git.py index de05c9fb..d107ad5b 100644 --- a/dfetch/vcs/git.py +++ b/dfetch/vcs/git.py @@ -12,7 +12,7 @@ from dfetch.log import get_logger from dfetch.util.cmdline import SubprocessCommandError, run_on_cmdline -from dfetch.util.util import in_directory, safe_rmtree +from dfetch.util.util import in_directory, safe_rm, safe_rmtree from dfetch.vcs.patch import Patch, PatchType logger = get_logger(__name__) @@ -253,6 +253,26 @@ def is_git(self) -> bool: except (SubprocessCommandError, RuntimeError): return False + def _configure_sparse_checkout( + self, + src: str | None, + keeps: Sequence[str], + ignore: Sequence[str] | None = None, + ) -> None: + run_on_cmdline(logger, ["git", "config", "core.sparsecheckout", "true"]) + + with open(".git/info/sparse-checkout", "a", encoding="utf-8") as f: + patterns = list(keeps or []) + src_pattern = f"/{src or '*'}" + + if src_pattern not in patterns: + patterns.append(src_pattern) + + if ignore: + patterns += self._determine_ignore_paths(src, ignore) + + f.write("\n".join(map(str, patterns)) + "\n") 
+ def checkout_version( # pylint: disable=too-many-arguments self, *, @@ -277,19 +297,11 @@ def checkout_version( # pylint: disable=too-many-arguments run_on_cmdline(logger, ["git", "checkout", "-b", "dfetch-local-branch"]) if src or ignore: - run_on_cmdline(logger, ["git", "config", "core.sparsecheckout", "true"]) - with open( - ".git/info/sparse-checkout", "a", encoding="utf-8" - ) as sparse_checkout_file: - sparse_checkout_file.write( - "\n".join(list((must_keeps or []) + [f"/{src or '*'}"])) - ) - - if ignore: - ignore_abs_paths = self._determine_ignore_paths(src, ignore) - - sparse_checkout_file.write("\n") - sparse_checkout_file.write("\n".join(ignore_abs_paths)) + self._configure_sparse_checkout( + src, + (must_keeps or []) + [f"/{src or '*'}"], + ignore, + ) run_on_cmdline( logger, @@ -318,6 +330,10 @@ def checkout_version( # pylint: disable=too-many-arguments self._move_src_folder_up(remote, src) + if submodules: + for ignore_path in ignore or []: + safe_rm(glob.glob(ignore_path)) + return str(current_sha), submodules @staticmethod diff --git a/example/dfetch.yaml b/example/dfetch.yaml index 4ceb394d..88641d90 100644 --- a/example/dfetch.yaml +++ b/example/dfetch.yaml @@ -64,3 +64,4 @@ manifest: - drivers - doxygen - docs + - framework/tests diff --git a/pyproject.toml b/pyproject.toml index 3b71049b..46b8f35e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -122,6 +122,7 @@ version_scheme = "guess-next-dev" [tool.black] extend-exclude = "sphinxcontrib_asciinema" +target-version = ["py313"] [tool.isort] profile = "black" From 6cac513fb45386f83c6c769e3af28539c2693186 Mon Sep 17 00:00:00 2001 From: Ben Date: Sun, 22 Mar 2026 20:47:37 +0000 Subject: [PATCH 10/15] Fix after rebase --- dfetch/project/archivesubproject.py | 6 +++--- dfetch/util/util.py | 11 +++++++---- dfetch/vcs/git.py | 2 +- features/fetch-file-pattern-git.feature | 2 +- tests/manifest_mock.py | 1 + 5 files changed, 13 insertions(+), 9 deletions(-) diff --git 
a/dfetch/project/archivesubproject.py b/dfetch/project/archivesubproject.py index bd702ebf..bc80346f 100644 --- a/dfetch/project/archivesubproject.py +++ b/dfetch/project/archivesubproject.py @@ -49,7 +49,7 @@ from dfetch.log import get_logger from dfetch.manifest.project import ProjectEntry from dfetch.manifest.version import Version -from dfetch.project.subproject import SubProject +from dfetch.project.subproject import SubProject, VcsDependency from dfetch.vcs.archive import ( ARCHIVE_EXTENSIONS, ArchiveLocalRepo, @@ -166,7 +166,7 @@ def wanted_version(self) -> Version: return Version(revision=self._project_entry.hash) return Version(revision=self.remote) - def _fetch_impl(self, version: Version) -> Version: + def _fetch_impl(self, version: Version) -> tuple[Version, list[VcsDependency]]: """Download and extract the archive to the local destination. 1. Download the archive to a temporary file. @@ -211,7 +211,7 @@ def _fetch_impl(self, version: Version) -> Version: except OSError: pass - return version + return version, [] def freeze_project(self, project: ProjectEntry) -> str | None: """Pin *project* to a cryptographic hash of the archive. 
diff --git a/dfetch/util/util.py b/dfetch/util/util.py index 0dd74b14..86923bf9 100644 --- a/dfetch/util/util.py +++ b/dfetch/util/util.py @@ -102,7 +102,7 @@ def prune_files_by_pattern(directory: str, patterns: Sequence[str]) -> None: if os.path.lexists(str(file_or_dir)) and not ( file_or_dir.is_file() and is_license_file(file_or_dir.name) ): - safe_rm(file_or_dir) + safe_rm(file_or_dir, within=directory) def _remove_readonly(func: Any, path: str, _: Any) -> None: @@ -134,15 +134,18 @@ def find_matching_files(directory: str, patterns: Sequence[str]) -> Iterator[Pat yield Path(path) -def safe_rm(paths: str | Path | Sequence[str | Path]) -> None: +def safe_rm( + paths: str | Path | Sequence[str | Path], + within: str | Path = ".", +) -> None: """Delete a file, directory or list of files/directories safely.""" + base = Path(within).resolve() paths_to_remove = ( [paths] if isinstance(paths, str) or not isinstance(paths, Sequence) else paths ) for path in paths_to_remove: if os.path.lexists(path): - if not Path(path).is_relative_to("."): - raise RuntimeError(f"Trying to delete '{path}' outside cwd!") + check_no_path_traversal(path, base) if os.path.isdir(path): safe_rmtree(str(path)) else: diff --git a/dfetch/vcs/git.py b/dfetch/vcs/git.py index d107ad5b..85086504 100644 --- a/dfetch/vcs/git.py +++ b/dfetch/vcs/git.py @@ -366,7 +366,7 @@ def _move_src_folder_up(remote: str, src: str) -> None: remote (str): Name of the root src (str): Src folder to move up """ - matched_paths = glob.glob(src) + matched_paths = sorted(glob.glob(src)) if not matched_paths: logger.warning( diff --git a/features/fetch-file-pattern-git.feature b/features/fetch-file-pattern-git.feature index a78540d9..7be43bae 100644 --- a/features/fetch-file-pattern-git.feature +++ b/features/fetch-file-pattern-git.feature @@ -57,7 +57,7 @@ Feature: Fetch file pattern from git repo Then the output shows """ Dfetch (0.12.1) - The 'src:' filter 'SomeFolder/Some*' matches multiple directories from 
'some-remote-server/SomeProjectWithAnInterestingFile.git'. Only considering files in 'SomeFolder/SomeSubFolder'. + The 'src:' filter 'SomeFolder/Some*' matches multiple directories from 'some-remote-server/SomeProjectWithAnInterestingFile.git'. Only considering files in 'SomeFolder/SomeOtherSubFolder'. SomeProjectWithAnInterestingFile: > Fetched v1 """ diff --git a/tests/manifest_mock.py b/tests/manifest_mock.py index f523a270..7df1afea 100644 --- a/tests/manifest_mock.py +++ b/tests/manifest_mock.py @@ -15,6 +15,7 @@ def mock_manifest(projects, path: str = "/some/path") -> MagicMock: mock_project = Mock(spec=ProjectEntry) mock_project.name = project["name"] mock_project.destination = "some_dest" + mock_project.remote = "" project_mocks += [mock_project] mocked_manifest = MagicMock(spec=Manifest, projects=project_mocks, path=path) From 961a55a326788cc27a9a1ffea0f8d3df553309e8 Mon Sep 17 00:00:00 2001 From: Ben Date: Sun, 22 Mar 2026 20:59:43 +0000 Subject: [PATCH 11/15] Only unlink symlinks --- dfetch/util/util.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/dfetch/util/util.py b/dfetch/util/util.py index 86923bf9..7edb8785 100644 --- a/dfetch/util/util.py +++ b/dfetch/util/util.py @@ -145,11 +145,14 @@ def safe_rm( ) for path in paths_to_remove: if os.path.lexists(path): - check_no_path_traversal(path, base) - if os.path.isdir(path): - safe_rmtree(str(path)) + if os.path.islink(path): + os.unlink(path) else: - os.remove(path) + check_no_path_traversal(path, base) + if os.path.isdir(path): + safe_rmtree(str(path)) + else: + os.remove(path) def safe_rmtree(path: str) -> None: From 582caaec90a47a5741361bbefa25552d118d02f7 Mon Sep 17 00:00:00 2001 From: Ben Date: Sun, 22 Mar 2026 21:06:52 +0000 Subject: [PATCH 12/15] Only use safe_rm --- dfetch/util/util.py | 4 ++-- dfetch/vcs/git.py | 6 +++--- features/environment.py | 5 +++-- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/dfetch/util/util.py b/dfetch/util/util.py index 
7edb8785..1afeec0f 100644 --- a/dfetch/util/util.py +++ b/dfetch/util/util.py @@ -150,12 +150,12 @@ def safe_rm( else: check_no_path_traversal(path, base) if os.path.isdir(path): - safe_rmtree(str(path)) + _safe_rmtree(str(path)) else: os.remove(path) -def safe_rmtree(path: str) -> None: +def _safe_rmtree(path: str) -> None: """Delete an entire directory and all its subfolders and files.""" try: shutil.rmtree( # pylint: disable=deprecated-argument diff --git a/dfetch/vcs/git.py b/dfetch/vcs/git.py index 85086504..92cb63d9 100644 --- a/dfetch/vcs/git.py +++ b/dfetch/vcs/git.py @@ -12,7 +12,7 @@ from dfetch.log import get_logger from dfetch.util.cmdline import SubprocessCommandError, run_on_cmdline -from dfetch.util.util import in_directory, safe_rm, safe_rmtree +from dfetch.util.util import in_directory, safe_rm from dfetch.vcs.patch import Patch, PatchType logger = get_logger(__name__) @@ -226,7 +226,7 @@ def check_version_exists( except SubprocessCommandError as exc: if exc.returncode != 128: raise - safe_rmtree(temp_dir) + safe_rm(temp_dir, within=Path(temp_dir).parent) return exists @@ -394,7 +394,7 @@ def _move_src_folder_up(remote: str, src: str) -> None: try: for file_to_copy in os.listdir(src_dir_path): shutil.move(src_dir_path + "/" + file_to_copy, ".") - safe_rmtree(PurePath(src_dir_path).parts[0]) + safe_rm(PurePath(src_dir_path).parts[0]) except FileNotFoundError: logger.warning( f"The 'src:' filter '{src_dir_path}' didn't match any files from '{remote}'" diff --git a/features/environment.py b/features/environment.py index ea858996..296a93d2 100644 --- a/features/environment.py +++ b/features/environment.py @@ -2,11 +2,12 @@ import os import tempfile +from pathlib import Path from behave import fixture, use_fixture from rich.console import Console -from dfetch.util.util import safe_rmtree +from dfetch.util.util import safe_rm @fixture @@ -22,7 +23,7 @@ def tmpdir(context): yield context.tmpdir # -- CLEANUP-FIXTURE PART: os.chdir(context.orig_cwd) - 
safe_rmtree(context.tmpdir) + safe_rm(context.tmpdir, within=Path(context.tmpdir).parent) def before_scenario(context, _): From 757a6f8a9c36cd509d7be74ccb2000fb96ac1f20 Mon Sep 17 00:00:00 2001 From: Ben Date: Sun, 22 Mar 2026 21:39:11 +0000 Subject: [PATCH 13/15] Review comments --- dfetch/util/util.py | 48 +++++++++++++++++- dfetch/vcs/git.py | 50 +++++++------------ .../fetch-git-repo-with-submodule.feature | 2 +- tests/test_util.py | 31 +++++++++++- 4 files changed, 97 insertions(+), 34 deletions(-) diff --git a/dfetch/util/util.py b/dfetch/util/util.py index 1afeec0f..01452655 100644 --- a/dfetch/util/util.py +++ b/dfetch/util/util.py @@ -7,7 +7,7 @@ import stat from collections.abc import Generator, Iterator, Sequence from contextlib import contextmanager -from pathlib import Path +from pathlib import Path, PurePath from typing import Any from _hashlib import HASH @@ -146,6 +146,7 @@ def safe_rm( for path in paths_to_remove: if os.path.lexists(path): if os.path.islink(path): + check_no_path_traversal(Path(path).parent, base) os.unlink(path) else: check_no_path_traversal(path, base) @@ -289,6 +290,51 @@ def check_no_path_traversal(path: str | Path, root: str | Path) -> None: raise RuntimeError(f"{str(path)!r} is outside root {str(root)!r}") +def strip_glob_prefix(path: str, pattern: str) -> str: + """Return *path* with its leading glob-pattern prefix stripped. + + When the first ``len(pattern.parts)`` components of *path* match *pattern* + (using :func:`fnmatch.fnmatch`), those components are removed and the + remainder is returned. If *path* does not match, or *path* has no + components beyond the matched prefix, *path* is returned unchanged. + + This is useful after a glob-matched directory has been "promoted" to the + root: it computes where a nested entry ends up relative to the new root. + + Args: + path: The original path whose prefix should be stripped. 
+ pattern: A glob pattern (may contain ``*`` wildcards) whose matched + portion forms the prefix to remove. + + Returns: + The path with the matched prefix stripped, or *path* unchanged when no + match is found. + + Examples:: + + >>> strip_glob_prefix("some_dir_a/ext/lib", "some_dir_*") + 'ext/lib' + >>> strip_glob_prefix("SomeFolder/SomeSubFolder/file.c", "SomeFolder/Some*") + 'file.c' + >>> strip_glob_prefix("pkg/sub/module", "pkg") + 'sub/module' + >>> strip_glob_prefix("unrelated/path", "pkg") + 'unrelated/path' + """ + src_parts = PurePath(pattern).parts + sub_parts = PurePath(path).parts + depth = len(src_parts) + + if len(sub_parts) <= depth: + return path + + candidate = str(PurePath(*sub_parts[:depth])) + if fnmatch.fnmatch(candidate, pattern): + return str(Path(*sub_parts[depth:])) + + return path + + def resolve_absolute_path(path: str | Path) -> Path: """Return a guaranteed absolute Path, resolving symlinks. diff --git a/dfetch/vcs/git.py b/dfetch/vcs/git.py index 92cb63d9..2a6423e1 100644 --- a/dfetch/vcs/git.py +++ b/dfetch/vcs/git.py @@ -12,7 +12,7 @@ from dfetch.log import get_logger from dfetch.util.cmdline import SubprocessCommandError, run_on_cmdline -from dfetch.util.util import in_directory, safe_rm +from dfetch.util.util import in_directory, safe_rm, strip_glob_prefix from dfetch.vcs.patch import Patch, PatchType logger = get_logger(__name__) @@ -324,39 +324,27 @@ def checkout_version( # pylint: disable=too-many-arguments .strip() ) - if src: - for submodule in submodules: - submodule.path = self._rewrite_path(src, submodule.path) - - self._move_src_folder_up(remote, src) - - if submodules: - for ignore_path in ignore or []: - safe_rm(glob.glob(ignore_path)) + submodules = self._apply_src_and_ignore(remote, src, ignore, submodules) return str(current_sha), submodules - @staticmethod - def _rewrite_path(src: str, existing_path: str) -> str: - """Rewrites existing_path relative to src pattern. - - Handles wildcards (*) and nested directories. 
- """ - src_path = PurePath(src) - sub_path = PurePath(existing_path) - - if sub_path.match(str(src_path)): - # Count fixed prefix parts (before any wildcard) - prefix_len = 0 - for part in src_path.parts: - if "*" in part: - break - prefix_len += 1 - # Return path relative to fixed prefix - return str(Path(*sub_path.parts[prefix_len:])) - - # Return unchanged if no match - return existing_path + def _apply_src_and_ignore( + self, + remote: str, + src: str | None, + ignore: Sequence[str] | None, + submodules: list[Submodule], + ) -> list[Submodule]: + """Apply src filter and ignore patterns, returning surviving submodules.""" + if src: + for submodule in submodules: + submodule.path = strip_glob_prefix(submodule.path, src) + self._move_src_folder_up(remote, src) + + for ignore_path in ignore or []: + safe_rm(glob.glob(ignore_path)) + + return [s for s in submodules if os.path.exists(s.path)] @staticmethod def _move_src_folder_up(remote: str, src: str) -> None: diff --git a/features/fetch-git-repo-with-submodule.feature b/features/fetch-git-repo-with-submodule.feature index bdaec33e..7978e215 100644 --- a/features/fetch-git-repo-with-submodule.feature +++ b/features/fetch-git-repo-with-submodule.feature @@ -140,7 +140,7 @@ Feature: Fetch projects with nested VCS dependencies """ Dfetch (0.12.1) glob-project: - > Found & fetched submodule "./some_dir_a/ext/test-repo" (some-remote-server/TestRepo.git @ master - 79698c99152e4a4b7b759c9def50a130bc91a2ff) + > Found & fetched submodule "./ext/test-repo" (some-remote-server/TestRepo.git @ master - 79698c99152e4a4b7b759c9def50a130bc91a2ff) > Fetched master - e1fda19a57b873eb8e6ae37780594cbb77b70f1a """ Then 'MyProject' looks like: diff --git a/tests/test_util.py b/tests/test_util.py index 5e7010c3..60073e8a 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -5,7 +5,12 @@ import pytest -from dfetch.util.util import copy_src_subset, hash_directory, prune_files_by_pattern +from dfetch.util.util import ( + 
copy_src_subset, + hash_directory, + prune_files_by_pattern, + strip_glob_prefix, +) # --------------------------------------------------------------------------- # copy_src_subset – path-traversal protection @@ -155,3 +160,27 @@ def test_prune_skips_already_removed_paths(tmp_path): assert not parent.exists() assert unrelated.exists() + + +# --------------------------------------------------------------------------- +# strip_glob_prefix +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + "path, pattern, expected", + [ + # Wildcard prefix stripped + ("some_dir_a/ext/lib", "some_dir_*", "ext/lib"), + # Multi-level pattern with wildcard + ("SomeFolder/SomeSubFolder/file.c", "SomeFolder/Some*", "file.c"), + # Exact (no-wildcard) prefix stripped + ("pkg/sub/module", "pkg", "sub/module"), + # Path does not match pattern — returned unchanged + ("unrelated/path", "pkg", "unrelated/path"), + # Path too shallow to have anything beyond the pattern — returned unchanged + ("some_dir_a", "some_dir_*", "some_dir_a"), + ], +) +def test_strip_glob_prefix(path, pattern, expected): + assert strip_glob_prefix(path, pattern) == expected From 3ac0e9061661f2d445bc39b48c330fb5e501e550 Mon Sep 17 00:00:00 2001 From: Ben Date: Sun, 22 Mar 2026 21:56:51 +0000 Subject: [PATCH 14/15] Extend documentation --- dfetch/commands/update.py | 20 +++++++++ dfetch/reporting/stdout_reporter.py | 70 +++++++++++++++++++++++++++-- doc/internal.rst | 2 - 3 files changed, 87 insertions(+), 5 deletions(-) diff --git a/dfetch/commands/update.py b/dfetch/commands/update.py index 6e44ca35..6b6ec505 100644 --- a/dfetch/commands/update.py +++ b/dfetch/commands/update.py @@ -29,6 +29,26 @@ .. 
scenario-include:: ../features/updated-project-has-dependencies.feature +Git submodules +~~~~~~~~~~~~~~ + +When a git dependency itself contains git submodules, *Dfetch* fetches and resolves +them automatically; no extra manifest entries or ``git submodule`` commands are needed. + +Each submodule is checked out at the exact revision pinned by the parent repository. +*Dfetch* reports every resolved submodule in the update output:: + + Dfetch (0.12.1) + my-project: + > Found & fetched submodule "./ext/vendor-lib" (https://github.com/example/vendor-lib @ master - 79698c9…) + > Fetched master - e1fda19… + +Nested submodules (submodules of submodules) are resolved recursively. The pinned +details for each submodule are recorded in the ``.dfetch_data.yaml`` metadata file +and are visible in :ref:`Report`. + +.. scenario-include:: ../features/fetch-git-repo-with-submodule.feature + """ import argparse diff --git a/dfetch/reporting/stdout_reporter.py b/dfetch/reporting/stdout_reporter.py index 4c4ac521..87b57215 100644 --- a/dfetch/reporting/stdout_reporter.py +++ b/dfetch/reporting/stdout_reporter.py @@ -1,7 +1,71 @@ -"""*Dfetch* can generate an report on stdout. +"""*Dfetch* can generate a report on stdout. -Depending on the state of the projects it will show as much information -from the manifest or the metadata (``.dfetch_data.yaml``). +The stdout report prints one block per project. Fields are drawn from the +manifest where possible and fall back to the ``.dfetch_data.yaml`` metadata +written by :ref:`Update` when the project has been fetched at least once. + +Output format +~~~~~~~~~~~~~ + +A typical block looks like this: + +.. 
code-block:: console + + my-project: + - remote : <none> + remote url : https://github.com/example/my-project + branch : main + tag : <none> + last fetch : 01/01/2025, 12:00:00 + revision : e1fda19a… + patch : <none> + licenses : MIT + +The fields are: + +- **remote**: named :ref:`Remotes` entry from the manifest (``<none>`` when + the URL is given directly via ``url:``). +- **remote url**: full URL of the upstream repository (derived from ``url:`` + or the ``url-base`` of the :ref:`Remotes` entry). +- **branch** / **tag** / **revision**: version as recorded at fetch time; + see :ref:`Revision/Branch/Tag`. +- **last fetch**: timestamp of the last successful ``dfetch update``. +- **patch**: patch file(s) applied after fetching (``<none>`` if unused); + see :ref:`Patch`. +- **licenses**: license(s) auto-detected in the fetched directory. + +If a project has never been fetched, the metadata file is absent and only +``last fetch: never`` is shown. + +Dependencies +~~~~~~~~~~~~ + +When a fetched git project contains submodules, *Dfetch* records each one as a +dependency inside the project's ``.dfetch_data.yaml`` metadata file. The +stdout report surfaces these under a ``dependencies`` block: + +.. code-block:: console + + my-project: + - remote : <none> + ... + dependencies : + - path : ext/vendor-lib + url : https://github.com/example/vendor-lib + branch : master + tag : <none> + revision : 79698c99… + source-type : git-submodule + +Each dependency entry contains: + +- **path**: location of the submodule inside the fetched project. +- **url**: upstream URL of the submodule repository. +- **branch** / **tag** / **revision**: version information pinned by the parent. +- **source-type**: origin of the dependency (e.g. ``git-submodule``). + +.. 
scenario-include:: ../features/fetch-git-repo-with-submodule.feature + :scenario: Submodule changes are reported in the project report """ from dfetch.log import get_logger diff --git a/doc/internal.rst b/doc/internal.rst index 814205f5..19a61ec3 100644 --- a/doc/internal.rst +++ b/doc/internal.rst @@ -31,8 +31,6 @@ Metadata A file created by *DFetch* to store some relevant information about a subproject. -.. _architecture: - Architecture ------------ These diagrams are based on `Simon Brown's C4-model`_. From 0ddf1dd23af1f1f32f933cc3fa23b05b2b3269db Mon Sep 17 00:00:00 2001 From: Ben Date: Sun, 22 Mar 2026 22:16:55 +0000 Subject: [PATCH 15/15] cleanup --- dfetch/project/archivesubproject.py | 5 +++-- dfetch/project/gitsubproject.py | 7 ++++--- dfetch/project/subproject.py | 27 ++------------------------- dfetch/project/svnsubproject.py | 5 +++-- dfetch/vcs/git.py | 20 +++++++++----------- tests/test_subproject.py | 5 +++-- 6 files changed, 24 insertions(+), 45 deletions(-) diff --git a/dfetch/project/archivesubproject.py b/dfetch/project/archivesubproject.py index bc80346f..f941716f 100644 --- a/dfetch/project/archivesubproject.py +++ b/dfetch/project/archivesubproject.py @@ -49,7 +49,8 @@ from dfetch.log import get_logger from dfetch.manifest.project import ProjectEntry from dfetch.manifest.version import Version -from dfetch.project.subproject import SubProject, VcsDependency +from dfetch.project.metadata import Dependency +from dfetch.project.subproject import SubProject from dfetch.vcs.archive import ( ARCHIVE_EXTENSIONS, ArchiveLocalRepo, @@ -166,7 +167,7 @@ def wanted_version(self) -> Version: return Version(revision=self._project_entry.hash) return Version(revision=self.remote) - def _fetch_impl(self, version: Version) -> tuple[Version, list[VcsDependency]]: + def _fetch_impl(self, version: Version) -> tuple[Version, list[Dependency]]: """Download and extract the archive to the local destination. 1. Download the archive to a temporary file. 
diff --git a/dfetch/project/gitsubproject.py b/dfetch/project/gitsubproject.py index a06ff075..757027b4 100644 --- a/dfetch/project/gitsubproject.py +++ b/dfetch/project/gitsubproject.py @@ -6,7 +6,8 @@ from dfetch.log import get_logger from dfetch.manifest.project import ProjectEntry from dfetch.manifest.version import Version -from dfetch.project.subproject import SubProject, VcsDependency +from dfetch.project.metadata import Dependency +from dfetch.project.subproject import SubProject from dfetch.util.util import LICENSE_GLOBS, safe_rm from dfetch.vcs.git import GitLocalRepo, GitRemote, get_git_version @@ -56,7 +57,7 @@ def list_tool_info() -> None: ) SubProject._log_tool("git", "") - def _fetch_impl(self, version: Version) -> tuple[Version, list[VcsDependency]]: + def _fetch_impl(self, version: Version) -> tuple[Version, list[Dependency]]: """Get the revision of the remote and place it at the local path.""" rev_or_branch_or_tag = self._determine_what_to_fetch(version) @@ -83,7 +84,7 @@ def _fetch_impl(self, version: Version) -> tuple[Version, list[VcsDependency]]: f" ({submodule.url} @ {Version(tag=submodule.tag, branch=submodule.branch, revision=submodule.sha)})", ) vcs_deps.append( - VcsDependency( + Dependency( remote_url=submodule.url, destination=submodule.path, branch=submodule.branch, diff --git a/dfetch/project/subproject.py b/dfetch/project/subproject.py index c4365649..36ccff3e 100644 --- a/dfetch/project/subproject.py +++ b/dfetch/project/subproject.py @@ -4,7 +4,6 @@ import pathlib from abc import ABC, abstractmethod from collections.abc import Callable, Sequence -from typing import NamedTuple from dfetch.log import get_logger from dfetch.manifest.project import ProjectEntry @@ -18,28 +17,6 @@ logger = get_logger(__name__) -class VcsDependency(NamedTuple): - """Information about a vcs dependency.""" - - destination: str - remote_url: str - branch: str - tag: str - revision: str - source_type: str - - def to_dependency(self) -> Dependency: - 
"""Convert this vcs dependency to a Dependency object.""" - return Dependency( - destination=self.destination, - remote_url=self.remote_url, - branch=self.branch, - tag=self.tag, - revision=self.revision, - source_type=self.source_type, - ) - - class SubProject(ABC): """Abstract SubProject object. @@ -168,7 +145,7 @@ def update( skiplist=[self.__metadata.FILENAME] + post_fetch_ignored, ), patch_=applied_patches, - dependencies=[dependency.to_dependency() for dependency in dependency], + dependencies=list(dependency), ) logger.debug(f"Writing repo metadata to: {self.__metadata.path}") @@ -416,7 +393,7 @@ def _are_there_local_changes(self, files_to_ignore: Sequence[str]) -> bool: ) @abstractmethod - def _fetch_impl(self, version: Version) -> tuple[Version, list[VcsDependency]]: + def _fetch_impl(self, version: Version) -> tuple[Version, list[Dependency]]: """Fetch the given version of the subproject, should be implemented by the child class.""" @abstractmethod diff --git a/dfetch/project/svnsubproject.py b/dfetch/project/svnsubproject.py index 8b5e546d..6333856e 100644 --- a/dfetch/project/svnsubproject.py +++ b/dfetch/project/svnsubproject.py @@ -7,7 +7,8 @@ from dfetch.log import get_logger from dfetch.manifest.project import ProjectEntry from dfetch.manifest.version import Version -from dfetch.project.subproject import SubProject, VcsDependency +from dfetch.project.metadata import Dependency +from dfetch.project.subproject import SubProject from dfetch.util.util import ( find_matching_files, find_non_matching_files, @@ -107,7 +108,7 @@ def _remove_ignored_files(self) -> None: if not (file_or_dir.is_file() and is_license_file(file_or_dir.name)): safe_rm(file_or_dir) - def _fetch_impl(self, version: Version) -> tuple[Version, list[VcsDependency]]: + def _fetch_impl(self, version: Version) -> tuple[Version, list[Dependency]]: """Get the revision of the remote and place it at the local path.""" branch, branch_path, revision = self._determine_what_to_fetch(version) 
rev_arg = f"--revision {revision}" if revision else "" diff --git a/dfetch/vcs/git.py b/dfetch/vcs/git.py index 2a6423e1..d803cc62 100644 --- a/dfetch/vcs/git.py +++ b/dfetch/vcs/git.py @@ -12,7 +12,7 @@ from dfetch.log import get_logger from dfetch.util.cmdline import SubprocessCommandError, run_on_cmdline -from dfetch.util.util import in_directory, safe_rm, strip_glob_prefix +from dfetch.util.util import in_directory, is_license_file, safe_rm, strip_glob_prefix from dfetch.vcs.patch import Patch, PatchType logger = get_logger(__name__) @@ -263,10 +263,7 @@ def _configure_sparse_checkout( with open(".git/info/sparse-checkout", "a", encoding="utf-8") as f: patterns = list(keeps or []) - src_pattern = f"/{src or '*'}" - - if src_pattern not in patterns: - patterns.append(src_pattern) + patterns.append(f"/{src or '*'}") if ignore: patterns += self._determine_ignore_paths(src, ignore) @@ -297,11 +294,7 @@ def checkout_version( # pylint: disable=too-many-arguments run_on_cmdline(logger, ["git", "checkout", "-b", "dfetch-local-branch"]) if src or ignore: - self._configure_sparse_checkout( - src, - (must_keeps or []) + [f"/{src or '*'}"], - ignore, - ) + self._configure_sparse_checkout(src, must_keeps or [], ignore) run_on_cmdline( logger, @@ -342,7 +335,12 @@ def _apply_src_and_ignore( self._move_src_folder_up(remote, src) for ignore_path in ignore or []: - safe_rm(glob.glob(ignore_path)) + paths = [ + p + for p in glob.glob(ignore_path) + if not (os.path.isfile(p) and is_license_file(os.path.basename(p))) + ] + safe_rm(paths, within=".") return [s for s in submodules if os.path.exists(s.path)] diff --git a/tests/test_subproject.py b/tests/test_subproject.py index 55b1f2b4..fcf645d6 100644 --- a/tests/test_subproject.py +++ b/tests/test_subproject.py @@ -10,13 +10,14 @@ from dfetch.manifest.project import ProjectEntry from dfetch.manifest.version import Version -from dfetch.project.subproject import SubProject, VcsDependency +from dfetch.project.metadata import 
Dependency +from dfetch.project.subproject import SubProject class ConcreteSubProject(SubProject): _wanted_version: Version - def _fetch_impl(self, version: Version) -> tuple[Version, list[VcsDependency]]: + def _fetch_impl(self, version: Version) -> tuple[Version, list[Dependency]]: return Version(), [] def _latest_revision_on_branch(self, branch):