Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
6fd7a63
Add feature tests and manifest schema for archive (tar/zip) support (…
claude Mar 20, 2026
c6f8019
Implement archive (tar/zip) VCS type with hash verification (#23)
claude Mar 20, 2026
fe49a30
Add archive support to reporters and SBOM (#23)
claude Mar 20, 2026
93a5632
Security hardening for archive extraction (#23)
claude Mar 20, 2026
46f2cc4
Simplify and clean up archive implementation (#23)
claude Mar 20, 2026
3203c75
Move archive hash into integrity block in manifest (#23)
claude Mar 20, 2026
b693355
Update manifest.rst schema doc for integrity block (#23)
claude Mar 20, 2026
9988a33
Fix review findings in archive/integrity implementation (#23)
claude Mar 20, 2026
ca20833
Apply security and correctness fixes from review (#23)
claude Mar 20, 2026
a34a394
Add unit tests for archive VCS, integrity block, and PURL; update doc…
claude Mar 20, 2026
1b2d30a
Fixes
spoorcc Mar 20, 2026
72aacb7
Cleanup implementation
spoorcc Mar 20, 2026
17cbeff
Fall back to manifest tag/revision/hash when metadata fields are empt…
claude Mar 21, 2026
1ae6367
Set component.group for GitHub and Bitbucket SBOM components (#23)
claude Mar 21, 2026
a748173
Cleanup
spoorcc Mar 20, 2026
328dea9
IntegretyHash class
spoorcc Mar 20, 2026
c3fe25c
Create integrety_hash module
spoorcc Mar 20, 2026
13dec1a
Review comments
spoorcc Mar 21, 2026
7d7f9e7
Fix test
spoorcc Mar 21, 2026
ec10685
Add feature test
spoorcc Mar 21, 2026
0912cc6
Add example to example/dfetch.yaml
spoorcc Mar 21, 2026
dfc5778
Ensure consistent hash
spoorcc Mar 21, 2026
96dfd32
Centralize path traversal check
spoorcc Mar 21, 2026
c7cfd20
Review comments
spoorcc Mar 21, 2026
3ca0cb6
Support all 3 hash algo's
spoorcc Mar 21, 2026
6924bd0
Update changelog
spoorcc Mar 21, 2026
24c512e
Don't changing hashing algorithm
spoorcc Mar 21, 2026
6cdc762
Review comments
spoorcc Mar 22, 2026
6f7191a
don't follow symlinks
spoorcc Mar 22, 2026
4aa1901
Review comments
spoorcc Mar 22, 2026
a2a464c
Fix CodeRabbitAI review comments: security, robustness, and platform …
claude Mar 22, 2026
501e3d9
Review comments
spoorcc Mar 22, 2026
b16855f
Review comments
spoorcc Mar 22, 2026
b6a19bf
add missing dst path
spoorcc Mar 22, 2026
59e59b2
Update demo magic hash
spoorcc Mar 22, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ jobs:

- name: Update latest tag
if: ${{ steps.release_info.outputs.tag == 'latest' }}
uses: EndBug/latest-tag@fabb56bc8d15d5937c76719060da2226f5c3ffa8
uses: EndBug/latest-tag@fabb56bc8d15d5937c76719060da2226f5c3ffa8
with:
ref: latest
description: Last state in main
Expand Down
8 changes: 8 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
Unreleased
==========

* Add archive (``vcs: archive``) support for fetching dependencies from ``.tar.gz``, ``.tgz``, ``.tar.bz2``, ``.tar.xz`` and ``.zip`` files via HTTP, HTTPS or file URLs (#1058)
* Fix path-traversal check that used a character-based prefix comparison instead of a path-component comparison (#1058)
* Fix directory hash being non-deterministic across filesystem traversal orders, causing false local-change detection (#1058)
* Fix ``dfetch freeze`` not capturing branch information for SVN projects when only the revision matched (#1058)

Release 0.12.1 (released 2026-02-24)
====================================

Expand Down
4 changes: 4 additions & 0 deletions dfetch/commands/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@

.. scenario-include:: ../features/check-svn-repo.feature

.. tab:: Archive

.. scenario-include:: ../features/check-archive.feature

Sub-manifests
~~~~~~~~~~~~~

Expand Down
12 changes: 6 additions & 6 deletions dfetch/commands/format_patch.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,11 @@
from dfetch.project.gitsubproject import GitSubProject
from dfetch.project.subproject import SubProject
from dfetch.project.svnsubproject import SvnSubProject
from dfetch.util.util import catch_runtime_exceptions, in_directory
from dfetch.util.util import (
catch_runtime_exceptions,
check_no_path_traversal,
in_directory,
)
from dfetch.vcs.patch import Patch, PatchAuthor, PatchInfo, PatchType

logger = get_logger(__name__)
Expand Down Expand Up @@ -80,11 +84,7 @@ def __call__(self, args: argparse.Namespace) -> None:

output_dir_path = pathlib.Path(args.output_directory).resolve()

if not output_dir_path.is_relative_to(superproject.root_directory):
raise RuntimeError(
f"Output directory '{output_dir_path}' must be inside"
f" the superproject root '{superproject.root_directory}'"
)
check_no_path_traversal(output_dir_path, superproject.root_directory)

output_dir_path.mkdir(parents=True, exist_ok=True)

Expand Down
41 changes: 25 additions & 16 deletions dfetch/commands/freeze.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,14 @@

.. scenario-include:: ../features/freeze-projects.feature

For archive projects, ``dfetch freeze`` adds the hash under the nested
``integrity.hash`` key (e.g. ``integrity.hash: sha256:<hex>``) to pin the
exact archive content used. This value acts as the version identifier:
DFetch verifies the downloaded archive against it on every subsequent
``dfetch update``.

.. scenario-include:: ../features/freeze-archive.feature

"""

import argparse
Expand Down Expand Up @@ -78,24 +86,25 @@ def __call__(self, args: argparse.Namespace) -> None:
with in_directory(superproject.root_directory):
for project in superproject.manifest.projects:
with catch_runtime_exceptions(exceptions) as exceptions:
on_disk_version = dfetch.project.create_sub_project(
project
).on_disk_version()

if project.version == on_disk_version:
logger.print_info_line(
project.name,
f"Already pinned in manifest on version {project.version}",
)
elif on_disk_version:
logger.print_info_line(
project.name, f"Freezing on version {on_disk_version}"
)
project.version = on_disk_version
sub_project = dfetch.project.create_sub_project(project)
on_disk_version = sub_project.on_disk_version()

new_version = sub_project.freeze_project(project)
if new_version is None:
if on_disk_version:
logger.print_info_line(
project.name,
f"Already pinned in manifest on version {on_disk_version}",
)
else:
logger.print_warning_line(
project.name,
"No version on disk, first update with 'dfetch update'",
)
else:
logger.print_warning_line(
logger.print_info_line(
project.name,
"No version on disk, first update with 'dfetch update'",
f"Frozen on version {new_version}",
)

projects.append(project)
Expand Down
25 changes: 19 additions & 6 deletions dfetch/commands/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@
from dfetch.manifest.project import ProjectEntry
from dfetch.project import create_super_project
from dfetch.project.metadata import Metadata
from dfetch.project.subproject import SubProject
from dfetch.reporting import REPORTERS, ReportTypes
from dfetch.util.license import License, guess_license_in_file
from dfetch.util.util import is_license_file

logger = get_logger(__name__)

Expand Down Expand Up @@ -89,8 +89,7 @@ def _determine_licenses(project: ProjectEntry) -> list[License]:

license_files = []
with dfetch.util.util.in_directory(project.destination):

for license_file in filter(SubProject.is_license_file, glob.glob("*")):
for license_file in filter(is_license_file, glob.glob("*")):
logger.debug(f"Found license file {license_file} for {project.name}")
guessed_license = guess_license_in_file(license_file)

Expand All @@ -107,10 +106,24 @@ def _determine_licenses(project: ProjectEntry) -> list[License]:

@staticmethod
def _determine_version(project: ProjectEntry) -> str:
"""Determine the fetched version."""
"""Determine the fetched version.

For archive projects the sha256 hash (``sha256:<hex>``) stored in the
metadata *revision* field is used as the version identifier. When no
metadata is present yet, the ``integrity.hash`` field from the manifest
is used as fallback so the SBOM can still be generated before the first
fetch.
"""
try:
metadata = Metadata.from_file(Metadata.from_project_entry(project).path)
version = metadata.tag or metadata.revision or ""
version = (
metadata.tag
or metadata.revision
or project.tag
or project.revision
or project.hash
or ""
)
except FileNotFoundError:
version = project.tag or project.revision or ""
version = project.tag or project.revision or project.hash or ""
return version
24 changes: 19 additions & 5 deletions dfetch/commands/update.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@

.. scenario-include:: ../features/fetch-svn-repo.feature

.. tab:: Archive

.. scenario-include:: ../features/fetch-archive.feature

Sub-manifests
~~~~~~~~~~~~~~~

Expand All @@ -37,7 +41,11 @@
from dfetch.commands.common import check_sub_manifests
from dfetch.log import get_logger
from dfetch.project import create_super_project
from dfetch.util.util import catch_runtime_exceptions, in_directory
from dfetch.util.util import (
catch_runtime_exceptions,
check_no_path_traversal,
in_directory,
)

logger = get_logger(__name__)

Expand Down Expand Up @@ -85,9 +93,14 @@ def __call__(self, args: argparse.Namespace) -> None:
for project in superproject.manifest.selected_projects(args.projects):
with catch_runtime_exceptions(exceptions) as exceptions:
self._check_destination(project, destinations)
destination = project.destination

def _ignored(dst: str = destination) -> list[str]:
return list(superproject.ignored_files(dst))

dfetch.project.create_sub_project(project).update(
force=args.force,
files_to_ignore=superproject.ignored_files(project.destination),
ignored_files_callback=_ignored,
)

if not args.no_recommendations and os.path.isdir(
Expand Down Expand Up @@ -117,16 +130,17 @@ def _check_path_traversal(
project: dfetch.manifest.project.ProjectEntry, real_path: str, safe_dir: str
) -> None:
"""Check if destination is outside the directory tree."""
if os.path.commonprefix((real_path, safe_dir)) != safe_dir:
# See https://owasp.org/www-community/attacks/Path_Traversal
try:
check_no_path_traversal(real_path, safe_dir)
except RuntimeError:
logger.print_warning_line(
project.name,
f'Skipping, path "{project.destination}" is outside manifest directory tree.',
)
raise RuntimeError(
"Destination must be in the manifests folder or a subfolder. "
f'"{project.destination}" is outside this tree!'
)
) from None

@staticmethod
def _check_dst_not_in_blacklist(
Expand Down
18 changes: 12 additions & 6 deletions dfetch/commands/update_patch.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,11 @@
from dfetch.project.gitsuperproject import GitSuperProject
from dfetch.project.metadata import Metadata
from dfetch.project.superproject import NoVcsSuperProject, RevisionRange
from dfetch.util.util import catch_runtime_exceptions, in_directory
from dfetch.util.util import (
catch_runtime_exceptions,
check_no_path_traversal,
in_directory,
)

logger = get_logger(__name__)

Expand Down Expand Up @@ -86,8 +90,10 @@ def __call__(self, args: argparse.Namespace) -> None:
for project in superproject.manifest.selected_projects(args.projects):
with catch_runtime_exceptions(exceptions) as exceptions:
subproject = dfetch.project.create_sub_project(project)
destination = project.destination

files_to_ignore = superproject.ignored_files(project.destination)
def _ignored(dst: str = destination) -> list[str]:
return list(superproject.ignored_files(dst))

# Check if the project has a patch, maybe suggest creating one?
if not subproject.patch:
Expand Down Expand Up @@ -118,7 +124,7 @@ def __call__(self, args: argparse.Namespace) -> None:
# force update to fetched version from metadata without applying patch
subproject.update(
force=True,
files_to_ignore=files_to_ignore,
ignored_files_callback=_ignored,
patch_count=len(subproject.patch) - 1,
)

Expand All @@ -141,7 +147,7 @@ def __call__(self, args: argparse.Namespace) -> None:

# force update again to fetched version from metadata but with applying patch
subproject.update(
force=True, files_to_ignore=files_to_ignore, patch_count=-1
force=True, ignored_files_callback=_ignored, patch_count=-1
)

if exceptions:
Expand All @@ -158,8 +164,8 @@ def _update_patch(
patch_path = pathlib.Path(patch_to_update).resolve()

try:
patch_path.relative_to(root)
except ValueError:
check_no_path_traversal(patch_path, root)
except RuntimeError:
logger.print_warning_line(
project_name,
f'No updating patch "{patch_to_update}" which is outside {root}',
Expand Down
45 changes: 35 additions & 10 deletions dfetch/log.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@
import logging
import os
import sys
import types
from contextlib import nullcontext
from typing import Any, cast

from rich.console import Console
from rich.highlighter import NullHighlighter
from rich.logging import RichHandler
from rich.markup import escape as markup_escape
from rich.status import Status

from dfetch import __version__
Expand Down Expand Up @@ -52,26 +54,30 @@ class DLogger(logging.Logger):

def print_report_line(self, name: str, info: str) -> None:
"""Print a line for a report."""
safe_name = markup_escape(name)
safe_info = markup_escape(info)
self.info(
f" [bold][bright_green]{name:20s}:[/bright_green][blue] {info}[/blue][/bold]"
f" [bold][bright_green]{safe_name:20s}:[/bright_green][blue] {safe_info}[/blue][/bold]"
)

def print_info_line(self, name: str, info: str) -> None:
"""Print a line of info, only printing the project name once."""
if name not in DLogger._printed_projects:
self.info(f" [bold][bright_green]{name}:[/bright_green][/bold]")
safe_name = markup_escape(name)
self.info(f" [bold][bright_green]{safe_name}:[/bright_green][/bold]")
DLogger._printed_projects.add(name)

line = info.replace("\n", "\n ")
line = markup_escape(info).replace("\n", "\n ")
self.info(f" [bold blue]> {line}[/bold blue]")

def print_warning_line(self, name: str, info: str) -> None:
"""Print a warning line: green name, yellow value."""
if name not in DLogger._printed_projects:
self.info(f" [bold][bright_green]{name}:[/bright_green][/bold]")
safe_name = markup_escape(name)
self.info(f" [bold][bright_green]{safe_name}:[/bright_green][/bold]")
DLogger._printed_projects.add(name)

line = info.replace("\n", "\n ")
line = markup_escape(info).replace("\n", "\n ")
self.info(f" [bold bright_yellow]> {line}[/bold bright_yellow]")

def print_title(self) -> None:
Expand All @@ -85,12 +91,14 @@ def print_info_field(self, field_name: str, field: str) -> None:
def warning(self, msg: object, *args: Any, **kwargs: Any) -> None:
"""Log warning."""
super().warning(
f"[bold bright_yellow]{msg}[/bold bright_yellow]", *args, **kwargs
f"[bold bright_yellow]{markup_escape(str(msg))}[/bold bright_yellow]",
*args,
**kwargs,
)

def error(self, msg: object, *args: Any, **kwargs: Any) -> None:
"""Log error."""
super().error(f"[red]{msg}[/red]", *args, **kwargs)
super().error(f"[red]{markup_escape(str(msg))}[/red]", *args, **kwargs)

def status(
self, name: str, message: str, spinner: str = "dots", enabled: bool = True
Expand All @@ -111,11 +119,12 @@ def status(
return nullcontext(None)

if name not in DLogger._printed_projects:
self.info(f" [bold][bright_green]{name}:[/bright_green][/bold]")
safe_name = markup_escape(name)
self.info(f" [bold][bright_green]{safe_name}:[/bright_green][/bold]")
DLogger._printed_projects.add(name)

return Status(
f"[bold bright_blue]> {message}[/bold bright_blue]",
f"[bold bright_blue]> {markup_escape(message)}[/bold bright_blue]",
spinner=spinner,
console=rich_console,
)
Expand All @@ -138,8 +147,9 @@ def filter(self, record: logging.LogRecord) -> bool:
"""Add indentation to the log record message."""
color = "blue" if record.levelno < logging.WARNING else "yellow"

line = record.msg.replace("\n", "\n ")
line = markup_escape(record.getMessage()).replace("\n", "\n ")
record.msg = f"{self.prefix}[{color}]{line}[/{color}]"
record.args = ()
return True


Expand Down Expand Up @@ -186,7 +196,22 @@ def get_logger(name: str, console: Console | None = None) -> DLogger:
def configure_external_logger(name: str, level: int = logging.INFO) -> None:
    """Configure an external logger from a third party package.

    The named logger is demoted to a plain ``logging.Logger``, set to
    *level*, stripped of its own handlers (records propagate to ancestor
    loggers instead) and given an ``ExtLogFilter``.

    The function is safe to call repeatedly for the same *name*: the filter
    is only attached when the logger does not already carry one, so records
    are never rewritten more than once.

    Args:
        name: Name of the third-party logger, e.g. the package name.
        level: Log level to apply to that logger.
    """
    logger = logging.getLogger(name)
    # Ensure the external logger is a plain Logger so its log methods do not
    # wrap messages in Rich markup (which DLogger.warning / DLogger.error do).
    # Without this, markup_escape in ExtLogFilter would turn those Rich tags
    # into literal text that shifts tab-stop calculations when rendered.
    logger.__class__ = logging.Logger
    logger.setLevel(level)
    logger.propagate = True
    logger.handlers.clear()
    # Logger.addFilter only de-duplicates the *same* filter object; a fresh
    # ExtLogFilter() per call would stack up and rewrite every record once
    # per earlier call. Attach one only when none is present yet.
    if not any(isinstance(existing, ExtLogFilter) for existing in logger.filters):
        logger.addFilter(ExtLogFilter())
    # Some packages (e.g. patch_ng) cache logger bound-methods as module-level
    # names at import time (e.g. `warning = logger.warning`). After the
    # __class__ reassignment above those cached references still point at the
    # old DLogger method, so re-bind them to the freshly demoted logger.
    module = sys.modules.get(name.split(".")[0])
    if module is not None:
        for method_name in ("debug", "info", "warning", "error", "critical"):
            attr = getattr(module, method_name, None)
            # Only touch names that really are bound methods of *this* logger;
            # unrelated module attributes with the same name are left alone.
            if isinstance(attr, types.MethodType) and attr.__self__ is logger:
                setattr(module, method_name, getattr(logger, method_name))
Loading
Loading