From 22fccf37626a444f05fc1be0c2090f49013928d8 Mon Sep 17 00:00:00 2001 From: Ben Spoor <37540691+ben-edna@users.noreply.github.com> Date: Tue, 6 Jan 2026 17:13:31 +0000 Subject: [PATCH] Switch from pykwalify to StrictYAML --- CHANGELOG.rst | 2 + dfetch/manifest/schema.py | 42 ++++++++++++ dfetch/manifest/validate.py | 68 ++++++++++++++----- dfetch/resources/__init__.py | 6 +- dfetch/resources/schema.yaml | 39 ----------- .../updated-project-has-dependencies.feature | 6 +- features/validate-manifest.feature | 27 +++++++- pyproject.toml | 2 +- tests/test_resources.py | 18 ++--- 9 files changed, 138 insertions(+), 72 deletions(-) create mode 100644 dfetch/manifest/schema.py delete mode 100644 dfetch/resources/schema.yaml diff --git a/CHANGELOG.rst b/CHANGELOG.rst index e79d95ee..4799d6ae 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -2,6 +2,8 @@ Release 0.12.0 (unreleased) ==================================== * Internal refactoring: introduce superproject & subproject (#896) +* Switch from pykwalify to StrictYAML (#922) +* Show line number when manifest validation fails (#36) Release 0.11.0 (released 2026-01-03) ==================================== diff --git a/dfetch/manifest/schema.py b/dfetch/manifest/schema.py new file mode 100644 index 00000000..bb5cc9a6 --- /dev/null +++ b/dfetch/manifest/schema.py @@ -0,0 +1,42 @@ +"""StrictYAML schema for the manifest.""" + +from strictyaml import Bool, Enum, Float, Int, Map, Optional, Seq, Str + +NUMBER = Int() | Float() + +REMOTE_SCHEMA = Map( + { + "name": Str(), + "url-base": Str(), + Optional("default"): Bool(), + } +) + +PROJECT_SCHEMA = Map( + { + "name": Str(), + Optional("dst"): Str(), + Optional("branch"): Str(), + Optional("tag"): Str(), + Optional("revision"): Str(), + Optional("url"): Str(), + Optional("repo-path"): Str(), + Optional("remote"): Str(), + Optional("patch"): Str(), + Optional("vcs"): Enum(["git", "svn"]), + Optional("src"): Str(), + Optional("ignore"): Seq(Str()), + } +) + +MANIFEST_SCHEMA = Map( + { + "manifest": Map( + { + "version": NUMBER, + Optional("remotes"): Seq(REMOTE_SCHEMA), + "projects": Seq(PROJECT_SCHEMA), + } + ) + } +) diff --git a/dfetch/manifest/validate.py b/dfetch/manifest/validate.py index d22ce60a..7076031c 100644 --- a/dfetch/manifest/validate.py +++ b/dfetch/manifest/validate.py @@ -1,27 +1,61 @@ -"""Validate manifests.""" +"""Validate manifests using StrictYAML.""" -import logging +import pathlib +from collections.abc import Mapping +from typing import Any, cast -import pykwalify -from pykwalify.core import Core, SchemaError -from yaml.scanner import ScannerError +from strictyaml import StrictYAMLError, YAMLValidationError, load -import dfetch.resources +from dfetch.manifest.schema import MANIFEST_SCHEMA -def validate(path: str) -> None: - """Validate the given manifest.""" - logging.getLogger(pykwalify.__name__).setLevel(logging.CRITICAL) +def _ensure_unique(seq: list[dict[str, Any]], key: str, context: str) -> None: + """Ensure values for `key` are unique within a sequence of dicts.""" + values = [item.get(key) for item in seq if key in item] + seen: set[Any] = set() + dups: set[Any] = set() + for val in values: + if val in seen: + dups.add(val) + else: + seen.add(val) + + if dups: + dup_list = ", ".join(sorted(map(str, dups))) + raise RuntimeError( + f"Schema validation failed:\nDuplicate {context}.{key} value(s): {dup_list}" + ) + - with dfetch.resources.schema_path() as schema_path: - try: - validator = Core(source_file=path, schema_files=[str(schema_path)]) - except ScannerError as err: - raise RuntimeError(f"{schema_path} is not a valid YAML file!") from err +def validate(path: str) -> None: + """Validate the given manifest file against the StrictYAML schema. + Raises: + RuntimeError: if the file is not valid YAML or violates the schema/uniqueness constraints. + """ try: - validator.validate(raise_exception=True) - except SchemaError as err: + + loaded_manifest = load( + pathlib.Path(path).read_text(encoding="UTF-8"), schema=MANIFEST_SCHEMA + ) + except (YAMLValidationError, StrictYAMLError) as err: raise RuntimeError( - str(err.msg) # pyright: ignore[reportAttributeAccessIssue, reportCallIssue] + "\n".join( + [ + "Schema validation failed:", + "", + err.context_mark.get_snippet(), + "", + err.problem, + ] + ) ) from err + + data: dict[str, Any] = cast(dict[str, Any], loaded_manifest.data) + manifest: Mapping[str, Any] = data["manifest"] # required + projects: list[dict[str, Any]] = manifest["projects"] # required + remotes: list[dict[str, Any]] = manifest.get("remotes", []) or [] # optional + + _ensure_unique(remotes, "name", "manifest.remotes") + _ensure_unique(projects, "name", "manifest.projects") + _ensure_unique(projects, "dst", "manifest.projects") diff --git a/dfetch/resources/__init__.py b/dfetch/resources/__init__.py index bc2f611f..ed1298ff 100644 --- a/dfetch/resources/__init__.py +++ b/dfetch/resources/__init__.py @@ -17,9 +17,9 @@ def _resource_path(filename: str) -> ContextManager[Path]: ) -def schema_path() -> ContextManager[Path]: - """Get path to schema.""" - return _resource_path("schema.yaml") +def template_path() -> ContextManager[Path]: + """Get path to template.""" + return _resource_path("template.yaml") TEMPLATE_PATH = _resource_path("template.yaml") diff --git a/dfetch/resources/schema.yaml b/dfetch/resources/schema.yaml deleted file mode 100644 index 16142203..00000000 --- a/dfetch/resources/schema.yaml +++ /dev/null @@ -1,39 +0,0 @@ -# Schema file (schema.yaml) -type: map -mapping: - "manifest": - type: map - required: True - mapping: - "version": { type: number, required: True} - "remotes": - required: False - type: seq - sequence: - - type: map - mapping: - "name": { type: str, required: True, unique: True} - "url-base": { type: str, required: True} - "default": { type: bool } - "projects": - required: True - type: seq - sequence: - - type: map - mapping: - "name": { type: str, required: True, unique: True} - "dst": { type: str, unique: True } - "branch": { type: str } - "tag": { type: str } - "revision": { type: str } - "url": { type: str } - "repo-path": { type: str } - "remote": { type: str } - "patch": { type: str } - "vcs": { type: str, enum: ['git', 'svn'] } - "src": { type: str } - "ignore": - required: False - type: seq - sequence: - - type: str diff --git a/features/updated-project-has-dependencies.feature b/features/updated-project-has-dependencies.feature index 3bc75536..77e7c456 100644 --- a/features/updated-project-has-dependencies.feature +++ b/features/updated-project-has-dependencies.feature @@ -90,7 +90,11 @@ Feature: Updated project has dependencies Dfetch (0.11.0) SomeProject : Fetched v1 SomeProject/dfetch.yaml: Schema validation failed: - - Value 'very-invalid-manifest' is not a dict. Value path: ''. + + "very-invalid-manifest\n" + ^ (line: 1) + + found arbitrary text """ And 'MyProject' looks like: """ diff --git a/features/validate-manifest.feature b/features/validate-manifest.feature index 3f5bfc46..62371453 100644 --- a/features/validate-manifest.feature +++ b/features/validate-manifest.feature @@ -44,6 +44,29 @@ Feature: Validate a manifest """ Dfetch (0.11.0) Schema validation failed: - - Cannot find required key 'manifest'. Path: ''. - - Key 'manifest-wrong' was not defined. Path: ''. + + manifest-wrong: + ^ (line: 1) + + unexpected key not in schema 'manifest-wrong' + """ + + Scenario: A manifest with duplicate project names + Given the manifest 'dfetch.yaml' + """ + manifest: + version: '0.0' + remotes: + - name: github-com-dfetch-org + url-base: https://github.com/dfetch-org/test-repo + projects: + - name: ext/test-repo-rev-only + - name: ext/test-repo-rev-only + """ + When I run "dfetch validate" + Then the output shows + """ + Dfetch (0.11.0) + Schema validation failed: + Duplicate manifest.projects.name value(s): ext/test-repo-rev-only """ diff --git a/pyproject.toml b/pyproject.toml index d0c3da20..9ac6def5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,7 +41,7 @@ classifiers = [ dependencies = [ "PyYAML==6.0.3", "coloredlogs==15.0.1", - "pykwalify==1.8.0", + "strictyaml==1.7.3", "halo==0.0.31", "colorama==0.4.6", "typing-extensions==4.15.0", diff --git a/tests/test_resources.py b/tests/test_resources.py index cba2068c..dcb4bbad 100644 --- a/tests/test_resources.py +++ b/tests/test_resources.py @@ -8,18 +8,18 @@ import dfetch.resources -def test_schema_path() -> None: - """Test that schema path can be used as context manager.""" +def test_template_path() -> None: + """Test that template path can be used as context manager.""" - with dfetch.resources.schema_path() as schema_path: - assert os.path.isfile(schema_path) + with dfetch.resources.template_path() as template_path: + assert os.path.isfile(template_path) -def test_call_schema_path_twice() -> None: +def test_call_template_path_twice() -> None: """Had a lot of problems with calling contextmanager twice.""" - with dfetch.resources.schema_path() as schema_path: - assert os.path.isfile(schema_path) + with dfetch.resources.template_path() as template_path: + assert os.path.isfile(template_path) - with dfetch.resources.schema_path() as schema_path: - assert os.path.isfile(schema_path) + with dfetch.resources.template_path() as template_path: + assert os.path.isfile(template_path)