Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 5 additions & 7 deletions doclang/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,17 +78,16 @@ def validate(
)
except ValidationError as exc:
results: dict[str, Any] = {
"file": exc.file,
"xsd": {"valid": exc.xsd_valid, "errors": exc.xsd_errors},
"schematron": {"valid": exc.schematron_valid, "errors": exc.schematron_errors},
"xsd": {"valid": not exc.xsd_errors, "errors": exc.xsd_errors},
"schematron": {"valid": not exc.schematron_errors, "errors": exc.schematron_errors},
}

if not quiet and format == OutputFormat.text:
if not schematron_only:
if verbose:
typer.echo("XSD Validation")
typer.echo(f"Schema: {bundled_xsd}")
if exc.xsd_valid:
if not exc.xsd_errors:
typer.echo("XSD validation passed")
else:
typer.echo("XSD validation failed")
Expand All @@ -98,11 +97,11 @@ def validate(
else:
typer.echo(f" {error.get('error', 'Unknown error')}")

if not xsd_only and (exc.xsd_valid or schematron_only):
if not xsd_only:
if verbose:
typer.echo("Schematron Validation")
typer.echo(f"Schema: {bundled_sch}")
if exc.schematron_valid:
if not exc.schematron_errors:
typer.echo("Schematron validation passed")
else:
typer.echo("Schematron validation failed")
Expand Down Expand Up @@ -135,7 +134,6 @@ def validate(
typer.echo(
json.dumps(
{
"file": str(xml_file),
"xsd": {"valid": True, "errors": []},
"schematron": {"valid": True, "errors": []},
},
Expand Down
14 changes: 8 additions & 6 deletions doclang/schematron_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,8 +136,12 @@ def _validate_with_schematron(
xml_file: Union[str, Path],
allow_empty_namespace: bool = False,
verbose: bool = False,
) -> tuple[bool, list]:
"""Validate XML against the bundled DocLang Schematron rules."""
) -> list:
"""Validate XML against the bundled DocLang Schematron rules.

Returns:
SVRL failed-assert elements; empty when validation passes.
"""
sch_file = _bundled_sch_path()
if verbose:
print(f"Using Schematron file: {sch_file}")
Expand Down Expand Up @@ -187,12 +191,10 @@ def _validate_with_schematron(
if result:
result_doc = etree.fromstring(result.encode("utf-8"))
failed_asserts = result_doc.findall(".//{http://purl.oclc.org/dsdl/svrl}failed-assert")

is_valid = len(failed_asserts) == 0
return is_valid, failed_asserts
return failed_asserts
else:
# No output means validation passed
return True, []
return []

# Temporary file automatically deleted when exiting context manager

Expand Down
28 changes: 8 additions & 20 deletions doclang/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,30 +28,24 @@ class ValidationError(Exception):

def __init__(
self,
file: str,
*,
xsd_valid: bool,
xsd_errors: list[dict[str, Any]],
schematron_valid: bool,
schematron_errors: list[dict[str, Any]],
) -> None:
self.file = file
self.xsd_valid = xsd_valid
self.xsd_errors = xsd_errors
self.schematron_valid = schematron_valid
self.schematron_errors = schematron_errors
super().__init__(self._format_message())

def _format_message(self) -> str:
lines: list[str] = [f"Validation failed for {self.file}"]
if not self.xsd_valid:
lines: list[str] = []
if self.xsd_errors:
lines.append("XSD validation failed:")
for error in self.xsd_errors:
if "line" in error:
lines.append(f" Line {error['line']}: {error['message']}")
else:
lines.append(f" {error.get('error', 'Unknown error')}")
if not self.schematron_valid:
if self.schematron_errors:
lines.append("Schematron validation failed:")
for error in self.schematron_errors:
if "location" in error:
Expand All @@ -73,32 +67,26 @@ def validate(
Raises :class:`ValidationError` on failure.
"""
path = Path(xml_file)
xsd_valid = True
xsd_errors: list[dict[str, Any]] = []
schematron_valid = True
schematron_errors: list[dict[str, Any]] = []

if not schematron_only:
xsd_valid, xsd_errors = _validate_xsd(path, allow_empty_namespace=allow_empty_namespace)
xsd_errors = _validate_xsd(path, allow_empty_namespace=allow_empty_namespace)

if not xsd_only and (xsd_valid or schematron_only):
if not xsd_only:
try:
schematron_valid, failed_asserts = _validate_with_schematron(
failed_asserts = _validate_with_schematron(
path,
allow_empty_namespace=allow_empty_namespace,
verbose=False,
)
if not schematron_valid:
if failed_asserts:
schematron_errors = _failed_asserts_to_errors(failed_asserts)
except Exception as exc:
schematron_valid = False
schematron_errors = [{"error": str(exc)}]

if not (xsd_valid and schematron_valid):
if xsd_errors or schematron_errors:
raise ValidationError(
str(path),
xsd_valid=xsd_valid,
xsd_errors=xsd_errors,
schematron_valid=schematron_valid,
schematron_errors=schematron_errors,
)
13 changes: 6 additions & 7 deletions doclang/xsd_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

def _validate_xsd_at(
xml_file: Union[str, Path], xsd_file: Union[str, Path], allow_empty_namespace: bool = False
) -> tuple[bool, list[dict[str, Any]]]:
) -> list[dict[str, Any]]:
"""
Validate XML against an XSD schema using lxml (internal).

Expand All @@ -25,7 +25,7 @@ def _validate_xsd_at(
allow_empty_namespace: If True, automatically add DocLang namespace if missing

Returns:
Tuple of (is_valid, errors) where errors is a list of dicts with 'line' and 'message' keys
Validation errors as a list of dicts with 'line' and 'message' keys
"""
try:
with open(xsd_file, "rb") as f:
Expand All @@ -39,14 +39,13 @@ def _validate_xsd_at(
xml_doc = _ensure_namespace(xml_doc)

if schema.validate(xml_doc):
return True, []
errors = [{"line": error.line, "message": error.message} for error in schema.error_log]
return False, errors
return []
return [{"line": error.line, "message": error.message} for error in schema.error_log]

except Exception as e:
return False, [{"error": str(e)}]
return [{"error": str(e)}]


def _validate_xsd(xml_file: Union[str, Path], allow_empty_namespace: bool = False) -> tuple[bool, list[dict[str, Any]]]:
def _validate_xsd(xml_file: Union[str, Path], allow_empty_namespace: bool = False) -> list[dict[str, Any]]:
"""Validate XML against the bundled DocLang XSD schema."""
return _validate_xsd_at(xml_file, _bundled_xsd_path(), allow_empty_namespace=allow_empty_namespace)
13 changes: 13 additions & 0 deletions tests/data/invalid/nok_xsd_and_schematron.dclg.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
<?xml version="1.0" encoding="UTF-8"?>
<doclang xmlns="https://www.doclang.ai/ns/v0">

<!-- XSD: heading@level must be a positive integer -->
<heading level="0">Invalid heading level</heading>

<!-- Schematron: xref references a thread_id that is not defined -->
<text>
<xref thread_id="1"/>
Body text
</text>

</doclang>
17 changes: 14 additions & 3 deletions tests/test_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,25 @@ def test_invalid(xml_file):
validate(xml_file, allow_empty_namespace=False)

exc = exc_info.value
assert not (exc.xsd_valid and exc.schematron_valid), f"Expected {xml_file.name} to fail validation, but it passed"
assert exc.xsd_errors or exc.schematron_errors, f"Expected {xml_file.name} to fail validation, but it passed"

if not exc.xsd_valid:
if exc.xsd_errors:
assert len(exc.xsd_errors) > 0, f"Expected XSD validation errors for {xml_file.name}"
if not exc.schematron_valid:
if exc.schematron_errors:
assert len(exc.schematron_errors) > 0, f"Expected Schematron validation errors for {xml_file.name}"


def test_invalid_reports_both_xsd_and_schematron_errors():
    """Check that a single validation run reports XSD and Schematron errors together."""
    fixture = INVALID_DIR / "nok_xsd_and_schematron.dclg.xml"

    with pytest.raises(ValidationError) as raised:
        validate(fixture, allow_empty_namespace=False)

    failure = raised.value
    # The fixture is expected to trigger one error from each validator.
    assert len(failure.xsd_errors) == 1
    assert len(failure.schematron_errors) == 1


def test_schema_files_exist():
"""Test that required schema files are bundled with the package."""
assert (SCHEMA_DIR / "doclang.xsd").exists(), f"XSD file not found under {SCHEMA_DIR}"
Expand Down
Loading