Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 78 additions & 7 deletions scripts/validate.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
sys.path.insert(0, os.path.dirname(__file__))
from common import load_oapi, load_doc

# Use jsonschema's Draft202012Validator for validation
from jsonschema import Draft202012Validator
from referencing import Registry, Resource

# folder -> schema $ref in oapi.yaml
SCHEMA_MAP = {
Expand All @@ -34,12 +34,78 @@ def resolve_schema(spec: dict, ref: str) -> dict:
return current

def validate(data: dict, schema: dict, spec: dict) -> list[str]:
    """Validate *data* against *schema* and return readable error strings.

    Args:
        data: The loaded document to check.
        schema: The JSON Schema (Draft 2020-12) to validate against.
        spec: The full OpenAPI document. Kept so existing callers keep
            working; this implementation does not consult it.

    Returns:
        One ``"<path>: <message>"`` string per validation error, where
        ``<path>`` is the ``/``-joined location of the offending value or
        ``<root>`` when the error applies to the document itself. The
        list is empty when the document is valid.
    """
    # Use the jsonschema validator directly. The referencing-based registry
    # approach was causing incompatibilities in some environments, so stick
    # to the standard validator here and produce readable paths.
    validator = Draft202012Validator(schema)
    errors: list[str] = []
    for e in validator.iter_errors(data):
        # An empty error path means the failure is at the document root.
        path = '/'.join(str(p) for p in e.path) if e.path else '<root>'
        errors.append(f"{path}: {e.message}")
    return errors


def _parse_validate_rules(s: str) -> list[str]:
if not s:
return []
if isinstance(s, str):
return [p.strip() for p in s.split(',') if p.strip()]
if isinstance(s, (list, tuple)):
return list(s)
return []


def run_extra_validations(data: dict, schema: dict) -> list[str]:
    """Run x-oapi-codegen-extra-tags validators declared in the schema.

    Supported validators: nonempty, nospace, httpsurl

    Only the top-level ``properties`` of *schema* are inspected. For each
    property, the ``validate`` entry of its ``x-oapi-codegen-extra-tags``
    mapping is parsed into rule names and applied to the matching key of
    *data*.

    Args:
        data: The loaded document. Non-dict values produce no errors.
        schema: The schema whose properties may carry extra validate tags.

    Returns:
        One ``"<field>: <message>"`` string per violated rule, in
        property/rule order. Unknown rule names are ignored.
    """
    errors: list[str] = []
    if not isinstance(data, dict) or not isinstance(schema, dict):
        return errors

    props = schema.get('properties')
    if not isinstance(props, dict):
        return errors

    for name, prop_schema in props.items():
        # Subschemas may legally be booleans (true/false); those cannot
        # carry extension tags, so skip anything that is not a mapping.
        if not isinstance(prop_schema, dict):
            continue
        extra = prop_schema.get('x-oapi-codegen-extra-tags')
        if not isinstance(extra, dict):
            continue
        rules = _parse_validate_rules(extra.get('validate'))  # type: ignore
        if not rules:
            continue

        # Skip missing fields; JSON Schema required/nullable rules will
        # cover requiredness.
        if name not in data:
            continue

        val = data[name]

        for rule in rules:
            if rule == 'nonempty':
                if val is None:
                    errors.append(f"{name}: must not be empty")
                elif isinstance(val, str) and not val.strip():
                    errors.append(f"{name}: must not be empty")
                elif isinstance(val, (list, dict)) and not val:
                    errors.append(f"{name}: must not be empty")

            elif rule == 'nospace':
                # A null value has no spaces to check; move to the next rule.
                if val is None:
                    continue
                if not isinstance(val, str):
                    errors.append(f"{name}: nospace rule applies to string values")
                elif ' ' in val:
                    errors.append(f"{name}: must not contain spaces")

            elif rule == 'httpsurl':
                if not isinstance(val, str) or not val.startswith('https://'):
                    errors.append(f"{name}: must be an https URL")

            # Unknown rules are ignored for now

    return errors

def scan_tracked_files() -> list[str]:
files = []
Expand Down Expand Up @@ -103,6 +169,11 @@ def main() -> int:
continue

errors = validate(data, schema, spec)
# Run extra validators defined via x-oapi-codegen-extra-tags
extra_errors = run_extra_validations(data, schema)
if extra_errors:
errors.extend(extra_errors)

if errors:
_error("%s: %d validation error(s)", f, len(errors))
for e in errors:
Expand Down
4 changes: 4 additions & 0 deletions sources/al-jazeera.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
name: "Al Jazeera"
summary: "Al Jazeera is an international news organization based in Doha, delivering independent global news coverage, documentaries, and deep analysis with a strong focus on the Middle East and the Global South."
tags: "qatari"
uri: "https://www.aljazeera.com"
4 changes: 4 additions & 0 deletions sources/bbc.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
name: "BBC"
summary: "The BBC is a British public service broadcaster providing impartial news, in-depth analysis, and comprehensive coverage of global events to audiences in the UK and worldwide."
tags: "british"
uri: "https://www.bbc.com"
4 changes: 4 additions & 0 deletions sources/ht.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
name: "Hindustan Times"
summary: "Hindustan Times is one of India's largest English-language daily newspapers, offering extensive reporting, analysis, and opinion on national, political, and international affairs."
tags: "indian"
uri: "https://www.hindustantimes.com"
4 changes: 4 additions & 0 deletions sources/scmp.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
name: "South China Morning Post"
summary: "The South China Morning Post is a leading English-language newspaper based in Hong Kong, known for its authoritative reporting and insights on Hong Kong, mainland China, and the broader Asian region."
tags: "hongkong"
uri: "https://www.scmp.com"
Loading