From 71add81138fcf8daad1122141e6f7c98051289ea Mon Sep 17 00:00:00 2001
From: Amit Singh
Date: Sat, 2 May 2026 19:35:40 +0530
Subject: [PATCH 1/2] chore: adds new sources

Signed-off-by: Amit Singh
---
 sources/al-jazeera.yaml | 4 ++++
 sources/bbc.yaml        | 4 ++++
 sources/ht.yaml         | 4 ++++
 sources/scmp.yaml       | 4 ++++
 4 files changed, 16 insertions(+)
 create mode 100644 sources/al-jazeera.yaml
 create mode 100644 sources/bbc.yaml
 create mode 100644 sources/ht.yaml
 create mode 100644 sources/scmp.yaml

diff --git a/sources/al-jazeera.yaml b/sources/al-jazeera.yaml
new file mode 100644
index 0000000..ab59a83
--- /dev/null
+++ b/sources/al-jazeera.yaml
@@ -0,0 +1,4 @@
+name: "Al Jazeera"
+summary: "Al Jazeera is an international news organization based in Doha, delivering independent global news coverage, documentaries, and deep analysis with a strong focus on the Middle East and the Global South."
+tags: "qatari"
+uri: "https://www.aljazeera.com"
\ No newline at end of file
diff --git a/sources/bbc.yaml b/sources/bbc.yaml
new file mode 100644
index 0000000..7e4d200
--- /dev/null
+++ b/sources/bbc.yaml
@@ -0,0 +1,4 @@
+name: "BBC"
+summary: "The BBC is a British public service broadcaster providing impartial news, in-depth analysis, and comprehensive coverage of global events to audiences in the UK and worldwide."
+tags: "british"
+uri: "https://www.bbc.com"
\ No newline at end of file
diff --git a/sources/ht.yaml b/sources/ht.yaml
new file mode 100644
index 0000000..682a7e0
--- /dev/null
+++ b/sources/ht.yaml
@@ -0,0 +1,4 @@
+name: "Hindustan Times"
+summary: "Hindustan Times is one of India's largest English-language daily newspapers, offering extensive reporting, analysis, and opinion on national, political, and international affairs."
+tags: "indian"
+uri: "https://www.hindustantimes.com"
\ No newline at end of file
diff --git a/sources/scmp.yaml b/sources/scmp.yaml
new file mode 100644
index 0000000..48294e0
--- /dev/null
+++ b/sources/scmp.yaml
@@ -0,0 +1,4 @@
+name: "South China Morning Post"
+summary: "The South China Morning Post is a leading English-language newspaper based in Hong Kong, known for its authoritative reporting and insights on Hong Kong, mainland China, and the broader Asian region."
+tags: "hongkong"
+uri: "https://www.scmp.com"
\ No newline at end of file

From 7fd0fe5ca544ec1787efa5b5e49c5ef6be862dbb Mon Sep 17 00:00:00 2001
From: Amit Singh
Date: Sat, 2 May 2026 19:56:06 +0530
Subject: [PATCH 2/2] fix: updates validator to run custom validation checks

Signed-off-by: Amit Singh
---
 scripts/validate.py | 91 ++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 85 insertions(+), 6 deletions(-)
 mode change 100644 => 100755 scripts/validate.py

diff --git a/scripts/validate.py b/scripts/validate.py
old mode 100644
new mode 100755
index c67ed3f..dfe61d3
--- a/scripts/validate.py
+++ b/scripts/validate.py
@@ -15,8 +15,8 @@ sys.path.insert(0, os.path.dirname(__file__))
 
 from common import load_oapi, load_doc
 
+# Use jsonschema's Draft202012Validator for validation
 from jsonschema import Draft202012Validator
-from referencing import Registry, Resource
 
 # folder -> schema $ref in oapi.yaml
 SCHEMA_MAP = {
@@ -34,12 +34,86 @@ def resolve_schema(spec: dict, ref: str) -> dict:
     return current
 
 def validate(data: dict, schema: dict, spec: dict) -> list[str]:
-    registry = Registry().with_resource(
-        "oapi",
-        Resource.from_contents(spec, DRAFT202012)  # explicit spec
-    )
-    validator = Draft202012Validator(schema, registry=registry)
+    """Validate *data* against *schema* and return one string per error.
+
+    NOTE(review): *spec* is kept for interface compatibility but is no
+    longer consulted. Without a registry, a ``$ref`` inside *schema* that
+    points into the full OpenAPI document presumably cannot be resolved;
+    confirm that the schemas passed in are self-contained.
+    """
+    # The referencing-based registry approach was causing incompatibilities
+    # in some environments, so use the plain validator here.
+    validator = Draft202012Validator(schema)
+    # json_path labels the document root as "$", so a root-level error such
+    # as a missing required key never prints with an empty path.
     return [f"{e.json_path}: {e.message}" for e in validator.iter_errors(data)]
+
+
+def _parse_validate_rules(s: object) -> list[str]:
+    """Normalize a ``validate`` tag value into a list of rule names.
+
+    A string is split on commas; a list or tuple is taken item by item.
+    Entries are stripped, blank entries are dropped, and any other input
+    (including ``None``) yields an empty list.
+    """
+    if isinstance(s, str):
+        candidates = s.split(',')
+    elif isinstance(s, (list, tuple)):
+        candidates = [str(p) for p in s]
+    else:
+        return []
+    return [p.strip() for p in candidates if p.strip()]
+
+
+def run_extra_validations(data: dict, schema: dict) -> list[str]:
+    """Run x-oapi-codegen-extra-tags validators declared in the schema.
+
+    Only the top-level ``properties`` of *schema* are inspected. Fields
+    absent from *data* are skipped; JSON Schema required/nullable rules
+    cover requiredness.
+
+    Supported validators: nonempty, nospace, httpsurl. Unknown rule names
+    are ignored for now.
+    """
+    errors: list[str] = []
+    if not isinstance(data, dict):
+        return errors
+
+    props = schema.get('properties') or {}
+    for name, prop_schema in props.items():
+        # A boolean schema (true/false) is valid JSON Schema but has no
+        # keys to read, so guard before calling .get() on it.
+        if not isinstance(prop_schema, dict):
+            continue
+        extra = prop_schema.get('x-oapi-codegen-extra-tags')
+        if not isinstance(extra, dict):
+            continue
+        rules = _parse_validate_rules(extra.get('validate'))
+        if not rules or name not in data:
+            continue
+
+        val = data[name]
+        for rule in rules:
+            if rule == 'nonempty':
+                blank_text = isinstance(val, str) and not val.strip()
+                empty_container = isinstance(val, (list, dict)) and not val
+                if val is None or blank_text or empty_container:
+                    errors.append(f"{name}: must not be empty")
+
+            elif rule == 'nospace':
+                # None is not a string problem; leave it to the schema.
+                if val is None:
+                    continue
+                if not isinstance(val, str):
+                    errors.append(f"{name}: nospace rule applies to string values")
+                elif ' ' in val:
+                    errors.append(f"{name}: must not contain spaces")
+
+            elif rule == 'httpsurl':
+                if not isinstance(val, str) or not val.startswith('https://'):
+                    errors.append(f"{name}: must be an https URL")
+
+    return errors
 
 def scan_tracked_files() -> list[str]:
     files = []
@@ -103,6 +177,11 @@ def main() -> int:
             continue
 
         errors = validate(data, schema, spec)
+        # Run extra validators defined via x-oapi-codegen-extra-tags
+        extra_errors = run_extra_validations(data, schema)
+        if extra_errors:
+            errors.extend(extra_errors)
+
         if errors:
             _error("%s: %d validation error(s)", f, len(errors))
             for e in errors: