diff --git a/Makefile b/Makefile index c05a8ff..5302bff 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ -.PHONY: validate validate-control-plane-examples validate-nlboot-examples +.PHONY: validate validate-control-plane-examples validate-nlboot-examples validate-lattice-data-governai-examples -validate: validate-control-plane-examples validate-nlboot-examples +validate: validate-control-plane-examples validate-nlboot-examples validate-lattice-data-governai-examples @echo "OK: validate" validate-control-plane-examples: @@ -10,3 +10,7 @@ validate-control-plane-examples: validate-nlboot-examples: python3 -m pip install --user jsonschema >/dev/null python3 tools/validate_nlboot_examples.py + +validate-lattice-data-governai-examples: + python3 -m pip install --user jsonschema >/dev/null + python3 tools/validate_lattice_data_governai_examples.py diff --git a/examples/annotation_set.json b/examples/annotation_set.json new file mode 100644 index 0000000..1b37040 --- /dev/null +++ b/examples/annotation_set.json @@ -0,0 +1,40 @@ +{ + "id": "urn:srcos:annotation-set:community_truth_demo_labels", + "type": "AnnotationSet", + "specVersion": "2.0.0", + "name": "Community Truth Demo Labels", + "description": "Synthetic factuality and fallacy labels for a governed training/evaluation path.", + "subjectRefs": ["urn:srcos:data-product:community_truth_demo"], + "labelingProjectRef": "urn:srcos:labeling-project:community_truth_demo", + "annotations": [ + { + "annotationId": "ann-001", + "targetRef": "urn:srcos:content:demo-article-001", + "label": "factuality", + "value": "supported", + "span": null, + "sourceRef": "urn:srcos:user:demo-reviewer", + "confidence": 0.88, + "createdAt": "2026-05-01T19:00:00Z" + }, + { + "annotationId": "ann-002", + "targetRef": "urn:srcos:content:demo-article-001", + "label": "fallacy", + "value": "none-observed", + "span": { "start": 0, "end": 120 }, + "sourceRef": "urn:srcos:user:demo-reviewer", + "confidence": 0.73, + "createdAt": "2026-05-01T19:01:00Z" + } + ], + "governance": { + "trainingAllowed": true, + "evaluationAllowed": true, + "policyRef": "urn:srcos:policy:annotation-training-demo", + "licensePolicyRef": "urn:srcos:license-policy:demo-open-review", + "annotationReliabilityScore": 0.81 + }, + "derivedDatasetRefs": ["urn:srcos:dataset:community_truth_demo_training"], + "evidenceRef": "urn:srcos:evidence:community_truth_demo_annotations" +} \ No newline at end of file diff --git a/examples/data_contract.json b/examples/data_contract.json new file mode 100644 index 0000000..7a62060 --- /dev/null +++ b/examples/data_contract.json @@ -0,0 +1,30 @@ +{ + "id": "urn:srcos:data-contract:community_truth_demo", + "type": "DataContract", + "specVersion": "2.0.0", + "name": "Community Truth Demo Contract", + "description": "Contract for demo factuality and annotation-derived data product.", + "ownerRef": "urn:srcos:community:lattice-demo", + "schemaRefs": ["urn:srcos:schema:community_truth_demo_v1"], + "semanticRefs": ["urn:srcos:glossary:factuality", "urn:srcos:glossary:information-density"], + "allowedUses": { + "query": true, + "export": false, + "training": true, + "publication": true, + "notes": "Training use allowed only for governed demo models." + }, + "sensitivity": "internal", + "freshness": { + "expectedInterval": "P1D", + "staleAfter": "P7D" + }, + "qualityRef": "urn:srcos:quality-profile:community_truth_demo", + "retention": { + "policyRef": "urn:srcos:policy:demo-retention", + "minimumDays": 30, + "maximumDays": 365 + }, + "policyRef": "urn:srcos:policy:lattice-data-contract-demo", + "evidenceRefs": ["urn:srcos:evidence:community_truth_demo_contract"] +} \ No newline at end of file diff --git a/examples/data_product.json b/examples/data_product.json new file mode 100644 index 0000000..67f4952 --- /dev/null +++ b/examples/data_product.json @@ -0,0 +1,30 @@ +{ + "id": "urn:srcos:data-product:community_truth_demo", + "type": "DataProduct", + "specVersion": "2.0.0", + "name": "Community Truth Demo Data Product", + "description": "Demo data product for Lattice Studio/Data/GovernAI vertical slice.", + "ownerRef": "urn:srcos:community:lattice-demo", + "stewardRefs": ["urn:srcos:user:demo-steward"], + "resourceRefs": ["urn:srcos:asset:community_truth_demo_object_prefix"], + "datasetRefs": ["urn:srcos:dataset:health_obs"], + "contractRef": "urn:srcos:data-contract:community_truth_demo", + "qualityProfileRef": "urn:srcos:quality-profile:community_truth_demo", + "policyRef": "urn:srcos:policy:lattice-data-product-demo", + "licensePolicyRef": "urn:srcos:license-policy:demo-open-review", + "lineageRefs": ["urn:srcos:prov:community_truth_demo_ingest"], + "trust": { + "datasetTrustScore": 0.82, + "provenanceDepth": 3, + "reproducibilityScore": 0.77, + "scoreEvidenceRefs": ["urn:srcos:evidence:community_truth_demo_trust"] + }, + "lifecycle": { + "state": "candidate", + "stateSince": "2026-05-01T19:00:00Z", + "deprecationNoticeRef": null + }, + "consumerImpactRefs": [], + "evidenceRef": "urn:srcos:evidence:community_truth_demo_data_product", + "links": [] +} \ No newline at end of file diff --git a/examples/evaluation_bundle.json b/examples/evaluation_bundle.json new file mode 100644 index 0000000..42d3708 --- /dev/null +++ b/examples/evaluation_bundle.json @@ -0,0 +1,34 @@ +{ + "id": "urn:srcos:evaluation-bundle:community_truth_demo_model_eval", + "type": "EvaluationBundle", + "specVersion": "2.0.0", + "subjectRef": "urn:srcos:model:community_truth_demo_candidate", + "evaluationKind": "model", + "inputRefs": [ + "urn:srcos:data-product:community_truth_demo", + "urn:srcos:dataset:community_truth_demo_evaluation" + ], + "runtimeRef": "urn:srcos:runtime:lattice-python-ml-demo", + "metrics": [ + { + "name": "factuality_f1", + "value": 0.84, + "status": "pass", + "threshold": 0.8, + "evidenceRef": "urn:srcos:evidence:eval-factuality-f1" + }, + { + "name": "grounding_precision", + "value": 0.78, + "status": "warn", + "threshold": 0.8, + "evidenceRef": "urn:srcos:evidence:eval-grounding-precision" + } + ], + "verdict": "needs-review", + "riskTier": "medium", + "evaluatedAt": "2026-05-01T19:05:00Z", + "evaluatorRef": "urn:srcos:agent:lattice-eval-demo", + "policyRef": "urn:srcos:policy:model-eval-demo", + "evidenceRefs": ["urn:srcos:evidence:community_truth_demo_model_eval"] +} \ No newline at end of file diff --git a/examples/factsheet.json b/examples/factsheet.json new file mode 100644 index 0000000..1e195c3 --- /dev/null +++ b/examples/factsheet.json @@ -0,0 +1,33 @@ +{ + "id": "urn:srcos:factsheet:community_truth_demo_model", + "type": "Factsheet", + "specVersion": "2.0.0", + "subjectRef": "urn:srcos:model:community_truth_demo_candidate", + "factsheetKind": "model", + "summary": { + "name": "Community Truth Demo Model", + "purpose": "Classify factuality and annotation quality for the Lattice demo path.", + "ownerRef": "urn:srcos:community:lattice-demo", + "limitations": [ + "Synthetic demo fixture only", + "Not approved for production use" + ] + }, + "lineageRefs": [ + "urn:srcos:data-product:community_truth_demo", + "urn:srcos:annotation-set:community_truth_demo_labels" + ], + "evaluationRefs": ["urn:srcos:evaluation-bundle:community_truth_demo_model_eval"], + "risk": { + "riskTier": "medium", + "riskRefs": ["urn:srcos:risk:demo-factuality-classifier"] + }, + "approval": { + "state": "needs-review", + "workflowRef": "urn:srcos:workflow:model-review-demo", + "decidedAt": "2026-05-01T19:06:00Z", + "decisionRef": null + }, + "policyRefs": ["urn:srcos:policy:model-eval-demo"], + "evidenceRefs": ["urn:srcos:evidence:community_truth_demo_factsheet"] +} \ No newline at end of file diff --git a/examples/publication_artifact.json b/examples/publication_artifact.json new file mode 100644 index 0000000..978dae9 --- /dev/null +++ b/examples/publication_artifact.json @@ -0,0 +1,27 @@ +{ + "id": "urn:srcos:publication-artifact:community_truth_demo_report", + "type": "PublicationArtifact", + "specVersion": "2.0.0", + "title": "Community Truth Demo Reproducible Report", + "abstract": "Synthetic reproducible report package for the Lattice Studio/Data/GovernAI vertical slice.", + "authorRefs": ["urn:srcos:user:demo-author"], + "artifactRefs": { + "dataProductRefs": ["urn:srcos:data-product:community_truth_demo"], + "runtimeRefs": ["urn:srcos:runtime:lattice-python-ml-demo"], + "notebookRefs": ["urn:srcos:notebook-session:community_truth_demo"], + "modelRefs": ["urn:srcos:model:community_truth_demo_candidate"], + "promptRefs": [] + }, + "reproduction": { + "recipeRef": "urn:srcos:recipe:community_truth_demo_report", + "attemptRefs": ["urn:srcos:reproduction-attempt:community_truth_demo_report_001"], + "score": 0.74 + }, + "review": { + "state": "under-review", + "reviewThreadRefs": ["urn:srcos:review-thread:community_truth_demo_report"], + "decisionRef": null + }, + "policyRefs": ["urn:srcos:policy:publication-review-demo"], + "evidenceRefs": ["urn:srcos:evidence:community_truth_demo_report"] +} \ No newline at end of file diff --git a/examples/quality_profile.json b/examples/quality_profile.json new file mode 100644 index 0000000..dbec93d --- /dev/null +++ b/examples/quality_profile.json @@ -0,0 +1,24 @@ +{ + "id": "urn:srcos:quality-profile:community_truth_demo", + "type": "QualityProfile", + "specVersion": "2.0.0", + "subjectRef": "urn:srcos:data-product:community_truth_demo", + "dimensions": [ + { + "name": "completeness", + "score": 0.91, + "status": "pass", + "evidenceRef": "urn:srcos:evidence:quality-completeness" + }, + { + "name": "freshness", + "score": 0.72, + "status": "warn", + "evidenceRef": "urn:srcos:evidence:quality-freshness" + } + ], + "overallScore": 0.82, + "evaluatedAt": "2026-05-01T19:00:00Z", + "profileStatsRefs": ["urn:srcos:profile-stats:community_truth_demo"], + "lineageRefs": ["urn:srcos:prov:community_truth_demo_quality"] +} \ No newline at end of file diff --git a/schemas/AnnotationSet.json b/schemas/AnnotationSet.json new file mode 100644 index 0000000..2cfc2ce --- /dev/null +++ b/schemas/AnnotationSet.json @@ -0,0 +1,58 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://schemas.srcos.ai/v2/AnnotationSet.json", + "title": "AnnotationSet", + "description": "A governed set of community, reviewer, or model annotations that can seed training and evaluation datasets.", + "type": "object", + "additionalProperties": false, + "required": ["id", "type", "specVersion", "subjectRefs", "labelingProjectRef", "annotations", "governance", "evidenceRef"], + "properties": { + "id": { "type": "string", "pattern": "^urn:srcos:annotation-set:" }, + "type": { "const": "AnnotationSet" }, + "specVersion": { "type": "string" }, + "name": { "type": "string" }, + "description": { "type": ["string", "null"] }, + "subjectRefs": { "type": "array", "minItems": 1, "items": { "type": "string", "minLength": 1 }, "uniqueItems": true }, + "labelingProjectRef": { "type": "string", "minLength": 1 }, + "annotations": { + "type": "array", + "minItems": 1, + "items": { + "type": "object", + "additionalProperties": false, + "required": ["annotationId", "targetRef", "label", "sourceRef", "createdAt"], + "properties": { + "annotationId": { "type": "string", "minLength": 1 }, + "targetRef": { "type": "string", "minLength": 1 }, + "label": { "type": "string", "minLength": 1 }, + "value": { "type": ["string", "number", "boolean", "null"] }, + "span": { + "type": ["object", "null"], + "additionalProperties": false, + "properties": { + "start": { "type": "integer", "minimum": 0 }, + "end": { "type": "integer", "minimum": 0 } + } + }, + "sourceRef": { "type": "string", "minLength": 1 }, + "confidence": { "type": "number", "minimum": 0, "maximum": 1 }, + "createdAt": { "type": "string", "format": "date-time" } + } + } + }, + "governance": { + "type": "object", + "additionalProperties": false, + "required": ["trainingAllowed", "evaluationAllowed", "policyRef"], + "properties": { + "trainingAllowed": { "type": "boolean" }, + "evaluationAllowed": { "type": "boolean" }, + "policyRef": { "type": "string", "minLength": 1 }, + "licensePolicyRef": { "type": ["string", "null"] }, + "annotationReliabilityScore": { "type": "number", "minimum": 0, "maximum": 1 } + } + }, + "derivedDatasetRefs": { "type": "array", "items": { "type": "string", "minLength": 1 }, "uniqueItems": true }, + "evidenceRef": { "type": "string", "minLength": 1 } + } +} diff --git a/schemas/DataContract.json b/schemas/DataContract.json new file mode 100644 index 0000000..61da351 --- /dev/null +++ b/schemas/DataContract.json @@ -0,0 +1,53 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://schemas.srcos.ai/v2/DataContract.json", + "title": "DataContract", + "description": "A governed contract for a DataProduct covering schema, semantics, freshness, quality, privacy, retention, and allowed use.", + "type": "object", + "additionalProperties": false, + "required": ["id", "type", "specVersion", "name", "schemaRefs", "ownerRef", "allowedUses", "sensitivity", "freshness", "qualityRef", "policyRef"], + "properties": { + "id": { "type": "string", "pattern": "^urn:srcos:data-contract:" }, + "type": { "const": "DataContract" }, + "specVersion": { "type": "string" }, + "name": { "type": "string", "minLength": 1 }, + "description": { "type": ["string", "null"] }, + "ownerRef": { "type": "string", "minLength": 1 }, + "schemaRefs": { "type": "array", "minItems": 1, "items": { "type": "string", "minLength": 1 }, "uniqueItems": true }, + "semanticRefs": { "type": "array", "items": { "type": "string", "minLength": 1 }, "uniqueItems": true }, + "allowedUses": { + "type": "object", + "additionalProperties": false, + "required": ["query", "export", "training", "publication"], + "properties": { + "query": { "type": "boolean" }, + "export": { "type": "boolean" }, + "training": { "type": "boolean" }, + "publication": { "type": "boolean" }, + "notes": { "type": ["string", "null"] } + } + }, + "sensitivity": { "enum": ["public", "internal", "restricted", "sensitive", "regulated"] }, + "freshness": { + "type": "object", + "additionalProperties": false, + "required": ["expectedInterval", "staleAfter"], + "properties": { + "expectedInterval": { "type": "string" }, + "staleAfter": { "type": "string" } + } + }, + "qualityRef": { "type": "string", "pattern": "^urn:srcos:quality-profile:" }, + "retention": { + "type": "object", + "additionalProperties": false, + "properties": { + "policyRef": { "type": "string" }, + "minimumDays": { "type": "integer", "minimum": 0 }, + "maximumDays": { "type": ["integer", "null"], "minimum": 0 } + } + }, + "policyRef": { "type": "string", "minLength": 1 }, + "evidenceRefs": { "type": "array", "items": { "type": "string", "minLength": 1 }, "uniqueItems": true } + } +} diff --git a/schemas/DataProduct.json b/schemas/DataProduct.json new file mode 100644 index 0000000..b296579 --- /dev/null +++ b/schemas/DataProduct.json @@ -0,0 +1,61 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://schemas.srcos.ai/v2/DataProduct.json", + "title": "DataProduct", + "description": "A governed, consumable data product with ownership, contract, lineage, quality, access, trust, and lifecycle posture for Lattice Studio/Data/GovernAI.", + "type": "object", + "additionalProperties": false, + "required": [ + "id", + "type", + "specVersion", + "name", + "ownerRef", + "resourceRefs", + "contractRef", + "qualityProfileRef", + "policyRef", + "lifecycle", + "evidenceRef" + ], + "properties": { + "id": { "type": "string", "pattern": "^urn:srcos:data-product:" }, + "type": { "const": "DataProduct" }, + "specVersion": { "type": "string" }, + "name": { "type": "string", "minLength": 1 }, + "description": { "type": ["string", "null"] }, + "ownerRef": { "type": "string", "minLength": 1 }, + "stewardRefs": { "type": "array", "items": { "type": "string", "minLength": 1 }, "uniqueItems": true }, + "resourceRefs": { "type": "array", "minItems": 1, "items": { "type": "string", "minLength": 1 }, "uniqueItems": true }, + "datasetRefs": { "type": "array", "items": { "type": "string", "pattern": "^urn:srcos:dataset:" }, "uniqueItems": true }, + "contractRef": { "type": "string", "pattern": "^urn:srcos:data-contract:" }, + "qualityProfileRef": { "type": "string", "pattern": "^urn:srcos:quality-profile:" }, + "policyRef": { "type": "string", "minLength": 1 }, + "licensePolicyRef": { "type": ["string", "null"] }, + "lineageRefs": { "type": "array", "items": { "type": "string", "minLength": 1 }, "uniqueItems": true }, + "trust": { + "type": "object", + "additionalProperties": false, + "required": ["datasetTrustScore", "provenanceDepth", "reproducibilityScore"], + "properties": { + "datasetTrustScore": { "type": "number", "minimum": 0, "maximum": 1 }, + "provenanceDepth": { "type": "integer", "minimum": 0 }, + "reproducibilityScore": { "type": "number", "minimum": 0, "maximum": 1 }, + "scoreEvidenceRefs": { "type": "array", "items": { "type": "string", "minLength": 1 }, "uniqueItems": true } + } + }, + "lifecycle": { + "type": "object", + "additionalProperties": false, + "required": ["state", "stateSince"], + "properties": { + "state": { "enum": ["draft", "candidate", "published", "restricted", "deprecated", "retired"] }, + "stateSince": { "type": "string", "format": "date-time" }, + "deprecationNoticeRef": { "type": ["string", "null"] } + } + }, + "consumerImpactRefs": { "type": "array", "items": { "type": "string", "minLength": 1 }, "uniqueItems": true }, + "evidenceRef": { "type": "string", "minLength": 1 }, + "links": { "type": "array", "items": { "type": "string" } } + } +} diff --git a/schemas/EvaluationBundle.json b/schemas/EvaluationBundle.json new file mode 100644 index 0000000..151b3cb --- /dev/null +++ b/schemas/EvaluationBundle.json @@ -0,0 +1,40 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://schemas.srcos.ai/v2/EvaluationBundle.json", + "title": "EvaluationBundle", + "description": "A governed evaluation record for data products, notebooks, models, prompts, RAG pipelines, agents, or publications.", + "type": "object", + "additionalProperties": false, + "required": ["id", "type", "specVersion", "subjectRef", "evaluationKind", "metrics", "verdict", "evaluatedAt", "evidenceRefs"], + "properties": { + "id": { "type": "string", "pattern": "^urn:srcos:evaluation-bundle:" }, + "type": { "const": "EvaluationBundle" }, + "specVersion": { "type": "string" }, + "subjectRef": { "type": "string", "minLength": 1 }, + "evaluationKind": { "enum": ["data-quality", "model", "prompt", "rag", "agent", "publication", "runtime", "notebook"] }, + "inputRefs": { "type": "array", "items": { "type": "string", "minLength": 1 }, "uniqueItems": true }, + "runtimeRef": { "type": ["string", "null"] }, + "metrics": { + "type": "array", + "minItems": 1, + "items": { + "type": "object", + "additionalProperties": false, + "required": ["name", "value", "status"], + "properties": { + "name": { "type": "string", "minLength": 1 }, + "value": { "type": ["number", "string", "boolean"] }, + "status": { "enum": ["pass", "warn", "fail", "unknown"] }, + "threshold": { "type": ["number", "string", "null"] }, + "evidenceRef": { "type": ["string", "null"] } + } + } + }, + "verdict": { "enum": ["approved", "rejected", "needs-review", "blocked", "informational"] }, + "riskTier": { "enum": ["low", "medium", "high", "critical", "unknown"] }, + "evaluatedAt": { "type": "string", "format": "date-time" }, + "evaluatorRef": { "type": ["string", "null"] }, + "policyRef": { "type": ["string", "null"] }, + "evidenceRefs": { "type": "array", "minItems": 1, "items": { "type": "string", "minLength": 1 }, "uniqueItems": true } + } +} diff --git a/schemas/Factsheet.json b/schemas/Factsheet.json new file mode 100644 index 0000000..976282e --- /dev/null +++ b/schemas/Factsheet.json @@ -0,0 +1,50 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://schemas.srcos.ai/v2/Factsheet.json", + "title": "Factsheet", + "description": "A GovernAI factsheet for model, prompt, agent, data, runtime, notebook, or publication assets.", + "type": "object", + "additionalProperties": false, + "required": ["id", "type", "specVersion", "subjectRef", "factsheetKind", "summary", "lineageRefs", "evaluationRefs", "approval", "evidenceRefs"], + "properties": { + "id": { "type": "string", "pattern": "^urn:srcos:factsheet:" }, + "type": { "const": "Factsheet" }, + "specVersion": { "type": "string" }, + "subjectRef": { "type": "string", "minLength": 1 }, + "factsheetKind": { "enum": ["data", "model", "prompt", "agent", "runtime", "notebook", "publication"] }, + "summary": { + "type": "object", + "additionalProperties": false, + "required": ["name", "purpose", "ownerRef"], + "properties": { + "name": { "type": "string", "minLength": 1 }, + "purpose": { "type": "string", "minLength": 1 }, + "ownerRef": { "type": "string", "minLength": 1 }, + "limitations": { "type": "array", "items": { "type": "string" } } + } + }, + "lineageRefs": { "type": "array", "minItems": 1, "items": { "type": "string", "minLength": 1 }, "uniqueItems": true }, + "evaluationRefs": { "type": "array", "minItems": 1, "items": { "type": "string", "pattern": "^urn:srcos:evaluation-bundle:" }, "uniqueItems": true }, + "risk": { + "type": "object", + "additionalProperties": false, + "properties": { + "riskTier": { "enum": ["low", "medium", "high", "critical", "unknown"] }, + "riskRefs": { "type": "array", "items": { "type": "string", "minLength": 1 }, "uniqueItems": true } + } + }, + "approval": { + "type": "object", + "additionalProperties": false, + "required": ["state", "decidedAt"], + "properties": { + "state": { "enum": ["draft", "needs-review", "approved", "rejected", "blocked", "deprecated"] }, + "workflowRef": { "type": ["string", "null"] }, + "decidedAt": { "type": "string", "format": "date-time" }, + "decisionRef": { "type": ["string", "null"] } + } + }, + "policyRefs": { "type": "array", "items": { "type": "string", "minLength": 1 }, "uniqueItems": true }, + "evidenceRefs": { "type": "array", "minItems": 1, "items": { "type": "string", "minLength": 1 }, "uniqueItems": true } + } +} diff --git a/schemas/PublicationArtifact.json b/schemas/PublicationArtifact.json new file mode 100644 index 0000000..9320b2f --- /dev/null +++ b/schemas/PublicationArtifact.json @@ -0,0 +1,50 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://schemas.srcos.ai/v2/PublicationArtifact.json", + "title": "PublicationArtifact", + "description": "A reproducible publication package tying data, runtime, notebook/code, evaluation, review, and evidence records.", + "type": "object", + "additionalProperties": false, + "required": ["id", "type", "specVersion", "title", "authorRefs", "artifactRefs", "reproduction", "review", "evidenceRefs"], + "properties": { + "id": { "type": "string", "pattern": "^urn:srcos:publication-artifact:" }, + "type": { "const": "PublicationArtifact" }, + "specVersion": { "type": "string" }, + "title": { "type": "string", "minLength": 1 }, + "abstract": { "type": ["string", "null"] }, + "authorRefs": { "type": "array", "minItems": 1, "items": { "type": "string", "minLength": 1 }, "uniqueItems": true }, + "artifactRefs": { + "type": "object", + "additionalProperties": false, + "properties": { + "dataProductRefs": { "type": "array", "items": { "type": "string", "pattern": "^urn:srcos:data-product:" }, "uniqueItems": true }, + "runtimeRefs": { "type": "array", "items": { "type": "string", "minLength": 1 }, "uniqueItems": true }, + "notebookRefs": { "type": "array", "items": { "type": "string", "minLength": 1 }, "uniqueItems": true }, + "modelRefs": { "type": "array", "items": { "type": "string", "minLength": 1 }, "uniqueItems": true }, + "promptRefs": { "type": "array", "items": { "type": "string", "minLength": 1 }, "uniqueItems": true } + } + }, + "reproduction": { + "type": "object", + "additionalProperties": false, + "required": ["recipeRef", "attemptRefs", "score"], + "properties": { + "recipeRef": { "type": "string", "minLength": 1 }, + "attemptRefs": { "type": "array", "items": { "type": "string", "minLength": 1 }, "uniqueItems": true }, + "score": { "type": "number", "minimum": 0, "maximum": 1 } + } + }, + "review": { + "type": "object", + "additionalProperties": false, + "required": ["state"], + "properties": { + "state": { "enum": ["draft", "submitted", "under-review", "accepted", "needs-revision"] }, + "reviewThreadRefs": { "type": "array", "items": { "type": "string", "minLength": 1 }, "uniqueItems": true }, + "decisionRef": { "type": ["string", "null"] } + } + }, + "policyRefs": { "type": "array", "items": { "type": "string", "minLength": 1 }, "uniqueItems": true }, + "evidenceRefs": { "type": "array", "minItems": 1, "items": { "type": "string", "minLength": 1 }, "uniqueItems": true } + } +} diff --git a/schemas/QualityProfile.json b/schemas/QualityProfile.json new file mode 100644 index 0000000..d931890 --- /dev/null +++ b/schemas/QualityProfile.json @@ -0,0 +1,34 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://schemas.srcos.ai/v2/QualityProfile.json", + "title": "QualityProfile", + "description": "Quality and observability profile for a governed data product or derived dataset.", + "type": "object", + "additionalProperties": false, + "required": ["id", "type", "specVersion", "subjectRef", "dimensions", "overallScore", "evaluatedAt"], + "properties": { + "id": { "type": "string", "pattern": "^urn:srcos:quality-profile:" }, + "type": { "const": "QualityProfile" }, + "specVersion": { "type": "string" }, + "subjectRef": { "type": "string", "minLength": 1 }, + "dimensions": { + "type": "array", + "minItems": 1, + "items": { + "type": "object", + "additionalProperties": false, + "required": ["name", "score", "status"], + "properties": { + "name": { "type": "string", "minLength": 1 }, + "score": { "type": "number", "minimum": 0, "maximum": 1 }, + "status": { "enum": ["pass", "warn", "fail", "unknown"] }, + "evidenceRef": { "type": ["string", "null"] } + } + } + }, + "overallScore": { "type": "number", "minimum": 0, "maximum": 1 }, + "evaluatedAt": { "type": "string", "format": "date-time" }, + "profileStatsRefs": { "type": "array", "items": { "type": "string", "minLength": 1 }, "uniqueItems": true }, + "lineageRefs": { "type": "array", "items": { "type": "string", "minLength": 1 }, "uniqueItems": true } + } +} diff --git a/tools/validate_lattice_data_governai_examples.py b/tools/validate_lattice_data_governai_examples.py new file mode 100644 index 0000000..7ba3033 --- /dev/null +++ b/tools/validate_lattice_data_governai_examples.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +import json +from pathlib import Path + +import jsonschema + +ROOT = Path(__file__).resolve().parents[1] +PAIRS = [ + (ROOT / "schemas" / "DataProduct.json", ROOT / "examples" / "data_product.json"), + (ROOT / "schemas" / "DataContract.json", ROOT / "examples" / "data_contract.json"), + (ROOT / "schemas" / "QualityProfile.json", ROOT / "examples" / "quality_profile.json"), + (ROOT / "schemas" / "AnnotationSet.json", ROOT / "examples" / "annotation_set.json"), + (ROOT / "schemas" / "EvaluationBundle.json", ROOT / "examples" / "evaluation_bundle.json"), + (ROOT / "schemas" / "Factsheet.json", ROOT / "examples" / "factsheet.json"), + (ROOT / "schemas" / "PublicationArtifact.json", ROOT / "examples" / "publication_artifact.json"), +] + + +def validate_pair(schema_path: Path, example_path: Path) -> None: + schema = json.loads(schema_path.read_text(encoding="utf-8")) + jsonschema.validators.validator_for(schema).check_schema(schema) + example = json.loads(example_path.read_text(encoding="utf-8")) + jsonschema.validate(example, schema) + + +def main() -> int: + checks: dict[str, bool] = {} + for schema_path, example_path in PAIRS: + validate_pair(schema_path, example_path) + checks[example_path.name] = True + print(json.dumps({"ok": all(checks.values()), "checks": checks}, indent=2, sort_keys=True)) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main())