Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions requirements-test.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ lxml
pandas
pyld
pytest
pytest_bdd
5 changes: 5 additions & 0 deletions test/diffTests/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
import pathlib

# Directory anchors, all derived from the location of this package on disk.
# TEST_FOLDER is two levels above this file; PROJECT_DIR_PATH is its parent.
TEST_FOLDER = pathlib.Path(__file__).parent.parent
PROJECT_DIR_PATH = TEST_FOLDER.parent
# Folder holding the sample data used by the diff tests.
TEST_DATA_DIR = TEST_FOLDER.joinpath("testData", "rdf-differ-data")
20 changes: 20 additions & 0 deletions test/diffTests/features/owl_diff.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Checks that the diff report for two versions of an OWL sample contains the
# expected added / deleted / changed / updated entries.
Feature: OWL diffing

# The two paths are handed to the diff script only when the report is built
# from scratch; when a saved report is reused they serve as a cache key only.
# NOTE(review): these paths differ from test/testData/rdf-differ-data, where
# the README and the saved report live -- confirm they resolve from the
# directory the tests are run in.
Background:
Given the OWL files "tests/test_data/owl/ePO_sample-4.0.0.orig.ttl" and "tests/test_data/owl/ePO_sample-4.0.0.upd.ttl"
And the test prefixes are defined

Scenario Outline: Diffing example resources in the OWL sample
When the diff is run
Then the report should contain the change for "<resource_type>","<instance>","<operation>","<predicate>","<old_value>","<new_value>"

# Column semantics, as interpreted by the step implementation:
#   added / deleted : only resource_type and instance are checked.
#   changed         : predicate is the old property, new_value the new property
#                     (both prefixed names); old_value is not used.
#   updated         : old_value / new_value are the literal values of predicate.
# Empty cells are treated as "no value".
Examples:
| resource_type | instance | operation | predicate | old_value | new_value |
| class | epo:AwardCriterion | added | | | |
| class | epo:AdHocChannel | deleted | | | |
| class | epo:AcquiringCentralPurchasingBody | changed | skos:prefLabel | | rdfs:label |
| class | epo:AwardCriteriaSummary | updated | skos:prefLabel | Award criteria summary | Award criteria summarization |
| datatype_property | epo:describesObjectiveParticipationRules | added | | | |
| datatype_property | epo:describesProfessionRelevantLaw | deleted | | | |
| object_property | epo:followsRulesSetBy | added | | | |
| object_property | epo:exposesChannel | deleted | | | |
Empty file.
221 changes: 221 additions & 0 deletions test/diffTests/steps/test_owl_diff_steps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,221 @@
from enum import Enum
import json
import os
import subprocess
from pathlib import Path

import pytest
from pytest_bdd import given, when, then, scenario, parsers

from diffTests import PROJECT_DIR_PATH, TEST_DATA_DIR


# Shell script that run_diff invokes to produce a diff report.
SCRIPT_PATH = PROJECT_DIR_PATH.joinpath("rdf-differ-ws", "bash", "rdf-differ.sh")
# Value passed to the script's --base-url option; overridable via the environment.
BASE_URL = os.getenv("RDF_DIFFER_BASE_URL", "http://localhost:4030")
# Pre-built report that can stand in for a real run of the script.
SAVED_REPORT = TEST_DATA_DIR.joinpath("ePO_sample-4.0.0-upd_diff-report.json")
# When truthy (the default), run_diff loads SAVED_REPORT instead of running the script.
REUSE_SAVED_REPORT = os.getenv("RDF_DIFFER_REUSE_SAVED_REPORT", "true").lower() in (
    "1",
    "true",
    "yes",
)

# Reports keyed by (old, new) so the diff is produced once, not once per example row.
_diff_cache = {}

# Resource types the report assertions know how to handle.
SUPPORTED_TYPES = ("class", "datatype_property", "object_property")

@scenario("../features/owl_diff.feature", "Diffing example resources in the OWL sample")
def test_owl_diff_feature():
    """Bind the scenario outline of the feature file to pytest; the steps do the work."""


@pytest.fixture
def ctx(tmp_path):
    """Provide a dict that the steps use to hand state to one another.

    It starts out holding only the temporary directory under ``"tmpdir"``.
    """
    shared = {}
    shared["tmpdir"] = tmp_path
    return shared


@given("the test prefixes are defined")
def prefixes(ctx):
    """Store the prefix-to-namespace table under ``ctx["prefixes"]`` and return it.

    The feature file uses prefixed names while the diff reports are RDF/JSON
    without prefixes, so the mapping is hardcoded here for the conversion.
    """
    namespace_by_prefix = {
        "epo": "http://data.europa.eu/a4g/ontology#",
        "skos": "http://www.w3.org/2004/02/skos/core#",
        "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
    }
    ctx["prefixes"] = namespace_by_prefix
    return namespace_by_prefix


@given(parsers.parse('the OWL files "{old}" and "{new}"'))
def owl_files(ctx, old, new):
    """Record the two OWL files to compare as absolute path strings.

    Args:
        ctx: shared step context; receives the ``"old"`` and ``"new"`` entries.
        old: path of the original file as written in the feature file.
        new: path of the updated file as written in the feature file.

    Returns:
        The same ``ctx`` dict.
    """
    # Relative paths are anchored at the current working directory so that the
    # stored values (also used as the cache key in run_diff) are unambiguous.
    ctx["old"] = str(Path(old).absolute())
    ctx["new"] = str(Path(new).absolute())
    return ctx


@when("the diff is run")
def run_diff(ctx):
    """Load the diff report for the configured OWL files into ``ctx["report"]``.

    The report is taken from the first source that applies:

    1. the module-level cache, if this (old, new) pair was already processed;
    2. the saved report file, when ``REUSE_SAVED_REPORT`` is set;
    3. a fresh run of the diff script, writing JSON into the temporary directory.

    Raises:
        AssertionError: if the script exits with a non-zero status or the
            report file does not exist.
    """
    old = ctx["old"]
    new = ctx["new"]

    # we keep a record of already run diffs to speed up tests
    # (the cache is filled at the end of this function)
    key = (old, new)
    if key in _diff_cache:
        ctx["report"] = _diff_cache[key]
        return

    here = os.path.dirname(__file__)
    if REUSE_SAVED_REPORT:
        # use pre-existing report -- for faster testing/debugging of this test,
        # skipping the building of the report
        report_file = Path(os.path.abspath(os.path.join(here, SAVED_REPORT)))
    else:
        # run full workflow producing JSON output into temporary dir -- this should be the normal way
        # WARNING: as this runs an async call, sometimes this can fail due to race conditions
        # (the Celery task queue may be empty if called too fast or too late)
        script = os.path.abspath(os.path.join(here, SCRIPT_PATH))
        outdir = str(ctx["tmpdir"])
        profile = "owl-core-en-only"
        result = subprocess.run(
            [
                script,
                "--base-url",
                BASE_URL,
                "--old",
                old,
                "--new",
                new,
                "--ap",
                profile,
                "--template",
                "json",
                "--output",
                outdir,
                "full",
            ],
            # Output must be captured, otherwise result.stderr/result.stdout are
            # None and the failure message below carries no diagnostics.
            capture_output=True,
            text=True,
        )

        assert (
            result.returncode == 0
        ), f"Diff script failed: {result.stderr}\n{result.stdout}"
        report_file = Path(outdir) / "diff.json"

    assert report_file.exists(), f"Report file not found: {report_file}"
    # JSON is read as UTF-8 explicitly rather than with the platform default.
    with open(report_file, encoding="utf-8") as fh:
        report = json.load(fh)
    ctx["report"] = report
    _diff_cache[key] = report


def expand(prefixed, prefixes):
    """Turn a prefixed name such as ``skos:prefLabel`` into a full IRI.

    ``None`` and strings without a colon are returned unchanged. Only the
    first colon separates prefix from local name.

    Raises:
        ValueError: if the prefix is not a key of *prefixes*.
    """
    if prefixed is None or ":" not in prefixed:
        return prefixed
    prefix, _sep, local_name = prefixed.partition(":")
    if prefix in prefixes:
        return prefixes[prefix] + local_name
    raise ValueError(f"Unknown prefix: {prefix}")


def camel_to_snake(name: str) -> str:
    """Convert a camelCase or PascalCase name to snake_case.

    Every uppercase character is lowercased and preceded by an underscore,
    except at the very start of the name, so ``prefLabel`` and ``PrefLabel``
    both become ``pref_label``.
    """
    pieces = []
    for index, ch in enumerate(name):
        if ch.isupper():
            # No separator before a leading capital: it would yield "_pref_label".
            if index:
                pieces.append("_")
            pieces.append(ch.lower())
        else:
            pieces.append(ch)
    return "".join(pieces)

def build_query_key(operation: str, resource_type: str, prop_snake: str) -> str:
    """Build the report key of the per-property query for the given inputs.

    The ``datatype_`` and ``object_`` markers are removed from *resource_type*
    (in that order) before it is placed in the key.
    """
    kind = resource_type
    for marker in ("datatype_", "object_"):
        kind = kind.replace(marker, "")
    return f"{operation}_property_{kind}_{prop_snake}.rq"

# this is only possible in Behave (e.g. {predicate:NullableString})
# @parse.with_pattern(r'.*')
# def parse_nullable_string(text):
# return text
# register_type(NullableString=parse_nullable_string)


# pytest-bdd currently lacks support for optional parameters (empty cells in the feature) in parse, so we use a regex trick
# @then(parsers.parse('the report should contain the change for "{type}","{instance}","{operation}","{predicate}","{old_value}","{new_value}"'))
def _bindings_for(report, key):
    """Return the result bindings stored under *key*, failing if the key is absent."""
    assert key in report, f"Missing key {key} in report"
    return report[key].get("results", {}).get("bindings", [])


def _binding_value(binding, variable):
    """Return the ``value`` of *variable* in *binding*, or None when it is not bound."""
    return binding.get(variable, {}).get("value")


def _property_binding(report, prefixes, operation, resource_type, instance, predicate):
    """Return the binding for *instance* in the per-property query result of *predicate*."""
    # An empty or unprefixed predicate cell would otherwise surface as an
    # AttributeError/ValueError from split(); fail with a readable message instead.
    assert predicate is not None and ":" in predicate, (
        f"Operation {operation} requires a prefixed predicate, got {predicate!r}"
    )
    prop_local = predicate.split(":", 1)[1]
    key = build_query_key(operation, resource_type, camel_to_snake(prop_local))
    bindings = _bindings_for(report, key)
    full_instance = expand(instance, prefixes)
    binding = next(
        (b for b in bindings if _binding_value(b, "resource") == full_instance),
        None,
    )
    assert binding is not None, f"No binding for instance {full_instance} in {key}"
    return binding


def _assert_binding_value(binding, variable, expected):
    """Fail unless *variable* in *binding* has exactly the *expected* value."""
    actual = _binding_value(binding, variable)
    assert actual == expected, f"{variable} mismatch: expected {expected}, got {actual}"


# Empty table cells must match too, hence a regex with ``[^"]*`` groups
# instead of ``parsers.parse``.
@then(
    parsers.re(
        r'the report should contain the change for "(?P<resource_type>[^"]*)","(?P<instance>[^"]*)","(?P<operation>[^"]*)","(?P<predicate>[^"]*)","(?P<old_value>[^"]*)","(?P<new_value>[^"]*)"'
    )
)
def assert_report_contains(ctx, resource_type, instance, operation, predicate, old_value, new_value):
    """Assert that the diff report contains the change described by one example row.

    Args:
        ctx: shared step context; must hold ``"report"`` and ``"prefixes"``.
        resource_type: one of ``SUPPORTED_TYPES`` (``data_property`` is accepted
            as an alias of ``datatype_property``).
        instance: prefixed name of the resource the change applies to.
        operation: ``added``, ``deleted``, ``changed`` or ``updated``.
        predicate: prefixed property name; required for ``changed``/``updated``.
        old_value: previous literal value (``updated`` only).
        new_value: new property as a prefixed name (``changed``) or new
            literal value (``updated``).

    Raises:
        AssertionError: if the expected entry is missing or mismatching, or if
            the resource type / operation combination is not supported.
    """
    report = ctx.get("report")
    prefixes = ctx.get("prefixes")

    assert report is not None, "Report not found in context"
    assert prefixes is not None, "Prefixes not found in context"

    # normalize inputs: blank cells become None
    predicate = predicate.strip() or None
    new_value = new_value.strip() or None
    old_value = old_value.strip() or None
    if resource_type == "data_property":
        resource_type = "datatype_property"

    if resource_type not in SUPPORTED_TYPES or operation not in (
        "added",
        "deleted",
        "changed",
        "updated",
    ):
        raise AssertionError(
            f"Unsupported combination: resource_type={resource_type}, operation={operation}"
        )

    if operation in ("added", "deleted"):
        # unified handling for added/deleted instances
        key = f"{operation}_instance_{resource_type}.rq"
        bindings = _bindings_for(report, key)
        full_instance = expand(instance, prefixes)
        assert any(
            _binding_value(b, "resource") == full_instance for b in bindings
        ), f"{operation.capitalize()} {resource_type} {full_instance} not found in {key}"
    elif operation == "changed":
        binding = _property_binding(
            report, prefixes, operation, resource_type, instance, predicate
        )
        # the given predicate is the old property, the given new_value the new property
        expected_old = expand(predicate, prefixes)
        expected_new = expand(new_value, prefixes)
        _assert_binding_value(binding, "oldProperty", expected_old)
        _assert_binding_value(binding, "newProperty", expected_new)
    else:  # operation == "updated"
        binding = _property_binding(
            report, prefixes, operation, resource_type, instance, predicate
        )
        # old/new literal values of the given predicate of the given instance
        _assert_binding_value(binding, "oldValue", old_value)
        _assert_binding_value(binding, "newValue", new_value)
54 changes: 54 additions & 0 deletions test/testData/rdf-differ-data/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Minimal ePO test data for OWL-core profile

Given the following versions of a dataset:

- **old:** `ePO_sample-4.0.0.orig.ttl`
- **new:** `ePO_sample-4.0.0.upd.ttl`

The **new** file is a _combined_ OWL and SHACL file: it also contains
embedded SHACL data, for testing retrieval of certain constraint information
for added resources, such as the domain, range and cardinality, which would
otherwise not be supported/available in the OWL-core profile.

The following changes result from comparing **old** to **new**. Here, _redundant_
marks a change that appears redundantly in the existing diffing/reporting (as a consequence of
another listed change), and _not captured_ marks a change that does not appear in it at all. The latter relates to complex cases that are not supported:

1. added class **epo:AwardCriterion**
2. class **epo:AwardCriterion** added `skos:prefLabel` (redundant, from added class)
3. class **epo:AwardCriterion** added `skos:definition` (redundant, from added class)
4. class **epo:AwardCriterion** added `rdfs:subClassOf` (redundant, from added class)
5. class **epo:AwardCriterion** added `rdfs:isDefinedBy` (redundant, from added class)
6. deleted class **epo:AdHocChannel**
7. class **epo:AdHocChannel** deleted `skos:prefLabel` (redundant, from deleted class)
8. class **epo:AdHocChannel** deleted `skos:definition` (redundant, from deleted class)
9. class **epo:AdHocChannel** deleted `rdfs:subClassOf` (redundant, from deleted class)
10. class **epo:AdHocChannel** deleted `rdfs:isDefinedBy` (redundant, from deleted class)
11. class **epo:AcquiringCentralPurchasingBody** `skos:prefLabel` changed to `rdfs:label`
12. class **epo:AcquiringCentralPurchasingBody** added `rdfs:label` (redundant, from changed property)
13. class **epo:AcquiringCentralPurchasingBody** deleted `skos:prefLabel` (redundant, from changed property)
14. class **epo:Document** added `skos:prefLabel` lang _es_
1. class **epo:AccessTerm** deleted `skos:prefLabel`
2. class **epo:AwardCriteriaSummary** updated `skos:prefLabel` (new value "Award criteria summarization"; original value "Award criteria summary" moved to `skos:altLabel`)
3. class **epo:AwardCriteriaSummary** changed `skos:prefLabel` to `skos:altLabel` (cross-property move of original `skos:prefLabel` to `skos:altLabel`; could be ignored as the original property was retained with a new value)
4. class **epo:AwardCriteriaSummary** added `skos:altLabel` (redundant, from changed property; could be considered non-redundant if the cross-property move is ignored)
5. added objectProperty **epo:followsRulesSetBy** with domain `epo:PurchaseContract`, range `epo:FrameworkAgreement` and maxCardinality 1
6. objectProperty **epo:followsRulesSetBy** added `skos:prefLabel` (redundant, from added objectProperty)
7. objectProperty **epo:followsRulesSetBy** added `rdfs:isDefinedBy` (redundant, from added objectProperty)
8. deleted objectProperty **epo:exposesChannel**
9. objectProperty **epo:exposesChannel** deleted `skos:prefLabel` (redundant, from deleted objectProperty)
10. objectProperty **epo:exposesChannel** deleted `rdfs:isDefinedBy` (redundant, from deleted objectProperty)
11. objectProperty **epo:exposesInvoiceeChannel** added `rdfs:label`
12. objectProperty **epo:describesResultNotice** added `skos:altLabel`
13. added datatypeProperty **epo:describesObjectiveParticipationRules**
14. datatypeProperty **epo:describesObjectiveParticipationRules** added `skos:prefLabel` (redundant, from added datatypeProperty)
15. datatypeProperty **epo:describesObjectiveParticipationRules** added `rdfs:isDefinedBy` (redundant, from added datatypeProperty)
16. deleted datatypeProperty **epo:describesProfessionRelevantLaw**
17. datatypeProperty **epo:describesProfessionRelevantLaw** deleted `skos:prefLabel` (redundant, from deleted datatypeProperty)
18. datatypeProperty **epo:describesProfessionRelevantLaw** deleted `rdfs:isDefinedBy` (redundant, from deleted datatypeProperty)
19. datatypeProperty **epo:describesProfession** added `rdfs:label` no lang
20. datatypeProperty **epo:describesVerificationMethod** converted to objectProperty (not captured)
21. objectProperty **epo:distributesOffer** deleted `skos:prefLabel` lang (not captured)
22. objectProperty **epo:actsOnBehalfOf** updated `skos:prefLabel` lang _en_ to _de_ (not captured)

The files are used by the test suite specified in [owl_diff.feature](../../diffTests/features/owl_diff.feature) and implemented in [test_owl_diff_steps.py](../../diffTests/steps/test_owl_diff_steps.py). The above description can also be used to facilitate manual tests.
Loading