diff --git a/pyproject.toml b/pyproject.toml
index e0b40b3b3..0394cc2c9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,5 +1,5 @@
[build-system]
-requires = ["setuptools>=61.0", "wheel"]
+requires = ["setuptools>=64.0", "wheel"]
build-backend = "setuptools.build_meta"
[project]
@@ -27,6 +27,7 @@ Github = "https://github.com/ad-freiburg/qlever"
[project.scripts]
"qlever" = "qlever.qlever_main:main"
+"sparql_conformance" = "qlever.qlever_main:main"
[tool.setuptools]
license-files = ["LICENSE"]
diff --git a/src/qlever/__init__.py b/src/qlever/__init__.py
index 1adcd3451..3948849a7 100644
--- a/src/qlever/__init__.py
+++ b/src/qlever/__init__.py
@@ -16,7 +16,7 @@ def snake_to_camel(str):
ENGINE_NAMES = {
"qlever": "QLever",
- "qmdb": "MillenniumDB",
+ "sparql_conformance": "SPARQL Conformance",
}
# Default engine_name = script_name without starting 'q' and capitalized
engine_name = ENGINE_NAMES.get(script_name, script_name[1:].capitalize())
diff --git a/src/qlever/commands/index.py b/src/qlever/commands/index.py
index 0189d70e5..cf82aa59a 100644
--- a/src/qlever/commands/index.py
+++ b/src/qlever/commands/index.py
@@ -178,7 +178,7 @@ def get_input_options_for_json(self, args) -> str:
# Return the concatenated command-line options.
return " ".join(input_options)
- def execute(self, args) -> bool:
+ def execute(self, args, called_from_conformance_test=False) -> bool:
# The mandatory part of the command line (specifying the input, the
# basename of the index, and the settings file). There are two ways
# to specify the input: via a single stream or via multiple streams.
@@ -278,15 +278,16 @@ def execute(self, args) -> bool:
return False
# Check if all of the input files exist.
- for pattern in shlex.split(args.input_files):
- if len(glob.glob(pattern)) == 0:
- log.error(f'No file matching "{pattern}" found')
- log.info("")
- log.info(
- "Did you call `qlever get-data`? If you did, check "
- "GET_DATA_CMD and INPUT_FILES in the QLeverfile"
- )
- return False
+ if not called_from_conformance_test:
+ for pattern in shlex.split(args.input_files):
+ if len(glob.glob(pattern)) == 0:
+ log.error(f'No file matching "{pattern}" found')
+ log.info("")
+ log.info(
+ "Did you call `qlever get-data`? If you did, check "
+ "GET_DATA_CMD and INPUT_FILES in the QLeverfile"
+ )
+ return False
# Check if index files (name.index.*) already exist.
existing_index_files = get_existing_index_files(args.name)
@@ -325,7 +326,7 @@ def execute(self, args) -> bool:
# Run the index command.
try:
- run_command(index_cmd, show_output=True)
+ run_command(index_cmd, show_output=not called_from_conformance_test)
except Exception as e:
log.error(f"Building the index failed: {e}")
return False
diff --git a/src/qlever/commands/query.py b/src/qlever/commands/query.py
index 4681e33dd..3b0de845e 100644
--- a/src/qlever/commands/query.py
+++ b/src/qlever/commands/query.py
@@ -15,6 +15,7 @@ class QueryCommand(QleverCommand):
"""
def __init__(self):
+ self.query_output = ""
self.predefined_queries = {
"all-predicates": (
"SELECT (?p AS ?predicate) (COUNT(?p) AS ?count) "
@@ -84,7 +85,7 @@ def additional_arguments(self, subparser) -> None:
help="Do not print the (end-to-end) time taken",
)
- def execute(self, args) -> bool:
+ def execute(self, args, called_from_conformance_test=False) -> bool:
# Use a predefined query if requested.
if args.predefined_query:
args.query = self.predefined_queries[args.predefined_query]
@@ -105,6 +106,11 @@ def execute(self, args) -> bool:
)
else:
curl_cmd_additions = ""
+ query_type = "query="
+ if called_from_conformance_test:
+ curl_cmd_additions += f" -w '\\nHTTP_STATUS:%{{http_code}}'"
+ query_type = args.content_type
+ curl_cmd_additions += f" --data-urlencode access-token={shlex.quote(args.access_token)}"
# Show what the command will do.
sparql_endpoint = (
@@ -115,7 +121,7 @@ def execute(self, args) -> bool:
curl_cmd = (
f"curl -s {sparql_endpoint}"
f' -H "Accept: {args.accept}"'
- f" --data-urlencode query={shlex.quote(args.query)}"
+ f" --data-urlencode {query_type}{shlex.quote(args.query)}"
f"{curl_cmd_additions}"
)
self.show(curl_cmd, only_show=args.show)
@@ -125,7 +131,10 @@ def execute(self, args) -> bool:
# Launch query.
try:
start_time = time.time()
- run_command(curl_cmd, show_output=True)
+ if called_from_conformance_test:
+ self.query_output = run_command(curl_cmd, return_output=True)
+ else:
+ run_command(curl_cmd, show_output=True)
time_msecs = round(1000 * (time.time() - start_time))
if not args.no_time and args.log_level != "NO_LOG":
log.info("")
diff --git a/src/qlever/commands/start.py b/src/qlever/commands/start.py
index a6811c6ff..8c8e9b75c 100644
--- a/src/qlever/commands/start.py
+++ b/src/qlever/commands/start.py
@@ -166,7 +166,7 @@ def additional_arguments(self, subparser) -> None:
"(default: run in the background with `nohup`)",
)
- def execute(self, args) -> bool:
+ def execute(self, args, called_from_conformance_test=False) -> bool:
# Kill existing server with the same name if so desired.
#
# TODO: This is currently disabled because I never used it once over
@@ -267,8 +267,9 @@ def execute(self, args) -> bool:
f" (Ctrl-C stops following the log, but NOT the server)"
)
log.info("")
- tail_cmd = f"exec tail -f {args.name}.server-log.txt"
- tail_proc = subprocess.Popen(tail_cmd, shell=True)
+ if not called_from_conformance_test:
+ tail_cmd = f"exec tail -f {args.name}.server-log.txt"
+ tail_proc = subprocess.Popen(tail_cmd, shell=True)
while not is_qlever_server_alive(endpoint_url):
time.sleep(1)
@@ -288,7 +289,7 @@ def execute(self, args) -> bool:
return False
# Kill the tail process. NOTE: `tail_proc.kill()` does not work.
- if not args.run_in_foreground:
+ if not args.run_in_foreground and not called_from_conformance_test:
tail_proc.terminate()
# Execute the warmup command.
diff --git a/src/qlever/commands/status.py b/src/qlever/commands/status.py
index a8efed543..5c3593a16 100644
--- a/src/qlever/commands/status.py
+++ b/src/qlever/commands/status.py
@@ -3,6 +3,7 @@
import psutil
from qlever.command import QleverCommand
+from qlever.log import log
from qlever.util import show_process_info
@@ -46,5 +47,5 @@ def execute(self, args) -> bool:
if process_shown:
num_processes_found += 1
if num_processes_found == 0:
- print("No processes found")
+ log.error("No processes found")
return True
diff --git a/src/qlever/qleverfile.py b/src/qlever/qleverfile.py
index 5ea39b21d..3598530aa 100644
--- a/src/qlever/qleverfile.py
+++ b/src/qlever/qleverfile.py
@@ -1,5 +1,6 @@
from __future__ import annotations
+import json
import re
import socket
import subprocess
@@ -21,6 +22,79 @@ class Qleverfile:
Qleverfile + functions for parsing.
"""
+ @staticmethod
+ def get_conformance_arguments(arg):
+ """
+ Define all possible parameters for conformance checks.
+ """
+ args = {}
+ args["name"] = arg(
+ "--name",
+ type=str,
+ required=True,
+ help="Name of the result file of the conformance check.",
+ )
+ args["port"] = arg(
+ "--port",
+ type=str,
+ required=True,
+ help="Port which will be used for the SPARQL sever.",
+ )
+ args["graph_store"] = arg(
+ "--graph-store",
+ type=str,
+ required=True,
+ help="Name of the graph store endpoint used for graph store protocol tests.",
+ )
+ args["testsuite_dir"] = arg(
+ "--testsuite-dir",
+ type=str,
+ default=None,
+ help="Path to the directory of the testsuite.",
+ )
+ args["type_alias"] = arg(
+ "--type-alias",
+ type=json.loads,
+ required=False,
+ help=("Type mismatches that will be considered intended."
+ "ex. \"[['http://www.w3.org/2001/XMLSchema#integer', "
+ "'http://www.w3.org/2001/XMLSchema#int']..."
+ "['http://www.w3.org/2001/XMLSchema#float',"
+ "'http://www.w3.org/2001/XMLSchema#double']]\""
+ ),
+ )
+ args["engine"] = arg(
+ "--engine",
+ type=str,
+ choices=["qlever", "qlever-binaries"],# "mdb", "oxigraph"],
+ default="docker",
+ help="Which system to use to run the tests in"
+ )
+ args["exclude"] = arg(
+ "--exclude",
+ type=lambda s: s.split(","),
+ default=[],
+ help=("Tests (names) or test groups to exclude from the run."
+ "ex. service,entailment,POST - existing graph"
+ )
+ )
+ args["include"] = arg(
+ "--include",
+ type=lambda s: s.split(","),
+ default=None,
+ help=("Tests (names) or test groups to include in the run."
+ "ex. service,entailment,POST - existing graph"
+ )
+ )
+ args["binaries_directory"] = arg(
+ "--binaries-directory",
+ type=str,
+ required=False,
+ help="Path to the directory of the IndexBuilderMain and ServerMain binaries.",
+ default=""
+ )
+ return args
+
@staticmethod
def all_arguments():
"""
@@ -41,6 +115,11 @@ def arg(*args, **kwargs):
server_args = all_args["server"] = {}
runtime_args = all_args["runtime"] = {}
ui_args = all_args["ui"] = {}
+ all_args["conformance"] = Qleverfile.get_conformance_arguments(arg)
+ qlever_binaries_args = all_args["qlever_binaries"] = {}
+ qlever_args = all_args["qlever"] = {}
+ oxigraph_args = all_args["oxigraph"] = {}
+ conformance_ui_args = all_args["conformance_ui"] = {}
data_args["name"] = arg(
"--name", type=str, required=True, help="The name of the dataset"
@@ -365,6 +444,34 @@ def arg(*args, **kwargs):
help="The name of the container used for `qlever ui`",
)
+
+ qlever_args["qlever_image"] = arg(
+ "--qlever-image",
+ type=str,
+ default="docker.io/adfreiburg/qlever",
+ help="The name of the image when running in a container",
+ )
+
+ oxigraph_args["oxigraph_image"] = arg(
+ "--oxigraph-image",
+ type=str,
+ default="ghcr.io/oxigraph/oxigraph",
+ help="The name of the image when running in a container",
+ )
+
+ conformance_ui_args["port"] = arg(
+ '--port',
+ required=False,
+ help='Port of the webserver (default: 3000)',
+ default='3000'
+ )
+ conformance_ui_args["result_directory"] = arg(
+ '--result-directory',
+ required=False,
+ help='Directory containing the results of the SPARQL conformance tests (default: current directory)',
+ default='$(pwd)'
+ )
+
return all_args
@staticmethod
diff --git a/src/sparql_conformance/Dockerfile b/src/sparql_conformance/Dockerfile
new file mode 100644
index 000000000..3a56fd4fb
--- /dev/null
+++ b/src/sparql_conformance/Dockerfile
@@ -0,0 +1,31 @@
+FROM node:18
+ARG UID
+ARG GID
+
+RUN set -eux; \
+ if getent group "${GID}" >/dev/null; then \
+ echo "Using existing group with GID ${GID}"; \
+ else \
+ groupadd -g "${GID}" appgroup; \
+ fi; \
+ if getent passwd "${UID}" >/dev/null; then \
+ echo "Using existing user with UID ${UID}"; \
+ else \
+ useradd -m -u "${UID}" -g "${GID}" appuser; \
+ fi
+
+WORKDIR /app
+
+RUN apt-get update && apt-get install -y git && rm -rf /var/lib/apt/lists/*
+
+RUN git clone https://github.com/SIRDNARch/qlever-conformance-website.git .
+
+RUN mkdir -p /app/public/results
+
+RUN npm install
+
+EXPOSE 3000
+
+USER ${UID}:${GID}
+
+CMD [ "node", "server.js" ]
diff --git a/src/sparql_conformance/Qleverfiles/Qleverfile.qlever b/src/sparql_conformance/Qleverfiles/Qleverfile.qlever
new file mode 100644
index 000000000..3c57e948d
--- /dev/null
+++ b/src/sparql_conformance/Qleverfiles/Qleverfile.qlever
@@ -0,0 +1,16 @@
+# Qleverfile for SPARQL conformance tests using the qlever engine
+# To exclude certain tests or test groups add them like this:
+# EXCLUDE = service-description,service,entailment,POST - existing graph,PUT - mismatched payload,query specifying dataset in both query string and protocol; test for use of protocol-specified dataset
+[data]
+NAME = ConformanceTest
+
+[runtime]
+SYSTEM = docker
+
+[conformance]
+NAME = ConformanceTest
+PORT = 7036
+ENGINE = qlever
+TESTSUITE_DIR = ./testsuite-files/sparql/sparql11/
+GRAPH_STORE = /http-graph-store
+TYPE_ALIAS = [["http://www.w3.org/2001/XMLSchema#int", "http://www.w3.org/2001/XMLSchema#integer"], ["http://www.w3.org/2001/XMLSchema#float", "http://www.w3.org/2001/XMLSchema#double"], ["http://www.w3.org/2001/XMLSchema#decimal", "http://www.w3.org/2001/XMLSchema#double"], ["http://www.w3.org/2001/XMLSchema#decimal", "http://www.w3.org/2001/XMLSchema#float"], ["http://www.w3.org/2001/XMLSchema#string", null]]
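+# To run only specific tests or test groups, set INCLUDE analogously, e.g. (group
+# names are illustrative): INCLUDE = aggregates,bind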
diff --git a/src/sparql_conformance/__init__.py b/src/sparql_conformance/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/sparql_conformance/commands/__init__.py b/src/sparql_conformance/commands/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/sparql_conformance/commands/analyze.py b/src/sparql_conformance/commands/analyze.py
new file mode 100644
index 000000000..017224de4
--- /dev/null
+++ b/src/sparql_conformance/commands/analyze.py
@@ -0,0 +1,91 @@
+from pathlib import Path
+
+from qlever.command import QleverCommand
+from qlever.log import log
+from qlever.util import run_command
+from sparql_conformance.config import Config
+from sparql_conformance.extract_tests import extract_tests
+from sparql_conformance.testsuite import TestSuite
+from sparql_conformance.engines.engine_manager import EngineManager
+from sparql_conformance.engines.qlever import QLeverManager
+
+
+
+def get_engine_manager(engine_type: str) -> EngineManager:
+ """Get the appropriate engine manager for the given engine type"""
+ managers = {
+ # 'qlever-binaries': QLeverBinaryManager,
+ 'qlever': QLeverManager,
+ # 'mdb': MDBManager,
+ # 'oxigraph': OxigraphManager
+ }
+
+ manager_class = managers.get(engine_type)
+ if manager_class is None:
+ raise ValueError(f"Unsupported engine type: {engine_type}")
+
+ return manager_class()
+
+
+class AnalyzeCommand(QleverCommand):
+ """
+ Class for executing the `analyze` command.
+ """
+
+ def __init__(self):
+ self.options = [
+ 'qlever',
+ #'mdb',
+ #'oxigraph'
+ ]
+
+ def description(self) -> str:
+ return "Run SPARQL conformance tests against different engines"
+
+ def should_have_qleverfile(self) -> bool:
+ return False
+
+ def relevant_qleverfile_arguments(self) -> dict[str, list[str]]:
+ return {
+ "conformance": ["name", "port", "engine", "graph_store",
+ "testsuite_dir", "type_alias", "exclude"],
+ "runtime": ["system"],
+ "qlever": ["qlever_image"],
+ "oxigraph": ["oxigraph_image"]
+ }
+
+ def additional_arguments(self, subparser):
+ subparser.add_argument(
+ "test_name",
+ type=str,
+ help="The name of the test to start the server for.",
+ )
+
+ def execute(self, args) -> bool:
+ if args.engine not in self.options:
+ log.error(f"Invalid engine type: {args.engine}")
+ return False
+ image = getattr(args, f"{args.engine}_image", None)
+ if (args.system == "native" and args.binaries_directory == "" or
+ args.system != "native" and image is None):
+ log.error(
+ f"Selected system {args.system} not compatible with image: {image}"
+ f" and binaries_directory: {args.binaries_directory}"
+ )
+ return False
+
+ if args.testsuite_dir is None or not Path(args.testsuite_dir).is_dir():
+ log.error("Could not find testsuite directory. Use `sparql_conformance setup` to download it.")
+ return False
+
+ alias = [tuple(x) for x in args.type_alias] if args.type_alias else []
+ config = Config(image, args.system, args.port, args.graph_store, args.testsuite_dir, alias,
+ args.binaries_directory, args.exclude, args.test_name)
+ print("Preparing ...")
+ if "qlever" in args.engine:
+ print("access_token='abc'")
+ tests, test_count = extract_tests(config)
+ test_suite = TestSuite(name=args.name, tests=tests, test_count=test_count, config=config,
+ engine_manager=get_engine_manager(args.engine))
+ test_suite.analyze()
+ return True
diff --git a/src/sparql_conformance/commands/setup.py b/src/sparql_conformance/commands/setup.py
new file mode 100644
index 000000000..94bd5a93c
--- /dev/null
+++ b/src/sparql_conformance/commands/setup.py
@@ -0,0 +1,117 @@
+import subprocess
+from pathlib import Path
+from os import environ
+
+from qlever.command import QleverCommand
+from qlever.log import log
+from qlever.util import run_command
+
+
+class SetupCommand(QleverCommand):
+ """
+ Class for executing the `setup` command.
+ """
+
+ def __init__(self):
+ self.qleverfiles_path = Path(__file__).parent.parent / "Qleverfiles"
+ self.testsuite_command = f"""
+git clone --sparse --filter=blob:none --depth 1 https://github.com/w3c/rdf-tests ./testsuite-files && \
+git -C ./testsuite-files sparse-checkout set sparql/sparql11
+"""
+
+ def description(self) -> str:
+ return "Setup a pre-configured Qleverfile and download test suite for the SPARQL conformance tests"
+
+ def should_have_qleverfile(self) -> bool:
+ return False
+
+ def relevant_qleverfile_arguments(self) -> dict[str, list[str]]:
+ return {}
+
+ def additional_arguments(self, subparser):
+ subparser.add_argument(
+ "engine_name",
+ type=str,
+ choices=["qlever", "qlever-native"],
+ help="The engine name for the pre-configured Qleverfile to create",
+ )
+
+ def execute(self, args) -> bool:
+ # Show a warning if `QLEVER_IS_RUNNING_IN_CONTAINER` is set.
+ qlever_is_running_in_container = environ.get("QLEVER_IS_RUNNING_IN_CONTAINER")
+ if qlever_is_running_in_container:
+ log.warning(
+ "The environment variable `QLEVER_IS_RUNNING_IN_CONTAINER` is set, "
+ "therefore the Qleverfile is modified to use `SYSTEM = native` "
+ "(since inside the container, QLever should run natively)"
+ )
+ log.info("")
+ # Construct the command line and show it.
+ qleverfile_path = self.qleverfiles_path / f"Qleverfile.{args.engine_name} "
+ setup_config_cmd = f"cat {qleverfile_path}"
+ if qlever_is_running_in_container:
+ setup_config_cmd += (
+ " | sed -E 's/(^SYSTEM[[:space:]]*=[[:space:]]*).*/\\1native/'"
+ )
+ setup_config_cmd += "> Qleverfile"
+ self.show(setup_config_cmd, only_show=args.show)
+ if args.show:
+ return True
+
+ # If there is already a Qleverfile in the current directory, exit.
+ qleverfile_path = Path("Qleverfile")
+ if qleverfile_path.exists():
+ log.error("`Qleverfile` already exists in current directory")
+ log.info("")
+ log.info(
+ "If you want to create a new Qleverfile using "
+ "`sparql_conformance setup`, delete the existing Qleverfile "
+ "first"
+ )
+ return False
+
+ # Copy the Qleverfile to the current directory.
+ try:
+ subprocess.run(
+ setup_config_cmd,
+ shell=True,
+ check=True,
+ stdin=subprocess.DEVNULL,
+ stdout=subprocess.DEVNULL,
+ )
+ except Exception as e:
+ log.error(
+ f'Could not copy "{qleverfile_path}"' f" to current directory: {e}"
+ )
+ return False
+
+ # If we get here, everything went well.
+ log.info(
+ f'Created Qleverfile for engine "{args.engine_name}"'
+ f" in current directory"
+ )
+
+ # If there is already a test suite in the current directory, exit.
+ testsuite_path = Path("./testsuite-files/sparql/sparql11")
+ if testsuite_path.exists():
+ log.error("`Test suite` already exists in current directory")
+ log.info("")
+ log.info(
+ "If you want to download the test suite using "
+ "`sparql_conformance setup`, delete the existing test suite "
+ "first"
+ )
+ return False
+ try:
+ run_command(self.testsuite_command)
+ except Exception as e:
+ log.error(
+ f"Could not download test suite from https://github.com/w3c/rdf-tests to current directory: {e}"
+ )
+ return False
+ return True
+
diff --git a/src/sparql_conformance/commands/test.py b/src/sparql_conformance/commands/test.py
new file mode 100644
index 000000000..0b414b791
--- /dev/null
+++ b/src/sparql_conformance/commands/test.py
@@ -0,0 +1,85 @@
+from pathlib import Path
+
+from qlever.command import QleverCommand
+from qlever.log import log
+from sparql_conformance.config import Config
+from sparql_conformance.extract_tests import extract_tests
+from sparql_conformance.testsuite import TestSuite
+from sparql_conformance.engines.engine_manager import EngineManager
+from sparql_conformance.engines.qlever import QLeverManager
+
+
+def get_engine_manager(engine_type: str) -> EngineManager:
+ """Get the appropriate engine manager for the given engine type"""
+ managers = {
+ 'qlever': QLeverManager,
+ # 'mdb': MDBManager,
+ # 'oxigraph': OxigraphManager
+ }
+
+ manager_class = managers.get(engine_type)
+ if manager_class is None:
+ raise ValueError(f"Unsupported engine type: {engine_type}")
+
+ return manager_class()
+
+
+class TestCommand(QleverCommand):
+ """
+ Class for executing the `test` command.
+ """
+
+ def __init__(self):
+ self.options = [
+ 'qlever',
+ 'qlever-binaries',
+ # 'mdb',
+ # 'oxigraph'
+ ]
+
+ def description(self) -> str:
+ return "Run SPARQL conformance tests against different engines"
+
+ def should_have_qleverfile(self) -> bool:
+ return False
+
+ def relevant_qleverfile_arguments(self) -> dict[str, list[str]]:
+ return {
+ "conformance": ["name", "port", "engine",
+ "graph_store", "testsuite_dir",
+ "type_alias", "exclude", "include", "binaries_directory"],
+ "runtime": ["system"],
+ "qlever": ["qlever_image"],
+ "oxigraph": ["oxigraph_image"]
+ }
+
+ def additional_arguments(self, subparser):
+ pass
+
+ def execute(self, args) -> bool:
+ if args.engine not in self.options:
+ log.error(f"Invalid engine type: {args.engine}")
+ return False
+ image = getattr(args, f"{args.engine}_image", None)
+ if (args.system == "native" and args.binaries_directory == "" or
+ args.system != "native" and image is None):
+ log.error(
+ f"Selected system {args.system} not compatible with image: {image}"
+ f" and binaries_directory: {args.binaries_directory}"
+ )
+ return False
+
+ if args.testsuite_dir is None or not Path(args.testsuite_dir).is_dir():
+ log.error("Could not find testsuite directory. Use `sparql_conformance setup` to download it.")
+ return False
+ alias = [tuple(x) for x in args.type_alias] if args.type_alias else []
+ config = Config(image, args.system, args.port, args.graph_store, args.testsuite_dir, alias,
+ args.binaries_directory, args.exclude, args.include)
+ print("Running testsuite...")
+ tests, test_count = extract_tests(config)
+ test_suite = TestSuite(name=args.name, tests=tests, test_count=test_count, config=config,
+ engine_manager=get_engine_manager(args.engine))
+ test_suite.run()
+ test_suite.generate_json_file()
+ print("Finished!")
+ return True
diff --git a/src/sparql_conformance/commands/visualize.py b/src/sparql_conformance/commands/visualize.py
new file mode 100644
index 000000000..9407f7298
--- /dev/null
+++ b/src/sparql_conformance/commands/visualize.py
@@ -0,0 +1,53 @@
+import os
+from pathlib import Path
+
+from qlever.command import QleverCommand
+from qlever.log import log
+from qlever.util import run_command
+
+
+class VisualizeCommand(QleverCommand):
+ def __init__(self):
+ pass
+
+ def description(self) -> str:
+ return "Visualize SPARQL conformance test results."
+
+ def should_have_qleverfile(self) -> bool:
+ return False
+
+ def relevant_qleverfile_arguments(self) -> dict[str, list[str]]:
+ return {"runtime": ["system"],
+ "conformance_ui": ["result_directory", "port"]
+ }
+
+ def additional_arguments(self, subparser):
+ pass
+
+ def execute(self, args) -> bool:
+ dockerfile_dir = Path(__file__).parent.parent
+ dockerfile_path = dockerfile_dir / "Dockerfile"
+ system = args.system
+ uid = f"UID={os.getuid()}" if hasattr(os, "getuid") else "UID=1000"
+ gid = f"GID={os.getgid()}" if hasattr(os, "getuid") else "GID=1000"
+ build_cmd = f"docker build -f {dockerfile_path} -t visualize-results \
+ --build-arg {uid} --build-arg {gid} {dockerfile_dir}"
+ start_server_cmd = f"docker run -it --rm \
+ -p {args.port}:3000 \
+ -v {args.result_directory}:/app/public/results \
+ visualize-results"
+ image_id = run_command(
+ f"{system} images -q visualize-results", return_output=True
+ )
+ if not image_id:
+ try:
+ run_command(build_cmd, show_output=True)
+ except Exception as e:
+ log.error(f"Building the {system} image visualize-results failed: {e}")
+ return False
+ try:
+ run_command(start_server_cmd, show_output=True)
+ except Exception as e:
+ log.error(f"Building the index failed: {e}")
+ return False
+ return True
\ No newline at end of file
diff --git a/src/sparql_conformance/config.py b/src/sparql_conformance/config.py
new file mode 100644
index 000000000..5333da114
--- /dev/null
+++ b/src/sparql_conformance/config.py
@@ -0,0 +1,40 @@
+import os
+from typing import Dict, Any, Tuple, List
+
+
+class Config:
+ """Configuration class for SPARQL test suite execution."""
+
+ def __init__(self,
+ image: str,
+ system: str,
+ port: str,
+ graph_store: str,
+ testsuite_dir: str,
+ type_alias: List[Tuple[str, str]],
+ binaries_directory: str,
+ exclude: List[str],
+ include: List[str] = None
+ ):
+ self.server_address = 'localhost'
+ self.image = image
+ self.system = system
+ self.port = port
+ self.GRAPHSTORE = graph_store
+ self.alias = type_alias
+ self.path_to_test_suite = os.path.abspath(testsuite_dir)
+ self.path_to_binaries = os.path.abspath(binaries_directory)
+ self.exclude = exclude
+ self.include = include
+ self.number_types = [
+ "http://www.w3.org/2001/XMLSchema#integer",
+ "http://www.w3.org/2001/XMLSchema#double",
+ "http://www.w3.org/2001/XMLSchema#decimal",
+ "http://www.w3.org/2001/XMLSchema#float",
+ "http://www.w3.org/2001/XMLSchema#int",
+ "http://www.w3.org/2001/XMLSchema#decimal"
+ ]
+
+ def to_dict(self) -> Dict[str, Any]:
+ """Convert configuration to dictionary format."""
+ return self.__dict__
diff --git a/src/sparql_conformance/data/data0.rdf b/src/sparql_conformance/data/data0.rdf
new file mode 100644
index 000000000..79bf57fbb
--- /dev/null
+++ b/src/sparql_conformance/data/data0.rdf
@@ -0,0 +1,3 @@
+
+
+
diff --git a/src/sparql_conformance/data/data1.rdf b/src/sparql_conformance/data/data1.rdf
new file mode 100644
index 000000000..8e9c78a90
--- /dev/null
+++ b/src/sparql_conformance/data/data1.rdf
@@ -0,0 +1,4 @@
+
+
+
+
diff --git a/src/sparql_conformance/data/data2.rdf b/src/sparql_conformance/data/data2.rdf
new file mode 100644
index 000000000..8e9c78a90
--- /dev/null
+++ b/src/sparql_conformance/data/data2.rdf
@@ -0,0 +1,4 @@
+
+
+
+
diff --git a/src/sparql_conformance/data/data3.rdf b/src/sparql_conformance/data/data3.rdf
new file mode 100644
index 000000000..8e9c78a90
--- /dev/null
+++ b/src/sparql_conformance/data/data3.rdf
@@ -0,0 +1,4 @@
+
+
+
+
diff --git a/src/sparql_conformance/engines/__init__.py b/src/sparql_conformance/engines/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/sparql_conformance/engines/engine_manager.py b/src/sparql_conformance/engines/engine_manager.py
new file mode 100644
index 000000000..326a2c780
--- /dev/null
+++ b/src/sparql_conformance/engines/engine_manager.py
@@ -0,0 +1,71 @@
+from abc import ABC, abstractmethod
+from typing import Tuple
+
+from sparql_conformance.config import Config
+
+
+class EngineManager(ABC):
+ """Abstract base class for SPARQL engine managers"""
+
+ @abstractmethod
+ def setup(self,
+ config: Config,
+ graph_paths: Tuple[Tuple[str, str], ...]
+ ) -> Tuple[bool, bool, str, str]:
+ """
+ Set up the engine for testing.
+
+ Args:
+ config: Test suite config, used to set engine-specific settings
+ graph_paths: ex. default graph + named graph (('graph_path', '-'),
+ ('graph_path2', 'graph_name2'))
+
+ Returns:
+ index_success (bool), server_success (bool), index_log (str), server_log (str)
+ """
+ pass
+
+ @abstractmethod
+ def cleanup(self, config: Config):
+ """Clean up the test environment after testing"""
+ pass
+
+ @abstractmethod
+ def query(self, config: Config, query: str, result_format: str) -> Tuple[int, str]:
+ """
+ Send a SPARQL query to the engine and return the result
+
+ Args:
+ config: Test suite config, used to set engine-specific settings
+ query: The SPARQL query to be executed
+ result_format: Type of the result
+
+ Returns:
+ HTTP status code (int), query result (str)
+ """
+ pass
+
+ @abstractmethod
+ def update(self, config: Config, query: str) -> Tuple[int, str]:
+ """
+ Send a SPARQL update query to the engine and return the result
+
+ Args:
+ config: Test suite config, used to set engine-specific settings
+ query: The SPARQL update query to be executed
+
+ Returns:
+ HTTP status code (int), response (str)
+ """
+ pass
+
+ @abstractmethod
+ def protocol_endpoint(self) -> str:
+ """
+ Returns the name of the protocol endpoint for the engine.
+ Used to replace the standard endpoint with the
+ engine-specific endpoint in the protocol tests.
+ Ex. POST /sparql/ HTTP/1.1 -> POST /qlever/ HTTP/1.1
+ """
+ pass
+
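+# Typical call sequence, as a sketch (see TestSuite.prepare_test_environment and
+# TestSuite.run_query_tests for the actual flow):
+#
+#   manager: EngineManager = QLeverManager()
+#   index_ok, server_ok, index_log, server_log = manager.setup(config, graph_paths)
+#   status, body = manager.query(config, "SELECT * WHERE { ?s ?p ?o }", "srj")
+#   manager.cleanup(config)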
diff --git a/src/sparql_conformance/engines/qlever.py b/src/sparql_conformance/engines/qlever.py
new file mode 100644
index 000000000..6b37e310f
--- /dev/null
+++ b/src/sparql_conformance/engines/qlever.py
@@ -0,0 +1,157 @@
+import json
+import os
+from pathlib import Path
+from argparse import Namespace
+from typing import Tuple, List
+import requests
+
+
+from qlever.commands.query import QueryCommand
+from qlever.log import mute_log
+from qlever.util import run_command
+from qlever.commands.start import StartCommand
+from qlever.commands.stop import StopCommand
+from sparql_conformance.config import Config
+from sparql_conformance.engines.engine_manager import EngineManager
+from sparql_conformance import util
+from qlever.commands.index import IndexCommand
+from sparql_conformance.rdf_tools import write_ttl_file, delete_ttl_file, rdf_xml_to_turtle
+
+
+class QLeverManager(EngineManager):
+ """Manager for QLever using docker execution"""
+
+ def update(self, config: Config, query: str) -> Tuple[int, str]:
+ return self._query(config, query, "ru", "json")
+
+ def protocol_endpoint(self) -> str:
+ return "sparql"
+
+ def cleanup(self, config: Config):
+ self._stop_server(config)
+ with mute_log():
+ run_command('rm -f qlever-sparql-conformance*')
+
+ def query(self, config: Config, query: str, result_format: str) -> Tuple[int, str]:
+ return self._query(config, query, "rq", result_format)
+
+ def _query(self, config: Config, query: str, query_type: str, result_format: str) -> Tuple[int, str]:
+ content_type = "query=" if query_type == "rq" else "update="
+ args = util.make_args(
+ config,
+ accept=util.get_accept_header(result_format),
+ query=query,
+ content_type=content_type,
+ )
+
+ try:
+ with mute_log():
+ qc = QueryCommand()
+ qc.execute(args, True)
+ body, _, status_line = qc.query_output.rpartition("HTTP_STATUS:")
+ status = int(status_line.strip())
+ return status, body
+ except Exception as e:
+ return 1, str(e)
+
+ def setup(self, config: Config, graph_paths: Tuple[Tuple[str, str], ...]) -> Tuple[bool, bool, str, str]:
+ server_success = False
+ graphs = []
+ for graph_path, graph_name in graph_paths:
+ # Handle rdf files by turning them into turtle format.
+ if graph_path.endswith(".rdf"):
+ graph_path_new = Path(graph_path).name
+ graph_path_new = graph_path_new.replace(".rdf", ".ttl")
+ write_ttl_file(graph_path_new, rdf_xml_to_turtle(graph_path, graph_name))
+ graph_path = graph_path_new
+ else:
+ graph_path = util.copy_graph_to_workdir(graph_path, os.getcwd())
+ graphs.append((graph_path, graph_name))
+
+ index_success, index_log = self._index(config, graphs)
+ if not index_success:
+ return index_success, server_success, index_log, ''
+ else:
+ server_success, server_log = self._start_server(config)
+
+ if not server_success:
+ return index_success, server_success, index_log, server_log
+ for path, name in graphs:
+ delete_ttl_file(path)
+ return index_success, server_success, index_log, server_log
+
+ def _stop_server(self, config: Config) -> Tuple[bool, str]:
+ args = Namespace(
+ name='qlever-sparql-conformance',
+ port=config.port,
+ server_container='qlever-sparql-conformance-server-container',
+ no_containers=config.system == 'native',
+ show=False,
+ cmdline_regex='ServerMain.* -i [^ ]*%%NAME%%'
+ )
+ try:
+ with mute_log(50):
+ result = StopCommand().execute(args)
+ except Exception as e:
+ error_output = str(e)
+ return False, error_output
+ return result, 'Success'
+
+ def _start_server(self, config: Config) -> Tuple[bool, str]:
+ binary = 'ServerMain'
+ binary = binary if config.system != 'native' else Path(config.path_to_binaries, binary)
+ args = util.make_args(
+ config,
+ server_binary=binary,
+ )
+ try:
+ with mute_log():
+ result = StartCommand().execute(args, called_from_conformance_test=True)
+ except Exception as e:
+ error_output = str(e)
+ return False, error_output
+
+ server_log = ''
+ if os.path.exists('./qlever-sparql-conformance.server-log.txt'):
+ server_log = util.read_file('./qlever-sparql-conformance.server-log.txt')
+ return result, server_log
+
+ def _index(self, config: Config, graph_paths: List[Tuple[str, str]]) -> Tuple[bool, str]:
+ binary = 'IndexBuilderMain'
+ index_binary = binary if config.system != 'native' else Path(config.path_to_binaries, binary)
+ args = util.make_args(
+ config,
+ multi_input_json=self._generate_multi_input_json(graph_paths),
+ index_binary=index_binary
+ )
+ try:
+ with mute_log():
+ result = IndexCommand().execute(args=args, called_from_conformance_test=True)
+ except Exception as e:
+ error_output = str(e)
+ return False, error_output
+
+ index_log = ''
+ if os.path.exists("./qlever-sparql-conformance.index-log.txt"):
+ index_log = util.read_file("./qlever-sparql-conformance.index-log.txt")
+ return result, index_log
+
+ def _generate_multi_input_json(self, graph_paths: List[Tuple[str, str]]) -> str:
+ """Generate the JSON input for multi_input_json in IndexCommand.execute()"""
+ input_list = []
+ for graph_path, graph_name in graph_paths:
+ entry = {
+ 'cmd': f'cat {graph_path}',
+ 'graph': graph_name if graph_name else '-',
+ 'format': 'ttl'
+ }
+ input_list.append(entry)
+ return json.dumps(input_list)
+
+ def activate_syntax_test_mode(self, server_address, port):
+ url = f'http://{server_address}:{port}'
+ params = {
+ "access-token": "abc",
+ "syntax-test-mode": "true"
+ }
+ requests.get(url, params)
diff --git a/src/sparql_conformance/extract_tests.py b/src/sparql_conformance/extract_tests.py
new file mode 100644
index 000000000..ac81276f1
--- /dev/null
+++ b/src/sparql_conformance/extract_tests.py
@@ -0,0 +1,214 @@
+import os
+from rdflib import Graph, Namespace, RDF, URIRef
+from typing import Union, Dict, Any, List, Tuple, Optional, Set
+
+from .config import Config
+from .util import uri_to_path, local_name
+from .test_object import TestObject
+
+# Namespaces
+MF = Namespace("http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#")
+DAWGT = Namespace("http://www.w3.org/2001/sw/DataAccess/tests/test-dawg#")
+SD = Namespace("http://www.w3.org/ns/sparql-service-description#")
+RDFS = Namespace("http://www.w3.org/2000/01/rdf-schema#")
+
+
+def collect_tests_by_graph(tests: List[TestObject]) -> Dict[str, Dict[Tuple[Tuple[str, str], ...], List[TestObject]]]:
+ """
+ Groups tests by their graph references and categories.
+ The resulting dictionary has the following structure:
+ {'query': { (('graph_path', 'graph_name'), ...): [Test1, Test2, ...], ...}, ...}
+ """
+ if len(tests) == 0:
+ return {}
+ type_to_category: Dict[str, str] = {
+ 'QueryEvaluationTest': 'query',
+ 'CSVResultFormatTest': 'format',
+ 'UpdateEvaluationTest': 'update',
+ 'PositiveSyntaxTest11': 'syntax',
+ 'NegativeSyntaxTest11': 'syntax',
+ 'PositiveUpdateSyntaxTest11': 'syntax',
+ 'NegativeUpdateSyntaxTest11': 'syntax',
+ 'ProtocolTest': 'protocol',
+ 'GraphStoreProtocolTest': 'graphstoreprotocol',
+ 'ServiceDescriptionTest': 'service',
+ }
+
+ graph_index: Dict[str, Dict[Tuple[Tuple[str, str], ...], List[TestObject]]] = {
+ 'query': dict(),
+ 'format': dict(),
+ 'update': dict(),
+ 'syntax': dict(),
+ 'protocol': dict(),
+ 'graphstoreprotocol': dict(),
+ 'service': dict(),
+ }
+
+ fallback_graph = (os.path.join(tests[0].config.path_to_test_suite, 'property-path', 'empty.ttl'), '-')
+
+ for test in tests:
+ if isinstance(test.action_node, dict):
+ graph_refs: List[Tuple[str, str]] = []
+
+ if "data" in test.action_node:
+ graph_refs.append((test.action_node["data"], "-"))
+ else:
+ graph_refs.append(fallback_graph)
+
+ graph_data = test.action_node.get("graphData")
+ if isinstance(graph_data, list):
+ for entry in graph_data:
+ if isinstance(entry, dict):
+ graph_file = entry.get("graph")
+ label = entry.get("label")
+ if graph_file:
+ graph_refs.append((graph_file, label))
+ else:
+ graph_refs.append((entry, entry.split('/')[-1]))
+ elif isinstance(graph_data, dict):
+ graph_file = graph_data.get("graph")
+ label = graph_data.get("label")
+ if graph_file:
+ graph_refs.append((graph_file, label))
+ elif isinstance(graph_data, str):
+ graph_refs.append((graph_data, graph_data.split('/')[-1]))
+ else:
+ graph_refs = [fallback_graph]
+
+ key = tuple(sorted(set(graph_refs)))
+ category = type_to_category.get(test.type_name)
+ if category:
+ if key in graph_index[category]:
+ graph_index[category][key].append(test)
+ else:
+ graph_index[category][key] = [test]
+
+ return graph_index
+
+
+def parse_node(graph: Graph, node: Any) -> Union[str, Dict[str, Any], None]:
+ """
+ Parse an RDF node and convert it into a plain Python object (URI/literal string or dict).
+ """
+ if isinstance(node, URIRef):
+ return str(node)
+ if node is None:
+ return None
+ if node.__class__.__name__ == "Literal":
+ return str(node)
+
+ value_dict: Dict[str, Union[str, List[str]]] = {}
+ for p, o in graph.predicate_objects(node):
+ key = local_name(str(p))
+ if key == 'request':
+ key = 'query'
+ value = uri_to_path(parse_node(graph, o))
+
+ if key in value_dict:
+ if isinstance(value_dict[key], list):
+ value_dict[key].append(value)
+ else:
+ value_dict[key] = [value_dict[key], value]
+ else:
+ value_dict[key] = value
+
+ return value_dict
+
+
+def load_tests_from_manifest(
+ manifest_path: str,
+ config: Config,
+ visited: Optional[Set[str]] = None
+) -> List[TestObject]:
+ """
+ Load tests from a manifest file and all included sub-manifests.
+ """
+ if visited is None:
+ visited = set()
+
+ manifest_abs_path = os.path.abspath(manifest_path)
+ if manifest_abs_path in visited:
+ return []
+ visited.add(manifest_abs_path)
+
+ g = Graph()
+ g.parse(manifest_abs_path, format="turtle")
+ tests: List[TestObject] = []
+ sub_manifest_paths: List[str] = []
+
+ for collection in g.objects(None, MF.entries):
+ for test_uri in g.items(collection):
+ test_type = g.value(test_uri, RDF.type)
+ if not isinstance(test_type, URIRef):
+ continue
+
+ test_type = str(local_name(test_type))
+ name = g.value(test_uri, MF.name)
+ action_node = g.value(test_uri, MF.action)
+ result_node = g.value(test_uri, MF.result)
+
+ action = parse_node(g, action_node)
+ if isinstance(action, str):
+ action = {"query": action}
+ result = parse_node(g, result_node)
+ if isinstance(result, str):
+ result = {"data": result}
+
+ approval = g.value(test_uri, DAWGT.approval)
+ approved_by = g.value(test_uri, DAWGT.approvedBy)
+ comment = g.value(test_uri, RDFS.comment)
+
+ feature = [str(f) for f in g.objects(test_uri, MF.feature) if isinstance(f, URIRef)]
+ path = manifest_abs_path.split("manifest.ttl")[0]
+ entailment_regime = g.value(test_uri, SD.entailmentRegime)
+ entailment_profile = g.value(test_uri, SD.entailmentProfile)
+ group = os.path.basename(os.path.normpath(path))
+ if str(name) in config.exclude or group in config.exclude:
+ continue
+ if config.include and str(name) not in config.include and group not in config.include:
+ continue
+ tests.append(TestObject(
+ test=str(test_uri),
+ name=str(name),
+ type_name=test_type,
+ group=group,
+ path=path,
+ action_node=action,
+ result_node=result,
+ approval=str(approval) if approval else None,
+ approved_by=str(approved_by) if approved_by else None,
+ comment=str(comment) if comment else None,
+ entailment_regime=str(entailment_regime) if entailment_regime else None,
+ entailment_profile=str(entailment_profile) if entailment_profile else None,
+ feature=feature,
+ config=config,
+ ))
+
+ for include_list in g.objects(None, MF.include):
+ for sub_manifest_uri in g.items(include_list):
+ sub_manifest_path = uri_to_path(sub_manifest_uri)
+ sub_manifest_path = os.path.normpath(sub_manifest_path)
+
+ if os.path.exists(sub_manifest_path):
+ sub_manifest_paths.append(sub_manifest_path)
+ tests.extend(load_tests_from_manifest(
+ sub_manifest_path,
+ config,
+ visited=visited
+ ))
+
+ return tests
+
+
+def extract_tests(config: Config) -> Tuple[Dict[str, Dict[Tuple[Tuple[str, str], ...], List[TestObject]]], int]:
+ """
+ Extract tests from the SPARQL testsuite manifest file.
+
+ Returns:
+ Tuple:
+ - A dictionary grouped by categories
+ - Number of tests
+ """
+ path_to_manifest = os.path.join(config.path_to_test_suite, 'manifest-all.ttl')
+ tests = load_tests_from_manifest(path_to_manifest, config)
+ return collect_tests_by_graph(tests), len(tests)
\ No newline at end of file
diff --git a/src/sparql_conformance/json_tools.py b/src/sparql_conformance/json_tools.py
new file mode 100644
index 000000000..e3e994aae
--- /dev/null
+++ b/src/sparql_conformance/json_tools.py
@@ -0,0 +1,393 @@
+import json
+from typing import List, Tuple
+
+from sparql_conformance.test_object import Status, ErrorMessage
+
+
+def handle_bindings(
+ indent: int,
+ level: int,
+ bindings: list,
+ remaining_bindings: list,
+ mark_red: list) -> str:
+ """
+ Formats the "bindings" list with HTML labels as needed for highlighting.
+
+ This method iterates over a list of bindings and applies HTML labels to those
+ that match any in the reference bindings list. The method handles indentation
+ and formatting to create a readable HTML-formatted string.
+
+ Parameters:
+ indent (int): Number of spaces used for indentation.
+ level (int): Current nesting level for correct indentation.
+ bindings (list): List of binding items to format.
+ remaining_bindings (list): List of binding items used for comparison.
+ mark_red (list): List containing the elements that must be highlighted red.
+
+ Returns:
+ str: An HTML-formatted string representing the bindings list with highlighted items.
+ """
+ mark_red_copy = list(mark_red)
+ parts = ["["]
+ for i, binding in enumerate(bindings):
+ if i > 0:
+ parts.append(", ")
+ parts.append("\n" + " " * (indent * (level + 1)))
+
+ # Apply label if the binding matches any in the reference bindings
+ if binding in remaining_bindings:
+ if binding in mark_red_copy:
+ label = ')"
+
+ def replace_first_match(match):
+ return f'{match.group()}'
+
+ serialized_turtle = re.sub(
+ pattern,
+ replace_first_match,
+ serialized_turtle,
+ flags=re.DOTALL
+ )
+
+ return serialized_turtle
+
+def compare_ttl(expected_ttl: str, query_ttl: str) -> tuple:
+ status = Status.FAILED
+ error_type = ErrorMessage.RESULTS_NOT_THE_SAME
+ expected_graph = rdflib.Graph()
+ query_graph = rdflib.Graph()
+ try:
+ expected_graph.parse(data=expected_ttl, format="turtle")
+ except Exception:
+ expected_ttl = '@prefix foaf: .\n@prefix v: .\n\n' + expected_ttl
+ try:
+ expected_graph.parse(data=expected_ttl, format="turtle")
+ except Exception as e:
+ error_type = ErrorMessage.FORMAT_ERROR
+ escaped_expected = f'{escape(expected_ttl)}'
+ return Status.NOT_TESTED, error_type, escaped_expected, escape(query_ttl), f'{e}', escape(
+ query_ttl)
+
+ try:
+ query_graph.parse(data=query_ttl, format="turtle")
+ except Exception as e:
+ error_type = ErrorMessage.FORMAT_ERROR
+ escaped_query = f'{escape(query_ttl)}'
+ escaped_expected = f'{escape(expected_ttl)}'
+ return status, error_type, escape(
+ expected_ttl), escaped_query, escaped_expected, f'{e}'
+
+ is_isomorphic = expected_graph.isomorphic(query_graph)
+
+ if is_isomorphic:
+ status = Status.PASSED
+ error_type = ""
+ expected_string = escape(expected_ttl)
+ query_string = escape(query_ttl)
+ expected_string_red = ""
+ query_string_red = ""
+ else:
+ triples_in_expected_not_in_query = expected_graph - query_graph
+ triples_in_query_not_in_expected = query_graph - expected_graph
+
+ # Repair namespaces
+ copy_namespaces(expected_graph, triples_in_expected_not_in_query)
+ copy_namespaces(query_graph, triples_in_query_not_in_expected)
+ expected_string = highlight_differences(
+ expected_graph, triples_in_expected_not_in_query)
+ query_string = highlight_differences(
+ query_graph, triples_in_query_not_in_expected)
+
+ no_prefix_escaped_expected = escape(
+ remove_prefix(
+ triples_in_expected_not_in_query.serialize(
+ format="turtle")))
+ no_prefix_escaped_query = escape(
+ remove_prefix(
+ triples_in_query_not_in_expected.serialize(
+ format="turtle")))
+ expected_string_red = f'{no_prefix_escaped_expected}'
+ query_string_red = f'{no_prefix_escaped_query}'
+
+ return status, error_type, expected_string, query_string, expected_string_red, query_string_red
diff --git a/src/sparql_conformance/test_object.py b/src/sparql_conformance/test_object.py
new file mode 100644
index 000000000..7ce6db8eb
--- /dev/null
+++ b/src/sparql_conformance/test_object.py
@@ -0,0 +1,207 @@
+from enum import Enum
+from typing import Optional, List, Union, Dict, Any
+
+from sparql_conformance.config import Config
+from sparql_conformance.util import local_name, read_file, escape
+import os
+import json
+
+class Status(str, Enum):
+ PASSED = "Passed"
+ INTENDED = "Failed: Intended"
+ FAILED = "Failed"
+ NOT_TESTED = "Not tested"
+
+class ErrorMessage(str, Enum):
+ QUERY_EXCEPTION = "QUERY EXCEPTION"
+ REQUEST_ERROR = "REQUEST ERROR"
+ QUERY_ERROR = "QUERY RESULT ERROR"
+ INDEX_BUILD_ERROR = "INDEX BUILD ERROR"
+ SERVER_ERROR = "SERVER ERROR"
+ NOT_TESTED = "NOT TESTED"
+ RESULTS_NOT_THE_SAME = "RESULTS NOT THE SAME"
+ INTENDED_MSG = "Known, intended behaviour that does not comply with SPARQL standard"
+ EXPECTED_EXCEPTION = "EXPECTED: QUERY EXCEPTION ERROR"
+ FORMAT_ERROR = "QUERY RESULT FORMAT ERROR"
+ NOT_SUPPORTED = "QUERY NOT SUPPORTED"
+ CONTENT_TYPE_NOT_SUPPORTED = "CONTENT TYPE NOT SUPPORTED"
+
+ @classmethod
+ def is_query_error(cls, error: str) -> bool:
+ """Subset of query-related errors."""
+ return error in [
+ cls.QUERY_EXCEPTION,
+ cls.QUERY_ERROR,
+ cls.REQUEST_ERROR,
+ cls.NOT_SUPPORTED,
+ cls.CONTENT_TYPE_NOT_SUPPORTED,
+ ]
+
+def process_graph_data(graph_data: Union[None, str, Dict, List], target_dict: Dict[str, str]) -> None:
+ """
+ Process graph data and store results in the target dictionary.
+ Result: {'label': 'graph', ...}
+ """
+ if graph_data is None:
+ return
+
+ if isinstance(graph_data, str):
+ label = graph_data.split('/')[-1]
+ target_dict[label] = read_file(graph_data)
+ return
+
+ if not isinstance(graph_data, List):
+ graph_data = [graph_data]
+
+ for graph_entry in graph_data:
+ if isinstance(graph_entry, dict):
+ graph_path = graph_entry.get('graph')
+ if graph_path:
+ label = graph_entry.get('label', graph_path.split('/')[-1])
+ target_dict[label] = read_file(graph_path)
+ elif isinstance(graph_entry, str):
+ label = graph_entry.split('/')[-1]
+ target_dict[label] = read_file(graph_entry)
+
+
+class TestObject:
+ """Represents a single SPARQL test case with its configuration and results."""
+
+ def __init__(
+ self,
+ test: str,
+ name: str,
+ type_name: str,
+ group: str,
+ path: str,
+ action_node: Optional[Dict[str, Any]],
+ result_node: Optional[Dict[str, Any]],
+ approval: Optional[str],
+ approved_by: Optional[str],
+ comment: Optional[str],
+ entailment_regime: Optional[str],
+ entailment_profile: Optional[str],
+ feature: List[str],
+ config: Config,
+ ):
+ """
+ Initialize a test object with all its properties.
+
+ Args:
+ test: Test URI
+ name: Test name
+ type_name: Type of the test
+ group: Test group identifier
+ path: Path to test files
+ action_node: Node containing test actions
+ result_node: Node containing expected results
+ approval: Test approval status
+ approved_by: Approver identifier
+ comment: Test description/comment
+ entailment_regime: SPARQL entailment regime
+ entailment_profile: Entailment profile
+ feature: List of test features
+ config: Test configuration
+ """
+ self.test = test
+ self.name = name
+ self.type_name = type_name
+ self.group = group
+ self.path = path
+ self.action_node = action_node
+ self.result_node = result_node
+ self.approval = approval
+ self.approved_by = approved_by
+ self.comment = comment
+ self.entailment_regime = entailment_regime
+ self.entailment_profile = entailment_profile
+ self.feature = feature
+ self.config = config
+
+ self.status = Status.NOT_TESTED
+ self.index_files: Dict[str, str] = {}
+ self.result_files: Dict[str, str] = {}
+
+ # Process action node
+ if isinstance(action_node, dict):
+ self.query = local_name(action_node.get('query', 'no query'))
+ self.graph = local_name(action_node.get('data', 'no query'))
+ self.query_file = read_file(os.path.join(self.path, self.query))
+ self.graph_file = read_file(os.path.join(self.path, self.graph))
+ process_graph_data(action_node.get('graphData'), self.index_files)
+ else:
+ self.query = self.graph = self.query_file = self.graph_file = ''
+
+ # Process result node
+ if isinstance(result_node, dict):
+ self.result = local_name(result_node.get('data', 'no query'))
+ self.result_format = self.result[self.result.rfind('.') + 1:]
+ self.result_file = read_file(os.path.join(self.path, self.result))
+ process_graph_data(result_node.get('graphData'), self.result_files)
+ else:
+ self.result = self.result_file = self.result_format = ''
+
+ # Initialize test execution results
+ self.error_type = ''
+ self.expected_html = ''
+ self.got_html = ''
+ self.expected_html_red = ''
+ self.got_html_red = ''
+ self.index_log = ''
+ self.server_log = ''
+ self.server_status = ''
+ self.query_result = ''
+ self.query_answer = ''
+ self.query_log = ''
+ self.query_sent = ''
+ self.protocol = ''
+ self.protocol_sent = ''
+ self.response_extracted = ''
+ self.response = ''
+
+ def __repr__(self) -> str:
+ """Return string representation of the test object."""
+ return f"<TestObject {self.name} ({self.type_name})>"
+
+ def to_dict(self) -> Dict[str, str]:
+ """Convert test object to dictionary format for serialization."""
+ self.graph_file = 'default:<br>' + escape(self.graph_file) + '<br>'
+ for name, graph in self.index_files.items():
+ self.graph_file += f' {name}:<br>{escape(graph)}<br>'
+
+ return {
+ 'test': escape(self.test),
+ 'typeName': escape(self.type_name),
+ 'name': escape(self.name),
+ 'group': escape(self.group),
+ 'feature': escape(';'.join(self.feature)),
+ 'comment': escape(self.comment),
+ 'approval': escape(self.approval),
+ 'approvedBy': escape(self.approved_by),
+ 'query': escape(self.query),
+ 'graph': escape(self.graph),
+ 'queryFile': escape(self.query_file),
+ 'graphFile': self.graph_file,
+ 'resultFile': escape(self.result_file),
+ 'status': escape(self.status),
+ 'errorType': escape(self.error_type),
+ 'expectedHtml': self.expected_html,
+ 'gotHtml': self.got_html,
+ 'expectedHtmlRed': self.expected_html_red,
+ 'gotHtmlRed': self.got_html_red,
+ 'indexLog': escape(self.index_log),
+ 'serverLog': escape(self.server_log),
+ 'serverStatus': escape(self.server_status),
+ 'queryResult': escape(self.query_result),
+ 'queryAnswer': escape(self.query_answer),
+ 'queryLog': escape(self.query_log),
+ 'querySent': escape(self.query_sent),
+ 'regime': escape(self.entailment_regime),
+ 'protocol': escape(self.protocol),
+ 'protocolSent': escape(self.protocol_sent),
+ 'responseExtracted': escape(self.response_extracted),
+ 'response': escape(self.response),
+ 'config': escape(json.dumps(self.config.to_dict(), indent=4)),
+ 'indexFiles': escape(json.dumps(self.index_files, indent=4)),
+ 'resultFiles': escape(json.dumps(self.result_files, indent=4))
+ }
\ No newline at end of file
diff --git a/src/sparql_conformance/testsuite.py b/src/sparql_conformance/testsuite.py
new file mode 100644
index 000000000..e751309cb
--- /dev/null
+++ b/src/sparql_conformance/testsuite.py
@@ -0,0 +1,490 @@
+import bz2
+import json
+import os
+from typing import List, Dict, Tuple
+
+import sparql_conformance.util as util
+from qlever.log import log
+from sparql_conformance.config import Config
+from sparql_conformance.engines.engine_manager import EngineManager
+from sparql_conformance.engines.qlever import QLeverManager
+from sparql_conformance.json_tools import compare_json
+from sparql_conformance.protocol_tools import run_protocol_test
+from sparql_conformance.rdf_tools import compare_ttl
+from sparql_conformance.test_object import TestObject, Status, ErrorMessage
+from sparql_conformance.tsv_csv_tools import compare_sv
+from sparql_conformance.xml_tools import compare_xml
+
+
+class TestSuite:
+ """
+ A class to represent and run a SPARQL conformance test suite against a configured engine.
+ """
+
+ def __init__(self, name: str, tests: Dict[str, Dict[Tuple[Tuple[str, str], ...], List[TestObject]]], test_count, config: Config, engine_manager: EngineManager):
+ """
+ Constructs all the necessary attributes for the TestSuite object.
+
+ Parameters:
+ name (str): Name of the current run.
+ tests: Tests grouped by category and graph set (see extract_tests).
+ test_count (int): Total number of tests.
+ config (Config): Configuration of the test suite run.
+ engine_manager (EngineManager): Engine manager used to run the tests.
+ """
+ self.name = name
+ self.config = config
+ self.tests = tests
+ self.test_count = test_count
+ self.passed = 0
+ self.failed = 0
+ self.passed_failed = 0
+ self.engine_manager = engine_manager
+
+ def evaluate_query(
+ self,
+ expected_string: str,
+ query_result: str,
+ test: TestObject,
+ result_format: str):
+ """
+ Evaluates a query result based on the expected output and the format.
+ """
+ status = Status.FAILED
+ error_type = ErrorMessage.RESULTS_NOT_THE_SAME
+ if result_format == "srx":
+ status, error_type, expected_html, test_html, expected_red, test_red = compare_xml(
+ expected_string, query_result, self.config.alias, self.config.number_types)
+ elif result_format == "srj":
+ status, error_type, expected_html, test_html, expected_red, test_red = compare_json(
+ expected_string, query_result, self.config.alias, self.config.number_types)
+ elif result_format == "csv" or result_format == "tsv":
+ status, error_type, expected_html, test_html, expected_red, test_red = compare_sv(
+ expected_string, query_result, result_format, self.config.alias)
+ elif result_format == "ttl":
+ status, error_type, expected_html, test_html, expected_red, test_red = compare_ttl(
+ expected_string, query_result)
+ else:
+ expected_html = ""
+ test_html = ""
+ expected_red = ""
+ test_red = ""
+
+ self.update_test_status(test, status, error_type)
+ setattr(test, "got_html", test_html)
+ setattr(test, "expected_html", expected_html)
+ setattr(test, "got_html_red", test_red)
+ setattr(test, "expected_html_red", expected_red)
+
+ def evaluate_update(
+ self,
+ expected_graphs,
+ graphs,
+ test: TestObject):
+ """
+ Evaluates the graphs after running the update.
+
+ Parameters:
+ test (TestObject): Object containing the test being run.
+ expected_graphs ([str]]): The expected state of each graph.
+ graphs ([str]): The actual state of our graphs.
+ """
+ status = [Status.FAILED for _ in range(len(expected_graphs))]
+ error_type = [ErrorMessage.RESULTS_NOT_THE_SAME for _ in range(len(expected_graphs))]
+ expected_html = ["" for _ in range(len(expected_graphs))]
+ test_html = ["" for _ in range(len(expected_graphs))]
+ expected_red = ["" for _ in range(len(expected_graphs))]
+ test_red = ["" for _ in range(len(expected_graphs))]
+ assert len(expected_graphs) == len(graphs)
+ for i in range(len(expected_graphs)):
+ status[i], error_type[i], expected_html[i], test_html[i], expected_red[i], test_red[i] = compare_ttl(
+ expected_graphs[i], graphs[i])
+
+ for s, e in zip(status, error_type):
+ if s != Status.PASSED:
+ status[0] = s
+ error_type[0] = e
+ break
+
+ self.update_test_status(test, status[0], error_type[0])
+ t_html = f"default: {test_html[0]}"
+ e_html = f"default: {expected_html[0]}"
+ t_red = f"default: {test_red[0]}"
+ e_red = f"default: {expected_red[0]}"
+ i = 1
+ for key, value in test.result_files.items():
+ t_html += f"
{key}: {test_html[i]}"
+ e_html += f"
{key}: {expected_html[i]}"
+ t_red += f"
{key}: {test_red[i]}"
+ e_red += f"
{key}: {expected_red[i]}"
+ i += 1
+
+ setattr(test, "got_html", t_html)
+ setattr(test, "expected_html", e_html)
+ setattr(test, "got_html_red", t_red)
+ setattr(test, "expected_html_red", e_red)
+
+ def log_for_all_tests(self, list_of_tests: list, attribute: str, log_message: str):
+ """
+ Sets the given attribute (e.g. a log message) on all tests in the list.
+ """
+ for test in list_of_tests:
+ setattr(test, attribute, log_message)
+
+ def update_test_status(
+ self,
+ test: TestObject,
+ status: str,
+ error_type: str):
+ """
+ Updates the status of a test in the test data.
+ """
+ self.log_for_all_tests([test], "status", status)
+ self.log_for_all_tests([test], "error_type", error_type)
+
+ def update_graph_status(
+ self,
+ list_of_tests: list,
+ status: str,
+ error_type: str):
+ """
+ Updates the status for all test of a graph.
+ """
+ for test in list_of_tests:
+ self.update_test_status(test, status, error_type)
+
+ def prepare_test_environment(
+ self,
+ graph_paths: Tuple[Tuple[str, str], ...],
+ list_of_tests: List[TestObject]) -> bool:
+ """
+ Prepares the test environment for a given graph.
+
+ Args:
+ graph_paths: ex. default graph + named graph (('graph_path', '-'), ('graph_path2', 'graph_name2'))
+ list_of_tests: [Test1, Test2, ...]
+
+ Returns:
+ True if the environment is successfully prepared, False otherwise.
+ """
+ self.engine_manager.cleanup(self.config)
+ index_success, server_success, index_log, server_log = self.engine_manager.setup(self.config, graph_paths)
+ if not index_success:
+ self.engine_manager.cleanup(self.config)
+ self.update_graph_status(list_of_tests, Status.FAILED, ErrorMessage.INDEX_BUILD_ERROR)
+ if not server_success:
+ self.engine_manager.cleanup(self.config)
+ self.update_graph_status(list_of_tests, Status.FAILED, ErrorMessage.SERVER_ERROR)
+ if isinstance(self.engine_manager, QLeverManager) and index_success and server_success and "Syntax" in list_of_tests[0].type_name:
+ self.engine_manager.activate_syntax_test_mode(self.config.server_address, self.config.port)
+ self.log_for_all_tests(list_of_tests, "index_log", index_log)
+ self.log_for_all_tests(list_of_tests, "server_log", server_log)
+ return index_success and server_success
+
+ def process_failed_response(self, test, query_response: tuple):
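+        """
+        Derives an error type and a query log from a failed engine response
+        (a (status_code, message) tuple) and marks the test as failed.
+        """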
+ if "exception" in query_response[1]:
+ query_log = json.loads(
+ query_response[1])["exception"].replace(
+ ";", ";\n")
+ error_type = ErrorMessage.QUERY_EXCEPTION
+ elif "HTTP Request" in query_response[1]:
+ error_type = ErrorMessage.REQUEST_ERROR
+ query_log = query_response[1]
+ elif "not supported" in query_response[1]:
+ error_type = ErrorMessage.NOT_SUPPORTED
+ if "content type" in query_response[1]:
+ error_type = ErrorMessage.CONTENT_TYPE_NOT_SUPPORTED
+ query_log = query_response[1]
+ else:
+ error_type = ErrorMessage.QUERY_ERROR
+ query_log = query_response[1]
+ setattr(test, "query_log", query_log)
+ self.update_test_status(test, Status.FAILED, error_type)
+
+ def run_query_tests(self, graphs_list_of_tests):
+ """
+ Executes query tests for each graph in the test suite.
+ """
+ for graph in graphs_list_of_tests:
+ log.info(f"Running query tests for graph / graphs: {graph}")
+ if not self.prepare_test_environment(
+ graph, graphs_list_of_tests[graph]):
+ continue
+
+ for test in graphs_list_of_tests[graph]:
+ log.info(f"Running: {test.name}")
+ query_result = self.engine_manager.query(
+ self.config,
+ test.query_file,
+ test.result_format)
+ if query_result[0] == 200:
+ self.evaluate_query(
+ test.result_file, query_result[1], test, test.result_format)
+ else:
+ self.process_failed_response(test, query_result)
+
+ if os.path.exists("./TestSuite.server-log.txt"):
+ server_log = util.read_file("./TestSuite.server-log.txt")
+ self.log_for_all_tests(
+ graphs_list_of_tests[graph],
+ "server_log",
+ util.remove_date_time_parts(server_log))
+ self.engine_manager.cleanup(self.config)
+
+ def run_update_tests(self, graphs_list_of_tests):
+ """
+ Executes update tests for each graph in the test suite.
+ """
+ for graph in graphs_list_of_tests:
+ log.info(f"Running update tests for graph / graphs: {graph}")
+ for test in graphs_list_of_tests[graph]:
+ log.info(f"Running: {test.name}")
+ if not self.prepare_test_environment(
+ graph, graphs_list_of_tests[graph]):
+ # If the environment is not prepared, skip all tests for this graph.
+ break
+ # Execute the update query.
+ query_update_result = self.engine_manager.update(self.config, test.query_file)
+
+ # If the update query was successful, retrieve the current state of all graphs
+ # and check if the results match the expected results.
+ if query_update_result[0] == 200:
+ actual_state_of_graphs = []
+ expected_state_of_graphs = []
+ # Handle default graph that has no uri
+ construct_graph = self.engine_manager.query(
+ self.config,
+ "CONSTRUCT {?s ?p ?o} WHERE { GRAPH ql:default-graph {?s ?p ?o}}",
+ "ttl")
+ actual_state_of_graphs.append(construct_graph[1])
+ expected_state_of_graphs.append(test.result_file)
+
+ # Handle named graphs.
+ if test.result_files:
+ for graph_label, expected_graph in test.result_files.items():
+ construct_graph = self.engine_manager.query(
+ self.config,
+ f"CONSTRUCT {{?s ?p ?o}} WHERE {{ GRAPH <{graph_label}> {{?s ?p ?o}}}}",
+ "ttl")
+ actual_state_of_graphs.append(construct_graph[1])
+ expected_state_of_graphs.append(expected_graph)
+
+ # Evaluate state of graphs.
+ self.evaluate_update(expected_state_of_graphs, actual_state_of_graphs, test)
+ else:
+ self.process_failed_response(test, query_update_result)
+
+ if os.path.exists("./TestSuite.server-log.txt"):
+ server_log = util.read_file("./TestSuite.server-log.txt")
+ self.log_for_all_tests(
+ graphs_list_of_tests[graph],
+ "server_log",
+ util.remove_date_time_parts(server_log))
+ self.engine_manager.cleanup(self.config)
+
+ def run_syntax_tests(self, graphs_list_of_tests: Dict[Tuple[Tuple[str, str], ...], List[TestObject]]):
+ """
+        Executes syntax tests for each graph in the test suite.
+ """
+ for graph_path in graphs_list_of_tests:
+ log.info(f"Running syntax tests for graph: {graph_path}")
+ if not self.prepare_test_environment(
+ graph_path, graphs_list_of_tests[graph_path]):
+ continue
+
+ for test in graphs_list_of_tests[graph_path]:
+ log.info(f"Running: {test.name}")
+ result_format = "srx"
+ if "construct" in test.name:
+ result_format = "ttl"
+ if "Update" in test.type_name:
+ query_result = self.engine_manager.update(
+ self.config,
+ test.query_file)
+ else:
+ query_result = self.engine_manager.query(
+ self.config,
+ test.query_file,
+ result_format)
+
+ if query_result[0] != 200:
+ self.process_failed_response(test, query_result)
+ else:
+ setattr(test, "query_log", query_result[1])
+ self.update_test_status(test, Status.PASSED, "")
+ if test.type_name == "NegativeSyntaxTest11" or test.type_name == "NegativeUpdateSyntaxTest11":
+ if ErrorMessage.is_query_error(test.error_type):
+ status = Status.PASSED
+ error_type = ""
+ else:
+ status = Status.FAILED
+ error_type = ErrorMessage.EXPECTED_EXCEPTION
+ self.update_test_status(test, status, error_type)
+
+ if os.path.exists("./TestSuite.server-log.txt"):
+ server_log = util.read_file("./TestSuite.server-log.txt")
+ self.log_for_all_tests(
+ graphs_list_of_tests[graph_path],
+ "server_log",
+ util.remove_date_time_parts(server_log))
+ self.engine_manager.cleanup(self.config)
+
+ def run_protocol_tests(self, graphs_list_of_tests: Dict[Tuple[Tuple[str, str], ...], List[TestObject]]):
+ """
+ Executes protocol tests for each graph in the test suite.
+ """
+ for graph_path in graphs_list_of_tests:
+ log.info(f"Running protocol tests for graph: {graph_path}")
+            # Workaround for issue #25: missing data for protocol tests.
+ path_to_data = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data')
+ graph_paths = graph_path
+ for i in range(4):
+ path_to_graph = os.path.join(path_to_data, f"data{i}.rdf")
+ name_of_graph = f"http://kasei.us/2009/09/sparql/data/data{i}.rdf"
+ new_path: Tuple[str, str] = (path_to_graph, name_of_graph)
+ graph_paths = graph_paths + (new_path,)
+ for test in graphs_list_of_tests[graph_path]:
+ log.info(f"Running: {test.name}")
+ if not self.prepare_test_environment(
+ graph_paths, graphs_list_of_tests[graph_path]):
+ break
+ if test.comment:
+ status, error_type, extracted_expected_responses, extracted_sent_requests, got_responses, newpath = run_protocol_test(
+ self.engine_manager, test, test.comment, '')
+
+ if os.path.exists("./TestSuite.server-log.txt"):
+ server_log = util.read_file(
+ "./TestSuite.server-log.txt")
+ self.log_for_all_tests(
+ graphs_list_of_tests[graph_path],
+ "server_log",
+ util.remove_date_time_parts(server_log))
+ self.engine_manager.cleanup(self.config)
+ self.update_test_status(test, status, error_type)
+ else:
+ extracted_sent_requests = ''
+ extracted_expected_responses = ''
+ got_responses = ''
+ setattr(test, "protocol", test.comment)
+ setattr(test, "protocol_sent", extracted_sent_requests)
+ setattr(
+ test,
+ "response_extracted",
+ extracted_expected_responses)
+ setattr(test, "response", got_responses)
+
+ def run_graphstore_protocol_tests(self, graphs_list_of_tests: Dict[Tuple[Tuple[str, str], ...], List[TestObject]]):
+ """
+ Executes graphstore protocol tests for each graph in the test suite.
+ """
+ for graph_path in graphs_list_of_tests:
+ log.info(f'Running graphstore protocol tests for graph: {graph_path}')
+ if not self.prepare_test_environment(
+ graph_path, graphs_list_of_tests[graph_path]):
+ break
+ newpath = '/newpath-not-set'
+ for test in graphs_list_of_tests[graph_path]:
+ log.info(f"Running: {test.name}")
+ if test.comment:
+ status, error_type, extracted_expected_responses, extracted_sent_requests, got_responses, new_newpath = run_protocol_test(
+ self.engine_manager, test, test.comment, newpath)
+ if new_newpath != '':
+ newpath = new_newpath
+ self.update_test_status(test, status, error_type)
+ else:
+ extracted_sent_requests = ''
+ extracted_expected_responses = ''
+ got_responses = ''
+ setattr(test, 'protocol', test.comment)
+ setattr(test, 'protocol_sent', extracted_sent_requests)
+ setattr(
+ test,
+ 'response_extracted',
+ extracted_expected_responses)
+ setattr(test, 'response', got_responses)
+ if os.path.exists('./TestSuite.server-log.txt'):
+ server_log = util.read_file(
+ './TestSuite.server-log.txt')
+ self.log_for_all_tests(
+ graphs_list_of_tests[graph_path],
+ 'server_log',
+ util.remove_date_time_parts(server_log))
+ self.engine_manager.cleanup(self.config)
+
+ def analyze(self):
+ """
+        Builds the index and starts the server for each graph so the running instance can be inspected manually.
+ """
+ graphs_list_of_tests = {k: v for d in self.tests.values() for k, v in d.items()}
+ for graph_path in graphs_list_of_tests:
+ log.info(f"Running server for graph: {graph_path}")
+ if not self.prepare_test_environment(
+ graph_path, graphs_list_of_tests[graph_path]):
+ break
+ print(f"Listening on: {self.config.server_address}:{self.config.port} ...")
+ print("\n" * 3)
+ input("Press Enter to shutdown the server and continue...")
+ self.engine_manager.cleanup(self.config)
+
+ def run(self):
+ """
+ Main method to run all tests.
+ """
+ try:
+ self.run_query_tests(self.tests["query"])
+ self.run_query_tests(self.tests["format"])
+ self.run_update_tests(self.tests["update"])
+ self.run_syntax_tests(self.tests["syntax"])
+ self.run_protocol_tests(self.tests["protocol"])
+ self.run_graphstore_protocol_tests(self.tests["graphstoreprotocol"])
+ except KeyboardInterrupt:
+ log.warning("Interrupted by user.")
+ self.engine_manager.cleanup(self.config)
+
+ def compress_json_bz2(self, input_data, output_filename):
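+        """
+        Writes input_data as indented JSON to a bz2-compressed text file.
+        """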
+ with bz2.open(output_filename, "wt") as zipfile:
+ json.dump(input_data, zipfile, indent=4)
+ log.info("Done writing result file: " + output_filename)
+
+ def generate_json_file(self):
+ """
+ Generates a JSON file with the test results.
+ """
+ os.makedirs("./results", exist_ok=True)
+ file_path = f"./results/{self.name}.json.bz2"
+ data = {}
+
+ for test_format in self.tests:
+ for graph in self.tests[test_format]:
+ for test in self.tests[test_format][graph]:
+ match test.status:
+ case Status.PASSED:
+ self.passed += 1
+ case Status.FAILED:
+ self.failed += 1
+ case Status.INTENDED:
+ self.passed_failed += 1
+                    # Append a number to the name if it is not unique.
+ if test.name in data:
+ i = 1
+ while True:
+ i += 1
+ new_name = f"{test.name} {i}"
+ if new_name in data:
+ continue
+ else:
+ test.name = new_name
+ data[new_name] = test.to_dict()
+ break
+ else:
+ data[test.name] = test.to_dict()
+ data["info"] = {
+ "name": "info",
+ "passed": self.passed,
+ "tests": self.test_count,
+ "failed": self.failed,
+ "passedFailed": self.passed_failed,
+ "notTested": (
+ self.test_count -
+ self.passed -
+ self.failed -
+ self.passed_failed)}
+ log.info("Writing file...")
+ self.compress_json_bz2(data, file_path)
diff --git a/src/sparql_conformance/tsv_csv_tools.py b/src/sparql_conformance/tsv_csv_tools.py
new file mode 100644
index 000000000..20d45e420
--- /dev/null
+++ b/src/sparql_conformance/tsv_csv_tools.py
@@ -0,0 +1,337 @@
+from typing import List, Tuple
+
+from sparql_conformance.util import escape, is_number
+from io import StringIO
+import csv
+from sparql_conformance.test_object import Status, ErrorMessage
+
+def _build_column_mapping(expected_header: list, actual_header: list):
+ """
+ Return a list L which aligns actual[row][L[i]] with expected[row][i].
+ Example: actual: s p o expected: o p s -> L[0] = 2, L[1] = 1, L[2] = 0
+ If no perfect mapping exists, return None.
+ """
+ if len(expected_header) != len(actual_header):
+ return None
+
+ wanted = expected_header
+ have = actual_header
+
+ used = set()
+ mapping = []
+ for name in wanted:
+ idx = None
+ for j, col in enumerate(have):
+ if j in used:
+ continue
+ if col.strip() == name.strip():
+ idx = j
+ break
+ if idx is None:
+ return None
+ used.add(idx)
+ mapping.append(idx)
+ return mapping
+
+
+def _reorder_columns_to_expected(expected_array: list, actual_array: list):
+ """
+ If the first rows (headers) of expected/actual are a permutation of each other,
+ reorder every row of the actual array to match the expected header order.
+ Otherwise, just return actual_array.
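+
+    Example (illustrative):
+        expected [["s", "p"], ["1", "2"]] and actual [["p", "s"], ["2", "1"]]
+        are reordered to [["s", "p"], ["1", "2"]].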
+ """
+ if not expected_array or not actual_array:
+ return actual_array
+
+ expected_header = expected_array[0]
+ actual_header = actual_array[0]
+
+ if sorted(expected_header) != sorted(actual_header):
+ return actual_array
+
+ mapping = _build_column_mapping(expected_header, actual_header)
+ if mapping is None:
+ return actual_array
+
+ def reorder_row(row):
+ return [row[i] if i < len(row) else "" for i in mapping]
+
+ return [reorder_row(r) for r in actual_array]
+
+
+def write_csv_file(file_path: str, csv_rows: list):
+ with open(file_path, "w", newline="") as csvfile:
+ csv_writer = csv.writer(csvfile)
+ csv_writer.writerows(csv_rows)
+
+
+def row_to_string(row: list, separator: str) -> str:
+ """
+ Converts a row (list of values) to a string representation separated by a specified delimiter.
+
+ Parameters:
+ row (list): The row to be converted to a string.
+    separator (str): The separator between the values in the row, "," or "\t".
+
+ Returns:
+ str: A string representation of the row.
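+
+    Example (illustrative):
+        row_to_string(["a", "b,c", "d"], ",")  ->  'a,"b,c",d'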
+ """
+ result = ""
+ index = 0
+ row_length = len(row) - 1
+ for element in row:
+ if index == row_length:
+ delimiter = ""
+ else:
+ delimiter = separator
+ element = str(element)
+ if separator in element:
+ element = "\"" + element + "\""
+ result += element + delimiter
+ index += 1
+ return result
+
+
+def generate_highlighted_string_sv(
+ array: list,
+ remaining: list,
+ mark_red: list,
+ result_type: str) -> str:
+ """
+ Generates a string representation of an array, with specific rows highlighted.
+
+ Parameters:
+ array (list): The array to be converted to a string.
+    remaining (list): The rows that did not match and should be highlighted.
+    mark_red (list): The subset of those rows to be highlighted in red.
+ result_type (str): The type of result (csv or tsv) to determine the separator.
+
+ Returns:
+ str: A string representation of the array with highlighted rows.
+ """
+ separator = "," if result_type == "csv" else "\t"
+
+ result_string = ""
+ for row in array:
+ if row in remaining:
+            if row in mark_red:
+                result_string += '<span class="red">'
+            else:
+                result_string += '<span class="yellow">'
+            result_string += escape(row_to_string(row, separator))
+            result_string += '</span>\n'
+ else:
+ result_string += escape(row_to_string(row, separator)) + "\n"
+ return result_string
+
+
+def compare_values(
+ value1: str,
+ value2: str,
+ use_config: bool,
+ alias: List[Tuple[str, str]],
+ map_bnodes: dict) -> bool:
+ """
+ Compares two values for equality accounting for numeric differences and aliases.
+
+ Parameters:
+ value1 (str): The first value to compare.
+ value2 (str): The second value to compare.
+ use_config (bool): Flag to use configuration for additional comparison logic.
+    alias (List[Tuple[str, str]]): List of datatype alias pairs, e.g. ("int", "integer").
+ map_bnodes (dict): Dictionary mapping the used bnodes.
+
+ Returns:
+ bool: True if the values are considered equal.
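+
+    Example (illustrative):
+        compare_values("30000", "3E4", False, [], {})  ->  True (numeric match)
+        compare_values("_:b1", "_:x7", False, [], {})  ->  True (bnodes get mapped)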
+ """
+ if value1 is None or value2 is None:
+ return False
+ # Blank nodes
+ if len(value1) > 1 and len(
+ value2) > 1 and value1[0] == "_" and value2[0] == "_":
+ if value1 not in map_bnodes and value2 not in map_bnodes:
+ map_bnodes[value1] = value2
+ map_bnodes[value2] = value1
+ return True
+ if map_bnodes.get(value1) == value2 and map_bnodes.get(
+ value2) == value1:
+ return True
+ return False
+ # In most cases the values are in the same representation
+ if value1 == value2:
+ return True
+    # Handle different numeric representations, e.g. 30000 == 3E4
+ if is_number(value1) and is_number(value2):
+ if float(value1) == float(value2):
+ return True
+ else: # Handle exceptions integer = int
+ if use_config and ((value1, value2) in alias or (value2, value1) in alias):
+ return True
+ return False
+
+
+def compare_rows(
+ row1: list,
+ row2: list,
+ use_config: bool,
+ alias: List[Tuple[str, str]],
+ map_bnodes: dict) -> bool:
+ """
+ Compares two rows for equality.
+
+ Parameters:
+ row1 (list): The first row to compare.
+ row2 (list): The second row to compare.
+ use_config (bool): Flag to use configuration for additional comparison logic.
+    alias (List[Tuple[str, str]]): List of datatype alias pairs, e.g. ("int", "integer").
+ map_bnodes (dict): Dictionary mapping the used bnodes.
+
+ Returns:
+ bool: True if the rows are considered equal otherwise False
+ """
+ if len(row1) != len(row2):
+ return False
+
+ for element1, element2 in zip(row1, row2):
+ if not compare_values(
+ element1.split("^")[0],
+ element2.split("^")[0],
+ use_config,
+ alias,
+ map_bnodes):
+ return False
+ return True
+
+
+def compare_array(
+ expected_result: list,
+ result: list,
+ result_copy: list,
+ expected_result_copy: list,
+ use_config: bool,
+ alias: List[Tuple[str, str]],
+ map_bnodes: dict):
+ """
+ Compares two arrays and removes equal rows from both arrays.
+
+ Parameters:
+ expected_result (list): The expected result array.
+ result (list): The actual result array.
+ result_copy (list): A copy of the actual result array for modification.
+ expected_result_copy (list): A copy of the expected result array for modification.
+ use_config (bool): Flag to use configuration for additional comparison logic.
+    alias (List[Tuple[str, str]]): List of datatype alias pairs, e.g. ("int", "integer").
+ map_bnodes (dict): Dictionary mapping the used bnodes.
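+
+    Example (illustrative): with expected [["1"], ["2"]] and actual
+    [["2"], ["3"]], the matching row ["2"] is removed from both copies,
+    leaving expected_result_copy == [["1"]] and result_copy == [["3"]].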
+ """
+ for row1 in result:
+ equal = False
+ row2_delete = None
+ for row2 in expected_result:
+ if compare_rows(row1, row2, use_config, alias, map_bnodes):
+ equal = True
+ row2_delete = row2
+ break
+ if equal:
+ result_copy.remove(row1)
+ expected_result_copy.remove(row2_delete)
+
+
+def convert_csv_tsv_to_array(input_string: str, input_type: str):
+ """
+ Converts a CSV/TSV string to an array of rows.
+
+ Parameters:
+ input_string (str): The CSV/TSV formatted string.
+ input_type (str): The type of the input ('csv' or 'tsv').
+
+ Returns:
+ An array representation of the input string.
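+
+    Example (illustrative): a CSV string with the lines "s,p" and "1,2"
+    yields [["s", "p"], ["1", "2"]]; blank lines are dropped.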
+ """
+ rows = []
+ delimiter = "," if input_type == "csv" else "\t"
+ with StringIO(input_string) as io:
+ reader = csv.reader(io, delimiter=delimiter)
+ for row in reader:
+ # Drop empty rows
+ if not row or not any(cell.strip() for cell in row):
+ continue
+ rows.append(row)
+ return rows
+
+
+def compare_sv(
+ expected_string: str,
+ query_result: str,
+ result_format: str,
+ alias: List[Tuple[str, str]]):
+ """
+ Compares CSV/TSV formatted query result with the expected output.
+
+ Parameters:
+ expected_string (str): Expected CSV/TSV formatted string.
+ query_result (str): Actual CSV/TSV formatted string from the query.
+ result_format (str): Format of the output ('csv' or 'tsv').
+    alias (List[Tuple[str, str]]): List of datatype alias pairs, e.g. ("int", "integer").
+
+ Returns:
+    tuple: (status, error type, expected HTML, query HTML, expected HTML with only the unmatched rows, query HTML with only the unmatched rows)
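+
+    Example (illustrative): if expected and actual contain the same rows in
+    any row order, the status is Status.PASSED; if they only match once the
+    configured aliases are applied, the status is Status.INTENDED.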
+ """
+ map_bnodes = {}
+ status = Status.FAILED
+ error_type = ErrorMessage.RESULTS_NOT_THE_SAME
+
+ expected_array = convert_csv_tsv_to_array(expected_string, result_format)
+ actual_array = convert_csv_tsv_to_array(query_result, result_format)
+
+    # Normalize the actual column order to match the expected header.
+ actual_array = _reorder_columns_to_expected(expected_array, actual_array)
+
+ actual_array_copy = actual_array.copy()
+ expected_array_copy = expected_array.copy()
+ actual_array_mark_red = []
+ expected_array_mark_red = []
+
+ compare_array(
+ expected_array,
+ actual_array,
+ actual_array_copy,
+ expected_array_copy,
+ False,
+ alias,
+ map_bnodes)
+
+ if len(actual_array_copy) == 0 and len(expected_array_copy) == 0:
+ status = Status.PASSED
+ error_type = ""
+ else:
+ actual_array_mark_red = actual_array_copy.copy()
+ expected_array_mark_red = expected_array_copy.copy()
+ compare_array(
+ expected_array_copy,
+ actual_array_copy,
+ actual_array_mark_red,
+ expected_array_mark_red,
+ True,
+ alias,
+ map_bnodes)
+ if len(actual_array_mark_red) == 0 and len(
+ expected_array_mark_red) == 0:
+ status = Status.INTENDED
+ error_type = ErrorMessage.INTENDED_MSG
+
+ expected_html = generate_highlighted_string_sv(
+ expected_array,
+ expected_array_copy,
+ expected_array_mark_red,
+ result_format)
+ actual_html = generate_highlighted_string_sv(
+ actual_array, actual_array_copy, actual_array_mark_red, result_format)
+ expected_html_red = generate_highlighted_string_sv(
+ expected_array_copy,
+ expected_array_copy,
+ expected_array_mark_red,
+ result_format)
+ actual_html_red = generate_highlighted_string_sv(
+ actual_array_copy, actual_array_copy, actual_array_mark_red, result_format)
+
+ return status, error_type, expected_html, actual_html, expected_html_red, actual_html_red
diff --git a/src/sparql_conformance/util.py b/src/sparql_conformance/util.py
new file mode 100644
index 000000000..cd499eee6
--- /dev/null
+++ b/src/sparql_conformance/util.py
@@ -0,0 +1,175 @@
+import re
+import os
+import shutil
+from argparse import Namespace
+from pathlib import Path
+from typing import Optional
+from urllib.parse import urlparse, unquote
+
+from qlever.log import log
+from sparql_conformance.config import Config
+
+
+def make_args(config: Config, **overrides):
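+    """
+    Builds an argparse-style Namespace with default arguments for the qlever
+    commands; keyword overrides replace individual defaults (illustrative:
+    make_args(config, port=7001) changes only the port).
+    """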
+ base = dict(
+ # GENERAL used by more than one.
+ name="qlever-sparql-conformance",
+ host_name=config.server_address,
+ port=config.port,
+ system=config.system,
+ image=config.image,
+ server_container="qlever-sparql-conformance-server-container",
+ access_token="abc",
+ only_pso_and_pos_permutations=False,
+ use_patterns=True,
+ # STOP SERVER.
+ no_containers=config.system == "native",
+ show=False,
+ cmdline_regex="ServerMain.* -i [^ ]*%%NAME%%",
+ # QUERY.
+ sparql_endpoint=None,
+ pin_to_cache=False,
+ no_time=True,
+ predefined_query=None,
+ log_level="ERROR",
+ # START SERVER.
+ description="",
+ text_description="",
+ memory_for_queries="4GB",
+ cache_max_size="1GB",
+ cache_max_size_single_entry="100MB",
+ cache_max_num_entries=1000000,
+ num_threads=1,
+ timeout=None,
+ persist_updates=False,
+ use_text_index="no",
+ warmup_cmd=None,
+ kill_existing_with_same_port=False,
+ no_warmup=True,
+ run_in_foreground=False,
+ # INDEX.
+        index_container="qlever-sparql-conformance-index-container",
+ cat_input_files=None,
+ input_files='*.ttl',
+ format='ttl',
+ settings_json='{ "num-triples-per-batch": 1000000 }',
+ parallel_parsing=False,
+ text_index=None,
+ stxxl_memory=None,
+ parser_buffer_size=None,
+ ulimit=None,
+ overwrite_existing=True,
+ vocabulary_type='on-disk-compressed',
+ encode_as_id=None,
+ )
+ return Namespace(**{**base, **overrides})
+
+def local_name(uri: str) -> str:
+ """Extract the local name from a URI (after # or /)."""
+ if "#" in uri:
+ return uri.split("#")[-1]
+ return uri.split("/")[-1]
+
+
+def uri_to_path(uri):
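+    # Illustrative: "file:///tmp/data.ttl" -> "/tmp/data.ttl";
+    # non-file URIs are returned unchanged.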
+ parsed = urlparse(str(uri))
+ if parsed.scheme != 'file':
+ return uri
+ return unquote(parsed.path)
+
+
+def path_exists(path):
+ if not os.path.exists(path):
+ log.error(f"{path} does not exist!")
+ return False
+ return True
+
+
+def is_number(s):
+ try:
+ float(s)
+ return True
+ except ValueError:
+ return False
+
+
+def escape(string: Optional[str]) -> str:
+ """
+ Takes any string and returns the escaped version to use in html.
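+
+    Example (illustrative): escape('<a>"b"') -> '&lt;a&gt;&quot;b&quot;'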
+ """
+ if string is None:
+ return ''
+    return (string.replace("&", "&amp;")
+                  .replace("<", "&lt;")
+                  .replace(">", "&gt;")
+                  .replace('"', "&quot;")
+                  .replace("'", "&#39;"))
+
+
+def read_file(file_path: str) -> str:
+ """
+ Reads and returns the content of a file.
+
+ If file does not exist return empty string.
+
+ Parameters:
+ file_path (str): The path to the file to be read.
+
+ Returns:
+ str: The content of the file.
+ """
+    try:
+        with open(file_path, "r", encoding="utf-8") as file:
+            data = file.read()
+    except OSError:
+        data = ""
+ return data
+
+
+def remove_date_time_parts(index_log: str) -> str:
+ """
+ Remove date and time from index log.
+ ex. 2023-12-20 14:02:33.089 - INFO: You specified the input format: TTL
+ to: INFO: You specified the input format: TTL
+
+ Parameters:
+ index_log (str): The index log.
+
+ Returns:
+ The index log without time and date as a string.
+ """
+ pattern = r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{3}\s*-"
+ return re.sub(pattern, "", index_log)
+
+
+def copy_graph_to_workdir(file_path: str, workdir: str) -> str:
+ """
+    Copies the file to the Docker working directory and returns the new relative path.
+
+ Args:
+ file_path (str): Path to the source file.
+ workdir (str): Path to the working directory mounted in docker.
+
+ Returns:
+ str: Basename, usable inside the container.
+ """
+ src = Path(file_path).resolve()
+ dest = Path(workdir).resolve() / src.name
+ shutil.copy(src, dest)
+ return src.name
+
+
+def get_accept_header(result_format: str) -> str:
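+    # Maps a short result-format name to the corresponding HTTP Accept
+    # header, e.g. "ttl" -> "text/turtle"; unknown formats fall back to
+    # SPARQL JSON results.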
+ format_headers = {
+ "csv": "text/csv",
+ "tsv": "text/tab-separated-values",
+ "srx": "application/sparql-results+xml",
+ "ttl": "text/turtle",
+ "json": "application/sparql-results+json"
+ }
+ return format_headers.get(result_format, "application/sparql-results+json")
diff --git a/src/sparql_conformance/xml_tools.py b/src/sparql_conformance/xml_tools.py
new file mode 100644
index 000000000..0d4493828
--- /dev/null
+++ b/src/sparql_conformance/xml_tools.py
@@ -0,0 +1,444 @@
+import re
+import xml.etree.ElementTree as ET
+import xml.dom.minidom as md
+from typing import List, Tuple
+
+from sparql_conformance.test_object import Status, ErrorMessage
+from sparql_conformance.util import escape
+
+
+def replace_self_closing_tag(xml: str) -> str:
+ """
+    Takes any xml string and replaces all self-closing xml tags (e.g. <tag/>) with open and close tags (<tag></tag>).
+
+ The regular expression \\w+ matches one or more word characters which is then used in the replacement pattern
+ where \1 refers to the content of the first capture group.
+
+ Parameters:
+ xml (str): The string containing self-closing xml tags.
+
+ Returns:
+ str: xml string without self-closing xml tags
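+
+    Example (illustrative):
+        replace_self_closing_tag("<result/>")  ->  "<result></result>"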
+ """
+ pattern = r"<(\w+)/>"
+ replacement = r"<\1>\1>"
+ return re.sub(pattern, replacement, xml)
+
+
+def highlight_first_occurrence(original: str, string_part: str, label: str) -> str:
+ """
+    Highlights the first occurrence of string_part in original by wrapping it in a highlight tag with the given css class.
+
+ Ensures that if string_part occurs multiple times it does not get double-wrapped.
+
+ Parameters:
+ original (str): Any string
+ string_part (str): A string which might be a part of original
+ label (str): css class of the label
+
+ Returns:
+ str: The original string with the string_part highlighted if found
+ """
+ string_part_escaped = re.escape(string_part)
+    # This stops double-wrapping: only match the first occurrence that is not
+    # already followed by a closing highlight tag.
+    pattern = rf"{string_part_escaped}(?!</span>)"
+
+ def replace_first_match(match):
+        return f'<span class="{label}">{match.group()}</span>'
+ original_highlighted = re.sub(
+ pattern, replace_first_match, original, count=1)
+
+ return original_highlighted
+
+
+def element_to_string(element: ET.Element, escaped_xml: str, label: str):
+ """
+    Converts an element to a string and, if that string occurs in escaped_xml, highlights its first occurrence with the given css label.
+
+ Returns:
+ str: An HTML-escaped XML string with specific elements highlighted.
+ """
+ element_str = ET.tostring(
+ element, encoding="utf-8").decode("utf-8").replace(" />", "/>")
+ element_str = element_str.replace("ns0:", "")
+ escaped_element_str = escape(element_str).rstrip()
+ if escaped_element_str in escaped_xml:
+ return highlight_first_occurrence(
+ escaped_xml, escaped_element_str, label)
+ elif escaped_element_str.replace('"', "'") in escaped_xml:
+ return highlight_first_occurrence(
+ escaped_xml, escaped_element_str.replace(
+ '"', "'"), label)
+ else:
+ element_str = replace_self_closing_tag(element_str)
+ escaped_element_str = escape(element_str).rstrip()
+ if escaped_element_str in escaped_xml:
+ return highlight_first_occurrence(
+ escaped_xml, escaped_element_str, label)
+ return escaped_xml
+
+
+def generate_highlighted_string_xml(
+ original_xml: str,
+ remaining_tree: ET.ElementTree,
+ red_tree: ET.ElementTree,
+ number_types: list) -> str:
+ """
+ This method takes an XML string and an ElementTree object representing a subset of the XML.
+ It escapes the XML string for HTML display and then highlights the elements from the
+    ElementTree within the escaped XML string. Elements to be highlighted are wrapped in a
+    highlight tag carrying the appropriate css class.
+
+ Returns:
+ str: An HTML-escaped XML string with specific elements highlighted.
+ """
+ escaped_xml = escape(original_xml)
+
+ for element in remaining_tree.getroot().findall('.//head/variable'):
+ escaped_xml = element_to_string(element, escaped_xml, "red")
+
+ bool_element = remaining_tree.getroot().find(".//boolean")
+ if bool_element is not None:
+ escaped_xml = element_to_string(bool_element, escaped_xml, "red")
+
+ for element in remaining_tree.getroot().findall('.//result'):
+ label = "yellow"
+ for elem in red_tree.getroot().findall('.//result'):
+ if xml_elements_equal(element, elem, False, [], number_types, {}):
+ label = "red"
+ escaped_xml = element_to_string(element, escaped_xml, label)
+
+ return escaped_xml
+
+
+def strip_namespace(tree: ET.ElementTree) -> ET.ElementTree:
+ """
+ Removes the namespace from the tags in an XML ElementTree.
+
+ Parameters:
+ tree (ET.ElementTree): An XML ElementTree with namespace in the tags.
+
+ Returns:
+ ET.ElementTree: The modified XML ElementTree with namespace removed from tags.
+ """
+ for elem in tree.iter():
+ elem.tag = elem.tag.partition("}")[-1]
+ return tree
+
+
+def generate_html_for_xml(
+ xml1: str,
+ xml2: str,
+ remaining_tree1: ET.ElementTree,
+ remaining_tree2: ET.ElementTree,
+ red_tree1: ET.ElementTree,
+ red_tree2: ET.ElementTree,
+ number_types: list) -> tuple:
+ """
+ Generates HTML representations for two XML strings with specific elements highlighted.
+
+ Returns:
+ tuple (str, str, str, str): A tuple containing four HTML-escaped and highlighted XML strings. (XML1, XML2, XML1 RED, XML2 RED)
+ """
+ strip_namespace(red_tree1)
+ strip_namespace(red_tree2)
+ strip_namespace(remaining_tree1)
+ strip_namespace(remaining_tree2)
+ remaining_tree1_string = ET.tostring(remaining_tree1.getroot(
+ ), encoding='utf-8').decode("utf-8").replace(" />", "/>").replace("ns0:", "")
+ remaining_tree2_string = ET.tostring(remaining_tree2.getroot(
+ ), encoding='utf-8').decode("utf-8").replace(" />", "/>").replace("ns0:", "")
+ highlighted_xml1 = generate_highlighted_string_xml(
+ xml1, remaining_tree1, red_tree1, number_types)
+ highlighted_xml2 = generate_highlighted_string_xml(
+ xml2, remaining_tree2, red_tree2, number_types)
+ highlighted_xml1_only_red = generate_highlighted_string_xml(
+ remaining_tree1_string, remaining_tree1, red_tree1, number_types)
+ highlighted_xml2_only_red = generate_highlighted_string_xml(
+ remaining_tree2_string, remaining_tree2, red_tree2, number_types)
+
+ return highlighted_xml1, highlighted_xml2, highlighted_xml1_only_red, highlighted_xml2_only_red
+
+
+def xml_elements_equal(
+ element1: ET.Element,
+ element2: ET.Element,
+ compare_with_intended_behaviour: bool,
+ alias: List[Tuple[str, str]],
+ number_types: list,
+ map_bnodes: dict) -> bool:
+ """
+ Compares two XML elements for equality in tags, attributes and text.
+
+ Parameters:
+ element1 (ET.Element): The first XML element
+ element2 (ET.Element): The second XML element
+ compare_with_intended_behaviour (bool): Bool to determine whether to use intended behaviour aliases in comparison.
+        alias (List[Tuple[str, str]]): List of datatype alias pairs, e.g. ("int", "integer").
+ number_types (list): List containing all datatypes that should be used as numbers.
+ map_bnodes (dict): Dictionary mapping the used bnodes.
+
+ Returns:
+ bool: True if elements are considered equal and if not False.
+ """
+ if len(list(element1)) != len(list(element2)):
+ return False
+
+ is_number = False
+ if element1.tag != element2.tag:
+ if not compare_with_intended_behaviour or not (element1.tag, element2.tag) in alias and not (element2.tag, element1.tag) in alias:
+ return False
+
+ if element1.attrib != element2.attrib:
+ if isinstance(
+ element1.attrib,
+ dict) != isinstance(
+ element2.attrib,
+ dict):
+ return False
+ if ((element1.attrib.get("datatype") is not None or element2.attrib.get("datatype") != "http://www.w3.org/2001/XMLSchema#string") and
+ (element2.attrib.get("datatype") is not None or element1.attrib.get("datatype") != "http://www.w3.org/2001/XMLSchema#string")):
+ if not isinstance(element1.attrib, dict):
+ if not compare_with_intended_behaviour or not (element1.attrib, element2.attrib) in alias and not (element2.attrib, element1.attrib) in alias:
+ return False
+ if isinstance(element1.attrib, dict):
+ if element1.attrib.get("datatype") is None and element2.attrib.get(
+ "datatype") is None:
+ # Check if language tags are equal, treat them as case-insensitive ex. en-US = en-us
+ xml_lang_key = '{http://www.w3.org/XML/1998/namespace}lang'
+ if xml_lang_key in element1.attrib and xml_lang_key in element2.attrib:
+ if not element1.attrib[xml_lang_key].lower() == element2.attrib[xml_lang_key].lower():
+ return False
+ else:
+ return False
+ else:
+ if not compare_with_intended_behaviour or not (element1.attrib.get("datatype"),
+ element2.attrib.get("datatype")) in alias and not (element2.attrib.get("datatype"),
+ element1.attrib.get("datatype")) in alias:
+ return False
+
+ if (element1.attrib.get("datatype") in number_types) != (
+ element2.attrib.get("datatype") in number_types):
+ return False
+
+ if element1.attrib.get("datatype") in number_types and element2.attrib.get(
+ "datatype") in number_types:
+ is_number = True
+
+ if element1.tail != element2.tail:
+ if (
+ (
+ isinstance(
+ element1.tail,
+ str) and element2.tail is None and not element1.tail.strip() == "") and (
+ isinstance(
+ element2.tail,
+ str) and element1.tail is None and not element2.tail.strip() == "")) or (
+ isinstance(
+ element1.tail,
+ str) and isinstance(
+ element2.tail,
+ str) and element1.tail.strip() != element2.tail.strip()):
+ return False
+
+ if element1.text != element2.text:
+ if element1.tag == "{http://www.w3.org/2005/sparql-results#}bnode":
+ if element1.text not in map_bnodes and element2.text not in map_bnodes:
+ map_bnodes[element1.text] = element2.text
+ map_bnodes[element2.text] = element1.text
+ return all(any(xml_elements_equal(
+ c1,
+ c2,
+ compare_with_intended_behaviour,
+ alias,
+ number_types,
+ map_bnodes) for c2 in element2) for c1 in element1)
+ elif map_bnodes.get(element1.text) == element2.text and map_bnodes.get(element2.text) == element1.text:
+ return all(any(xml_elements_equal(
+ c1,
+ c2,
+ compare_with_intended_behaviour,
+ alias,
+ number_types,
+ map_bnodes) for c2 in element2) for c1 in element1)
+ return False
+ if (element1.text is None and element2.text.strip() == "") or (
+ element2.text is None and element1.text.strip() == ""):
+ return all(any(xml_elements_equal(
+ c1,
+ c2,
+ compare_with_intended_behaviour,
+ alias,
+ number_types,
+ map_bnodes) for c2 in element2) for c1 in element1)
+ if element1.text is None or element2.text is None:
+ return False
+ if element1.text.strip() == element2.text.strip():
+ return all(any(xml_elements_equal(
+ c1,
+ c2,
+ compare_with_intended_behaviour,
+ alias,
+ number_types,
+ map_bnodes) for c2 in element2) for c1 in element1)
+ if is_number:
+ if float(element1.text) == float(element2.text):
+ return all(any(xml_elements_equal(
+ c1,
+ c2,
+ compare_with_intended_behaviour,
+ alias,
+ number_types,
+ map_bnodes) for c2 in element2) for c1 in element1)
+ if not compare_with_intended_behaviour or not (element1.text, element2.text) in alias and not (element2.text, element1.text) in alias:
+ return False
+ return all(any(xml_elements_equal(
+ c1,
+ c2,
+ compare_with_intended_behaviour,
+ alias,
+ number_types,
+ map_bnodes) for c2 in element2) for c1 in element1)
+
+
+def xml_remove_equal_elements(
+ parent1: ET.Element,
+ parent2: ET.Element,
+ use_config: bool,
+ alias: List[Tuple[str, str]],
+ number_types: list,
+ map_bnodes: dict):
+ """
+ Compares and removes equal child elements from two parent XML elements.
+
+ This method iterates over the children of two given parent XML elements and removes
+ matching children from both parents.
+
+ Parameters:
+ parent1 (ET.Element): The first parent XML element.
+ parent2 (ET.Element): The second parent XML element.
+ use_config (bool): Configuration Bool to control comparison behavior.
+        alias (List[Tuple[str, str]]): List of datatype alias pairs, e.g. ("int", "integer").
+ number_types (list): List containing all datatypes that should be used as numbers.
+ map_bnodes (dict): Dictionary mapping the used bnodes.
+ """
+ for child1 in list(parent1):
+ for child2 in list(parent2):
+ if xml_elements_equal(
+ child1,
+ child2,
+ use_config,
+ alias,
+ number_types,
+ map_bnodes):
+ parent1.remove(child1)
+ parent2.remove(child2)
+ break
+
+
+def compare_xml(
+ expected_xml: str,
+ query_xml: str,
+ alias: List[Tuple[str, str]],
+ number_types: list) -> tuple:
+ """
+ Compares two XML documents, identifies differences and generates HTML representations.
+
+ This method compares two XML documents and identifies differences.
+ It removes equal elements in both documents and generates HTML representations highlighting the remaining differences.
+
+ Parameters:
+ expected_xml (str): The expected XML content as a string.
+ query_xml (str): The query XML content as a string.
+        alias (List[Tuple[str, str]]): List of datatype alias pairs, e.g. ("int", "integer").
+ number_types (list): List containing all datatypes that should be used as numbers.
+
+ Returns:
+ tuple (str,str,str,str,str,str): A tuple containing the status, error type and the strings XML1, XML2, XML1 RED, XML2 RED
+ """
+ query_xml = md.parseString(query_xml).toxml()
+ query_xml = md.parseString(query_xml).toprettyxml(indent=" ")
+ map_bnodes = {}
+ status = Status.FAILED
+ error_type = ErrorMessage.RESULTS_NOT_THE_SAME
+ expected_tree = ET.ElementTree(ET.fromstring(expected_xml))
+ query_tree = ET.ElementTree(ET.fromstring(query_xml))
+
+    # Compare and remove equal elements in the result header (head).
+ head1 = expected_tree.find(
+ ".//{http://www.w3.org/2005/sparql-results#}head")
+ head2 = query_tree.find(".//{http://www.w3.org/2005/sparql-results#}head")
+ if head1 is not None and head2 is not None:
+ xml_remove_equal_elements(
+ head1,
+ head2,
+ False,
+ alias,
+ number_types,
+ map_bnodes)
+
+    # Compare and remove equal boolean results.
+ expected_bool = expected_tree.find(
+ ".//{http://www.w3.org/2005/sparql-results#}boolean")
+ query_bool = query_tree.find(
+ ".//{http://www.w3.org/2005/sparql-results#}boolean")
+ if expected_bool is not None and query_bool is not None:
+ if str(expected_bool.text) == str(query_bool.text):
+ expected_tree.getroot().remove(expected_bool)
+ query_tree.getroot().remove(query_bool)
+
+ expected_bool = expected_tree.find(
+ ".//{http://www.w3.org/2005/sparql-results#}boolean")
+ query_bool = query_tree.find(
+ ".//{http://www.w3.org/2005/sparql-results#}boolean")
+
+    # Compare and remove equal elements in the results section.
+ results1 = expected_tree.find(
+ ".//{http://www.w3.org/2005/sparql-results#}results")
+ results2 = query_tree.find(
+ ".//{http://www.w3.org/2005/sparql-results#}results")
+
+ if results1 is not None and results2 is not None:
+ xml_remove_equal_elements(
+ results1,
+ results2,
+ False,
+ alias,
+ number_types,
+ map_bnodes)
+ # Copy expected_tree
+ expected_tree_string = ET.tostring(expected_tree.getroot())
+ copied_expected_tree = ET.ElementTree(ET.fromstring(expected_tree_string))
+
+ # Copy query_tree
+ query_tree_string = ET.tostring(query_tree.getroot())
+ copied_query_tree = ET.ElementTree(ET.fromstring(query_tree_string))
+ if (
+ results1 is not None and results2 is not None and len(
+ list(results1)) == 0 and len(
+ list(results2)) == 0 and len(
+ list(head1)) == 0 and len(
+ list(head2)) == 0) or (
+ results1 is None and results2 is None and head1 is None and head2 is None and expected_bool is None and query_bool is None):
+ status = Status.PASSED
+ error_type = ""
+ else:
+ if results1 is not None and results2 is not None:
+ xml_remove_equal_elements(
+ results1,
+ results2,
+ True,
+ alias,
+ number_types,
+ map_bnodes)
+
+ if len(list(results1)) == 0 and len(list(results2)) == 0:
+ status = Status.INTENDED
+ error_type = ErrorMessage.INTENDED_MSG
+ elif expected_bool is None and query_bool is None:
+ status = Status.PASSED
+ error_type = ""
+
+ expected_string, query_string, expected_string_red, query_string_red = generate_html_for_xml(
+ expected_xml, query_xml, copied_expected_tree, copied_query_tree, expected_tree, query_tree, number_types)
+ return status, error_type, expected_string, query_string, expected_string_red, query_string_red