diff --git a/pyproject.toml b/pyproject.toml index e0b40b3b3..0394cc2c9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [build-system] -requires = ["setuptools>=61.0", "wheel"] +requires = ["setuptools>=64.0", "wheel"] build-backend = "setuptools.build_meta" [project] @@ -27,6 +27,7 @@ Github = "https://github.com/ad-freiburg/qlever" [project.scripts] "qlever" = "qlever.qlever_main:main" +"sparql_conformance" = "qlever.qlever_main:main" [tool.setuptools] license-files = ["LICENSE"] diff --git a/src/qlever/__init__.py b/src/qlever/__init__.py index 1adcd3451..3948849a7 100644 --- a/src/qlever/__init__.py +++ b/src/qlever/__init__.py @@ -16,7 +16,7 @@ def snake_to_camel(str): ENGINE_NAMES = { "qlever": "QLever", - "qmdb": "MillenniumDB", + "sparql_conformance": "SPARQL Conformance", } # Default engine_name = script_name without starting 'q' and capitalized engine_name = ENGINE_NAMES.get(script_name, script_name[1:].capitalize()) diff --git a/src/qlever/commands/index.py b/src/qlever/commands/index.py index 0189d70e5..cf82aa59a 100644 --- a/src/qlever/commands/index.py +++ b/src/qlever/commands/index.py @@ -178,7 +178,7 @@ def get_input_options_for_json(self, args) -> str: # Return the concatenated command-line options. return " ".join(input_options) - def execute(self, args) -> bool: + def execute(self, args, called_from_conformance_test = False) -> bool: # The mandatory part of the command line (specifying the input, the # basename of the index, and the settings file). There are two ways # to specify the input: via a single stream or via multiple streams. @@ -278,15 +278,16 @@ def execute(self, args) -> bool: return False # Check if all of the input files exist. - for pattern in shlex.split(args.input_files): - if len(glob.glob(pattern)) == 0: - log.error(f'No file matching "{pattern}" found') - log.info("") - log.info( - "Did you call `qlever get-data`? If you did, check " - "GET_DATA_CMD and INPUT_FILES in the QLeverfile" - ) - return False + if not called_from_conformance_test: + for pattern in shlex.split(args.input_files): + if len(glob.glob(pattern)) == 0: + log.error(f'No file matching "{pattern}" found') + log.info("") + log.info( + "Did you call `qlever get-data`? If you did, check " + "GET_DATA_CMD and INPUT_FILES in the QLeverfile" + ) + return False # Check if index files (name.index.*) already exist. existing_index_files = get_existing_index_files(args.name) @@ -325,7 +326,7 @@ def execute(self, args) -> bool: # Run the index command. try: - run_command(index_cmd, show_output=True) + run_command(index_cmd, show_output=not called_from_conformance_test) except Exception as e: log.error(f"Building the index failed: {e}") return False diff --git a/src/qlever/commands/query.py b/src/qlever/commands/query.py index 4681e33dd..3b0de845e 100644 --- a/src/qlever/commands/query.py +++ b/src/qlever/commands/query.py @@ -15,6 +15,7 @@ class QueryCommand(QleverCommand): """ def __init__(self): + self.query_output = "" self.predefined_queries = { "all-predicates": ( "SELECT (?p AS ?predicate) (COUNT(?p) AS ?count) " @@ -84,7 +85,7 @@ def additional_arguments(self, subparser) -> None: help="Do not print the (end-to-end) time taken", ) - def execute(self, args) -> bool: + def execute(self, args, called_from_conformance_test = False) -> bool: # Use a predefined query if requested. 
if args.predefined_query: args.query = self.predefined_queries[args.predefined_query] @@ -105,6 +106,11 @@ def execute(self, args) -> bool: ) else: curl_cmd_additions = "" + query_type = "query=" + if called_from_conformance_test: + curl_cmd_additions += f" -w '\\nHTTP_STATUS:%{{http_code}}'" + query_type = args.content_type + curl_cmd_additions += f" --data-urlencode access-token={shlex.quote(args.access_token)}" # Show what the command will do. sparql_endpoint = ( @@ -115,7 +121,7 @@ def execute(self, args) -> bool: curl_cmd = ( f"curl -s {sparql_endpoint}" f' -H "Accept: {args.accept}"' - f" --data-urlencode query={shlex.quote(args.query)}" + f" --data-urlencode {query_type}{shlex.quote(args.query)}" f"{curl_cmd_additions}" ) self.show(curl_cmd, only_show=args.show) @@ -125,7 +131,10 @@ def execute(self, args) -> bool: # Launch query. try: start_time = time.time() - run_command(curl_cmd, show_output=True) + if called_from_conformance_test: + self.query_output = run_command(curl_cmd, return_output=True) + else: + run_command(curl_cmd, show_output=True) time_msecs = round(1000 * (time.time() - start_time)) if not args.no_time and args.log_level != "NO_LOG": log.info("") diff --git a/src/qlever/commands/start.py b/src/qlever/commands/start.py index a6811c6ff..8c8e9b75c 100644 --- a/src/qlever/commands/start.py +++ b/src/qlever/commands/start.py @@ -166,7 +166,7 @@ def additional_arguments(self, subparser) -> None: "(default: run in the background with `nohup`)", ) - def execute(self, args) -> bool: + def execute(self, args, called_from_conformance_test = False) -> bool: # Kill existing server with the same name if so desired. # # TODO: This is currently disabled because I never used it once over @@ -267,8 +267,9 @@ def execute(self, args) -> bool: f" (Ctrl-C stops following the log, but NOT the server)" ) log.info("") - tail_cmd = f"exec tail -f {args.name}.server-log.txt" - tail_proc = subprocess.Popen(tail_cmd, shell=True) + if not called_from_conformance_test: + tail_cmd = f"exec tail -f {args.name}.server-log.txt" + tail_proc = subprocess.Popen(tail_cmd, shell=True) while not is_qlever_server_alive(endpoint_url): time.sleep(1) @@ -288,7 +289,7 @@ def execute(self, args) -> bool: return False # Kill the tail process. NOTE: `tail_proc.kill()` does not work. - if not args.run_in_foreground: + if not args.run_in_foreground and not called_from_conformance_test: tail_proc.terminate() # Execute the warmup command. diff --git a/src/qlever/commands/status.py b/src/qlever/commands/status.py index a8efed543..5c3593a16 100644 --- a/src/qlever/commands/status.py +++ b/src/qlever/commands/status.py @@ -3,6 +3,7 @@ import psutil from qlever.command import QleverCommand +from qlever.log import log from qlever.util import show_process_info @@ -46,5 +47,5 @@ def execute(self, args) -> bool: if process_shown: num_processes_found += 1 if num_processes_found == 0: - print("No processes found") + log.error("No processes found") return True diff --git a/src/qlever/qleverfile.py b/src/qlever/qleverfile.py index 5ea39b21d..3598530aa 100644 --- a/src/qlever/qleverfile.py +++ b/src/qlever/qleverfile.py @@ -1,5 +1,6 @@ from __future__ import annotations +import json import re import socket import subprocess @@ -21,6 +22,79 @@ class Qleverfile: Qleverfile + functions for parsing. """ + @staticmethod + def get_conformance_arguments(arg): + """ + Define all possible parameters for conformance checks. 
+ """ + args = {} + args["name"] = arg( + "--name", + type=str, + required=True, + help="Name of the result file of the conformance check.", + ) + args["port"] = arg( + "--port", + type=str, + required=True, + help="Port which will be used for the SPARQL sever.", + ) + args["graph_store"] = arg( + "--graph-store", + type=str, + required=True, + help="Name of the graph store endpoint used for graph store protocol tests.", + ) + args["testsuite_dir"] = arg( + "--testsuite-dir", + type=str, + default=None, + help="Path to the directory of the testsuite.", + ) + args["type_alias"] = arg( + "--type-alias", + type=json.loads, + required=False, + help=("Type mismatches that will be considered intended." + "ex. \"[['http://www.w3.org/2001/XMLSchema#integer', " + "'http://www.w3.org/2001/XMLSchema#int']..." + "['http://www.w3.org/2001/XMLSchema#float'," + "'http://www.w3.org/2001/XMLSchema#double']]\"" + ), + ) + args["engine"] = arg( + "--engine", + type=str, + choices=["qlever", "qlever-binaries"],# "mdb", "oxigraph"], + default="docker", + help="Which system to use to run the tests in" + ) + args["exclude"] = arg( + "--exclude", + type=lambda s: s.split(","), + default=[], + help=("Tests (names) or test groups to exclude from the run." + "ex. service,entailment,POST - existing graph" + ) + ) + args["include"] = arg( + "--include", + type=lambda s: s.split(","), + default=None, + help=("Tests (names) or test groups to include in the run." + "ex. service,entailment,POST - existing graph" + ) + ) + args["binaries_directory"] = arg( + "--binaries-directory", + type=str, + required=False, + help="Path to the directory of the IndexBuilderMain and ServerMain binaries.", + default="" + ) + return args + @staticmethod def all_arguments(): """ @@ -41,6 +115,11 @@ def arg(*args, **kwargs): server_args = all_args["server"] = {} runtime_args = all_args["runtime"] = {} ui_args = all_args["ui"] = {} + all_args["conformance"] = Qleverfile.get_conformance_arguments(arg) + qlever_binaries_args = all_args["qlever_binaries"] = {} + qlever_args = all_args["qlever"] = {} + oxigraph_args = all_args["oxigraph"] = {} + conformance_ui_args = all_args["conformance_ui"] = {} data_args["name"] = arg( "--name", type=str, required=True, help="The name of the dataset" @@ -365,6 +444,34 @@ def arg(*args, **kwargs): help="The name of the container used for `qlever ui`", ) + + qlever_args["qlever_image"] = arg( + "--qlever-image", + type=str, + default="docker.io/adfreiburg/qlever", + help="The name of the image when running in a container", + ) + + oxigraph_args["oxigraph_image"] = arg( + "--oxigraph-image", + type=str, + default="ghcr.io/oxigraph/oxigraph", + help="The name of the image when running in a container", + ) + + conformance_ui_args["port"] = arg( + '--port', + required=False, + help='Port of the webserver (default: 3000)', + default='3000' + ) + conformance_ui_args["result_directory"] = arg( + '--result-directory', + required=False, + help='Directory containing the results of the SPARQL conformance tests (default: current directory)', + default='$(pwd)' + ) + return all_args @staticmethod diff --git a/src/sparql_conformance/Dockerfile b/src/sparql_conformance/Dockerfile new file mode 100644 index 000000000..3a56fd4fb --- /dev/null +++ b/src/sparql_conformance/Dockerfile @@ -0,0 +1,31 @@ +FROM node:18 +ARG UID +ARG GID + +RUN set -eux; \ + if getent group "${GID}" >/dev/null; then \ + echo "Using existing group with GID ${GID}"; \ + else \ + groupadd -g "${GID}" appgroup; \ + fi; \ + if getent passwd "${UID}" 
>/dev/null; then \ + echo "Using existing user with UID ${UID}"; \ + else \ + useradd -m -u "${UID}" -g "${GID}" appuser; \ + fi + +WORKDIR /app + +RUN apt-get update && apt-get install -y git && rm -rf /var/lib/apt/lists/* + +RUN git clone https://github.com/SIRDNARch/qlever-conformance-website.git . + +RUN mkdir -p /public/results + +RUN npm install + +EXPOSE 3000 + +USER ${UID}:${GID} + +CMD [ "node", "server.js" ] diff --git a/src/sparql_conformance/Qleverfiles/Qleverfile.qlever b/src/sparql_conformance/Qleverfiles/Qleverfile.qlever new file mode 100644 index 000000000..3c57e948d --- /dev/null +++ b/src/sparql_conformance/Qleverfiles/Qleverfile.qlever @@ -0,0 +1,16 @@ +# Qleverfile for SPARQL conformance tests using the qlever engine +# To exclude certain tests or test groups add them like this: +# EXCLUDE = service-description,service,entailment,POST - existing graph,PUT - mismatched payload,query specifying dataset in both query string and protocol; test for use of protocol-specified dataset +[data] +NAME = ConformanceTest + +[runtime] +SYSTEM = docker + +[conformance] +NAME = ConformanceTest +PORT = 7036 +ENGINE = qlever +TESTSUITE_DIR = ./testsuite-files/sparql/sparql11/ +GRAPH_STORE = /http-graph-store +TYPE_ALIAS = [["http://www.w3.org/2001/XMLSchema#int", "http://www.w3.org/2001/XMLSchema#integer"], ["http://www.w3.org/2001/XMLSchema#float", "http://www.w3.org/2001/XMLSchema#double"], ["http://www.w3.org/2001/XMLSchema#decimal", "http://www.w3.org/2001/XMLSchema#double"], ["http://www.w3.org/2001/XMLSchema#decimal", "http://www.w3.org/2001/XMLSchema#float"], ["http://www.w3.org/2001/XMLSchema#string", null]] diff --git a/src/sparql_conformance/__init__.py b/src/sparql_conformance/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/sparql_conformance/commands/__init__.py b/src/sparql_conformance/commands/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/sparql_conformance/commands/analyze.py b/src/sparql_conformance/commands/analyze.py new file mode 100644 index 000000000..017224de4 --- /dev/null +++ b/src/sparql_conformance/commands/analyze.py @@ -0,0 +1,91 @@ +from pathlib import Path + +from qlever.command import QleverCommand +from qlever.log import log +from qlever.util import run_command +from sparql_conformance.config import Config +from sparql_conformance.extract_tests import extract_tests +from sparql_conformance.testsuite import TestSuite +from sparql_conformance.engines.engine_manager import EngineManager +from sparql_conformance.engines.qlever import QLeverManager + + + +def get_engine_manager(engine_type: str) -> EngineManager: + """Get the appropriate engine manager for the given engine type""" + managers = { + 'qlever-binaries': QLeverBinaryManager, + 'qlever': QLeverManager, + # 'mdb': MDBManager, + # 'oxigraph': OxigraphManager + } + + manager_class = managers.get(engine_type) + if manager_class is None: + raise ValueError(f"Unsupported engine type: {engine_type}") + + return manager_class() + + +class AnalyzeCommand(QleverCommand): + """ + Class for executing the `test` command. 
+ """ + + def __init__(self): + self.options = [ + 'qlever', + #'mdb', + #'oxigraph' + ] + + def description(self) -> str: + return "Run SPARQL conformance tests against different engines" + + def should_have_qleverfile(self) -> bool: + return False + + def relevant_qleverfile_arguments(self) -> dict[str: list[str]]: + return { + "conformance": ["name", "port", "engine", "graph_store", + "testsuite_dir", "type_alias", "exclude"], + "runtime": ["system"], + "qlever": ["qlever_image"], + "oxigraph": ["oxigraph_image"] + } + + def additional_arguments(self, subparser): + subparser.add_argument( + "test_name", + type=str, + help="The name of the test to start the server for.", + ) + + def execute(self, args) -> bool: + if args.engine not in self.options: + log.error(f"Invalid engine type: {args.engine}") + return False + image = getattr(args, f"{args.engine}_image", None) + if (args.system == "native" and args.binaries_directory == "" or + args.system != "native" and image is None): + log.error( + f"Selected system {args.system} not compatible with image: {image}" + f" and binaries_directory: {args.binaries_directory}" + ) + return False + + if args.testsuite_dir is None or not Path(args.testsuite_dir).is_dir(): + log.error("Could not find testsuite directory. Use `sparql_conformance setup` to download it.") + return False + + alias = [tuple(x) for x in args.type_alias] if args.type_alias else [] + config = Config(image, args.system, args.port, args.graph_store, args.testsuite_dir, alias, + args.binaries_directory, args.exclude, args.test_name) + print("Preparing ...") + if "qlever" in args.engine: + print("access_token='abc'") + tests, test_count = extract_tests(config) + test_suite = TestSuite(name=args.name, tests=tests, test_count=test_count, config=config, + engine_manager=get_engine_manager(args.engine)) + test_suite.analyze() + return True diff --git a/src/sparql_conformance/commands/setup.py b/src/sparql_conformance/commands/setup.py new file mode 100644 index 000000000..94bd5a93c --- /dev/null +++ b/src/sparql_conformance/commands/setup.py @@ -0,0 +1,117 @@ +import subprocess +from pathlib import Path +from os import environ + +from qlever.command import QleverCommand +from qlever.log import log +from qlever.util import run_command + + +class SetupCommand(QleverCommand): + """ + Class for executing the `setup` command. + """ + + def __init__(self): + self.qleverfiles_path = Path(__file__).parent.parent / "Qleverfiles" + self.testsuite_command = f""" +git clone --sparse --filter=blob:none --depth 1 https://github.com/w3c/rdf-tests ./testsuite-files && \ +git -C ./testsuite-files sparse-checkout set sparql/sparql11 +""" + + def description(self) -> str: + return "Setup a pre-configured Qleverfile and download test suite for the SPARQL conformance tests" + + def should_have_qleverfile(self) -> bool: + return False + + def relevant_qleverfile_arguments(self) -> dict[str: list[str]]: + return {} + + def additional_arguments(self, subparser): + subparser.add_argument( + "engine_name", + type=str, + choices=["qlever", "qlever-native"], + help="The engine name for the pre-configured Qleverfile to create", + ) + + def execute(self, args) -> bool: + # Show a warning if `QLEVER_OVERRIDE_SYSTEM_NATIVE` is set. 
+ qlever_is_running_in_container = environ.get("QLEVER_IS_RUNNING_IN_CONTAINER") + if qlever_is_running_in_container: + log.warning( + "The environment variable `QLEVER_IS_RUNNING_IN_CONTAINER` is set, " + "therefore the Qleverfile is modified to use `SYSTEM = native` " + "(since inside the container, QLever should run natively)" + ) + log.info("") + # Construct the command line and show it. + qleverfile_path = self.qleverfiles_path / f"Qleverfile.{args.engine_name} " + setup_config_cmd = f"cat {qleverfile_path}" + if qlever_is_running_in_container: + setup_config_cmd += ( + " | sed -E 's/(^SYSTEM[[:space:]]*=[[:space:]]*).*/\\1native/'" + ) + setup_config_cmd += "> Qleverfile" + self.show(setup_config_cmd, only_show=args.show) + if args.show: + return True + + # If there is already a Qleverfile in the current directory, exit. + qleverfile_path = Path("Qleverfile") + if qleverfile_path.exists(): + log.error("`Qleverfile` already exists in current directory") + log.info("") + log.info( + "If you want to create a new Qleverfile using " + "`sparql_conformance setup`, delete the existing Qleverfile " + "first" + ) + return False + + # Copy the Qleverfile to the current directory. + try: + subprocess.run( + setup_config_cmd, + shell=True, + check=True, + stdin=subprocess.DEVNULL, + stdout=subprocess.DEVNULL, + ) + except Exception as e: + log.error( + f'Could not copy "{qleverfile_path}"' f" to current directory: {e}" + ) + return False + + # If we get here, everything went well. + log.info( + f'Created Qleverfile for engine "{args.engine_name}"' + f" in current directory" + ) + + # If there is already a test suite in the current directory, exit. + testsuite_path = Path("./testsuite-files/sparql/sparql11") + if testsuite_path.exists(): + log.error("`Test suite` already exists in current directory") + log.info("") + log.info( + "If you want to download the test suite using " + "`sparql_conformance setup`, delete the existing test suite " + "first" + ) + return False + testsuite_command = ( + "git clone --sparse --filter=blob:none --depth 1 https://github.com/w3c/rdf-tests ./testsuite-files && \ " + "git -C ./testsuite-files sparse-checkout set sparql/sparql11" + ) + try: + run_command(self.testsuite_command) + except Exception as e: + log.error( + f'Could not download test suite from https://github.com/w3c/rdf-tests' f" to current directory: {e}" + ) + return False + return True + diff --git a/src/sparql_conformance/commands/test.py b/src/sparql_conformance/commands/test.py new file mode 100644 index 000000000..0b414b791 --- /dev/null +++ b/src/sparql_conformance/commands/test.py @@ -0,0 +1,85 @@ +from pathlib import Path + +from qlever.command import QleverCommand +from qlever.log import log +from sparql_conformance.config import Config +from sparql_conformance.extract_tests import extract_tests +from sparql_conformance.testsuite import TestSuite +from sparql_conformance.engines.engine_manager import EngineManager +from sparql_conformance.engines.qlever import QLeverManager + + +def get_engine_manager(engine_type: str) -> EngineManager: + """Get the appropriate engine manager for the given engine type""" + managers = { + 'qlever': QLeverManager, + # 'mdb': MDBManager, + # 'oxigraph': OxigraphManager + } + + manager_class = managers.get(engine_type) + if manager_class is None: + raise ValueError(f"Unsupported engine type: {engine_type}") + + return manager_class() + + +class TestCommand(QleverCommand): + """ + Class for executing the `test` command. 
+ """ + + def __init__(self): + self.options = [ + 'qlever', + 'qlever-binaries', + # 'mdb', + # 'oxigraph' + ] + + def description(self) -> str: + return "Run SPARQL conformance tests against different engines" + + def should_have_qleverfile(self) -> bool: + return False + + def relevant_qleverfile_arguments(self) -> dict[str: list[str]]: + return { + "conformance": ["name", "port", "engine", + "graph_store", "testsuite_dir", + "type_alias", "exclude", "include", "binaries_directory"], + "runtime": ["system"], + "qlever": ["qlever_image"], + "oxigraph": ["oxigraph_image"] + } + + def additional_arguments(self, subparser): + pass + + def execute(self, args) -> bool: + if args.engine not in self.options: + log.error(f"Invalid engine type: {args.engine}") + return False + image = getattr(args, f"{args.engine}_image", None) + if (args.system == "native" and args.binaries_directory == "" or + args.system != "native" and image is None): + log.error( + f"Selected system {args.system} not compatible with image: {image}" + f" and binaries_directory: {args.binaries_directory}" + ) + return False + + if args.testsuite_dir is None or not Path(args.testsuite_dir).is_dir(): + log.error("Could not find testsuite directory. Use `sparql_conformance setup` to download it.") + return False + alias = [tuple(x) for x in args.type_alias] if args.type_alias else [] + config = Config(image, args.system, args.port, args.graph_store, args.testsuite_dir, alias, + args.binaries_directory, args.exclude, args.include) + print("Running testsuite...") + tests, test_count = extract_tests(config) + test_suite = TestSuite(name=args.name, tests=tests, test_count=test_count, config=config, + engine_manager=get_engine_manager(args.engine)) + test_suite.run() + test_suite.generate_json_file() + print("Finished!") + return True diff --git a/src/sparql_conformance/commands/visualize.py b/src/sparql_conformance/commands/visualize.py new file mode 100644 index 000000000..9407f7298 --- /dev/null +++ b/src/sparql_conformance/commands/visualize.py @@ -0,0 +1,53 @@ +import os +from pathlib import Path + +from qlever.command import QleverCommand +from qlever.log import log +from qlever.util import run_command + + +class VisualizeCommand(QleverCommand): + def __init__(self): + pass + + def description(self) -> str: + return "Visualize SPARQL conformance test results." 
+ + def should_have_qleverfile(self) -> bool: + return False + + def relevant_qleverfile_arguments(self) -> dict[str: list[str]]: + return {"runtime": ["system"], + "conformance_ui": ["result_directory", "port"] + } + + def additional_arguments(self, subparser): + pass + + def execute(self, args) -> bool: + dockerfile_dir = Path(__file__).parent.parent + dockerfile_path = dockerfile_dir / "Dockerfile" + system = args.system + uid = f"UID={os.getuid()}" if hasattr(os, "getuid") else "UID=1000" + gid = f"GID={os.getgid()}" if hasattr(os, "getuid") else "GID=1000" + build_cmd = f"docker build -f {dockerfile_path} -t visualize-results \ + --build-arg {uid} --build-arg {gid} {dockerfile_dir}" + start_server_cmd = f"docker run -it --rm \ + -p {args.port}:3000 \ + -v {args.result_directory}:/app/public/results \ + visualize-results" + image_id = run_command( + f"{system} images -q visualize-results", return_output=True + ) + if not image_id: + try: + run_command(build_cmd, show_output=True) + except Exception as e: + log.error(f"Building the {system} image visualize-results failed: {e}") + return False + try: + run_command(start_server_cmd, show_output=True) + except Exception as e: + log.error(f"Building the index failed: {e}") + return False + return True \ No newline at end of file diff --git a/src/sparql_conformance/config.py b/src/sparql_conformance/config.py new file mode 100644 index 000000000..5333da114 --- /dev/null +++ b/src/sparql_conformance/config.py @@ -0,0 +1,40 @@ +import os +from typing import Dict, Any, Tuple, List + + +class Config: + """Configuration class for SPARQL test suite execution.""" + + def __init__(self, + image: str, + system: str, + port: str, + graph_store: str, + testsuite_dir: str, + type_alias: List[Tuple[str, str]], + binaries_directory: str, + exclude: List[str], + include: List[str] = None + ): + self.server_address = 'localhost' + self.image = image + self.system = system + self.port = port + self.GRAPHSTORE = graph_store + self.alias = type_alias + self.path_to_test_suite = os.path.abspath(testsuite_dir) + self.path_to_binaries = os.path.abspath(binaries_directory) + self.exclude = exclude + self.include = include + self.number_types = [ + "http://www.w3.org/2001/XMLSchema#integer", + "http://www.w3.org/2001/XMLSchema#double", + "http://www.w3.org/2001/XMLSchema#decimal", + "http://www.w3.org/2001/XMLSchema#float", + "http://www.w3.org/2001/XMLSchema#int", + "http://www.w3.org/2001/XMLSchema#decimal" + ] + + def to_dict(self) -> Dict[str, Any]: + """Convert configuration to dictionary format.""" + return self.__dict__ diff --git a/src/sparql_conformance/data/data0.rdf b/src/sparql_conformance/data/data0.rdf new file mode 100644 index 000000000..79bf57fbb --- /dev/null +++ b/src/sparql_conformance/data/data0.rdf @@ -0,0 +1,3 @@ + + + diff --git a/src/sparql_conformance/data/data1.rdf b/src/sparql_conformance/data/data1.rdf new file mode 100644 index 000000000..8e9c78a90 --- /dev/null +++ b/src/sparql_conformance/data/data1.rdf @@ -0,0 +1,4 @@ + + + + diff --git a/src/sparql_conformance/data/data2.rdf b/src/sparql_conformance/data/data2.rdf new file mode 100644 index 000000000..8e9c78a90 --- /dev/null +++ b/src/sparql_conformance/data/data2.rdf @@ -0,0 +1,4 @@ + + + + diff --git a/src/sparql_conformance/data/data3.rdf b/src/sparql_conformance/data/data3.rdf new file mode 100644 index 000000000..8e9c78a90 --- /dev/null +++ b/src/sparql_conformance/data/data3.rdf @@ -0,0 +1,4 @@ + + + + diff --git a/src/sparql_conformance/engines/__init__.py 
b/src/sparql_conformance/engines/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/sparql_conformance/engines/engine_manager.py b/src/sparql_conformance/engines/engine_manager.py new file mode 100644 index 000000000..326a2c780 --- /dev/null +++ b/src/sparql_conformance/engines/engine_manager.py @@ -0,0 +1,71 @@ +from abc import ABC, abstractmethod +from typing import Tuple + +from sparql_conformance.config import Config + + +class EngineManager(ABC): + """Abstract base class for SPARQL engine managers""" + + @abstractmethod + def setup(self, + config: Config, + graph_paths: Tuple[Tuple[str, str], ...] + ) -> Tuple[bool, bool, str, str]: + """ + Set up the engine for testing. + + Args: + config: Test suite config, used to set engine-specific settings + graph_paths: ex. default graph + named graph (('graph_path', '-'), + ('graph_path2', 'graph_name2')) + + Returns: + index_success (bool), server_success (bool), index_log (str), server_log (str) + """ + pass + + @abstractmethod + def cleanup(self, config: Config): + """Clean up the test environment after testing""" + pass + + @abstractmethod + def query(self, config: Config, query: str, result_format: str) -> Tuple[int, str]: + """ + Send a SPARQL query to the engine and return the result + + Args: + config: Test suite config, used to set engine-specific settings + query: The SPARQL query to be executed + result_format: Type of the result + + Returns: + HTTP status code (int), query result (str) + """ + pass + + @abstractmethod + def update(self, config: Config, query: str) -> Tuple[int, str]: + """ + Send a SPARQL update query to the engine and return the result + + Args: + config: Test suite config, used to set engine-specific settings + query: The SPARQL update query to be executed + + Returns: + HTTP status code (int), response (str) + """ + pass + + @abstractmethod + def protocol_endpoint(self) -> str: + """ + Returns the name of the protocol endpoint for the engine. + Used to replace the standard endpoint with the + engine-specific endpoint in the protocol tests. + Ex. 
POST /sparql/ HTTP/1.1 -> POST /qlever/ HTTP/1.1 + """ + pass + diff --git a/src/sparql_conformance/engines/qlever.py b/src/sparql_conformance/engines/qlever.py new file mode 100644 index 000000000..6b37e310f --- /dev/null +++ b/src/sparql_conformance/engines/qlever.py @@ -0,0 +1,157 @@ +import json +import os +from pathlib import Path +from argparse import Namespace +from typing import Tuple, List +import requests + + +from qlever.commands.query import QueryCommand +from qlever.log import mute_log +from qlever.util import run_command +from qlever.commands.start import StartCommand +from qlever.commands.stop import StopCommand +from sparql_conformance.config import Config +from sparql_conformance.engines.engine_manager import EngineManager +from sparql_conformance import util +from qlever.commands.index import IndexCommand +from sparql_conformance.rdf_tools import write_ttl_file, delete_ttl_file, rdf_xml_to_turtle + + +class QLeverManager(EngineManager): + """Manager for QLever using docker execution""" + + def update(self, config: Config, query: str) -> Tuple[int, str]: + return self._query(config, query, "ru", "json") + + def protocol_endpoint(self) -> str: + return "sparql" + + def cleanup(self, config: Config): + self._stop_server(config) + with mute_log(): + run_command('rm -f qlever-sparql-conformance*') + + def query(self, config: Config, query: str, result_format: str) -> Tuple[int, str]: + return self._query(config, query, "rq", result_format) + + def _query(self, config: Config, query: str, query_type: str, result_format: str) -> Tuple[int, str]: + content_type = "query=" if query_type == "rq" else "update=" + args = util.make_args( + config, + accept=util.get_accept_header(result_format), + query=query, + content_type=content_type, + ) + + try: + with mute_log(): + qc = QueryCommand() + qc.execute(args, True) + body, _, status_line = qc.query_output.rpartition("HTTP_STATUS:") + status = int(status_line.strip()) + return status, body + except Exception as e: + return 1, str(e) + + def setup(self, config: Config, graph_paths: Tuple[Tuple[str, str], ...]) -> Tuple[bool, bool, str, str]: + server_success = False + graphs = [] + for graph_path, graph_name in graph_paths: + # Handle rdf files by turning them into turtle format. 
+ if graph_path.endswith(".rdf"): + graph_path_new = Path(graph_path).name + graph_path_new = graph_path_new.replace(".rdf", ".ttl") + write_ttl_file(graph_path_new, rdf_xml_to_turtle(graph_path, graph_name)) + graph_path = graph_path_new + else: + graph_path = util.copy_graph_to_workdir(graph_path, os.getcwd()) + graphs.append((graph_path, graph_name)) + + index_success, index_log = self._index(config, graphs) + if not index_success: + return index_success, server_success, index_log, '' + else: + server_success, server_log = self._start_server(config) + + if not server_success: + return index_success, server_success, index_log, server_log + for path, name in graphs: + delete_ttl_file(path) + return index_success, server_success, index_log, server_log + + def _stop_server(self, config: Config) -> Tuple[bool, str]: + args = Namespace( + name='qlever-sparql-conformance', + port=config.port, + server_container='qlever-sparql-conformance-server-container', + no_containers=config.system == 'native', + show=False, + cmdline_regex='ServerMain.* -i [^ ]*%%NAME%%' + ) + try: + with mute_log(50): + result = StopCommand().execute(args) + except Exception as e: + error_output = str(e) + return False, error_output + return result, 'Success' + + def _start_server(self, config: Config) -> Tuple[bool, str]: + binary = 'ServerMain' + binary = binary if config.system != 'native' else Path(config.path_to_binaries, binary) + args = util.make_args( + config, + server_binary=binary, + ) + try: + with mute_log(): + result = StartCommand().execute(args, called_from_conformance_test=True) + except Exception as e: + error_output = str(e) + return False, error_output + + server_log = '' + if os.path.exists('./qlever-sparql-conformance.server-log.txt'): + server_log = util.read_file('./qlever-sparql-conformance.server-log.txt') + return result, server_log + + def _index(self, config: Config, graph_paths: List[Tuple[str, str]]) -> Tuple[bool, str]: + binary = 'IndexBuilderMain' + index_binary = binary if config.system != 'native' else Path(config.path_to_binaries, binary) + args = util.make_args( + config, + multi_input_json=self._generate_multi_input_json(graph_paths), + index_binary=index_binary + ) + try: + with mute_log(): + result = IndexCommand().execute(args=args, called_from_conformance_test=True) + except Exception as e: + error_output = str(e) + return False, error_output + + index_log = '' + if os.path.exists("./qlever-sparql-conformance.index-log.txt"): + index_log = util.read_file("./qlever-sparql-conformance.index-log.txt") + return result, index_log + + def _generate_multi_input_json(self, graph_paths: List[Tuple[str, str]]) -> str: + """Generate the JSON input for multi_input_json in IndexCommand.execute()""" + input_list = [] + for graph_path, graph_name in graph_paths: + entry = { + 'cmd': f'cat {graph_path}', + 'graph': graph_name if graph_name else '-', + 'format': 'ttl' + } + input_list.append(entry) + return json.dumps(input_list) + + def activate_syntax_test_mode(self, server_address, port): + url = f'http://{server_address}:{port}' + params = { + "access-token": "abc", + "syntax-test-mode": "true" + } + requests.get(url, params) diff --git a/src/sparql_conformance/extract_tests.py b/src/sparql_conformance/extract_tests.py new file mode 100644 index 000000000..ac81276f1 --- /dev/null +++ b/src/sparql_conformance/extract_tests.py @@ -0,0 +1,214 @@ +import os +from rdflib import Graph, Namespace, RDF, URIRef +from typing import Union, Dict, Any, List, Tuple, Optional, Set + +from .config import 
Config +from .util import uri_to_path, local_name +from .test_object import TestObject + +# Namespaces +MF = Namespace("http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#") +DAWGT = Namespace("http://www.w3.org/2001/sw/DataAccess/tests/test-dawg#") +SD = Namespace("http://www.w3.org/ns/sparql-service-description#") +RDFS = Namespace("http://www.w3.org/2000/01/rdf-schema#") + + +def collect_tests_by_graph(tests: List[TestObject]) -> Dict[str, Dict[Tuple[Tuple[str, str], ...], List[TestObject]]]: + """ + Groups tests by their graph references and categories. + The resulting dictionary has the following structure: + {'query': { (('graph_path', 'graph_name'), ...): [Test1, Test2, ...], ...}, ...} + """ + if len(tests) == 0: + return {} + type_to_category: Dict[str, str] = { + 'QueryEvaluationTest': 'query', + 'CSVResultFormatTest': 'format', + 'UpdateEvaluationTest': 'update', + 'PositiveSyntaxTest11': 'syntax', + 'NegativeSyntaxTest11': 'syntax', + 'PositiveUpdateSyntaxTest11': 'syntax', + 'NegativeUpdateSyntaxTest11': 'syntax', + 'ProtocolTest': 'protocol', + 'GraphStoreProtocolTest': 'graphstoreprotocol', + 'ServiceDescriptionTest': 'service', + } + + graph_index: Dict[str, Dict[Tuple[Tuple[str, str], ...], List[TestObject]]] = { + 'query': dict(), + 'format': dict(), + 'update': dict(), + 'syntax': dict(), + 'protocol': dict(), + 'graphstoreprotocol': dict(), + 'service': dict(), + } + + fallback_graph = (os.path.join(tests[0].config.path_to_test_suite, 'property-path', 'empty.ttl'), '-') + + for test in tests: + if isinstance(test.action_node, dict): + graph_refs: List[Tuple[str, str]] = [] + + if "data" in test.action_node: + graph_refs.append((test.action_node["data"], "-")) + else: + graph_refs.append(fallback_graph) + + graph_data = test.action_node.get("graphData") + if isinstance(graph_data, list): + for entry in graph_data: + if isinstance(entry, dict): + graph_file = entry.get("graph") + label = entry.get("label") + if graph_file: + graph_refs.append((graph_file, label)) + else: + graph_refs.append((entry, entry.split('/')[-1])) + elif isinstance(graph_data, dict): + graph_file = graph_data.get("graph") + label = graph_data.get("label") + if graph_file: + graph_refs.append((graph_file, label)) + elif isinstance(graph_data, str): + graph_refs.append((graph_data, graph_data.split('/')[-1])) + else: + graph_refs = [fallback_graph] + + key = tuple(sorted(set(graph_refs))) + category = type_to_category.get(test.type_name) + if category: + if key in graph_index[category]: + graph_index[category][key].append(test) + else: + graph_index[category][key] = [test] + + return graph_index + + +def parse_node(graph: Graph, node: Any) -> Union[str, Dict[str, Any], None]: + """ + Parse a RDF-node and convert it into an object. + """ + if isinstance(node, URIRef): + return str(node) + if node is None: + return None + if node.__class__.__name__ == "Literal": + return str(node) + + value_dict: Dict[str, Union[str, List[str]]] = {} + for p, o in graph.predicate_objects(node): + key = local_name(str(p)) + if key == 'request': + key = 'query' + value = uri_to_path(parse_node(graph, o)) + + if key in value_dict: + if isinstance(value_dict[key], list): + value_dict[key].append(value) + else: + value_dict[key] = [value_dict[key], value] + else: + value_dict[key] = value + + return value_dict + + +def load_tests_from_manifest( + manifest_path: str, + config: Config, + visited: Optional[Set[str]] = None +) -> List[TestObject]: + """ + Load tests from a manifest file and all included sub-manifests. 
+ """ + if visited is None: + visited = set() + + manifest_abs_path = os.path.abspath(manifest_path) + if manifest_abs_path in visited: + return [] + visited.add(manifest_abs_path) + + g = Graph() + g.parse(manifest_abs_path, format="turtle") + tests: List[TestObject] = [] + sub_manifest_paths: List[str] = [] + + for collection in g.objects(None, MF.entries): + for test_uri in g.items(collection): + test_type = g.value(test_uri, RDF.type) + if not isinstance(test_type, URIRef): + continue + + test_type = str(local_name(test_type)) + name = g.value(test_uri, MF.name) + action_node = g.value(test_uri, MF.action) + result_node = g.value(test_uri, MF.result) + + action = parse_node(g, action_node) + if isinstance(action, str): + action = {"query": action} + result = parse_node(g, result_node) + if isinstance(result, str): + result = {"data": result} + + approval = g.value(test_uri, DAWGT.approval) + approved_by = g.value(test_uri, DAWGT.approvedBy) + comment = g.value(test_uri, RDFS.comment) + + feature = [str(f) for f in g.objects(test_uri, MF.feature) if isinstance(f, URIRef)] + path = manifest_abs_path.split("manifest.ttl")[0] + entailment_regime = g.value(test_uri, SD.entailmentRegime) + entailment_profile = g.value(test_uri, SD.entailmentProfile) + group = os.path.basename(os.path.normpath(path)) + if str(name) in config.exclude or group in config.exclude: + continue + if config.include and str(name) not in config.include and group not in config.include: + continue + tests.append(TestObject( + test=str(test_uri), + name=str(name), + type_name=test_type, + group=group, + path=path, + action_node=action, + result_node=result, + approval=str(approval) if approval else None, + approved_by=str(approved_by) if approved_by else None, + comment=str(comment) if comment else None, + entailment_regime=str(entailment_regime) if entailment_regime else None, + entailment_profile=str(entailment_profile) if entailment_profile else None, + feature=feature, + config=config, + )) + + for include_list in g.objects(None, MF.include): + for sub_manifest_uri in g.items(include_list): + sub_manifest_path = uri_to_path(sub_manifest_uri) + sub_manifest_path = os.path.normpath(sub_manifest_path) + + if os.path.exists(sub_manifest_path): + sub_manifest_paths.append(sub_manifest_path) + tests.extend(load_tests_from_manifest( + sub_manifest_path, + config, + visited=visited + )) + + return tests + + +def extract_tests(config: Config) -> Tuple[Dict[str, Dict[Tuple[Tuple[str, str], ...], List[TestObject]]], int]: + """ + Extract tests from the SPARQL testsuite manifest file. + + Returns: + Tuple: + - A dictionary grouped by categories + - Number of tests + """ + path_to_manifest = os.path.join(config.path_to_test_suite, 'manifest-all.ttl') + tests = load_tests_from_manifest(path_to_manifest, config) + return collect_tests_by_graph(tests), len(tests) \ No newline at end of file diff --git a/src/sparql_conformance/json_tools.py b/src/sparql_conformance/json_tools.py new file mode 100644 index 000000000..e3e994aae --- /dev/null +++ b/src/sparql_conformance/json_tools.py @@ -0,0 +1,393 @@ +import json +from typing import List, Tuple + +from sparql_conformance.test_object import Status, ErrorMessage + + +def handle_bindings( + indent: int, + level: int, + bindings: list, + remaining_bindings: list, + mark_red: list) -> str: + """ + Formats the "bindings" list with HTML labels as needed for highlighting. 
+ + This method iterates over a list of bindings and applies HTML labels to those + that match any in the reference bindings list. The method handles indentation + and formatting to create a readable HTML-formatted string. + + Parameters: + indent (int): Number of spaces used for indentation. + level (int): Current nesting level for correct indentation. + bindings (list): List of binding items to format. + remaining_bindings (list): List of binding items used for comparison. + mark_red (list): List containing the elements that must be highlighted red. + + Returns: + str: An HTML-formatted string representing the bindings list with highlighted items. + """ + mark_red_copy = list(mark_red) + parts = ["["] + for i, binding in enumerate(bindings): + if i > 0: + parts.append(", ") + parts.append("\n" + " " * (indent * (level + 1))) + + # Apply label if the binding matches any in the reference bindings + if binding in remaining_bindings: + if binding in mark_red_copy: + label = ')" + + def replace_first_match(match): + return f'' + + serialized_turtle = re.sub( + pattern, + replace_first_match, + serialized_turtle, + flags=re.DOTALL + ) + + return serialized_turtle + +def compare_ttl(expected_ttl: str, query_ttl: str) -> tuple: + status = Status.FAILED + error_type = ErrorMessage.RESULTS_NOT_THE_SAME + expected_graph = rdflib.Graph() + query_graph = rdflib.Graph() + try: + expected_graph.parse(data=expected_ttl, format="turtle") + except Exception: + expected_ttl = '@prefix foaf: .\n@prefix v: .\n\n' + expected_ttl + try: + expected_graph.parse(data=expected_ttl, format="turtle") + except Exception as e: + error_type = ErrorMessage.FORMAT_ERROR + escaped_expected = f'' + return Status.NOT_TESTED, error_type, escaped_expected, escape(query_ttl), f'', escape( + query_ttl) + + try: + query_graph.parse(data=query_ttl, format="turtle") + except Exception as e: + error_type = ErrorMessage.FORMAT_ERROR + escaped_query = f'' + escaped_expected = f'' + return status, error_type, escape( + expected_ttl), escaped_query, escaped_expected, f'' + + is_isomorphic = expected_graph.isomorphic(query_graph) + + if is_isomorphic: + status = Status.PASSED + error_type = "" + expected_string = escape(expected_ttl) + query_string = escape(query_ttl) + expected_string_red = "" + query_string_red = "" + else: + triples_in_expected_not_in_query = expected_graph - query_graph + triples_in_query_not_in_expected = query_graph - expected_graph + + # Repair namespaces + copy_namespaces(expected_graph, triples_in_expected_not_in_query) + copy_namespaces(query_graph, triples_in_query_not_in_expected) + expected_string = highlight_differences( + expected_graph, triples_in_expected_not_in_query) + query_string = highlight_differences( + query_graph, triples_in_query_not_in_expected) + + no_prefix_escaped_expected = escape( + remove_prefix( + triples_in_expected_not_in_query.serialize( + format="turtle"))) + no_prefix_escaped_query = escape( + remove_prefix( + triples_in_query_not_in_expected.serialize( + format="turtle"))) + expected_string_red = f'' + query_string_red = f'' + + return status, error_type, expected_string, query_string, expected_string_red, query_string_red diff --git a/src/sparql_conformance/test_object.py b/src/sparql_conformance/test_object.py new file mode 100644 index 000000000..7ce6db8eb --- /dev/null +++ b/src/sparql_conformance/test_object.py @@ -0,0 +1,207 @@ +from enum import Enum +from typing import Optional, List, Union, Dict, Any + +from sparql_conformance.config import Config +from 
sparql_conformance.util import local_name, read_file, escape +import os +import json + +class Status(str, Enum): + PASSED = "Passed" + INTENDED = "Failed: Intended" + FAILED = "Failed" + NOT_TESTED = "Not tested" + +class ErrorMessage(str, Enum): + QUERY_EXCEPTION = "QUERY EXCEPTION" + REQUEST_ERROR = "REQUEST ERROR" + QUERY_ERROR = "QUERY RESULT ERROR" + INDEX_BUILD_ERROR = "INDEX BUILD ERROR" + SERVER_ERROR = "SERVER ERROR" + NOT_TESTED = "NOT TESTED" + RESULTS_NOT_THE_SAME = "RESULTS NOT THE SAME" + INTENDED_MSG = "Known, intended behaviour that does not comply with SPARQL standard" + EXPECTED_EXCEPTION = "EXPECTED: QUERY EXCEPTION ERROR" + FORMAT_ERROR = "QUERY RESULT FORMAT ERROR" + NOT_SUPPORTED = "QUERY NOT SUPPORTED" + CONTENT_TYPE_NOT_SUPPORTED = "CONTENT TYPE NOT SUPPORTED" + + @classmethod + def is_query_error(cls, error: str) -> bool: + """Subset of query-related errors.""" + return error in [ + cls.QUERY_EXCEPTION, + cls.QUERY_ERROR, + cls.REQUEST_ERROR, + cls.NOT_SUPPORTED, + cls.CONTENT_TYPE_NOT_SUPPORTED, + ] + +def process_graph_data(graph_data: Union[None, str, Dict, List], target_dict: Dict[str, str]) -> None: + """ + Process graph data and store results in the target dictionary. + Result: {'label': 'graph', ...} + """ + if graph_data is None: + return + + if isinstance(graph_data, str): + label = graph_data.split('/')[-1] + target_dict[label] = read_file(graph_data) + return + + if not isinstance(graph_data, List): + graph_data = [graph_data] + + for graph_entry in graph_data: + if isinstance(graph_entry, dict): + graph_path = graph_entry.get('graph') + if graph_path: + label = graph_entry.get('label', graph_path.split('/')[-1]) + target_dict[label] = read_file(graph_path) + elif isinstance(graph_entry, str): + label = graph_entry.split('/')[-1] + target_dict[label] = read_file(graph_entry) + + +class TestObject: + """Represents a single SPARQL test case with its configuration and results.""" + + def __init__( + self, + test: str, + name: str, + type_name: str, + group: str, + path: str, + action_node: Optional[Dict[str, Any]], + result_node: Optional[Dict[str, Any]], + approval: Optional[str], + approved_by: Optional[str], + comment: Optional[str], + entailment_regime: Optional[str], + entailment_profile: Optional[str], + feature: List[str], + config: Config, + ): + """ + Initialize a test object with all its properties. 
+ + Args: + test: Test URI + name: Test name + type_name: Type of the test + group: Test group identifier + path: Path to test files + action_node: Node containing test actions + result_node: Node containing expected results + approval: Test approval status + approved_by: Approver identifier + comment: Test description/comment + entailment_regime: SPARQL entailment regime + entailment_profile: Entailment profile + feature: List of test features + config: Test configuration + """ + self.test = test + self.name = name + self.type_name = type_name + self.group = group + self.path = path + self.action_node = action_node + self.result_node = result_node + self.approval = approval + self.approved_by = approved_by + self.comment = comment + self.entailment_regime = entailment_regime + self.entailment_profile = entailment_profile + self.feature = feature + self.config = config + + self.status = Status.NOT_TESTED + self.index_files: Dict[str, str] = {} + self.result_files: Dict[str, str] = {} + + # Process action node + if isinstance(action_node, dict): + self.query = local_name(action_node.get('query', 'no query')) + self.graph = local_name(action_node.get('data', 'no query')) + self.query_file = read_file(os.path.join(self.path, self.query)) + self.graph_file = read_file(os.path.join(self.path, self.graph)) + process_graph_data(action_node.get('graphData'), self.index_files) + else: + self.query = self.graph = self.query_file = self.graph_file = '' + + # Process result node + if isinstance(result_node, dict): + self.result = local_name(result_node.get('data', 'no query')) + self.result_format = self.result[self.result.rfind('.') + 1:] + self.result_file = read_file(os.path.join(self.path, self.result)) + process_graph_data(result_node.get('graphData'), self.result_files) + else: + self.result = self.result_file = '' + + # Initialize test execution results + self.error_type = '' + self.expected_html = '' + self.got_html = '' + self.expected_html_red = '' + self.got_html_red = '' + self.index_log = '' + self.server_log = '' + self.server_status = '' + self.query_result = '' + self.query_answer = '' + self.query_log = '' + self.query_sent = '' + self.protocol = '' + self.protocol_sent = '' + self.response_extracted = '' + self.response = '' + + def __repr__(self) -> str: + """Return string representation of the test object.""" + return f'' + + def to_dict(self) -> Dict[str, str]: + """Convert test object to dictionary format for serialization.""" + self.graph_file = 'default:
<br>' + escape(self.graph_file) + '<br>' + for name, graph in self.index_files.items(): + self.graph_file += f'<br>{name}:<br>{escape(graph)}<br>
' + + return { + 'test': escape(self.test), + 'typeName': escape(self.type_name), + 'name': escape(self.name), + 'group': escape(self.group), + 'feature': escape(';'.join(self.feature)), + 'comment': escape(self.comment), + 'approval': escape(self.approval), + 'approvedBy': escape(self.approved_by), + 'query': escape(self.query), + 'graph': escape(self.graph), + 'queryFile': escape(self.query_file), + 'graphFile': self.graph_file, + 'resultFile': escape(self.result_file), + 'status': escape(self.status), + 'errorType': escape(self.error_type), + 'expectedHtml': self.expected_html, + 'gotHtml': self.got_html, + 'expectedHtmlRed': self.expected_html_red, + 'gotHtmlRed': self.got_html_red, + 'indexLog': escape(self.index_log), + 'serverLog': escape(self.server_log), + 'serverStatus': escape(self.server_status), + 'queryResult': escape(self.query_result), + 'queryAnswer': escape(self.query_answer), + 'queryLog': escape(self.query_log), + 'querySent': escape(self.query_sent), + 'regime': escape(self.entailment_regime), + 'protocol': escape(self.protocol), + 'protocolSent': escape(self.protocol_sent), + 'responseExtracted': escape(self.response_extracted), + 'response': escape(self.response), + 'config': escape(json.dumps(self.config.to_dict(), indent=4)), + 'indexFiles': escape(json.dumps(self.index_files, indent=4)), + 'resultFiles': escape(json.dumps(self.result_files, indent=4)) + } \ No newline at end of file diff --git a/src/sparql_conformance/testsuite.py b/src/sparql_conformance/testsuite.py new file mode 100644 index 000000000..e751309cb --- /dev/null +++ b/src/sparql_conformance/testsuite.py @@ -0,0 +1,490 @@ +import bz2 +import json +import os +from typing import List, Dict, Tuple + +import sparql_conformance.util as util +from qlever.log import log +from sparql_conformance.config import Config +from sparql_conformance.engines.engine_manager import EngineManager +from sparql_conformance.engines.qlever import QLeverManager +from sparql_conformance.json_tools import compare_json +from sparql_conformance.protocol_tools import run_protocol_test +from sparql_conformance.rdf_tools import compare_ttl +from sparql_conformance.test_object import TestObject, Status, ErrorMessage +from sparql_conformance.tsv_csv_tools import compare_sv +from sparql_conformance.xml_tools import compare_xml + + +class TestSuite: + """ + A class to represent a test suite for SPARQL using QLever. + """ + + def __init__(self, name: str, tests: Dict[str, Dict[Tuple[Tuple[str, str], ...], List[TestObject]]], test_count, config: Config, engine_manager: EngineManager): + """ + Constructs all the necessary attributes for the TestSuite object. + + Parameters: + name (str): Name of the current run. + """ + self.name = name + self.config = config + self.tests = tests + self.test_count = test_count + self.passed = 0 + self.failed = 0 + self.passed_failed = 0 + self.engine_manager = engine_manager + + def evaluate_query( + self, + expected_string: str, + query_result: str, + test: TestObject, + result_format: str): + """ + Evaluates a query result based on the expected output and the format. 
+ """ + status = Status.FAILED + error_type = ErrorMessage.RESULTS_NOT_THE_SAME + if result_format == "srx": + status, error_type, expected_html, test_html, expected_red, test_red = compare_xml( + expected_string, query_result, self.config.alias, self.config.number_types) + elif result_format == "srj": + status, error_type, expected_html, test_html, expected_red, test_red = compare_json( + expected_string, query_result, self.config.alias, self.config.number_types) + elif result_format == "csv" or result_format == "tsv": + status, error_type, expected_html, test_html, expected_red, test_red = compare_sv( + expected_string, query_result, result_format, self.config.alias) + elif result_format == "ttl": + status, error_type, expected_html, test_html, expected_red, test_red = compare_ttl( + expected_string, query_result) + else: + expected_html = "" + test_html = "" + expected_red = "" + test_red = "" + + self.update_test_status(test, status, error_type) + setattr(test, "got_html", test_html) + setattr(test, "expected_html", expected_html) + setattr(test, "got_html_red", test_red) + setattr(test, "expected_html_red", expected_red) + + def evaluate_update( + self, + expected_graphs, + graphs, + test: TestObject): + """ + Evaluates the graphs after running the update. + + Parameters: + test (TestObject): Object containing the test being run. + expected_graphs ([str]]): The expected state of each graph. + graphs ([str]): The actual state of our graphs. + """ + status = [Status.FAILED for _ in range(len(expected_graphs))] + error_type = [ErrorMessage.RESULTS_NOT_THE_SAME for _ in range(len(expected_graphs))] + expected_html = ["" for _ in range(len(expected_graphs))] + test_html = ["" for _ in range(len(expected_graphs))] + expected_red = ["" for _ in range(len(expected_graphs))] + test_red = ["" for _ in range(len(expected_graphs))] + assert(len(expected_graphs) == len(graphs)) + for i in range(len(expected_graphs)): + status[i], error_type[i], expected_html[i], test_html[i], expected_red[i], test_red[i] = compare_ttl( + expected_graphs[i], graphs[i]) + + for s, e in zip(status, error_type): + if s != Status.PASSED: + status[0] = s + error_type[0] = e + break + + self.update_test_status(test, status[0], error_type[0]) + t_html = f"default:
{test_html[0]}" + e_html = f"default:
{expected_html[0]}" + t_red = f"default:
{test_red[0]}" + e_red = f"default:
{expected_red[0]}" + i = 1 + for key, value in test.result_files.items(): + t_html += f"

{key}:
{test_html[i]}" + e_html += f"

{key}:
{expected_html[i]}" + t_red += f"

{key}:
{test_red[i]}" + e_red += f"

{key}:
{expected_red[i]}" + i += 1 + + setattr(test, "got_html", t_html) + setattr(test, "expected_html", e_html) + setattr(test, "got_html_red", t_red) + setattr(test, "expected_html_red", e_red) + + def log_for_all_tests(self, list_of_tests: list, attribute: str, log_message: str): + """ + Logs information for all tests of a given graph. + """ + for test in list_of_tests: + setattr(test, attribute, log_message) + + def update_test_status( + self, + test: TestObject, + status: str, + error_type: str): + """ + Updates the status of a test in the test data. + """ + self.log_for_all_tests([test], "status", status) + self.log_for_all_tests([test], "error_type", error_type) + + def update_graph_status( + self, + list_of_tests: list, + status: str, + error_type: str): + """ + Updates the status for all test of a graph. + """ + for test in list_of_tests: + self.update_test_status(test, status, error_type) + + def prepare_test_environment( + self, + graph_paths: Tuple[Tuple[str, str], ...], + list_of_tests: List[TestObject]) -> bool: + """ + Prepares the test environment for a given graph. + + Args: + graph_paths: ex. default graph + named graph (('graph_path', '-'), ('graph_path2', 'graph_name2')) + list_of_tests: [Test1, Test2, ...] + + Returns: + True if the environment is successfully prepared, False otherwise. + """ + self.engine_manager.cleanup(self.config) + index_success, server_success, index_log, server_log = self.engine_manager.setup(self.config, graph_paths) + if not index_success: + self.engine_manager.cleanup(self.config) + self.update_graph_status(list_of_tests, Status.FAILED, ErrorMessage.INDEX_BUILD_ERROR) + if not server_success: + self.engine_manager.cleanup(self.config) + self.update_graph_status(list_of_tests, Status.FAILED, ErrorMessage.SERVER_ERROR) + if isinstance(self.engine_manager, QLeverManager) and index_success and server_success and "Syntax" in list_of_tests[0].type_name: + self.engine_manager.activate_syntax_test_mode(self.config.server_address, self.config.port) + self.log_for_all_tests(list_of_tests, "index_log", index_log) + self.log_for_all_tests(list_of_tests, "server_log", server_log) + return index_success and server_success + + def process_failed_response(self, test, query_response: tuple): + if "exception" in query_response[1]: + query_log = json.loads( + query_response[1])["exception"].replace( + ";", ";\n") + error_type = ErrorMessage.QUERY_EXCEPTION + elif "HTTP Request" in query_response[1]: + error_type = ErrorMessage.REQUEST_ERROR + query_log = query_response[1] + elif "not supported" in query_response[1]: + error_type = ErrorMessage.NOT_SUPPORTED + if "content type" in query_response[1]: + error_type = ErrorMessage.CONTENT_TYPE_NOT_SUPPORTED + query_log = query_response[1] + else: + error_type = ErrorMessage.QUERY_ERROR + query_log = query_response[1] + setattr(test, "query_log", query_log) + self.update_test_status(test, Status.FAILED, error_type) + + def run_query_tests(self, graphs_list_of_tests): + """ + Executes query tests for each graph in the test suite. 
+ """ + for graph in graphs_list_of_tests: + log.info(f"Running query tests for graph / graphs: {graph}") + if not self.prepare_test_environment( + graph, graphs_list_of_tests[graph]): + continue + + for test in graphs_list_of_tests[graph]: + log.info(f"Running: {test.name}") + query_result = self.engine_manager.query( + self.config, + test.query_file, + test.result_format) + if query_result[0] == 200: + self.evaluate_query( + test.result_file, query_result[1], test, test.result_format) + else: + self.process_failed_response(test, query_result) + + if os.path.exists("./TestSuite.server-log.txt"): + server_log = util.read_file("./TestSuite.server-log.txt") + self.log_for_all_tests( + graphs_list_of_tests[graph], + "server_log", + util.remove_date_time_parts(server_log)) + self.engine_manager.cleanup(self.config) + + def run_update_tests(self, graphs_list_of_tests): + """ + Executes update tests for each graph in the test suite. + """ + for graph in graphs_list_of_tests: + log.info(f"Running update tests for graph / graphs: {graph}") + for test in graphs_list_of_tests[graph]: + log.info(f"Running: {test.name}") + if not self.prepare_test_environment( + graph, graphs_list_of_tests[graph]): + # If the environment is not prepared, skip all tests for this graph. + break + # Execute the update query. + query_update_result = self.engine_manager.update(self.config, test.query_file) + + # If the update query was successful, retrieve the current state of all graphs + # and check if the results match the expected results. + if query_update_result[0] == 200: + actual_state_of_graphs = [] + expected_state_of_graphs = [] + # Handle default graph that has no uri + construct_graph = self.engine_manager.query( + self.config, + "CONSTRUCT {?s ?p ?o} WHERE { GRAPH ql:default-graph {?s ?p ?o}}", + "ttl") + actual_state_of_graphs.append(construct_graph[1]) + expected_state_of_graphs.append(test.result_file) + + # Handle named graphs. + if test.result_files: + for graph_label, expected_graph in test.result_files.items(): + construct_graph = self.engine_manager.query( + self.config, + f"CONSTRUCT {{?s ?p ?o}} WHERE {{ GRAPH <{graph_label}> {{?s ?p ?o}}}}", + "ttl") + actual_state_of_graphs.append(construct_graph[1]) + expected_state_of_graphs.append(expected_graph) + + # Evaluate state of graphs. + self.evaluate_update(expected_state_of_graphs, actual_state_of_graphs, test) + else: + self.process_failed_response(test, query_update_result) + + if os.path.exists("./TestSuite.server-log.txt"): + server_log = util.read_file("./TestSuite.server-log.txt") + self.log_for_all_tests( + graphs_list_of_tests[graph], + "server_log", + util.remove_date_time_parts(server_log)) + self.engine_manager.cleanup(self.config) + + def run_syntax_tests(self, graphs_list_of_tests: Dict[Tuple[Tuple[str, str], ...], List[TestObject]]): + """ + Executes query tests for each graph in the test suite. 
+ """ + for graph_path in graphs_list_of_tests: + log.info(f"Running syntax tests for graph: {graph_path}") + if not self.prepare_test_environment( + graph_path, graphs_list_of_tests[graph_path]): + continue + + for test in graphs_list_of_tests[graph_path]: + log.info(f"Running: {test.name}") + result_format = "srx" + if "construct" in test.name: + result_format = "ttl" + if "Update" in test.type_name: + query_result = self.engine_manager.update( + self.config, + test.query_file) + else: + query_result = self.engine_manager.query( + self.config, + test.query_file, + result_format) + + if query_result[0] != 200: + self.process_failed_response(test, query_result) + else: + setattr(test, "query_log", query_result[1]) + self.update_test_status(test, Status.PASSED, "") + if test.type_name == "NegativeSyntaxTest11" or test.type_name == "NegativeUpdateSyntaxTest11": + if ErrorMessage.is_query_error(test.error_type): + status = Status.PASSED + error_type = "" + else: + status = Status.FAILED + error_type = ErrorMessage.EXPECTED_EXCEPTION + self.update_test_status(test, status, error_type) + + if os.path.exists("./TestSuite.server-log.txt"): + server_log = util.read_file("./TestSuite.server-log.txt") + self.log_for_all_tests( + graphs_list_of_tests[graph_path], + "server_log", + util.remove_date_time_parts(server_log)) + self.engine_manager.cleanup(self.config) + + def run_protocol_tests(self, graphs_list_of_tests: Dict[Tuple[Tuple[str, str], ...], List[TestObject]]): + """ + Executes protocol tests for each graph in the test suite. + """ + for graph_path in graphs_list_of_tests: + log.info(f"Running protocol tests for graph: {graph_path}") + # Work around for issue #25, missing data for protocol tests + path_to_data = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data') + graph_paths = graph_path + for i in range(4): + path_to_graph = os.path.join(path_to_data, f"data{i}.rdf") + name_of_graph = f"http://kasei.us/2009/09/sparql/data/data{i}.rdf" + new_path: Tuple[str, str] = (path_to_graph, name_of_graph) + graph_paths = graph_paths + (new_path,) + for test in graphs_list_of_tests[graph_path]: + log.info(f"Running: {test.name}") + if not self.prepare_test_environment( + graph_paths, graphs_list_of_tests[graph_path]): + break + if test.comment: + status, error_type, extracted_expected_responses, extracted_sent_requests, got_responses, newpath = run_protocol_test( + self.engine_manager, test, test.comment, '') + + if os.path.exists("./TestSuite.server-log.txt"): + server_log = util.read_file( + "./TestSuite.server-log.txt") + self.log_for_all_tests( + graphs_list_of_tests[graph_path], + "server_log", + util.remove_date_time_parts(server_log)) + self.engine_manager.cleanup(self.config) + self.update_test_status(test, status, error_type) + else: + extracted_sent_requests = '' + extracted_expected_responses = '' + got_responses = '' + setattr(test, "protocol", test.comment) + setattr(test, "protocol_sent", extracted_sent_requests) + setattr( + test, + "response_extracted", + extracted_expected_responses) + setattr(test, "response", got_responses) + + def run_graphstore_protocol_tests(self, graphs_list_of_tests: Dict[Tuple[Tuple[str, str], ...], List[TestObject]]): + """ + Executes graphstore protocol tests for each graph in the test suite. 
+ """ + for graph_path in graphs_list_of_tests: + log.info(f'Running graphstore protocol tests for graph: {graph_path}') + if not self.prepare_test_environment( + graph_path, graphs_list_of_tests[graph_path]): + break + newpath = '/newpath-not-set' + for test in graphs_list_of_tests[graph_path]: + log.info(f"Running: {test.name}") + if test.comment: + status, error_type, extracted_expected_responses, extracted_sent_requests, got_responses, new_newpath = run_protocol_test( + self.engine_manager, test, test.comment, newpath) + if new_newpath != '': + newpath = new_newpath + self.update_test_status(test, status, error_type) + else: + extracted_sent_requests = '' + extracted_expected_responses = '' + got_responses = '' + setattr(test, 'protocol', test.comment) + setattr(test, 'protocol_sent', extracted_sent_requests) + setattr( + test, + 'response_extracted', + extracted_expected_responses) + setattr(test, 'response', got_responses) + if os.path.exists('./TestSuite.server-log.txt'): + server_log = util.read_file( + './TestSuite.server-log.txt') + self.log_for_all_tests( + graphs_list_of_tests[graph_path], + 'server_log', + util.remove_date_time_parts(server_log)) + self.engine_manager.cleanup(self.config) + + def analyze(self): + """ + Method to index and start the server for a specific test. + """ + graphs_list_of_tests = {k: v for d in self.tests.values() for k, v in d.items()} + for graph_path in graphs_list_of_tests: + log.info(f"Running server for graph: {graph_path}") + if not self.prepare_test_environment( + graph_path, graphs_list_of_tests[graph_path]): + break + print(f"Listening on: {self.config.server_address}:{self.config.port} ...") + print("\n" * 3) + input("Press Enter to shutdown the server and continue...") + self.engine_manager.cleanup(self.config) + + def run(self): + """ + Main method to run all tests. + """ + try: + self.run_query_tests(self.tests["query"]) + self.run_query_tests(self.tests["format"]) + self.run_update_tests(self.tests["update"]) + self.run_syntax_tests(self.tests["syntax"]) + self.run_protocol_tests(self.tests["protocol"]) + self.run_graphstore_protocol_tests(self.tests["graphstoreprotocol"]) + except KeyboardInterrupt: + log.warning("Interrupted by user.") + self.engine_manager.cleanup(self.config) + + def compress_json_bz2(self, input_data, output_filename): + with bz2.open(output_filename, "wt") as zipfile: + json.dump(input_data, zipfile, indent=4) + log.info("Done writing result file: " + output_filename) + + def generate_json_file(self): + """ + Generates a JSON file with the test results. 
+ """ + os.makedirs("./results", exist_ok=True) + file_path = f"./results/{self.name}.json.bz2" + data = {} + + for test_format in self.tests: + for graph in self.tests[test_format]: + for test in self.tests[test_format][graph]: + match test.status: + case Status.PASSED: + self.passed += 1 + case Status.FAILED: + self.failed += 1 + case Status.INTENDED: + self.passed_failed += 1 + # This will add a number behind the name if the name is not + # unique + if test.name in data: + i = 1 + while True: + i += 1 + new_name = f"{test.name} {i}" + if new_name in data: + continue + else: + test.name = new_name + data[new_name] = test.to_dict() + break + else: + data[test.name] = test.to_dict() + data["info"] = { + "name": "info", + "passed": self.passed, + "tests": self.test_count, + "failed": self.failed, + "passedFailed": self.passed_failed, + "notTested": ( + self.test_count - + self.passed - + self.failed - + self.passed_failed)} + log.info("Writing file...") + self.compress_json_bz2(data, file_path) diff --git a/src/sparql_conformance/tsv_csv_tools.py b/src/sparql_conformance/tsv_csv_tools.py new file mode 100644 index 000000000..20d45e420 --- /dev/null +++ b/src/sparql_conformance/tsv_csv_tools.py @@ -0,0 +1,337 @@ +from typing import List, Tuple + +from sparql_conformance.util import escape, is_number +from io import StringIO +import csv +from sparql_conformance.test_object import Status, ErrorMessage + +def _build_column_mapping(expected_header: list, actual_header: list): + """ + Return a list L which aligns actual[row][L[i]] with expected[row][i]. + Example: actual: s p o expected: o p s -> L[0] = 2, L[1] = 1, L[2] = 0 + If no perfect mapping exists, return None. + """ + if len(expected_header) != len(actual_header): + return None + + wanted = expected_header + have = actual_header + + used = set() + mapping = [] + for name in wanted: + idx = None + for j, col in enumerate(have): + if j in used: + continue + if col.strip() == name.strip(): + idx = j + break + if idx is None: + return None + used.add(idx) + mapping.append(idx) + return mapping + + +def _reorder_columns_to_expected(expected_array: list, actual_array: list): + """ + If the first rows (headers) of expected/actual are a permutation of each other, + reorder every row of the actual array to match the expected header order. + Otherwise, just return actual_array. + """ + if not expected_array or not actual_array: + return actual_array + + expected_header = expected_array[0] + actual_header = actual_array[0] + + if sorted(expected_header) != sorted(actual_header): + return actual_array + + mapping = _build_column_mapping(expected_header, actual_header) + if mapping is None: + return actual_array + + def reorder_row(row): + return [row[i] if i < len(row) else "" for i in mapping] + + return [reorder_row(r) for r in actual_array] + + +def write_csv_file(file_path: str, csv_rows: list): + with open(file_path, "w", newline="") as csvfile: + csv_writer = csv.writer(csvfile) + csv_writer.writerows(csv_rows) + + +def row_to_string(row: list, separator: str) -> str: + """ + Converts a row (list of values) to a string representation separated by a specified delimiter. + + Parameters: + row (list): The row to be converted to a string. + separator (str): The separator used to separate the values in the row "," or "\t" + + Returns: + str: A string representation of the row. 
+ """ + result = "" + index = 0 + row_length = len(row) - 1 + for element in row: + if index == row_length: + delimiter = "" + else: + delimiter = separator + element = str(element) + if separator in element: + element = "\"" + element + "\"" + result += element + delimiter + index += 1 + return result + + +def generate_highlighted_string_sv( + array: list, + remaining: list, + mark_red: list, + result_type: str) -> str: + """ + Generates a string representation of an array, with specific rows highlighted. + + Parameters: + array (list): The array to be converted to a string. + mark_red (list): The rows to be highlighted in red. + remaining (list): The rows to be highlighted. + result_type (str): The type of result (csv or tsv) to determine the separator. + + Returns: + str: A string representation of the array with highlighted rows. + """ + separator = "," if result_type == "csv" else "\t" + + result_string = "" + for row in array: + if row in remaining: + if row in mark_red: + result_string += '