From 1a4373ec9b36286c661aa8d15b0ee66d06331ef6 Mon Sep 17 00:00:00 2001 From: tanmay-9 Date: Thu, 17 Apr 2025 16:13:29 +0200 Subject: [PATCH 01/10] Added commands to setup Oxigraph Added all the commands necessary to setup SPARQL endpoint for oxigraph natively and containerized --- pyproject.toml | 2 + src/qoxigraph/__init__.py | 0 src/qoxigraph/commands/__init__.py | 0 src/qoxigraph/commands/example_queries.py | 12 ++ src/qoxigraph/commands/extract_queries.py | 1 + src/qoxigraph/commands/get_data.py | 1 + src/qoxigraph/commands/index.py | 109 ++++++++++++++ src/qoxigraph/commands/log.py | 50 +++++++ src/qoxigraph/commands/query.py | 54 +++++++ src/qoxigraph/commands/setup_config.py | 109 ++++++++++++++ src/qoxigraph/commands/start.py | 175 ++++++++++++++++++++++ src/qoxigraph/commands/status.py | 19 +++ src/qoxigraph/commands/stop.py | 71 +++++++++ 13 files changed, 603 insertions(+) create mode 100644 src/qoxigraph/__init__.py create mode 100644 src/qoxigraph/commands/__init__.py create mode 100644 src/qoxigraph/commands/example_queries.py create mode 120000 src/qoxigraph/commands/extract_queries.py create mode 120000 src/qoxigraph/commands/get_data.py create mode 100644 src/qoxigraph/commands/index.py create mode 100644 src/qoxigraph/commands/log.py create mode 100644 src/qoxigraph/commands/query.py create mode 100644 src/qoxigraph/commands/setup_config.py create mode 100644 src/qoxigraph/commands/start.py create mode 100644 src/qoxigraph/commands/status.py create mode 100644 src/qoxigraph/commands/stop.py diff --git a/pyproject.toml b/pyproject.toml index b053fe62d..12c82f470 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,6 +27,8 @@ Github = "https://github.com/ad-freiburg/qlever" [project.scripts] "qlever" = "qlever.qlever_main:main" +"qoxigraph" = "qlever.qlever_main:main" +"qlever-old" = "qlever.qlever_old:main" [tool.setuptools] license-files = ["LICENSE"] diff --git a/src/qoxigraph/__init__.py b/src/qoxigraph/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/qoxigraph/commands/__init__.py b/src/qoxigraph/commands/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/qoxigraph/commands/example_queries.py b/src/qoxigraph/commands/example_queries.py new file mode 100644 index 000000000..d62982a83 --- /dev/null +++ b/src/qoxigraph/commands/example_queries.py @@ -0,0 +1,12 @@ +from __future__ import annotations + +from qlever.commands.example_queries import ( + ExampleQueriesCommand as QleverExampleQueriesCommand, +) + + +class ExampleQueriesCommand(QleverExampleQueriesCommand): + def execute(self, args) -> bool: + if not args.sparql_endpoint: + args.sparql_endpoint = f"localhost:{args.port}/query" + return super().execute(args) diff --git a/src/qoxigraph/commands/extract_queries.py b/src/qoxigraph/commands/extract_queries.py new file mode 120000 index 000000000..5667cc52a --- /dev/null +++ b/src/qoxigraph/commands/extract_queries.py @@ -0,0 +1 @@ +../../qlever/commands/extract_queries.py \ No newline at end of file diff --git a/src/qoxigraph/commands/get_data.py b/src/qoxigraph/commands/get_data.py new file mode 120000 index 000000000..4900dbb87 --- /dev/null +++ b/src/qoxigraph/commands/get_data.py @@ -0,0 +1 @@ +../../qlever/commands/get_data.py \ No newline at end of file diff --git a/src/qoxigraph/commands/index.py b/src/qoxigraph/commands/index.py new file mode 100644 index 000000000..128b9a826 --- /dev/null +++ b/src/qoxigraph/commands/index.py @@ -0,0 +1,109 @@ +from __future__ import annotations + +import glob +import shlex +from pathlib import Path + +from qlever.command import QleverCommand +from qlever.containerize import Containerize +from qlever.log import log +from qlever.util import binary_exists, run_command + + +class IndexCommand(QleverCommand): + def __init__(self): + self.script_name = "qoxigraph" + + def description(self) -> str: + return "Build the index for a given RDF dataset" + + def should_have_qleverfile(self) -> bool: + return True + + def relevant_qleverfile_arguments(self) -> dict[str : list[str]]: + return { + "data": ["name", "format"], + "index": ["input_files"], + "runtime": ["system", "image", "index_container"], + } + + def additional_arguments(self, subparser): + subparser.add_argument( + "--index-binary", + type=str, + default="oxigraph", + help=( + "The binary for building the index (default: oxigraph) " + "(this requires that you have oxigraph-cli installed " + "on your machine)" + ), + ) + + @staticmethod + def wrap_cmd_in_container(args, cmd: str) -> str: + return Containerize().containerize_command( + cmd=cmd, + container_system=args.system, + run_subcommand="run --rm", + image_name=args.image, + container_name=args.index_container, + volumes=[("$(pwd)", "/index")], + working_directory="/index", + use_bash=False, + ) + + def execute(self, args) -> bool: + index_cmd = f"load --location . --file {args.input_files}" + index_cmd += f" |& tee {args.name}.index-log.txt" + + index_cmd = ( + f"{args.index_binary} {index_cmd}" + if args.system == "native" + else self.wrap_cmd_in_container(args, index_cmd) + ) + + # Show the command line. + self.show(index_cmd, only_show=args.show) + if args.show: + return True + + # Check if all of the input files exist. + for pattern in shlex.split(args.input_files): + if len(glob.glob(pattern)) == 0: + log.error(f'No file matching "{pattern}" found') + log.info("") + log.info( + f"Did you call `{self.script_name} get-data`? If you did, " + "check GET_DATA_CMD and INPUT_FILES in the Qleverfile" + ) + return False + + # When running natively, check if the binary exists and works. + if args.system == "native": + if not binary_exists(args.index_binary, "index-binary"): + return False + else: + if Containerize().is_running(args.system, args.index_container): + log.info( + f"{args.system} container {args.index_container} is still up, " + "which means that data loading is in progress. Please wait..." + ) + return False + + if len([p.name for p in Path.cwd().glob("*.sst")]) != 0: + log.error( + "Index files (*.sst) found in current directory " + "which shows presence of a previous index" + ) + log.info("") + log.info("Aborting the index operation...") + return False + + # Run the index command. + try: + run_command(index_cmd, show_output=True, show_stderr=True) + except Exception as e: + log.error(f"Building the index failed: {e}") + return False + + return True diff --git a/src/qoxigraph/commands/log.py b/src/qoxigraph/commands/log.py new file mode 100644 index 000000000..a90d22288 --- /dev/null +++ b/src/qoxigraph/commands/log.py @@ -0,0 +1,50 @@ +from __future__ import annotations + +from qlever.commands.log import LogCommand as QleverLogCommand +from qlever.containerize import Containerize +from qlever.log import log +from qlever.util import run_command + + +class LogCommand(QleverLogCommand): + def __init__(self): + self.script_name = "qoxigraph" + + def relevant_qleverfile_arguments(self) -> dict[str : list[str]]: + return { + "data": ["name"], + "runtime": [ + "system", + "image", + "server_container", + ], + } + + def execute(self, args) -> bool: + if args.system == "native": + return super().execute(args) + + log_cmd = f"{args.system} logs " + + if not args.from_beginning: + log_cmd += f"-n {args.tail_num_lines} " + if not args.no_follow: + log_cmd += "-f " + + log_cmd += args.server_container + + # Show the command line. + self.show(log_cmd, only_show=args.show) + if args.show: + return True + + if not Containerize().is_running(args.system, args.server_container): + log.error(f"No server container {args.server_container} found!\n") + log.info(f"Are you sure you called `{self.script_name} start`?") + return False + + try: + run_command(log_cmd, show_output=True, show_stderr=True) + except Exception as e: + log.error(f"Cannot display container logs - {e}") + return True diff --git a/src/qoxigraph/commands/query.py b/src/qoxigraph/commands/query.py new file mode 100644 index 000000000..6518905f3 --- /dev/null +++ b/src/qoxigraph/commands/query.py @@ -0,0 +1,54 @@ +from __future__ import annotations + +from qlever.commands.query import QueryCommand as QleverQueryCommand + + +class QueryCommand(QleverQueryCommand): + def additional_arguments(self, subparser) -> None: + subparser.add_argument( + "query", + type=str, + nargs="?", + default="SELECT * WHERE { ?s ?p ?o } LIMIT 10", + help="SPARQL query to send", + ) + subparser.add_argument( + "--predefined-query", + type=str, + choices=self.predefined_queries.keys(), + help="Use a predefined query", + ) + subparser.add_argument( + "--sparql-endpoint", type=str, help="URL of the SPARQL endpoint" + ) + subparser.add_argument( + "--accept", + type=str, + choices=[ + "text/tab-separated-values", + "text/csv", + "application/sparql-results+json", + "application/sparql-results+xml", + ], + default="text/tab-separated-values", + help="Accept header for the SPARQL query", + ) + subparser.add_argument( + "--get", + action="store_true", + default=False, + help="Use GET request instead of POST", + ) + subparser.add_argument( + "--no-time", + action="store_true", + default=False, + help="Do not print the (end-to-end) time taken", + ) + + def execute(self, args) -> bool: + if not args.sparql_endpoint: + args.sparql_endpoint = f"localhost:{args.port}/query" + args.pin_to_cache = None + args.access_token = None + super().execute(args) diff --git a/src/qoxigraph/commands/setup_config.py b/src/qoxigraph/commands/setup_config.py new file mode 100644 index 000000000..b6d9225ba --- /dev/null +++ b/src/qoxigraph/commands/setup_config.py @@ -0,0 +1,109 @@ +from __future__ import annotations + +from configparser import RawConfigParser +from pathlib import Path + +from qlever.command import QleverCommand +from qlever.log import log +from qlever.qleverfile import Qleverfile + + +class SetupConfigCommand(QleverCommand): + IMAGE = "ghcr.io/oxigraph/oxigraph" + + FILTER_CRITERIA = { + "data": [], + "index": ["INPUT_FILES"], + "server": ["PORT"], + "runtime": ["SYSTEM", "IMAGE"], + "ui": ["UI_CONFIG"], + } + + def __init__(self): + self.qleverfiles_path = ( + Path(__file__).parent.parent.parent / "qlever" / "Qleverfiles" + ) + self.qleverfile_names = [ + p.name.split(".")[1] + for p in self.qleverfiles_path.glob("Qleverfile.*") + ] + + def description(self) -> str: + return "Get a pre-configured Qleverfile" + + def should_have_qleverfile(self) -> bool: + return False + + def relevant_qleverfile_arguments(self) -> dict[str : list[str]]: + return {} + + def additional_arguments(self, subparser) -> None: + subparser.add_argument( + "config_name", + type=str, + choices=self.qleverfile_names, + help="The name of the pre-configured Qleverfile to create", + ) + + def validate_qleverfile_setup( + self, args, qleverfile_path: Path + ) -> bool | None: + # Construct the command line and show it. + setup_config_show = ( + f"Creating Qleverfile for {args.config_name} using " + f"Qleverfile.{args.config_name} file in {self.qleverfiles_path}" + ) + self.show(setup_config_show, only_show=args.show) + if args.show: + return True + + # If there is already a Qleverfile in the current directory, exit. + if qleverfile_path.exists(): + log.error("`Qleverfile` already exists in current directory") + log.info("") + log.info( + "If you want to create a new Qleverfile using " + "`qlever setup-config`, delete the existing Qleverfile " + "first" + ) + return False + return None + + def get_filtered_qleverfile_parser( + self, config_name: str + ) -> RawConfigParser: + qleverfile_config_path = ( + self.qleverfiles_path / f"Qleverfile.{config_name}" + ) + qleverfile_parser = Qleverfile.filter( + qleverfile_config_path, self.FILTER_CRITERIA + ) + if qleverfile_parser.has_section("runtime"): + qleverfile_parser.set("runtime", "IMAGE", self.IMAGE) + return qleverfile_parser + + def execute(self, args) -> bool: + qleverfile_path = Path("Qleverfile") + exit_status = self.validate_qleverfile_setup(args, qleverfile_path) + if exit_status is not None: + return exit_status + + qleverfile_parser = self.get_filtered_qleverfile_parser( + args.config_name + ) + # Copy the Qleverfile to the current directory. + try: + with qleverfile_path.open("w") as f: + qleverfile_parser.write(f) + except Exception as e: + log.error( + f'Could not copy "{qleverfile_path}" to current directory: {e}' + ) + return False + + # If we get here, everything went well. + log.info( + f'Created Qleverfile for config "{args.config_name}"' + f" in current directory" + ) + return True diff --git a/src/qoxigraph/commands/start.py b/src/qoxigraph/commands/start.py new file mode 100644 index 000000000..8a038344d --- /dev/null +++ b/src/qoxigraph/commands/start.py @@ -0,0 +1,175 @@ +from __future__ import annotations + +import subprocess +import time +from pathlib import Path + +from qlever.command import QleverCommand +from qlever.containerize import Containerize +from qlever.log import log +from qlever.util import binary_exists, is_server_alive, run_command + + +class StartCommand(QleverCommand): + def __init__(self): + self.script_name = "qoxigraph" + + def description(self) -> str: + return ( + "Start the server for Oxigraph (requires that you have built an " + "index before)" + ) + + def should_have_qleverfile(self) -> bool: + return True + + def relevant_qleverfile_arguments(self) -> dict[str : list[str]]: + return { + "data": ["name"], + "server": ["host_name", "port"], + "runtime": ["system", "image", "server_container"], + } + + def additional_arguments(self, subparser): + subparser.add_argument( + "--run-in-foreground", + action="store_true", + default=False, + help=( + "Run the start command in the foreground " + "(default: run in the background)" + ), + ) + subparser.add_argument( + "--server-binary", + type=str, + default="oxigraph", + help=( + "The binary for starting the server (default: oxigraph) " + "(this requires that you have oxigraph-cli installed " + "on your machine)" + ), + ) + + @staticmethod + def wrap_cmd_in_container(args, cmd: str) -> str: + run_subcommand = "run --restart=unless-stopped" + if not args.run_in_foreground: + run_subcommand += " -d" + return Containerize().containerize_command( + cmd=cmd, + container_system=args.system, + run_subcommand=run_subcommand, + image_name=args.image, + container_name=args.server_container, + volumes=[("$(pwd)", "/index")], + ports=[(args.port, args.port)], + working_directory="/index", + use_bash=False, + ) + + def execute(self, args) -> bool: + bind = ( + f"{args.host_name}:{args.port}" + if args.system == "native" + else f"0.0.0.0:{args.port}" + ) + start_cmd = f"serve-read-only --location . --bind={bind}" + + if args.system == "native": + start_cmd = f"{args.server_binary} {start_cmd}" + if not args.run_in_foreground: + start_cmd = ( + f"nohup {start_cmd} > {args.name}.server-log.txt 2>&1 &" + ) + else: + start_cmd = self.wrap_cmd_in_container(args, start_cmd) + + # Show the command line. + self.show(start_cmd, only_show=args.show) + if args.show: + return True + + endpoint_url = f"http://{args.host_name}:{args.port}/query" + + # When running natively, check if the binary exists and works. + if args.system == "native": + if not binary_exists(args.server_binary, "server-binary"): + return False + else: + if Containerize().is_running(args.system, args.server_container): + log.error( + f"Server container {args.server_container} already exists!\n" + ) + log.info( + f"To kill the existing server, use `{self.script_name} stop`" + ) + return False + + # Check if index files (*.sst) present in cwd + if len([p.name for p in Path.cwd().glob("*.sst")]) == 0: + log.error(f"No Oxigraph index files for {args.name} found!\n") + log.info( + f"Did you call `{self.script_name} index`? If you did, check " + "if .sst index files are present in current working directory." + ) + return False + + if is_server_alive(url=endpoint_url): + log.error(f"Oxigraph server already running on {endpoint_url}\n") + log.info( + f"To kill the existing server, use `{self.script_name} stop`" + ) + return False + + try: + process = run_command( + start_cmd, + use_popen=args.run_in_foreground, + ) + except Exception as e: + log.error(f"Starting the Oxigraph server failed ({e})") + return False + + # Tail the server log until the server is ready (note that the `exec` + # is important to make sure that the tail process is killed and not + # just the bash process). + if args.run_in_foreground: + log.info( + "Follow the server logs as long as the server is" + " running (Ctrl-C stops the server)" + ) + else: + log.info( + "Follow the server logs until the server is ready" + " (Ctrl-C stops following the log, but NOT the server)" + ) + log.info("") + if args.system == "native": + log_cmd = f"exec tail -f {args.name}.server-log.txt" + else: + time.sleep(2) + log_cmd = f"exec {args.system} logs -f {args.server_container}" + log_proc = subprocess.Popen(log_cmd, shell=True) + while not is_server_alive(endpoint_url): + time.sleep(1) + + log.info( + f"Oxigraph server webapp for {args.name} will be available at " + f"http://{args.host_name}:{args.port} and the sparql endpoint for " + f"queries is {endpoint_url} when the server is ready" + ) + + # Kill the log process + if not args.run_in_foreground: + log_proc.terminate() + + # With `--run-in-foreground`, wait until the server is stopped. + if args.run_in_foreground: + try: + process.wait() + except KeyboardInterrupt: + process.terminate() + log_proc.terminate() + + return True diff --git a/src/qoxigraph/commands/status.py b/src/qoxigraph/commands/status.py new file mode 100644 index 000000000..eb2de86cf --- /dev/null +++ b/src/qoxigraph/commands/status.py @@ -0,0 +1,19 @@ +from __future__ import annotations + +from qlever.commands.status import StatusCommand as QleverStatusCommand + + +class StatusCommand(QleverStatusCommand): + DEFAULT_REGEX = "oxigraph\\s+serve-read-only" + + def description(self) -> str: + return "Show Oxigraph processes running on this machine" + + def additional_arguments(self, subparser) -> None: + subparser.add_argument( + "--cmdline-regex", + default=self.DEFAULT_REGEX, + help=( + "Show only processes where the command line matches this regex" + ), + ) diff --git a/src/qoxigraph/commands/stop.py b/src/qoxigraph/commands/stop.py new file mode 100644 index 000000000..ed9c10369 --- /dev/null +++ b/src/qoxigraph/commands/stop.py @@ -0,0 +1,71 @@ +from __future__ import annotations + +from qlever.command import QleverCommand +from qlever.commands import stop as qlever_stop +from qlever.log import log +from qoxigraph.commands.status import StatusCommand + + +class StopCommand(QleverCommand): + # Override this with StatusCommand from child class for execute + # method to work as intended + STATUS_COMMAND = StatusCommand() + DEFAULT_REGEX = "oxigraph\\s+serve-read-only.*:%%PORT%%" + + def __init__(self): + pass + + def description(self) -> str: + return "Stop Oxigraph server for a given dataset or port" + + def should_have_qleverfile(self) -> bool: + return True + + def relevant_qleverfile_arguments(self) -> dict[str : list[str]]: + return { + "data": ["name"], + "server": ["port"], + "runtime": ["system", "server_container"], + } + + def additional_arguments(self, subparser) -> None: + subparser.add_argument( + "--cmdline-regex", + default=self.DEFAULT_REGEX, + help="Show only processes where the command " + "line matches this regex", + ) + + def execute(self, args) -> bool: + cmdline_regex = args.cmdline_regex.replace("%%PORT%%", str(args.port)) + description = ( + f'Checking for processes matching "{cmdline_regex}"' + if args.system == "native" + else f"Checking for container with name {args.server_container}" + ) + + self.show(description, only_show=args.show) + if args.show: + return True + + if args.system == "native": + stop_process_results = ( + qlever_stop.StopCommand().stop_process_with_regex( + cmdline_regex + ) + ) + if stop_process_results is None: + return False + if len(stop_process_results) > 0: + return all(stop_process_results) + + # If no matching process found, show a message and the output of the + # status command. + log.error("No matching process found") + args.cmdline_regex = self.STATUS_COMMAND.DEFAULT_REGEX + log.info("") + StatusCommand().execute(args) + return True + + # First check if container is running and if yes, stop and remove it + return qlever_stop.stop_container(args.server_container) From 956a59df7e60f71f57315173c36ca81acec13ba8 Mon Sep 17 00:00:00 2001 From: tanmay-9 Date: Mon, 19 May 2025 23:51:47 +0200 Subject: [PATCH 02/10] Fix host_name bug in example_querie and add util stop_with_regex command to stop --- src/qoxigraph/commands/example_queries.py | 2 +- src/qoxigraph/commands/stop.py | 7 ++----- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/src/qoxigraph/commands/example_queries.py b/src/qoxigraph/commands/example_queries.py index d62982a83..4ef76c241 100644 --- a/src/qoxigraph/commands/example_queries.py +++ b/src/qoxigraph/commands/example_queries.py @@ -8,5 +8,5 @@ class ExampleQueriesCommand(QleverExampleQueriesCommand): def execute(self, args) -> bool: if not args.sparql_endpoint: - args.sparql_endpoint = f"localhost:{args.port}/query" + args.sparql_endpoint = f"{args.host_name}:{args.port}/query" return super().execute(args) diff --git a/src/qoxigraph/commands/stop.py b/src/qoxigraph/commands/stop.py index ed9c10369..dedd1ff2d 100644 --- a/src/qoxigraph/commands/stop.py +++ b/src/qoxigraph/commands/stop.py @@ -3,6 +3,7 @@ from qlever.command import QleverCommand from qlever.commands import stop as qlever_stop from qlever.log import log +from qlever.util import stop_process_with_regex from qoxigraph.commands.status import StatusCommand @@ -49,11 +50,7 @@ def execute(self, args) -> bool: return True if args.system == "native": - stop_process_results = ( - qlever_stop.StopCommand().stop_process_with_regex( - cmdline_regex - ) - ) + stop_process_results = stop_process_with_regex(cmdline_regex) if stop_process_results is None: return False if len(stop_process_results) > 0: From 1826f61ca75e2e92d09dc117cac71a6ff743a89a Mon Sep 17 00:00:00 2001 From: tanmay-9 Date: Thu, 13 Mar 2025 13:59:27 +0100 Subject: [PATCH 03/10] Added commands to setup sparql-endpoint for Jena --- pyproject.toml | 1 + src/qjena/Dockerfile | 38 ++++++ src/qjena/__init__.py | 0 src/qjena/commands/__init__.py | 0 src/qjena/commands/example_queries.py | 9 ++ src/qjena/commands/extract_queries.py | 9 ++ src/qjena/commands/get_data.py | 9 ++ src/qjena/commands/index.py | 125 ++++++++++++++++++++ src/qjena/commands/log.py | 32 ++++++ src/qjena/commands/query.py | 9 ++ src/qjena/commands/setup_config.py | 11 ++ src/qjena/commands/start.py | 160 ++++++++++++++++++++++++++ src/qjena/commands/stop.py | 8 ++ 13 files changed, 411 insertions(+) create mode 100644 src/qjena/Dockerfile create mode 100644 src/qjena/__init__.py create mode 100644 src/qjena/commands/__init__.py create mode 100644 src/qjena/commands/example_queries.py create mode 100644 src/qjena/commands/extract_queries.py create mode 100644 src/qjena/commands/get_data.py create mode 100644 src/qjena/commands/index.py create mode 100644 src/qjena/commands/log.py create mode 100644 src/qjena/commands/query.py create mode 100644 src/qjena/commands/setup_config.py create mode 100644 src/qjena/commands/start.py create mode 100644 src/qjena/commands/stop.py diff --git a/pyproject.toml b/pyproject.toml index 12c82f470..718748d76 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,6 +27,7 @@ Github = "https://github.com/ad-freiburg/qlever" [project.scripts] "qlever" = "qlever.qlever_main:main" +"qjena" = "qlever.qlever_main:main" "qoxigraph" = "qlever.qlever_main:main" "qlever-old" = "qlever.qlever_old:main" diff --git a/src/qjena/Dockerfile b/src/qjena/Dockerfile new file mode 100644 index 000000000..43f612832 --- /dev/null +++ b/src/qjena/Dockerfile @@ -0,0 +1,38 @@ +# Use an official OpenJDK runtime as a parent image +FROM openjdk:21-jdk-slim + +RUN apt-get update && apt-get install -y wget unzip jq + +WORKDIR /opt + +# Download and extract Apache Jena Fuseki and Apache Jena using the latest version dynamically +RUN LATEST_VERSION=$(wget -qO- https://dlcdn.apache.org/jena/binaries/ \ + | grep -oP 'apache-jena-\K[0-9]+\.[0-9]+\.[0-9]+' \ + | sort -V \ + | tail -n 1) \ + && wget https://dlcdn.apache.org/jena/binaries/apache-jena-fuseki-${LATEST_VERSION}.zip \ + && unzip apache-jena-fuseki-${LATEST_VERSION}.zip \ + && rm -f apache-jena-fuseki-${LATEST_VERSION}.zip \ + && wget https://dlcdn.apache.org/jena/binaries/apache-jena-${LATEST_VERSION}.zip \ + && unzip apache-jena-${LATEST_VERSION}.zip \ + && rm -f apache-jena-${LATEST_VERSION}.zip \ + && mv apache-jena-${LATEST_VERSION} /opt/apache-jena \ + && mv apache-jena-fuseki-${LATEST_VERSION} /opt/apache-jena-fuseki + +# Set ownership to the user passed by UID and GID +ARG UID +ARG GID +RUN if [ "${UID:-}" != "" ] && [ "${GID:-}" != "" ]; then \ + chown -R ${UID}:${GID} /opt; \ + fi + +# Ensure the bin and fuseki folders are in PATH +ENV PATH="/opt/apache-jena/bin:${PATH}" + +# Make sure scripts are executable +RUN chmod +x /opt/apache-jena/bin/* + +RUN chmod +x /opt/apache-jena-fuseki/fuseki-server.jar + +# Set entrypoint +CMD ["bash"] diff --git a/src/qjena/__init__.py b/src/qjena/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/qjena/commands/__init__.py b/src/qjena/commands/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/qjena/commands/example_queries.py b/src/qjena/commands/example_queries.py new file mode 100644 index 000000000..8afac4500 --- /dev/null +++ b/src/qjena/commands/example_queries.py @@ -0,0 +1,9 @@ +from __future__ import annotations + +from qlever.commands import example_queries + + +class ExampleQueriesCommand(example_queries.ExampleQueriesCommand): + """ + Should behave exactly the same as example-queries command in qlever + """ diff --git a/src/qjena/commands/extract_queries.py b/src/qjena/commands/extract_queries.py new file mode 100644 index 000000000..f361ff5f1 --- /dev/null +++ b/src/qjena/commands/extract_queries.py @@ -0,0 +1,9 @@ +from __future__ import annotations + +from qlever.commands import extract_queries + + +class ExtractQueriesCommand(extract_queries.ExtractQueriesCommand): + """ + Should behave exactly the same as extract-queries command in qlever + """ diff --git a/src/qjena/commands/get_data.py b/src/qjena/commands/get_data.py new file mode 100644 index 000000000..0d570233e --- /dev/null +++ b/src/qjena/commands/get_data.py @@ -0,0 +1,9 @@ +from __future__ import annotations + +from qlever.commands import get_data + + +class GetDataCommand(get_data.GetDataCommand): + """ + Should behave exactly the same as get-data command in qlever + """ diff --git a/src/qjena/commands/index.py b/src/qjena/commands/index.py new file mode 100644 index 000000000..7966d650b --- /dev/null +++ b/src/qjena/commands/index.py @@ -0,0 +1,125 @@ +from __future__ import annotations + +import glob +import shlex +from pathlib import Path + +from qlever.command import QleverCommand +from qlever.containerize import Containerize +from qlever.log import log +from qlever.util import run_command + + +class IndexCommand(QleverCommand): + def __init__(self): + self.script_name = "qjena" + + def description(self) -> str: + return "Build the index for a given RDF dataset" + + def should_have_qleverfile(self) -> bool: + return True + + def relevant_qleverfile_arguments(self) -> dict[str : list[str]]: + return { + "data": ["name", "format"], + "index": ["input_files"], + "server": ["port"], + "runtime": ["system", "image", "server_container"], + } + + def additional_arguments(self, subparser): + pass + + def build_image(self, build_cmd: str, system: str, image: str) -> bool: + try: + run_command(build_cmd, show_output=True) + return True + except Exception as e: + log.error(f"Building the {system} image {image} failed: {e}") + return False + + def execute(self, args) -> bool: + system = args.system + input_files = args.input_files + server_container = args.server_container + run_subcommand = "run -d" + + loading_flag = "/opt/loading.flag" + index_cmd = f"touch {loading_flag} && " + index_cmd += ( + f"tdb2.xloader --loc index data/{input_files} " + ">> /opt/data/index.log 2>&1 && " + ) + index_cmd += f"rm {loading_flag} && " + index_cmd += "tail -f /dev/null" + + index_cmd = Containerize().containerize_command( + cmd=index_cmd, + container_system=system, + run_subcommand=run_subcommand, + image_name=args.image, + container_name=server_container, + volumes=[("$(pwd)", "/opt/data")], + ports=[(int(args.port), 3030)], + ) + + dockerfile_dir = Path(__file__).parent.parent + dockerfile_path = dockerfile_dir / "Dockerfile" + build_cmd = ( + f"{system} build -f {dockerfile_path} -t {args.image} --build-arg " + f"UID=$(id -u) --build-arg GID=$(id -g) {dockerfile_dir}" + ) + image_id = run_command( + f"{system} images -q {args.image}", return_output=True + ) + cmd_to_show = ( + f"{build_cmd}\n\n{index_cmd}" if not image_id else index_cmd + ) + + # Show the command line. + self.show(cmd_to_show, only_show=args.show) + if args.show: + return True + + # Check if all of the input files exist. + for pattern in shlex.split(input_files): + if len(glob.glob(pattern)) == 0: + log.error(f'No file matching "{pattern}" found') + log.info("") + log.info( + f"Did you call `{self.script_name} get-data`? If you did, " + "check GET_DATA_CMD and INPUT_FILES in the Qleverfile" + ) + return False + + if Containerize().is_running(system, server_container): + log.error( + f"{system} container {server_container} exists, " + f"which means that server for {args.name} is already running. \n" + f"Stop the container {server_container} with `{self.script_name} " + "stop` first before loading the data." + ) + return False + + if not image_id: + build_successful = self.build_image(build_cmd, system, args.image) + if not build_successful: + return False + else: + log.info( + f"{args.image} image present on the system. Executing command..." + ) + + # Run the index command. + try: + run_command(index_cmd) + log.info( + f"Follow `{self.script_name} log` until data loading is finished." + f" (Ctrl-C stops following the log, but NOT the data loading!)" + ) + except Exception as e: + log.error(f"Building the index failed: {e}") + return False + + return True diff --git a/src/qjena/commands/log.py b/src/qjena/commands/log.py new file mode 100644 index 000000000..3e4c2640f --- /dev/null +++ b/src/qjena/commands/log.py @@ -0,0 +1,32 @@ +from __future__ import annotations + +from pathlib import Path + +from qlever.commands import log as log_command + + +class LogCommand(log_command.LogCommand): + def __init__(self): + self.script_name = "qjena" + + def description(self) -> str: + return ( + "Show the last lines of the server or index log file and follow it. " + "(Default: server log if it exists otherwise index log)" + ) + + def additional_arguments(self, subparser) -> None: + super().additional_arguments(subparser) + subparser.add_argument( + "--index-log", + action="store_true", + default=False, + help=("Follow the index log (default: follow server log)"), + ) + + def execute(self, args) -> bool: + log_file = "server.log" + if not Path("server.log").exists() or args.index_log: + log_file = "index.log" + args.log_file = log_file + return super().execute(args) diff --git a/src/qjena/commands/query.py b/src/qjena/commands/query.py new file mode 100644 index 000000000..5cac4f60c --- /dev/null +++ b/src/qjena/commands/query.py @@ -0,0 +1,9 @@ +from __future__ import annotations + +from qoxigraph.commands import query + + +class QueryCommand(query.QueryCommand): + """ + Should behave exactly the same as query command in qoxigraph + """ diff --git a/src/qjena/commands/setup_config.py b/src/qjena/commands/setup_config.py new file mode 100644 index 000000000..643f8e8ec --- /dev/null +++ b/src/qjena/commands/setup_config.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from qoxigraph.commands import setup_config + + +class SetupConfigCommand(setup_config.SetupConfigCommand): + """ + Should behave exactly the same as setup-config command in qoxigraph, + just with a different Docker image name + """ + IMAGE = "adfreiburg/qjena" diff --git a/src/qjena/commands/start.py b/src/qjena/commands/start.py new file mode 100644 index 000000000..e4e030096 --- /dev/null +++ b/src/qjena/commands/start.py @@ -0,0 +1,160 @@ +from __future__ import annotations + +from qlever.command import QleverCommand +from qlever.containerize import Containerize +from qlever.log import log +from qlever.util import run_command + + +class StartCommand(QleverCommand): + def __init__(self): + self.script_name = "qjena" + + def description(self) -> str: + return ( + "Start the server for Jena (requires that you have built an " + "index before) (Runs in a container)" + ) + + def should_have_qleverfile(self) -> bool: + return True + + def relevant_qleverfile_arguments(self) -> dict[str : list[str]]: + return { + "data": ["name"], + "server": ["host_name", "port"], + "runtime": ["system", "image", "server_container"], + } + + def additional_arguments(self, subparser): + subparser.add_argument( + "--run-in-foreground", + action="store_true", + default=False, + help=( + "Run the start command in the foreground " + "(default: run in the background)" + ), + ) + + @staticmethod + def is_data_loading(system: str, container: str) -> bool: + """ + Check if `index` command is still running and data loading + is in progress + """ + check_index_running_cmd = ( + f"{system} exec {container} bash -c " + "\"test -f /opt/loading.flag && echo 'running' || " + "echo 'finished'\"" + ) + index_ps_running = run_command( + check_index_running_cmd, return_output=True + ) + return index_ps_running.strip() == "running" + + @staticmethod + def index_exists(system: str, container: str) -> bool: + """ + Check if the index was built correctly and the index folder Data-0001 + exists in /opt/index directory in the container + """ + check_index_cmd = ( + f"{system} exec {container} bash -c " + "\"test -d /opt/index && test -d /opt/index/Data-0001 " + "&& echo 'exists' || echo 'missing'\"" + ) + index_exists = run_command(check_index_cmd, return_output=True) + return index_exists.strip() == "exists" + + @staticmethod + def is_server_alive(url: str) -> bool: + """ + Check if the Jena server is already alive at the given endpoint url + """ + check_server_cmd = ( + f"curl -s {url} && echo 'alive' || echo 'not'" + ) + is_server_alive = run_command(check_server_cmd, return_output=True) + return "alive" in is_server_alive.strip() + + def execute(self, args) -> bool: + system = args.system + dataset = args.name + + server_container = args.server_container + + port = int(args.port) + exec_cmd = ( + f"{system} exec {'-d ' if not args.run_in_foreground else ''}" + f"{server_container} bash -c " + ) + serve_cmd = ( + '"java -jar /opt/apache-jena-fuseki/fuseki-server.jar --port 3030 ' + f'--loc /opt/index /{args.name} > /opt/data/server.log 2>&1 &"' + ) + start_cmd = exec_cmd + serve_cmd + + # Show the command line. + self.show(start_cmd, only_show=args.show) + if args.show: + return True + + # Warn if server container not running (i.e. index not built) + if not Containerize().is_running(system, server_container): + log.error( + f"{system} container {server_container} does not exist! " + f"Did you call `{self.script_name} index`?" + ) + return False + # Check if index process ongoing + if self.is_data_loading(system, server_container): + log.error( + "Data loading is in progress. Please wait...\n" + f"Check status of {server_container} with " + f"`{self.script_name} log`" + ) + return False + + # Check if index folder Data-0001 missing + if not self.index_exists(system, server_container): + log.error( + f"Index folder Data-0001 missing in {system} container " + f"{server_container}! Did you call " + f"`{self.script_name} index`?" + ) + return False + + # Check and warn if server already running + endpoint_url = f"http://{args.host_name}:{port}" + server_url = f"{endpoint_url}/{args.name}/query" + if self.is_server_alive(server_url): + log.error(f"Jena server already running on {server_url}") + log.info( + f"To kill the existing server, use `{self.script_name} stop` " + ) + return False + + # Run the start command. + try: + run_command(start_cmd, show_output=True) + log.info( + f"Jena server webapp for {dataset} will be available at " + f"{endpoint_url} and the sparql endpoint for " + f"queries is {server_url}" + ) + if args.run_in_foreground: + log.info( + "Follow the log as long as the server is" + " running (Ctrl-C stops the server)" + ) + else: + log.info( + f"Follow `{self.script_name} log` until the server is ready" + f" (Ctrl-C stops following the log, but NOT the server)" + ) + except Exception as e: + log.error(f"Starting the Jena server failed: {e}") + return False + + return True diff --git a/src/qjena/commands/stop.py b/src/qjena/commands/stop.py new file mode 100644 index 000000000..d44de054f --- /dev/null +++ b/src/qjena/commands/stop.py @@ -0,0 +1,8 @@ +from __future__ import annotations + +from qoxigraph.commands import stop + + +class StopCommand(stop.StopCommand): + def description(self) -> str: + return "Stop Jena server for a given dataset or port" From d14a74d6596d0e5cb9966d0a5c1381ad67eb1ec2 Mon Sep 17 00:00:00 2001 From: tanmay-9 Date: Sat, 15 Mar 2025 13:40:17 +0100 Subject: [PATCH 04/10] Add native support for Jena --- src/qjena/Dockerfile | 4 +- src/qjena/commands/index.py | 127 ++++++++++++++++-------------- src/qjena/commands/log.py | 29 +------ src/qjena/commands/start.py | 145 +++++++++++++++-------------------- src/qjena/commands/status.py | 7 ++ src/qjena/commands/stop.py | 22 +++++- 6 files changed, 167 insertions(+), 167 deletions(-) create mode 100644 src/qjena/commands/status.py diff --git a/src/qjena/Dockerfile b/src/qjena/Dockerfile index 43f612832..23af72a4b 100644 --- a/src/qjena/Dockerfile +++ b/src/qjena/Dockerfile @@ -27,7 +27,9 @@ RUN if [ "${UID:-}" != "" ] && [ "${GID:-}" != "" ]; then \ fi # Ensure the bin and fuseki folders are in PATH -ENV PATH="/opt/apache-jena/bin:${PATH}" +ENV JENA_HOME="/opt/apache-jena" +ENV FUSEKI_HOME="/opt/apache-jena-fuseki" +ENV PATH="${JENA_HOME}/bin:${FUSEKI_HOME}:${PATH}" # Make sure scripts are executable RUN chmod +x /opt/apache-jena/bin/* diff --git a/src/qjena/commands/index.py b/src/qjena/commands/index.py index 7966d650b..5abc1f17c 100644 --- a/src/qjena/commands/index.py +++ b/src/qjena/commands/index.py @@ -7,7 +7,7 @@ from qlever.command import QleverCommand from qlever.containerize import Containerize from qlever.log import log -from qlever.util import run_command +from qlever.util import binary_exists, run_command class IndexCommand(QleverCommand): @@ -24,14 +24,23 @@ def relevant_qleverfile_arguments(self) -> dict[str : list[str]]: return { "data": ["name", "format"], "index": ["input_files"], - "server": ["port"], - "runtime": ["system", "image", "server_container"], + "runtime": ["system", "image", "index_container"], } def additional_arguments(self, subparser): - pass + subparser.add_argument( + "--index-binary", + type=str, + default="tdb2.xloader", + help=( + "The binary for building the index (default: tdb2.xloader) " + "(this requires that you have apache-jena installed " + "on your machine)" + ), + ) - def build_image(self, build_cmd: str, system: str, image: str) -> bool: + @staticmethod + def build_image(build_cmd: str, system: str, image: str) -> bool: try: run_command(build_cmd, show_output=True) return True @@ -39,43 +48,41 @@ def build_image(self, build_cmd: str, system: str, image: str) -> bool: log.error(f"Building the {system} image {image} failed: {e}") return False + @staticmethod + def wrap_cmd_in_container(args, cmd: str) -> str: + return Containerize().containerize_command( + cmd=cmd, + container_system=args.system, + run_subcommand="run --rm", + image_name=args.image, + container_name=args.index_container, + volumes=[("$(pwd)", "/opt/data")], + working_directory="/opt/data", + ) + def execute(self, args) -> bool: system = args.system input_files = args.input_files - server_container = args.server_container - run_subcommand = "run -d" - - loading_flag = "/opt/loading.flag" - index_cmd = f"touch {loading_flag} && " - index_cmd += ( - f"tdb2.xloader --loc index data/{input_files} " - ">> /opt/data/index.log 2>&1 && " - ) - index_cmd += f"rm {loading_flag} && " - index_cmd += "tail -f /dev/null" - index_cmd = Containerize().containerize_command( - cmd=index_cmd, - container_system=system, - run_subcommand=run_subcommand, - image_name=args.image, - container_name=server_container, - volumes=[("$(pwd)", "/opt/data")], - ports=[(int(args.port), 3030)], - ) + index_cmd = f"{args.index_binary} --loc index {input_files}" + index_cmd += f" | tee {args.name}.index-log.txt" - dockerfile_dir = Path(__file__).parent.parent - dockerfile_path = dockerfile_dir / "Dockerfile" - build_cmd = ( - f"{system} build -f {dockerfile_path} -t {args.image} --build-arg " - f"UID=$(id -u) --build-arg GID=$(id -g) {dockerfile_dir}" - ) - image_id = run_command( - f"{system} images -q {args.image}", return_output=True - ) - cmd_to_show = ( - f"{build_cmd}\n\n{index_cmd}" if not image_id else index_cmd - ) + if args.system == "native": + cmd_to_show = index_cmd + else: + index_cmd = self.wrap_cmd_in_container(args, index_cmd) + dockerfile_dir = Path(__file__).parent.parent + dockerfile_path = dockerfile_dir / "Dockerfile" + build_cmd = ( + f"{system} build -f {dockerfile_path} -t {args.image} --build-arg " + f"UID=$(id -u) --build-arg GID=$(id -g) {dockerfile_dir}" + ) + image_id = run_command( + f"{system} images -q {args.image}", return_output=True + ) + cmd_to_show = ( + f"{build_cmd}\n\n{index_cmd}" if not image_id else index_cmd + ) # Show the command line. self.show(cmd_to_show, only_show=args.show) @@ -93,31 +100,39 @@ def execute(self, args) -> bool: ) return False - if Containerize().is_running(system, server_container): - log.error( - f"{system} container {server_container} exists, " - f"which means that server for {args.name} is already running. \n" - f"Stop the container {server_container} with `{self.script_name} " - "stop` first before loading the data." - ) - return False - - if not image_id: - build_successful = self.build_image(build_cmd, system, args.image) - if not build_successful: + # When running natively, check if the binary exists and works. + if args.system == "native": + if not binary_exists(args.index_binary, "index-binary"): return False else: - log.info( - f"{args.image} image present on the system. Executing command..." + if Containerize().is_running(args.system, args.index_container): + log.info( + f"{args.system} container {args.index_container} is still up, " + "which means that data loading is in progress. Please wait..." + ) + return False + + if not image_id: + build_successful = self.build_image( + build_cmd, system, args.image + ) + if not build_successful: + return False + else: + log.info(f"{args.image} image present on the system\n") + + index_dir = Path("index/Data-0001") + if index_dir.exists() and any(index_dir.iterdir()): + log.error( + "Index files found in index/Data-0001 directory " + "which shows presence of a previous index\n" ) + log.info("Aborting the index operation...") + return False # Run the index command. try: - run_command(index_cmd) - log.info( - f"Follow `{self.script_name} log` until data loading is finished." - f" (Ctrl-C stops following the log, but NOT the data loading!)" - ) + run_command(index_cmd, show_output=True) except Exception as e: log.error(f"Building the index failed: {e}") return False diff --git a/src/qjena/commands/log.py b/src/qjena/commands/log.py index 3e4c2640f..10e1fea85 100644 --- a/src/qjena/commands/log.py +++ b/src/qjena/commands/log.py @@ -1,32 +1,9 @@ from __future__ import annotations -from pathlib import Path - from qlever.commands import log as log_command class LogCommand(log_command.LogCommand): - def __init__(self): - self.script_name = "qjena" - - def description(self) -> str: - return ( - "Show the last lines of the server or index log file and follow it. " - "(Default: server log if it exists otherwise index log)" - ) - - def additional_arguments(self, subparser) -> None: - super().additional_arguments(subparser) - subparser.add_argument( - "--index-log", - action="store_true", - default=False, - help=("Follow the index log (default: follow server log)"), - ) - - def execute(self, args) -> bool: - log_file = "server.log" - if not Path("server.log").exists() or args.index_log: - log_file = "index.log" - args.log_file = log_file - return super().execute(args) + """ + Should behave exactly the same as log command in qlever + """ diff --git a/src/qjena/commands/start.py b/src/qjena/commands/start.py index e4e030096..52200aadc 100644 --- a/src/qjena/commands/start.py +++ b/src/qjena/commands/start.py @@ -1,9 +1,11 @@ from __future__ import annotations +from pathlib import Path + from qlever.command import QleverCommand from qlever.containerize import Containerize from qlever.log import log -from qlever.util import run_command +from qlever.util import binary_exists, is_server_alive, run_command class StartCommand(QleverCommand): @@ -36,102 +38,81 @@ def additional_arguments(self, subparser): "(default: run in the background)" ), ) - - @staticmethod - def is_data_loading(system: str, container: str) -> bool: - """ - Check if `index` command is still running and data loading - is in progress - """ - check_index_running_cmd = ( - f"{system} exec {container} bash -c " - "\"test -f /opt/loading.flag && echo 'running' || " - "echo 'finished'\"" - ) - index_ps_running = run_command( - check_index_running_cmd, return_output=True - ) - return index_ps_running.strip() == "running" - - @staticmethod - def index_exists(system: str, container: str) -> bool: - """ - Check if the index was built correctly and the index folder Data-0001 - exists in /opt/index directory in the container - """ - check_index_cmd = ( - f"{system} exec {container} bash -c " - "\"test -d /opt/index && test -d /opt/index/Data-0001 " - "&& echo 'exists' || echo 'missing'\"" + subparser.add_argument( + "--server-binary", + type=str, + default="fuseki-server", + help=( + "The binary for starting the server (default: fuseki-server) " + "(this requires that you have apache-jena-fuseki installed " + "on your machine)" + ), ) - index_exists = run_command(check_index_cmd, return_output=True) - return index_exists.strip() == "exists" @staticmethod - def is_server_alive(url: str) -> bool: - """ - Check if the Jena server is already alive at the given endpoint url - """ - check_server_cmd = ( - f"curl -s {url} && echo 'alive' || echo 'not'" + def wrap_cmd_in_container(args, cmd: str) -> str: + run_subcommand = "run --restart=unless-stopped" + if not args.run_in_foreground: + run_subcommand += " -d" + if not args.run_in_foreground: + cmd = f"{cmd} > {args.name}.server-log.txt 2>&1" + return Containerize().containerize_command( + cmd=cmd, + container_system=args.system, + run_subcommand=run_subcommand, + image_name=args.image, + container_name=args.server_container, + volumes=[("$(pwd)", "/opt/data")], + working_directory="/opt/data", + ports=[(args.port, args.port)], ) - is_server_alive = run_command(check_server_cmd, return_output=True) - return "alive" in is_server_alive.strip() def execute(self, args) -> bool: - system = args.system - dataset = args.name - - server_container = args.server_container - - port = int(args.port) - exec_cmd = ( - f"{system} exec {'-d ' if not args.run_in_foreground else ''}" - f"{server_container} bash -c " + start_cmd = ( + f"{args.server_binary} --port {args.port} --loc index /{args.name}" ) - serve_cmd = ( - '"java -jar /opt/apache-jena-fuseki/fuseki-server.jar --port 3030 ' - f'--loc /opt/index /{args.name} > /opt/data/server.log 2>&1 &"' - ) - start_cmd = exec_cmd + serve_cmd + + if args.system == "native": + if not args.run_in_foreground: + start_cmd = ( + f"nohup {start_cmd} > {args.name}.server-log.txt 2>&1 &" + ) + else: + start_cmd = self.wrap_cmd_in_container(args, start_cmd) # Show the command line. self.show(start_cmd, only_show=args.show) if args.show: return True - # Warn if server container not running (i.e. index not built) - if not Containerize().is_running(system, server_container): - log.error( - f"{system} container {server_container} does not exist! " - f"Did you call `{self.script_name} index`?" - ) - return False - # Check if index process ongoing - if self.is_data_loading(system, server_container): - log.error( - "Data loading is in progress. Please wait...\n" - f"Check status of {server_container} with " - f"`{self.script_name} log`" - ) - return False + # When running natively, check if the binary exists and works. + if args.system == "native": + if not binary_exists(args.server_binary, "server-binary"): + return False + else: + if Containerize().is_running(args.system, args.server_container): + log.error( + f"Server container {args.server_container} already exists!\n" + ) + log.info( + f"To kill the existing server, use `{self.script_name} stop`" + ) + return False - # Check if index folder Data-0001 missing - if not self.index_exists(system, server_container): - log.error( - f"Index folder Data-0001 missing in {system} container " - f"{server_container}! Did you call " - f"`{self.script_name} index`?" + index_dir = Path("index/Data-0001") + if not index_dir.exists() or not any(index_dir.iterdir()): + log.info(f"No Jena index files for {args.name} found! ") + log.info( + f"Did you call `{self.script_name} index`? If you did, check " + "if index files are present in index/Data-0001 directory" ) return False - # Check and warn if server already running - endpoint_url = f"http://{args.host_name}:{port}" - server_url = f"{endpoint_url}/{args.name}/query" - if self.is_server_alive(server_url): - log.error(f"Jena server already running on {server_url}") + endpoint_url = f"http://{args.host_name}:{args.port}/{args.name}/query" + if is_server_alive(url=endpoint_url): + log.error(f"Jena server already running on {endpoint_url}\n") log.info( - f"To kill the existing server, use `{self.script_name} stop` " + f"To kill the existing server, use `{self.script_name} stop`" ) return False @@ -139,9 +120,9 @@ def execute(self, args) -> bool: try: run_command(start_cmd, show_output=True) log.info( - f"Jena server webapp for {dataset} will be available at " - f"{endpoint_url} and the sparql endpoint for " - f"queries is {server_url}" + f"Jena server webapp for {args.name} will be available at " + f"http://{args.host_name}:{args.port} and the sparql endpoint for " + f"queries is {endpoint_url}" ) if args.run_in_foreground: log.info( diff --git a/src/qjena/commands/status.py b/src/qjena/commands/status.py new file mode 100644 index 000000000..382f9667f --- /dev/null +++ b/src/qjena/commands/status.py @@ -0,0 +1,7 @@ +from __future__ import annotations + +from qoxigraph.commands import status + + +class StatusCommand(status.StatusCommand): + DEFAULT_REGEX = "fuseki-server" diff --git a/src/qjena/commands/stop.py b/src/qjena/commands/stop.py index d44de054f..e3c654b0a 100644 --- a/src/qjena/commands/stop.py +++ b/src/qjena/commands/stop.py @@ -1,8 +1,26 @@ from __future__ import annotations -from qoxigraph.commands import stop +from qjena.commands.status import StatusCommand +from qoxigraph.commands import stop as qoxigraph_stop -class StopCommand(stop.StopCommand): +class StopCommand(qoxigraph_stop.StopCommand): + STATUS_COMMAND = StatusCommand() + DEFAULT_REGEX = r".*fuseki-server.*--port\s%%PORT%%.*%%NAME%%.*" + def description(self) -> str: return "Stop Jena server for a given dataset or port" + + def additional_arguments(self, subparser) -> None: + subparser.add_argument( + "--cmdline-regex", + default=self.DEFAULT_REGEX, + help="Show only processes where the command " + "line matches this regex", + ) + + def execute(self, args) -> bool: + args.cmdline_regex = args.cmdline_regex.replace( + "%%PORT%%", str(args.port) + ).replace("%%NAME%%", args.name) + return super().execute(args) From 7bb95479fbfee6d71fc22f861dd472208d68e4f4 Mon Sep 17 00:00:00 2001 From: tanmay-9 Date: Sun, 16 Mar 2025 16:29:36 +0100 Subject: [PATCH 05/10] Made import from qoxigraph more explicit in qjena --- src/qjena/commands/setup_config.py | 7 +++++-- src/qjena/commands/status.py | 4 ++-- src/qjena/commands/stop.py | 12 ++---------- 3 files changed, 9 insertions(+), 14 deletions(-) diff --git a/src/qjena/commands/setup_config.py b/src/qjena/commands/setup_config.py index 643f8e8ec..1beefe891 100644 --- a/src/qjena/commands/setup_config.py +++ b/src/qjena/commands/setup_config.py @@ -1,11 +1,14 @@ from __future__ import annotations -from qoxigraph.commands import setup_config +from qoxigraph.commands.setup_config import ( + SetupConfigCommand as QoxigraphSetupConfigCommand, +) -class SetupConfigCommand(setup_config.SetupConfigCommand): +class SetupConfigCommand(QoxigraphSetupConfigCommand): """ Should behave exactly the same as setup-config command in qoxigraph, just with a different Docker image name """ + IMAGE = "adfreiburg/qjena" diff --git a/src/qjena/commands/status.py b/src/qjena/commands/status.py index 382f9667f..72f80b356 100644 --- a/src/qjena/commands/status.py +++ b/src/qjena/commands/status.py @@ -1,7 +1,7 @@ from __future__ import annotations -from qoxigraph.commands import status +from qoxigraph.commands.status import StatusCommand as QoxigraphStatusCommand -class StatusCommand(status.StatusCommand): +class StatusCommand(QoxigraphStatusCommand): DEFAULT_REGEX = "fuseki-server" diff --git a/src/qjena/commands/stop.py b/src/qjena/commands/stop.py index e3c654b0a..d45f96590 100644 --- a/src/qjena/commands/stop.py +++ b/src/qjena/commands/stop.py @@ -1,24 +1,16 @@ from __future__ import annotations from qjena.commands.status import StatusCommand -from qoxigraph.commands import stop as qoxigraph_stop +from qoxigraph.commands.stop import StopCommand as QoxigraphStopCommand -class StopCommand(qoxigraph_stop.StopCommand): +class StopCommand(QoxigraphStopCommand): STATUS_COMMAND = StatusCommand() DEFAULT_REGEX = r".*fuseki-server.*--port\s%%PORT%%.*%%NAME%%.*" def description(self) -> str: return "Stop Jena server for a given dataset or port" - def additional_arguments(self, subparser) -> None: - subparser.add_argument( - "--cmdline-regex", - default=self.DEFAULT_REGEX, - help="Show only processes where the command " - "line matches this regex", - ) - def execute(self, args) -> bool: args.cmdline_regex = args.cmdline_regex.replace( "%%PORT%%", str(args.port) From 1c11dc21f1d5806a3a8b0fe5928de171a1b2786b Mon Sep 17 00:00:00 2001 From: tanmay-9 Date: Sun, 16 Mar 2025 16:30:35 +0100 Subject: [PATCH 06/10] Added symlinks for commands that are exactly the same as in qlever and qoxigraph --- src/qjena/commands/example_queries.py | 10 +--------- src/qjena/commands/extract_queries.py | 10 +--------- src/qjena/commands/get_data.py | 10 +--------- src/qjena/commands/log.py | 10 +--------- src/qjena/commands/query.py | 10 +--------- 5 files changed, 5 insertions(+), 45 deletions(-) mode change 100644 => 120000 src/qjena/commands/example_queries.py mode change 100644 => 120000 src/qjena/commands/extract_queries.py mode change 100644 => 120000 src/qjena/commands/get_data.py mode change 100644 => 120000 src/qjena/commands/log.py mode change 100644 => 120000 src/qjena/commands/query.py diff --git a/src/qjena/commands/example_queries.py b/src/qjena/commands/example_queries.py deleted file mode 100644 index 8afac4500..000000000 --- a/src/qjena/commands/example_queries.py +++ /dev/null @@ -1,9 +0,0 @@ -from __future__ import annotations - -from qlever.commands import example_queries - - -class ExampleQueriesCommand(example_queries.ExampleQueriesCommand): - """ - Should behave exactly the same as example-queries command in qlever - """ diff --git a/src/qjena/commands/example_queries.py b/src/qjena/commands/example_queries.py new file mode 120000 index 000000000..82a17b122 --- /dev/null +++ b/src/qjena/commands/example_queries.py @@ -0,0 +1 @@ +../../qlever/commands/example_queries.py \ No newline at end of file diff --git a/src/qjena/commands/extract_queries.py b/src/qjena/commands/extract_queries.py deleted file mode 100644 index f361ff5f1..000000000 --- a/src/qjena/commands/extract_queries.py +++ /dev/null @@ -1,9 +0,0 @@ -from __future__ import annotations - -from qlever.commands import extract_queries - - -class ExtractQueriesCommand(extract_queries.ExtractQueriesCommand): - """ - Should behave exactly the same as extract-queries command in qlever - """ diff --git a/src/qjena/commands/extract_queries.py b/src/qjena/commands/extract_queries.py new file mode 120000 index 000000000..5667cc52a --- /dev/null +++ b/src/qjena/commands/extract_queries.py @@ -0,0 +1 @@ +../../qlever/commands/extract_queries.py \ No newline at end of file diff --git a/src/qjena/commands/get_data.py b/src/qjena/commands/get_data.py deleted file mode 100644 index 0d570233e..000000000 --- a/src/qjena/commands/get_data.py +++ /dev/null @@ -1,9 +0,0 @@ -from __future__ import annotations - -from qlever.commands import get_data - - -class GetDataCommand(get_data.GetDataCommand): - """ - Should behave exactly the same as get-data command in qlever - """ diff --git a/src/qjena/commands/get_data.py b/src/qjena/commands/get_data.py new file mode 120000 index 000000000..4900dbb87 --- /dev/null +++ b/src/qjena/commands/get_data.py @@ -0,0 +1 @@ +../../qlever/commands/get_data.py \ No newline at end of file diff --git a/src/qjena/commands/log.py b/src/qjena/commands/log.py deleted file mode 100644 index 10e1fea85..000000000 --- a/src/qjena/commands/log.py +++ /dev/null @@ -1,9 +0,0 @@ -from __future__ import annotations - -from qlever.commands import log as log_command - - -class LogCommand(log_command.LogCommand): - """ - Should behave exactly the same as log command in qlever - """ diff --git a/src/qjena/commands/log.py b/src/qjena/commands/log.py new file mode 120000 index 000000000..90ffabb45 --- /dev/null +++ b/src/qjena/commands/log.py @@ -0,0 +1 @@ +../../qlever/commands/log.py \ No newline at end of file diff --git a/src/qjena/commands/query.py b/src/qjena/commands/query.py deleted file mode 100644 index 5cac4f60c..000000000 --- a/src/qjena/commands/query.py +++ /dev/null @@ -1,9 +0,0 @@ -from __future__ import annotations - -from qoxigraph.commands import query - - -class QueryCommand(query.QueryCommand): - """ - Should behave exactly the same as query command in qoxigraph - """ diff --git a/src/qjena/commands/query.py b/src/qjena/commands/query.py new file mode 120000 index 000000000..788bdd6d3 --- /dev/null +++ b/src/qjena/commands/query.py @@ -0,0 +1 @@ +../../qoxigraph/commands/query.py \ No newline at end of file From 3b35ebec84fc9c33c7a1f74e9236ff7d9a9d5cf9 Mon Sep 17 00:00:00 2001 From: tanmay-9 Date: Sat, 22 Mar 2025 16:46:54 +0100 Subject: [PATCH 07/10] Changed default sparql-endpoint for qjena and --- src/qjena/commands/example_queries.py | 16 +++++++++++++++- src/qjena/commands/query.py | 14 +++++++++++++- 2 files changed, 28 insertions(+), 2 deletions(-) mode change 120000 => 100644 src/qjena/commands/example_queries.py mode change 120000 => 100644 src/qjena/commands/query.py diff --git a/src/qjena/commands/example_queries.py b/src/qjena/commands/example_queries.py deleted file mode 120000 index 82a17b122..000000000 --- a/src/qjena/commands/example_queries.py +++ /dev/null @@ -1 +0,0 @@ -../../qlever/commands/example_queries.py \ No newline at end of file diff --git a/src/qjena/commands/example_queries.py b/src/qjena/commands/example_queries.py new file mode 100644 index 000000000..993089e4d --- /dev/null +++ b/src/qjena/commands/example_queries.py @@ -0,0 +1,15 @@ +from __future__ import annotations + +from qlever.commands.example_queries import ( + ExampleQueriesCommand as QleverExampleQueriesCommand, +) + + +class ExampleQueriesCommand(QleverExampleQueriesCommand): + def relevant_qleverfile_arguments(self) -> dict[str : list[str]]: + return {"data": ["name"], "server": ["port"], "ui": ["ui_config"]} + + def execute(self, args) -> bool: + if not args.sparql_endpoint: + args.sparql_endpoint = f"localhost:{args.port}/{args.name}/query" + return super().execute(args) diff --git a/src/qjena/commands/query.py b/src/qjena/commands/query.py deleted file mode 120000 index 788bdd6d3..000000000 --- a/src/qjena/commands/query.py +++ /dev/null @@ -1 +0,0 @@ -../../qoxigraph/commands/query.py \ No newline at end of file diff --git a/src/qjena/commands/query.py b/src/qjena/commands/query.py new file mode 100644 index 000000000..7d4acd293 --- /dev/null +++ b/src/qjena/commands/query.py @@ -0,0 +1,13 @@ +from __future__ import annotations + +from qoxigraph.commands.query import QueryCommand as QoxigraphQueryCommand + + +class QueryCommand(QoxigraphQueryCommand): + def relevant_qleverfile_arguments(self) -> dict[str : list[str]]: + return {"data": ["name"], "server": ["port", "access_token"]} + + def execute(self, args) -> bool: + if not args.sparql_endpoint: + args.sparql_endpoint = f"localhost:{args.port}/{args.name}/query" + super().execute(args) From f17ad9100cb27a6081dff48a935df16f1703aad7 Mon Sep 17 00:00:00 2001 From: tanmay-9 Date: Tue, 1 Apr 2025 23:57:47 +0200 Subject: [PATCH 08/10] Fix description for `start` and `status` commands for qjena --- src/qjena/commands/start.py | 2 +- src/qjena/commands/status.py | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/qjena/commands/start.py b/src/qjena/commands/start.py index 52200aadc..6aa33d0ca 100644 --- a/src/qjena/commands/start.py +++ b/src/qjena/commands/start.py @@ -15,7 +15,7 @@ def __init__(self): def description(self) -> str: return ( "Start the server for Jena (requires that you have built an " - "index before) (Runs in a container)" + "index before)" ) def should_have_qleverfile(self) -> bool: diff --git a/src/qjena/commands/status.py b/src/qjena/commands/status.py index 72f80b356..e06ee3f82 100644 --- a/src/qjena/commands/status.py +++ b/src/qjena/commands/status.py @@ -5,3 +5,6 @@ class StatusCommand(QoxigraphStatusCommand): DEFAULT_REGEX = "fuseki-server" + + def description(self) -> str: + return "Show Jena fuseki-server processes running on this machine" From 5d966028568638cb90d8c5cda693bd8c9f108183 Mon Sep 17 00:00:00 2001 From: tanmay-9 Date: Sat, 5 Apr 2025 12:58:43 +0200 Subject: [PATCH 09/10] Added logging to qjena `start` command --- src/qjena/commands/start.py | 62 ++++++++++++++++++++++++++----------- 1 file changed, 44 insertions(+), 18 deletions(-) diff --git a/src/qjena/commands/start.py b/src/qjena/commands/start.py index 6aa33d0ca..96a94383e 100644 --- a/src/qjena/commands/start.py +++ b/src/qjena/commands/start.py @@ -1,5 +1,7 @@ from __future__ import annotations +import subprocess +import time from pathlib import Path from qlever.command import QleverCommand @@ -115,27 +117,51 @@ def execute(self, args) -> bool: f"To kill the existing server, use `{self.script_name} stop`" ) return False - - # Run the start command. + try: - run_command(start_cmd, show_output=True) - log.info( - f"Jena server webapp for {args.name} will be available at " - f"http://{args.host_name}:{args.port} and the sparql endpoint for " - f"queries is {endpoint_url}" + process = run_command( + start_cmd, + use_popen=args.run_in_foreground, ) - if args.run_in_foreground: - log.info( - "Follow the log as long as the server is" - " running (Ctrl-C stops the server)" - ) - else: - log.info( - f"Follow `{self.script_name} log` until the server is ready" - f" (Ctrl-C stops following the log, but NOT the server)" - ) except Exception as e: - log.error(f"Starting the Jena server failed: {e}") + log.error(f"Starting the Jena server failed ({e})") return False + # Tail the server log until the server is ready (note that the `exec` + # is important to make sure that the tail process is killed and not + # just the bash process). + if args.run_in_foreground: + log.info( + "Follow the server logs as long as the server is" + " running (Ctrl-C stops the server)" + ) + else: + log.info( + "Follow the server logs until the server is ready" + " (Ctrl-C stops following the log, but NOT the server)" + ) + log.info("") + log_cmd = f"exec tail -f {args.name}.server-log.txt" + log_proc = subprocess.Popen(log_cmd, shell=True) + while not is_server_alive(endpoint_url): + time.sleep(1) + + log.info( + f"Jena server webapp for {args.name} will be available at " + f"http://{args.host_name}:{args.port} and the sparql endpoint for " + f"queries is {endpoint_url}" + ) + + # Kill the log process + if not args.run_in_foreground: + log_proc.terminate() + + # With `--run-in-foreground`, wait until the server is stopped. + if args.run_in_foreground: + try: + process.wait() + except KeyboardInterrupt: + process.terminate() + log_proc.terminate() + return True From 083786f77f65712aa2b998d98530a58b87455372 Mon Sep 17 00:00:00 2001 From: tanmay-9 Date: Wed, 4 Jun 2025 22:34:41 +0200 Subject: [PATCH 10/10] Change localhost -> args.host_name in qjena `example-queries` --- src/qjena/commands/example_queries.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/qjena/commands/example_queries.py b/src/qjena/commands/example_queries.py index 993089e4d..e8f4ab148 100644 --- a/src/qjena/commands/example_queries.py +++ b/src/qjena/commands/example_queries.py @@ -7,9 +7,11 @@ class ExampleQueriesCommand(QleverExampleQueriesCommand): def relevant_qleverfile_arguments(self) -> dict[str : list[str]]: - return {"data": ["name"], "server": ["port"], "ui": ["ui_config"]} + return {"data": ["name"], "server": ["host_name", "port"], "ui": ["ui_config"]} def execute(self, args) -> bool: if not args.sparql_endpoint: - args.sparql_endpoint = f"localhost:{args.port}/{args.name}/query" + args.sparql_endpoint = ( + f"{args.host_name}:{args.port}/{args.name}/query" + ) return super().execute(args)