From 1a4373ec9b36286c661aa8d15b0ee66d06331ef6 Mon Sep 17 00:00:00 2001 From: tanmay-9 Date: Thu, 17 Apr 2025 16:13:29 +0200 Subject: [PATCH 1/7] Added commands to setup Oxigraph Added all the commands necessary to setup SPARQL endpoint for oxigraph natively and containerized --- pyproject.toml | 2 + src/qoxigraph/__init__.py | 0 src/qoxigraph/commands/__init__.py | 0 src/qoxigraph/commands/example_queries.py | 12 ++ src/qoxigraph/commands/extract_queries.py | 1 + src/qoxigraph/commands/get_data.py | 1 + src/qoxigraph/commands/index.py | 109 ++++++++++++++ src/qoxigraph/commands/log.py | 50 +++++++ src/qoxigraph/commands/query.py | 54 +++++++ src/qoxigraph/commands/setup_config.py | 109 ++++++++++++++ src/qoxigraph/commands/start.py | 175 ++++++++++++++++++++++ src/qoxigraph/commands/status.py | 19 +++ src/qoxigraph/commands/stop.py | 71 +++++++++ 13 files changed, 603 insertions(+) create mode 100644 src/qoxigraph/__init__.py create mode 100644 src/qoxigraph/commands/__init__.py create mode 100644 src/qoxigraph/commands/example_queries.py create mode 120000 src/qoxigraph/commands/extract_queries.py create mode 120000 src/qoxigraph/commands/get_data.py create mode 100644 src/qoxigraph/commands/index.py create mode 100644 src/qoxigraph/commands/log.py create mode 100644 src/qoxigraph/commands/query.py create mode 100644 src/qoxigraph/commands/setup_config.py create mode 100644 src/qoxigraph/commands/start.py create mode 100644 src/qoxigraph/commands/status.py create mode 100644 src/qoxigraph/commands/stop.py diff --git a/pyproject.toml b/pyproject.toml index b053fe62d..12c82f470 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,6 +27,8 @@ Github = "https://github.com/ad-freiburg/qlever" [project.scripts] "qlever" = "qlever.qlever_main:main" +"qoxigraph" = "qlever.qlever_main:main" +"qlever-old" = "qlever.qlever_old:main" [tool.setuptools] license-files = ["LICENSE"] diff --git a/src/qoxigraph/__init__.py b/src/qoxigraph/__init__.py new file mode 100644 index 
000000000..e69de29bb diff --git a/src/qoxigraph/commands/__init__.py b/src/qoxigraph/commands/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/qoxigraph/commands/example_queries.py b/src/qoxigraph/commands/example_queries.py new file mode 100644 index 000000000..d62982a83 --- /dev/null +++ b/src/qoxigraph/commands/example_queries.py @@ -0,0 +1,12 @@ +from __future__ import annotations + +from qlever.commands.example_queries import ( + ExampleQueriesCommand as QleverExampleQueriesCommand, +) + + +class ExampleQueriesCommand(QleverExampleQueriesCommand): + def execute(self, args) -> bool: + if not args.sparql_endpoint: + args.sparql_endpoint = f"localhost:{args.port}/query" + return super().execute(args) diff --git a/src/qoxigraph/commands/extract_queries.py b/src/qoxigraph/commands/extract_queries.py new file mode 120000 index 000000000..5667cc52a --- /dev/null +++ b/src/qoxigraph/commands/extract_queries.py @@ -0,0 +1 @@ +../../qlever/commands/extract_queries.py \ No newline at end of file diff --git a/src/qoxigraph/commands/get_data.py b/src/qoxigraph/commands/get_data.py new file mode 120000 index 000000000..4900dbb87 --- /dev/null +++ b/src/qoxigraph/commands/get_data.py @@ -0,0 +1 @@ +../../qlever/commands/get_data.py \ No newline at end of file diff --git a/src/qoxigraph/commands/index.py b/src/qoxigraph/commands/index.py new file mode 100644 index 000000000..128b9a826 --- /dev/null +++ b/src/qoxigraph/commands/index.py @@ -0,0 +1,109 @@ +from __future__ import annotations + +import glob +import shlex +from pathlib import Path + +from qlever.command import QleverCommand +from qlever.containerize import Containerize +from qlever.log import log +from qlever.util import binary_exists, run_command + + +class IndexCommand(QleverCommand): + def __init__(self): + self.script_name = "qoxigraph" + + def description(self) -> str: + return "Build the index for a given RDF dataset" + + def should_have_qleverfile(self) -> bool: + return True + + 
def relevant_qleverfile_arguments(self) -> dict[str : list[str]]: + return { + "data": ["name", "format"], + "index": ["input_files"], + "runtime": ["system", "image", "index_container"], + } + + def additional_arguments(self, subparser): + subparser.add_argument( + "--index-binary", + type=str, + default="oxigraph", + help=( + "The binary for building the index (default: oxigraph) " + "(this requires that you have oxigraph-cli installed " + "on your machine)" + ), + ) + + @staticmethod + def wrap_cmd_in_container(args, cmd: str) -> str: + return Containerize().containerize_command( + cmd=cmd, + container_system=args.system, + run_subcommand="run --rm", + image_name=args.image, + container_name=args.index_container, + volumes=[("$(pwd)", "/index")], + working_directory="/index", + use_bash=False, + ) + + def execute(self, args) -> bool: + index_cmd = f"load --location . --file {args.input_files}" + index_cmd += f" |& tee {args.name}.index-log.txt" + + index_cmd = ( + f"{args.index_binary} {index_cmd}" + if args.system == "native" + else self.wrap_cmd_in_container(args, index_cmd) + ) + + # Show the command line. + self.show(index_cmd, only_show=args.show) + if args.show: + return True + + # Check if all of the input files exist. + for pattern in shlex.split(args.input_files): + if len(glob.glob(pattern)) == 0: + log.error(f'No file matching "{pattern}" found') + log.info("") + log.info( + f"Did you call `{self.script_name} get-data`? If you did, " + "check GET_DATA_CMD and INPUT_FILES in the Qleverfile" + ) + return False + + # When running natively, check if the binary exists and works. + if args.system == "native": + if not binary_exists(args.index_binary, "index-binary"): + return False + else: + if Containerize().is_running(args.system, args.index_container): + log.info( + f"{args.system} container {args.index_container} is still up, " + "which means that data loading is in progress. Please wait..." 
+ ) + return False + + if len([p.name for p in Path.cwd().glob("*.sst")]) != 0: + log.error( + "Index files (*.sst) found in current directory " + "which shows presence of a previous index" + ) + log.info("") + log.info("Aborting the index operation...") + return False + + # Run the index command. + try: + run_command(index_cmd, show_output=True, show_stderr=True) + except Exception as e: + log.error(f"Building the index failed: {e}") + return False + + return True diff --git a/src/qoxigraph/commands/log.py b/src/qoxigraph/commands/log.py new file mode 100644 index 000000000..a90d22288 --- /dev/null +++ b/src/qoxigraph/commands/log.py @@ -0,0 +1,50 @@ +from __future__ import annotations + +from qlever.commands.log import LogCommand as QleverLogCommand +from qlever.containerize import Containerize +from qlever.log import log +from qlever.util import run_command + + +class LogCommand(QleverLogCommand): + def __init__(self): + self.script_name = "qoxigraph" + + def relevant_qleverfile_arguments(self) -> dict[str : list[str]]: + return { + "data": ["name"], + "runtime": [ + "system", + "image", + "server_container", + ], + } + + def execute(self, args) -> bool: + if args.system == "native": + return super().execute(args) + + log_cmd = f"{args.system} logs " + + if not args.from_beginning: + log_cmd += f"-n {args.tail_num_lines} " + if not args.no_follow: + log_cmd += "-f " + + log_cmd += args.server_container + + # Show the command line. 
+ self.show(log_cmd, only_show=args.show) + if args.show: + return True + + if not Containerize().is_running(args.system, args.server_container): + log.error(f"No server container {args.server_container} found!\n") + log.info(f"Are you sure you called `{self.script_name} start`?") + return False + + try: + run_command(log_cmd, show_output=True, show_stderr=True) + except Exception as e: + log.error(f"Cannot display container logs - {e}") + return True diff --git a/src/qoxigraph/commands/query.py b/src/qoxigraph/commands/query.py new file mode 100644 index 000000000..6518905f3 --- /dev/null +++ b/src/qoxigraph/commands/query.py @@ -0,0 +1,54 @@ +from __future__ import annotations + +from qlever.commands.query import QueryCommand as QleverQueryCommand + + +class QueryCommand(QleverQueryCommand): + def additional_arguments(self, subparser) -> None: + subparser.add_argument( + "query", + type=str, + nargs="?", + default="SELECT * WHERE { ?s ?p ?o } LIMIT 10", + help="SPARQL query to send", + ) + subparser.add_argument( + "--predefined-query", + type=str, + choices=self.predefined_queries.keys(), + help="Use a predefined query", + ) + subparser.add_argument( + "--sparql-endpoint", type=str, help="URL of the SPARQL endpoint" + ) + subparser.add_argument( + "--accept", + type=str, + choices=[ + "text/tab-separated-values", + "text/csv", + "application/sparql-results+json", + "application/sparql-results+xml", + ], + default="text/tab-separated-values", + help="Accept header for the SPARQL query", + ) + subparser.add_argument( + "--get", + action="store_true", + default=False, + help="Use GET request instead of POST", + ) + subparser.add_argument( + "--no-time", + action="store_true", + default=False, + help="Do not print the (end-to-end) time taken", + ) + + def execute(self, args) -> bool: + if not args.sparql_endpoint: + args.sparql_endpoint = f"localhost:{args.port}/query" + args.pin_to_cache = None + args.access_token = None + super().execute(args) diff --git 
a/src/qoxigraph/commands/setup_config.py b/src/qoxigraph/commands/setup_config.py new file mode 100644 index 000000000..b6d9225ba --- /dev/null +++ b/src/qoxigraph/commands/setup_config.py @@ -0,0 +1,109 @@ +from __future__ import annotations + +from configparser import RawConfigParser +from pathlib import Path + +from qlever.command import QleverCommand +from qlever.log import log +from qlever.qleverfile import Qleverfile + + +class SetupConfigCommand(QleverCommand): + IMAGE = "ghcr.io/oxigraph/oxigraph" + + FILTER_CRITERIA = { + "data": [], + "index": ["INPUT_FILES"], + "server": ["PORT"], + "runtime": ["SYSTEM", "IMAGE"], + "ui": ["UI_CONFIG"], + } + + def __init__(self): + self.qleverfiles_path = ( + Path(__file__).parent.parent.parent / "qlever" / "Qleverfiles" + ) + self.qleverfile_names = [ + p.name.split(".")[1] + for p in self.qleverfiles_path.glob("Qleverfile.*") + ] + + def description(self) -> str: + return "Get a pre-configured Qleverfile" + + def should_have_qleverfile(self) -> bool: + return False + + def relevant_qleverfile_arguments(self) -> dict[str : list[str]]: + return {} + + def additional_arguments(self, subparser) -> None: + subparser.add_argument( + "config_name", + type=str, + choices=self.qleverfile_names, + help="The name of the pre-configured Qleverfile to create", + ) + + def validate_qleverfile_setup( + self, args, qleverfile_path: Path + ) -> bool | None: + # Construct the command line and show it. + setup_config_show = ( + f"Creating Qleverfile for {args.config_name} using " + f"Qleverfile.{args.config_name} file in {self.qleverfiles_path}" + ) + self.show(setup_config_show, only_show=args.show) + if args.show: + return True + + # If there is already a Qleverfile in the current directory, exit. 
+ if qleverfile_path.exists(): + log.error("`Qleverfile` already exists in current directory") + log.info("") + log.info( + "If you want to create a new Qleverfile using " + "`qlever setup-config`, delete the existing Qleverfile " + "first" + ) + return False + return None + + def get_filtered_qleverfile_parser( + self, config_name: str + ) -> RawConfigParser: + qleverfile_config_path = ( + self.qleverfiles_path / f"Qleverfile.{config_name}" + ) + qleverfile_parser = Qleverfile.filter( + qleverfile_config_path, self.FILTER_CRITERIA + ) + if qleverfile_parser.has_section("runtime"): + qleverfile_parser.set("runtime", "IMAGE", self.IMAGE) + return qleverfile_parser + + def execute(self, args) -> bool: + qleverfile_path = Path("Qleverfile") + exit_status = self.validate_qleverfile_setup(args, qleverfile_path) + if exit_status is not None: + return exit_status + + qleverfile_parser = self.get_filtered_qleverfile_parser( + args.config_name + ) + # Copy the Qleverfile to the current directory. + try: + with qleverfile_path.open("w") as f: + qleverfile_parser.write(f) + except Exception as e: + log.error( + f'Could not copy "{qleverfile_path}" to current directory: {e}' + ) + return False + + # If we get here, everything went well. 
+ log.info( + f'Created Qleverfile for config "{args.config_name}"' + f" in current directory" + ) + return True diff --git a/src/qoxigraph/commands/start.py b/src/qoxigraph/commands/start.py new file mode 100644 index 000000000..8a038344d --- /dev/null +++ b/src/qoxigraph/commands/start.py @@ -0,0 +1,175 @@ +from __future__ import annotations + +import subprocess +import time +from pathlib import Path + +from qlever.command import QleverCommand +from qlever.containerize import Containerize +from qlever.log import log +from qlever.util import binary_exists, is_server_alive, run_command + + +class StartCommand(QleverCommand): + def __init__(self): + self.script_name = "qoxigraph" + + def description(self) -> str: + return ( + "Start the server for Oxigraph (requires that you have built an " + "index before)" + ) + + def should_have_qleverfile(self) -> bool: + return True + + def relevant_qleverfile_arguments(self) -> dict[str : list[str]]: + return { + "data": ["name"], + "server": ["host_name", "port"], + "runtime": ["system", "image", "server_container"], + } + + def additional_arguments(self, subparser): + subparser.add_argument( + "--run-in-foreground", + action="store_true", + default=False, + help=( + "Run the start command in the foreground " + "(default: run in the background)" + ), + ) + subparser.add_argument( + "--server-binary", + type=str, + default="oxigraph", + help=( + "The binary for starting the server (default: oxigraph) " + "(this requires that you have oxigraph-cli installed " + "on your machine)" + ), + ) + + @staticmethod + def wrap_cmd_in_container(args, cmd: str) -> str: + run_subcommand = "run --restart=unless-stopped" + if not args.run_in_foreground: + run_subcommand += " -d" + return Containerize().containerize_command( + cmd=cmd, + container_system=args.system, + run_subcommand=run_subcommand, + image_name=args.image, + container_name=args.server_container, + volumes=[("$(pwd)", "/index")], + ports=[(args.port, args.port)], + 
working_directory="/index", + use_bash=False, + ) + + def execute(self, args) -> bool: + bind = ( + f"{args.host_name}:{args.port}" + if args.system == "native" + else f"0.0.0.0:{args.port}" + ) + start_cmd = f"serve-read-only --location . --bind={bind}" + + if args.system == "native": + start_cmd = f"{args.server_binary} {start_cmd}" + if not args.run_in_foreground: + start_cmd = ( + f"nohup {start_cmd} > {args.name}.server-log.txt 2>&1 &" + ) + else: + start_cmd = self.wrap_cmd_in_container(args, start_cmd) + + # Show the command line. + self.show(start_cmd, only_show=args.show) + if args.show: + return True + + endpoint_url = f"http://{args.host_name}:{args.port}/query" + + # When running natively, check if the binary exists and works. + if args.system == "native": + if not binary_exists(args.server_binary, "server-binary"): + return False + else: + if Containerize().is_running(args.system, args.server_container): + log.error( + f"Server container {args.server_container} already exists!\n" + ) + log.info( + f"To kill the existing server, use `{self.script_name} stop`" + ) + return False + + # Check if index files (*.sst) present in cwd + if len([p.name for p in Path.cwd().glob("*.sst")]) == 0: + log.error(f"No Oxigraph index files for {args.name} found!\n") + log.info( + f"Did you call `{self.script_name} index`? If you did, check " + "if .sst index files are present in current working directory." 
+ ) + return False + + if is_server_alive(url=endpoint_url): + log.error(f"Oxigraph server already running on {endpoint_url}\n") + log.info( + f"To kill the existing server, use `{self.script_name} stop`" + ) + return False + + try: + process = run_command( + start_cmd, + use_popen=args.run_in_foreground, + ) + except Exception as e: + log.error(f"Starting the Oxigraph server failed ({e})") + return False + + # Tail the server log until the server is ready (note that the `exec` + # is important to make sure that the tail process is killed and not + # just the bash process). + if args.run_in_foreground: + log.info( + "Follow the server logs as long as the server is" + " running (Ctrl-C stops the server)" + ) + else: + log.info( + "Follow the server logs until the server is ready" + " (Ctrl-C stops following the log, but NOT the server)" + ) + log.info("") + if args.system == "native": + log_cmd = f"exec tail -f {args.name}.server-log.txt" + else: + time.sleep(2) + log_cmd = f"exec {args.system} logs -f {args.server_container}" + log_proc = subprocess.Popen(log_cmd, shell=True) + while not is_server_alive(endpoint_url): + time.sleep(1) + + log.info( + f"Oxigraph server webapp for {args.name} will be available at " + f"http://{args.host_name}:{args.port} and the sparql endpoint for " + f"queries is {endpoint_url} when the server is ready" + ) + + # Kill the log process + if not args.run_in_foreground: + log_proc.terminate() + + # With `--run-in-foreground`, wait until the server is stopped. 
+ if args.run_in_foreground: + try: + process.wait() + except KeyboardInterrupt: + process.terminate() + log_proc.terminate() + + return True diff --git a/src/qoxigraph/commands/status.py b/src/qoxigraph/commands/status.py new file mode 100644 index 000000000..eb2de86cf --- /dev/null +++ b/src/qoxigraph/commands/status.py @@ -0,0 +1,19 @@ +from __future__ import annotations + +from qlever.commands.status import StatusCommand as QleverStatusCommand + + +class StatusCommand(QleverStatusCommand): + DEFAULT_REGEX = "oxigraph\\s+serve-read-only" + + def description(self) -> str: + return "Show Oxigraph processes running on this machine" + + def additional_arguments(self, subparser) -> None: + subparser.add_argument( + "--cmdline-regex", + default=self.DEFAULT_REGEX, + help=( + "Show only processes where the command line matches this regex" + ), + ) diff --git a/src/qoxigraph/commands/stop.py b/src/qoxigraph/commands/stop.py new file mode 100644 index 000000000..ed9c10369 --- /dev/null +++ b/src/qoxigraph/commands/stop.py @@ -0,0 +1,71 @@ +from __future__ import annotations + +from qlever.command import QleverCommand +from qlever.commands import stop as qlever_stop +from qlever.log import log +from qoxigraph.commands.status import StatusCommand + + +class StopCommand(QleverCommand): + # Override this with StatusCommand from child class for execute + # method to work as intended + STATUS_COMMAND = StatusCommand() + DEFAULT_REGEX = "oxigraph\\s+serve-read-only.*:%%PORT%%" + + def __init__(self): + pass + + def description(self) -> str: + return "Stop Oxigraph server for a given dataset or port" + + def should_have_qleverfile(self) -> bool: + return True + + def relevant_qleverfile_arguments(self) -> dict[str : list[str]]: + return { + "data": ["name"], + "server": ["port"], + "runtime": ["system", "server_container"], + } + + def additional_arguments(self, subparser) -> None: + subparser.add_argument( + "--cmdline-regex", + default=self.DEFAULT_REGEX, + help="Show only 
processes where the command " + "line matches this regex", + ) + + def execute(self, args) -> bool: + cmdline_regex = args.cmdline_regex.replace("%%PORT%%", str(args.port)) + description = ( + f'Checking for processes matching "{cmdline_regex}"' + if args.system == "native" + else f"Checking for container with name {args.server_container}" + ) + + self.show(description, only_show=args.show) + if args.show: + return True + + if args.system == "native": + stop_process_results = ( + qlever_stop.StopCommand().stop_process_with_regex( + cmdline_regex + ) + ) + if stop_process_results is None: + return False + if len(stop_process_results) > 0: + return all(stop_process_results) + + # If no matching process found, show a message and the output of the + # status command. + log.error("No matching process found") + args.cmdline_regex = self.STATUS_COMMAND.DEFAULT_REGEX + log.info("") + StatusCommand().execute(args) + return True + + # First check if container is running and if yes, stop and remove it + return qlever_stop.stop_container(args.server_container) From 956a59df7e60f71f57315173c36ca81acec13ba8 Mon Sep 17 00:00:00 2001 From: tanmay-9 Date: Mon, 19 May 2025 23:51:47 +0200 Subject: [PATCH 2/7] Fix host_name bug in example_queries and add util stop_with_regex command to stop --- src/qoxigraph/commands/example_queries.py | 2 +- src/qoxigraph/commands/stop.py | 7 ++----- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/src/qoxigraph/commands/example_queries.py b/src/qoxigraph/commands/example_queries.py index d62982a83..4ef76c241 100644 --- a/src/qoxigraph/commands/example_queries.py +++ b/src/qoxigraph/commands/example_queries.py @@ -8,5 +8,5 @@ class ExampleQueriesCommand(QleverExampleQueriesCommand): def execute(self, args) -> bool: if not args.sparql_endpoint: - args.sparql_endpoint = f"localhost:{args.port}/query" + args.sparql_endpoint = f"{args.host_name}:{args.port}/query" return super().execute(args) diff --git a/src/qoxigraph/commands/stop.py 
b/src/qoxigraph/commands/stop.py index ed9c10369..dedd1ff2d 100644 --- a/src/qoxigraph/commands/stop.py +++ b/src/qoxigraph/commands/stop.py @@ -3,6 +3,7 @@ from qlever.command import QleverCommand from qlever.commands import stop as qlever_stop from qlever.log import log +from qlever.util import stop_process_with_regex from qoxigraph.commands.status import StatusCommand @@ -49,11 +50,7 @@ def execute(self, args) -> bool: return True if args.system == "native": - stop_process_results = ( - qlever_stop.StopCommand().stop_process_with_regex( - cmdline_regex - ) - ) + stop_process_results = stop_process_with_regex(cmdline_regex) if stop_process_results is None: return False if len(stop_process_results) > 0: From 0008cca2de10bdd4fd49ee787f31f2da4f4c4e37 Mon Sep 17 00:00:00 2001 From: tanmay-9 Date: Mon, 24 Mar 2025 15:24:53 +0100 Subject: [PATCH 3/7] Added commands to setup sparql-endpoint for qblazegraph natively and containerized --- pyproject.toml | 1 + src/qblazegraph/Dockerfile | 23 +++ src/qblazegraph/__init__.py | 0 src/qblazegraph/blazegraph.properties | 28 ++++ src/qblazegraph/commands/__init__.py | 0 src/qblazegraph/commands/example_queries.py | 18 +++ src/qblazegraph/commands/extract_queries.py | 1 + src/qblazegraph/commands/get_data.py | 1 + src/qblazegraph/commands/index.py | 170 ++++++++++++++++++++ src/qblazegraph/commands/log.py | 1 + src/qblazegraph/commands/query.py | 16 ++ src/qblazegraph/commands/setup_config.py | 45 ++++++ src/qblazegraph/commands/start.py | 166 +++++++++++++++++++ src/qblazegraph/commands/status.py | 7 + src/qblazegraph/commands/stop.py | 18 +++ 15 files changed, 495 insertions(+) create mode 100644 src/qblazegraph/Dockerfile create mode 100644 src/qblazegraph/__init__.py create mode 100644 src/qblazegraph/blazegraph.properties create mode 100644 src/qblazegraph/commands/__init__.py create mode 100644 src/qblazegraph/commands/example_queries.py create mode 120000 src/qblazegraph/commands/extract_queries.py create mode 120000 
src/qblazegraph/commands/get_data.py create mode 100644 src/qblazegraph/commands/index.py create mode 120000 src/qblazegraph/commands/log.py create mode 100644 src/qblazegraph/commands/query.py create mode 100644 src/qblazegraph/commands/setup_config.py create mode 100644 src/qblazegraph/commands/start.py create mode 100644 src/qblazegraph/commands/status.py create mode 100644 src/qblazegraph/commands/stop.py diff --git a/pyproject.toml b/pyproject.toml index 12c82f470..e6a32af1a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,6 +27,7 @@ Github = "https://github.com/ad-freiburg/qlever" [project.scripts] "qlever" = "qlever.qlever_main:main" +"qblazegraph" = "qlever.qlever_main:main" "qoxigraph" = "qlever.qlever_main:main" "qlever-old" = "qlever.qlever_old:main" diff --git a/src/qblazegraph/Dockerfile b/src/qblazegraph/Dockerfile new file mode 100644 index 000000000..75078049d --- /dev/null +++ b/src/qblazegraph/Dockerfile @@ -0,0 +1,23 @@ +# Use an official OpenJDK runtime as a parent image +FROM openjdk:21-jdk-slim + +RUN apt-get update && apt-get install -y wget gzip coreutils curl unzip + +WORKDIR /opt + +# Download and set up Blazegraph +RUN wget https://github.com/blazegraph/database/releases/download/BLAZEGRAPH_2_1_6_RC/blazegraph.jar + +# Set ownership to the user passed by UID and GID +ARG UID +ARG GID +RUN if [ "${UID:-}" != "" ] && [ "${GID:-}" != "" ]; then \ + chown -R ${UID}:${GID} /opt; \ + fi + +RUN chmod u+x blazegraph.jar + +# Expose Blazegraph port +# EXPOSE 9999 + +CMD ["bash"] diff --git a/src/qblazegraph/__init__.py b/src/qblazegraph/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/qblazegraph/blazegraph.properties b/src/qblazegraph/blazegraph.properties new file mode 100644 index 000000000..cad05ff70 --- /dev/null +++ b/src/qblazegraph/blazegraph.properties @@ -0,0 +1,28 @@ +# This configuration turns off incremental inference for load and retract, so +# you must explicitly force these operations if you want 
to compute the closure +# of the knowledge base. Forcing the closure requires punching through the SAIL +# layer. Of course, if you are not using inference then this configuration is +# just the ticket and is quite fast. + +# set the initial and maximum extent of the journal +com.bigdata.journal.AbstractJournal.initialExtent=209715200 +com.bigdata.journal.AbstractJournal.maximumExtent=209715200 + +# turn off automatic inference in the SAIL +com.bigdata.rdf.sail.truthMaintenance=false + +# don't store justification chains, meaning retraction requires full manual +# re-closure of the database +com.bigdata.rdf.store.AbstractTripleStore.justify=false + +# turn off the statement identifiers feature for provenance +com.bigdata.rdf.store.AbstractTripleStore.statementIdentifiers=false +com.bigdata.rdf.store.AbstractTripleStore.quads=false +# enable the free text index +com.bigdata.rdf.store.AbstractTripleStore.textIndex=true +com.bigdata.rdf.store.AbstractTripleStore.axiomsClass=com.bigdata.rdf.axioms.NoAxioms +com.bigdata.rdf.store.DataLoader.commit=Incremental +com.bigdata.rdf.store.DataLoader.closure=None + +com.bigdata.journal.AbstractJournal.bufferMode=DiskRW +com.bigdata.journal.AbstractJournal.file=blazegraph.jnl diff --git a/src/qblazegraph/commands/__init__.py b/src/qblazegraph/commands/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/qblazegraph/commands/example_queries.py b/src/qblazegraph/commands/example_queries.py new file mode 100644 index 000000000..a072131e6 --- /dev/null +++ b/src/qblazegraph/commands/example_queries.py @@ -0,0 +1,18 @@ +from __future__ import annotations + +from qlever.commands.example_queries import ( + ExampleQueriesCommand as QleverExampleQueriesCommand, +) + + +class ExampleQueriesCommand(QleverExampleQueriesCommand): + def relevant_qleverfile_arguments(self) -> dict[str : list[str]]: + return {"data": ["name"], "server": ["port"], "ui": ["ui_config"]} + + def execute(self, args) -> bool: + if not 
args.sparql_endpoint: + args.sparql_endpoint = ( + f"localhost:{args.port}/blazegraph/namespace/" + f"{args.name}/sparql" + ) + return super().execute(args) diff --git a/src/qblazegraph/commands/extract_queries.py b/src/qblazegraph/commands/extract_queries.py new file mode 120000 index 000000000..5667cc52a --- /dev/null +++ b/src/qblazegraph/commands/extract_queries.py @@ -0,0 +1 @@ +../../qlever/commands/extract_queries.py \ No newline at end of file diff --git a/src/qblazegraph/commands/get_data.py b/src/qblazegraph/commands/get_data.py new file mode 120000 index 000000000..4900dbb87 --- /dev/null +++ b/src/qblazegraph/commands/get_data.py @@ -0,0 +1 @@ +../../qlever/commands/get_data.py \ No newline at end of file diff --git a/src/qblazegraph/commands/index.py b/src/qblazegraph/commands/index.py new file mode 100644 index 000000000..df0e1fb06 --- /dev/null +++ b/src/qblazegraph/commands/index.py @@ -0,0 +1,170 @@ +from __future__ import annotations + +import glob +import shlex +from pathlib import Path + +from qlever.command import QleverCommand +from qlever.containerize import Containerize +from qlever.log import log +from qlever.util import run_command + + +class IndexCommand(QleverCommand): + def __init__(self): + self.script_name = "qblazegraph" + + def description(self) -> str: + return "Build the index for a given RDF dataset" + + def should_have_qleverfile(self) -> bool: + return True + + def relevant_qleverfile_arguments(self) -> dict[str : list[str]]: + return { + "data": ["name", "format"], + "index": ["input_files"], + "runtime": ["system", "image", "index_container"], + } + + def additional_arguments(self, subparser): + subparser.add_argument( + "--blazegraph-jar", + type=str, + default="blazegraph.jar", + help=( + "Path to blazegraph.jar file (default: blazegraph.jar) " + "(this requires that you have Java installed and blazegraph.jar " + "downloaded on your machine)" + ), + ) + + @staticmethod + def build_image(build_cmd: str, system: str, image: 
str) -> bool: + try: + run_command(build_cmd, show_output=True) + return True + except Exception as e: + log.error(f"Building the {system} image {image} failed: {e}") + return False + + @staticmethod + def wrap_cmd_in_container(args, cmd: str) -> str: + return Containerize().containerize_command( + cmd=cmd, + container_system=args.system, + run_subcommand="run --rm", + image_name=args.image, + container_name=args.index_container, + volumes=[("$(pwd)", "/opt/index")], + working_directory="/opt/index", + ) + + def execute(self, args) -> bool: + system = args.system + input_files = args.input_files + + jar_path = ( + args.blazegraph_jar + if args.system == "native" + else "/opt/blazegraph.jar" + ) + + index_cmd = ( + f"java -Xmx4g -XX:+UseG1GC -cp {jar_path} " + "com.bigdata.rdf.store.DataLoader -verbose " + f"-namespace {args.name} blazegraph.properties {input_files}" + ) + index_cmd += f" | tee {args.name}.index-log.txt" + + if args.system == "native": + cmd_to_show = index_cmd + else: + index_cmd = self.wrap_cmd_in_container(args, index_cmd) + dockerfile_dir = Path(__file__).parent.parent + dockerfile_path = dockerfile_dir / "Dockerfile" + build_cmd = ( + f"{system} build -f {dockerfile_path} -t {args.image} --build-arg " + f"UID=$(id -u) --build-arg GID=$(id -g) {dockerfile_dir}" + ) + image_id = run_command( + f"{system} images -q {args.image}", return_output=True + ) + cmd_to_show = ( + f"{build_cmd}\n\n{index_cmd}" if not image_id else index_cmd + ) + + # Show the command line. + self.show(cmd_to_show, only_show=args.show) + if args.show: + return True + + # Check if all of the input files exist. + for pattern in shlex.split(input_files): + if len(glob.glob(pattern)) == 0: + log.error(f'No file matching "{pattern}" found') + log.info("") + log.info( + f"Did you call `{self.script_name} get-data`? If you did, " + "check GET_DATA_CMD and INPUT_FILES in the Qleverfile" + ) + return False + + # When running natively, check if the binary exists and works. 
+ if args.system == "native": + try: + run_command("java --help") + except Exception as e: + log.error(f"Java not found on the machine! - {e}") + log.info( + "Blazegraph needs Java to execute the blazegraph.jar file" + ) + return False + if not Path(args.blazegraph_jar).exists(): + jar_link = ( + "https://github.com/blazegraph/database/releases/download/" + "BLAZEGRAPH_2_1_6_RC/blazegraph.jar" + ) + log.error( + "Couldn't find the blazegraph.jar in specified path: " + f"{Path(args.blazegraph_jar).absolute()}\n" + ) + log.info( + "Are you sure you downloaded the blazegraph.jar file? " + f"blazegraph.jar can be downloaded from {jar_link}" + ) + return False + else: + if Containerize().is_running(args.system, args.index_container): + log.info( + f"{args.system} container {args.index_container} is still up, " + "which means that data loading is in progress. Please wait..." + ) + return False + + if not image_id: + build_successful = self.build_image( + build_cmd, system, args.image + ) + if not build_successful: + return False + else: + log.info(f"{args.image} image present on the system\n") + + # index_dir = Path("blazegraph.jnl") + # if index_dir.exists() and any(index_dir.iterdir()): + # log.error( + # "Blazegraph journal blazegraph.jnl found in current working " + # "directory which shows presence of a previous index\n" + # ) + # log.info("Aborting the index operation...") + # return False + + # Run the index command. 
+ try: + run_command(index_cmd, show_output=True) + except Exception as e: + log.error(f"Building the index failed: {e}") + return False + + return True diff --git a/src/qblazegraph/commands/log.py b/src/qblazegraph/commands/log.py new file mode 120000 index 000000000..90ffabb45 --- /dev/null +++ b/src/qblazegraph/commands/log.py @@ -0,0 +1 @@ +../../qlever/commands/log.py \ No newline at end of file diff --git a/src/qblazegraph/commands/query.py b/src/qblazegraph/commands/query.py new file mode 100644 index 000000000..d63c09ce7 --- /dev/null +++ b/src/qblazegraph/commands/query.py @@ -0,0 +1,16 @@ +from __future__ import annotations + +from qoxigraph.commands.query import QueryCommand as QoxigraphQueryCommand + + +class QueryCommand(QoxigraphQueryCommand): + def relevant_qleverfile_arguments(self) -> dict[str : list[str]]: + return {"data": ["name"], "server": ["port", "access_token"]} + + def execute(self, args) -> bool: + if not args.sparql_endpoint: + args.sparql_endpoint = ( + f"localhost:{args.port}/blazegraph/namespace/" + f"{args.name}/sparql" + ) + super().execute(args) diff --git a/src/qblazegraph/commands/setup_config.py b/src/qblazegraph/commands/setup_config.py new file mode 100644 index 000000000..2433948a5 --- /dev/null +++ b/src/qblazegraph/commands/setup_config.py @@ -0,0 +1,45 @@ +from __future__ import annotations + +import shutil +from pathlib import Path + +from qlever.log import log +from qoxigraph.commands.setup_config import ( + SetupConfigCommand as QoxigraphSetupConfigCommand, +) + + +class SetupConfigCommand(QoxigraphSetupConfigCommand): + """ + Should behave exactly the same as setup-config command in qoxigraph, + just with a different Docker image name + """ + + IMAGE = "adfreiburg/qblazegraph" + + def execute(self, args) -> bool: + if not super().execute(args): + return False + + if args.show: + return True + + properties_file_path = ( + Path(__file__).parent.parent / "blazegraph.properties" + ) + destination = 
Path("blazegraph.properties") + try: + shutil.copy(properties_file_path, destination) + log.info("Copied blazegraph.properties to current directory!") + return True + except Exception as e: + file_url = ( + "https://github.com/ad-freiburg/qlever-control/tree/main/src/" + "qblazegraph/blazegraph.properties" + ) + log.error( + "Couldn't copy blazegraph.properties file to current working " + f"directory! Error: {e}\n" + ) + log.info(f"Download it manually from {file_url}") + return False diff --git a/src/qblazegraph/commands/start.py b/src/qblazegraph/commands/start.py new file mode 100644 index 000000000..5b12b8508 --- /dev/null +++ b/src/qblazegraph/commands/start.py @@ -0,0 +1,166 @@ +from __future__ import annotations + +from pathlib import Path + +from qlever.command import QleverCommand +from qlever.containerize import Containerize +from qlever.log import log +from qlever.util import is_server_alive, run_command + + +class StartCommand(QleverCommand): + def __init__(self): + self.script_name = "qblazegraph" + + def description(self) -> str: + return ( + "Start the server for Blazegraph (requires that you have built an " + "index before)" + ) + + def should_have_qleverfile(self) -> bool: + return True + + def relevant_qleverfile_arguments(self) -> dict[str : list[str]]: + return { + "data": ["name"], + "server": ["host_name", "port"], + "runtime": ["system", "image", "server_container"], + } + + def additional_arguments(self, subparser): + subparser.add_argument( + "--run-in-foreground", + action="store_true", + default=False, + help=( + "Run the start command in the foreground " + "(default: run in the background)" + ), + ) + subparser.add_argument( + "--blazegraph-jar", + type=str, + default="blazegraph.jar", + help=( + "Path to blazegraph.jar file (default: blazegraph.jar) " + "(this requires that you have Java installed and blazegraph.jar " + "downloaded on your machine)" + ), + ) + + @staticmethod + def wrap_cmd_in_container(args, cmd: str) -> str: + 
run_subcommand = "run --restart=unless-stopped" + if not args.run_in_foreground: + run_subcommand += " -d" + if not args.run_in_foreground: + cmd = f"{cmd} > {args.name}.server-log.txt 2>&1" + return Containerize().containerize_command( + cmd=cmd, + container_system=args.system, + run_subcommand=run_subcommand, + image_name=args.image, + container_name=args.server_container, + volumes=[("$(pwd)", "/opt/index")], + working_directory="/opt/index", + ports=[(args.port, args.port)], + ) + + def execute(self, args) -> bool: + jar_path = ( + args.blazegraph_jar + if args.system == "native" + else "/opt/blazegraph.jar" + ) + start_cmd = ( + f"java -server -Xmx4g -Djetty.port={args.port} -jar {jar_path}" + ) + + if args.system == "native": + if not args.run_in_foreground: + start_cmd = ( + f"nohup {start_cmd} > {args.name}.server-log.txt 2>&1 &" + ) + else: + start_cmd = self.wrap_cmd_in_container(args, start_cmd) + + # Show the command line. + self.show(start_cmd, only_show=args.show) + if args.show: + return True + + # When running natively, check if the binary exists and works. + if args.system == "native": + try: + run_command("java --help") + except Exception as e: + log.error(f"Java not found on the machine! - {e}") + log.info( + "Blazegraph needs Java to execute the blazegraph.jar file" + ) + return False + if not Path(args.blazegraph_jar).exists(): + jar_link = ( + "https://github.com/blazegraph/database/releases/download/" + "BLAZEGRAPH_2_1_6_RC/blazegraph.jar" + ) + log.error( + "Couldn't find the blazegraph.jar in specified path: " + f"{Path(args.blazegraph_jar).absolute()}\n" + ) + log.info( + "Are you sure you downloaded the blazegraph.jar file? 
" + f"blazegraph.jar can be downloaded from {jar_link}" + ) + return False + else: + if Containerize().is_running(args.system, args.server_container): + log.error( + f"Server container {args.server_container} already exists!\n" + ) + log.info( + f"To kill the existing server, use `{self.script_name} stop`" + ) + return False + + jnl_file = Path("blazegraph.jnl") + if not jnl_file.exists(): + log.info(f"No Blazegraph journal for {args.name} found! ") + log.info( + f"Did you call `{self.script_name} index`? If you did, check " + "if blazegraph.jnl is present in the current working directory" + ) + return False + + endpoint_url = f"http://{args.host_name}:{args.port}/blazegraph" + if is_server_alive(url=endpoint_url): + log.error(f"Blazegraph server already running on {endpoint_url}\n") + log.info( + f"To kill the existing server, use `{self.script_name} stop`" + ) + return False + + # Run the start command. + try: + run_command(start_cmd, show_output=True) + log.info( + f"Blazegraph server webapp for {args.name} will be available at " + f"http://{args.host_name}:{args.port} and the sparql endpoint for " + f"queries is {endpoint_url}/namespace/{args.name}/sparql" + ) + if args.run_in_foreground: + log.info( + "Follow the log as long as the server is" + " running (Ctrl-C stops the server)" + ) + else: + log.info( + f"Follow `{self.script_name} log` until the server is ready" + f" (Ctrl-C stops following the log, but NOT the server)" + ) + except Exception as e: + log.error(f"Starting the Jena server failed: {e}") + return False + + return True diff --git a/src/qblazegraph/commands/status.py b/src/qblazegraph/commands/status.py new file mode 100644 index 000000000..f7fbb8c16 --- /dev/null +++ b/src/qblazegraph/commands/status.py @@ -0,0 +1,7 @@ +from __future__ import annotations + +from qoxigraph.commands.status import StatusCommand as QoxigraphStatusCommand + + +class StatusCommand(QoxigraphStatusCommand): + DEFAULT_REGEX = "java\\s+-server" diff --git 
a/src/qblazegraph/commands/stop.py b/src/qblazegraph/commands/stop.py new file mode 100644 index 000000000..fea9f363e --- /dev/null +++ b/src/qblazegraph/commands/stop.py @@ -0,0 +1,18 @@ +from __future__ import annotations + +from qblazegraph.commands.status import StatusCommand +from qoxigraph.commands.stop import StopCommand as QoxigraphStopCommand + + +class StopCommand(QoxigraphStopCommand): + STATUS_COMMAND = StatusCommand() + DEFAULT_REGEX = "java\\s+-server.*=%%PORT%%" + + def description(self) -> str: + return "Stop Blazegraph server for a given dataset or port" + + def execute(self, args) -> bool: + args.cmdline_regex = args.cmdline_regex.replace( + "%%PORT%%", str(args.port) + ) + return super().execute(args) From 0c42803d7b43f2739e528c47b7acba56f356f0d8 Mon Sep 17 00:00:00 2001 From: tanmay-9 Date: Tue, 25 Mar 2025 22:54:57 +0100 Subject: [PATCH 4/7] Added `--java-heap-gb` arg to qblazegraph index and server --- src/qblazegraph/commands/index.py | 4 ++-- src/qblazegraph/commands/setup_config.py | 30 +++++++++++++++++++++--- src/qblazegraph/commands/start.py | 5 ++-- src/qlever/qleverfile.py | 12 ++++++++++ 4 files changed, 44 insertions(+), 7 deletions(-) diff --git a/src/qblazegraph/commands/index.py b/src/qblazegraph/commands/index.py index df0e1fb06..e8015ea80 100644 --- a/src/qblazegraph/commands/index.py +++ b/src/qblazegraph/commands/index.py @@ -23,7 +23,7 @@ def should_have_qleverfile(self) -> bool: def relevant_qleverfile_arguments(self) -> dict[str : list[str]]: return { "data": ["name", "format"], - "index": ["input_files"], + "index": ["input_files", "java_heap_gb"], "runtime": ["system", "image", "index_container"], } @@ -71,7 +71,7 @@ def execute(self, args) -> bool: ) index_cmd = ( - f"java -Xmx4g -XX:+UseG1GC -cp {jar_path} " + f"java -Xmx{args.java_heap_gb}g -XX:+UseG1GC -cp {jar_path} " "com.bigdata.rdf.store.DataLoader -verbose " f"-namespace {args.name} blazegraph.properties {input_files}" ) diff --git 
a/src/qblazegraph/commands/setup_config.py b/src/qblazegraph/commands/setup_config.py index 2433948a5..c10c80499 100644 --- a/src/qblazegraph/commands/setup_config.py +++ b/src/qblazegraph/commands/setup_config.py @@ -18,12 +18,36 @@ class SetupConfigCommand(QoxigraphSetupConfigCommand): IMAGE = "adfreiburg/qblazegraph" def execute(self, args) -> bool: - if not super().execute(args): + qleverfile_path = Path("Qleverfile") + exit_status = self.validate_qleverfile_setup(args, qleverfile_path) + if exit_status is not None: + return exit_status + + qleverfile_parser = self.get_filtered_qleverfile_parser( + args.config_name + ) + # Add the java_heap_gb to index and server sections + qleverfile_parser.set("index", "JAVA_HEAP_GB", 6) + qleverfile_parser.set("server", "JAVA_HEAP_GB", 6) + + # Copy the Qleverfile to the current directory. + try: + with qleverfile_path.open("w") as f: + qleverfile_parser.write(f) + except Exception as e: + log.error( + f'Could not copy "{qleverfile_path}" to current directory: {e}' + ) return False - if args.show: - return True + # If we get here, everything went well. 
+ log.info( + f'Created Qleverfile for config "{args.config_name}"' + f" in current directory" + ) + log.info("") + log.info("Fetching blazegraph.properties file...") properties_file_path = ( Path(__file__).parent.parent / "blazegraph.properties" ) diff --git a/src/qblazegraph/commands/start.py b/src/qblazegraph/commands/start.py index 5b12b8508..06640f3ae 100644 --- a/src/qblazegraph/commands/start.py +++ b/src/qblazegraph/commands/start.py @@ -24,7 +24,7 @@ def should_have_qleverfile(self) -> bool: def relevant_qleverfile_arguments(self) -> dict[str : list[str]]: return { "data": ["name"], - "server": ["host_name", "port"], + "server": ["host_name", "port", "java_heap_gb"], "runtime": ["system", "image", "server_container"], } @@ -74,7 +74,8 @@ def execute(self, args) -> bool: else "/opt/blazegraph.jar" ) start_cmd = ( - f"java -server -Xmx4g -Djetty.port={args.port} -jar {jar_path}" + f"java -server -Xmx{args.java_heap_gb}g " + f"-Djetty.port={args.port} -jar {jar_path}" ) if args.system == "native": diff --git a/src/qlever/qleverfile.py b/src/qlever/qleverfile.py index 803b971cb..ff4758749 100644 --- a/src/qlever/qleverfile.py +++ b/src/qlever/qleverfile.py @@ -177,6 +177,18 @@ def arg(*args, **kwargs): help="File with the documents for the text index (one line " "per document, format: `id\tdocument text`)", ) + java_heap_arg = arg( + "--java-heap-gb", + type=int, + default=6, + help=( + "Specify Java heap size to match data size. In most cases, " + "6 will be enough. Also beware of setting heap more than 8 " + "due to garbage collector pressure." 
+ ) + ) + index_args["java_heap_gb"] = java_heap_arg + server_args["java_heap_gb"] = java_heap_arg server_args["server_binary"] = arg( "--server-binary", From b19b5225552ff65a7de0c5ac15f93c7009ad2ace Mon Sep 17 00:00:00 2001 From: tanmay-9 Date: Wed, 2 Apr 2025 00:05:42 +0200 Subject: [PATCH 5/7] Fix description and regex for `stop` and `status` commands for qblazegraph --- src/qblazegraph/commands/status.py | 7 ++++++- src/qblazegraph/commands/stop.py | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/qblazegraph/commands/status.py b/src/qblazegraph/commands/status.py index f7fbb8c16..107b0481d 100644 --- a/src/qblazegraph/commands/status.py +++ b/src/qblazegraph/commands/status.py @@ -4,4 +4,9 @@ class StatusCommand(QoxigraphStatusCommand): - DEFAULT_REGEX = "java\\s+-server" + DEFAULT_REGEX = "java\\s+-server.*blazegraph.jar" + + def description(self) -> str: + return ( + "Show Java processes with blazegraph.jar running on this machine" + ) diff --git a/src/qblazegraph/commands/stop.py b/src/qblazegraph/commands/stop.py index fea9f363e..22253fbaa 100644 --- a/src/qblazegraph/commands/stop.py +++ b/src/qblazegraph/commands/stop.py @@ -6,7 +6,7 @@ class StopCommand(QoxigraphStopCommand): STATUS_COMMAND = StatusCommand() - DEFAULT_REGEX = "java\\s+-server.*=%%PORT%%" + DEFAULT_REGEX = "java\\s+-server.*=%%PORT%%.*blazegraph.jar" def description(self) -> str: return "Stop Blazegraph server for a given dataset or port" From 85378e53da5849e5065c1ea73ee9a1b59a0049ed Mon Sep 17 00:00:00 2001 From: tanmay-9 Date: Sat, 5 Apr 2025 13:54:39 +0200 Subject: [PATCH 6/7] Added logging to qblazegraph `start` command --- src/qblazegraph/commands/start.py | 59 ++++++++++++++++++++++--------- 1 file changed, 43 insertions(+), 16 deletions(-) diff --git a/src/qblazegraph/commands/start.py b/src/qblazegraph/commands/start.py index 06640f3ae..6057f3960 100644 --- a/src/qblazegraph/commands/start.py +++ b/src/qblazegraph/commands/start.py @@ -1,5 +1,7 @@ from 
__future__ import annotations +import subprocess +import time from pathlib import Path from qlever.command import QleverCommand @@ -144,24 +146,49 @@ def execute(self, args) -> bool: # Run the start command. try: - run_command(start_cmd, show_output=True) - log.info( - f"Blazegraph server webapp for {args.name} will be available at " - f"http://{args.host_name}:{args.port} and the sparql endpoint for " - f"queries is {endpoint_url}/namespace/{args.name}/sparql" + process = run_command( + start_cmd, + use_popen=args.run_in_foreground, ) - if args.run_in_foreground: - log.info( - "Follow the log as long as the server is" - " running (Ctrl-C stops the server)" - ) - else: - log.info( - f"Follow `{self.script_name} log` until the server is ready" - f" (Ctrl-C stops following the log, but NOT the server)" - ) except Exception as e: - log.error(f"Starting the Jena server failed: {e}") + log.error(f"Starting the Jena server failed ({e})") return False + # Tail the server log until the server is ready (note that the `exec` + # is important to make sure that the tail process is killed and not + # just the bash process). + if args.run_in_foreground: + log.info( + "Follow the server logs as long as the server is" + " running (Ctrl-C stops the server)" + ) + else: + log.info( + "Follow the server logs until the server is ready" + " (Ctrl-C stops following the log, but NOT the server)" + ) + log.info("") + log_cmd = f"exec tail -f {args.name}.server-log.txt" + log_proc = subprocess.Popen(log_cmd, shell=True) + while not is_server_alive(endpoint_url): + time.sleep(1) + + log.info( + f"Blazegraph server webapp for {args.name} will be available at " + f"http://{args.host_name}:{args.port} and the sparql endpoint for " + f"queries is {endpoint_url}/namespace/{args.name}/sparql" + ) + + # Kill the log process + if not args.run_in_foreground: + log_proc.terminate() + + # With `--run-in-foreground`, wait until the server is stopped. 
+ if args.run_in_foreground: + try: + process.wait() + except KeyboardInterrupt: + process.terminate() + log_proc.terminate() + return True From 13e78665df16e1b56ad2f0b8395867c5880802a8 Mon Sep 17 00:00:00 2001 From: tanmay-9 Date: Sun, 1 Jun 2025 22:06:13 +0200 Subject: [PATCH 7/7] Change localhost -> args.host_name in `example-queries` --- src/qblazegraph/commands/example_queries.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/qblazegraph/commands/example_queries.py b/src/qblazegraph/commands/example_queries.py index a072131e6..eb7e43c58 100644 --- a/src/qblazegraph/commands/example_queries.py +++ b/src/qblazegraph/commands/example_queries.py @@ -12,7 +12,7 @@ def relevant_qleverfile_arguments(self) -> dict[str : list[str]]: def execute(self, args) -> bool: if not args.sparql_endpoint: args.sparql_endpoint = ( - f"localhost:{args.port}/blazegraph/namespace/" + f"{args.host_name}:{args.port}/blazegraph/namespace/" f"{args.name}/sparql" ) return super().execute(args)