From 1a4373ec9b36286c661aa8d15b0ee66d06331ef6 Mon Sep 17 00:00:00 2001 From: tanmay-9 Date: Thu, 17 Apr 2025 16:13:29 +0200 Subject: [PATCH 1/6] Added commands to setup Oxigraph Added all the commands necessary to setup SPARQL endpoint for oxigraph natively and containerized --- pyproject.toml | 2 + src/qoxigraph/__init__.py | 0 src/qoxigraph/commands/__init__.py | 0 src/qoxigraph/commands/example_queries.py | 12 ++ src/qoxigraph/commands/extract_queries.py | 1 + src/qoxigraph/commands/get_data.py | 1 + src/qoxigraph/commands/index.py | 109 ++++++++++++++ src/qoxigraph/commands/log.py | 50 +++++++ src/qoxigraph/commands/query.py | 54 +++++++ src/qoxigraph/commands/setup_config.py | 109 ++++++++++++++ src/qoxigraph/commands/start.py | 175 ++++++++++++++++++++++ src/qoxigraph/commands/status.py | 19 +++ src/qoxigraph/commands/stop.py | 71 +++++++++ 13 files changed, 603 insertions(+) create mode 100644 src/qoxigraph/__init__.py create mode 100644 src/qoxigraph/commands/__init__.py create mode 100644 src/qoxigraph/commands/example_queries.py create mode 120000 src/qoxigraph/commands/extract_queries.py create mode 120000 src/qoxigraph/commands/get_data.py create mode 100644 src/qoxigraph/commands/index.py create mode 100644 src/qoxigraph/commands/log.py create mode 100644 src/qoxigraph/commands/query.py create mode 100644 src/qoxigraph/commands/setup_config.py create mode 100644 src/qoxigraph/commands/start.py create mode 100644 src/qoxigraph/commands/status.py create mode 100644 src/qoxigraph/commands/stop.py diff --git a/pyproject.toml b/pyproject.toml index b053fe62d..12c82f470 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,6 +27,8 @@ Github = "https://github.com/ad-freiburg/qlever" [project.scripts] "qlever" = "qlever.qlever_main:main" +"qoxigraph" = "qlever.qlever_main:main" +"qlever-old" = "qlever.qlever_old:main" [tool.setuptools] license-files = ["LICENSE"] diff --git a/src/qoxigraph/__init__.py b/src/qoxigraph/__init__.py new file mode 100644 index 
000000000..e69de29bb diff --git a/src/qoxigraph/commands/__init__.py b/src/qoxigraph/commands/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/qoxigraph/commands/example_queries.py b/src/qoxigraph/commands/example_queries.py new file mode 100644 index 000000000..d62982a83 --- /dev/null +++ b/src/qoxigraph/commands/example_queries.py @@ -0,0 +1,12 @@ +from __future__ import annotations + +from qlever.commands.example_queries import ( + ExampleQueriesCommand as QleverExampleQueriesCommand, +) + + +class ExampleQueriesCommand(QleverExampleQueriesCommand): + def execute(self, args) -> bool: + if not args.sparql_endpoint: + args.sparql_endpoint = f"localhost:{args.port}/query" + return super().execute(args) diff --git a/src/qoxigraph/commands/extract_queries.py b/src/qoxigraph/commands/extract_queries.py new file mode 120000 index 000000000..5667cc52a --- /dev/null +++ b/src/qoxigraph/commands/extract_queries.py @@ -0,0 +1 @@ +../../qlever/commands/extract_queries.py \ No newline at end of file diff --git a/src/qoxigraph/commands/get_data.py b/src/qoxigraph/commands/get_data.py new file mode 120000 index 000000000..4900dbb87 --- /dev/null +++ b/src/qoxigraph/commands/get_data.py @@ -0,0 +1 @@ +../../qlever/commands/get_data.py \ No newline at end of file diff --git a/src/qoxigraph/commands/index.py b/src/qoxigraph/commands/index.py new file mode 100644 index 000000000..128b9a826 --- /dev/null +++ b/src/qoxigraph/commands/index.py @@ -0,0 +1,109 @@ +from __future__ import annotations + +import glob +import shlex +from pathlib import Path + +from qlever.command import QleverCommand +from qlever.containerize import Containerize +from qlever.log import log +from qlever.util import binary_exists, run_command + + +class IndexCommand(QleverCommand): + def __init__(self): + self.script_name = "qoxigraph" + + def description(self) -> str: + return "Build the index for a given RDF dataset" + + def should_have_qleverfile(self) -> bool: + return True + + 
def relevant_qleverfile_arguments(self) -> dict[str : list[str]]: + return { + "data": ["name", "format"], + "index": ["input_files"], + "runtime": ["system", "image", "index_container"], + } + + def additional_arguments(self, subparser): + subparser.add_argument( + "--index-binary", + type=str, + default="oxigraph", + help=( + "The binary for building the index (default: oxigraph) " + "(this requires that you have oxigraph-cli installed " + "on your machine)" + ), + ) + + @staticmethod + def wrap_cmd_in_container(args, cmd: str) -> str: + return Containerize().containerize_command( + cmd=cmd, + container_system=args.system, + run_subcommand="run --rm", + image_name=args.image, + container_name=args.index_container, + volumes=[("$(pwd)", "/index")], + working_directory="/index", + use_bash=False, + ) + + def execute(self, args) -> bool: + index_cmd = f"load --location . --file {args.input_files}" + index_cmd += f" |& tee {args.name}.index-log.txt" + + index_cmd = ( + f"{args.index_binary} {index_cmd}" + if args.system == "native" + else self.wrap_cmd_in_container(args, index_cmd) + ) + + # Show the command line. + self.show(index_cmd, only_show=args.show) + if args.show: + return True + + # Check if all of the input files exist. + for pattern in shlex.split(args.input_files): + if len(glob.glob(pattern)) == 0: + log.error(f'No file matching "{pattern}" found') + log.info("") + log.info( + f"Did you call `{self.script_name} get-data`? If you did, " + "check GET_DATA_CMD and INPUT_FILES in the Qleverfile" + ) + return False + + # When running natively, check if the binary exists and works. + if args.system == "native": + if not binary_exists(args.index_binary, "index-binary"): + return False + else: + if Containerize().is_running(args.system, args.index_container): + log.info( + f"{args.system} container {args.index_container} is still up, " + "which means that data loading is in progress. Please wait..." 
+ ) + return False + + if len([p.name for p in Path.cwd().glob("*.sst")]) != 0: + log.error( + "Index files (*.sst) found in current directory " + "which shows presence of a previous index" + ) + log.info("") + log.info("Aborting the index operation...") + return False + + # Run the index command. + try: + run_command(index_cmd, show_output=True, show_stderr=True) + except Exception as e: + log.error(f"Building the index failed: {e}") + return False + + return True diff --git a/src/qoxigraph/commands/log.py b/src/qoxigraph/commands/log.py new file mode 100644 index 000000000..a90d22288 --- /dev/null +++ b/src/qoxigraph/commands/log.py @@ -0,0 +1,50 @@ +from __future__ import annotations + +from qlever.commands.log import LogCommand as QleverLogCommand +from qlever.containerize import Containerize +from qlever.log import log +from qlever.util import run_command + + +class LogCommand(QleverLogCommand): + def __init__(self): + self.script_name = "qoxigraph" + + def relevant_qleverfile_arguments(self) -> dict[str : list[str]]: + return { + "data": ["name"], + "runtime": [ + "system", + "image", + "server_container", + ], + } + + def execute(self, args) -> bool: + if args.system == "native": + return super().execute(args) + + log_cmd = f"{args.system} logs " + + if not args.from_beginning: + log_cmd += f"-n {args.tail_num_lines} " + if not args.no_follow: + log_cmd += "-f " + + log_cmd += args.server_container + + # Show the command line. 
+ self.show(log_cmd, only_show=args.show) + if args.show: + return True + + if not Containerize().is_running(args.system, args.server_container): + log.error(f"No server container {args.server_container} found!\n") + log.info(f"Are you sure you called `{self.script_name} start`?") + return False + + try: + run_command(log_cmd, show_output=True, show_stderr=True) + except Exception as e: + log.error(f"Cannot display container logs - {e}") + return True diff --git a/src/qoxigraph/commands/query.py b/src/qoxigraph/commands/query.py new file mode 100644 index 000000000..6518905f3 --- /dev/null +++ b/src/qoxigraph/commands/query.py @@ -0,0 +1,54 @@ +from __future__ import annotations + +from qlever.commands.query import QueryCommand as QleverQueryCommand + + +class QueryCommand(QleverQueryCommand): + def additional_arguments(self, subparser) -> None: + subparser.add_argument( + "query", + type=str, + nargs="?", + default="SELECT * WHERE { ?s ?p ?o } LIMIT 10", + help="SPARQL query to send", + ) + subparser.add_argument( + "--predefined-query", + type=str, + choices=self.predefined_queries.keys(), + help="Use a predefined query", + ) + subparser.add_argument( + "--sparql-endpoint", type=str, help="URL of the SPARQL endpoint" + ) + subparser.add_argument( + "--accept", + type=str, + choices=[ + "text/tab-separated-values", + "text/csv", + "application/sparql-results+json", + "application/sparql-results+xml", + ], + default="text/tab-separated-values", + help="Accept header for the SPARQL query", + ) + subparser.add_argument( + "--get", + action="store_true", + default=False, + help="Use GET request instead of POST", + ) + subparser.add_argument( + "--no-time", + action="store_true", + default=False, + help="Do not print the (end-to-end) time taken", + ) + + def execute(self, args) -> bool: + if not args.sparql_endpoint: + args.sparql_endpoint = f"localhost:{args.port}/query" + args.pin_to_cache = None + args.access_token = None + return super().execute(args) diff --git 
a/src/qoxigraph/commands/setup_config.py b/src/qoxigraph/commands/setup_config.py new file mode 100644 index 000000000..b6d9225ba --- /dev/null +++ b/src/qoxigraph/commands/setup_config.py @@ -0,0 +1,109 @@ +from __future__ import annotations + +from configparser import RawConfigParser +from pathlib import Path + +from qlever.command import QleverCommand +from qlever.log import log +from qlever.qleverfile import Qleverfile + + +class SetupConfigCommand(QleverCommand): + IMAGE = "ghcr.io/oxigraph/oxigraph" + + FILTER_CRITERIA = { + "data": [], + "index": ["INPUT_FILES"], + "server": ["PORT"], + "runtime": ["SYSTEM", "IMAGE"], + "ui": ["UI_CONFIG"], + } + + def __init__(self): + self.qleverfiles_path = ( + Path(__file__).parent.parent.parent / "qlever" / "Qleverfiles" + ) + self.qleverfile_names = [ + p.name.split(".")[1] + for p in self.qleverfiles_path.glob("Qleverfile.*") + ] + + def description(self) -> str: + return "Get a pre-configured Qleverfile" + + def should_have_qleverfile(self) -> bool: + return False + + def relevant_qleverfile_arguments(self) -> dict[str : list[str]]: + return {} + + def additional_arguments(self, subparser) -> None: + subparser.add_argument( + "config_name", + type=str, + choices=self.qleverfile_names, + help="The name of the pre-configured Qleverfile to create", + ) + + def validate_qleverfile_setup( + self, args, qleverfile_path: Path + ) -> bool | None: + # Construct the command line and show it. + setup_config_show = ( + f"Creating Qleverfile for {args.config_name} using " + f"Qleverfile.{args.config_name} file in {self.qleverfiles_path}" + ) + self.show(setup_config_show, only_show=args.show) + if args.show: + return True + + # If there is already a Qleverfile in the current directory, exit. 
+ if qleverfile_path.exists(): + log.error("`Qleverfile` already exists in current directory") + log.info("") + log.info( + "If you want to create a new Qleverfile using " + "`qlever setup-config`, delete the existing Qleverfile " + "first" + ) + return False + return None + + def get_filtered_qleverfile_parser( + self, config_name: str + ) -> RawConfigParser: + qleverfile_config_path = ( + self.qleverfiles_path / f"Qleverfile.{config_name}" + ) + qleverfile_parser = Qleverfile.filter( + qleverfile_config_path, self.FILTER_CRITERIA + ) + if qleverfile_parser.has_section("runtime"): + qleverfile_parser.set("runtime", "IMAGE", self.IMAGE) + return qleverfile_parser + + def execute(self, args) -> bool: + qleverfile_path = Path("Qleverfile") + exit_status = self.validate_qleverfile_setup(args, qleverfile_path) + if exit_status is not None: + return exit_status + + qleverfile_parser = self.get_filtered_qleverfile_parser( + args.config_name + ) + # Copy the Qleverfile to the current directory. + try: + with qleverfile_path.open("w") as f: + qleverfile_parser.write(f) + except Exception as e: + log.error( + f'Could not copy "{qleverfile_path}" to current directory: {e}' + ) + return False + + # If we get here, everything went well. 
+ log.info( + f'Created Qleverfile for config "{args.config_name}"' + f" in current directory" + ) + return True diff --git a/src/qoxigraph/commands/start.py b/src/qoxigraph/commands/start.py new file mode 100644 index 000000000..8a038344d --- /dev/null +++ b/src/qoxigraph/commands/start.py @@ -0,0 +1,175 @@ +from __future__ import annotations + +import subprocess +import time +from pathlib import Path + +from qlever.command import QleverCommand +from qlever.containerize import Containerize +from qlever.log import log +from qlever.util import binary_exists, is_server_alive, run_command + + +class StartCommand(QleverCommand): + def __init__(self): + self.script_name = "qoxigraph" + + def description(self) -> str: + return ( + "Start the server for Oxigraph (requires that you have built an " + "index before)" + ) + + def should_have_qleverfile(self) -> bool: + return True + + def relevant_qleverfile_arguments(self) -> dict[str : list[str]]: + return { + "data": ["name"], + "server": ["host_name", "port"], + "runtime": ["system", "image", "server_container"], + } + + def additional_arguments(self, subparser): + subparser.add_argument( + "--run-in-foreground", + action="store_true", + default=False, + help=( + "Run the start command in the foreground " + "(default: run in the background)" + ), + ) + subparser.add_argument( + "--server-binary", + type=str, + default="oxigraph", + help=( + "The binary for starting the server (default: oxigraph) " + "(this requires that you have oxigraph-cli installed " + "on your machine)" + ), + ) + + @staticmethod + def wrap_cmd_in_container(args, cmd: str) -> str: + run_subcommand = "run --restart=unless-stopped" + if not args.run_in_foreground: + run_subcommand += " -d" + return Containerize().containerize_command( + cmd=cmd, + container_system=args.system, + run_subcommand=run_subcommand, + image_name=args.image, + container_name=args.server_container, + volumes=[("$(pwd)", "/index")], + ports=[(args.port, args.port)], + 
working_directory="/index", + use_bash=False, + ) + + def execute(self, args) -> bool: + bind = ( + f"{args.host_name}:{args.port}" + if args.system == "native" + else f"0.0.0.0:{args.port}" + ) + start_cmd = f"serve-read-only --location . --bind={bind}" + + if args.system == "native": + start_cmd = f"{args.server_binary} {start_cmd}" + if not args.run_in_foreground: + start_cmd = ( + f"nohup {start_cmd} > {args.name}.server-log.txt 2>&1 &" + ) + else: + start_cmd = self.wrap_cmd_in_container(args, start_cmd) + + # Show the command line. + self.show(start_cmd, only_show=args.show) + if args.show: + return True + + endpoint_url = f"http://{args.host_name}:{args.port}/query" + + # When running natively, check if the binary exists and works. + if args.system == "native": + if not binary_exists(args.server_binary, "server-binary"): + return False + else: + if Containerize().is_running(args.system, args.server_container): + log.error( + f"Server container {args.server_container} already exists!\n" + ) + log.info( + f"To kill the existing server, use `{self.script_name} stop`" + ) + return False + + # Check if index files (*.sst) present in cwd + if len([p.name for p in Path.cwd().glob("*.sst")]) == 0: + log.error(f"No Oxigraph index files for {args.name} found!\n") + log.info( + f"Did you call `{self.script_name} index`? If you did, check " + "if .sst index files are present in current working directory." 
+ ) + return False + + if is_server_alive(url=endpoint_url): + log.error(f"Oxigraph server already running on {endpoint_url}\n") + log.info( + f"To kill the existing server, use `{self.script_name} stop`" + ) + return False + + try: + process = run_command( + start_cmd, + use_popen=args.run_in_foreground, + ) + except Exception as e: + log.error(f"Starting the Oxigraph server failed ({e})") + return False + + # Tail the server log until the server is ready (note that the `exec` + # is important to make sure that the tail process is killed and not + # just the bash process). + if args.run_in_foreground: + log.info( + "Follow the server logs as long as the server is" + " running (Ctrl-C stops the server)" + ) + else: + log.info( + "Follow the server logs until the server is ready" + " (Ctrl-C stops following the log, but NOT the server)" + ) + log.info("") + if args.system == "native": + log_cmd = f"exec tail -f {args.name}.server-log.txt" + else: + time.sleep(2) + log_cmd = f"exec {args.system} logs -f {args.server_container}" + log_proc = subprocess.Popen(log_cmd, shell=True) + while not is_server_alive(endpoint_url): + time.sleep(1) + + log.info( + f"Oxigraph server webapp for {args.name} will be available at " + f"http://{args.host_name}:{args.port} and the sparql endpoint for " + f"queries is {endpoint_url} when the server is ready" + ) + + # Kill the log process + if not args.run_in_foreground: + log_proc.terminate() + + # With `--run-in-foreground`, wait until the server is stopped. 
+ if args.run_in_foreground: + try: + process.wait() + except KeyboardInterrupt: + process.terminate() + log_proc.terminate() + + return True diff --git a/src/qoxigraph/commands/status.py b/src/qoxigraph/commands/status.py new file mode 100644 index 000000000..eb2de86cf --- /dev/null +++ b/src/qoxigraph/commands/status.py @@ -0,0 +1,19 @@ +from __future__ import annotations + +from qlever.commands.status import StatusCommand as QleverStatusCommand + + +class StatusCommand(QleverStatusCommand): + DEFAULT_REGEX = "oxigraph\\s+serve-read-only" + + def description(self) -> str: + return "Show Oxigraph processes running on this machine" + + def additional_arguments(self, subparser) -> None: + subparser.add_argument( + "--cmdline-regex", + default=self.DEFAULT_REGEX, + help=( + "Show only processes where the command line matches this regex" + ), + ) diff --git a/src/qoxigraph/commands/stop.py b/src/qoxigraph/commands/stop.py new file mode 100644 index 000000000..ed9c10369 --- /dev/null +++ b/src/qoxigraph/commands/stop.py @@ -0,0 +1,71 @@ +from __future__ import annotations + +from qlever.command import QleverCommand +from qlever.commands import stop as qlever_stop +from qlever.log import log +from qoxigraph.commands.status import StatusCommand + + +class StopCommand(QleverCommand): + # Override this with StatusCommand from child class for execute + # method to work as intended + STATUS_COMMAND = StatusCommand() + DEFAULT_REGEX = "oxigraph\\s+serve-read-only.*:%%PORT%%" + + def __init__(self): + pass + + def description(self) -> str: + return "Stop Oxigraph server for a given dataset or port" + + def should_have_qleverfile(self) -> bool: + return True + + def relevant_qleverfile_arguments(self) -> dict[str : list[str]]: + return { + "data": ["name"], + "server": ["port"], + "runtime": ["system", "server_container"], + } + + def additional_arguments(self, subparser) -> None: + subparser.add_argument( + "--cmdline-regex", + default=self.DEFAULT_REGEX, + help="Show only 
processes where the command " + "line matches this regex", + ) + + def execute(self, args) -> bool: + cmdline_regex = args.cmdline_regex.replace("%%PORT%%", str(args.port)) + description = ( + f'Checking for processes matching "{cmdline_regex}"' + if args.system == "native" + else f"Checking for container with name {args.server_container}" + ) + + self.show(description, only_show=args.show) + if args.show: + return True + + if args.system == "native": + stop_process_results = ( + qlever_stop.StopCommand().stop_process_with_regex( + cmdline_regex + ) + ) + if stop_process_results is None: + return False + if len(stop_process_results) > 0: + return all(stop_process_results) + + # If no matching process found, show a message and the output of the + # status command. + log.error("No matching process found") + args.cmdline_regex = self.STATUS_COMMAND.DEFAULT_REGEX + log.info("") + StatusCommand().execute(args) + return True + + # First check if container is running and if yes, stop and remove it + return qlever_stop.stop_container(args.server_container) From 956a59df7e60f71f57315173c36ca81acec13ba8 Mon Sep 17 00:00:00 2001 From: tanmay-9 Date: Mon, 19 May 2025 23:51:47 +0200 Subject: [PATCH 2/6] Fix host_name bug in example_queries and add util stop_with_regex command to stop --- src/qoxigraph/commands/example_queries.py | 2 +- src/qoxigraph/commands/stop.py | 7 ++----- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/src/qoxigraph/commands/example_queries.py b/src/qoxigraph/commands/example_queries.py index d62982a83..4ef76c241 100644 --- a/src/qoxigraph/commands/example_queries.py +++ b/src/qoxigraph/commands/example_queries.py @@ -8,5 +8,5 @@ class ExampleQueriesCommand(QleverExampleQueriesCommand): def execute(self, args) -> bool: if not args.sparql_endpoint: - args.sparql_endpoint = f"localhost:{args.port}/query" + args.sparql_endpoint = f"{args.host_name}:{args.port}/query" return super().execute(args) diff --git a/src/qoxigraph/commands/stop.py 
b/src/qoxigraph/commands/stop.py index ed9c10369..dedd1ff2d 100644 --- a/src/qoxigraph/commands/stop.py +++ b/src/qoxigraph/commands/stop.py @@ -3,6 +3,7 @@ from qlever.command import QleverCommand from qlever.commands import stop as qlever_stop from qlever.log import log +from qlever.util import stop_process_with_regex from qoxigraph.commands.status import StatusCommand @@ -49,11 +50,7 @@ def execute(self, args) -> bool: return True if args.system == "native": - stop_process_results = ( - qlever_stop.StopCommand().stop_process_with_regex( - cmdline_regex - ) - ) + stop_process_results = stop_process_with_regex(cmdline_regex) if stop_process_results is None: return False if len(stop_process_results) > 0: From 7f36716932bce6a8111baf83942e8bebdbbd6f7b Mon Sep 17 00:00:00 2001 From: tanmay-9 Date: Thu, 27 Mar 2025 16:05:58 +0100 Subject: [PATCH 3/6] Added commands to setup sparql-endpoint for qmdb (MillenniumDB) natively and containerized --- pyproject.toml | 1 + src/qmdb/Dockerfile | 63 ++++++++++++ src/qmdb/__init__.py | 0 src/qmdb/commands/__init__.py | 0 src/qmdb/commands/example_queries.py | 12 +++ src/qmdb/commands/extract_queries.py | 1 + src/qmdb/commands/get_data.py | 1 + src/qmdb/commands/index.py | 140 ++++++++++++++++++++++++++ src/qmdb/commands/log.py | 1 + src/qmdb/commands/query.py | 10 ++ src/qmdb/commands/setup_config.py | 14 +++ src/qmdb/commands/start.py | 143 +++++++++++++++++++++++++++ src/qmdb/commands/status.py | 7 ++ src/qmdb/commands/stop.py | 18 ++++ 14 files changed, 411 insertions(+) create mode 100644 src/qmdb/Dockerfile create mode 100644 src/qmdb/__init__.py create mode 100644 src/qmdb/commands/__init__.py create mode 100644 src/qmdb/commands/example_queries.py create mode 120000 src/qmdb/commands/extract_queries.py create mode 120000 src/qmdb/commands/get_data.py create mode 100644 src/qmdb/commands/index.py create mode 120000 src/qmdb/commands/log.py create mode 100644 src/qmdb/commands/query.py create mode 100644 
src/qmdb/commands/setup_config.py create mode 100644 src/qmdb/commands/start.py create mode 100644 src/qmdb/commands/status.py create mode 100644 src/qmdb/commands/stop.py diff --git a/pyproject.toml b/pyproject.toml index 12c82f470..3b34ae749 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,6 +27,7 @@ Github = "https://github.com/ad-freiburg/qlever" [project.scripts] "qlever" = "qlever.qlever_main:main" +"qmdb" = "qlever.qlever_main:main" "qoxigraph" = "qlever.qlever_main:main" "qlever-old" = "qlever.qlever_old:main" diff --git a/src/qmdb/Dockerfile b/src/qmdb/Dockerfile new file mode 100644 index 000000000..eb515348a --- /dev/null +++ b/src/qmdb/Dockerfile @@ -0,0 +1,63 @@ +# Clone the MillenniumDB repository as the first step +FROM alpine:3.18 AS clone +WORKDIR /mdb-src + +RUN apk --no-cache add git +RUN git clone --depth 1 https://github.com/MillenniumDB/MillenniumDB.git . +RUN ls -l /mdb-src + +# Build stage +FROM alpine:3.18 AS build +WORKDIR /mdb + +# Install necessary build tools and dependencies +RUN apk --no-cache add cmake \ + make \ + g++ \ + openssl-dev \ + boost1.82-dev \ + ncurses-dev \ + icu-dev + +# Use files from the cloned repository +COPY --from=clone /mdb-src/src src +COPY --from=clone /mdb-src/CMakeLists.txt CMakeLists.txt +COPY --from=clone /mdb-src/third_party/antlr4-runtime-4.13.1 third_party/antlr4-runtime-4.13.1 + +# Build MillenniumDB +RUN cmake -B build -D CMAKE_BUILD_TYPE=Release -D CMAKE_INSTALL_PREFIX=./ && \ + cmake --build build -j $(($(getconf _NPROCESSORS_ONLN)-1)) --target install + +COPY --from=clone /mdb-src/browser browser + +# Final minimal stage (to minimize image size) +FROM alpine:3.18 AS final +WORKDIR /data + +# Install runtime dependencies +RUN apk --no-cache add libstdc++ \ + libgcc \ + openssl \ + musl-locales \ + libncursesw \ + less \ + bash \ + icu-libs + +# Copy the binaries and browser from the build stage +COPY --from=build /mdb/build/bin /usr/bin +COPY --from=build /mdb/browser /browser + +# Set 
ownership to the user passed by UID and GID +ARG UID +ARG GID +RUN if [ "${UID:-}" != "" ] && [ "${GID:-}" != "" ]; then \ + chown -R ${UID}:${GID} /data; \ + fi + +# Expose necessary volumes and environment variables +VOLUME /data +ENV MDB_BROWSER=/browser + +# Default command to run in the container +CMD ["bash"] diff --git a/src/qmdb/__init__.py b/src/qmdb/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/qmdb/commands/__init__.py b/src/qmdb/commands/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/qmdb/commands/example_queries.py b/src/qmdb/commands/example_queries.py new file mode 100644 index 000000000..402bbe044 --- /dev/null +++ b/src/qmdb/commands/example_queries.py @@ -0,0 +1,12 @@ +from __future__ import annotations + +from qlever.commands.example_queries import ( + ExampleQueriesCommand as QleverExampleQueriesCommand, +) + + +class ExampleQueriesCommand(QleverExampleQueriesCommand): + def execute(self, args) -> bool: + if not args.sparql_endpoint: + args.sparql_endpoint = f"localhost:{args.port}/sparql" + return super().execute(args) diff --git a/src/qmdb/commands/extract_queries.py b/src/qmdb/commands/extract_queries.py new file mode 120000 index 000000000..5667cc52a --- /dev/null +++ b/src/qmdb/commands/extract_queries.py @@ -0,0 +1 @@ +../../qlever/commands/extract_queries.py \ No newline at end of file diff --git a/src/qmdb/commands/get_data.py b/src/qmdb/commands/get_data.py new file mode 120000 index 000000000..4900dbb87 --- /dev/null +++ b/src/qmdb/commands/get_data.py @@ -0,0 +1 @@ +../../qlever/commands/get_data.py \ No newline at end of file diff --git a/src/qmdb/commands/index.py b/src/qmdb/commands/index.py new file mode 100644 index 000000000..3581560f2 --- /dev/null +++ b/src/qmdb/commands/index.py @@ -0,0 +1,140 @@ +from __future__ import annotations + +import glob +import shlex +from pathlib import Path + +from qlever.command import QleverCommand +from qlever.containerize import 
Containerize +from qlever.log import log +from qlever.util import binary_exists, run_command + + +class IndexCommand(QleverCommand): + def __init__(self): + self.script_name = "qmdb" + + def description(self) -> str: + return "Build the index for a given RDF dataset" + + def should_have_qleverfile(self) -> bool: + return True + + def relevant_qleverfile_arguments(self) -> dict[str : list[str]]: + return { + "data": ["name", "format"], + "index": ["input_files"], + "runtime": ["system", "image", "index_container"], + } + + def additional_arguments(self, subparser): + subparser.add_argument( + "--index-binary", + type=str, + default="mdb-import", + help=( + "The binary for building the index (default: mdb-import) " + "(this requires that you have Millennium DB built from source " + "on your machine)" + ), + ) + + @staticmethod + def build_image(build_cmd: str, system: str, image: str) -> bool: + try: + run_command(build_cmd, show_output=True) + return True + except Exception as e: + log.error(f"Building the {system} image {image} failed: {e}") + return False + + @staticmethod + def wrap_cmd_in_container(args, cmd: str) -> str: + return Containerize().containerize_command( + cmd=cmd, + container_system=args.system, + run_subcommand="run --rm", + image_name=args.image, + container_name=args.index_container, + volumes=[("$(pwd)", "/data")], + working_directory="/data", + ) + + def execute(self, args) -> bool: + system = args.system + input_files = args.input_files + + index_cmd = f"{args.index_binary} {input_files} index" + index_cmd += f" | tee {args.name}.index-log.txt" + + if args.system == "native": + cmd_to_show = index_cmd + else: + index_cmd = self.wrap_cmd_in_container(args, index_cmd) + dockerfile_dir = Path(__file__).parent.parent + dockerfile_path = dockerfile_dir / "Dockerfile" + build_cmd = ( + f"{system} build -f {dockerfile_path} -t {args.image} --build-arg " + f"UID=$(id -u) --build-arg GID=$(id -g) {dockerfile_dir}" + ) + image_id = run_command( + 
f"{system} images -q {args.image}", return_output=True + ) + cmd_to_show = ( + f"{build_cmd}\n\n{index_cmd}" if not image_id else index_cmd + ) + + # Show the command line. + self.show(cmd_to_show, only_show=args.show) + if args.show: + return True + + # Check if all of the input files exist. + for pattern in shlex.split(input_files): + if len(glob.glob(pattern)) == 0: + log.error(f'No file matching "{pattern}" found') + log.info("") + log.info( + f"Did you call `{self.script_name} get-data`? If you did, " + "check GET_DATA_CMD and INPUT_FILES in the Qleverfile" + ) + return False + + # When running natively, check if the binary exists and works. + if args.system == "native": + if not binary_exists(args.index_binary, "index-binary"): + return False + else: + if Containerize().is_running(args.system, args.index_container): + log.info( + f"{args.system} container {args.index_container} is still up, " + "which means that data loading is in progress. Please wait..." + ) + return False + + if not image_id: + build_successful = self.build_image( + build_cmd, system, args.image + ) + if not build_successful: + return False + else: + log.info(f"{args.image} image present on the system\n") + + index_dir = Path("index") + if index_dir.exists() and any(index_dir.iterdir()): + log.error( + "Index files found in index directory " + "which shows presence of a previous index\n" + ) + log.info("Aborting the index operation...") + return False + + # Run the index command. 
+ try: + run_command(index_cmd, show_output=True) + except Exception as e: + log.error(f"Building the index failed: {e}") + return False + + return True diff --git a/src/qmdb/commands/log.py b/src/qmdb/commands/log.py new file mode 120000 index 000000000..90ffabb45 --- /dev/null +++ b/src/qmdb/commands/log.py @@ -0,0 +1 @@ +../../qlever/commands/log.py \ No newline at end of file diff --git a/src/qmdb/commands/query.py b/src/qmdb/commands/query.py new file mode 100644 index 000000000..c775e322d --- /dev/null +++ b/src/qmdb/commands/query.py @@ -0,0 +1,10 @@ +from __future__ import annotations + +from qoxigraph.commands.query import QueryCommand as QoxigraphQueryCommand + + +class QueryCommand(QoxigraphQueryCommand): + def execute(self, args) -> bool: + if not args.sparql_endpoint: + args.sparql_endpoint = f"localhost:{args.port}/sparql" + return super().execute(args) diff --git a/src/qmdb/commands/setup_config.py b/src/qmdb/commands/setup_config.py new file mode 100644 index 000000000..f946aaef4 --- /dev/null +++ b/src/qmdb/commands/setup_config.py @@ -0,0 +1,14 @@ +from __future__ import annotations + +from qoxigraph.commands.setup_config import ( + SetupConfigCommand as QoxigraphSetupConfigCommand, +) + + +class SetupConfigCommand(QoxigraphSetupConfigCommand): + """ + Should behave exactly the same as setup-config command in qoxigraph, + just with a different Docker image name + """ + + IMAGE = "adfreiburg/qmdb" diff --git a/src/qmdb/commands/start.py b/src/qmdb/commands/start.py new file mode 100644 index 000000000..21e6badd1 --- /dev/null +++ b/src/qmdb/commands/start.py @@ -0,0 +1,143 @@ +from __future__ import annotations + +from pathlib import Path + +from qlever.command import QleverCommand +from qlever.containerize import Containerize +from qlever.log import log +from qlever.util import binary_exists, is_server_alive, run_command + + +class StartCommand(QleverCommand): + def __init__(self): + self.script_name = "qmdb" + + def description(self) -> str: + return ( 
+ "Start the server for MillenniumDB (requires that you have built an " + "index before)" + ) + + def should_have_qleverfile(self) -> bool: + return True + + def relevant_qleverfile_arguments(self) -> dict[str : list[str]]: + return { + "data": ["name"], + "server": ["host_name", "port"], + "runtime": ["system", "image", "server_container"], + "ui": ["ui_port"], + } + + def additional_arguments(self, subparser): + subparser.add_argument( + "--run-in-foreground", + action="store_true", + default=False, + help=( + "Run the start command in the foreground " + "(default: run in the background)" + ), + ) + subparser.add_argument( + "--server-binary", + type=str, + default="mdb-server", + help=( + "The binary for starting the server (default: mdb-server) " + "(this requires that you have Millennium DB built from source " + "on your machine)" + ), + ) + + @staticmethod + def wrap_cmd_in_container(args, cmd: str) -> str: + run_subcommand = "run --restart=unless-stopped" + if not args.run_in_foreground: + run_subcommand += " -d" + if not args.run_in_foreground: + cmd = f"{cmd} > {args.name}.server-log.txt 2>&1" + return Containerize().containerize_command( + cmd=cmd, + container_system=args.system, + run_subcommand=run_subcommand, + image_name=args.image, + container_name=args.server_container, + volumes=[("$(pwd)", "/data")], + working_directory="/data", + ports=[(args.port, args.port), (args.ui_port, args.ui_port)], + ) + + def execute(self, args) -> bool: + start_cmd = ( + f"{args.server_binary} --port {args.port} " + f"--browser-port {args.ui_port} index" + ) + + if args.system == "native": + if not args.run_in_foreground: + start_cmd = ( + f"nohup {start_cmd} > {args.name}.server-log.txt 2>&1 &" + ) + else: + start_cmd = self.wrap_cmd_in_container(args, start_cmd) + + # Show the command line. + self.show(start_cmd, only_show=args.show) + if args.show: + return True + + # When running natively, check if the binary exists and works. 
+ if args.system == "native": + if not binary_exists(args.server_binary, "server-binary"): + return False + else: + if Containerize().is_running(args.system, args.server_container): + log.error( + f"Server container {args.server_container} already exists!\n" + ) + log.info( + f"To kill the existing server, use `{self.script_name} stop`" + ) + return False + + index_dir = Path("index") + if not index_dir.exists() or not any(index_dir.iterdir()): + log.info(f"No MillenniumDB index files for {args.name} found! ") + log.info( + f"Did you call `{self.script_name} index`? If you did, check " + "if index files are present in the index directory" + ) + return False + + endpoint_url = f"http://{args.host_name}:{args.port}/sparql" + if is_server_alive(url=endpoint_url): + log.error(f"MillenniumDB server already running on {endpoint_url}\n") + log.info( + f"To kill the existing server, use `{self.script_name} stop`" + ) + return False + + # Run the start command. + try: + run_command(start_cmd, show_output=True) + log.info( + f"MillenniumDB server webapp for {args.name} will be available at " + f"http://{args.host_name}:{args.ui_port} and the sparql endpoint for " + f"queries is {endpoint_url}" + ) + if args.run_in_foreground: + log.info( + "Follow the log as long as the server is" + " running (Ctrl-C stops the server)" + ) + else: + log.info( + f"Follow `{self.script_name} log` until the server is ready" + f" (Ctrl-C stops following the log, but NOT the server)" + ) + except Exception as e: + log.error(f"Starting the MillenniumDB server failed: {e}") + return False + + return True diff --git a/src/qmdb/commands/status.py b/src/qmdb/commands/status.py new file mode 100644 index 000000000..63ec029c4 --- /dev/null +++ b/src/qmdb/commands/status.py @@ -0,0 +1,7 @@ +from __future__ import annotations + +from qoxigraph.commands.status import StatusCommand as QoxigraphStatusCommand + + +class StatusCommand(QoxigraphStatusCommand): + DEFAULT_REGEX = "mdb-server" diff --git 
a/src/qmdb/commands/stop.py b/src/qmdb/commands/stop.py new file mode 100644 index 000000000..916414d89 --- /dev/null +++ b/src/qmdb/commands/stop.py @@ -0,0 +1,18 @@ +from __future__ import annotations + +from qmdb.commands.status import StatusCommand +from qoxigraph.commands.stop import StopCommand as QoxigraphStopCommand + + +class StopCommand(QoxigraphStopCommand): + STATUS_COMMAND = StatusCommand() + DEFAULT_REGEX = r"mdb-server.*--port\s%%PORT%%.*" + + def description(self) -> str: + return "Stop MillenniumDB server for a given dataset or port" + + def execute(self, args) -> bool: + args.cmdline_regex = args.cmdline_regex.replace( + "%%PORT%%", str(args.port) + ) + return super().execute(args) From d39dd6e04dce520f08dd8b7888f8ea55975aa089 Mon Sep 17 00:00:00 2001 From: tanmay-9 Date: Wed, 2 Apr 2025 00:07:31 +0200 Subject: [PATCH 4/6] Fix description for qmdb `status` commands --- src/qmdb/commands/status.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/qmdb/commands/status.py b/src/qmdb/commands/status.py index 63ec029c4..02e7a2fa4 100644 --- a/src/qmdb/commands/status.py +++ b/src/qmdb/commands/status.py @@ -5,3 +5,6 @@ class StatusCommand(QoxigraphStatusCommand): DEFAULT_REGEX = "mdb-server" + + def description(self) -> str: + return "Show MillenniumDB processes running on this machine" From 896a8c9010d520b06a513b3f99f2be209506c6ca Mon Sep 17 00:00:00 2001 From: tanmay-9 Date: Sat, 5 Apr 2025 14:07:58 +0200 Subject: [PATCH 5/6] Removed ui_port and added logging to qmdb `start` command --- src/qmdb/commands/start.py | 70 +++++++++++++++++++++++++------------- 1 file changed, 46 insertions(+), 24 deletions(-) diff --git a/src/qmdb/commands/start.py b/src/qmdb/commands/start.py index 21e6badd1..518521846 100644 --- a/src/qmdb/commands/start.py +++ b/src/qmdb/commands/start.py @@ -1,5 +1,7 @@ from __future__ import annotations +import subprocess +import time from pathlib import Path from qlever.command import QleverCommand @@ -26,7 +28,6 @@ def 
relevant_qleverfile_arguments(self) -> dict[str : list[str]]: "data": ["name"], "server": ["host_name", "port"], "runtime": ["system", "image", "server_container"], - "ui": ["ui_port"], } def additional_arguments(self, subparser): @@ -65,14 +66,11 @@ def wrap_cmd_in_container(args, cmd: str) -> str: container_name=args.server_container, volumes=[("$(pwd)", "/data")], working_directory="/data", - ports=[(args.port, args.port), (args.ui_port, args.ui_port)], + ports=[(args.port, args.port)], ) def execute(self, args) -> bool: - start_cmd = ( - f"{args.server_binary} --port {args.port} " - f"--browser-port {args.ui_port} index" - ) + start_cmd = f"{args.server_binary} --port {args.port} index" if args.system == "native": if not args.run_in_foreground: @@ -112,32 +110,56 @@ def execute(self, args) -> bool: endpoint_url = f"http://{args.host_name}:{args.port}/sparql" if is_server_alive(url=endpoint_url): - log.error(f"MillenniumDB server already running on {endpoint_url}\n") + log.error( + f"MillenniumDB server already running on {endpoint_url}\n" + ) log.info( f"To kill the existing server, use `{self.script_name} stop`" ) return False - # Run the start command. 
try: - run_command(start_cmd, show_output=True) - log.info( - f"MillenniumDB server webapp for {args.name} will be available at " - f"http://{args.host_name}:{args.ui_port} and the sparql endpoint for " - f"queries is {endpoint_url}" + process = run_command( + start_cmd, + use_popen=args.run_in_foreground, ) - if args.run_in_foreground: - log.info( - "Follow the log as long as the server is" - " running (Ctrl-C stops the server)" - ) - else: - log.info( - f"Follow `{self.script_name} log` until the server is ready" - f" (Ctrl-C stops following the log, but NOT the server)" - ) except Exception as e: - log.error(f"Starting the MillenniumDB server failed: {e}") + log.error(f"Starting the MillenniumDB server failed ({e})") return False + # Tail the server log until the server is ready (note that the `exec` + # is important to make sure that the tail process is killed and not + # just the bash process). + if args.run_in_foreground: + log.info( + "Follow the server logs as long as the server is" + " running (Ctrl-C stops the server)" + ) + else: + log.info( + "Follow the server logs until the server is ready" + " (Ctrl-C stops following the log, but NOT the server)" + ) + log.info("") + log_cmd = f"exec tail -f {args.name}.server-log.txt" + log_proc = subprocess.Popen(log_cmd, shell=True) + while not is_server_alive(endpoint_url): + time.sleep(1) + + log.info( + f"MillenniumDB server sparql endpoint for queries is {endpoint_url}" + ) + + # Kill the log process + if not args.run_in_foreground: + log_proc.terminate() + + # With `--run-in-foreground`, wait until the server is stopped. 
+ if args.run_in_foreground: + try: + process.wait() + except KeyboardInterrupt: + process.terminate() + log_proc.terminate() + return True From 72412290ee8466f8b431b834b12a9bf9bce622b2 Mon Sep 17 00:00:00 2001 From: tanmay-9 Date: Fri, 23 May 2025 12:54:04 +0200 Subject: [PATCH 6/6] Change localhost -> args.host_name in qmdb `example-queries` --- src/qmdb/commands/example_queries.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/qmdb/commands/example_queries.py b/src/qmdb/commands/example_queries.py index 402bbe044..d612b7095 100644 --- a/src/qmdb/commands/example_queries.py +++ b/src/qmdb/commands/example_queries.py @@ -8,5 +8,5 @@ class ExampleQueriesCommand(QleverExampleQueriesCommand): def execute(self, args) -> bool: if not args.sparql_endpoint: - args.sparql_endpoint = f"localhost:{args.port}/sparql" + args.sparql_endpoint = f"{args.host_name}:{args.port}/sparql" return super().execute(args)