diff --git a/pyproject.toml b/pyproject.toml index b053fe62d..3b34ae749 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,6 +27,9 @@ Github = "https://github.com/ad-freiburg/qlever" [project.scripts] "qlever" = "qlever.qlever_main:main" +"qmdb" = "qlever.qlever_main:main" +"qoxigraph" = "qlever.qlever_main:main" +"qlever-old" = "qlever.qlever_old:main" [tool.setuptools] license-files = ["LICENSE"] diff --git a/src/qmdb/Dockerfile b/src/qmdb/Dockerfile new file mode 100644 index 000000000..eb515348a --- /dev/null +++ b/src/qmdb/Dockerfile @@ -0,0 +1,63 @@ +# Clone the MillenniumDB repository as the first step +FROM alpine:3.18 AS clone +WORKDIR /mdb-src + +RUN apk --no-cache add git +RUN git clone --depth 1 https://github.com/MillenniumDB/MillenniumDB.git . +RUN ls -l /mdb-src + +# Build stage +FROM alpine:3.18 AS build +WORKDIR /mdb + +# Install necessary build tools and dependencies +RUN apk --no-cache add cmake \ + make \ + g++ \ + openssl-dev \ + boost1.82-dev \ + ncurses-dev \ + icu-dev + +# Use files from the cloned repository +COPY --from=clone /mdb-src/src src +COPY --from=clone /mdb-src/CMakeLists.txt CMakeLists.txt +COPY --from=clone /mdb-src/third_party/antlr4-runtime-4.13.1 third_party/antlr4-runtime-4.13.1 + +# Build MillenniumDB +RUN cmake -B build -D CMAKE_BUILD_TYPE=Release -D CMAKE_INSTALL_PREFIX=./ && \ + cmake --build build -j $(($(getconf _NPROCESSORS_ONLN)-1)) --target install + +COPY --from=clone /mdb-src/browser browser + +# Final minimal stage (to minimize image size) +FROM alpine:3.18 AS final +WORKDIR /data + +# Install runtime dependencies +RUN apk --no-cache add libstdc++ \ + libgcc \ + openssl \ + musl-locales \ + libncursesw \ + less \ + bash \ + icu-libs + +# Copy the binaries and browser from the build stage +COPY --from=build /mdb/build/bin /usr/bin +COPY --from=build /mdb/browser /browser + +# Set ownership to the user passed by UID and GID +ARG UID +ARG GID +RUN if [ "${UID:-}" != "" ] && [ "${GID:-}" != "" ]; then \ + chown 
-R ${UID}:${GID} /data; \ + fi + +# Expose necessary volumes and environment variables +VOLUME /data +ENV MDB_BROWSER=/browser + +# Default command to run in the container +CMD ["bash"] diff --git a/src/qmdb/__init__.py b/src/qmdb/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/qmdb/commands/__init__.py b/src/qmdb/commands/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/qmdb/commands/example_queries.py b/src/qmdb/commands/example_queries.py new file mode 100644 index 000000000..d612b7095 --- /dev/null +++ b/src/qmdb/commands/example_queries.py @@ -0,0 +1,12 @@ +from __future__ import annotations + +from qlever.commands.example_queries import ( + ExampleQueriesCommand as QleverExampleQueriesCommand, +) + + +class ExampleQueriesCommand(QleverExampleQueriesCommand): + def execute(self, args) -> bool: + if not args.sparql_endpoint: + args.sparql_endpoint = f"{args.host_name}:{args.port}/sparql" + return super().execute(args) diff --git a/src/qmdb/commands/extract_queries.py b/src/qmdb/commands/extract_queries.py new file mode 120000 index 000000000..5667cc52a --- /dev/null +++ b/src/qmdb/commands/extract_queries.py @@ -0,0 +1 @@ +../../qlever/commands/extract_queries.py \ No newline at end of file diff --git a/src/qmdb/commands/get_data.py b/src/qmdb/commands/get_data.py new file mode 120000 index 000000000..4900dbb87 --- /dev/null +++ b/src/qmdb/commands/get_data.py @@ -0,0 +1 @@ +../../qlever/commands/get_data.py \ No newline at end of file diff --git a/src/qmdb/commands/index.py b/src/qmdb/commands/index.py new file mode 100644 index 000000000..3581560f2 --- /dev/null +++ b/src/qmdb/commands/index.py @@ -0,0 +1,140 @@ +from __future__ import annotations + +import glob +import shlex +from pathlib import Path + +from qlever.command import QleverCommand +from qlever.containerize import Containerize +from qlever.log import log +from qlever.util import binary_exists, run_command + + +class 
IndexCommand(QleverCommand): + def __init__(self): + self.script_name = "qmdb" + + def description(self) -> str: + return "Build the index for a given RDF dataset" + + def should_have_qleverfile(self) -> bool: + return True + + def relevant_qleverfile_arguments(self) -> dict[str : list[str]]: + return { + "data": ["name", "format"], + "index": ["input_files"], + "runtime": ["system", "image", "index_container"], + } + + def additional_arguments(self, subparser): + subparser.add_argument( + "--index-binary", + type=str, + default="mdb-import", + help=( + "The binary for building the index (default: mdb-import) " + "(this requires that you have Millennium DB built from source " + "on your machine)" + ), + ) + + @staticmethod + def build_image(build_cmd: str, system: str, image: str) -> bool: + try: + run_command(build_cmd, show_output=True) + return True + except Exception as e: + log.error(f"Building the {system} image {image} failed: {e}") + return False + + @staticmethod + def wrap_cmd_in_container(args, cmd: str) -> str: + return Containerize().containerize_command( + cmd=cmd, + container_system=args.system, + run_subcommand="run --rm", + image_name=args.image, + container_name=args.index_container, + volumes=[("$(pwd)", "/data")], + working_directory="/data", + ) + + def execute(self, args) -> bool: + system = args.system + input_files = args.input_files + + index_cmd = f"{args.index_binary} {input_files} index" + index_cmd += f" | tee {args.name}.index-log.txt" + + if args.system == "native": + cmd_to_show = index_cmd + else: + index_cmd = self.wrap_cmd_in_container(args, index_cmd) + dockerfile_dir = Path(__file__).parent.parent + dockerfile_path = dockerfile_dir / "Dockerfile" + build_cmd = ( + f"{system} build -f {dockerfile_path} -t {args.image} --build-arg " + f"UID=$(id -u) --build-arg GID=$(id -g) {dockerfile_dir}" + ) + image_id = run_command( + f"{system} images -q {args.image}", return_output=True + ) + cmd_to_show = ( + f"{build_cmd}\n\n{index_cmd}" 
if not image_id else index_cmd + ) + + # Show the command line. + self.show(cmd_to_show, only_show=args.show) + if args.show: + return True + + # Check if all of the input files exist. + for pattern in shlex.split(input_files): + if len(glob.glob(pattern)) == 0: + log.error(f'No file matching "{pattern}" found') + log.info("") + log.info( + f"Did you call `{self.script_name} get-data`? If you did, " + "check GET_DATA_CMD and INPUT_FILES in the Qleverfile" + ) + return False + + # When running natively, check if the binary exists and works. + if args.system == "native": + if not binary_exists(args.index_binary, "index-binary"): + return False + else: + if Containerize().is_running(args.system, args.index_container): + log.info( + f"{args.system} container {args.index_container} is still up, " + "which means that data loading is in progress. Please wait..." + ) + return False + + if not image_id: + build_successful = self.build_image( + build_cmd, system, args.image + ) + if not build_successful: + return False + else: + log.info(f"{args.image} image present on the system\n") + + index_dir = Path("index") + if index_dir.exists() and any(index_dir.iterdir()): + log.error( + "Index files found in index directory " + "which shows presence of a previous index\n" + ) + log.info("Aborting the index operation...") + return False + + # Run the index command. 
+        try:
+            run_command(index_cmd, show_output=True)
+        except Exception as e:
+            log.error(f"Building the index failed: {e}")
+            return False
+
+        return True
diff --git a/src/qmdb/commands/log.py b/src/qmdb/commands/log.py
new file mode 120000
index 000000000..90ffabb45
--- /dev/null
+++ b/src/qmdb/commands/log.py
@@ -0,0 +1 @@
+../../qlever/commands/log.py
\ No newline at end of file
diff --git a/src/qmdb/commands/query.py b/src/qmdb/commands/query.py
new file mode 100644
index 000000000..c775e322d
--- /dev/null
+++ b/src/qmdb/commands/query.py
@@ -0,0 +1,18 @@
+from __future__ import annotations
+
+from qoxigraph.commands.query import QueryCommand as QoxigraphQueryCommand
+
+
+class QueryCommand(QoxigraphQueryCommand):
+    """
+    Query command for qmdb: behaves like the qoxigraph query command, except
+    that the default SPARQL endpoint path is `/sparql`.
+    """
+
+    def execute(self, args) -> bool:
+        # If no endpoint was given, use the configured port on localhost.
+        if not args.sparql_endpoint:
+            args.sparql_endpoint = f"localhost:{args.port}/sparql"
+        # Hand the parent's result back to the caller; without the `return`,
+        # this method would always yield `None` despite its `bool` annotation.
+        return super().execute(args)
diff --git a/src/qmdb/commands/setup_config.py b/src/qmdb/commands/setup_config.py
new file mode 100644
index 000000000..f946aaef4
--- /dev/null
+++ b/src/qmdb/commands/setup_config.py
@@ -0,0 +1,14 @@
+from __future__ import annotations
+
+from qoxigraph.commands.setup_config import (
+    SetupConfigCommand as QoxigraphSetupConfigCommand,
+)
+
+
+class SetupConfigCommand(QoxigraphSetupConfigCommand):
+    """
+    Should behave exactly the same as setup-config command in qoxigraph,
+    just with a different Docker image name
+    """
+
+    IMAGE = "adfreiburg/qmdb"
diff --git a/src/qmdb/commands/start.py b/src/qmdb/commands/start.py
new file mode 100644
index 000000000..518521846
--- /dev/null
+++ b/src/qmdb/commands/start.py
@@ -0,0 +1,165 @@
+from __future__ import annotations
+
+import subprocess
+import time
+from pathlib import Path
+
+from qlever.command import QleverCommand
+from qlever.containerize import Containerize
+from qlever.log import log
+from qlever.util import binary_exists, is_server_alive, run_command
+
+
+class StartCommand(QleverCommand):
+    def __init__(self):
+        self.script_name = "qmdb"
+
+    def 
description(self) -> str: + return ( + "Start the server for MillenniumDB (requires that you have built an " + "index before)" + ) + + def should_have_qleverfile(self) -> bool: + return True + + def relevant_qleverfile_arguments(self) -> dict[str : list[str]]: + return { + "data": ["name"], + "server": ["host_name", "port"], + "runtime": ["system", "image", "server_container"], + } + + def additional_arguments(self, subparser): + subparser.add_argument( + "--run-in-foreground", + action="store_true", + default=False, + help=( + "Run the start command in the foreground " + "(default: run in the background)" + ), + ) + subparser.add_argument( + "--server-binary", + type=str, + default="mdb-server", + help=( + "The binary for starting the server (default: mdb-server) " + "(this requires that you have Millennium DB built from source " + "on your machine)" + ), + ) + + @staticmethod + def wrap_cmd_in_container(args, cmd: str) -> str: + run_subcommand = "run --restart=unless-stopped" + if not args.run_in_foreground: + run_subcommand += " -d" + if not args.run_in_foreground: + cmd = f"{cmd} > {args.name}.server-log.txt 2>&1" + return Containerize().containerize_command( + cmd=cmd, + container_system=args.system, + run_subcommand=run_subcommand, + image_name=args.image, + container_name=args.server_container, + volumes=[("$(pwd)", "/data")], + working_directory="/data", + ports=[(args.port, args.port)], + ) + + def execute(self, args) -> bool: + start_cmd = f"{args.server_binary} --port {args.port} index" + + if args.system == "native": + if not args.run_in_foreground: + start_cmd = ( + f"nohup {start_cmd} > {args.name}.server-log.txt 2>&1 &" + ) + else: + start_cmd = self.wrap_cmd_in_container(args, start_cmd) + + # Show the command line. + self.show(start_cmd, only_show=args.show) + if args.show: + return True + + # When running natively, check if the binary exists and works. 
+ if args.system == "native": + if not binary_exists(args.server_binary, "server-binary"): + return False + else: + if Containerize().is_running(args.system, args.server_container): + log.error( + f"Server container {args.server_container} already exists!\n" + ) + log.info( + f"To kill the existing server, use `{self.script_name} stop`" + ) + return False + + index_dir = Path("index") + if not index_dir.exists() or not any(index_dir.iterdir()): + log.info(f"No MillenniumDB index files for {args.name} found! ") + log.info( + f"Did you call `{self.script_name} index`? If you did, check " + "if index files are present in the index directory" + ) + return False + + endpoint_url = f"http://{args.host_name}:{args.port}/sparql" + if is_server_alive(url=endpoint_url): + log.error( + f"MillenniumDB server already running on {endpoint_url}\n" + ) + log.info( + f"To kill the existing server, use `{self.script_name} stop`" + ) + return False + + try: + process = run_command( + start_cmd, + use_popen=args.run_in_foreground, + ) + except Exception as e: + log.error(f"Starting the MillenniumDB server failed ({e})") + return False + + # Tail the server log until the server is ready (note that the `exec` + # is important to make sure that the tail process is killed and not + # just the bash process). + if args.run_in_foreground: + log.info( + "Follow the server logs as long as the server is" + " running (Ctrl-C stops the server)" + ) + else: + log.info( + "Follow the server logs until the server is ready" + " (Ctrl-C stops following the log, but NOT the server)" + ) + log.info("") + log_cmd = f"exec tail -f {args.name}.server-log.txt" + log_proc = subprocess.Popen(log_cmd, shell=True) + while not is_server_alive(endpoint_url): + time.sleep(1) + + log.info( + f"MillenniumDB server sparql endpoint for queries is {endpoint_url}" + ) + + # Kill the log process + if not args.run_in_foreground: + log_proc.terminate() + + # With `--run-in-foreground`, wait until the server is stopped. 
+ if args.run_in_foreground: + try: + process.wait() + except KeyboardInterrupt: + process.terminate() + log_proc.terminate() + + return True diff --git a/src/qmdb/commands/status.py b/src/qmdb/commands/status.py new file mode 100644 index 000000000..02e7a2fa4 --- /dev/null +++ b/src/qmdb/commands/status.py @@ -0,0 +1,10 @@ +from __future__ import annotations + +from qoxigraph.commands.status import StatusCommand as QoxigraphStatusCommand + + +class StatusCommand(QoxigraphStatusCommand): + DEFAULT_REGEX = "mdb-server" + + def description(self) -> str: + return "Show MillenniumDB processes running on this machine" diff --git a/src/qmdb/commands/stop.py b/src/qmdb/commands/stop.py new file mode 100644 index 000000000..916414d89 --- /dev/null +++ b/src/qmdb/commands/stop.py @@ -0,0 +1,18 @@ +from __future__ import annotations + +from qmdb.commands.status import StatusCommand +from qoxigraph.commands.stop import StopCommand as QoxigraphStopCommand + + +class StopCommand(QoxigraphStopCommand): + STATUS_COMMAND = StatusCommand() + DEFAULT_REGEX = r"mdb-server.*--port\s%%PORT%%.*" + + def description(self) -> str: + return "Stop MillenniumDB server for a given dataset or port" + + def execute(self, args) -> bool: + args.cmdline_regex = args.cmdline_regex.replace( + "%%PORT%%", str(args.port) + ) + return super().execute(args) diff --git a/src/qoxigraph/__init__.py b/src/qoxigraph/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/qoxigraph/commands/__init__.py b/src/qoxigraph/commands/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/qoxigraph/commands/example_queries.py b/src/qoxigraph/commands/example_queries.py new file mode 100644 index 000000000..4ef76c241 --- /dev/null +++ b/src/qoxigraph/commands/example_queries.py @@ -0,0 +1,12 @@ +from __future__ import annotations + +from qlever.commands.example_queries import ( + ExampleQueriesCommand as QleverExampleQueriesCommand, +) + + +class 
ExampleQueriesCommand(QleverExampleQueriesCommand): + def execute(self, args) -> bool: + if not args.sparql_endpoint: + args.sparql_endpoint = f"{args.host_name}:{args.port}/query" + return super().execute(args) diff --git a/src/qoxigraph/commands/extract_queries.py b/src/qoxigraph/commands/extract_queries.py new file mode 120000 index 000000000..5667cc52a --- /dev/null +++ b/src/qoxigraph/commands/extract_queries.py @@ -0,0 +1 @@ +../../qlever/commands/extract_queries.py \ No newline at end of file diff --git a/src/qoxigraph/commands/get_data.py b/src/qoxigraph/commands/get_data.py new file mode 120000 index 000000000..4900dbb87 --- /dev/null +++ b/src/qoxigraph/commands/get_data.py @@ -0,0 +1 @@ +../../qlever/commands/get_data.py \ No newline at end of file diff --git a/src/qoxigraph/commands/index.py b/src/qoxigraph/commands/index.py new file mode 100644 index 000000000..128b9a826 --- /dev/null +++ b/src/qoxigraph/commands/index.py @@ -0,0 +1,109 @@ +from __future__ import annotations + +import glob +import shlex +from pathlib import Path + +from qlever.command import QleverCommand +from qlever.containerize import Containerize +from qlever.log import log +from qlever.util import binary_exists, run_command + + +class IndexCommand(QleverCommand): + def __init__(self): + self.script_name = "qoxigraph" + + def description(self) -> str: + return "Build the index for a given RDF dataset" + + def should_have_qleverfile(self) -> bool: + return True + + def relevant_qleverfile_arguments(self) -> dict[str : list[str]]: + return { + "data": ["name", "format"], + "index": ["input_files"], + "runtime": ["system", "image", "index_container"], + } + + def additional_arguments(self, subparser): + subparser.add_argument( + "--index-binary", + type=str, + default="oxigraph", + help=( + "The binary for building the index (default: oxigraph) " + "(this requires that you have oxigraph-cli installed " + "on your machine)" + ), + ) + + @staticmethod + def wrap_cmd_in_container(args, 
cmd: str) -> str: + return Containerize().containerize_command( + cmd=cmd, + container_system=args.system, + run_subcommand="run --rm", + image_name=args.image, + container_name=args.index_container, + volumes=[("$(pwd)", "/index")], + working_directory="/index", + use_bash=False, + ) + + def execute(self, args) -> bool: + index_cmd = f"load --location . --file {args.input_files}" + index_cmd += f" |& tee {args.name}.index-log.txt" + + index_cmd = ( + f"{args.index_binary} {index_cmd}" + if args.system == "native" + else self.wrap_cmd_in_container(args, index_cmd) + ) + + # Show the command line. + self.show(index_cmd, only_show=args.show) + if args.show: + return True + + # Check if all of the input files exist. + for pattern in shlex.split(args.input_files): + if len(glob.glob(pattern)) == 0: + log.error(f'No file matching "{pattern}" found') + log.info("") + log.info( + f"Did you call `{self.script_name} get-data`? If you did, " + "check GET_DATA_CMD and INPUT_FILES in the Qleverfile" + ) + return False + + # When running natively, check if the binary exists and works. + if args.system == "native": + if not binary_exists(args.index_binary, "index-binary"): + return False + else: + if Containerize().is_running(args.system, args.index_container): + log.info( + f"{args.system} container {args.index_container} is still up, " + "which means that data loading is in progress. Please wait..." + ) + return False + + if len([p.name for p in Path.cwd().glob("*.sst")]) != 0: + log.error( + "Index files (*.sst) found in current directory " + "which shows presence of a previous index" + ) + log.info("") + log.info("Aborting the index operation...") + return False + + # Run the index command. 
+ try: + run_command(index_cmd, show_output=True, show_stderr=True) + except Exception as e: + log.error(f"Building the index failed: {e}") + return False + + return True diff --git a/src/qoxigraph/commands/log.py b/src/qoxigraph/commands/log.py new file mode 100644 index 000000000..a90d22288 --- /dev/null +++ b/src/qoxigraph/commands/log.py @@ -0,0 +1,50 @@ +from __future__ import annotations + +from qlever.commands.log import LogCommand as QleverLogCommand +from qlever.containerize import Containerize +from qlever.log import log +from qlever.util import run_command + + +class LogCommand(QleverLogCommand): + def __init__(self): + self.script_name = "qoxigraph" + + def relevant_qleverfile_arguments(self) -> dict[str : list[str]]: + return { + "data": ["name"], + "runtime": [ + "system", + "image", + "server_container", + ], + } + + def execute(self, args) -> bool: + if args.system == "native": + return super().execute(args) + + log_cmd = f"{args.system} logs " + + if not args.from_beginning: + log_cmd += f"-n {args.tail_num_lines} " + if not args.no_follow: + log_cmd += "-f " + + log_cmd += args.server_container + + # Show the command line. 
+        self.show(log_cmd, only_show=args.show)
+        if args.show:
+            return True
+
+        if not Containerize().is_running(args.system, args.server_container):
+            log.error(f"No server container {args.server_container} found!\n")
+            log.info(f"Are you sure you called `{self.script_name} start`?")
+            return False
+
+        try:
+            run_command(log_cmd, show_output=True, show_stderr=True)
+        except Exception as e:
+            log.error(f"Cannot display container logs - {e}")
+        return True
diff --git a/src/qoxigraph/commands/query.py b/src/qoxigraph/commands/query.py
new file mode 100644
index 000000000..6518905f3
--- /dev/null
+++ b/src/qoxigraph/commands/query.py
@@ -0,0 +1,65 @@
+from __future__ import annotations
+
+from qlever.commands.query import QueryCommand as QleverQueryCommand
+
+
+class QueryCommand(QleverQueryCommand):
+    """
+    Query command for qoxigraph: reuses the qlever query command, but defines
+    its own command-line arguments and uses `/query` as default endpoint path.
+    """
+
+    def additional_arguments(self, subparser) -> None:
+        subparser.add_argument(
+            "query",
+            type=str,
+            nargs="?",
+            default="SELECT * WHERE { ?s ?p ?o } LIMIT 10",
+            help="SPARQL query to send",
+        )
+        subparser.add_argument(
+            "--predefined-query",
+            type=str,
+            choices=self.predefined_queries.keys(),
+            help="Use a predefined query",
+        )
+        subparser.add_argument(
+            "--sparql-endpoint", type=str, help="URL of the SPARQL endpoint"
+        )
+        subparser.add_argument(
+            "--accept",
+            type=str,
+            choices=[
+                "text/tab-separated-values",
+                "text/csv",
+                "application/sparql-results+json",
+                "application/sparql-results+xml",
+            ],
+            default="text/tab-separated-values",
+            help="Accept header for the SPARQL query",
+        )
+        subparser.add_argument(
+            "--get",
+            action="store_true",
+            default=False,
+            help="Use GET request instead of POST",
+        )
+        subparser.add_argument(
+            "--no-time",
+            action="store_true",
+            default=False,
+            help="Do not print the (end-to-end) time taken",
+        )
+
+    def execute(self, args) -> bool:
+        # If no endpoint was given, use the configured port on localhost.
+        if not args.sparql_endpoint:
+            args.sparql_endpoint = f"localhost:{args.port}/query"
+        # No `--pin-to-cache` or `--access-token` argument is defined above, so
+        # both attributes are set explicitly (presumably the parent's `execute`
+        # reads them; verify against `qlever.commands.query`).
+        args.pin_to_cache = None
+        args.access_token = None
+        # Hand the parent's result back to the caller; without the `return`,
+        # this method would always yield `None` despite its `bool` annotation.
+        return super().execute(args)
diff --git 
a/src/qoxigraph/commands/setup_config.py b/src/qoxigraph/commands/setup_config.py new file mode 100644 index 000000000..b6d9225ba --- /dev/null +++ b/src/qoxigraph/commands/setup_config.py @@ -0,0 +1,109 @@ +from __future__ import annotations + +from configparser import RawConfigParser +from pathlib import Path + +from qlever.command import QleverCommand +from qlever.log import log +from qlever.qleverfile import Qleverfile + + +class SetupConfigCommand(QleverCommand): + IMAGE = "ghcr.io/oxigraph/oxigraph" + + FILTER_CRITERIA = { + "data": [], + "index": ["INPUT_FILES"], + "server": ["PORT"], + "runtime": ["SYSTEM", "IMAGE"], + "ui": ["UI_CONFIG"], + } + + def __init__(self): + self.qleverfiles_path = ( + Path(__file__).parent.parent.parent / "qlever" / "Qleverfiles" + ) + self.qleverfile_names = [ + p.name.split(".")[1] + for p in self.qleverfiles_path.glob("Qleverfile.*") + ] + + def description(self) -> str: + return "Get a pre-configured Qleverfile" + + def should_have_qleverfile(self) -> bool: + return False + + def relevant_qleverfile_arguments(self) -> dict[str : list[str]]: + return {} + + def additional_arguments(self, subparser) -> None: + subparser.add_argument( + "config_name", + type=str, + choices=self.qleverfile_names, + help="The name of the pre-configured Qleverfile to create", + ) + + def validate_qleverfile_setup( + self, args, qleverfile_path: Path + ) -> bool | None: + # Construct the command line and show it. + setup_config_show = ( + f"Creating Qleverfile for {args.config_name} using " + f"Qleverfile.{args.config_name} file in {self.qleverfiles_path}" + ) + self.show(setup_config_show, only_show=args.show) + if args.show: + return True + + # If there is already a Qleverfile in the current directory, exit. 
+ if qleverfile_path.exists(): + log.error("`Qleverfile` already exists in current directory") + log.info("") + log.info( + "If you want to create a new Qleverfile using " + "`qlever setup-config`, delete the existing Qleverfile " + "first" + ) + return False + return None + + def get_filtered_qleverfile_parser( + self, config_name: str + ) -> RawConfigParser: + qleverfile_config_path = ( + self.qleverfiles_path / f"Qleverfile.{config_name}" + ) + qleverfile_parser = Qleverfile.filter( + qleverfile_config_path, self.FILTER_CRITERIA + ) + if qleverfile_parser.has_section("runtime"): + qleverfile_parser.set("runtime", "IMAGE", self.IMAGE) + return qleverfile_parser + + def execute(self, args) -> bool: + qleverfile_path = Path("Qleverfile") + exit_status = self.validate_qleverfile_setup(args, qleverfile_path) + if exit_status is not None: + return exit_status + + qleverfile_parser = self.get_filtered_qleverfile_parser( + args.config_name + ) + # Copy the Qleverfile to the current directory. + try: + with qleverfile_path.open("w") as f: + qleverfile_parser.write(f) + except Exception as e: + log.error( + f'Could not copy "{qleverfile_path}" to current directory: {e}' + ) + return False + + # If we get here, everything went well. 
+ log.info( + f'Created Qleverfile for config "{args.config_name}"' + f" in current directory" + ) + return True diff --git a/src/qoxigraph/commands/start.py b/src/qoxigraph/commands/start.py new file mode 100644 index 000000000..8a038344d --- /dev/null +++ b/src/qoxigraph/commands/start.py @@ -0,0 +1,175 @@ +from __future__ import annotations + +import subprocess +import time +from pathlib import Path + +from qlever.command import QleverCommand +from qlever.containerize import Containerize +from qlever.log import log +from qlever.util import binary_exists, is_server_alive, run_command + + +class StartCommand(QleverCommand): + def __init__(self): + self.script_name = "qoxigraph" + + def description(self) -> str: + return ( + "Start the server for Oxigraph (requires that you have built an " + "index before)" + ) + + def should_have_qleverfile(self) -> bool: + return True + + def relevant_qleverfile_arguments(self) -> dict[str : list[str]]: + return { + "data": ["name"], + "server": ["host_name", "port"], + "runtime": ["system", "image", "server_container"], + } + + def additional_arguments(self, subparser): + subparser.add_argument( + "--run-in-foreground", + action="store_true", + default=False, + help=( + "Run the start command in the foreground " + "(default: run in the background)" + ), + ) + subparser.add_argument( + "--server-binary", + type=str, + default="oxigraph", + help=( + "The binary for starting the server (default: oxigraph) " + "(this requires that you have oxigraph-cli installed " + "on your machine)" + ), + ) + + @staticmethod + def wrap_cmd_in_container(args, cmd: str) -> str: + run_subcommand = "run --restart=unless-stopped" + if not args.run_in_foreground: + run_subcommand += " -d" + return Containerize().containerize_command( + cmd=cmd, + container_system=args.system, + run_subcommand=run_subcommand, + image_name=args.image, + container_name=args.server_container, + volumes=[("$(pwd)", "/index")], + ports=[(args.port, args.port)], + 
working_directory="/index", + use_bash=False, + ) + + def execute(self, args) -> bool: + bind = ( + f"{args.host_name}:{args.port}" + if args.system == "native" + else f"0.0.0.0:{args.port}" + ) + start_cmd = f"serve-read-only --location . --bind={bind}" + + if args.system == "native": + start_cmd = f"{args.server_binary} {start_cmd}" + if not args.run_in_foreground: + start_cmd = ( + f"nohup {start_cmd} > {args.name}.server-log.txt 2>&1 &" + ) + else: + start_cmd = self.wrap_cmd_in_container(args, start_cmd) + + # Show the command line. + self.show(start_cmd, only_show=args.show) + if args.show: + return True + + endpoint_url = f"http://{args.host_name}:{args.port}/query" + + # When running natively, check if the binary exists and works. + if args.system == "native": + if not binary_exists(args.server_binary, "server-binary"): + return False + else: + if Containerize().is_running(args.system, args.server_container): + log.error( + f"Server container {args.server_container} already exists!\n" + ) + log.info( + f"To kill the existing server, use `{self.script_name} stop`" + ) + return False + + # Check if index files (*.sst) present in cwd + if len([p.name for p in Path.cwd().glob("*.sst")]) == 0: + log.error(f"No Oxigraph index files for {args.name} found!\n") + log.info( + f"Did you call `{self.script_name} index`? If you did, check " + "if .sst index files are present in current working directory." 
+ ) + return False + + if is_server_alive(url=endpoint_url): + log.error(f"Oxigraph server already running on {endpoint_url}\n") + log.info( + f"To kill the existing server, use `{self.script_name} stop`" + ) + return False + + try: + process = run_command( + start_cmd, + use_popen=args.run_in_foreground, + ) + except Exception as e: + log.error(f"Starting the Oxigraph server failed ({e})") + return False + + # Tail the server log until the server is ready (note that the `exec` + # is important to make sure that the tail process is killed and not + # just the bash process). + if args.run_in_foreground: + log.info( + "Follow the server logs as long as the server is" + " running (Ctrl-C stops the server)" + ) + else: + log.info( + "Follow the server logs until the server is ready" + " (Ctrl-C stops following the log, but NOT the server)" + ) + log.info("") + if args.system == "native": + log_cmd = f"exec tail -f {args.name}.server-log.txt" + else: + time.sleep(2) + log_cmd = f"exec {args.system} logs -f {args.server_container}" + log_proc = subprocess.Popen(log_cmd, shell=True) + while not is_server_alive(endpoint_url): + time.sleep(1) + + log.info( + f"Oxigraph server webapp for {args.name} will be available at " + f"http://{args.host_name}:{args.port} and the sparql endpoint for " + f"queries is {endpoint_url} when the server is ready" + ) + + # Kill the log process + if not args.run_in_foreground: + log_proc.terminate() + + # With `--run-in-foreground`, wait until the server is stopped. 
+ if args.run_in_foreground: + try: + process.wait() + except KeyboardInterrupt: + process.terminate() + log_proc.terminate() + + return True diff --git a/src/qoxigraph/commands/status.py b/src/qoxigraph/commands/status.py new file mode 100644 index 000000000..eb2de86cf --- /dev/null +++ b/src/qoxigraph/commands/status.py @@ -0,0 +1,19 @@ +from __future__ import annotations + +from qlever.commands.status import StatusCommand as QleverStatusCommand + + +class StatusCommand(QleverStatusCommand): + DEFAULT_REGEX = "oxigraph\\s+serve-read-only" + + def description(self) -> str: + return "Show Oxigraph processes running on this machine" + + def additional_arguments(self, subparser) -> None: + subparser.add_argument( + "--cmdline-regex", + default=self.DEFAULT_REGEX, + help=( + "Show only processes where the command line matches this regex" + ), + ) diff --git a/src/qoxigraph/commands/stop.py b/src/qoxigraph/commands/stop.py new file mode 100644 index 000000000..dedd1ff2d --- /dev/null +++ b/src/qoxigraph/commands/stop.py @@ -0,0 +1,68 @@ +from __future__ import annotations + +from qlever.command import QleverCommand +from qlever.commands import stop as qlever_stop +from qlever.log import log +from qlever.util import stop_process_with_regex +from qoxigraph.commands.status import StatusCommand + + +class StopCommand(QleverCommand): + # Override this with StatusCommand from child class for execute + # method to work as intended + STATUS_COMMAND = StatusCommand() + DEFAULT_REGEX = "oxigraph\\s+serve-read-only.*:%%PORT%%" + + def __init__(self): + pass + + def description(self) -> str: + return "Stop Oxigraph server for a given dataset or port" + + def should_have_qleverfile(self) -> bool: + return True + + def relevant_qleverfile_arguments(self) -> dict[str : list[str]]: + return { + "data": ["name"], + "server": ["port"], + "runtime": ["system", "server_container"], + } + + def additional_arguments(self, subparser) -> None: + subparser.add_argument( + "--cmdline-regex", + 
default=self.DEFAULT_REGEX,
+            help="Show only processes where the command "
+            "line matches this regex",
+        )
+
+    def execute(self, args) -> bool:
+        cmdline_regex = args.cmdline_regex.replace("%%PORT%%", str(args.port))
+        description = (
+            f'Checking for processes matching "{cmdline_regex}"'
+            if args.system == "native"
+            else f"Checking for container with name {args.server_container}"
+        )
+
+        self.show(description, only_show=args.show)
+        if args.show:
+            return True
+
+        if args.system == "native":
+            stop_process_results = stop_process_with_regex(cmdline_regex)
+            if stop_process_results is None:
+                return False
+            if len(stop_process_results) > 0:
+                return all(stop_process_results)
+
+            # If no matching process found, show a message and the output of the
+            # status command.
+            log.error("No matching process found")
+            args.cmdline_regex = self.STATUS_COMMAND.DEFAULT_REGEX
+            log.info("")
+            self.STATUS_COMMAND.execute(args)
+            return True
+
+        # First check if container is running and if yes, stop and remove it
+        return qlever_stop.stop_container(args.server_container)