diff --git a/src/qlever/commands/index.py b/src/qlever/commands/index.py index b18bb02d..d47f616f 100644 --- a/src/qlever/commands/index.py +++ b/src/qlever/commands/index.py @@ -12,6 +12,7 @@ binary_exists, get_existing_index_files, get_total_file_size, + input_files_exist, run_command, ) @@ -282,15 +283,8 @@ def execute(self, args) -> bool: return False # Check if all of the input files exist. - for pattern in shlex.split(args.input_files): - if len(glob.glob(pattern)) == 0: - log.error(f'No file matching "{pattern}" found') - log.info("") - log.info( - "Did you call `qlever get-data`? If you did, check " - "GET_DATA_CMD and INPUT_FILES in the QLeverfile" - ) - return False + if not input_files_exist(args.input_files): + return False # Check if index files (name.index.*) already exist. existing_index_files = get_existing_index_files(args.name) diff --git a/src/qlever/commands/index_stats.py b/src/qlever/commands/index_stats.py index d1c96b76..8b435d41 100644 --- a/src/qlever/commands/index_stats.py +++ b/src/qlever/commands/index_stats.py @@ -381,7 +381,7 @@ def execute(self, args) -> bool: # missing timestamps (duration is None). for heading, (duration, time_unit) in durations.items(): if duration is not None: - if heading == "TOTAL time": + if heading == "TOTAL time" and len(durations) != 1: log.info("") log.info( f"{heading:<25} : {duration:>6.1f} {time_unit}" @@ -400,7 +400,7 @@ def execute(self, args) -> bool: sizes = self.execute_space(args) # Display the disk space used by each group of index files. for heading, (size, size_unit) in sizes.items(): - if heading == "TOTAL size": + if heading == "TOTAL size" and len(sizes) != 1: log.info("") if size_unit == "B": log.info(f"{heading:<25} : {size:,} {size_unit}") diff --git a/src/qlever/commands/setup_config.py b/src/qlever/commands/setup_config.py index 1a53d604..b2411ac9 100644 --- a/src/qlever/commands/setup_config.py +++ b/src/qlever/commands/setup_config.py @@ -4,9 +4,9 @@ from os import environ from pathlib import Path +import qlever.util as util from qlever.command import QleverCommand from qlever.log import log -from qlever.util import get_random_string class SetupConfigCommand(QleverCommand): @@ -15,9 +15,19 @@ class SetupConfigCommand(QleverCommand): """ def __init__(self): - self.qleverfiles_path = Path(__file__).parent.parent / "Qleverfiles" + self.qleverfiles_path = ( + Path(__file__).parent.parent.parent / "qlever/Qleverfiles" + ) self.qleverfile_names = [ - p.name.split(".")[1] for p in self.qleverfiles_path.glob("Qleverfile.*") + p.name.split(".")[1] + for p in self.qleverfiles_path.glob("Qleverfile.*") + ] + # Arguments that can be overridden when generating a Qleverfile, + # as (section, arg_name) pairs. + self.override_args = [ + ("server", "port"), + ("server", "timeout"), + ("runtime", "system"), ] def description(self) -> str: @@ -27,7 +37,10 @@ def should_have_qleverfile(self) -> bool: return False def relevant_qleverfile_arguments(self) -> dict[str, list[str]]: - return {} + result = {} + for section, arg_name in self.override_args: + result.setdefault(section, []).append(arg_name) + return result def additional_arguments(self, subparser) -> None: subparser.add_argument( @@ -37,9 +50,24 @@ def additional_arguments(self, subparser) -> None: help="The name of the pre-configured Qleverfile to create", ) + def check_qleverfile_exists(self) -> bool: + """Return True if a Qleverfile already exists (and log an error).""" + if Path("Qleverfile").exists(): + log.error("`Qleverfile` already exists in current directory") + log.info("") + log.info( + "If you want to create a new Qleverfile using " + "`qlever setup-config`, delete the existing Qleverfile " + "first" + ) + return True + return False + def execute(self, args) -> bool: # Show a warning if `QLEVER_OVERRIDE_SYSTEM_NATIVE` is set. - qlever_is_running_in_container = environ.get("QLEVER_IS_RUNNING_IN_CONTAINER") + qlever_is_running_in_container = environ.get( + "QLEVER_IS_RUNNING_IN_CONTAINER" + ) if qlever_is_running_in_container: log.warning( "The environment variable `QLEVER_IS_RUNNING_IN_CONTAINER` is set, " @@ -48,30 +76,33 @@ def execute(self, args) -> bool: ) log.info("") # Construct the command line and show it. - qleverfile_path = self.qleverfiles_path / f"Qleverfile.{args.config_name}" - setup_config_cmd = ( - f"cat {qleverfile_path}" - f" | sed -E 's/(^ACCESS_TOKEN.*)/\\1_{get_random_string(12)}/'" + qleverfile_path = ( + self.qleverfiles_path / f"Qleverfile.{args.config_name}" ) + setup_config_cmd = f"cat {qleverfile_path} | { + util.get_ini_sed_cmd( + 'server', 'ACCESS_TOKEN', util.get_random_string(12), True + ) + }" if qlever_is_running_in_container: setup_config_cmd += ( - " | sed -E 's/(^SYSTEM[[:space:]]*=[[:space:]]*).*/\\1native/'" + f" | {util.get_ini_sed_cmd('runtime', 'SYSTEM', 'native')}" ) + else: + for section, arg_name in self.override_args: + if arg_value := getattr(args, arg_name, None): + setup_config_cmd += f" | { + util.get_ini_sed_cmd( + section, arg_name.upper(), arg_value + ) + }" + setup_config_cmd += "> Qleverfile" self.show(setup_config_cmd, only_show=args.show) if args.show: return True - # If there is already a Qleverfile in the current directory, exit. - qleverfile_path = Path("Qleverfile") - if qleverfile_path.exists(): - log.error("`Qleverfile` already exists in current directory") - log.info("") - log.info( - "If you want to create a new Qleverfile using " - "`qlever setup-config`, delete the existing Qleverfile " - "first" - ) + if self.check_qleverfile_exists(): return False # Copy the Qleverfile to the current directory. @@ -85,11 +116,10 @@ def execute(self, args) -> bool: ) except Exception as e: log.error( - f'Could not copy "{qleverfile_path}"' f" to current directory: {e}" + f'Could not copy "{qleverfile_path}" to current directory: {e}' ) return False - # If we get here, everything went well. log.info( f'Created Qleverfile for config "{args.config_name}"' f" in current directory" diff --git a/src/qlever/config.py b/src/qlever/config.py index a6c7e9e0..f877512a 100644 --- a/src/qlever/config.py +++ b/src/qlever/config.py @@ -81,6 +81,10 @@ def argument_error(prefix): f"`{section}` not found") args, kwargs = all_qleverfile_args[section][arg_name] kwargs_copy = kwargs.copy() + action_type = kwargs_copy.get("action", "store") + if action_type == "store" and "metavar" not in kwargs_copy: + metavar = arg_name.upper() + kwargs_copy["metavar"] = f"(in Qleverfile: [{section}] {metavar})" # If `qleverfile_config` is given, add info about default # values to the help string. if qleverfile_config is not None: diff --git a/src/qlever/qleverfile.py b/src/qlever/qleverfile.py index 83ed8794..392a3658 100644 --- a/src/qlever/qleverfile.py +++ b/src/qlever/qleverfile.py @@ -4,6 +4,7 @@ import socket import subprocess from configparser import ConfigParser, ExtendedInterpolation, RawConfigParser +from importlib import import_module from pathlib import Path from qlever import script_name @@ -415,6 +416,14 @@ def arg(*args, **kwargs): help="The name of the container used for `qlever ui`", ) + engine_args_module_path = f"{script_name}.qleverfile" + try: + if script_name != "qlever": + module = import_module(engine_args_module_path) + module.qleverfile_args(all_args) + except (ImportError, AttributeError) as e: + log.debug(f"Could not import module {engine_args_module_path}: {e}") + return all_args @staticmethod diff --git a/src/qlever/util.py b/src/qlever/util.py index 94de6f92..848549bd 100644 --- a/src/qlever/util.py +++ b/src/qlever/util.py @@ -1,6 +1,8 @@ from __future__ import annotations +import argparse import errno +import glob import re import secrets import shlex @@ -15,20 +17,26 @@ import psutil +from qlever import script_name from qlever.log import log -def get_total_file_size(patterns: list[str]) -> int: +def get_total_file_size( + patterns: list[str], exclude: set[str] | None = None +) -> int: """ - Helper function that gets the total size of all files mathing the given - patterns in bytes. + Helper function that gets the total size of all files matching the given + patterns in bytes. Files whose names match any entry in `exclude` are + skipped. """ - + if not exclude: + exclude = set() total_size = 0 search_dir = Path.cwd() for pattern in patterns: for file in search_dir.glob(pattern): - total_size += file.stat().st_size + if file.name not in exclude: + total_size += file.stat().st_size return total_size @@ -88,7 +96,7 @@ def run_curl_command( url: str, headers: dict[str, str] = {}, params: dict[str, str] = {}, - result_file: Optional[str] = None, + result_file: str | None = None, max_time: int | None = None, ) -> str: """ @@ -300,7 +308,11 @@ def stop_process_with_regex(cmdline_regex: str) -> list[bool] | None: "cmdline", ] ) - cmdline = " ".join(pinfo["cmdline"]) + cmdline = ( + " ".join(pinfo["cmdline"]) + if isinstance(pinfo["cmdline"], list) + else "" + ) except Exception as e: # For some processes (e.g., zombies), getting info may fail. log.debug(f"Error getting process info: {e}") @@ -324,7 +336,7 @@ def binary_exists(binary: str, cmd_arg: str, args) -> bool: is_containerized = args.system in Containerize.supported_systems() cmd = f"{binary} --help" - if is_containerized: + if is_containerized and script_name == "qlever": cmd = Containerize().containerize_command( cmd, args.system, @@ -372,6 +384,117 @@ def is_server_alive(url: str) -> bool: return False +def input_files_exist(input_files: str) -> bool: + """ + Check if all of the input files exist in current working directory. + """ + for pattern in shlex.split(input_files): + if len(glob.glob(pattern)) == 0: + log.error(f'No file matching "{pattern}" found') + log.info("") + log.info( + f"Did you call `{script_name} get-data`? If you did, " + "check GET_DATA_CMD and INPUT_FILES in the Qleverfile" + ) + return False + return True + + +def build_image(build_cmd: str, system: str, image: str) -> bool: + """ + Build a container image using the build command, container system and + image name. This method is supposed to be used before executing the index + command and the logs show that. + """ + log.info(f"Building {system} image {image}...") + try: + run_command(build_cmd, show_output=True, show_stderr=True) + log.info( + f"Finished building {system} image {image}! " + "Continuing with index operation...\n" + ) + return True + except Exception as e: + log.error(f"Building the {system} image {image} failed: {e}") + return False + + +def get_container_image_id(system: str, image: str) -> str: + """ + Get the container image ID to check if the image exists on the system. + """ + try: + image_id = run_command( + f"{system} images -q {image}", return_output=True + ) + except Exception as e: + log.info( + f"Couldn't identify if {system} image {image} " + f"exists on the system : {e}" + ) + log.info( + "Assuming that the image doesn't exist and the image would " + "be built.\n" + ) + image_id = "" + return image_id + + +def get_ini_sed_cmd( + section: str, option: str, new_value: str, is_suffix: bool = False +) -> str: + """ + Generates a cross-platform sed command to update the value of a + key = value pair or append to one (by using is_suffix = True) in an INI file. + """ + if is_suffix: + pattern = f"s/(^{option}.*)/\\1{new_value}/" + else: + pattern = f"s/(^{option}[[:space:]]*=[[:space:]]*).*/\\1{new_value}/" + return f"sed -E '/^\\[{section}\\]/,/^\\[/ {pattern}'" + + +def parse_memory(value: str) -> str: + """ + Validate memory size string like '4G'. + Returns the string unchanged if valid, raises argparse.ArgumentTypeError otherwise. + """ + if not re.match(r"^\d+[G]$", value, re.IGNORECASE): + raise argparse.ArgumentTypeError( + f"Invalid memory size '{value}'. Use format like 4G, 32G." + ) + return value.upper() + + +def add_memory_options(subparser, index=True, server=True): + """ + Add total memory-related options to a subparser for setup-config command. + """ + if index: + subparser.add_argument( + "--total-index-memory", + type=parse_memory, + default="4G", + help=( + "Maximum memory budget for indexing. All relevant [index] " + "options in the Qleverfile will be auto-generated with sensible " + "defaults that together stay within this limit. " + ), + ) + + if server: + subparser.add_argument( + "--total-server-memory", + type=parse_memory, + default="4G", + help=( + "Maximum memory budget for the server. All relevant [server] " + "options in the Qleverfile will be auto-generated with sensible " + "defaults that together stay within this limit. " + ), + ) + + def tail_log_file( log_file: Path, max_wait_seconds: int = 30, diff --git a/test/qlever/commands/test_index_execute.py b/test/qlever/commands/test_index_execute.py index cff13ab9..e97a7740 100644 --- a/test/qlever/commands/test_index_execute.py +++ b/test/qlever/commands/test_index_execute.py @@ -14,10 +14,10 @@ class TestIndexCommand(unittest.TestCase): @patch("qlever.commands.index.Containerize") @patch("qlever.commands.index.get_existing_index_files") @patch("qlever.commands.index.get_total_file_size") - @patch("qlever.commands.index.glob") + @patch("qlever.commands.index.input_files_exist") def test_execute_successful_indexing_without_extras( self, - mock_glob, + mock_input_files_exist, mock_get_total_file_size, mock_get_existing_index_files, mock_containerize, @@ -50,9 +50,9 @@ def test_execute_successful_indexing_without_extras( args.parser_buffer_size = None args.materialized_views = None - # Mock glob, get_total_file_size, get_existing_index_files, - # run_command and containerize - mock_glob.glob.return_value = ["input1.nt", "input2.nt"] + # Mock input_files_exist, get_total_file_size, + # get_existing_index_files, run_command and containerize + mock_input_files_exist.return_value = True mock_get_total_file_size.return_value = 5e9 # 5 GB mock_get_existing_index_files.return_value = [] mock_index_run_command.return_value = None @@ -98,10 +98,10 @@ def test_execute_successful_indexing_without_extras( @patch("qlever.commands.index.get_existing_index_files") @patch("qlever.commands.index.get_total_file_size") @patch("qlever.commands.index.log") - @patch("qlever.commands.index.glob") + @patch("qlever.commands.index.input_files_exist") def test_execute_indexing_with_already_existing_files( self, - mock_glob, + mock_input_files_exist, mock_log, mock_get_total_file_size, mock_get_existing_index_files, @@ -129,9 +129,9 @@ def test_execute_indexing_with_already_existing_files( args.multi_input_json = False args.materialized_views = None - # Mock glob, get_total_file_size, get_existing_index_files, - # run_command and containerize - mock_glob.glob.return_value = ["input1.nt", "input2.nt"] + # Mock input_files_exist, get_total_file_size, + # get_existing_index_files, run_command and containerize + mock_input_files_exist.return_value = True mock_get_total_file_size.return_value = 5e9 # 5 GB mock_get_existing_index_files.return_value = ["TestName.index"] mock_run_command.return_value = None @@ -226,10 +226,10 @@ def test_execute_fails_if_no_indexing_binary_is_found( @patch("qlever.commands.index.Containerize") @patch("qlever.commands.index.get_existing_index_files") @patch("qlever.commands.index.get_total_file_size") - @patch("qlever.commands.index.glob") + @patch("qlever.commands.index.input_files_exist") def test_execute_total_file_size_greater_than_ten_gb( self, - mock_glob, + mock_input_files_exist, mock_get_total_file_size, mock_get_existing_index_files, mock_containerize, @@ -262,9 +262,9 @@ def test_execute_total_file_size_greater_than_ten_gb( args.parser_buffer_size = None args.materialized_views = None - # Mock glob, get_total_file_size, get_existing_index_files, - # run_command and containerize - mock_glob.glob.return_value = ["input1.nt", "input2.nt"] + # Mock input_files_exist, get_total_file_size, + # get_existing_index_files, run_command and containerize + mock_input_files_exist.return_value = True mock_get_total_file_size.return_value = 15e9 # 15 GB mock_get_existing_index_files.return_value = [] mock_index_run_command.return_value = None