From 7916e5c78ff1971f6f71439e8914811df31e8598 Mon Sep 17 00:00:00 2001 From: Nicolas von Trott Date: Mon, 21 Jul 2025 10:06:54 +0200 Subject: [PATCH 01/25] Added 'osm-update-command' to update qlever with 'osm-live-updates' --- src/qlever/commands/osm_update.py | 204 ++++++++++++++++++++++++++++++ 1 file changed, 204 insertions(+) create mode 100644 src/qlever/commands/osm_update.py diff --git a/src/qlever/commands/osm_update.py b/src/qlever/commands/osm_update.py new file mode 100644 index 000000000..f35d3a4a0 --- /dev/null +++ b/src/qlever/commands/osm_update.py @@ -0,0 +1,204 @@ +from __future__ import annotations + +import os +import signal +import time + +from qlever.command import QleverCommand +from qlever.log import log +from qlever.util import run_command + +from qlever.containerize import Containerize + + +# Exception to be raised when the user interrupts the command with Ctrl+C. +class UserInterruptException(Exception): + pass + + +class OsmUpdateCommand(QleverCommand): + """ + Class for executing the `osm-update` command. + """ + + def __init__(self): + self.planet_replication_server_url = \ + "https://planet.osm.org/replication/" + # Remember if Ctrl+C was pressed and if a update is currently running, + # so we can handle it gracefully. + self.is_running_update = False + self.ctrl_c_pressed = False + + def description(self) -> str: + return "Update OSM data for a given dataset" + + def should_have_qleverfile(self) -> bool: + return True + + def relevant_qleverfile_arguments(self) -> dict[str: list[str]]: + return {"data": ["name"], + "server": ["host_name", "port", "access_token"]} + + def additional_arguments(self, subparser) -> None: + subparser.add_argument( + "granularity", + nargs=1, + choices=["minute", "hour", "day"], + type=str, + help="The granularity with which the OSM data should be updated. " + "Choose from 'minute', 'hour', or 'day'.", + ) + subparser.add_argument( + "--once", + action='store_true', + default=False, + help="If set, the OSM data will be updated only once. " + "Otherwise, it will be updated continuously at the specified " + "granularity.", + ) + subparser.add_argument( + "--polyfile", + nargs='?', + type=str, + help="The poly file that defines the boundaries of your osm " + "dataset. (Poly files for country extracts are available at " + "https://download.geofabrik.de/) If no poly file is provided," + " the complete osm planet data will be used.", + ) + subparser.add_argument( + "--replication-server", + nargs='?', + type=str, + help="The URL of the OSM replication server to use. By default, " + "the OSM planet replication server " + "('https://planet.osm.org/replication/) is used." + ) + + # Handle Ctrl+C gracefully by finishing the current update and then + # exiting. + def handle_ctrl_c(self, signal_received, frame): + if self.ctrl_c_pressed: + log.warn("\rCtrl+C pressed again, undoing the previous Ctrl+C") + self.ctrl_c_pressed = False + else: + self.ctrl_c_pressed = True + if self.is_running_update: + log.warn("\rCtrl+C pressed, will finish the current update " + "and then exit [press Ctrl+C again to continue]") + else: + raise UserInterruptException() + + def execute(self, args) -> bool: + # If the user has specified a replication server, use that one, + # otherwise we use the planet replication server with the specified + # granularity. + granularity = args.granularity[0] + replications_server: str + if args.replication_server: + replication_server = args.replication_server + else: + replication_server = (f"{self.planet_replication_server_url}" + f"{granularity}/") + + granularity_in_seconds: int + if granularity == "minute": + granularity_in_seconds = 60 + elif granularity == "hour": + granularity_in_seconds = 3600 + elif granularity == "day": + granularity_in_seconds = 86400 + + cmd_description = [ + f"Update OSM data for dataset '{args.name}' with " + f"granularity '{granularity}' from the OSM replication" + f" server '{replication_server}'."] + self.show("\n".join(cmd_description), only_show=args.show) + + signal.signal(signal.SIGINT, self.handle_ctrl_c) + if not args.once and not args.show: + log.warn( + "Press Ctrl+C to finish any currently running updates and end " + "gracefully, press Ctrl+C again to continue\n" + ) + + # Construct the command to run the osm-live-updates tool. + olu_cmd = self.construct_olu_cmd(replication_server, args) + self.show(f"{olu_cmd}") + if args.show: + return True + + try: + while True: + if self.ctrl_c_pressed: + raise UserInterruptException() + + start_time = time.time() + + self.is_running_update = True + log.info(f"Starting OSM data update...") + process = run_command(olu_cmd, show_output=True, show_stderr=True, use_popen=True) + try: + process.wait() + except KeyboardInterrupt: + log.warn("\njsdfkalj OSM data update interrupted by user.") + self.is_running_update = False + self.ctrl_c_pressed = True + + log.info("\nOSM data update completed successfully.") + self.is_running_update = False + + # If the user has specified `--once`, we exit after the + # first update. + if args.once: + return True + + # Wait for the next update interval based on the granularity + elapsed = time.time() - start_time + sleep_time = max(0, granularity_in_seconds - elapsed) + if sleep_time > 0: + log.info(f"\nWaiting for {sleep_time:.0f} seconds " + f"until the next update...") + time.sleep(sleep_time) + + except UserInterruptException: + log.info("\nOSM data update interrupted by user.") + return True + + except BaseException as e: + log.error(f"An error occurred during the OSM data update: {e}") + return False + + def construct_olu_cmd(self, replication_server_url: str, args) -> str: + sparql_endpoint = f"http://{args.host_name}:{args.port}" + container_name = f"olu-{args.name}" + + olu_cmd = f"{sparql_endpoint}" + olu_cmd += f" -a {args.access_token}" + olu_cmd += f" -f {replication_server_url}" + olu_cmd += f" --qlever" + olu_cmd += f" --statistics" + + # If the user has specified a polygon file, we add it to the command. + if args.polyfile: + # Check if polygon file exists + if not os.path.exists(args.polyfile): + log.error(f'No file matching "{args.polyfile}" found') + log.info("") + log.info("Check if the polyfile exists and if the path is " + "correct.") + return False + + olu_cmd += f" --polygon {args.polyfile}" + + olu_cmd = Containerize().containerize_command( + olu_cmd, + "docker", + "run --rm", + "olu:latest", + container_name, + volumes=[("$(pwd)", "/update")], + working_directory="/update", + use_bash=False + ) + + return olu_cmd From 17c543811ac612bffd4a6d6dbe50bc419138a252 Mon Sep 17 00:00:00 2001 From: Nicolas von Trott Date: Mon, 21 Jul 2025 11:38:24 +0200 Subject: [PATCH 02/25] Added 'new_session' parameter to 'run_command' which will start the subprocess in its own process group (only works on POSIX systems) --- src/qlever/util.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/qlever/util.py b/src/qlever/util.py index e8845bd5c..0fa9e210b 100644 --- a/src/qlever/util.py +++ b/src/qlever/util.py @@ -1,6 +1,7 @@ from __future__ import annotations import errno +import os import re import secrets import shlex @@ -37,11 +38,15 @@ def run_command( show_output: bool = False, show_stderr: bool = False, use_popen: bool = False, + new_session: bool = False, ) -> Optional[str | subprocess.Popen]: """ Run the given command and throw an exception if the exit code is non-zero. If `return_output` is `True`, return what the command wrote to `stdout`. + If 'new_session' is `True`, the command will be started in a new process + group. NOTE: 'new_session' will only work on POSIX systems + NOTE: The `set -o pipefail` ensures that the exit code of the command is non-zero if any part of the pipeline fails (not just the last part). @@ -56,6 +61,10 @@ def run_command( "stderr": None if show_stderr else subprocess.PIPE, } + # Add process group isolation if ignore_sigint is True + if new_session: + subprocess_args["preexec_fn"] = os.setsid + # With `Popen`, the command runs in the current shell and a process object # is returned (which can be used, e.g., to kill the process). if use_popen: From 2af42156dedbd4050f628c1f4a6c4546c2c9f5c4 Mon Sep 17 00:00:00 2001 From: Nicolas von Trott Date: Mon, 21 Jul 2025 11:50:40 +0200 Subject: [PATCH 03/25] Fixed problem that User Interrupt (Ctrl+C) closed the subprocess by starting it in a new detached session --- src/qlever/commands/osm_update.py | 44 +++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 14 deletions(-) diff --git a/src/qlever/commands/osm_update.py b/src/qlever/commands/osm_update.py index f35d3a4a0..81392e6dd 100644 --- a/src/qlever/commands/osm_update.py +++ b/src/qlever/commands/osm_update.py @@ -2,8 +2,13 @@ import os import signal +import subprocess +import sys +import threading import time +from daemon import DaemonContext + from qlever.command import QleverCommand from qlever.log import log from qlever.util import run_command @@ -129,23 +134,31 @@ def execute(self, args) -> bool: try: while True: - if self.ctrl_c_pressed: - raise UserInterruptException() + log.info(f"Starting OSM data update...\n") start_time = time.time() + # Run the osm-live-updates tool in a subprocess, + # use new_session to avoid that the subprocess receives the + # Ctrl+C signal. self.is_running_update = True - log.info(f"Starting OSM data update...") - process = run_command(olu_cmd, show_output=True, show_stderr=True, use_popen=True) - try: - process.wait() - except KeyboardInterrupt: - log.warn("\njsdfkalj OSM data update interrupted by user.") - self.is_running_update = False - self.ctrl_c_pressed = True - - log.info("\nOSM data update completed successfully.") + olu = run_command(olu_cmd, show_stderr=True, + show_output=True, use_popen=True, + new_session=True) + + # Wait for the subprocess to finish. + olu_return_code = olu.wait() self.is_running_update = False + if olu_return_code != 0: + log.error(f"\nOSM data update failed with return code " + f"{olu_return_code}.") + return False + else: + log.info("\nOSM data update completed successfully.") + + # Check if the user has pressed Ctrl+C during the update. + if self.ctrl_c_pressed: + raise UserInterruptException() # If the user has specified `--once`, we exit after the # first update. @@ -153,11 +166,14 @@ def execute(self, args) -> bool: return True # Wait for the next update interval based on the granularity + # and the time it took to run the previous update. elapsed = time.time() - start_time sleep_time = max(0, granularity_in_seconds - elapsed) if sleep_time > 0: - log.info(f"\nWaiting for {sleep_time:.0f} seconds " - f"until the next update...") + formatted_time = time.strftime('%Hh:%Mm:%Ss', + time.gmtime(sleep_time)) + log.info(f"\nWaiting for {formatted_time} until the next " + f"update...") time.sleep(sleep_time) except UserInterruptException: From bb3b08d94f91a7e2bcac9c71508360f8663891f6 Mon Sep 17 00:00:00 2001 From: Nicolas von Trott Date: Mon, 21 Jul 2025 15:32:38 +0200 Subject: [PATCH 04/25] Added option for user to specify a bbox as boundary --- src/qlever/commands/osm_update.py | 38 +++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 10 deletions(-) diff --git a/src/qlever/commands/osm_update.py b/src/qlever/commands/osm_update.py index 81392e6dd..c6124adb7 100644 --- a/src/qlever/commands/osm_update.py +++ b/src/qlever/commands/osm_update.py @@ -67,7 +67,15 @@ def additional_arguments(self, subparser) -> None: type=str, help="The poly file that defines the boundaries of your osm " "dataset. (Poly files for country extracts are available at " - "https://download.geofabrik.de/) If no poly file is provided," + "https://download.geofabrik.de/). If no boundary is provided," + " the complete osm planet data will be used.", + ) + subparser.add_argument( + "--bbox", + nargs='?', + type=str, + help="The bounding box (LEFT,BOTTOM,RIGHT,TOP) that defines the " + "boundaries of your osm dataset. If no boundary is provided," " the complete osm planet data will be used.", ) subparser.add_argument( @@ -119,6 +127,8 @@ def execute(self, args) -> bool: f" server '{replication_server}'."] self.show("\n".join(cmd_description), only_show=args.show) + # Handle user interruptions (Ctrl+C) gracefully by waiting for the + # current update to finish and then exiting. signal.signal(signal.SIGINT, self.handle_ctrl_c) if not args.once and not args.show: log.warn( @@ -127,8 +137,15 @@ def execute(self, args) -> bool: ) # Construct the command to run the osm-live-updates tool. - olu_cmd = self.construct_olu_cmd(replication_server, args) - self.show(f"{olu_cmd}") + try: + olu_cmd = self.construct_olu_cmd(replication_server, args) + self.show(f"{olu_cmd}") + except (ValueError, FileNotFoundError) as e: + log.error(f"{e}") + return False + + # If the user has specified `--show`, we only show the command and + # return without executing it. if args.show: return True @@ -192,17 +209,18 @@ def construct_olu_cmd(self, replication_server_url: str, args) -> str: olu_cmd += f" -a {args.access_token}" olu_cmd += f" -f {replication_server_url}" olu_cmd += f" --qlever" - olu_cmd += f" --statistics" - # If the user has specified a polygon file, we add it to the command. + # If the user has specified a boundary, we add it to the command. + if args.bbox and args.polyfile: + raise ValueError("You cannot specify both --bbox and --polyfile. " + "Please choose one of them.") + if args.bbox: + olu_cmd += f" --bbox {args.bbox}" if args.polyfile: # Check if polygon file exists if not os.path.exists(args.polyfile): - log.error(f'No file matching "{args.polyfile}" found') - log.info("") - log.info("Check if the polyfile exists and if the path is " - "correct.") - return False + raise FileNotFoundError(f'No file matching "{args.polyfile}"' + f' found.') olu_cmd += f" --polygon {args.polyfile}" From 1c109847959f69528ba01a3d18d633b20f798428 Mon Sep 17 00:00:00 2001 From: Nicolas von Trott Date: Wed, 23 Jul 2025 13:11:34 +0200 Subject: [PATCH 05/25] * Added logic to pull olu image * Check if QLever endpoint is running before running olu --- src/qlever/commands/osm_update.py | 40 ++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/src/qlever/commands/osm_update.py b/src/qlever/commands/osm_update.py index c6124adb7..804a1e6fb 100644 --- a/src/qlever/commands/osm_update.py +++ b/src/qlever/commands/osm_update.py @@ -2,20 +2,14 @@ import os import signal -import subprocess -import sys -import threading import time -from daemon import DaemonContext - from qlever.command import QleverCommand from qlever.log import log -from qlever.util import run_command +from qlever.util import run_command, is_qlever_server_alive from qlever.containerize import Containerize - # Exception to be raised when the user interrupts the command with Ctrl+C. class UserInterruptException(Exception): pass @@ -42,7 +36,8 @@ def should_have_qleverfile(self) -> bool: def relevant_qleverfile_arguments(self) -> dict[str: list[str]]: return {"data": ["name"], - "server": ["host_name", "port", "access_token"]} + "server": ["host_name", "port", "access_token"], + "runtime": ["system"]} def additional_arguments(self, subparser) -> None: subparser.add_argument( @@ -86,6 +81,12 @@ def additional_arguments(self, subparser) -> None: "the OSM planet replication server " "('https://planet.osm.org/replication/) is used." ) + subparser.add_argument( + "--olu-image", + type=str, + default="docker.io/adfreiburg/olu", + help="The name of the image used for osm-live-updates.", + ) # Handle Ctrl+C gracefully by finishing the current update and then # exiting. @@ -136,6 +137,16 @@ def execute(self, args) -> bool: "gracefully, press Ctrl+C again to continue\n" ) + # Create command to pull the latest image for osm-live-updates if + # remote image is used. + pull_cmd = "" + if ("/" in args.olu_image and + args.system in Containerize.supported_systems()): + pull_cmd = f"{args.system} pull -q {args.olu_image}" + log.debug(f"Pulling image `{args.olu_image}` for" + f" osm-live-updates.") + self.show(f"{pull_cmd}") + # Construct the command to run the osm-live-updates tool. try: olu_cmd = self.construct_olu_cmd(replication_server, args) @@ -149,6 +160,17 @@ def execute(self, args) -> bool: if args.show: return True + endpoint_url = f"http://{args.host_name}:{args.port}" + if not is_qlever_server_alive(endpoint_url): + log.error( + f"QLever endpoint at {endpoint_url} is not running." + ) + return False + + # Pull the latest image for osm-live-updates if remote image is used. + if pull_cmd: + run_command(pull_cmd) + try: while True: log.info(f"Starting OSM data update...\n") @@ -226,7 +248,7 @@ def construct_olu_cmd(self, replication_server_url: str, args) -> str: olu_cmd = Containerize().containerize_command( olu_cmd, - "docker", + args.system, "run --rm", "olu:latest", container_name, From 0c4dae1786de6ca6ff630937fcf321a34dccd922 Mon Sep 17 00:00:00 2001 From: Nicolas von Trott Date: Wed, 23 Jul 2025 13:31:37 +0200 Subject: [PATCH 06/25] * Use subprocesses 'start-new-session' option * Do not add this argument if we are on a Windows system, since it will raise an exception. --- src/qlever/util.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/qlever/util.py b/src/qlever/util.py index 0fa9e210b..b43923d27 100644 --- a/src/qlever/util.py +++ b/src/qlever/util.py @@ -1,7 +1,6 @@ from __future__ import annotations import errno -import os import re import secrets import shlex @@ -11,13 +10,13 @@ import subprocess from datetime import date, datetime from pathlib import Path +from platform import system from typing import Any, Optional import psutil from qlever.log import log - def get_total_file_size(patterns: list[str]) -> int: """ Helper function that gets the total size of all files mathing the given @@ -61,9 +60,10 @@ def run_command( "stderr": None if show_stderr else subprocess.PIPE, } - # Add process group isolation if ignore_sigint is True - if new_session: - subprocess_args["preexec_fn"] = os.setsid + # Add process group isolation if new_session is True + # (Works only on POSIX systems). + if new_session and system() != "Windows": + subprocess_args["start_new_session"] = True # With `Popen`, the command runs in the current shell and a process object # is returned (which can be used, e.g., to kill the process). From d4180ccdf8cf87dafc3a05f5ba2157c8d931692c Mon Sep 17 00:00:00 2001 From: Nicolas von Trott Date: Wed, 23 Jul 2025 14:00:33 +0200 Subject: [PATCH 07/25] * Handle user pressing Ctrl+Z by stopping subprocess and container --- src/qlever/commands/osm_update.py | 37 ++++++++++++++++++++++++++----- 1 file changed, 32 insertions(+), 5 deletions(-) diff --git a/src/qlever/commands/osm_update.py b/src/qlever/commands/osm_update.py index 804a1e6fb..9fa0c516c 100644 --- a/src/qlever/commands/osm_update.py +++ b/src/qlever/commands/osm_update.py @@ -2,7 +2,9 @@ import os import signal +import subprocess import time +from typing import Optional from qlever.command import QleverCommand from qlever.log import log @@ -27,6 +29,8 @@ def __init__(self): # so we can handle it gracefully. self.is_running_update = False self.ctrl_c_pressed = False + # The process which starts the osm-live-updates tool. + self.olu_process: Optional[subprocess.Popen] = None def description(self) -> str: return "Update OSM data for a given dataset" @@ -102,6 +106,24 @@ def handle_ctrl_c(self, signal_received, frame): else: raise UserInterruptException() + # Handle forceful termination (Ctrl+Z) + def handle_ctrl_z(self, args, signal_received, frame): + if self.is_running_update: + log.error("Ctrl+Z pressed, will kill the current update and exit." + "\nThe data may be corrupted if triples where currently " + "inserted or deleted.") + else: + raise UserInterruptException() + + if self.olu_process and self.olu_process.poll() is None: + self.olu_process.kill() + + if self.is_running_update: + Containerize().stop_and_remove_container(args.system, + f"olu-{args.name}") + + raise UserInterruptException() + def execute(self, args) -> bool: # If the user has specified a replication server, use that one, # otherwise we use the planet replication server with the specified @@ -131,10 +153,14 @@ def execute(self, args) -> bool: # Handle user interruptions (Ctrl+C) gracefully by waiting for the # current update to finish and then exiting. signal.signal(signal.SIGINT, self.handle_ctrl_c) + signal.signal(signal.SIGTSTP, + lambda s, f: self.handle_ctrl_z(args, s, f)) if not args.once and not args.show: log.warn( "Press Ctrl+C to finish any currently running updates and end " "gracefully, press Ctrl+C again to continue\n" + "Press Ctrl+Z to terminate updates forcefully. Doing so while " + "triples are being deleted or inserted may corrupt the data.\n" ) # Create command to pull the latest image for osm-live-updates if @@ -181,12 +207,13 @@ def execute(self, args) -> bool: # use new_session to avoid that the subprocess receives the # Ctrl+C signal. self.is_running_update = True - olu = run_command(olu_cmd, show_stderr=True, - show_output=True, use_popen=True, - new_session=True) + self.olu_process = run_command(olu_cmd, show_stderr=True, + show_output=True, + use_popen=True, + new_session=True) # Wait for the subprocess to finish. - olu_return_code = olu.wait() + olu_return_code = self.olu_process.wait() self.is_running_update = False if olu_return_code != 0: log.error(f"\nOSM data update failed with return code " @@ -250,7 +277,7 @@ def construct_olu_cmd(self, replication_server_url: str, args) -> str: olu_cmd, args.system, "run --rm", - "olu:latest", + args.olu_image, container_name, volumes=[("$(pwd)", "/update")], working_directory="/update", From b130421d89d24e2224d647519cd43ed582e33075 Mon Sep 17 00:00:00 2001 From: Nicolas von Trott Date: Wed, 23 Jul 2025 14:13:35 +0200 Subject: [PATCH 08/25] * Use same argument name as osmium for polygon file --- src/qlever/commands/osm_update.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/qlever/commands/osm_update.py b/src/qlever/commands/osm_update.py index 9fa0c516c..014c7a00c 100644 --- a/src/qlever/commands/osm_update.py +++ b/src/qlever/commands/osm_update.py @@ -61,10 +61,10 @@ def additional_arguments(self, subparser) -> None: "granularity.", ) subparser.add_argument( - "--polyfile", + "--polygon", nargs='?', type=str, - help="The poly file that defines the boundaries of your osm " + help="The polygon that defines the boundaries of your osm " "dataset. (Poly files for country extracts are available at " "https://download.geofabrik.de/). If no boundary is provided," " the complete osm planet data will be used.", @@ -260,18 +260,18 @@ def construct_olu_cmd(self, replication_server_url: str, args) -> str: olu_cmd += f" --qlever" # If the user has specified a boundary, we add it to the command. - if args.bbox and args.polyfile: - raise ValueError("You cannot specify both --bbox and --polyfile. " + if args.bbox and args.polygon: + raise ValueError("You cannot specify both --bbox and --polygon. " "Please choose one of them.") if args.bbox: olu_cmd += f" --bbox {args.bbox}" - if args.polyfile: + if args.polygon: # Check if polygon file exists - if not os.path.exists(args.polyfile): - raise FileNotFoundError(f'No file matching "{args.polyfile}"' + if not os.path.exists(args.polygon): + raise FileNotFoundError(f'No file matching "{args.polygon}"' f' found.') - olu_cmd += f" --polygon {args.polyfile}" + olu_cmd += f" --polygon {args.polygon}" olu_cmd = Containerize().containerize_command( olu_cmd, From b57833d9ab2f73754663e16872ce000eb8d9dc87 Mon Sep 17 00:00:00 2001 From: Nicolas von Trott Date: Thu, 24 Jul 2025 09:27:50 +0200 Subject: [PATCH 09/25] * Fixed formatting --- src/qlever/commands/osm_update.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/qlever/commands/osm_update.py b/src/qlever/commands/osm_update.py index 014c7a00c..68fde45c0 100644 --- a/src/qlever/commands/osm_update.py +++ b/src/qlever/commands/osm_update.py @@ -12,7 +12,9 @@ from qlever.containerize import Containerize -# Exception to be raised when the user interrupts the command with Ctrl+C. + +# Exception to be raised when the user interrupts the command with Ctrl+C or +# Ctrl+Z. class UserInterruptException(Exception): pass @@ -25,7 +27,7 @@ class OsmUpdateCommand(QleverCommand): def __init__(self): self.planet_replication_server_url = \ "https://planet.osm.org/replication/" - # Remember if Ctrl+C was pressed and if a update is currently running, + # Remember if Ctrl+C was pressed and if an update is currently running, # so we can handle it gracefully. self.is_running_update = False self.ctrl_c_pressed = False From d31a2abd74d46c3dd1fec81bf5d72f2e5ff22d95 Mon Sep 17 00:00:00 2001 From: Nicolas von Trott Date: Mon, 28 Jul 2025 10:47:59 +0200 Subject: [PATCH 10/25] * Make 'granularity' a named argument but keep it required --- src/qlever/commands/osm_update.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/qlever/commands/osm_update.py b/src/qlever/commands/osm_update.py index 68fde45c0..b9786e76f 100644 --- a/src/qlever/commands/osm_update.py +++ b/src/qlever/commands/osm_update.py @@ -47,10 +47,11 @@ def relevant_qleverfile_arguments(self) -> dict[str: list[str]]: def additional_arguments(self, subparser) -> None: subparser.add_argument( - "granularity", + "--granularity", nargs=1, choices=["minute", "hour", "day"], type=str, + required=True, help="The granularity with which the OSM data should be updated. " "Choose from 'minute', 'hour', or 'day'.", ) From d6ebd7d7bc32c6ca42646920ff18a5fc34a1ccf8 Mon Sep 17 00:00:00 2001 From: Nicolas von Trott Date: Mon, 28 Jul 2025 11:09:01 +0200 Subject: [PATCH 11/25] * Added support to run command natively --- src/qlever/commands/osm_update.py | 40 +++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/src/qlever/commands/osm_update.py b/src/qlever/commands/osm_update.py index b9786e76f..c3196f908 100644 --- a/src/qlever/commands/osm_update.py +++ b/src/qlever/commands/osm_update.py @@ -8,7 +8,7 @@ from qlever.command import QleverCommand from qlever.log import log -from qlever.util import run_command, is_qlever_server_alive +from qlever.util import run_command, is_qlever_server_alive, binary_exists from qlever.containerize import Containerize @@ -94,6 +94,13 @@ def additional_arguments(self, subparser) -> None: default="docker.io/adfreiburg/olu", help="The name of the image used for osm-live-updates.", ) + subparser.add_argument( + "--olu-binary", + type=str, + default="osm-live-updates", + help="The name or path of the compiled `osm-live-updates` binary" + " to use when running natively.", + ) # Handle Ctrl+C gracefully by finishing the current update and then # exiting. @@ -269,22 +276,29 @@ def construct_olu_cmd(self, replication_server_url: str, args) -> str: if args.bbox: olu_cmd += f" --bbox {args.bbox}" if args.polygon: - # Check if polygon file exists + # Check if the polygon file exists if not os.path.exists(args.polygon): raise FileNotFoundError(f'No file matching "{args.polygon}"' f' found.') olu_cmd += f" --polygon {args.polygon}" - olu_cmd = Containerize().containerize_command( - olu_cmd, - args.system, - "run --rm", - args.olu_image, - container_name, - volumes=[("$(pwd)", "/update")], - working_directory="/update", - use_bash=False - ) + if args.system == "native": + if not binary_exists(args.olu_binary, "olu-binary"): + # 'binary_exists' will log an error message, so we raise the + # FileNotFoundError without an additional message. + raise FileNotFoundError() + else: + return f'{args.olu_binary} {olu_cmd}' + else: + return Containerize().containerize_command( + olu_cmd, + args.system, + "run --rm", + args.olu_image, + container_name, + volumes=[("$(pwd)", "/update")], + working_directory="/update", + use_bash=False + ) - return olu_cmd From 7713f5d03a22301262320033c37d8bab844f0918 Mon Sep 17 00:00:00 2001 From: Nicolas von Trott Date: Mon, 28 Jul 2025 17:45:21 +0200 Subject: [PATCH 12/25] =?UTF-8?q?*=20The=20'get-polygon'=20command=20has?= =?UTF-8?q?=20been=20added,=20along=20with=20the=20'polygon'=20and=20'get?= =?UTF-8?q?=5Fpolygon=5Fcmd'=20arguments=20in=20QLeverfiles,=20to=20make?= =?UTF-8?q?=20it=20easier=20to=20use=20'osm-update'=20for=20country=20extr?= =?UTF-8?q?acts.=20Simply=20download=20the=20polyfile=20that=20defines=20t?= =?UTF-8?q?he=20boundaries=20of=20your=20country=20with=20'get-polygon',?= =?UTF-8?q?=20then=20start=20'osm-update'=20=E2=80=94=20there's=20no=20nee?= =?UTF-8?q?d=20to=20specify=20the=20polygon=20explicitly.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/qlever/Qleverfiles/Qleverfile.osm-country | 10 ++++ src/qlever/commands/get_polygon.py | 49 +++++++++++++++++++ src/qlever/commands/osm_update.py | 25 +++++----- src/qlever/qleverfile.py | 14 ++++++ 4 files changed, 84 insertions(+), 14 deletions(-) create mode 100644 src/qlever/commands/get_polygon.py diff --git a/src/qlever/Qleverfiles/Qleverfile.osm-country b/src/qlever/Qleverfiles/Qleverfile.osm-country index 1b2c334f4..7752f9790 100644 --- a/src/qlever/Qleverfiles/Qleverfile.osm-country +++ b/src/qlever/Qleverfiles/Qleverfile.osm-country @@ -7,6 +7,14 @@ # Make sure that osm2rdf is in your path. Set CONTINENT and COUNTRY such that # the link under GET_DATA_CMD exists (the names are usually the canonical # names). The time for osm2rdf is around the same as that for "qlever index". +# +# If you plan to use the 'osm-update' command, you should also define POLYGON +# and GET_POLYGON_CMD, which should return a polygon that describes the area of +# the dataset. Otherwise, the complete OSM planet data will be used to update +# your endpoint. +# +# qlever get-polygon # downloads the .polygon file from Geofabrik, that defines the boundaries of your extract +# qlever osm-update --granularity minute/hour/day # the endpoint will be updated every minute/hour/day # Dataset settings [data] @@ -17,6 +25,8 @@ PBF = ${NAME}.pbf WITH_TEXT = false VERSION = $$(ls -l --time-style=+%d.%m.%Y ${PBF} 2> /dev/null | cut -d' ' -f6) GET_DATA_CMD = wget -nc -O ${PBF} https://download.geofabrik.de/${CONTINENT}/${COUNTRY}-latest.osm.pbf; rm -f ${NAME}.*.bz2; ( time osm2rdf ${PBF} -o ${NAME}.ttl --cache . ) 2>&1 | tee ${NAME}.osm2rdf-log.txt; rm -f spatial-* +POLYGON = ${NAME}.polygon +GET_POLYGON_CMD = wget -O ${POLYGON} https://download.geofabrik.de/${CONTINENT}/${COUNTRY}.poly DESCRIPTION = OSM ${COUNTRY}, dump from ${VERSION} with ogc:sfContains # Indexer settings diff --git a/src/qlever/commands/get_polygon.py b/src/qlever/commands/get_polygon.py new file mode 100644 index 000000000..f4c72e542 --- /dev/null +++ b/src/qlever/commands/get_polygon.py @@ -0,0 +1,49 @@ +from __future__ import annotations + +import shlex + +from qlever.command import QleverCommand +from qlever.log import log +from qlever.util import get_total_file_size, run_command + + +class GetPolygonCommand(QleverCommand): + """ + Class for executing the `get-polygon` command. + """ + + def __init__(self): + pass + + def description(self) -> str: + return ("Get polygon for an OSM country extract using the " + "GET_POLYGON_CMD in the Qleverfile") + + def should_have_qleverfile(self) -> bool: + return True + + def relevant_qleverfile_arguments(self) -> dict[str: list[str]]: + return {"data": ["polygon", "get_polygon_cmd"]} + + def additional_arguments(self, subparser) -> None: + pass + + def execute(self, args) -> bool: + # Construct the command line and show it. + self.show(args.get_polygon_cmd, only_show=args.show) + if args.show: + return True + + # Execute the command line. + try: + run_command(args.get_polygon_cmd, show_output=True) + except Exception as e: + log.error(f"Problem executing \"{args.get_polygon_cmd}\": {e}") + return False + + # Show the total file size in GB and return. + patterns = shlex.split(args.polygon) + total_file_size = get_total_file_size(patterns) + print(f"Download successful, total file size: " + f"{total_file_size:,} bytes") + return True \ No newline at end of file diff --git a/src/qlever/commands/osm_update.py b/src/qlever/commands/osm_update.py index c3196f908..8fc2e2c33 100644 --- a/src/qlever/commands/osm_update.py +++ b/src/qlever/commands/osm_update.py @@ -41,7 +41,7 @@ def should_have_qleverfile(self) -> bool: return True def relevant_qleverfile_arguments(self) -> dict[str: list[str]]: - return {"data": ["name"], + return {"data": ["name", "polygon"], "server": ["host_name", "port", "access_token"], "runtime": ["system"]} @@ -63,22 +63,14 @@ def additional_arguments(self, subparser) -> None: "Otherwise, it will be updated continuously at the specified " "granularity.", ) - subparser.add_argument( - "--polygon", - nargs='?', - type=str, - help="The polygon that defines the boundaries of your osm " - "dataset. (Poly files for country extracts are available at " - "https://download.geofabrik.de/). If no boundary is provided," - " the complete osm planet data will be used.", - ) subparser.add_argument( "--bbox", nargs='?', type=str, help="The bounding box (LEFT,BOTTOM,RIGHT,TOP) that defines the " - "boundaries of your osm dataset. If no boundary is provided," - " the complete osm planet data will be used.", + "boundaries of your OSM dataset. Not necessary if you want to" + " use the complete OSM planet data or if you have already run" + " the 'qlever get-polygon' command.", ) subparser.add_argument( "--replication-server", @@ -275,13 +267,18 @@ def construct_olu_cmd(self, replication_server_url: str, args) -> str: "Please choose one of them.") if args.bbox: olu_cmd += f" --bbox {args.bbox}" - if args.polygon: + elif args.polygon: # Check if the polygon file exists if not os.path.exists(args.polygon): raise FileNotFoundError(f'No file matching "{args.polygon}"' - f' found.') + f' found. Did you call ' + f'`qlever get-polygon`? If you did, ' + f'check POLYGON and GET_POLYGON_CMD in' + f' the QLeverfile"') olu_cmd += f" --polygon {args.polygon}" + # If the user has not specified a bounding box or polygon, we assume + # the user wants to use the complete OSM planet data. if args.system == "native": if not binary_exists(args.olu_binary, "olu-binary"): diff --git a/src/qlever/qleverfile.py b/src/qlever/qleverfile.py index d26762cf6..2d91b8174 100644 --- a/src/qlever/qleverfile.py +++ b/src/qlever/qleverfile.py @@ -70,6 +70,20 @@ def arg(*args, **kwargs): choices=["ttl", "nt", "nq"], help="The format of the data", ) + data_args["polygon"] = arg( + "--polygon", + type=str, + default=None, + help="The name of the file containing the polygon for an OSM " + "extract", + ) + data_args["get_polygon_cmd"] = arg( + "--get-polygon-cmd", + type=str, + default=None, + help="The command to get the polygon defining the area for an OSM " + "extract", + ) index_args["input_files"] = arg( "--input-files", From b187aca7d8cb2dcd5f85d9861ec42f125086ee19 Mon Sep 17 00:00:00 2001 From: Nicolas von Trott Date: Thu, 31 Jul 2025 14:57:24 +0200 Subject: [PATCH 13/25] * Fixed file extension for polygon file --- src/qlever/Qleverfiles/Qleverfile.osm-country | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/qlever/Qleverfiles/Qleverfile.osm-country b/src/qlever/Qleverfiles/Qleverfile.osm-country index 7752f9790..db83d24a5 100644 --- a/src/qlever/Qleverfiles/Qleverfile.osm-country +++ b/src/qlever/Qleverfiles/Qleverfile.osm-country @@ -25,7 +25,7 @@ PBF = ${NAME}.pbf WITH_TEXT = false VERSION = $$(ls -l --time-style=+%d.%m.%Y ${PBF} 2> /dev/null | cut -d' ' -f6) GET_DATA_CMD = wget -nc -O ${PBF} https://download.geofabrik.de/${CONTINENT}/${COUNTRY}-latest.osm.pbf; rm -f ${NAME}.*.bz2; ( time osm2rdf ${PBF} -o ${NAME}.ttl --cache . ) 2>&1 | tee ${NAME}.osm2rdf-log.txt; rm -f spatial-* -POLYGON = ${NAME}.polygon +POLYGON = ${NAME}.poly GET_POLYGON_CMD = wget -O ${POLYGON} https://download.geofabrik.de/${CONTINENT}/${COUNTRY}.poly DESCRIPTION = OSM ${COUNTRY}, dump from ${VERSION} with ogc:sfContains From c8d23ecd0811b6b3ec7dac232864eb76ba295d42 Mon Sep 17 00:00:00 2001 From: Nicolas von Trott Date: Fri, 8 Aug 2025 10:15:45 +0200 Subject: [PATCH 14/25] * Renamed command from osm-update to update-osm --- src/qlever/commands/{osm_update.py => update_osm.py} | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) rename src/qlever/commands/{osm_update.py => update_osm.py} (99%) diff --git a/src/qlever/commands/osm_update.py b/src/qlever/commands/update_osm.py similarity index 99% rename from src/qlever/commands/osm_update.py rename to src/qlever/commands/update_osm.py index 8fc2e2c33..1d1060281 100644 --- a/src/qlever/commands/osm_update.py +++ b/src/qlever/commands/update_osm.py @@ -19,9 +19,9 @@ class UserInterruptException(Exception): pass -class OsmUpdateCommand(QleverCommand): +class UpdateOsmCommand(QleverCommand): """ - Class for executing the `osm-update` command. + Class for executing the `update-osm` command. """ def __init__(self): From ceb4ee0a8cc26f870e5c980dd6bccc22bbaca8de Mon Sep 17 00:00:00 2001 From: Nicolas von Trott Date: Fri, 8 Aug 2025 10:26:35 +0200 Subject: [PATCH 15/25] * Removed 'get_polygon' command and moved functionality to 'update-osm --get-polygon' --- src/qlever/commands/get_polygon.py | 49 ------------------------------ src/qlever/commands/update_osm.py | 46 ++++++++++++++++++++++++---- 2 files changed, 40 insertions(+), 55 deletions(-) delete mode 100644 src/qlever/commands/get_polygon.py diff --git a/src/qlever/commands/get_polygon.py b/src/qlever/commands/get_polygon.py deleted file mode 100644 index f4c72e542..000000000 --- a/src/qlever/commands/get_polygon.py +++ /dev/null @@ -1,49 +0,0 @@ -from __future__ import annotations - -import shlex - -from qlever.command import QleverCommand -from qlever.log import log -from qlever.util import get_total_file_size, run_command - - -class GetPolygonCommand(QleverCommand): - """ - Class for executing the `get-polygon` command. - """ - - def __init__(self): - pass - - def description(self) -> str: - return ("Get polygon for an OSM country extract using the " - "GET_POLYGON_CMD in the Qleverfile") - - def should_have_qleverfile(self) -> bool: - return True - - def relevant_qleverfile_arguments(self) -> dict[str: list[str]]: - return {"data": ["polygon", "get_polygon_cmd"]} - - def additional_arguments(self, subparser) -> None: - pass - - def execute(self, args) -> bool: - # Construct the command line and show it. - self.show(args.get_polygon_cmd, only_show=args.show) - if args.show: - return True - - # Execute the command line. - try: - run_command(args.get_polygon_cmd, show_output=True) - except Exception as e: - log.error(f"Problem executing \"{args.get_polygon_cmd}\": {e}") - return False - - # Show the total file size in GB and return. - patterns = shlex.split(args.polygon) - total_file_size = get_total_file_size(patterns) - print(f"Download successful, total file size: " - f"{total_file_size:,} bytes") - return True \ No newline at end of file diff --git a/src/qlever/commands/update_osm.py b/src/qlever/commands/update_osm.py index 1d1060281..0359be191 100644 --- a/src/qlever/commands/update_osm.py +++ b/src/qlever/commands/update_osm.py @@ -3,12 +3,14 @@ import os import signal import subprocess +import shlex import time from typing import Optional from qlever.command import QleverCommand from qlever.log import log -from qlever.util import run_command, is_qlever_server_alive, binary_exists +from qlever.util import run_command, is_qlever_server_alive, binary_exists, \ + get_total_file_size from qlever.containerize import Containerize @@ -41,7 +43,7 @@ def should_have_qleverfile(self) -> bool: return True def relevant_qleverfile_arguments(self) -> dict[str: list[str]]: - return {"data": ["name", "polygon"], + return {"data": ["name", "polygon", "get_polygon_cmd"], "server": ["host_name", "port", "access_token"], "runtime": ["system"]} @@ -51,7 +53,7 @@ def additional_arguments(self, subparser) -> None: nargs=1, choices=["minute", "hour", "day"], type=str, - required=True, + default="day", help="The granularity with which the OSM data should be updated. " "Choose from 'minute', 'hour', or 'day'.", ) @@ -93,6 +95,13 @@ def additional_arguments(self, subparser) -> None: help="The name or path of the compiled `osm-live-updates` binary" " to use when running natively.", ) + subparser.add_argument( + "--get-polygon", + action='store_true', + default=False, + help="If set, the command will get the polygon for an OSM country" + " extract using the GET_POLYGON_CMD in the Qleverfile." + ) # Handle Ctrl+C gracefully by finishing the current update and then # exiting. @@ -127,6 +136,12 @@ def handle_ctrl_z(self, args, signal_received, frame): raise UserInterruptException() def execute(self, args) -> bool: + # If the '--get-polygon' flag is set, we download the polygon and + # return. + if args.get_polygon: + self.get_polygon(args) + return True + # If the user has specified a replication server, use that one, # otherwise we use the planet replication server with the specified # granularity. @@ -272,9 +287,9 @@ def construct_olu_cmd(self, replication_server_url: str, args) -> str: if not os.path.exists(args.polygon): raise FileNotFoundError(f'No file matching "{args.polygon}"' f' found. Did you call ' - f'`qlever get-polygon`? If you did, ' - f'check POLYGON and GET_POLYGON_CMD in' - f' the QLeverfile"') + f'`qlever update-osm --get-polygon`? If' + f' you did, check POLYGON and ' + f'GET_POLYGON_CMD in the QLeverfile') olu_cmd += f" --polygon {args.polygon}" # If the user has not specified a bounding box or polygon, we assume @@ -299,3 +314,22 @@ def construct_olu_cmd(self, replication_server_url: str, args) -> str: use_bash=False ) + def get_polygon(self, args) -> bool: + # Construct the command line and show it. + self.show(args.get_polygon_cmd, only_show=args.show) + if args.show: + return True + + # Execute the command line. + try: + run_command(args.get_polygon_cmd, show_output=True) + except Exception as e: + log.error(f"Problem executing \"{args.get_polygon_cmd}\": {e}") + return False + + # Show the total file size in GB and return. + patterns = shlex.split(args.polygon) + total_file_size = get_total_file_size(patterns) + print(f"Download successful, total file size: " + f"{total_file_size:,} bytes") + return True \ No newline at end of file From b7a7d430aa1073d80a13c473f5bb844e0a7d467b Mon Sep 17 00:00:00 2001 From: Nicolas von Trott Date: Tue, 12 Aug 2025 10:00:35 +0200 Subject: [PATCH 16/25] * Fixed bug where default argument for 'granularity' was not correctly applied --- src/qlever/commands/update_osm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/qlever/commands/update_osm.py b/src/qlever/commands/update_osm.py index 0359be191..cbd591e0b 100644 --- a/src/qlever/commands/update_osm.py +++ b/src/qlever/commands/update_osm.py @@ -53,7 +53,7 @@ def additional_arguments(self, subparser) -> None: nargs=1, choices=["minute", "hour", "day"], type=str, - default="day", + default=["day"], help="The granularity with which the OSM data should be updated. " "Choose from 'minute', 'hour', or 'day'.", ) From c68fc2ca85a51554ee29ebb3c529ef151fbb34d4 Mon Sep 17 00:00:00 2001 From: Nicolas von Trott Date: Wed, 27 Aug 2025 13:48:05 +0200 Subject: [PATCH 17/25] * --file-server option was renamed to --replication-server in osm-live-updates * use long options names when calling olu --- src/qlever/commands/update_osm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/qlever/commands/update_osm.py b/src/qlever/commands/update_osm.py index cbd591e0b..1cc8f822b 100644 --- a/src/qlever/commands/update_osm.py +++ b/src/qlever/commands/update_osm.py @@ -272,8 +272,8 @@ def construct_olu_cmd(self, replication_server_url: str, args) -> str: container_name = f"olu-{args.name}" olu_cmd = f"{sparql_endpoint}" - olu_cmd += f" -a {args.access_token}" - olu_cmd += f" -f {replication_server_url}" + olu_cmd += f" --access-token {args.access_token}" + olu_cmd += f" --replication-server {replication_server_url}" olu_cmd += f" --qlever" # If the user has specified a boundary, we add it to the command. From a4011b1cf58577507fd34827992fcc65e280b4f9 Mon Sep 17 00:00:00 2001 From: Nicolas von Trott Date: Thu, 16 Oct 2025 10:54:19 +0200 Subject: [PATCH 18/25] * Remove get polygon commands from qleverfile.py --- src/qlever/qleverfile.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/src/qlever/qleverfile.py b/src/qlever/qleverfile.py index b92dc03d8..5ea39b21d 100644 --- a/src/qlever/qleverfile.py +++ b/src/qlever/qleverfile.py @@ -70,20 +70,6 @@ def arg(*args, **kwargs): choices=["ttl", "nt", "nq"], help="The format of the data", ) - data_args["polygon"] = arg( - "--polygon", - type=str, - default=None, - help="The name of the file containing the polygon for an OSM " - "extract", - ) - data_args["get_polygon_cmd"] = arg( - "--get-polygon-cmd", - type=str, - default=None, - help="The command to get the polygon defining the area for an OSM " - "extract", - ) index_args["input_files"] = arg( "--input-files", From 0ebd73bffd690c3142f509ebe3516507c56e8026 Mon Sep 17 00:00:00 2001 From: Nicolas von Trott Date: Thu, 16 Oct 2025 10:56:48 +0200 Subject: [PATCH 19/25] * Remove get-polygon functionality from update-osm --- src/qlever/commands/update_osm.py | 32 ++++++------------------------- 1 file changed, 6 insertions(+), 26 deletions(-) diff --git a/src/qlever/commands/update_osm.py b/src/qlever/commands/update_osm.py index 1cc8f822b..221fb21cd 100644 --- a/src/qlever/commands/update_osm.py +++ b/src/qlever/commands/update_osm.py @@ -96,11 +96,11 @@ def additional_arguments(self, subparser) -> None: " to use when running natively.", ) subparser.add_argument( - "--get-polygon", - action='store_true', - default=False, - help="If set, the command will get the polygon for an OSM country" - " extract using the GET_POLYGON_CMD in the Qleverfile." + "--polygon", + type=str, + default=None, + help="The name of the file containing the polygon for an OSM " + "extract", ) # Handle Ctrl+C gracefully by finishing the current update and then @@ -312,24 +312,4 @@ def construct_olu_cmd(self, replication_server_url: str, args) -> str: volumes=[("$(pwd)", "/update")], working_directory="/update", use_bash=False - ) - - def get_polygon(self, args) -> bool: - # Construct the command line and show it. - self.show(args.get_polygon_cmd, only_show=args.show) - if args.show: - return True - - # Execute the command line. - try: - run_command(args.get_polygon_cmd, show_output=True) - except Exception as e: - log.error(f"Problem executing \"{args.get_polygon_cmd}\": {e}") - return False - - # Show the total file size in GB and return. - patterns = shlex.split(args.polygon) - total_file_size = get_total_file_size(patterns) - print(f"Download successful, total file size: " - f"{total_file_size:,} bytes") - return True \ No newline at end of file + ) \ No newline at end of file From ccce6e4a9caa384faa9b2e52bd9bd942960de95c Mon Sep 17 00:00:00 2001 From: Nicolas von Trott Date: Thu, 16 Oct 2025 11:00:22 +0200 Subject: [PATCH 20/25] * Remove get-polygon functionality from update-osm --- src/qlever/commands/update_osm.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/qlever/commands/update_osm.py b/src/qlever/commands/update_osm.py index 221fb21cd..69d4fed46 100644 --- a/src/qlever/commands/update_osm.py +++ b/src/qlever/commands/update_osm.py @@ -43,7 +43,7 @@ def should_have_qleverfile(self) -> bool: return True def relevant_qleverfile_arguments(self) -> dict[str: list[str]]: - return {"data": ["name", "polygon", "get_polygon_cmd"], + return {"data": ["name"], "server": ["host_name", "port", "access_token"], "runtime": ["system"]} @@ -286,10 +286,7 @@ def construct_olu_cmd(self, replication_server_url: str, args) -> str: # Check if the polygon file exists if not os.path.exists(args.polygon): raise FileNotFoundError(f'No file matching "{args.polygon}"' - f' found. Did you call ' - f'`qlever update-osm --get-polygon`? If' - f' you did, check POLYGON and ' - f'GET_POLYGON_CMD in the QLeverfile') + f' found.') olu_cmd += f" --polygon {args.polygon}" # If the user has not specified a bounding box or polygon, we assume From 86e220bd55495463b17c021a2632b6991e0e4c05 Mon Sep 17 00:00:00 2001 From: Nicolas von Trott Date: Thu, 16 Oct 2025 11:32:31 +0200 Subject: [PATCH 21/25] * Remove get-polygon functionality from update-osm --- src/qlever/commands/update_osm.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/qlever/commands/update_osm.py b/src/qlever/commands/update_osm.py index 69d4fed46..45bb61931 100644 --- a/src/qlever/commands/update_osm.py +++ b/src/qlever/commands/update_osm.py @@ -136,12 +136,6 @@ def handle_ctrl_z(self, args, signal_received, frame): raise UserInterruptException() def execute(self, args) -> bool: - # If the '--get-polygon' flag is set, we download the polygon and - # return. - if args.get_polygon: - self.get_polygon(args) - return True - # If the user has specified a replication server, use that one, # otherwise we use the planet replication server with the specified # granularity. From 678836d132100b450d32039ed0f7772fe318273d Mon Sep 17 00:00:00 2001 From: Nicolas von Trott Date: Fri, 31 Oct 2025 17:48:33 +0100 Subject: [PATCH 22/25] * Add `tmp` option to olu command --- src/qlever/commands/update_osm.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/qlever/commands/update_osm.py b/src/qlever/commands/update_osm.py index 45bb61931..9358e566d 100644 --- a/src/qlever/commands/update_osm.py +++ b/src/qlever/commands/update_osm.py @@ -102,6 +102,12 @@ def additional_arguments(self, subparser) -> None: help="The name of the file containing the polygon for an OSM " "extract", ) + subparser.add_argument( + "--tmp", + type=str, + default="olu_tmp", + help="The directory to use for temporary files created by olu", + ) # Handle Ctrl+C gracefully by finishing the current update and then # exiting. @@ -204,6 +210,10 @@ def execute(self, args) -> bool: ) return False + # Create the temporary directory for olu if it does not exist yet. + if not os.path.exists(args.tmp): + os.makedirs(args.tmp) + # Pull the latest image for osm-live-updates if remote image is used. if pull_cmd: run_command(pull_cmd) @@ -269,6 +279,7 @@ def construct_olu_cmd(self, replication_server_url: str, args) -> str: olu_cmd += f" --access-token {args.access_token}" olu_cmd += f" --replication-server {replication_server_url}" olu_cmd += f" --qlever" + olu_cmd += f" --tmp {args.tmp}" # If the user has specified a boundary, we add it to the command. if args.bbox and args.polygon: From 846ee342f4761e5cf0c319845fe66d997592db11 Mon Sep 17 00:00:00 2001 From: Nicolas von Trott Date: Sat, 1 Nov 2025 12:59:25 +0100 Subject: [PATCH 23/25] * Add `olu-statistics` option to olu command --- src/qlever/commands/update_osm.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/qlever/commands/update_osm.py b/src/qlever/commands/update_osm.py index 9358e566d..1a642ed89 100644 --- a/src/qlever/commands/update_osm.py +++ b/src/qlever/commands/update_osm.py @@ -108,6 +108,13 @@ def additional_arguments(self, subparser) -> None: default="olu_tmp", help="The directory to use for temporary files created by olu", ) + subparser.add_argument( + "--olu-statistics", + action='store_true', + default=False, + help="If set, olu will print extensive statistics about the update" + " process", + ) # Handle Ctrl+C gracefully by finishing the current update and then # exiting. @@ -281,6 +288,9 @@ def construct_olu_cmd(self, replication_server_url: str, args) -> str: olu_cmd += f" --qlever" olu_cmd += f" --tmp {args.tmp}" + if args.olu-statistics: + olu_cmd += f" --statistics" + # If the user has specified a boundary, we add it to the command. if args.bbox and args.polygon: raise ValueError("You cannot specify both --bbox and --polygon. " From 55a1ea31bd41d4b6b2517c06e86e3bad9f4a2883 Mon Sep 17 00:00:00 2001 From: Nicolas von Trott Date: Sat, 1 Nov 2025 13:01:48 +0100 Subject: [PATCH 24/25] * Fix for `olu-statistics` option --- src/qlever/commands/update_osm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/qlever/commands/update_osm.py b/src/qlever/commands/update_osm.py index 1a642ed89..01e958a04 100644 --- a/src/qlever/commands/update_osm.py +++ b/src/qlever/commands/update_osm.py @@ -288,7 +288,7 @@ def construct_olu_cmd(self, replication_server_url: str, args) -> str: olu_cmd += f" --qlever" olu_cmd += f" --tmp {args.tmp}" - if args.olu-statistics: + if args.olu_statistics: olu_cmd += f" --statistics" # If the user has specified a boundary, we add it to the command. From 5046add6d8756039431be0f0f0a7dc13d7084311 Mon Sep 17 00:00:00 2001 From: Nicolas von Trott Date: Mon, 10 Nov 2025 17:06:35 +0100 Subject: [PATCH 25/25] * Use the correct hostname when working with Docker on macOS --- src/qlever/commands/update_osm.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/qlever/commands/update_osm.py b/src/qlever/commands/update_osm.py index 01e958a04..61efd8e4e 100644 --- a/src/qlever/commands/update_osm.py +++ b/src/qlever/commands/update_osm.py @@ -4,6 +4,7 @@ import signal import subprocess import shlex +from sys import platform import time from typing import Optional @@ -279,7 +280,13 @@ def execute(self, args) -> bool: return False def construct_olu_cmd(self, replication_server_url: str, args) -> str: - sparql_endpoint = f"http://{args.host_name}:{args.port}" + if args.system == "docker" and platform == "darwin": + # When using Docker on macOS, we need to use 'host.docker.internal' + # to access the host machine from within a Docker container. + sparql_endpoint = f"http://host.docker.internal:{args.port}" + else: + sparql_endpoint = f"http://{args.host_name}:{args.port}" + container_name = f"olu-{args.name}" olu_cmd = f"{sparql_endpoint}"