Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ Github = "https://github.com/ad-freiburg/qlever"

[project.scripts]
"qlever" = "qlever.qlever_main:main"
"qblazegraph" = "qlever.qlever_main:main"
"qoxigraph" = "qlever.qlever_main:main"
"qlever-old" = "qlever.qlever_old:main"

[tool.setuptools]
license-files = ["LICENSE"]
Expand Down
23 changes: 23 additions & 0 deletions src/qblazegraph/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Use an official OpenJDK runtime as a parent image
FROM openjdk:21-jdk-slim

# Install basic download and archive utilities
RUN apt-get update && apt-get install -y wget gzip coreutils curl unzip

# All following instructions run in /opt, so the jar ends up at
# /opt/blazegraph.jar
WORKDIR /opt

# Download and set up Blazegraph (release BLAZEGRAPH_2_1_6_RC)
RUN wget https://github.com/blazegraph/database/releases/download/BLAZEGRAPH_2_1_6_RC/blazegraph.jar

# Set ownership of /opt to the user passed by the UID and GID build args;
# the chown is skipped when either of the two is unset or empty
ARG UID
ARG GID
RUN if [ "${UID:-}" != "" ] && [ "${GID:-}" != "" ]; then \
chown -R ${UID}:${GID} /opt; \
fi

# Give the owning user execute permission on the jar
RUN chmod u+x blazegraph.jar

# Expose Blazegraph port (currently disabled)
# EXPOSE 9999

# Default command is a plain shell
# NOTE(review): presumably the caller passes the actual java command when
# running a container from this image - confirm against the qblazegraph
# commands
CMD ["bash"]
Empty file added src/qblazegraph/__init__.py
Empty file.
28 changes: 28 additions & 0 deletions src/qblazegraph/blazegraph.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# This configuration turns off incremental inference for load and retract, so
# you must explicitly force these operations if you want to compute the closure
# of the knowledge base. Forcing the closure requires punching through the SAIL
# layer. Of course, if you are not using inference then this configuration is
# just the ticket and is quite fast.

# set the initial and maximum extent of the journal
com.bigdata.journal.AbstractJournal.initialExtent=209715200
com.bigdata.journal.AbstractJournal.maximumExtent=209715200

# turn off automatic inference in the SAIL
com.bigdata.rdf.sail.truthMaintenance=false

# don't store justification chains, meaning retraction requires full manual
# re-closure of the database
com.bigdata.rdf.store.AbstractTripleStore.justify=false

# turn off the statement identifiers feature for provenance
com.bigdata.rdf.store.AbstractTripleStore.statementIdentifiers=false
com.bigdata.rdf.store.AbstractTripleStore.quads=false
# turn on the free text index
com.bigdata.rdf.store.AbstractTripleStore.textIndex=true
com.bigdata.rdf.store.AbstractTripleStore.axiomsClass=com.bigdata.rdf.axioms.NoAxioms
com.bigdata.rdf.store.DataLoader.commit=Incremental
com.bigdata.rdf.store.DataLoader.closure=None

com.bigdata.journal.AbstractJournal.bufferMode=DiskRW
com.bigdata.journal.AbstractJournal.file=blazegraph.jnl
Empty file.
18 changes: 18 additions & 0 deletions src/qblazegraph/commands/example_queries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from __future__ import annotations

from qlever.commands.example_queries import (
ExampleQueriesCommand as QleverExampleQueriesCommand,
)


class ExampleQueriesCommand(QleverExampleQueriesCommand):
    """
    `example-queries` command for Blazegraph.

    Extends the QLever command of the same name; the only addition is a
    default SPARQL endpoint that points at the Blazegraph namespace named
    after the dataset.
    """

    def relevant_qleverfile_arguments(self) -> dict[str, list[str]]:
        """Return the Qleverfile sections and argument names used here."""
        return {"data": ["name"], "server": ["port"], "ui": ["ui_config"]}

    def execute(self, args) -> bool:
        """
        Fill in a default SPARQL endpoint, then run the parent command.

        If `args.sparql_endpoint` is empty, it is set to
        `<host>:<port>/blazegraph/namespace/<name>/sparql`. An endpoint
        given explicitly is left untouched.

        Returns the result of the parent's `execute`.
        """
        if not args.sparql_endpoint:
            # `host_name` is not among the arguments declared in
            # `relevant_qleverfile_arguments` of this class, so it may be
            # missing from `args`; fall back to "localhost" in that case
            # instead of failing with an AttributeError.
            host_name = getattr(args, "host_name", "localhost")
            args.sparql_endpoint = (
                f"{host_name}:{args.port}/blazegraph/namespace/"
                f"{args.name}/sparql"
            )
        return super().execute(args)
1 change: 1 addition & 0 deletions src/qblazegraph/commands/extract_queries.py
1 change: 1 addition & 0 deletions src/qblazegraph/commands/get_data.py
170 changes: 170 additions & 0 deletions src/qblazegraph/commands/index.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
from __future__ import annotations

import glob
import shlex
from pathlib import Path

from qlever.command import QleverCommand
from qlever.containerize import Containerize
from qlever.log import log
from qlever.util import run_command


class IndexCommand(QleverCommand):
    """
    `index` command for Blazegraph.

    Loads the input files into a Blazegraph journal by running
    `com.bigdata.rdf.store.DataLoader`, either natively (needs Java and a
    local blazegraph.jar) or inside a container built from the Dockerfile
    that sits next to the `commands` package.
    """

    def __init__(self):
        self.script_name = "qblazegraph"

    def description(self) -> str:
        """Return the one-line description of this command."""
        return "Build the index for a given RDF dataset"

    def should_have_qleverfile(self) -> bool:
        """Return True: this command reads its settings from a Qleverfile."""
        return True

    def relevant_qleverfile_arguments(self) -> dict[str, list[str]]:
        """Return the Qleverfile sections and argument names used here."""
        return {
            "data": ["name", "format"],
            "index": ["input_files", "java_heap_gb"],
            "runtime": ["system", "image", "index_container"],
        }

    def additional_arguments(self, subparser):
        """Add the command-line-only `--blazegraph-jar` option."""
        subparser.add_argument(
            "--blazegraph-jar",
            type=str,
            default="blazegraph.jar",
            help=(
                "Path to blazegraph.jar file (default: blazegraph.jar) "
                "(this requires that you have Java installed and "
                "blazegraph.jar "
                "downloaded on your machine)"
            ),
        )

    @staticmethod
    def build_image(build_cmd: str, system: str, image: str) -> bool:
        """
        Run `build_cmd` to build the container image.

        `system` and `image` are only used for the error message. Returns
        True on success and False (after logging the error) if the command
        raises.
        """
        try:
            run_command(build_cmd, show_output=True)
            return True
        except Exception as e:
            log.error(f"Building the {system} image {image} failed: {e}")
            return False

    @staticmethod
    def wrap_cmd_in_container(args, cmd: str) -> str:
        """
        Return `cmd` wrapped so that it runs in a `run --rm` container.

        The container uses image `args.image` and name
        `args.index_container`; the current directory is mounted at
        /opt/index, which is also the working directory.
        """
        return Containerize().containerize_command(
            cmd=cmd,
            container_system=args.system,
            run_subcommand="run --rm",
            image_name=args.image,
            container_name=args.index_container,
            volumes=[("$(pwd)", "/opt/index")],
            working_directory="/opt/index",
        )

    def execute(self, args) -> bool:
        """
        Build the index and return True on success, False otherwise.

        Steps: assemble the DataLoader command line (wrapped in a container
        unless `args.system` is "native"), show it, stop here if
        `args.show` is set, check that the input files and the runtime
        prerequisites exist, build the image if it is missing, and finally
        run the command.
        """
        system = args.system
        input_files = args.input_files
        run_natively = system == "native"

        # Inside the image the jar lives in /opt (see the Dockerfile's
        # WORKDIR); natively the user-supplied path is used.
        jar_path = (
            args.blazegraph_jar if run_natively else "/opt/blazegraph.jar"
        )

        index_cmd = (
            f"java -Xmx{args.java_heap_gb}g -XX:+UseG1GC -cp {jar_path} "
            "com.bigdata.rdf.store.DataLoader -verbose "
            f"-namespace {args.name} blazegraph.properties {input_files}"
        )
        index_cmd += f" | tee {args.name}.index-log.txt"

        if run_natively:
            cmd_to_show = index_cmd
        else:
            index_cmd = self.wrap_cmd_in_container(args, index_cmd)
            dockerfile_dir = Path(__file__).parent.parent
            dockerfile_path = dockerfile_dir / "Dockerfile"
            build_cmd = (
                f"{system} build -f {dockerfile_path} -t {args.image} "
                "--build-arg "
                f"UID=$(id -u) --build-arg GID=$(id -g) {dockerfile_dir}"
            )
            # An empty result means the image has not been built yet.
            image_id = run_command(
                f"{system} images -q {args.image}", return_output=True
            )
            cmd_to_show = (
                f"{build_cmd}\n\n{index_cmd}" if not image_id else index_cmd
            )

        # Show the command line.
        self.show(cmd_to_show, only_show=args.show)
        if args.show:
            return True

        # Check if all of the input files exist.
        for pattern in shlex.split(input_files):
            if not glob.glob(pattern):
                log.error(f'No file matching "{pattern}" found')
                log.info("")
                log.info(
                    f"Did you call `{self.script_name} get-data`? If you "
                    "did, "
                    "check GET_DATA_CMD and INPUT_FILES in the Qleverfile"
                )
                return False

        if run_natively:
            # When running natively, check if Java and the jar are there.
            try:
                run_command("java --help")
            except Exception as e:
                log.error(f"Java not found on the machine! - {e}")
                log.info(
                    "Blazegraph needs Java to execute the blazegraph.jar file"
                )
                return False
            if not Path(args.blazegraph_jar).exists():
                jar_link = (
                    "https://github.com/blazegraph/database/releases/download/"
                    "BLAZEGRAPH_2_1_6_RC/blazegraph.jar"
                )
                log.error(
                    "Couldn't find the blazegraph.jar in specified path: "
                    f"{Path(args.blazegraph_jar).absolute()}\n"
                )
                log.info(
                    "Are you sure you downloaded the blazegraph.jar file? "
                    f"blazegraph.jar can be downloaded from {jar_link}"
                )
                return False
        else:
            # A running index container means a previous load has not
            # finished yet, so do not start a second one.
            if Containerize().is_running(system, args.index_container):
                log.info(
                    f"{system} container {args.index_container} is still "
                    "up, "
                    "which means that data loading is in progress. Please "
                    "wait..."
                )
                return False

            if not image_id:
                build_successful = self.build_image(
                    build_cmd, system, args.image
                )
                if not build_successful:
                    return False
            else:
                log.info(f"{args.image} image present on the system\n")

        # TODO: decide whether to refuse indexing when a journal from a
        # previous run (blazegraph.jnl) is already present in the working
        # directory. A check for this was drafted here but left disabled;
        # note that the journal is a file, so `Path.exists()` is the test
        # to use, not `iterdir()`.

        # Run the index command.
        try:
            run_command(index_cmd, show_output=True)
        except Exception as e:
            log.error(f"Building the index failed: {e}")
            return False

        return True
1 change: 1 addition & 0 deletions src/qblazegraph/commands/log.py
16 changes: 16 additions & 0 deletions src/qblazegraph/commands/query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from __future__ import annotations

from qoxigraph.commands.query import QueryCommand as QoxigraphQueryCommand


class QueryCommand(QoxigraphQueryCommand):
    """
    `query` command for Blazegraph.

    Extends the qoxigraph command of the same name; the only addition is a
    default SPARQL endpoint that points at the Blazegraph namespace named
    after the dataset.
    """

    def relevant_qleverfile_arguments(self) -> dict[str, list[str]]:
        """Return the Qleverfile sections and argument names used here."""
        return {"data": ["name"], "server": ["port", "access_token"]}

    def execute(self, args) -> bool:
        """
        Fill in a default SPARQL endpoint, then run the parent command.

        If `args.sparql_endpoint` is empty, it is set to
        `localhost:<port>/blazegraph/namespace/<name>/sparql`. An endpoint
        given explicitly is left untouched.

        Returns the result of the parent's `execute`.
        """
        if not args.sparql_endpoint:
            args.sparql_endpoint = (
                f"localhost:{args.port}/blazegraph/namespace/"
                f"{args.name}/sparql"
            )
        # Propagate the parent's result; without the `return` this method
        # would always yield None despite being declared to return bool.
        return super().execute(args)
69 changes: 69 additions & 0 deletions src/qblazegraph/commands/setup_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
from __future__ import annotations

import shutil
from pathlib import Path

from qlever.log import log
from qoxigraph.commands.setup_config import (
SetupConfigCommand as QoxigraphSetupConfigCommand,
)


class SetupConfigCommand(QoxigraphSetupConfigCommand):
    """
    `setup-config` command for Blazegraph.

    Builds on the setup-config command of qoxigraph, with a different
    Docker image name. In addition, it writes a JAVA_HEAP_GB setting to the
    [index] and [server] sections of the generated Qleverfile and copies
    the bundled blazegraph.properties file to the current directory.
    """

    # Docker image name for Blazegraph.
    # NOTE(review): presumably read by the parent class when it generates
    # the Qleverfile - confirm there.
    IMAGE = "adfreiburg/qblazegraph"

    def execute(self, args) -> bool:
        """
        Write the Qleverfile and copy blazegraph.properties.

        Returns True only if both files were written to the current
        directory. If the parent's `validate_qleverfile_setup` returns a
        value other than None, that value is returned right away.
        """
        qleverfile_path = Path("Qleverfile")
        exit_status = self.validate_qleverfile_setup(args, qleverfile_path)
        if exit_status is not None:
            return exit_status

        qleverfile_parser = self.get_filtered_qleverfile_parser(
            args.config_name
        )
        # Add the java_heap_gb to index and server sections. The value is
        # passed as a string because `ConfigParser.set` rejects non-string
        # values with a TypeError; the written file is the same either way.
        java_heap_gb = "6"
        qleverfile_parser.set("index", "JAVA_HEAP_GB", java_heap_gb)
        qleverfile_parser.set("server", "JAVA_HEAP_GB", java_heap_gb)

        # Write the Qleverfile to the current directory.
        try:
            with qleverfile_path.open("w") as f:
                qleverfile_parser.write(f)
        except Exception as e:
            log.error(
                f'Could not copy "{qleverfile_path}" to current directory: {e}'
            )
            return False

        # If we get here, everything went well.
        log.info(
            f'Created Qleverfile for config "{args.config_name}"'
            f" in current directory"
        )
        log.info("")

        # The properties file ships with the package, one directory above
        # this module.
        log.info("Fetching blazegraph.properties file...")
        properties_file_path = (
            Path(__file__).parent.parent / "blazegraph.properties"
        )
        destination = Path("blazegraph.properties")
        try:
            shutil.copy(properties_file_path, destination)
        except Exception as e:
            file_url = (
                "https://github.com/ad-freiburg/qlever-control/tree/main/src/"
                "qblazegraph/blazegraph.properties"
            )
            log.error(
                "Couldn't copy blazegraph.properties file to current working "
                f"directory! Error: {e}\n"
            )
            log.info(f"Download it manually from {file_url}")
            return False

        log.info("Copied blazegraph.properties to current directory!")
        return True
Loading