Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,11 @@ jobs:
- name: Run BHE job scheduling test
run: |
.venv/bin/pytest tests/test_bhe_job_scheduling.py -v

- name: Run preprocess lookup generation tests
run: |
.venv/bin/pytest tests/test_preproc.py -v

- name: Run DuckDB lookup exception handling tests
run: |
.venv/bin/pytest tests/test_lookup.py -v
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ local_scheme = "no-local-version"

[dependency-groups]
dev = [
"openhound-faker==0.0.4",
"openhound-faker==0.0.6",
"ipython>=9.12.0",
"pre-commit>=4.5.1",
"pytest>=9.0.1",
Expand Down
35 changes: 29 additions & 6 deletions src/openhound/core/lookup.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
import logging

import duckdb
from duckdb import DuckDBPyConnection

logger = logging.getLogger(__name__)


class LookupManager:
def __init__(self, client: DuckDBPyConnection, schema: str):
Expand All @@ -18,16 +23,34 @@ def _find_all_objects(self, *args) -> list:
Returns:
list: Query result rows as a list of tuples.
"""
self.client.execute(*args)
results = self.client.fetchall()
return results
try:
self.client.execute(*args)
results = self.client.fetchall()
return results

except duckdb.CatalogException as err:
logger.error("DuckDB lookup failed, missing table: %s", err)
return []

except duckdb.Error as err:
logger.error("DuckDB lookup query failed: %s", err)
return []

def _find_single_object(self, *args) -> str | None:
    """Execute a query and return the ID of the first matching row.

    Args:
        *args: Forwarded unchanged to ``self.client.execute``.

    Returns:
        str | None: The first column (i.e. the ID) of the first row as a
        string, or None if no row is found or the DuckDB query fails.
    """
    # Only the DuckDB calls are guarded; a failure is logged and reported to
    # the caller as "no result" instead of propagating.
    try:
        self.client.execute(*args)
        result = self.client.fetchone()
    except duckdb.CatalogException as err:
        # Handled ahead of the generic duckdb.Error branch so a missing table
        # gets the more specific message.
        logger.error("DuckDB lookup failed, missing table: %s", err)
        return None
    except duckdb.Error as err:
        logger.error("DuckDB lookup query failed: %s", err)
        return None

    return str(result[0]) if result else None
34 changes: 34 additions & 0 deletions src/openhound/core/preproc.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import logging
from dataclasses import dataclass
from pathlib import Path
from typing import Callable
Expand All @@ -12,6 +13,33 @@
from openhound.core.progress import Progress
from openhound.sources.resource_files.source import resource_files

logger = logging.getLogger(__name__)


def run_transform(
    transform: Callable[..., None],
    con: duckdb.DuckDBPyConnection,
    *args,
    **kwargs,
) -> None:
    """Run one lookup-generating transform and log any DuckDB failure.

    The transform is invoked as ``transform(con, *args, **kwargs)``. A
    ``duckdb.Error`` raised by it is logged at ERROR level and swallowed, so
    the caller can continue with its next transform. Any other exception
    propagates unchanged.

    Args:
        transform: Callable that receives the connection as its first argument.
        con: DuckDB connection handed to the transform.
        *args: Extra positional arguments forwarded to the transform.
        **kwargs: Extra keyword arguments forwarded to the transform.
    """
    # functools.partial objects and callable instances carry no __name__;
    # fall back to repr() so the error handlers themselves can never raise.
    label = getattr(transform, "__name__", None) or repr(transform)

    try:
        transform(con, *args, **kwargs)
    except duckdb.CatalogException as err:
        # Handled ahead of the generic duckdb.Error branch so a missing table
        # gets the more specific message.
        logger.error(
            "DuckDB preprocessing transform '%s' failed due to missing table: %s",
            label,
            err,
        )
    except duckdb.Error as err:
        logger.error(
            "DuckDB preprocessing transform '%s' failed: %s",
            label,
            err,
        )


class PreProcessor(BasePipeline):
def __init__(
Expand Down Expand Up @@ -66,6 +94,12 @@ def run(
con = duckdb.connect(str(self.output_file))
try:
self.transformer(con)
except duckdb.CatalogException as err:
logger.error(
"DuckDB preprocessing failed due to missing table: %s", err
)
except duckdb.Error as err:
logger.error("DuckDB preprocessing failed: %s", err)
finally:
con.close()

Expand Down
41 changes: 41 additions & 0 deletions tests/test_lookup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import logging

import duckdb

from openhound.core.lookup import LookupManager


def test_find_single_object_returns_none_on_duckdb_error(caplog):
    """Querying a missing table returns None and logs the missing-table error."""
    connection = duckdb.connect(":memory:")
    manager = LookupManager(connection, "main")
    caplog.set_level(logging.ERROR, logger="openhound.core.lookup")

    try:
        outcome = manager._find_single_object("SELECT id FROM missing_table")
    finally:
        connection.close()

    assert outcome is None

    # At least one captured record must carry both the prefix and the table name.
    messages = [entry.message for entry in caplog.records]
    assert any(
        "DuckDB lookup failed, missing table:" in text and "missing_table" in text
        for text in messages
    )


def test_find_all_objects_returns_empty_list_on_duckdb_error(caplog):
    """Querying a missing table returns [] and logs the missing-table error."""
    connection = duckdb.connect(":memory:")
    manager = LookupManager(connection, "main")
    caplog.set_level(logging.ERROR, logger="openhound.core.lookup")

    try:
        rows = manager._find_all_objects("SELECT id FROM missing_table")
    finally:
        connection.close()

    assert rows == []

    # At least one captured record must carry both the prefix and the table name.
    messages = [entry.message for entry in caplog.records]
    assert any(
        "DuckDB lookup failed, missing table:" in text and "missing_table" in text
        for text in messages
    )
93 changes: 93 additions & 0 deletions tests/test_preproc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import logging
import os
from pathlib import Path

import duckdb

os.environ["RUNTIME__LOG_PATH"] = "/tmp/openhound-test-logs"

from openhound.core.app import DEFAULT_LOOKUP_FILE, OpenHound
from openhound.core.preproc import PreProcessor, run_transform
from openhound.core.progress import Progress


def test_preproc_uses_default_lookup_file(monkeypatch, tmp_path):
    """The preprocessor uses DEFAULT_LOOKUP_FILE when no output file is given."""
    seen: dict[str, Path] = {}

    def fake_run(self, resources, filters=None):
        # Record what the real run() would have received, then short-circuit.
        seen.update(output_file=self.output_file, resources=resources)
        return "ok"

    monkeypatch.setattr(PreProcessor, "run", fake_run)

    app = OpenHound("test", "test")

    @app.preproc()
    def preprocess(ctx):
        return {"resource": "resource"}

    outcome = app.preprocessor(  # type: ignore[misc]
        input_path=tmp_path,
        progress=Progress.log,
    )

    assert outcome == "ok"
    assert seen["output_file"] == DEFAULT_LOOKUP_FILE
    assert seen["resources"] == {"resource": "resource"}


def test_preproc_logs_duckdb_transform_errors(monkeypatch, tmp_path, caplog):
    """A transformer hitting a missing table is logged and run() still returns."""

    def fake_run(self, source, **kwargs):
        return "ok"

    def missing_table_transform(con: duckdb.DuckDBPyConnection):
        con.execute("SELECT * FROM missing_table")

    monkeypatch.setattr(PreProcessor, "_run", fake_run)
    caplog.set_level(logging.ERROR, logger="openhound.core.preproc")

    lookup_path = tmp_path / "lookup.duckdb"
    pipeline = PreProcessor(
        name="test",
        input_path=tmp_path,
        output_file=lookup_path,
        transformer=missing_table_transform,
    )

    outcome = pipeline.run(resources={"resource": "resource"})

    assert outcome == "ok"

    # At least one captured record must carry both the prefix and the table name.
    messages = [entry.message for entry in caplog.records]
    assert any(
        "DuckDB preprocessing failed due to missing table:" in text
        and "missing_table" in text
        for text in messages
    )


def test_run_transform_logs_transform_name_and_continues(caplog):
    """A failing transform is logged by name and the next transform still runs."""
    invoked: list[str] = []

    def missing_table_transform(con: duckdb.DuckDBPyConnection):
        invoked.append("missing")
        con.execute("SELECT * FROM missing_table")

    def successful_transform(con: duckdb.DuckDBPyConnection):
        invoked.append("successful")
        con.execute("SELECT 1")

    connection = duckdb.connect(":memory:")
    caplog.set_level(logging.ERROR, logger="openhound.core.preproc")

    try:
        # Run the failing transform first, then the one expected to succeed.
        for step in (missing_table_transform, successful_transform):
            run_transform(step, connection)
    finally:
        connection.close()

    assert invoked == ["missing", "successful"]

    expected_prefix = (
        "DuckDB preprocessing transform 'missing_table_transform' failed due to missing table:"
    )
    messages = [entry.message for entry in caplog.records]
    assert any(
        expected_prefix in text and "missing_table" in text for text in messages
    )
34 changes: 0 additions & 34 deletions tests/test_preproc_default_lookup.py

This file was deleted.

Loading