From 70f731ce983d3d26a9442608c29572f819dd965f Mon Sep 17 00:00:00 2001 From: Joey Dreijer Date: Fri, 1 May 2026 16:43:57 +0200 Subject: [PATCH 01/19] Add notebook CLI command --- src/openhound/cli/notebooks.py | 57 ++++++++++++++++++++++++++++++++++ src/openhound/main.py | 2 ++ 2 files changed, 59 insertions(+) create mode 100644 src/openhound/cli/notebooks.py diff --git a/src/openhound/cli/notebooks.py b/src/openhound/cli/notebooks.py new file mode 100644 index 0000000..be301a8 --- /dev/null +++ b/src/openhound/cli/notebooks.py @@ -0,0 +1,57 @@ +from pathlib import Path +from typing import Annotated, Literal + +import typer + +BASE_PATH = Path(__file__).resolve().parents[1] / "notebooks" + +NOTEBOOKS = { + "pipeline": BASE_PATH / "pipeline.py", +} + +notebooks_app = typer.Typer(help="Start OpenHound Marimo notebooks") + + +@notebooks_app.command() +def start( + notebook: Annotated[ + Literal["pipeline"], typer.Argument(help="Notebook to start") + ] = "pipeline", + host: Annotated[ + str, + typer.Option("--host", "-h", help="Host for the Marimo server"), + ] = "127.0.0.1", + port: Annotated[ + int, + typer.Option("--port", "-p", help="Port for the Marimo server"), + ] = 2718, + log_level: Annotated[ + Literal["critical", "error", "warning", "info"], + typer.Option("--log-level", "-l", help="Uvicorn logging level"), + ] = "info", +): + """Start one of the bundled OpenHound Marimo notebooks.""" + from rich.console import Console + + console = Console() + try: + import uvicorn + from fastapi import FastAPI + from marimo import create_asgi_app + + except ImportError: + console.print( + "[red]Error:[/red] Marimo is not installed. 
Install OpenHound with Marimo extras using openhound\\[notebook] [red]" + ) + raise typer.Exit(1) + + notebook_path = NOTEBOOKS[notebook] + server = create_asgi_app().with_app(path="/", root=str(notebook_path)) + + app = FastAPI() + app.mount("/", server.build()) + + console.print( + "[bold green]Starting notebook server, press CTL+C twice to stop[/bold green]" + ) + uvicorn.run(app, host=host, port=port, log_level=log_level) diff --git a/src/openhound/main.py b/src/openhound/main.py index 328aa36..23cb64e 100644 --- a/src/openhound/main.py +++ b/src/openhound/main.py @@ -6,6 +6,7 @@ from openhound.cli.collect import collect from openhound.cli.convert import convert from openhound.cli.create import create_app +from openhound.cli.notebooks import notebooks_app from openhound.cli.override import TyperOverride from openhound.cli.preproc import preprocess from openhound.cli.privilege_zone import privilege_zone @@ -22,6 +23,7 @@ app.add_typer(create_app, name="create") app.add_typer(saved_searches, name="searches") app.add_typer(privilege_zone, name="rules") +app.add_typer(notebooks_app, name="notebooks") if __name__ == "__main__": app() From 37a08fed8f2ba4b2b792f6ee167383c9255a161b Mon Sep 17 00:00:00 2001 From: Joey Dreijer Date: Fri, 1 May 2026 16:44:13 +0200 Subject: [PATCH 02/19] Add pipeline inspection notebook --- src/openhound/notebooks/pipeline.py | 358 ++++++++++++++++++++++++++++ 1 file changed, 358 insertions(+) create mode 100644 src/openhound/notebooks/pipeline.py diff --git a/src/openhound/notebooks/pipeline.py b/src/openhound/notebooks/pipeline.py new file mode 100644 index 0000000..1ecadd8 --- /dev/null +++ b/src/openhound/notebooks/pipeline.py @@ -0,0 +1,358 @@ +import marimo + +__generated_with = "0.23.4" +app = marimo.App(width="medium") + + +@app.cell +def _(): + from pathlib import Path + from dlt._workspace.cli.utils import list_local_pipelines + from dlt._workspace.helpers.dashboard.utils.pipeline import get_pipeline + from 
dlt._workspace.helpers.dashboard.utils.visualization import ( + load_package_status_labels, + ) + from dataclasses import dataclass + from datetime import datetime + from openhound.core.app import DEFAULT_LOOKUP_FILE + from openhound.core.manager import CollectorManager + import altair as alt + import duckdb + import marimo as mo + import polars as pl + import os + + return ( + CollectorManager, + DEFAULT_LOOKUP_FILE, + Path, + alt, + dataclass, + datetime, + duckdb, + get_pipeline, + list_local_pipelines, + load_package_status_labels, + mo, + os, + pl, + ) + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + # OpenHound Pipeline Dashboard + + Inspect OpenHound collection performance and preview their OpenGraph node representation. + Select a completed `*_collect` pipelin and choose a schema and table to inspect. The matching OpenHound extension is selected automatically based on the schema name. For extensions that use lookup data during conversion, run `preprocess` first so `lookup.duckdb` is available. 
+ """) + return + + +@app.cell +def _(Path): + DEFAULT_PIPELINE_PATH = Path("~/.dlt/pipelines").expanduser() + return (DEFAULT_PIPELINE_PATH,) + + +@app.cell +def _(DEFAULT_PIPELINE_PATH, list_local_pipelines, mo): + dlt_pipeline_dir, all_dlt_pipelines = list_local_pipelines(DEFAULT_PIPELINE_PATH) + selected_pipeline = mo.ui.dropdown( + options=[pipeline["name"] for pipeline in all_dlt_pipelines if pipeline["name"].endswith("collect")], + label="Choose pipeline", + ) + selected_pipeline + return (selected_pipeline,) + + +@app.cell +def _(DEFAULT_PIPELINE_PATH, get_pipeline, mo, selected_pipeline): + # TODO: This has to be modified to utils.pipeline() when updating to the latest version of DLT dashboards + mo.stop(not selected_pipeline.value, "Select a pipeline to continue") + dlt_pipeline = get_pipeline(selected_pipeline.value, DEFAULT_PIPELINE_PATH) + return (dlt_pipeline,) + + +@app.cell +def _(dataclass, datetime, dlt_pipeline, pl): + last_trace = dlt_pipeline.last_trace + + pipeline_success = True + all_traces = [] + + @dataclass + class TraceStep: + name: str + started_at: datetime + finished_at: datetime + duration_ms: float + pipeline: str = "last" + + for step in last_trace.steps: + if step.step_exception is not None: + pipeline_success = False + + if not step.step == "run": + all_traces.append( + TraceStep( + name=step.step, + started_at=step.started_at, + finished_at=step.finished_at, + duration_ms=(step.finished_at - step.started_at).total_seconds() + * 1000, + ) + ) + traces_df = pl.DataFrame(all_traces) + return last_trace, pipeline_success, traces_df + + +@app.cell +def _(last_trace, load_package_status_labels): + _ = load_package_status_labels(last_trace) + return + + +@app.cell +def _(alt, mo, traces_df): + pipeline_duration_chart = mo.ui.altair_chart( + alt.Chart(traces_df) + .mark_bar() + .encode( + x="duration_ms", + y="pipeline", + color="name", + ).properties(height=30, width="container") + ) + return (pipeline_duration_chart,) + + +@app.cell 
+def _( + dlt_pipeline, + mo, + pipeline_duration_chart, + pipeline_success, + selected_pipeline, +): + trace_title = mo.md(f"## Pipeline stats: {selected_pipeline.value}") + pipeline_destination = mo.stat( + value=dlt_pipeline.destination.destination_type, label="Destination" + ) + pipeline_status = mo.stat( + value="Success" if pipeline_success else "Failed", label="Status" + ) + last_dataset = mo.stat(value=dlt_pipeline.dataset_name, label="Last dataset") + pipeline_basic_state = mo.hstack( + [pipeline_status, last_dataset, pipeline_destination], gap="2rem" + ) + pipeline_basic_stats = mo.vstack( + [trace_title, pipeline_basic_state, pipeline_duration_chart] + ) + pipeline_basic_stats + return + + +@app.cell +def _(dlt_pipeline, mo): + # Available schemas + selected_schema = mo.ui.dropdown( + options=dlt_pipeline.schema_names, label="Choose schema" + ) + return (selected_schema,) + + +@app.cell +def _(dlt_pipeline, mo, selected_schema): + # Load the dataset based on the selected schema + dlt_dataset = dlt_pipeline.dataset(schema=selected_schema.value) + + # Available tables for schema, excluding the built in _dlt tables + dataset_tables = [ + table for table in dlt_dataset.tables if not table.startswith("_dlt") + ] + selected_table = mo.ui.dropdown(options=dataset_tables, label="Choose a table") + return dlt_dataset, selected_table + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Dataset preview + Select a dataset and table to inspect the resource schema and show a preview of the collected resources + """) + return + + +@app.cell +def _(mo, selected_schema, selected_table): + data_filters = mo.hstack([selected_schema, selected_table]) + data_filters + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ### Table schema + The table schema displays the available columns and their datatypes + """) + return + + +@app.cell +def _(dlt_dataset, dlt_pipeline, mo, os, pl, selected_table): + mo.stop(not selected_table.value, "Select a table top 
continue") + last_load_info = dlt_pipeline.last_trace.last_load_info.asdict() + last_fs_destination = last_load_info["destination_displayable_credentials"] + os.environ["BUCKET_URL"] = last_fs_destination + dlt_table = dlt_dataset.table(table_name=selected_table.value) + available_columns_df = pl.DataFrame(list(dlt_table.schema["columns"].values())) + available_columns_df + return (last_fs_destination,) + + +@app.cell +def _(Path, last_fs_destination, pl, selected_schema, selected_table): + dataset_path = ( + Path(last_fs_destination.replace("file://", "")) + / selected_schema.value + / selected_table.value + ) + table_df = pl.read_ndjson(dataset_path) + return (table_df,) + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ### Data preview + Select a sample count to preview the collected resources and the OpenGraph representation. + """) + return + + +@app.cell +def _(CollectorManager): + available_collectors = CollectorManager.from_entrypoint(load_sources=True) + collector_options = { + collector.name: collector for collector in available_collectors.collectors + } + return (collector_options,) + + +@app.cell +def _(collector_options, mo, selected_schema): + mo.stop(not selected_schema.value, "Select a schema") + mo.stop( + selected_schema.value not in collector_options, + f"No loaded extension matches schema '{selected_schema.value}'", + ) + + selected_model = mo.ui.dropdown( + collector_options.keys(), value=selected_schema.value, label="Extension" + ) + return (selected_model,) + + +@app.cell +def _(mo, table_df): + mo.stop(table_df.height == 0, "Selected table has no rows") + max_sample_count = min(100, table_df.height) + sample_count = mo.ui.slider( + start=1, + stop=max_sample_count, + label=f"Sample count (max {max_sample_count})", + value=min(20, max_sample_count), + ) + return (sample_count,) + + +@app.cell +def _(mo, sample_count, selected_model): + mo.hstack([selected_model, sample_count]) + return + + +@app.cell(hide_code=True) +def _(mo): + 
mo.md(r""" + #### Raw resource + """) + return + + +@app.cell +def _(mo, sample_count, table_df): + mo.stop(not sample_count.value, "Select a sample count") + mo.stop(table_df.height == 0, "Selected table has no rows") + sample_df = table_df.sample(n=min(sample_count.value, table_df.height)) + sample_df + return (sample_df,) + + +@app.cell +def _(collector_options, mo, selected_model): + mo.stop(not selected_model.value, "Select an extension") + selected_extension = collector_options[selected_model.value] + return (selected_extension,) + + +@app.cell +def _(selected_extension): + extension_dlt_resources = selected_extension.dlt_resources + table_to_asset = { + resource.table_name: resource.validator.model + for resource in extension_dlt_resources + if resource.validator and resource.validator.model in selected_extension.assets + } + return (table_to_asset,) + + +@app.cell +def _(DEFAULT_LOOKUP_FILE, duckdb, mo, selected_extension): + lookup_session = None + if selected_extension.lookup_factory: + mo.stop( + not DEFAULT_LOOKUP_FILE.exists(), + f"Run preproc before previewing graph output. 
Missing lookup file: {DEFAULT_LOOKUP_FILE}", + ) + lookup_client = duckdb.connect(str(DEFAULT_LOOKUP_FILE), read_only=True) + lookup_session = selected_extension.lookup_factory(lookup_client) + return (lookup_session,) + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + #### As node + """) + return + + +@app.cell +def _(lookup_session, mo, pl, sample_df, selected_table, table_to_asset): + mo.stop( + selected_table.value not in table_to_asset, + f"Selected table '{selected_table.value}' is not mapped to an OpenHound asset", + ) + + def as_node(row, model): + parsed_model = model.model_validate(row) + parsed_model._lookup = lookup_session + parsed_model._extras = {} + return parsed_model.as_node + + as_node_df = pl.DataFrame( + [ + as_node(row, table_to_asset[selected_table.value]) + for row in sample_df.iter_rows(named=True) + ] + ) + as_node_df + return + + +if __name__ == "__main__": + app.run() From 2550212009d76cc0bd38389adee1a2f1c3277457 Mon Sep 17 00:00:00 2001 From: Joey Dreijer Date: Fri, 1 May 2026 16:45:14 +0200 Subject: [PATCH 03/19] Added additional _load_extension_source which loads the source module in order to inspect all discovered dlt sources/resources/transformers (this is optional) --- src/openhound/core/manager.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/src/openhound/core/manager.py b/src/openhound/core/manager.py index 89a0687..8dc64c4 100644 --- a/src/openhound/core/manager.py +++ b/src/openhound/core/manager.py @@ -137,7 +137,11 @@ def validate_metadata(extension: EntryPoint) -> tuple[bool, Extension | None]: return False, None @classmethod - def from_entrypoint(cls, group: str = "openhound.sources") -> "CollectorManager": + def from_entrypoint( + cls, + group: str = "openhound.sources", + load_sources: bool = False, + ) -> "CollectorManager": discover_extension = entry_points(group=group) extensions: list[OpenHound] = [] for extension in discover_extension: @@ -149,6 +153,9 @@ def 
from_entrypoint(cls, group: str = "openhound.sources") -> "CollectorManager" ) load_extension: OpenHound = extension.load() + if load_sources: + cls._load_extension_source(extension) + is_valid_extension = cls.validate_extension(load_extension, extension.name) if is_valid_extension: load_extension.metadata = metadata @@ -163,3 +170,17 @@ def from_entrypoint(cls, group: str = "openhound.sources") -> "CollectorManager" extra={"extension": extension.name, "phase": "extension_loading"}, ) return cls(collectors=extensions) + + @staticmethod + def _load_extension_source(extension: EntryPoint) -> None: + parent_module_name = extension.module.rsplit(".", 1)[0] + source_module_name = f"{parent_module_name}.source" + try: + import_module(source_module_name) + except ModuleNotFoundError as err: + if err.name != source_module_name: + raise + logger.warning( + f"Extension '{extension.name}' does not have a source module '{source_module_name}'", + extra={"extension": extension.name, "phase": "extension_loading"}, + ) From 3753149c00127b9350ed1e2fcf34dfa1873e04d6 Mon Sep 17 00:00:00 2001 From: Joey Dreijer Date: Fri, 1 May 2026 16:45:29 +0200 Subject: [PATCH 04/19] Add lookup reference to the main openhound instance --- src/openhound/core/app.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/openhound/core/app.py b/src/openhound/core/app.py index 8b66861..ffed156 100644 --- a/src/openhound/core/app.py +++ b/src/openhound/core/app.py @@ -62,6 +62,7 @@ def __init__(self, name: str, source_kind: str, help: str = "OpenGraph collector self.collector: Callable | None = None self.converter: Callable | None = None self.preprocessor: Callable | None = None + self.lookup_factory: Callable | None = None # Store DLT resources/transformers for this source to be used when building the DLT pipeline self.dlt_source: DltSource | None = None @@ -147,6 +148,8 @@ def convert( progress (Literal["tqdm", "log", "alive_progress"], optional): Progress backend. 
Log is preferred for producteion use and alive_progress for interactive use. """ + self.lookup_factory = lookup + def decorator(func: Callable): def run_convert( input_path: InputPath, From d892cf470452c3d746f84b5cb8e77cf1fa0df7c8 Mon Sep 17 00:00:00 2001 From: Joey Dreijer Date: Fri, 1 May 2026 17:04:33 +0200 Subject: [PATCH 05/19] UX improvements to pipeline notebook --- src/openhound/notebooks/pipeline.py | 144 ++++++++++++++++++++-------- 1 file changed, 106 insertions(+), 38 deletions(-) diff --git a/src/openhound/notebooks/pipeline.py b/src/openhound/notebooks/pipeline.py index 1ecadd8..df56c0f 100644 --- a/src/openhound/notebooks/pipeline.py +++ b/src/openhound/notebooks/pipeline.py @@ -12,7 +12,7 @@ def _(): from dlt._workspace.helpers.dashboard.utils.visualization import ( load_package_status_labels, ) - from dataclasses import dataclass + from dataclasses import asdict, dataclass, is_dataclass from datetime import datetime from openhound.core.app import DEFAULT_LOOKUP_FILE from openhound.core.manager import CollectorManager @@ -27,10 +27,12 @@ def _(): DEFAULT_LOOKUP_FILE, Path, alt, + asdict, dataclass, datetime, duckdb, get_pipeline, + is_dataclass, list_local_pipelines, load_package_status_labels, mo, @@ -44,8 +46,9 @@ def _(mo): mo.md(r""" # OpenHound Pipeline Dashboard - Inspect OpenHound collection performance and preview their OpenGraph node representation. - Select a completed `*_collect` pipelin and choose a schema and table to inspect. The matching OpenHound extension is selected automatically based on the schema name. For extensions that use lookup data during conversion, run `preprocess` first so `lookup.duckdb` is available. + Inspect recent OpenHound collection runs and preview the OpenGraph representation of collected resources. + + Select a completed `*_collect` pipeline and choose a schema + resource table to inspect. The matching OpenHound extension is selected automatically from the schema name. 
For extensions that use lookup data during conversion, run `preprocess` first so `lookup.duckdb` is available. """) return @@ -61,7 +64,8 @@ def _(DEFAULT_PIPELINE_PATH, list_local_pipelines, mo): dlt_pipeline_dir, all_dlt_pipelines = list_local_pipelines(DEFAULT_PIPELINE_PATH) selected_pipeline = mo.ui.dropdown( options=[pipeline["name"] for pipeline in all_dlt_pipelines if pipeline["name"].endswith("collect")], - label="Choose pipeline", + label="Collect pipeline", + full_width=True, ) selected_pipeline return (selected_pipeline,) @@ -120,10 +124,11 @@ def _(alt, mo, traces_df): alt.Chart(traces_df) .mark_bar() .encode( - x="duration_ms", - y="pipeline", - color="name", - ).properties(height=30, width="container") + x=alt.X("duration_ms", title="Duration (ms)"), + y=alt.Y("pipeline", title=None), + color=alt.Color("name", title="Step"), + tooltip=["name", "duration_ms", "started_at", "finished_at"], + ).properties(height=40, width="container") ) return (pipeline_duration_chart,) @@ -136,7 +141,7 @@ def _( pipeline_success, selected_pipeline, ): - trace_title = mo.md(f"## Pipeline stats: {selected_pipeline.value}") + trace_title = mo.md(f"## Pipeline Overview: `{selected_pipeline.value}`") pipeline_destination = mo.stat( value=dlt_pipeline.destination.destination_type, label="Destination" ) @@ -157,8 +162,13 @@ def _( @app.cell def _(dlt_pipeline, mo): # Available schemas + mo.stop(not dlt_pipeline.schema_names, "Selected pipeline has no schemas") selected_schema = mo.ui.dropdown( - options=dlt_pipeline.schema_names, label="Choose schema" + options=dlt_pipeline.schema_names, + value=dlt_pipeline.schema_names[0], + label="Dataset schema", + full_width=True, + allow_select_none=False, ) return (selected_schema,) @@ -172,7 +182,14 @@ def _(dlt_pipeline, mo, selected_schema): dataset_tables = [ table for table in dlt_dataset.tables if not table.startswith("_dlt") ] - selected_table = mo.ui.dropdown(options=dataset_tables, label="Choose a table") + mo.stop(not 
dataset_tables, f"Schema '{selected_schema.value}' has no resource tables") + selected_table = mo.ui.dropdown( + options=dataset_tables, + value=dataset_tables[0], + label="Resource table", + full_width=True, + allow_select_none=False, + ) return dlt_dataset, selected_table @@ -186,8 +203,23 @@ def _(mo): @app.cell -def _(mo, selected_schema, selected_table): - data_filters = mo.hstack([selected_schema, selected_table]) +def _(matched_extension_stat, mo, selected_schema, selected_table): + context_message = ( + f"Inspecting `{selected_schema.value}.{selected_table.value}` with extension `{selected_schema.value}`." + if selected_schema.value and selected_table.value + else "Select a schema and table to inspect collected resources." + ) + data_filters = mo.vstack( + [ + mo.hstack( + [selected_schema, selected_table, matched_extension_stat], + gap=1, + widths="equal", + ), + mo.md(context_message), + ], + gap=1, + ) data_filters return @@ -202,8 +234,7 @@ def _(mo): @app.cell -def _(dlt_dataset, dlt_pipeline, mo, os, pl, selected_table): - mo.stop(not selected_table.value, "Select a table top continue") +def _(dlt_dataset, dlt_pipeline, os, pl, selected_table): last_load_info = dlt_pipeline.last_trace.last_load_info.asdict() last_fs_destination = last_load_info["destination_displayable_credentials"] os.environ["BUCKET_URL"] = last_fs_destination @@ -244,16 +275,22 @@ def _(CollectorManager): @app.cell def _(collector_options, mo, selected_schema): - mo.stop(not selected_schema.value, "Select a schema") - mo.stop( - selected_schema.value not in collector_options, - f"No loaded extension matches schema '{selected_schema.value}'", - ) - - selected_model = mo.ui.dropdown( - collector_options.keys(), value=selected_schema.value, label="Extension" - ) - return (selected_model,) + matched_extension_name = None + if not selected_schema.value: + matched_extension_stat = mo.callout("Select a schema", kind="info") + elif selected_schema.value not in collector_options: + 
matched_extension_stat = mo.callout( + f"No loaded extension matches schema `{selected_schema.value}`.", + kind="warn", + ) + else: + matched_extension_name = selected_schema.value + matched_extension_stat = mo.stat( + value=matched_extension_name, + label="Matched extension", + caption="Matched from dataset schema", + ) + return matched_extension_name, matched_extension_stat @app.cell @@ -270,8 +307,8 @@ def _(mo, table_df): @app.cell -def _(mo, sample_count, selected_model): - mo.hstack([selected_model, sample_count]) +def _(mo, sample_count): + mo.hstack([sample_count], widths="equal") return @@ -293,9 +330,9 @@ def _(mo, sample_count, table_df): @app.cell -def _(collector_options, mo, selected_model): - mo.stop(not selected_model.value, "Select an extension") - selected_extension = collector_options[selected_model.value] +def _(collector_options, matched_extension_name, mo): + mo.stop(not matched_extension_name, "No loaded extension matches the selected schema") + selected_extension = collector_options[matched_extension_name] return (selected_extension,) @@ -332,27 +369,58 @@ def _(mo): @app.cell -def _(lookup_session, mo, pl, sample_df, selected_table, table_to_asset): +def _( + asdict, + is_dataclass, + lookup_session, + mo, + pl, + sample_df, + selected_table, + table_to_asset, +): mo.stop( selected_table.value not in table_to_asset, f"Selected table '{selected_table.value}' is not mapped to an OpenHound asset", ) - def as_node(row, model): + def node_to_dict(node): + if hasattr(node, "model_dump"): + return node.model_dump(mode="json") + if is_dataclass(node): + return asdict(node) + return dict(node) + + def as_node_preview(row, model): parsed_model = model.model_validate(row) parsed_model._lookup = lookup_session parsed_model._extras = {} - return parsed_model.as_node - - as_node_df = pl.DataFrame( - [ - as_node(row, table_to_asset[selected_table.value]) + node = parsed_model.as_node + if node is None: + return None + + node_dict = node_to_dict(node) + 
properties = node_dict.pop("properties", {}) or {} + return {**node_dict, **properties} + + node_preview_rows = [ + preview_row + for preview_row in [ + as_node_preview(row, table_to_asset[selected_table.value]) for row in sample_df.iter_rows(named=True) ] - ) + if preview_row is not None + ] + + as_node_df = pl.DataFrame(node_preview_rows) as_node_df return +@app.cell +def _(): + return + + if __name__ == "__main__": app.run() From f716eaeb0eb52081f6eb267b8087036e85a629b0 Mon Sep 17 00:00:00 2001 From: Joey Dreijer Date: Fri, 1 May 2026 17:06:20 +0200 Subject: [PATCH 06/19] Add test that checks if the custom lookup definition is added to the app (OpenHound) instance --- tests/test_convert_lookup_file.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/test_convert_lookup_file.py b/tests/test_convert_lookup_file.py index 9cf33f8..f819f63 100644 --- a/tests/test_convert_lookup_file.py +++ b/tests/test_convert_lookup_file.py @@ -41,6 +41,19 @@ def convert(ctx): assert captured["lookup_session"] == "lookup-session" +def test_convert_stores_lookup_factory(): + app = OpenHound("test", "test") + + def lookup(client): + return client + + @app.convert(lookup=lookup) + def convert(ctx): + return object(), {} + + assert app.lookup_factory is lookup + + def test_convert_accepts_custom_lookup_file(monkeypatch, tmp_path): captured: dict[str, object] = {} custom_lookup = tmp_path / "custom.duckdb" From 4d57e8c9b356ed451be0673f5b16d26cdf9d4fdf Mon Sep 17 00:00:00 2001 From: Joey Dreijer Date: Fri, 1 May 2026 17:06:52 +0200 Subject: [PATCH 07/19] No longer ignore marimo notebooks --- .gitignore | 2 -- 1 file changed, 2 deletions(-) diff --git a/.gitignore b/.gitignore index 0819916..24fdcf7 100644 --- a/.gitignore +++ b/.gitignore @@ -5,8 +5,6 @@ site/ addons/ collectors/ -notebooks - output graph logs From a6e35df0a7fd680a901aa0cd0a21d05662c349bd Mon Sep 17 00:00:00 2001 From: Joey Dreijer Date: Fri, 1 May 2026 17:08:13 +0200 Subject: [PATCH 08/19] Bump DLT 
version and add dedicated notebooks dependencies (optional) --- pyproject.toml | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 97fd180..5b23304 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ readme = "README.md" requires-python = ">=3.13" dependencies = [ "alive-progress>=3.3.0", - "dlt==1.22.2", + "dlt==1.26.0", "duckdb==1.5.0", "griffe>=1.15.0", "griffe-fieldz>=0.5.0", @@ -37,6 +37,14 @@ okta = [ "openhound-okta==0.1.1", ] +notebooks = [ + "marimo>=0.23.4", + "altair>=6.0.0", + "polars>=1.40.1", + "pyarrow>=24.0.0", + "fastapi>=0.129.0", +] + [project.scripts] openhound = "openhound.main:app" @@ -66,11 +74,8 @@ local_scheme = "no-local-version" [dependency-groups] dev = [ "openhound-faker==0.0.4", - "ipython>=9.12.0", "pre-commit>=4.5.1", "pytest>=9.0.1", - "marimo>=0.23.0", - "altair>=6.0.0", "fastapi>=0.129.0", "zensical>=0.0.23", "ruff>=0.15.4", From a9f324f591ce73683eb2847fa9eab44ef192b9cd Mon Sep 17 00:00:00 2001 From: Joey Dreijer Date: Fri, 1 May 2026 17:08:51 +0200 Subject: [PATCH 09/19] Add init to notebooks module --- src/openhound/notebooks/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 src/openhound/notebooks/__init__.py diff --git a/src/openhound/notebooks/__init__.py b/src/openhound/notebooks/__init__.py new file mode 100644 index 0000000..e69de29 From 48aee1240e10f78315eb3d92b8be7469edf32642 Mon Sep 17 00:00:00 2001 From: Joey Dreijer Date: Fri, 1 May 2026 17:36:16 +0200 Subject: [PATCH 10/19] Instead of starting our own FastAPI/uvicorn server, use Marimo's internal API to run the notebook --- src/openhound/cli/notebooks.py | 47 +++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 15 deletions(-) diff --git a/src/openhound/cli/notebooks.py b/src/openhound/cli/notebooks.py index be301a8..2731791 100644 --- a/src/openhound/cli/notebooks.py +++ b/src/openhound/cli/notebooks.py @@ -1,9 +1,12 @@ +import secrets 
+import string from pathlib import Path from typing import Annotated, Literal import typer BASE_PATH = Path(__file__).resolve().parents[1] / "notebooks" +TOKEN_LENGTH = 32 NOTEBOOKS = { "pipeline": BASE_PATH / "pipeline.py", @@ -12,6 +15,11 @@ notebooks_app = typer.Typer(help="Start OpenHound Marimo notebooks") +def _generate_token(length: int = TOKEN_LENGTH) -> str: + alphabet = string.ascii_letters + string.digits + return "".join(secrets.choice(alphabet) for _ in range(length)) + + @notebooks_app.command() def start( notebook: Annotated[ @@ -25,19 +33,17 @@ def start( int, typer.Option("--port", "-p", help="Port for the Marimo server"), ] = 2718, - log_level: Annotated[ - Literal["critical", "error", "warning", "info"], - typer.Option("--log-level", "-l", help="Uvicorn logging level"), - ] = "info", ): """Start one of the bundled OpenHound Marimo notebooks.""" from rich.console import Console console = Console() try: - import uvicorn - from fastapi import FastAPI - from marimo import create_asgi_app + from marimo._server.file_router import AppFileRouter + from marimo._server.start import start + from marimo._server.tokens import AuthToken + from marimo._session.model import SessionMode + from marimo._utils.marimo_path import MarimoPath except ImportError: console.print( @@ -46,12 +52,23 @@ def start( raise typer.Exit(1) notebook_path = NOTEBOOKS[notebook] - server = create_asgi_app().with_app(path="/", root=str(notebook_path)) - - app = FastAPI() - app.mount("/", server.build()) - - console.print( - "[bold green]Starting notebook server, press CTL+C twice to stop[/bold green]" + start( + file_router=AppFileRouter.from_filename(MarimoPath(str(notebook_path))), + mode=SessionMode.RUN, + development_mode=False, + quiet=False, + include_code=False, + ttl_seconds=120, + headless=False, + port=port, + host=host, + proxy=None, + watch=False, + cli_args={}, + argv=[], + base_url="", + allow_origins=None, + auth_token=AuthToken(_generate_token()), + 
redirect_console_to_browser=False, + skew_protection=True, ) - uvicorn.run(app, host=host, port=port, log_level=log_level) From 47a66b04ae95b1141adf2ca3c18a1fc26e337c0c Mon Sep 17 00:00:00 2001 From: Joey Dreijer Date: Fri, 1 May 2026 17:42:44 +0200 Subject: [PATCH 11/19] Remove fastapi as a seperate dependency --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 5b23304..a3681a7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,7 +42,6 @@ notebooks = [ "altair>=6.0.0", "polars>=1.40.1", "pyarrow>=24.0.0", - "fastapi>=0.129.0", ] [project.scripts] From 2cd2477f0511b7a441be6d8b38b03a96505c72d8 Mon Sep 17 00:00:00 2001 From: Joey Dreijer Date: Fri, 1 May 2026 17:44:53 +0200 Subject: [PATCH 12/19] Fixed incorrect reference to openhound extras --- src/openhound/cli/notebooks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/openhound/cli/notebooks.py b/src/openhound/cli/notebooks.py index 2731791..9109811 100644 --- a/src/openhound/cli/notebooks.py +++ b/src/openhound/cli/notebooks.py @@ -47,7 +47,7 @@ def start( except ImportError: console.print( - "[red]Error:[/red] Marimo is not installed. Install OpenHound with Marimo extras using openhound\\[notebook] [red]" + "[red]Error:[/red] Marimo is not installed. 
Install OpenHound with Marimo extras using openhound\\[notebooks] [red]" ) raise typer.Exit(1) From 16727c1e0623490af763a9fe74b362075913e015 Mon Sep 17 00:00:00 2001 From: Joey Dreijer Date: Fri, 1 May 2026 17:48:36 +0200 Subject: [PATCH 13/19] Pin marimo version and only allow patches --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index a3681a7..e515fdd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,7 +38,7 @@ okta = [ ] notebooks = [ - "marimo>=0.23.4", + "marimo~=0.23.4", "altair>=6.0.0", "polars>=1.40.1", "pyarrow>=24.0.0", From ec90be624cb560068814ea29222efa6fb6d06507 Mon Sep 17 00:00:00 2001 From: Joey Dreijer Date: Fri, 1 May 2026 17:48:53 +0200 Subject: [PATCH 14/19] Update lock --- uv.lock | 236 +++++++++++++++++++------------------------------------- 1 file changed, 79 insertions(+), 157 deletions(-) diff --git a/uv.lock b/uv.lock index 7ee7eea..81c4ed1 100644 --- a/uv.lock +++ b/uv.lock @@ -89,15 +89,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ed/c9/d7977eaacb9df673210491da99e6a247e93df98c715fc43fd136ce1d3d33/arrow-1.4.0-py3-none-any.whl", hash = "sha256:749f0769958ebdc79c173ff0b0670d59051a535fa26e8eba02953dc19eb43205", size = 68797, upload-time = "2025-10-18T17:46:45.663Z" }, ] -[[package]] -name = "asttokens" -version = "3.0.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/be/a5/8e3f9b6771b0b408517c82d97aed8f2036509bc247d46114925e32fe33f0/asttokens-3.0.1.tar.gz", hash = "sha256:71a4ee5de0bde6a31d64f6b13f2293ac190344478f081c3d1bccfcf5eacb0cb7", size = 62308, upload-time = "2025-11-15T16:43:48.578Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d2/39/e7eaf1799466a4aef85b6a4fe7bd175ad2b1c6345066aa33f1f58d4b18d0/asttokens-3.0.1-py3-none-any.whl", hash = "sha256:15a3ebc0f43c2d0a50eeafea25e19046c68398e487b9f1f5b517f7c0f40f976a", size = 27047, upload-time = 
"2025-11-15T16:43:16.109Z" }, -] - [[package]] name = "attrs" version = "26.1.0" @@ -329,15 +320,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d2/f1/00ce3bde3ca542d1acd8f8cfa38e446840945aa6363f9b74746394b14127/cryptography-46.0.7-cp38-abi3-win_amd64.whl", hash = "sha256:506c4ff91eff4f82bdac7633318a526b1d1309fc07ca76a3ad182cb5b686d6d3", size = 3472985, upload-time = "2026-04-08T01:57:36.714Z" }, ] -[[package]] -name = "decorator" -version = "5.2.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/43/fa/6d96a0978d19e17b68d634497769987b16c8f4cd0a7a05048bec693caa6b/decorator-5.2.1.tar.gz", hash = "sha256:65f266143752f734b0a7cc83c46f4618af75b8c5911b00ccb61d0ac9b6da0360", size = 56711, upload-time = "2025-02-24T04:41:34.073Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/4e/8c/f3147f5c4b73e7550fe5f9352eaa956ae838d5c51eb58e7a25b9f3e2643b/decorator-5.2.1-py3-none-any.whl", hash = "sha256:d316bb415a2d9e2d2b3abcc4084c6502fc09240e292cd76a76afc106a1c8e04a", size = 9190, upload-time = "2025-02-24T04:41:32.565Z" }, -] - [[package]] name = "deepmerge" version = "2.0" @@ -358,7 +340,7 @@ wheels = [ [[package]] name = "dlt" -version = "1.22.2" +version = "1.26.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, @@ -387,9 +369,9 @@ dependencies = [ { name = "typing-extensions" }, { name = "tzdata" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/c1/03/78a79269f004241f6644d56c2666fd105fd311a120ff8c747d5d106a6c37/dlt-1.22.2.tar.gz", hash = "sha256:55be2318c0e5ca024200000a02bd51dfd3078f5bbf0b233b3ee64329f9987031", size = 940374, upload-time = "2026-03-01T18:34:40.048Z" } +sdist = { url = "https://files.pythonhosted.org/packages/48/20/c47492cd3c78287133fbba98aa41192c8613a371b8db57738d5096d05b8b/dlt-1.26.0.tar.gz", hash = "sha256:1a066fed8df7ace96a695309d9102046f9e3b9d60692e1cb93e842406aaf55ec", size = 1042686, upload-time = 
"2026-04-28T17:52:20.379Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/81/a3/ca205dd3daaa8896f0550c2090983125494246c849c63b8aadb3f7f12708/dlt-1.22.2-py3-none-any.whl", hash = "sha256:ee0741fb80e16bcff444e9670d7008a1325d5db0c8650f4e1d53722181adc186", size = 1191198, upload-time = "2026-03-01T18:34:36.93Z" }, + { url = "https://files.pythonhosted.org/packages/72/1a/1430fc5989fe9a476b198a8a9b27e0795c2e6856d2593f7d21e660d99e2d/dlt-1.26.0-py3-none-any.whl", hash = "sha256:81e2d28bdc4d33e97978e3654542bdac95347342d15a35102d09fd79df50c2bd", size = 1312681, upload-time = "2026-04-28T17:52:16.234Z" }, ] [[package]] @@ -423,15 +405,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/64/aa/f14dd5e241ec80d9f9d82196ca65e0c53badfc8a7a619d5497c5626657ad/duckdb-1.5.0-cp314-cp314-win_arm64.whl", hash = "sha256:d6d2858c734d1a7e7a1b6e9b8403b3fce26dfefb4e0a2479c420fba6cd36db36", size = 14341879, upload-time = "2026-03-09T12:50:22.347Z" }, ] -[[package]] -name = "executing" -version = "2.2.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/cc/28/c14e053b6762b1044f34a13aab6859bbf40456d37d23aa286ac24cfd9a5d/executing-2.2.1.tar.gz", hash = "sha256:3632cc370565f6648cc328b32435bd120a1e4ebb20c77e3fdde9a13cd1e533c4", size = 1129488, upload-time = "2025-09-01T09:48:10.866Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c1/ea/53f2148663b321f21b5a606bd5f191517cf40b7072c0497d3c92c4a13b1e/executing-2.2.1-py2.py3-none-any.whl", hash = "sha256:760643d3452b4d777d295bb167ccc74c64a81df23fb5e08eff250c425a4b2017", size = 28317, upload-time = "2025-09-01T09:48:08.5Z" }, -] - [[package]] name = "faker" version = "40.13.0" @@ -665,39 +638,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, 
upload-time = "2025-10-18T21:55:41.639Z" }, ] -[[package]] -name = "ipython" -version = "9.12.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "colorama", marker = "sys_platform == 'win32'" }, - { name = "decorator" }, - { name = "ipython-pygments-lexers" }, - { name = "jedi" }, - { name = "matplotlib-inline" }, - { name = "pexpect", marker = "sys_platform != 'emscripten' and sys_platform != 'win32'" }, - { name = "prompt-toolkit" }, - { name = "pygments" }, - { name = "stack-data" }, - { name = "traitlets" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/3a/73/7114f80a8f9cabdb13c27732dce24af945b2923dcab80723602f7c8bc2d8/ipython-9.12.0.tar.gz", hash = "sha256:01daa83f504b693ba523b5a407246cabde4eb4513285a3c6acaff11a66735ee4", size = 4428879, upload-time = "2026-03-27T09:42:45.312Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/59/22/906c8108974c673ebef6356c506cebb6870d48cedea3c41e949e2dd556bb/ipython-9.12.0-py3-none-any.whl", hash = "sha256:0f2701e8ee86e117e37f50563205d36feaa259d2e08d4a6bc6b6d74b18ce128d", size = 625661, upload-time = "2026-03-27T09:42:42.831Z" }, -] - -[[package]] -name = "ipython-pygments-lexers" -version = "1.1.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pygments" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/ef/4c/5dd1d8af08107f88c7f741ead7a40854b8ac24ddf9ae850afbcf698aa552/ipython_pygments_lexers-1.1.1.tar.gz", hash = "sha256:09c0138009e56b6854f9535736f4171d855c8c08a563a0dcd8022f78355c7e81", size = 8393, upload-time = "2025-01-17T11:24:34.505Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d9/33/1f075bf72b0b747cb3288d011319aaf64083cf2efef8354174e3ed4540e2/ipython_pygments_lexers-1.1.1-py3-none-any.whl", hash = "sha256:a9462224a505ade19a605f71f8fa63c2048833ce50abc86768a0d81d876dc81c", size = 8074, upload-time = "2025-01-17T11:24:33.271Z" }, -] - [[package]] name = "itsdangerous" version = "2.2.0" @@ 
-883,7 +823,7 @@ wheels = [ [[package]] name = "marimo" -version = "0.23.1" +version = "0.23.4" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, @@ -905,9 +845,9 @@ dependencies = [ { name = "uvicorn" }, { name = "websockets" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d5/95/b7bb0fd8f00fc7b567a46eb6230492d0c0f53a622bd3c16fa142b54eef81/marimo-0.23.1.tar.gz", hash = "sha256:c1c1ed859cfac45f4bb54fe882fc6941ac22b3b4b7a4505285e6139e0321f886", size = 38270839, upload-time = "2026-04-10T23:14:04.103Z" } +sdist = { url = "https://files.pythonhosted.org/packages/99/1a/79f8fc2af1be74283ba3b701cba77472ec4f30949ffa88a3166d4bb7e751/marimo-0.23.4.tar.gz", hash = "sha256:8e5f1bd78a73cb04d775e650473cfab745303be746f141a3654dcd246de2a4e6", size = 38403567, upload-time = "2026-04-28T18:01:51.14Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/db/b4/53ca91d287b52ae1806ce947659a4357472cd4ab1f54ae5638cff8cc8a25/marimo-0.23.1-py3-none-any.whl", hash = "sha256:fb2546fdd669fcba5cf3b1aaaa8fd84196c292935bc2dce457cf8077ae2e8608", size = 38696007, upload-time = "2026-04-10T23:14:00.126Z" }, + { url = "https://files.pythonhosted.org/packages/8f/13/c1c3d0f889115ad33e2122efd16c842f8588aab7e0d243f29125a0ee8ad5/marimo-0.23.4-py3-none-any.whl", hash = "sha256:67806cf1dc4f438624e4640debf80e0689173399b8ee17de7f879cb07f6fb476", size = 38824790, upload-time = "2026-04-28T18:01:47.909Z" }, ] [[package]] @@ -983,18 +923,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/70/bc/6f1c2f612465f5fa89b95bead1f44dcb607670fd42891d8fdcd5d039f4f4/markupsafe-3.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:32001d6a8fc98c8cb5c947787c5d08b0a50663d139f1305bac5885d98d9b40fa", size = 14146, upload-time = "2025-09-27T18:37:28.327Z" }, ] -[[package]] -name = "matplotlib-inline" -version = "0.2.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "traitlets" }, -] -sdist = { url = 
"https://files.pythonhosted.org/packages/c7/74/97e72a36efd4ae2bccb3463284300f8953f199b5ffbc04cbbb0ec78f74b1/matplotlib_inline-0.2.1.tar.gz", hash = "sha256:e1ee949c340d771fc39e241ea75683deb94762c8fa5f2927ec57c83c4dffa9fe", size = 8110, upload-time = "2025-10-23T09:00:22.126Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/af/33/ee4519fa02ed11a94aef9559552f3b17bb863f2ecfe1a35dc7f548cde231/matplotlib_inline-0.2.1-py3-none-any.whl", hash = "sha256:d56ce5156ba6085e00a9d54fead6ed29a9c47e215cd1bba2e976ef39f5710a76", size = 9516, upload-time = "2025-10-23T09:00:20.675Z" }, -] - [[package]] name = "mdurl" version = "0.1.2" @@ -1227,17 +1155,20 @@ github = [ jamf = [ { name = "openhound-jamf" }, ] +notebooks = [ + { name = "altair" }, + { name = "marimo" }, + { name = "polars" }, + { name = "pyarrow" }, +] okta = [ { name = "openhound-okta" }, ] [package.dev-dependencies] dev = [ - { name = "altair" }, { name = "fastapi" }, { name = "httpx" }, - { name = "ipython" }, - { name = "marimo" }, { name = "mypy" }, { name = "openhound-faker" }, { name = "pre-commit" }, @@ -1251,12 +1182,14 @@ dev = [ [package.metadata] requires-dist = [ { name = "alive-progress", specifier = ">=3.3.0" }, + { name = "altair", marker = "extra == 'notebooks'", specifier = ">=6.0.0" }, { name = "cookiecutter", specifier = ">=2.6.0" }, - { name = "dlt", specifier = "==1.22.2" }, + { name = "dlt", specifier = "==1.26.0" }, { name = "duckdb", specifier = "==1.5.0" }, { name = "griffe", specifier = ">=1.15.0" }, { name = "griffe-fieldz", specifier = ">=0.5.0" }, { name = "jinja2", specifier = ">=3.1.6" }, + { name = "marimo", marker = "extra == 'notebooks'", specifier = "~=0.23.4" }, { name = "mkdocstrings", extras = ["python"], specifier = ">=1.0.0" }, { name = "openhound-github", marker = "extra == 'all'", specifier = "==0.1.0" }, { name = "openhound-github", marker = "extra == 'github'", specifier = "==0.1.0" }, @@ -1264,21 +1197,20 @@ requires-dist = [ { name = "openhound-jamf", 
marker = "extra == 'jamf'", specifier = "==0.1.0" }, { name = "openhound-okta", marker = "extra == 'all'", specifier = "==0.1.1" }, { name = "openhound-okta", marker = "extra == 'okta'", specifier = "==0.1.1" }, + { name = "polars", marker = "extra == 'notebooks'", specifier = ">=1.40.1" }, { name = "psutil", specifier = ">=7.2.1" }, + { name = "pyarrow", marker = "extra == 'notebooks'", specifier = ">=24.0.0" }, { name = "pydantic", specifier = "==2.12.5" }, { name = "pydantic-extra-types", specifier = ">=2.11.0" }, { name = "tqdm", specifier = ">=4.67.1" }, { name = "typer", specifier = ">=0.19.2" }, ] -provides-extras = ["all", "github", "jamf", "okta"] +provides-extras = ["all", "github", "jamf", "notebooks", "okta"] [package.metadata.requires-dev] dev = [ - { name = "altair", specifier = ">=6.0.0" }, { name = "fastapi", specifier = ">=0.129.0" }, { name = "httpx", specifier = ">=0.28.1" }, - { name = "ipython", specifier = ">=9.12.0" }, - { name = "marimo", specifier = ">=0.23.0" }, { name = "mypy", specifier = ">=1.19.1" }, { name = "openhound-faker", specifier = "==0.0.4" }, { name = "pre-commit", specifier = ">=4.5.1" }, @@ -1443,18 +1375,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/02/fb/d65db067a67df7252f18b0cb7420dda84078b9e8bfb375215469c14a50be/pendulum-3.2.0-py3-none-any.whl", hash = "sha256:f3a9c18a89b4d9ef39c5fa6a78722aaff8d5be2597c129a3b16b9f40a561acf3", size = 114111, upload-time = "2026-01-30T11:22:22.361Z" }, ] -[[package]] -name = "pexpect" -version = "4.9.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "ptyprocess" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/42/92/cc564bf6381ff43ce1f4d06852fc19a2f11d180f23dc32d9588bee2f149d/pexpect-4.9.0.tar.gz", hash = "sha256:ee7d41123f3c9911050ea2c2dac107568dc43b2d3b0c7557a33212c398ead30f", size = 166450, upload-time = "2023-11-25T09:07:26.339Z" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/9e/c3/059298687310d527a58bb01f3b1965787ee3b40dce76752eda8b44e9a2c5/pexpect-4.9.0-py2.py3-none-any.whl", hash = "sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523", size = 63772, upload-time = "2023-11-25T06:56:14.81Z" }, -] - [[package]] name = "platformdirs" version = "4.9.6" @@ -1482,6 +1402,34 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a3/58/35da89ee790598a0700ea49b2a66594140f44dec458c07e8e3d4979137fc/ply-3.11-py2.py3-none-any.whl", hash = "sha256:096f9b8350b65ebd2fd1346b12452efe5b9607f7482813ffca50c22722a807ce", size = 49567, upload-time = "2018-02-15T19:01:27.172Z" }, ] +[[package]] +name = "polars" +version = "1.40.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "polars-runtime-32" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b3/8c/bc9bc948058348ed43117cecc3007cd608f395915dae8a00974579a5dab1/polars-1.40.1.tar.gz", hash = "sha256:ab2694134b137596b5a59bfd7b4c54ebbc9b59f9403127f18e32d363777552e8", size = 733574, upload-time = "2026-04-22T19:15:55.507Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ea/91/74fc60d94488685a92ac9d49d7ec55f3e91fe9b77942a6235a5fa7f249c3/polars-1.40.1-py3-none-any.whl", hash = "sha256:c0f861219d1319cdea45c4ce4d30355a47176b8f98dcedf95ea8269f131b8abd", size = 828723, upload-time = "2026-04-22T19:14:25.452Z" }, +] + +[[package]] +name = "polars-runtime-32" +version = "1.40.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/54/ba/26d40f039be9f552b5fd7365a621bdfc0f8e912ef77094ae4693491b0bae/polars_runtime_32-1.40.1.tar.gz", hash = "sha256:37f3065615d1bf90d03b5326222df4c5c1f8a5d33e50470aa588e3465e6eb814", size = 2935843, upload-time = "2026-04-22T19:15:57.26Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/7d/46/22c8af5eed68ac2eeb556e0fa3ca8a7b798e984ceff4450888f3b5ac61fd/polars_runtime_32-1.40.1-cp310-abi3-macosx_10_12_x86_64.whl", hash = "sha256:b748ef652270cc49e9e69f99a035e0eb4d5f856d42bcd6ac4d9d80a40142aa1e", size = 52098755, upload-time = "2026-04-22T19:14:28.555Z" }, + { url = "https://files.pythonhosted.org/packages/c6/3e/48599a38009ca60ff82a6f38c8a621ce3c0286aa7397c7d79e741bd9060e/polars_runtime_32-1.40.1-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:d249b3743e05986060cec0a7aaa542d020df6c6b876e556023a310efd581f9be", size = 46367542, upload-time = "2026-04-22T19:14:32.433Z" }, + { url = "https://files.pythonhosted.org/packages/43/e9/384bc069367a1a36ee31c13782c178dbd039b2b873b772d4a0fc23a2373d/polars_runtime_32-1.40.1-cp310-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5987b30e7aa1059d069498496e8dda35afd592b0ac3d46ed87e3ff8df1ad652c", size = 50252104, upload-time = "2026-04-22T19:14:35.945Z" }, + { url = "https://files.pythonhosted.org/packages/15/ef/7d57ceb0651af74194e97ed6583e148d352f03d696090221b8059cdfc90b/polars_runtime_32-1.40.1-cp310-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d7f42a8b3f16fc66002cc0f6516f7dd7653396886ae0ed362ab95c0b3408b59", size = 56250788, upload-time = "2026-04-22T19:14:39.743Z" }, + { url = "https://files.pythonhosted.org/packages/10/0f/e4b3ffc748827a14a474ec9c42e45c066050e440fec57e914091d9adda75/polars_runtime_32-1.40.1-cp310-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:e5f7becc237a7ec9d9a10878dc8e54b73bbf4e2d94a2991c37d7a0b38590d8f9", size = 50432590, upload-time = "2026-04-22T19:14:43.388Z" }, + { url = "https://files.pythonhosted.org/packages/d9/0b/b8d95fbed869fa4caabe9c400e4210374913b376e925e96fdcfa9be6416b/polars_runtime_32-1.40.1-cp310-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:992d14cf191dde043d36fbdbc98a65e43fbc7e9a5024cecd45f838ac4988c1ee", size = 54155564, upload-time = "2026-04-22T19:14:47.239Z" }, + { url = 
"https://files.pythonhosted.org/packages/06/d9/d091d8fb5cbed5e9536adfed955c4c89987a4cc3b8e73ae4532402b91c74/polars_runtime_32-1.40.1-cp310-abi3-win_amd64.whl", hash = "sha256:f78bb2abd00101cbb23cc0cb068f7e36e081057a15d2ec2dde3dda280709f030", size = 51829755, upload-time = "2026-04-22T19:14:50.85Z" }, + { url = "https://files.pythonhosted.org/packages/65/ad/b33c3022a394f3eb55c3310597cec615412a8a33880055eee191d154a628/polars_runtime_32-1.40.1-cp310-abi3-win_arm64.whl", hash = "sha256:b5cbfaf6b085b420b4bfcbe24e8f665076d1cccfdb80c0484c02a023ce205537", size = 45822104, upload-time = "2026-04-22T19:14:54.192Z" }, +] + [[package]] name = "pre-commit" version = "4.5.1" @@ -1498,18 +1446,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5d/19/fd3ef348460c80af7bb4669ea7926651d1f95c23ff2df18b9d24bab4f3fa/pre_commit-4.5.1-py2.py3-none-any.whl", hash = "sha256:3b3afd891e97337708c1674210f8eba659b52a38ea5f822ff142d10786221f77", size = 226437, upload-time = "2025-12-16T21:14:32.409Z" }, ] -[[package]] -name = "prompt-toolkit" -version = "3.0.52" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "wcwidth" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/a1/96/06e01a7b38dce6fe1db213e061a4602dd6032a8a97ef6c1a862537732421/prompt_toolkit-3.0.52.tar.gz", hash = "sha256:28cde192929c8e7321de85de1ddbe736f1375148b02f2e17edd840042b1be855", size = 434198, upload-time = "2025-08-27T15:24:02.057Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/84/03/0d3ce49e2505ae70cf43bc5bb3033955d2fc9f932163e84dc0779cc47f48/prompt_toolkit-3.0.52-py3-none-any.whl", hash = "sha256:9aac639a3bbd33284347de5ad8d68ecc044b91a762dc39b7c21095fcd6a19955", size = 391431, upload-time = "2025-08-27T15:23:59.498Z" }, -] - [[package]] name = "psutil" version = "7.2.2" @@ -1539,21 +1475,39 @@ wheels = [ ] [[package]] -name = "ptyprocess" -version = "0.7.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = 
"https://files.pythonhosted.org/packages/20/e5/16ff212c1e452235a90aeb09066144d0c5a6a8c0834397e03f5224495c4e/ptyprocess-0.7.0.tar.gz", hash = "sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220", size = 70762, upload-time = "2020-12-28T15:15:30.155Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/22/a6/858897256d0deac81a172289110f31629fc4cee19b6f01283303e18c8db3/ptyprocess-0.7.0-py2.py3-none-any.whl", hash = "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35", size = 13993, upload-time = "2020-12-28T15:15:28.35Z" }, -] - -[[package]] -name = "pure-eval" -version = "0.2.3" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/cd/05/0a34433a064256a578f1783a10da6df098ceaa4a57bbeaa96a6c0352786b/pure_eval-0.2.3.tar.gz", hash = "sha256:5f4e983f40564c576c7c8635ae88db5956bb2229d7e9237d03b3c0b0190eaf42", size = 19752, upload-time = "2024-07-21T12:58:21.801Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/8e/37/efad0257dc6e593a18957422533ff0f87ede7c9c6ea010a2177d738fb82f/pure_eval-0.2.3-py3-none-any.whl", hash = "sha256:1db8e35b67b3d218d818ae653e27f06c3aa420901fa7b081ca98cbedc874e0d0", size = 11842, upload-time = "2024-07-21T12:58:20.04Z" }, +name = "pyarrow" +version = "24.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/91/13/13e1069b351bdc3881266e11147ffccf687505dbb0ea74036237f5d454a5/pyarrow-24.0.0.tar.gz", hash = "sha256:85fe721a14dd823aca09127acbb06c3ca723efbd436c004f16bca601b04dcc83", size = 1180261, upload-time = "2026-04-21T10:51:25.837Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6f/d3/a1abf004482026ddc17f4503db227787fa3cfe41ec5091ff20e4fea55e57/pyarrow-24.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:02b001b3ed4723caa44f6cd1af2d5c86aa2cf9971dacc2ffa55b21237713dfba", size = 34976759, upload-time = "2026-04-21T10:48:07.258Z" }, + { url = 
"https://files.pythonhosted.org/packages/4f/4a/34f0a36d28a2dd32225301b79daad44e243dc1a2bb77d43b60749be255c4/pyarrow-24.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:04920d6a71aabd08a0417709efce97d45ea8e6fb733d9ca9ecffb13c67839f68", size = 36658471, upload-time = "2026-04-21T10:48:13.347Z" }, + { url = "https://files.pythonhosted.org/packages/1f/78/543b94712ae8bb1a6023bcc1acf1a740fbff8286747c289cd9468fced2a5/pyarrow-24.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:a964266397740257f16f7bb2e4f08a0c81454004beab8ff59dd531b73610e9f2", size = 45675981, upload-time = "2026-04-21T10:48:20.201Z" }, + { url = "https://files.pythonhosted.org/packages/84/9f/8fb7c222b100d314137fa40ec050de56cd8c6d957d1cfff685ce72f15b17/pyarrow-24.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:6f066b179d68c413374294bc1735f68475457c933258df594443bb9d88ddc2a0", size = 48859172, upload-time = "2026-04-21T10:48:27.541Z" }, + { url = "https://files.pythonhosted.org/packages/a7/d3/1ea72538e6c8b3b475ed78d1049a2c518e655761ea50fe1171fc855fcab7/pyarrow-24.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:1183baeb14c5f587b1ec52831e665718ce632caab84b7cd6b85fd44f96114495", size = 49385733, upload-time = "2026-04-21T10:48:34.7Z" }, + { url = "https://files.pythonhosted.org/packages/c3/be/c3d8b06a1ba35f2260f8e1f771abbee7d5e345c0937aab90675706b1690a/pyarrow-24.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:806f24b4085453c197a5078218d1ee08783ebbba271badd153d1ae22a3ee804f", size = 51934335, upload-time = "2026-04-21T10:48:42.099Z" }, + { url = "https://files.pythonhosted.org/packages/9c/62/89e07a1e7329d2cde3e3c6994ba0839a24977a2beda8be6005ea3d860b99/pyarrow-24.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:e4505fc6583f7b05ab854934896bcac8253b04ac1171a77dfb73efef92076d91", size = 27271748, upload-time = "2026-04-21T10:49:42.532Z" }, + { url = 
"https://files.pythonhosted.org/packages/17/1a/cff3a59f80b5b1658549d46611b67163f65e0664431c076ad728bf9d5af4/pyarrow-24.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:1a4e45017efbf115032e4475ee876d525e0e36c742214fbe405332480ecd6275", size = 35238554, upload-time = "2026-04-21T10:48:48.526Z" }, + { url = "https://files.pythonhosted.org/packages/a8/99/cce0f42a327bfef2c420fb6078a3eb834826e5d6697bf3009fe11d2ad051/pyarrow-24.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:7986f1fa71cee060ad00758bcc79d3a93bab8559bf978fab9e53472a2e25a17b", size = 36782301, upload-time = "2026-04-21T10:48:55.181Z" }, + { url = "https://files.pythonhosted.org/packages/2a/66/8e560d5ff6793ca29aca213c53eec0dd482dd46cb93b2819e5aab52e4252/pyarrow-24.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:d3e0b61e8efb24ed38898e5cdc5fffa9124be480008d401a1f8071500494ae42", size = 45721929, upload-time = "2026-04-21T10:49:03.676Z" }, + { url = "https://files.pythonhosted.org/packages/27/0c/a26e25505d030716e078d9f16eb74973cbf0b33b672884e9f9da1c83b871/pyarrow-24.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:55a3bc1e3df3b5567b7d27ef551b2283f0c68a5e86f1cd56abc569da4f31335b", size = 48825365, upload-time = "2026-04-21T10:49:11.714Z" }, + { url = "https://files.pythonhosted.org/packages/5f/eb/771f9ecb0c65e73fe9dccdd1717901b9594f08c4515d000c7c62df573811/pyarrow-24.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:641f795b361874ac9da5294f8f443dfdbee355cf2bd9e3b8d97aaac2306b9b37", size = 49451819, upload-time = "2026-04-21T10:49:21.474Z" }, + { url = "https://files.pythonhosted.org/packages/48/da/61ae89a88732f5a785646f3ec6125dbb640fa98a540eb2b9889caa561403/pyarrow-24.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8adc8e6ce5fccf5dc707046ae4914fd537def529709cc0d285d37a7f9cd442ca", size = 51909252, upload-time = "2026-04-21T10:49:31.164Z" }, + { url = 
"https://files.pythonhosted.org/packages/cb/1a/8dd5cafab7b66573fa91c03d06d213356ad4edd71813aa75e08ce2b3a844/pyarrow-24.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:9b18371ad2f44044b81a8d23bc2d8a9b6a6226dca775e8e16cfee640473d6c5d", size = 27388127, upload-time = "2026-04-21T10:49:37.334Z" }, + { url = "https://files.pythonhosted.org/packages/ad/80/d022a34ff05d2cbedd8ccf841fc1f532ecfa9eb5ed1711b56d0e0ea71fc9/pyarrow-24.0.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:1cc9057f0319e26333b357e17f3c2c022f1a83739b48a88b25bfd5fa2dc18838", size = 35007997, upload-time = "2026-04-21T10:49:48.796Z" }, + { url = "https://files.pythonhosted.org/packages/1a/ff/f01485fda6f4e5d441afb8dd5e7681e4db18826c1e271852f5d3957d6a80/pyarrow-24.0.0-cp314-cp314-macosx_12_0_x86_64.whl", hash = "sha256:e6f1278ee4785b6db21229374a1c9e54ec7c549de5d1efc9630b6207de7e170b", size = 36678720, upload-time = "2026-04-21T10:49:55.858Z" }, + { url = "https://files.pythonhosted.org/packages/9e/c2/2d2d5fea814237923f71b36495211f20b43a1576f9a4d6da7e751a64ec6f/pyarrow-24.0.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:adbbedc55506cbdabb830890444fb856bfb0060c46c6f8026c6c2f2cf86ae795", size = 45741852, upload-time = "2026-04-21T10:50:04.624Z" }, + { url = "https://files.pythonhosted.org/packages/8e/3a/28ba9c1c1ebdbb5f1b94dfebb46f207e52e6a554b7fe4132540fde29a3a0/pyarrow-24.0.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:ae8a1145af31d903fa9bb166824d7abe9b4681a000b0159c9fb99c11bc11ad26", size = 48889852, upload-time = "2026-04-21T10:50:12.293Z" }, + { url = "https://files.pythonhosted.org/packages/df/51/4a389acfd31dca009f8fb82d7f510bb4130f2b3a8e18cf00194d0687d8ac/pyarrow-24.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:d7027eba1df3b2069e2e8d80f644fa0918b68c46432af3d088ddd390d063ecde", size = 49445207, upload-time = "2026-04-21T10:50:20.677Z" }, + { url = 
"https://files.pythonhosted.org/packages/19/4b/0bab2b23d2ae901b1b9a03c0efd4b2d070256f8ce3fc43f6e58c167b2081/pyarrow-24.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:e56a1ffe9bf7b727432b89104cc0849c21582949dd7bdcb34f17b2001a351a76", size = 51954117, upload-time = "2026-04-21T10:50:29.14Z" }, + { url = "https://files.pythonhosted.org/packages/29/88/f4e9145da0417b3d2c12035a8492b35ff4a3dbc653e614fcfb51d9dedb38/pyarrow-24.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:38be1808cdd068605b787e6ca9119b27eb275a0234e50212c3492331680c3b1e", size = 28001155, upload-time = "2026-04-21T10:51:22.337Z" }, + { url = "https://files.pythonhosted.org/packages/79/4f/46a49a63f43526da895b1a45bbb51d5baf8e4d77159f8528fc3e5490007f/pyarrow-24.0.0-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:418e48ce50a45a6a6c73c454677203a9c75c966cb1e92ca3370959185f197a05", size = 35250387, upload-time = "2026-04-21T10:50:35.552Z" }, + { url = "https://files.pythonhosted.org/packages/a0/da/d5e0cd5ef00796922404806d5f00325cdadc3441ce2c13fe7115f2df9a64/pyarrow-24.0.0-cp314-cp314t-macosx_12_0_x86_64.whl", hash = "sha256:2f16197705a230a78270cdd4ea8a1d57e86b2fdcbc34a1f6aebc72e65c986f9a", size = 36797102, upload-time = "2026-04-21T10:50:42.417Z" }, + { url = "https://files.pythonhosted.org/packages/34/c7/5904145b0a593a05236c882933d439b5720f0a145381179063722fbfc123/pyarrow-24.0.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:fb24ac194bfc5e86839d7dcd52092ee31e5fe6733fe11f5e3b06ef0812b20072", size = 45745118, upload-time = "2026-04-21T10:50:49.324Z" }, + { url = "https://files.pythonhosted.org/packages/13/d3/cca42fe166d1c6e4d5b80e530b7949104d10e17508a90ae202dac205ce2a/pyarrow-24.0.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:9700ebd9a51f5895ce75ff4ac4b3c47a7d4b42bc618be8e713e5d56bacf5f931", size = 48844765, upload-time = "2026-04-21T10:50:55.579Z" }, + { url = 
"https://files.pythonhosted.org/packages/b0/49/942c3b79878ba928324d1e17c274ed84581db8c0a749b24bcf4cbdf15bd3/pyarrow-24.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:d8ddd2768da81d3ee08cfea9b597f4abb4e8e1dc8ae7e204b608d23a0d3ab699", size = 49471890, upload-time = "2026-04-21T10:51:02.439Z" }, + { url = "https://files.pythonhosted.org/packages/76/97/ff71431000a75d84135a1ace5ca4ba11726a231a8007bbb320a4c54075d5/pyarrow-24.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:61a3d7eaa97a14768b542f3d284dc6400dd2470d9f080708b13cd46b6ae18136", size = 51932250, upload-time = "2026-04-21T10:51:10.576Z" }, + { url = "https://files.pythonhosted.org/packages/51/be/6f79d55816d5c22557cf27533543d5d70dfe692adfbee4b99f2760674f38/pyarrow-24.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:c91d00057f23b8d353039520dc3a6c09d8608164c692e9f59a175a42b2ae0c19", size = 28131282, upload-time = "2026-04-21T10:51:16.815Z" }, ] [[package]] @@ -2066,20 +2020,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ee/19/7df8b292accba3bc0de92c611c1e89423b25c08c82c18b14ca1fdbcf6e44/sqlglot-30.4.3-py3-none-any.whl", hash = "sha256:58ea8e723444569da5cec91e4c8f16e385bce3f0ce0374b8c722c3088e1c1c7a", size = 670965, upload-time = "2026-04-13T17:05:13.128Z" }, ] -[[package]] -name = "stack-data" -version = "0.6.3" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "asttokens" }, - { name = "executing" }, - { name = "pure-eval" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/28/e3/55dcc2cfbc3ca9c29519eb6884dd1415ecb53b0e934862d3559ddcb7e20b/stack_data-0.6.3.tar.gz", hash = "sha256:836a778de4fec4dcd1dcd89ed8abff8a221f58308462e1c4aa2a3cf30148f0b9", size = 44707, upload-time = "2023-09-30T13:58:05.479Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/f1/7b/ce1eafaf1a76852e2ec9b22edecf1daa58175c090266e9f6c64afcd81d91/stack_data-0.6.3-py3-none-any.whl", hash = 
"sha256:d5558e0c25a4cb0853cddad3d77da9891a08cb85dd9f9f91b9f8cd66e511e695", size = 24521, upload-time = "2023-09-30T13:58:03.53Z" }, -] - [[package]] name = "starlette" version = "1.0.0" @@ -2131,15 +2071,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/16/e1/3079a9ff9b8e11b846c6ac5c8b5bfb7ff225eee721825310c91b3b50304f/tqdm-4.67.3-py3-none-any.whl", hash = "sha256:ee1e4c0e59148062281c49d80b25b67771a127c85fc9676d3be5f243206826bf", size = 78374, upload-time = "2026-02-03T17:35:50.982Z" }, ] -[[package]] -name = "traitlets" -version = "5.14.3" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/eb/79/72064e6a701c2183016abbbfedaba506d81e30e232a68c9f0d6f6fcd1574/traitlets-5.14.3.tar.gz", hash = "sha256:9ed0579d3502c94b4b3732ac120375cda96f923114522847de4b3bb98b96b6b7", size = 161621, upload-time = "2024-04-19T11:11:49.746Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/00/c0/8f5d070730d7836adc9c9b6408dec68c6ced86b304a9b26a14df072a6e8c/traitlets-5.14.3-py3-none-any.whl", hash = "sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f", size = 85359, upload-time = "2024-04-19T11:11:46.763Z" }, -] - [[package]] name = "typer" version = "0.24.1" @@ -2264,15 +2195,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/33/e8/e40370e6d74ddba47f002a32919d91310d6074130fe4e17dabcafc15cbf1/watchdog-6.0.0-py3-none-win_ia64.whl", hash = "sha256:a1914259fa9e1454315171103c6a30961236f508b9b623eae470268bbcc6a22f", size = 79067, upload-time = "2024-11-01T14:07:11.845Z" }, ] -[[package]] -name = "wcwidth" -version = "0.6.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/35/a2/8e3becb46433538a38726c948d3399905a4c7cabd0df578ede5dc51f0ec2/wcwidth-0.6.0.tar.gz", hash = "sha256:cdc4e4262d6ef9a1a57e018384cbeb1208d8abbc64176027e2c2455c81313159", size = 159684, upload-time = "2026-02-06T19:19:40.919Z" } -wheels = [ - { 
url = "https://files.pythonhosted.org/packages/68/5a/199c59e0a824a3db2b89c5d2dade7ab5f9624dbf6448dc291b46d5ec94d3/wcwidth-0.6.0-py3-none-any.whl", hash = "sha256:1a3a1e510b553315f8e146c54764f4fb6264ffad731b3d78088cdb1478ffbdad", size = 94189, upload-time = "2026-02-06T19:19:39.646Z" }, -] - [[package]] name = "websockets" version = "16.0" From 7407c6561a004d93592e509226736164e0d2fde4 Mon Sep 17 00:00:00 2001 From: Joey Dreijer Date: Fri, 1 May 2026 18:01:38 +0200 Subject: [PATCH 15/19] Add test for notebook using the openhound_faker collector --- tests/test_notebooks.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 tests/test_notebooks.py diff --git a/tests/test_notebooks.py b/tests/test_notebooks.py new file mode 100644 index 0000000..d3bdb16 --- /dev/null +++ b/tests/test_notebooks.py @@ -0,0 +1,33 @@ +import os + +os.environ["RUNTIME__LOG_PATH"] = "/tmp/openhound-test-logs" + +import pytest + +from openhound.core.progress import Progress + + +def test_notebook_pipeline_faker(monkeypatch, tmp_path): + """Run the openhound_faker collector and test that the pipeline notebook is succesfully loaded with a preview/sample data""" + pytest.importorskip("marimo") + faker_main = pytest.importorskip("openhound_faker.main") + + monkeypatch.setenv("HOME", str(tmp_path)) + monkeypatch.chdir(tmp_path) + + faker_main.app.collector( + output_path=tmp_path / "output", + resources=[], + progress=Progress.log, + ) + + from openhound.notebooks.pipeline import app + + _outputs, defs = app.run() + + assert defs["selected_pipeline"].value == "faker_collect" + assert defs["matched_extension_name"] == "faker" + assert defs["selected_table"].value == "fake_computer" + assert defs["sample_df"].height > 0 + assert defs["as_node_df"].height > 0 + assert "prop_hostname" in defs["as_node_df"].columns From 7d4334a6eef266e42f8474a841907b69a2192410 Mon Sep 17 00:00:00 2001 From: Joey Dreijer Date: Fri, 1 May 2026 18:02:13 +0200 Subject: [PATCH 16/19] 
Fix added to the pipeline notebook to match with the actual collector name vs. schema --- src/openhound/notebooks/pipeline.py | 52 +++++++++++++++++------------ 1 file changed, 30 insertions(+), 22 deletions(-) diff --git a/src/openhound/notebooks/pipeline.py b/src/openhound/notebooks/pipeline.py index df56c0f..817519e 100644 --- a/src/openhound/notebooks/pipeline.py +++ b/src/openhound/notebooks/pipeline.py @@ -62,10 +62,18 @@ def _(Path): @app.cell def _(DEFAULT_PIPELINE_PATH, list_local_pipelines, mo): dlt_pipeline_dir, all_dlt_pipelines = list_local_pipelines(DEFAULT_PIPELINE_PATH) + collect_pipelines = [ + pipeline["name"] + for pipeline in all_dlt_pipelines + if pipeline["name"].endswith("collect") + ] + mo.stop(not collect_pipelines, "No collect pipelines found") selected_pipeline = mo.ui.dropdown( - options=[pipeline["name"] for pipeline in all_dlt_pipelines if pipeline["name"].endswith("collect")], + options=collect_pipelines, + value=collect_pipelines[0], label="Collect pipeline", full_width=True, + allow_select_none=False, ) selected_pipeline return (selected_pipeline,) @@ -245,10 +253,10 @@ def _(dlt_dataset, dlt_pipeline, os, pl, selected_table): @app.cell -def _(Path, last_fs_destination, pl, selected_schema, selected_table): +def _(Path, dlt_pipeline, last_fs_destination, pl, selected_table): dataset_path = ( Path(last_fs_destination.replace("file://", "")) - / selected_schema.value + / dlt_pipeline.dataset_name / selected_table.value ) table_df = pl.read_ndjson(dataset_path) @@ -274,17 +282,21 @@ def _(CollectorManager): @app.cell -def _(collector_options, mo, selected_schema): +def _(collector_options, dlt_pipeline, mo, selected_schema): matched_extension_name = None - if not selected_schema.value: - matched_extension_stat = mo.callout("Select a schema", kind="info") - elif selected_schema.value not in collector_options: + extension_name = ( + selected_schema.value + if selected_schema.value in collector_options + else dlt_pipeline.dataset_name + ) 
+ + if extension_name not in collector_options: matched_extension_stat = mo.callout( - f"No loaded extension matches schema `{selected_schema.value}`.", + f"No loaded extension matches schema `{selected_schema.value}` or dataset `{dlt_pipeline.dataset_name}`.", kind="warn", ) else: - matched_extension_name = selected_schema.value + matched_extension_name = extension_name matched_extension_stat = mo.stat( value=matched_extension_name, label="Matched extension", @@ -351,12 +363,14 @@ def _(selected_extension): def _(DEFAULT_LOOKUP_FILE, duckdb, mo, selected_extension): lookup_session = None if selected_extension.lookup_factory: - mo.stop( - not DEFAULT_LOOKUP_FILE.exists(), - f"Run preproc before previewing graph output. Missing lookup file: {DEFAULT_LOOKUP_FILE}", - ) - lookup_client = duckdb.connect(str(DEFAULT_LOOKUP_FILE), read_only=True) - lookup_session = selected_extension.lookup_factory(lookup_client) + if DEFAULT_LOOKUP_FILE.exists(): + lookup_client = duckdb.connect(str(DEFAULT_LOOKUP_FILE), read_only=True) + lookup_session = selected_extension.lookup_factory(lookup_client) + else: + mo.callout( + f"Lookup file `{DEFAULT_LOOKUP_FILE}` was not found. 
Node preview will still run, but lookup-backed properties may fail.", +                kind="warn", +            ) return (lookup_session,) @@ -401,7 +415,7 @@ def as_node_preview(row, model): node_dict = node_to_dict(node) properties = node_dict.pop("properties", {}) or {} -        return {**node_dict, **properties} +        return {**node_dict, **{f"prop_{key}": value for key, value in properties.items()}} node_preview_rows = [ preview_row @@ -416,11 +430,5 @@ def as_node_preview(row, model): as_node_df return - -@app.cell -def _(): -    return - - if __name__ == "__main__": app.run() From f142d870169d14f5843591d87a055a6acb907308 Mon Sep 17 00:00:00 2001 From: Joey Dreijer Date: Fri, 1 May 2026 18:03:26 +0200 Subject: [PATCH 17/19] Updated github workflow to include the new notebook test --- .github/workflows/test.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index d26ea5b..0157361 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -59,3 +59,7 @@ jobs:       - name: Run BHE job scheduling test         run: |           .venv/bin/pytest tests/test_bhe_job_scheduling.py -v + +      - name: Run notebook execution test +        run: | +          .venv/bin/pytest tests/test_notebooks.py -v From b047f2e1b1f8e77e4a426605ddbf33a4f2d1706d Mon Sep 17 00:00:00 2001 From: Joey Dreijer Date: Fri, 1 May 2026 18:21:52 +0200 Subject: [PATCH 18/19] Bump openhound-faker to v0.0.6 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index e515fdd..a3849dd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -72,7 +72,7 @@ local_scheme = "no-local-version" [dependency-groups] dev = [ -    "openhound-faker==0.0.4", +    "openhound-faker==0.0.6",     "pre-commit>=4.5.1",     "pytest>=9.0.1",     "fastapi>=0.129.0", From d3d1ae050b3130a36bdc9993e1f6af68c7a3d3bd Mon Sep 17 00:00:00 2001 From: Joey Dreijer Date: Fri, 1 May 2026 18:25:31 +0200 Subject: [PATCH 19/19] Include notebooks as dependency for tests --- .github/workflows/test.yml | 
2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 0157361..f0046f7 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -38,7 +38,7 @@ jobs: - name: Install dependencies run: | - uv pip install --python .venv -e ".[all]" --group dev + uv pip install --python .venv -e ".[all,notebooks]" --group dev - name: Run log handling tests run: |