diff --git a/pyproject.toml b/pyproject.toml index 4f4c546..e43428b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,6 +43,7 @@ build-backend = "setuptools.build_meta" [tool.pytest.ini_options] asyncio_mode = "auto" +addopts = "-n 2" [tool.mypy] ignore_missing_imports = true diff --git a/src/dt_browser/browser.py b/src/dt_browser/browser.py index 4574d7d..e0a7d5a 100644 --- a/src/dt_browser/browser.py +++ b/src/dt_browser/browser.py @@ -68,6 +68,8 @@ class TableWithBookmarks(CustomTable): ["datatable--row-bookmark", "datatable--row-search-result"] ) + _SEARCH_BG_COL = "__search_bg" + active_search_queue: reactive[list[int] | None] = reactive(None) def __init__(self, *args, bookmarks: Bookmarks, **kwargs): @@ -75,26 +77,58 @@ def __init__(self, *args, bookmarks: Bookmarks, **kwargs): self._bookmarks = bookmarks self._bookmark_highlight: Style = Style.null() self._search_highlight: Style = Style.null() + self._search_queue_set: frozenset[int] = frozenset() + self._has_search = False + + def watch_active_search_queue(self, value: list[int] | None) -> None: + self._search_queue_set = frozenset(value) if value else frozenset() + self._has_search = bool(value) def on_mount(self): self._bookmark_highlight = self.get_component_rich_style("datatable--row-bookmark") self._search_highlight = self.get_component_rich_style("datatable--row-search-result") + self._search_highlight_color = _color_name(self._search_highlight.bgcolor) + + @property + def render_header_and_table(self): + was_cached = self._render_header_and_table is not None + result = super().render_header_and_table + # Pre-compute search background column once when viewport is rebuilt. + # This avoids any per-row overhead during partial scroll updates. 
+ if not was_cached: + header, render_df = result + if not render_df.is_empty() and INDEX_COL in render_df.columns: + if self._has_search: + visible_indices = set(render_df[INDEX_COL].to_list()) + viewport_hits = list(visible_indices & self._search_queue_set) + if viewport_hits: + search_bg_expr = ( + pl.when(pl.col(INDEX_COL).is_in(viewport_hits)) + .then(pl.lit(self._search_highlight_color)) + .otherwise(pl.lit(None)) + .alias(self._SEARCH_BG_COL) + ) + else: + search_bg_expr = pl.lit(None).cast(pl.Utf8).alias(self._SEARCH_BG_COL) + else: + search_bg_expr = pl.lit(None).cast(pl.Utf8).alias(self._SEARCH_BG_COL) + render_df = render_df.with_columns(search_bg_expr) + self._render_header_and_table = (header, render_df) + result = self._render_header_and_table + return result def _get_sel_col_bg_color(self, struct: dict[str, Any]) -> str: - if self.active_search_queue and struct[INDEX_COL] in self.active_search_queue: - return _color_name(self._search_highlight.bgcolor) + search_bg = struct.get(self._SEARCH_BG_COL) + if search_bg is not None: + return search_bg if self._bookmarks.has_bookmarks and struct[INDEX_COL] in self._bookmarks.meta_dt[INDEX_COL]: return _color_name(self._bookmark_highlight.bgcolor) return super()._get_sel_col_bg_color(struct) - def _get_row_bg_color_expr(self, cursor_row_idx: int) -> pl.Expr: + def _get_row_bg_color_expr(self, cursor_row_idx: int, render_df: pl.DataFrame | None = None) -> pl.Expr: tmp = super()._get_row_bg_color_expr(cursor_row_idx) - if self.active_search_queue: - tmp = ( - pl.when(pl.col(INDEX_COL).is_in(self.active_search_queue)) - .then(pl.lit(_color_name(self._search_highlight.bgcolor))) - .otherwise(tmp) - ) + # Search highlights are pre-computed in the _SEARCH_BG_COL column + # and applied via _resolve_row_bgcolor to avoid per-row overhead. 
if self._bookmarks.has_bookmarks: tmp = ( pl.when(pl.col(INDEX_COL).is_in(self._bookmarks.meta_dt[INDEX_COL])) @@ -103,6 +137,18 @@ def _get_row_bg_color_expr(self, cursor_row_idx: int) -> pl.Expr: ) return tmp + def _resolve_row_bgcolor(self, struct: dict[str, Any]) -> str | None: + """Apply search highlight from pre-computed column. + + Priority: Polars-computed bgcolor (bookmarks, then cursor/base) > search; the search color is used only when that bgcolor is None. + """ + base = struct["bgcolor"] + if base is None: + search_bg = struct.get(self._SEARCH_BG_COL) + if search_bg is not None: + return search_bg + return base + _ALREADY_DT = "dt" diff --git a/src/dt_browser/custom_table.py b/src/dt_browser/custom_table.py index 4d8527b..8184dbe 100644 --- a/src/dt_browser/custom_table.py +++ b/src/dt_browser/custom_table.py @@ -605,7 +605,7 @@ def build_selector(cols: list[str], needed_padding: int = 0): self._render_header_and_table = (header, rend.collect()) return self._render_header_and_table - def _get_row_bg_color_expr(self, cursor_row_idx: int) -> pl.Expr: + def _get_row_bg_color_expr(self, cursor_row_idx: int, render_df: pl.DataFrame | None = None) -> pl.Expr: return ( pl.when(pl.col(DISPLAY_IDX_COL) == cursor_row_idx) @@ -619,6 +619,14 @@ def _get_row_bg_color_expr(self, cursor_row_idx: int) -> pl.Expr: .otherwise(pl.lit(None)) ) + def _resolve_row_bgcolor(self, struct: dict[str, Any]) -> str | None: + """Hook for subclasses to adjust the row background color. + + Called per row in _gen_segments after the Polars bgcolor expression is evaluated. + Default implementation returns the Polars-computed bgcolor unchanged. 
+ """ + return struct["bgcolor"] + def _get_sel_col_bg_color(self, struct: dict[str, Any]): return ( _color_name(self._row_col_highlight.bgcolor) @@ -641,10 +649,11 @@ def _gen_segments(self, lines: list[int] | None): cursor_row_idx = self.cursor_coordinate.row - scroll_y for struct in ( - rend.with_columns(self._get_row_bg_color_expr(cursor_row_idx).alias("bgcolor")) + rend.with_columns(self._get_row_bg_color_expr(cursor_row_idx, render_df=render_df).alias("bgcolor")) .collect() .iter_rows(named=True) ): + bgcolor = self._resolve_row_bgcolor(struct) segs = [ Segment( PADDING_STR, @@ -657,7 +666,7 @@ def _gen_segments(self, lines: list[int] | None): ) else None ), - bgcolor=struct["bgcolor"], + bgcolor=bgcolor, ), ), Segment( @@ -671,7 +680,7 @@ def _gen_segments(self, lines: list[int] | None): ) else struct[COLOR_COL] ), - bgcolor=struct["bgcolor"], + bgcolor=bgcolor, ), ), Segment( @@ -700,7 +709,7 @@ def _gen_segments(self, lines: list[int] | None): ) else struct[COLOR_COL] ), - bgcolor=struct["bgcolor"], + bgcolor=bgcolor, ), ), ] diff --git a/tests/test_search_scroll_perf.py b/tests/test_search_scroll_perf.py new file mode 100644 index 0000000..86b969a --- /dev/null +++ b/tests/test_search_scroll_perf.py @@ -0,0 +1,192 @@ +"""Performance test: scrolling with active search highlights vs without. + +Creates a large dataframe (200k rows, ~160 columns) with varied cardinality, +applies searches with different match distributions at different terminal sizes, +then measures scroll performance against the baseline (no search). +Fails if scrolling is >15% slower. 
+""" + +import random +import time + +import polars as pl +import pytest + +from dt_browser.browser import DtBrowser, DtBrowserApp +from dt_browser.filter_box import FilterBox + +_NUM_ROWS = 200_000 +_NUM_SCROLL_OPS = 60 +_SLOWDOWN_THRESHOLD = 0.15 + +# ~160 columns at ~20 chars each ≈ 3200 chars, enough to fill 3000-wide terminals +_STR_COLS_PER_GROUP = 30 +_NUM_COLS_PER_GROUP = 30 + +_SEARCH_PARAMS = [ + pytest.param( + "sentinel_col = 'NEEDLE'", + 1, + 1, + id="single_match", + ), + pytest.param( + "sparse_col = 1", + 100, + 300, + id="sparse_far_apart", + ), + pytest.param( + "adjacent_col = 1", + 40_000, + 60_000, + id="many_adjacent", + ), + pytest.param( + "int_col_0 > 5", + 100_000, + _NUM_ROWS, + id="dense_most_rows", + ), +] + +_SCREEN_SIZES = [ + pytest.param((120, 30), id="120x30"), + pytest.param((800, 200), id="800x200"), + pytest.param((3000, 1000), id="3000x1000"), +] + + +def _make_large_df() -> pl.DataFrame: + random.seed(42) + data: dict[str, list] = {} + + # 30 low-cardinality string columns (5-20 unique values) + for i in range(_STR_COLS_PER_GROUP): + card = random.randint(5, 20) + choices = [f"cat{i}_{j}" for j in range(card)] + data[f"low_card_str_{i}"] = [random.choice(choices) for _ in range(_NUM_ROWS)] + + # 30 medium-cardinality string columns (100-500 unique values) + for i in range(_STR_COLS_PER_GROUP): + card = random.randint(100, 500) + choices = [f"med{i}_{j}" for j in range(card)] + data[f"med_card_str_{i}"] = [random.choice(choices) for _ in range(_NUM_ROWS)] + + # 30 high-cardinality string columns (mostly unique) + for i in range(_STR_COLS_PER_GROUP): + data[f"high_card_str_{i}"] = [f"unique{i}_{j}" for j in range(_NUM_ROWS)] + + # 30 integer columns with varied ranges + for i in range(_NUM_COLS_PER_GROUP): + upper = random.choice([10, 100, 1000, 100_000]) + data[f"int_col_{i}"] = [random.randint(0, upper) for _ in range(_NUM_ROWS)] + + # 30 float columns + for i in range(_NUM_COLS_PER_GROUP): + data[f"float_col_{i}"] = 
[random.random() * 1000 for _ in range(_NUM_ROWS)] + + # --- Columns designed for specific search distributions --- + + # Single match: exactly one row has the sentinel value + data["sentinel_col"] = ["NEEDLE" if i == _NUM_ROWS // 2 else "haystack" for i in range(_NUM_ROWS)] + + # Sparse matches: every ~1000th row matches (spread far apart) + data["sparse_col"] = [1 if i % 1000 == 0 else 0 for i in range(_NUM_ROWS)] + + # Adjacent matches: a contiguous block of 50k rows (the second quarter: rows 50k-100k) + block_start = _NUM_ROWS // 4 + block_end = block_start + _NUM_ROWS // 4 + data["adjacent_col"] = [1 if block_start <= i < block_end else 0 for i in range(_NUM_ROWS)] + + return pl.DataFrame(data) + + +# Build the dataframe once at module level so parametrized tests share it. +_LARGE_DF = _make_large_df() + + +async def _scroll_around(pilot, n_ops: int) -> float: + """Perform n_ops scroll operations using only up/down arrow keys.""" + # Scroll down for 3/4 of the ops, then back up for the rest + down_count = (n_ops * 3) // 4 + up_count = n_ops - down_count + + # Warm up the rendering pipeline + for _ in range(3): + await pilot.press("down") + await pilot.pause() + for _ in range(3): + await pilot.press("up") + await pilot.pause() + + start = time.perf_counter() + for _ in range(down_count): + await pilot.press("down") + await pilot.pause() + for _ in range(up_count): + await pilot.press("up") + await pilot.pause() + elapsed = time.perf_counter() - start + return elapsed + + +async def _apply_search(pilot, query: str): + """Open search box, type query, submit, and wait for results.""" + await pilot.press("/") + await pilot.pause() + await pilot.press(*list(query)) + await pilot.press("enter") + await pilot.pause() + for _ in range(5): + await pilot.pause() + + +@pytest.mark.parametrize("screen_size", _SCREEN_SIZES) +@pytest.mark.parametrize("search_query, min_hits, max_hits", _SEARCH_PARAMS) +async def test_search_scroll_performance(search_query, min_hits, max_hits, screen_size): + 
"""Scrolling with search highlights must be within 15% of baseline scroll speed.""" + app = DtBrowserApp("perf_test", _LARGE_DF) + + async with app.run_test(size=screen_size) as pilot: + await pilot.pause() + browser = app.query_one(DtBrowser) + + # --- Baseline: scroll without search --- + baseline_time = await _scroll_around(pilot, _NUM_SCROLL_OPS) + + # Reset cursor to top + await pilot.press("g") + await pilot.pause() + + # --- Apply search --- + await _apply_search(pilot, search_query) + + assert browser.active_search_queue is not None, "Search should have produced results" + hit_count = len(browser.active_search_queue) + assert min_hits <= hit_count <= max_hits, ( + f"Expected {min_hits}-{max_hits} hits, got {hit_count}" + ) + + # Reset cursor to top for fair comparison + await pilot.press("g") + await pilot.pause() + + # --- Measure: scroll with search active --- + search_time = await _scroll_around(pilot, _NUM_SCROLL_OPS) + + slowdown = (search_time - baseline_time) / baseline_time + w, h = screen_size + label = f"{w}x{h}" + print(f"\n[{label} | {search_query}]") + print(f" Baseline scroll time: {baseline_time:.3f}s") + print(f" Search scroll time: {search_time:.3f}s") + print(f" Slowdown: {slowdown:.1%}") + print(f" Search hits: {hit_count:,}") + + assert slowdown <= _SLOWDOWN_THRESHOLD, ( + f"[{label}] Search scrolling is {slowdown:.1%} slower than baseline " + f"(threshold: {_SLOWDOWN_THRESHOLD:.0%}). " + f"Baseline: {baseline_time:.3f}s, Search: {search_time:.3f}s, " + f"Hits: {hit_count:,}" + )