From 20c0f42442f6302a65cd54bcd3bdf5c4aa0b8212 Mon Sep 17 00:00:00 2001 From: Claude Code Date: Wed, 25 Mar 2026 04:32:57 +0000 Subject: [PATCH 1/2] Add search scroll performance tests with pytest-xdist parallelism Parametrized tests measuring scroll performance with active search highlights vs baseline across different search cardinalities (single match, sparse, adjacent block, dense) and screen sizes (120x30, 800x200, 3000x1000). Tests fail if search scrolling is >15% slower. Configured pytest-xdist with -n 2 for parallel execution. Co-Authored-By: Claude Opus 4.6 --- pyproject.toml | 1 + tests/test_search_scroll_perf.py | 192 +++++++++++++++++++++++++++++++ 2 files changed, 193 insertions(+) create mode 100644 tests/test_search_scroll_perf.py diff --git a/pyproject.toml b/pyproject.toml index 4f4c546..e43428b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,6 +43,7 @@ build-backend = "setuptools.build_meta" [tool.pytest.ini_options] asyncio_mode = "auto" +addopts = "-n 2" [tool.mypy] ignore_missing_imports = true diff --git a/tests/test_search_scroll_perf.py b/tests/test_search_scroll_perf.py new file mode 100644 index 0000000..86b969a --- /dev/null +++ b/tests/test_search_scroll_perf.py @@ -0,0 +1,192 @@ +"""Performance test: scrolling with active search highlights vs without. + +Creates a large dataframe (200k rows, ~160 columns) with varied cardinality, +applies searches with different match distributions at different terminal sizes, +then measures scroll performance against the baseline (no search). +Fails if scrolling is >15% slower. 
+""" + +import random +import time + +import polars as pl +import pytest + +from dt_browser.browser import DtBrowser, DtBrowserApp +from dt_browser.filter_box import FilterBox + +_NUM_ROWS = 200_000 +_NUM_SCROLL_OPS = 60 +_SLOWDOWN_THRESHOLD = 0.15 + +# ~160 columns at ~20 chars each ≈ 3200 chars, enough to fill 3000-wide terminals +_STR_COLS_PER_GROUP = 30 +_NUM_COLS_PER_GROUP = 30 + +_SEARCH_PARAMS = [ + pytest.param( + "sentinel_col = 'NEEDLE'", + 1, + 1, + id="single_match", + ), + pytest.param( + "sparse_col = 1", + 100, + 300, + id="sparse_far_apart", + ), + pytest.param( + "adjacent_col = 1", + 40_000, + 60_000, + id="many_adjacent", + ), + pytest.param( + "int_col_0 > 5", + 100_000, + _NUM_ROWS, + id="dense_most_rows", + ), +] + +_SCREEN_SIZES = [ + pytest.param((120, 30), id="120x30"), + pytest.param((800, 200), id="800x200"), + pytest.param((3000, 1000), id="3000x1000"), +] + + +def _make_large_df() -> pl.DataFrame: + random.seed(42) + data: dict[str, list] = {} + + # 30 low-cardinality string columns (5-20 unique values) + for i in range(_STR_COLS_PER_GROUP): + card = random.randint(5, 20) + choices = [f"cat{i}_{j}" for j in range(card)] + data[f"low_card_str_{i}"] = [random.choice(choices) for _ in range(_NUM_ROWS)] + + # 30 medium-cardinality string columns (100-500 unique values) + for i in range(_STR_COLS_PER_GROUP): + card = random.randint(100, 500) + choices = [f"med{i}_{j}" for j in range(card)] + data[f"med_card_str_{i}"] = [random.choice(choices) for _ in range(_NUM_ROWS)] + + # 30 high-cardinality string columns (mostly unique) + for i in range(_STR_COLS_PER_GROUP): + data[f"high_card_str_{i}"] = [f"unique{i}_{j}" for j in range(_NUM_ROWS)] + + # 30 integer columns with varied ranges + for i in range(_NUM_COLS_PER_GROUP): + upper = random.choice([10, 100, 1000, 100_000]) + data[f"int_col_{i}"] = [random.randint(0, upper) for _ in range(_NUM_ROWS)] + + # 30 float columns + for i in range(_NUM_COLS_PER_GROUP): + data[f"float_col_{i}"] = 
[random.random() * 1000 for _ in range(_NUM_ROWS)] + + # --- Columns designed for specific search distributions --- + + # Single match: exactly one row has the sentinel value + data["sentinel_col"] = ["NEEDLE" if i == _NUM_ROWS // 2 else "haystack" for i in range(_NUM_ROWS)] + + # Sparse matches: every ~1000th row matches (spread far apart) + data["sparse_col"] = [1 if i % 1000 == 0 else 0 for i in range(_NUM_ROWS)] + + # Adjacent matches: a contiguous block of 50k rows in the middle + block_start = _NUM_ROWS // 4 + block_end = block_start + _NUM_ROWS // 4 + data["adjacent_col"] = [1 if block_start <= i < block_end else 0 for i in range(_NUM_ROWS)] + + return pl.DataFrame(data) + + +# Build the dataframe once at module level so parametrized tests share it. +_LARGE_DF = _make_large_df() + + +async def _scroll_around(pilot, n_ops: int) -> float: + """Perform n_ops scroll operations using only up/down arrow keys.""" + # Scroll down for 3/4 of the ops, then back up for the rest + down_count = (n_ops * 3) // 4 + up_count = n_ops - down_count + + # Warm up the rendering pipeline + for _ in range(3): + await pilot.press("down") + await pilot.pause() + for _ in range(3): + await pilot.press("up") + await pilot.pause() + + start = time.perf_counter() + for _ in range(down_count): + await pilot.press("down") + await pilot.pause() + for _ in range(up_count): + await pilot.press("up") + await pilot.pause() + elapsed = time.perf_counter() - start + return elapsed + + +async def _apply_search(pilot, query: str): + """Open search box, type query, submit, and wait for results.""" + await pilot.press("/") + await pilot.pause() + await pilot.press(*list(query)) + await pilot.press("enter") + await pilot.pause() + for _ in range(5): + await pilot.pause() + + +@pytest.mark.parametrize("screen_size", _SCREEN_SIZES) +@pytest.mark.parametrize("search_query, min_hits, max_hits", _SEARCH_PARAMS) +async def test_search_scroll_performance(search_query, min_hits, max_hits, screen_size): + 
"""Scrolling with search highlights must be within 15% of baseline scroll speed.""" + app = DtBrowserApp("perf_test", _LARGE_DF) + + async with app.run_test(size=screen_size) as pilot: + await pilot.pause() + browser = app.query_one(DtBrowser) + + # --- Baseline: scroll without search --- + baseline_time = await _scroll_around(pilot, _NUM_SCROLL_OPS) + + # Reset cursor to top + await pilot.press("g") + await pilot.pause() + + # --- Apply search --- + await _apply_search(pilot, search_query) + + assert browser.active_search_queue is not None, "Search should have produced results" + hit_count = len(browser.active_search_queue) + assert min_hits <= hit_count <= max_hits, ( + f"Expected {min_hits}-{max_hits} hits, got {hit_count}" + ) + + # Reset cursor to top for fair comparison + await pilot.press("g") + await pilot.pause() + + # --- Measure: scroll with search active --- + search_time = await _scroll_around(pilot, _NUM_SCROLL_OPS) + + slowdown = (search_time - baseline_time) / baseline_time + w, h = screen_size + label = f"{w}x{h}" + print(f"\n[{label} | {search_query}]") + print(f" Baseline scroll time: {baseline_time:.3f}s") + print(f" Search scroll time: {search_time:.3f}s") + print(f" Slowdown: {slowdown:.1%}") + print(f" Search hits: {hit_count:,}") + + assert slowdown <= _SLOWDOWN_THRESHOLD, ( + f"[{label}] Search scrolling is {slowdown:.1%} slower than baseline " + f"(threshold: {_SLOWDOWN_THRESHOLD:.0%}). " + f"Baseline: {baseline_time:.3f}s, Search: {search_time:.3f}s, " + f"Hits: {hit_count:,}" + ) From 39d9a1dcc8e9299c60a57d4613cd2518dd467102 Mon Sep 17 00:00:00 2001 From: Claude Code Date: Wed, 25 Mar 2026 05:52:24 +0000 Subject: [PATCH 2/2] Optimize search highlight rendering with viewport-scoped pre-computation Replace the O(n) linear scan on active_search_queue list and the per-render is_in(full_200k_list) Polars expression with a three-part optimization: 1. Convert search queue to frozenset for O(1) membership tests 2. 
Pre-compute a __search_bg column in the viewport dataframe once per viewport rebuild, using only the visible indices intersected with the search set. This avoids rebuilding any Polars is_in expression during partial scroll updates (up/down keys). 3. Add _resolve_row_bgcolor hook in CustomTable so subclasses can apply row-level bgcolor adjustments in Python without modifying the Polars expression chain. The search highlight is now read from the pre-computed column during the segment iteration loop, eliminating all per-row overhead during the hot scroll path. Co-Authored-By: Claude Opus 4.6 --- src/dt_browser/browser.py | 64 +++++++++++++++++++++++++++++----- src/dt_browser/custom_table.py | 19 +++++++--- 2 files changed, 69 insertions(+), 14 deletions(-) diff --git a/src/dt_browser/browser.py b/src/dt_browser/browser.py index 4574d7d..e0a7d5a 100644 --- a/src/dt_browser/browser.py +++ b/src/dt_browser/browser.py @@ -68,6 +68,8 @@ class TableWithBookmarks(CustomTable): ["datatable--row-bookmark", "datatable--row-search-result"] ) + _SEARCH_BG_COL = "__search_bg" + active_search_queue: reactive[list[int] | None] = reactive(None) def __init__(self, *args, bookmarks: Bookmarks, **kwargs): @@ -75,26 +77,58 @@ def __init__(self, *args, bookmarks: Bookmarks, **kwargs): self._bookmarks = bookmarks self._bookmark_highlight: Style = Style.null() self._search_highlight: Style = Style.null() + self._search_queue_set: frozenset[int] = frozenset() + self._has_search = False + + def watch_active_search_queue(self, value: list[int] | None) -> None: + self._search_queue_set = frozenset(value) if value else frozenset() + self._has_search = bool(value) def on_mount(self): self._bookmark_highlight = self.get_component_rich_style("datatable--row-bookmark") self._search_highlight = self.get_component_rich_style("datatable--row-search-result") + self._search_highlight_color = _color_name(self._search_highlight.bgcolor) + + @property + def render_header_and_table(self): + was_cached = 
self._render_header_and_table is not None + result = super().render_header_and_table + # Pre-compute search background column once when viewport is rebuilt. + # This avoids any per-row overhead during partial scroll updates. + if not was_cached: + header, render_df = result + if not render_df.is_empty() and INDEX_COL in render_df.columns: + if self._has_search: + visible_indices = set(render_df[INDEX_COL].to_list()) + viewport_hits = list(visible_indices & self._search_queue_set) + if viewport_hits: + search_bg_expr = ( + pl.when(pl.col(INDEX_COL).is_in(viewport_hits)) + .then(pl.lit(self._search_highlight_color)) + .otherwise(pl.lit(None)) + .alias(self._SEARCH_BG_COL) + ) + else: + search_bg_expr = pl.lit(None).cast(pl.Utf8).alias(self._SEARCH_BG_COL) + else: + search_bg_expr = pl.lit(None).cast(pl.Utf8).alias(self._SEARCH_BG_COL) + render_df = render_df.with_columns(search_bg_expr) + self._render_header_and_table = (header, render_df) + result = self._render_header_and_table + return result def _get_sel_col_bg_color(self, struct: dict[str, Any]) -> str: - if self.active_search_queue and struct[INDEX_COL] in self.active_search_queue: - return _color_name(self._search_highlight.bgcolor) + search_bg = struct.get(self._SEARCH_BG_COL) + if search_bg is not None: + return search_bg if self._bookmarks.has_bookmarks and struct[INDEX_COL] in self._bookmarks.meta_dt[INDEX_COL]: return _color_name(self._bookmark_highlight.bgcolor) return super()._get_sel_col_bg_color(struct) - def _get_row_bg_color_expr(self, cursor_row_idx: int) -> pl.Expr: + def _get_row_bg_color_expr(self, cursor_row_idx: int, render_df: pl.DataFrame | None = None) -> pl.Expr: tmp = super()._get_row_bg_color_expr(cursor_row_idx) - if self.active_search_queue: - tmp = ( - pl.when(pl.col(INDEX_COL).is_in(self.active_search_queue)) - .then(pl.lit(_color_name(self._search_highlight.bgcolor))) - .otherwise(tmp) - ) + # Search highlights are pre-computed in the _SEARCH_BG_COL column + # and applied via 
_resolve_row_bgcolor to avoid per-row overhead. if self._bookmarks.has_bookmarks: tmp = ( pl.when(pl.col(INDEX_COL).is_in(self._bookmarks.meta_dt[INDEX_COL])) @@ -103,6 +137,18 @@ def _get_row_bg_color_expr(self, cursor_row_idx: int) -> pl.Expr: ) return tmp + def _resolve_row_bgcolor(self, struct: dict[str, Any]) -> str | None: + """Apply search highlight from pre-computed column. + + Priority: bookmarks > cursor (base) > search; the search color is used only when the computed bgcolor is None. + """ + base = struct["bgcolor"] + if base is None: + search_bg = struct.get(self._SEARCH_BG_COL) + if search_bg is not None: + return search_bg + return base + _ALREADY_DT = "dt" diff --git a/src/dt_browser/custom_table.py b/src/dt_browser/custom_table.py index 4d8527b..8184dbe 100644 --- a/src/dt_browser/custom_table.py +++ b/src/dt_browser/custom_table.py @@ -605,7 +605,7 @@ def build_selector(cols: list[str], needed_padding: int = 0): self._render_header_and_table = (header, rend.collect()) return self._render_header_and_table - def _get_row_bg_color_expr(self, cursor_row_idx: int) -> pl.Expr: + def _get_row_bg_color_expr(self, cursor_row_idx: int, render_df: pl.DataFrame | None = None) -> pl.Expr: return ( pl.when(pl.col(DISPLAY_IDX_COL) == cursor_row_idx) @@ -619,6 +619,14 @@ def _get_row_bg_color_expr(self, cursor_row_idx: int) -> pl.Expr: .otherwise(pl.lit(None)) ) + def _resolve_row_bgcolor(self, struct: dict[str, Any]) -> str | None: + """Hook for subclasses to adjust the row background color. + + Called per row in _gen_segments after the Polars bgcolor expression is evaluated. + Default implementation returns the Polars-computed bgcolor unchanged. 
+ """ + return struct["bgcolor"] + def _get_sel_col_bg_color(self, struct: dict[str, Any]): return ( _color_name(self._row_col_highlight.bgcolor) @@ -641,10 +649,11 @@ def _gen_segments(self, lines: list[int] | None): cursor_row_idx = self.cursor_coordinate.row - scroll_y for struct in ( - rend.with_columns(self._get_row_bg_color_expr(cursor_row_idx).alias("bgcolor")) + rend.with_columns(self._get_row_bg_color_expr(cursor_row_idx, render_df=render_df).alias("bgcolor")) .collect() .iter_rows(named=True) ): + bgcolor = self._resolve_row_bgcolor(struct) segs = [ Segment( PADDING_STR, @@ -657,7 +666,7 @@ def _gen_segments(self, lines: list[int] | None): ) else None ), - bgcolor=struct["bgcolor"], + bgcolor=bgcolor, ), ), Segment( @@ -671,7 +680,7 @@ def _gen_segments(self, lines: list[int] | None): ) else struct[COLOR_COL] ), - bgcolor=struct["bgcolor"], + bgcolor=bgcolor, ), ), Segment( @@ -700,7 +709,7 @@ def _gen_segments(self, lines: list[int] | None): ) else struct[COLOR_COL] ), - bgcolor=struct["bgcolor"], + bgcolor=bgcolor, ), ), ]