From 20c0f42442f6302a65cd54bcd3bdf5c4aa0b8212 Mon Sep 17 00:00:00 2001 From: Claude Code Date: Wed, 25 Mar 2026 04:32:57 +0000 Subject: [PATCH 1/2] Add search scroll performance tests with pytest-xdist parallelism Parametrized tests measuring scroll performance with active search highlights vs baseline across different search cardinalities (single match, sparse, adjacent block, dense) and screen sizes (120x30, 800x200, 3000x1000). Tests fail if search scrolling is >15% slower. Configured pytest-xdist with -n 2 for parallel execution. Co-Authored-By: Claude Opus 4.6 --- pyproject.toml | 1 + tests/test_search_scroll_perf.py | 192 +++++++++++++++++++++++++++++++ 2 files changed, 193 insertions(+) create mode 100644 tests/test_search_scroll_perf.py diff --git a/pyproject.toml b/pyproject.toml index 4f4c546..e43428b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,6 +43,7 @@ build-backend = "setuptools.build_meta" [tool.pytest.ini_options] asyncio_mode = "auto" +addopts = "-n 2" [tool.mypy] ignore_missing_imports = true diff --git a/tests/test_search_scroll_perf.py b/tests/test_search_scroll_perf.py new file mode 100644 index 0000000..86b969a --- /dev/null +++ b/tests/test_search_scroll_perf.py @@ -0,0 +1,192 @@ +"""Performance test: scrolling with active search highlights vs without. + +Creates a large dataframe (200k rows, ~160 columns) with varied cardinality, +applies searches with different match distributions at different terminal sizes, +then measures scroll performance against the baseline (no search). +Fails if scrolling is >15% slower. +""" + +import random +import time + +import polars as pl +import pytest + +from dt_browser.browser import DtBrowser, DtBrowserApp +from dt_browser.filter_box import FilterBox + +_NUM_ROWS = 200_000 +_NUM_SCROLL_OPS = 60 +_SLOWDOWN_THRESHOLD = 0.15 + +# ~160 columns at ~20 chars each ≈ 3200 chars, enough to fill 3000-wide terminals +_STR_COLS_PER_GROUP = 30 +_NUM_COLS_PER_GROUP = 30 + +_SEARCH_PARAMS = [ + pytest.param( + "sentinel_col = 'NEEDLE'", + 1, + 1, + id="single_match", + ), + pytest.param( + "sparse_col = 1", + 100, + 300, + id="sparse_far_apart", + ), + pytest.param( + "adjacent_col = 1", + 40_000, + 60_000, + id="many_adjacent", + ), + pytest.param( + "int_col_0 > 5", + 100_000, + _NUM_ROWS, + id="dense_most_rows", + ), +] + +_SCREEN_SIZES = [ + pytest.param((120, 30), id="120x30"), + pytest.param((800, 200), id="800x200"), + pytest.param((3000, 1000), id="3000x1000"), +] + + +def _make_large_df() -> pl.DataFrame: + random.seed(42) + data: dict[str, list] = {} + + # 30 low-cardinality string columns (5-20 unique values) + for i in range(_STR_COLS_PER_GROUP): + card = random.randint(5, 20) + choices = [f"cat{i}_{j}" for j in range(card)] + data[f"low_card_str_{i}"] = [random.choice(choices) for _ in range(_NUM_ROWS)] + + # 30 medium-cardinality string columns (100-500 unique values) + for i in range(_STR_COLS_PER_GROUP): + card = random.randint(100, 500) + choices = [f"med{i}_{j}" for j in range(card)] + data[f"med_card_str_{i}"] = [random.choice(choices) for _ in range(_NUM_ROWS)] + + # 30 high-cardinality string columns (mostly unique) + for i in range(_STR_COLS_PER_GROUP): + data[f"high_card_str_{i}"] = [f"unique{i}_{j}" for j in range(_NUM_ROWS)] + + # 30 integer columns with varied ranges + for i in range(_NUM_COLS_PER_GROUP): + upper = random.choice([10, 100, 1000, 100_000]) + data[f"int_col_{i}"] = [random.randint(0, upper) for _ in range(_NUM_ROWS)] + + # 30 float columns + for i in range(_NUM_COLS_PER_GROUP): + data[f"float_col_{i}"] = [random.random() * 1000 for _ in range(_NUM_ROWS)] + + # --- Columns designed for specific search distributions --- + + # Single match: exactly one row has the sentinel value + data["sentinel_col"] = ["NEEDLE" if i == _NUM_ROWS // 2 else "haystack" for i in range(_NUM_ROWS)] + + # Sparse matches: every ~1000th row matches (spread far apart) + data["sparse_col"] = [1 if i % 1000 == 0 else 0 for i in range(_NUM_ROWS)] + + # Adjacent matches: a contiguous block of 50k rows in the middle + block_start = _NUM_ROWS // 4 + block_end = block_start + _NUM_ROWS // 4 + data["adjacent_col"] = [1 if block_start <= i < block_end else 0 for i in range(_NUM_ROWS)] + + return pl.DataFrame(data) + + +# Build the dataframe once at module level so parametrized tests share it. +_LARGE_DF = _make_large_df() + + +async def _scroll_around(pilot, n_ops: int) -> float: + """Perform n_ops scroll operations using only up/down arrow keys.""" + # Scroll down for 3/4 of the ops, then back up for the rest + down_count = (n_ops * 3) // 4 + up_count = n_ops - down_count + + # Warm up the rendering pipeline + for _ in range(3): + await pilot.press("down") + await pilot.pause() + for _ in range(3): + await pilot.press("up") + await pilot.pause() + + start = time.perf_counter() + for _ in range(down_count): + await pilot.press("down") + await pilot.pause() + for _ in range(up_count): + await pilot.press("up") + await pilot.pause() + elapsed = time.perf_counter() - start + return elapsed + + +async def _apply_search(pilot, query: str): + """Open search box, type query, submit, and wait for results.""" + await pilot.press("/") + await pilot.pause() + await pilot.press(*list(query)) + await pilot.press("enter") + await pilot.pause() + for _ in range(5): + await pilot.pause() + + +@pytest.mark.parametrize("screen_size", _SCREEN_SIZES) +@pytest.mark.parametrize("search_query, min_hits, max_hits", _SEARCH_PARAMS) +async def test_search_scroll_performance(search_query, min_hits, max_hits, screen_size): + """Scrolling with search highlights must be within 15% of baseline scroll speed.""" + app = DtBrowserApp("perf_test", _LARGE_DF) + + async with app.run_test(size=screen_size) as pilot: + await pilot.pause() + browser = app.query_one(DtBrowser) + + # --- Baseline: scroll without search --- + baseline_time = await _scroll_around(pilot, _NUM_SCROLL_OPS) + + # Reset cursor to top + await pilot.press("g") + await pilot.pause() + + # --- Apply search --- + await _apply_search(pilot, search_query) + + assert browser.active_search_queue is not None, "Search should have produced results" + hit_count = len(browser.active_search_queue) + assert min_hits <= hit_count <= max_hits, ( + f"Expected {min_hits}-{max_hits} hits, got {hit_count}" + ) + + # Reset cursor to top for fair comparison + await pilot.press("g") + await pilot.pause() + + # --- Measure: scroll with search active --- + search_time = await _scroll_around(pilot, _NUM_SCROLL_OPS) + + slowdown = (search_time - baseline_time) / baseline_time + w, h = screen_size + label = f"{w}x{h}" + print(f"\n[{label} | {search_query}]") + print(f" Baseline scroll time: {baseline_time:.3f}s") + print(f" Search scroll time: {search_time:.3f}s") + print(f" Slowdown: {slowdown:.1%}") + print(f" Search hits: {hit_count:,}") + + assert slowdown <= _SLOWDOWN_THRESHOLD, ( + f"[{label}] Search scrolling is {slowdown:.1%} slower than baseline " + f"(threshold: {_SLOWDOWN_THRESHOLD:.0%}). " + f"Baseline: {baseline_time:.3f}s, Search: {search_time:.3f}s, " + f"Hits: {hit_count:,}" + ) From 2b7d3c408c4c88ef9bbf41449204c61791a2f832 Mon Sep 17 00:00:00 2001 From: Claude Code Date: Wed, 25 Mar 2026 04:44:39 +0000 Subject: [PATCH 2/2] Optimize search highlight lookups with set and pre-computed Polars Series Convert active_search_queue lookups from O(n) list scans to O(1) set lookups for per-row color checks, and cache a Polars Series to avoid rebuilding from a Python list on every render call to is_in(). Co-Authored-By: Claude Opus 4.6 --- src/dt_browser/browser.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/dt_browser/browser.py b/src/dt_browser/browser.py index 4574d7d..442119a 100644 --- a/src/dt_browser/browser.py +++ b/src/dt_browser/browser.py @@ -75,13 +75,23 @@ def __init__(self, *args, bookmarks: Bookmarks, **kwargs): self._bookmarks = bookmarks self._bookmark_highlight: Style = Style.null() self._search_highlight: Style = Style.null() + self._search_set: set[int] = set() + self._search_series: pl.Series | None = None + + def watch_active_search_queue(self, queue: list[int] | None) -> None: + if queue: + self._search_set = set(queue) + self._search_series = pl.Series(INDEX_COL, queue) + else: + self._search_set = set() + self._search_series = None def on_mount(self): self._bookmark_highlight = self.get_component_rich_style("datatable--row-bookmark") self._search_highlight = self.get_component_rich_style("datatable--row-search-result") def _get_sel_col_bg_color(self, struct: dict[str, Any]) -> str: - if self.active_search_queue and struct[INDEX_COL] in self.active_search_queue: + if self._search_set and struct[INDEX_COL] in self._search_set: return _color_name(self._search_highlight.bgcolor) if self._bookmarks.has_bookmarks and struct[INDEX_COL] in self._bookmarks.meta_dt[INDEX_COL]: return _color_name(self._bookmark_highlight.bgcolor) @@ -89,9 +99,9 @@ def _get_sel_col_bg_color(self, struct: dict[str, Any]) -> str: def _get_row_bg_color_expr(self, cursor_row_idx: int) -> pl.Expr: tmp = super()._get_row_bg_color_expr(cursor_row_idx) - if self.active_search_queue: + if self._search_series is not None: tmp = ( - pl.when(pl.col(INDEX_COL).is_in(self.active_search_queue)) + pl.when(pl.col(INDEX_COL).is_in(self._search_series)) .then(pl.lit(_color_name(self._search_highlight.bgcolor))) .otherwise(tmp) )