Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ build-backend = "setuptools.build_meta"

[tool.pytest.ini_options]
asyncio_mode = "auto"
addopts = "-n 2"

[tool.mypy]
ignore_missing_imports = true
Expand Down
64 changes: 55 additions & 9 deletions src/dt_browser/browser.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,33 +68,67 @@ class TableWithBookmarks(CustomTable):
["datatable--row-bookmark", "datatable--row-search-result"]
)

_SEARCH_BG_COL = "__search_bg"

active_search_queue: reactive[list[int] | None] = reactive(None)

def __init__(self, *args, bookmarks: Bookmarks, **kwargs):
super().__init__(*args, **kwargs)
self._bookmarks = bookmarks
self._bookmark_highlight: Style = Style.null()
self._search_highlight: Style = Style.null()
self._search_queue_set: frozenset[int] = frozenset()
self._has_search = False

def watch_active_search_queue(self, value: list[int] | None) -> None:
self._search_queue_set = frozenset(value) if value else frozenset()
self._has_search = bool(value)

def on_mount(self):
self._bookmark_highlight = self.get_component_rich_style("datatable--row-bookmark")
self._search_highlight = self.get_component_rich_style("datatable--row-search-result")
self._search_highlight_color = _color_name(self._search_highlight.bgcolor)

@property
def render_header_and_table(self):
was_cached = self._render_header_and_table is not None
result = super().render_header_and_table
# Pre-compute search background column once when viewport is rebuilt.
# This avoids any per-row overhead during partial scroll updates.
if not was_cached:
header, render_df = result
if not render_df.is_empty() and INDEX_COL in render_df.columns:
if self._has_search:
visible_indices = set(render_df[INDEX_COL].to_list())
viewport_hits = list(visible_indices & self._search_queue_set)
if viewport_hits:
search_bg_expr = (
pl.when(pl.col(INDEX_COL).is_in(viewport_hits))
.then(pl.lit(self._search_highlight_color))
.otherwise(pl.lit(None))
.alias(self._SEARCH_BG_COL)
)
else:
search_bg_expr = pl.lit(None).cast(pl.Utf8).alias(self._SEARCH_BG_COL)
else:
search_bg_expr = pl.lit(None).cast(pl.Utf8).alias(self._SEARCH_BG_COL)
render_df = render_df.with_columns(search_bg_expr)
self._render_header_and_table = (header, render_df)
result = self._render_header_and_table
return result

def _get_sel_col_bg_color(self, struct: dict[str, Any]) -> str:
if self.active_search_queue and struct[INDEX_COL] in self.active_search_queue:
return _color_name(self._search_highlight.bgcolor)
search_bg = struct.get(self._SEARCH_BG_COL)
if search_bg is not None:
return search_bg
if self._bookmarks.has_bookmarks and struct[INDEX_COL] in self._bookmarks.meta_dt[INDEX_COL]:
return _color_name(self._bookmark_highlight.bgcolor)
return super()._get_sel_col_bg_color(struct)

def _get_row_bg_color_expr(self, cursor_row_idx: int) -> pl.Expr:
def _get_row_bg_color_expr(self, cursor_row_idx: int, render_df: pl.DataFrame | None = None) -> pl.Expr:
tmp = super()._get_row_bg_color_expr(cursor_row_idx)
if self.active_search_queue:
tmp = (
pl.when(pl.col(INDEX_COL).is_in(self.active_search_queue))
.then(pl.lit(_color_name(self._search_highlight.bgcolor)))
.otherwise(tmp)
)
# Search highlights are pre-computed in the _SEARCH_BG_COL column
# and applied via _resolve_row_bgcolor to avoid per-row overhead.
if self._bookmarks.has_bookmarks:
tmp = (
pl.when(pl.col(INDEX_COL).is_in(self._bookmarks.meta_dt[INDEX_COL]))
Expand All @@ -103,6 +137,18 @@ def _get_row_bg_color_expr(self, cursor_row_idx: int) -> pl.Expr:
)
return tmp

def _resolve_row_bgcolor(self, struct: dict[str, Any]) -> str | None:
"""Apply search highlight from pre-computed column.

Priority: bookmarks > search > cursor (base).
"""
base = struct["bgcolor"]
if base is None:
search_bg = struct.get(self._SEARCH_BG_COL)
if search_bg is not None:
return search_bg
return base


_ALREADY_DT = "dt"

Expand Down
19 changes: 14 additions & 5 deletions src/dt_browser/custom_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -605,7 +605,7 @@ def build_selector(cols: list[str], needed_padding: int = 0):
self._render_header_and_table = (header, rend.collect())
return self._render_header_and_table

def _get_row_bg_color_expr(self, cursor_row_idx: int) -> pl.Expr:
def _get_row_bg_color_expr(self, cursor_row_idx: int, render_df: pl.DataFrame | None = None) -> pl.Expr:

return (
pl.when(pl.col(DISPLAY_IDX_COL) == cursor_row_idx)
Expand All @@ -619,6 +619,14 @@ def _get_row_bg_color_expr(self, cursor_row_idx: int) -> pl.Expr:
.otherwise(pl.lit(None))
)

def _resolve_row_bgcolor(self, struct: dict[str, Any]) -> str | None:
"""Hook for subclasses to adjust the row background color.

Called per row in _gen_segments after the Polars bgcolor expression is evaluated.
Default implementation returns the Polars-computed bgcolor unchanged.
"""
return struct["bgcolor"]

def _get_sel_col_bg_color(self, struct: dict[str, Any]):
return (
_color_name(self._row_col_highlight.bgcolor)
Expand All @@ -641,10 +649,11 @@ def _gen_segments(self, lines: list[int] | None):
cursor_row_idx = self.cursor_coordinate.row - scroll_y

for struct in (
rend.with_columns(self._get_row_bg_color_expr(cursor_row_idx).alias("bgcolor"))
rend.with_columns(self._get_row_bg_color_expr(cursor_row_idx, render_df=render_df).alias("bgcolor"))
.collect()
.iter_rows(named=True)
):
bgcolor = self._resolve_row_bgcolor(struct)
segs = [
Segment(
PADDING_STR,
Expand All @@ -657,7 +666,7 @@ def _gen_segments(self, lines: list[int] | None):
)
else None
),
bgcolor=struct["bgcolor"],
bgcolor=bgcolor,
),
),
Segment(
Expand All @@ -671,7 +680,7 @@ def _gen_segments(self, lines: list[int] | None):
)
else struct[COLOR_COL]
),
bgcolor=struct["bgcolor"],
bgcolor=bgcolor,
),
),
Segment(
Expand Down Expand Up @@ -700,7 +709,7 @@ def _gen_segments(self, lines: list[int] | None):
)
else struct[COLOR_COL]
),
bgcolor=struct["bgcolor"],
bgcolor=bgcolor,
),
),
]
Expand Down
192 changes: 192 additions & 0 deletions tests/test_search_scroll_perf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
"""Performance test: scrolling with active search highlights vs without.

Creates a large dataframe (200k rows, ~160 columns) with varied cardinality,
applies searches with different match distributions at different terminal sizes,
then measures scroll performance against the baseline (no search).
Fails if scrolling is >15% slower.
"""

import random
import time

import polars as pl
import pytest

from dt_browser.browser import DtBrowser, DtBrowserApp
from dt_browser.filter_box import FilterBox

_NUM_ROWS = 200_000
_NUM_SCROLL_OPS = 60
_SLOWDOWN_THRESHOLD = 0.15

# ~160 columns at ~20 chars each ≈ 3200 chars, enough to fill 3000-wide terminals
_STR_COLS_PER_GROUP = 30
_NUM_COLS_PER_GROUP = 30

_SEARCH_PARAMS = [
pytest.param(
"sentinel_col = 'NEEDLE'",
1,
1,
id="single_match",
),
pytest.param(
"sparse_col = 1",
100,
300,
id="sparse_far_apart",
),
pytest.param(
"adjacent_col = 1",
40_000,
60_000,
id="many_adjacent",
),
pytest.param(
"int_col_0 > 5",
100_000,
_NUM_ROWS,
id="dense_most_rows",
),
]

_SCREEN_SIZES = [
pytest.param((120, 30), id="120x30"),
pytest.param((800, 200), id="800x200"),
pytest.param((3000, 1000), id="3000x1000"),
]


def _make_large_df() -> pl.DataFrame:
random.seed(42)
data: dict[str, list] = {}

# 30 low-cardinality string columns (5-20 unique values)
for i in range(_STR_COLS_PER_GROUP):
card = random.randint(5, 20)
choices = [f"cat{i}_{j}" for j in range(card)]
data[f"low_card_str_{i}"] = [random.choice(choices) for _ in range(_NUM_ROWS)]

# 30 medium-cardinality string columns (100-500 unique values)
for i in range(_STR_COLS_PER_GROUP):
card = random.randint(100, 500)
choices = [f"med{i}_{j}" for j in range(card)]
data[f"med_card_str_{i}"] = [random.choice(choices) for _ in range(_NUM_ROWS)]

# 30 high-cardinality string columns (mostly unique)
for i in range(_STR_COLS_PER_GROUP):
data[f"high_card_str_{i}"] = [f"unique{i}_{j}" for j in range(_NUM_ROWS)]

# 30 integer columns with varied ranges
for i in range(_NUM_COLS_PER_GROUP):
upper = random.choice([10, 100, 1000, 100_000])
data[f"int_col_{i}"] = [random.randint(0, upper) for _ in range(_NUM_ROWS)]

# 30 float columns
for i in range(_NUM_COLS_PER_GROUP):
data[f"float_col_{i}"] = [random.random() * 1000 for _ in range(_NUM_ROWS)]

# --- Columns designed for specific search distributions ---

# Single match: exactly one row has the sentinel value
data["sentinel_col"] = ["NEEDLE" if i == _NUM_ROWS // 2 else "haystack" for i in range(_NUM_ROWS)]

# Sparse matches: every ~1000th row matches (spread far apart)
data["sparse_col"] = [1 if i % 1000 == 0 else 0 for i in range(_NUM_ROWS)]

# Adjacent matches: a contiguous block of 50k rows in the middle
block_start = _NUM_ROWS // 4
block_end = block_start + _NUM_ROWS // 4
data["adjacent_col"] = [1 if block_start <= i < block_end else 0 for i in range(_NUM_ROWS)]

return pl.DataFrame(data)


# Build the dataframe once at module level so parametrized tests share it.
_LARGE_DF = _make_large_df()


async def _scroll_around(pilot, n_ops: int) -> float:
"""Perform n_ops scroll operations using only up/down arrow keys."""
# Scroll down for 3/4 of the ops, then back up for the rest
down_count = (n_ops * 3) // 4
up_count = n_ops - down_count

# Warm up the rendering pipeline
for _ in range(3):
await pilot.press("down")
await pilot.pause()
for _ in range(3):
await pilot.press("up")
await pilot.pause()

start = time.perf_counter()
for _ in range(down_count):
await pilot.press("down")
await pilot.pause()
for _ in range(up_count):
await pilot.press("up")
await pilot.pause()
elapsed = time.perf_counter() - start
return elapsed


async def _apply_search(pilot, query: str):
"""Open search box, type query, submit, and wait for results."""
await pilot.press("/")
await pilot.pause()
await pilot.press(*list(query))
await pilot.press("enter")
await pilot.pause()
for _ in range(5):
await pilot.pause()


@pytest.mark.parametrize("screen_size", _SCREEN_SIZES)
@pytest.mark.parametrize("search_query, min_hits, max_hits", _SEARCH_PARAMS)
async def test_search_scroll_performance(search_query, min_hits, max_hits, screen_size):
"""Scrolling with search highlights must be within 15% of baseline scroll speed."""
app = DtBrowserApp("perf_test", _LARGE_DF)

async with app.run_test(size=screen_size) as pilot:
await pilot.pause()
browser = app.query_one(DtBrowser)

# --- Baseline: scroll without search ---
baseline_time = await _scroll_around(pilot, _NUM_SCROLL_OPS)

# Reset cursor to top
await pilot.press("g")
await pilot.pause()

# --- Apply search ---
await _apply_search(pilot, search_query)

assert browser.active_search_queue is not None, "Search should have produced results"
hit_count = len(browser.active_search_queue)
assert min_hits <= hit_count <= max_hits, (
f"Expected {min_hits}-{max_hits} hits, got {hit_count}"
)

# Reset cursor to top for fair comparison
await pilot.press("g")
await pilot.pause()

# --- Measure: scroll with search active ---
search_time = await _scroll_around(pilot, _NUM_SCROLL_OPS)

slowdown = (search_time - baseline_time) / baseline_time
w, h = screen_size
label = f"{w}x{h}"
print(f"\n[{label} | {search_query}]")
print(f" Baseline scroll time: {baseline_time:.3f}s")
print(f" Search scroll time: {search_time:.3f}s")
print(f" Slowdown: {slowdown:.1%}")
print(f" Search hits: {hit_count:,}")

assert slowdown <= _SLOWDOWN_THRESHOLD, (
f"[{label}] Search scrolling is {slowdown:.1%} slower than baseline "
f"(threshold: {_SLOWDOWN_THRESHOLD:.0%}). "
f"Baseline: {baseline_time:.3f}s, Search: {search_time:.3f}s, "
f"Hits: {hit_count:,}"
)
Loading