Skip to content

Commit 4aa96e7

Browse files
rustyconoverclaude
andcommitted
worker: unwrap sealed attach envelope on all bind-call paths; fix lint debt
Unwrap attach_opaque_data to plaintext on table_function_cardinality, table_function_statistics, dynamic_to_string, and the streaming init path so function bodies always see the unsealed value, matching the existing bind() behavior. ts_client now emits a TS property as optional iff the field is wire-nullable, not merely Python-defaulted. Also clears the repo's standing ruff debt: shard_key docstring args on FunctionStorage protocol methods, D205/D400/D401 docstring reflows, test-method docstrings, an F402 loop-var shadow, and E501 wraps of the repeated stateful-catalog warning string. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 8ca51e7 commit 4aa96e7

19 files changed

Lines changed: 192 additions & 60 deletions

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "vgi"
3-
version = "0.7.4"
3+
version = "0.8.0"
44
description = "Vector Gateway Interface - Connect DuckDB to external programs via Apache Arrow"
55
readme = "README.md"
66
requires-python = ">=3.13"

tests/test_filter_pushdown_extension.py

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ class TestBoolColumnBool8Literal:
5151
"""
5252

5353
def test_eq_true(self) -> None:
54+
"""EQ against a bool8 literal matches the plain-bool column."""
5455
batch = _batch_with_bool("flag", [True, False, None, True])
5556
f = ConstantFilter(
5657
column_name="flag",
@@ -62,6 +63,7 @@ def test_eq_true(self) -> None:
6263
assert result.to_pylist() == [True, False, None, True]
6364

6465
def test_eq_false(self) -> None:
66+
"""EQ against a false bool8 literal."""
6567
batch = _batch_with_bool("flag", [True, False, None, True])
6668
f = ConstantFilter(
6769
column_name="flag",
@@ -72,6 +74,7 @@ def test_eq_false(self) -> None:
7274
assert f.evaluate(batch).to_pylist() == [False, True, None, False]
7375

7476
def test_ne_true(self) -> None:
77+
"""NE against a bool8 literal."""
7578
batch = _batch_with_bool("flag", [True, False, None, True])
7679
f = ConstantFilter(
7780
column_name="flag",
@@ -95,6 +98,7 @@ class TestBool8ColumnBool8Literal:
9598
"""
9699

97100
def test_eq_true(self) -> None:
101+
"""EQ with bool8 on both sides of the kernel."""
98102
batch = _batch_with_bool8("flag", [True, False, None, True])
99103
f = ConstantFilter(
100104
column_name="flag",
@@ -105,6 +109,7 @@ def test_eq_true(self) -> None:
105109
assert f.evaluate(batch).to_pylist() == [True, False, None, True]
106110

107111
def test_eq_false(self) -> None:
112+
"""EQ against a false literal with bool8 on both sides."""
108113
batch = _batch_with_bool8("flag", [True, False, None, True])
109114
f = ConstantFilter(
110115
column_name="flag",
@@ -128,11 +133,13 @@ class TestPlainTypesUnchanged:
128133
"""
129134

130135
def test_int32_eq(self) -> None:
136+
"""Plain int32 EQ flows through normalisation unchanged."""
131137
batch = pa.RecordBatch.from_pydict({"n": pa.array([1, 2, 3, 4], type=pa.int32())})
132138
f = ConstantFilter(column_name="n", column_index=0, op=ComparisonOp.EQ, value=pa.scalar(2, type=pa.int32()))
133139
assert f.evaluate(batch).to_pylist() == [False, True, False, False]
134140

135141
def test_string_eq(self) -> None:
142+
"""Plain string EQ flows through normalisation unchanged."""
136143
batch = pa.RecordBatch.from_pydict({"s": ["a", "b", "c"]})
137144
f = ConstantFilter(column_name="s", column_index=0, op=ComparisonOp.EQ, value=pa.scalar("b"))
138145
assert f.evaluate(batch).to_pylist() == [False, True, False]
@@ -158,6 +165,7 @@ class TestInFilterExtension:
158165
"""
159166

160167
def test_in_bool_with_bool8_values(self) -> None:
168+
"""IN with bool8 values against a plain-bool column."""
161169
batch = _batch_with_bool("flag", [True, False, None, True])
162170
# Build the values array as a bool8 extension array
163171
storage = pa.array([1], type=pa.int8())
@@ -173,11 +181,14 @@ def test_in_bool_with_bool8_values(self) -> None:
173181

174182

175183
class TestPlainLiteralBool8Column:
176-
"""Defensive: if some future code path emits a plain bool literal but
177-
the column happens to be bool8, normalisation should still align them.
184+
"""Defensive symmetry check: plain bool literal against a bool8 column.
185+
186+
If some future code path emits a plain bool literal but the column
187+
happens to be bool8, normalisation should still align them.
178188
"""
179189

180190
def test_plain_bool_literal_bool8_column(self) -> None:
191+
"""Plain bool literal against a bool8 column."""
181192
batch = _batch_with_bool8("flag", [True, False, None, True])
182193
f = ConstantFilter(
183194
column_name="flag",
@@ -197,8 +208,10 @@ def test_plain_bool_literal_bool8_column(self) -> None:
197208

198209

199210
def test_pyarrow_kernel_gap_still_present() -> None:
200-
"""Documents the underlying PyArrow gap. If this passes in a future
201-
PyArrow release the normalisation helper is over-defensive but harmless.
211+
"""Document the underlying PyArrow gap.
212+
213+
If this passes in a future PyArrow release the normalisation helper
214+
is over-defensive but harmless.
202215
"""
203216
import pyarrow.compute as pc
204217

@@ -245,6 +258,7 @@ class TestUuidExtension:
245258
]
246259

247260
def test_eq(self) -> None:
261+
"""EQ with arrow.uuid on both sides."""
248262
batch = pa.RecordBatch.from_arrays([_uuid_array(self.UUIDS)], names=["id"])
249263
f = ConstantFilter(
250264
column_name="id",
@@ -255,6 +269,7 @@ def test_eq(self) -> None:
255269
assert f.evaluate(batch).to_pylist() == [False, True, None, False]
256270

257271
def test_ne(self) -> None:
272+
"""NE with arrow.uuid on both sides."""
258273
batch = pa.RecordBatch.from_arrays([_uuid_array(self.UUIDS)], names=["id"])
259274
f = ConstantFilter(
260275
column_name="id",
@@ -265,6 +280,7 @@ def test_ne(self) -> None:
265280
assert f.evaluate(batch).to_pylist() == [True, False, None, True]
266281

267282
def test_in(self) -> None:
283+
"""IN with an arrow.uuid values array."""
268284
batch = pa.RecordBatch.from_arrays([_uuid_array(self.UUIDS)], names=["id"])
269285
values = _uuid_array(
270286
[
@@ -282,8 +298,10 @@ def test_in(self) -> None:
282298

283299

284300
def test_pyarrow_interval_kernel_gap() -> None:
285-
"""PyArrow has no ``equal`` kernel for any of the interval types
286-
(``month_day_nano_interval``, ``day_time_interval``, ``month_interval``).
301+
"""PyArrow has no ``equal`` kernel for any interval type.
302+
303+
The affected types are ``month_day_nano_interval``,
304+
``day_time_interval``, and ``month_interval``.
287305
288306
This means a filter pushdown like ``WHERE col = INTERVAL '1 day'``
289307
cannot be evaluated on the worker side regardless of whether we strip

uv.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vgi/_test_fixtures/aggregate/window.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -273,8 +273,10 @@ def window(
273273

274274

275275
class WindowSumBatchFunction(AggregateFunction[SumState]):
276-
"""Windowed running-sum that overrides ``window_batch`` to return a
277-
pre-built ``pa.Array`` rather than a Python list.
276+
"""Windowed running-sum returning a pre-built ``pa.Array``.
277+
278+
Overrides ``window_batch`` to return a pre-built ``pa.Array`` rather
279+
than a Python list.
278280
279281
Functionally equivalent to :class:`WindowSumFunction`. The point of this
280282
fixture is to exercise the framework's polymorphic batch return: when

vgi/_test_fixtures/simple_writable.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -476,11 +476,12 @@ def process(
476476

477477

478478
class BrokenReturningInsert(TableInOutGenerator[None, None]):
479-
"""Misbehaving INSERT handler: claims RETURNING support but always emits a
480-
(count BIGINT) batch — same shape that triggered the original SIGSEGV in
481-
the kafka worker. Used to verify the C++ extension's runtime schema
482-
validator throws a clean IOException instead of crashing inside
483-
ArrowToDuckDB.
479+
"""Misbehaving INSERT handler that lies about its RETURNING support.
480+
481+
Claims RETURNING support but always emits a (count BIGINT) batch —
482+
same shape that triggered the original SIGSEGV in the kafka worker.
483+
Used to verify the C++ extension's runtime schema validator throws a
484+
clean IOException instead of crashing inside ArrowToDuckDB.
484485
"""
485486

486487
class Meta:

vgi/_test_fixtures/table/partition_columns.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -75,9 +75,11 @@ class _CountryPartitionedArgs:
7575

7676
@dataclass(kw_only=True)
7777
class _CountryPartitionedState(ArrowSerializableDataclass):
78-
"""Per-worker cursor. ``current_country`` is set after the worker
79-
pops a queue item; ``current_idx`` advances through emitted rows
80-
until the per-country quota is reached, then it pops the next item.
78+
"""Per-worker cursor over countries.
79+
80+
``current_country`` is set after the worker pops a queue item;
81+
``current_idx`` advances through emitted rows until the per-country
82+
quota is reached, then it pops the next item.
8183
"""
8284

8385
current_country: str | None = None

vgi/aggregate_function.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -420,7 +420,7 @@ def window_prepare(
420420
window_state: Any,
421421
params: ProcessParams[Any],
422422
) -> Any:
423-
"""Optional hook: derive per-partition state for the window() loop.
423+
"""Derive per-partition state for the window() loop (optional hook).
424424
425425
Called once per partition, after ``window_init`` (or after the state
426426
is rehydrated from storage on a cold reload), before any

vgi/client/cli_catalog.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,8 @@ def catalog_version(
160160
)
161161
if is_stateful and catalog_name:
162162
click.echo(
163-
"Warning: Using --catalog with a stateful catalog. Consider using --attach-opaque-data for session persistence.",
163+
"Warning: Using --catalog with a stateful catalog. "
164+
"Consider using --attach-opaque-data for session persistence.",
164165
err=True,
165166
)
166167
version = client.catalog_version(

vgi/client/cli_schema.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,8 @@ def schema_list(
5353
)
5454
if is_stateful and catalog_name:
5555
click.echo(
56-
"Warning: Using --catalog with a stateful catalog. Consider using --attach-opaque-data for session persistence.",
56+
"Warning: Using --catalog with a stateful catalog. "
57+
"Consider using --attach-opaque-data for session persistence.",
5758
err=True,
5859
)
5960
for schema_info in client.schemas(
@@ -92,7 +93,8 @@ def schema_get(
9293
)
9394
if is_stateful and catalog_name:
9495
click.echo(
95-
"Warning: Using --catalog with a stateful catalog. Consider using --attach-opaque-data for session persistence.",
96+
"Warning: Using --catalog with a stateful catalog. "
97+
"Consider using --attach-opaque-data for session persistence.",
9698
err=True,
9799
)
98100
schema_info = client.schema_get(
@@ -139,7 +141,8 @@ def schema_create(
139141
)
140142
if is_stateful and catalog_name:
141143
click.echo(
142-
"Warning: Using --catalog with a stateful catalog. Consider using --attach-opaque-data for session persistence.",
144+
"Warning: Using --catalog with a stateful catalog. "
145+
"Consider using --attach-opaque-data for session persistence.",
143146
err=True,
144147
)
145148
tags_dict = parse_json_option(tags, "--tags")
@@ -186,7 +189,8 @@ def schema_drop(
186189
)
187190
if is_stateful and catalog_name:
188191
click.echo(
189-
"Warning: Using --catalog with a stateful catalog. Consider using --attach-opaque-data for session persistence.",
192+
"Warning: Using --catalog with a stateful catalog. "
193+
"Consider using --attach-opaque-data for session persistence.",
190194
err=True,
191195
)
192196
client.schema_drop(
@@ -238,7 +242,8 @@ def schema_contents(
238242
)
239243
if is_stateful and catalog_name:
240244
click.echo(
241-
"Warning: Using --catalog with a stateful catalog. Consider using --attach-opaque-data for session persistence.",
245+
"Warning: Using --catalog with a stateful catalog. "
246+
"Consider using --attach-opaque-data for session persistence.",
242247
err=True,
243248
)
244249

0 commit comments

Comments
 (0)