Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions RELEASE_NOTES.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Release Notes for ParQuery

## Release 2.0.7
- Add configurable DuckDB memory limit via `DUCKDB_MEMORY_LIMIT` environment variable
- Prevents DuckDB OOM on containers with limited memory (e.g. ECS tasks with multiple Gunicorn workers)
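- When set (e.g. `DUCKDB_MEMORY_LIMIT=2GB`), DuckDB spills to temp storage once that limit is reached instead of growing up to its default limit (80% of system RAM), which can exceed what the container is allowed to use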
- No performance penalty — benchmark shows equal or faster execution with bounded memory

## Release 2.0.6
- Add specific functions to public api for imports

Expand Down
2 changes: 1 addition & 1 deletion parquery/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from parquery.write import df_to_parquet

pre_release_version = os.getenv("PRE_RELEASE_VERSION", "")
__version__: str = pre_release_version if pre_release_version else "2.0.6"
__version__: str = pre_release_version if pre_release_version else "2.0.7"

__all__ = [
"aggregate_pq",
Expand Down
11 changes: 10 additions & 1 deletion parquery/aggregate_duckdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import gc
import logging
import os
from typing import Any

try:
Expand All @@ -17,6 +18,10 @@

logger = logging.getLogger(__name__)

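# DuckDB memory limit per connection. Set via the DUCKDB_MEMORY_LIMIT env var,
# which is read once when this module is imported.
# Examples: "4GB", "2GB", "512MB". Default: unset, in which case DuckDB applies
# its own default limit (80% of system RAM, per the DuckDB configuration docs).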
DUCKDB_MEMORY_LIMIT: str | None = os.environ.get("DUCKDB_MEMORY_LIMIT")


def aggregate_pq_duckdb(
file_name: str,
Expand Down Expand Up @@ -134,8 +139,12 @@ def call_duckdb(sql) -> Any:
- Uses in-memory database (:memory:) for temporary processing.
- Connection is automatically closed after query execution.
- Results are streamed via RecordBatchReader and converted to Table.
- Set the DUCKDB_MEMORY_LIMIT env var (e.g. "4GB") before this module is imported to cap memory per query; the value is read once at import time.
"""
conn = duckdb.connect(":memory:")
config = {}
if DUCKDB_MEMORY_LIMIT:
config["memory_limit"] = DUCKDB_MEMORY_LIMIT
conn = duckdb.connect(":memory:", config=config)
# arrow() returns a RecordBatchReader, convert to Table
reader = conn.execute(sql).arrow()
result_arrow = reader.read_all()
Expand Down