diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index dc14d51..a796f54 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -1,6 +1,8 @@
-name: Test Pull Request
+name: Tests
on:
+ push:
+ branches: [main]
pull_request:
jobs:
@@ -23,5 +25,41 @@ jobs:
- name: Install dependencies
run: uv sync --group dev
- - name: Run tests
- run: uv run pytest -vv -W error
+ - name: Run tests with coverage
+ run: uv run pytest -vv -W error --cov=transformplan --cov-report=xml --cov-report=term
+
+ coverage-badge:
+ needs: test
+ runs-on: ubuntu-22.04
+ if: github.event_name == 'push' && github.ref == 'refs/heads/main'
+ permissions:
+ contents: write
+
+ steps:
+ - uses: actions/checkout@v5
+
+ - name: Install uv
+ uses: astral-sh/setup-uv@v5
+ with:
+ enable-cache: true
+ cache-dependency-glob: pyproject.toml
+ python-version: "3.12"
+
+ - name: Install dependencies
+ run: uv sync --group dev
+
+ - name: Generate coverage report
+ run: uv run pytest --cov=transformplan --cov-report=xml --cov-report=term
+
+ - name: Create coverage badge
+ uses: tj-actions/coverage-badge-py@v2
+ with:
+ output: coverage.svg
+
+ - name: Commit badge
+ run: |
+ git config --local user.email "github-actions[bot]@users.noreply.github.com"
+ git config --local user.name "github-actions[bot]"
+ git add coverage.svg
+ git diff --staged --quiet || git commit -m "chore: update coverage badge [skip ci]"
+ git push
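
The badge step's percentage ultimately comes from the Cobertura-style XML that `--cov-report=xml` writes. As a minimal sketch of that derivation (the internals of `tj-actions/coverage-badge-py` are an assumption here, not inspected), using only the stdlib:

```python
# Sketch: derive the coverage percentage a badge would display from
# coverage.xml. Assumes pytest-cov's Cobertura-style report, whose root
# element carries a "line-rate" attribute in [0, 1]; the badge action's
# actual logic may differ.
import xml.etree.ElementTree as ET

def coverage_percent(path: str = "coverage.xml") -> float:
    root = ET.parse(path).getroot()
    return round(float(root.attrib["line-rate"]) * 100, 1)

if __name__ == "__main__":
    print(f"coverage: {coverage_percent()}%")
```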
diff --git a/README.md b/README.md
index 7c990a9..b5e2a95 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,5 @@
+![Coverage](coverage.svg)
+
# TransformPlan: Auditable Data Transformation Pipelines
@@ -36,7 +38,7 @@ plan = (
.dt_age_years(column="date_of_birth", new_column="age")
.math_clamp(column="age", min_value=0, max_value=120)
- # Categorize patients
+    # Categorize patients by age
.map_discretize(column="age", bins=[18, 40, 65], labels=["young", "adult", "senior"], new_column="age_group")
# Filter and clean
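
To sanity-check the README pipeline, here is a minimal sketch of `map_discretize` on a toy frame, assuming the package API as shown in this diff (note the `map.py` docstring requires `labels` to have `len(bins) + 1` entries):

```python
# Sketch: discretize ages into len(bins) + 1 buckets, per the map.py docstring.
import polars as pl
from transformplan import TransformPlan

plan = TransformPlan().map_discretize(
    column="age",
    bins=[18, 65],                       # two edges -> three buckets
    labels=["minor", "adult", "senior"],
    new_column="age_group",
)
df = pl.DataFrame({"age": [4, 30, 90]})
out, protocol = plan.process(df)         # returns (DataFrame, Protocol)
print(out["age_group"].to_list())        # ["minor", "adult", "senior"]
```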
diff --git a/pyproject.toml b/pyproject.toml
index 37cbece..09f37f7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -29,6 +29,7 @@ dev = [
"mkdocs>=1.6.0",
"mkdocs-material>=9.5.0",
"mkdocstrings[python]>=0.24.0",
+ "pytest-cov>=7.0.0",
]
[build-system]
diff --git a/transformplan/chunking.py b/transformplan/chunking.py
index a574aca..697e062 100644
--- a/transformplan/chunking.py
+++ b/transformplan/chunking.py
@@ -265,7 +265,7 @@ def set_operations(self, operations: list[dict[str, Any]]) -> None:
"""Record the operations that were applied."""
self._operations = operations
- def set_metadata(self, **kwargs: Any) -> None:
+ def set_metadata(self, **kwargs: Any) -> None: # noqa: ANN401
"""Set arbitrary metadata on the protocol."""
self._metadata.update(kwargs)
@@ -275,32 +275,56 @@ def add_chunk(self, chunk_info: ChunkInfo) -> None:
@property
def chunks(self) -> list[ChunkInfo]:
- """List of chunk information."""
+ """List of chunk information.
+
+ Returns:
+ List of ChunkInfo instances.
+ """
return self._chunks
@property
def total_input_rows(self) -> int:
- """Total rows across all input chunks."""
+ """Total rows across all input chunks.
+
+ Returns:
+ Sum of input rows.
+ """
return sum(c.input_rows for c in self._chunks)
@property
def total_output_rows(self) -> int:
- """Total rows across all output chunks."""
+ """Total rows across all output chunks.
+
+ Returns:
+ Sum of output rows.
+ """
return sum(c.output_rows for c in self._chunks)
@property
def total_elapsed_seconds(self) -> float:
- """Total processing time across all chunks."""
+ """Total processing time across all chunks.
+
+ Returns:
+ Sum of elapsed seconds.
+ """
return sum(c.elapsed_seconds for c in self._chunks)
@property
def num_chunks(self) -> int:
- """Number of chunks processed."""
+ """Number of chunks processed.
+
+ Returns:
+ Count of chunks.
+ """
return len(self._chunks)
@property
def metadata(self) -> dict[str, Any]:
- """Protocol metadata."""
+ """Protocol metadata.
+
+ Returns:
+ Dictionary of metadata.
+ """
return self._metadata
def output_hash(self) -> str:
@@ -409,9 +433,7 @@ def from_json(cls, source: str | Path) -> ChunkedProtocol:
Returns:
ChunkedProtocol instance.
"""
- if isinstance(source, Path) or (
- isinstance(source, str) and not source.strip().startswith("{")
- ):
+ if isinstance(source, Path) or not source.strip().startswith("{"):
content = Path(source).read_text()
else:
content = source
@@ -419,13 +441,21 @@ def from_json(cls, source: str | Path) -> ChunkedProtocol:
return cls.from_dict(json.loads(content))
def __repr__(self) -> str:
- """Return string representation of the protocol."""
+ """Return string representation of the protocol.
+
+ Returns:
+ Human-readable representation.
+ """
return (
f"ChunkedProtocol({self.num_chunks} chunks, {self.total_input_rows} rows)"
)
def __len__(self) -> int:
- """Return number of chunks processed."""
+ """Return number of chunks processed.
+
+ Returns:
+ Count of chunks.
+ """
return self.num_chunks
def summary(self) -> str:
@@ -452,12 +482,14 @@ def summary(self) -> str:
lines.append(f"Partition key: {self._partition_key}")
if self._chunk_size:
lines.append(f"Target chunk size: {self._chunk_size:,}")
- lines.append("-" * 70)
-
- # Summary stats
- lines.append(f"Chunks processed: {self.num_chunks}")
- lines.append(f"Total input rows: {self.total_input_rows:,}")
- lines.append(f"Total output rows: {self.total_output_rows:,}")
+ lines.extend(
+ [
+ "-" * 70,
+ f"Chunks processed: {self.num_chunks}",
+ f"Total input rows: {self.total_input_rows:,}",
+ f"Total output rows: {self.total_output_rows:,}",
+ ]
+ )
rows_diff = self.total_output_rows - self.total_input_rows
if rows_diff != 0:
lines.append(f"Row change: {rows_diff:+,}")
@@ -465,16 +497,17 @@ def summary(self) -> str:
if self.num_chunks > 0:
avg_time = self.total_elapsed_seconds / self.num_chunks
lines.append(f"Avg time per chunk: {avg_time:.4f}s")
- lines.append(f"Output hash: {self.output_hash()}")
- lines.append("-" * 70)
+ lines.extend((f"Output hash: {self.output_hash()}", "-" * 70))
# Per-chunk details
if self._chunks:
- lines.append("")
- lines.append(
- f"{'#':<6} {'Input':<12} {'Output':<12} {'Change':<10} {'Time':<10} {'Hash':<16}"
+ lines.extend(
+ (
+ "",
+ f"{'#':<6} {'Input':<12} {'Output':<12} {'Change':<10} {'Time':<10} {'Hash':<16}",
+ "-" * 70,
+ )
)
- lines.append("-" * 70)
for chunk in self._chunks:
idx = str(chunk.chunk_index)
@@ -494,10 +527,10 @@ def summary(self) -> str:
def print(self) -> None:
"""Print the protocol summary to stdout."""
- print(self.summary())
+ print(self.summary()) # noqa: T201
-def validate_chunked_pipeline(
+def validate_chunked_pipeline( # noqa: C901
operations: list[tuple[Any, dict[str, Any]]],
partition_key: str | list[str] | None = None,
) -> ChunkValidationResult:
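
The simplified `from_json` condition above leans on the `str | Path` annotation: anything that is not a `Path` must be a string, so only strings whose stripped form starts with `{` are treated as inline JSON. A self-contained sketch of that dispatch:

```python
# Sketch of the path-vs-payload heuristic used by from_json: a Path, or a
# string that does not start with "{", is read from disk; otherwise the
# string itself is treated as the JSON document.
import json
from pathlib import Path

def load_json(source: str | Path) -> dict:
    if isinstance(source, Path) or not source.strip().startswith("{"):
        content = Path(source).read_text()
    else:
        content = source
    return json.loads(content)

assert load_json('{"chunks": []}') == {"chunks": []}  # inline payload
```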
diff --git a/transformplan/core.py b/transformplan/core.py
index 9e24f05..48c4d91 100644
--- a/transformplan/core.py
+++ b/transformplan/core.py
@@ -54,6 +54,7 @@ class TransformPlanBase:
VERSION = "1.0"
def __init__(self) -> None:
+ """Initialize an empty TransformPlanBase."""
self._operations: list[tuple[Callable[..., pl.DataFrame], dict[str, Any]]] = []
def _register(
@@ -61,25 +62,27 @@ def _register(
method: Callable[..., pl.DataFrame],
params: dict[str, Any],
) -> Self:
- """Register an operation for deferred execution."""
+ """Register an operation for deferred execution.
+
+ Returns:
+ Self for method chaining.
+ """
self._operations.append((method, params))
return self
def process(
- self, data: pl.DataFrame, validate: bool = True
+ self, data: pl.DataFrame, *, validate: bool = True
) -> tuple[pl.DataFrame, Protocol]:
"""Execute all registered operations and return transformed data with protocol.
Args:
data: DataFrame to process.
validate: If True, validate schema before execution (default).
- Set to False for performance in hot loops with pre-validated pipelines.
+ Set to False for performance in hot loops with pre-validated
+ pipelines.
Returns:
Tuple of (processed DataFrame, Protocol).
-
- Raises:
- SchemaValidationError: If validate=True and validation fails.
"""
if validate:
validate_schema(self._operations, dict(data.schema)).raise_if_invalid()
@@ -152,7 +155,11 @@ def dry_run(self, data: pl.DataFrame) -> DryRunResult:
return dry_run_schema(self._operations, dict(data.schema))
def to_dict(self) -> dict[str, Any]:
- """Serialize the pipeline to a dictionary."""
+ """Serialize the pipeline to a dictionary.
+
+ Returns:
+ Dictionary representation of the pipeline.
+ """
steps = []
for method, params in self._operations:
op_name = method.__name__.lstrip("_")
@@ -177,6 +184,9 @@ def from_dict(cls, data: dict[str, Any]) -> Self:
Returns:
New TransformPlan instance with operations loaded.
+
+ Raises:
+ ValueError: If an unknown operation is encountered.
"""
plan = cls()
@@ -187,7 +197,8 @@ def from_dict(cls, data: dict[str, Any]) -> Self:
# Find the public method on the class
method = getattr(plan, op_name, None)
if method is None:
- raise ValueError(f"Unknown operation: {op_name}")
+ msg = f"Unknown operation: {op_name}"
+ raise ValueError(msg)
# Call the method with params to register the operation
method(**params)
@@ -221,9 +232,7 @@ def from_json(cls, source: str | Path) -> Self:
Returns:
New TransformPlan instance.
"""
- if isinstance(source, Path) or (
- isinstance(source, str) and not source.strip().startswith("{")
- ):
+ if isinstance(source, Path) or not source.strip().startswith("{"):
content = Path(source).read_text()
else:
content = source
@@ -231,10 +240,19 @@ def from_json(cls, source: str | Path) -> Self:
return cls.from_dict(json.loads(content))
def __len__(self) -> int:
- """Return number of registered operations."""
+ """Return number of registered operations.
+
+ Returns:
+ Number of operations.
+ """
return len(self._operations)
def __repr__(self) -> str:
+ """Return string representation.
+
+ Returns:
+ Human-readable representation.
+ """
return f"TransformPlan({len(self._operations)} operations)"
def to_python(self, variable_name: str = "plan") -> str:
@@ -247,8 +265,7 @@ def to_python(self, variable_name: str = "plan") -> str:
Python code string.
"""
lines = ["from transformplan import TransformPlan, Col", ""]
- lines.append(f"{variable_name} = (")
- lines.append(" TransformPlan()")
+ lines.extend((f"{variable_name} = (", " TransformPlan()"))
for method, params in self._operations:
op_name = method.__name__.lstrip("_")
@@ -259,7 +276,11 @@ def to_python(self, variable_name: str = "plan") -> str:
return "\n".join(lines)
def _format_params_as_python(self, params: dict[str, Any]) -> str:
- """Format parameters as Python code."""
+ """Format parameters as Python code.
+
+ Returns:
+ Python code string for the parameters.
+ """
parts = []
for key, value in params.items():
@@ -272,21 +293,23 @@ def _format_params_as_python(self, params: dict[str, Any]) -> str:
parts.append(filter_str)
elif isinstance(value, str):
parts.append(f'{key}="{value}"')
- elif isinstance(value, bool):
- parts.append(f"{key}={value}")
- elif isinstance(value, (int, float)):
+ elif isinstance(value, (bool, int, float)):
parts.append(f"{key}={value}")
- elif isinstance(value, list):
- parts.append(f"{key}={value!r}")
- elif isinstance(value, dict):
+ elif isinstance(value, (list, dict)):
parts.append(f"{key}={value!r}")
else:
parts.append(f"{key}={value!r}")
return ", ".join(parts)
- def _format_filter_as_python(self, filter_dict: dict[str, Any]) -> str:
- """Convert a filter dict back to Col() expression string."""
+ def _format_filter_as_python( # noqa: C901
+ self, filter_dict: dict[str, Any]
+ ) -> str:
+ """Convert a filter dict back to Col() expression string.
+
+ Returns:
+ Python code string for the filter.
+ """
filter_type = filter_dict.get("type", "")
# Logical operators
@@ -294,11 +317,11 @@ def _format_filter_as_python(self, filter_dict: dict[str, Any]) -> str:
left = self._format_filter_as_python(filter_dict["left"])
right = self._format_filter_as_python(filter_dict["right"])
return f"({left}) & ({right})"
- elif filter_type == "or":
+ if filter_type == "or":
left = self._format_filter_as_python(filter_dict["left"])
right = self._format_filter_as_python(filter_dict["right"])
return f"({left}) | ({right})"
- elif filter_type == "not":
+ if filter_type == "not":
operand = self._format_filter_as_python(filter_dict["operand"])
return f"~({operand})"
@@ -318,25 +341,25 @@ def _format_filter_as_python(self, filter_dict: dict[str, Any]) -> str:
if filter_type in op_map:
op = op_map[filter_type]
return f'Col("{col}") {op} {val!r}'
- elif filter_type == "is_in":
+ if filter_type == "is_in":
values = filter_dict.get("values", [])
return f'Col("{col}").is_in({values!r})'
- elif filter_type == "is_null":
+ if filter_type == "is_null":
return f'Col("{col}").is_null()'
- elif filter_type == "is_not_null":
+ if filter_type == "is_not_null":
return f'Col("{col}").is_not_null()'
- elif filter_type == "between":
+ if filter_type == "between":
lower = filter_dict.get("lower")
upper = filter_dict.get("upper")
return f'Col("{col}").between({lower!r}, {upper!r})'
- elif filter_type == "str_contains":
+ if filter_type == "str_contains":
pattern = filter_dict.get("pattern", "")
literal = filter_dict.get("literal", True)
return f'Col("{col}").str_contains({pattern!r}, literal={literal})'
- elif filter_type == "str_starts_with":
+ if filter_type == "str_starts_with":
prefix = filter_dict.get("prefix", "")
return f'Col("{col}").str_starts_with({prefix!r})'
- elif filter_type == "str_ends_with":
+ if filter_type == "str_ends_with":
suffix = filter_dict.get("suffix", "")
return f'Col("{col}").str_ends_with({suffix!r})'
diff --git a/transformplan/filters.py b/transformplan/filters.py
index 6576062..390aef3 100644
--- a/transformplan/filters.py
+++ b/transformplan/filters.py
@@ -97,11 +97,13 @@ def from_dict(cls, data: dict[str, Any]) -> Filter:
"""
filter_type = data.get("type")
if filter_type is None:
- raise ValueError("Missing 'type' in filter dict")
+ msg = "Missing 'type' in filter dict"
+ raise ValueError(msg)
filter_cls = _FILTER_REGISTRY.get(filter_type)
if filter_cls is None:
- raise ValueError(f"Unknown filter type: {filter_type}")
+ msg = f"Unknown filter type: {filter_type}"
+ raise ValueError(msg)
return filter_cls._from_dict(data)
@@ -195,7 +197,7 @@ def __init__(self, name: str) -> None:
"""
self.name = name
- def __eq__(self, value: Any) -> Eq: # type: ignore[override]
+ def __eq__(self, value: object) -> Eq: # type: ignore[override]
"""Create an equality filter (column == value).
Args:
@@ -206,7 +208,7 @@ def __eq__(self, value: Any) -> Eq: # type: ignore[override]
"""
return Eq(self.name, value)
- def __ne__(self, value: Any) -> Ne: # type: ignore[override]
+ def __ne__(self, value: object) -> Ne: # type: ignore[override]
"""Create an inequality filter (column != value).
Args:
@@ -217,7 +219,7 @@ def __ne__(self, value: Any) -> Ne: # type: ignore[override]
"""
return Ne(self.name, value)
- def __gt__(self, value: Any) -> Gt:
+ def __gt__(self, value: Any) -> Gt: # noqa: ANN401
"""Create a greater-than filter (column > value).
Args:
@@ -228,7 +230,7 @@ def __gt__(self, value: Any) -> Gt:
"""
return Gt(self.name, value)
- def __ge__(self, value: Any) -> Ge:
+ def __ge__(self, value: Any) -> Ge: # noqa: ANN401
"""Create a greater-or-equal filter (column >= value).
Args:
@@ -239,7 +241,7 @@ def __ge__(self, value: Any) -> Ge:
"""
return Ge(self.name, value)
- def __lt__(self, value: Any) -> Lt:
+ def __lt__(self, value: Any) -> Lt: # noqa: ANN401
"""Create a less-than filter (column < value).
Args:
@@ -250,7 +252,7 @@ def __lt__(self, value: Any) -> Lt:
"""
return Lt(self.name, value)
- def __le__(self, value: Any) -> Le:
+ def __le__(self, value: Any) -> Le: # noqa: ANN401
"""Create a less-or-equal filter (column <= value).
Args:
@@ -297,8 +299,8 @@ def is_not_null(self) -> IsNotNull:
"""
return IsNotNull(self.name)
- def str_contains(self, pattern: str, literal: bool = True) -> StrContains:
- """Create a string contains filter.
+ def str_contains(self, pattern: str, *, literal: bool = True) -> StrContains:
+ r"""Create a string contains filter.
Args:
pattern: Substring or regex pattern to search for.
@@ -341,7 +343,7 @@ def str_ends_with(self, suffix: str) -> StrEndsWith:
"""
return StrEndsWith(self.name, suffix)
- def between(self, lower: Any, upper: Any) -> Between:
+ def between(self, lower: Any, upper: Any) -> Between: # noqa: ANN401
"""Create a range filter (lower <= column <= upper).
Args:
@@ -376,16 +378,28 @@ class Eq(Filter):
value: Any
def to_expr(self) -> pl.Expr:
- """Convert to Polars equality expression."""
+ """Convert to Polars equality expression.
+
+ Returns:
+ Polars expression for equality comparison.
+ """
return pl.col(self.column) == self.value
def to_dict(self) -> dict[str, Any]:
- """Serialize to dictionary."""
+ """Serialize to dictionary.
+
+ Returns:
+ Dictionary representation with type, column, and value.
+ """
return {"type": "eq", "column": self.column, "value": self.value}
@classmethod
def _from_dict(cls, data: dict[str, Any]) -> Eq:
- """Create from dictionary."""
+ """Create from dictionary.
+
+ Returns:
+ New Eq instance.
+ """
return cls(data["column"], data["value"])
@@ -402,16 +416,28 @@ class Ne(Filter):
value: Any
def to_expr(self) -> pl.Expr:
- """Convert to Polars inequality expression."""
+ """Convert to Polars inequality expression.
+
+ Returns:
+ Polars expression for inequality comparison.
+ """
return pl.col(self.column) != self.value
def to_dict(self) -> dict[str, Any]:
- """Serialize to dictionary."""
+ """Serialize to dictionary.
+
+ Returns:
+ Dictionary representation with type, column, and value.
+ """
return {"type": "ne", "column": self.column, "value": self.value}
@classmethod
def _from_dict(cls, data: dict[str, Any]) -> Ne:
- """Create from dictionary."""
+ """Create from dictionary.
+
+ Returns:
+ New Ne instance.
+ """
return cls(data["column"], data["value"])
@@ -428,16 +454,28 @@ class Gt(Filter):
value: Any
def to_expr(self) -> pl.Expr:
- """Convert to Polars greater-than expression."""
+ """Convert to Polars greater-than expression.
+
+ Returns:
+ Polars expression for greater-than comparison.
+ """
return pl.col(self.column) > self.value
def to_dict(self) -> dict[str, Any]:
- """Serialize to dictionary."""
+ """Serialize to dictionary.
+
+ Returns:
+ Dictionary representation with type, column, and value.
+ """
return {"type": "gt", "column": self.column, "value": self.value}
@classmethod
def _from_dict(cls, data: dict[str, Any]) -> Gt:
- """Create from dictionary."""
+ """Create from dictionary.
+
+ Returns:
+ New Gt instance.
+ """
return cls(data["column"], data["value"])
@@ -454,16 +492,28 @@ class Ge(Filter):
value: Any
def to_expr(self) -> pl.Expr:
- """Convert to Polars greater-or-equal expression."""
+ """Convert to Polars greater-or-equal expression.
+
+ Returns:
+ Polars expression for greater-or-equal comparison.
+ """
return pl.col(self.column) >= self.value
def to_dict(self) -> dict[str, Any]:
- """Serialize to dictionary."""
+ """Serialize to dictionary.
+
+ Returns:
+ Dictionary representation with type, column, and value.
+ """
return {"type": "ge", "column": self.column, "value": self.value}
@classmethod
def _from_dict(cls, data: dict[str, Any]) -> Ge:
- """Create from dictionary."""
+ """Create from dictionary.
+
+ Returns:
+ New Ge instance.
+ """
return cls(data["column"], data["value"])
@@ -480,16 +530,28 @@ class Lt(Filter):
value: Any
def to_expr(self) -> pl.Expr:
- """Convert to Polars less-than expression."""
+ """Convert to Polars less-than expression.
+
+ Returns:
+ Polars expression for less-than comparison.
+ """
return pl.col(self.column) < self.value
def to_dict(self) -> dict[str, Any]:
- """Serialize to dictionary."""
+ """Serialize to dictionary.
+
+ Returns:
+ Dictionary representation with type, column, and value.
+ """
return {"type": "lt", "column": self.column, "value": self.value}
@classmethod
def _from_dict(cls, data: dict[str, Any]) -> Lt:
- """Create from dictionary."""
+ """Create from dictionary.
+
+ Returns:
+ New Lt instance.
+ """
return cls(data["column"], data["value"])
@@ -506,16 +568,28 @@ class Le(Filter):
value: Any
def to_expr(self) -> pl.Expr:
- """Convert to Polars less-or-equal expression."""
+ """Convert to Polars less-or-equal expression.
+
+ Returns:
+ Polars expression for less-or-equal comparison.
+ """
return pl.col(self.column) <= self.value
def to_dict(self) -> dict[str, Any]:
- """Serialize to dictionary."""
+ """Serialize to dictionary.
+
+ Returns:
+ Dictionary representation with type, column, and value.
+ """
return {"type": "le", "column": self.column, "value": self.value}
@classmethod
def _from_dict(cls, data: dict[str, Any]) -> Le:
- """Create from dictionary."""
+ """Create from dictionary.
+
+ Returns:
+ New Le instance.
+ """
return cls(data["column"], data["value"])
@@ -532,16 +606,28 @@ class IsIn(Filter):
values: Sequence[Any]
def to_expr(self) -> pl.Expr:
- """Convert to Polars is_in expression."""
+ """Convert to Polars is_in expression.
+
+ Returns:
+ Polars expression for membership check.
+ """
return pl.col(self.column).is_in(self.values)
def to_dict(self) -> dict[str, Any]:
- """Serialize to dictionary."""
+ """Serialize to dictionary.
+
+ Returns:
+ Dictionary representation with type, column, and values.
+ """
return {"type": "is_in", "column": self.column, "values": list(self.values)}
@classmethod
def _from_dict(cls, data: dict[str, Any]) -> IsIn:
- """Create from dictionary."""
+ """Create from dictionary.
+
+ Returns:
+ New IsIn instance.
+ """
return cls(data["column"], data["values"])
@@ -560,11 +646,19 @@ class Between(Filter):
upper: Any
def to_expr(self) -> pl.Expr:
- """Convert to Polars is_between expression."""
+ """Convert to Polars is_between expression.
+
+ Returns:
+ Polars expression for range check.
+ """
return pl.col(self.column).is_between(self.lower, self.upper)
def to_dict(self) -> dict[str, Any]:
- """Serialize to dictionary."""
+ """Serialize to dictionary.
+
+ Returns:
+ Dictionary representation with type, column, lower, and upper.
+ """
return {
"type": "between",
"column": self.column,
@@ -574,7 +668,11 @@ def to_dict(self) -> dict[str, Any]:
@classmethod
def _from_dict(cls, data: dict[str, Any]) -> Between:
- """Create from dictionary."""
+ """Create from dictionary.
+
+ Returns:
+ New Between instance.
+ """
return cls(data["column"], data["lower"], data["upper"])
@@ -594,16 +692,28 @@ class IsNull(Filter):
column: str
def to_expr(self) -> pl.Expr:
- """Convert to Polars is_null expression."""
+ """Convert to Polars is_null expression.
+
+ Returns:
+ Polars expression for null check.
+ """
return pl.col(self.column).is_null()
def to_dict(self) -> dict[str, Any]:
- """Serialize to dictionary."""
+ """Serialize to dictionary.
+
+ Returns:
+ Dictionary representation with type and column.
+ """
return {"type": "is_null", "column": self.column}
@classmethod
def _from_dict(cls, data: dict[str, Any]) -> IsNull:
- """Create from dictionary."""
+ """Create from dictionary.
+
+ Returns:
+ New IsNull instance.
+ """
return cls(data["column"])
@@ -618,16 +728,28 @@ class IsNotNull(Filter):
column: str
def to_expr(self) -> pl.Expr:
- """Convert to Polars is_not_null expression."""
+ """Convert to Polars is_not_null expression.
+
+ Returns:
+ Polars expression for not-null check.
+ """
return pl.col(self.column).is_not_null()
def to_dict(self) -> dict[str, Any]:
- """Serialize to dictionary."""
+ """Serialize to dictionary.
+
+ Returns:
+ Dictionary representation with type and column.
+ """
return {"type": "is_not_null", "column": self.column}
@classmethod
def _from_dict(cls, data: dict[str, Any]) -> IsNotNull:
- """Create from dictionary."""
+ """Create from dictionary.
+
+ Returns:
+ New IsNotNull instance.
+ """
return cls(data["column"])
@@ -651,11 +773,19 @@ class StrContains(Filter):
literal: bool = True
def to_expr(self) -> pl.Expr:
- """Convert to Polars str.contains expression."""
+ """Convert to Polars str.contains expression.
+
+ Returns:
+ Polars expression for string containment check.
+ """
return pl.col(self.column).str.contains(self.pattern, literal=self.literal)
def to_dict(self) -> dict[str, Any]:
- """Serialize to dictionary."""
+ """Serialize to dictionary.
+
+ Returns:
+ Dictionary representation with type, column, pattern, and literal.
+ """
return {
"type": "str_contains",
"column": self.column,
@@ -665,7 +795,11 @@ def to_dict(self) -> dict[str, Any]:
@classmethod
def _from_dict(cls, data: dict[str, Any]) -> StrContains:
- """Create from dictionary."""
+ """Create from dictionary.
+
+ Returns:
+ New StrContains instance.
+ """
return cls(data["column"], data["pattern"], data.get("literal", True))
@@ -682,16 +816,28 @@ class StrStartsWith(Filter):
prefix: str
def to_expr(self) -> pl.Expr:
- """Convert to Polars str.starts_with expression."""
+ """Convert to Polars str.starts_with expression.
+
+ Returns:
+ Polars expression for prefix check.
+ """
return pl.col(self.column).str.starts_with(self.prefix)
def to_dict(self) -> dict[str, Any]:
- """Serialize to dictionary."""
+ """Serialize to dictionary.
+
+ Returns:
+ Dictionary representation with type, column, and prefix.
+ """
return {"type": "str_starts_with", "column": self.column, "prefix": self.prefix}
@classmethod
def _from_dict(cls, data: dict[str, Any]) -> StrStartsWith:
- """Create from dictionary."""
+ """Create from dictionary.
+
+ Returns:
+ New StrStartsWith instance.
+ """
return cls(data["column"], data["prefix"])
@@ -708,16 +854,28 @@ class StrEndsWith(Filter):
suffix: str
def to_expr(self) -> pl.Expr:
- """Convert to Polars str.ends_with expression."""
+ """Convert to Polars str.ends_with expression.
+
+ Returns:
+ Polars expression for suffix check.
+ """
return pl.col(self.column).str.ends_with(self.suffix)
def to_dict(self) -> dict[str, Any]:
- """Serialize to dictionary."""
+ """Serialize to dictionary.
+
+ Returns:
+ Dictionary representation with type, column, and suffix.
+ """
return {"type": "str_ends_with", "column": self.column, "suffix": self.suffix}
@classmethod
def _from_dict(cls, data: dict[str, Any]) -> StrEndsWith:
- """Create from dictionary."""
+ """Create from dictionary.
+
+ Returns:
+ New StrEndsWith instance.
+ """
return cls(data["column"], data["suffix"])
@@ -744,11 +902,19 @@ class And(Filter):
right: Filter
def to_expr(self) -> pl.Expr:
- """Convert to Polars AND expression."""
+ """Convert to Polars AND expression.
+
+ Returns:
+ Polars expression combining both conditions with AND.
+ """
return self.left.to_expr() & self.right.to_expr()
def to_dict(self) -> dict[str, Any]:
- """Serialize to dictionary with nested filter dicts."""
+ """Serialize to dictionary with nested filter dicts.
+
+ Returns:
+ Dictionary representation with type, left, and right.
+ """
return {
"type": "and",
"left": self.left.to_dict(),
@@ -757,7 +923,11 @@ def to_dict(self) -> dict[str, Any]:
@classmethod
def _from_dict(cls, data: dict[str, Any]) -> And:
- """Create from dictionary, recursively deserializing children."""
+ """Create from dictionary, recursively deserializing children.
+
+ Returns:
+ New And instance.
+ """
return cls(
Filter.from_dict(data["left"]),
Filter.from_dict(data["right"]),
@@ -782,11 +952,19 @@ class Or(Filter):
right: Filter
def to_expr(self) -> pl.Expr:
- """Convert to Polars OR expression."""
+ """Convert to Polars OR expression.
+
+ Returns:
+ Polars expression combining both conditions with OR.
+ """
return self.left.to_expr() | self.right.to_expr()
def to_dict(self) -> dict[str, Any]:
- """Serialize to dictionary with nested filter dicts."""
+ """Serialize to dictionary with nested filter dicts.
+
+ Returns:
+ Dictionary representation with type, left, and right.
+ """
return {
"type": "or",
"left": self.left.to_dict(),
@@ -795,7 +973,11 @@ def to_dict(self) -> dict[str, Any]:
@classmethod
def _from_dict(cls, data: dict[str, Any]) -> Or:
- """Create from dictionary, recursively deserializing children."""
+ """Create from dictionary, recursively deserializing children.
+
+ Returns:
+ New Or instance.
+ """
return cls(
Filter.from_dict(data["left"]),
Filter.from_dict(data["right"]),
@@ -818,16 +1000,28 @@ class Not(Filter):
operand: Filter
def to_expr(self) -> pl.Expr:
- """Convert to Polars NOT expression."""
+ """Convert to Polars NOT expression.
+
+ Returns:
+ Polars expression inverting the operand condition.
+ """
return ~self.operand.to_expr()
def to_dict(self) -> dict[str, Any]:
- """Serialize to dictionary with nested filter dict."""
+ """Serialize to dictionary with nested filter dict.
+
+ Returns:
+ Dictionary representation with type and operand.
+ """
return {"type": "not", "operand": self.operand.to_dict()}
@classmethod
def _from_dict(cls, data: dict[str, Any]) -> Not:
- """Create from dictionary, recursively deserializing operand."""
+ """Create from dictionary, recursively deserializing operand.
+
+ Returns:
+ New Not instance.
+ """
return cls(Filter.from_dict(data["operand"]))
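
Since every `Filter` now documents its `to_dict`/`to_expr` pair, composed filters round-trip the same way; a short sketch, assuming `Col` and `Filter` are importable from `transformplan.filters` as this file suggests:

```python
# Sketch: build a composite filter, serialize it, rebuild it, and apply
# the resulting Polars expression directly.
import polars as pl
from transformplan.filters import Col, Filter

f = (Col("status") == "active") & ~(Col("score") < 50)
rebuilt = Filter.from_dict(f.to_dict())   # nested and/not dicts

df = pl.DataFrame({"status": ["active", "active"], "score": [40, 80]})
print(df.filter(rebuilt.to_expr()))       # keeps only the score-80 row
```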
diff --git a/transformplan/ops/column.py b/transformplan/ops/column.py
index bd467f3..8a7c405 100644
--- a/transformplan/ops/column.py
+++ b/transformplan/ops/column.py
@@ -30,16 +30,17 @@
import hashlib
import secrets
import string
-from typing import TYPE_CHECKING, Any, Sequence
+from typing import TYPE_CHECKING, Any, Literal, Sequence
import polars as pl
if TYPE_CHECKING:
from typing import Any, Callable
- import polars as pl
from typing_extensions import Self
+FillNullStrategy = Literal["forward", "backward", "min", "max", "mean", "zero", "one"]
+
class ColumnOps:
"""Mixin providing column-level operations."""
@@ -53,14 +54,22 @@ def _register(
) -> Self: ...
def col_drop(self, column: str) -> Self:
- """Drop a column from the DataFrame."""
+ """Drop a column from the DataFrame.
+
+ Returns:
+ Self for method chaining.
+ """
return self._register(self._col_drop, {"column": column})
def _col_drop(self, data: pl.DataFrame, column: str) -> pl.DataFrame:
return data.drop(column)
def col_rename(self, column: str, new_name: str) -> Self:
- """Rename a column."""
+ """Rename a column.
+
+ Returns:
+ Self for method chaining.
+ """
return self._register(
self._col_rename, {"column": column, "new_name": new_name}
)
@@ -71,21 +80,33 @@ def _col_rename(
return data.rename({column: new_name})
def col_cast(self, column: str, dtype: type) -> Self:
- """Cast a column to a different dtype."""
+ """Cast a column to a different dtype.
+
+ Returns:
+ Self for method chaining.
+ """
return self._register(self._col_cast, {"column": column, "dtype": dtype})
def _col_cast(self, data: pl.DataFrame, column: str, dtype: type) -> pl.DataFrame:
return data.with_columns(pl.col(column).cast(dtype))
def col_reorder(self, columns: Sequence[str]) -> Self:
- """Reorder columns. Unlisted columns are dropped."""
+ """Reorder columns. Unlisted columns are dropped.
+
+ Returns:
+ Self for method chaining.
+ """
return self._register(self._col_reorder, {"columns": list(columns)})
def _col_reorder(self, data: pl.DataFrame, columns: list[str]) -> pl.DataFrame:
return data.select(columns)
def col_duplicate(self, column: str, new_name: str) -> Self:
- """Duplicate a column under a new name."""
+ """Duplicate a column under a new name.
+
+ Returns:
+ Self for method chaining.
+ """
return self._register(
self._col_duplicate, {"column": column, "new_name": new_name}
)
@@ -96,14 +117,21 @@ def _col_duplicate(
return data.with_columns(pl.col(column).alias(new_name))
def col_fill_null(
- self, column: str, value: Any = None, strategy: str | None = None
+ self,
+ column: str,
+ value: Any = None, # noqa: ANN401
+ strategy: FillNullStrategy | None = None,
) -> Self:
"""Fill null values in a column.
Args:
column: Column to fill.
value: Value to fill nulls with (if strategy is None).
- strategy: Fill strategy - 'forward', 'backward', 'mean', 'min', 'max', 'zero', 'one'.
+ strategy: Fill strategy - 'forward', 'backward', 'mean', 'min', 'max',
+ 'zero', 'one'.
+
+ Returns:
+ Self for method chaining.
"""
return self._register(
self._col_fill_null,
@@ -111,7 +139,11 @@ def col_fill_null(
)
def _col_fill_null(
- self, data: pl.DataFrame, column: str, value: Any, strategy: str | None
+ self,
+ data: pl.DataFrame,
+ column: str,
+ value: Any, # noqa: ANN401
+ strategy: FillNullStrategy | None,
) -> pl.DataFrame:
if strategy is not None:
return data.with_columns(pl.col(column).fill_null(strategy=strategy))
@@ -122,6 +154,9 @@ def col_drop_null(self, columns: str | Sequence[str] | None = None) -> Self:
Args:
columns: Column(s) to check for nulls. If None, checks all columns.
+
+ Returns:
+ Self for method chaining.
"""
if isinstance(columns, str):
columns = [columns]
@@ -133,7 +168,11 @@ def _col_drop_null(
return data.drop_nulls(subset=columns)
def col_drop_zero(self, column: str) -> Self:
- """Drop rows where the specified column is zero."""
+ """Drop rows where the specified column is zero.
+
+ Returns:
+ Self for method chaining.
+ """
return self._register(self._col_drop_zero, {"column": column})
def _col_drop_zero(self, data: pl.DataFrame, column: str) -> pl.DataFrame:
@@ -142,8 +181,8 @@ def _col_drop_zero(self, data: pl.DataFrame, column: str) -> pl.DataFrame:
def col_add(
self,
new_column: str,
- expr: str | int | float | None = None,
- value: Any = None,
+ expr: str | float | None = None,
+ value: Any = None, # noqa: ANN401
) -> Self:
"""Add a new column with a constant value or expression.
@@ -151,13 +190,20 @@ def col_add(
new_column: Name of the new column.
expr: Column name to copy from, or None for constant value.
value: Constant value to fill the column with.
+
+ Returns:
+ Self for method chaining.
"""
return self._register(
self._col_add, {"new_column": new_column, "expr": expr, "value": value}
)
def _col_add(
- self, data: pl.DataFrame, new_column: str, expr: str | None, value: Any
+ self,
+ data: pl.DataFrame,
+ new_column: str,
+ expr: str | None,
+ value: Any, # noqa: ANN401
) -> pl.DataFrame:
if expr is not None:
return data.with_columns(pl.col(expr).alias(new_column))
@@ -169,6 +215,9 @@ def col_add_uuid(self, column: str, length: int = 16) -> Self:
Args:
column: Name of the new column.
length: Length of the identifier string.
+
+ Returns:
+ Self for method chaining.
"""
return self._register(self._col_add_uuid, {"column": column, "length": length})
@@ -194,6 +243,9 @@ def col_hash(
columns: Column(s) to hash.
new_column: Name for the hash column.
salt: Optional salt to add to the hash.
+
+ Returns:
+ Self for method chaining.
"""
if isinstance(columns, str):
columns = [columns]
@@ -205,7 +257,7 @@ def col_hash(
def _col_hash(
self, data: pl.DataFrame, columns: list[str], new_column: str, salt: str
) -> pl.DataFrame:
- def hash_row(values: tuple) -> str:
+ def hash_row(values: tuple[Any, ...]) -> str:
content = "|".join(str(v) for v in values) + salt
return hashlib.sha256(content.encode()).hexdigest()[:16]
@@ -223,6 +275,9 @@ def col_coalesce(
Args:
columns: Columns to coalesce (in priority order).
new_column: Name for the result column.
+
+ Returns:
+ Self for method chaining.
"""
return self._register(
self._col_coalesce, {"columns": list(columns), "new_column": new_column}
@@ -240,6 +295,9 @@ def col_select(self, columns: Sequence[str]) -> Self:
Args:
columns: Columns to keep.
+
+ Returns:
+ Self for method chaining.
"""
return self._register(self._col_select, {"columns": list(columns)})
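
The new `FillNullStrategy` literal narrows `col_fill_null`'s `strategy` parameter to the values Polars accepts; a small usage sketch under the same API assumptions as above:

```python
# Sketch: fill nulls either with a constant or a Polars strategy; the
# Literal type now rejects misspelled strategies at type-check time.
import polars as pl
from transformplan import TransformPlan

df = pl.DataFrame({"x": [1, None, 3]})
by_value, _ = TransformPlan().col_fill_null("x", value=0).process(df)
by_strategy, _ = TransformPlan().col_fill_null("x", strategy="forward").process(df)
print(by_value["x"].to_list())     # [1, 0, 3]
print(by_strategy["x"].to_list())  # [1, 1, 3]
```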
diff --git a/transformplan/ops/datetime.py b/transformplan/ops/datetime.py
index 11a9230..75466ab 100644
--- a/transformplan/ops/datetime.py
+++ b/transformplan/ops/datetime.py
@@ -36,16 +36,17 @@
from __future__ import annotations
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Literal
import polars as pl
if TYPE_CHECKING:
from typing import Any, Callable
- import polars as pl
from typing_extensions import Self
+ClosedInterval = Literal["left", "right", "both", "none"]
+
class DatetimeOps:
"""Mixin providing datetime operations on columns."""
@@ -64,6 +65,9 @@ def dt_year(self, column: str, new_column: str | None = None) -> Self:
Args:
column: Source datetime column.
new_column: Name for result column (None = modify in place).
+
+ Returns:
+ Self for method chaining.
"""
return self._register(
self._dt_year, {"column": column, "new_column": new_column or column}
@@ -80,6 +84,9 @@ def dt_month(self, column: str, new_column: str | None = None) -> Self:
Args:
column: Source datetime column.
new_column: Name for result column (None = modify in place).
+
+ Returns:
+ Self for method chaining.
"""
return self._register(
self._dt_month, {"column": column, "new_column": new_column or column}
@@ -96,6 +103,9 @@ def dt_day(self, column: str, new_column: str | None = None) -> Self:
Args:
column: Source datetime column.
new_column: Name for result column (None = modify in place).
+
+ Returns:
+ Self for method chaining.
"""
return self._register(
self._dt_day, {"column": column, "new_column": new_column or column}
@@ -110,6 +120,9 @@ def dt_week(self, column: str, new_column: str | None = None) -> Self:
Args:
column: Source datetime column.
new_column: Name for result column (None = modify in place).
+
+ Returns:
+ Self for method chaining.
"""
return self._register(
self._dt_week, {"column": column, "new_column": new_column or column}
@@ -126,6 +139,9 @@ def dt_quarter(self, column: str, new_column: str | None = None) -> Self:
Args:
column: Source datetime column.
new_column: Name for result column (None = modify in place).
+
+ Returns:
+ Self for method chaining.
"""
return self._register(
self._dt_quarter, {"column": column, "new_column": new_column or column}
@@ -143,6 +159,9 @@ def dt_year_month(self, column: str, new_column: str, fmt: str = "%Y-%m") -> Sel
column: Source datetime column.
new_column: Name for result column.
fmt: Output format string.
+
+ Returns:
+ Self for method chaining.
"""
return self._register(
self._dt_year_month,
@@ -160,6 +179,9 @@ def dt_quarter_year(self, column: str, new_column: str) -> Self:
Args:
column: Source datetime column.
new_column: Name for result column.
+
+ Returns:
+ Self for method chaining.
"""
return self._register(
self._dt_quarter_year, {"column": column, "new_column": new_column}
@@ -183,6 +205,9 @@ def dt_calendar_week(self, column: str, new_column: str) -> Self:
Args:
column: Source datetime column.
new_column: Name for result column.
+
+ Returns:
+ Self for method chaining.
"""
return self._register(
self._dt_calendar_week, {"column": column, "new_column": new_column}
@@ -211,6 +236,9 @@ def dt_parse(
column: Source string column.
fmt: Date format string.
new_column: Name for result column (None = modify in place).
+
+ Returns:
+ Self for method chaining.
"""
return self._register(
self._dt_parse,
@@ -231,6 +259,9 @@ def dt_format(self, column: str, fmt: str, new_column: str | None = None) -> Sel
column: Source datetime column.
fmt: Output format string.
new_column: Name for result column (None = modify in place).
+
+ Returns:
+ Self for method chaining.
"""
return self._register(
self._dt_format,
@@ -249,6 +280,9 @@ def dt_diff_days(self, column_a: str, column_b: str, new_column: str) -> Self:
column_a: First date column.
column_b: Second date column.
new_column: Name for result column.
+
+ Returns:
+ Self for method chaining.
"""
return self._register(
self._dt_diff_days,
@@ -274,6 +308,9 @@ def dt_age_years(
birth_column: Column containing birth dates.
reference_column: Column containing reference dates (None = today).
new_column: Name for result column.
+
+ Returns:
+ Self for method chaining.
"""
return self._register(
self._dt_age_years,
@@ -316,6 +353,9 @@ def dt_truncate(
column: Source datetime column.
every: Truncation interval ('1d', '1mo', '1y', '1h', etc.).
new_column: Name for result column (None = modify in place).
+
+ Returns:
+ Self for method chaining.
"""
return self._register(
self._dt_truncate,
@@ -333,7 +373,7 @@ def dt_is_between(
start: str,
end: str,
new_column: str,
- closed: str = "both",
+ closed: ClosedInterval = "both",
) -> Self:
"""Check if date falls within a range.
@@ -343,6 +383,9 @@ def dt_is_between(
end: End date (string, will be parsed).
new_column: Name for boolean result column.
closed: Which endpoints to include ('both', 'left', 'right', 'none').
+
+ Returns:
+ Self for method chaining.
"""
return self._register(
self._dt_is_between,
@@ -362,7 +405,7 @@ def _dt_is_between(
start: str,
end: str,
new_column: str,
- closed: str,
+ closed: ClosedInterval,
) -> pl.DataFrame:
return data.with_columns(
pl.col(column)
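
`dt_is_between` now types `closed` with the `ClosedInterval` literal; a sketch of the endpoint semantics, same API assumptions as above:

```python
# Sketch: closed="left" includes the start date but excludes the end date.
from datetime import date

import polars as pl
from transformplan import TransformPlan

df = pl.DataFrame({"d": [date(2024, 1, 1), date(2024, 12, 31)]})
plan = TransformPlan().dt_is_between(
    column="d", start="2024-01-01", end="2024-12-31",
    new_column="in_2024", closed="left",
)
out, _ = plan.process(df)
print(out["in_2024"].to_list())  # [True, False]
```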
diff --git a/transformplan/ops/map.py b/transformplan/ops/map.py
index 5c3f544..c01eb1a 100644
--- a/transformplan/ops/map.py
+++ b/transformplan/ops/map.py
@@ -34,7 +34,6 @@
if TYPE_CHECKING:
from typing import Any, Callable
- import polars as pl
from typing_extensions import Self
@@ -53,7 +52,8 @@ def map_values(
self,
column: str,
mapping: dict[Any, Any],
- default: Any = None,
+ default: Any = None, # noqa: ANN401
+ *,
keep_unmapped: bool = True,
) -> Self:
"""Map values in a column using a dictionary.
@@ -63,6 +63,9 @@ def map_values(
mapping: Dictionary mapping old values to new values.
default: Default value for unmapped values (if keep_unmapped=False).
keep_unmapped: If True, keep original value when not in mapping.
+
+ Returns:
+ Self for method chaining.
"""
return self._register(
self._map_values,
@@ -79,8 +82,8 @@ def _map_values(
data: pl.DataFrame,
column: str,
mapping: dict[Any, Any],
- default: Any,
- keep_unmapped: bool,
+ default: Any, # noqa: ANN401
+ keep_unmapped: bool, # noqa: FBT001
) -> pl.DataFrame:
# Build a when/then chain for the mapping
expr = pl.col(column)
@@ -109,6 +112,7 @@ def map_discretize(
bins: Sequence[float],
labels: Sequence[str] | None = None,
new_column: str | None = None,
+ *,
right: bool = True,
) -> Self:
"""Discretize a numeric column into bins/categories.
@@ -119,6 +123,9 @@ def map_discretize(
labels: Labels for each bin (must be len(bins)+1 if provided).
new_column: Name for result column (None = modify in place).
right: If True, bins are (left, right]. If False, [left, right).
+
+ Returns:
+ Self for method chaining.
"""
return self._register(
self._map_discretize,
@@ -138,12 +145,12 @@ def _map_discretize(
bins: list[float],
labels: list[str] | None,
new_column: str,
- right: bool,
+ right: bool, # noqa: FBT001
) -> pl.DataFrame:
# Create labels if not provided
if labels is None:
labels = []
- edges = [-float("inf")] + bins + [float("inf")]
+ edges = [-float("inf"), *bins, float("inf")]
for i in range(len(edges) - 1):
if right:
labels.append(f"({edges[i]}, {edges[i + 1]}]")
@@ -152,7 +159,7 @@ def _map_discretize(
# Build when/then chain
col = pl.col(column)
- edges = [-float("inf")] + bins + [float("inf")]
+ edges = [-float("inf"), *bins, float("inf")]
# First bin
if right:
@@ -174,31 +181,49 @@ def _map_discretize(
return data.with_columns(chain.alias(new_column))
def map_bool_to_int(self, column: str) -> Self:
- """Convert a boolean column to integer (True=1, False=0)."""
+ """Convert a boolean column to integer (True=1, False=0).
+
+ Returns:
+ Self for method chaining.
+ """
return self._register(self._map_bool_to_int, {"column": column})
def _map_bool_to_int(self, data: pl.DataFrame, column: str) -> pl.DataFrame:
return data.with_columns(pl.col(column).cast(pl.Int64))
- def map_null_to_value(self, column: str, value: Any) -> Self:
- """Replace null values with a specific value."""
+ def map_null_to_value(self, column: str, value: Any) -> Self: # noqa: ANN401
+ """Replace null values with a specific value.
+
+ Returns:
+ Self for method chaining.
+ """
return self._register(
self._map_null_to_value, {"column": column, "value": value}
)
def _map_null_to_value(
- self, data: pl.DataFrame, column: str, value: Any
+ self,
+ data: pl.DataFrame,
+ column: str,
+ value: Any, # noqa: ANN401
) -> pl.DataFrame:
return data.with_columns(pl.col(column).fill_null(value))
- def map_value_to_null(self, column: str, value: Any) -> Self:
- """Replace a specific value with null."""
+ def map_value_to_null(self, column: str, value: Any) -> Self: # noqa: ANN401
+ """Replace a specific value with null.
+
+ Returns:
+ Self for method chaining.
+ """
return self._register(
self._map_value_to_null, {"column": column, "value": value}
)
def _map_value_to_null(
- self, data: pl.DataFrame, column: str, value: Any
+ self,
+ data: pl.DataFrame,
+ column: str,
+ value: Any, # noqa: ANN401
) -> pl.DataFrame:
return data.with_columns(
pl.when(pl.col(column) == value)
@@ -211,7 +236,7 @@ def map_case(
self,
column: str,
cases: list[tuple[Any, Any]],
- default: Any = None,
+ default: Any = None, # noqa: ANN401
new_column: str | None = None,
) -> Self:
"""Apply case-when logic to a column.
@@ -222,6 +247,9 @@ def map_case(
default: Default value if no case matches.
new_column: Name for result column (None = modify in place).
+ Returns:
+ Self for method chaining.
+
Example:
.map_case('grade', [(90, 'A'), (80, 'B'), (70, 'C')], default='F')
Maps: >= 90 -> A, >= 80 -> B, >= 70 -> C, else F
@@ -241,7 +269,7 @@ def _map_case(
data: pl.DataFrame,
column: str,
cases: list[tuple[Any, Any]],
- default: Any,
+ default: Any, # noqa: ANN401
new_column: str,
) -> pl.DataFrame:
if not cases:
@@ -263,7 +291,7 @@ def map_from_column(
lookup_column: str,
value_column: str,
new_column: str | None = None,
- default: Any = None,
+ default: Any = None, # noqa: ANN401
) -> Self:
"""Map values using another column as lookup (like vlookup).
@@ -276,6 +304,9 @@ def map_from_column(
value_column: Column containing values to map to.
new_column: Name for result column (None = modify in place).
default: Default value if lookup fails.
+
+ Returns:
+ Self for method chaining.
"""
return self._register(
self._map_from_column,
@@ -295,10 +326,16 @@ def _map_from_column(
lookup_column: str,
value_column: str,
new_column: str,
- default: Any,
+ default: Any, # noqa: ANN401
) -> pl.DataFrame:
# Build lookup dict from the data
- lookup = dict(zip(data[lookup_column].to_list(), data[value_column].to_list()))
+ lookup = dict(
+ zip(
+ data[lookup_column].to_list(),
+ data[value_column].to_list(),
+ strict=False,
+ )
+ )
return data.with_columns(
pl.col(column).replace(lookup, default=default).alias(new_column)
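
With the bare `*` in `map_values`, `keep_unmapped` must now be passed by keyword; a sketch of both behaviors:

```python
# Sketch: keep_unmapped=True leaves unknown codes as-is; with
# keep_unmapped=False they fall back to `default`.
import polars as pl
from transformplan import TransformPlan

df = pl.DataFrame({"code": ["A", "B", "Z"]})
kept, _ = TransformPlan().map_values("code", {"A": "Alpha"}).process(df)
mapped, _ = (
    TransformPlan()
    .map_values("code", {"A": "Alpha"}, default="other", keep_unmapped=False)
    .process(df)
)
print(kept["code"].to_list())    # ["Alpha", "B", "Z"]
print(mapped["code"].to_list())  # ["Alpha", "other", "other"]
```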
diff --git a/transformplan/ops/math.py b/transformplan/ops/math.py
index 2de431c..d183c15 100644
--- a/transformplan/ops/math.py
+++ b/transformplan/ops/math.py
@@ -34,17 +34,17 @@
from __future__ import annotations
-from typing import TYPE_CHECKING, Union
+from typing import TYPE_CHECKING, Literal, Union
import polars as pl
if TYPE_CHECKING:
from typing import Any, Callable
- import polars as pl
from typing_extensions import Self
Numeric = Union[int, float]
+RankMethod = Literal["average", "min", "max", "dense", "ordinal", "random"]
class MathOps:
@@ -59,7 +59,11 @@ def _register(
) -> Self: ...
def math_add(self, column: str, value: Numeric) -> Self:
- """Add a scalar value to a column."""
+ """Add a scalar value to a column.
+
+ Returns:
+ Self for method chaining.
+ """
return self._register(self._math_add, {"column": column, "value": value})
def _math_add(
@@ -68,7 +72,11 @@ def _math_add(
return data.with_columns(pl.col(column) + value)
def math_subtract(self, column: str, value: Numeric) -> Self:
- """Subtract a scalar value from a column."""
+ """Subtract a scalar value from a column.
+
+ Returns:
+ Self for method chaining.
+ """
return self._register(self._math_subtract, {"column": column, "value": value})
def _math_subtract(
@@ -77,7 +85,11 @@ def _math_subtract(
return data.with_columns(pl.col(column) - value)
def math_multiply(self, column: str, value: Numeric) -> Self:
- """Multiply a column by a scalar value."""
+ """Multiply a column by a scalar value.
+
+ Returns:
+ Self for method chaining.
+ """
return self._register(self._math_multiply, {"column": column, "value": value})
def _math_multiply(
@@ -86,7 +98,11 @@ def _math_multiply(
return data.with_columns(pl.col(column) * value)
def math_divide(self, column: str, value: Numeric) -> Self:
- """Divide a column by a scalar value."""
+ """Divide a column by a scalar value.
+
+ Returns:
+ Self for method chaining.
+ """
return self._register(self._math_divide, {"column": column, "value": value})
def _math_divide(
@@ -100,7 +116,11 @@ def math_clamp(
lower: Numeric | None = None,
upper: Numeric | None = None,
) -> Self:
- """Clamp column values to a range."""
+ """Clamp column values to a range.
+
+ Returns:
+ Self for method chaining.
+ """
return self._register(
self._math_clamp, {"column": column, "lower": lower, "upper": upper}
)
@@ -115,7 +135,11 @@ def _math_clamp(
return data.with_columns(pl.col(column).clip(lower, upper))
def math_add_columns(self, column_a: str, column_b: str, new_column: str) -> Self:
- """Add two columns together into a new column."""
+ """Add two columns together into a new column.
+
+ Returns:
+ Self for method chaining.
+ """
return self._register(
self._math_add_columns,
{"column_a": column_a, "column_b": column_b, "new_column": new_column},
@@ -131,7 +155,11 @@ def _math_add_columns(
def math_subtract_columns(
self, column_a: str, column_b: str, new_column: str
) -> Self:
- """Subtract column_b from column_a into a new column."""
+ """Subtract column_b from column_a into a new column.
+
+ Returns:
+ Self for method chaining.
+ """
return self._register(
self._math_subtract_columns,
{"column_a": column_a, "column_b": column_b, "new_column": new_column},
@@ -147,7 +175,11 @@ def _math_subtract_columns(
def math_multiply_columns(
self, column_a: str, column_b: str, new_column: str
) -> Self:
- """Multiply two columns together into a new column."""
+ """Multiply two columns together into a new column.
+
+ Returns:
+ Self for method chaining.
+ """
return self._register(
self._math_multiply_columns,
{"column_a": column_a, "column_b": column_b, "new_column": new_column},
@@ -163,7 +195,11 @@ def _math_multiply_columns(
def math_divide_columns(
self, column_a: str, column_b: str, new_column: str
) -> Self:
- """Divide column_a by column_b into a new column."""
+ """Divide column_a by column_b into a new column.
+
+ Returns:
+ Self for method chaining.
+ """
return self._register(
self._math_divide_columns,
{"column_a": column_a, "column_b": column_b, "new_column": new_column},
@@ -177,7 +213,11 @@ def _math_divide_columns(
)
def math_set_min(self, column: str, min_value: Numeric) -> Self:
- """Set a minimum value for a column (values below are raised to min)."""
+ """Set a minimum value for a column (values below are raised to min).
+
+ Returns:
+ Self for method chaining.
+ """
return self._register(
self._math_set_min, {"column": column, "min_value": min_value}
)
@@ -193,7 +233,11 @@ def _math_set_min(
)
def math_set_max(self, column: str, max_value: Numeric) -> Self:
- """Set a maximum value for a column (values above are lowered to max)."""
+ """Set a maximum value for a column (values above are lowered to max).
+
+ Returns:
+ Self for method chaining.
+ """
return self._register(
self._math_set_max, {"column": column, "max_value": max_value}
)
@@ -209,14 +253,22 @@ def _math_set_max(
)
def math_abs(self, column: str) -> Self:
- """Take absolute value of a column."""
+ """Take absolute value of a column.
+
+ Returns:
+ Self for method chaining.
+ """
return self._register(self._math_abs, {"column": column})
def _math_abs(self, data: pl.DataFrame, column: str) -> pl.DataFrame:
return data.with_columns(pl.col(column).abs())
def math_round(self, column: str, decimals: int = 0) -> Self:
- """Round a column to specified decimal places."""
+ """Round a column to specified decimal places.
+
+ Returns:
+ Self for method chaining.
+ """
return self._register(
self._math_round, {"column": column, "decimals": decimals}
)
@@ -240,6 +292,9 @@ def math_percent_of(
total_column: Denominator column.
new_column: Name for result column.
multiply_by: Multiplier (default 100 for percentage).
+
+ Returns:
+ Self for method chaining.
"""
return self._register(
self._math_percent_of,
@@ -275,6 +330,9 @@ def math_cumsum(
column: Column to sum.
new_column: Name for result column (None = modify in place).
group_by: Optional column(s) to group by.
+
+ Returns:
+ Self for method chaining.
"""
if isinstance(group_by, str):
group_by = [group_by]
@@ -304,7 +362,8 @@ def math_rank(
self,
column: str,
new_column: str,
- method: str = "ordinal",
+ method: RankMethod = "ordinal",
+ *,
descending: bool = False,
group_by: str | list[str] | None = None,
) -> Self:
@@ -316,6 +375,9 @@ def math_rank(
method: Ranking method ('ordinal', 'dense', 'min', 'max', 'average').
descending: Rank in descending order.
group_by: Optional column(s) to group by.
+
+ Returns:
+ Self for method chaining.
"""
if isinstance(group_by, str):
group_by = [group_by]
@@ -335,8 +397,8 @@ def _math_rank(
data: pl.DataFrame,
column: str,
new_column: str,
- method: str,
- descending: bool,
+ method: RankMethod,
+ descending: bool, # noqa: FBT001
group_by: list[str] | None,
) -> pl.DataFrame:
expr = pl.col(column).rank(method=method, descending=descending)
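
`math_rank` now constrains `method` to the `RankMethod` literal and makes `descending` keyword-only; a grouped-ranking sketch under the same assumptions:

```python
# Sketch: dense rank within each team, highest score first.
import polars as pl
from transformplan import TransformPlan

df = pl.DataFrame({"team": ["a", "a", "b"], "score": [10, 30, 20]})
plan = TransformPlan().math_rank(
    "score", "rank", method="dense", descending=True, group_by="team"
)
out, _ = plan.process(df)
print(out.sort("team")["rank"].to_list())  # [2, 1, 1]
```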
diff --git a/transformplan/ops/rows.py b/transformplan/ops/rows.py
index 8d0b079..84936b5 100644
--- a/transformplan/ops/rows.py
+++ b/transformplan/ops/rows.py
@@ -43,7 +43,7 @@
if TYPE_CHECKING:
from typing import Any, Callable
- import polars as pl
+ from polars._typing import PivotAgg
from typing_extensions import Self
@@ -59,7 +59,11 @@ def _register(
) -> Self: ...
def rows_drop_nulls(self, columns: str | Sequence[str] | None = None) -> Self:
- """Drop rows with null values in specified columns (or any column if None)."""
+ """Drop rows with null values in specified columns (or any column if None).
+
+ Returns:
+ Self for method chaining.
+ """
if isinstance(columns, str):
columns = [columns]
return self._register(self._rows_drop_nulls, {"columns": columns})
@@ -74,7 +78,11 @@ def rows_unique(
columns: str | Sequence[str] | None = None,
keep: Literal["first", "last", "any", "none"] = "first",
) -> Self:
- """Keep unique rows based on specified columns."""
+ """Keep unique rows based on specified columns.
+
+ Returns:
+ Self for method chaining.
+ """
if isinstance(columns, str):
columns = [columns]
return self._register(self._rows_unique, {"columns": columns, "keep": keep})
@@ -87,38 +95,38 @@ def _rows_unique(
) -> pl.DataFrame:
return data.unique(subset=columns, keep=keep)
- def rows_filter(self, filter: Filter | dict) -> Self:
+ def rows_filter(self, filter: Filter | dict[str, Any]) -> Self:
"""Filter rows using a serializable Filter expression.
+ Returns:
+ Self for method chaining.
+
Example:
from transformplan.filters import Col
.rows_filter(Col("age") > 18)
.rows_filter((Col("status") == "active") & (Col("score") >= 50))
"""
- if isinstance(filter, dict):
- filter_dict = filter
- else:
- filter_dict = filter.to_dict()
+ filter_dict = filter if isinstance(filter, dict) else filter.to_dict()
return self._register(self._rows_filter, {"filter": filter_dict})
- def _rows_filter(self, data: pl.DataFrame, filter: dict) -> pl.DataFrame:
+ def _rows_filter(self, data: pl.DataFrame, filter: dict[str, Any]) -> pl.DataFrame:
expr = Filter.from_dict(filter).to_expr()
return data.filter(expr)
- def rows_drop(self, filter: Filter | dict) -> Self:
+ def rows_drop(self, filter: Filter | dict[str, Any]) -> Self:
"""Drop rows matching a filter (inverse of rows_filter).
+ Returns:
+ Self for method chaining.
+
Example:
.rows_drop(Col("status") == "deleted")
"""
- if isinstance(filter, dict):
- filter_dict = filter
- else:
- filter_dict = filter.to_dict()
+ filter_dict = filter if isinstance(filter, dict) else filter.to_dict()
return self._register(self._rows_drop, {"filter": filter_dict})
- def _rows_drop(self, data: pl.DataFrame, filter: dict) -> pl.DataFrame:
+ def _rows_drop(self, data: pl.DataFrame, filter: dict[str, Any]) -> pl.DataFrame:
expr = Filter.from_dict(filter).to_expr()
return data.filter(~expr)
@@ -127,6 +135,7 @@ def rows_deduplicate(
columns: str | Sequence[str],
sort_by: str,
keep: Literal["first", "last"] = "first",
+ *,
descending: bool = False,
) -> Self:
"""Deduplicate rows by keeping first/last based on sort order.
@@ -136,6 +145,9 @@ def rows_deduplicate(
sort_by: Column to sort by before deduplication.
keep: Keep 'first' or 'last' after sorting.
descending: Sort in descending order.
+
+ Returns:
+ Self for method chaining.
"""
if isinstance(columns, str):
columns = [columns]
@@ -155,13 +167,17 @@ def _rows_deduplicate(
columns: list[str],
sort_by: str,
keep: Literal["first", "last"],
- descending: bool,
+ descending: bool, # noqa: FBT001
) -> pl.DataFrame:
sorted_data = data.sort(sort_by, descending=descending)
return sorted_data.unique(subset=columns, keep=keep, maintain_order=True)
def rows_explode(self, column: str) -> Self:
- """Explode a list column into multiple rows."""
+ """Explode a list column into multiple rows.
+
+ Returns:
+ Self for method chaining.
+ """
return self._register(self._rows_explode, {"column": column})
def _rows_explode(self, data: pl.DataFrame, column: str) -> pl.DataFrame:
@@ -181,6 +197,9 @@ def rows_melt(
value_columns: Columns to unpivot.
variable_name: Name for the variable column.
value_name: Name for the value column.
+
+ Returns:
+ Self for method chaining.
"""
return self._register(
self._rows_melt,
@@ -219,6 +238,9 @@ def rows_sample(
n: Number of rows to sample.
fraction: Fraction of rows to sample (0.0 to 1.0).
seed: Random seed for reproducibility.
+
+ Returns:
+ Self for method chaining.
"""
return self._register(
self._rows_sample, {"n": n, "fraction": fraction, "seed": seed}
@@ -234,14 +256,22 @@ def _rows_sample(
return data.sample(n=n, fraction=fraction, seed=seed)
def rows_head(self, n: int = 5) -> Self:
- """Keep only the first n rows."""
+ """Keep only the first n rows.
+
+ Returns:
+ Self for method chaining.
+ """
return self._register(self._rows_head, {"n": n})
def _rows_head(self, data: pl.DataFrame, n: int) -> pl.DataFrame:
return data.head(n)
def rows_tail(self, n: int = 5) -> Self:
- """Keep only the last n rows."""
+ """Keep only the last n rows.
+
+ Returns:
+ Self for method chaining.
+ """
return self._register(self._rows_tail, {"n": n})
def _rows_tail(self, data: pl.DataFrame, n: int) -> pl.DataFrame:
@@ -250,6 +280,7 @@ def _rows_tail(self, data: pl.DataFrame, n: int) -> pl.DataFrame:
def rows_sort(
self,
by: str | Sequence[str],
+ *,
descending: bool | Sequence[bool] = False,
) -> Self:
"""Sort rows by one or more columns.
@@ -257,6 +288,9 @@ def rows_sort(
Args:
by: Column(s) to sort by.
descending: Sort direction (single bool or list matching columns).
+
+ Returns:
+ Self for method chaining.
"""
if isinstance(by, str):
by = [by]
@@ -265,16 +299,20 @@ def rows_sort(
)
def _rows_sort(
- self, data: pl.DataFrame, by: list[str], descending: bool | Sequence[bool]
+ self,
+ data: pl.DataFrame,
+ by: list[str],
+ descending: bool | Sequence[bool], # noqa: FBT001
) -> pl.DataFrame:
return data.sort(by, descending=descending)
def rows_flag(
self,
- filter: Filter | dict,
+ filter: Filter | dict[str, Any],
new_column: str,
- true_value: Any = True,
- false_value: Any = False,
+ *,
+ true_value: Any = True, # noqa: ANN401
+ false_value: Any = False, # noqa: ANN401
) -> Self:
"""Add a flag column based on a filter condition (without dropping rows).
@@ -283,11 +321,11 @@ def rows_flag(
new_column: Name for the flag column.
true_value: Value when condition is True.
false_value: Value when condition is False.
+
+ Returns:
+ Self for method chaining.
"""
- if isinstance(filter, dict):
- filter_dict = filter
- else:
- filter_dict = filter.to_dict()
+ filter_dict = filter if isinstance(filter, dict) else filter.to_dict()
return self._register(
self._rows_flag,
{
@@ -301,10 +339,10 @@ def rows_flag(
def _rows_flag(
self,
data: pl.DataFrame,
- filter: dict,
+ filter: dict[str, Any],
new_column: str,
- true_value: Any,
- false_value: Any,
+ true_value: Any, # noqa: ANN401
+ false_value: Any, # noqa: ANN401
) -> pl.DataFrame:
expr = Filter.from_dict(filter).to_expr()
return data.with_columns(
@@ -319,7 +357,7 @@ def rows_pivot(
index: str | Sequence[str],
columns: str,
values: str,
- aggregate_function: str = "first",
+ aggregate_function: PivotAgg = "first",
) -> Self:
"""Pivot from long to wide format.
@@ -327,7 +365,11 @@ def rows_pivot(
index: Column(s) to use as row identifiers.
columns: Column whose unique values become new columns.
values: Column containing values to fill.
- aggregate_function: How to aggregate ('first', 'sum', 'mean', 'count', etc.).
+ aggregate_function: How to aggregate ('first', 'sum', 'mean', 'count',
+ etc.).
+
+ Returns:
+ Self for method chaining.
"""
if isinstance(index, str):
index = [index]
@@ -347,7 +389,7 @@ def _rows_pivot(
index: list[str],
columns: str,
values: str,
- aggregate_function: str,
+ aggregate_function: PivotAgg,
) -> pl.DataFrame:
return data.pivot(
index=index,
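
# PivotAgg (now used in the annotations above) is defined elsewhere in the
# package and not shown in this diff; given the documented values, a
# plausible definition is:
from typing import Literal

PivotAgg = Literal["first", "last", "sum", "mean", "median", "min", "max", "count"]
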
diff --git a/transformplan/ops/string.py b/transformplan/ops/string.py
index ea925be..6d48346 100644
--- a/transformplan/ops/string.py
+++ b/transformplan/ops/string.py
@@ -35,7 +35,6 @@
if TYPE_CHECKING:
from typing import Any, Callable
- import polars as pl
from typing_extensions import Self
@@ -55,6 +54,7 @@ def str_replace(
column: str,
pattern: str,
replacement: str,
+ *,
literal: bool = True,
) -> Self:
"""Replace occurrences of a pattern in a string column.
@@ -64,6 +64,9 @@ def str_replace(
pattern: Pattern to search for.
replacement: String to replace with.
literal: If True, treat pattern as literal string. If False, treat as regex.
+
+ Returns:
+ Self for method chaining.
"""
return self._register(
self._str_replace,
@@ -81,7 +84,7 @@ def _str_replace(
column: str,
pattern: str,
replacement: str,
- literal: bool,
+ literal: bool, # noqa: FBT001
) -> pl.DataFrame:
return data.with_columns(
pl.col(column).str.replace_all(pattern, replacement, literal=literal)
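
# What the literal flag above changes, shown with polars directly:
import polars as pl

df = pl.DataFrame({"code": ["A.1", "B.2"]})
print(df.with_columns(pl.col("code").str.replace_all(".", "-", literal=True)))
# literal=True: only actual dots change -> "A-1", "B-2"
print(df.with_columns(pl.col("code").str.replace_all(".", "-", literal=False)))
# literal=False: "." is a regex wildcard -> every character becomes "-"
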
@@ -99,6 +102,9 @@ def str_slice(
column: Column to modify.
offset: Start position (0-indexed, negative counts from end).
length: Number of characters to extract (None = to end).
+
+ Returns:
+ Self for method chaining.
"""
return self._register(
self._str_slice, {"column": column, "offset": offset, "length": length}
@@ -116,6 +122,9 @@ def str_truncate(self, column: str, max_length: int, suffix: str = "...") -> Sel
column: Column to modify.
max_length: Maximum length of the string (including suffix).
suffix: Suffix to append to truncated strings.
+
+ Returns:
+ Self for method chaining.
"""
return self._register(
self._str_truncate,
@@ -138,6 +147,7 @@ def str_split(
column: str,
separator: str,
new_columns: list[str] | None = None,
+ *,
keep_original: bool = False,
) -> Self:
"""Split a string column by separator.
@@ -147,6 +157,9 @@ def str_split(
separator: String to split on.
new_columns: Names for the resulting columns. If None, explodes into rows.
keep_original: Whether to keep the original column.
+
+ Returns:
+ Self for method chaining.
"""
return self._register(
self._str_split,
@@ -164,7 +177,7 @@ def _str_split(
column: str,
separator: str,
new_columns: list[str] | None,
- keep_original: bool,
+ keep_original: bool, # noqa: FBT001
) -> pl.DataFrame:
if new_columns is None:
# Explode into rows
@@ -183,14 +196,22 @@ def _str_split(
return result
def str_lower(self, column: str) -> Self:
- """Convert string column to lowercase."""
+ """Convert string column to lowercase.
+
+ Returns:
+ Self for method chaining.
+ """
return self._register(self._str_lower, {"column": column})
def _str_lower(self, data: pl.DataFrame, column: str) -> pl.DataFrame:
return data.with_columns(pl.col(column).str.to_lowercase())
def str_upper(self, column: str) -> Self:
- """Convert string column to uppercase."""
+ """Convert string column to uppercase.
+
+ Returns:
+ Self for method chaining.
+ """
return self._register(self._str_upper, {"column": column})
def _str_upper(self, data: pl.DataFrame, column: str) -> pl.DataFrame:
@@ -202,6 +223,9 @@ def str_strip(self, column: str, chars: str | None = None) -> Self:
Args:
column: Column to modify.
chars: Characters to strip (None = whitespace).
+
+ Returns:
+ Self for method chaining.
"""
return self._register(self._str_strip, {"column": column, "chars": chars})
@@ -226,6 +250,9 @@ def str_pad(
length: Target length.
fill_char: Character to pad with.
side: 'left' or 'right'.
+
+ Returns:
+ Self for method chaining.
"""
return self._register(
self._str_pad,
@@ -251,6 +278,9 @@ def str_concat(
columns: Columns to concatenate.
new_column: Name for the new column.
separator: Separator between values.
+
+ Returns:
+ Self for method chaining.
"""
return self._register(
self._str_concat,
@@ -280,6 +310,9 @@ def str_extract(
pattern: Regex pattern with capture group(s).
group_index: Which capture group to extract (1-indexed).
new_column: Name for result column (None = modify in place).
+
+ Returns:
+ Self for method chaining.
"""
return self._register(
self._str_extract,
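
# A short polars illustration of the capture-group extraction wrapped by
# str_extract above (group_index is 1-based, matching the docstring):
import polars as pl

df = pl.DataFrame({"email": ["ada@example.org", "bob@test.com"]})
print(df.with_columns(pl.col("email").str.extract(r"@(\w+)\.", 1).alias("domain")))
# domain: "example", "test"
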
diff --git a/transformplan/protocol.py b/transformplan/protocol.py
index c977d9a..8c2ce95 100644
--- a/transformplan/protocol.py
+++ b/transformplan/protocol.py
@@ -24,7 +24,7 @@
import hashlib
import json
-from datetime import datetime
+from datetime import datetime, timezone
from pathlib import Path
from typing import Any
@@ -66,10 +66,13 @@ class Protocol:
VERSION = "1.0"
def __init__(self) -> None:
+ """Initialize an empty Protocol."""
self._steps: list[dict[str, Any]] = []
self._input_hash: str | None = None
self._input_shape: tuple[int, int] | None = None
- self._created_at: str = datetime.utcnow().isoformat() + "Z"
+ self._created_at: str = (
+ datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
+ )
self._metadata: dict[str, Any] = {}
def set_input(self, hash_value: str, shape: tuple[int, int]) -> None:
@@ -77,7 +80,7 @@ def set_input(self, hash_value: str, shape: tuple[int, int]) -> None:
self._input_hash = hash_value
self._input_shape = shape
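
# Why the _created_at change above: datetime.utcnow() is deprecated since
# Python 3.12 and returns a *naive* datetime; now(timezone.utc) is aware,
# and the replace() swaps the "+00:00" offset for the conventional "Z":
from datetime import datetime, timezone

ts = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
print(ts)  # e.g. 2026-01-25T12:58:00.123456Z
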
- def set_metadata(self, **kwargs: Any) -> None:
+ def set_metadata(self, **kwargs: Any) -> None: # noqa: ANN401
"""Set arbitrary metadata on the protocol.
Example:
@@ -94,6 +97,16 @@ def add_step(
elapsed: float,
output_hash: str,
) -> None:
+ """Record a transformation step in the protocol.
+
+ Args:
+ operation: Name of the operation.
+ params: Operation parameters.
+ old_shape: Shape before operation (rows, cols).
+ new_shape: Shape after operation (rows, cols).
+ elapsed: Time taken in seconds.
+ output_hash: Hash of the output DataFrame.
+ """
self._steps.append(
{
"step": len(self._steps) + 1,
@@ -110,22 +123,39 @@ def add_step(
@property
def input_hash(self) -> str | None:
- """Hash of the input DataFrame."""
+ """Hash of the input DataFrame.
+
+ Returns:
+ Hash string or None if not set.
+ """
return self._input_hash
@property
def output_hash(self) -> str | None:
- """Hash of the final output DataFrame."""
+ """Hash of the final output DataFrame.
+
+ Returns:
+ Hash string or None if no steps.
+ """
if not self._steps:
return self._input_hash
return self._steps[-1]["output_hash"]
@property
def metadata(self) -> dict[str, Any]:
- """Protocol metadata."""
+ """Protocol metadata.
+
+ Returns:
+ Dictionary of metadata.
+ """
return self._metadata
def to_dataframe(self) -> pl.DataFrame:
+ """Convert protocol to a Polars DataFrame.
+
+ Returns:
+ DataFrame with step information.
+ """
rows = []
# Step 0: input state
@@ -175,25 +205,29 @@ def to_csv(self, path: str | Path) -> None:
}
)
- for step in self._steps:
- rows.append(
- {
- "step": step["step"],
- "operation": step["operation"],
- "params": json.dumps(step["params"]) if step["params"] else None,
- "old_shape": str(list(step["old_shape"])),
- "new_shape": str(list(step["new_shape"])),
- "rows_changed": step["rows_changed"],
- "cols_changed": step["cols_changed"],
- "elapsed_seconds": step["elapsed_seconds"],
- "output_hash": step["output_hash"],
- }
- )
+ rows.extend(
+ {
+ "step": step["step"],
+ "operation": step["operation"],
+ "params": json.dumps(step["params"]) if step["params"] else None,
+ "old_shape": str(list(step["old_shape"])),
+ "new_shape": str(list(step["new_shape"])),
+ "rows_changed": step["rows_changed"],
+ "cols_changed": step["cols_changed"],
+ "elapsed_seconds": step["elapsed_seconds"],
+ "output_hash": step["output_hash"],
+ }
+ for step in self._steps
+ )
pl.DataFrame(rows).write_csv(path)
def to_dict(self) -> dict[str, Any]:
- """Serialize protocol to a dictionary."""
+ """Serialize protocol to a dictionary.
+
+ Returns:
+ Dictionary representation of the protocol.
+ """
return {
"version": self.VERSION,
"created_at": self._created_at,
@@ -220,7 +254,11 @@ def to_dict(self) -> dict[str, Any]:
@classmethod
def from_dict(cls, data: dict[str, Any]) -> Protocol:
- """Deserialize protocol from a dictionary."""
+ """Deserialize protocol from a dictionary.
+
+ Returns:
+ Protocol instance.
+ """
protocol = cls()
protocol._created_at = data.get("created_at", protocol._created_at)
protocol._metadata = data.get("metadata", {})
@@ -228,7 +266,7 @@ def from_dict(cls, data: dict[str, Any]) -> Protocol:
input_data = data.get("input", {})
protocol._input_hash = input_data.get("hash")
shape = input_data.get("shape")
- protocol._input_shape = tuple(shape) if shape else None
+ protocol._input_shape = (int(shape[0]), int(shape[1])) if shape else None
for step in data.get("steps", []):
protocol._steps.append(
@@ -274,9 +312,7 @@ def from_json(cls, source: str | Path) -> Protocol:
Returns:
Protocol instance.
"""
- if isinstance(source, Path) or (
- isinstance(source, str) and not source.strip().startswith("{")
- ):
+ if isinstance(source, Path) or not source.strip().startswith("{"):
# Treat as file path
content = Path(source).read_text()
else:
@@ -286,12 +322,22 @@ def from_json(cls, source: str | Path) -> Protocol:
return cls.from_dict(json.loads(content))
def __repr__(self) -> str:
+ """Return string representation.
+
+ Returns:
+ Human-readable representation.
+ """
return f"Protocol({len(self._steps)} steps)"
def __len__(self) -> int:
+ """Return number of steps.
+
+ Returns:
+ Number of transformation steps.
+ """
return len(self._steps)
- def summary(self, show_params: bool = True) -> str:
+ def summary(self, *, show_params: bool = True) -> str: # noqa: C901
"""Generate a clean, human-readable summary of the protocol.
Args:
@@ -303,9 +349,7 @@ def summary(self, show_params: bool = True) -> str:
lines = []
# Header
- lines.append("=" * 70)
- lines.append("TRANSFORM PROTOCOL")
- lines.append("=" * 70)
+ lines.extend(("=" * 70, "TRANSFORM PROTOCOL", "=" * 70))
# Metadata
if self._metadata:
@@ -316,7 +360,7 @@ def summary(self, show_params: bool = True) -> str:
# Input info
if self._input_hash:
shape_str = (
- f"{self._input_shape[0]} rows × {self._input_shape[1]} cols"
+ f"{self._input_shape[0]} rows x {self._input_shape[1]} cols"
if self._input_shape
else "unknown"
)
@@ -325,20 +369,20 @@ def summary(self, show_params: bool = True) -> str:
# Output info
if self._steps:
final = self._steps[-1]
- shape_str = f"{final['new_shape'][0]} rows × {final['new_shape'][1]} cols"
+ shape_str = f"{final['new_shape'][0]} rows x {final['new_shape'][1]} cols"
lines.append(f"Output: {shape_str} [{final['output_hash']}]")
# Total time
total_time = sum(s["elapsed_seconds"] for s in self._steps)
- lines.append(f"Total time: {total_time:.4f}s")
- lines.append("-" * 70)
-
- # Steps
- lines.append("")
- lines.append(
- f"{'#':<4} {'Operation':<20} {'Rows':<12} {'Cols':<12} {'Time':<10} {'Hash':<16}"
+ lines.extend(
+ [
+ f"Total time: {total_time:.4f}s",
+ "-" * 70,
+ "",
+ f"{'#':<4} {'Operation':<20} {'Rows':<12} {'Cols':<12} {'Time':<10} {'Hash':<16}",
+ "-" * 70,
+ ]
)
- lines.append("-" * 70)
# Input row
if self._input_hash:
@@ -402,16 +446,17 @@ def summary(self, show_params: bool = True) -> str:
return "\n".join(lines)
- def _format_params(self, params: dict, max_length: int = 60) -> str:
- """Format params dict as a readable string."""
+ def _format_params(self, params: dict[str, Any], max_length: int = 60) -> str:
+ """Format params dict as a readable string.
+
+ Returns:
+ Formatted string representation.
+ """
parts = []
for key, value in params.items():
if isinstance(value, dict):
# Nested dict (like filter) - show type or summarize
- if "type" in value:
- value_str = self._format_filter(value)
- else:
- value_str = "{...}"
+ value_str = self._format_filter(value) if "type" in value else "{...}"
elif isinstance(value, list) and len(value) > 3:
value_str = f"[{value[0]}, {value[1]}, ... ({len(value)} items)]"
else:
@@ -423,8 +468,12 @@ def _format_params(self, params: dict, max_length: int = 60) -> str:
result = result[: max_length - 3] + "..."
return result
- def _format_filter(self, filter_dict: dict) -> str:
- """Format a filter dict as a readable expression."""
+ def _format_filter(self, filter_dict: dict[str, Any]) -> str: # noqa: C901
+ """Format a filter dict as a readable expression.
+
+ Returns:
+ Human-readable filter expression.
+ """
filter_type = filter_dict.get("type", "")
if filter_type in ("and", "or"):
@@ -432,10 +481,10 @@ def _format_filter(self, filter_dict: dict) -> str:
right = self._format_filter(filter_dict["right"])
op = "&" if filter_type == "and" else "|"
return f"({left} {op} {right})"
- elif filter_type == "not":
+ if filter_type == "not":
operand = self._format_filter(filter_dict["operand"])
return f"~{operand}"
- elif filter_type in ("eq", "ne", "gt", "ge", "lt", "le"):
+ if filter_type in ("eq", "ne", "gt", "ge", "lt", "le"):
col = filter_dict.get("column", "?")
val = filter_dict.get("value", "?")
op_map = {
@@ -447,7 +496,7 @@ def _format_filter(self, filter_dict: dict) -> str:
"le": "<=",
}
return f"{col} {op_map[filter_type]} {val!r}"
- elif filter_type == "is_in":
+ if filter_type == "is_in":
col = filter_dict.get("column", "?")
values = filter_dict.get("values", [])
if len(values) > 3:
@@ -455,29 +504,29 @@ def _format_filter(self, filter_dict: dict) -> str:
else:
val_str = repr(values)
return f"{col} in {val_str}"
- elif filter_type == "is_null":
+ if filter_type == "is_null":
return f"{filter_dict.get('column', '?')} is null"
- elif filter_type == "is_not_null":
+ if filter_type == "is_not_null":
return f"{filter_dict.get('column', '?')} is not null"
- elif filter_type == "between":
+ if filter_type == "between":
col = filter_dict.get("column", "?")
lower = filter_dict.get("lower", "?")
upper = filter_dict.get("upper", "?")
return f"{col} between {lower!r} and {upper!r}"
- elif filter_type.startswith("str_"):
+ if filter_type.startswith("str_"):
col = filter_dict.get("column", "?")
if filter_type == "str_contains":
return f"{col}.contains({filter_dict.get('pattern', '?')!r})"
- elif filter_type == "str_starts_with":
+ if filter_type == "str_starts_with":
return f"{col}.starts_with({filter_dict.get('prefix', '?')!r})"
- elif filter_type == "str_ends_with":
+ if filter_type == "str_ends_with":
return f"{col}.ends_with({filter_dict.get('suffix', '?')!r})"
return f"<{filter_type}>"
- def print(self, show_params: bool = True) -> None:
+ def print(self, *, show_params: bool = True) -> None:
"""Print the protocol summary to stdout.
Args:
show_params: Whether to include operation parameters.
"""
- print(self.summary(show_params))
+ print(self.summary(show_params=show_params)) # noqa: T201
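
# Usage sketch of the simplified from_json dispatch above: a string whose
# stripped form starts with "{" is parsed inline; anything else (str or
# Path) is read from disk:
from transformplan.protocol import Protocol

p = Protocol.from_json('{"version": "1.0", "steps": []}')
print(len(p))  # 0 -- __len__ counts recorded steps
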
diff --git a/transformplan/validation.py b/transformplan/validation.py
index ac22aa8..f4e3070 100644
--- a/transformplan/validation.py
+++ b/transformplan/validation.py
@@ -35,60 +35,77 @@
from __future__ import annotations
from dataclasses import dataclass
-from typing import TYPE_CHECKING, Any
+from collections.abc import Callable
+from functools import partial
+from typing import Any
import polars as pl
-if TYPE_CHECKING:
- from .filters import Filter
-
-
# =============================================================================
# Type categories for validation
# =============================================================================
NUMERIC_TYPES = {
- pl.Int8,
- pl.Int16,
- pl.Int32,
- pl.Int64,
- pl.UInt8,
- pl.UInt16,
- pl.UInt32,
- pl.UInt64,
- pl.Float32,
- pl.Float64,
+ pl.Int8(),
+ pl.Int16(),
+ pl.Int32(),
+ pl.Int64(),
+ pl.UInt8(),
+ pl.UInt16(),
+ pl.UInt32(),
+ pl.UInt64(),
+ pl.Float32(),
+ pl.Float64(),
}
-STRING_TYPES = {pl.Utf8, pl.String}
+STRING_TYPES = {pl.Utf8(), pl.String()}
-DATETIME_TYPES = {pl.Date, pl.Datetime, pl.Time, pl.Duration}
+DATETIME_TYPES = {pl.Date(), pl.Datetime(), pl.Time(), pl.Duration()}
-BOOLEAN_TYPES = {pl.Boolean}
+BOOLEAN_TYPES = {pl.Boolean()}
def is_numeric(dtype: pl.DataType) -> bool:
- """Check if dtype is numeric."""
- return dtype in NUMERIC_TYPES or dtype.base_type() in NUMERIC_TYPES
+ """Check if dtype is numeric.
+
+ Returns:
+ True if dtype is numeric, False otherwise.
+ """
+ return dtype in NUMERIC_TYPES or dtype.base_type()() in NUMERIC_TYPES
def is_string(dtype: pl.DataType) -> bool:
- """Check if dtype is string."""
- return dtype in STRING_TYPES or dtype.base_type() in STRING_TYPES
+ """Check if dtype is string.
+
+ Returns:
+ True if dtype is string, False otherwise.
+ """
+ return dtype in STRING_TYPES or dtype.base_type()() in STRING_TYPES
def is_datetime(dtype: pl.DataType) -> bool:
- """Check if dtype is datetime-related."""
- return dtype in DATETIME_TYPES or dtype.base_type() in DATETIME_TYPES
+ """Check if dtype is datetime-related.
+
+ Returns:
+ True if dtype is datetime-related, False otherwise.
+ """
+ return dtype in DATETIME_TYPES or dtype.base_type()() in DATETIME_TYPES
def is_boolean(dtype: pl.DataType) -> bool:
- """Check if dtype is boolean."""
- return dtype in BOOLEAN_TYPES or dtype.base_type() in BOOLEAN_TYPES
+ """Check if dtype is boolean.
+
+ Returns:
+ True if dtype is boolean, False otherwise.
+ """
+ return dtype in BOOLEAN_TYPES or dtype.base_type()() in BOOLEAN_TYPES
def dtype_name(dtype: pl.DataType) -> str:
- """Get a readable name for a dtype."""
+ """Get a readable name for a dtype.
+
+ Returns:
+ String representation of the dtype.
+ """
return str(dtype).split("(")[0]
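
# Why the type sets above now hold instances, sketched with polars: a
# parametrized dtype like Datetime("us", "UTC") hashes differently from the
# bare default, so membership falls through to base_type()() -- the class
# returned by base_type(), instantiated with its defaults:
import polars as pl

dt = pl.Datetime("us", "UTC")
assert dt.base_type() is pl.Datetime      # class, not instance
assert dt.base_type()() == pl.Datetime()  # default instance for set lookup
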
@@ -106,6 +123,11 @@ class ValidationError:
message: str
def __str__(self) -> str:
+ """Return error message string.
+
+ Returns:
+ Formatted error message.
+ """
return f"Step {self.step} ({self.operation}): {self.message}"
@@ -113,28 +135,48 @@ class ValidationResult:
"""Result of schema validation."""
def __init__(self) -> None:
+ """Initialize an empty validation result."""
self._errors: list[ValidationError] = []
def add_error(self, step: int, operation: str, message: str) -> None:
+ """Add a validation error."""
self._errors.append(ValidationError(step, operation, message))
@property
def is_valid(self) -> bool:
+ """Check if validation passed.
+
+ Returns:
+ True if no errors, False otherwise.
+ """
return len(self._errors) == 0
@property
def errors(self) -> list[ValidationError]:
+ """Get list of validation errors.
+
+ Returns:
+ List of ValidationError instances.
+ """
return self._errors
def raise_if_invalid(self) -> None:
- """Raise ValidationError if validation failed."""
+ """Raise SchemaValidationError if validation failed.
+
+ Raises:
+ SchemaValidationError: If validation failed with errors.
+ """
if not self.is_valid:
error_messages = "\n".join(f" - {e}" for e in self._errors)
- raise SchemaValidationError(
- f"Schema validation failed with {len(self._errors)} error(s):\n{error_messages}"
- )
+ msg = f"Schema validation failed with {len(self._errors)} error(s):\n{error_messages}"
+ raise SchemaValidationError(msg)
def __repr__(self) -> str:
+ """Return string representation of validation result.
+
+ Returns:
+ Human-readable representation.
+ """
if self.is_valid:
return "ValidationResult(valid=True)"
return f"ValidationResult(valid=False, errors={len(self._errors)})"
@@ -175,48 +217,83 @@ def __init__(
steps: list[DryRunStep],
validation: ValidationResult,
) -> None:
+ """Initialize DryRunResult.
+
+ Args:
+ input_schema: Initial schema as column name to dtype mapping.
+ steps: List of dry run steps.
+ validation: Validation result with any errors.
+ """
self._input_schema = input_schema
self._steps = steps
self._validation = validation
@property
def is_valid(self) -> bool:
- """Whether the pipeline passed validation."""
+ """Whether the pipeline passed validation.
+
+ Returns:
+ True if validation passed, False otherwise.
+ """
return self._validation.is_valid
@property
def errors(self) -> list[ValidationError]:
- """Validation errors."""
+ """Validation errors.
+
+ Returns:
+ List of validation errors.
+ """
return self._validation.errors
@property
def steps(self) -> list[DryRunStep]:
- """List of dry run steps."""
+ """List of dry run steps.
+
+ Returns:
+ List of DryRunStep instances.
+ """
return self._steps
@property
def input_schema(self) -> dict[str, pl.DataType]:
- """Input schema."""
+ """Input schema.
+
+ Returns:
+ Dictionary mapping column names to dtypes.
+ """
return self._input_schema
@property
def output_schema(self) -> dict[str, str]:
- """Predicted output schema after all operations."""
+ """Predicted output schema after all operations.
+
+ Returns:
+ Dictionary mapping column names to dtype names.
+ """
if self._steps:
return self._steps[-1].schema_after
return {k: dtype_name(v) for k, v in self._input_schema.items()}
@property
def input_columns(self) -> list[str]:
- """Input column names."""
+ """Input column names.
+
+ Returns:
+ List of input column names.
+ """
return list(self._input_schema.keys())
@property
def output_columns(self) -> list[str]:
- """Predicted output column names."""
+ """Predicted output column names.
+
+ Returns:
+ List of predicted output column names.
+ """
return list(self.output_schema.keys())
- def summary(self, show_params: bool = True, show_schema: bool = False) -> str:
+ def summary(self, *, show_params: bool = True, show_schema: bool = False) -> str: # noqa: C901
"""Generate a human-readable summary.
Args:
@@ -229,32 +306,28 @@ def summary(self, show_params: bool = True, show_schema: bool = False) -> str:
lines = []
# Header
- lines.append("=" * 70)
- lines.append("DRY RUN PREVIEW")
- lines.append("=" * 70)
+ lines.extend(("=" * 70, "DRY RUN PREVIEW", "=" * 70))
# Validation status
if self.is_valid:
lines.append("✓ Validation: PASSED")
else:
lines.append(f"✗ Validation: FAILED ({len(self.errors)} errors)")
- for err in self.errors:
- lines.append(f" - {err}")
-
- lines.append("-" * 70)
+ lines.extend(f" - {err}" for err in self.errors)
- # Input schema summary
- lines.append(f"Input: {len(self._input_schema)} columns")
+ lines.extend(["-" * 70, f"Input: {len(self._input_schema)} columns"])
if show_schema:
for col, dtype in self._input_schema.items():
lines.append(f" {col}: {dtype_name(dtype)}")
- lines.append("-" * 70)
-
- # Steps
- lines.append("")
- lines.append(f"{'#':<4} {'Operation':<20} {'Columns':<15} {'Changes':<30}")
- lines.append("-" * 70)
+ lines.extend(
+ [
+ "-" * 70,
+ "",
+ f"{'#':<4} {'Operation':<20} {'Columns':<15} {'Changes':<30}",
+ "-" * 70,
+ ]
+ )
for step in self._steps:
step_num = str(step.step)
@@ -291,27 +364,33 @@ def summary(self, show_params: bool = True, show_schema: bool = False) -> str:
if show_schema:
lines.append(f" Schema: {step.schema_after}")
- lines.append("=" * 70)
-
- # Output schema summary
- lines.append(f"Output: {len(self.output_schema)} columns")
+ lines.extend(["=" * 70, f"Output: {len(self.output_schema)} columns"])
if show_schema:
for col, dtype in self.output_schema.items():
lines.append(f" {col}: {dtype}")
return "\n".join(lines)
- def print(self, show_params: bool = True, show_schema: bool = False) -> None:
+ def print(self, *, show_params: bool = True, show_schema: bool = False) -> None:
"""Print the dry run summary."""
- print(self.summary(show_params, show_schema))
+ print(self.summary(show_params=show_params, show_schema=show_schema)) # noqa: T201
def __repr__(self) -> str:
+ """Return string representation of dry run result.
+
+ Returns:
+ Human-readable representation.
+ """
status = "valid" if self.is_valid else f"invalid ({len(self.errors)} errors)"
return f"DryRunResult({len(self._steps)} steps, {status})"
-def _format_params_short(params: dict, max_length: int = 55) -> str:
- """Format params dict as a short string."""
+def _format_params_short(params: dict[str, Any], max_length: int = 55) -> str:
+ """Format params dict as a short string.
+
+ Returns:
+ Formatted string representation of params.
+ """
parts = []
for key, value in params.items():
if isinstance(value, dict) and "type" in value:
@@ -339,29 +418,54 @@ class SchemaTracker:
"""Tracks schema changes through a pipeline for validation."""
def __init__(self, schema: dict[str, pl.DataType]) -> None:
+ """Initialize tracker with a schema.
+
+ Args:
+ schema: Initial schema as column name to dtype mapping.
+ """
self._schema = dict(schema)
@property
def columns(self) -> set[str]:
+ """Get set of column names.
+
+ Returns:
+ Set of column names.
+ """
return set(self._schema.keys())
def has_column(self, name: str) -> bool:
+ """Check if column exists.
+
+ Returns:
+ True if column exists, False otherwise.
+ """
return name in self._schema
def get_dtype(self, name: str) -> pl.DataType | None:
+ """Get dtype for a column.
+
+ Returns:
+ DataType or None if column doesn't exist.
+ """
return self._schema.get(name)
def drop_column(self, name: str) -> None:
+ """Remove a column from the schema."""
self._schema.pop(name, None)
- def add_column(self, name: str, dtype: pl.DataType) -> None:
- self._schema[name] = dtype
+ def add_column(self, name: str, dtype: pl.DataType | None) -> None:
+ """Add a column to the schema."""
+ if dtype is not None:
+ self._schema[name] = dtype
def rename_column(self, old_name: str, new_name: str) -> None:
+ """Rename a column in the schema."""
if old_name in self._schema:
self._schema[new_name] = self._schema.pop(old_name)
def set_dtype(self, name: str, dtype: pl.DataType) -> None:
+ """Change the dtype of an existing column."""
if name in self._schema:
self._schema[name] = dtype
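
# Dry-run sketch: the tracker above simulates a pipeline's schema changes
# without touching any data (import path per this module):
import polars as pl
from transformplan.validation import SchemaTracker

tracker = SchemaTracker({"name": pl.Utf8(), "age": pl.Int64()})
tracker.add_column("age_group", pl.Utf8())
tracker.rename_column("name", "full_name")
tracker.drop_column("age")
print(tracker.columns)  # {"full_name", "age_group"}
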
@@ -372,6 +476,10 @@ def set_columns(self, columns: list[str]) -> None:
}
+# Type alias for validator functions
+ValidatorFunc = Callable[[SchemaTracker, dict[str, Any], ValidationResult, int], None]
+
+
# =============================================================================
# Helper functions
# =============================================================================
@@ -384,7 +492,11 @@ def _check_column_exists(
step: int,
op_name: str,
) -> bool:
- """Check if column exists, add error if not. Returns True if exists."""
+ """Check if column exists, add error if not.
+
+ Returns:
+ True if column exists, False otherwise.
+ """
if not tracker.has_column(column):
result.add_error(step, op_name, f"Column '{column}' does not exist")
return False
@@ -398,7 +510,11 @@ def _check_column_numeric(
step: int,
op_name: str,
) -> bool:
- """Check if column is numeric, add error if not. Returns True if numeric."""
+ """Check if column is numeric, add error if not.
+
+ Returns:
+ True if column is numeric, False otherwise.
+ """
dtype = tracker.get_dtype(column)
if dtype and not is_numeric(dtype):
result.add_error(
@@ -415,7 +531,11 @@ def _check_column_string(
step: int,
op_name: str,
) -> bool:
- """Check if column is string, add error if not. Returns True if string."""
+ """Check if column is string, add error if not.
+
+ Returns:
+ True if column is string, False otherwise.
+ """
dtype = tracker.get_dtype(column)
if dtype and not is_string(dtype):
result.add_error(
@@ -432,7 +552,11 @@ def _check_column_datetime(
step: int,
op_name: str,
) -> bool:
- """Check if column is datetime, add error if not. Returns True if datetime."""
+ """Check if column is datetime, add error if not.
+
+ Returns:
+ True if column is datetime, False otherwise.
+ """
dtype = tracker.get_dtype(column)
if dtype and not is_datetime(dtype):
result.add_error(
@@ -553,7 +677,7 @@ def _validate_col_add(
if expr:
tracker.add_column(new_column, tracker.get_dtype(expr))
else:
- tracker.add_column(new_column, pl.Utf8) # default to string for literals
+ tracker.add_column(new_column, pl.Utf8()) # default to string for literals
def _validate_col_add_uuid(
@@ -563,7 +687,7 @@ def _validate_col_add_uuid(
if tracker.has_column(column):
result.add_error(step, "col_add_uuid", f"Column '{column}' already exists")
else:
- tracker.add_column(column, pl.Utf8)
+ tracker.add_column(column, pl.Utf8())
def _validate_col_hash(
@@ -577,7 +701,7 @@ def _validate_col_hash(
if tracker.has_column(new_column):
result.add_error(step, "col_hash", f"Column '{new_column}' already exists")
else:
- tracker.add_column(new_column, pl.Utf8)
+ tracker.add_column(new_column, pl.Utf8())
def _validate_col_coalesce(
@@ -631,7 +755,7 @@ def _validate_math_columns(
if b_exists:
_check_column_numeric(tracker, column_b, result, step, op_name)
- tracker.add_column(new_column, pl.Float64)
+ tracker.add_column(new_column, pl.Float64())
def _validate_math_cumsum(
@@ -671,7 +795,7 @@ def _validate_math_rank(
step, "math_rank", f"Group-by columns do not exist: {missing}"
)
- tracker.add_column(new_column, pl.UInt32)
+ tracker.add_column(new_column, pl.UInt32())
def _validate_math_percent_of(
@@ -686,7 +810,7 @@ def _validate_math_percent_of(
if _check_column_exists(tracker, total_column, result, step, "math_percent_of"):
_check_column_numeric(tracker, total_column, result, step, "math_percent_of")
- tracker.add_column(new_column, pl.Float64)
+ tracker.add_column(new_column, pl.Float64())
# =============================================================================
@@ -723,8 +847,8 @@ def _validate_str_split(
step, "str_split", f"Column '{new_col}' already exists"
)
else:
- tracker.add_column(new_col, pl.Utf8)
- if not params.get("keep_original", False):
+ tracker.add_column(new_col, pl.Utf8())
+ if not params.get("keep_original"):
tracker.drop_column(column)
@@ -738,7 +862,7 @@ def _validate_str_concat(
if _check_column_exists(tracker, col, result, step, "str_concat"):
_check_column_string(tracker, col, result, step, "str_concat")
- tracker.add_column(new_column, pl.Utf8)
+ tracker.add_column(new_column, pl.Utf8())
def _validate_str_extract(
@@ -751,7 +875,7 @@ def _validate_str_extract(
_check_column_string(tracker, column, result, step, "str_extract")
if new_column != column:
- tracker.add_column(new_column, pl.Utf8)
+ tracker.add_column(new_column, pl.Utf8())
# =============================================================================
@@ -765,9 +889,11 @@ def _validate_dt_op(
result: ValidationResult,
step: int,
op_name: str,
- output_dtype: pl.DataType = pl.Int32,
+ output_dtype: pl.DataType | None = None,
) -> None:
"""Validate datetime operation: column must exist and be datetime."""
+ if output_dtype is None:
+ output_dtype = pl.Int32()
column = params["column"]
new_column = params.get("new_column", column)
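
# Sketch of the None-sentinel introduced above: with the type sets holding
# instances, a `pl.Int32()` default would be a call expression in the
# signature (evaluated once at definition time, and flagged by ruff's B008),
# so the instance is built per call instead:
import polars as pl

def _demo(output_dtype: pl.DataType | None = None) -> pl.DataType:
    return pl.Int32() if output_dtype is None else output_dtype

print(_demo())           # Int32
print(_demo(pl.Utf8()))  # String (Utf8 is its alias)
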
@@ -787,7 +913,7 @@ def _validate_dt_parse(
if _check_column_exists(tracker, column, result, step, "dt_parse"):
_check_column_string(tracker, column, result, step, "dt_parse")
- tracker.set_dtype(new_column, pl.Date)
+ tracker.set_dtype(new_column, pl.Date())
def _validate_dt_format(
@@ -800,9 +926,9 @@ def _validate_dt_format(
_check_column_datetime(tracker, column, result, step, "dt_format")
if new_column != column:
- tracker.add_column(new_column, pl.Utf8)
+ tracker.add_column(new_column, pl.Utf8())
else:
- tracker.set_dtype(column, pl.Utf8)
+ tracker.set_dtype(column, pl.Utf8())
def _validate_dt_diff_days(
@@ -817,7 +943,7 @@ def _validate_dt_diff_days(
if _check_column_exists(tracker, column_b, result, step, "dt_diff_days"):
_check_column_datetime(tracker, column_b, result, step, "dt_diff_days")
- tracker.add_column(new_column, pl.Int64)
+ tracker.add_column(new_column, pl.Int64())
def _validate_dt_age_years(
@@ -830,15 +956,12 @@ def _validate_dt_age_years(
if _check_column_exists(tracker, birth_column, result, step, "dt_age_years"):
_check_column_datetime(tracker, birth_column, result, step, "dt_age_years")
- if reference_column:
- if _check_column_exists(
- tracker, reference_column, result, step, "dt_age_years"
- ):
- _check_column_datetime(
- tracker, reference_column, result, step, "dt_age_years"
- )
+ if reference_column and _check_column_exists(
+ tracker, reference_column, result, step, "dt_age_years"
+ ):
+ _check_column_datetime(tracker, reference_column, result, step, "dt_age_years")
- tracker.add_column(new_column, pl.Int64)
+ tracker.add_column(new_column, pl.Int64())
def _validate_dt_is_between(
@@ -850,7 +973,7 @@ def _validate_dt_is_between(
if _check_column_exists(tracker, column, result, step, "dt_is_between"):
_check_column_datetime(tracker, column, result, step, "dt_is_between")
- tracker.add_column(new_column, pl.Boolean)
+ tracker.add_column(new_column, pl.Boolean())
# =============================================================================
@@ -887,7 +1010,11 @@ def _validate_filter_columns(
step: int,
op_name: str,
) -> list[str]:
- """Recursively validate columns and types from a filter dict."""
+ """Recursively validate columns and types from a filter dict.
+
+ Returns:
+ List of missing column names.
+ """
missing = []
filter_type = filter_dict.get("type")
@@ -917,22 +1044,29 @@ def _validate_filter_columns(
dtype = tracker.get_dtype(column)
# Numeric comparisons
- if filter_type in ("gt", "ge", "lt", "le", "between"):
- if dtype and not is_numeric(dtype) and not is_datetime(dtype):
- result.add_error(
- step,
- op_name,
- f"Column '{column}' is {dtype_name(dtype)}, cannot use numeric comparison",
- )
+ if (
+ filter_type in ("gt", "ge", "lt", "le", "between")
+ and dtype
+ and not is_numeric(dtype)
+ and not is_datetime(dtype)
+ ):
+ result.add_error(
+ step,
+ op_name,
+ f"Column '{column}' is {dtype_name(dtype)}, cannot use numeric comparison",
+ )
# String operations
- if filter_type in ("str_contains", "str_starts_with", "str_ends_with"):
- if dtype and not is_string(dtype):
- result.add_error(
- step,
- op_name,
- f"Column '{column}' is {dtype_name(dtype)}, cannot use string filter",
- )
+ if (
+ filter_type in ("str_contains", "str_starts_with", "str_ends_with")
+ and dtype
+ and not is_string(dtype)
+ ):
+ result.add_error(
+ step,
+ op_name,
+ f"Column '{column}' is {dtype_name(dtype)}, cannot use string filter",
+ )
return missing
@@ -973,7 +1107,7 @@ def _validate_rows_flag(
if tracker.has_column(new_column):
result.add_error(step, "rows_flag", f"Column '{new_column}' already exists")
else:
- tracker.add_column(new_column, pl.Boolean)
+ tracker.add_column(new_column, pl.Boolean())
def _validate_rows_sort(
@@ -1072,9 +1206,9 @@ def _validate_map_discretize(
_check_column_numeric(tracker, column, result, step, "map_discretize")
if new_column != column:
- tracker.add_column(new_column, pl.Utf8)
+ tracker.add_column(new_column, pl.Utf8())
else:
- tracker.set_dtype(column, pl.Utf8)
+ tracker.set_dtype(column, pl.Utf8())
def _validate_map_from_column(
@@ -1097,7 +1231,7 @@ def _validate_map_from_column(
# Validator registry
# =============================================================================
-_VALIDATORS: dict[str, Any] = {
+_VALIDATORS: dict[str, ValidatorFunc] = {
# Column ops
"col_drop": _validate_col_drop,
"col_rename": _validate_col_rename,
@@ -1113,66 +1247,56 @@ def _validate_map_from_column(
"col_hash": _validate_col_hash,
"col_coalesce": _validate_col_coalesce,
# Math ops
- "math_add": lambda t, p, r, s: _validate_math_scalar(t, p, r, s, "math_add"),
- "math_subtract": lambda t, p, r, s: _validate_math_scalar(
- t, p, r, s, "math_subtract"
- ),
- "math_multiply": lambda t, p, r, s: _validate_math_scalar(
- t, p, r, s, "math_multiply"
- ),
- "math_divide": lambda t, p, r, s: _validate_math_scalar(t, p, r, s, "math_divide"),
- "math_clamp": lambda t, p, r, s: _validate_math_scalar(t, p, r, s, "math_clamp"),
- "math_abs": lambda t, p, r, s: _validate_math_scalar(t, p, r, s, "math_abs"),
- "math_round": lambda t, p, r, s: _validate_math_scalar(t, p, r, s, "math_round"),
- "math_set_min": lambda t, p, r, s: _validate_math_scalar(
- t, p, r, s, "math_set_min"
- ),
- "math_set_max": lambda t, p, r, s: _validate_math_scalar(
- t, p, r, s, "math_set_max"
- ),
- "math_add_columns": lambda t, p, r, s: _validate_math_columns(
- t, p, r, s, "math_add_columns"
- ),
- "math_subtract_columns": lambda t, p, r, s: _validate_math_columns(
- t, p, r, s, "math_subtract_columns"
+ "math_add": partial(_validate_math_scalar, op_name="math_add"),
+ "math_subtract": partial(_validate_math_scalar, op_name="math_subtract"),
+ "math_multiply": partial(_validate_math_scalar, op_name="math_multiply"),
+ "math_divide": partial(_validate_math_scalar, op_name="math_divide"),
+ "math_clamp": partial(_validate_math_scalar, op_name="math_clamp"),
+ "math_abs": partial(_validate_math_scalar, op_name="math_abs"),
+ "math_round": partial(_validate_math_scalar, op_name="math_round"),
+ "math_set_min": partial(_validate_math_scalar, op_name="math_set_min"),
+ "math_set_max": partial(_validate_math_scalar, op_name="math_set_max"),
+ "math_add_columns": partial(_validate_math_columns, op_name="math_add_columns"),
+ "math_subtract_columns": partial(
+ _validate_math_columns, op_name="math_subtract_columns"
),
- "math_multiply_columns": lambda t, p, r, s: _validate_math_columns(
- t, p, r, s, "math_multiply_columns"
+ "math_multiply_columns": partial(
+ _validate_math_columns, op_name="math_multiply_columns"
),
- "math_divide_columns": lambda t, p, r, s: _validate_math_columns(
- t, p, r, s, "math_divide_columns"
+ "math_divide_columns": partial(
+ _validate_math_columns, op_name="math_divide_columns"
),
"math_cumsum": _validate_math_cumsum,
"math_rank": _validate_math_rank,
"math_percent_of": _validate_math_percent_of,
# String ops
- "str_replace": lambda t, p, r, s: _validate_str_op(t, p, r, s, "str_replace"),
- "str_slice": lambda t, p, r, s: _validate_str_op(t, p, r, s, "str_slice"),
- "str_truncate": lambda t, p, r, s: _validate_str_op(t, p, r, s, "str_truncate"),
- "str_lower": lambda t, p, r, s: _validate_str_op(t, p, r, s, "str_lower"),
- "str_upper": lambda t, p, r, s: _validate_str_op(t, p, r, s, "str_upper"),
- "str_strip": lambda t, p, r, s: _validate_str_op(t, p, r, s, "str_strip"),
- "str_pad": lambda t, p, r, s: _validate_str_op(t, p, r, s, "str_pad"),
+ "str_replace": partial(_validate_str_op, op_name="str_replace"),
+ "str_slice": partial(_validate_str_op, op_name="str_slice"),
+ "str_truncate": partial(_validate_str_op, op_name="str_truncate"),
+ "str_lower": partial(_validate_str_op, op_name="str_lower"),
+ "str_upper": partial(_validate_str_op, op_name="str_upper"),
+ "str_strip": partial(_validate_str_op, op_name="str_strip"),
+ "str_pad": partial(_validate_str_op, op_name="str_pad"),
"str_split": _validate_str_split,
"str_concat": _validate_str_concat,
"str_extract": _validate_str_extract,
# Datetime ops
- "dt_year": lambda t, p, r, s: _validate_dt_op(t, p, r, s, "dt_year"),
- "dt_month": lambda t, p, r, s: _validate_dt_op(t, p, r, s, "dt_month"),
- "dt_day": lambda t, p, r, s: _validate_dt_op(t, p, r, s, "dt_day"),
- "dt_week": lambda t, p, r, s: _validate_dt_op(t, p, r, s, "dt_week"),
- "dt_quarter": lambda t, p, r, s: _validate_dt_op(t, p, r, s, "dt_quarter"),
- "dt_year_month": lambda t, p, r, s: _validate_dt_op(
- t, p, r, s, "dt_year_month", pl.Utf8
+ "dt_year": partial(_validate_dt_op, op_name="dt_year"),
+ "dt_month": partial(_validate_dt_op, op_name="dt_month"),
+ "dt_day": partial(_validate_dt_op, op_name="dt_day"),
+ "dt_week": partial(_validate_dt_op, op_name="dt_week"),
+ "dt_quarter": partial(_validate_dt_op, op_name="dt_quarter"),
+ "dt_year_month": partial(
+ _validate_dt_op, op_name="dt_year_month", output_dtype=pl.Utf8()
),
- "dt_quarter_year": lambda t, p, r, s: _validate_dt_op(
- t, p, r, s, "dt_quarter_year", pl.Utf8
+ "dt_quarter_year": partial(
+ _validate_dt_op, op_name="dt_quarter_year", output_dtype=pl.Utf8()
),
- "dt_calendar_week": lambda t, p, r, s: _validate_dt_op(
- t, p, r, s, "dt_calendar_week", pl.Utf8
+ "dt_calendar_week": partial(
+ _validate_dt_op, op_name="dt_calendar_week", output_dtype=pl.Utf8()
),
- "dt_truncate": lambda t, p, r, s: _validate_dt_op(
- t, p, r, s, "dt_truncate", pl.Date
+ "dt_truncate": partial(
+ _validate_dt_op, op_name="dt_truncate", output_dtype=pl.Date()
),
"dt_parse": _validate_dt_parse,
"dt_format": _validate_dt_format,
@@ -1259,10 +1383,11 @@ def dry_run_schema(
columns_removed = list(cols_before - cols_after)
# Detect type modifications (columns that exist in both but changed type)
- columns_modified = []
- for col in cols_before & cols_after:
- if schema_before.get(col) != schema_after.get(col):
- columns_modified.append(col)
+ columns_modified = [
+ col
+ for col in cols_before & cols_after
+ if schema_before.get(col) != schema_after.get(col)
+ ]
# Check if this step had an error
step_error = None
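
# Standalone sketch of the lambda-to-partial swap in the validator registry
# above: partial pins op_name as a keyword while leaving the four positional
# arguments open, so each entry still satisfies ValidatorFunc:
from functools import partial

def _demo_validator(tracker, params, result, step, op_name):  # illustrative
    print(f"step {step}: {op_name} params={params}")

check = partial(_demo_validator, op_name="math_add")
check(None, {"column": "price", "value": 2}, None, 1)
# -> step 1: math_add params={'column': 'price', 'value': 2}
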
diff --git a/uv.lock b/uv.lock
index d70dd8d..d1ab366 100644
--- a/uv.lock
+++ b/uv.lock
@@ -153,6 +153,110 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" },
]
+[[package]]
+name = "coverage"
+version = "7.13.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/ad/49/349848445b0e53660e258acbcc9b0d014895b6739237920886672240f84b/coverage-7.13.2.tar.gz", hash = "sha256:044c6951ec37146b72a50cc81ef02217d27d4c3640efd2640311393cbbf143d3", size = 826523, upload-time = "2026-01-25T13:00:04.889Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/a4/2d/63e37369c8e81a643afe54f76073b020f7b97ddbe698c5c944b51b0a2bc5/coverage-7.13.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f4af3b01763909f477ea17c962e2cca8f39b350a4e46e3a30838b2c12e31b81b", size = 218842, upload-time = "2026-01-25T12:57:15.3Z" },
+ { url = "https://files.pythonhosted.org/packages/57/06/86ce882a8d58cbcb3030e298788988e618da35420d16a8c66dac34f138d0/coverage-7.13.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:36393bd2841fa0b59498f75466ee9bdec4f770d3254f031f23e8fd8e140ffdd2", size = 219360, upload-time = "2026-01-25T12:57:17.572Z" },
+ { url = "https://files.pythonhosted.org/packages/cd/84/70b0eb1ee19ca4ef559c559054c59e5b2ae4ec9af61398670189e5d276e9/coverage-7.13.2-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:9cc7573518b7e2186bd229b1a0fe24a807273798832c27032c4510f47ffdb896", size = 246123, upload-time = "2026-01-25T12:57:19.087Z" },
+ { url = "https://files.pythonhosted.org/packages/35/fb/05b9830c2e8275ebc031e0019387cda99113e62bb500ab328bb72578183b/coverage-7.13.2-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:ca9566769b69a5e216a4e176d54b9df88f29d750c5b78dbb899e379b4e14b30c", size = 247930, upload-time = "2026-01-25T12:57:20.929Z" },
+ { url = "https://files.pythonhosted.org/packages/81/aa/3f37858ca2eed4f09b10ca3c6ddc9041be0a475626cd7fd2712f4a2d526f/coverage-7.13.2-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9c9bdea644e94fd66d75a6f7e9a97bb822371e1fe7eadae2cacd50fcbc28e4dc", size = 249804, upload-time = "2026-01-25T12:57:22.904Z" },
+ { url = "https://files.pythonhosted.org/packages/b6/b3/c904f40c56e60a2d9678a5ee8df3d906d297d15fb8bec5756c3b0a67e2df/coverage-7.13.2-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:5bd447332ec4f45838c1ad42268ce21ca87c40deb86eabd59888859b66be22a5", size = 246815, upload-time = "2026-01-25T12:57:24.314Z" },
+ { url = "https://files.pythonhosted.org/packages/41/91/ddc1c5394ca7fd086342486440bfdd6b9e9bda512bf774599c7c7a0081e0/coverage-7.13.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:7c79ad5c28a16a1277e1187cf83ea8dafdcc689a784228a7d390f19776db7c31", size = 247843, upload-time = "2026-01-25T12:57:26.544Z" },
+ { url = "https://files.pythonhosted.org/packages/87/d2/cdff8f4cd33697883c224ea8e003e9c77c0f1a837dc41d95a94dd26aad67/coverage-7.13.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:76e06ccacd1fb6ada5d076ed98a8c6f66e2e6acd3df02819e2ee29fd637b76ad", size = 245850, upload-time = "2026-01-25T12:57:28.507Z" },
+ { url = "https://files.pythonhosted.org/packages/f5/42/e837febb7866bf2553ab53dd62ed52f9bb36d60c7e017c55376ad21fbb05/coverage-7.13.2-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:49d49e9a5e9f4dc3d3dac95278a020afa6d6bdd41f63608a76fa05a719d5b66f", size = 246116, upload-time = "2026-01-25T12:57:30.16Z" },
+ { url = "https://files.pythonhosted.org/packages/09/b1/4a3f935d7df154df02ff4f71af8d61298d713a7ba305d050ae475bfbdde2/coverage-7.13.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:ed2bce0e7bfa53f7b0b01c722da289ef6ad4c18ebd52b1f93704c21f116360c8", size = 246720, upload-time = "2026-01-25T12:57:32.165Z" },
+ { url = "https://files.pythonhosted.org/packages/e1/fe/538a6fd44c515f1c5197a3f078094cbaf2ce9f945df5b44e29d95c864bff/coverage-7.13.2-cp310-cp310-win32.whl", hash = "sha256:1574983178b35b9af4db4a9f7328a18a14a0a0ce76ffaa1c1bacb4cc82089a7c", size = 221465, upload-time = "2026-01-25T12:57:33.511Z" },
+ { url = "https://files.pythonhosted.org/packages/5e/09/4b63a024295f326ec1a40ec8def27799300ce8775b1cbf0d33b1790605c4/coverage-7.13.2-cp310-cp310-win_amd64.whl", hash = "sha256:a360a8baeb038928ceb996f5623a4cd508728f8f13e08d4e96ce161702f3dd99", size = 222397, upload-time = "2026-01-25T12:57:34.927Z" },
+ { url = "https://files.pythonhosted.org/packages/6c/01/abca50583a8975bb6e1c59eff67ed8e48bb127c07dad5c28d9e96ccc09ec/coverage-7.13.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:060ebf6f2c51aff5ba38e1f43a2095e087389b1c69d559fde6049a4b0001320e", size = 218971, upload-time = "2026-01-25T12:57:36.953Z" },
+ { url = "https://files.pythonhosted.org/packages/eb/0e/b6489f344d99cd1e5b4d5e1be52dfd3f8a3dc5112aa6c33948da8cabad4e/coverage-7.13.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c1ea8ca9db5e7469cd364552985e15911548ea5b69c48a17291f0cac70484b2e", size = 219473, upload-time = "2026-01-25T12:57:38.934Z" },
+ { url = "https://files.pythonhosted.org/packages/17/11/db2f414915a8e4ec53f60b17956c27f21fb68fcf20f8a455ce7c2ccec638/coverage-7.13.2-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:b780090d15fd58f07cf2011943e25a5f0c1c894384b13a216b6c86c8a8a7c508", size = 249896, upload-time = "2026-01-25T12:57:40.365Z" },
+ { url = "https://files.pythonhosted.org/packages/80/06/0823fe93913663c017e508e8810c998c8ebd3ec2a5a85d2c3754297bdede/coverage-7.13.2-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:88a800258d83acb803c38175b4495d293656d5fac48659c953c18e5f539a274b", size = 251810, upload-time = "2026-01-25T12:57:42.045Z" },
+ { url = "https://files.pythonhosted.org/packages/61/dc/b151c3cc41b28cdf7f0166c5fa1271cbc305a8ec0124cce4b04f74791a18/coverage-7.13.2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6326e18e9a553e674d948536a04a80d850a5eeefe2aae2e6d7cf05d54046c01b", size = 253920, upload-time = "2026-01-25T12:57:44.026Z" },
+ { url = "https://files.pythonhosted.org/packages/2d/35/e83de0556e54a4729a2b94ea816f74ce08732e81945024adee46851c2264/coverage-7.13.2-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:59562de3f797979e1ff07c587e2ac36ba60ca59d16c211eceaa579c266c5022f", size = 250025, upload-time = "2026-01-25T12:57:45.624Z" },
+ { url = "https://files.pythonhosted.org/packages/39/67/af2eb9c3926ce3ea0d58a0d2516fcbdacf7a9fc9559fe63076beaf3f2596/coverage-7.13.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:27ba1ed6f66b0e2d61bfa78874dffd4f8c3a12f8e2b5410e515ab345ba7bc9c3", size = 251612, upload-time = "2026-01-25T12:57:47.713Z" },
+ { url = "https://files.pythonhosted.org/packages/26/62/5be2e25f3d6c711d23b71296f8b44c978d4c8b4e5b26871abfc164297502/coverage-7.13.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:8be48da4d47cc68754ce643ea50b3234557cbefe47c2f120495e7bd0a2756f2b", size = 249670, upload-time = "2026-01-25T12:57:49.378Z" },
+ { url = "https://files.pythonhosted.org/packages/b3/51/400d1b09a8344199f9b6a6fc1868005d766b7ea95e7882e494fa862ca69c/coverage-7.13.2-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:2a47a4223d3361b91176aedd9d4e05844ca67d7188456227b6bf5e436630c9a1", size = 249395, upload-time = "2026-01-25T12:57:50.86Z" },
+ { url = "https://files.pythonhosted.org/packages/e0/36/f02234bc6e5230e2f0a63fd125d0a2093c73ef20fdf681c7af62a140e4e7/coverage-7.13.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:c6f141b468740197d6bd38f2b26ade124363228cc3f9858bd9924ab059e00059", size = 250298, upload-time = "2026-01-25T12:57:52.287Z" },
+ { url = "https://files.pythonhosted.org/packages/b0/06/713110d3dd3151b93611c9cbfc65c15b4156b44f927fced49ac0b20b32a4/coverage-7.13.2-cp311-cp311-win32.whl", hash = "sha256:89567798404af067604246e01a49ef907d112edf2b75ef814b1364d5ce267031", size = 221485, upload-time = "2026-01-25T12:57:53.876Z" },
+ { url = "https://files.pythonhosted.org/packages/16/0c/3ae6255fa1ebcb7dec19c9a59e85ef5f34566d1265c70af5b2fc981da834/coverage-7.13.2-cp311-cp311-win_amd64.whl", hash = "sha256:21dd57941804ae2ac7e921771a5e21bbf9aabec317a041d164853ad0a96ce31e", size = 222421, upload-time = "2026-01-25T12:57:55.433Z" },
+ { url = "https://files.pythonhosted.org/packages/b5/37/fabc3179af4d61d89ea47bd04333fec735cd5e8b59baad44fed9fc4170d7/coverage-7.13.2-cp311-cp311-win_arm64.whl", hash = "sha256:10758e0586c134a0bafa28f2d37dd2cdb5e4a90de25c0fc0c77dabbad46eca28", size = 221088, upload-time = "2026-01-25T12:57:57.41Z" },
+ { url = "https://files.pythonhosted.org/packages/46/39/e92a35f7800222d3f7b2cbb7bbc3b65672ae8d501cb31801b2d2bd7acdf1/coverage-7.13.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f106b2af193f965d0d3234f3f83fc35278c7fb935dfbde56ae2da3dd2c03b84d", size = 219142, upload-time = "2026-01-25T12:58:00.448Z" },
+ { url = "https://files.pythonhosted.org/packages/45/7a/8bf9e9309c4c996e65c52a7c5a112707ecdd9fbaf49e10b5a705a402bbb4/coverage-7.13.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:78f45d21dc4d5d6bd29323f0320089ef7eae16e4bef712dff79d184fa7330af3", size = 219503, upload-time = "2026-01-25T12:58:02.451Z" },
+ { url = "https://files.pythonhosted.org/packages/87/93/17661e06b7b37580923f3f12406ac91d78aeed293fb6da0b69cc7957582f/coverage-7.13.2-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:fae91dfecd816444c74531a9c3d6ded17a504767e97aa674d44f638107265b99", size = 251006, upload-time = "2026-01-25T12:58:04.059Z" },
+ { url = "https://files.pythonhosted.org/packages/12/f0/f9e59fb8c310171497f379e25db060abef9fa605e09d63157eebec102676/coverage-7.13.2-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:264657171406c114787b441484de620e03d8f7202f113d62fcd3d9688baa3e6f", size = 253750, upload-time = "2026-01-25T12:58:05.574Z" },
+ { url = "https://files.pythonhosted.org/packages/e5/b1/1935e31add2232663cf7edd8269548b122a7d100047ff93475dbaaae673e/coverage-7.13.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ae47d8dcd3ded0155afbb59c62bd8ab07ea0fd4902e1c40567439e6db9dcaf2f", size = 254862, upload-time = "2026-01-25T12:58:07.647Z" },
+ { url = "https://files.pythonhosted.org/packages/af/59/b5e97071ec13df5f45da2b3391b6cdbec78ba20757bc92580a5b3d5fa53c/coverage-7.13.2-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:8a0b33e9fd838220b007ce8f299114d406c1e8edb21336af4c97a26ecfd185aa", size = 251420, upload-time = "2026-01-25T12:58:09.309Z" },
+ { url = "https://files.pythonhosted.org/packages/3f/75/9495932f87469d013dc515fb0ce1aac5fa97766f38f6b1a1deb1ee7b7f3a/coverage-7.13.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b3becbea7f3ce9a2d4d430f223ec15888e4deb31395840a79e916368d6004cce", size = 252786, upload-time = "2026-01-25T12:58:10.909Z" },
+ { url = "https://files.pythonhosted.org/packages/6a/59/af550721f0eb62f46f7b8cb7e6f1860592189267b1c411a4e3a057caacee/coverage-7.13.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:f819c727a6e6eeb8711e4ce63d78c620f69630a2e9d53bc95ca5379f57b6ba94", size = 250928, upload-time = "2026-01-25T12:58:12.449Z" },
+ { url = "https://files.pythonhosted.org/packages/9b/b1/21b4445709aae500be4ab43bbcfb4e53dc0811c3396dcb11bf9f23fd0226/coverage-7.13.2-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:4f7b71757a3ab19f7ba286e04c181004c1d61be921795ee8ba6970fd0ec91da5", size = 250496, upload-time = "2026-01-25T12:58:14.047Z" },
+ { url = "https://files.pythonhosted.org/packages/ba/b1/0f5d89dfe0392990e4f3980adbde3eb34885bc1effb2dc369e0bf385e389/coverage-7.13.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b7fc50d2afd2e6b4f6f2f403b70103d280a8e0cb35320cbbe6debcda02a1030b", size = 252373, upload-time = "2026-01-25T12:58:15.976Z" },
+ { url = "https://files.pythonhosted.org/packages/01/c9/0cf1a6a57a9968cc049a6b896693faa523c638a5314b1fc374eb2b2ac904/coverage-7.13.2-cp312-cp312-win32.whl", hash = "sha256:292250282cf9bcf206b543d7608bda17ca6fc151f4cbae949fc7e115112fbd41", size = 221696, upload-time = "2026-01-25T12:58:17.517Z" },
+ { url = "https://files.pythonhosted.org/packages/4d/05/d7540bf983f09d32803911afed135524570f8c47bb394bf6206c1dc3a786/coverage-7.13.2-cp312-cp312-win_amd64.whl", hash = "sha256:eeea10169fac01549a7921d27a3e517194ae254b542102267bef7a93ed38c40e", size = 222504, upload-time = "2026-01-25T12:58:19.115Z" },
+ { url = "https://files.pythonhosted.org/packages/15/8b/1a9f037a736ced0a12aacf6330cdaad5008081142a7070bc58b0f7930cbc/coverage-7.13.2-cp312-cp312-win_arm64.whl", hash = "sha256:2a5b567f0b635b592c917f96b9a9cb3dbd4c320d03f4bf94e9084e494f2e8894", size = 221120, upload-time = "2026-01-25T12:58:21.334Z" },
+ { url = "https://files.pythonhosted.org/packages/a7/f0/3d3eac7568ab6096ff23791a526b0048a1ff3f49d0e236b2af6fb6558e88/coverage-7.13.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ed75de7d1217cf3b99365d110975f83af0528c849ef5180a12fd91b5064df9d6", size = 219168, upload-time = "2026-01-25T12:58:23.376Z" },
+ { url = "https://files.pythonhosted.org/packages/a3/a6/f8b5cfeddbab95fdef4dcd682d82e5dcff7a112ced57a959f89537ee9995/coverage-7.13.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:97e596de8fa9bada4d88fde64a3f4d37f1b6131e4faa32bad7808abc79887ddc", size = 219537, upload-time = "2026-01-25T12:58:24.932Z" },
+ { url = "https://files.pythonhosted.org/packages/7b/e6/8d8e6e0c516c838229d1e41cadcec91745f4b1031d4db17ce0043a0423b4/coverage-7.13.2-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:68c86173562ed4413345410c9480a8d64864ac5e54a5cda236748031e094229f", size = 250528, upload-time = "2026-01-25T12:58:26.567Z" },
+ { url = "https://files.pythonhosted.org/packages/8e/78/befa6640f74092b86961f957f26504c8fba3d7da57cc2ab7407391870495/coverage-7.13.2-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:7be4d613638d678b2b3773b8f687537b284d7074695a43fe2fbbfc0e31ceaed1", size = 253132, upload-time = "2026-01-25T12:58:28.251Z" },
+ { url = "https://files.pythonhosted.org/packages/9d/10/1630db1edd8ce675124a2ee0f7becc603d2bb7b345c2387b4b95c6907094/coverage-7.13.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d7f63ce526a96acd0e16c4af8b50b64334239550402fb1607ce6a584a6d62ce9", size = 254374, upload-time = "2026-01-25T12:58:30.294Z" },
+ { url = "https://files.pythonhosted.org/packages/ed/1d/0d9381647b1e8e6d310ac4140be9c428a0277330991e0c35bdd751e338a4/coverage-7.13.2-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:406821f37f864f968e29ac14c3fccae0fec9fdeba48327f0341decf4daf92d7c", size = 250762, upload-time = "2026-01-25T12:58:32.036Z" },
+ { url = "https://files.pythonhosted.org/packages/43/e4/5636dfc9a7c871ee8776af83ee33b4c26bc508ad6cee1e89b6419a366582/coverage-7.13.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ee68e5a4e3e5443623406b905db447dceddffee0dceb39f4e0cd9ec2a35004b5", size = 252502, upload-time = "2026-01-25T12:58:33.961Z" },
+ { url = "https://files.pythonhosted.org/packages/02/2a/7ff2884d79d420cbb2d12fed6fff727b6d0ef27253140d3cdbbd03187ee0/coverage-7.13.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:2ee0e58cca0c17dd9c6c1cdde02bb705c7b3fbfa5f3b0b5afeda20d4ebff8ef4", size = 250463, upload-time = "2026-01-25T12:58:35.529Z" },
+ { url = "https://files.pythonhosted.org/packages/91/c0/ba51087db645b6c7261570400fc62c89a16278763f36ba618dc8657a187b/coverage-7.13.2-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:6e5bbb5018bf76a56aabdb64246b5288d5ae1b7d0dd4d0534fe86df2c2992d1c", size = 250288, upload-time = "2026-01-25T12:58:37.226Z" },
+ { url = "https://files.pythonhosted.org/packages/03/07/44e6f428551c4d9faf63ebcefe49b30e5c89d1be96f6a3abd86a52da9d15/coverage-7.13.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a55516c68ef3e08e134e818d5e308ffa6b1337cc8b092b69b24287bf07d38e31", size = 252063, upload-time = "2026-01-25T12:58:38.821Z" },
+ { url = "https://files.pythonhosted.org/packages/c2/67/35b730ad7e1859dd57e834d1bc06080d22d2f87457d53f692fce3f24a5a9/coverage-7.13.2-cp313-cp313-win32.whl", hash = "sha256:5b20211c47a8abf4abc3319d8ce2464864fa9f30c5fcaf958a3eed92f4f1fef8", size = 221716, upload-time = "2026-01-25T12:58:40.484Z" },
+ { url = "https://files.pythonhosted.org/packages/0d/82/e5fcf5a97c72f45fc14829237a6550bf49d0ab882ac90e04b12a69db76b4/coverage-7.13.2-cp313-cp313-win_amd64.whl", hash = "sha256:14f500232e521201cf031549fb1ebdfc0a40f401cf519157f76c397e586c3beb", size = 222522, upload-time = "2026-01-25T12:58:43.247Z" },
+ { url = "https://files.pythonhosted.org/packages/b1/f1/25d7b2f946d239dd2d6644ca2cc060d24f97551e2af13b6c24c722ae5f97/coverage-7.13.2-cp313-cp313-win_arm64.whl", hash = "sha256:9779310cb5a9778a60c899f075a8514c89fa6d10131445c2207fc893e0b14557", size = 221145, upload-time = "2026-01-25T12:58:45Z" },
+ { url = "https://files.pythonhosted.org/packages/9e/f7/080376c029c8f76fadfe43911d0daffa0cbdc9f9418a0eead70c56fb7f4b/coverage-7.13.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:e64fa5a1e41ce5df6b547cbc3d3699381c9e2c2c369c67837e716ed0f549d48e", size = 219861, upload-time = "2026-01-25T12:58:46.586Z" },
+ { url = "https://files.pythonhosted.org/packages/42/11/0b5e315af5ab35f4c4a70e64d3314e4eec25eefc6dec13be3a7d5ffe8ac5/coverage-7.13.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b01899e82a04085b6561eb233fd688474f57455e8ad35cd82286463ba06332b7", size = 220207, upload-time = "2026-01-25T12:58:48.277Z" },
+ { url = "https://files.pythonhosted.org/packages/b2/0c/0874d0318fb1062117acbef06a09cf8b63f3060c22265adaad24b36306b7/coverage-7.13.2-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:838943bea48be0e2768b0cf7819544cdedc1bbb2f28427eabb6eb8c9eb2285d3", size = 261504, upload-time = "2026-01-25T12:58:49.904Z" },
+ { url = "https://files.pythonhosted.org/packages/83/5e/1cd72c22ecb30751e43a72f40ba50fcef1b7e93e3ea823bd9feda8e51f9a/coverage-7.13.2-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:93d1d25ec2b27e90bcfef7012992d1f5121b51161b8bffcda756a816cf13c2c3", size = 263582, upload-time = "2026-01-25T12:58:51.582Z" },
+ { url = "https://files.pythonhosted.org/packages/9b/da/8acf356707c7a42df4d0657020308e23e5a07397e81492640c186268497c/coverage-7.13.2-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:93b57142f9621b0d12349c43fc7741fe578e4bc914c1e5a54142856cfc0bf421", size = 266008, upload-time = "2026-01-25T12:58:53.234Z" },
+ { url = "https://files.pythonhosted.org/packages/41/41/ea1730af99960309423c6ea8d6a4f1fa5564b2d97bd1d29dda4b42611f04/coverage-7.13.2-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f06799ae1bdfff7ccb8665d75f8291c69110ba9585253de254688aa8a1ccc6c5", size = 260762, upload-time = "2026-01-25T12:58:55.372Z" },
+ { url = "https://files.pythonhosted.org/packages/22/fa/02884d2080ba71db64fdc127b311db60e01fe6ba797d9c8363725e39f4d5/coverage-7.13.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:7f9405ab4f81d490811b1d91c7a20361135a2df4c170e7f0b747a794da5b7f23", size = 263571, upload-time = "2026-01-25T12:58:57.52Z" },
+ { url = "https://files.pythonhosted.org/packages/d2/6b/4083aaaeba9b3112f55ac57c2ce7001dc4d8fa3fcc228a39f09cc84ede27/coverage-7.13.2-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:f9ab1d5b86f8fbc97a5b3cd6280a3fd85fef3b028689d8a2c00918f0d82c728c", size = 261200, upload-time = "2026-01-25T12:58:59.255Z" },
+ { url = "https://files.pythonhosted.org/packages/e9/d2/aea92fa36d61955e8c416ede9cf9bf142aa196f3aea214bb67f85235a050/coverage-7.13.2-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:f674f59712d67e841525b99e5e2b595250e39b529c3bda14764e4f625a3fa01f", size = 260095, upload-time = "2026-01-25T12:59:01.066Z" },
+ { url = "https://files.pythonhosted.org/packages/0d/ae/04ffe96a80f107ea21b22b2367175c621da920063260a1c22f9452fd7866/coverage-7.13.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:c6cadac7b8ace1ba9144feb1ae3cb787a6065ba6d23ffc59a934b16406c26573", size = 262284, upload-time = "2026-01-25T12:59:02.802Z" },
+ { url = "https://files.pythonhosted.org/packages/1c/7a/6f354dcd7dfc41297791d6fb4e0d618acb55810bde2c1fd14b3939e05c2b/coverage-7.13.2-cp313-cp313t-win32.whl", hash = "sha256:14ae4146465f8e6e6253eba0cccd57423e598a4cb925958b240c805300918343", size = 222389, upload-time = "2026-01-25T12:59:04.563Z" },
+ { url = "https://files.pythonhosted.org/packages/8d/d5/080ad292a4a3d3daf411574be0a1f56d6dee2c4fdf6b005342be9fac807f/coverage-7.13.2-cp313-cp313t-win_amd64.whl", hash = "sha256:9074896edd705a05769e3de0eac0a8388484b503b68863dd06d5e473f874fd47", size = 223450, upload-time = "2026-01-25T12:59:06.677Z" },
+ { url = "https://files.pythonhosted.org/packages/88/96/df576fbacc522e9fb8d1c4b7a7fc62eb734be56e2cba1d88d2eabe08ea3f/coverage-7.13.2-cp313-cp313t-win_arm64.whl", hash = "sha256:69e526e14f3f854eda573d3cf40cffd29a1a91c684743d904c33dbdcd0e0f3e7", size = 221707, upload-time = "2026-01-25T12:59:08.363Z" },
+ { url = "https://files.pythonhosted.org/packages/55/53/1da9e51a0775634b04fcc11eb25c002fc58ee4f92ce2e8512f94ac5fc5bf/coverage-7.13.2-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:387a825f43d680e7310e6f325b2167dd093bc8ffd933b83e9aa0983cf6e0a2ef", size = 219213, upload-time = "2026-01-25T12:59:11.909Z" },
+ { url = "https://files.pythonhosted.org/packages/46/35/b3caac3ebbd10230fea5a33012b27d19e999a17c9285c4228b4b2e35b7da/coverage-7.13.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:f0d7fea9d8e5d778cd5a9e8fc38308ad688f02040e883cdc13311ef2748cb40f", size = 219549, upload-time = "2026-01-25T12:59:13.638Z" },
+ { url = "https://files.pythonhosted.org/packages/76/9c/e1cf7def1bdc72c1907e60703983a588f9558434a2ff94615747bd73c192/coverage-7.13.2-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:e080afb413be106c95c4ee96b4fffdc9e2fa56a8bbf90b5c0918e5c4449412f5", size = 250586, upload-time = "2026-01-25T12:59:15.808Z" },
+ { url = "https://files.pythonhosted.org/packages/ba/49/f54ec02ed12be66c8d8897270505759e057b0c68564a65c429ccdd1f139e/coverage-7.13.2-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:a7fc042ba3c7ce25b8a9f097eb0f32a5ce1ccdb639d9eec114e26def98e1f8a4", size = 253093, upload-time = "2026-01-25T12:59:17.491Z" },
+ { url = "https://files.pythonhosted.org/packages/fb/5e/aaf86be3e181d907e23c0f61fccaeb38de8e6f6b47aed92bf57d8fc9c034/coverage-7.13.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d0ba505e021557f7f8173ee8cd6b926373d8653e5ff7581ae2efce1b11ef4c27", size = 254446, upload-time = "2026-01-25T12:59:19.752Z" },
+ { url = "https://files.pythonhosted.org/packages/28/c8/a5fa01460e2d75b0c853b392080d6829d3ca8b5ab31e158fa0501bc7c708/coverage-7.13.2-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:7de326f80e3451bd5cc7239ab46c73ddb658fe0b7649476bc7413572d36cd548", size = 250615, upload-time = "2026-01-25T12:59:21.928Z" },
+ { url = "https://files.pythonhosted.org/packages/86/0b/6d56315a55f7062bb66410732c24879ccb2ec527ab6630246de5fe45a1df/coverage-7.13.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:abaea04f1e7e34841d4a7b343904a3f59481f62f9df39e2cd399d69a187a9660", size = 252452, upload-time = "2026-01-25T12:59:23.592Z" },
+ { url = "https://files.pythonhosted.org/packages/30/19/9bc550363ebc6b0ea121977ee44d05ecd1e8bf79018b8444f1028701c563/coverage-7.13.2-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:9f93959ee0c604bccd8e0697be21de0887b1f73efcc3aa73a3ec0fd13feace92", size = 250418, upload-time = "2026-01-25T12:59:25.392Z" },
+ { url = "https://files.pythonhosted.org/packages/1f/53/580530a31ca2f0cc6f07a8f2ab5460785b02bb11bdf815d4c4d37a4c5169/coverage-7.13.2-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:13fe81ead04e34e105bf1b3c9f9cdf32ce31736ee5d90a8d2de02b9d3e1bcb82", size = 250231, upload-time = "2026-01-25T12:59:27.888Z" },
+ { url = "https://files.pythonhosted.org/packages/e2/42/dd9093f919dc3088cb472893651884bd675e3df3d38a43f9053656dca9a2/coverage-7.13.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d6d16b0f71120e365741bca2cb473ca6fe38930bc5431c5e850ba949f708f892", size = 251888, upload-time = "2026-01-25T12:59:29.636Z" },
+ { url = "https://files.pythonhosted.org/packages/fa/a6/0af4053e6e819774626e133c3d6f70fae4d44884bfc4b126cb647baee8d3/coverage-7.13.2-cp314-cp314-win32.whl", hash = "sha256:9b2f4714bb7d99ba3790ee095b3b4ac94767e1347fe424278a0b10acb3ff04fe", size = 221968, upload-time = "2026-01-25T12:59:31.424Z" },
+ { url = "https://files.pythonhosted.org/packages/c4/cc/5aff1e1f80d55862442855517bb8ad8ad3a68639441ff6287dde6a58558b/coverage-7.13.2-cp314-cp314-win_amd64.whl", hash = "sha256:e4121a90823a063d717a96e0a0529c727fb31ea889369a0ee3ec00ed99bf6859", size = 222783, upload-time = "2026-01-25T12:59:33.118Z" },
+ { url = "https://files.pythonhosted.org/packages/de/20/09abafb24f84b3292cc658728803416c15b79f9ee5e68d25238a895b07d9/coverage-7.13.2-cp314-cp314-win_arm64.whl", hash = "sha256:6873f0271b4a15a33e7590f338d823f6f66f91ed147a03938d7ce26efd04eee6", size = 221348, upload-time = "2026-01-25T12:59:34.939Z" },
+ { url = "https://files.pythonhosted.org/packages/b6/60/a3820c7232db63be060e4019017cd3426751c2699dab3c62819cdbcea387/coverage-7.13.2-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:f61d349f5b7cd95c34017f1927ee379bfbe9884300d74e07cf630ccf7a610c1b", size = 219950, upload-time = "2026-01-25T12:59:36.624Z" },
+ { url = "https://files.pythonhosted.org/packages/fd/37/e4ef5975fdeb86b1e56db9a82f41b032e3d93a840ebaf4064f39e770d5c5/coverage-7.13.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:a43d34ce714f4ca674c0d90beb760eb05aad906f2c47580ccee9da8fe8bfb417", size = 220209, upload-time = "2026-01-25T12:59:38.339Z" },
+ { url = "https://files.pythonhosted.org/packages/54/df/d40e091d00c51adca1e251d3b60a8b464112efa3004949e96a74d7c19a64/coverage-7.13.2-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:bff1b04cb9d4900ce5c56c4942f047dc7efe57e2608cb7c3c8936e9970ccdbee", size = 261576, upload-time = "2026-01-25T12:59:40.446Z" },
+ { url = "https://files.pythonhosted.org/packages/c5/44/5259c4bed54e3392e5c176121af9f71919d96dde853386e7730e705f3520/coverage-7.13.2-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:6ae99e4560963ad8e163e819e5d77d413d331fd00566c1e0856aa252303552c1", size = 263704, upload-time = "2026-01-25T12:59:42.346Z" },
+ { url = "https://files.pythonhosted.org/packages/16/bd/ae9f005827abcbe2c70157459ae86053971c9fa14617b63903abbdce26d9/coverage-7.13.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e79a8c7d461820257d9aa43716c4efc55366d7b292e46b5b37165be1d377405d", size = 266109, upload-time = "2026-01-25T12:59:44.073Z" },
+ { url = "https://files.pythonhosted.org/packages/a2/c0/8e279c1c0f5b1eaa3ad9b0fb7a5637fc0379ea7d85a781c0fe0bb3cfc2ab/coverage-7.13.2-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:060ee84f6a769d40c492711911a76811b4befb6fba50abb450371abb720f5bd6", size = 260686, upload-time = "2026-01-25T12:59:45.804Z" },
+ { url = "https://files.pythonhosted.org/packages/b2/47/3a8112627e9d863e7cddd72894171c929e94491a597811725befdcd76bce/coverage-7.13.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:3bca209d001fd03ea2d978f8a4985093240a355c93078aee3f799852c23f561a", size = 263568, upload-time = "2026-01-25T12:59:47.929Z" },
+ { url = "https://files.pythonhosted.org/packages/92/bc/7ea367d84afa3120afc3ce6de294fd2dcd33b51e2e7fbe4bbfd200f2cb8c/coverage-7.13.2-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:6b8092aa38d72f091db61ef83cb66076f18f02da3e1a75039a4f218629600e04", size = 261174, upload-time = "2026-01-25T12:59:49.717Z" },
+ { url = "https://files.pythonhosted.org/packages/33/b7/f1092dcecb6637e31cc2db099581ee5c61a17647849bae6b8261a2b78430/coverage-7.13.2-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:4a3158dc2dcce5200d91ec28cd315c999eebff355437d2765840555d765a6e5f", size = 260017, upload-time = "2026-01-25T12:59:51.463Z" },
+ { url = "https://files.pythonhosted.org/packages/2b/cd/f3d07d4b95fbe1a2ef0958c15da614f7e4f557720132de34d2dc3aa7e911/coverage-7.13.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:3973f353b2d70bd9796cc12f532a05945232ccae966456c8ed7034cb96bbfd6f", size = 262337, upload-time = "2026-01-25T12:59:53.407Z" },
+ { url = "https://files.pythonhosted.org/packages/e0/db/b0d5b2873a07cb1e06a55d998697c0a5a540dcefbf353774c99eb3874513/coverage-7.13.2-cp314-cp314t-win32.whl", hash = "sha256:79f6506a678a59d4ded048dc72f1859ebede8ec2b9a2d509ebe161f01c2879d3", size = 222749, upload-time = "2026-01-25T12:59:56.316Z" },
+ { url = "https://files.pythonhosted.org/packages/e5/2f/838a5394c082ac57d85f57f6aba53093b30d9089781df72412126505716f/coverage-7.13.2-cp314-cp314t-win_amd64.whl", hash = "sha256:196bfeabdccc5a020a57d5a368c681e3a6ceb0447d153aeccc1ab4d70a5032ba", size = 223857, upload-time = "2026-01-25T12:59:58.201Z" },
+ { url = "https://files.pythonhosted.org/packages/44/d4/b608243e76ead3a4298824b50922b89ef793e50069ce30316a65c1b4d7ef/coverage-7.13.2-cp314-cp314t-win_arm64.whl", hash = "sha256:69269ab58783e090bfbf5b916ab3d188126e22d6070bbfc93098fdd474ef937c", size = 221881, upload-time = "2026-01-25T13:00:00.449Z" },
+ { url = "https://files.pythonhosted.org/packages/d2/db/d291e30fdf7ea617a335531e72294e0c723356d7fdde8fba00610a76bda9/coverage-7.13.2-py3-none-any.whl", hash = "sha256:40ce1ea1e25125556d8e76bd0b61500839a07944cc287ac21d5626f3e620cad5", size = 210943, upload-time = "2026-01-25T13:00:02.388Z" },
+]
+
+[package.optional-dependencies]
+toml = [
+ { name = "tomli", marker = "python_full_version <= '3.11'" },
+]
+
[[package]]
name = "exceptiongroup"
version = "1.3.1"
@@ -917,6 +1021,20 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801, upload-time = "2025-12-06T21:30:49.154Z" },
]

+[[package]]
+name = "pytest-cov"
+version = "7.0.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "coverage", extra = ["toml"] },
+ { name = "pluggy" },
+ { name = "pytest" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/5e/f7/c933acc76f5208b3b00089573cf6a2bc26dc80a8aece8f52bb7d6b1855ca/pytest_cov-7.0.0.tar.gz", hash = "sha256:33c97eda2e049a0c5298e91f519302a1334c26ac65c1a483d6206fd458361af1", size = 54328, upload-time = "2025-09-09T10:57:02.113Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/ee/49/1377b49de7d0c1ce41292161ea0f721913fa8722c19fb9c1e3aa0367eecb/pytest_cov-7.0.0-py3-none-any.whl", hash = "sha256:3b8e9558b16cc1479da72058bdecf8073661c7f57f7d3c5f22a1c23507f2d861", size = 22424, upload-time = "2025-09-09T10:57:00.695Z" },
+]
+
[[package]]
name = "python-dateutil"
version = "2.9.0.post0"
@@ -1136,6 +1254,7 @@ dev = [
{ name = "mkdocstrings", extra = ["python"] },
{ name = "pyright" },
{ name = "pytest" },
+ { name = "pytest-cov" },
{ name = "ruff" },
]

@@ -1153,6 +1272,7 @@ dev = [
{ name = "mkdocstrings", extras = ["python"], specifier = ">=0.24.0" },
{ name = "pyright", specifier = ">=1.1.370,<1.1.374" },
{ name = "pytest", specifier = ">=8.2.2" },
+ { name = "pytest-cov", specifier = ">=7.0.0" },
{ name = "ruff", specifier = ">=0.4.9" },
]
