From 2a13ed0122c635c3f97664ee3f78faa20ae6f9cd Mon Sep 17 00:00:00 2001
From: Philipp Großer
Date: Thu, 29 Jan 2026 16:53:42 +0100
Subject: [PATCH 1/7] Minor text update

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 7c990a9..b1be475 100644
--- a/README.md
+++ b/README.md
@@ -36,7 +36,7 @@ plan = (
     .dt_age_years(column="date_of_birth", new_column="age")
     .math_clamp(column="age", min_value=0, max_value=120)
 
-    # Categorize patients
+    # Categorize patients by age
    .map_discretize(column="age", bins=[18, 40, 65], labels=["young", "adult", "senior"], new_column="age_group")
 
     # Filter and clean
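The hunk above touches the README's example pipeline. As a minimal sketch of how that pipeline is driven end to end — assuming the package is installed; the sample frame below is illustrative, and the (DataFrame, Protocol) return shape of process() matches the core.py docstrings in patch 5/7:

# Hedged sketch: exercising the README pipeline from this patch.
# The input data is made up; only API names shown in this series are used.
from datetime import date

import polars as pl

from transformplan import TransformPlan

plan = (
    TransformPlan()
    .dt_age_years(column="date_of_birth", new_column="age")
    .math_clamp(column="age", min_value=0, max_value=120)
    # Categorize patients by age
    .map_discretize(
        column="age",
        bins=[18, 40, 65],
        labels=["young", "adult", "senior"],
        new_column="age_group",
    )
)

df = pl.DataFrame({"date_of_birth": [date(1990, 5, 1), date(1952, 3, 2)]})
result, protocol = plan.process(df)  # (transformed DataFrame, audit Protocol)
print(protocol)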
From 749b017eace7980db537a763fa11409dc968e512 Mon Sep 17 00:00:00 2001
From: Philipp Großer
Date: Thu, 29 Jan 2026 16:59:55 +0100
Subject: [PATCH 2/7] coverage

---
 .coverage                   | Bin 0 -> 53248 bytes
 .github/workflows/tests.yml |  44 ++++++++++++-
 README.md                   |   2 +
 pyproject.toml              |   1 +
 uv.lock                     | 120 ++++++++++++++++++++++++++++++++++++
 5 files changed, 164 insertions(+), 3 deletions(-)
 create mode 100644 .coverage

diff --git a/.coverage b/.coverage
new file mode 100644
index 0000000000000000000000000000000000000000..75369ae6de2b6903f9ebb98af2bc04d756283ad5
GIT binary patch
literal 53248
[base85 payload of the binary .coverage database omitted]
literal 0
HcmV?d00001

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index dc14d51..a796f54 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -1,6 +1,8 @@
-name: Test Pull Request
+name: Tests
 
 on:
+  push:
+    branches: [main]
   pull_request:
 
 jobs:
@@ -23,5 +25,41 @@ jobs:
       - name: Install dependencies
         run: uv sync --group dev
 
-      - name: Run tests
-        run: uv run pytest -vv -W error
+      - name: Run tests with coverage
+        run: uv run pytest -vv -W error --cov=transformplan --cov-report=xml --cov-report=term
+
+  coverage-badge:
+    needs: test
+    runs-on: ubuntu-22.04
+    if: github.event_name == 'push' && github.ref == 'refs/heads/main'
+    permissions:
+      contents: write
+
+    steps:
+      - uses: actions/checkout@v5
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v5
+        with:
+          enable-cache: true
+          cache-dependency-glob: pyproject.toml
+          python-version: "3.12"
+
+      - name: Install dependencies
+        run: uv sync --group dev
+
+      - name: Generate coverage report
+        run: uv run pytest --cov=transformplan --cov-report=xml --cov-report=term
+
+      - name: Create coverage badge
+        uses: tj-actions/coverage-badge-py@v2
+        with:
+          output: coverage.svg
+
+      - name: Commit badge
+        run: |
+          git config --local user.email "github-actions[bot]@users.noreply.github.com"
+          git config --local user.name "github-actions[bot]"
+          git add coverage.svg
+          git diff --staged --quiet || git commit -m "chore: update coverage badge [skip ci]"
+          git push

diff --git a/README.md b/README.md
index b1be475..b5e2a95 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,5 @@
+![Coverage](./coverage.svg)
+
 # TransformPlan: Auditable Data Transformation Pipelines
 
diff --git a/pyproject.toml b/pyproject.toml
index 37cbece..09f37f7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -29,6 +29,7 @@ dev = [
     "mkdocs>=1.6.0",
     "mkdocs-material>=9.5.0",
     "mkdocstrings[python]>=0.24.0",
+    "pytest-cov>=7.0.0",
 ]
 
 [build-system]
diff --git a/uv.lock b/uv.lock
index d70dd8d..d1ab366 100644
--- a/uv.lock
+++ b/uv.lock
@@ -153,6 +153,110 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" },
 ]
 
+[[package]]
+name = "coverage"
+version = "7.13.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/ad/49/349848445b0e53660e258acbcc9b0d014895b6739237920886672240f84b/coverage-7.13.2.tar.gz", hash = "sha256:044c6951ec37146b72a50cc81ef02217d27d4c3640efd2640311393cbbf143d3", size = 826523, upload-time = "2026-01-25T13:00:04.889Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/a4/2d/63e37369c8e81a643afe54f76073b020f7b97ddbe698c5c944b51b0a2bc5/coverage-7.13.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f4af3b01763909f477ea17c962e2cca8f39b350a4e46e3a30838b2c12e31b81b", size = 218842, upload-time = "2026-01-25T12:57:15.3Z" },
+    { url = "https://files.pythonhosted.org/packages/57/06/86ce882a8d58cbcb3030e298788988e618da35420d16a8c66dac34f138d0/coverage-7.13.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:36393bd2841fa0b59498f75466ee9bdec4f770d3254f031f23e8fd8e140ffdd2", size = 219360, upload-time = "2026-01-25T12:57:17.572Z" },
+    { url = "https://files.pythonhosted.org/packages/cd/84/70b0eb1ee19ca4ef559c559054c59e5b2ae4ec9af61398670189e5d276e9/coverage-7.13.2-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:9cc7573518b7e2186bd229b1a0fe24a807273798832c27032c4510f47ffdb896", size = 246123, upload-time = "2026-01-25T12:57:19.087Z" },
+    { url =
"https://files.pythonhosted.org/packages/35/fb/05b9830c2e8275ebc031e0019387cda99113e62bb500ab328bb72578183b/coverage-7.13.2-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:ca9566769b69a5e216a4e176d54b9df88f29d750c5b78dbb899e379b4e14b30c", size = 247930, upload-time = "2026-01-25T12:57:20.929Z" }, + { url = "https://files.pythonhosted.org/packages/81/aa/3f37858ca2eed4f09b10ca3c6ddc9041be0a475626cd7fd2712f4a2d526f/coverage-7.13.2-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9c9bdea644e94fd66d75a6f7e9a97bb822371e1fe7eadae2cacd50fcbc28e4dc", size = 249804, upload-time = "2026-01-25T12:57:22.904Z" }, + { url = "https://files.pythonhosted.org/packages/b6/b3/c904f40c56e60a2d9678a5ee8df3d906d297d15fb8bec5756c3b0a67e2df/coverage-7.13.2-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:5bd447332ec4f45838c1ad42268ce21ca87c40deb86eabd59888859b66be22a5", size = 246815, upload-time = "2026-01-25T12:57:24.314Z" }, + { url = "https://files.pythonhosted.org/packages/41/91/ddc1c5394ca7fd086342486440bfdd6b9e9bda512bf774599c7c7a0081e0/coverage-7.13.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:7c79ad5c28a16a1277e1187cf83ea8dafdcc689a784228a7d390f19776db7c31", size = 247843, upload-time = "2026-01-25T12:57:26.544Z" }, + { url = "https://files.pythonhosted.org/packages/87/d2/cdff8f4cd33697883c224ea8e003e9c77c0f1a837dc41d95a94dd26aad67/coverage-7.13.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:76e06ccacd1fb6ada5d076ed98a8c6f66e2e6acd3df02819e2ee29fd637b76ad", size = 245850, upload-time = "2026-01-25T12:57:28.507Z" }, + { url = "https://files.pythonhosted.org/packages/f5/42/e837febb7866bf2553ab53dd62ed52f9bb36d60c7e017c55376ad21fbb05/coverage-7.13.2-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:49d49e9a5e9f4dc3d3dac95278a020afa6d6bdd41f63608a76fa05a719d5b66f", size = 246116, upload-time = "2026-01-25T12:57:30.16Z" }, + { url = "https://files.pythonhosted.org/packages/09/b1/4a3f935d7df154df02ff4f71af8d61298d713a7ba305d050ae475bfbdde2/coverage-7.13.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:ed2bce0e7bfa53f7b0b01c722da289ef6ad4c18ebd52b1f93704c21f116360c8", size = 246720, upload-time = "2026-01-25T12:57:32.165Z" }, + { url = "https://files.pythonhosted.org/packages/e1/fe/538a6fd44c515f1c5197a3f078094cbaf2ce9f945df5b44e29d95c864bff/coverage-7.13.2-cp310-cp310-win32.whl", hash = "sha256:1574983178b35b9af4db4a9f7328a18a14a0a0ce76ffaa1c1bacb4cc82089a7c", size = 221465, upload-time = "2026-01-25T12:57:33.511Z" }, + { url = "https://files.pythonhosted.org/packages/5e/09/4b63a024295f326ec1a40ec8def27799300ce8775b1cbf0d33b1790605c4/coverage-7.13.2-cp310-cp310-win_amd64.whl", hash = "sha256:a360a8baeb038928ceb996f5623a4cd508728f8f13e08d4e96ce161702f3dd99", size = 222397, upload-time = "2026-01-25T12:57:34.927Z" }, + { url = "https://files.pythonhosted.org/packages/6c/01/abca50583a8975bb6e1c59eff67ed8e48bb127c07dad5c28d9e96ccc09ec/coverage-7.13.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:060ebf6f2c51aff5ba38e1f43a2095e087389b1c69d559fde6049a4b0001320e", size = 218971, upload-time = "2026-01-25T12:57:36.953Z" }, + { url = "https://files.pythonhosted.org/packages/eb/0e/b6489f344d99cd1e5b4d5e1be52dfd3f8a3dc5112aa6c33948da8cabad4e/coverage-7.13.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c1ea8ca9db5e7469cd364552985e15911548ea5b69c48a17291f0cac70484b2e", size = 219473, upload-time = "2026-01-25T12:57:38.934Z" }, + { url = 
"https://files.pythonhosted.org/packages/17/11/db2f414915a8e4ec53f60b17956c27f21fb68fcf20f8a455ce7c2ccec638/coverage-7.13.2-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:b780090d15fd58f07cf2011943e25a5f0c1c894384b13a216b6c86c8a8a7c508", size = 249896, upload-time = "2026-01-25T12:57:40.365Z" }, + { url = "https://files.pythonhosted.org/packages/80/06/0823fe93913663c017e508e8810c998c8ebd3ec2a5a85d2c3754297bdede/coverage-7.13.2-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:88a800258d83acb803c38175b4495d293656d5fac48659c953c18e5f539a274b", size = 251810, upload-time = "2026-01-25T12:57:42.045Z" }, + { url = "https://files.pythonhosted.org/packages/61/dc/b151c3cc41b28cdf7f0166c5fa1271cbc305a8ec0124cce4b04f74791a18/coverage-7.13.2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6326e18e9a553e674d948536a04a80d850a5eeefe2aae2e6d7cf05d54046c01b", size = 253920, upload-time = "2026-01-25T12:57:44.026Z" }, + { url = "https://files.pythonhosted.org/packages/2d/35/e83de0556e54a4729a2b94ea816f74ce08732e81945024adee46851c2264/coverage-7.13.2-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:59562de3f797979e1ff07c587e2ac36ba60ca59d16c211eceaa579c266c5022f", size = 250025, upload-time = "2026-01-25T12:57:45.624Z" }, + { url = "https://files.pythonhosted.org/packages/39/67/af2eb9c3926ce3ea0d58a0d2516fcbdacf7a9fc9559fe63076beaf3f2596/coverage-7.13.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:27ba1ed6f66b0e2d61bfa78874dffd4f8c3a12f8e2b5410e515ab345ba7bc9c3", size = 251612, upload-time = "2026-01-25T12:57:47.713Z" }, + { url = "https://files.pythonhosted.org/packages/26/62/5be2e25f3d6c711d23b71296f8b44c978d4c8b4e5b26871abfc164297502/coverage-7.13.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:8be48da4d47cc68754ce643ea50b3234557cbefe47c2f120495e7bd0a2756f2b", size = 249670, upload-time = "2026-01-25T12:57:49.378Z" }, + { url = "https://files.pythonhosted.org/packages/b3/51/400d1b09a8344199f9b6a6fc1868005d766b7ea95e7882e494fa862ca69c/coverage-7.13.2-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:2a47a4223d3361b91176aedd9d4e05844ca67d7188456227b6bf5e436630c9a1", size = 249395, upload-time = "2026-01-25T12:57:50.86Z" }, + { url = "https://files.pythonhosted.org/packages/e0/36/f02234bc6e5230e2f0a63fd125d0a2093c73ef20fdf681c7af62a140e4e7/coverage-7.13.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:c6f141b468740197d6bd38f2b26ade124363228cc3f9858bd9924ab059e00059", size = 250298, upload-time = "2026-01-25T12:57:52.287Z" }, + { url = "https://files.pythonhosted.org/packages/b0/06/713110d3dd3151b93611c9cbfc65c15b4156b44f927fced49ac0b20b32a4/coverage-7.13.2-cp311-cp311-win32.whl", hash = "sha256:89567798404af067604246e01a49ef907d112edf2b75ef814b1364d5ce267031", size = 221485, upload-time = "2026-01-25T12:57:53.876Z" }, + { url = "https://files.pythonhosted.org/packages/16/0c/3ae6255fa1ebcb7dec19c9a59e85ef5f34566d1265c70af5b2fc981da834/coverage-7.13.2-cp311-cp311-win_amd64.whl", hash = "sha256:21dd57941804ae2ac7e921771a5e21bbf9aabec317a041d164853ad0a96ce31e", size = 222421, upload-time = "2026-01-25T12:57:55.433Z" }, + { url = "https://files.pythonhosted.org/packages/b5/37/fabc3179af4d61d89ea47bd04333fec735cd5e8b59baad44fed9fc4170d7/coverage-7.13.2-cp311-cp311-win_arm64.whl", hash = "sha256:10758e0586c134a0bafa28f2d37dd2cdb5e4a90de25c0fc0c77dabbad46eca28", size = 221088, upload-time = "2026-01-25T12:57:57.41Z" }, + { 
url = "https://files.pythonhosted.org/packages/46/39/e92a35f7800222d3f7b2cbb7bbc3b65672ae8d501cb31801b2d2bd7acdf1/coverage-7.13.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f106b2af193f965d0d3234f3f83fc35278c7fb935dfbde56ae2da3dd2c03b84d", size = 219142, upload-time = "2026-01-25T12:58:00.448Z" }, + { url = "https://files.pythonhosted.org/packages/45/7a/8bf9e9309c4c996e65c52a7c5a112707ecdd9fbaf49e10b5a705a402bbb4/coverage-7.13.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:78f45d21dc4d5d6bd29323f0320089ef7eae16e4bef712dff79d184fa7330af3", size = 219503, upload-time = "2026-01-25T12:58:02.451Z" }, + { url = "https://files.pythonhosted.org/packages/87/93/17661e06b7b37580923f3f12406ac91d78aeed293fb6da0b69cc7957582f/coverage-7.13.2-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:fae91dfecd816444c74531a9c3d6ded17a504767e97aa674d44f638107265b99", size = 251006, upload-time = "2026-01-25T12:58:04.059Z" }, + { url = "https://files.pythonhosted.org/packages/12/f0/f9e59fb8c310171497f379e25db060abef9fa605e09d63157eebec102676/coverage-7.13.2-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:264657171406c114787b441484de620e03d8f7202f113d62fcd3d9688baa3e6f", size = 253750, upload-time = "2026-01-25T12:58:05.574Z" }, + { url = "https://files.pythonhosted.org/packages/e5/b1/1935e31add2232663cf7edd8269548b122a7d100047ff93475dbaaae673e/coverage-7.13.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ae47d8dcd3ded0155afbb59c62bd8ab07ea0fd4902e1c40567439e6db9dcaf2f", size = 254862, upload-time = "2026-01-25T12:58:07.647Z" }, + { url = "https://files.pythonhosted.org/packages/af/59/b5e97071ec13df5f45da2b3391b6cdbec78ba20757bc92580a5b3d5fa53c/coverage-7.13.2-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:8a0b33e9fd838220b007ce8f299114d406c1e8edb21336af4c97a26ecfd185aa", size = 251420, upload-time = "2026-01-25T12:58:09.309Z" }, + { url = "https://files.pythonhosted.org/packages/3f/75/9495932f87469d013dc515fb0ce1aac5fa97766f38f6b1a1deb1ee7b7f3a/coverage-7.13.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b3becbea7f3ce9a2d4d430f223ec15888e4deb31395840a79e916368d6004cce", size = 252786, upload-time = "2026-01-25T12:58:10.909Z" }, + { url = "https://files.pythonhosted.org/packages/6a/59/af550721f0eb62f46f7b8cb7e6f1860592189267b1c411a4e3a057caacee/coverage-7.13.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:f819c727a6e6eeb8711e4ce63d78c620f69630a2e9d53bc95ca5379f57b6ba94", size = 250928, upload-time = "2026-01-25T12:58:12.449Z" }, + { url = "https://files.pythonhosted.org/packages/9b/b1/21b4445709aae500be4ab43bbcfb4e53dc0811c3396dcb11bf9f23fd0226/coverage-7.13.2-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:4f7b71757a3ab19f7ba286e04c181004c1d61be921795ee8ba6970fd0ec91da5", size = 250496, upload-time = "2026-01-25T12:58:14.047Z" }, + { url = "https://files.pythonhosted.org/packages/ba/b1/0f5d89dfe0392990e4f3980adbde3eb34885bc1effb2dc369e0bf385e389/coverage-7.13.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b7fc50d2afd2e6b4f6f2f403b70103d280a8e0cb35320cbbe6debcda02a1030b", size = 252373, upload-time = "2026-01-25T12:58:15.976Z" }, + { url = "https://files.pythonhosted.org/packages/01/c9/0cf1a6a57a9968cc049a6b896693faa523c638a5314b1fc374eb2b2ac904/coverage-7.13.2-cp312-cp312-win32.whl", hash = "sha256:292250282cf9bcf206b543d7608bda17ca6fc151f4cbae949fc7e115112fbd41", size = 221696, upload-time = 
"2026-01-25T12:58:17.517Z" }, + { url = "https://files.pythonhosted.org/packages/4d/05/d7540bf983f09d32803911afed135524570f8c47bb394bf6206c1dc3a786/coverage-7.13.2-cp312-cp312-win_amd64.whl", hash = "sha256:eeea10169fac01549a7921d27a3e517194ae254b542102267bef7a93ed38c40e", size = 222504, upload-time = "2026-01-25T12:58:19.115Z" }, + { url = "https://files.pythonhosted.org/packages/15/8b/1a9f037a736ced0a12aacf6330cdaad5008081142a7070bc58b0f7930cbc/coverage-7.13.2-cp312-cp312-win_arm64.whl", hash = "sha256:2a5b567f0b635b592c917f96b9a9cb3dbd4c320d03f4bf94e9084e494f2e8894", size = 221120, upload-time = "2026-01-25T12:58:21.334Z" }, + { url = "https://files.pythonhosted.org/packages/a7/f0/3d3eac7568ab6096ff23791a526b0048a1ff3f49d0e236b2af6fb6558e88/coverage-7.13.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ed75de7d1217cf3b99365d110975f83af0528c849ef5180a12fd91b5064df9d6", size = 219168, upload-time = "2026-01-25T12:58:23.376Z" }, + { url = "https://files.pythonhosted.org/packages/a3/a6/f8b5cfeddbab95fdef4dcd682d82e5dcff7a112ced57a959f89537ee9995/coverage-7.13.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:97e596de8fa9bada4d88fde64a3f4d37f1b6131e4faa32bad7808abc79887ddc", size = 219537, upload-time = "2026-01-25T12:58:24.932Z" }, + { url = "https://files.pythonhosted.org/packages/7b/e6/8d8e6e0c516c838229d1e41cadcec91745f4b1031d4db17ce0043a0423b4/coverage-7.13.2-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:68c86173562ed4413345410c9480a8d64864ac5e54a5cda236748031e094229f", size = 250528, upload-time = "2026-01-25T12:58:26.567Z" }, + { url = "https://files.pythonhosted.org/packages/8e/78/befa6640f74092b86961f957f26504c8fba3d7da57cc2ab7407391870495/coverage-7.13.2-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:7be4d613638d678b2b3773b8f687537b284d7074695a43fe2fbbfc0e31ceaed1", size = 253132, upload-time = "2026-01-25T12:58:28.251Z" }, + { url = "https://files.pythonhosted.org/packages/9d/10/1630db1edd8ce675124a2ee0f7becc603d2bb7b345c2387b4b95c6907094/coverage-7.13.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d7f63ce526a96acd0e16c4af8b50b64334239550402fb1607ce6a584a6d62ce9", size = 254374, upload-time = "2026-01-25T12:58:30.294Z" }, + { url = "https://files.pythonhosted.org/packages/ed/1d/0d9381647b1e8e6d310ac4140be9c428a0277330991e0c35bdd751e338a4/coverage-7.13.2-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:406821f37f864f968e29ac14c3fccae0fec9fdeba48327f0341decf4daf92d7c", size = 250762, upload-time = "2026-01-25T12:58:32.036Z" }, + { url = "https://files.pythonhosted.org/packages/43/e4/5636dfc9a7c871ee8776af83ee33b4c26bc508ad6cee1e89b6419a366582/coverage-7.13.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ee68e5a4e3e5443623406b905db447dceddffee0dceb39f4e0cd9ec2a35004b5", size = 252502, upload-time = "2026-01-25T12:58:33.961Z" }, + { url = "https://files.pythonhosted.org/packages/02/2a/7ff2884d79d420cbb2d12fed6fff727b6d0ef27253140d3cdbbd03187ee0/coverage-7.13.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:2ee0e58cca0c17dd9c6c1cdde02bb705c7b3fbfa5f3b0b5afeda20d4ebff8ef4", size = 250463, upload-time = "2026-01-25T12:58:35.529Z" }, + { url = "https://files.pythonhosted.org/packages/91/c0/ba51087db645b6c7261570400fc62c89a16278763f36ba618dc8657a187b/coverage-7.13.2-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:6e5bbb5018bf76a56aabdb64246b5288d5ae1b7d0dd4d0534fe86df2c2992d1c", size = 
250288, upload-time = "2026-01-25T12:58:37.226Z" }, + { url = "https://files.pythonhosted.org/packages/03/07/44e6f428551c4d9faf63ebcefe49b30e5c89d1be96f6a3abd86a52da9d15/coverage-7.13.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a55516c68ef3e08e134e818d5e308ffa6b1337cc8b092b69b24287bf07d38e31", size = 252063, upload-time = "2026-01-25T12:58:38.821Z" }, + { url = "https://files.pythonhosted.org/packages/c2/67/35b730ad7e1859dd57e834d1bc06080d22d2f87457d53f692fce3f24a5a9/coverage-7.13.2-cp313-cp313-win32.whl", hash = "sha256:5b20211c47a8abf4abc3319d8ce2464864fa9f30c5fcaf958a3eed92f4f1fef8", size = 221716, upload-time = "2026-01-25T12:58:40.484Z" }, + { url = "https://files.pythonhosted.org/packages/0d/82/e5fcf5a97c72f45fc14829237a6550bf49d0ab882ac90e04b12a69db76b4/coverage-7.13.2-cp313-cp313-win_amd64.whl", hash = "sha256:14f500232e521201cf031549fb1ebdfc0a40f401cf519157f76c397e586c3beb", size = 222522, upload-time = "2026-01-25T12:58:43.247Z" }, + { url = "https://files.pythonhosted.org/packages/b1/f1/25d7b2f946d239dd2d6644ca2cc060d24f97551e2af13b6c24c722ae5f97/coverage-7.13.2-cp313-cp313-win_arm64.whl", hash = "sha256:9779310cb5a9778a60c899f075a8514c89fa6d10131445c2207fc893e0b14557", size = 221145, upload-time = "2026-01-25T12:58:45Z" }, + { url = "https://files.pythonhosted.org/packages/9e/f7/080376c029c8f76fadfe43911d0daffa0cbdc9f9418a0eead70c56fb7f4b/coverage-7.13.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:e64fa5a1e41ce5df6b547cbc3d3699381c9e2c2c369c67837e716ed0f549d48e", size = 219861, upload-time = "2026-01-25T12:58:46.586Z" }, + { url = "https://files.pythonhosted.org/packages/42/11/0b5e315af5ab35f4c4a70e64d3314e4eec25eefc6dec13be3a7d5ffe8ac5/coverage-7.13.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b01899e82a04085b6561eb233fd688474f57455e8ad35cd82286463ba06332b7", size = 220207, upload-time = "2026-01-25T12:58:48.277Z" }, + { url = "https://files.pythonhosted.org/packages/b2/0c/0874d0318fb1062117acbef06a09cf8b63f3060c22265adaad24b36306b7/coverage-7.13.2-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:838943bea48be0e2768b0cf7819544cdedc1bbb2f28427eabb6eb8c9eb2285d3", size = 261504, upload-time = "2026-01-25T12:58:49.904Z" }, + { url = "https://files.pythonhosted.org/packages/83/5e/1cd72c22ecb30751e43a72f40ba50fcef1b7e93e3ea823bd9feda8e51f9a/coverage-7.13.2-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:93d1d25ec2b27e90bcfef7012992d1f5121b51161b8bffcda756a816cf13c2c3", size = 263582, upload-time = "2026-01-25T12:58:51.582Z" }, + { url = "https://files.pythonhosted.org/packages/9b/da/8acf356707c7a42df4d0657020308e23e5a07397e81492640c186268497c/coverage-7.13.2-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:93b57142f9621b0d12349c43fc7741fe578e4bc914c1e5a54142856cfc0bf421", size = 266008, upload-time = "2026-01-25T12:58:53.234Z" }, + { url = "https://files.pythonhosted.org/packages/41/41/ea1730af99960309423c6ea8d6a4f1fa5564b2d97bd1d29dda4b42611f04/coverage-7.13.2-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f06799ae1bdfff7ccb8665d75f8291c69110ba9585253de254688aa8a1ccc6c5", size = 260762, upload-time = "2026-01-25T12:58:55.372Z" }, + { url = "https://files.pythonhosted.org/packages/22/fa/02884d2080ba71db64fdc127b311db60e01fe6ba797d9c8363725e39f4d5/coverage-7.13.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = 
"sha256:7f9405ab4f81d490811b1d91c7a20361135a2df4c170e7f0b747a794da5b7f23", size = 263571, upload-time = "2026-01-25T12:58:57.52Z" }, + { url = "https://files.pythonhosted.org/packages/d2/6b/4083aaaeba9b3112f55ac57c2ce7001dc4d8fa3fcc228a39f09cc84ede27/coverage-7.13.2-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:f9ab1d5b86f8fbc97a5b3cd6280a3fd85fef3b028689d8a2c00918f0d82c728c", size = 261200, upload-time = "2026-01-25T12:58:59.255Z" }, + { url = "https://files.pythonhosted.org/packages/e9/d2/aea92fa36d61955e8c416ede9cf9bf142aa196f3aea214bb67f85235a050/coverage-7.13.2-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:f674f59712d67e841525b99e5e2b595250e39b529c3bda14764e4f625a3fa01f", size = 260095, upload-time = "2026-01-25T12:59:01.066Z" }, + { url = "https://files.pythonhosted.org/packages/0d/ae/04ffe96a80f107ea21b22b2367175c621da920063260a1c22f9452fd7866/coverage-7.13.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:c6cadac7b8ace1ba9144feb1ae3cb787a6065ba6d23ffc59a934b16406c26573", size = 262284, upload-time = "2026-01-25T12:59:02.802Z" }, + { url = "https://files.pythonhosted.org/packages/1c/7a/6f354dcd7dfc41297791d6fb4e0d618acb55810bde2c1fd14b3939e05c2b/coverage-7.13.2-cp313-cp313t-win32.whl", hash = "sha256:14ae4146465f8e6e6253eba0cccd57423e598a4cb925958b240c805300918343", size = 222389, upload-time = "2026-01-25T12:59:04.563Z" }, + { url = "https://files.pythonhosted.org/packages/8d/d5/080ad292a4a3d3daf411574be0a1f56d6dee2c4fdf6b005342be9fac807f/coverage-7.13.2-cp313-cp313t-win_amd64.whl", hash = "sha256:9074896edd705a05769e3de0eac0a8388484b503b68863dd06d5e473f874fd47", size = 223450, upload-time = "2026-01-25T12:59:06.677Z" }, + { url = "https://files.pythonhosted.org/packages/88/96/df576fbacc522e9fb8d1c4b7a7fc62eb734be56e2cba1d88d2eabe08ea3f/coverage-7.13.2-cp313-cp313t-win_arm64.whl", hash = "sha256:69e526e14f3f854eda573d3cf40cffd29a1a91c684743d904c33dbdcd0e0f3e7", size = 221707, upload-time = "2026-01-25T12:59:08.363Z" }, + { url = "https://files.pythonhosted.org/packages/55/53/1da9e51a0775634b04fcc11eb25c002fc58ee4f92ce2e8512f94ac5fc5bf/coverage-7.13.2-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:387a825f43d680e7310e6f325b2167dd093bc8ffd933b83e9aa0983cf6e0a2ef", size = 219213, upload-time = "2026-01-25T12:59:11.909Z" }, + { url = "https://files.pythonhosted.org/packages/46/35/b3caac3ebbd10230fea5a33012b27d19e999a17c9285c4228b4b2e35b7da/coverage-7.13.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:f0d7fea9d8e5d778cd5a9e8fc38308ad688f02040e883cdc13311ef2748cb40f", size = 219549, upload-time = "2026-01-25T12:59:13.638Z" }, + { url = "https://files.pythonhosted.org/packages/76/9c/e1cf7def1bdc72c1907e60703983a588f9558434a2ff94615747bd73c192/coverage-7.13.2-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:e080afb413be106c95c4ee96b4fffdc9e2fa56a8bbf90b5c0918e5c4449412f5", size = 250586, upload-time = "2026-01-25T12:59:15.808Z" }, + { url = "https://files.pythonhosted.org/packages/ba/49/f54ec02ed12be66c8d8897270505759e057b0c68564a65c429ccdd1f139e/coverage-7.13.2-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:a7fc042ba3c7ce25b8a9f097eb0f32a5ce1ccdb639d9eec114e26def98e1f8a4", size = 253093, upload-time = "2026-01-25T12:59:17.491Z" }, + { url = "https://files.pythonhosted.org/packages/fb/5e/aaf86be3e181d907e23c0f61fccaeb38de8e6f6b47aed92bf57d8fc9c034/coverage-7.13.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:d0ba505e021557f7f8173ee8cd6b926373d8653e5ff7581ae2efce1b11ef4c27", size = 254446, upload-time = "2026-01-25T12:59:19.752Z" }, + { url = "https://files.pythonhosted.org/packages/28/c8/a5fa01460e2d75b0c853b392080d6829d3ca8b5ab31e158fa0501bc7c708/coverage-7.13.2-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:7de326f80e3451bd5cc7239ab46c73ddb658fe0b7649476bc7413572d36cd548", size = 250615, upload-time = "2026-01-25T12:59:21.928Z" }, + { url = "https://files.pythonhosted.org/packages/86/0b/6d56315a55f7062bb66410732c24879ccb2ec527ab6630246de5fe45a1df/coverage-7.13.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:abaea04f1e7e34841d4a7b343904a3f59481f62f9df39e2cd399d69a187a9660", size = 252452, upload-time = "2026-01-25T12:59:23.592Z" }, + { url = "https://files.pythonhosted.org/packages/30/19/9bc550363ebc6b0ea121977ee44d05ecd1e8bf79018b8444f1028701c563/coverage-7.13.2-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:9f93959ee0c604bccd8e0697be21de0887b1f73efcc3aa73a3ec0fd13feace92", size = 250418, upload-time = "2026-01-25T12:59:25.392Z" }, + { url = "https://files.pythonhosted.org/packages/1f/53/580530a31ca2f0cc6f07a8f2ab5460785b02bb11bdf815d4c4d37a4c5169/coverage-7.13.2-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:13fe81ead04e34e105bf1b3c9f9cdf32ce31736ee5d90a8d2de02b9d3e1bcb82", size = 250231, upload-time = "2026-01-25T12:59:27.888Z" }, + { url = "https://files.pythonhosted.org/packages/e2/42/dd9093f919dc3088cb472893651884bd675e3df3d38a43f9053656dca9a2/coverage-7.13.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d6d16b0f71120e365741bca2cb473ca6fe38930bc5431c5e850ba949f708f892", size = 251888, upload-time = "2026-01-25T12:59:29.636Z" }, + { url = "https://files.pythonhosted.org/packages/fa/a6/0af4053e6e819774626e133c3d6f70fae4d44884bfc4b126cb647baee8d3/coverage-7.13.2-cp314-cp314-win32.whl", hash = "sha256:9b2f4714bb7d99ba3790ee095b3b4ac94767e1347fe424278a0b10acb3ff04fe", size = 221968, upload-time = "2026-01-25T12:59:31.424Z" }, + { url = "https://files.pythonhosted.org/packages/c4/cc/5aff1e1f80d55862442855517bb8ad8ad3a68639441ff6287dde6a58558b/coverage-7.13.2-cp314-cp314-win_amd64.whl", hash = "sha256:e4121a90823a063d717a96e0a0529c727fb31ea889369a0ee3ec00ed99bf6859", size = 222783, upload-time = "2026-01-25T12:59:33.118Z" }, + { url = "https://files.pythonhosted.org/packages/de/20/09abafb24f84b3292cc658728803416c15b79f9ee5e68d25238a895b07d9/coverage-7.13.2-cp314-cp314-win_arm64.whl", hash = "sha256:6873f0271b4a15a33e7590f338d823f6f66f91ed147a03938d7ce26efd04eee6", size = 221348, upload-time = "2026-01-25T12:59:34.939Z" }, + { url = "https://files.pythonhosted.org/packages/b6/60/a3820c7232db63be060e4019017cd3426751c2699dab3c62819cdbcea387/coverage-7.13.2-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:f61d349f5b7cd95c34017f1927ee379bfbe9884300d74e07cf630ccf7a610c1b", size = 219950, upload-time = "2026-01-25T12:59:36.624Z" }, + { url = "https://files.pythonhosted.org/packages/fd/37/e4ef5975fdeb86b1e56db9a82f41b032e3d93a840ebaf4064f39e770d5c5/coverage-7.13.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:a43d34ce714f4ca674c0d90beb760eb05aad906f2c47580ccee9da8fe8bfb417", size = 220209, upload-time = "2026-01-25T12:59:38.339Z" }, + { url = "https://files.pythonhosted.org/packages/54/df/d40e091d00c51adca1e251d3b60a8b464112efa3004949e96a74d7c19a64/coverage-7.13.2-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:bff1b04cb9d4900ce5c56c4942f047dc7efe57e2608cb7c3c8936e9970ccdbee", size 
= 261576, upload-time = "2026-01-25T12:59:40.446Z" }, + { url = "https://files.pythonhosted.org/packages/c5/44/5259c4bed54e3392e5c176121af9f71919d96dde853386e7730e705f3520/coverage-7.13.2-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:6ae99e4560963ad8e163e819e5d77d413d331fd00566c1e0856aa252303552c1", size = 263704, upload-time = "2026-01-25T12:59:42.346Z" }, + { url = "https://files.pythonhosted.org/packages/16/bd/ae9f005827abcbe2c70157459ae86053971c9fa14617b63903abbdce26d9/coverage-7.13.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e79a8c7d461820257d9aa43716c4efc55366d7b292e46b5b37165be1d377405d", size = 266109, upload-time = "2026-01-25T12:59:44.073Z" }, + { url = "https://files.pythonhosted.org/packages/a2/c0/8e279c1c0f5b1eaa3ad9b0fb7a5637fc0379ea7d85a781c0fe0bb3cfc2ab/coverage-7.13.2-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:060ee84f6a769d40c492711911a76811b4befb6fba50abb450371abb720f5bd6", size = 260686, upload-time = "2026-01-25T12:59:45.804Z" }, + { url = "https://files.pythonhosted.org/packages/b2/47/3a8112627e9d863e7cddd72894171c929e94491a597811725befdcd76bce/coverage-7.13.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:3bca209d001fd03ea2d978f8a4985093240a355c93078aee3f799852c23f561a", size = 263568, upload-time = "2026-01-25T12:59:47.929Z" }, + { url = "https://files.pythonhosted.org/packages/92/bc/7ea367d84afa3120afc3ce6de294fd2dcd33b51e2e7fbe4bbfd200f2cb8c/coverage-7.13.2-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:6b8092aa38d72f091db61ef83cb66076f18f02da3e1a75039a4f218629600e04", size = 261174, upload-time = "2026-01-25T12:59:49.717Z" }, + { url = "https://files.pythonhosted.org/packages/33/b7/f1092dcecb6637e31cc2db099581ee5c61a17647849bae6b8261a2b78430/coverage-7.13.2-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:4a3158dc2dcce5200d91ec28cd315c999eebff355437d2765840555d765a6e5f", size = 260017, upload-time = "2026-01-25T12:59:51.463Z" }, + { url = "https://files.pythonhosted.org/packages/2b/cd/f3d07d4b95fbe1a2ef0958c15da614f7e4f557720132de34d2dc3aa7e911/coverage-7.13.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:3973f353b2d70bd9796cc12f532a05945232ccae966456c8ed7034cb96bbfd6f", size = 262337, upload-time = "2026-01-25T12:59:53.407Z" }, + { url = "https://files.pythonhosted.org/packages/e0/db/b0d5b2873a07cb1e06a55d998697c0a5a540dcefbf353774c99eb3874513/coverage-7.13.2-cp314-cp314t-win32.whl", hash = "sha256:79f6506a678a59d4ded048dc72f1859ebede8ec2b9a2d509ebe161f01c2879d3", size = 222749, upload-time = "2026-01-25T12:59:56.316Z" }, + { url = "https://files.pythonhosted.org/packages/e5/2f/838a5394c082ac57d85f57f6aba53093b30d9089781df72412126505716f/coverage-7.13.2-cp314-cp314t-win_amd64.whl", hash = "sha256:196bfeabdccc5a020a57d5a368c681e3a6ceb0447d153aeccc1ab4d70a5032ba", size = 223857, upload-time = "2026-01-25T12:59:58.201Z" }, + { url = "https://files.pythonhosted.org/packages/44/d4/b608243e76ead3a4298824b50922b89ef793e50069ce30316a65c1b4d7ef/coverage-7.13.2-cp314-cp314t-win_arm64.whl", hash = "sha256:69269ab58783e090bfbf5b916ab3d188126e22d6070bbfc93098fdd474ef937c", size = 221881, upload-time = "2026-01-25T13:00:00.449Z" }, + { url = "https://files.pythonhosted.org/packages/d2/db/d291e30fdf7ea617a335531e72294e0c723356d7fdde8fba00610a76bda9/coverage-7.13.2-py3-none-any.whl", hash = "sha256:40ce1ea1e25125556d8e76bd0b61500839a07944cc287ac21d5626f3e620cad5", size = 210943, upload-time = 
"2026-01-25T13:00:02.388Z" }, +] + +[package.optional-dependencies] +toml = [ + { name = "tomli", marker = "python_full_version <= '3.11'" }, +] + [[package]] name = "exceptiongroup" version = "1.3.1" @@ -917,6 +1021,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801, upload-time = "2025-12-06T21:30:49.154Z" }, ] +[[package]] +name = "pytest-cov" +version = "7.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "coverage", extra = ["toml"] }, + { name = "pluggy" }, + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5e/f7/c933acc76f5208b3b00089573cf6a2bc26dc80a8aece8f52bb7d6b1855ca/pytest_cov-7.0.0.tar.gz", hash = "sha256:33c97eda2e049a0c5298e91f519302a1334c26ac65c1a483d6206fd458361af1", size = 54328, upload-time = "2025-09-09T10:57:02.113Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ee/49/1377b49de7d0c1ce41292161ea0f721913fa8722c19fb9c1e3aa0367eecb/pytest_cov-7.0.0-py3-none-any.whl", hash = "sha256:3b8e9558b16cc1479da72058bdecf8073661c7f57f7d3c5f22a1c23507f2d861", size = 22424, upload-time = "2025-09-09T10:57:00.695Z" }, +] + [[package]] name = "python-dateutil" version = "2.9.0.post0" @@ -1136,6 +1254,7 @@ dev = [ { name = "mkdocstrings", extra = ["python"] }, { name = "pyright" }, { name = "pytest" }, + { name = "pytest-cov" }, { name = "ruff" }, ] @@ -1153,6 +1272,7 @@ dev = [ { name = "mkdocstrings", extras = ["python"], specifier = ">=0.24.0" }, { name = "pyright", specifier = ">=1.1.370,<1.1.374" }, { name = "pytest", specifier = ">=8.2.2" }, + { name = "pytest-cov", specifier = ">=7.0.0" }, { name = "ruff", specifier = ">=0.4.9" }, ] From 61d5da6c94ac2b14921ac7528458600f70de6e5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philipp=20Gro=C3=9Fer?= Date: Thu, 29 Jan 2026 17:03:48 +0100 Subject: [PATCH 3/7] python 3.12 test fix --- transformplan/protocol.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/transformplan/protocol.py b/transformplan/protocol.py index c977d9a..d7e60e4 100644 --- a/transformplan/protocol.py +++ b/transformplan/protocol.py @@ -24,7 +24,7 @@ import hashlib import json -from datetime import datetime +from datetime import datetime, timezone from pathlib import Path from typing import Any @@ -69,7 +69,7 @@ def __init__(self) -> None: self._steps: list[dict[str, Any]] = [] self._input_hash: str | None = None self._input_shape: tuple[int, int] | None = None - self._created_at: str = datetime.utcnow().isoformat() + "Z" + self._created_at: str = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z") self._metadata: dict[str, Any] = {} def set_input(self, hash_value: str, shape: tuple[int, int]) -> None: From 343de573eddf2746a9eac30b9eaaee0cb769523e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philipp=20Gro=C3=9Fer?= Date: Thu, 29 Jan 2026 17:06:38 +0100 Subject: [PATCH 4/7] formatting fix --- transformplan/protocol.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/transformplan/protocol.py b/transformplan/protocol.py index d7e60e4..8f227e9 100644 --- a/transformplan/protocol.py +++ b/transformplan/protocol.py @@ -69,7 +69,9 @@ def __init__(self) -> None: self._steps: list[dict[str, Any]] = [] self._input_hash: str | None = None self._input_shape: tuple[int, int] | None = None - self._created_at: str = 
From 61d5da6c94ac2b14921ac7528458600f70de6e5a Mon Sep 17 00:00:00 2001
From: Philipp Großer
Date: Thu, 29 Jan 2026 17:03:48 +0100
Subject: [PATCH 3/7] python 3.12 test fix

---
 transformplan/protocol.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/transformplan/protocol.py b/transformplan/protocol.py
index c977d9a..d7e60e4 100644
--- a/transformplan/protocol.py
+++ b/transformplan/protocol.py
@@ -24,7 +24,7 @@
 
 import hashlib
 import json
-from datetime import datetime
+from datetime import datetime, timezone
 from pathlib import Path
 from typing import Any
 
@@ -69,7 +69,7 @@ def __init__(self) -> None:
         self._steps: list[dict[str, Any]] = []
         self._input_hash: str | None = None
         self._input_shape: tuple[int, int] | None = None
-        self._created_at: str = datetime.utcnow().isoformat() + "Z"
+        self._created_at: str = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
         self._metadata: dict[str, Any] = {}
 
     def set_input(self, hash_value: str, shape: tuple[int, int]) -> None:
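The fix in patch 3/7 matters because `datetime.utcnow()` raises a DeprecationWarning on Python 3.12+, and the test job runs pytest with `-W error`, which turns that warning into a failure. The replacement also preserves the serialized timestamp: an aware UTC datetime's isoformat() ends in "+00:00", and the replace() restores the previous "Z" suffix. A small self-contained check:

# Demonstrates the rationale for patch 3/7: timezone-aware "now" plus a
# string substitution reproduces the old `utcnow().isoformat() + "Z"` shape.
from datetime import datetime, timezone

aware = datetime.now(timezone.utc)
assert aware.tzinfo is not None  # aware, unlike the naive datetime.utcnow()

stamp = aware.isoformat().replace("+00:00", "Z")
assert stamp.endswith("Z")
print(stamp)  # e.g. 2026-01-29T16:03:48.123456Z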
+ """ return self.num_chunks def summary(self) -> str: @@ -452,12 +484,14 @@ def summary(self) -> str: lines.append(f"Partition key: {self._partition_key}") if self._chunk_size: lines.append(f"Target chunk size: {self._chunk_size:,}") - lines.append("-" * 70) - - # Summary stats - lines.append(f"Chunks processed: {self.num_chunks}") - lines.append(f"Total input rows: {self.total_input_rows:,}") - lines.append(f"Total output rows: {self.total_output_rows:,}") + lines.extend( + [ + "-" * 70, + f"Chunks processed: {self.num_chunks}", + f"Total input rows: {self.total_input_rows:,}", + f"Total output rows: {self.total_output_rows:,}", + ] + ) rows_diff = self.total_output_rows - self.total_input_rows if rows_diff != 0: lines.append(f"Row change: {rows_diff:+,}") @@ -465,16 +499,17 @@ def summary(self) -> str: if self.num_chunks > 0: avg_time = self.total_elapsed_seconds / self.num_chunks lines.append(f"Avg time per chunk: {avg_time:.4f}s") - lines.append(f"Output hash: {self.output_hash()}") - lines.append("-" * 70) + lines.extend((f"Output hash: {self.output_hash()}", "-" * 70)) # Per-chunk details if self._chunks: - lines.append("") - lines.append( - f"{'#':<6} {'Input':<12} {'Output':<12} {'Change':<10} {'Time':<10} {'Hash':<16}" + lines.extend( + ( + "", + f"{'#':<6} {'Input':<12} {'Output':<12} {'Change':<10} {'Time':<10} {'Hash':<16}", + "-" * 70, + ) ) - lines.append("-" * 70) for chunk in self._chunks: idx = str(chunk.chunk_index) @@ -494,10 +529,10 @@ def summary(self) -> str: def print(self) -> None: """Print the protocol summary to stdout.""" - print(self.summary()) + print(self.summary()) # noqa: T201 -def validate_chunked_pipeline( +def validate_chunked_pipeline( # noqa: C901 operations: list[tuple[Any, dict[str, Any]]], partition_key: str | list[str] | None = None, ) -> ChunkValidationResult: diff --git a/transformplan/core.py b/transformplan/core.py index 9e24f05..93b276c 100644 --- a/transformplan/core.py +++ b/transformplan/core.py @@ -54,6 +54,7 @@ class TransformPlanBase: VERSION = "1.0" def __init__(self) -> None: + """Initialize an empty TransformPlanBase.""" self._operations: list[tuple[Callable[..., pl.DataFrame], dict[str, Any]]] = [] def _register( @@ -61,25 +62,27 @@ def _register( method: Callable[..., pl.DataFrame], params: dict[str, Any], ) -> Self: - """Register an operation for deferred execution.""" + """Register an operation for deferred execution. + + Returns: + Self for method chaining. + """ self._operations.append((method, params)) return self def process( - self, data: pl.DataFrame, validate: bool = True + self, data: pl.DataFrame, *, validate: bool = True ) -> tuple[pl.DataFrame, Protocol]: """Execute all registered operations and return transformed data with protocol. Args: data: DataFrame to process. validate: If True, validate schema before execution (default). - Set to False for performance in hot loops with pre-validated pipelines. + Set to False for performance in hot loops with pre-validated + pipelines. Returns: Tuple of (processed DataFrame, Protocol). - - Raises: - SchemaValidationError: If validate=True and validation fails. """ if validate: validate_schema(self._operations, dict(data.schema)).raise_if_invalid() @@ -152,7 +155,11 @@ def dry_run(self, data: pl.DataFrame) -> DryRunResult: return dry_run_schema(self._operations, dict(data.schema)) def to_dict(self) -> dict[str, Any]: - """Serialize the pipeline to a dictionary.""" + """Serialize the pipeline to a dictionary. + + Returns: + Dictionary representation of the pipeline. 
+ """ steps = [] for method, params in self._operations: op_name = method.__name__.lstrip("_") @@ -177,6 +184,9 @@ def from_dict(cls, data: dict[str, Any]) -> Self: Returns: New TransformPlan instance with operations loaded. + + Raises: + ValueError: If an unknown operation is encountered. """ plan = cls() @@ -187,7 +197,8 @@ def from_dict(cls, data: dict[str, Any]) -> Self: # Find the public method on the class method = getattr(plan, op_name, None) if method is None: - raise ValueError(f"Unknown operation: {op_name}") + msg = f"Unknown operation: {op_name}" + raise ValueError(msg) # Call the method with params to register the operation method(**params) @@ -231,10 +242,19 @@ def from_json(cls, source: str | Path) -> Self: return cls.from_dict(json.loads(content)) def __len__(self) -> int: - """Return number of registered operations.""" + """Return number of registered operations. + + Returns: + Number of operations. + """ return len(self._operations) def __repr__(self) -> str: + """Return string representation. + + Returns: + Human-readable representation. + """ return f"TransformPlan({len(self._operations)} operations)" def to_python(self, variable_name: str = "plan") -> str: @@ -247,8 +267,7 @@ def to_python(self, variable_name: str = "plan") -> str: Python code string. """ lines = ["from transformplan import TransformPlan, Col", ""] - lines.append(f"{variable_name} = (") - lines.append(" TransformPlan()") + lines.extend((f"{variable_name} = (", " TransformPlan()")) for method, params in self._operations: op_name = method.__name__.lstrip("_") @@ -259,7 +278,11 @@ def to_python(self, variable_name: str = "plan") -> str: return "\n".join(lines) def _format_params_as_python(self, params: dict[str, Any]) -> str: - """Format parameters as Python code.""" + """Format parameters as Python code. + + Returns: + Python code string for the parameters. + """ parts = [] for key, value in params.items(): @@ -272,21 +295,23 @@ def _format_params_as_python(self, params: dict[str, Any]) -> str: parts.append(filter_str) elif isinstance(value, str): parts.append(f'{key}="{value}"') - elif isinstance(value, bool): - parts.append(f"{key}={value}") - elif isinstance(value, (int, float)): + elif isinstance(value, (bool, int, float)): parts.append(f"{key}={value}") - elif isinstance(value, list): - parts.append(f"{key}={value!r}") - elif isinstance(value, dict): + elif isinstance(value, (list, dict)): parts.append(f"{key}={value!r}") else: parts.append(f"{key}={value!r}") return ", ".join(parts) - def _format_filter_as_python(self, filter_dict: dict[str, Any]) -> str: - """Convert a filter dict back to Col() expression string.""" + def _format_filter_as_python( # noqa: C901 + self, filter_dict: dict[str, Any] + ) -> str: + """Convert a filter dict back to Col() expression string. + + Returns: + Python code string for the filter. 
+ """ filter_type = filter_dict.get("type", "") # Logical operators @@ -294,11 +319,11 @@ def _format_filter_as_python(self, filter_dict: dict[str, Any]) -> str: left = self._format_filter_as_python(filter_dict["left"]) right = self._format_filter_as_python(filter_dict["right"]) return f"({left}) & ({right})" - elif filter_type == "or": + if filter_type == "or": left = self._format_filter_as_python(filter_dict["left"]) right = self._format_filter_as_python(filter_dict["right"]) return f"({left}) | ({right})" - elif filter_type == "not": + if filter_type == "not": operand = self._format_filter_as_python(filter_dict["operand"]) return f"~({operand})" @@ -318,25 +343,25 @@ def _format_filter_as_python(self, filter_dict: dict[str, Any]) -> str: if filter_type in op_map: op = op_map[filter_type] return f'Col("{col}") {op} {val!r}' - elif filter_type == "is_in": + if filter_type == "is_in": values = filter_dict.get("values", []) return f'Col("{col}").is_in({values!r})' - elif filter_type == "is_null": + if filter_type == "is_null": return f'Col("{col}").is_null()' - elif filter_type == "is_not_null": + if filter_type == "is_not_null": return f'Col("{col}").is_not_null()' - elif filter_type == "between": + if filter_type == "between": lower = filter_dict.get("lower") upper = filter_dict.get("upper") return f'Col("{col}").between({lower!r}, {upper!r})' - elif filter_type == "str_contains": + if filter_type == "str_contains": pattern = filter_dict.get("pattern", "") literal = filter_dict.get("literal", True) return f'Col("{col}").str_contains({pattern!r}, literal={literal})' - elif filter_type == "str_starts_with": + if filter_type == "str_starts_with": prefix = filter_dict.get("prefix", "") return f'Col("{col}").str_starts_with({prefix!r})' - elif filter_type == "str_ends_with": + if filter_type == "str_ends_with": suffix = filter_dict.get("suffix", "") return f'Col("{col}").str_ends_with({suffix!r})' diff --git a/transformplan/filters.py b/transformplan/filters.py index 6576062..390aef3 100644 --- a/transformplan/filters.py +++ b/transformplan/filters.py @@ -97,11 +97,13 @@ def from_dict(cls, data: dict[str, Any]) -> Filter: """ filter_type = data.get("type") if filter_type is None: - raise ValueError("Missing 'type' in filter dict") + msg = "Missing 'type' in filter dict" + raise ValueError(msg) filter_cls = _FILTER_REGISTRY.get(filter_type) if filter_cls is None: - raise ValueError(f"Unknown filter type: {filter_type}") + msg = f"Unknown filter type: {filter_type}" + raise ValueError(msg) return filter_cls._from_dict(data) @@ -195,7 +197,7 @@ def __init__(self, name: str) -> None: """ self.name = name - def __eq__(self, value: Any) -> Eq: # type: ignore[override] + def __eq__(self, value: object) -> Eq: # type: ignore[override] """Create an equality filter (column == value). Args: @@ -206,7 +208,7 @@ def __eq__(self, value: Any) -> Eq: # type: ignore[override] """ return Eq(self.name, value) - def __ne__(self, value: Any) -> Ne: # type: ignore[override] + def __ne__(self, value: object) -> Ne: # type: ignore[override] """Create an inequality filter (column != value). Args: @@ -217,7 +219,7 @@ def __ne__(self, value: Any) -> Ne: # type: ignore[override] """ return Ne(self.name, value) - def __gt__(self, value: Any) -> Gt: + def __gt__(self, value: Any) -> Gt: # noqa: ANN401 """Create a greater-than filter (column > value). 
Args: @@ -228,7 +230,7 @@ def __gt__(self, value: Any) -> Gt: """ return Gt(self.name, value) - def __ge__(self, value: Any) -> Ge: + def __ge__(self, value: Any) -> Ge: # noqa: ANN401 """Create a greater-or-equal filter (column >= value). Args: @@ -239,7 +241,7 @@ def __ge__(self, value: Any) -> Ge: """ return Ge(self.name, value) - def __lt__(self, value: Any) -> Lt: + def __lt__(self, value: Any) -> Lt: # noqa: ANN401 """Create a less-than filter (column < value). Args: @@ -250,7 +252,7 @@ def __lt__(self, value: Any) -> Lt: """ return Lt(self.name, value) - def __le__(self, value: Any) -> Le: + def __le__(self, value: Any) -> Le: # noqa: ANN401 """Create a less-or-equal filter (column <= value). Args: @@ -297,8 +299,8 @@ def is_not_null(self) -> IsNotNull: """ return IsNotNull(self.name) - def str_contains(self, pattern: str, literal: bool = True) -> StrContains: - """Create a string contains filter. + def str_contains(self, pattern: str, *, literal: bool = True) -> StrContains: + r"""Create a string contains filter. Args: pattern: Substring or regex pattern to search for. @@ -341,7 +343,7 @@ def str_ends_with(self, suffix: str) -> StrEndsWith: """ return StrEndsWith(self.name, suffix) - def between(self, lower: Any, upper: Any) -> Between: + def between(self, lower: Any, upper: Any) -> Between: # noqa: ANN401 """Create a range filter (lower <= column <= upper). Args: @@ -376,16 +378,28 @@ class Eq(Filter): value: Any def to_expr(self) -> pl.Expr: - """Convert to Polars equality expression.""" + """Convert to Polars equality expression. + + Returns: + Polars expression for equality comparison. + """ return pl.col(self.column) == self.value def to_dict(self) -> dict[str, Any]: - """Serialize to dictionary.""" + """Serialize to dictionary. + + Returns: + Dictionary representation with type, column, and value. + """ return {"type": "eq", "column": self.column, "value": self.value} @classmethod def _from_dict(cls, data: dict[str, Any]) -> Eq: - """Create from dictionary.""" + """Create from dictionary. + + Returns: + New Eq instance. + """ return cls(data["column"], data["value"]) @@ -402,16 +416,28 @@ class Ne(Filter): value: Any def to_expr(self) -> pl.Expr: - """Convert to Polars inequality expression.""" + """Convert to Polars inequality expression. + + Returns: + Polars expression for inequality comparison. + """ return pl.col(self.column) != self.value def to_dict(self) -> dict[str, Any]: - """Serialize to dictionary.""" + """Serialize to dictionary. + + Returns: + Dictionary representation with type, column, and value. + """ return {"type": "ne", "column": self.column, "value": self.value} @classmethod def _from_dict(cls, data: dict[str, Any]) -> Ne: - """Create from dictionary.""" + """Create from dictionary. + + Returns: + New Ne instance. + """ return cls(data["column"], data["value"]) @@ -428,16 +454,28 @@ class Gt(Filter): value: Any def to_expr(self) -> pl.Expr: - """Convert to Polars greater-than expression.""" + """Convert to Polars greater-than expression. + + Returns: + Polars expression for greater-than comparison. + """ return pl.col(self.column) > self.value def to_dict(self) -> dict[str, Any]: - """Serialize to dictionary.""" + """Serialize to dictionary. + + Returns: + Dictionary representation with type, column, and value. + """ return {"type": "gt", "column": self.column, "value": self.value} @classmethod def _from_dict(cls, data: dict[str, Any]) -> Gt: - """Create from dictionary.""" + """Create from dictionary. + + Returns: + New Gt instance. 
+ """ return cls(data["column"], data["value"]) @@ -454,16 +492,28 @@ class Ge(Filter): value: Any def to_expr(self) -> pl.Expr: - """Convert to Polars greater-or-equal expression.""" + """Convert to Polars greater-or-equal expression. + + Returns: + Polars expression for greater-or-equal comparison. + """ return pl.col(self.column) >= self.value def to_dict(self) -> dict[str, Any]: - """Serialize to dictionary.""" + """Serialize to dictionary. + + Returns: + Dictionary representation with type, column, and value. + """ return {"type": "ge", "column": self.column, "value": self.value} @classmethod def _from_dict(cls, data: dict[str, Any]) -> Ge: - """Create from dictionary.""" + """Create from dictionary. + + Returns: + New Ge instance. + """ return cls(data["column"], data["value"]) @@ -480,16 +530,28 @@ class Lt(Filter): value: Any def to_expr(self) -> pl.Expr: - """Convert to Polars less-than expression.""" + """Convert to Polars less-than expression. + + Returns: + Polars expression for less-than comparison. + """ return pl.col(self.column) < self.value def to_dict(self) -> dict[str, Any]: - """Serialize to dictionary.""" + """Serialize to dictionary. + + Returns: + Dictionary representation with type, column, and value. + """ return {"type": "lt", "column": self.column, "value": self.value} @classmethod def _from_dict(cls, data: dict[str, Any]) -> Lt: - """Create from dictionary.""" + """Create from dictionary. + + Returns: + New Lt instance. + """ return cls(data["column"], data["value"]) @@ -506,16 +568,28 @@ class Le(Filter): value: Any def to_expr(self) -> pl.Expr: - """Convert to Polars less-or-equal expression.""" + """Convert to Polars less-or-equal expression. + + Returns: + Polars expression for less-or-equal comparison. + """ return pl.col(self.column) <= self.value def to_dict(self) -> dict[str, Any]: - """Serialize to dictionary.""" + """Serialize to dictionary. + + Returns: + Dictionary representation with type, column, and value. + """ return {"type": "le", "column": self.column, "value": self.value} @classmethod def _from_dict(cls, data: dict[str, Any]) -> Le: - """Create from dictionary.""" + """Create from dictionary. + + Returns: + New Le instance. + """ return cls(data["column"], data["value"]) @@ -532,16 +606,28 @@ class IsIn(Filter): values: Sequence[Any] def to_expr(self) -> pl.Expr: - """Convert to Polars is_in expression.""" + """Convert to Polars is_in expression. + + Returns: + Polars expression for membership check. + """ return pl.col(self.column).is_in(self.values) def to_dict(self) -> dict[str, Any]: - """Serialize to dictionary.""" + """Serialize to dictionary. + + Returns: + Dictionary representation with type, column, and values. + """ return {"type": "is_in", "column": self.column, "values": list(self.values)} @classmethod def _from_dict(cls, data: dict[str, Any]) -> IsIn: - """Create from dictionary.""" + """Create from dictionary. + + Returns: + New IsIn instance. + """ return cls(data["column"], data["values"]) @@ -560,11 +646,19 @@ class Between(Filter): upper: Any def to_expr(self) -> pl.Expr: - """Convert to Polars is_between expression.""" + """Convert to Polars is_between expression. + + Returns: + Polars expression for range check. + """ return pl.col(self.column).is_between(self.lower, self.upper) def to_dict(self) -> dict[str, Any]: - """Serialize to dictionary.""" + """Serialize to dictionary. + + Returns: + Dictionary representation with type, column, lower, and upper. 
+ """ return { "type": "between", "column": self.column, @@ -574,7 +668,11 @@ def to_dict(self) -> dict[str, Any]: @classmethod def _from_dict(cls, data: dict[str, Any]) -> Between: - """Create from dictionary.""" + """Create from dictionary. + + Returns: + New Between instance. + """ return cls(data["column"], data["lower"], data["upper"]) @@ -594,16 +692,28 @@ class IsNull(Filter): column: str def to_expr(self) -> pl.Expr: - """Convert to Polars is_null expression.""" + """Convert to Polars is_null expression. + + Returns: + Polars expression for null check. + """ return pl.col(self.column).is_null() def to_dict(self) -> dict[str, Any]: - """Serialize to dictionary.""" + """Serialize to dictionary. + + Returns: + Dictionary representation with type and column. + """ return {"type": "is_null", "column": self.column} @classmethod def _from_dict(cls, data: dict[str, Any]) -> IsNull: - """Create from dictionary.""" + """Create from dictionary. + + Returns: + New IsNull instance. + """ return cls(data["column"]) @@ -618,16 +728,28 @@ class IsNotNull(Filter): column: str def to_expr(self) -> pl.Expr: - """Convert to Polars is_not_null expression.""" + """Convert to Polars is_not_null expression. + + Returns: + Polars expression for not-null check. + """ return pl.col(self.column).is_not_null() def to_dict(self) -> dict[str, Any]: - """Serialize to dictionary.""" + """Serialize to dictionary. + + Returns: + Dictionary representation with type and column. + """ return {"type": "is_not_null", "column": self.column} @classmethod def _from_dict(cls, data: dict[str, Any]) -> IsNotNull: - """Create from dictionary.""" + """Create from dictionary. + + Returns: + New IsNotNull instance. + """ return cls(data["column"]) @@ -651,11 +773,19 @@ class StrContains(Filter): literal: bool = True def to_expr(self) -> pl.Expr: - """Convert to Polars str.contains expression.""" + """Convert to Polars str.contains expression. + + Returns: + Polars expression for string containment check. + """ return pl.col(self.column).str.contains(self.pattern, literal=self.literal) def to_dict(self) -> dict[str, Any]: - """Serialize to dictionary.""" + """Serialize to dictionary. + + Returns: + Dictionary representation with type, column, pattern, and literal. + """ return { "type": "str_contains", "column": self.column, @@ -665,7 +795,11 @@ def to_dict(self) -> dict[str, Any]: @classmethod def _from_dict(cls, data: dict[str, Any]) -> StrContains: - """Create from dictionary.""" + """Create from dictionary. + + Returns: + New StrContains instance. + """ return cls(data["column"], data["pattern"], data.get("literal", True)) @@ -682,16 +816,28 @@ class StrStartsWith(Filter): prefix: str def to_expr(self) -> pl.Expr: - """Convert to Polars str.starts_with expression.""" + """Convert to Polars str.starts_with expression. + + Returns: + Polars expression for prefix check. + """ return pl.col(self.column).str.starts_with(self.prefix) def to_dict(self) -> dict[str, Any]: - """Serialize to dictionary.""" + """Serialize to dictionary. + + Returns: + Dictionary representation with type, column, and prefix. + """ return {"type": "str_starts_with", "column": self.column, "prefix": self.prefix} @classmethod def _from_dict(cls, data: dict[str, Any]) -> StrStartsWith: - """Create from dictionary.""" + """Create from dictionary. + + Returns: + New StrStartsWith instance. 
+ """ return cls(data["column"], data["prefix"]) @@ -708,16 +854,28 @@ class StrEndsWith(Filter): suffix: str def to_expr(self) -> pl.Expr: - """Convert to Polars str.ends_with expression.""" + """Convert to Polars str.ends_with expression. + + Returns: + Polars expression for suffix check. + """ return pl.col(self.column).str.ends_with(self.suffix) def to_dict(self) -> dict[str, Any]: - """Serialize to dictionary.""" + """Serialize to dictionary. + + Returns: + Dictionary representation with type, column, and suffix. + """ return {"type": "str_ends_with", "column": self.column, "suffix": self.suffix} @classmethod def _from_dict(cls, data: dict[str, Any]) -> StrEndsWith: - """Create from dictionary.""" + """Create from dictionary. + + Returns: + New StrEndsWith instance. + """ return cls(data["column"], data["suffix"]) @@ -744,11 +902,19 @@ class And(Filter): right: Filter def to_expr(self) -> pl.Expr: - """Convert to Polars AND expression.""" + """Convert to Polars AND expression. + + Returns: + Polars expression combining both conditions with AND. + """ return self.left.to_expr() & self.right.to_expr() def to_dict(self) -> dict[str, Any]: - """Serialize to dictionary with nested filter dicts.""" + """Serialize to dictionary with nested filter dicts. + + Returns: + Dictionary representation with type, left, and right. + """ return { "type": "and", "left": self.left.to_dict(), @@ -757,7 +923,11 @@ def to_dict(self) -> dict[str, Any]: @classmethod def _from_dict(cls, data: dict[str, Any]) -> And: - """Create from dictionary, recursively deserializing children.""" + """Create from dictionary, recursively deserializing children. + + Returns: + New And instance. + """ return cls( Filter.from_dict(data["left"]), Filter.from_dict(data["right"]), @@ -782,11 +952,19 @@ class Or(Filter): right: Filter def to_expr(self) -> pl.Expr: - """Convert to Polars OR expression.""" + """Convert to Polars OR expression. + + Returns: + Polars expression combining both conditions with OR. + """ return self.left.to_expr() | self.right.to_expr() def to_dict(self) -> dict[str, Any]: - """Serialize to dictionary with nested filter dicts.""" + """Serialize to dictionary with nested filter dicts. + + Returns: + Dictionary representation with type, left, and right. + """ return { "type": "or", "left": self.left.to_dict(), @@ -795,7 +973,11 @@ def to_dict(self) -> dict[str, Any]: @classmethod def _from_dict(cls, data: dict[str, Any]) -> Or: - """Create from dictionary, recursively deserializing children.""" + """Create from dictionary, recursively deserializing children. + + Returns: + New Or instance. + """ return cls( Filter.from_dict(data["left"]), Filter.from_dict(data["right"]), @@ -818,16 +1000,28 @@ class Not(Filter): operand: Filter def to_expr(self) -> pl.Expr: - """Convert to Polars NOT expression.""" + """Convert to Polars NOT expression. + + Returns: + Polars expression inverting the operand condition. + """ return ~self.operand.to_expr() def to_dict(self) -> dict[str, Any]: - """Serialize to dictionary with nested filter dict.""" + """Serialize to dictionary with nested filter dict. + + Returns: + Dictionary representation with type and operand. + """ return {"type": "not", "operand": self.operand.to_dict()} @classmethod def _from_dict(cls, data: dict[str, Any]) -> Not: - """Create from dictionary, recursively deserializing operand.""" + """Create from dictionary, recursively deserializing operand. + + Returns: + New Not instance. 
+ """ return cls(Filter.from_dict(data["operand"])) diff --git a/transformplan/ops/column.py b/transformplan/ops/column.py index bd467f3..8e460a6 100644 --- a/transformplan/ops/column.py +++ b/transformplan/ops/column.py @@ -37,7 +37,6 @@ if TYPE_CHECKING: from typing import Any, Callable - import polars as pl from typing_extensions import Self @@ -53,14 +52,22 @@ def _register( ) -> Self: ... def col_drop(self, column: str) -> Self: - """Drop a column from the DataFrame.""" + """Drop a column from the DataFrame. + + Returns: + Self for method chaining. + """ return self._register(self._col_drop, {"column": column}) def _col_drop(self, data: pl.DataFrame, column: str) -> pl.DataFrame: return data.drop(column) def col_rename(self, column: str, new_name: str) -> Self: - """Rename a column.""" + """Rename a column. + + Returns: + Self for method chaining. + """ return self._register( self._col_rename, {"column": column, "new_name": new_name} ) @@ -71,21 +78,33 @@ def _col_rename( return data.rename({column: new_name}) def col_cast(self, column: str, dtype: type) -> Self: - """Cast a column to a different dtype.""" + """Cast a column to a different dtype. + + Returns: + Self for method chaining. + """ return self._register(self._col_cast, {"column": column, "dtype": dtype}) def _col_cast(self, data: pl.DataFrame, column: str, dtype: type) -> pl.DataFrame: return data.with_columns(pl.col(column).cast(dtype)) def col_reorder(self, columns: Sequence[str]) -> Self: - """Reorder columns. Unlisted columns are dropped.""" + """Reorder columns. Unlisted columns are dropped. + + Returns: + Self for method chaining. + """ return self._register(self._col_reorder, {"columns": list(columns)}) def _col_reorder(self, data: pl.DataFrame, columns: list[str]) -> pl.DataFrame: return data.select(columns) def col_duplicate(self, column: str, new_name: str) -> Self: - """Duplicate a column under a new name.""" + """Duplicate a column under a new name. + + Returns: + Self for method chaining. + """ return self._register( self._col_duplicate, {"column": column, "new_name": new_name} ) @@ -96,14 +115,21 @@ def _col_duplicate( return data.with_columns(pl.col(column).alias(new_name)) def col_fill_null( - self, column: str, value: Any = None, strategy: str | None = None + self, + column: str, + value: Any = None, # noqa: ANN401 + strategy: str | None = None, ) -> Self: """Fill null values in a column. Args: column: Column to fill. value: Value to fill nulls with (if strategy is None). - strategy: Fill strategy - 'forward', 'backward', 'mean', 'min', 'max', 'zero', 'one'. + strategy: Fill strategy - 'forward', 'backward', 'mean', 'min', 'max', + 'zero', 'one'. + + Returns: + Self for method chaining. """ return self._register( self._col_fill_null, @@ -111,7 +137,11 @@ def col_fill_null( ) def _col_fill_null( - self, data: pl.DataFrame, column: str, value: Any, strategy: str | None + self, + data: pl.DataFrame, + column: str, + value: Any, # noqa: ANN401 + strategy: str | None, ) -> pl.DataFrame: if strategy is not None: return data.with_columns(pl.col(column).fill_null(strategy=strategy)) @@ -122,6 +152,9 @@ def col_drop_null(self, columns: str | Sequence[str] | None = None) -> Self: Args: columns: Column(s) to check for nulls. If None, checks all columns. + + Returns: + Self for method chaining. 
""" if isinstance(columns, str): columns = [columns] @@ -133,7 +166,11 @@ def _col_drop_null( return data.drop_nulls(subset=columns) def col_drop_zero(self, column: str) -> Self: - """Drop rows where the specified column is zero.""" + """Drop rows where the specified column is zero. + + Returns: + Self for method chaining. + """ return self._register(self._col_drop_zero, {"column": column}) def _col_drop_zero(self, data: pl.DataFrame, column: str) -> pl.DataFrame: @@ -142,8 +179,8 @@ def _col_drop_zero(self, data: pl.DataFrame, column: str) -> pl.DataFrame: def col_add( self, new_column: str, - expr: str | int | float | None = None, - value: Any = None, + expr: str | float | None = None, + value: Any = None, # noqa: ANN401 ) -> Self: """Add a new column with a constant value or expression. @@ -151,13 +188,20 @@ def col_add( new_column: Name of the new column. expr: Column name to copy from, or None for constant value. value: Constant value to fill the column with. + + Returns: + Self for method chaining. """ return self._register( self._col_add, {"new_column": new_column, "expr": expr, "value": value} ) def _col_add( - self, data: pl.DataFrame, new_column: str, expr: str | None, value: Any + self, + data: pl.DataFrame, + new_column: str, + expr: str | None, + value: Any, # noqa: ANN401 ) -> pl.DataFrame: if expr is not None: return data.with_columns(pl.col(expr).alias(new_column)) @@ -169,6 +213,9 @@ def col_add_uuid(self, column: str, length: int = 16) -> Self: Args: column: Name of the new column. length: Length of the identifier string. + + Returns: + Self for method chaining. """ return self._register(self._col_add_uuid, {"column": column, "length": length}) @@ -194,6 +241,9 @@ def col_hash( columns: Column(s) to hash. new_column: Name for the hash column. salt: Optional salt to add to the hash. + + Returns: + Self for method chaining. """ if isinstance(columns, str): columns = [columns] @@ -223,6 +273,9 @@ def col_coalesce( Args: columns: Columns to coalesce (in priority order). new_column: Name for the result column. + + Returns: + Self for method chaining. """ return self._register( self._col_coalesce, {"columns": list(columns), "new_column": new_column} @@ -240,6 +293,9 @@ def col_select(self, columns: Sequence[str]) -> Self: Args: columns: Columns to keep. + + Returns: + Self for method chaining. """ return self._register(self._col_select, {"columns": list(columns)}) diff --git a/transformplan/ops/datetime.py b/transformplan/ops/datetime.py index 11a9230..35db87d 100644 --- a/transformplan/ops/datetime.py +++ b/transformplan/ops/datetime.py @@ -43,7 +43,6 @@ if TYPE_CHECKING: from typing import Any, Callable - import polars as pl from typing_extensions import Self @@ -64,6 +63,9 @@ def dt_year(self, column: str, new_column: str | None = None) -> Self: Args: column: Source datetime column. new_column: Name for result column (None = modify in place). + + Returns: + Self for method chaining. """ return self._register( self._dt_year, {"column": column, "new_column": new_column or column} @@ -80,6 +82,9 @@ def dt_month(self, column: str, new_column: str | None = None) -> Self: Args: column: Source datetime column. new_column: Name for result column (None = modify in place). + + Returns: + Self for method chaining. """ return self._register( self._dt_month, {"column": column, "new_column": new_column or column} @@ -96,6 +101,9 @@ def dt_day(self, column: str, new_column: str | None = None) -> Self: Args: column: Source datetime column. 
new_column: Name for result column (None = modify in place). + + Returns: + Self for method chaining. """ return self._register( self._dt_day, {"column": column, "new_column": new_column or column} @@ -110,6 +118,9 @@ def dt_week(self, column: str, new_column: str | None = None) -> Self: Args: column: Source datetime column. new_column: Name for result column (None = modify in place). + + Returns: + Self for method chaining. """ return self._register( self._dt_week, {"column": column, "new_column": new_column or column} @@ -126,6 +137,9 @@ def dt_quarter(self, column: str, new_column: str | None = None) -> Self: Args: column: Source datetime column. new_column: Name for result column (None = modify in place). + + Returns: + Self for method chaining. """ return self._register( self._dt_quarter, {"column": column, "new_column": new_column or column} @@ -143,6 +157,9 @@ def dt_year_month(self, column: str, new_column: str, fmt: str = "%Y-%m") -> Sel column: Source datetime column. new_column: Name for result column. fmt: Output format string. + + Returns: + Self for method chaining. """ return self._register( self._dt_year_month, @@ -160,6 +177,9 @@ def dt_quarter_year(self, column: str, new_column: str) -> Self: Args: column: Source datetime column. new_column: Name for result column. + + Returns: + Self for method chaining. """ return self._register( self._dt_quarter_year, {"column": column, "new_column": new_column} @@ -183,6 +203,9 @@ def dt_calendar_week(self, column: str, new_column: str) -> Self: Args: column: Source datetime column. new_column: Name for result column. + + Returns: + Self for method chaining. """ return self._register( self._dt_calendar_week, {"column": column, "new_column": new_column} @@ -211,6 +234,9 @@ def dt_parse( column: Source string column. fmt: Date format string. new_column: Name for result column (None = modify in place). + + Returns: + Self for method chaining. """ return self._register( self._dt_parse, @@ -231,6 +257,9 @@ def dt_format(self, column: str, fmt: str, new_column: str | None = None) -> Sel column: Source datetime column. fmt: Output format string. new_column: Name for result column (None = modify in place). + + Returns: + Self for method chaining. """ return self._register( self._dt_format, @@ -249,6 +278,9 @@ def dt_diff_days(self, column_a: str, column_b: str, new_column: str) -> Self: column_a: First date column. column_b: Second date column. new_column: Name for result column. + + Returns: + Self for method chaining. """ return self._register( self._dt_diff_days, @@ -274,6 +306,9 @@ def dt_age_years( birth_column: Column containing birth dates. reference_column: Column containing reference dates (None = today). new_column: Name for result column. + + Returns: + Self for method chaining. """ return self._register( self._dt_age_years, @@ -316,6 +351,9 @@ def dt_truncate( column: Source datetime column. every: Truncation interval ('1d', '1mo', '1y', '1h', etc.). new_column: Name for result column (None = modify in place). + + Returns: + Self for method chaining. """ return self._register( self._dt_truncate, @@ -343,6 +381,9 @@ def dt_is_between( end: End date (string, will be parsed). new_column: Name for boolean result column. closed: Which endpoints to include ('both', 'left', 'right', 'none'). + + Returns: + Self for method chaining. 
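+
+        Example:
+            A sketch on a hypothetical ``plan`` pipeline; keyword arguments
+            follow the parameters documented above:
+
+            >>> plan.dt_is_between(
+            ...     column="visit_date",
+            ...     start="2024-01-01",
+            ...     end="2024-12-31",
+            ...     new_column="in_2024",
+            ... )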
""" return self._register( self._dt_is_between, diff --git a/transformplan/ops/map.py b/transformplan/ops/map.py index 5c3f544..c01eb1a 100644 --- a/transformplan/ops/map.py +++ b/transformplan/ops/map.py @@ -34,7 +34,6 @@ if TYPE_CHECKING: from typing import Any, Callable - import polars as pl from typing_extensions import Self @@ -53,7 +52,8 @@ def map_values( self, column: str, mapping: dict[Any, Any], - default: Any = None, + default: Any = None, # noqa: ANN401 + *, keep_unmapped: bool = True, ) -> Self: """Map values in a column using a dictionary. @@ -63,6 +63,9 @@ def map_values( mapping: Dictionary mapping old values to new values. default: Default value for unmapped values (if keep_unmapped=False). keep_unmapped: If True, keep original value when not in mapping. + + Returns: + Self for method chaining. """ return self._register( self._map_values, @@ -79,8 +82,8 @@ def _map_values( data: pl.DataFrame, column: str, mapping: dict[Any, Any], - default: Any, - keep_unmapped: bool, + default: Any, # noqa: ANN401 + keep_unmapped: bool, # noqa: FBT001 ) -> pl.DataFrame: # Build a when/then chain for the mapping expr = pl.col(column) @@ -109,6 +112,7 @@ def map_discretize( bins: Sequence[float], labels: Sequence[str] | None = None, new_column: str | None = None, + *, right: bool = True, ) -> Self: """Discretize a numeric column into bins/categories. @@ -119,6 +123,9 @@ def map_discretize( labels: Labels for each bin (must be len(bins)+1 if provided). new_column: Name for result column (None = modify in place). right: If True, bins are (left, right]. If False, [left, right). + + Returns: + Self for method chaining. """ return self._register( self._map_discretize, @@ -138,12 +145,12 @@ def _map_discretize( bins: list[float], labels: list[str] | None, new_column: str, - right: bool, + right: bool, # noqa: FBT001 ) -> pl.DataFrame: # Create labels if not provided if labels is None: labels = [] - edges = [-float("inf")] + bins + [float("inf")] + edges = [-float("inf"), *bins, float("inf")] for i in range(len(edges) - 1): if right: labels.append(f"({edges[i]}, {edges[i + 1]}]") @@ -152,7 +159,7 @@ def _map_discretize( # Build when/then chain col = pl.col(column) - edges = [-float("inf")] + bins + [float("inf")] + edges = [-float("inf"), *bins, float("inf")] # First bin if right: @@ -174,31 +181,49 @@ def _map_discretize( return data.with_columns(chain.alias(new_column)) def map_bool_to_int(self, column: str) -> Self: - """Convert a boolean column to integer (True=1, False=0).""" + """Convert a boolean column to integer (True=1, False=0). + + Returns: + Self for method chaining. + """ return self._register(self._map_bool_to_int, {"column": column}) def _map_bool_to_int(self, data: pl.DataFrame, column: str) -> pl.DataFrame: return data.with_columns(pl.col(column).cast(pl.Int64)) - def map_null_to_value(self, column: str, value: Any) -> Self: - """Replace null values with a specific value.""" + def map_null_to_value(self, column: str, value: Any) -> Self: # noqa: ANN401 + """Replace null values with a specific value. + + Returns: + Self for method chaining. 
+ """ return self._register( self._map_null_to_value, {"column": column, "value": value} ) def _map_null_to_value( - self, data: pl.DataFrame, column: str, value: Any + self, + data: pl.DataFrame, + column: str, + value: Any, # noqa: ANN401 ) -> pl.DataFrame: return data.with_columns(pl.col(column).fill_null(value)) - def map_value_to_null(self, column: str, value: Any) -> Self: - """Replace a specific value with null.""" + def map_value_to_null(self, column: str, value: Any) -> Self: # noqa: ANN401 + """Replace a specific value with null. + + Returns: + Self for method chaining. + """ return self._register( self._map_value_to_null, {"column": column, "value": value} ) def _map_value_to_null( - self, data: pl.DataFrame, column: str, value: Any + self, + data: pl.DataFrame, + column: str, + value: Any, # noqa: ANN401 ) -> pl.DataFrame: return data.with_columns( pl.when(pl.col(column) == value) @@ -211,7 +236,7 @@ def map_case( self, column: str, cases: list[tuple[Any, Any]], - default: Any = None, + default: Any = None, # noqa: ANN401 new_column: str | None = None, ) -> Self: """Apply case-when logic to a column. @@ -222,6 +247,9 @@ def map_case( default: Default value if no case matches. new_column: Name for result column (None = modify in place). + Returns: + Self for method chaining. + Example: .map_case('grade', [(90, 'A'), (80, 'B'), (70, 'C')], default='F') Maps: >= 90 -> A, >= 80 -> B, >= 70 -> C, else F @@ -241,7 +269,7 @@ def _map_case( data: pl.DataFrame, column: str, cases: list[tuple[Any, Any]], - default: Any, + default: Any, # noqa: ANN401 new_column: str, ) -> pl.DataFrame: if not cases: @@ -263,7 +291,7 @@ def map_from_column( lookup_column: str, value_column: str, new_column: str | None = None, - default: Any = None, + default: Any = None, # noqa: ANN401 ) -> Self: """Map values using another column as lookup (like vlookup). @@ -276,6 +304,9 @@ def map_from_column( value_column: Column containing values to map to. new_column: Name for result column (None = modify in place). default: Default value if lookup fails. + + Returns: + Self for method chaining. """ return self._register( self._map_from_column, @@ -295,10 +326,16 @@ def _map_from_column( lookup_column: str, value_column: str, new_column: str, - default: Any, + default: Any, # noqa: ANN401 ) -> pl.DataFrame: # Build lookup dict from the data - lookup = dict(zip(data[lookup_column].to_list(), data[value_column].to_list())) + lookup = dict( + zip( + data[lookup_column].to_list(), + data[value_column].to_list(), + strict=False, + ) + ) return data.with_columns( pl.col(column).replace(lookup, default=default).alias(new_column) diff --git a/transformplan/ops/math.py b/transformplan/ops/math.py index 2de431c..cc8e78a 100644 --- a/transformplan/ops/math.py +++ b/transformplan/ops/math.py @@ -41,7 +41,6 @@ if TYPE_CHECKING: from typing import Any, Callable - import polars as pl from typing_extensions import Self Numeric = Union[int, float] @@ -59,7 +58,11 @@ def _register( ) -> Self: ... def math_add(self, column: str, value: Numeric) -> Self: - """Add a scalar value to a column.""" + """Add a scalar value to a column. + + Returns: + Self for method chaining. + """ return self._register(self._math_add, {"column": column, "value": value}) def _math_add( @@ -68,7 +71,11 @@ def _math_add( return data.with_columns(pl.col(column) + value) def math_subtract(self, column: str, value: Numeric) -> Self: - """Subtract a scalar value from a column.""" + """Subtract a scalar value from a column. + + Returns: + Self for method chaining. 
+ """ return self._register(self._math_subtract, {"column": column, "value": value}) def _math_subtract( @@ -77,7 +84,11 @@ def _math_subtract( return data.with_columns(pl.col(column) - value) def math_multiply(self, column: str, value: Numeric) -> Self: - """Multiply a column by a scalar value.""" + """Multiply a column by a scalar value. + + Returns: + Self for method chaining. + """ return self._register(self._math_multiply, {"column": column, "value": value}) def _math_multiply( @@ -86,7 +97,11 @@ def _math_multiply( return data.with_columns(pl.col(column) * value) def math_divide(self, column: str, value: Numeric) -> Self: - """Divide a column by a scalar value.""" + """Divide a column by a scalar value. + + Returns: + Self for method chaining. + """ return self._register(self._math_divide, {"column": column, "value": value}) def _math_divide( @@ -100,7 +115,11 @@ def math_clamp( lower: Numeric | None = None, upper: Numeric | None = None, ) -> Self: - """Clamp column values to a range.""" + """Clamp column values to a range. + + Returns: + Self for method chaining. + """ return self._register( self._math_clamp, {"column": column, "lower": lower, "upper": upper} ) @@ -115,7 +134,11 @@ def _math_clamp( return data.with_columns(pl.col(column).clip(lower, upper)) def math_add_columns(self, column_a: str, column_b: str, new_column: str) -> Self: - """Add two columns together into a new column.""" + """Add two columns together into a new column. + + Returns: + Self for method chaining. + """ return self._register( self._math_add_columns, {"column_a": column_a, "column_b": column_b, "new_column": new_column}, @@ -131,7 +154,11 @@ def _math_add_columns( def math_subtract_columns( self, column_a: str, column_b: str, new_column: str ) -> Self: - """Subtract column_b from column_a into a new column.""" + """Subtract column_b from column_a into a new column. + + Returns: + Self for method chaining. + """ return self._register( self._math_subtract_columns, {"column_a": column_a, "column_b": column_b, "new_column": new_column}, @@ -147,7 +174,11 @@ def _math_subtract_columns( def math_multiply_columns( self, column_a: str, column_b: str, new_column: str ) -> Self: - """Multiply two columns together into a new column.""" + """Multiply two columns together into a new column. + + Returns: + Self for method chaining. + """ return self._register( self._math_multiply_columns, {"column_a": column_a, "column_b": column_b, "new_column": new_column}, @@ -163,7 +194,11 @@ def _math_multiply_columns( def math_divide_columns( self, column_a: str, column_b: str, new_column: str ) -> Self: - """Divide column_a by column_b into a new column.""" + """Divide column_a by column_b into a new column. + + Returns: + Self for method chaining. + """ return self._register( self._math_divide_columns, {"column_a": column_a, "column_b": column_b, "new_column": new_column}, @@ -177,7 +212,11 @@ def _math_divide_columns( ) def math_set_min(self, column: str, min_value: Numeric) -> Self: - """Set a minimum value for a column (values below are raised to min).""" + """Set a minimum value for a column (values below are raised to min). + + Returns: + Self for method chaining. + """ return self._register( self._math_set_min, {"column": column, "min_value": min_value} ) @@ -193,7 +232,11 @@ def _math_set_min( ) def math_set_max(self, column: str, max_value: Numeric) -> Self: - """Set a maximum value for a column (values above are lowered to max).""" + """Set a maximum value for a column (values above are lowered to max). 
+ + Returns: + Self for method chaining. + """ return self._register( self._math_set_max, {"column": column, "max_value": max_value} ) @@ -209,14 +252,22 @@ def _math_set_max( ) def math_abs(self, column: str) -> Self: - """Take absolute value of a column.""" + """Take absolute value of a column. + + Returns: + Self for method chaining. + """ return self._register(self._math_abs, {"column": column}) def _math_abs(self, data: pl.DataFrame, column: str) -> pl.DataFrame: return data.with_columns(pl.col(column).abs()) def math_round(self, column: str, decimals: int = 0) -> Self: - """Round a column to specified decimal places.""" + """Round a column to specified decimal places. + + Returns: + Self for method chaining. + """ return self._register( self._math_round, {"column": column, "decimals": decimals} ) @@ -240,6 +291,9 @@ def math_percent_of( total_column: Denominator column. new_column: Name for result column. multiply_by: Multiplier (default 100 for percentage). + + Returns: + Self for method chaining. """ return self._register( self._math_percent_of, @@ -275,6 +329,9 @@ def math_cumsum( column: Column to sum. new_column: Name for result column (None = modify in place). group_by: Optional column(s) to group by. + + Returns: + Self for method chaining. """ if isinstance(group_by, str): group_by = [group_by] @@ -305,6 +362,7 @@ def math_rank( column: str, new_column: str, method: str = "ordinal", + *, descending: bool = False, group_by: str | list[str] | None = None, ) -> Self: @@ -316,6 +374,9 @@ def math_rank( method: Ranking method ('ordinal', 'dense', 'min', 'max', 'average'). descending: Rank in descending order. group_by: Optional column(s) to group by. + + Returns: + Self for method chaining. """ if isinstance(group_by, str): group_by = [group_by] @@ -336,7 +397,7 @@ def _math_rank( column: str, new_column: str, method: str, - descending: bool, + descending: bool, # noqa: FBT001 group_by: list[str] | None, ) -> pl.DataFrame: expr = pl.col(column).rank(method=method, descending=descending) diff --git a/transformplan/ops/rows.py b/transformplan/ops/rows.py index 8d0b079..f716b7e 100644 --- a/transformplan/ops/rows.py +++ b/transformplan/ops/rows.py @@ -43,7 +43,6 @@ if TYPE_CHECKING: from typing import Any, Callable - import polars as pl from typing_extensions import Self @@ -59,7 +58,11 @@ def _register( ) -> Self: ... def rows_drop_nulls(self, columns: str | Sequence[str] | None = None) -> Self: - """Drop rows with null values in specified columns (or any column if None).""" + """Drop rows with null values in specified columns (or any column if None). + + Returns: + Self for method chaining. + """ if isinstance(columns, str): columns = [columns] return self._register(self._rows_drop_nulls, {"columns": columns}) @@ -74,7 +77,11 @@ def rows_unique( columns: str | Sequence[str] | None = None, keep: Literal["first", "last", "any", "none"] = "first", ) -> Self: - """Keep unique rows based on specified columns.""" + """Keep unique rows based on specified columns. + + Returns: + Self for method chaining. + """ if isinstance(columns, str): columns = [columns] return self._register(self._rows_unique, {"columns": columns, "keep": keep}) @@ -87,38 +94,38 @@ def _rows_unique( ) -> pl.DataFrame: return data.unique(subset=columns, keep=keep) - def rows_filter(self, filter: Filter | dict) -> Self: + def rows_filter(self, filter: Filter | dict[str, Any]) -> Self: """Filter rows using a serializable Filter expression. + Returns: + Self for method chaining. 
+ Example: from transformplan.filters import Col .rows_filter(Col("age") > 18) .rows_filter((Col("status") == "active") & (Col("score") >= 50)) """ - if isinstance(filter, dict): - filter_dict = filter - else: - filter_dict = filter.to_dict() + filter_dict = filter if isinstance(filter, dict) else filter.to_dict() return self._register(self._rows_filter, {"filter": filter_dict}) - def _rows_filter(self, data: pl.DataFrame, filter: dict) -> pl.DataFrame: + def _rows_filter(self, data: pl.DataFrame, filter: dict[str, Any]) -> pl.DataFrame: expr = Filter.from_dict(filter).to_expr() return data.filter(expr) - def rows_drop(self, filter: Filter | dict) -> Self: + def rows_drop(self, filter: Filter | dict[str, Any]) -> Self: """Drop rows matching a filter (inverse of rows_filter). + Returns: + Self for method chaining. + Example: .rows_drop(Col("status") == "deleted") """ - if isinstance(filter, dict): - filter_dict = filter - else: - filter_dict = filter.to_dict() + filter_dict = filter if isinstance(filter, dict) else filter.to_dict() return self._register(self._rows_drop, {"filter": filter_dict}) - def _rows_drop(self, data: pl.DataFrame, filter: dict) -> pl.DataFrame: + def _rows_drop(self, data: pl.DataFrame, filter: dict[str, Any]) -> pl.DataFrame: expr = Filter.from_dict(filter).to_expr() return data.filter(~expr) @@ -127,6 +134,7 @@ def rows_deduplicate( columns: str | Sequence[str], sort_by: str, keep: Literal["first", "last"] = "first", + *, descending: bool = False, ) -> Self: """Deduplicate rows by keeping first/last based on sort order. @@ -136,6 +144,9 @@ def rows_deduplicate( sort_by: Column to sort by before deduplication. keep: Keep 'first' or 'last' after sorting. descending: Sort in descending order. + + Returns: + Self for method chaining. """ if isinstance(columns, str): columns = [columns] @@ -155,13 +166,17 @@ def _rows_deduplicate( columns: list[str], sort_by: str, keep: Literal["first", "last"], - descending: bool, + descending: bool, # noqa: FBT001 ) -> pl.DataFrame: sorted_data = data.sort(sort_by, descending=descending) return sorted_data.unique(subset=columns, keep=keep, maintain_order=True) def rows_explode(self, column: str) -> Self: - """Explode a list column into multiple rows.""" + """Explode a list column into multiple rows. + + Returns: + Self for method chaining. + """ return self._register(self._rows_explode, {"column": column}) def _rows_explode(self, data: pl.DataFrame, column: str) -> pl.DataFrame: @@ -181,6 +196,9 @@ def rows_melt( value_columns: Columns to unpivot. variable_name: Name for the variable column. value_name: Name for the value column. + + Returns: + Self for method chaining. """ return self._register( self._rows_melt, @@ -219,6 +237,9 @@ def rows_sample( n: Number of rows to sample. fraction: Fraction of rows to sample (0.0 to 1.0). seed: Random seed for reproducibility. + + Returns: + Self for method chaining. """ return self._register( self._rows_sample, {"n": n, "fraction": fraction, "seed": seed} @@ -234,14 +255,22 @@ def _rows_sample( return data.sample(n=n, fraction=fraction, seed=seed) def rows_head(self, n: int = 5) -> Self: - """Keep only the first n rows.""" + """Keep only the first n rows. + + Returns: + Self for method chaining. + """ return self._register(self._rows_head, {"n": n}) def _rows_head(self, data: pl.DataFrame, n: int) -> pl.DataFrame: return data.head(n) def rows_tail(self, n: int = 5) -> Self: - """Keep only the last n rows.""" + """Keep only the last n rows. + + Returns: + Self for method chaining. 
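+
+        Example:
+            A sketch on a hypothetical ``plan`` pipeline, combined with the
+            sort op defined below (column name is illustrative):
+
+            >>> plan.rows_sort("created_at").rows_tail(10)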
+ """ return self._register(self._rows_tail, {"n": n}) def _rows_tail(self, data: pl.DataFrame, n: int) -> pl.DataFrame: @@ -250,6 +279,7 @@ def _rows_tail(self, data: pl.DataFrame, n: int) -> pl.DataFrame: def rows_sort( self, by: str | Sequence[str], + *, descending: bool | Sequence[bool] = False, ) -> Self: """Sort rows by one or more columns. @@ -257,6 +287,9 @@ def rows_sort( Args: by: Column(s) to sort by. descending: Sort direction (single bool or list matching columns). + + Returns: + Self for method chaining. """ if isinstance(by, str): by = [by] @@ -265,16 +298,20 @@ def rows_sort( ) def _rows_sort( - self, data: pl.DataFrame, by: list[str], descending: bool | Sequence[bool] + self, + data: pl.DataFrame, + by: list[str], + descending: bool | Sequence[bool], # noqa: FBT001 ) -> pl.DataFrame: return data.sort(by, descending=descending) def rows_flag( self, - filter: Filter | dict, + filter: Filter | dict[str, Any], new_column: str, - true_value: Any = True, - false_value: Any = False, + *, + true_value: Any = True, # noqa: ANN401 + false_value: Any = False, # noqa: ANN401 ) -> Self: """Add a flag column based on a filter condition (without dropping rows). @@ -283,11 +320,11 @@ def rows_flag( new_column: Name for the flag column. true_value: Value when condition is True. false_value: Value when condition is False. + + Returns: + Self for method chaining. """ - if isinstance(filter, dict): - filter_dict = filter - else: - filter_dict = filter.to_dict() + filter_dict = filter if isinstance(filter, dict) else filter.to_dict() return self._register( self._rows_flag, { @@ -301,10 +338,10 @@ def rows_flag( def _rows_flag( self, data: pl.DataFrame, - filter: dict, + filter: dict[str, Any], new_column: str, - true_value: Any, - false_value: Any, + true_value: Any, # noqa: ANN401 + false_value: Any, # noqa: ANN401 ) -> pl.DataFrame: expr = Filter.from_dict(filter).to_expr() return data.with_columns( @@ -327,7 +364,11 @@ def rows_pivot( index: Column(s) to use as row identifiers. columns: Column whose unique values become new columns. values: Column containing values to fill. - aggregate_function: How to aggregate ('first', 'sum', 'mean', 'count', etc.). + aggregate_function: How to aggregate ('first', 'sum', 'mean', 'count', + etc.). + + Returns: + Self for method chaining. """ if isinstance(index, str): index = [index] diff --git a/transformplan/ops/string.py b/transformplan/ops/string.py index ea925be..6d48346 100644 --- a/transformplan/ops/string.py +++ b/transformplan/ops/string.py @@ -35,7 +35,6 @@ if TYPE_CHECKING: from typing import Any, Callable - import polars as pl from typing_extensions import Self @@ -55,6 +54,7 @@ def str_replace( column: str, pattern: str, replacement: str, + *, literal: bool = True, ) -> Self: """Replace occurrences of a pattern in a string column. @@ -64,6 +64,9 @@ def str_replace( pattern: Pattern to search for. replacement: String to replace with. literal: If True, treat pattern as literal string. If False, treat as regex. + + Returns: + Self for method chaining. """ return self._register( self._str_replace, @@ -81,7 +84,7 @@ def _str_replace( column: str, pattern: str, replacement: str, - literal: bool, + literal: bool, # noqa: FBT001 ) -> pl.DataFrame: return data.with_columns( pl.col(column).str.replace_all(pattern, replacement, literal=literal) @@ -99,6 +102,9 @@ def str_slice( column: Column to modify. offset: Start position (0-indexed, negative counts from end). length: Number of characters to extract (None = to end). 
+ + Returns: + Self for method chaining. """ return self._register( self._str_slice, {"column": column, "offset": offset, "length": length} @@ -116,6 +122,9 @@ def str_truncate(self, column: str, max_length: int, suffix: str = "...") -> Sel column: Column to modify. max_length: Maximum length of the string (including suffix). suffix: Suffix to append to truncated strings. + + Returns: + Self for method chaining. """ return self._register( self._str_truncate, @@ -138,6 +147,7 @@ def str_split( column: str, separator: str, new_columns: list[str] | None = None, + *, keep_original: bool = False, ) -> Self: """Split a string column by separator. @@ -147,6 +157,9 @@ def str_split( separator: String to split on. new_columns: Names for the resulting columns. If None, explodes into rows. keep_original: Whether to keep the original column. + + Returns: + Self for method chaining. """ return self._register( self._str_split, @@ -164,7 +177,7 @@ def _str_split( column: str, separator: str, new_columns: list[str] | None, - keep_original: bool, + keep_original: bool, # noqa: FBT001 ) -> pl.DataFrame: if new_columns is None: # Explode into rows @@ -183,14 +196,22 @@ def _str_split( return result def str_lower(self, column: str) -> Self: - """Convert string column to lowercase.""" + """Convert string column to lowercase. + + Returns: + Self for method chaining. + """ return self._register(self._str_lower, {"column": column}) def _str_lower(self, data: pl.DataFrame, column: str) -> pl.DataFrame: return data.with_columns(pl.col(column).str.to_lowercase()) def str_upper(self, column: str) -> Self: - """Convert string column to uppercase.""" + """Convert string column to uppercase. + + Returns: + Self for method chaining. + """ return self._register(self._str_upper, {"column": column}) def _str_upper(self, data: pl.DataFrame, column: str) -> pl.DataFrame: @@ -202,6 +223,9 @@ def str_strip(self, column: str, chars: str | None = None) -> Self: Args: column: Column to modify. chars: Characters to strip (None = whitespace). + + Returns: + Self for method chaining. """ return self._register(self._str_strip, {"column": column, "chars": chars}) @@ -226,6 +250,9 @@ def str_pad( length: Target length. fill_char: Character to pad with. side: 'left' or 'right'. + + Returns: + Self for method chaining. """ return self._register( self._str_pad, @@ -251,6 +278,9 @@ def str_concat( columns: Columns to concatenate. new_column: Name for the new column. separator: Separator between values. + + Returns: + Self for method chaining. """ return self._register( self._str_concat, @@ -280,6 +310,9 @@ def str_extract( pattern: Regex pattern with capture group(s). group_index: Which capture group to extract (1-indexed). new_column: Name for result column (None = modify in place). + + Returns: + Self for method chaining. 
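+
+        Example:
+            A sketch on a hypothetical ``plan`` pipeline (column name and
+            pattern are illustrative):
+
+            >>> plan.str_extract(
+            ...     "email", r"@([a-z.]+)$", group_index=1, new_column="domain"
+            ... )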
""" return self._register( self._str_extract, diff --git a/transformplan/protocol.py b/transformplan/protocol.py index 8f227e9..f210ffa 100644 --- a/transformplan/protocol.py +++ b/transformplan/protocol.py @@ -66,6 +66,7 @@ class Protocol: VERSION = "1.0" def __init__(self) -> None: + """Initialize an empty Protocol.""" self._steps: list[dict[str, Any]] = [] self._input_hash: str | None = None self._input_shape: tuple[int, int] | None = None @@ -79,7 +80,7 @@ def set_input(self, hash_value: str, shape: tuple[int, int]) -> None: self._input_hash = hash_value self._input_shape = shape - def set_metadata(self, **kwargs: Any) -> None: + def set_metadata(self, **kwargs: Any) -> None: # noqa: ANN401 """Set arbitrary metadata on the protocol. Example: @@ -96,6 +97,16 @@ def add_step( elapsed: float, output_hash: str, ) -> None: + """Record a transformation step in the protocol. + + Args: + operation: Name of the operation. + params: Operation parameters. + old_shape: Shape before operation (rows, cols). + new_shape: Shape after operation (rows, cols). + elapsed: Time taken in seconds. + output_hash: Hash of the output DataFrame. + """ self._steps.append( { "step": len(self._steps) + 1, @@ -112,22 +123,39 @@ def add_step( @property def input_hash(self) -> str | None: - """Hash of the input DataFrame.""" + """Hash of the input DataFrame. + + Returns: + Hash string or None if not set. + """ return self._input_hash @property def output_hash(self) -> str | None: - """Hash of the final output DataFrame.""" + """Hash of the final output DataFrame. + + Returns: + Hash string or None if no steps. + """ if not self._steps: return self._input_hash return self._steps[-1]["output_hash"] @property def metadata(self) -> dict[str, Any]: - """Protocol metadata.""" + """Protocol metadata. + + Returns: + Dictionary of metadata. + """ return self._metadata def to_dataframe(self) -> pl.DataFrame: + """Convert protocol to a Polars DataFrame. + + Returns: + DataFrame with step information. + """ rows = [] # Step 0: input state @@ -177,25 +205,29 @@ def to_csv(self, path: str | Path) -> None: } ) - for step in self._steps: - rows.append( - { - "step": step["step"], - "operation": step["operation"], - "params": json.dumps(step["params"]) if step["params"] else None, - "old_shape": str(list(step["old_shape"])), - "new_shape": str(list(step["new_shape"])), - "rows_changed": step["rows_changed"], - "cols_changed": step["cols_changed"], - "elapsed_seconds": step["elapsed_seconds"], - "output_hash": step["output_hash"], - } - ) + rows.extend( + { + "step": step["step"], + "operation": step["operation"], + "params": json.dumps(step["params"]) if step["params"] else None, + "old_shape": str(list(step["old_shape"])), + "new_shape": str(list(step["new_shape"])), + "rows_changed": step["rows_changed"], + "cols_changed": step["cols_changed"], + "elapsed_seconds": step["elapsed_seconds"], + "output_hash": step["output_hash"], + } + for step in self._steps + ) pl.DataFrame(rows).write_csv(path) def to_dict(self) -> dict[str, Any]: - """Serialize protocol to a dictionary.""" + """Serialize protocol to a dictionary. + + Returns: + Dictionary representation of the protocol. + """ return { "version": self.VERSION, "created_at": self._created_at, @@ -222,7 +254,11 @@ def to_dict(self) -> dict[str, Any]: @classmethod def from_dict(cls, data: dict[str, Any]) -> Protocol: - """Deserialize protocol from a dictionary.""" + """Deserialize protocol from a dictionary. + + Returns: + Protocol instance. 
+ """ protocol = cls() protocol._created_at = data.get("created_at", protocol._created_at) protocol._metadata = data.get("metadata", {}) @@ -288,12 +324,22 @@ def from_json(cls, source: str | Path) -> Protocol: return cls.from_dict(json.loads(content)) def __repr__(self) -> str: + """Return string representation. + + Returns: + Human-readable representation. + """ return f"Protocol({len(self._steps)} steps)" def __len__(self) -> int: + """Return number of steps. + + Returns: + Number of transformation steps. + """ return len(self._steps) - def summary(self, show_params: bool = True) -> str: + def summary(self, *, show_params: bool = True) -> str: # noqa: C901 """Generate a clean, human-readable summary of the protocol. Args: @@ -305,9 +351,7 @@ def summary(self, show_params: bool = True) -> str: lines = [] # Header - lines.append("=" * 70) - lines.append("TRANSFORM PROTOCOL") - lines.append("=" * 70) + lines.extend(("=" * 70, "TRANSFORM PROTOCOL", "=" * 70)) # Metadata if self._metadata: @@ -318,7 +362,7 @@ def summary(self, show_params: bool = True) -> str: # Input info if self._input_hash: shape_str = ( - f"{self._input_shape[0]} rows × {self._input_shape[1]} cols" + f"{self._input_shape[0]} rows x {self._input_shape[1]} cols" if self._input_shape else "unknown" ) @@ -327,20 +371,20 @@ def summary(self, show_params: bool = True) -> str: # Output info if self._steps: final = self._steps[-1] - shape_str = f"{final['new_shape'][0]} rows × {final['new_shape'][1]} cols" + shape_str = f"{final['new_shape'][0]} rows x {final['new_shape'][1]} cols" lines.append(f"Output: {shape_str} [{final['output_hash']}]") # Total time total_time = sum(s["elapsed_seconds"] for s in self._steps) - lines.append(f"Total time: {total_time:.4f}s") - lines.append("-" * 70) - - # Steps - lines.append("") - lines.append( - f"{'#':<4} {'Operation':<20} {'Rows':<12} {'Cols':<12} {'Time':<10} {'Hash':<16}" + lines.extend( + [ + f"Total time: {total_time:.4f}s", + "-" * 70, + "", + f"{'#':<4} {'Operation':<20} {'Rows':<12} {'Cols':<12} {'Time':<10} {'Hash':<16}", + "-" * 70, + ] ) - lines.append("-" * 70) # Input row if self._input_hash: @@ -404,16 +448,17 @@ def summary(self, show_params: bool = True) -> str: return "\n".join(lines) - def _format_params(self, params: dict, max_length: int = 60) -> str: - """Format params dict as a readable string.""" + def _format_params(self, params: dict[str, Any], max_length: int = 60) -> str: + """Format params dict as a readable string. + + Returns: + Formatted string representation. + """ parts = [] for key, value in params.items(): if isinstance(value, dict): # Nested dict (like filter) - show type or summarize - if "type" in value: - value_str = self._format_filter(value) - else: - value_str = "{...}" + value_str = self._format_filter(value) if "type" in value else "{...}" elif isinstance(value, list) and len(value) > 3: value_str = f"[{value[0]}, {value[1]}, ... ({len(value)} items)]" else: @@ -425,8 +470,12 @@ def _format_params(self, params: dict, max_length: int = 60) -> str: result = result[: max_length - 3] + "..." return result - def _format_filter(self, filter_dict: dict) -> str: - """Format a filter dict as a readable expression.""" + def _format_filter(self, filter_dict: dict[str, Any]) -> str: # noqa: C901 + """Format a filter dict as a readable expression. + + Returns: + Human-readable filter expression. 
+ """ filter_type = filter_dict.get("type", "") if filter_type in ("and", "or"): @@ -434,10 +483,10 @@ def _format_filter(self, filter_dict: dict) -> str: right = self._format_filter(filter_dict["right"]) op = "&" if filter_type == "and" else "|" return f"({left} {op} {right})" - elif filter_type == "not": + if filter_type == "not": operand = self._format_filter(filter_dict["operand"]) return f"~{operand}" - elif filter_type in ("eq", "ne", "gt", "ge", "lt", "le"): + if filter_type in ("eq", "ne", "gt", "ge", "lt", "le"): col = filter_dict.get("column", "?") val = filter_dict.get("value", "?") op_map = { @@ -449,7 +498,7 @@ def _format_filter(self, filter_dict: dict) -> str: "le": "<=", } return f"{col} {op_map[filter_type]} {val!r}" - elif filter_type == "is_in": + if filter_type == "is_in": col = filter_dict.get("column", "?") values = filter_dict.get("values", []) if len(values) > 3: @@ -457,29 +506,29 @@ def _format_filter(self, filter_dict: dict) -> str: else: val_str = repr(values) return f"{col} in {val_str}" - elif filter_type == "is_null": + if filter_type == "is_null": return f"{filter_dict.get('column', '?')} is null" - elif filter_type == "is_not_null": + if filter_type == "is_not_null": return f"{filter_dict.get('column', '?')} is not null" - elif filter_type == "between": + if filter_type == "between": col = filter_dict.get("column", "?") lower = filter_dict.get("lower", "?") upper = filter_dict.get("upper", "?") return f"{col} between {lower!r} and {upper!r}" - elif filter_type.startswith("str_"): + if filter_type.startswith("str_"): col = filter_dict.get("column", "?") if filter_type == "str_contains": return f"{col}.contains({filter_dict.get('pattern', '?')!r})" - elif filter_type == "str_starts_with": + if filter_type == "str_starts_with": return f"{col}.starts_with({filter_dict.get('prefix', '?')!r})" - elif filter_type == "str_ends_with": + if filter_type == "str_ends_with": return f"{col}.ends_with({filter_dict.get('suffix', '?')!r})" return f"<{filter_type}>" - def print(self, show_params: bool = True) -> None: + def print(self, *, show_params: bool = True) -> None: """Print the protocol summary to stdout. Args: show_params: Whether to include operation parameters. """ - print(self.summary(show_params)) + print(self.summary(show_params=show_params)) # noqa: T201 diff --git a/transformplan/validation.py b/transformplan/validation.py index ac22aa8..e67d757 100644 --- a/transformplan/validation.py +++ b/transformplan/validation.py @@ -35,14 +35,10 @@ from __future__ import annotations from dataclasses import dataclass -from typing import TYPE_CHECKING, Any +from typing import Any import polars as pl -if TYPE_CHECKING: - from .filters import Filter - - # ============================================================================= # Type categories for validation # ============================================================================= @@ -68,27 +64,47 @@ def is_numeric(dtype: pl.DataType) -> bool: - """Check if dtype is numeric.""" + """Check if dtype is numeric. + + Returns: + True if dtype is numeric, False otherwise. + """ return dtype in NUMERIC_TYPES or dtype.base_type() in NUMERIC_TYPES def is_string(dtype: pl.DataType) -> bool: - """Check if dtype is string.""" + """Check if dtype is string. + + Returns: + True if dtype is string, False otherwise. + """ return dtype in STRING_TYPES or dtype.base_type() in STRING_TYPES def is_datetime(dtype: pl.DataType) -> bool: - """Check if dtype is datetime-related.""" + """Check if dtype is datetime-related. 
+ + Returns: + True if dtype is datetime-related, False otherwise. + """ return dtype in DATETIME_TYPES or dtype.base_type() in DATETIME_TYPES def is_boolean(dtype: pl.DataType) -> bool: - """Check if dtype is boolean.""" + """Check if dtype is boolean. + + Returns: + True if dtype is boolean, False otherwise. + """ return dtype in BOOLEAN_TYPES or dtype.base_type() in BOOLEAN_TYPES def dtype_name(dtype: pl.DataType) -> str: - """Get a readable name for a dtype.""" + """Get a readable name for a dtype. + + Returns: + String representation of the dtype. + """ return str(dtype).split("(")[0] @@ -106,6 +122,11 @@ class ValidationError: message: str def __str__(self) -> str: + """Return error message string. + + Returns: + Formatted error message. + """ return f"Step {self.step} ({self.operation}): {self.message}" @@ -113,28 +134,48 @@ class ValidationResult: """Result of schema validation.""" def __init__(self) -> None: + """Initialize an empty validation result.""" self._errors: list[ValidationError] = [] def add_error(self, step: int, operation: str, message: str) -> None: + """Add a validation error.""" self._errors.append(ValidationError(step, operation, message)) @property def is_valid(self) -> bool: + """Check if validation passed. + + Returns: + True if no errors, False otherwise. + """ return len(self._errors) == 0 @property def errors(self) -> list[ValidationError]: + """Get list of validation errors. + + Returns: + List of ValidationError instances. + """ return self._errors def raise_if_invalid(self) -> None: - """Raise ValidationError if validation failed.""" + """Raise SchemaValidationError if validation failed. + + Raises: + SchemaValidationError: If validation failed with errors. + """ if not self.is_valid: error_messages = "\n".join(f" - {e}" for e in self._errors) - raise SchemaValidationError( - f"Schema validation failed with {len(self._errors)} error(s):\n{error_messages}" - ) + msg = f"Schema validation failed with {len(self._errors)} error(s):\n{error_messages}" + raise SchemaValidationError(msg) def __repr__(self) -> str: + """Return string representation of validation result. + + Returns: + Human-readable representation. + """ if self.is_valid: return "ValidationResult(valid=True)" return f"ValidationResult(valid=False, errors={len(self._errors)})" @@ -175,48 +216,83 @@ def __init__( steps: list[DryRunStep], validation: ValidationResult, ) -> None: + """Initialize DryRunResult. + + Args: + input_schema: Initial schema as column name to dtype mapping. + steps: List of dry run steps. + validation: Validation result with any errors. + """ self._input_schema = input_schema self._steps = steps self._validation = validation @property def is_valid(self) -> bool: - """Whether the pipeline passed validation.""" + """Whether the pipeline passed validation. + + Returns: + True if validation passed, False otherwise. + """ return self._validation.is_valid @property def errors(self) -> list[ValidationError]: - """Validation errors.""" + """Validation errors. + + Returns: + List of validation errors. + """ return self._validation.errors @property def steps(self) -> list[DryRunStep]: - """List of dry run steps.""" + """List of dry run steps. + + Returns: + List of DryRunStep instances. + """ return self._steps @property def input_schema(self) -> dict[str, pl.DataType]: - """Input schema.""" + """Input schema. + + Returns: + Dictionary mapping column names to dtypes. 
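+
+        Example:
+            Illustrative access on a dry-run result ``dry``; actual dtypes
+            depend on the input frame:
+
+            >>> schema = dry.input_schema  # e.g. {"age": Int64, "name": String}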
+ """ return self._input_schema @property def output_schema(self) -> dict[str, str]: - """Predicted output schema after all operations.""" + """Predicted output schema after all operations. + + Returns: + Dictionary mapping column names to dtype names. + """ if self._steps: return self._steps[-1].schema_after return {k: dtype_name(v) for k, v in self._input_schema.items()} @property def input_columns(self) -> list[str]: - """Input column names.""" + """Input column names. + + Returns: + List of input column names. + """ return list(self._input_schema.keys()) @property def output_columns(self) -> list[str]: - """Predicted output column names.""" + """Predicted output column names. + + Returns: + List of predicted output column names. + """ return list(self.output_schema.keys()) - def summary(self, show_params: bool = True, show_schema: bool = False) -> str: + def summary(self, *, show_params: bool = True, show_schema: bool = False) -> str: # noqa: C901 """Generate a human-readable summary. Args: @@ -229,32 +305,28 @@ def summary(self, show_params: bool = True, show_schema: bool = False) -> str: lines = [] # Header - lines.append("=" * 70) - lines.append("DRY RUN PREVIEW") - lines.append("=" * 70) + lines.extend(("=" * 70, "DRY RUN PREVIEW", "=" * 70)) # Validation status if self.is_valid: lines.append("✓ Validation: PASSED") else: lines.append(f"✗ Validation: FAILED ({len(self.errors)} errors)") - for err in self.errors: - lines.append(f" - {err}") - - lines.append("-" * 70) + lines.extend(f" - {err}" for err in self.errors) - # Input schema summary - lines.append(f"Input: {len(self._input_schema)} columns") + lines.extend(["-" * 70, f"Input: {len(self._input_schema)} columns"]) if show_schema: for col, dtype in self._input_schema.items(): lines.append(f" {col}: {dtype_name(dtype)}") - lines.append("-" * 70) - - # Steps - lines.append("") - lines.append(f"{'#':<4} {'Operation':<20} {'Columns':<15} {'Changes':<30}") - lines.append("-" * 70) + lines.extend( + [ + "-" * 70, + "", + f"{'#':<4} {'Operation':<20} {'Columns':<15} {'Changes':<30}", + "-" * 70, + ] + ) for step in self._steps: step_num = str(step.step) @@ -291,27 +363,33 @@ def summary(self, show_params: bool = True, show_schema: bool = False) -> str: if show_schema: lines.append(f" Schema: {step.schema_after}") - lines.append("=" * 70) - - # Output schema summary - lines.append(f"Output: {len(self.output_schema)} columns") + lines.extend(["=" * 70, f"Output: {len(self.output_schema)} columns"]) if show_schema: for col, dtype in self.output_schema.items(): lines.append(f" {col}: {dtype}") return "\n".join(lines) - def print(self, show_params: bool = True, show_schema: bool = False) -> None: + def print(self, *, show_params: bool = True, show_schema: bool = False) -> None: """Print the dry run summary.""" - print(self.summary(show_params, show_schema)) + print(self.summary(show_params=show_params, show_schema=show_schema)) # noqa: T201 def __repr__(self) -> str: + """Return string representation of dry run result. + + Returns: + Human-readable representation. + """ status = "valid" if self.is_valid else f"invalid ({len(self.errors)} errors)" return f"DryRunResult({len(self._steps)} steps, {status})" -def _format_params_short(params: dict, max_length: int = 55) -> str: - """Format params dict as a short string.""" +def _format_params_short(params: dict[str, Any], max_length: int = 55) -> str: + """Format params dict as a short string. + + Returns: + Formatted string representation of params. 
+ """ parts = [] for key, value in params.items(): if isinstance(value, dict) and "type" in value: @@ -339,29 +417,54 @@ class SchemaTracker: """Tracks schema changes through a pipeline for validation.""" def __init__(self, schema: dict[str, pl.DataType]) -> None: + """Initialize tracker with a schema. + + Args: + schema: Initial schema as column name to dtype mapping. + """ self._schema = dict(schema) @property def columns(self) -> set[str]: + """Get set of column names. + + Returns: + Set of column names. + """ return set(self._schema.keys()) def has_column(self, name: str) -> bool: + """Check if column exists. + + Returns: + True if column exists, False otherwise. + """ return name in self._schema def get_dtype(self, name: str) -> pl.DataType | None: + """Get dtype for a column. + + Returns: + DataType or None if column doesn't exist. + """ return self._schema.get(name) def drop_column(self, name: str) -> None: + """Remove a column from the schema.""" self._schema.pop(name, None) - def add_column(self, name: str, dtype: pl.DataType) -> None: - self._schema[name] = dtype + def add_column(self, name: str, dtype: pl.DataType | None) -> None: + """Add a column to the schema.""" + if dtype is not None: + self._schema[name] = dtype def rename_column(self, old_name: str, new_name: str) -> None: + """Rename a column in the schema.""" if old_name in self._schema: self._schema[new_name] = self._schema.pop(old_name) def set_dtype(self, name: str, dtype: pl.DataType) -> None: + """Change the dtype of an existing column.""" if name in self._schema: self._schema[name] = dtype @@ -384,7 +487,11 @@ def _check_column_exists( step: int, op_name: str, ) -> bool: - """Check if column exists, add error if not. Returns True if exists.""" + """Check if column exists, add error if not. + + Returns: + True if column exists, False otherwise. + """ if not tracker.has_column(column): result.add_error(step, op_name, f"Column '{column}' does not exist") return False @@ -398,7 +505,11 @@ def _check_column_numeric( step: int, op_name: str, ) -> bool: - """Check if column is numeric, add error if not. Returns True if numeric.""" + """Check if column is numeric, add error if not. + + Returns: + True if column is numeric, False otherwise. + """ dtype = tracker.get_dtype(column) if dtype and not is_numeric(dtype): result.add_error( @@ -415,7 +526,11 @@ def _check_column_string( step: int, op_name: str, ) -> bool: - """Check if column is string, add error if not. Returns True if string.""" + """Check if column is string, add error if not. + + Returns: + True if column is string, False otherwise. + """ dtype = tracker.get_dtype(column) if dtype and not is_string(dtype): result.add_error( @@ -432,7 +547,11 @@ def _check_column_datetime( step: int, op_name: str, ) -> bool: - """Check if column is datetime, add error if not. Returns True if datetime.""" + """Check if column is datetime, add error if not. + + Returns: + True if column is datetime, False otherwise. 
+ """ dtype = tracker.get_dtype(column) if dtype and not is_datetime(dtype): result.add_error( @@ -724,7 +843,7 @@ def _validate_str_split( ) else: tracker.add_column(new_col, pl.Utf8) - if not params.get("keep_original", False): + if not params.get("keep_original"): tracker.drop_column(column) @@ -830,13 +949,10 @@ def _validate_dt_age_years( if _check_column_exists(tracker, birth_column, result, step, "dt_age_years"): _check_column_datetime(tracker, birth_column, result, step, "dt_age_years") - if reference_column: - if _check_column_exists( - tracker, reference_column, result, step, "dt_age_years" - ): - _check_column_datetime( - tracker, reference_column, result, step, "dt_age_years" - ) + if reference_column and _check_column_exists( + tracker, reference_column, result, step, "dt_age_years" + ): + _check_column_datetime(tracker, reference_column, result, step, "dt_age_years") tracker.add_column(new_column, pl.Int64) @@ -887,7 +1003,11 @@ def _validate_filter_columns( step: int, op_name: str, ) -> list[str]: - """Recursively validate columns and types from a filter dict.""" + """Recursively validate columns and types from a filter dict. + + Returns: + List of missing column names. + """ missing = [] filter_type = filter_dict.get("type") @@ -917,22 +1037,29 @@ def _validate_filter_columns( dtype = tracker.get_dtype(column) # Numeric comparisons - if filter_type in ("gt", "ge", "lt", "le", "between"): - if dtype and not is_numeric(dtype) and not is_datetime(dtype): - result.add_error( - step, - op_name, - f"Column '{column}' is {dtype_name(dtype)}, cannot use numeric comparison", - ) + if ( + filter_type in ("gt", "ge", "lt", "le", "between") + and dtype + and not is_numeric(dtype) + and not is_datetime(dtype) + ): + result.add_error( + step, + op_name, + f"Column '{column}' is {dtype_name(dtype)}, cannot use numeric comparison", + ) # String operations - if filter_type in ("str_contains", "str_starts_with", "str_ends_with"): - if dtype and not is_string(dtype): - result.add_error( - step, - op_name, - f"Column '{column}' is {dtype_name(dtype)}, cannot use string filter", - ) + if ( + filter_type in ("str_contains", "str_starts_with", "str_ends_with") + and dtype + and not is_string(dtype) + ): + result.add_error( + step, + op_name, + f"Column '{column}' is {dtype_name(dtype)}, cannot use string filter", + ) return missing @@ -1259,10 +1386,11 @@ def dry_run_schema( columns_removed = list(cols_before - cols_after) # Detect type modifications (columns that exist in both but changed type) - columns_modified = [] - for col in cols_before & cols_after: - if schema_before.get(col) != schema_after.get(col): - columns_modified.append(col) + columns_modified = [ + col + for col in cols_before & cols_after + if schema_before.get(col) != schema_after.get(col) + ] # Check if this step had an error step_error = None From eb11d1c962724cf1b4db0cc0f71b79dada72077b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philipp=20Gro=C3=9Fer?= Date: Thu, 29 Jan 2026 18:26:10 +0100 Subject: [PATCH 6/7] linting fixes --- transformplan/chunking.py | 4 +- transformplan/core.py | 4 +- transformplan/ops/column.py | 10 +- transformplan/ops/datetime.py | 8 +- transformplan/ops/math.py | 7 +- transformplan/ops/rows.py | 5 +- transformplan/protocol.py | 6 +- transformplan/validation.py | 167 +++++++++++++++++----------------- 8 files changed, 103 insertions(+), 108 deletions(-) diff --git a/transformplan/chunking.py b/transformplan/chunking.py index 87413bd..697e062 100644 --- a/transformplan/chunking.py +++ 
b/transformplan/chunking.py @@ -433,9 +433,7 @@ def from_json(cls, source: str | Path) -> ChunkedProtocol: Returns: ChunkedProtocol instance. """ - if isinstance(source, Path) or ( - isinstance(source, str) and not source.strip().startswith("{") - ): + if isinstance(source, Path) or not source.strip().startswith("{"): content = Path(source).read_text() else: content = source diff --git a/transformplan/core.py b/transformplan/core.py index 93b276c..48c4d91 100644 --- a/transformplan/core.py +++ b/transformplan/core.py @@ -232,9 +232,7 @@ def from_json(cls, source: str | Path) -> Self: Returns: New TransformPlan instance. """ - if isinstance(source, Path) or ( - isinstance(source, str) and not source.strip().startswith("{") - ): + if isinstance(source, Path) or not source.strip().startswith("{"): content = Path(source).read_text() else: content = source diff --git a/transformplan/ops/column.py b/transformplan/ops/column.py index 8e460a6..8a7c405 100644 --- a/transformplan/ops/column.py +++ b/transformplan/ops/column.py @@ -30,7 +30,7 @@ import hashlib import secrets import string -from typing import TYPE_CHECKING, Any, Sequence +from typing import TYPE_CHECKING, Any, Literal, Sequence import polars as pl @@ -39,6 +39,8 @@ from typing_extensions import Self +FillNullStrategy = Literal["forward", "backward", "min", "max", "mean", "zero"] + class ColumnOps: """Mixin providing column-level operations.""" @@ -118,7 +120,7 @@ def col_fill_null( self, column: str, value: Any = None, # noqa: ANN401 - strategy: str | None = None, + strategy: FillNullStrategy | None = None, ) -> Self: """Fill null values in a column. @@ -141,7 +143,7 @@ def _col_fill_null( data: pl.DataFrame, column: str, value: Any, # noqa: ANN401 - strategy: str | None, + strategy: FillNullStrategy | None, ) -> pl.DataFrame: if strategy is not None: return data.with_columns(pl.col(column).fill_null(strategy=strategy)) @@ -255,7 +257,7 @@ def col_hash( def _col_hash( self, data: pl.DataFrame, columns: list[str], new_column: str, salt: str ) -> pl.DataFrame: - def hash_row(values: tuple) -> str: + def hash_row(values: tuple[Any, ...]) -> str: content = "|".join(str(v) for v in values) + salt return hashlib.sha256(content.encode()).hexdigest()[:16] diff --git a/transformplan/ops/datetime.py b/transformplan/ops/datetime.py index 35db87d..75466ab 100644 --- a/transformplan/ops/datetime.py +++ b/transformplan/ops/datetime.py @@ -36,7 +36,7 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Literal import polars as pl @@ -45,6 +45,8 @@ from typing_extensions import Self +ClosedInterval = Literal["left", "right", "both", "none"] + class DatetimeOps: """Mixin providing datetime operations on columns.""" @@ -371,7 +373,7 @@ def dt_is_between( start: str, end: str, new_column: str, - closed: str = "both", + closed: ClosedInterval = "both", ) -> Self: """Check if date falls within a range. 
@@ -403,7 +405,7 @@ def _dt_is_between( start: str, end: str, new_column: str, - closed: str, + closed: ClosedInterval, ) -> pl.DataFrame: return data.with_columns( pl.col(column) diff --git a/transformplan/ops/math.py b/transformplan/ops/math.py index cc8e78a..d183c15 100644 --- a/transformplan/ops/math.py +++ b/transformplan/ops/math.py @@ -34,7 +34,7 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Union +from typing import TYPE_CHECKING, Literal, Union import polars as pl @@ -44,6 +44,7 @@ from typing_extensions import Self Numeric = Union[int, float] +RankMethod = Literal["average", "min", "max", "dense", "ordinal", "random"] class MathOps: @@ -361,7 +362,7 @@ def math_rank( self, column: str, new_column: str, - method: str = "ordinal", + method: RankMethod = "ordinal", *, descending: bool = False, group_by: str | list[str] | None = None, @@ -396,7 +397,7 @@ def _math_rank( data: pl.DataFrame, column: str, new_column: str, - method: str, + method: RankMethod, descending: bool, # noqa: FBT001 group_by: list[str] | None, ) -> pl.DataFrame: diff --git a/transformplan/ops/rows.py b/transformplan/ops/rows.py index f716b7e..84936b5 100644 --- a/transformplan/ops/rows.py +++ b/transformplan/ops/rows.py @@ -43,6 +43,7 @@ if TYPE_CHECKING: from typing import Any, Callable + from polars._typing import PivotAgg from typing_extensions import Self @@ -356,7 +357,7 @@ def rows_pivot( index: str | Sequence[str], columns: str, values: str, - aggregate_function: str = "first", + aggregate_function: PivotAgg = "first", ) -> Self: """Pivot from long to wide format. @@ -388,7 +389,7 @@ def _rows_pivot( index: list[str], columns: str, values: str, - aggregate_function: str, + aggregate_function: PivotAgg, ) -> pl.DataFrame: return data.pivot( index=index, diff --git a/transformplan/protocol.py b/transformplan/protocol.py index f210ffa..8c2ce95 100644 --- a/transformplan/protocol.py +++ b/transformplan/protocol.py @@ -266,7 +266,7 @@ def from_dict(cls, data: dict[str, Any]) -> Protocol: input_data = data.get("input", {}) protocol._input_hash = input_data.get("hash") shape = input_data.get("shape") - protocol._input_shape = tuple(shape) if shape else None + protocol._input_shape = (int(shape[0]), int(shape[1])) if shape else None for step in data.get("steps", []): protocol._steps.append( @@ -312,9 +312,7 @@ def from_json(cls, source: str | Path) -> Protocol: Returns: Protocol instance. 
""" - if isinstance(source, Path) or ( - isinstance(source, str) and not source.strip().startswith("{") - ): + if isinstance(source, Path) or not source.strip().startswith("{"): # Treat as file path content = Path(source).read_text() else: diff --git a/transformplan/validation.py b/transformplan/validation.py index e67d757..d42a058 100644 --- a/transformplan/validation.py +++ b/transformplan/validation.py @@ -35,7 +35,8 @@ from __future__ import annotations from dataclasses import dataclass -from typing import Any +from functools import partial +from typing import Any, Callable import polars as pl @@ -44,23 +45,23 @@ # ============================================================================= NUMERIC_TYPES = { - pl.Int8, - pl.Int16, - pl.Int32, - pl.Int64, - pl.UInt8, - pl.UInt16, - pl.UInt32, - pl.UInt64, - pl.Float32, - pl.Float64, + pl.Int8(), + pl.Int16(), + pl.Int32(), + pl.Int64(), + pl.UInt8(), + pl.UInt16(), + pl.UInt32(), + pl.UInt64(), + pl.Float32(), + pl.Float64(), } -STRING_TYPES = {pl.Utf8, pl.String} +STRING_TYPES = {pl.Utf8(), pl.String()} -DATETIME_TYPES = {pl.Date, pl.Datetime, pl.Time, pl.Duration} +DATETIME_TYPES = {pl.Date(), pl.Datetime(), pl.Time(), pl.Duration()} -BOOLEAN_TYPES = {pl.Boolean} +BOOLEAN_TYPES = {pl.Boolean()} def is_numeric(dtype: pl.DataType) -> bool: @@ -69,7 +70,7 @@ def is_numeric(dtype: pl.DataType) -> bool: Returns: True if dtype is numeric, False otherwise. """ - return dtype in NUMERIC_TYPES or dtype.base_type() in NUMERIC_TYPES + return dtype in NUMERIC_TYPES or dtype.base_type()() in NUMERIC_TYPES def is_string(dtype: pl.DataType) -> bool: @@ -78,7 +79,7 @@ def is_string(dtype: pl.DataType) -> bool: Returns: True if dtype is string, False otherwise. """ - return dtype in STRING_TYPES or dtype.base_type() in STRING_TYPES + return dtype in STRING_TYPES or dtype.base_type()() in STRING_TYPES def is_datetime(dtype: pl.DataType) -> bool: @@ -87,7 +88,7 @@ def is_datetime(dtype: pl.DataType) -> bool: Returns: True if dtype is datetime-related, False otherwise. """ - return dtype in DATETIME_TYPES or dtype.base_type() in DATETIME_TYPES + return dtype in DATETIME_TYPES or dtype.base_type()() in DATETIME_TYPES def is_boolean(dtype: pl.DataType) -> bool: @@ -96,7 +97,7 @@ def is_boolean(dtype: pl.DataType) -> bool: Returns: True if dtype is boolean, False otherwise. 
""" - return dtype in BOOLEAN_TYPES or dtype.base_type() in BOOLEAN_TYPES + return dtype in BOOLEAN_TYPES or dtype.base_type()() in BOOLEAN_TYPES def dtype_name(dtype: pl.DataType) -> str: @@ -475,6 +476,12 @@ def set_columns(self, columns: list[str]) -> None: } +# Type alias for validator functions +ValidatorFunc = Callable[ + [SchemaTracker, dict[str, Any], ValidationResult, int], None +] + + # ============================================================================= # Helper functions # ============================================================================= @@ -672,7 +679,7 @@ def _validate_col_add( if expr: tracker.add_column(new_column, tracker.get_dtype(expr)) else: - tracker.add_column(new_column, pl.Utf8) # default to string for literals + tracker.add_column(new_column, pl.Utf8()) # default to string for literals def _validate_col_add_uuid( @@ -682,7 +689,7 @@ def _validate_col_add_uuid( if tracker.has_column(column): result.add_error(step, "col_add_uuid", f"Column '{column}' already exists") else: - tracker.add_column(column, pl.Utf8) + tracker.add_column(column, pl.Utf8()) def _validate_col_hash( @@ -696,7 +703,7 @@ def _validate_col_hash( if tracker.has_column(new_column): result.add_error(step, "col_hash", f"Column '{new_column}' already exists") else: - tracker.add_column(new_column, pl.Utf8) + tracker.add_column(new_column, pl.Utf8()) def _validate_col_coalesce( @@ -750,7 +757,7 @@ def _validate_math_columns( if b_exists: _check_column_numeric(tracker, column_b, result, step, op_name) - tracker.add_column(new_column, pl.Float64) + tracker.add_column(new_column, pl.Float64()) def _validate_math_cumsum( @@ -790,7 +797,7 @@ def _validate_math_rank( step, "math_rank", f"Group-by columns do not exist: {missing}" ) - tracker.add_column(new_column, pl.UInt32) + tracker.add_column(new_column, pl.UInt32()) def _validate_math_percent_of( @@ -805,7 +812,7 @@ def _validate_math_percent_of( if _check_column_exists(tracker, total_column, result, step, "math_percent_of"): _check_column_numeric(tracker, total_column, result, step, "math_percent_of") - tracker.add_column(new_column, pl.Float64) + tracker.add_column(new_column, pl.Float64()) # ============================================================================= @@ -842,7 +849,7 @@ def _validate_str_split( step, "str_split", f"Column '{new_col}' already exists" ) else: - tracker.add_column(new_col, pl.Utf8) + tracker.add_column(new_col, pl.Utf8()) if not params.get("keep_original"): tracker.drop_column(column) @@ -857,7 +864,7 @@ def _validate_str_concat( if _check_column_exists(tracker, col, result, step, "str_concat"): _check_column_string(tracker, col, result, step, "str_concat") - tracker.add_column(new_column, pl.Utf8) + tracker.add_column(new_column, pl.Utf8()) def _validate_str_extract( @@ -870,7 +877,7 @@ def _validate_str_extract( _check_column_string(tracker, column, result, step, "str_extract") if new_column != column: - tracker.add_column(new_column, pl.Utf8) + tracker.add_column(new_column, pl.Utf8()) # ============================================================================= @@ -884,9 +891,11 @@ def _validate_dt_op( result: ValidationResult, step: int, op_name: str, - output_dtype: pl.DataType = pl.Int32, + output_dtype: pl.DataType | None = None, ) -> None: """Validate datetime operation: column must exist and be datetime.""" + if output_dtype is None: + output_dtype = pl.Int32() column = params["column"] new_column = params.get("new_column", column) @@ -906,7 +915,7 @@ def _validate_dt_parse( if 
_check_column_exists(tracker, column, result, step, "dt_parse"): _check_column_string(tracker, column, result, step, "dt_parse") - tracker.set_dtype(new_column, pl.Date) + tracker.set_dtype(new_column, pl.Date()) def _validate_dt_format( @@ -919,9 +928,9 @@ def _validate_dt_format( _check_column_datetime(tracker, column, result, step, "dt_format") if new_column != column: - tracker.add_column(new_column, pl.Utf8) + tracker.add_column(new_column, pl.Utf8()) else: - tracker.set_dtype(column, pl.Utf8) + tracker.set_dtype(column, pl.Utf8()) def _validate_dt_diff_days( @@ -936,7 +945,7 @@ def _validate_dt_diff_days( if _check_column_exists(tracker, column_b, result, step, "dt_diff_days"): _check_column_datetime(tracker, column_b, result, step, "dt_diff_days") - tracker.add_column(new_column, pl.Int64) + tracker.add_column(new_column, pl.Int64()) def _validate_dt_age_years( @@ -954,7 +963,7 @@ def _validate_dt_age_years( ): _check_column_datetime(tracker, reference_column, result, step, "dt_age_years") - tracker.add_column(new_column, pl.Int64) + tracker.add_column(new_column, pl.Int64()) def _validate_dt_is_between( @@ -966,7 +975,7 @@ def _validate_dt_is_between( if _check_column_exists(tracker, column, result, step, "dt_is_between"): _check_column_datetime(tracker, column, result, step, "dt_is_between") - tracker.add_column(new_column, pl.Boolean) + tracker.add_column(new_column, pl.Boolean()) # ============================================================================= @@ -1100,7 +1109,7 @@ def _validate_rows_flag( if tracker.has_column(new_column): result.add_error(step, "rows_flag", f"Column '{new_column}' already exists") else: - tracker.add_column(new_column, pl.Boolean) + tracker.add_column(new_column, pl.Boolean()) def _validate_rows_sort( @@ -1199,9 +1208,9 @@ def _validate_map_discretize( _check_column_numeric(tracker, column, result, step, "map_discretize") if new_column != column: - tracker.add_column(new_column, pl.Utf8) + tracker.add_column(new_column, pl.Utf8()) else: - tracker.set_dtype(column, pl.Utf8) + tracker.set_dtype(column, pl.Utf8()) def _validate_map_from_column( @@ -1224,7 +1233,7 @@ def _validate_map_from_column( # Validator registry # ============================================================================= -_VALIDATORS: dict[str, Any] = { +_VALIDATORS: dict[str, ValidatorFunc] = { # Column ops "col_drop": _validate_col_drop, "col_rename": _validate_col_rename, @@ -1240,67 +1249,53 @@ def _validate_map_from_column( "col_hash": _validate_col_hash, "col_coalesce": _validate_col_coalesce, # Math ops - "math_add": lambda t, p, r, s: _validate_math_scalar(t, p, r, s, "math_add"), - "math_subtract": lambda t, p, r, s: _validate_math_scalar( - t, p, r, s, "math_subtract" - ), - "math_multiply": lambda t, p, r, s: _validate_math_scalar( - t, p, r, s, "math_multiply" - ), - "math_divide": lambda t, p, r, s: _validate_math_scalar(t, p, r, s, "math_divide"), - "math_clamp": lambda t, p, r, s: _validate_math_scalar(t, p, r, s, "math_clamp"), - "math_abs": lambda t, p, r, s: _validate_math_scalar(t, p, r, s, "math_abs"), - "math_round": lambda t, p, r, s: _validate_math_scalar(t, p, r, s, "math_round"), - "math_set_min": lambda t, p, r, s: _validate_math_scalar( - t, p, r, s, "math_set_min" - ), - "math_set_max": lambda t, p, r, s: _validate_math_scalar( - t, p, r, s, "math_set_max" + "math_add": partial(_validate_math_scalar, op_name="math_add"), + "math_subtract": partial(_validate_math_scalar, op_name="math_subtract"), + "math_multiply": partial(_validate_math_scalar, 
op_name="math_multiply"), + "math_divide": partial(_validate_math_scalar, op_name="math_divide"), + "math_clamp": partial(_validate_math_scalar, op_name="math_clamp"), + "math_abs": partial(_validate_math_scalar, op_name="math_abs"), + "math_round": partial(_validate_math_scalar, op_name="math_round"), + "math_set_min": partial(_validate_math_scalar, op_name="math_set_min"), + "math_set_max": partial(_validate_math_scalar, op_name="math_set_max"), + "math_add_columns": partial(_validate_math_columns, op_name="math_add_columns"), + "math_subtract_columns": partial( + _validate_math_columns, op_name="math_subtract_columns" ), - "math_add_columns": lambda t, p, r, s: _validate_math_columns( - t, p, r, s, "math_add_columns" + "math_multiply_columns": partial( + _validate_math_columns, op_name="math_multiply_columns" ), - "math_subtract_columns": lambda t, p, r, s: _validate_math_columns( - t, p, r, s, "math_subtract_columns" - ), - "math_multiply_columns": lambda t, p, r, s: _validate_math_columns( - t, p, r, s, "math_multiply_columns" - ), - "math_divide_columns": lambda t, p, r, s: _validate_math_columns( - t, p, r, s, "math_divide_columns" + "math_divide_columns": partial( + _validate_math_columns, op_name="math_divide_columns" ), "math_cumsum": _validate_math_cumsum, "math_rank": _validate_math_rank, "math_percent_of": _validate_math_percent_of, # String ops - "str_replace": lambda t, p, r, s: _validate_str_op(t, p, r, s, "str_replace"), - "str_slice": lambda t, p, r, s: _validate_str_op(t, p, r, s, "str_slice"), - "str_truncate": lambda t, p, r, s: _validate_str_op(t, p, r, s, "str_truncate"), - "str_lower": lambda t, p, r, s: _validate_str_op(t, p, r, s, "str_lower"), - "str_upper": lambda t, p, r, s: _validate_str_op(t, p, r, s, "str_upper"), - "str_strip": lambda t, p, r, s: _validate_str_op(t, p, r, s, "str_strip"), - "str_pad": lambda t, p, r, s: _validate_str_op(t, p, r, s, "str_pad"), + "str_replace": partial(_validate_str_op, op_name="str_replace"), + "str_slice": partial(_validate_str_op, op_name="str_slice"), + "str_truncate": partial(_validate_str_op, op_name="str_truncate"), + "str_lower": partial(_validate_str_op, op_name="str_lower"), + "str_upper": partial(_validate_str_op, op_name="str_upper"), + "str_strip": partial(_validate_str_op, op_name="str_strip"), + "str_pad": partial(_validate_str_op, op_name="str_pad"), "str_split": _validate_str_split, "str_concat": _validate_str_concat, "str_extract": _validate_str_extract, # Datetime ops - "dt_year": lambda t, p, r, s: _validate_dt_op(t, p, r, s, "dt_year"), - "dt_month": lambda t, p, r, s: _validate_dt_op(t, p, r, s, "dt_month"), - "dt_day": lambda t, p, r, s: _validate_dt_op(t, p, r, s, "dt_day"), - "dt_week": lambda t, p, r, s: _validate_dt_op(t, p, r, s, "dt_week"), - "dt_quarter": lambda t, p, r, s: _validate_dt_op(t, p, r, s, "dt_quarter"), - "dt_year_month": lambda t, p, r, s: _validate_dt_op( - t, p, r, s, "dt_year_month", pl.Utf8 - ), - "dt_quarter_year": lambda t, p, r, s: _validate_dt_op( - t, p, r, s, "dt_quarter_year", pl.Utf8 - ), - "dt_calendar_week": lambda t, p, r, s: _validate_dt_op( - t, p, r, s, "dt_calendar_week", pl.Utf8 + "dt_year": partial(_validate_dt_op, op_name="dt_year"), + "dt_month": partial(_validate_dt_op, op_name="dt_month"), + "dt_day": partial(_validate_dt_op, op_name="dt_day"), + "dt_week": partial(_validate_dt_op, op_name="dt_week"), + "dt_quarter": partial(_validate_dt_op, op_name="dt_quarter"), + "dt_year_month": partial(_validate_dt_op, op_name="dt_year_month", output_dtype=pl.Utf8()), 
+ "dt_quarter_year": partial( + _validate_dt_op, op_name="dt_quarter_year", output_dtype=pl.Utf8() ), - "dt_truncate": lambda t, p, r, s: _validate_dt_op( - t, p, r, s, "dt_truncate", pl.Date + "dt_calendar_week": partial( + _validate_dt_op, op_name="dt_calendar_week", output_dtype=pl.Utf8() ), + "dt_truncate": partial(_validate_dt_op, op_name="dt_truncate", output_dtype=pl.Date()), "dt_parse": _validate_dt_parse, "dt_format": _validate_dt_format, "dt_diff_days": _validate_dt_diff_days, From 5502be39d5181138cb6de8052aa500b86959f0c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philipp=20Gro=C3=9Fer?= Date: Thu, 29 Jan 2026 18:27:44 +0100 Subject: [PATCH 7/7] format --- transformplan/validation.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/transformplan/validation.py b/transformplan/validation.py index d42a058..f4e3070 100644 --- a/transformplan/validation.py +++ b/transformplan/validation.py @@ -477,9 +477,7 @@ def set_columns(self, columns: list[str]) -> None: # Type alias for validator functions -ValidatorFunc = Callable[ - [SchemaTracker, dict[str, Any], ValidationResult, int], None -] +ValidatorFunc = Callable[[SchemaTracker, dict[str, Any], ValidationResult, int], None] # ============================================================================= @@ -1288,14 +1286,18 @@ def _validate_map_from_column( "dt_day": partial(_validate_dt_op, op_name="dt_day"), "dt_week": partial(_validate_dt_op, op_name="dt_week"), "dt_quarter": partial(_validate_dt_op, op_name="dt_quarter"), - "dt_year_month": partial(_validate_dt_op, op_name="dt_year_month", output_dtype=pl.Utf8()), + "dt_year_month": partial( + _validate_dt_op, op_name="dt_year_month", output_dtype=pl.Utf8() + ), "dt_quarter_year": partial( _validate_dt_op, op_name="dt_quarter_year", output_dtype=pl.Utf8() ), "dt_calendar_week": partial( _validate_dt_op, op_name="dt_calendar_week", output_dtype=pl.Utf8() ), - "dt_truncate": partial(_validate_dt_op, op_name="dt_truncate", output_dtype=pl.Date()), + "dt_truncate": partial( + _validate_dt_op, op_name="dt_truncate", output_dtype=pl.Date() + ), "dt_parse": _validate_dt_parse, "dt_format": _validate_dt_format, "dt_diff_days": _validate_dt_diff_days,