From beefa701c6232fa4a5c730f6e6ea8a74aa55fbf9 Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Mon, 19 Jan 2026 17:24:26 -0800 Subject: [PATCH 1/6] upgrade pyiceberg-core --- pyproject.toml | 4 ++-- uv.lock | 20 +++++++++++--------- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 14cd882b34..749e047517 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -56,7 +56,7 @@ pyiceberg = "pyiceberg.cli.console:run" [project.optional-dependencies] pyarrow = [ "pyarrow>=17.0.0", - "pyiceberg-core>=0.5.1,<0.8.0", + "pyiceberg-core>=0.5.1,<0.9.0", ] pandas = [ "pandas>=1.0.0,<3.0.0", @@ -94,7 +94,7 @@ sql-sqlite = ["sqlalchemy>=2.0.18,<3"] gcsfs = ["gcsfs>=2023.1.0"] rest-sigv4 = ["boto3>=1.24.59"] hf = ["huggingface-hub>=0.24.0"] -pyiceberg-core = ["pyiceberg-core>=0.5.1,<0.8.0"] +pyiceberg-core = ["pyiceberg-core>=0.5.1,<0.9.0"] datafusion = ["datafusion>=45,<49"] gcp-auth = ["google-auth>=2.4.0"] diff --git a/uv.lock b/uv.lock index 9d6b97d0a9..9409dd105c 100644 --- a/uv.lock +++ b/uv.lock @@ -3841,8 +3841,8 @@ requires-dist = [ { name = "pyarrow", marker = "extra == 'pyarrow'", specifier = ">=17.0.0" }, { name = "pyarrow", marker = "extra == 'ray'", specifier = ">=17.0.0" }, { name = "pydantic", specifier = ">=2.0,!=2.4.0,!=2.4.1,!=2.12.0,!=2.12.1,<3.0" }, - { name = "pyiceberg-core", marker = "extra == 'pyarrow'", specifier = ">=0.5.1,<0.8.0" }, - { name = "pyiceberg-core", marker = "extra == 'pyiceberg-core'", specifier = ">=0.5.1,<0.8.0" }, + { name = "pyiceberg-core", marker = "extra == 'pyarrow'", specifier = ">=0.5.1,<0.9.0" }, + { name = "pyiceberg-core", marker = "extra == 'pyiceberg-core'", specifier = ">=0.5.1,<0.9.0" }, { name = "pyparsing", specifier = ">=3.1.0,<4.0.0" }, { name = "pyroaring", specifier = ">=1.0.0,<2.0.0" }, { name = "python-snappy", marker = "extra == 'snappy'", specifier = ">=0.6.0,<1.0.0" }, @@ -3900,15 +3900,17 @@ docs = [ [[package]] name = "pyiceberg-core" -version = "0.7.0" +version = 
"0.8.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/c8/85/d3ec2e229d4e1bad3b9c4092889cae102eaf0d4ed62ce0e2de2b6e32cc4d/pyiceberg_core-0.7.0.tar.gz", hash = "sha256:8166883ace30a388d2f659634bec87731cad7bc52341c997fcdd4e13780e4345", size = 504657, upload-time = "2025-10-10T16:30:16.202Z" } +sdist = { url = "https://files.pythonhosted.org/packages/9c/a0/0bcedbbe901484aacb6c605505f8574fd65954826e592fdb163e1cfb09f2/pyiceberg_core-0.8.0.tar.gz", hash = "sha256:59021ca5bc7ca95f2b06fb0730280fb3f60ed898060bcd874c156d093853b5f3", size = 618882, upload-time = "2026-01-20T00:50:40.076Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/70/35/2942dcaf19ca1becc7f9a9001d0cf98168634732a33efbb06a6f382c36b1/pyiceberg_core-0.7.0-cp39-abi3-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:99e23463c30c4180329719fe1f120e779b20616a36bbdd42042b70063a13bd39", size = 56895735, upload-time = "2025-10-10T16:30:00.949Z" }, - { url = "https://files.pythonhosted.org/packages/93/54/dbf169474b8c336316657041fd6e2791b534f8ab3cffe50509533993de03/pyiceberg_core-0.7.0-cp39-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:94fe0281f09c84cdd4e56d217865ef1d81e0cb0b708ba6eb2f4c5ae8bf86f0ba", size = 30991747, upload-time = "2025-10-10T16:30:04.515Z" }, - { url = "https://files.pythonhosted.org/packages/ed/34/545cf261c343a8d04f75e25aa259f0cf5020b913ede0cb3bdf17c2c7690a/pyiceberg_core-0.7.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:74543885cc97e8d976707f97dd01e8f03dce8a5d3a01e41ada0aaefa13c742f9", size = 31572481, upload-time = "2025-10-10T16:30:07.319Z" }, - { url = "https://files.pythonhosted.org/packages/63/6b/c8ae2eb1fd60798b819720915e50ea29befafb8816141182df7fa5e788d6/pyiceberg_core-0.7.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:2303276b0d8b57b8ce4a3ef9e5050ffdefd9561684728cd0a9a72a9ad0a6a74d", size = 30588256, upload-time = 
"2025-10-10T16:30:11.047Z" }, - { url = "https://files.pythonhosted.org/packages/4a/8f/e63e03afb2e655d6f984a4e57ea53aa860a545298eeb5bc887a5d5db9c3b/pyiceberg_core-0.7.0-cp39-abi3-win_amd64.whl", hash = "sha256:64dd3c3c7af6d39097e1d070161e98118e5dd423392992a9074e1ab813fd467b", size = 26606962, upload-time = "2025-10-10T16:30:13.96Z" }, + { url = "https://files.pythonhosted.org/packages/77/e0/9a8fa537d29d34e3265682056d6517b926975107b5b1af6057d1713557d6/pyiceberg_core-0.8.0-cp310-abi3-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:d60c75a741a1d9199277a9e50fc3adbc84ab286a881f9b1f721fa120e7197912", size = 24733948, upload-time = "2026-01-20T00:50:20.566Z" }, + { url = "https://files.pythonhosted.org/packages/6d/3e/f5522c1e9c20c3e89bfd76b2f54ba38e57389e5a2872233e49e60a131e04/pyiceberg_core-0.8.0-cp310-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1d71e566b2d56141760ff8734667eede5a5d60963dfbcdce80c2dd3cf2edb39d", size = 11682041, upload-time = "2026-01-20T00:50:22.843Z" }, + { url = "https://files.pythonhosted.org/packages/95/4b/f799e5c7a2b2ede75514e64901503358a7a134ca1ea217fd86535af533b6/pyiceberg_core-0.8.0-cp310-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:82782d1b974200c5526d069391ba2bc235a868b5d0d6ac17ca406df735ab89a3", size = 13835428, upload-time = "2026-01-20T00:50:25.021Z" }, + { url = "https://files.pythonhosted.org/packages/0b/ff/2dbd6f7c99a2f782f908be2cc997371de45cc1df61abeeff1fc0165c05b6/pyiceberg_core-0.8.0-cp310-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:0e14b2aea26293ba5878c398adc880fff0f1ce5d989e00d4b1a930c143541114", size = 14580807, upload-time = "2026-01-20T00:50:27.158Z" }, + { url = "https://files.pythonhosted.org/packages/bc/13/176c2b00a9b804af79d8b697ba1a2525f4390e959be777076972071ca069/pyiceberg_core-0.8.0-cp310-abi3-win_amd64.whl", hash = "sha256:a5726cc62f9ac2582a0d5dde92e4140b711b5e29ec0c6c636d6d2782d984031b", size = 13354110, upload-time = 
"2026-01-20T00:50:29.785Z" }, + { url = "https://files.pythonhosted.org/packages/43/33/2c93b4d40f38e173dcfb2f555a2d992e3345d38eba8f8fbb2731c2e18462/pyiceberg_core-0.8.0-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0c51f1e6a4112864c8fface904429814b26baa45efab7efbae8013796e815915", size = 11682427, upload-time = "2026-01-20T00:50:31.714Z" }, + { url = "https://files.pythonhosted.org/packages/7c/97/25e835c2bf9b090bf97174638bc26a034952208e2767730035e285766bb6/pyiceberg_core-0.8.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:6d2e71fb83ae578d93f24699ee131b77e5ee630b8ec6310acf570d04062ef2fb", size = 14581679, upload-time = "2026-01-20T00:50:33.869Z" }, ] [[package]] From 0f356cc3313eed4bba59b3d638eb3133db252dde Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Mon, 19 Jan 2026 17:30:55 -0800 Subject: [PATCH 2/6] upgrade datafusion --- pyproject.toml | 2 +- uv.lock | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 749e047517..d01c77798e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -95,7 +95,7 @@ gcsfs = ["gcsfs>=2023.1.0"] rest-sigv4 = ["boto3>=1.24.59"] hf = ["huggingface-hub>=0.24.0"] pyiceberg-core = ["pyiceberg-core>=0.5.1,<0.9.0"] -datafusion = ["datafusion>=45,<49"] +datafusion = ["datafusion>=45,<52"] gcp-auth = ["google-auth>=2.4.0"] [dependency-groups] diff --git a/uv.lock b/uv.lock index 9409dd105c..54e61859bd 100644 --- a/uv.lock +++ b/uv.lock @@ -1042,19 +1042,19 @@ wheels = [ [[package]] name = "datafusion" -version = "48.0.0" +version = "51.0.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "pyarrow" }, { name = "typing-extensions", marker = "python_full_version < '3.13'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/84/6a/9363ca73aa2593fce9ac3ad1c6e97db7ec78530a316a3dbc0fa2a330b597/datafusion-48.0.0.tar.gz", hash = 
"sha256:fcb89124db22a43e00bf5a1a4542157155d83d69589677c5309f106e83156a32", size = 182992, upload-time = "2025-07-12T11:44:18.091Z" } +sdist = { url = "https://files.pythonhosted.org/packages/2c/6d/d0e2632c93bbcca0687eeda672af3f92042ecd349df7be55da86253594a9/datafusion-51.0.0.tar.gz", hash = "sha256:1887c7d5ed3ae5d9f389e62ba869864afad4006a3f7c99ef0ca4707782a7838f", size = 193751, upload-time = "2026-01-09T13:23:41.562Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/f6/68/f02fe93c53dd77afdd0b187d592e618b6a10e9477f8de114baa7f8f4ce51/datafusion-48.0.0-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:24984e3c4077caca7b3746bdcf6d67171c4976325d035970b97bf59d49327c5b", size = 25819127, upload-time = "2025-07-12T11:44:02.883Z" }, - { url = "https://files.pythonhosted.org/packages/a1/21/fdbb3bf1f5bb8f8c06cf80de967ee56519c0ead4ad3354ee0ba22b4bff99/datafusion-48.0.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:31e841d02147b0904984850421ae18499d4ab2492ff1ef4dd9d15d3cba3fbef3", size = 23400042, upload-time = "2025-07-12T11:44:06.516Z" }, - { url = "https://files.pythonhosted.org/packages/5f/73/95daf83a61e6cc877da78831a848aa13b0af050ca0c9df23a96bb61cf234/datafusion-48.0.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b6b1ed4552c496b961d648d2cbbb6a43aaae3c6442acebc795a4ef256f549cd4", size = 28555364, upload-time = "2025-07-12T11:44:09.412Z" }, - { url = "https://files.pythonhosted.org/packages/3c/ca/0227e285fbf1b35d1a45d15f25dc698b594c718b1a514851a1bc1caab812/datafusion-48.0.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:3d316dc339c0231588ac3f4139af490c556912c54c4508c443e3466c81ff457b", size = 26791000, upload-time = "2025-07-12T11:44:12.641Z" }, - { url = "https://files.pythonhosted.org/packages/83/c8/48abb69d2482477996cc1cf33274b953524471ae7eea68dd06d374489aa3/datafusion-48.0.0-cp39-abi3-win_amd64.whl", hash = "sha256:3d75026f93083febef2e8b362f56e19cfbd5d8058c61c3847f04e786697fc4bd", size = 28104564, upload-time = 
"2025-07-12T11:44:15.913Z" }, + { url = "https://files.pythonhosted.org/packages/cf/a9/7717cec053a3309be3020fe3147e3f76e5bf21295fa8adf9b52dd44ea3ff/datafusion-51.0.0-cp310-abi3-macosx_10_12_x86_64.whl", hash = "sha256:0c0d265fe3ee0dcbfa7cc3c64c7cd94fc493f38418bd79debb7ec29f29b7176e", size = 30389413, upload-time = "2026-01-09T13:23:23.266Z" }, + { url = "https://files.pythonhosted.org/packages/55/45/72c9874fd3740a4cb9d55049fdbae0df512dc5433e9f1176f3cfd970f1a1/datafusion-51.0.0-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:43e6011db86e950bf9a21ed73cc089c2346b340a41a4f1044268af6c3a357acc", size = 26982206, upload-time = "2026-01-09T13:23:27.437Z" }, + { url = "https://files.pythonhosted.org/packages/21/ac/b32ba1f25d38fc16e7623cc4bfb7bd68db61be2ef27b2d9969ea5c865765/datafusion-51.0.0-cp310-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e76803907150159aa059d5cc9291645bbaac1b6a46d07e56035118d327b741ae", size = 33246117, upload-time = "2026-01-09T13:23:30.981Z" }, + { url = "https://files.pythonhosted.org/packages/0b/4e/437121422ef010690fc3cdd7f080203e986ba00e0e3c3b577e03f5b54ca2/datafusion-51.0.0-cp310-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:9d0cfabfe1853994adc2e6e9da5f36c1eb061102e34a2f1101fa935c6991c9e1", size = 31421867, upload-time = "2026-01-09T13:23:34.436Z" }, + { url = "https://files.pythonhosted.org/packages/db/fc/58cf27fcb85b2fd2a698253ae46213b1cbda784407e205c148f4006c1429/datafusion-51.0.0-cp310-abi3-win_amd64.whl", hash = "sha256:fd5f9abfd6669062debf0658d13e4583234c89d4df95faf381927b11cea411f5", size = 32517679, upload-time = "2026-01-09T13:23:39.615Z" }, ] [[package]] @@ -3823,7 +3823,7 @@ requires-dist = [ { name = "cachetools", specifier = ">=5.5,<7.0" }, { name = "click", specifier = ">=7.1.1,<9.0.0" }, { name = "daft", marker = "extra == 'daft'", specifier = ">=0.5.0" }, - { name = "datafusion", marker = "extra == 'datafusion'", specifier = ">=45,<49" }, + { name = "datafusion", marker = "extra == 'datafusion'", 
specifier = ">=45,<52" }, { name = "duckdb", marker = "extra == 'duckdb'", specifier = ">=0.5.0,<2.0.0" }, { name = "fsspec", specifier = ">=2023.1.0" }, { name = "gcsfs", marker = "extra == 'gcsfs'", specifier = ">=2023.1.0" }, From e0eb643771dd8f1604c2cd2441e582558ec7feb2 Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Mon, 17 Nov 2025 17:12:08 +0100 Subject: [PATCH 3/6] Remove deprecated datafusion APIs --- mkdocs/docs/api.md | 2 +- pyiceberg/table/__init__.py | 4 ++-- tests/table/test_datafusion.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/mkdocs/docs/api.md b/mkdocs/docs/api.md index 5d43a94078..69bf77c105 100644 --- a/mkdocs/docs/api.md +++ b/mkdocs/docs/api.md @@ -1967,7 +1967,7 @@ iceberg_table.append(data) # Register the table with DataFusion ctx = SessionContext() -ctx.register_table_provider("test", iceberg_table) +ctx.register_table("test", iceberg_table) # Query the table using DataFusion SQL ctx.table("test").show() diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py index b30a1426e7..ec49d482b9 100644 --- a/pyiceberg/table/__init__.py +++ b/pyiceberg/table/__init__.py @@ -1601,7 +1601,7 @@ def __datafusion_table_provider__(self) -> IcebergDataFusionTable: To support DataFusion features such as push down filtering, this function will return a PyCapsule interface that conforms to the FFI Table Provider required by DataFusion. From an end user perspective - you should not need to call this function directly. Instead you can use ``register_table_provider`` in + you should not need to call this function directly. Instead you can use ``register_table`` in the DataFusion SessionContext. 
Returns: @@ -1618,7 +1618,7 @@ def __datafusion_table_provider__(self) -> IcebergDataFusionTable: iceberg_table = catalog.create_table("default.test", schema=data.schema) iceberg_table.append(data) ctx = SessionContext() - ctx.register_table_provider("test", iceberg_table) + ctx.register_table("test", iceberg_table) ctx.table("test").show() ``` Results in diff --git a/tests/table/test_datafusion.py b/tests/table/test_datafusion.py index d9fa3e1e7b..136145ce8a 100644 --- a/tests/table/test_datafusion.py +++ b/tests/table/test_datafusion.py @@ -49,7 +49,7 @@ def test_datafusion_register_pyiceberg_table(catalog: Catalog, arrow_table_with_ iceberg_table.append(arrow_table_with_null) ctx = SessionContext() - ctx.register_table_provider("test", iceberg_table) + ctx.register_table("test", iceberg_table) datafusion_table = ctx.table("test") assert datafusion_table is not None From b8043969a00b523e914c5683842988ed55a1c66b Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Mon, 19 Jan 2026 17:50:33 -0800 Subject: [PATCH 4/6] pin datafusion to v51 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index d01c77798e..11039d1b48 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -95,7 +95,7 @@ gcsfs = ["gcsfs>=2023.1.0"] rest-sigv4 = ["boto3>=1.24.59"] hf = ["huggingface-hub>=0.24.0"] pyiceberg-core = ["pyiceberg-core>=0.5.1,<0.9.0"] -datafusion = ["datafusion>=45,<52"] +datafusion = ["datafusion>=51,<52"] gcp-auth = ["google-auth>=2.4.0"] [dependency-groups] From 107573e8c85167951a4b708941f1f637c2e36d49 Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Mon, 19 Jan 2026 17:53:31 -0800 Subject: [PATCH 5/6] update docs --- mkdocs/docs/api.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mkdocs/docs/api.md b/mkdocs/docs/api.md index 69bf77c105..1c3273196d 100644 --- a/mkdocs/docs/api.md +++ b/mkdocs/docs/api.md @@ -1941,7 +1941,7 @@ PyIceberg integrates with [Apache 
DataFusion](https://datafusion.apache.org/) th The integration has a few caveats: - - Only works with `datafusion >= 45, < 49` + - Only works with `datafusion >= 51, < 52`, which matches the DataFusion version used by `pyiceberg-core` - Depends directly on `iceberg-rust` instead of PyIceberg's implementation - Has limited features compared to the full PyIceberg API From 792bf9b36617b16792789157808def94c9923c1e Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Mon, 19 Jan 2026 18:54:30 -0800 Subject: [PATCH 6/6] thx drew --- uv.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/uv.lock b/uv.lock index 54e61859bd..5085d86520 100644 --- a/uv.lock +++ b/uv.lock @@ -3823,7 +3823,7 @@ requires-dist = [ { name = "cachetools", specifier = ">=5.5,<7.0" }, { name = "click", specifier = ">=7.1.1,<9.0.0" }, { name = "daft", marker = "extra == 'daft'", specifier = ">=0.5.0" }, - { name = "datafusion", marker = "extra == 'datafusion'", specifier = ">=45,<52" }, + { name = "datafusion", marker = "extra == 'datafusion'", specifier = ">=51,<52" }, { name = "duckdb", marker = "extra == 'duckdb'", specifier = ">=0.5.0,<2.0.0" }, { name = "fsspec", specifier = ">=2023.1.0" }, { name = "gcsfs", marker = "extra == 'gcsfs'", specifier = ">=2023.1.0" },