diff --git a/.cargo/audit.toml b/.cargo/audit.toml
index 09e2d35c50..71354ea3a5 100644
--- a/.cargo/audit.toml
+++ b/.cargo/audit.toml
@@ -33,4 +33,9 @@ ignore = [
   #
   # Introduced by object_store, see https://github.com/apache/arrow-rs-object-store/issues/564
   "RUSTSEC-2025-0134",
+  # `rand` unsoundness with custom logger using `rand::rng()`
+  #
+  # Direct dependency upgraded to 0.9.3+. Transitive rand 0.8.5 remains
+  # from reqsign/sqllogictest/rustc-hash — no 0.8.x patch exists.
+  "RUSTSEC-2026-0097",
 ]
diff --git a/.github/workflows/asf-allowlist-check.yml b/.github/workflows/asf-allowlist-check.yml
index d4e84c5922..65dbe8bcbe 100644
--- a/.github/workflows/asf-allowlist-check.yml
+++ b/.github/workflows/asf-allowlist-check.yml
@@ -43,5 +43,4 @@ jobs:
     - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
       with:
         persist-credentials: false
-    # Intentionally unpinned to always use the latest allowlist from the ASF.
-    - uses: apache/infrastructure-actions/allowlist-check@main # zizmor: ignore[unpinned-uses]
+    - uses: apache/infrastructure-actions/allowlist-check@4e9c961f587f72b170874b6f5cd4ac15f7f26eb8  # main
diff --git a/.github/workflows/audit.yml b/.github/workflows/audit.yml
index 68731cbed3..3f9865ed8a 100644
--- a/.github/workflows/audit.yml
+++ b/.github/workflows/audit.yml
@@ -37,7 +37,10 @@ on:
     - cron: '0 0 * * *'
 
 permissions:
+  # Permissions not listed here are set to none
   contents: read
+  checks: write
+  issues: write
 
 jobs:
   security_audit:
diff --git a/.github/workflows/bindings_python_ci.yml b/.github/workflows/bindings_python_ci.yml
index a02ae9f0af..842fce7f83 100644
--- a/.github/workflows/bindings_python_ci.yml
+++ b/.github/workflows/bindings_python_ci.yml
@@ -63,7 +63,7 @@ jobs:
       - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
         with:
           persist-credentials: false
-      - uses: astral-sh/setup-uv@5a095e7a2014a4212f075830d4f7277575a9d098 # v7.3.1
+      - uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0
         with:
           version: "0.9.3"
           enable-cache: true
@@ -95,12 +95,12 @@ jobs:
       - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6
         with:
           python-version: 3.12
-      - uses: PyO3/maturin-action@04ac600d27cdf7a9a280dadf7147097c42b757ad # v1.50.1
+      - uses: PyO3/maturin-action@e83996d129638aa358a18fbd1dfb82f0b0fb5d3b # v1.51.0
         with:
           working-directory: "bindings/python"
           command: build
           args: --out dist -i python3.12 # Explicitly set interpreter; manylinux containers have multiple Pythons and maturin may pick an older one
-      - uses: astral-sh/setup-uv@5a095e7a2014a4212f075830d4f7277575a9d098 # v7.3.1
+      - uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0
         with:
           version: "0.9.3"
           enable-cache: true
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 071d6dbcbf..8b31386e47 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -163,7 +163,7 @@ jobs:
 
       - name: Install cargo-nextest
         if: matrix.test-suite.name == 'default'
-        uses: taiki-e/install-action@0fde6d128a3d980ceac30be8c8b8739abd963b81 # v2.70.0
+        uses: taiki-e/install-action@055f5df8c3f65ea01cd41e9dc855becd88953486 # v2.75.18
         with:
           tool: cargo-nextest
 
diff --git a/.github/workflows/ci_typos.yml b/.github/workflows/ci_typos.yml
index 9373c7295d..fff347e638 100644
--- a/.github/workflows/ci_typos.yml
+++ b/.github/workflows/ci_typos.yml
@@ -47,4 +47,4 @@ jobs:
         with:
           persist-credentials: false
       - name: Check typos
-        uses: crate-ci/typos@631208b7aac2daa8b707f55e7331f9112b0e062d # v1.44.0
+        uses: crate-ci/typos@cf5f1c29a8ac336af8568821ec41919923b05a83 # v1.45.1
diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml
index fe0459aeb7..7e9c8208c8 100644
--- a/.github/workflows/codeql.yml
+++ b/.github/workflows/codeql.yml
@@ -46,11 +46,11 @@ jobs:
         persist-credentials: false
 
     - name: Initialize CodeQL
-      uses: github/codeql-action/init@c10b8064de6f491fea524254123dbe5e09572f13 # v4.35.1
+      uses: github/codeql-action/init@95e58e9a2cdfd71adc6e0353d5c52f41a045d225 # v4.35.2
       with:
         languages: actions
 
     - name: Perform CodeQL Analysis
-      uses: github/codeql-action/analyze@c10b8064de6f491fea524254123dbe5e09572f13 # v4.35.1
+      uses: github/codeql-action/analyze@95e58e9a2cdfd71adc6e0353d5c52f41a045d225 # v4.35.2
       with:
         category: "/language:actions"
diff --git a/.github/workflows/release_python.yml b/.github/workflows/release_python.yml
index c9817e064c..f93f299d56 100644
--- a/.github/workflows/release_python.yml
+++ b/.github/workflows/release_python.yml
@@ -61,21 +61,21 @@ jobs:
             exit 1
           fi
           echo "✅ Release tag format is valid: $RELEASE_TAG"
-          
+
           # Strip 'v' prefix for cargo version
           CARGO_VERSION="${RELEASE_TAG#v}"
           echo "Cargo version (without v prefix): $CARGO_VERSION"
-          
+
           # For manual triggers, validate that the tag matches the version in Cargo.toml
           if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then
             # Extract base version (without -rc.X suffix) for comparison with Cargo.toml
             BASE_VERSION="${CARGO_VERSION%-rc.*}"
             echo "Base version (for Cargo.toml comparison): $BASE_VERSION"
-            
+
             # Read version from Cargo.toml and validate it matches
             CARGO_TOML_VERSION=$(grep '^version = ' bindings/python/Cargo.toml | head -1 | sed 's/version = "\(.*\)"/\1/')
             echo "Version in bindings/python/Cargo.toml: $CARGO_TOML_VERSION"
-            
+
             if [ "$BASE_VERSION" != "$CARGO_TOML_VERSION" ]; then
               echo "❌ Version mismatch!"
               echo "   Release tag base version: $BASE_VERSION"
@@ -85,7 +85,7 @@ jobs:
             fi
             echo "✅ Version matches bindings/python/Cargo.toml"
           fi
-          
+
           # Check if this is a release candidate
           if [[ "$RELEASE_TAG" =~ -rc\.[0-9]+$ ]]; then
             IS_RC="true"
@@ -94,7 +94,7 @@ jobs:
             IS_RC="false"
             echo "This is a stable release"
           fi
-          
+
           # Set outputs for other jobs to use
           echo "cargo-version=$CARGO_VERSION" >> $GITHUB_OUTPUT
           echo "is-rc=$IS_RC" >> $GITHUB_OUTPUT
@@ -110,7 +110,7 @@ jobs:
       - name: Install toml-cli
         if: ${{ needs.validate-release-tag.outputs.is-rc == 'true' }}
         run: cargo install toml-cli
-      
+
       - name: Set cargo version for RC
         if: ${{ needs.validate-release-tag.outputs.is-rc == 'true' }}
         working-directory: "bindings/python"
@@ -124,13 +124,13 @@ jobs:
         env:
           NEEDS_VALIDATE_RELEASE_TAG_OUTPUTS_CARGO_VERSION: ${{ needs.validate-release-tag.outputs.cargo-version }}
 
-      - uses: PyO3/maturin-action@04ac600d27cdf7a9a280dadf7147097c42b757ad # v1.50.1
+      - uses: PyO3/maturin-action@e83996d129638aa358a18fbd1dfb82f0b0fb5d3b # v1.51.0
         with:
           working-directory: "bindings/python"
           command: sdist
           args: -o dist
       - name: Upload sdist
-        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
+        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
         with:
           name: wheels-sdist
           path: bindings/python/dist
@@ -159,7 +159,7 @@ jobs:
       - name: Install toml-cli
         if: ${{ needs.validate-release-tag.outputs.is-rc == 'true' }}
         run: cargo install toml-cli
-      
+
       - name: Set cargo version for RC
         if: ${{ needs.validate-release-tag.outputs.is-rc == 'true' }}
         working-directory: "bindings/python"
@@ -184,7 +184,7 @@ jobs:
         uses: ./.github/actions/setup-builder
         with:
           rust-version: ${{ steps.get-msrv.outputs.msrv }}
-      - uses: PyO3/maturin-action@04ac600d27cdf7a9a280dadf7147097c42b757ad # v1.50.1
+      - uses: PyO3/maturin-action@e83996d129638aa358a18fbd1dfb82f0b0fb5d3b # v1.51.0
         with:
           target: ${{ matrix.target }}
           manylinux: ${{ matrix.manylinux || 'auto' }}
@@ -192,7 +192,7 @@ jobs:
           command: build
           args: --release -o dist -i python3.12 # Explicitly set interpreter; manylinux containers have multiple Pythons and maturin may pick an older one
       - name: Upload wheels
-        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
+        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
         with:
           name: wheels-${{ matrix.os }}-${{ matrix.target }}
           path: bindings/python/dist
diff --git a/.github/workflows/release_python_nightly.yml b/.github/workflows/release_python_nightly.yml
index 55695784e9..26b034554c 100644
--- a/.github/workflows/release_python_nightly.yml
+++ b/.github/workflows/release_python_nightly.yml
@@ -48,14 +48,14 @@ jobs:
         with:
           timestamp: ${{ needs.set-version.outputs.TIMESTAMP }}
 
-      - uses: PyO3/maturin-action@04ac600d27cdf7a9a280dadf7147097c42b757ad # v1.50.1
+      - uses: PyO3/maturin-action@e83996d129638aa358a18fbd1dfb82f0b0fb5d3b # v1.51.0
         with:
           working-directory: "bindings/python"
           command: sdist
           args: -o dist
 
       - name: Upload sdist
-        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
+        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
         with:
           name: wheels-sdist
           path: bindings/python/dist
@@ -98,7 +98,7 @@ jobs:
         with:
           rust-version: ${{ steps.get-msrv.outputs.msrv }}
 
-      - uses: PyO3/maturin-action@04ac600d27cdf7a9a280dadf7147097c42b757ad # v1.50.1
+      - uses: PyO3/maturin-action@e83996d129638aa358a18fbd1dfb82f0b0fb5d3b # v1.51.0
         with:
           target: ${{ matrix.target }}
           manylinux: ${{ matrix.manylinux || 'auto' }}
@@ -107,7 +107,7 @@ jobs:
           args: --release -o dist -i python3.12 # Explicitly set interpreter; manylinux containers have multiple Pythons and maturin may pick an older one
 
       - name: Upload wheels
-        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
+        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
         with:
           name: wheels-${{ matrix.os }}-${{ matrix.target }}
           path: bindings/python/dist
diff --git a/.github/workflows/zizmor.yml b/.github/workflows/zizmor.yml
index 313835fcbe..9306853937 100644
--- a/.github/workflows/zizmor.yml
+++ b/.github/workflows/zizmor.yml
@@ -39,6 +39,6 @@ jobs:
           persist-credentials: false
 
       - name: Run zizmor 🌈
-        uses: zizmorcore/zizmor-action@71321a20a9ded102f6e9ce5718a2fcec2c4f70d8 # v0.5.2
+        uses: zizmorcore/zizmor-action@b1d7e1fb5de872772f31590499237e7cce841e8e # v0.5.3
         with:
           advanced-security: false
diff --git a/.typos.toml b/.typos.toml
index 407ce8168c..36996a553a 100644
--- a/.typos.toml
+++ b/.typos.toml
@@ -18,5 +18,9 @@
 [type.rust]
 extend-ignore-identifiers-re = ["^bimap$"]
 
+[default.extend-words]
+ags = "ags"
+AGS = "AGS"
+
 [files]
 extend-exclude = ["**/testdata", "CHANGELOG.md"]
diff --git a/Cargo.lock b/Cargo.lock
index d3b5bb6646..2dad4ba41d 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -133,7 +133,7 @@ version = "1.1.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc"
 dependencies = [
- "windows-sys 0.61.2",
+ "windows-sys 0.60.2",
 ]
 
 [[package]]
@@ -144,7 +144,7 @@ checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d"
 dependencies = [
  "anstyle",
  "once_cell_polyfill",
- "windows-sys 0.61.2",
+ "windows-sys 0.60.2",
 ]
 
 [[package]]
@@ -169,7 +169,7 @@ dependencies = [
  "miniz_oxide",
  "num-bigint",
  "quad-rand",
- "rand 0.9.2",
+ "rand 0.9.4",
  "regex-lite",
  "serde",
  "serde_bytes",
@@ -1062,7 +1062,7 @@ version = "3.9.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "519bd3116aeeb42d5372c29d982d16d0170d3d4a5ed85fc7dd91642ffff3c67c"
 dependencies = [
- "darling 0.23.0",
+ "darling 0.20.11",
  "ident_case",
  "prettyplease",
  "proc-macro2",
@@ -1103,6 +1103,20 @@ name = "bytemuck"
 version = "1.25.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec"
+dependencies = [
+ "bytemuck_derive",
+]
+
+[[package]]
+name = "bytemuck_derive"
+version = "1.10.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f9abbd1bc6865053c427f7198e6af43bfdedc55ab791faed4fbd361d789575ff"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
 
 [[package]]
 name = "byteorder"
@@ -1286,7 +1300,7 @@ version = "3.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "faf9468729b8cbcea668e36183cb69d317348c2e08e994829fb56ebfdfbaac34"
 dependencies = [
- "windows-sys 0.61.2",
+ "windows-sys 0.48.0",
 ]
 
 [[package]]
@@ -1605,9 +1619,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "de9f8117889ba9503440f1dd79ebab32ba52ccf1720bb83cd718a29d4edc0d16"
+checksum = "93db0e623840612f7f2cd757f7e8a8922064192363732c88692e0870016e141b"
 dependencies = [
  "arrow",
  "arrow-schema",
@@ -1649,7 +1663,7 @@ dependencies = [
  "object_store",
  "parking_lot",
  "parquet",
- "rand 0.9.2",
+ "rand 0.9.4",
  "regex",
  "sqlparser",
  "tempfile",
@@ -1661,9 +1675,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-catalog"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "be893b73a13671f310ffcc8da2c546b81efcc54c22e0382c0a28aa3537017137"
+checksum = "37cefde60b26a7f4ff61e9d2ff2833322f91df2b568d7238afe67bde5bdffb66"
 dependencies = [
  "arrow",
  "async-trait",
@@ -1686,9 +1700,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-catalog-listing"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "830487b51ed83807d6b32d6325f349c3144ae0c9bf772cf2a712db180c31d5e6"
+checksum = "17e112307715d6a7a331111a4c2330ff54bc237183511c319e3708a4cff431fb"
 dependencies = [
  "arrow",
  "async-trait",
@@ -1709,9 +1723,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-cli"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8735220c84a731c3917dce75ec837a8376eddf5462b0c5dbaf5a2e354c9b6e05"
+checksum = "84a22c001ad1ac11cda09dab69b151eef5b1a992e23bc524ab0d1e63e5dea327"
 dependencies = [
  "arrow",
  "async-trait",
@@ -1737,9 +1751,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-common"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0d7663f3af955292f8004e74bcaf8f7ea3d66cc38438749615bb84815b61a293"
+checksum = "d72a11ca44a95e1081870d3abb80c717496e8a7acb467a1d3e932bb636af5cc2"
 dependencies = [
  "ahash",
  "apache-avro",
@@ -1764,9 +1778,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-common-runtime"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5f590205c7e32fe1fea48dd53ffb406e56ae0e7a062213a3ac848db8771641bd"
+checksum = "89f4afaed29670ec4fd6053643adc749fe3f4bc9d1ce1b8c5679b22c67d12def"
 dependencies = [
  "futures",
  "log",
@@ -1775,9 +1789,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-datasource"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fde1e030a9dc87b743c806fbd631f5ecfa2ccaa4ffb61fa19144a07fea406b79"
+checksum = "e9fb386e1691355355a96419978a0022b7947b44d4a24a6ea99f00b6b485cbb6"
 dependencies = [
  "arrow",
  "async-compression",
@@ -1801,7 +1815,7 @@ dependencies = [
  "liblzma",
  "log",
  "object_store",
- "rand 0.9.2",
+ "rand 0.9.4",
  "tokio",
  "tokio-util",
  "url",
@@ -1810,9 +1824,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-datasource-arrow"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "331ebae7055dc108f9b54994b93dff91f3a17445539efe5b74e89264f7b36e15"
+checksum = "ffa6c52cfed0734c5f93754d1c0175f558175248bf686c944fb05c373e5fc096"
 dependencies = [
  "arrow",
  "arrow-ipc",
@@ -1834,9 +1848,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-datasource-avro"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "49dda81c79b6ba57b1853a9158abc66eb85a3aa1cede0c517dabec6d8a4ed3aa"
+checksum = "a579c3bd290c66ea4b269493e75e8a3ed42c9c895a651f10210a29538aee50c4"
 dependencies = [
  "apache-avro",
  "arrow",
@@ -1854,9 +1868,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-datasource-csv"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9e0d475088325e2986876aa27bb30d0574f72a22955a527d202f454681d55c5c"
+checksum = "503f29e0582c1fc189578d665ff57d9300da1f80c282777d7eb67bb79fb8cdca"
 dependencies = [
  "arrow",
  "async-trait",
@@ -1877,9 +1891,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-datasource-json"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ea1520d81f31770f3ad6ee98b391e75e87a68a5bb90de70064ace5e0a7182fe8"
+checksum = "e33804749abc8d0c8cb7473228483cb8070e524c6f6086ee1b85a64debe2b3d2"
 dependencies = [
  "arrow",
  "async-trait",
@@ -1901,9 +1915,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-datasource-parquet"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "95be805d0742ab129720f4c51ad9242cd872599cdb076098b03f061fcdc7f946"
+checksum = "32a8e0365e0e08e8ff94d912f0ababcf9065a1a304018ba90b1fc83c855b4997"
 dependencies = [
  "arrow",
  "async-trait",
@@ -1931,15 +1945,15 @@ dependencies = [
 
 [[package]]
 name = "datafusion-doc"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5c93ad9e37730d2c7196e68616f3f2dd3b04c892e03acd3a8eeca6e177f3c06a"
+checksum = "8de6ac0df1662b9148ad3c987978b32cbec7c772f199b1d53520c8fa764a87ee"
 
 [[package]]
 name = "datafusion-execution"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9437d3cd5d363f9319f8122182d4d233427de79c7eb748f23054c9aaa0fdd8df"
+checksum = "c03c7fbdaefcca4ef6ffe425a5fc2325763bfb426599bb0bf4536466efabe709"
 dependencies = [
  "arrow",
  "arrow-buffer",
@@ -1954,16 +1968,16 @@ dependencies = [
  "object_store",
  "parking_lot",
  "parquet",
- "rand 0.9.2",
+ "rand 0.9.4",
  "tempfile",
  "url",
 ]
 
 [[package]]
 name = "datafusion-expr"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "67164333342b86521d6d93fa54081ee39839894fb10f7a700c099af96d7552cf"
+checksum = "574b9b6977fedbd2a611cbff12e5caf90f31640ad9dc5870f152836d94bad0dd"
 dependencies = [
  "arrow",
  "async-trait",
@@ -1984,9 +1998,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-expr-common"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ab05fdd00e05d5a6ee362882546d29d6d3df43a6c55355164a7fbee12d163bc9"
+checksum = "7d7c3adf3db8bf61e92eb90cb659c8e8b734593a8f7c8e12a843c7ddba24b87e"
 dependencies = [
  "arrow",
  "datafusion-common",
@@ -1997,9 +2011,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-functions"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "04fb863482d987cf938db2079e07ab0d3bb64595f28907a6c2f8671ad71cca7e"
+checksum = "f28aa4e10384e782774b10e72aca4d93ef7b31aa653095d9d4536b0a3dbc51b6"
 dependencies = [
  "arrow",
  "arrow-buffer",
@@ -2020,7 +2034,7 @@ dependencies = [
  "md-5",
  "memchr",
  "num-traits",
- "rand 0.9.2",
+ "rand 0.9.4",
  "regex",
  "sha2",
  "unicode-segmentation",
@@ -2029,9 +2043,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-functions-aggregate"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "829856f4e14275fb376c104f27cbf3c3b57a9cfe24885d98677525f5e43ce8d6"
+checksum = "00aa6217e56098ba84e0a338176fe52f0a84cca398021512c6c8c5eff806d0ad"
 dependencies = [
  "ahash",
  "arrow",
@@ -2051,9 +2065,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-functions-aggregate-common"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "08af79cc3d2aa874a362fb97decfcbd73d687190cb096f16a6c85a7780cce311"
+checksum = "b511250349407db7c43832ab2de63f5557b19a20dfd236b39ca2c04468b50d47"
 dependencies = [
  "ahash",
  "arrow",
@@ -2064,9 +2078,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-functions-nested"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "465ae3368146d49c2eda3e2c0ef114424c87e8a6b509ab34c1026ace6497e790"
+checksum = "ef13a858e20d50f0a9bb5e96e7ac82b4e7597f247515bccca4fdd2992df0212a"
 dependencies = [
  "arrow",
  "arrow-ord",
@@ -2089,9 +2103,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-functions-table"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6156e6b22fcf1784112fc0173f3ae6e78c8fdb4d3ed0eace9543873b437e2af6"
+checksum = "72b40d3f5bbb3905f9ccb1ce9485a9595c77b69758a7c24d3ba79e334ff51e7e"
 dependencies = [
  "arrow",
  "async-trait",
@@ -2105,9 +2119,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-functions-window"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ca7baec14f866729012efb89011a6973f3a346dc8090c567bfcd328deff551c1"
+checksum = "d4e88ec9d57c9b685d02f58bfee7be62d72610430ddcedb82a08e5d9925dbfb6"
 dependencies = [
  "arrow",
  "datafusion-common",
@@ -2123,9 +2137,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-functions-window-common"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "159228c3280d342658466bb556dc24de30047fe1d7e559dc5d16ccc5324166f9"
+checksum = "8307bb93519b1a91913723a1130cfafeee3f72200d870d88e91a6fc5470ede5c"
 dependencies = [
  "datafusion-common",
  "datafusion-physical-expr-common",
@@ -2133,9 +2147,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-macros"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e5427e5da5edca4d21ea1c7f50e1c9421775fe33d7d5726e5641a833566e7578"
+checksum = "2e367e6a71051d0ebdd29b2f85d12059b38b1d1f172c6906e80016da662226bd"
 dependencies = [
  "datafusion-doc",
  "quote",
@@ -2144,9 +2158,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-optimizer"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "89099eefcd5b223ec685c36a41d35c69239236310d71d339f2af0fa4383f3f46"
+checksum = "e929015451a67f77d9d8b727b2bf3a40c4445fdef6cdc53281d7d97c76888ace"
 dependencies = [
  "arrow",
  "chrono",
@@ -2164,9 +2178,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-physical-expr"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0f222df5195d605d79098ef37bdd5323bff0131c9d877a24da6ec98dfca9fe36"
+checksum = "4b1e68aba7a4b350401cfdf25a3d6f989ad898a7410164afe9ca52080244cb59"
 dependencies = [
  "ahash",
  "arrow",
@@ -2188,9 +2202,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-physical-expr-adapter"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "40838625d63d9c12549d81979db3dd675d159055eb9135009ba272ab0e8d0f64"
+checksum = "ea22315f33cf2e0adc104e8ec42e285f6ed93998d565c65e82fec6a9ee9f9db4"
 dependencies = [
  "arrow",
  "datafusion-common",
@@ -2203,9 +2217,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-physical-expr-common"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "eacbcc4cfd502558184ed58fa3c72e775ec65bf077eef5fd2b3453db676f893c"
+checksum = "b04b45ea8ad3ac2d78f2ea2a76053e06591c9629c7a603eda16c10649ecf4362"
 dependencies = [
  "ahash",
  "arrow",
@@ -2220,9 +2234,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-physical-optimizer"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d501d0e1d0910f015677121601ac177ec59272ef5c9324d1147b394988f40941"
+checksum = "7cb13397809a425918f608dfe8653f332015a3e330004ab191b4404187238b95"
 dependencies = [
  "arrow",
  "datafusion-common",
@@ -2239,9 +2253,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-physical-plan"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "463c88ad6f1ecab1810f4c9f046898bee035b370137eb79b2b2db925e270631d"
+checksum = "5edc023675791af9d5fb4cc4c24abf5f7bd3bd4dcf9e5bd90ea1eff6976dcc79"
 dependencies = [
  "ahash",
  "arrow",
@@ -2271,9 +2285,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-pruning"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2857618a0ecbd8cd0cf29826889edd3a25774ec26b2995fc3862095c95d88fc6"
+checksum = "ac8c76860e355616555081cab5968cec1af7a80701ff374510860bcd567e365a"
 dependencies = [
  "arrow",
  "datafusion-common",
@@ -2288,9 +2302,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-session"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ef8637e35022c5c775003b3ab1debc6b4a8f0eb41b069bdd5475dd3aa93f6eba"
+checksum = "5412111aa48e2424ba926112e192f7a6b7e4ccb450145d25ce5ede9f19dc491e"
 dependencies = [
  "async-trait",
  "datafusion-common",
@@ -2302,9 +2316,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-spark"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "923a8b871962a9d860f036f743a20af50ff04729f1da2468ed220dab4f61c97d"
+checksum = "e059dcf8544da0d6598d0235be3cc29c209094a5976b2e4822e4a2cf91c2b5c5"
 dependencies = [
  "arrow",
  "bigdecimal",
@@ -2320,7 +2334,7 @@ dependencies = [
  "datafusion-functions-nested",
  "log",
  "percent-encoding",
- "rand 0.9.2",
+ "rand 0.9.4",
  "serde_json",
  "sha1",
  "sha2",
@@ -2329,9 +2343,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-sql"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "12d9e9f16a1692a11c94bcc418191fa15fd2b4d72a0c1a0c607db93c0b84dd81"
+checksum = "fa0d133ddf8b9b3b872acac900157f783e7b879fe9a6bccf389abebbfac45ec1"
 dependencies = [
  "arrow",
  "bigdecimal",
@@ -2348,9 +2362,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-sqllogictest"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a43746bd59e7f2655be4c5553ede4a1ceb1cd34005932fa9e2bd0641c714c46e"
+checksum = "04e5a4a7a49143a68936992b6dbb0db44121c635e9992b2482817278f1e69c56"
 dependencies = [
  "arrow",
  "async-trait",
@@ -2374,9 +2388,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-substrait"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d5e5656a7e63d51dd3e5af3dbd347ea83bbe993a77c66b854b74961570d16490"
+checksum = "98494539a5468979cc42d86c7bc5f0f8cb71ee5c742694c26fc34efdd29dd2e5"
 dependencies = [
  "async-recursion",
  "async-trait",
@@ -2480,7 +2494,7 @@ dependencies = [
  "libc",
  "option-ext",
  "redox_users",
- "windows-sys 0.61.2",
+ "windows-sys 0.59.0",
 ]
 
 [[package]]
@@ -2633,7 +2647,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb"
 dependencies = [
  "libc",
- "windows-sys 0.61.2",
+ "windows-sys 0.52.0",
 ]
 
 [[package]]
@@ -3035,6 +3049,7 @@ version = "2.7.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b"
 dependencies = [
+ "bytemuck",
  "cfg-if",
  "crunchy",
  "num-traits",
@@ -3265,7 +3280,7 @@ dependencies = [
  "libc",
  "percent-encoding",
  "pin-project-lite",
- "socket2 0.6.3",
+ "socket2 0.5.10",
  "tokio",
  "tower-service",
  "tracing",
@@ -3335,12 +3350,13 @@ dependencies = [
  "ordered-float 4.6.0",
  "parquet",
  "pretty_assertions",
- "rand 0.8.5",
+ "rand 0.9.4",
  "regex",
  "reqsign",
  "reqwest",
  "roaring",
  "serde",
+ "serde_arrow",
  "serde_bytes",
  "serde_derive",
  "serde_json",
@@ -3449,14 +3465,19 @@ name = "iceberg-catalog-s3tables"
 version = "0.9.0"
 dependencies = [
  "anyhow",
+ "arrow-array",
+ "arrow-schema",
  "async-trait",
  "aws-config",
  "aws-sdk-s3tables",
+ "futures",
  "iceberg",
  "iceberg-storage-opendal",
  "iceberg_test_utils",
  "itertools 0.13.0",
+ "parquet",
  "tokio",
+ "uuid",
 ]
 
 [[package]]
@@ -3834,7 +3855,7 @@ dependencies = [
  "portable-atomic",
  "portable-atomic-util",
  "serde_core",
- "windows-sys 0.61.2",
+ "windows-sys 0.52.0",
 ]
 
 [[package]]
@@ -4114,6 +4135,21 @@ dependencies = [
  "twox-hash",
 ]
 
+[[package]]
+name = "marrow"
+version = "0.2.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f5240d6977234968ff9ad254bfa73aa397fb51e41dcb22b1eb85835e9295485b"
+dependencies = [
+ "arrow-array",
+ "arrow-buffer",
+ "arrow-data",
+ "arrow-schema",
+ "bytemuck",
+ "half",
+ "serde",
+]
+
 [[package]]
 name = "md-5"
 version = "0.10.6"
@@ -4241,7 +4277,7 @@ dependencies = [
  "hyper-util",
  "log",
  "pin-project-lite",
- "rand 0.9.2",
+ "rand 0.9.4",
  "regex",
  "serde_json",
  "serde_urlencoded",
@@ -4370,7 +4406,7 @@ version = "0.50.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5"
 dependencies = [
- "windows-sys 0.61.2",
+ "windows-sys 0.59.0",
 ]
 
 [[package]]
@@ -4499,7 +4535,7 @@ dependencies = [
  "parking_lot",
  "percent-encoding",
  "quick-xml 0.39.2",
- "rand 0.10.0",
+ "rand 0.10.1",
  "reqwest",
  "ring",
  "rustls-pki-types",
@@ -5026,7 +5062,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "343d3bd7056eda839b03204e68deff7d1b13aba7af2b2fd16890697274262ee7"
 dependencies = [
  "heck",
- "itertools 0.14.0",
+ "itertools 0.13.0",
  "log",
  "multimap",
  "petgraph",
@@ -5045,7 +5081,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b"
 dependencies = [
  "anyhow",
- "itertools 0.14.0",
+ "itertools 0.13.0",
  "proc-macro2",
  "quote",
  "syn",
@@ -5139,7 +5175,7 @@ dependencies = [
  "quinn-udp",
  "rustc-hash",
  "rustls",
- "socket2 0.6.3",
+ "socket2 0.5.10",
  "thiserror 2.0.18",
  "tokio",
  "tracing",
@@ -5155,7 +5191,7 @@ dependencies = [
  "bytes",
  "getrandom 0.3.4",
  "lru-slab",
- "rand 0.9.2",
+ "rand 0.9.4",
  "ring",
  "rustc-hash",
  "rustls",
@@ -5176,7 +5212,7 @@ dependencies = [
  "cfg_aliases",
  "libc",
  "once_cell",
- "socket2 0.6.3",
+ "socket2 0.5.10",
  "tracing",
  "windows-sys 0.60.2",
 ]
@@ -5235,9 +5271,9 @@ dependencies = [
 
 [[package]]
 name = "rand"
-version = "0.9.2"
+version = "0.9.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1"
+checksum = "44c5af06bb1b7d3216d91932aed5265164bf384dc89cd6ba05cf59a35f5f76ea"
 dependencies = [
  "rand_chacha 0.9.0",
  "rand_core 0.9.5",
@@ -5245,9 +5281,9 @@ dependencies = [
 
 [[package]]
 name = "rand"
-version = "0.10.0"
+version = "0.10.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bc266eb313df6c5c09c1c7b1fbe2510961e5bcd3add930c1e31f7ed9da0feff8"
+checksum = "d2e8e8bcc7961af1fdac401278c6a831614941f6164ee3bf4ce61b7edb162207"
 dependencies = [
  "chacha20",
  "getrandom 0.4.2",
@@ -5641,7 +5677,7 @@ dependencies = [
  "errno",
  "libc",
  "linux-raw-sys",
- "windows-sys 0.61.2",
+ "windows-sys 0.52.0",
 ]
 
 [[package]]
@@ -5683,9 +5719,9 @@ dependencies = [
 
 [[package]]
 name = "rustls-webpki"
-version = "0.103.10"
+version = "0.103.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "df33b2b81ac578cabaf06b89b0631153a3f416b0a886e8a7a1707fb51abbd1ef"
+checksum = "61c429a8649f110dddef65e2a5ad240f747e85f7758a6bccc7e5777bd33f756e"
 dependencies = [
  "aws-lc-rs",
  "ring",
@@ -5877,6 +5913,21 @@ dependencies = [
  "serde",
 ]
 
+[[package]]
+name = "serde_arrow"
+version = "0.14.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2784e59a0315568e850cb01ddadf458f8c09e28d8cfc4880c2cc08f5dc3444e0"
+dependencies = [
+ "arrow-array",
+ "arrow-schema",
+ "bytemuck",
+ "chrono",
+ "half",
+ "marrow",
+ "serde",
+]
+
 [[package]]
 name = "serde_bytes"
 version = "0.11.19"
@@ -6150,7 +6201,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e"
 dependencies = [
  "libc",
- "windows-sys 0.61.2",
+ "windows-sys 0.60.2",
 ]
 
 [[package]]
@@ -6589,7 +6640,7 @@ dependencies = [
  "getrandom 0.4.2",
  "once_cell",
  "rustix",
- "windows-sys 0.61.2",
+ "windows-sys 0.52.0",
 ]
 
 [[package]]
@@ -6725,9 +6776,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
 
 [[package]]
 name = "tokio"
-version = "1.50.0"
+version = "1.52.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "27ad5e34374e03cfffefc301becb44e9dc3c17584f414349ebe29ed26661822d"
+checksum = "b67dee974fe86fd92cc45b7a95fdd2f99a36a6d7b0d431a231178d3d670bbcc6"
 dependencies = [
  "bytes",
  "libc",
@@ -6742,9 +6793,9 @@ dependencies = [
 
 [[package]]
 name = "tokio-macros"
-version = "2.6.1"
+version = "2.7.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5c55a2eff8b69ce66c84f85e1da1c233edc36ceb85a2058d11b0d6a3c7e7569c"
+checksum = "385a6cb71ab9ab790c5fe8d67f1645e6c450a7ce006a33de03daa956cf70a496"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -7231,7 +7282,7 @@ dependencies = [
  "nix 0.29.0",
  "once_cell",
  "pin-project",
- "rand 0.9.2",
+ "rand 0.9.4",
  "socket2 0.5.10",
  "thiserror 2.0.18",
  "tokio",
@@ -7481,7 +7532,7 @@ version = "0.1.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
 dependencies = [
- "windows-sys 0.61.2",
+ "windows-sys 0.48.0",
 ]
 
 [[package]]
diff --git a/Cargo.toml b/Cargo.toml
index 778e69c9d9..7f612c44bf 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -66,7 +66,7 @@ cfg-if = "1"
 chrono = "0.4.41"
 clap = { version = "4.5.48", features = ["derive", "cargo"] }
 dashmap = "6"
-datafusion = "53.0.0"
+datafusion = "53.1.0"
 datafusion-cli = "53.0.0"
 datafusion-sqllogictest = "53.0.0"
 derive_builder = "0.20"
@@ -108,7 +108,7 @@ ordered-float = "4"
 parquet = "58"
 pilota = "0.11.10"
 pretty_assertions = "1.4"
-rand = "0.8.5"
+rand = "0.9.3"
 regex = "1.11.3"
 reqwest = { version = "0.12.12", default-features = false, features = ["json"] }
 roaring = { version = "0.11" }
diff --git a/bindings/python/Cargo.lock b/bindings/python/Cargo.lock
index 1b5c06f492..72ea322d7b 100644
--- a/bindings/python/Cargo.lock
+++ b/bindings/python/Cargo.lock
@@ -163,7 +163,7 @@ dependencies = [
  "miniz_oxide",
  "num-bigint",
  "quad-rand",
- "rand 0.9.2",
+ "rand 0.9.4",
  "regex-lite",
  "serde",
  "serde_bytes",
@@ -1052,9 +1052,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "de9f8117889ba9503440f1dd79ebab32ba52ccf1720bb83cd718a29d4edc0d16"
+checksum = "93db0e623840612f7f2cd757f7e8a8922064192363732c88692e0870016e141b"
 dependencies = [
  "arrow",
  "arrow-schema",
@@ -1095,7 +1095,7 @@ dependencies = [
  "object_store",
  "parking_lot",
  "parquet",
- "rand 0.9.2",
+ "rand 0.9.4",
  "regex",
  "sqlparser",
  "tempfile",
@@ -1107,9 +1107,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-catalog"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "be893b73a13671f310ffcc8da2c546b81efcc54c22e0382c0a28aa3537017137"
+checksum = "37cefde60b26a7f4ff61e9d2ff2833322f91df2b568d7238afe67bde5bdffb66"
 dependencies = [
  "arrow",
  "async-trait",
@@ -1132,9 +1132,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-catalog-listing"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "830487b51ed83807d6b32d6325f349c3144ae0c9bf772cf2a712db180c31d5e6"
+checksum = "17e112307715d6a7a331111a4c2330ff54bc237183511c319e3708a4cff431fb"
 dependencies = [
  "arrow",
  "async-trait",
@@ -1155,9 +1155,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-common"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0d7663f3af955292f8004e74bcaf8f7ea3d66cc38438749615bb84815b61a293"
+checksum = "d72a11ca44a95e1081870d3abb80c717496e8a7acb467a1d3e932bb636af5cc2"
 dependencies = [
  "ahash",
  "arrow",
@@ -1180,9 +1180,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-common-runtime"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5f590205c7e32fe1fea48dd53ffb406e56ae0e7a062213a3ac848db8771641bd"
+checksum = "89f4afaed29670ec4fd6053643adc749fe3f4bc9d1ce1b8c5679b22c67d12def"
 dependencies = [
  "futures",
  "log",
@@ -1191,9 +1191,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-datasource"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fde1e030a9dc87b743c806fbd631f5ecfa2ccaa4ffb61fa19144a07fea406b79"
+checksum = "e9fb386e1691355355a96419978a0022b7947b44d4a24a6ea99f00b6b485cbb6"
 dependencies = [
  "arrow",
  "async-compression",
@@ -1217,7 +1217,7 @@ dependencies = [
  "liblzma",
  "log",
  "object_store",
- "rand 0.9.2",
+ "rand 0.9.4",
  "tokio",
  "tokio-util",
  "url",
@@ -1226,9 +1226,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-datasource-arrow"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "331ebae7055dc108f9b54994b93dff91f3a17445539efe5b74e89264f7b36e15"
+checksum = "ffa6c52cfed0734c5f93754d1c0175f558175248bf686c944fb05c373e5fc096"
 dependencies = [
  "arrow",
  "arrow-ipc",
@@ -1250,9 +1250,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-datasource-csv"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9e0d475088325e2986876aa27bb30d0574f72a22955a527d202f454681d55c5c"
+checksum = "503f29e0582c1fc189578d665ff57d9300da1f80c282777d7eb67bb79fb8cdca"
 dependencies = [
  "arrow",
  "async-trait",
@@ -1273,9 +1273,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-datasource-json"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ea1520d81f31770f3ad6ee98b391e75e87a68a5bb90de70064ace5e0a7182fe8"
+checksum = "e33804749abc8d0c8cb7473228483cb8070e524c6f6086ee1b85a64debe2b3d2"
 dependencies = [
  "arrow",
  "async-trait",
@@ -1297,9 +1297,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-datasource-parquet"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "95be805d0742ab129720f4c51ad9242cd872599cdb076098b03f061fcdc7f946"
+checksum = "32a8e0365e0e08e8ff94d912f0ababcf9065a1a304018ba90b1fc83c855b4997"
 dependencies = [
  "arrow",
  "async-trait",
@@ -1327,15 +1327,15 @@ dependencies = [
 
 [[package]]
 name = "datafusion-doc"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5c93ad9e37730d2c7196e68616f3f2dd3b04c892e03acd3a8eeca6e177f3c06a"
+checksum = "8de6ac0df1662b9148ad3c987978b32cbec7c772f199b1d53520c8fa764a87ee"
 
 [[package]]
 name = "datafusion-execution"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9437d3cd5d363f9319f8122182d4d233427de79c7eb748f23054c9aaa0fdd8df"
+checksum = "c03c7fbdaefcca4ef6ffe425a5fc2325763bfb426599bb0bf4536466efabe709"
 dependencies = [
  "arrow",
  "arrow-buffer",
@@ -1349,16 +1349,16 @@ dependencies = [
  "log",
  "object_store",
  "parking_lot",
- "rand 0.9.2",
+ "rand 0.9.4",
  "tempfile",
  "url",
 ]
 
 [[package]]
 name = "datafusion-expr"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "67164333342b86521d6d93fa54081ee39839894fb10f7a700c099af96d7552cf"
+checksum = "574b9b6977fedbd2a611cbff12e5caf90f31640ad9dc5870f152836d94bad0dd"
 dependencies = [
  "arrow",
  "async-trait",
@@ -1379,9 +1379,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-expr-common"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ab05fdd00e05d5a6ee362882546d29d6d3df43a6c55355164a7fbee12d163bc9"
+checksum = "7d7c3adf3db8bf61e92eb90cb659c8e8b734593a8f7c8e12a843c7ddba24b87e"
 dependencies = [
  "arrow",
  "datafusion-common",
@@ -1422,9 +1422,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-functions"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "04fb863482d987cf938db2079e07ab0d3bb64595f28907a6c2f8671ad71cca7e"
+checksum = "f28aa4e10384e782774b10e72aca4d93ef7b31aa653095d9d4536b0a3dbc51b6"
 dependencies = [
  "arrow",
  "arrow-buffer",
@@ -1445,7 +1445,7 @@ dependencies = [
  "md-5",
  "memchr",
  "num-traits",
- "rand 0.9.2",
+ "rand 0.9.4",
  "regex",
  "sha2",
  "unicode-segmentation",
@@ -1454,9 +1454,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-functions-aggregate"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "829856f4e14275fb376c104f27cbf3c3b57a9cfe24885d98677525f5e43ce8d6"
+checksum = "00aa6217e56098ba84e0a338176fe52f0a84cca398021512c6c8c5eff806d0ad"
 dependencies = [
  "ahash",
  "arrow",
@@ -1476,9 +1476,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-functions-aggregate-common"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "08af79cc3d2aa874a362fb97decfcbd73d687190cb096f16a6c85a7780cce311"
+checksum = "b511250349407db7c43832ab2de63f5557b19a20dfd236b39ca2c04468b50d47"
 dependencies = [
  "ahash",
  "arrow",
@@ -1489,9 +1489,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-functions-nested"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "465ae3368146d49c2eda3e2c0ef114424c87e8a6b509ab34c1026ace6497e790"
+checksum = "ef13a858e20d50f0a9bb5e96e7ac82b4e7597f247515bccca4fdd2992df0212a"
 dependencies = [
  "arrow",
  "arrow-ord",
@@ -1514,9 +1514,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-functions-table"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6156e6b22fcf1784112fc0173f3ae6e78c8fdb4d3ed0eace9543873b437e2af6"
+checksum = "72b40d3f5bbb3905f9ccb1ce9485a9595c77b69758a7c24d3ba79e334ff51e7e"
 dependencies = [
  "arrow",
  "async-trait",
@@ -1530,9 +1530,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-functions-window"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ca7baec14f866729012efb89011a6973f3a346dc8090c567bfcd328deff551c1"
+checksum = "d4e88ec9d57c9b685d02f58bfee7be62d72610430ddcedb82a08e5d9925dbfb6"
 dependencies = [
  "arrow",
  "datafusion-common",
@@ -1548,9 +1548,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-functions-window-common"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "159228c3280d342658466bb556dc24de30047fe1d7e559dc5d16ccc5324166f9"
+checksum = "8307bb93519b1a91913723a1130cfafeee3f72200d870d88e91a6fc5470ede5c"
 dependencies = [
  "datafusion-common",
  "datafusion-physical-expr-common",
@@ -1558,9 +1558,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-macros"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e5427e5da5edca4d21ea1c7f50e1c9421775fe33d7d5726e5641a833566e7578"
+checksum = "2e367e6a71051d0ebdd29b2f85d12059b38b1d1f172c6906e80016da662226bd"
 dependencies = [
  "datafusion-doc",
  "quote",
@@ -1569,9 +1569,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-optimizer"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "89099eefcd5b223ec685c36a41d35c69239236310d71d339f2af0fa4383f3f46"
+checksum = "e929015451a67f77d9d8b727b2bf3a40c4445fdef6cdc53281d7d97c76888ace"
 dependencies = [
  "arrow",
  "chrono",
@@ -1589,9 +1589,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-physical-expr"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0f222df5195d605d79098ef37bdd5323bff0131c9d877a24da6ec98dfca9fe36"
+checksum = "4b1e68aba7a4b350401cfdf25a3d6f989ad898a7410164afe9ca52080244cb59"
 dependencies = [
  "ahash",
  "arrow",
@@ -1613,9 +1613,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-physical-expr-adapter"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "40838625d63d9c12549d81979db3dd675d159055eb9135009ba272ab0e8d0f64"
+checksum = "ea22315f33cf2e0adc104e8ec42e285f6ed93998d565c65e82fec6a9ee9f9db4"
 dependencies = [
  "arrow",
  "datafusion-common",
@@ -1628,9 +1628,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-physical-expr-common"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "eacbcc4cfd502558184ed58fa3c72e775ec65bf077eef5fd2b3453db676f893c"
+checksum = "b04b45ea8ad3ac2d78f2ea2a76053e06591c9629c7a603eda16c10649ecf4362"
 dependencies = [
  "ahash",
  "arrow",
@@ -1645,9 +1645,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-physical-optimizer"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d501d0e1d0910f015677121601ac177ec59272ef5c9324d1147b394988f40941"
+checksum = "7cb13397809a425918f608dfe8653f332015a3e330004ab191b4404187238b95"
 dependencies = [
  "arrow",
  "datafusion-common",
@@ -1664,9 +1664,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-physical-plan"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "463c88ad6f1ecab1810f4c9f046898bee035b370137eb79b2b2db925e270631d"
+checksum = "5edc023675791af9d5fb4cc4c24abf5f7bd3bd4dcf9e5bd90ea1eff6976dcc79"
 dependencies = [
  "ahash",
  "arrow",
@@ -1719,7 +1719,7 @@ dependencies = [
  "datafusion-proto-common",
  "object_store",
  "prost",
- "rand 0.9.2",
+ "rand 0.9.4",
 ]
 
 [[package]]
@@ -1735,9 +1735,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-pruning"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2857618a0ecbd8cd0cf29826889edd3a25774ec26b2995fc3862095c95d88fc6"
+checksum = "ac8c76860e355616555081cab5968cec1af7a80701ff374510860bcd567e365a"
 dependencies = [
  "arrow",
  "datafusion-common",
@@ -1752,9 +1752,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-session"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ef8637e35022c5c775003b3ab1debc6b4a8f0eb41b069bdd5475dd3aa93f6eba"
+checksum = "5412111aa48e2424ba926112e192f7a6b7e4ccb450145d25ce5ede9f19dc491e"
 dependencies = [
  "async-trait",
  "datafusion-common",
@@ -1766,9 +1766,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-sql"
-version = "53.0.0"
+version = "53.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "12d9e9f16a1692a11c94bcc418191fa15fd2b4d72a0c1a0c607db93c0b84dd81"
+checksum = "fa0d133ddf8b9b3b872acac900157f783e7b879fe9a6bccf389abebbfac45ec1"
 dependencies = [
  "arrow",
  "bigdecimal",
@@ -2435,7 +2435,7 @@ dependencies = [
  "once_cell",
  "ordered-float 4.6.0",
  "parquet",
- "rand 0.8.5",
+ "rand 0.9.4",
  "reqwest",
  "roaring",
  "serde",
@@ -3587,7 +3587,7 @@ dependencies = [
  "bytes",
  "getrandom 0.3.4",
  "lru-slab",
- "rand 0.9.2",
+ "rand 0.9.4",
  "ring",
  "rustc-hash",
  "rustls",
@@ -3647,9 +3647,9 @@ dependencies = [
 
 [[package]]
 name = "rand"
-version = "0.9.2"
+version = "0.9.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1"
+checksum = "44c5af06bb1b7d3216d91932aed5265164bf384dc89cd6ba05cf59a35f5f76ea"
 dependencies = [
  "rand_chacha 0.9.0",
  "rand_core 0.9.5",
@@ -3968,9 +3968,9 @@ dependencies = [
 
 [[package]]
 name = "rustls-webpki"
-version = "0.103.10"
+version = "0.103.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "df33b2b81ac578cabaf06b89b0631153a3f416b0a886e8a7a1707fb51abbd1ef"
+checksum = "61c429a8649f110dddef65e2a5ad240f747e85f7758a6bccc7e5777bd33f756e"
 dependencies = [
  "ring",
  "rustls-pki-types",
diff --git a/crates/catalog/glue/src/catalog.rs b/crates/catalog/glue/src/catalog.rs
index a7e0171337..5b3ccf3b39 100644
--- a/crates/catalog/glue/src/catalog.rs
+++ b/crates/catalog/glue/src/catalog.rs
@@ -203,7 +203,6 @@ impl GlueCatalog {
         // Use provided factory or default to OpenDalStorageFactory::S3
         let factory = storage_factory.unwrap_or_else(|| {
             Arc::new(OpenDalStorageFactory::S3 {
-                configured_scheme: "s3a".to_string(),
                 customized_credential_load: None,
             })
         });
diff --git a/crates/catalog/hms/tests/hms_catalog_test.rs b/crates/catalog/hms/tests/hms_catalog_test.rs
index f19cf7bff4..d0e6486ad8 100644
--- a/crates/catalog/hms/tests/hms_catalog_test.rs
+++ b/crates/catalog/hms/tests/hms_catalog_test.rs
@@ -23,7 +23,10 @@
 use std::collections::HashMap;
 use std::sync::Arc;
 
-use iceberg::io::{FileIOBuilder, S3_ACCESS_KEY_ID, S3_ENDPOINT, S3_REGION, S3_SECRET_ACCESS_KEY};
+use iceberg::io::{
+    FileIOBuilder, S3_ACCESS_KEY_ID, S3_ENDPOINT, S3_PATH_STYLE_ACCESS, S3_REGION,
+    S3_SECRET_ACCESS_KEY,
+};
 use iceberg::{Catalog, CatalogBuilder, Namespace, NamespaceIdent};
 use iceberg_catalog_hms::{
     HMS_CATALOG_PROP_THRIFT_TRANSPORT, HMS_CATALOG_PROP_URI, HMS_CATALOG_PROP_WAREHOUSE,
@@ -56,11 +59,11 @@ async fn get_catalog() -> HmsCatalog {
         (S3_ACCESS_KEY_ID.to_string(), "admin".to_string()),
         (S3_SECRET_ACCESS_KEY.to_string(), "password".to_string()),
         (S3_REGION.to_string(), "us-east-1".to_string()),
+        (S3_PATH_STYLE_ACCESS.to_string(), "true".to_string()),
     ]);
 
     // Wait for bucket to actually exist
     let file_io = FileIOBuilder::new(Arc::new(OpenDalStorageFactory::S3 {
-        configured_scheme: "s3a".to_string(),
         customized_credential_load: None,
     }))
     .with_props(props.clone())
@@ -79,7 +82,6 @@ async fn get_catalog() -> HmsCatalog {
 
     HmsCatalogBuilder::default()
         .with_storage_factory(Arc::new(OpenDalStorageFactory::S3 {
-            configured_scheme: "s3a".to_string(),
             customized_credential_load: None,
         }))
         .load("hms", props)
diff --git a/crates/catalog/loader/tests/common/mod.rs b/crates/catalog/loader/tests/common/mod.rs
index 6524d56339..dfa9535672 100644
--- a/crates/catalog/loader/tests/common/mod.rs
+++ b/crates/catalog/loader/tests/common/mod.rs
@@ -24,8 +24,8 @@ use std::fmt;
 use std::sync::Arc;
 
 use iceberg::io::{
-    FileIOBuilder, LocalFsStorageFactory, S3_ACCESS_KEY_ID, S3_ENDPOINT, S3_REGION,
-    S3_SECRET_ACCESS_KEY,
+    FileIOBuilder, LocalFsStorageFactory, S3_ACCESS_KEY_ID, S3_ENDPOINT, S3_PATH_STYLE_ACCESS,
+    S3_REGION, S3_SECRET_ACCESS_KEY,
 };
 use iceberg::memory::{MEMORY_CATALOG_WAREHOUSE, MemoryCatalogBuilder};
 use iceberg::spec::{NestedField, PrimitiveType, Schema, Type};
@@ -242,10 +242,10 @@ async fn glue_catalog() -> GlueCatalog {
         (S3_ACCESS_KEY_ID.to_string(), "admin".to_string()),
         (S3_SECRET_ACCESS_KEY.to_string(), "password".to_string()),
         (S3_REGION.to_string(), "us-east-1".to_string()),
+        (S3_PATH_STYLE_ACCESS.to_string(), "true".to_string()),
     ]);
 
     let file_io = FileIOBuilder::new(Arc::new(OpenDalStorageFactory::S3 {
-        configured_scheme: "s3a".to_string(),
         customized_credential_load: None,
     }))
     .with_props(props.clone())
@@ -293,10 +293,10 @@ async fn hms_catalog() -> HmsCatalog {
         (S3_ACCESS_KEY_ID.to_string(), "admin".to_string()),
         (S3_SECRET_ACCESS_KEY.to_string(), "password".to_string()),
         (S3_REGION.to_string(), "us-east-1".to_string()),
+        (S3_PATH_STYLE_ACCESS.to_string(), "true".to_string()),
     ]);
 
     let file_io = FileIOBuilder::new(Arc::new(OpenDalStorageFactory::S3 {
-        configured_scheme: "s3a".to_string(),
         customized_credential_load: None,
     }))
     .with_props(props.clone())
@@ -313,7 +313,6 @@ async fn hms_catalog() -> HmsCatalog {
 
     HmsCatalogBuilder::default()
         .with_storage_factory(Arc::new(OpenDalStorageFactory::S3 {
-            configured_scheme: "s3a".to_string(),
             customized_credential_load: None,
         }))
         .load("hms", props)
diff --git a/crates/catalog/s3tables/Cargo.toml b/crates/catalog/s3tables/Cargo.toml
index 2fe096fec9..dc7be3027f 100644
--- a/crates/catalog/s3tables/Cargo.toml
+++ b/crates/catalog/s3tables/Cargo.toml
@@ -39,6 +39,11 @@ iceberg-storage-opendal = { workspace = true, features = ["opendal-s3"] }
 
 
 [dev-dependencies]
+arrow-array = { workspace = true }
+arrow-schema = { workspace = true }
+futures = { workspace = true }
 iceberg_test_utils = { path = "../../test_utils", features = ["tests"] }
 itertools = { workspace = true }
+parquet = { workspace = true }
 tokio = { workspace = true }
+uuid = { workspace = true }
diff --git a/crates/catalog/s3tables/src/catalog.rs b/crates/catalog/s3tables/src/catalog.rs
index b88bd77d29..cc43446943 100644
--- a/crates/catalog/s3tables/src/catalog.rs
+++ b/crates/catalog/s3tables/src/catalog.rs
@@ -202,7 +202,6 @@ impl S3TablesCatalog {
         // Use provided factory or default to OpenDalStorageFactory::S3
         let factory = storage_factory.unwrap_or_else(|| {
             Arc::new(OpenDalStorageFactory::S3 {
-                configured_scheme: "s3a".to_string(),
                 customized_credential_load: None,
             })
         });
@@ -707,6 +706,7 @@ where T: std::fmt::Debug {
 
 #[cfg(test)]
 mod tests {
+    use futures::TryStreamExt;
     use iceberg::spec::{NestedField, PrimitiveType, Schema, Type};
     use iceberg::transaction::{ApplyTransactionAction, Transaction};
 
@@ -1175,4 +1175,108 @@ mod tests {
             assert_eq!(err.message(), "Catalog name cannot be empty");
         }
     }
+
+    /// Verify that an S3 Table catalog can create a table, write data, load the same table, and read from it.
+    #[tokio::test]
+    async fn test_s3tables_create_table_write_load_table_read() {
+        use iceberg::writer::base_writer::data_file_writer::DataFileWriterBuilder;
+        use iceberg::writer::file_writer::ParquetWriterBuilder;
+        use iceberg::writer::file_writer::location_generator::{
+            DefaultFileNameGenerator, DefaultLocationGenerator,
+        };
+        use iceberg::writer::file_writer::rolling_writer::RollingFileWriterBuilder;
+        use iceberg::writer::{IcebergWriter, IcebergWriterBuilder};
+
+        let catalog = match load_s3tables_catalog_from_env().await {
+            Ok(Some(c)) => c,
+            Ok(None) => return,
+            Err(e) => panic!("Error loading catalog: {e}"),
+        };
+
+        let ns = NamespaceIdent::new(format!("test_rw_{}", uuid::Uuid::new_v4().simple()));
+        catalog.create_namespace(&ns, HashMap::new()).await.unwrap();
+
+        let table_name = String::from("table");
+
+        let schema = Schema::builder()
+            .with_fields(vec![
+                NestedField::required(1, "id", Type::Primitive(PrimitiveType::Int)).into(),
+            ])
+            .build()
+            .unwrap();
+        let creation = TableCreation::builder()
+            .name(table_name.clone())
+            .schema(schema)
+            .build();
+
+        let table = catalog.create_table(&ns, creation).await.unwrap();
+
+        // Write one row.
+        let arrow_schema: Arc<arrow_schema::Schema> = Arc::new(
+            table
+                .metadata()
+                .current_schema()
+                .as_ref()
+                .try_into()
+                .unwrap(),
+        );
+        let batch = arrow_array::RecordBatch::try_new(arrow_schema, vec![Arc::new(
+            arrow_array::Int32Array::from(vec![42]),
+        )])
+        .unwrap();
+
+        // Locations will be generated based on the table metadata, which will be using `s3://` for Amazon S3 Tables.
+        let location_generator = DefaultLocationGenerator::new(table.metadata().clone()).unwrap();
+        let file_name_generator = DefaultFileNameGenerator::new(
+            "test".to_string(),
+            None,
+            iceberg::spec::DataFileFormat::Parquet,
+        );
+        let parquet_writer_builder = ParquetWriterBuilder::new(
+            parquet::file::properties::WriterProperties::default(),
+            table.metadata().current_schema().clone(),
+        );
+        let rw = RollingFileWriterBuilder::new_with_default_file_size(
+            parquet_writer_builder,
+            table.file_io().clone(),
+            location_generator,
+            file_name_generator,
+        );
+        let mut writer = DataFileWriterBuilder::new(rw).build(None).await.unwrap();
+        writer.write(batch.clone()).await.unwrap();
+        let data_files = writer.close().await.unwrap();
+
+        let tx = Transaction::new(&table);
+        let tx = tx
+            .fast_append()
+            .add_data_files(data_files)
+            .apply(tx)
+            .unwrap();
+        tx.commit(&catalog).await.unwrap();
+
+        // Reload from catalog and read back.
+        let table_ident = TableIdent::new(ns.clone(), table_name.clone());
+        let reloaded = catalog.load_table(&table_ident).await.unwrap();
+        let batches: Vec<arrow_array::RecordBatch> = reloaded
+            .scan()
+            .select_all()
+            .build()
+            .expect("scan to be valid (snapshot exists, schema is OK)")
+            .to_arrow()
+            .await
+            .expect("scan tasks should be OK")
+            .try_collect()
+            .await
+            .expect("scan should complete successfully");
+
+        // Clean up before asserting so a failed assertion cannot leak the table or namespace.
+        catalog.purge_table(&table_ident).await.ok();
+        catalog.drop_namespace(&ns).await.ok();
+
+        assert_eq!(batches.len(), 1);
+        assert_eq!(
+            batches[0], batch,
+            "read records should match records written earlier"
+        );
+    }
 }
diff --git a/crates/iceberg/Cargo.toml b/crates/iceberg/Cargo.toml
index 7e91050605..7f5c235c47 100644
--- a/crates/iceberg/Cargo.toml
+++ b/crates/iceberg/Cargo.toml
@@ -100,6 +100,7 @@ rand = { workspace = true }
 regex = { workspace = true }
 tempfile = { workspace = true }
 minijinja = { workspace = true }
+serde_arrow = { version = "0.14", features = ["arrow-58"] }
 
 [package.metadata.cargo-machete]
 # These dependencies are added to ensure minimal dependency version
diff --git a/crates/iceberg/src/arrow/caching_delete_file_loader.rs b/crates/iceberg/src/arrow/caching_delete_file_loader.rs
index ae97534d83..231971fd54 100644
--- a/crates/iceberg/src/arrow/caching_delete_file_loader.rs
+++ b/crates/iceberg/src/arrow/caching_delete_file_loader.rs
@@ -25,6 +25,7 @@ use tokio::sync::oneshot::{Receiver, channel};
 
 use super::delete_filter::{DeleteFilter, PosDelLoadAction};
 use crate::arrow::delete_file_loader::BasicDeleteFileLoader;
+use crate::arrow::scan_metrics::ScanMetrics;
 use crate::arrow::{arrow_primitive_to_literal, arrow_schema_to_schema};
 use crate::delete_vector::DeleteVector;
 use crate::expr::Predicate::AlwaysTrue;
@@ -77,13 +78,22 @@ enum ParsedDeleteFileContext {
 #[allow(unused_variables)]
 impl CachingDeleteFileLoader {
     pub(crate) fn new(file_io: FileIO, concurrency_limit_data_files: usize) -> Self {
+        let scan_metrics = ScanMetrics::new();
         CachingDeleteFileLoader {
-            basic_delete_file_loader: BasicDeleteFileLoader::new(file_io),
+            basic_delete_file_loader: BasicDeleteFileLoader::new(file_io, scan_metrics),
             concurrency_limit_data_files,
             delete_filter: DeleteFilter::default(),
         }
     }
 
+    /// Swaps in a `BasicDeleteFileLoader` built from the current `FileIO` and `scan_metrics`,
+    /// returning the updated loader; the other fields are left as they were.
+    pub(crate) fn with_scan_metrics(mut self, scan_metrics: ScanMetrics) -> Self {
+        let file_io = self.basic_delete_file_loader.file_io().clone();
+        self.basic_delete_file_loader = BasicDeleteFileLoader::new(file_io, scan_metrics);
+        self
+    }
+
     /// Initiates loading of all deletes for all the specified tasks
     ///
     /// Returned future completes once all positional deletes and delete vectors
@@ -612,7 +622,8 @@ mod tests {
 
         let eq_delete_file_path = setup_write_equality_delete_file_1(table_location);
 
-        let basic_delete_file_loader = BasicDeleteFileLoader::new(file_io.clone());
+        let basic_delete_file_loader =
+            BasicDeleteFileLoader::new(file_io.clone(), ScanMetrics::new());
         let record_batch_stream = basic_delete_file_loader
             .parquet_to_batch_stream(
                 &eq_delete_file_path,
@@ -808,7 +819,8 @@ mod tests {
         };
 
         let file_io = FileIO::new_with_fs();
-        let basic_delete_file_loader = BasicDeleteFileLoader::new(file_io.clone());
+        let basic_delete_file_loader =
+            BasicDeleteFileLoader::new(file_io.clone(), ScanMetrics::new());
 
         let batch_stream = basic_delete_file_loader
             .parquet_to_batch_stream(
@@ -994,7 +1006,8 @@ mod tests {
         writer.write(&record_batch).unwrap();
         writer.close().unwrap();
 
-        let basic_delete_file_loader = BasicDeleteFileLoader::new(file_io.clone());
+        let basic_delete_file_loader =
+            BasicDeleteFileLoader::new(file_io.clone(), ScanMetrics::new());
         let record_batch_stream = basic_delete_file_loader
             .parquet_to_batch_stream(&path, std::fs::metadata(&path).unwrap().len())
             .await
diff --git a/crates/iceberg/src/arrow/delete_file_loader.rs b/crates/iceberg/src/arrow/delete_file_loader.rs
index 0be62ad496..134b029613 100644
--- a/crates/iceberg/src/arrow/delete_file_loader.rs
+++ b/crates/iceberg/src/arrow/delete_file_loader.rs
@@ -23,6 +23,7 @@ use parquet::arrow::ParquetRecordBatchStreamBuilder;
 use crate::arrow::ArrowReader;
 use crate::arrow::reader::ParquetReadOptions;
 use crate::arrow::record_batch_transformer::RecordBatchTransformerBuilder;
+use crate::arrow::scan_metrics::ScanMetrics;
 use crate::io::FileIO;
 use crate::scan::{ArrowRecordBatchStream, FileScanTaskDeleteFile};
 use crate::spec::{Schema, SchemaRef};
@@ -45,13 +46,22 @@ pub trait DeleteFileLoader {
 #[derive(Clone, Debug)]
 pub(crate) struct BasicDeleteFileLoader {
     file_io: FileIO,
+    scan_metrics: ScanMetrics,
 }
 
 #[allow(unused_variables)]
 impl BasicDeleteFileLoader {
-    pub fn new(file_io: FileIO) -> Self {
-        BasicDeleteFileLoader { file_io }
+    pub fn new(file_io: FileIO, scan_metrics: ScanMetrics) -> Self {
+        BasicDeleteFileLoader {
+            file_io,
+            scan_metrics,
+        }
     }
+
+    pub(crate) fn file_io(&self) -> &FileIO {
+        &self.file_io
+    }
+
     /// Loads a RecordBatchStream for a given datafile.
     pub(crate) async fn parquet_to_batch_stream(
         &self,
@@ -69,6 +79,7 @@ impl BasicDeleteFileLoader {
             &self.file_io,
             file_size_in_bytes,
             parquet_read_options,
+            self.scan_metrics.bytes_read_counter(),
         )
         .await?;
 
@@ -137,7 +148,8 @@ mod tests {
         let table_location = tmp_dir.path();
         let file_io = FileIO::new_with_fs();
 
-        let delete_file_loader = BasicDeleteFileLoader::new(file_io.clone());
+        let scan_metrics = ScanMetrics::new();
+        let delete_file_loader = BasicDeleteFileLoader::new(file_io.clone(), scan_metrics);
 
         let file_scan_tasks = setup(table_location);
 
diff --git a/crates/iceberg/src/arrow/incremental.rs b/crates/iceberg/src/arrow/incremental.rs
index 8149c6d677..8d6269071f 100644
--- a/crates/iceberg/src/arrow/incremental.rs
+++ b/crates/iceberg/src/arrow/incremental.rs
@@ -25,6 +25,7 @@ use futures::stream::select;
 use futures::{Stream, StreamExt, TryStreamExt};
 
 use crate::arrow::reader::{ParquetReadOptions, process_record_batch_stream};
+use crate::arrow::scan_metrics::ScanMetrics;
 use crate::arrow::{ArrowReader, StreamsInto};
 use crate::delete_vector::DeleteVector;
 use crate::expr::Bind;
@@ -49,18 +50,34 @@ pub enum IncrementalBatchType {
     Delete,
 }
 
-/// The stream of incremental Arrow `RecordBatch`es with batch type.
-pub type CombinedIncrementalBatchRecordStream =
+/// Inner stream type for [`CombinedIncrementalScanResult`].
+pub type CombinedIncrementalBatchStream =
     Pin<Box<dyn Stream<Item = Result<(IncrementalBatchType, RecordBatch)>> + Send + 'static>>;
 
-/// Stream type for obtaining a separate stream of appended and deleted record batches.
-pub type UnzippedIncrementalBatchRecordStream = (ArrowRecordBatchStream, ArrowRecordBatchStream);
+/// Result of a combined incremental scan: one type-tagged batch stream plus its scan metrics.
+pub struct CombinedIncrementalScanResult {
+    /// Append and delete batches, each tagged with its type, interleaved by `stream::select`.
+    pub stream: CombinedIncrementalBatchStream,
+    /// Metrics for the scan (e.g. bytes read); taken from the underlying unzipped scan result.
+    pub metrics: ScanMetrics,
+}
+
+/// Separate streams for appended and deleted record batches, together with scan metrics.
+pub struct UnzippedIncrementalScanResult {
+    /// Stream of appended record batches (receiving end of the append tasks' channel).
+    pub appends: ArrowRecordBatchStream,
+    /// Stream of deleted record batches (receiving end of the delete tasks' channel).
+    pub deletes: ArrowRecordBatchStream,
+    /// Scan-wide metrics (e.g. bytes read); clones are passed to both append and delete tasks.
+    pub metrics: ScanMetrics,
+}
 
 async fn process_incremental_append_task(
     task: AppendedFileScanTask,
     batch_size: Option<usize>,
     file_io: FileIO,
     parquet_read_options: ParquetReadOptions,
+    scan_metrics: ScanMetrics,
 ) -> Result<ArrowRecordBatchStream> {
     let AppendedFileScanTask {
         base,
@@ -80,6 +97,8 @@ async fn process_incremental_append_task(
         ArrowReader::build_virtual_columns(&base.project_field_ids),
         batch_size,
         None, // name_mapping not yet supported in incremental scan
+        Some(Arc::clone(scan_metrics.bytes_read_counter())),
+        Some(&base.schema),
     )
     .await?;
 
@@ -184,6 +203,7 @@ async fn process_equality_delete_task(
     batch_size: Option<usize>,
     file_io: FileIO,
     parquet_read_options: ParquetReadOptions,
+    scan_metrics: ScanMetrics,
 ) -> Result<ArrowRecordBatchStream> {
     let file_path = task.data_file_path().to_string();
 
@@ -205,6 +225,8 @@ async fn process_equality_delete_task(
         vec![Arc::clone(row_pos_field())],
         batch_size,
         None, // name_mapping not yet supported in incremental scan
+        Some(Arc::clone(scan_metrics.bytes_read_counter())),
+        Some(&task.base.schema),
     )
     .await?;
 
@@ -281,28 +303,32 @@ async fn process_equality_delete_task(
     Ok(Box::pin(stream) as ArrowRecordBatchStream)
 }
 
-impl StreamsInto<ArrowReader, CombinedIncrementalBatchRecordStream>
-    for IncrementalFileScanTaskStreams
-{
+impl StreamsInto<ArrowReader, CombinedIncrementalScanResult> for IncrementalFileScanTaskStreams {
     /// Takes separate streams of appended and deleted file scan tasks and reads all the files.
-    /// Returns a combined stream of Arrow `RecordBatch`es containing the data from the files.
-    fn stream(self, reader: ArrowReader) -> Result<CombinedIncrementalBatchRecordStream> {
-        let (appends, deletes) =
-            StreamsInto::<ArrowReader, UnzippedIncrementalBatchRecordStream>::stream(self, reader)?;
+    /// Returns a [`CombinedIncrementalScanResult`] containing a combined stream of Arrow
+    /// `RecordBatch`es and scan metrics.
+    fn stream(self, reader: ArrowReader) -> Result<CombinedIncrementalScanResult> {
+        let UnzippedIncrementalScanResult {
+            appends,
+            deletes,
+            metrics,
+        } = StreamsInto::<ArrowReader, UnzippedIncrementalScanResult>::stream(self, reader)?;
 
         let left = appends.map(|res| res.map(|batch| (IncrementalBatchType::Append, batch)));
         let right = deletes.map(|res| res.map(|batch| (IncrementalBatchType::Delete, batch)));
 
-        Ok(Box::pin(select(left, right)) as CombinedIncrementalBatchRecordStream)
+        Ok(CombinedIncrementalScanResult {
+            stream: Box::pin(select(left, right)),
+            metrics,
+        })
     }
 }
 
-impl StreamsInto<ArrowReader, UnzippedIncrementalBatchRecordStream>
-    for IncrementalFileScanTaskStreams
-{
+impl StreamsInto<ArrowReader, UnzippedIncrementalScanResult> for IncrementalFileScanTaskStreams {
     /// Takes separate streams of appended and deleted file scan tasks and reads all the files.
-    /// Returns two separate streams of Arrow `RecordBatch`es containing appended data and deleted records.
-    fn stream(self, reader: ArrowReader) -> Result<UnzippedIncrementalBatchRecordStream> {
+    /// Returns an [`UnzippedIncrementalScanResult`] containing separate streams of appended and
+    /// deleted record batches together with scan metrics.
+    fn stream(self, reader: ArrowReader) -> Result<UnzippedIncrementalScanResult> {
         let (appends_tx, appends_rx) =
             channel::<Result<RecordBatch>>(reader.concurrency_limit_data_files);
         let (deletes_tx, deletes_rx) =
@@ -310,16 +336,19 @@ impl StreamsInto<ArrowReader, UnzippedIncrementalBatchRecordStream>
 
         let batch_size = reader.batch_size;
         let parquet_read_options = reader.parquet_read_options;
+        let scan_metrics = ScanMetrics::new();
 
         let (append_stream, delete_stream) = self;
 
         // Process append tasks
         let file_io_append = reader.file_io.clone();
+        let scan_metrics_append = scan_metrics.clone();
         spawn(async move {
             let _ = append_stream
                 .try_for_each_concurrent(reader.concurrency_limit_data_files, |append_task| {
                     let file_io = file_io_append.clone();
                     let appends_tx = appends_tx.clone();
+                    let scan_metrics = scan_metrics_append.clone();
                     async move {
                         // Inner spawn: each file's IO runs on its own tokio task for true
                         // parallelism. Awaiting it keeps the concurrency slot occupied until
@@ -335,6 +364,7 @@ impl StreamsInto<ArrowReader, UnzippedIncrementalBatchRecordStream>
                                 batch_size,
                                 file_io,
                                 append_read_options,
+                                scan_metrics,
                             )
                             .await;
 
@@ -355,11 +385,13 @@ impl StreamsInto<ArrowReader, UnzippedIncrementalBatchRecordStream>
 
         // Process delete tasks
         let file_io_delete = reader.file_io.clone();
+        let scan_metrics_delete = scan_metrics.clone();
         spawn(async move {
             let _ = delete_stream
                 .try_for_each_concurrent(reader.concurrency_limit_data_files, |delete_task| {
                     let deletes_tx = deletes_tx.clone();
                     let file_io = file_io_delete.clone();
+                    let scan_metrics = scan_metrics_delete.clone();
                     async move {
                         // Inner spawn: same pattern as full-scan reader — spawn for parallelism,
                         // await to keep the concurrency slot occupied until the task completes.
@@ -406,6 +438,7 @@ impl StreamsInto<ArrowReader, UnzippedIncrementalBatchRecordStream>
                                         batch_size,
                                         file_io.clone(),
                                         eq_read_options,
+                                        scan_metrics,
                                     )
                                     .await;
 
@@ -426,9 +459,10 @@ impl StreamsInto<ArrowReader, UnzippedIncrementalBatchRecordStream>
                 .await;
         });
 
-        Ok((
-            Box::pin(appends_rx) as ArrowRecordBatchStream,
-            Box::pin(deletes_rx) as ArrowRecordBatchStream,
-        ))
+        Ok(UnzippedIncrementalScanResult {
+            appends: Box::pin(appends_rx) as ArrowRecordBatchStream,
+            deletes: Box::pin(deletes_rx) as ArrowRecordBatchStream,
+            metrics: scan_metrics,
+        })
     }
 }
diff --git a/crates/iceberg/src/arrow/int96.rs b/crates/iceberg/src/arrow/int96.rs
new file mode 100644
index 0000000000..63a7a30f1a
--- /dev/null
+++ b/crates/iceberg/src/arrow/int96.rs
@@ -0,0 +1,578 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! INT96 timestamp coercion for Parquet files.
+
+use std::sync::Arc;
+
+use arrow_schema::{
+    DataType, Field, Fields, Schema as ArrowSchema, SchemaRef as ArrowSchemaRef, TimeUnit,
+};
+use parquet::arrow::PARQUET_FIELD_ID_META_KEY;
+
+use crate::arrow::schema::{ArrowSchemaVisitor, DEFAULT_MAP_FIELD_NAME, visit_schema};
+use crate::error::Result;
+use crate::spec::{PrimitiveType, Schema, Type};
+use crate::{Error, ErrorKind};
+
+/// Coerce Arrow schema types for INT96 columns to match the Iceberg table schema.
+///
+/// arrow-rs defaults INT96 to `Timestamp(Nanosecond)`, which overflows i64 for dates outside
+/// ~1677-2262. We use arrow-rs's schema hint mechanism to read INT96 at the resolution
+/// specified by the Iceberg schema (`timestamp` → microsecond, `timestamp_ns` → nanosecond).
+/// Iceberg Java gets the same result with a custom column reader that bypasses parquet-mr
+/// (`GenericParquetReaders.TimestampInt96Reader`).
+///
+/// Returns `Some` with the rewritten schema only when at least one field changed, and `None`
+/// when nothing changed or the schema visit failed (the visit error is discarded by `.ok()`).
+/// See <https://iceberg.apache.org/spec/#primitive-types> (primitive types) and
+/// <https://github.com/apache/arrow-rs/pull/7285> (arrow-rs schema hint support).
+pub(crate) fn coerce_int96_timestamps(
+    arrow_schema: &ArrowSchemaRef,
+    iceberg_schema: &Schema,
+) -> Option<Arc<ArrowSchema>> {
+    let mut visitor = Int96CoercionVisitor::new(iceberg_schema);
+    let coerced = visit_schema(arrow_schema, &mut visitor).ok()?;
+    if visitor.changed {
+        Some(Arc::new(coerced))
+    } else {
+        None
+    }
+}
+
+/// Visitor that rebuilds an Arrow schema, coercing `Timestamp(Nanosecond)` fields to the
+/// resolution indicated by the Iceberg schema and recording whether anything changed.
+struct Int96CoercionVisitor<'a> {
+    iceberg_schema: &'a Schema, // looked up by Parquet field ID to pick the target unit
+    // TODO(#2310): use FieldRef (Arc<Field>) once ArrowSchemaVisitor passes FieldRef.
+    field_stack: Vec<Field>, // fields currently being visited, innermost last
+    changed: bool, // true once at least one field has been coerced
+}
+
+impl<'a> Int96CoercionVisitor<'a> {
+    fn new(iceberg_schema: &'a Schema) -> Self {
+        Self {
+            iceberg_schema,
+            field_stack: Vec::new(),
+            changed: false,
+        }
+    }
+
+    /// Unit to coerce a `Timestamp(Nanosecond)` field to; `None` if it is another type or the
+    /// Iceberg type is nanosecond. Falls back to microsecond (as Iceberg Java does) when the
+    /// field ID is missing, unparsable, not in the schema, or maps to a non-timestamp type.
+    fn target_unit(&self, field: &Field) -> Option<TimeUnit> {
+        if !matches!(
+            field.data_type(),
+            DataType::Timestamp(TimeUnit::Nanosecond, _)
+        ) {
+            return None; // only nanosecond timestamps are candidates for coercion
+        }
+
+        let target = field
+            .metadata()
+            .get(PARQUET_FIELD_ID_META_KEY)
+            .and_then(|id_str| id_str.parse::<i32>().ok())
+            .and_then(|field_id| self.iceberg_schema.field_by_id(field_id))
+            .and_then(|f| match &*f.field_type {
+                Type::Primitive(PrimitiveType::Timestamp | PrimitiveType::Timestamptz) => {
+                    Some(TimeUnit::Microsecond)
+                }
+                Type::Primitive(PrimitiveType::TimestampNs | PrimitiveType::TimestamptzNs) => {
+                    Some(TimeUnit::Nanosecond)
+                }
+                _ => None,
+            })
+            // Iceberg Java reads INT96 as microseconds by default
+            .unwrap_or(TimeUnit::Microsecond);
+
+        if target == TimeUnit::Nanosecond {
+            None // the field is already nanosecond: nothing to coerce
+        } else {
+            Some(target)
+        }
+    }
+}
+
+impl ArrowSchemaVisitor for Int96CoercionVisitor<'_> {
+    type T = Field; // result produced for every visited field
+    type U = ArrowSchema; // result produced for the whole schema
+
+    fn before_field(&mut self, field: &Field) -> Result<()> {
+        self.field_stack.push(field.as_ref().clone()); // snapshot read back via `.last()`
+        Ok(())
+    }
+
+    fn after_field(&mut self, _field: &Field) -> Result<()> {
+        self.field_stack.pop();
+        Ok(())
+    }
+
+    fn before_list_element(&mut self, field: &Field) -> Result<()> {
+        self.field_stack.push(field.as_ref().clone());
+        Ok(())
+    }
+
+    fn after_list_element(&mut self, _field: &Field) -> Result<()> {
+        self.field_stack.pop();
+        Ok(())
+    }
+
+    fn before_map_key(&mut self, field: &Field) -> Result<()> {
+        self.field_stack.push(field.as_ref().clone());
+        Ok(())
+    }
+
+    fn after_map_key(&mut self, _field: &Field) -> Result<()> {
+        self.field_stack.pop();
+        Ok(())
+    }
+
+    fn before_map_value(&mut self, field: &Field) -> Result<()> {
+        self.field_stack.push(field.as_ref().clone());
+        Ok(())
+    }
+
+    fn after_map_value(&mut self, _field: &Field) -> Result<()> {
+        self.field_stack.pop();
+        Ok(())
+    }
+
+    fn schema(&mut self, schema: &ArrowSchema, values: Vec<Field>) -> Result<ArrowSchema> {
+        Ok(ArrowSchema::new_with_metadata(
+            values,
+            schema.metadata().clone(), // schema-level metadata is carried over
+        ))
+    }
+
+    fn r#struct(&mut self, _fields: &Fields, results: Vec<Field>) -> Result<Field> {
+        let field_info = self
+            .field_stack
+            .last() // innermost field still on the stack
+            .ok_or_else(|| Error::new(ErrorKind::Unexpected, "Field stack underflow in struct"))?;
+        Ok(Field::new(
+            field_info.name(),
+            DataType::Struct(Fields::from(results)),
+            field_info.is_nullable(),
+        )
+        .with_metadata(field_info.metadata().clone()))
+    }
+
+    fn list(&mut self, list: &DataType, value: Field) -> Result<Field> {
+        let field_info = self
+            .field_stack
+            .last() // innermost field still on the stack
+            .ok_or_else(|| Error::new(ErrorKind::Unexpected, "Field stack underflow in list"))?;
+        let list_type = match list {
+            DataType::List(_) => DataType::List(Arc::new(value)),
+            DataType::LargeList(_) => DataType::LargeList(Arc::new(value)),
+            DataType::FixedSizeList(_, size) => DataType::FixedSizeList(Arc::new(value), *size),
+            _ => {
+                return Err(Error::new(
+                    ErrorKind::Unexpected,
+                    format!("Expected list type, got {list}"),
+                ));
+            }
+        };
+        Ok(
+            Field::new(field_info.name(), list_type, field_info.is_nullable())
+                .with_metadata(field_info.metadata().clone()),
+        )
+    }
+
+    fn map(&mut self, map: &DataType, key_value: Field, value: Field) -> Result<Field> {
+        let field_info = self
+            .field_stack
+            .last() // innermost field still on the stack
+            .ok_or_else(|| Error::new(ErrorKind::Unexpected, "Field stack underflow in map"))?;
+        let sorted = match map {
+            DataType::Map(_, sorted) => *sorted,
+            _ => {
+                return Err(Error::new(
+                    ErrorKind::Unexpected,
+                    format!("Expected map type, got {map}"),
+                ));
+            }
+        };
+        let struct_field = Field::new(
+            DEFAULT_MAP_FIELD_NAME, // the original entries-field name is not reused
+            DataType::Struct(Fields::from(vec![key_value, value])),
+            false, // entries field is rebuilt non-nullable and without metadata
+        );
+        Ok(Field::new(
+            field_info.name(),
+            DataType::Map(Arc::new(struct_field), sorted),
+            field_info.is_nullable(),
+        )
+        .with_metadata(field_info.metadata().clone()))
+    }
+
+    fn primitive(&mut self, p: &DataType) -> Result<Field> {
+        let field_info = self.field_stack.last().ok_or_else(|| {
+            Error::new(ErrorKind::Unexpected, "Field stack underflow in primitive")
+        })?;
+
+        if let Some(target_unit) = self.target_unit(field_info) {
+            let tz = match field_info.data_type() {
+                DataType::Timestamp(_, tz) => tz.clone(), // keep the original timezone
+                _ => None, // target_unit is only `Some` for timestamp fields
+            };
+            self.changed = true; // makes coerce_int96_timestamps return the rebuilt schema
+            Ok(Field::new(
+                field_info.name(),
+                DataType::Timestamp(target_unit, tz),
+                field_info.is_nullable(),
+            )
+            .with_metadata(field_info.metadata().clone()))
+        } else { // no coercion: rebuild the field with its data type unchanged
+            Ok(
+                Field::new(field_info.name(), p.clone(), field_info.is_nullable())
+                    .with_metadata(field_info.metadata().clone()),
+            )
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::collections::HashMap;
+    use std::sync::Arc;
+
+    use arrow_schema::{DataType, Field, Schema as ArrowSchema, TimeUnit};
+    use parquet::arrow::PARQUET_FIELD_ID_META_KEY;
+
+    use super::coerce_int96_timestamps;
+    use crate::spec::{ListType, MapType, NestedField, PrimitiveType, Schema, StructType, Type};
+
+    fn iceberg_schema_with_timestamp() -> Schema {
+        Schema::builder()
+            .with_schema_id(1)
+            .with_fields(vec![
+                NestedField::optional(1, "ts", Type::Primitive(PrimitiveType::Timestamp)).into(),
+                NestedField::required(2, "id", Type::Primitive(PrimitiveType::Int)).into(),
+            ])
+            .build()
+            .unwrap()
+    }
+
+    fn field_id_meta(id: i32) -> HashMap<String, String> {
+        HashMap::from([(PARQUET_FIELD_ID_META_KEY.to_string(), id.to_string())])
+    }
+
+    #[test]
+    fn test_coerce_timestamp_ns_to_us() {
+        let arrow_schema = Arc::new(ArrowSchema::new(vec![
+            Field::new("ts", DataType::Timestamp(TimeUnit::Nanosecond, None), true)
+                .with_metadata(field_id_meta(1)),
+            Field::new("id", DataType::Int32, false).with_metadata(field_id_meta(2)),
+        ]));
+        let iceberg = iceberg_schema_with_timestamp();
+
+        let coerced = coerce_int96_timestamps(&arrow_schema, &iceberg).unwrap();
+        assert_eq!(
+            coerced.field(0).data_type(),
+            &DataType::Timestamp(TimeUnit::Microsecond, None)
+        );
+        // Non-timestamp field unchanged
+        assert_eq!(coerced.field(1).data_type(), &DataType::Int32);
+    }
+
+    #[test]
+    fn test_coerce_timestamptz_ns_to_us() {
+        let iceberg = Schema::builder()
+            .with_schema_id(1)
+            .with_fields(vec![
+                NestedField::optional(1, "ts", Type::Primitive(PrimitiveType::Timestamptz)).into(),
+            ])
+            .build()
+            .unwrap();
+
+        let arrow_schema = Arc::new(ArrowSchema::new(vec![
+            Field::new(
+                "ts",
+                DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into())),
+                true,
+            )
+            .with_metadata(field_id_meta(1)),
+        ]));
+
+        let coerced = coerce_int96_timestamps(&arrow_schema, &iceberg).unwrap();
+        assert_eq!(
+            coerced.field(0).data_type(),
+            &DataType::Timestamp(TimeUnit::Microsecond, Some("UTC".into()))
+        );
+    }
+
+    #[test]
+    fn test_no_coercion_when_iceberg_is_timestamp_ns() {
+        let iceberg = Schema::builder()
+            .with_schema_id(1)
+            .with_fields(vec![
+                NestedField::optional(1, "ts", Type::Primitive(PrimitiveType::TimestampNs)).into(),
+            ])
+            .build()
+            .unwrap();
+
+        let arrow_schema = Arc::new(ArrowSchema::new(vec![
+            Field::new("ts", DataType::Timestamp(TimeUnit::Nanosecond, None), true)
+                .with_metadata(field_id_meta(1)),
+        ]));
+
+        assert!(coerce_int96_timestamps(&arrow_schema, &iceberg).is_none());
+    }
+
+    #[test]
+    fn test_no_coercion_when_iceberg_is_timestamptz_ns() {
+        let iceberg = Schema::builder()
+            .with_schema_id(1)
+            .with_fields(vec![
+                NestedField::optional(1, "ts", Type::Primitive(PrimitiveType::TimestamptzNs))
+                    .into(),
+            ])
+            .build()
+            .unwrap();
+
+        let arrow_schema = Arc::new(ArrowSchema::new(vec![
+            Field::new(
+                "ts",
+                DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into())),
+                true,
+            )
+            .with_metadata(field_id_meta(1)),
+        ]));
+
+        assert!(coerce_int96_timestamps(&arrow_schema, &iceberg).is_none());
+    }
+
+    #[test]
+    fn test_no_coercion_when_already_microsecond() {
+        let arrow_schema = Arc::new(ArrowSchema::new(vec![
+            Field::new("ts", DataType::Timestamp(TimeUnit::Microsecond, None), true)
+                .with_metadata(field_id_meta(1)),
+            Field::new("id", DataType::Int32, false).with_metadata(field_id_meta(2)),
+        ]));
+        let iceberg = iceberg_schema_with_timestamp();
+
+        assert!(coerce_int96_timestamps(&arrow_schema, &iceberg).is_none());
+    }
+
+    // Without field IDs, the visitor can't look up the Iceberg type and falls back
+    // to microsecond to match Iceberg Java behavior.
+    #[test]
+    fn test_defaults_to_us_without_field_ids() {
+        let arrow_schema = Arc::new(ArrowSchema::new(vec![Field::new(
+            "ts",
+            DataType::Timestamp(TimeUnit::Nanosecond, None),
+            true,
+        )]));
+        let iceberg = iceberg_schema_with_timestamp();
+
+        let coerced = coerce_int96_timestamps(&arrow_schema, &iceberg).unwrap();
+        assert_eq!(
+            coerced.field(0).data_type(),
+            &DataType::Timestamp(TimeUnit::Microsecond, None)
+        );
+    }
+
+    // Field ID exists but points to a non-timestamp Iceberg type. The field_by_id
+    // lookup succeeds but the match arm returns None, so unwrap_or falls back to
+    // microsecond.
+    #[test]
+    fn test_defaults_to_us_when_iceberg_type_is_not_timestamp() {
+        let iceberg = Schema::builder()
+            .with_schema_id(1)
+            .with_fields(vec![
+                NestedField::optional(1, "ts", Type::Primitive(PrimitiveType::String)).into(),
+            ])
+            .build()
+            .unwrap();
+
+        let arrow_schema = Arc::new(ArrowSchema::new(vec![
+            Field::new("ts", DataType::Timestamp(TimeUnit::Nanosecond, None), true)
+                .with_metadata(field_id_meta(1)),
+        ]));
+
+        let coerced = coerce_int96_timestamps(&arrow_schema, &iceberg).unwrap();
+        assert_eq!(
+            coerced.field(0).data_type(),
+            &DataType::Timestamp(TimeUnit::Microsecond, None)
+        );
+    }
+
+    #[test]
+    fn test_coerce_preserves_field_metadata() {
+        let mut meta = field_id_meta(1);
+        meta.insert("custom_key".to_string(), "custom_value".to_string());
+
+        let arrow_schema = Arc::new(ArrowSchema::new(vec![
+            Field::new("ts", DataType::Timestamp(TimeUnit::Nanosecond, None), true)
+                .with_metadata(meta.clone()),
+        ]));
+        let iceberg = iceberg_schema_with_timestamp();
+
+        let coerced = coerce_int96_timestamps(&arrow_schema, &iceberg).unwrap();
+        assert_eq!(coerced.field(0).metadata(), &meta);
+    }
+
+    #[test]
+    fn test_coerce_timestamp_in_struct() {
+        let iceberg = Schema::builder()
+            .with_schema_id(1)
+            .with_fields(vec![
+                NestedField::required(
+                    1,
+                    "data",
+                    Type::Struct(StructType::new(vec![
+                        NestedField::optional(2, "ts", Type::Primitive(PrimitiveType::Timestamp))
+                            .into(),
+                    ])),
+                )
+                .into(),
+            ])
+            .build()
+            .unwrap();
+
+        let arrow_schema = Arc::new(ArrowSchema::new(vec![
+            Field::new(
+                "data",
+                DataType::Struct(
+                    vec![
+                        Field::new("ts", DataType::Timestamp(TimeUnit::Nanosecond, None), true)
+                            .with_metadata(field_id_meta(2)),
+                    ]
+                    .into(),
+                ),
+                false,
+            )
+            .with_metadata(field_id_meta(1)),
+        ]));
+
+        let coerced = coerce_int96_timestamps(&arrow_schema, &iceberg).unwrap();
+        let inner = match coerced.field(0).data_type() {
+            DataType::Struct(fields) => fields,
+            other => panic!("Expected Struct, got {other}"),
+        };
+        assert_eq!(
+            inner[0].data_type(),
+            &DataType::Timestamp(TimeUnit::Microsecond, None)
+        );
+    }
+
+    #[test]
+    fn test_coerce_timestamp_in_list() {
+        let iceberg = Schema::builder()
+            .with_schema_id(1)
+            .with_fields(vec![
+                NestedField::optional(
+                    1,
+                    "timestamps",
+                    Type::List(ListType {
+                        element_field: NestedField::optional(
+                            2,
+                            "element",
+                            Type::Primitive(PrimitiveType::Timestamp),
+                        )
+                        .into(),
+                    }),
+                )
+                .into(),
+            ])
+            .build()
+            .unwrap();
+
+        let element_field = Field::new(
+            "element",
+            DataType::Timestamp(TimeUnit::Nanosecond, None),
+            true,
+        )
+        .with_metadata(field_id_meta(2));
+        let arrow_schema = Arc::new(ArrowSchema::new(vec![
+            Field::new("timestamps", DataType::List(Arc::new(element_field)), true)
+                .with_metadata(field_id_meta(1)),
+        ]));
+
+        let coerced = coerce_int96_timestamps(&arrow_schema, &iceberg).unwrap();
+        let element_dt = match coerced.field(0).data_type() {
+            DataType::List(f) => f.data_type(),
+            other => panic!("Expected List, got {other}"),
+        };
+        assert_eq!(
+            element_dt,
+            &DataType::Timestamp(TimeUnit::Microsecond, None)
+        );
+    }
+
+    #[test]
+    fn test_coerce_timestamp_in_map_value() {
+        let iceberg = Schema::builder()
+            .with_schema_id(1)
+            .with_fields(vec![
+                NestedField::optional(
+                    1,
+                    "ts_map",
+                    Type::Map(MapType {
+                        key_field: NestedField::required(
+                            2,
+                            "key",
+                            Type::Primitive(PrimitiveType::String),
+                        )
+                        .into(),
+                        value_field: NestedField::optional(
+                            3,
+                            "value",
+                            Type::Primitive(PrimitiveType::Timestamp),
+                        )
+                        .into(),
+                    }),
+                )
+                .into(),
+            ])
+            .build()
+            .unwrap();
+
+        let key_field = Field::new("key", DataType::Utf8, false).with_metadata(field_id_meta(2));
+        let value_field = Field::new(
+            "value",
+            DataType::Timestamp(TimeUnit::Nanosecond, None),
+            true,
+        )
+        .with_metadata(field_id_meta(3));
+        let entries_field = Field::new(
+            "key_value",
+            DataType::Struct(vec![key_field, value_field].into()),
+            false,
+        );
+        let arrow_schema = Arc::new(ArrowSchema::new(vec![
+            Field::new(
+                "ts_map",
+                DataType::Map(Arc::new(entries_field), false),
+                true,
+            )
+            .with_metadata(field_id_meta(1)),
+        ]));
+
+        let coerced = coerce_int96_timestamps(&arrow_schema, &iceberg).unwrap();
+        let value_dt = match coerced.field(0).data_type() {
+            DataType::Map(entries, _) => match entries.data_type() {
+                DataType::Struct(fields) => fields[1].data_type().clone(),
+                other => panic!("Expected Struct inside Map, got {other}"),
+            },
+            other => panic!("Expected Map, got {other}"),
+        };
+        assert_eq!(value_dt, DataType::Timestamp(TimeUnit::Microsecond, None));
+    }
+}
diff --git a/crates/iceberg/src/arrow/mod.rs b/crates/iceberg/src/arrow/mod.rs
index 15b386109d..089d01cad0 100644
--- a/crates/iceberg/src/arrow/mod.rs
+++ b/crates/iceberg/src/arrow/mod.rs
@@ -27,15 +27,18 @@ pub(crate) mod caching_delete_file_loader;
 pub mod delete_file_loader;
 pub(crate) mod delete_filter;
 
+mod int96;
 mod reader;
 /// RecordBatch projection utilities
 pub mod record_batch_projector;
 pub(crate) mod record_batch_transformer;
+mod scan_metrics;
 mod value;
 
 mod incremental;
 pub use incremental::*;
 pub use reader::*;
+pub use scan_metrics::{ScanMetrics, ScanResult};
 pub use value::*;
 
 // Re-export delete file constants for convenience
diff --git a/crates/iceberg/src/arrow/reader.rs b/crates/iceberg/src/arrow/reader.rs
deleted file mode 100644
index 3696e88584..0000000000
--- a/crates/iceberg/src/arrow/reader.rs
+++ /dev/null
@@ -1,5037 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Parquet file data reader
-
-use std::collections::{HashMap, HashSet};
-use std::ops::Range;
-use std::str::FromStr;
-use std::sync::{Arc, Mutex};
-
-use arrow_arith::boolean::{and, and_kleene, is_not_null, is_null, not, or, or_kleene};
-use arrow_array::{Array, ArrayRef, BooleanArray, Datum as ArrowDatum, RecordBatch, Scalar};
-use arrow_cast::cast::cast;
-use arrow_ord::cmp::{eq, gt, gt_eq, lt, lt_eq, neq};
-use arrow_schema::{
-    ArrowError, DataType, FieldRef, Schema as ArrowSchema, SchemaRef as ArrowSchemaRef,
-};
-use arrow_string::like::starts_with;
-use bytes::Bytes;
-use fnv::FnvHashSet;
-use futures::channel::mpsc::channel;
-use futures::future::BoxFuture;
-use futures::{FutureExt, SinkExt, Stream, StreamExt, TryFutureExt, TryStreamExt};
-use parquet::arrow::arrow_reader::{
-    ArrowPredicateFn, ArrowReaderMetadata, ArrowReaderOptions, RowFilter, RowSelection, RowSelector,
-};
-use parquet::arrow::async_reader::AsyncFileReader;
-use parquet::arrow::{PARQUET_FIELD_ID_META_KEY, ParquetRecordBatchStreamBuilder, ProjectionMask};
-use parquet::file::metadata::{
-    PageIndexPolicy, ParquetMetaData, ParquetMetaDataReader, RowGroupMetaData,
-};
-use parquet::schema::types::{SchemaDescriptor, Type as ParquetType};
-use typed_builder::TypedBuilder;
-
-use crate::arrow::caching_delete_file_loader::CachingDeleteFileLoader;
-use crate::arrow::record_batch_transformer::{
-    RecordBatchTransformer, RecordBatchTransformerBuilder,
-};
-use crate::arrow::{arrow_schema_to_schema, get_arrow_datum};
-use crate::delete_vector::DeleteVector;
-use crate::error::Result;
-use crate::expr::visitors::bound_predicate_visitor::{BoundPredicateVisitor, visit};
-use crate::expr::visitors::page_index_evaluator::PageIndexEvaluator;
-use crate::expr::visitors::row_group_metrics_evaluator::RowGroupMetricsEvaluator;
-use crate::expr::{BoundPredicate, BoundReference};
-use crate::io::{FileIO, FileMetadata, FileRead};
-use crate::metadata_columns::{
-    RESERVED_FIELD_ID_FILE, RESERVED_FIELD_ID_POS, is_metadata_field, row_pos_field,
-};
-use crate::runtime::spawn;
-use crate::scan::{ArrowRecordBatchStream, FileScanTask, FileScanTaskStream};
-use crate::spec::{
-    Datum, NameMapping, NestedField, PartitionSpec, PrimitiveType, Schema, SchemaRef, Struct, Type,
-};
-use crate::util::available_parallelism;
-use crate::{Error, ErrorKind};
-
-/// Default gap between byte ranges below which they are coalesced into a
-/// single request. Matches object_store's `OBJECT_STORE_COALESCE_DEFAULT`.
-const DEFAULT_RANGE_COALESCE_BYTES: u64 = 1024 * 1024;
-
-/// Default maximum number of coalesced byte ranges fetched concurrently.
-/// Matches object_store's `OBJECT_STORE_COALESCE_PARALLEL`.
-const DEFAULT_RANGE_FETCH_CONCURRENCY: usize = 10;
-
-/// Default number of bytes to prefetch when parsing Parquet footer metadata.
-/// Matches DataFusion's default `ParquetOptions::metadata_size_hint`.
-const DEFAULT_METADATA_SIZE_HINT: usize = 512 * 1024;
-
-/// Options for tuning Parquet file I/O.
-#[derive(Clone, Copy, Debug, TypedBuilder)]
-#[builder(field_defaults(setter(prefix = "with_")))]
-pub(crate) struct ParquetReadOptions {
-    /// Number of bytes to prefetch for parsing the Parquet metadata.
-    ///
-    /// This hint can help reduce the number of fetch requests. For more details see the
-    /// [ParquetMetaDataReader documentation](https://docs.rs/parquet/latest/parquet/file/metadata/struct.ParquetMetaDataReader.html#method.with_prefetch_hint).
-    ///
-    /// Defaults to 512 KiB, matching DataFusion's default `ParquetOptions::metadata_size_hint`.
-    #[builder(default = Some(DEFAULT_METADATA_SIZE_HINT))]
-    pub(crate) metadata_size_hint: Option<usize>,
-    /// Gap threshold for merging nearby byte ranges into a single request.
-    /// Ranges with gaps smaller than this value will be coalesced.
-    ///
-    /// Defaults to 1 MiB, matching object_store's `OBJECT_STORE_COALESCE_DEFAULT`.
-    #[builder(default = DEFAULT_RANGE_COALESCE_BYTES)]
-    pub(crate) range_coalesce_bytes: u64,
-    /// Maximum number of merged byte ranges to fetch concurrently.
-    ///
-    /// Defaults to 10, matching object_store's `OBJECT_STORE_COALESCE_PARALLEL`.
-    #[builder(default = DEFAULT_RANGE_FETCH_CONCURRENCY)]
-    pub(crate) range_fetch_concurrency: usize,
-    /// Whether to preload the column index when reading Parquet metadata.
-    #[builder(default = true)]
-    pub(crate) preload_column_index: bool,
-    /// Whether to preload the offset index when reading Parquet metadata.
-    #[builder(default = true)]
-    pub(crate) preload_offset_index: bool,
-    /// Whether to preload the page index when reading Parquet metadata.
-    #[builder(default = false)]
-    pub(crate) preload_page_index: bool,
-}
-
-impl ParquetReadOptions {
-    pub(crate) fn metadata_size_hint(&self) -> Option<usize> {
-        self.metadata_size_hint
-    }
-
-    pub(crate) fn range_coalesce_bytes(&self) -> u64 {
-        self.range_coalesce_bytes
-    }
-
-    pub(crate) fn range_fetch_concurrency(&self) -> usize {
-        self.range_fetch_concurrency
-    }
-
-    pub(crate) fn preload_column_index(&self) -> bool {
-        self.preload_column_index
-    }
-
-    pub(crate) fn preload_offset_index(&self) -> bool {
-        self.preload_offset_index
-    }
-
-    pub(crate) fn preload_page_index(&self) -> bool {
-        self.preload_page_index
-    }
-}
-
-/// Builder to create ArrowReader
-pub struct ArrowReaderBuilder {
-    batch_size: Option<usize>,
-    file_io: FileIO,
-    concurrency_limit_data_files: usize,
-    row_group_filtering_enabled: bool,
-    row_selection_enabled: bool,
-    parquet_read_options: ParquetReadOptions,
-}
-
-impl ArrowReaderBuilder {
-    /// Create a new ArrowReaderBuilder
-    pub fn new(file_io: FileIO) -> Self {
-        let num_cpus = available_parallelism().get();
-
-        ArrowReaderBuilder {
-            batch_size: None,
-            file_io,
-            concurrency_limit_data_files: num_cpus,
-            row_group_filtering_enabled: true,
-            row_selection_enabled: false,
-            parquet_read_options: ParquetReadOptions::builder().build(),
-        }
-    }
-
-    /// Sets the max number of in flight data files that are being fetched
-    pub fn with_data_file_concurrency_limit(mut self, val: usize) -> Self {
-        self.concurrency_limit_data_files = val;
-        self
-    }
-
-    /// Sets the desired size of batches in the response
-    /// to something other than the default
-    pub fn with_batch_size(mut self, batch_size: usize) -> Self {
-        self.batch_size = Some(batch_size);
-        self
-    }
-
-    /// Determines whether to enable row group filtering.
-    pub fn with_row_group_filtering_enabled(mut self, row_group_filtering_enabled: bool) -> Self {
-        self.row_group_filtering_enabled = row_group_filtering_enabled;
-        self
-    }
-
-    /// Determines whether to enable row selection.
-    pub fn with_row_selection_enabled(mut self, row_selection_enabled: bool) -> Self {
-        self.row_selection_enabled = row_selection_enabled;
-        self
-    }
-
-    /// Provide a hint as to the number of bytes to prefetch for parsing the Parquet metadata
-    ///
-    /// This hint can help reduce the number of fetch requests. For more details see the
-    /// [ParquetMetaDataReader documentation](https://docs.rs/parquet/latest/parquet/file/metadata/struct.ParquetMetaDataReader.html#method.with_prefetch_hint).
-    pub fn with_metadata_size_hint(mut self, metadata_size_hint: usize) -> Self {
-        self.parquet_read_options.metadata_size_hint = Some(metadata_size_hint);
-        self
-    }
-
-    /// Sets the gap threshold for merging nearby byte ranges into a single request.
-    /// Ranges with gaps smaller than this value will be coalesced.
-    ///
-    /// Defaults to 1 MiB, matching object_store's OBJECT_STORE_COALESCE_DEFAULT.
-    pub fn with_range_coalesce_bytes(mut self, range_coalesce_bytes: u64) -> Self {
-        self.parquet_read_options.range_coalesce_bytes = range_coalesce_bytes;
-        self
-    }
-
-    /// Sets the maximum number of merged byte ranges to fetch concurrently.
-    ///
-    /// Defaults to 10, matching object_store's OBJECT_STORE_COALESCE_PARALLEL.
-    pub fn with_range_fetch_concurrency(mut self, range_fetch_concurrency: usize) -> Self {
-        self.parquet_read_options.range_fetch_concurrency = range_fetch_concurrency;
-        self
-    }
-
-    /// Build the ArrowReader.
-    pub fn build(self) -> ArrowReader {
-        ArrowReader {
-            batch_size: self.batch_size,
-            file_io: self.file_io.clone(),
-            delete_file_loader: CachingDeleteFileLoader::new(
-                self.file_io.clone(),
-                self.concurrency_limit_data_files,
-            ),
-            concurrency_limit_data_files: self.concurrency_limit_data_files,
-            row_group_filtering_enabled: self.row_group_filtering_enabled,
-            row_selection_enabled: self.row_selection_enabled,
-            parquet_read_options: self.parquet_read_options,
-        }
-    }
-}
-
-/// Reads data from Parquet files
-#[derive(Clone)]
-pub struct ArrowReader {
-    pub(crate) batch_size: Option<usize>,
-    pub(crate) file_io: FileIO,
-    delete_file_loader: CachingDeleteFileLoader,
-
-    /// the maximum number of data files that can be fetched at the same time
-    pub(crate) concurrency_limit_data_files: usize,
-
-    pub(crate) row_group_filtering_enabled: bool,
-    pub(crate) row_selection_enabled: bool,
-    pub(crate) parquet_read_options: ParquetReadOptions,
-}
-
-/// Trait indicating that the implementing type streams into a stream of type `S` using
-/// a reader of type `R`.
-pub trait StreamsInto<R, S = ArrowRecordBatchStream> {
-    /// Stream from the reader and produce a stream of type `S`.
-    fn stream(self, reader: R) -> Result<S>;
-}
-
-/// Helper function to process a stream of record batches and send through a channel.
-/// Handles the Result<Stream> pattern, so callers don't need to match on the stream result.
-/// This pattern is used in both reader.rs and incremental.rs.
-pub(crate) async fn process_record_batch_stream<E, S, T>(
-    record_batch_stream: Result<S>,
-    mut tx: T,
-    error_context: &str,
-) where
-    E: std::error::Error + Send + Sync + 'static,
-    S: Stream<Item = std::result::Result<RecordBatch, E>> + Send + Unpin + 'static,
-    T: SinkExt<Result<RecordBatch>> + Unpin + Send + 'static,
-{
-    match record_batch_stream {
-        Ok(mut stream) => {
-            while let Some(batch_result) = stream.next().await {
-                let batch = batch_result
-                    .map_err(|e| Error::new(ErrorKind::Unexpected, error_context).with_source(e));
-                let _ = tx.send(batch).await;
-            }
-        }
-        Err(e) => {
-            let _ = tx.send(Err(e)).await;
-        }
-    }
-}
-
-impl ArrowReader {
-    /// Take a stream of FileScanTasks and reads all the files.
-    /// Returns a stream of Arrow RecordBatches containing the data from the files.
-    ///
-    /// This implementation provides both file-level and batch-level parallelism:
-    /// - Multiple files are processed in parallel (IO-heavy operations)
-    /// - Multiple batches are processed in parallel across all files (CPU-heavy operations)
-    pub fn read(self, tasks: FileScanTaskStream) -> Result<ArrowRecordBatchStream> {
-        let file_io = self.file_io;
-        let batch_size = self.batch_size;
-        let concurrency_limit_data_files = self.concurrency_limit_data_files;
-        let row_group_filtering_enabled = self.row_group_filtering_enabled;
-        let row_selection_enabled = self.row_selection_enabled;
-        let parquet_read_options = self.parquet_read_options;
-
-        // Fast-path for single concurrency to avoid overhead of try_flatten_unordered
-        let stream: ArrowRecordBatchStream = if concurrency_limit_data_files == 1 {
-            Box::pin(
-                tasks
-                    .and_then(move |task| {
-                        let file_io = file_io.clone();
-
-                        Self::process_file_scan_task(
-                            task,
-                            batch_size,
-                            file_io,
-                            self.delete_file_loader.clone(),
-                            row_group_filtering_enabled,
-                            row_selection_enabled,
-                            parquet_read_options,
-                        )
-                    })
-                    .map_err(|err| {
-                        Error::new(ErrorKind::Unexpected, "file scan task generate failed")
-                            .with_source(err)
-                    })
-                    .try_flatten(),
-            )
-        } else {
-            // Multi-concurrency path: spawn each file's IO-heavy processing as an independent
-            // tokio task for true parallelism, streaming results through a channel.
-            let (tx, rx) = channel::<Result<RecordBatch>>(concurrency_limit_data_files);
-            let delete_file_loader = self.delete_file_loader;
-
-            // Outer spawn: runs the task coordination loop without blocking the caller.
-            spawn(async move {
-                let _ = tasks
-                    .try_for_each_concurrent(concurrency_limit_data_files, |task| {
-                        let file_io = file_io.clone();
-                        let delete_file_loader = delete_file_loader.clone();
-                        let tx = tx.clone();
-
-                        async move {
-                            // Inner spawn: each file's IO operations run on their own tokio task.
-                            spawn(async move {
-                                let record_batch_stream = Self::process_file_scan_task(
-                                    task,
-                                    batch_size,
-                                    file_io,
-                                    delete_file_loader,
-                                    row_group_filtering_enabled,
-                                    row_selection_enabled,
-                                    parquet_read_options,
-                                )
-                                .await;
-
-                                process_record_batch_stream(
-                                    record_batch_stream,
-                                    tx,
-                                    "failed to read record batch",
-                                )
-                                .await;
-                            })
-                            .await;
-
-                            Ok(())
-                        }
-                    })
-                    .await;
-            });
-
-            Box::pin(rx) as ArrowRecordBatchStream
-        };
-
-        Ok(stream)
-    }
-
-    async fn process_file_scan_task(
-        task: FileScanTask,
-        batch_size: Option<usize>,
-        file_io: FileIO,
-        delete_file_loader: CachingDeleteFileLoader,
-        row_group_filtering_enabled: bool,
-        row_selection_enabled: bool,
-        parquet_read_options: ParquetReadOptions,
-    ) -> Result<ArrowRecordBatchStream> {
-        let should_load_page_index =
-            (row_selection_enabled && task.predicate.is_some()) || !task.deletes.is_empty();
-        let mut parquet_read_options = parquet_read_options;
-        parquet_read_options.preload_page_index = should_load_page_index;
-
-        // Open the Parquet file and load delete files concurrently.
-        let delete_filter_rx =
-            delete_file_loader.load_deletes(&task.deletes, Arc::clone(&task.schema));
-
-        let (parquet_result, delete_filter) = futures::join!(
-            Self::open_parquet_stream_builder(
-                &task.data_file_path,
-                task.file_size_in_bytes,
-                file_io,
-                parquet_read_options,
-                Self::build_virtual_columns(task.project_field_ids()),
-                batch_size,
-                task.name_mapping.as_deref(),
-            ),
-            async { delete_filter_rx.await.unwrap() },
-        );
-        let (builder, has_missing_field_ids) = parquet_result?;
-        let delete_filter = delete_filter?;
-        let delete_predicate = delete_filter.build_equality_delete_predicate(&task).await?;
-
-        // In addition to the optional predicate supplied in the `FileScanTask`,
-        // we also have an optional predicate resulting from equality delete files.
-        // If both are present, we logical-AND them together to form a single filter
-        // predicate that we can pass to the `RecordBatchStreamBuilder`.
-        let final_predicate = match (&task.predicate, delete_predicate) {
-            (None, None) => None,
-            (Some(predicate), None) => Some(predicate.clone()),
-            (None, Some(ref predicate)) => Some(predicate.clone()),
-            (Some(filter_predicate), Some(delete_predicate)) => {
-                Some(filter_predicate.clone().and(delete_predicate))
-            }
-        };
-
-        let positional_delete_indexes = delete_filter.get_delete_vector(&task);
-
-        let builder = Self::apply_parquet_filters(
-            builder,
-            task.start,
-            task.length,
-            &task.schema,
-            final_predicate.as_ref(),
-            positional_delete_indexes.as_deref(),
-            row_group_filtering_enabled,
-            row_selection_enabled,
-            false, // use_predicate_projection: projection is handled by build_projected_record_batch_stream
-            has_missing_field_ids,
-        )?;
-
-        Self::build_projected_record_batch_stream(
-            builder,
-            task.project_field_ids(),
-            task.schema_ref(),
-            has_missing_field_ids,
-            &task.data_file_path,
-            task.partition_spec.clone(),
-            task.partition.clone(),
-        )
-    }
-
-    /// Opens a Parquet file and loads its metadata, returning both the reader and metadata.
-    /// The reader can be reused to build a `ParquetRecordBatchStreamBuilder` without
-    /// reopening the file.
-    pub(crate) async fn open_parquet_file(
-        data_file_path: &str,
-        file_io: &FileIO,
-        file_size_in_bytes: u64,
-        parquet_read_options: ParquetReadOptions,
-    ) -> Result<(ArrowFileReader, ArrowReaderMetadata)> {
-        let parquet_file = file_io.new_input(data_file_path)?;
-        let parquet_reader = parquet_file.reader().await?;
-        let mut reader = ArrowFileReader::new(
-            FileMetadata {
-                size: file_size_in_bytes,
-            },
-            parquet_reader,
-        )
-        .with_parquet_read_options(parquet_read_options);
-
-        let arrow_metadata = ArrowReaderMetadata::load_async(&mut reader, Default::default())
-            .await
-            .map_err(|e| {
-                Error::new(ErrorKind::Unexpected, "Failed to load Parquet metadata").with_source(e)
-            })?;
-
-        Ok((reader, arrow_metadata))
-    }
-
-    /// Opens a Parquet file, resolves its schema (name-mapping / field-ID fallback), and
-    /// applies the batch size. Returns `(builder, has_missing_field_ids)`.
-    ///
-    /// This is the async phase shared by every reading path. Callers that have background
-    /// work to overlap (e.g. delete-file loading) can run this concurrently with that work
-    /// using [`futures::join!`], then pass the result to [`Self::apply_parquet_filters`].
-    ///
-    /// Implements the three-branch schema resolution strategy matching Java's `ReadConf` constructor:
-    /// - Branch 1: file has embedded field IDs → trust them, use as-is
-    /// - Branch 2: name_mapping present → apply name mapping to assign correct Iceberg field IDs
-    /// - Branch 3: no name mapping → assign fallback position-based IDs
-    #[allow(clippy::too_many_arguments)]
-    pub(crate) async fn open_parquet_stream_builder(
-        data_file_path: &str,
-        file_size_in_bytes: u64,
-        file_io: FileIO,
-        parquet_read_options: ParquetReadOptions,
-        virtual_columns: Vec<Arc<arrow_schema::Field>>,
-        batch_size: Option<usize>,
-        name_mapping: Option<&NameMapping>,
-    ) -> Result<(ParquetRecordBatchStreamBuilder<ArrowFileReader>, bool)> {
-        let (file_reader, arrow_metadata) = Self::open_parquet_file(
-            data_file_path,
-            &file_io,
-            file_size_in_bytes,
-            parquet_read_options,
-        )
-        .await?;
-
-        // Check if Parquet file has embedded field IDs.
-        // Corresponds to Java's ParquetSchemaUtil.hasIds()
-        let has_missing_field_ids = arrow_metadata
-            .schema()
-            .fields()
-            .iter()
-            .next()
-            .is_some_and(|f| f.metadata().get(PARQUET_FIELD_ID_META_KEY).is_none());
-
-        // Three-branch schema resolution strategy matching Java's ReadConf constructor.
-        //
-        // When Parquet files lack field IDs (e.g., Hive/Spark migrations via add_files),
-        // we must assign field IDs BEFORE reading data to enable correct column projection.
-        let arrow_metadata = if has_missing_field_ids {
-            // Parquet file lacks field IDs - must assign them before reading.
-            let arrow_schema = if let Some(nm) = name_mapping {
-                // Branch 2: Apply name mapping to assign correct Iceberg field IDs.
-                // Corresponds to Java's ParquetSchemaUtil.applyNameMapping()
-                apply_name_mapping_to_arrow_schema(Arc::clone(arrow_metadata.schema()), nm)?
-            } else {
-                // Branch 3: No name mapping - use position-based fallback IDs.
-                // Corresponds to Java's ParquetSchemaUtil.addFallbackIds()
-                add_fallback_field_ids_to_arrow_schema(arrow_metadata.schema())
-            };
-            let mut options = ArrowReaderOptions::new().with_schema(arrow_schema);
-            if !virtual_columns.is_empty() {
-                options = options.with_virtual_columns(virtual_columns)?;
-            }
-            ArrowReaderMetadata::try_new(Arc::clone(arrow_metadata.metadata()), options).map_err(
-                |e| {
-                    Error::new(
-                        ErrorKind::Unexpected,
-                        "Failed to create ArrowReaderMetadata with field ID schema",
-                    )
-                    .with_source(e)
-                },
-            )?
-        } else {
-            // Branch 1: File has embedded field IDs - trust them.
-            if !virtual_columns.is_empty() {
-                let options = ArrowReaderOptions::new().with_virtual_columns(virtual_columns)?;
-                ArrowReaderMetadata::try_new(Arc::clone(arrow_metadata.metadata()), options)
-                    .map_err(|e| {
-                        Error::new(
-                            ErrorKind::Unexpected,
-                            "Failed to create ArrowReaderMetadata with virtual columns",
-                        )
-                        .with_source(e)
-                    })?
-            } else {
-                arrow_metadata
-            }
-        };
-
-        let mut builder =
-            ParquetRecordBatchStreamBuilder::new_with_metadata(file_reader, arrow_metadata);
-
-        if let Some(batch_size) = batch_size {
-            builder = builder.with_batch_size(batch_size);
-        }
-
-        Ok((builder, has_missing_field_ids))
-    }
-
-    /// Applies all row-level and row-group-level filters to a builder returned by
-    /// [`Self::open_parquet_stream_builder`].
-    ///
-    /// Handles byte-range row group pruning, predicate row filtering (with optional
-    /// projection), and positional-delete row selection.
-    #[allow(clippy::too_many_arguments)]
-    pub(crate) fn apply_parquet_filters(
-        mut builder: ParquetRecordBatchStreamBuilder<ArrowFileReader>,
-        start: u64,
-        length: u64,
-        schema: &Schema,
-        bound_predicate: Option<&BoundPredicate>,
-        positional_deletes: Option<&Mutex<DeleteVector>>,
-        row_group_filtering_enabled: bool,
-        row_selection_enabled: bool,
-        use_predicate_projection: bool,
-        has_missing_field_ids: bool,
-    ) -> Result<ParquetRecordBatchStreamBuilder<ArrowFileReader>> {
-        // There are three possible sources for potential lists of selected RowGroup indices,
-        // and two for `RowSelection`s.
-        // Selected RowGroup index lists can come from three sources:
-        //   * When task.start and task.length specify a byte range (file splitting);
-        //   * When there are equality delete files that are applicable;
-        //   * When there is a scan predicate and row_group_filtering_enabled = true.
-        // `RowSelection`s can be created in either or both of the following cases:
-        //   * When there are positional delete files that are applicable;
-        //   * When there is a scan predicate and row_selection_enabled = true
-        // Note that row group filtering from predicates only happens when
-        // there is a scan predicate AND row_group_filtering_enabled = true,
-        // but we perform row selection filtering if there are applicable
-        // equality delete files OR (there is a scan predicate AND row_selection_enabled),
-        // since the only implemented method of applying positional deletes is
-        // by using a `RowSelection`.
-        let mut selected_row_group_indices = None;
-        let mut row_selection = None;
-
-        if start != 0 || length != 0 {
-            selected_row_group_indices = Some(Self::filter_row_groups_by_byte_range(
-                builder.metadata(),
-                start,
-                length,
-            )?);
-        }
-
-        if let Some(predicate) = bound_predicate {
-            let (iceberg_field_ids, field_id_map) =
-                Self::build_field_id_set_and_map(builder.parquet_schema(), predicate)?;
-
-            if use_predicate_projection {
-                let predicate_field_ids: Vec<i32> = iceberg_field_ids.iter().copied().collect();
-                builder = Self::apply_projection(
-                    builder,
-                    &predicate_field_ids,
-                    schema,
-                    has_missing_field_ids,
-                )?;
-            }
-
-            let row_filter = Self::get_row_filter(
-                predicate,
-                builder.parquet_schema(),
-                &iceberg_field_ids,
-                &field_id_map,
-            )?;
-            builder = builder.with_row_filter(row_filter);
-
-            if row_group_filtering_enabled {
-                let predicate_filtered = Self::get_selected_row_group_indices(
-                    predicate,
-                    builder.metadata(),
-                    &field_id_map,
-                    schema,
-                )?;
-                selected_row_group_indices = Some(match selected_row_group_indices.take() {
-                    Some(existing) => existing
-                        .into_iter()
-                        .filter(|idx| predicate_filtered.contains(idx))
-                        .collect(),
-                    None => predicate_filtered,
-                });
-            }
-
-            if row_selection_enabled {
-                row_selection = Some(Self::get_row_selection_for_filter_predicate(
-                    predicate,
-                    builder.metadata(),
-                    &selected_row_group_indices,
-                    &field_id_map,
-                    schema,
-                )?);
-            }
-        }
-
-        if let Some(positional_delete_indexes) = positional_deletes {
-            let delete_row_selection = {
-                let guard = positional_delete_indexes.lock().unwrap();
-                Self::build_deletes_row_selection(
-                    builder.metadata().row_groups(),
-                    &selected_row_group_indices,
-                    &guard,
-                )
-            }?;
-            row_selection = Some(match row_selection.take() {
-                None => delete_row_selection,
-                Some(prev) => prev.intersection(&delete_row_selection),
-            });
-        }
-
-        if let Some(sel) = row_selection {
-            builder = builder.with_row_selection(sel);
-        }
-        if let Some(groups) = selected_row_group_indices {
-            builder = builder.with_row_groups(groups);
-        }
-
-        Ok(builder)
-    }
-
-    /// computes a `RowSelection` from positional delete indices.
-    ///
-    /// Using the Parquet page index, we build a `RowSelection` that rejects rows that are indicated
-    /// as having been deleted by a positional delete, taking into account any row groups that have
-    /// been skipped entirely by the filter predicate
-    fn build_deletes_row_selection(
-        row_group_metadata_list: &[RowGroupMetaData],
-        selected_row_groups: &Option<Vec<usize>>,
-        positional_deletes: &DeleteVector,
-    ) -> Result<RowSelection> {
-        let mut results: Vec<RowSelector> = Vec::new();
-        let mut selected_row_groups_idx = 0;
-        let mut current_row_group_base_idx: u64 = 0;
-        let mut delete_vector_iter = positional_deletes.iter();
-        let mut next_deleted_row_idx_opt = delete_vector_iter.next();
-
-        for (idx, row_group_metadata) in row_group_metadata_list.iter().enumerate() {
-            let row_group_num_rows = row_group_metadata.num_rows() as u64;
-            let next_row_group_base_idx = current_row_group_base_idx + row_group_num_rows;
-
-            // if row group selection is enabled,
-            if let Some(selected_row_groups) = selected_row_groups {
-                // if we've consumed all the selected row groups, we're done
-                if selected_row_groups_idx == selected_row_groups.len() {
-                    break;
-                }
-
-                if idx == selected_row_groups[selected_row_groups_idx] {
-                    // we're in a selected row group. Increment selected_row_groups_idx
-                    // so that next time around the for loop we're looking for the next
-                    // selected row group
-                    selected_row_groups_idx += 1;
-                } else {
-                    // Advance iterator past all deletes in the skipped row group.
-                    // advance_to() positions the iterator to the first delete >= next_row_group_base_idx.
-                    // However, if our cached next_deleted_row_idx_opt is in the skipped range,
-                    // we need to call next() to update the cache with the newly positioned value.
-                    delete_vector_iter.advance_to(next_row_group_base_idx);
-                    // Only update the cache if the cached value is stale (in the skipped range)
-                    if let Some(cached_idx) = next_deleted_row_idx_opt
-                        && cached_idx < next_row_group_base_idx
-                    {
-                        next_deleted_row_idx_opt = delete_vector_iter.next();
-                    }
-
-                    // still increment the current page base index but then skip to the next row group
-                    // in the file
-                    current_row_group_base_idx += row_group_num_rows;
-                    continue;
-                }
-            }
-
-            let mut next_deleted_row_idx = match next_deleted_row_idx_opt {
-                Some(next_deleted_row_idx) => {
-                    // if the index of the next deleted row is beyond this row group, add a selection for
-                    // the remainder of this row group and skip to the next row group
-                    if next_deleted_row_idx >= next_row_group_base_idx {
-                        results.push(RowSelector::select(row_group_num_rows as usize));
-                        current_row_group_base_idx += row_group_num_rows;
-                        continue;
-                    }
-
-                    next_deleted_row_idx
-                }
-
-                // If there are no more pos deletes, add a selector for the entirety of this row group.
-                _ => {
-                    results.push(RowSelector::select(row_group_num_rows as usize));
-                    current_row_group_base_idx += row_group_num_rows;
-                    continue;
-                }
-            };
-
-            let mut current_idx = current_row_group_base_idx;
-            'chunks: while next_deleted_row_idx < next_row_group_base_idx {
-                // `select` all rows that precede the next delete index
-                if current_idx < next_deleted_row_idx {
-                    let run_length = next_deleted_row_idx - current_idx;
-                    results.push(RowSelector::select(run_length as usize));
-                    current_idx += run_length;
-                }
-
-                // `skip` all consecutive deleted rows in the current row group
-                let mut run_length = 0;
-                while next_deleted_row_idx == current_idx
-                    && next_deleted_row_idx < next_row_group_base_idx
-                {
-                    run_length += 1;
-                    current_idx += 1;
-
-                    next_deleted_row_idx_opt = delete_vector_iter.next();
-                    next_deleted_row_idx = match next_deleted_row_idx_opt {
-                        Some(next_deleted_row_idx) => next_deleted_row_idx,
-                        _ => {
-                            // We've processed the final positional delete.
-                            // Conclude the skip and then break so that we select the remaining
-                            // rows in the row group and move on to the next row group
-                            results.push(RowSelector::skip(run_length));
-                            break 'chunks;
-                        }
-                    };
-                }
-                if run_length > 0 {
-                    results.push(RowSelector::skip(run_length));
-                }
-            }
-
-            if current_idx < next_row_group_base_idx {
-                results.push(RowSelector::select(
-                    (next_row_group_base_idx - current_idx) as usize,
-                ));
-            }
-
-            current_row_group_base_idx += row_group_num_rows;
-        }
-
-        Ok(results.into())
-    }
-
-    fn build_field_id_set_and_map(
-        parquet_schema: &SchemaDescriptor,
-        predicate: &BoundPredicate,
-    ) -> Result<(HashSet<i32>, HashMap<i32, usize>)> {
-        // Collects all Iceberg field IDs referenced in the filter predicate
-        let mut collector = CollectFieldIdVisitor {
-            field_ids: HashSet::default(),
-        };
-        visit(&mut collector, predicate)?;
-
-        let iceberg_field_ids = collector.field_ids();
-
-        // Without embedded field IDs, we fall back to position-based mapping for compatibility
-        let field_id_map = match build_field_id_map(parquet_schema)? {
-            Some(map) => map,
-            None => build_fallback_field_id_map(parquet_schema),
-        };
-
-        Ok((iceberg_field_ids, field_id_map))
-    }
-
-    /// Recursively extract leaf field IDs because Parquet projection works at the leaf column level.
-    /// Nested types (struct/list/map) are flattened in Parquet's columnar format.
-    fn include_leaf_field_id(field: &NestedField, field_ids: &mut Vec<i32>) {
-        match field.field_type.as_ref() {
-            Type::Primitive(_) => {
-                field_ids.push(field.id);
-            }
-            Type::Struct(struct_type) => {
-                for nested_field in struct_type.fields() {
-                    Self::include_leaf_field_id(nested_field, field_ids);
-                }
-            }
-            Type::List(list_type) => {
-                Self::include_leaf_field_id(&list_type.element_field, field_ids);
-            }
-            Type::Map(map_type) => {
-                Self::include_leaf_field_id(&map_type.key_field, field_ids);
-                Self::include_leaf_field_id(&map_type.value_field, field_ids);
-            }
-        }
-    }
-
-    fn get_arrow_projection_mask(
-        field_ids: &[i32],
-        iceberg_schema_of_task: &Schema,
-        parquet_schema: &SchemaDescriptor,
-        arrow_schema: &ArrowSchemaRef,
-        use_fallback: bool, // Whether file lacks embedded field IDs (e.g., migrated from Hive/Spark)
-    ) -> Result<ProjectionMask> {
-        fn type_promotion_is_valid(
-            file_type: Option<&PrimitiveType>,
-            projected_type: Option<&PrimitiveType>,
-        ) -> bool {
-            match (file_type, projected_type) {
-                (Some(lhs), Some(rhs)) if lhs == rhs => true,
-                (Some(PrimitiveType::Int), Some(PrimitiveType::Long)) => true,
-                (Some(PrimitiveType::Float), Some(PrimitiveType::Double)) => true,
-                (
-                    Some(PrimitiveType::Decimal {
-                        precision: file_precision,
-                        scale: file_scale,
-                    }),
-                    Some(PrimitiveType::Decimal {
-                        precision: requested_precision,
-                        scale: requested_scale,
-                    }),
-                ) if requested_precision >= file_precision && file_scale == requested_scale => true,
-                // Uuid will be store as Fixed(16) in parquet file, so the read back type will be Fixed(16).
-                (Some(PrimitiveType::Fixed(16)), Some(PrimitiveType::Uuid)) => true,
-                // Some Parquet writers (e.g. Snowflake) store FIXED_LEN_BYTE_ARRAY as
-                // Arrow Binary rather than FixedSizeBinary. Allow Binary -> Fixed(N)
-                // since the underlying bytes are the same.
-                (Some(PrimitiveType::Binary), Some(PrimitiveType::Fixed(_))) => true,
-                _ => false,
-            }
-        }
-
-        if field_ids.is_empty() {
-            return Ok(ProjectionMask::all());
-        }
-
-        if use_fallback {
-            // Position-based projection necessary because file lacks embedded field IDs
-            Self::get_arrow_projection_mask_fallback(field_ids, parquet_schema)
-        } else {
-            // Field-ID-based projection using embedded field IDs from Parquet metadata
-
-            // Parquet's columnar format requires leaf-level (not top-level struct/list/map) projection
-            let mut leaf_field_ids = vec![];
-            for field_id in field_ids {
-                let field = iceberg_schema_of_task.field_by_id(*field_id);
-                if let Some(field) = field {
-                    Self::include_leaf_field_id(field, &mut leaf_field_ids);
-                }
-            }
-
-            Self::get_arrow_projection_mask_with_field_ids(
-                &leaf_field_ids,
-                iceberg_schema_of_task,
-                parquet_schema,
-                arrow_schema,
-                type_promotion_is_valid,
-            )
-        }
-    }
-
-    /// Standard projection using embedded field IDs from Parquet metadata.
-    /// For iceberg-java compatibility with ParquetSchemaUtil.pruneColumns().
-    fn get_arrow_projection_mask_with_field_ids(
-        leaf_field_ids: &[i32],
-        iceberg_schema_of_task: &Schema,
-        parquet_schema: &SchemaDescriptor,
-        arrow_schema: &ArrowSchemaRef,
-        type_promotion_is_valid: fn(Option<&PrimitiveType>, Option<&PrimitiveType>) -> bool,
-    ) -> Result<ProjectionMask> {
-        let mut column_map = HashMap::new();
-        let fields = arrow_schema.fields();
-
-        // Pre-project only the fields that have been selected, possibly avoiding converting
-        // some Arrow types that are not yet supported.
-        let mut projected_fields: HashMap<FieldRef, i32> = HashMap::new();
-        let projected_arrow_schema = ArrowSchema::new_with_metadata(
-            fields.filter_leaves(|_, f| {
-                f.metadata()
-                    .get(PARQUET_FIELD_ID_META_KEY)
-                    .and_then(|field_id| i32::from_str(field_id).ok())
-                    .is_some_and(|field_id| {
-                        projected_fields.insert((*f).clone(), field_id);
-                        leaf_field_ids.contains(&field_id)
-                    })
-            }),
-            arrow_schema.metadata().clone(),
-        );
-        let iceberg_schema = arrow_schema_to_schema(&projected_arrow_schema)?;
-
-        fields.filter_leaves(|idx, field| {
-            let Some(field_id) = projected_fields.get(field).cloned() else {
-                return false;
-            };
-
-            let iceberg_field = iceberg_schema_of_task.field_by_id(field_id);
-            let parquet_iceberg_field = iceberg_schema.field_by_id(field_id);
-
-            if iceberg_field.is_none() || parquet_iceberg_field.is_none() {
-                return false;
-            }
-
-            if !type_promotion_is_valid(
-                parquet_iceberg_field
-                    .unwrap()
-                    .field_type
-                    .as_primitive_type(),
-                iceberg_field.unwrap().field_type.as_primitive_type(),
-            ) {
-                return false;
-            }
-
-            column_map.insert(field_id, idx);
-            true
-        });
-
-        // Schema evolution: New columns may not exist in old Parquet files.
-        // We only project existing columns; RecordBatchTransformer adds default/NULL values.
-        let mut indices = vec![];
-        for field_id in leaf_field_ids {
-            if let Some(col_idx) = column_map.get(field_id) {
-                indices.push(*col_idx);
-            }
-        }
-
-        if indices.is_empty() {
-            // Edge case: All requested columns are new (don't exist in file).
-            // Project all columns so RecordBatchTransformer has a batch to transform.
-            Ok(ProjectionMask::all())
-        } else {
-            Ok(ProjectionMask::leaves(parquet_schema, indices))
-        }
-    }
-
-    /// Fallback projection for Parquet files without field IDs.
-    /// Uses position-based matching: field ID N → column position N-1.
-    /// Projects entire top-level columns (including nested content) for iceberg-java compatibility.
-    fn get_arrow_projection_mask_fallback(
-        field_ids: &[i32],
-        parquet_schema: &SchemaDescriptor,
-    ) -> Result<ProjectionMask> {
-        // Position-based: field_id N → column N-1 (field IDs are 1-indexed)
-        let parquet_root_fields = parquet_schema.root_schema().get_fields();
-        let mut root_indices = vec![];
-
-        for field_id in field_ids.iter() {
-            let parquet_pos = (*field_id - 1) as usize;
-
-            if parquet_pos < parquet_root_fields.len() {
-                root_indices.push(parquet_pos);
-            }
-            // RecordBatchTransformer adds missing columns with NULL values
-        }
-
-        if root_indices.is_empty() {
-            Ok(ProjectionMask::all())
-        } else {
-            Ok(ProjectionMask::roots(parquet_schema, root_indices))
-        }
-    }
-
-    fn get_row_filter(
-        predicates: &BoundPredicate,
-        parquet_schema: &SchemaDescriptor,
-        iceberg_field_ids: &HashSet<i32>,
-        field_id_map: &HashMap<i32, usize>,
-    ) -> Result<RowFilter> {
-        // Collect Parquet column indices from field ids.
-        // If the field id is not found in Parquet schema, it will be ignored due to schema evolution.
-        let mut column_indices = iceberg_field_ids
-            .iter()
-            .filter_map(|field_id| field_id_map.get(field_id).cloned())
-            .collect::<Vec<_>>();
-        column_indices.sort();
-
-        // The converter that converts `BoundPredicates` to `ArrowPredicates`
-        let mut converter = PredicateConverter {
-            parquet_schema,
-            column_map: field_id_map,
-            column_indices: &column_indices,
-        };
-
-        // After collecting required leaf column indices used in the predicate,
-        // creates the projection mask for the Arrow predicates.
-        let projection_mask = ProjectionMask::leaves(parquet_schema, column_indices.clone());
-        let predicate_func = visit(&mut converter, predicates)?;
-        let arrow_predicate = ArrowPredicateFn::new(projection_mask, predicate_func);
-        Ok(RowFilter::new(vec![Box::new(arrow_predicate)]))
-    }
-
-    fn get_selected_row_group_indices(
-        predicate: &BoundPredicate,
-        parquet_metadata: &Arc<ParquetMetaData>,
-        field_id_map: &HashMap<i32, usize>,
-        snapshot_schema: &Schema,
-    ) -> Result<Vec<usize>> {
-        let row_groups_metadata = parquet_metadata.row_groups();
-        let mut results = Vec::with_capacity(row_groups_metadata.len());
-
-        for (idx, row_group_metadata) in row_groups_metadata.iter().enumerate() {
-            if RowGroupMetricsEvaluator::eval(
-                predicate,
-                row_group_metadata,
-                field_id_map,
-                snapshot_schema,
-            )? {
-                results.push(idx);
-            }
-        }
-
-        Ok(results)
-    }
-
-    /// Applies a projection mask derived from `field_ids` to a builder.
-    ///
-    /// Wraps `get_arrow_projection_mask` + `with_projection` into a single call.
-    fn apply_projection(
-        builder: ParquetRecordBatchStreamBuilder<ArrowFileReader>,
-        field_ids: &[i32],
-        schema: &Schema,
-        has_missing_field_ids: bool,
-    ) -> Result<ParquetRecordBatchStreamBuilder<ArrowFileReader>> {
-        // Metadata fields (e.g. _file, _pos) are virtual — they don't exist as Parquet columns.
-        // Filter them out so get_arrow_projection_mask only sees real schema field IDs.
-        let project_field_ids_without_metadata: Vec<i32> = field_ids
-            .iter()
-            .filter(|&&id| !is_metadata_field(id))
-            .copied()
-            .collect();
-        let mask = Self::get_arrow_projection_mask(
-            &project_field_ids_without_metadata,
-            schema,
-            builder.parquet_schema(),
-            builder.schema(),
-            has_missing_field_ids,
-        )?;
-        Ok(builder.with_projection(mask))
-    }
-
-    /// Builds a [`RecordBatchTransformer`] for a data file scan task.
-    ///
-    /// Handles the three optional transformations that are common to both the full
-    /// Returns the list of virtual columns to request from the Parquet reader for the
-    /// given projection. Currently, only `_pos` is a virtual column (produced by the
-    /// Parquet reader itself rather than read from file data).
-    pub(crate) fn build_virtual_columns(
-        project_field_ids: &[i32],
-    ) -> Vec<Arc<arrow_schema::Field>> {
-        let mut virtual_columns = Vec::new();
-        if project_field_ids.contains(&RESERVED_FIELD_ID_POS) {
-            virtual_columns.push(Arc::clone(row_pos_field()));
-        }
-        virtual_columns
-    }
-
-    /// scan (`process_file_scan_task`) and the incremental append scan
-    /// (`process_incremental_append_task`):
-    /// - `_file` constant column (only when `RESERVED_FIELD_ID_FILE` is projected)
-    /// - `_pos` virtual column (only when `RESERVED_FIELD_ID_POS` is projected)
-    /// - identity-transform partition columns (only when partition metadata is present)
-    fn build_record_batch_transformer(
-        schema: SchemaRef,
-        project_field_ids: &[i32],
-        data_file_path: &str,
-        partition_spec: Option<Arc<PartitionSpec>>,
-        partition: Option<Struct>,
-    ) -> Result<RecordBatchTransformer> {
-        let mut builder = RecordBatchTransformerBuilder::new(schema, project_field_ids);
-
-        if project_field_ids.contains(&RESERVED_FIELD_ID_FILE) {
-            builder = builder.with_constant(RESERVED_FIELD_ID_FILE, Datum::string(data_file_path));
-        }
-
-        if project_field_ids.contains(&RESERVED_FIELD_ID_POS) {
-            builder = builder.with_virtual_field(Arc::clone(row_pos_field()))?;
-        }
-
-        if let (Some(spec), Some(data)) = (partition_spec, partition) {
-            builder = builder.with_partition(spec, data)?;
-        }
-
-        Ok(builder.build())
-    }
-
-    fn get_row_selection_for_filter_predicate(
-        predicate: &BoundPredicate,
-        parquet_metadata: &Arc<ParquetMetaData>,
-        selected_row_groups: &Option<Vec<usize>>,
-        field_id_map: &HashMap<i32, usize>,
-        snapshot_schema: &Schema,
-    ) -> Result<RowSelection> {
-        let Some(column_index) = parquet_metadata.column_index() else {
-            return Err(Error::new(
-                ErrorKind::Unexpected,
-                "Parquet file metadata does not contain a column index",
-            ));
-        };
-
-        let Some(offset_index) = parquet_metadata.offset_index() else {
-            return Err(Error::new(
-                ErrorKind::Unexpected,
-                "Parquet file metadata does not contain an offset index",
-            ));
-        };
-
-        // If all row groups were filtered out, return an empty RowSelection (select no rows)
-        if let Some(selected_row_groups) = selected_row_groups
-            && selected_row_groups.is_empty()
-        {
-            return Ok(RowSelection::from(Vec::new()));
-        }
-
-        let mut selected_row_groups_idx = 0;
-
-        let page_index = column_index
-            .iter()
-            .enumerate()
-            .zip(offset_index)
-            .zip(parquet_metadata.row_groups());
-
-        let mut results = Vec::new();
-        for (((idx, column_index), offset_index), row_group_metadata) in page_index {
-            if let Some(selected_row_groups) = selected_row_groups {
-                // skip row groups that aren't present in selected_row_groups
-                if idx == selected_row_groups[selected_row_groups_idx] {
-                    selected_row_groups_idx += 1;
-                } else {
-                    continue;
-                }
-            }
-
-            let selections_for_page = PageIndexEvaluator::eval(
-                predicate,
-                column_index,
-                offset_index,
-                row_group_metadata,
-                field_id_map,
-                snapshot_schema,
-            )?;
-
-            results.push(selections_for_page);
-
-            if let Some(selected_row_groups) = selected_row_groups
-                && selected_row_groups_idx == selected_row_groups.len()
-            {
-                break;
-            }
-        }
-
-        Ok(results.into_iter().flatten().collect::<Vec<_>>().into())
-    }
-
-    /// Filters row groups by byte range to support Iceberg's file splitting.
-    ///
-    /// Applies an optional row group list and optional `RowSelection` to a builder.
-    ///
-    /// Centralises the final "commit" step shared by all Parquet reading paths.
-    /// Applies projection to `builder`, constructs a `RecordBatchTransformer`, builds the
-    /// Parquet stream, and wraps it so every batch is passed through the transformer.
-    ///
-    /// This is the shared finalization step used by every data-file reading path.
-    pub(crate) fn build_projected_record_batch_stream(
-        builder: ParquetRecordBatchStreamBuilder<ArrowFileReader>,
-        project_field_ids: &[i32],
-        schema: SchemaRef,
-        has_missing_field_ids: bool,
-        data_file_path: &str,
-        partition_spec: Option<Arc<PartitionSpec>>,
-        partition: Option<Struct>,
-    ) -> Result<ArrowRecordBatchStream> {
-        let builder =
-            Self::apply_projection(builder, project_field_ids, &schema, has_missing_field_ids)?;
-
-        let mut record_batch_transformer = Self::build_record_batch_transformer(
-            schema,
-            project_field_ids,
-            data_file_path,
-            partition_spec,
-            partition,
-        )?;
-
-        let record_batch_stream = builder.build()?.map(move |batch| match batch {
-            Ok(batch) => record_batch_transformer.process_record_batch(batch),
-            Err(err) => Err(err.into()),
-        });
-
-        Ok(Box::pin(record_batch_stream) as ArrowRecordBatchStream)
-    }
-
-    fn filter_row_groups_by_byte_range(
-        parquet_metadata: &Arc<ParquetMetaData>,
-        start: u64,
-        length: u64,
-    ) -> Result<Vec<usize>> {
-        let row_groups = parquet_metadata.row_groups();
-        let mut selected = Vec::new();
-        let end = start + length;
-
-        // Row groups are stored sequentially after the 4-byte magic header.
-        let mut current_byte_offset = 4u64;
-
-        for (idx, row_group) in row_groups.iter().enumerate() {
-            let row_group_size = row_group.compressed_size() as u64;
-            let row_group_end = current_byte_offset + row_group_size;
-
-            if current_byte_offset < end && start < row_group_end {
-                selected.push(idx);
-            }
-
-            current_byte_offset = row_group_end;
-        }
-
-        Ok(selected)
-    }
-}
-
-/// Build the map of parquet field id to Parquet column index in the schema.
-/// Returns None if the Parquet file doesn't have field IDs embedded (e.g., migrated tables).
-fn build_field_id_map(parquet_schema: &SchemaDescriptor) -> Result<Option<HashMap<i32, usize>>> {
-    let mut column_map = HashMap::new();
-
-    for (idx, field) in parquet_schema.columns().iter().enumerate() {
-        let field_type = field.self_type();
-        match field_type {
-            ParquetType::PrimitiveType { basic_info, .. } => {
-                if !basic_info.has_id() {
-                    return Ok(None);
-                }
-                column_map.insert(basic_info.id(), idx);
-            }
-            ParquetType::GroupType { .. } => {
-                return Err(Error::new(
-                    ErrorKind::DataInvalid,
-                    format!(
-                        "Leave column in schema should be primitive type but got {field_type:?}"
-                    ),
-                ));
-            }
-        };
-    }
-
-    Ok(Some(column_map))
-}
-
-/// Build a fallback field ID map for Parquet files without embedded field IDs.
-/// Position-based (1, 2, 3, ...) for compatibility with iceberg-java migrations.
-fn build_fallback_field_id_map(parquet_schema: &SchemaDescriptor) -> HashMap<i32, usize> {
-    let mut column_map = HashMap::new();
-
-    // 1-indexed to match iceberg-java's convention
-    for (idx, _field) in parquet_schema.columns().iter().enumerate() {
-        let field_id = (idx + 1) as i32;
-        column_map.insert(field_id, idx);
-    }
-
-    column_map
-}
-
-/// Apply name mapping to Arrow schema for Parquet files lacking field IDs.
-///
-/// Assigns Iceberg field IDs based on column names using the name mapping,
-/// enabling correct projection on migrated files (e.g., from Hive/Spark via add_files).
-///
-/// Per Iceberg spec Column Projection rule #2:
-/// "Use schema.name-mapping.default metadata to map field id to columns without field id"
-/// https://iceberg.apache.org/spec/#column-projection
-///
-/// Corresponds to Java's ParquetSchemaUtil.applyNameMapping() and ApplyNameMapping visitor.
-/// The key difference is Java operates on Parquet MessageType, while we operate on Arrow Schema.
-///
-/// # Arguments
-/// * `arrow_schema` - Arrow schema from Parquet file (without field IDs)
-/// * `name_mapping` - Name mapping from table metadata (TableProperties.DEFAULT_NAME_MAPPING)
-///
-/// # Returns
-/// Arrow schema with field IDs assigned based on name mapping
-fn apply_name_mapping_to_arrow_schema(
-    arrow_schema: ArrowSchemaRef,
-    name_mapping: &NameMapping,
-) -> Result<Arc<ArrowSchema>> {
-    debug_assert!(
-        arrow_schema
-            .fields()
-            .iter()
-            .next()
-            .is_none_or(|f| f.metadata().get(PARQUET_FIELD_ID_META_KEY).is_none()),
-        "Schema already has field IDs - name mapping should not be applied"
-    );
-
-    use arrow_schema::Field;
-
-    let fields_with_mapped_ids: Vec<_> = arrow_schema
-        .fields()
-        .iter()
-        .map(|field| {
-            // Look up this column name in name mapping to get the Iceberg field ID.
-            // Corresponds to Java's ApplyNameMapping visitor which calls
-            // nameMapping.find(currentPath()) and returns field.withId() if found.
-            //
-            // If the field isn't in the mapping, leave it WITHOUT assigning an ID
-            // (matching Java's behavior of returning the field unchanged).
-            // Later, during projection, fields without IDs are filtered out.
-            let mapped_field_opt = name_mapping
-                .fields()
-                .iter()
-                .find(|f| f.names().contains(&field.name().to_string()));
-
-            let mut metadata = field.metadata().clone();
-
-            if let Some(mapped_field) = mapped_field_opt
-                && let Some(field_id) = mapped_field.field_id()
-            {
-                // Field found in mapping with a field_id → assign it
-                metadata.insert(PARQUET_FIELD_ID_META_KEY.to_string(), field_id.to_string());
-            }
-            // If field_id is None, leave the field without an ID (will be filtered by projection)
-
-            Field::new(field.name(), field.data_type().clone(), field.is_nullable())
-                .with_metadata(metadata)
-        })
-        .collect();
-
-    Ok(Arc::new(ArrowSchema::new_with_metadata(
-        fields_with_mapped_ids,
-        arrow_schema.metadata().clone(),
-    )))
-}
-
-/// Add position-based fallback field IDs to Arrow schema for Parquet files lacking them.
-/// Enables projection on migrated files (e.g., from Hive/Spark).
-///
-/// Why at schema level (not per-batch): Efficiency - avoids repeated schema modification.
-/// Why only top-level: Nested projection uses leaf column indices, not parent struct IDs.
-/// Why 1-indexed: Compatibility with iceberg-java's ParquetSchemaUtil.addFallbackIds().
-fn add_fallback_field_ids_to_arrow_schema(arrow_schema: &ArrowSchemaRef) -> Arc<ArrowSchema> {
-    debug_assert!(
-        arrow_schema
-            .fields()
-            .iter()
-            .next()
-            .is_none_or(|f| f.metadata().get(PARQUET_FIELD_ID_META_KEY).is_none()),
-        "Schema already has field IDs"
-    );
-
-    use arrow_schema::Field;
-
-    let fields_with_fallback_ids: Vec<_> = arrow_schema
-        .fields()
-        .iter()
-        .enumerate()
-        .map(|(pos, field)| {
-            let mut metadata = field.metadata().clone();
-            let field_id = (pos + 1) as i32; // 1-indexed for Java compatibility
-            metadata.insert(PARQUET_FIELD_ID_META_KEY.to_string(), field_id.to_string());
-
-            Field::new(field.name(), field.data_type().clone(), field.is_nullable())
-                .with_metadata(metadata)
-        })
-        .collect();
-
-    Arc::new(ArrowSchema::new_with_metadata(
-        fields_with_fallback_ids,
-        arrow_schema.metadata().clone(),
-    ))
-}
-
-/// A visitor to collect field ids from bound predicates.
-struct CollectFieldIdVisitor {
-    field_ids: HashSet<i32>,
-}
-
-impl CollectFieldIdVisitor {
-    fn field_ids(self) -> HashSet<i32> {
-        self.field_ids
-    }
-}
-
-impl BoundPredicateVisitor for CollectFieldIdVisitor {
-    type T = ();
-
-    fn always_true(&mut self) -> Result<()> {
-        Ok(())
-    }
-
-    fn always_false(&mut self) -> Result<()> {
-        Ok(())
-    }
-
-    fn and(&mut self, _lhs: (), _rhs: ()) -> Result<()> {
-        Ok(())
-    }
-
-    fn or(&mut self, _lhs: (), _rhs: ()) -> Result<()> {
-        Ok(())
-    }
-
-    fn not(&mut self, _inner: ()) -> Result<()> {
-        Ok(())
-    }
-
-    fn is_null(&mut self, reference: &BoundReference, _predicate: &BoundPredicate) -> Result<()> {
-        self.field_ids.insert(reference.field().id);
-        Ok(())
-    }
-
-    fn not_null(&mut self, reference: &BoundReference, _predicate: &BoundPredicate) -> Result<()> {
-        self.field_ids.insert(reference.field().id);
-        Ok(())
-    }
-
-    fn is_nan(&mut self, reference: &BoundReference, _predicate: &BoundPredicate) -> Result<()> {
-        self.field_ids.insert(reference.field().id);
-        Ok(())
-    }
-
-    fn not_nan(&mut self, reference: &BoundReference, _predicate: &BoundPredicate) -> Result<()> {
-        self.field_ids.insert(reference.field().id);
-        Ok(())
-    }
-
-    fn less_than(
-        &mut self,
-        reference: &BoundReference,
-        _literal: &Datum,
-        _predicate: &BoundPredicate,
-    ) -> Result<()> {
-        self.field_ids.insert(reference.field().id);
-        Ok(())
-    }
-
-    fn less_than_or_eq(
-        &mut self,
-        reference: &BoundReference,
-        _literal: &Datum,
-        _predicate: &BoundPredicate,
-    ) -> Result<()> {
-        self.field_ids.insert(reference.field().id);
-        Ok(())
-    }
-
-    fn greater_than(
-        &mut self,
-        reference: &BoundReference,
-        _literal: &Datum,
-        _predicate: &BoundPredicate,
-    ) -> Result<()> {
-        self.field_ids.insert(reference.field().id);
-        Ok(())
-    }
-
-    fn greater_than_or_eq(
-        &mut self,
-        reference: &BoundReference,
-        _literal: &Datum,
-        _predicate: &BoundPredicate,
-    ) -> Result<()> {
-        self.field_ids.insert(reference.field().id);
-        Ok(())
-    }
-
-    fn eq(
-        &mut self,
-        reference: &BoundReference,
-        _literal: &Datum,
-        _predicate: &BoundPredicate,
-    ) -> Result<()> {
-        self.field_ids.insert(reference.field().id);
-        Ok(())
-    }
-
-    fn not_eq(
-        &mut self,
-        reference: &BoundReference,
-        _literal: &Datum,
-        _predicate: &BoundPredicate,
-    ) -> Result<()> {
-        self.field_ids.insert(reference.field().id);
-        Ok(())
-    }
-
-    fn starts_with(
-        &mut self,
-        reference: &BoundReference,
-        _literal: &Datum,
-        _predicate: &BoundPredicate,
-    ) -> Result<()> {
-        self.field_ids.insert(reference.field().id);
-        Ok(())
-    }
-
-    fn not_starts_with(
-        &mut self,
-        reference: &BoundReference,
-        _literal: &Datum,
-        _predicate: &BoundPredicate,
-    ) -> Result<()> {
-        self.field_ids.insert(reference.field().id);
-        Ok(())
-    }
-
-    fn r#in(
-        &mut self,
-        reference: &BoundReference,
-        _literals: &FnvHashSet<Datum>,
-        _predicate: &BoundPredicate,
-    ) -> Result<()> {
-        self.field_ids.insert(reference.field().id);
-        Ok(())
-    }
-
-    fn not_in(
-        &mut self,
-        reference: &BoundReference,
-        _literals: &FnvHashSet<Datum>,
-        _predicate: &BoundPredicate,
-    ) -> Result<()> {
-        self.field_ids.insert(reference.field().id);
-        Ok(())
-    }
-}
-
-/// A visitor to convert Iceberg bound predicates to Arrow predicates.
-struct PredicateConverter<'a> {
-    /// The Parquet schema descriptor.
-    pub parquet_schema: &'a SchemaDescriptor,
-    /// The map between field id and leaf column index in Parquet schema.
-    pub column_map: &'a HashMap<i32, usize>,
-    /// The required column indices in Parquet schema for the predicates.
-    pub column_indices: &'a Vec<usize>,
-}
-
-impl PredicateConverter<'_> {
-    /// When visiting a bound reference, we return index of the leaf column in the
-    /// required column indices which is used to project the column in the record batch.
-    /// Return None if the field id is not found in the column map, which is possible
-    /// due to schema evolution.
-    fn bound_reference(&mut self, reference: &BoundReference) -> Result<Option<usize>> {
-        // The leaf column's index in Parquet schema.
-        if let Some(column_idx) = self.column_map.get(&reference.field().id) {
-            if self.parquet_schema.get_column_root(*column_idx).is_group() {
-                return Err(Error::new(
-                    ErrorKind::DataInvalid,
-                    format!(
-                        "Leave column `{}` in predicates isn't a root column in Parquet schema.",
-                        reference.field().name
-                    ),
-                ));
-            }
-
-            // The leaf column's index in the required column indices.
-            let index = self
-                .column_indices
-                .iter()
-                .position(|&idx| idx == *column_idx)
-                .ok_or(Error::new(
-                    ErrorKind::DataInvalid,
-                    format!(
-                "Leave column `{}` in predicates cannot be found in the required column indices.",
-                reference.field().name
-            ),
-                ))?;
-
-            Ok(Some(index))
-        } else {
-            Ok(None)
-        }
-    }
-
-    /// Build an Arrow predicate that always returns true.
-    fn build_always_true(&self) -> Result<Box<PredicateResult>> {
-        Ok(Box::new(|batch| {
-            Ok(BooleanArray::from(vec![true; batch.num_rows()]))
-        }))
-    }
-
-    /// Build an Arrow predicate that always returns false.
-    fn build_always_false(&self) -> Result<Box<PredicateResult>> {
-        Ok(Box::new(|batch| {
-            Ok(BooleanArray::from(vec![false; batch.num_rows()]))
-        }))
-    }
-}
-
-/// Gets the leaf column from the record batch for the required column index. Only
-/// supports top-level columns for now.
-fn project_column(
-    batch: &RecordBatch,
-    column_idx: usize,
-) -> std::result::Result<ArrayRef, ArrowError> {
-    let column = batch.column(column_idx);
-
-    match column.data_type() {
-        DataType::Struct(_) => Err(ArrowError::SchemaError(
-            "Does not support struct column yet.".to_string(),
-        )),
-        _ => Ok(column.clone()),
-    }
-}
-
-type PredicateResult =
-    dyn FnMut(RecordBatch) -> std::result::Result<BooleanArray, ArrowError> + Send + 'static;
-
-impl BoundPredicateVisitor for PredicateConverter<'_> {
-    type T = Box<PredicateResult>;
-
-    fn always_true(&mut self) -> Result<Box<PredicateResult>> {
-        self.build_always_true()
-    }
-
-    fn always_false(&mut self) -> Result<Box<PredicateResult>> {
-        self.build_always_false()
-    }
-
-    fn and(
-        &mut self,
-        mut lhs: Box<PredicateResult>,
-        mut rhs: Box<PredicateResult>,
-    ) -> Result<Box<PredicateResult>> {
-        Ok(Box::new(move |batch| {
-            let left = lhs(batch.clone())?;
-            let right = rhs(batch)?;
-            and_kleene(&left, &right)
-        }))
-    }
-
-    fn or(
-        &mut self,
-        mut lhs: Box<PredicateResult>,
-        mut rhs: Box<PredicateResult>,
-    ) -> Result<Box<PredicateResult>> {
-        Ok(Box::new(move |batch| {
-            let left = lhs(batch.clone())?;
-            let right = rhs(batch)?;
-            or_kleene(&left, &right)
-        }))
-    }
-
-    fn not(&mut self, mut inner: Box<PredicateResult>) -> Result<Box<PredicateResult>> {
-        Ok(Box::new(move |batch| {
-            let pred_ret = inner(batch)?;
-            not(&pred_ret)
-        }))
-    }
-
-    fn is_null(
-        &mut self,
-        reference: &BoundReference,
-        _predicate: &BoundPredicate,
-    ) -> Result<Box<PredicateResult>> {
-        if let Some(idx) = self.bound_reference(reference)? {
-            Ok(Box::new(move |batch| {
-                let column = project_column(&batch, idx)?;
-                is_null(&column)
-            }))
-        } else {
-            // A missing column, treating it as null.
-            self.build_always_true()
-        }
-    }
-
-    fn not_null(
-        &mut self,
-        reference: &BoundReference,
-        _predicate: &BoundPredicate,
-    ) -> Result<Box<PredicateResult>> {
-        if let Some(idx) = self.bound_reference(reference)? {
-            Ok(Box::new(move |batch| {
-                let column = project_column(&batch, idx)?;
-                is_not_null(&column)
-            }))
-        } else {
-            // A missing column, treating it as null.
-            self.build_always_false()
-        }
-    }
-
-    fn is_nan(
-        &mut self,
-        reference: &BoundReference,
-        _predicate: &BoundPredicate,
-    ) -> Result<Box<PredicateResult>> {
-        if self.bound_reference(reference)?.is_some() {
-            self.build_always_true()
-        } else {
-            // A missing column, treating it as null.
-            self.build_always_false()
-        }
-    }
-
-    fn not_nan(
-        &mut self,
-        reference: &BoundReference,
-        _predicate: &BoundPredicate,
-    ) -> Result<Box<PredicateResult>> {
-        if self.bound_reference(reference)?.is_some() {
-            self.build_always_false()
-        } else {
-            // A missing column, treating it as null.
-            self.build_always_true()
-        }
-    }
-
-    fn less_than(
-        &mut self,
-        reference: &BoundReference,
-        literal: &Datum,
-        _predicate: &BoundPredicate,
-    ) -> Result<Box<PredicateResult>> {
-        if let Some(idx) = self.bound_reference(reference)? {
-            let literal = get_arrow_datum(literal)?;
-
-            Ok(Box::new(move |batch| {
-                let left = project_column(&batch, idx)?;
-                let literal = try_cast_literal(&literal, left.data_type())?;
-                lt(&left, literal.as_ref())
-            }))
-        } else {
-            // A missing column, treating it as null.
-            self.build_always_true()
-        }
-    }
-
-    fn less_than_or_eq(
-        &mut self,
-        reference: &BoundReference,
-        literal: &Datum,
-        _predicate: &BoundPredicate,
-    ) -> Result<Box<PredicateResult>> {
-        if let Some(idx) = self.bound_reference(reference)? {
-            let literal = get_arrow_datum(literal)?;
-
-            Ok(Box::new(move |batch| {
-                let left = project_column(&batch, idx)?;
-                let literal = try_cast_literal(&literal, left.data_type())?;
-                lt_eq(&left, literal.as_ref())
-            }))
-        } else {
-            // A missing column, treating it as null.
-            self.build_always_true()
-        }
-    }
-
-    fn greater_than(
-        &mut self,
-        reference: &BoundReference,
-        literal: &Datum,
-        _predicate: &BoundPredicate,
-    ) -> Result<Box<PredicateResult>> {
-        if let Some(idx) = self.bound_reference(reference)? {
-            let literal = get_arrow_datum(literal)?;
-
-            Ok(Box::new(move |batch| {
-                let left = project_column(&batch, idx)?;
-                let literal = try_cast_literal(&literal, left.data_type())?;
-                gt(&left, literal.as_ref())
-            }))
-        } else {
-            // A missing column, treating it as null.
-            self.build_always_false()
-        }
-    }
-
-    fn greater_than_or_eq(
-        &mut self,
-        reference: &BoundReference,
-        literal: &Datum,
-        _predicate: &BoundPredicate,
-    ) -> Result<Box<PredicateResult>> {
-        if let Some(idx) = self.bound_reference(reference)? {
-            let literal = get_arrow_datum(literal)?;
-
-            Ok(Box::new(move |batch| {
-                let left = project_column(&batch, idx)?;
-                let literal = try_cast_literal(&literal, left.data_type())?;
-                gt_eq(&left, literal.as_ref())
-            }))
-        } else {
-            // A missing column, treating it as null.
-            self.build_always_false()
-        }
-    }
-
-    fn eq(
-        &mut self,
-        reference: &BoundReference,
-        literal: &Datum,
-        _predicate: &BoundPredicate,
-    ) -> Result<Box<PredicateResult>> {
-        if let Some(idx) = self.bound_reference(reference)? {
-            let literal = get_arrow_datum(literal)?;
-
-            Ok(Box::new(move |batch| {
-                let left = project_column(&batch, idx)?;
-                let literal = try_cast_literal(&literal, left.data_type())?;
-                eq(&left, literal.as_ref())
-            }))
-        } else {
-            // A missing column, treating it as null.
-            self.build_always_false()
-        }
-    }
-
-    fn not_eq(
-        &mut self,
-        reference: &BoundReference,
-        literal: &Datum,
-        _predicate: &BoundPredicate,
-    ) -> Result<Box<PredicateResult>> {
-        if let Some(idx) = self.bound_reference(reference)? {
-            let literal = get_arrow_datum(literal)?;
-
-            Ok(Box::new(move |batch| {
-                let left = project_column(&batch, idx)?;
-                let literal = try_cast_literal(&literal, left.data_type())?;
-                neq(&left, literal.as_ref())
-            }))
-        } else {
-            // A missing column, treating it as null.
-            self.build_always_false()
-        }
-    }
-
-    fn starts_with(
-        &mut self,
-        reference: &BoundReference,
-        literal: &Datum,
-        _predicate: &BoundPredicate,
-    ) -> Result<Box<PredicateResult>> {
-        if let Some(idx) = self.bound_reference(reference)? {
-            let literal = get_arrow_datum(literal)?;
-
-            Ok(Box::new(move |batch| {
-                let left = project_column(&batch, idx)?;
-                let literal = try_cast_literal(&literal, left.data_type())?;
-                starts_with(&left, literal.as_ref())
-            }))
-        } else {
-            // A missing column, treating it as null.
-            self.build_always_false()
-        }
-    }
-
-    fn not_starts_with(
-        &mut self,
-        reference: &BoundReference,
-        literal: &Datum,
-        _predicate: &BoundPredicate,
-    ) -> Result<Box<PredicateResult>> {
-        if let Some(idx) = self.bound_reference(reference)? {
-            let literal = get_arrow_datum(literal)?;
-
-            Ok(Box::new(move |batch| {
-                let left = project_column(&batch, idx)?;
-                let literal = try_cast_literal(&literal, left.data_type())?;
-                // update here if arrow ever adds a native not_starts_with
-                not(&starts_with(&left, literal.as_ref())?)
-            }))
-        } else {
-            // A missing column, treating it as null.
-            self.build_always_true()
-        }
-    }
-
-    fn r#in(
-        &mut self,
-        reference: &BoundReference,
-        literals: &FnvHashSet<Datum>,
-        _predicate: &BoundPredicate,
-    ) -> Result<Box<PredicateResult>> {
-        if let Some(idx) = self.bound_reference(reference)? {
-            let literals: Vec<_> = literals
-                .iter()
-                .map(|lit| get_arrow_datum(lit).unwrap())
-                .collect();
-
-            Ok(Box::new(move |batch| {
-                // update this if arrow ever adds a native is_in kernel
-                let left = project_column(&batch, idx)?;
-
-                let mut acc = BooleanArray::from(vec![false; batch.num_rows()]);
-                for literal in &literals {
-                    let literal = try_cast_literal(literal, left.data_type())?;
-                    acc = or(&acc, &eq(&left, literal.as_ref())?)?
-                }
-
-                Ok(acc)
-            }))
-        } else {
-            // A missing column, treating it as null.
-            self.build_always_false()
-        }
-    }
-
-    fn not_in(
-        &mut self,
-        reference: &BoundReference,
-        literals: &FnvHashSet<Datum>,
-        _predicate: &BoundPredicate,
-    ) -> Result<Box<PredicateResult>> {
-        if let Some(idx) = self.bound_reference(reference)? {
-            let literals: Vec<_> = literals
-                .iter()
-                .map(|lit| get_arrow_datum(lit).unwrap())
-                .collect();
-
-            Ok(Box::new(move |batch| {
-                // update this if arrow ever adds a native not_in kernel
-                let left = project_column(&batch, idx)?;
-                let mut acc = BooleanArray::from(vec![true; batch.num_rows()]);
-                for literal in &literals {
-                    let literal = try_cast_literal(literal, left.data_type())?;
-                    acc = and(&acc, &neq(&left, literal.as_ref())?)?
-                }
-
-                Ok(acc)
-            }))
-        } else {
-            // A missing column, treating it as null.
-            self.build_always_true()
-        }
-    }
-}
-
-/// ArrowFileReader is a wrapper around a FileRead that impls parquets AsyncFileReader.
-pub struct ArrowFileReader {
-    meta: FileMetadata,
-    parquet_read_options: ParquetReadOptions,
-    r: Box<dyn FileRead>,
-}
-
-impl ArrowFileReader {
-    /// Create a new ArrowFileReader
-    pub fn new(meta: FileMetadata, r: Box<dyn FileRead>) -> Self {
-        Self {
-            meta,
-            parquet_read_options: ParquetReadOptions::builder().build(),
-            r,
-        }
-    }
-
-    /// Configure all Parquet read options.
-    pub(crate) fn with_parquet_read_options(mut self, options: ParquetReadOptions) -> Self {
-        self.parquet_read_options = options;
-        self
-    }
-}
-
-impl AsyncFileReader for ArrowFileReader {
-    fn get_bytes(&mut self, range: Range<u64>) -> BoxFuture<'_, parquet::errors::Result<Bytes>> {
-        Box::pin(
-            self.r
-                .read(range.start..range.end)
-                .map_err(|err| parquet::errors::ParquetError::External(Box::new(err))),
-        )
-    }
-
-    /// Override the default `get_byte_ranges` which calls `get_bytes` sequentially.
-    /// The parquet reader calls this to fetch column chunks for a row group, so
-    /// without this override each column chunk is a serial round-trip to object storage.
-    /// Adapted from object_store's `coalesce_ranges` in `util.rs`.
-    fn get_byte_ranges(
-        &mut self,
-        ranges: Vec<Range<u64>>,
-    ) -> BoxFuture<'_, parquet::errors::Result<Vec<Bytes>>> {
-        let coalesce_bytes = self.parquet_read_options.range_coalesce_bytes();
-        let concurrency = self.parquet_read_options.range_fetch_concurrency().max(1);
-
-        async move {
-            // Merge nearby ranges to reduce the number of object store requests.
-            let fetch_ranges = merge_ranges(&ranges, coalesce_bytes);
-            let r = &self.r;
-
-            // Fetch merged ranges concurrently.
-            let fetched: Vec<Bytes> = futures::stream::iter(fetch_ranges.iter().cloned())
-                .map(|range| async move {
-                    r.read(range)
-                        .await
-                        .map_err(|e| parquet::errors::ParquetError::External(Box::new(e)))
-                })
-                .buffered(concurrency)
-                .try_collect()
-                .await?;
-
-            // Slice the fetched data back into the originally requested ranges.
-            Ok(ranges
-                .iter()
-                .map(|range| {
-                    let idx = fetch_ranges.partition_point(|v| v.start <= range.start) - 1;
-                    let fetch_range = &fetch_ranges[idx];
-                    let fetch_bytes = &fetched[idx];
-                    let start = (range.start - fetch_range.start) as usize;
-                    let end = (range.end - fetch_range.start) as usize;
-                    fetch_bytes.slice(start..end.min(fetch_bytes.len()))
-                })
-                .collect())
-        }
-        .boxed()
-    }
-
-    // TODO: currently we don't respect `ArrowReaderOptions` cause it don't expose any method to access the option field
-    // we will fix it after `v55.1.0` is released in https://github.com/apache/arrow-rs/issues/7393
-    fn get_metadata(
-        &mut self,
-        _options: Option<&'_ ArrowReaderOptions>,
-    ) -> BoxFuture<'_, parquet::errors::Result<Arc<ParquetMetaData>>> {
-        async move {
-            fn page_index_policy(enabled: bool) -> PageIndexPolicy {
-                if enabled {
-                    PageIndexPolicy::Optional
-                } else {
-                    PageIndexPolicy::Skip
-                }
-            }
-
-            let reader = ParquetMetaDataReader::new()
-                .with_prefetch_hint(self.parquet_read_options.metadata_size_hint())
-                // Set the page policy first because it updates both column and offset policies.
-                .with_page_index_policy(page_index_policy(
-                    self.parquet_read_options.preload_page_index(),
-                ))
-                .with_column_index_policy(page_index_policy(
-                    self.parquet_read_options.preload_column_index(),
-                ))
-                .with_offset_index_policy(page_index_policy(
-                    self.parquet_read_options.preload_offset_index(),
-                ));
-            let size = self.meta.size;
-            let meta = reader.load_and_finish(self, size).await?;
-
-            Ok(Arc::new(meta))
-        }
-        .boxed()
-    }
-}
-
-/// Merge overlapping or nearby byte ranges, combining ranges with gaps <= `coalesce` bytes.
-/// Adapted from object_store's `merge_ranges` in `util.rs`.
-fn merge_ranges(ranges: &[Range<u64>], coalesce: u64) -> Vec<Range<u64>> {
-    if ranges.is_empty() {
-        return vec![];
-    }
-
-    let mut ranges = ranges.to_vec();
-    ranges.sort_unstable_by_key(|r| r.start);
-
-    let mut merged = Vec::with_capacity(ranges.len());
-    let mut start_idx = 0;
-    let mut end_idx = 1;
-
-    while start_idx != ranges.len() {
-        let mut range_end = ranges[start_idx].end;
-
-        while end_idx != ranges.len()
-            && ranges[end_idx]
-                .start
-                .checked_sub(range_end)
-                .map(|delta| delta <= coalesce)
-                .unwrap_or(true)
-        {
-            range_end = range_end.max(ranges[end_idx].end);
-            end_idx += 1;
-        }
-
-        merged.push(ranges[start_idx].start..range_end);
-        start_idx = end_idx;
-        end_idx += 1;
-    }
-
-    merged
-}
-
-/// The Arrow type of an array that the Parquet reader reads may not match the exact Arrow type
-/// that Iceberg uses for literals - but they are effectively the same logical type,
-/// i.e. LargeUtf8 and Utf8 or Utf8View and Utf8 or Utf8View and LargeUtf8.
-///
-/// The Arrow compute kernels that we use must match the type exactly, so first cast the literal
-/// into the type of the batch we read from Parquet before sending it to the compute kernel.
-fn try_cast_literal(
-    literal: &Arc<dyn ArrowDatum + Send + Sync>,
-    column_type: &DataType,
-) -> std::result::Result<Arc<dyn ArrowDatum + Send + Sync>, ArrowError> {
-    let literal_array = literal.get().0;
-
-    // No cast required
-    if literal_array.data_type() == column_type {
-        return Ok(Arc::clone(literal));
-    }
-
-    let literal_array = cast(literal_array, column_type)?;
-    Ok(Arc::new(Scalar::new(literal_array)))
-}
-
-#[cfg(test)]
-mod tests {
-    use std::collections::{HashMap, HashSet};
-    use std::fs::File;
-    use std::ops::Range;
-    use std::sync::Arc;
-
-    use arrow_array::cast::AsArray;
-    use arrow_array::{
-        Array, ArrayRef, BinaryArray, FixedSizeBinaryArray, Int32Array, LargeStringArray,
-        RecordBatch, StringArray,
-    };
-    use arrow_schema::{DataType, Field, Schema as ArrowSchema, TimeUnit};
-    use futures::TryStreamExt;
-    use parquet::arrow::arrow_reader::{RowSelection, RowSelector};
-    use parquet::arrow::{ArrowWriter, ProjectionMask};
-    use parquet::basic::Compression;
-    use parquet::file::metadata::{ColumnChunkMetaData, RowGroupMetaData};
-    use parquet::file::properties::WriterProperties;
-    use parquet::schema::parser::parse_message_type;
-    use parquet::schema::types::{SchemaDescPtr, SchemaDescriptor};
-    use roaring::RoaringTreemap;
-    use tempfile::TempDir;
-
-    use crate::ErrorKind;
-    use crate::arrow::reader::{CollectFieldIdVisitor, PARQUET_FIELD_ID_META_KEY};
-    use crate::arrow::{ArrowReader, ArrowReaderBuilder};
-    use crate::delete_vector::DeleteVector;
-    use crate::expr::visitors::bound_predicate_visitor::visit;
-    use crate::expr::{Bind, Predicate, Reference};
-    use crate::io::FileIO;
-    use crate::scan::{FileScanTask, FileScanTaskDeleteFile, FileScanTaskStream};
-    use crate::spec::{
-        DataContentType, DataFileFormat, Datum, NestedField, PrimitiveType, Schema, SchemaRef, Type,
-    };
-
-    fn table_schema_simple() -> SchemaRef {
-        Arc::new(
-            Schema::builder()
-                .with_schema_id(1)
-                .with_identifier_field_ids(vec![2])
-                .with_fields(vec![
-                    NestedField::optional(1, "foo", Type::Primitive(PrimitiveType::String)).into(),
-                    NestedField::required(2, "bar", Type::Primitive(PrimitiveType::Int)).into(),
-                    NestedField::optional(3, "baz", Type::Primitive(PrimitiveType::Boolean)).into(),
-                    NestedField::optional(4, "qux", Type::Primitive(PrimitiveType::Float)).into(),
-                ])
-                .build()
-                .unwrap(),
-        )
-    }
-
-    #[test]
-    fn test_collect_field_id() {
-        let schema = table_schema_simple();
-        let expr = Reference::new("qux").is_null();
-        let bound_expr = expr.bind(schema, true).unwrap();
-
-        let mut visitor = CollectFieldIdVisitor {
-            field_ids: HashSet::default(),
-        };
-        visit(&mut visitor, &bound_expr).unwrap();
-
-        let mut expected = HashSet::default();
-        expected.insert(4_i32);
-
-        assert_eq!(visitor.field_ids, expected);
-    }
-
-    #[test]
-    fn test_collect_field_id_with_and() {
-        let schema = table_schema_simple();
-        let expr = Reference::new("qux")
-            .is_null()
-            .and(Reference::new("baz").is_null());
-        let bound_expr = expr.bind(schema, true).unwrap();
-
-        let mut visitor = CollectFieldIdVisitor {
-            field_ids: HashSet::default(),
-        };
-        visit(&mut visitor, &bound_expr).unwrap();
-
-        let mut expected = HashSet::default();
-        expected.insert(4_i32);
-        expected.insert(3);
-
-        assert_eq!(visitor.field_ids, expected);
-    }
-
-    #[test]
-    fn test_collect_field_id_with_or() {
-        let schema = table_schema_simple();
-        let expr = Reference::new("qux")
-            .is_null()
-            .or(Reference::new("baz").is_null());
-        let bound_expr = expr.bind(schema, true).unwrap();
-
-        let mut visitor = CollectFieldIdVisitor {
-            field_ids: HashSet::default(),
-        };
-        visit(&mut visitor, &bound_expr).unwrap();
-
-        let mut expected = HashSet::default();
-        expected.insert(4_i32);
-        expected.insert(3);
-
-        assert_eq!(visitor.field_ids, expected);
-    }
-
-    #[test]
-    fn test_arrow_projection_mask() {
-        let schema = Arc::new(
-            Schema::builder()
-                .with_schema_id(1)
-                .with_identifier_field_ids(vec![1])
-                .with_fields(vec![
-                    NestedField::required(1, "c1", Type::Primitive(PrimitiveType::String)).into(),
-                    NestedField::optional(2, "c2", Type::Primitive(PrimitiveType::Int)).into(),
-                    NestedField::optional(
-                        3,
-                        "c3",
-                        Type::Primitive(PrimitiveType::Decimal {
-                            precision: 38,
-                            scale: 3,
-                        }),
-                    )
-                    .into(),
-                ])
-                .build()
-                .unwrap(),
-        );
-        let arrow_schema = Arc::new(ArrowSchema::new(vec![
-            Field::new("c1", DataType::Utf8, false).with_metadata(HashMap::from([(
-                PARQUET_FIELD_ID_META_KEY.to_string(),
-                "1".to_string(),
-            )])),
-            // Type not supported
-            Field::new("c2", DataType::Duration(TimeUnit::Microsecond), true).with_metadata(
-                HashMap::from([(PARQUET_FIELD_ID_META_KEY.to_string(), "2".to_string())]),
-            ),
-            // Precision is beyond the supported range
-            Field::new("c3", DataType::Decimal128(39, 3), true).with_metadata(HashMap::from([(
-                PARQUET_FIELD_ID_META_KEY.to_string(),
-                "3".to_string(),
-            )])),
-        ]));
-
-        let message_type = "
-message schema {
-  required binary c1 (STRING) = 1;
-  optional int32 c2 (INTEGER(8,true)) = 2;
-  optional fixed_len_byte_array(17) c3 (DECIMAL(39,3)) = 3;
-}
-    ";
-        let parquet_type = parse_message_type(message_type).expect("should parse schema");
-        let parquet_schema = SchemaDescriptor::new(Arc::new(parquet_type));
-
-        // Try projecting the fields c2 and c3 with the unsupported data types
-        let err = ArrowReader::get_arrow_projection_mask(
-            &[1, 2, 3],
-            &schema,
-            &parquet_schema,
-            &arrow_schema,
-            false,
-        )
-        .unwrap_err();
-
-        assert_eq!(err.kind(), ErrorKind::DataInvalid);
-        assert_eq!(
-            err.to_string(),
-            "DataInvalid => Unsupported Arrow data type: Duration(µs)".to_string()
-        );
-
-        // Omitting field c2, we still get an error due to c3 being selected
-        let err = ArrowReader::get_arrow_projection_mask(
-            &[1, 3],
-            &schema,
-            &parquet_schema,
-            &arrow_schema,
-            false,
-        )
-        .unwrap_err();
-
-        assert_eq!(err.kind(), ErrorKind::DataInvalid);
-        assert_eq!(
-            err.to_string(),
-            "DataInvalid => Failed to create decimal type, source: DataInvalid => Decimals with precision larger than 38 are not supported: 39".to_string()
-        );
-
-        // Finally avoid selecting fields with unsupported data types
-        let mask = ArrowReader::get_arrow_projection_mask(
-            &[1],
-            &schema,
-            &parquet_schema,
-            &arrow_schema,
-            false,
-        )
-        .expect("Some ProjectionMask");
-        assert_eq!(mask, ProjectionMask::leaves(&parquet_schema, vec![0]));
-    }
-
-    #[tokio::test]
-    async fn test_kleene_logic_or_behaviour() {
-        // a IS NULL OR a = 'foo'
-        let predicate = Reference::new("a")
-            .is_null()
-            .or(Reference::new("a").equal_to(Datum::string("foo")));
-
-        // Table data: [NULL, "foo", "bar"]
-        let data_for_col_a = vec![None, Some("foo".to_string()), Some("bar".to_string())];
-
-        // Expected: [NULL, "foo"].
-        let expected = vec![None, Some("foo".to_string())];
-
-        let (file_io, schema, table_location, _temp_dir) =
-            setup_kleene_logic(data_for_col_a, DataType::Utf8);
-        let reader = ArrowReaderBuilder::new(file_io).build();
-
-        let result_data = test_perform_read(predicate, schema, table_location, reader).await;
-
-        assert_eq!(result_data, expected);
-    }
-
-    #[tokio::test]
-    async fn test_kleene_logic_and_behaviour() {
-        // a IS NOT NULL AND a != 'foo'
-        let predicate = Reference::new("a")
-            .is_not_null()
-            .and(Reference::new("a").not_equal_to(Datum::string("foo")));
-
-        // Table data: [NULL, "foo", "bar"]
-        let data_for_col_a = vec![None, Some("foo".to_string()), Some("bar".to_string())];
-
-        // Expected: ["bar"].
-        let expected = vec![Some("bar".to_string())];
-
-        let (file_io, schema, table_location, _temp_dir) =
-            setup_kleene_logic(data_for_col_a, DataType::Utf8);
-        let reader = ArrowReaderBuilder::new(file_io).build();
-
-        let result_data = test_perform_read(predicate, schema, table_location, reader).await;
-
-        assert_eq!(result_data, expected);
-    }
-
-    #[tokio::test]
-    async fn test_predicate_cast_literal() {
-        let predicates = vec![
-            // a == 'foo'
-            (Reference::new("a").equal_to(Datum::string("foo")), vec![
-                Some("foo".to_string()),
-            ]),
-            // a != 'foo'
-            (
-                Reference::new("a").not_equal_to(Datum::string("foo")),
-                vec![Some("bar".to_string())],
-            ),
-            // STARTS_WITH(a, 'foo')
-            (Reference::new("a").starts_with(Datum::string("f")), vec![
-                Some("foo".to_string()),
-            ]),
-            // NOT STARTS_WITH(a, 'foo')
-            (
-                Reference::new("a").not_starts_with(Datum::string("f")),
-                vec![Some("bar".to_string())],
-            ),
-            // a < 'foo'
-            (Reference::new("a").less_than(Datum::string("foo")), vec![
-                Some("bar".to_string()),
-            ]),
-            // a <= 'foo'
-            (
-                Reference::new("a").less_than_or_equal_to(Datum::string("foo")),
-                vec![Some("foo".to_string()), Some("bar".to_string())],
-            ),
-            // a > 'foo'
-            (
-                Reference::new("a").greater_than(Datum::string("bar")),
-                vec![Some("foo".to_string())],
-            ),
-            // a >= 'foo'
-            (
-                Reference::new("a").greater_than_or_equal_to(Datum::string("foo")),
-                vec![Some("foo".to_string())],
-            ),
-            // a IN ('foo', 'bar')
-            (
-                Reference::new("a").is_in([Datum::string("foo"), Datum::string("baz")]),
-                vec![Some("foo".to_string())],
-            ),
-            // a NOT IN ('foo', 'bar')
-            (
-                Reference::new("a").is_not_in([Datum::string("foo"), Datum::string("baz")]),
-                vec![Some("bar".to_string())],
-            ),
-        ];
-
-        // Table data: ["foo", "bar"]
-        let data_for_col_a = vec![Some("foo".to_string()), Some("bar".to_string())];
-
-        let (file_io, schema, table_location, _temp_dir) =
-            setup_kleene_logic(data_for_col_a, DataType::LargeUtf8);
-        let reader = ArrowReaderBuilder::new(file_io).build();
-
-        for (predicate, expected) in predicates {
-            println!("testing predicate {predicate}");
-            let result_data = test_perform_read(
-                predicate.clone(),
-                schema.clone(),
-                table_location.clone(),
-                reader.clone(),
-            )
-            .await;
-
-            assert_eq!(result_data, expected, "predicate={predicate}");
-        }
-    }
-
-    async fn test_perform_read(
-        predicate: Predicate,
-        schema: SchemaRef,
-        table_location: String,
-        reader: ArrowReader,
-    ) -> Vec<Option<String>> {
-        let tasks = Box::pin(futures::stream::iter(
-            vec![Ok(FileScanTask {
-                file_size_in_bytes: std::fs::metadata(format!("{table_location}/1.parquet"))
-                    .unwrap()
-                    .len(),
-                start: 0,
-                length: 0,
-                record_count: None,
-                data_file_path: format!("{table_location}/1.parquet"),
-                data_file_format: DataFileFormat::Parquet,
-                schema: schema.clone(),
-                project_field_ids: vec![1],
-                predicate: Some(predicate.bind(schema, true).unwrap()),
-                deletes: vec![],
-                partition: None,
-                partition_spec: None,
-                name_mapping: None,
-                case_sensitive: false,
-            })]
-            .into_iter(),
-        )) as FileScanTaskStream;
-
-        let result = reader
-            .read(tasks)
-            .unwrap()
-            .try_collect::<Vec<RecordBatch>>()
-            .await
-            .unwrap();
-
-        result[0].columns()[0]
-            .as_string_opt::<i32>()
-            .unwrap()
-            .iter()
-            .map(|v| v.map(ToOwned::to_owned))
-            .collect::<Vec<_>>()
-    }
-
-    fn setup_kleene_logic(
-        data_for_col_a: Vec<Option<String>>,
-        col_a_type: DataType,
-    ) -> (FileIO, SchemaRef, String, TempDir) {
-        let schema = Arc::new(
-            Schema::builder()
-                .with_schema_id(1)
-                .with_fields(vec![
-                    NestedField::optional(1, "a", Type::Primitive(PrimitiveType::String)).into(),
-                ])
-                .build()
-                .unwrap(),
-        );
-
-        let arrow_schema = Arc::new(ArrowSchema::new(vec![
-            Field::new("a", col_a_type.clone(), true).with_metadata(HashMap::from([(
-                PARQUET_FIELD_ID_META_KEY.to_string(),
-                "1".to_string(),
-            )])),
-        ]));
-
-        let tmp_dir = TempDir::new().unwrap();
-        let table_location = tmp_dir.path().to_str().unwrap().to_string();
-
-        let file_io = FileIO::new_with_fs();
-
-        let col = match col_a_type {
-            DataType::Utf8 => Arc::new(StringArray::from(data_for_col_a)) as ArrayRef,
-            DataType::LargeUtf8 => Arc::new(LargeStringArray::from(data_for_col_a)) as ArrayRef,
-            _ => panic!("unexpected col_a_type"),
-        };
-
-        let to_write = RecordBatch::try_new(arrow_schema.clone(), vec![col]).unwrap();
-
-        // Write the Parquet files
-        let props = WriterProperties::builder()
-            .set_compression(Compression::SNAPPY)
-            .build();
-
-        let file = File::create(format!("{table_location}/1.parquet")).unwrap();
-        let mut writer =
-            ArrowWriter::try_new(file, to_write.schema(), Some(props.clone())).unwrap();
-
-        writer.write(&to_write).expect("Writing batch");
-
-        // writer must be closed to write footer
-        writer.close().unwrap();
-
-        (file_io, schema, table_location, tmp_dir)
-    }
-
-    #[test]
-    fn test_build_deletes_row_selection() {
-        let schema_descr = get_test_schema_descr();
-
-        let mut columns = vec![];
-        for ptr in schema_descr.columns() {
-            let column = ColumnChunkMetaData::builder(ptr.clone()).build().unwrap();
-            columns.push(column);
-        }
-
-        let row_groups_metadata = vec![
-            build_test_row_group_meta(schema_descr.clone(), columns.clone(), 1000, 0),
-            build_test_row_group_meta(schema_descr.clone(), columns.clone(), 500, 1),
-            build_test_row_group_meta(schema_descr.clone(), columns.clone(), 500, 2),
-            build_test_row_group_meta(schema_descr.clone(), columns.clone(), 1000, 3),
-            build_test_row_group_meta(schema_descr.clone(), columns.clone(), 500, 4),
-        ];
-
-        let selected_row_groups = Some(vec![1, 3]);
-
-        /* cases to cover:
-           * {skip|select} {first|intermediate|last} {one row|multiple rows} in
-             {first|intermediate|last} {skipped|selected} row group
-           * row group selection disabled
-        */
-
-        let positional_deletes = RoaringTreemap::from_iter(&[
-            1, // in skipped rg 0, should be ignored
-            3, // run of three consecutive items in skipped rg0
-            4, 5, 998, // two consecutive items at end of skipped rg0
-            999, 1000, // solitary row at start of selected rg1 (1, 9)
-            1010, // run of 3 rows in selected rg1
-            1011, 1012, // (3, 485)
-            1498, // run of two items at end of selected rg1
-            1499, 1500, // run of two items at start of skipped rg2
-            1501, 1600, // should ignore, in skipped rg2
-            1999, // single row at end of skipped rg2
-            2000, // run of two items at start of selected rg3
-            2001, // (4, 98)
-            2100, // single row in selected row group 3 (1, 99)
-            2200, // run of 3 consecutive rows in selected row group 3
-            2201, 2202, // (3, 796)
-            2999, // single item at end of selected rg3 (1)
-            3000, // single item at start of skipped rg4
-        ]);
-
-        let positional_deletes = DeleteVector::new(positional_deletes);
-
-        // using selected row groups 1 and 3
-        let result = ArrowReader::build_deletes_row_selection(
-            &row_groups_metadata,
-            &selected_row_groups,
-            &positional_deletes,
-        )
-        .unwrap();
-
-        let expected = RowSelection::from(vec![
-            RowSelector::skip(1),
-            RowSelector::select(9),
-            RowSelector::skip(3),
-            RowSelector::select(485),
-            RowSelector::skip(4),
-            RowSelector::select(98),
-            RowSelector::skip(1),
-            RowSelector::select(99),
-            RowSelector::skip(3),
-            RowSelector::select(796),
-            RowSelector::skip(1),
-        ]);
-
-        assert_eq!(result, expected);
-
-        // selecting all row groups
-        let result = ArrowReader::build_deletes_row_selection(
-            &row_groups_metadata,
-            &None,
-            &positional_deletes,
-        )
-        .unwrap();
-
-        let expected = RowSelection::from(vec![
-            RowSelector::select(1),
-            RowSelector::skip(1),
-            RowSelector::select(1),
-            RowSelector::skip(3),
-            RowSelector::select(992),
-            RowSelector::skip(3),
-            RowSelector::select(9),
-            RowSelector::skip(3),
-            RowSelector::select(485),
-            RowSelector::skip(4),
-            RowSelector::select(98),
-            RowSelector::skip(1),
-            RowSelector::select(398),
-            RowSelector::skip(3),
-            RowSelector::select(98),
-            RowSelector::skip(1),
-            RowSelector::select(99),
-            RowSelector::skip(3),
-            RowSelector::select(796),
-            RowSelector::skip(2),
-            RowSelector::select(499),
-        ]);
-
-        assert_eq!(result, expected);
-    }
-
-    fn build_test_row_group_meta(
-        schema_descr: SchemaDescPtr,
-        columns: Vec<ColumnChunkMetaData>,
-        num_rows: i64,
-        ordinal: i16,
-    ) -> RowGroupMetaData {
-        RowGroupMetaData::builder(schema_descr.clone())
-            .set_num_rows(num_rows)
-            .set_total_byte_size(2000)
-            .set_column_metadata(columns)
-            .set_ordinal(ordinal)
-            .build()
-            .unwrap()
-    }
-
-    fn get_test_schema_descr() -> SchemaDescPtr {
-        use parquet::schema::types::Type as SchemaType;
-
-        let schema = SchemaType::group_type_builder("schema")
-            .with_fields(vec![
-                Arc::new(
-                    SchemaType::primitive_type_builder("a", parquet::basic::Type::INT32)
-                        .build()
-                        .unwrap(),
-                ),
-                Arc::new(
-                    SchemaType::primitive_type_builder("b", parquet::basic::Type::INT32)
-                        .build()
-                        .unwrap(),
-                ),
-            ])
-            .build()
-            .unwrap();
-
-        Arc::new(SchemaDescriptor::new(Arc::new(schema)))
-    }
-
-    /// Verifies that file splits respect byte ranges and only read specific row groups.
-    #[tokio::test]
-    async fn test_file_splits_respect_byte_ranges() {
-        use arrow_array::Int32Array;
-        use parquet::file::reader::{FileReader, SerializedFileReader};
-
-        let schema = Arc::new(
-            Schema::builder()
-                .with_schema_id(1)
-                .with_fields(vec![
-                    NestedField::required(1, "id", Type::Primitive(PrimitiveType::Int)).into(),
-                ])
-                .build()
-                .unwrap(),
-        );
-
-        let arrow_schema = Arc::new(ArrowSchema::new(vec![
-            Field::new("id", DataType::Int32, false).with_metadata(HashMap::from([(
-                PARQUET_FIELD_ID_META_KEY.to_string(),
-                "1".to_string(),
-            )])),
-        ]));
-
-        let tmp_dir = TempDir::new().unwrap();
-        let table_location = tmp_dir.path().to_str().unwrap().to_string();
-        let file_path = format!("{table_location}/multi_row_group.parquet");
-
-        // Force each batch into its own row group for testing byte range filtering.
-        let batch1 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new(Int32Array::from(
-            (0..100).collect::<Vec<i32>>(),
-        ))])
-        .unwrap();
-        let batch2 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new(Int32Array::from(
-            (100..200).collect::<Vec<i32>>(),
-        ))])
-        .unwrap();
-        let batch3 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new(Int32Array::from(
-            (200..300).collect::<Vec<i32>>(),
-        ))])
-        .unwrap();
-
-        let props = WriterProperties::builder()
-            .set_compression(Compression::SNAPPY)
-            .set_max_row_group_row_count(Some(100))
-            .build();
-
-        let file = File::create(&file_path).unwrap();
-        let mut writer = ArrowWriter::try_new(file, arrow_schema.clone(), Some(props)).unwrap();
-        writer.write(&batch1).expect("Writing batch 1");
-        writer.write(&batch2).expect("Writing batch 2");
-        writer.write(&batch3).expect("Writing batch 3");
-        writer.close().unwrap();
-
-        // Read the file metadata to get row group byte positions
-        let file = File::open(&file_path).unwrap();
-        let reader = SerializedFileReader::new(file).unwrap();
-        let metadata = reader.metadata();
-
-        println!("File has {} row groups", metadata.num_row_groups());
-        assert_eq!(metadata.num_row_groups(), 3, "Expected 3 row groups");
-
-        // Get byte positions for each row group
-        let row_group_0 = metadata.row_group(0);
-        let row_group_1 = metadata.row_group(1);
-        let row_group_2 = metadata.row_group(2);
-
-        let rg0_start = 4u64; // Parquet files start with 4-byte magic "PAR1"
-        let rg1_start = rg0_start + row_group_0.compressed_size() as u64;
-        let rg2_start = rg1_start + row_group_1.compressed_size() as u64;
-        let file_end = rg2_start + row_group_2.compressed_size() as u64;
-
-        println!(
-            "Row group 0: {} rows, starts at byte {}, {} bytes compressed",
-            row_group_0.num_rows(),
-            rg0_start,
-            row_group_0.compressed_size()
-        );
-        println!(
-            "Row group 1: {} rows, starts at byte {}, {} bytes compressed",
-            row_group_1.num_rows(),
-            rg1_start,
-            row_group_1.compressed_size()
-        );
-        println!(
-            "Row group 2: {} rows, starts at byte {}, {} bytes compressed",
-            row_group_2.num_rows(),
-            rg2_start,
-            row_group_2.compressed_size()
-        );
-
-        let file_io = FileIO::new_with_fs();
-        let reader = ArrowReaderBuilder::new(file_io).build();
-
-        // Task 1: read only the first row group
-        let task1 = FileScanTask {
-            file_size_in_bytes: std::fs::metadata(&file_path).unwrap().len(),
-            start: rg0_start,
-            length: row_group_0.compressed_size() as u64,
-            record_count: Some(100),
-            data_file_path: file_path.clone(),
-            data_file_format: DataFileFormat::Parquet,
-            schema: schema.clone(),
-            project_field_ids: vec![1],
-            predicate: None,
-            deletes: vec![],
-            partition: None,
-            partition_spec: None,
-            name_mapping: None,
-            case_sensitive: false,
-        };
-
-        // Task 2: read the second and third row groups
-        let task2 = FileScanTask {
-            file_size_in_bytes: std::fs::metadata(&file_path).unwrap().len(),
-            start: rg1_start,
-            length: file_end - rg1_start,
-            record_count: Some(200),
-            data_file_path: file_path.clone(),
-            data_file_format: DataFileFormat::Parquet,
-            schema: schema.clone(),
-            project_field_ids: vec![1],
-            predicate: None,
-            deletes: vec![],
-            partition: None,
-            partition_spec: None,
-            name_mapping: None,
-            case_sensitive: false,
-        };
-
-        let tasks1 = Box::pin(futures::stream::iter(vec![Ok(task1)])) as FileScanTaskStream;
-        let result1 = reader
-            .clone()
-            .read(tasks1)
-            .unwrap()
-            .try_collect::<Vec<RecordBatch>>()
-            .await
-            .unwrap();
-
-        let total_rows_task1: usize = result1.iter().map(|b| b.num_rows()).sum();
-        println!(
-            "Task 1 (bytes {}-{}) returned {} rows",
-            rg0_start,
-            rg0_start + row_group_0.compressed_size() as u64,
-            total_rows_task1
-        );
-
-        let tasks2 = Box::pin(futures::stream::iter(vec![Ok(task2)])) as FileScanTaskStream;
-        let result2 = reader
-            .read(tasks2)
-            .unwrap()
-            .try_collect::<Vec<RecordBatch>>()
-            .await
-            .unwrap();
-
-        let total_rows_task2: usize = result2.iter().map(|b| b.num_rows()).sum();
-        println!("Task 2 (bytes {rg1_start}-{file_end}) returned {total_rows_task2} rows");
-
-        assert_eq!(
-            total_rows_task1, 100,
-            "Task 1 should read only the first row group (100 rows), but got {total_rows_task1} rows"
-        );
-
-        assert_eq!(
-            total_rows_task2, 200,
-            "Task 2 should read only the second+third row groups (200 rows), but got {total_rows_task2} rows"
-        );
-
-        // Verify the actual data values are correct (not just the row count)
-        if total_rows_task1 > 0 {
-            let first_batch = &result1[0];
-            let id_col = first_batch
-                .column(0)
-                .as_primitive::<arrow_array::types::Int32Type>();
-            let first_val = id_col.value(0);
-            let last_val = id_col.value(id_col.len() - 1);
-            println!("Task 1 data range: {first_val} to {last_val}");
-
-            assert_eq!(first_val, 0, "Task 1 should start with id=0");
-            assert_eq!(last_val, 99, "Task 1 should end with id=99");
-        }
-
-        if total_rows_task2 > 0 {
-            let first_batch = &result2[0];
-            let id_col = first_batch
-                .column(0)
-                .as_primitive::<arrow_array::types::Int32Type>();
-            let first_val = id_col.value(0);
-            println!("Task 2 first value: {first_val}");
-
-            assert_eq!(first_val, 100, "Task 2 should start with id=100, not id=0");
-        }
-    }
-
-    /// Test schema evolution: reading old Parquet file (with only column 'a')
-    /// using a newer table schema (with columns 'a' and 'b').
-    /// This tests that:
-    /// 1. get_arrow_projection_mask allows missing columns
-    /// 2. RecordBatchTransformer adds missing column 'b' with NULL values
-    #[tokio::test]
-    async fn test_schema_evolution_add_column() {
-        use arrow_array::{Array, Int32Array};
-
-        // New table schema: columns 'a' and 'b' (b was added later, file only has 'a')
-        let new_schema = Arc::new(
-            Schema::builder()
-                .with_schema_id(2)
-                .with_fields(vec![
-                    NestedField::required(1, "a", Type::Primitive(PrimitiveType::Int)).into(),
-                    NestedField::optional(2, "b", Type::Primitive(PrimitiveType::Int)).into(),
-                ])
-                .build()
-                .unwrap(),
-        );
-
-        // Create Arrow schema for old Parquet file (only has column 'a')
-        let arrow_schema_old = Arc::new(ArrowSchema::new(vec![
-            Field::new("a", DataType::Int32, false).with_metadata(HashMap::from([(
-                PARQUET_FIELD_ID_META_KEY.to_string(),
-                "1".to_string(),
-            )])),
-        ]));
-
-        // Write old Parquet file with only column 'a'
-        let tmp_dir = TempDir::new().unwrap();
-        let table_location = tmp_dir.path().to_str().unwrap().to_string();
-        let file_io = FileIO::new_with_fs();
-
-        let data_a = Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef;
-        let to_write = RecordBatch::try_new(arrow_schema_old.clone(), vec![data_a]).unwrap();
-
-        let props = WriterProperties::builder()
-            .set_compression(Compression::SNAPPY)
-            .build();
-        let file = File::create(format!("{table_location}/old_file.parquet")).unwrap();
-        let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap();
-        writer.write(&to_write).expect("Writing batch");
-        writer.close().unwrap();
-
-        // Read the old Parquet file using the NEW schema (with column 'b')
-        let reader = ArrowReaderBuilder::new(file_io).build();
-        let tasks = Box::pin(futures::stream::iter(
-            vec![Ok(FileScanTask {
-                file_size_in_bytes: std::fs::metadata(format!("{table_location}/old_file.parquet"))
-                    .unwrap()
-                    .len(),
-                start: 0,
-                length: 0,
-                record_count: None,
-                data_file_path: format!("{table_location}/old_file.parquet"),
-                data_file_format: DataFileFormat::Parquet,
-                schema: new_schema.clone(),
-                project_field_ids: vec![1, 2], // Request both columns 'a' and 'b'
-                predicate: None,
-                deletes: vec![],
-                partition: None,
-                partition_spec: None,
-                name_mapping: None,
-                case_sensitive: false,
-            })]
-            .into_iter(),
-        )) as FileScanTaskStream;
-
-        let result = reader
-            .read(tasks)
-            .unwrap()
-            .try_collect::<Vec<RecordBatch>>()
-            .await
-            .unwrap();
-
-        // Verify we got the correct data
-        assert_eq!(result.len(), 1);
-        let batch = &result[0];
-
-        // Should have 2 columns now
-        assert_eq!(batch.num_columns(), 2);
-        assert_eq!(batch.num_rows(), 3);
-
-        // Column 'a' should have the original data
-        let col_a = batch
-            .column(0)
-            .as_primitive::<arrow_array::types::Int32Type>();
-        assert_eq!(col_a.values(), &[1, 2, 3]);
-
-        // Column 'b' should be all NULLs (it didn't exist in the old file)
-        let col_b = batch
-            .column(1)
-            .as_primitive::<arrow_array::types::Int32Type>();
-        assert_eq!(col_b.null_count(), 3);
-        assert!(col_b.is_null(0));
-        assert!(col_b.is_null(1));
-        assert!(col_b.is_null(2));
-    }
-
-    /// Test for bug where position deletes in later row groups are not applied correctly.
-    ///
-    /// When a file has multiple row groups and a position delete targets a row in a later
-    /// row group, the `build_deletes_row_selection` function had a bug where it would
-    /// fail to increment `current_row_group_base_idx` when skipping row groups.
-    ///
-    /// This test creates:
-    /// - A data file with 200 rows split into 2 row groups (0-99, 100-199)
-    /// - A position delete file that deletes row 199 (last row in second row group)
-    ///
-    /// Expected behavior: Should return 199 rows (with id=200 deleted)
-    /// Bug behavior: Returns 200 rows (delete is not applied)
-    ///
-    /// This bug was discovered while running Apache Spark + Apache Iceberg integration tests
-    /// through DataFusion Comet. The following Iceberg Java tests failed due to this bug:
-    /// - `org.apache.iceberg.spark.extensions.TestMergeOnReadDelete::testDeleteWithMultipleRowGroupsParquet`
-    /// - `org.apache.iceberg.spark.extensions.TestMergeOnReadUpdate::testUpdateWithMultipleRowGroupsParquet`
-    #[tokio::test]
-    async fn test_position_delete_across_multiple_row_groups() {
-        use arrow_array::{Int32Array, Int64Array};
-        use parquet::file::reader::{FileReader, SerializedFileReader};
-
-        // Field IDs for positional delete schema
-        const FIELD_ID_POSITIONAL_DELETE_FILE_PATH: u64 = 2147483546;
-        const FIELD_ID_POSITIONAL_DELETE_POS: u64 = 2147483545;
-
-        let tmp_dir = TempDir::new().unwrap();
-        let table_location = tmp_dir.path().to_str().unwrap().to_string();
-
-        // Create table schema with a single 'id' column
-        let table_schema = Arc::new(
-            Schema::builder()
-                .with_schema_id(1)
-                .with_fields(vec![
-                    NestedField::required(1, "id", Type::Primitive(PrimitiveType::Int)).into(),
-                ])
-                .build()
-                .unwrap(),
-        );
-
-        let arrow_schema = Arc::new(ArrowSchema::new(vec![
-            Field::new("id", DataType::Int32, false).with_metadata(HashMap::from([(
-                PARQUET_FIELD_ID_META_KEY.to_string(),
-                "1".to_string(),
-            )])),
-        ]));
-
-        // Step 1: Create data file with 200 rows in 2 row groups
-        // Row group 0: rows 0-99 (ids 1-100)
-        // Row group 1: rows 100-199 (ids 101-200)
-        let data_file_path = format!("{table_location}/data.parquet");
-
-        let batch1 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new(
-            Int32Array::from_iter_values(1..=100),
-        )])
-        .unwrap();
-
-        let batch2 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new(
-            Int32Array::from_iter_values(101..=200),
-        )])
-        .unwrap();
-
-        // Force each batch into its own row group
-        let props = WriterProperties::builder()
-            .set_compression(Compression::SNAPPY)
-            .set_max_row_group_row_count(Some(100))
-            .build();
-
-        let file = File::create(&data_file_path).unwrap();
-        let mut writer = ArrowWriter::try_new(file, arrow_schema.clone(), Some(props)).unwrap();
-        writer.write(&batch1).expect("Writing batch 1");
-        writer.write(&batch2).expect("Writing batch 2");
-        writer.close().unwrap();
-
-        // Verify we created 2 row groups
-        let verify_file = File::open(&data_file_path).unwrap();
-        let verify_reader = SerializedFileReader::new(verify_file).unwrap();
-        assert_eq!(
-            verify_reader.metadata().num_row_groups(),
-            2,
-            "Should have 2 row groups"
-        );
-
-        // Step 2: Create position delete file that deletes row 199 (id=200, last row in row group 1)
-        let delete_file_path = format!("{table_location}/deletes.parquet");
-
-        let delete_schema = Arc::new(ArrowSchema::new(vec![
-            Field::new("file_path", DataType::Utf8, false).with_metadata(HashMap::from([(
-                PARQUET_FIELD_ID_META_KEY.to_string(),
-                FIELD_ID_POSITIONAL_DELETE_FILE_PATH.to_string(),
-            )])),
-            Field::new("pos", DataType::Int64, false).with_metadata(HashMap::from([(
-                PARQUET_FIELD_ID_META_KEY.to_string(),
-                FIELD_ID_POSITIONAL_DELETE_POS.to_string(),
-            )])),
-        ]));
-
-        // Delete row at position 199 (0-indexed, so it's the last row: id=200)
-        let delete_batch = RecordBatch::try_new(delete_schema.clone(), vec![
-            Arc::new(StringArray::from_iter_values(vec![data_file_path.clone()])),
-            Arc::new(Int64Array::from_iter_values(vec![199i64])),
-        ])
-        .unwrap();
-
-        let delete_props = WriterProperties::builder()
-            .set_compression(Compression::SNAPPY)
-            .build();
-
-        let delete_file = File::create(&delete_file_path).unwrap();
-        let mut delete_writer =
-            ArrowWriter::try_new(delete_file, delete_schema, Some(delete_props)).unwrap();
-        delete_writer.write(&delete_batch).unwrap();
-        delete_writer.close().unwrap();
-
-        // Step 3: Read the data file with the delete applied
-        let file_io = FileIO::new_with_fs();
-        let reader = ArrowReaderBuilder::new(file_io).build();
-
-        let task = FileScanTask {
-            file_size_in_bytes: std::fs::metadata(&data_file_path).unwrap().len(),
-            start: 0,
-            length: 0,
-            record_count: Some(200),
-            data_file_path: data_file_path.clone(),
-            data_file_format: DataFileFormat::Parquet,
-            schema: table_schema.clone(),
-            project_field_ids: vec![1],
-            predicate: None,
-            deletes: vec![FileScanTaskDeleteFile {
-                file_size_in_bytes: std::fs::metadata(&delete_file_path).unwrap().len(),
-                file_path: delete_file_path,
-                file_type: DataContentType::PositionDeletes,
-                partition_spec_id: 0,
-                equality_ids: None,
-            }],
-            partition: None,
-            partition_spec: None,
-            name_mapping: None,
-            case_sensitive: false,
-        };
-
-        let tasks = Box::pin(futures::stream::iter(vec![Ok(task)])) as FileScanTaskStream;
-        let result = reader
-            .read(tasks)
-            .unwrap()
-            .try_collect::<Vec<RecordBatch>>()
-            .await
-            .unwrap();
-
-        // Step 4: Verify we got 199 rows (not 200)
-        let total_rows: usize = result.iter().map(|b| b.num_rows()).sum();
-
-        println!("Total rows read: {total_rows}");
-        println!("Expected: 199 rows (deleted row 199 which had id=200)");
-
-        // This assertion will FAIL before the fix and PASS after the fix
-        assert_eq!(
-            total_rows, 199,
-            "Expected 199 rows after deleting row 199, but got {total_rows} rows. \
-             The bug causes position deletes in later row groups to be ignored."
-        );
-
-        // Verify the deleted row (id=200) is not present
-        let all_ids: Vec<i32> = result
-            .iter()
-            .flat_map(|batch| {
-                batch
-                    .column(0)
-                    .as_primitive::<arrow_array::types::Int32Type>()
-                    .values()
-                    .iter()
-                    .copied()
-            })
-            .collect();
-
-        assert!(
-            !all_ids.contains(&200),
-            "Row with id=200 should be deleted but was found in results"
-        );
-
-        // Verify we have all other ids (1-199)
-        let expected_ids: Vec<i32> = (1..=199).collect();
-        assert_eq!(
-            all_ids, expected_ids,
-            "Should have ids 1-199 but got different values"
-        );
-    }
-
-    /// Test for bug where position deletes are lost when skipping unselected row groups.
-    ///
-    /// This is a variant of `test_position_delete_across_multiple_row_groups` that exercises
-    /// the row group selection code path (`selected_row_groups: Some([...])`).
-    ///
-    /// When a file has multiple row groups and only some are selected for reading,
-    /// the `build_deletes_row_selection` function must correctly skip over deletes in
-    /// unselected row groups WITHOUT consuming deletes that belong to selected row groups.
-    ///
-    /// This test creates:
-    /// - A data file with 200 rows split into 2 row groups (0-99, 100-199)
-    /// - A position delete file that deletes row 199 (last row in second row group)
-    /// - Row group selection that reads ONLY row group 1 (rows 100-199)
-    ///
-    /// Expected behavior: Should return 99 rows (with row 199 deleted)
-    /// Bug behavior: Returns 100 rows (delete is lost when skipping row group 0)
-    ///
-    /// The bug occurs when processing row group 0 (unselected):
-    /// ```rust
-    /// delete_vector_iter.advance_to(next_row_group_base_idx); // Position at first delete >= 100
-    /// next_deleted_row_idx_opt = delete_vector_iter.next(); // BUG: Consumes delete at 199!
-    /// ```
-    ///
-    /// The fix is to NOT call `next()` after `advance_to()` when skipping unselected row groups,
-    /// because `advance_to()` already positions the iterator correctly without consuming elements.
-    #[tokio::test]
-    async fn test_position_delete_with_row_group_selection() {
-        use arrow_array::{Int32Array, Int64Array};
-        use parquet::file::reader::{FileReader, SerializedFileReader};
-
-        // Field IDs for positional delete schema
-        const FIELD_ID_POSITIONAL_DELETE_FILE_PATH: u64 = 2147483546;
-        const FIELD_ID_POSITIONAL_DELETE_POS: u64 = 2147483545;
-
-        let tmp_dir = TempDir::new().unwrap();
-        let table_location = tmp_dir.path().to_str().unwrap().to_string();
-
-        // Create table schema with a single 'id' column
-        let table_schema = Arc::new(
-            Schema::builder()
-                .with_schema_id(1)
-                .with_fields(vec![
-                    NestedField::required(1, "id", Type::Primitive(PrimitiveType::Int)).into(),
-                ])
-                .build()
-                .unwrap(),
-        );
-
-        let arrow_schema = Arc::new(ArrowSchema::new(vec![
-            Field::new("id", DataType::Int32, false).with_metadata(HashMap::from([(
-                PARQUET_FIELD_ID_META_KEY.to_string(),
-                "1".to_string(),
-            )])),
-        ]));
-
-        // Step 1: Create data file with 200 rows in 2 row groups
-        // Row group 0: rows 0-99 (ids 1-100)
-        // Row group 1: rows 100-199 (ids 101-200)
-        let data_file_path = format!("{table_location}/data.parquet");
-
-        let batch1 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new(
-            Int32Array::from_iter_values(1..=100),
-        )])
-        .unwrap();
-
-        let batch2 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new(
-            Int32Array::from_iter_values(101..=200),
-        )])
-        .unwrap();
-
-        // Force each batch into its own row group
-        let props = WriterProperties::builder()
-            .set_compression(Compression::SNAPPY)
-            .set_max_row_group_row_count(Some(100))
-            .build();
-
-        let file = File::create(&data_file_path).unwrap();
-        let mut writer = ArrowWriter::try_new(file, arrow_schema.clone(), Some(props)).unwrap();
-        writer.write(&batch1).expect("Writing batch 1");
-        writer.write(&batch2).expect("Writing batch 2");
-        writer.close().unwrap();
-
-        // Verify we created 2 row groups
-        let verify_file = File::open(&data_file_path).unwrap();
-        let verify_reader = SerializedFileReader::new(verify_file).unwrap();
-        assert_eq!(
-            verify_reader.metadata().num_row_groups(),
-            2,
-            "Should have 2 row groups"
-        );
-
-        // Step 2: Create position delete file that deletes row 199 (id=200, last row in row group 1)
-        let delete_file_path = format!("{table_location}/deletes.parquet");
-
-        let delete_schema = Arc::new(ArrowSchema::new(vec![
-            Field::new("file_path", DataType::Utf8, false).with_metadata(HashMap::from([(
-                PARQUET_FIELD_ID_META_KEY.to_string(),
-                FIELD_ID_POSITIONAL_DELETE_FILE_PATH.to_string(),
-            )])),
-            Field::new("pos", DataType::Int64, false).with_metadata(HashMap::from([(
-                PARQUET_FIELD_ID_META_KEY.to_string(),
-                FIELD_ID_POSITIONAL_DELETE_POS.to_string(),
-            )])),
-        ]));
-
-        // Delete row at position 199 (0-indexed, so it's the last row: id=200)
-        let delete_batch = RecordBatch::try_new(delete_schema.clone(), vec![
-            Arc::new(StringArray::from_iter_values(vec![data_file_path.clone()])),
-            Arc::new(Int64Array::from_iter_values(vec![199i64])),
-        ])
-        .unwrap();
-
-        let delete_props = WriterProperties::builder()
-            .set_compression(Compression::SNAPPY)
-            .build();
-
-        let delete_file = File::create(&delete_file_path).unwrap();
-        let mut delete_writer =
-            ArrowWriter::try_new(delete_file, delete_schema, Some(delete_props)).unwrap();
-        delete_writer.write(&delete_batch).unwrap();
-        delete_writer.close().unwrap();
-
-        // Step 3: Get byte ranges to read ONLY row group 1 (rows 100-199)
-        // This exercises the row group selection code path where row group 0 is skipped
-        let metadata_file = File::open(&data_file_path).unwrap();
-        let metadata_reader = SerializedFileReader::new(metadata_file).unwrap();
-        let metadata = metadata_reader.metadata();
-
-        let row_group_0 = metadata.row_group(0);
-        let row_group_1 = metadata.row_group(1);
-
-        let rg0_start = 4u64; // Parquet files start with 4-byte magic "PAR1"
-        let rg1_start = rg0_start + row_group_0.compressed_size() as u64;
-        let rg1_length = row_group_1.compressed_size() as u64;
-
-        println!(
-            "Row group 0: starts at byte {}, {} bytes compressed",
-            rg0_start,
-            row_group_0.compressed_size()
-        );
-        println!(
-            "Row group 1: starts at byte {}, {} bytes compressed",
-            rg1_start,
-            row_group_1.compressed_size()
-        );
-
-        let file_io = FileIO::new_with_fs();
-        let reader = ArrowReaderBuilder::new(file_io).build();
-
-        // Create FileScanTask that reads ONLY row group 1 via byte range filtering
-        let task = FileScanTask {
-            file_size_in_bytes: std::fs::metadata(&data_file_path).unwrap().len(),
-            start: rg1_start,
-            length: rg1_length,
-            record_count: Some(100), // Row group 1 has 100 rows
-            data_file_path: data_file_path.clone(),
-            data_file_format: DataFileFormat::Parquet,
-            schema: table_schema.clone(),
-            project_field_ids: vec![1],
-            predicate: None,
-            deletes: vec![FileScanTaskDeleteFile {
-                file_size_in_bytes: std::fs::metadata(&delete_file_path).unwrap().len(),
-                file_path: delete_file_path,
-                file_type: DataContentType::PositionDeletes,
-                partition_spec_id: 0,
-                equality_ids: None,
-            }],
-            partition: None,
-            partition_spec: None,
-            name_mapping: None,
-            case_sensitive: false,
-        };
-
-        let tasks = Box::pin(futures::stream::iter(vec![Ok(task)])) as FileScanTaskStream;
-        let result = reader
-            .read(tasks)
-            .unwrap()
-            .try_collect::<Vec<RecordBatch>>()
-            .await
-            .unwrap();
-
-        // Step 4: Verify we got 99 rows (not 100)
-        // Row group 1 has 100 rows (ids 101-200), minus 1 delete (id=200) = 99 rows
-        let total_rows: usize = result.iter().map(|b| b.num_rows()).sum();
-
-        println!("Total rows read from row group 1: {total_rows}");
-        println!("Expected: 99 rows (row group 1 has 100 rows, 1 delete at position 199)");
-
-        // This assertion will FAIL before the fix and PASS after the fix
-        assert_eq!(
-            total_rows, 99,
-            "Expected 99 rows from row group 1 after deleting position 199, but got {total_rows} rows. \
-             The bug causes position deletes to be lost when advance_to() is followed by next() \
-             when skipping unselected row groups."
-        );
-
-        // Verify the deleted row (id=200) is not present
-        let all_ids: Vec<i32> = result
-            .iter()
-            .flat_map(|batch| {
-                batch
-                    .column(0)
-                    .as_primitive::<arrow_array::types::Int32Type>()
-                    .values()
-                    .iter()
-                    .copied()
-            })
-            .collect();
-
-        assert!(
-            !all_ids.contains(&200),
-            "Row with id=200 should be deleted but was found in results"
-        );
-
-        // Verify we have ids 101-199 (not 101-200)
-        let expected_ids: Vec<i32> = (101..=199).collect();
-        assert_eq!(
-            all_ids, expected_ids,
-            "Should have ids 101-199 but got different values"
-        );
-    }
-    /// Test for bug where stale cached delete causes infinite loop when skipping row groups.
-    ///
-    /// This test exposes the inverse scenario of `test_position_delete_with_row_group_selection`:
-    /// - Position delete targets a row in the SKIPPED row group (not the selected one)
-    /// - After calling advance_to(), the cached delete index is stale
-    /// - Without updating the cache, the code enters an infinite loop
-    ///
-    /// This test creates:
-    /// - A data file with 200 rows split into 2 row groups (0-99, 100-199)
-    /// - A position delete file that deletes row 0 (first row in SKIPPED row group 0)
-    /// - Row group selection that reads ONLY row group 1 (rows 100-199)
-    ///
-    /// The bug occurs when skipping row group 0:
-    /// ```rust
-    /// let mut next_deleted_row_idx_opt = delete_vector_iter.next(); // Some(0)
-    /// // ... skip to row group 1 ...
-    /// delete_vector_iter.advance_to(100); // Iterator advances past delete at 0
-    /// // BUG: next_deleted_row_idx_opt is still Some(0) - STALE!
-    /// // When processing row group 1:
-    /// //   current_idx = 100, next_deleted_row_idx = 0, next_row_group_base_idx = 200
-    /// //   Loop condition: 0 < 200 (true)
-    /// //   But: current_idx (100) > next_deleted_row_idx (0)
-    /// //   And: current_idx (100) != next_deleted_row_idx (0)
-    /// //   Neither branch executes -> INFINITE LOOP!
-    /// ```
-    ///
-    /// Expected behavior: Should return 100 rows (delete at 0 doesn't affect row group 1)
-    /// Bug behavior: Infinite loop in build_deletes_row_selection
-    #[tokio::test]
-    async fn test_position_delete_in_skipped_row_group() {
-        use arrow_array::{Int32Array, Int64Array};
-        use parquet::file::reader::{FileReader, SerializedFileReader};
-
-        // Field IDs for positional delete schema
-        const FIELD_ID_POSITIONAL_DELETE_FILE_PATH: u64 = 2147483546;
-        const FIELD_ID_POSITIONAL_DELETE_POS: u64 = 2147483545;
-
-        let tmp_dir = TempDir::new().unwrap();
-        let table_location = tmp_dir.path().to_str().unwrap().to_string();
-
-        // Create table schema with a single 'id' column
-        let table_schema = Arc::new(
-            Schema::builder()
-                .with_schema_id(1)
-                .with_fields(vec![
-                    NestedField::required(1, "id", Type::Primitive(PrimitiveType::Int)).into(),
-                ])
-                .build()
-                .unwrap(),
-        );
-
-        let arrow_schema = Arc::new(ArrowSchema::new(vec![
-            Field::new("id", DataType::Int32, false).with_metadata(HashMap::from([(
-                PARQUET_FIELD_ID_META_KEY.to_string(),
-                "1".to_string(),
-            )])),
-        ]));
-
-        // Step 1: Create data file with 200 rows in 2 row groups
-        // Row group 0: rows 0-99 (ids 1-100)
-        // Row group 1: rows 100-199 (ids 101-200)
-        let data_file_path = format!("{table_location}/data.parquet");
-
-        let batch1 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new(
-            Int32Array::from_iter_values(1..=100),
-        )])
-        .unwrap();
-
-        let batch2 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new(
-            Int32Array::from_iter_values(101..=200),
-        )])
-        .unwrap();
-
-        // Force each batch into its own row group
-        let props = WriterProperties::builder()
-            .set_compression(Compression::SNAPPY)
-            .set_max_row_group_row_count(Some(100))
-            .build();
-
-        let file = File::create(&data_file_path).unwrap();
-        let mut writer = ArrowWriter::try_new(file, arrow_schema.clone(), Some(props)).unwrap();
-        writer.write(&batch1).expect("Writing batch 1");
-        writer.write(&batch2).expect("Writing batch 2");
-        writer.close().unwrap();
-
-        // Verify we created 2 row groups
-        let verify_file = File::open(&data_file_path).unwrap();
-        let verify_reader = SerializedFileReader::new(verify_file).unwrap();
-        assert_eq!(
-            verify_reader.metadata().num_row_groups(),
-            2,
-            "Should have 2 row groups"
-        );
-
-        // Step 2: Create position delete file that deletes row 0 (id=1, first row in row group 0)
-        let delete_file_path = format!("{table_location}/deletes.parquet");
-
-        let delete_schema = Arc::new(ArrowSchema::new(vec![
-            Field::new("file_path", DataType::Utf8, false).with_metadata(HashMap::from([(
-                PARQUET_FIELD_ID_META_KEY.to_string(),
-                FIELD_ID_POSITIONAL_DELETE_FILE_PATH.to_string(),
-            )])),
-            Field::new("pos", DataType::Int64, false).with_metadata(HashMap::from([(
-                PARQUET_FIELD_ID_META_KEY.to_string(),
-                FIELD_ID_POSITIONAL_DELETE_POS.to_string(),
-            )])),
-        ]));
-
-        // Delete row at position 0 (0-indexed, so it's the first row: id=1)
-        let delete_batch = RecordBatch::try_new(delete_schema.clone(), vec![
-            Arc::new(StringArray::from_iter_values(vec![data_file_path.clone()])),
-            Arc::new(Int64Array::from_iter_values(vec![0i64])),
-        ])
-        .unwrap();
-
-        let delete_props = WriterProperties::builder()
-            .set_compression(Compression::SNAPPY)
-            .build();
-
-        let delete_file = File::create(&delete_file_path).unwrap();
-        let mut delete_writer =
-            ArrowWriter::try_new(delete_file, delete_schema, Some(delete_props)).unwrap();
-        delete_writer.write(&delete_batch).unwrap();
-        delete_writer.close().unwrap();
-
-        // Step 3: Get byte ranges to read ONLY row group 1 (rows 100-199)
-        // This exercises the row group selection code path where row group 0 is skipped
-        let metadata_file = File::open(&data_file_path).unwrap();
-        let metadata_reader = SerializedFileReader::new(metadata_file).unwrap();
-        let metadata = metadata_reader.metadata();
-
-        let row_group_0 = metadata.row_group(0);
-        let row_group_1 = metadata.row_group(1);
-
-        let rg0_start = 4u64; // Parquet files start with 4-byte magic "PAR1"
-        let rg1_start = rg0_start + row_group_0.compressed_size() as u64;
-        let rg1_length = row_group_1.compressed_size() as u64;
-
-        let file_io = FileIO::new_with_fs();
-        let reader = ArrowReaderBuilder::new(file_io).build();
-
-        // Create FileScanTask that reads ONLY row group 1 via byte range filtering
-        let task = FileScanTask {
-            file_size_in_bytes: std::fs::metadata(&data_file_path).unwrap().len(),
-            start: rg1_start,
-            length: rg1_length,
-            record_count: Some(100), // Row group 1 has 100 rows
-            data_file_path: data_file_path.clone(),
-            data_file_format: DataFileFormat::Parquet,
-            schema: table_schema.clone(),
-            project_field_ids: vec![1],
-            predicate: None,
-            deletes: vec![FileScanTaskDeleteFile {
-                file_size_in_bytes: std::fs::metadata(&delete_file_path).unwrap().len(),
-                file_path: delete_file_path,
-                file_type: DataContentType::PositionDeletes,
-                partition_spec_id: 0,
-                equality_ids: None,
-            }],
-            partition: None,
-            partition_spec: None,
-            name_mapping: None,
-            case_sensitive: false,
-        };
-
-        let tasks = Box::pin(futures::stream::iter(vec![Ok(task)])) as FileScanTaskStream;
-        let result = reader
-            .read(tasks)
-            .unwrap()
-            .try_collect::<Vec<RecordBatch>>()
-            .await
-            .unwrap();
-
-        // Step 4: Verify we got 100 rows (all of row group 1)
-        // The delete at position 0 is in row group 0, which is skipped, so it doesn't affect us
-        let total_rows: usize = result.iter().map(|b| b.num_rows()).sum();
-
-        assert_eq!(
-            total_rows, 100,
-            "Expected 100 rows from row group 1 (delete at position 0 is in skipped row group 0). \
-             If this hangs or fails, it indicates the cached delete index was not updated after advance_to()."
-        );
-
-        // Verify we have all ids from row group 1 (101-200)
-        let all_ids: Vec<i32> = result
-            .iter()
-            .flat_map(|batch| {
-                batch
-                    .column(0)
-                    .as_primitive::<arrow_array::types::Int32Type>()
-                    .values()
-                    .iter()
-                    .copied()
-            })
-            .collect();
-
-        let expected_ids: Vec<i32> = (101..=200).collect();
-        assert_eq!(
-            all_ids, expected_ids,
-            "Should have ids 101-200 (all of row group 1)"
-        );
-    }
-
-    /// Test reading Parquet files without field ID metadata (e.g., migrated tables).
-    /// This exercises the position-based fallback path.
-    ///
-    /// Corresponds to Java's ParquetSchemaUtil.addFallbackIds() + pruneColumnsFallback()
-    /// in /parquet/src/main/java/org/apache/iceberg/parquet/ParquetSchemaUtil.java
-    #[tokio::test]
-    async fn test_read_parquet_file_without_field_ids() {
-        let schema = Arc::new(
-            Schema::builder()
-                .with_schema_id(1)
-                .with_fields(vec![
-                    NestedField::required(1, "name", Type::Primitive(PrimitiveType::String)).into(),
-                    NestedField::required(2, "age", Type::Primitive(PrimitiveType::Int)).into(),
-                ])
-                .build()
-                .unwrap(),
-        );
-
-        // Parquet file from a migrated table - no field ID metadata
-        let arrow_schema = Arc::new(ArrowSchema::new(vec![
-            Field::new("name", DataType::Utf8, false),
-            Field::new("age", DataType::Int32, false),
-        ]));
-
-        let tmp_dir = TempDir::new().unwrap();
-        let table_location = tmp_dir.path().to_str().unwrap().to_string();
-        let file_io = FileIO::new_with_fs();
-
-        let name_data = vec!["Alice", "Bob", "Charlie"];
-        let age_data = vec![30, 25, 35];
-
-        use arrow_array::Int32Array;
-        let name_col = Arc::new(StringArray::from(name_data.clone())) as ArrayRef;
-        let age_col = Arc::new(Int32Array::from(age_data.clone())) as ArrayRef;
-
-        let to_write = RecordBatch::try_new(arrow_schema.clone(), vec![name_col, age_col]).unwrap();
-
-        let props = WriterProperties::builder()
-            .set_compression(Compression::SNAPPY)
-            .build();
-
-        let file = File::create(format!("{table_location}/1.parquet")).unwrap();
-        let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap();
-
-        writer.write(&to_write).expect("Writing batch");
-        writer.close().unwrap();
-
-        let reader = ArrowReaderBuilder::new(file_io).build();
-
-        let tasks = Box::pin(futures::stream::iter(
-            vec![Ok(FileScanTask {
-                file_size_in_bytes: std::fs::metadata(format!("{table_location}/1.parquet"))
-                    .unwrap()
-                    .len(),
-                start: 0,
-                length: 0,
-                record_count: None,
-                data_file_path: format!("{table_location}/1.parquet"),
-                data_file_format: DataFileFormat::Parquet,
-                schema: schema.clone(),
-                project_field_ids: vec![1, 2],
-                predicate: None,
-                deletes: vec![],
-                partition: None,
-                partition_spec: None,
-                name_mapping: None,
-                case_sensitive: false,
-            })]
-            .into_iter(),
-        )) as FileScanTaskStream;
-
-        let result = reader
-            .read(tasks)
-            .unwrap()
-            .try_collect::<Vec<RecordBatch>>()
-            .await
-            .unwrap();
-
-        assert_eq!(result.len(), 1);
-        let batch = &result[0];
-        assert_eq!(batch.num_rows(), 3);
-        assert_eq!(batch.num_columns(), 2);
-
-        // Verify position-based mapping: field_id 1 → position 0, field_id 2 → position 1
-        let name_array = batch.column(0).as_string::<i32>();
-        assert_eq!(name_array.value(0), "Alice");
-        assert_eq!(name_array.value(1), "Bob");
-        assert_eq!(name_array.value(2), "Charlie");
-
-        let age_array = batch
-            .column(1)
-            .as_primitive::<arrow_array::types::Int32Type>();
-        assert_eq!(age_array.value(0), 30);
-        assert_eq!(age_array.value(1), 25);
-        assert_eq!(age_array.value(2), 35);
-    }
-
-    /// Test reading Parquet files without field IDs with partial projection.
-    /// Only a subset of columns are requested, verifying position-based fallback
-    /// handles column selection correctly.
-    #[tokio::test]
-    async fn test_read_parquet_without_field_ids_partial_projection() {
-        use arrow_array::Int32Array;
-
-        let schema = Arc::new(
-            Schema::builder()
-                .with_schema_id(1)
-                .with_fields(vec![
-                    NestedField::required(1, "col1", Type::Primitive(PrimitiveType::String)).into(),
-                    NestedField::required(2, "col2", Type::Primitive(PrimitiveType::Int)).into(),
-                    NestedField::required(3, "col3", Type::Primitive(PrimitiveType::String)).into(),
-                    NestedField::required(4, "col4", Type::Primitive(PrimitiveType::Int)).into(),
-                ])
-                .build()
-                .unwrap(),
-        );
-
-        let arrow_schema = Arc::new(ArrowSchema::new(vec![
-            Field::new("col1", DataType::Utf8, false),
-            Field::new("col2", DataType::Int32, false),
-            Field::new("col3", DataType::Utf8, false),
-            Field::new("col4", DataType::Int32, false),
-        ]));
-
-        let tmp_dir = TempDir::new().unwrap();
-        let table_location = tmp_dir.path().to_str().unwrap().to_string();
-        let file_io = FileIO::new_with_fs();
-
-        let col1_data = Arc::new(StringArray::from(vec!["a", "b"])) as ArrayRef;
-        let col2_data = Arc::new(Int32Array::from(vec![10, 20])) as ArrayRef;
-        let col3_data = Arc::new(StringArray::from(vec!["c", "d"])) as ArrayRef;
-        let col4_data = Arc::new(Int32Array::from(vec![30, 40])) as ArrayRef;
-
-        let to_write = RecordBatch::try_new(arrow_schema.clone(), vec![
-            col1_data, col2_data, col3_data, col4_data,
-        ])
-        .unwrap();
-
-        let props = WriterProperties::builder()
-            .set_compression(Compression::SNAPPY)
-            .build();
-
-        let file = File::create(format!("{table_location}/1.parquet")).unwrap();
-        let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap();
-
-        writer.write(&to_write).expect("Writing batch");
-        writer.close().unwrap();
-
-        let reader = ArrowReaderBuilder::new(file_io).build();
-
-        let tasks = Box::pin(futures::stream::iter(
-            vec![Ok(FileScanTask {
-                file_size_in_bytes: std::fs::metadata(format!("{table_location}/1.parquet"))
-                    .unwrap()
-                    .len(),
-                start: 0,
-                length: 0,
-                record_count: None,
-                data_file_path: format!("{table_location}/1.parquet"),
-                data_file_format: DataFileFormat::Parquet,
-                schema: schema.clone(),
-                project_field_ids: vec![1, 3],
-                predicate: None,
-                deletes: vec![],
-                partition: None,
-                partition_spec: None,
-                name_mapping: None,
-                case_sensitive: false,
-            })]
-            .into_iter(),
-        )) as FileScanTaskStream;
-
-        let result = reader
-            .read(tasks)
-            .unwrap()
-            .try_collect::<Vec<RecordBatch>>()
-            .await
-            .unwrap();
-
-        assert_eq!(result.len(), 1);
-        let batch = &result[0];
-        assert_eq!(batch.num_rows(), 2);
-        assert_eq!(batch.num_columns(), 2);
-
-        let col1_array = batch.column(0).as_string::<i32>();
-        assert_eq!(col1_array.value(0), "a");
-        assert_eq!(col1_array.value(1), "b");
-
-        let col3_array = batch.column(1).as_string::<i32>();
-        assert_eq!(col3_array.value(0), "c");
-        assert_eq!(col3_array.value(1), "d");
-    }
-
-    /// Test reading Parquet files without field IDs with schema evolution.
-    /// The Iceberg schema has more fields than the Parquet file, testing that
-    /// missing columns are filled with NULLs.
-    #[tokio::test]
-    async fn test_read_parquet_without_field_ids_schema_evolution() {
-        use arrow_array::{Array, Int32Array};
-
-        // Schema with field 3 added after the file was written
-        let schema = Arc::new(
-            Schema::builder()
-                .with_schema_id(1)
-                .with_fields(vec![
-                    NestedField::required(1, "name", Type::Primitive(PrimitiveType::String)).into(),
-                    NestedField::required(2, "age", Type::Primitive(PrimitiveType::Int)).into(),
-                    NestedField::optional(3, "city", Type::Primitive(PrimitiveType::String)).into(),
-                ])
-                .build()
-                .unwrap(),
-        );
-
-        let arrow_schema = Arc::new(ArrowSchema::new(vec![
-            Field::new("name", DataType::Utf8, false),
-            Field::new("age", DataType::Int32, false),
-        ]));
-
-        let tmp_dir = TempDir::new().unwrap();
-        let table_location = tmp_dir.path().to_str().unwrap().to_string();
-        let file_io = FileIO::new_with_fs();
-
-        let name_data = Arc::new(StringArray::from(vec!["Alice", "Bob"])) as ArrayRef;
-        let age_data = Arc::new(Int32Array::from(vec![30, 25])) as ArrayRef;
-
-        let to_write =
-            RecordBatch::try_new(arrow_schema.clone(), vec![name_data, age_data]).unwrap();
-
-        let props = WriterProperties::builder()
-            .set_compression(Compression::SNAPPY)
-            .build();
-
-        let file = File::create(format!("{table_location}/1.parquet")).unwrap();
-        let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap();
-
-        writer.write(&to_write).expect("Writing batch");
-        writer.close().unwrap();
-
-        let reader = ArrowReaderBuilder::new(file_io).build();
-
-        let tasks = Box::pin(futures::stream::iter(
-            vec![Ok(FileScanTask {
-                file_size_in_bytes: std::fs::metadata(format!("{table_location}/1.parquet"))
-                    .unwrap()
-                    .len(),
-                start: 0,
-                length: 0,
-                record_count: None,
-                data_file_path: format!("{table_location}/1.parquet"),
-                data_file_format: DataFileFormat::Parquet,
-                schema: schema.clone(),
-                project_field_ids: vec![1, 2, 3],
-                predicate: None,
-                deletes: vec![],
-                partition: None,
-                partition_spec: None,
-                name_mapping: None,
-                case_sensitive: false,
-            })]
-            .into_iter(),
-        )) as FileScanTaskStream;
-
-        let result = reader
-            .read(tasks)
-            .unwrap()
-            .try_collect::<Vec<RecordBatch>>()
-            .await
-            .unwrap();
-
-        assert_eq!(result.len(), 1);
-        let batch = &result[0];
-        assert_eq!(batch.num_rows(), 2);
-        assert_eq!(batch.num_columns(), 3);
-
-        let name_array = batch.column(0).as_string::<i32>();
-        assert_eq!(name_array.value(0), "Alice");
-        assert_eq!(name_array.value(1), "Bob");
-
-        let age_array = batch
-            .column(1)
-            .as_primitive::<arrow_array::types::Int32Type>();
-        assert_eq!(age_array.value(0), 30);
-        assert_eq!(age_array.value(1), 25);
-
-        // Verify missing column filled with NULLs
-        let city_array = batch.column(2).as_string::<i32>();
-        assert_eq!(city_array.null_count(), 2);
-        assert!(city_array.is_null(0));
-        assert!(city_array.is_null(1));
-    }
-
-    /// Test reading Parquet files without field IDs that have multiple row groups.
-    /// This ensures the position-based fallback works correctly across row group boundaries.
-    #[tokio::test]
-    async fn test_read_parquet_without_field_ids_multiple_row_groups() {
-        use arrow_array::Int32Array;
-
-        let schema = Arc::new(
-            Schema::builder()
-                .with_schema_id(1)
-                .with_fields(vec![
-                    NestedField::required(1, "name", Type::Primitive(PrimitiveType::String)).into(),
-                    NestedField::required(2, "value", Type::Primitive(PrimitiveType::Int)).into(),
-                ])
-                .build()
-                .unwrap(),
-        );
-
-        let arrow_schema = Arc::new(ArrowSchema::new(vec![
-            Field::new("name", DataType::Utf8, false),
-            Field::new("value", DataType::Int32, false),
-        ]));
-
-        let tmp_dir = TempDir::new().unwrap();
-        let table_location = tmp_dir.path().to_str().unwrap().to_string();
-        let file_io = FileIO::new_with_fs();
-
-        // Small row group size to create multiple row groups
-        let props = WriterProperties::builder()
-            .set_compression(Compression::SNAPPY)
-            .set_write_batch_size(2)
-            .set_max_row_group_row_count(Some(2))
-            .build();
-
-        let file = File::create(format!("{table_location}/1.parquet")).unwrap();
-        let mut writer = ArrowWriter::try_new(file, arrow_schema.clone(), Some(props)).unwrap();
-
-        // Write 6 rows in 3 batches (will create 3 row groups)
-        for batch_num in 0..3 {
-            let name_data = Arc::new(StringArray::from(vec![
-                format!("name_{}", batch_num * 2),
-                format!("name_{}", batch_num * 2 + 1),
-            ])) as ArrayRef;
-            let value_data =
-                Arc::new(Int32Array::from(vec![batch_num * 2, batch_num * 2 + 1])) as ArrayRef;
-
-            let batch =
-                RecordBatch::try_new(arrow_schema.clone(), vec![name_data, value_data]).unwrap();
-            writer.write(&batch).expect("Writing batch");
-        }
-        writer.close().unwrap();
-
-        let reader = ArrowReaderBuilder::new(file_io).build();
-
-        let tasks = Box::pin(futures::stream::iter(
-            vec![Ok(FileScanTask {
-                file_size_in_bytes: std::fs::metadata(format!("{table_location}/1.parquet"))
-                    .unwrap()
-                    .len(),
-                start: 0,
-                length: 0,
-                record_count: None,
-                data_file_path: format!("{table_location}/1.parquet"),
-                data_file_format: DataFileFormat::Parquet,
-                schema: schema.clone(),
-                project_field_ids: vec![1, 2],
-                predicate: None,
-                deletes: vec![],
-                partition: None,
-                partition_spec: None,
-                name_mapping: None,
-                case_sensitive: false,
-            })]
-            .into_iter(),
-        )) as FileScanTaskStream;
-
-        let result = reader
-            .read(tasks)
-            .unwrap()
-            .try_collect::<Vec<RecordBatch>>()
-            .await
-            .unwrap();
-
-        assert!(!result.is_empty());
-
-        let mut all_names = Vec::new();
-        let mut all_values = Vec::new();
-
-        for batch in &result {
-            let name_array = batch.column(0).as_string::<i32>();
-            let value_array = batch
-                .column(1)
-                .as_primitive::<arrow_array::types::Int32Type>();
-
-            for i in 0..batch.num_rows() {
-                all_names.push(name_array.value(i).to_string());
-                all_values.push(value_array.value(i));
-            }
-        }
-
-        assert_eq!(all_names.len(), 6);
-        assert_eq!(all_values.len(), 6);
-
-        for i in 0..6 {
-            assert_eq!(all_names[i], format!("name_{i}"));
-            assert_eq!(all_values[i], i as i32);
-        }
-    }
-
-    /// Test reading Parquet files without field IDs with nested types (struct).
-    /// Java's pruneColumnsFallback() projects entire top-level columns including nested content.
-    /// This test verifies that a top-level struct field is projected correctly with all its nested fields.
-    #[tokio::test]
-    async fn test_read_parquet_without_field_ids_with_struct() {
-        use arrow_array::{Int32Array, StructArray};
-        use arrow_schema::Fields;
-
-        let schema = Arc::new(
-            Schema::builder()
-                .with_schema_id(1)
-                .with_fields(vec![
-                    NestedField::required(1, "id", Type::Primitive(PrimitiveType::Int)).into(),
-                    NestedField::required(
-                        2,
-                        "person",
-                        Type::Struct(crate::spec::StructType::new(vec![
-                            NestedField::required(
-                                3,
-                                "name",
-                                Type::Primitive(PrimitiveType::String),
-                            )
-                            .into(),
-                            NestedField::required(4, "age", Type::Primitive(PrimitiveType::Int))
-                                .into(),
-                        ])),
-                    )
-                    .into(),
-                ])
-                .build()
-                .unwrap(),
-        );
-
-        let arrow_schema = Arc::new(ArrowSchema::new(vec![
-            Field::new("id", DataType::Int32, false),
-            Field::new(
-                "person",
-                DataType::Struct(Fields::from(vec![
-                    Field::new("name", DataType::Utf8, false),
-                    Field::new("age", DataType::Int32, false),
-                ])),
-                false,
-            ),
-        ]));
-
-        let tmp_dir = TempDir::new().unwrap();
-        let table_location = tmp_dir.path().to_str().unwrap().to_string();
-        let file_io = FileIO::new_with_fs();
-
-        let id_data = Arc::new(Int32Array::from(vec![1, 2])) as ArrayRef;
-        let name_data = Arc::new(StringArray::from(vec!["Alice", "Bob"])) as ArrayRef;
-        let age_data = Arc::new(Int32Array::from(vec![30, 25])) as ArrayRef;
-        let person_data = Arc::new(StructArray::from(vec![
-            (
-                Arc::new(Field::new("name", DataType::Utf8, false)),
-                name_data,
-            ),
-            (
-                Arc::new(Field::new("age", DataType::Int32, false)),
-                age_data,
-            ),
-        ])) as ArrayRef;
-
-        let to_write =
-            RecordBatch::try_new(arrow_schema.clone(), vec![id_data, person_data]).unwrap();
-
-        let props = WriterProperties::builder()
-            .set_compression(Compression::SNAPPY)
-            .build();
-
-        let file = File::create(format!("{table_location}/1.parquet")).unwrap();
-        let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap();
-
-        writer.write(&to_write).expect("Writing batch");
-        writer.close().unwrap();
-
-        let reader = ArrowReaderBuilder::new(file_io).build();
-
-        let tasks = Box::pin(futures::stream::iter(
-            vec![Ok(FileScanTask {
-                file_size_in_bytes: std::fs::metadata(format!("{table_location}/1.parquet"))
-                    .unwrap()
-                    .len(),
-                start: 0,
-                length: 0,
-                record_count: None,
-                data_file_path: format!("{table_location}/1.parquet"),
-                data_file_format: DataFileFormat::Parquet,
-                schema: schema.clone(),
-                project_field_ids: vec![1, 2],
-                predicate: None,
-                deletes: vec![],
-                partition: None,
-                partition_spec: None,
-                name_mapping: None,
-                case_sensitive: false,
-            })]
-            .into_iter(),
-        )) as FileScanTaskStream;
-
-        let result = reader
-            .read(tasks)
-            .unwrap()
-            .try_collect::<Vec<RecordBatch>>()
-            .await
-            .unwrap();
-
-        assert_eq!(result.len(), 1);
-        let batch = &result[0];
-        assert_eq!(batch.num_rows(), 2);
-        assert_eq!(batch.num_columns(), 2);
-
-        let id_array = batch
-            .column(0)
-            .as_primitive::<arrow_array::types::Int32Type>();
-        assert_eq!(id_array.value(0), 1);
-        assert_eq!(id_array.value(1), 2);
-
-        let person_array = batch.column(1).as_struct();
-        assert_eq!(person_array.num_columns(), 2);
-
-        let name_array = person_array.column(0).as_string::<i32>();
-        assert_eq!(name_array.value(0), "Alice");
-        assert_eq!(name_array.value(1), "Bob");
-
-        let age_array = person_array
-            .column(1)
-            .as_primitive::<arrow_array::types::Int32Type>();
-        assert_eq!(age_array.value(0), 30);
-        assert_eq!(age_array.value(1), 25);
-    }
-
-    /// Test reading Parquet files without field IDs with schema evolution - column added in the middle.
-    /// When a new column is inserted between existing columns in the schema order,
-    /// the fallback projection must correctly map field IDs to output positions.
-    #[tokio::test]
-    async fn test_read_parquet_without_field_ids_schema_evolution_add_column_in_middle() {
-        use arrow_array::{Array, Int32Array};
-
-        let arrow_schema_old = Arc::new(ArrowSchema::new(vec![
-            Field::new("col0", DataType::Int32, true),
-            Field::new("col1", DataType::Int32, true),
-        ]));
-
-        // New column added between existing columns: col0 (id=1), newCol (id=5), col1 (id=2)
-        let schema = Arc::new(
-            Schema::builder()
-                .with_schema_id(1)
-                .with_fields(vec![
-                    NestedField::optional(1, "col0", Type::Primitive(PrimitiveType::Int)).into(),
-                    NestedField::optional(5, "newCol", Type::Primitive(PrimitiveType::Int)).into(),
-                    NestedField::optional(2, "col1", Type::Primitive(PrimitiveType::Int)).into(),
-                ])
-                .build()
-                .unwrap(),
-        );
-
-        let tmp_dir = TempDir::new().unwrap();
-        let table_location = tmp_dir.path().to_str().unwrap().to_string();
-        let file_io = FileIO::new_with_fs();
-
-        let col0_data = Arc::new(Int32Array::from(vec![1, 2])) as ArrayRef;
-        let col1_data = Arc::new(Int32Array::from(vec![10, 20])) as ArrayRef;
-
-        let to_write =
-            RecordBatch::try_new(arrow_schema_old.clone(), vec![col0_data, col1_data]).unwrap();
-
-        let props = WriterProperties::builder()
-            .set_compression(Compression::SNAPPY)
-            .build();
-
-        let file = File::create(format!("{table_location}/1.parquet")).unwrap();
-        let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap();
-        writer.write(&to_write).expect("Writing batch");
-        writer.close().unwrap();
-
-        let reader = ArrowReaderBuilder::new(file_io).build();
-
-        let tasks = Box::pin(futures::stream::iter(
-            vec![Ok(FileScanTask {
-                file_size_in_bytes: std::fs::metadata(format!("{table_location}/1.parquet"))
-                    .unwrap()
-                    .len(),
-                start: 0,
-                length: 0,
-                record_count: None,
-                data_file_path: format!("{table_location}/1.parquet"),
-                data_file_format: DataFileFormat::Parquet,
-                schema: schema.clone(),
-                project_field_ids: vec![1, 5, 2],
-                predicate: None,
-                deletes: vec![],
-                partition: None,
-                partition_spec: None,
-                name_mapping: None,
-                case_sensitive: false,
-            })]
-            .into_iter(),
-        )) as FileScanTaskStream;
-
-        let result = reader
-            .read(tasks)
-            .unwrap()
-            .try_collect::<Vec<RecordBatch>>()
-            .await
-            .unwrap();
-
-        assert_eq!(result.len(), 1);
-        let batch = &result[0];
-        assert_eq!(batch.num_rows(), 2);
-        assert_eq!(batch.num_columns(), 3);
-
-        let result_col0 = batch
-            .column(0)
-            .as_primitive::<arrow_array::types::Int32Type>();
-        assert_eq!(result_col0.value(0), 1);
-        assert_eq!(result_col0.value(1), 2);
-
-        // New column should be NULL (doesn't exist in old file)
-        let result_newcol = batch
-            .column(1)
-            .as_primitive::<arrow_array::types::Int32Type>();
-        assert_eq!(result_newcol.null_count(), 2);
-        assert!(result_newcol.is_null(0));
-        assert!(result_newcol.is_null(1));
-
-        let result_col1 = batch
-            .column(2)
-            .as_primitive::<arrow_array::types::Int32Type>();
-        assert_eq!(result_col1.value(0), 10);
-        assert_eq!(result_col1.value(1), 20);
-    }
-
-    /// Test reading Parquet files without field IDs with a filter that eliminates all row groups.
-    /// During development of field ID mapping, we saw a panic when row_selection_enabled=true and
-    /// all row groups are filtered out.
-    #[tokio::test]
-    async fn test_read_parquet_without_field_ids_filter_eliminates_all_rows() {
-        use arrow_array::{Float64Array, Int32Array};
-
-        // Schema with fields that will use fallback IDs 1, 2, 3
-        let schema = Arc::new(
-            Schema::builder()
-                .with_schema_id(1)
-                .with_fields(vec![
-                    NestedField::required(1, "id", Type::Primitive(PrimitiveType::Int)).into(),
-                    NestedField::required(2, "name", Type::Primitive(PrimitiveType::String)).into(),
-                    NestedField::required(3, "value", Type::Primitive(PrimitiveType::Double))
-                        .into(),
-                ])
-                .build()
-                .unwrap(),
-        );
-
-        let arrow_schema = Arc::new(ArrowSchema::new(vec![
-            Field::new("id", DataType::Int32, false),
-            Field::new("name", DataType::Utf8, false),
-            Field::new("value", DataType::Float64, false),
-        ]));
-
-        let tmp_dir = TempDir::new().unwrap();
-        let table_location = tmp_dir.path().to_str().unwrap().to_string();
-        let file_io = FileIO::new_with_fs();
-
-        // Write data where all ids are >= 10
-        let id_data = Arc::new(Int32Array::from(vec![10, 11, 12])) as ArrayRef;
-        let name_data = Arc::new(StringArray::from(vec!["a", "b", "c"])) as ArrayRef;
-        let value_data = Arc::new(Float64Array::from(vec![100.0, 200.0, 300.0])) as ArrayRef;
-
-        let to_write =
-            RecordBatch::try_new(arrow_schema.clone(), vec![id_data, name_data, value_data])
-                .unwrap();
-
-        let props = WriterProperties::builder()
-            .set_compression(Compression::SNAPPY)
-            .build();
-
-        let file = File::create(format!("{table_location}/1.parquet")).unwrap();
-        let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap();
-        writer.write(&to_write).expect("Writing batch");
-        writer.close().unwrap();
-
-        // Filter that eliminates all row groups: id < 5
-        let predicate = Reference::new("id").less_than(Datum::int(5));
-
-        // Enable both row_group_filtering and row_selection - triggered the panic
-        let reader = ArrowReaderBuilder::new(file_io)
-            .with_row_group_filtering_enabled(true)
-            .with_row_selection_enabled(true)
-            .build();
-
-        let tasks = Box::pin(futures::stream::iter(
-            vec![Ok(FileScanTask {
-                file_size_in_bytes: std::fs::metadata(format!("{table_location}/1.parquet"))
-                    .unwrap()
-                    .len(),
-                start: 0,
-                length: 0,
-                record_count: None,
-                data_file_path: format!("{table_location}/1.parquet"),
-                data_file_format: DataFileFormat::Parquet,
-                schema: schema.clone(),
-                project_field_ids: vec![1, 2, 3],
-                predicate: Some(predicate.bind(schema, true).unwrap()),
-                deletes: vec![],
-                partition: None,
-                partition_spec: None,
-                name_mapping: None,
-                case_sensitive: false,
-            })]
-            .into_iter(),
-        )) as FileScanTaskStream;
-
-        // Should no longer panic
-        let result = reader
-            .read(tasks)
-            .unwrap()
-            .try_collect::<Vec<RecordBatch>>()
-            .await
-            .unwrap();
-
-        // Should return empty results
-        assert!(result.is_empty() || result.iter().all(|batch| batch.num_rows() == 0));
-    }
-
-    /// Test that concurrency=1 reads all files correctly and in deterministic order.
-    /// This verifies the fast-path optimization for single concurrency.
-    #[tokio::test]
-    async fn test_read_with_concurrency_one() {
-        use arrow_array::Int32Array;
-
-        let schema = Arc::new(
-            Schema::builder()
-                .with_schema_id(1)
-                .with_fields(vec![
-                    NestedField::required(1, "id", Type::Primitive(PrimitiveType::Int)).into(),
-                    NestedField::required(2, "file_num", Type::Primitive(PrimitiveType::Int))
-                        .into(),
-                ])
-                .build()
-                .unwrap(),
-        );
-
-        let arrow_schema = Arc::new(ArrowSchema::new(vec![
-            Field::new("id", DataType::Int32, false).with_metadata(HashMap::from([(
-                PARQUET_FIELD_ID_META_KEY.to_string(),
-                "1".to_string(),
-            )])),
-            Field::new("file_num", DataType::Int32, false).with_metadata(HashMap::from([(
-                PARQUET_FIELD_ID_META_KEY.to_string(),
-                "2".to_string(),
-            )])),
-        ]));
-
-        let tmp_dir = TempDir::new().unwrap();
-        let table_location = tmp_dir.path().to_str().unwrap().to_string();
-        let file_io = FileIO::new_with_fs();
-
-        // Create 3 parquet files with different data
-        let props = WriterProperties::builder()
-            .set_compression(Compression::SNAPPY)
-            .build();
-
-        for file_num in 0..3 {
-            let id_data = Arc::new(Int32Array::from_iter_values(
-                file_num * 10..(file_num + 1) * 10,
-            )) as ArrayRef;
-            let file_num_data = Arc::new(Int32Array::from(vec![file_num; 10])) as ArrayRef;
-
-            let to_write =
-                RecordBatch::try_new(arrow_schema.clone(), vec![id_data, file_num_data]).unwrap();
-
-            let file = File::create(format!("{table_location}/file_{file_num}.parquet")).unwrap();
-            let mut writer =
-                ArrowWriter::try_new(file, to_write.schema(), Some(props.clone())).unwrap();
-            writer.write(&to_write).expect("Writing batch");
-            writer.close().unwrap();
-        }
-
-        // Read with concurrency=1 (fast-path)
-        let reader = ArrowReaderBuilder::new(file_io)
-            .with_data_file_concurrency_limit(1)
-            .build();
-
-        // Create tasks in a specific order: file_0, file_1, file_2
-        let tasks = vec![
-            Ok(FileScanTask {
-                file_size_in_bytes: std::fs::metadata(format!("{table_location}/file_0.parquet"))
-                    .unwrap()
-                    .len(),
-                start: 0,
-                length: 0,
-                record_count: None,
-                data_file_path: format!("{table_location}/file_0.parquet"),
-                data_file_format: DataFileFormat::Parquet,
-                schema: schema.clone(),
-                project_field_ids: vec![1, 2],
-                predicate: None,
-                deletes: vec![],
-                partition: None,
-                partition_spec: None,
-                name_mapping: None,
-                case_sensitive: false,
-            }),
-            Ok(FileScanTask {
-                file_size_in_bytes: std::fs::metadata(format!("{table_location}/file_1.parquet"))
-                    .unwrap()
-                    .len(),
-                start: 0,
-                length: 0,
-                record_count: None,
-                data_file_path: format!("{table_location}/file_1.parquet"),
-                data_file_format: DataFileFormat::Parquet,
-                schema: schema.clone(),
-                project_field_ids: vec![1, 2],
-                predicate: None,
-                deletes: vec![],
-                partition: None,
-                partition_spec: None,
-                name_mapping: None,
-                case_sensitive: false,
-            }),
-            Ok(FileScanTask {
-                file_size_in_bytes: std::fs::metadata(format!("{table_location}/file_2.parquet"))
-                    .unwrap()
-                    .len(),
-                start: 0,
-                length: 0,
-                record_count: None,
-                data_file_path: format!("{table_location}/file_2.parquet"),
-                data_file_format: DataFileFormat::Parquet,
-                schema: schema.clone(),
-                project_field_ids: vec![1, 2],
-                predicate: None,
-                deletes: vec![],
-                partition: None,
-                partition_spec: None,
-                name_mapping: None,
-                case_sensitive: false,
-            }),
-        ];
-
-        let tasks_stream = Box::pin(futures::stream::iter(tasks)) as FileScanTaskStream;
-
-        let result = reader
-            .read(tasks_stream)
-            .unwrap()
-            .try_collect::<Vec<RecordBatch>>()
-            .await
-            .unwrap();
-
-        // Verify we got all 30 rows (10 from each file)
-        let total_rows: usize = result.iter().map(|b| b.num_rows()).sum();
-        assert_eq!(total_rows, 30, "Should have 30 total rows");
-
-        // Collect all ids and file_nums to verify data
-        let mut all_ids = Vec::new();
-        let mut all_file_nums = Vec::new();
-
-        for batch in &result {
-            let id_col = batch
-                .column(0)
-                .as_primitive::<arrow_array::types::Int32Type>();
-            let file_num_col = batch
-                .column(1)
-                .as_primitive::<arrow_array::types::Int32Type>();
-
-            for i in 0..batch.num_rows() {
-                all_ids.push(id_col.value(i));
-                all_file_nums.push(file_num_col.value(i));
-            }
-        }
-
-        assert_eq!(all_ids.len(), 30);
-        assert_eq!(all_file_nums.len(), 30);
-
-        // With concurrency=1 and sequential processing, files should be processed in order
-        // file_0: ids 0-9, file_num=0
-        // file_1: ids 10-19, file_num=1
-        // file_2: ids 20-29, file_num=2
-        for i in 0..10 {
-            assert_eq!(all_file_nums[i], 0, "First 10 rows should be from file_0");
-            assert_eq!(all_ids[i], i as i32, "IDs should be 0-9");
-        }
-        for i in 10..20 {
-            assert_eq!(all_file_nums[i], 1, "Next 10 rows should be from file_1");
-            assert_eq!(all_ids[i], i as i32, "IDs should be 10-19");
-        }
-        for i in 20..30 {
-            assert_eq!(all_file_nums[i], 2, "Last 10 rows should be from file_2");
-            assert_eq!(all_ids[i], i as i32, "IDs should be 20-29");
-        }
-    }
-
-    /// Test bucket partitioning reads source column from data file (not partition metadata).
-    ///
-    /// This is an integration test verifying the complete ArrowReader pipeline with bucket partitioning.
-    /// It corresponds to TestRuntimeFiltering tests in Iceberg Java (e.g., testRenamedSourceColumnTable).
-    ///
-    /// # Iceberg Spec Requirements
-    ///
-    /// Per the Iceberg spec "Column Projection" section:
-    /// > "Return the value from partition metadata if an **Identity Transform** exists for the field"
-    ///
-    /// This means:
-    /// - Identity transforms (e.g., `identity(dept)`) use constants from partition metadata
-    /// - Non-identity transforms (e.g., `bucket(4, id)`) must read source columns from data files
-    /// - Partition metadata for bucket transforms stores bucket numbers (0-3), NOT source values
-    ///
-    /// Java's PartitionUtil.constantsMap() implements this via:
-    /// ```java
-    /// if (field.transform().isIdentity()) {
-    ///     idToConstant.put(field.sourceId(), converted);
-    /// }
-    /// ```
-    ///
-    /// # What This Test Verifies
-    ///
-    /// This test ensures the full ArrowReader → RecordBatchTransformer pipeline correctly handles
-    /// bucket partitioning when FileScanTask provides partition_spec and partition_data:
-    ///
-    /// - Parquet file has field_id=1 named "id" with actual data [1, 5, 9, 13]
-    /// - FileScanTask specifies partition_spec with bucket(4, id) and partition_data with bucket=1
-    /// - RecordBatchTransformer.constants_map() excludes bucket-partitioned field from constants
-    /// - ArrowReader correctly reads [1, 5, 9, 13] from the data file
-    /// - Values are NOT replaced with constant 1 from partition metadata
-    ///
-    /// # Why This Matters
-    ///
-    /// Without correct handling:
-    /// - Runtime filtering would break (e.g., `WHERE id = 5` would fail)
-    /// - Query results would be incorrect (all rows would have id=1)
-    /// - Bucket partitioning would be unusable for query optimization
-    ///
-    /// # References
-    /// - Iceberg spec: format/spec.md "Column Projection" + "Partition Transforms"
-    /// - Java test: spark/src/test/java/.../TestRuntimeFiltering.java
-    /// - Java impl: core/src/main/java/org/apache/iceberg/util/PartitionUtil.java
-    #[tokio::test]
-    async fn test_bucket_partitioning_reads_source_column_from_file() {
-        use arrow_array::Int32Array;
-
-        use crate::spec::{Literal, PartitionSpec, Struct, Transform};
-
-        // Iceberg schema with id and name columns
-        let schema = Arc::new(
-            Schema::builder()
-                .with_schema_id(0)
-                .with_fields(vec![
-                    NestedField::required(1, "id", Type::Primitive(PrimitiveType::Int)).into(),
-                    NestedField::optional(2, "name", Type::Primitive(PrimitiveType::String)).into(),
-                ])
-                .build()
-                .unwrap(),
-        );
-
-        // Partition spec: bucket(4, id)
-        let partition_spec = Arc::new(
-            PartitionSpec::builder(schema.clone())
-                .with_spec_id(0)
-                .add_partition_field("id", "id_bucket", Transform::Bucket(4))
-                .unwrap()
-                .build()
-                .unwrap(),
-        );
-
-        // Partition data: bucket value is 1
-        let partition_data = Struct::from_iter(vec![Some(Literal::int(1))]);
-
-        // Create Arrow schema with field IDs for Parquet file
-        let arrow_schema = Arc::new(ArrowSchema::new(vec![
-            Field::new("id", DataType::Int32, false).with_metadata(HashMap::from([(
-                PARQUET_FIELD_ID_META_KEY.to_string(),
-                "1".to_string(),
-            )])),
-            Field::new("name", DataType::Utf8, true).with_metadata(HashMap::from([(
-                PARQUET_FIELD_ID_META_KEY.to_string(),
-                "2".to_string(),
-            )])),
-        ]));
-
-        // Write Parquet file with data
-        let tmp_dir = TempDir::new().unwrap();
-        let table_location = tmp_dir.path().to_str().unwrap().to_string();
-        let file_io = FileIO::new_with_fs();
-
-        let id_data = Arc::new(Int32Array::from(vec![1, 5, 9, 13])) as ArrayRef;
-        let name_data =
-            Arc::new(StringArray::from(vec!["Alice", "Bob", "Charlie", "Dave"])) as ArrayRef;
-
-        let to_write =
-            RecordBatch::try_new(arrow_schema.clone(), vec![id_data, name_data]).unwrap();
-
-        let props = WriterProperties::builder()
-            .set_compression(Compression::SNAPPY)
-            .build();
-        let file = File::create(format!("{}/data.parquet", &table_location)).unwrap();
-        let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap();
-        writer.write(&to_write).expect("Writing batch");
-        writer.close().unwrap();
-
-        // Read the Parquet file with partition spec and data
-        let reader = ArrowReaderBuilder::new(file_io).build();
-        let tasks = Box::pin(futures::stream::iter(
-            vec![Ok(FileScanTask {
-                file_size_in_bytes: std::fs::metadata(format!("{table_location}/data.parquet"))
-                    .unwrap()
-                    .len(),
-                start: 0,
-                length: 0,
-                record_count: None,
-                data_file_path: format!("{table_location}/data.parquet"),
-                data_file_format: DataFileFormat::Parquet,
-                schema: schema.clone(),
-                project_field_ids: vec![1, 2],
-                predicate: None,
-                deletes: vec![],
-                partition: Some(partition_data),
-                partition_spec: Some(partition_spec),
-                name_mapping: None,
-                case_sensitive: false,
-            })]
-            .into_iter(),
-        )) as FileScanTaskStream;
-
-        let result = reader
-            .read(tasks)
-            .unwrap()
-            .try_collect::<Vec<RecordBatch>>()
-            .await
-            .unwrap();
-
-        // Verify we got the correct data
-        assert_eq!(result.len(), 1);
-        let batch = &result[0];
-
-        assert_eq!(batch.num_columns(), 2);
-        assert_eq!(batch.num_rows(), 4);
-
-        // The id column MUST contain actual values from the Parquet file [1, 5, 9, 13],
-        // NOT the constant partition value 1
-        let id_col = batch
-            .column(0)
-            .as_primitive::<arrow_array::types::Int32Type>();
-        assert_eq!(id_col.value(0), 1);
-        assert_eq!(id_col.value(1), 5);
-        assert_eq!(id_col.value(2), 9);
-        assert_eq!(id_col.value(3), 13);
-
-        let name_col = batch.column(1).as_string::<i32>();
-        assert_eq!(name_col.value(0), "Alice");
-        assert_eq!(name_col.value(1), "Bob");
-        assert_eq!(name_col.value(2), "Charlie");
-        assert_eq!(name_col.value(3), "Dave");
-    }
-
-    #[test]
-    fn test_merge_ranges_empty() {
-        assert_eq!(super::merge_ranges(&[], 1024), Vec::<Range<u64>>::new());
-    }
-
-    #[test]
-    fn test_merge_ranges_no_coalesce() {
-        // Ranges far apart should not be merged
-        let ranges = vec![0..100, 1_000_000..1_000_100];
-        let merged = super::merge_ranges(&ranges, 1024);
-        assert_eq!(merged, vec![0..100, 1_000_000..1_000_100]);
-    }
-
-    #[test]
-    fn test_merge_ranges_coalesce() {
-        // Ranges within the gap threshold should be merged
-        let ranges = vec![0..100, 200..300, 500..600];
-        let merged = super::merge_ranges(&ranges, 1024);
-        assert_eq!(merged, vec![0..600]);
-    }
-
-    #[test]
-    fn test_merge_ranges_overlapping() {
-        let ranges = vec![0..200, 100..300];
-        let merged = super::merge_ranges(&ranges, 0);
-        assert_eq!(merged, vec![0..300]);
-    }
-
-    #[test]
-    fn test_merge_ranges_unsorted() {
-        let ranges = vec![500..600, 0..100, 200..300];
-        let merged = super::merge_ranges(&ranges, 1024);
-        assert_eq!(merged, vec![0..600]);
-    }
-
-    /// Mock FileRead backed by a flat byte buffer.
-    struct MockFileRead {
-        data: bytes::Bytes,
-    }
-
-    impl MockFileRead {
-        fn new(size: usize) -> Self {
-            // Fill with sequential byte values so slices are verifiable.
-            let data: Vec<u8> = (0..size).map(|i| (i % 256) as u8).collect();
-            Self {
-                data: bytes::Bytes::from(data),
-            }
-        }
-    }
-
-    #[async_trait::async_trait]
-    impl crate::io::FileRead for MockFileRead {
-        async fn read(&self, range: Range<u64>) -> crate::Result<bytes::Bytes> {
-            Ok(self.data.slice(range.start as usize..range.end as usize))
-        }
-    }
-
-    #[tokio::test]
-    async fn test_get_byte_ranges_no_coalesce() {
-        use parquet::arrow::async_reader::AsyncFileReader;
-
-        let mock = MockFileRead::new(2048);
-        let expected_0 = mock.data.slice(0..100);
-        let expected_1 = mock.data.slice(1500..1600);
-
-        let mut reader =
-            super::ArrowFileReader::new(crate::io::FileMetadata { size: 2048 }, Box::new(mock))
-                .with_parquet_read_options(
-                    super::ParquetReadOptions::builder()
-                        .with_range_coalesce_bytes(0)
-                        .build(),
-                );
-
-        let result = reader
-            .get_byte_ranges(vec![0..100, 1500..1600])
-            .await
-            .unwrap();
-
-        assert_eq!(result.len(), 2);
-        assert_eq!(result[0], expected_0);
-        assert_eq!(result[1], expected_1);
-    }
-
-    #[tokio::test]
-    async fn test_get_byte_ranges_with_coalesce() {
-        use parquet::arrow::async_reader::AsyncFileReader;
-
-        let mock = MockFileRead::new(1024);
-        let expected_0 = mock.data.slice(0..100);
-        let expected_1 = mock.data.slice(200..300);
-        let expected_2 = mock.data.slice(500..600);
-
-        let mut reader =
-            super::ArrowFileReader::new(crate::io::FileMetadata { size: 1024 }, Box::new(mock))
-                .with_parquet_read_options(
-                    super::ParquetReadOptions::builder()
-                        .with_range_coalesce_bytes(1024)
-                        .build(),
-                );
-
-        // All ranges within coalesce threshold — should merge into one fetch.
-        let result = reader
-            .get_byte_ranges(vec![0..100, 200..300, 500..600])
-            .await
-            .unwrap();
-
-        assert_eq!(result.len(), 3);
-        assert_eq!(result[0], expected_0);
-        assert_eq!(result[1], expected_1);
-        assert_eq!(result[2], expected_2);
-    }
-
-    #[tokio::test]
-    async fn test_get_byte_ranges_empty() {
-        use parquet::arrow::async_reader::AsyncFileReader;
-
-        let mock = MockFileRead::new(1024);
-        let mut reader =
-            super::ArrowFileReader::new(crate::io::FileMetadata { size: 1024 }, Box::new(mock));
-
-        let result = reader.get_byte_ranges(vec![]).await.unwrap();
-        assert!(result.is_empty());
-    }
-
-    #[tokio::test]
-    async fn test_get_byte_ranges_coalesce_max() {
-        use parquet::arrow::async_reader::AsyncFileReader;
-
-        let mock = MockFileRead::new(2048);
-        let expected_0 = mock.data.slice(0..100);
-        let expected_1 = mock.data.slice(1500..1600);
-
-        let mut reader =
-            super::ArrowFileReader::new(crate::io::FileMetadata { size: 2048 }, Box::new(mock))
-                .with_parquet_read_options(
-                    super::ParquetReadOptions::builder()
-                        .with_range_coalesce_bytes(u64::MAX)
-                        .build(),
-                );
-
-        // u64::MAX coalesce — all ranges merge into a single fetch.
-        let result = reader
-            .get_byte_ranges(vec![0..100, 1500..1600])
-            .await
-            .unwrap();
-
-        assert_eq!(result.len(), 2);
-        assert_eq!(result[0], expected_0);
-        assert_eq!(result[1], expected_1);
-    }
-
-    #[tokio::test]
-    async fn test_get_byte_ranges_concurrency_zero() {
-        use parquet::arrow::async_reader::AsyncFileReader;
-
-        // concurrency=0 is clamped to 1, so this should not hang.
-        let mock = MockFileRead::new(1024);
-        let expected = mock.data.slice(0..100);
-
-        let mut reader =
-            super::ArrowFileReader::new(crate::io::FileMetadata { size: 1024 }, Box::new(mock))
-                .with_parquet_read_options(
-                    super::ParquetReadOptions::builder()
-                        .with_range_fetch_concurrency(0)
-                        .build(),
-                );
-
-        let result = reader
-            .get_byte_ranges(vec![0..100, 200..300])
-            .await
-            .unwrap();
-        assert_eq!(result.len(), 2);
-        assert_eq!(result[0], expected);
-    }
-
-    #[tokio::test]
-    async fn test_get_byte_ranges_concurrency_one() {
-        use parquet::arrow::async_reader::AsyncFileReader;
-
-        let mock = MockFileRead::new(2048);
-        let expected_0 = mock.data.slice(0..100);
-        let expected_1 = mock.data.slice(500..600);
-        let expected_2 = mock.data.slice(1500..1600);
-
-        let mut reader =
-            super::ArrowFileReader::new(crate::io::FileMetadata { size: 2048 }, Box::new(mock))
-                .with_parquet_read_options(
-                    super::ParquetReadOptions::builder()
-                        .with_range_coalesce_bytes(0)
-                        .with_range_fetch_concurrency(1)
-                        .build(),
-                );
-
-        // concurrency=1 with no coalescing — sequential fetches.
-        let result = reader
-            .get_byte_ranges(vec![0..100, 500..600, 1500..1600])
-            .await
-            .unwrap();
-
-        assert_eq!(result.len(), 3);
-        assert_eq!(result[0], expected_0);
-        assert_eq!(result[1], expected_1);
-        assert_eq!(result[2], expected_2);
-    }
-
-    /// Test that a Parquet file written with Arrow Binary type can be read when the
-    /// Iceberg schema declares the column as Fixed(N).
-    ///
-    /// This reproduces a real-world issue where Snowflake writes `FIXED_LEN_BYTE_ARRAY`
-    /// columns that the Arrow Parquet reader decodes as `Binary` rather than
-    /// `FixedSizeBinary(N)`. Without the `(Binary, Fixed(_))` arm in
-    /// `type_promotion_is_valid`, the column is silently excluded from projection and
-    /// filled with nulls.
-    #[tokio::test]
-    async fn test_binary_to_fixed_type_promotion() {
-        // UUID-like 16-byte values
-        let uuid_bytes: Vec<[u8; 16]> = vec![
-            [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
-            [
-                0xA1, 0xB2, 0xC3, 0xD4, 0xE5, 0xF6, 0x07, 0x18, 0x29, 0x3A, 0x4B, 0x5C, 0x6D, 0x7E,
-                0x8F, 0x90,
-            ],
-            [0xFF; 16],
-        ];
-        let int_data = vec![1i32, 2, 3];
-
-        // Iceberg schema: field 1 = Int, field 2 = Fixed(16)
-        let iceberg_schema = Arc::new(
-            Schema::builder()
-                .with_schema_id(1)
-                .with_fields(vec![
-                    NestedField::required(1, "id", Type::Primitive(PrimitiveType::Int)).into(),
-                    NestedField::optional(2, "uuid_col", Type::Primitive(PrimitiveType::Fixed(16)))
-                        .into(),
-                ])
-                .build()
-                .unwrap(),
-        );
-
-        // Arrow schema: write uuid_col as Binary (not FixedSizeBinary), simulating
-        // what the Arrow Parquet reader produces for some writers (e.g. Snowflake).
-        let arrow_schema = Arc::new(ArrowSchema::new(vec![
-            Field::new("id", DataType::Int32, false).with_metadata(HashMap::from([(
-                PARQUET_FIELD_ID_META_KEY.to_string(),
-                "1".to_string(),
-            )])),
-            Field::new("uuid_col", DataType::Binary, true).with_metadata(HashMap::from([(
-                PARQUET_FIELD_ID_META_KEY.to_string(),
-                "2".to_string(),
-            )])),
-        ]));
-
-        let id_col = Arc::new(Int32Array::from(int_data.clone())) as ArrayRef;
-        let uuid_col = Arc::new(BinaryArray::from_vec(
-            uuid_bytes.iter().map(|b| b.as_slice()).collect(),
-        )) as ArrayRef;
-
-        let batch = RecordBatch::try_new(arrow_schema.clone(), vec![id_col, uuid_col]).unwrap();
-
-        // Write Parquet file
-        let tmp_dir = TempDir::new().unwrap();
-        let table_location = tmp_dir.path().to_str().unwrap().to_string();
-        let parquet_path = format!("{table_location}/1.parquet");
-        let file = File::create(&parquet_path).unwrap();
-        let props = WriterProperties::builder()
-            .set_compression(Compression::SNAPPY)
-            .build();
-        let mut writer = ArrowWriter::try_new(file, batch.schema(), Some(props)).unwrap();
-        writer.write(&batch).unwrap();
-        writer.close().unwrap();
-
-        let file_io = FileIO::new_with_fs();
-        let file_size = std::fs::metadata(&parquet_path).unwrap().len();
-        let reader = ArrowReaderBuilder::new(file_io.clone()).build();
-
-        // --- Test 1: Full scan (all columns projected) ---
-        // This is the case that previously failed.
-        {
-            let tasks = Box::pin(futures::stream::iter(vec![Ok(FileScanTask {
-                file_size_in_bytes: file_size,
-                start: 0,
-                length: 0,
-                record_count: None,
-                data_file_path: parquet_path.clone(),
-                data_file_format: DataFileFormat::Parquet,
-                schema: iceberg_schema.clone(),
-                project_field_ids: vec![1, 2],
-                predicate: None,
-                deletes: vec![],
-                partition: None,
-                partition_spec: None,
-                name_mapping: None,
-                case_sensitive: false,
-            })])) as FileScanTaskStream;
-
-            let batches: Vec<RecordBatch> =
-                reader.read(tasks).unwrap().try_collect().await.unwrap();
-
-            assert_eq!(batches.len(), 1);
-            let result = &batches[0];
-            assert_eq!(result.num_rows(), 3);
-            assert_eq!(result.num_columns(), 2);
-
-            // Verify id column
-            let id_arr = result
-                .column_by_name("id")
-                .unwrap()
-                .as_any()
-                .downcast_ref::<Int32Array>()
-                .unwrap();
-            assert_eq!(id_arr.values(), &int_data);
-
-            // Verify uuid_col: data must come through as Binary, preserving every byte
-            let uuid_arr = result.column_by_name("uuid_col").unwrap();
-            assert_eq!(uuid_arr.null_count(), 0, "uuid_col should have no nulls");
-            // The transformer may cast Binary -> FixedSizeBinary to match the target schema
-            let uuid_values: Vec<&[u8]> =
-                if let Some(bin) = uuid_arr.as_any().downcast_ref::<BinaryArray>() {
-                    (0..bin.len()).map(|i| bin.value(i)).collect()
-                } else if let Some(fsb) = uuid_arr.as_any().downcast_ref::<FixedSizeBinaryArray>() {
-                    (0..fsb.len()).map(|i| fsb.value(i)).collect()
-                } else {
-                    panic!("uuid_col has unexpected type: {}", uuid_arr.data_type())
-                };
-            for (i, expected) in uuid_bytes.iter().enumerate() {
-                assert_eq!(
-                    uuid_values[i],
-                    expected.as_slice(),
-                    "uuid_col row {i} bytes mismatch"
-                );
-            }
-        }
-
-        // --- Test 2: Projected scan (only uuid_col) ---
-        {
-            let reader2 = ArrowReaderBuilder::new(file_io).build();
-            let tasks = Box::pin(futures::stream::iter(vec![Ok(FileScanTask {
-                file_size_in_bytes: file_size,
-                start: 0,
-                length: 0,
-                record_count: None,
-                data_file_path: parquet_path.clone(),
-                data_file_format: DataFileFormat::Parquet,
-                schema: iceberg_schema.clone(),
-                project_field_ids: vec![2],
-                predicate: None,
-                deletes: vec![],
-                partition: None,
-                partition_spec: None,
-                name_mapping: None,
-                case_sensitive: false,
-            })])) as FileScanTaskStream;
-
-            let batches: Vec<RecordBatch> =
-                reader2.read(tasks).unwrap().try_collect().await.unwrap();
-
-            assert_eq!(batches.len(), 1);
-            let result = &batches[0];
-            assert_eq!(result.num_rows(), 3);
-            assert_eq!(result.num_columns(), 1);
-
-            let uuid_arr = result.column(0);
-            assert_eq!(uuid_arr.null_count(), 0, "uuid_col should have no nulls");
-            let uuid_values: Vec<&[u8]> =
-                if let Some(bin) = uuid_arr.as_any().downcast_ref::<BinaryArray>() {
-                    (0..bin.len()).map(|i| bin.value(i)).collect()
-                } else if let Some(fsb) = uuid_arr.as_any().downcast_ref::<FixedSizeBinaryArray>() {
-                    (0..fsb.len()).map(|i| fsb.value(i)).collect()
-                } else {
-                    panic!("uuid_col has unexpected type: {}", uuid_arr.data_type())
-                };
-            for (i, expected) in uuid_bytes.iter().enumerate() {
-                assert_eq!(
-                    uuid_values[i],
-                    expected.as_slice(),
-                    "uuid_col row {i} bytes mismatch in projected scan"
-                );
-            }
-        }
-    }
-}
diff --git a/crates/iceberg/src/arrow/reader/file_reader.rs b/crates/iceberg/src/arrow/reader/file_reader.rs
new file mode 100644
index 0000000000..fb0482caf5
--- /dev/null
+++ b/crates/iceberg/src/arrow/reader/file_reader.rs
@@ -0,0 +1,376 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Async Parquet file reader that adapts an Iceberg `FileRead` to parquet's `AsyncFileReader`.
+
+use std::ops::Range;
+use std::sync::Arc;
+
+use bytes::Bytes;
+use futures::future::BoxFuture;
+use futures::{FutureExt, StreamExt, TryFutureExt, TryStreamExt};
+use parquet::arrow::arrow_reader::ArrowReaderOptions;
+use parquet::arrow::async_reader::AsyncFileReader;
+use parquet::file::metadata::{PageIndexPolicy, ParquetMetaData, ParquetMetaDataReader};
+
+use super::ParquetReadOptions;
+use crate::io::{FileMetadata, FileRead};
+
+/// Adapter that exposes an Iceberg `FileRead` through parquet's `AsyncFileReader` trait.
+pub struct ArrowFileReader {
+    meta: FileMetadata, // `meta.size` is handed to the metadata loader as the file size
+    parquet_read_options: ParquetReadOptions, // coalescing, concurrency and metadata tunables
+    r: Box<dyn FileRead>, // underlying reader that serves every byte-range request
+}
+
+impl ArrowFileReader {
+    /// Create a reader over `r` for the file described by `meta`, with default read options.
+    pub fn new(meta: FileMetadata, r: Box<dyn FileRead>) -> Self {
+        Self {
+            meta,
+            parquet_read_options: ParquetReadOptions::builder().build(),
+            r,
+        }
+    }
+
+    /// Replace the Parquet read options wholesale with `options`.
+    pub(crate) fn with_parquet_read_options(mut self, options: ParquetReadOptions) -> Self {
+        self.parquet_read_options = options;
+        self
+    }
+}
+
+impl AsyncFileReader for ArrowFileReader {
+    fn get_bytes(&mut self, range: Range<u64>) -> BoxFuture<'_, parquet::errors::Result<Bytes>> {
+        Box::pin(
+            self.r
+                .read(range.start..range.end)
+                .map_err(|err| parquet::errors::ParquetError::External(Box::new(err))),
+        )
+    }
+
+    /// Overrides the default `get_byte_ranges`, which calls `get_bytes` sequentially.
+    /// The parquet reader calls this to fetch column chunks for a row group, so
+    /// without this override each column chunk is a serial round-trip to object storage.
+    /// Output order matches `ranges`. Adapted from object_store's `coalesce_ranges` in `util.rs`.
+    fn get_byte_ranges(
+        &mut self,
+        ranges: Vec<Range<u64>>,
+    ) -> BoxFuture<'_, parquet::errors::Result<Vec<Bytes>>> {
+        let coalesce_bytes = self.parquet_read_options.range_coalesce_bytes();
+        let concurrency = self.parquet_read_options.range_fetch_concurrency().max(1);
+
+        async move {
+            // Merge nearby ranges to reduce the number of object store requests.
+            let fetch_ranges = merge_ranges(&ranges, coalesce_bytes);
+            let r = &self.r;
+
+            // Fetch merged ranges, at most `concurrency` in flight; `buffered` preserves order.
+            let fetched: Vec<Bytes> = futures::stream::iter(fetch_ranges.iter().cloned())
+                .map(|range| async move {
+                    r.read(range)
+                        .await
+                        .map_err(|e| parquet::errors::ParquetError::External(Box::new(e)))
+                })
+                .buffered(concurrency)
+                .try_collect()
+                .await?;
+
+            // Slice each requested range out of its merged fetch, clamped to the fetched length.
+            Ok(ranges
+                .iter()
+                .map(|range| {
+                    let idx = fetch_ranges.partition_point(|v| v.start <= range.start) - 1;
+                    let fetch_range = &fetch_ranges[idx];
+                    let fetch_bytes = &fetched[idx];
+                    let start = (range.start - fetch_range.start) as usize;
+                    let end = (range.end - fetch_range.start) as usize;
+                    fetch_bytes.slice(start..end.min(fetch_bytes.len()))
+                })
+                .collect())
+        }
+        .boxed()
+    }
+
+    // TODO: `ArrowReaderOptions` is currently ignored because it exposes no method to access its option fields.
+    // We will fix this after `v55.1.0` is released; see https://github.com/apache/arrow-rs/issues/7393
+    fn get_metadata(
+        &mut self,
+        _options: Option<&'_ ArrowReaderOptions>,
+    ) -> BoxFuture<'_, parquet::errors::Result<Arc<ParquetMetaData>>> {
+        async move {
+            fn page_index_policy(enabled: bool) -> PageIndexPolicy {
+                if enabled {
+                    PageIndexPolicy::Optional
+                } else {
+                    PageIndexPolicy::Skip
+                }
+            }
+
+            let reader = ParquetMetaDataReader::new()
+                .with_prefetch_hint(self.parquet_read_options.metadata_size_hint())
+                // Set the page policy first because it updates both column and offset policies.
+                .with_page_index_policy(page_index_policy(
+                    self.parquet_read_options.preload_page_index(),
+                ))
+                .with_column_index_policy(page_index_policy(
+                    self.parquet_read_options.preload_column_index(),
+                ))
+                .with_offset_index_policy(page_index_policy(
+                    self.parquet_read_options.preload_offset_index(),
+                ));
+            let size = self.meta.size;
+            let meta = reader.load_and_finish(self, size).await?;
+
+            Ok(Arc::new(meta))
+        }
+        .boxed()
+    }
+}
+
+/// Sorts `ranges` by start, then merges overlapping ranges and those <= `coalesce` bytes apart.
+/// Adapted from object_store's `merge_ranges` in `util.rs`.
+fn merge_ranges(ranges: &[Range<u64>], coalesce: u64) -> Vec<Range<u64>> {
+    if ranges.is_empty() {
+        return vec![];
+    }
+
+    let mut ranges = ranges.to_vec(); // sort a copy; the caller's slice is untouched
+    ranges.sort_unstable_by_key(|r| r.start);
+
+    let mut merged = Vec::with_capacity(ranges.len());
+    let mut start_idx = 0;
+    let mut end_idx = 1;
+
+    while start_idx != ranges.len() {
+        let mut range_end = ranges[start_idx].end;
+
+        while end_idx != ranges.len()
+            && ranges[end_idx]
+                .start
+                .checked_sub(range_end)
+                .map(|delta| delta <= coalesce)
+                .unwrap_or(true)
+        {
+            range_end = range_end.max(ranges[end_idx].end); // never shrink the merged end
+            end_idx += 1;
+        }
+
+        merged.push(ranges[start_idx].start..range_end);
+        start_idx = end_idx; // first range not absorbed into this group
+        end_idx += 1;
+    }
+
+    merged
+}
+
+#[cfg(test)]
+mod tests {
+    use std::ops::Range;
+
+    use parquet::arrow::async_reader::AsyncFileReader;
+
+    use super::{ArrowFileReader, ParquetReadOptions, merge_ranges};
+    use crate::io::{FileMetadata, FileRead};
+
+    #[test]
+    fn test_merge_ranges_empty() {
+        assert_eq!(merge_ranges(&[], 1024), Vec::<Range<u64>>::new());
+    }
+
+    #[test]
+    fn test_merge_ranges_no_coalesce() {
+        // The 999_900-byte gap exceeds the 1024-byte threshold, so nothing is merged.
+        let ranges = vec![0..100, 1_000_000..1_000_100];
+        let merged = merge_ranges(&ranges, 1024);
+        assert_eq!(merged, vec![0..100, 1_000_000..1_000_100]);
+    }
+
+    #[test]
+    fn test_merge_ranges_coalesce() {
+        // Gaps of 100 and 200 bytes are within the 1024-byte threshold, so all three merge.
+        let ranges = vec![0..100, 200..300, 500..600];
+        let merged = merge_ranges(&ranges, 1024);
+        assert_eq!(merged, vec![0..600]);
+    }
+
+    #[test]
+    fn test_merge_ranges_overlapping() {
+        let ranges = vec![0..200, 100..300]; // overlapping, so merged even with coalesce = 0
+        let merged = merge_ranges(&ranges, 0);
+        assert_eq!(merged, vec![0..300]);
+    }
+
+    #[test]
+    fn test_merge_ranges_unsorted() {
+        let ranges = vec![500..600, 0..100, 200..300]; // deliberately out of order
+        let merged = merge_ranges(&ranges, 1024);
+        assert_eq!(merged, vec![0..600]);
+    }
+
+    /// In-memory `FileRead` mock backed by a single `Bytes` buffer.
+    struct MockFileRead {
+        data: bytes::Bytes,
+    }
+
+    impl MockFileRead {
+        fn new(size: usize) -> Self {
+            // Byte `i` holds `i % 256`, so the expected contents of any slice are predictable.
+            let data: Vec<u8> = (0..size).map(|i| (i % 256) as u8).collect();
+            Self {
+                data: bytes::Bytes::from(data),
+            }
+        }
+    }
+
+    #[async_trait::async_trait]
+    impl FileRead for MockFileRead {
+        async fn read(&self, range: Range<u64>) -> crate::Result<bytes::Bytes> {
+            Ok(self.data.slice(range.start as usize..range.end as usize))
+        }
+    }
+
+    #[tokio::test]
+    async fn test_get_byte_ranges_no_coalesce() {
+        let mock = MockFileRead::new(2048);
+        let expected_0 = mock.data.slice(0..100);
+        let expected_1 = mock.data.slice(1500..1600);
+
+        let mut reader = ArrowFileReader::new(FileMetadata { size: 2048 }, Box::new(mock))
+            .with_parquet_read_options(
+                ParquetReadOptions::builder()
+                    .with_range_coalesce_bytes(0)
+                    .build(),
+            );
+
+        let result = reader
+            .get_byte_ranges(vec![0..100, 1500..1600])
+            .await
+            .unwrap();
+
+        assert_eq!(result.len(), 2);
+        assert_eq!(result[0], expected_0);
+        assert_eq!(result[1], expected_1);
+    }
+
+    #[tokio::test]
+    async fn test_get_byte_ranges_with_coalesce() {
+        let mock = MockFileRead::new(1024);
+        let expected_0 = mock.data.slice(0..100);
+        let expected_1 = mock.data.slice(200..300);
+        let expected_2 = mock.data.slice(500..600);
+
+        let mut reader = ArrowFileReader::new(FileMetadata { size: 1024 }, Box::new(mock))
+            .with_parquet_read_options(
+                ParquetReadOptions::builder()
+                    .with_range_coalesce_bytes(1024)
+                    .build(),
+            );
+
+        // All ranges within coalesce threshold — should merge into one fetch.
+        let result = reader
+            .get_byte_ranges(vec![0..100, 200..300, 500..600])
+            .await
+            .unwrap();
+
+        assert_eq!(result.len(), 3);
+        assert_eq!(result[0], expected_0);
+        assert_eq!(result[1], expected_1);
+        assert_eq!(result[2], expected_2);
+    }
+
+    #[tokio::test]
+    async fn test_get_byte_ranges_empty() {
+        let mock = MockFileRead::new(1024);
+        let mut reader = ArrowFileReader::new(FileMetadata { size: 1024 }, Box::new(mock));
+
+        let result = reader.get_byte_ranges(vec![]).await.unwrap();
+        assert!(result.is_empty());
+    }
+
+    #[tokio::test]
+    async fn test_get_byte_ranges_coalesce_max() {
+        let mock = MockFileRead::new(2048);
+        let expected_0 = mock.data.slice(0..100);
+        let expected_1 = mock.data.slice(1500..1600);
+
+        let mut reader = ArrowFileReader::new(FileMetadata { size: 2048 }, Box::new(mock))
+            .with_parquet_read_options(
+                ParquetReadOptions::builder()
+                    .with_range_coalesce_bytes(u64::MAX)
+                    .build(),
+            );
+
+        // u64::MAX coalesce — all ranges merge into a single fetch.
+        let result = reader
+            .get_byte_ranges(vec![0..100, 1500..1600])
+            .await
+            .unwrap();
+
+        assert_eq!(result.len(), 2);
+        assert_eq!(result[0], expected_0);
+        assert_eq!(result[1], expected_1);
+    }
+
+    #[tokio::test]
+    async fn test_get_byte_ranges_concurrency_zero() {
+        // concurrency=0 is clamped to 1, so this should not hang.
+        let mock = MockFileRead::new(1024);
+        let expected = mock.data.slice(0..100);
+
+        let mut reader = ArrowFileReader::new(FileMetadata { size: 1024 }, Box::new(mock))
+            .with_parquet_read_options(
+                ParquetReadOptions::builder()
+                    .with_range_fetch_concurrency(0)
+                    .build(),
+            );
+
+        let result = reader
+            .get_byte_ranges(vec![0..100, 200..300])
+            .await
+            .unwrap();
+        assert_eq!(result.len(), 2);
+        assert_eq!(result[0], expected);
+    }
+
+    #[tokio::test]
+    async fn test_get_byte_ranges_concurrency_one() {
+        let mock = MockFileRead::new(2048);
+        let expected_0 = mock.data.slice(0..100);
+        let expected_1 = mock.data.slice(500..600);
+        let expected_2 = mock.data.slice(1500..1600);
+
+        let mut reader = ArrowFileReader::new(FileMetadata { size: 2048 }, Box::new(mock))
+            .with_parquet_read_options(
+                ParquetReadOptions::builder()
+                    .with_range_coalesce_bytes(0)
+                    .with_range_fetch_concurrency(1)
+                    .build(),
+            );
+
+        // concurrency=1 with no coalescing — sequential fetches.
+        let result = reader
+            .get_byte_ranges(vec![0..100, 500..600, 1500..1600])
+            .await
+            .unwrap();
+
+        assert_eq!(result.len(), 3);
+        assert_eq!(result[0], expected_0);
+        assert_eq!(result[1], expected_1);
+        assert_eq!(result[2], expected_2);
+    }
+}
diff --git a/crates/iceberg/src/arrow/reader/mod.rs b/crates/iceberg/src/arrow/reader/mod.rs
new file mode 100644
index 0000000000..bc465e9973
--- /dev/null
+++ b/crates/iceberg/src/arrow/reader/mod.rs
@@ -0,0 +1,193 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Parquet file data reader
+
+use arrow_array::RecordBatch;
+use futures::{SinkExt, Stream, StreamExt};
+
+use crate::arrow::caching_delete_file_loader::CachingDeleteFileLoader;
+use crate::error::Result;
+use crate::io::FileIO;
+use crate::scan::ArrowRecordBatchStream;
+use crate::util::available_parallelism;
+use crate::{Error, ErrorKind};
+
+/// Default gap (1 MiB) up to which neighbouring byte ranges are coalesced into a
+/// single request. Matches object_store's `OBJECT_STORE_COALESCE_DEFAULT`.
+const DEFAULT_RANGE_COALESCE_BYTES: u64 = 1024 * 1024;
+
+/// Default maximum number of coalesced byte ranges fetched concurrently.
+/// Matches object_store's `OBJECT_STORE_COALESCE_PARALLEL`.
+const DEFAULT_RANGE_FETCH_CONCURRENCY: usize = 10;
+
+/// Default number of bytes (512 KiB) to prefetch when parsing Parquet footer metadata.
+/// Matches DataFusion's default `ParquetOptions::metadata_size_hint`.
+const DEFAULT_METADATA_SIZE_HINT: usize = 512 * 1024;
+
+mod file_reader;
+mod options;
+mod pipeline;
+mod positional_deletes;
+mod predicate_visitor;
+mod projection;
+mod row_filter;
+pub use file_reader::ArrowFileReader;
+pub(crate) use options::ParquetReadOptions;
+use predicate_visitor::{CollectFieldIdVisitor, PredicateConverter};
+use projection::{add_fallback_field_ids_to_arrow_schema, apply_name_mapping_to_arrow_schema};
+
+/// Builder that collects reader settings and produces an `ArrowReader` via `build`.
+pub struct ArrowReaderBuilder {
+    batch_size: Option<usize>,
+    file_io: FileIO,
+    concurrency_limit_data_files: usize,
+    row_group_filtering_enabled: bool,
+    row_selection_enabled: bool,
+    parquet_read_options: ParquetReadOptions,
+}
+
+impl ArrowReaderBuilder {
+    /// Create a builder whose data-file concurrency defaults to `available_parallelism()`.
+    pub fn new(file_io: FileIO) -> Self {
+        let num_cpus = available_parallelism().get();
+
+        ArrowReaderBuilder {
+            batch_size: None,
+            file_io,
+            concurrency_limit_data_files: num_cpus,
+            row_group_filtering_enabled: true,
+            row_selection_enabled: false,
+            parquet_read_options: ParquetReadOptions::builder().build(),
+        }
+    }
+
+    /// Sets the maximum number of in-flight data files that are being fetched.
+    pub fn with_data_file_concurrency_limit(mut self, val: usize) -> Self {
+        self.concurrency_limit_data_files = val;
+        self
+    }
+
+    /// Sets the desired size of batches in the response
+    /// to something other than the default
+    pub fn with_batch_size(mut self, batch_size: usize) -> Self {
+        self.batch_size = Some(batch_size);
+        self
+    }
+
+    /// Determines whether to enable row group filtering (enabled by default).
+    pub fn with_row_group_filtering_enabled(mut self, row_group_filtering_enabled: bool) -> Self {
+        self.row_group_filtering_enabled = row_group_filtering_enabled;
+        self
+    }
+
+    /// Determines whether to enable row selection (disabled by default).
+    pub fn with_row_selection_enabled(mut self, row_selection_enabled: bool) -> Self {
+        self.row_selection_enabled = row_selection_enabled;
+        self
+    }
+
+    /// Provide a hint as to the number of bytes to prefetch for parsing the Parquet metadata
+    ///
+    /// This hint can help reduce the number of fetch requests. For more details see the
+    /// [ParquetMetaDataReader documentation](https://docs.rs/parquet/latest/parquet/file/metadata/struct.ParquetMetaDataReader.html#method.with_prefetch_hint).
+    pub fn with_metadata_size_hint(mut self, metadata_size_hint: usize) -> Self {
+        self.parquet_read_options.metadata_size_hint = Some(metadata_size_hint);
+        self
+    }
+
+    /// Sets the gap threshold for merging nearby byte ranges into a single request.
+    /// Ranges whose gap is at most this value will be coalesced.
+    ///
+    /// Defaults to 1 MiB, matching object_store's OBJECT_STORE_COALESCE_DEFAULT.
+    pub fn with_range_coalesce_bytes(mut self, range_coalesce_bytes: u64) -> Self {
+        self.parquet_read_options.range_coalesce_bytes = range_coalesce_bytes;
+        self
+    }
+
+    /// Sets the maximum number of merged byte ranges to fetch concurrently.
+    ///
+    /// Defaults to 10 (object_store's OBJECT_STORE_COALESCE_PARALLEL); `ArrowFileReader` clamps 0 to 1.
+    pub fn with_range_fetch_concurrency(mut self, range_fetch_concurrency: usize) -> Self {
+        self.parquet_read_options.range_fetch_concurrency = range_fetch_concurrency;
+        self
+    }
+
+    /// Build the ArrowReader; the delete-file loader shares the data-file concurrency limit.
+    pub fn build(self) -> ArrowReader {
+        ArrowReader {
+            batch_size: self.batch_size,
+            file_io: self.file_io.clone(),
+            delete_file_loader: CachingDeleteFileLoader::new(
+                self.file_io.clone(),
+                self.concurrency_limit_data_files,
+            ),
+            concurrency_limit_data_files: self.concurrency_limit_data_files,
+            row_group_filtering_enabled: self.row_group_filtering_enabled,
+            row_selection_enabled: self.row_selection_enabled,
+            parquet_read_options: self.parquet_read_options,
+        }
+    }
+}
+
+/// Reads data from Parquet files; typically built via `ArrowReaderBuilder::build`.
+#[derive(Clone)]
+pub struct ArrowReader {
+    pub(crate) batch_size: Option<usize>,
+    pub(crate) file_io: FileIO,
+    delete_file_loader: CachingDeleteFileLoader,
+
+    /// The maximum number of data files that can be fetched at the same time.
+    pub(crate) concurrency_limit_data_files: usize,
+
+    pub(crate) row_group_filtering_enabled: bool,
+    pub(crate) row_selection_enabled: bool,
+    pub(crate) parquet_read_options: ParquetReadOptions,
+}
+
+/// Trait indicating that the implementing type streams into a stream of type `S` using
+/// a reader of type `R`; `S` defaults to `ArrowRecordBatchStream`.
+pub trait StreamsInto<R, S = ArrowRecordBatchStream> {
+    /// Consume `self` and use `reader` to produce a stream of type `S`.
+    fn stream(self, reader: R) -> Result<S>;
+}
+
+/// Drains a record-batch stream into `tx`, wrapping item errors as `ErrorKind::Unexpected`
+/// with `error_context`. If `record_batch_stream` is itself an `Err`, that error is sent instead.
+/// Send results are ignored. This pattern is used in both reader.rs and incremental.rs.
+pub(crate) async fn process_record_batch_stream<E, S, T>(
+    record_batch_stream: Result<S>,
+    mut tx: T,
+    error_context: &str,
+) where
+    E: std::error::Error + Send + Sync + 'static,
+    S: Stream<Item = std::result::Result<RecordBatch, E>> + Send + Unpin + 'static,
+    T: SinkExt<Result<RecordBatch>> + Unpin + Send + 'static,
+{
+    match record_batch_stream {
+        Ok(mut stream) => {
+            while let Some(batch_result) = stream.next().await {
+                let batch = batch_result
+                    .map_err(|e| Error::new(ErrorKind::Unexpected, error_context).with_source(e));
+                let _ = tx.send(batch).await;
+            }
+        }
+        Err(e) => {
+            let _ = tx.send(Err(e)).await;
+        }
+    }
+}
diff --git a/crates/iceberg/src/arrow/reader/options.rs b/crates/iceberg/src/arrow/reader/options.rs
new file mode 100644
index 0000000000..ae6a3ed18e
--- /dev/null
+++ b/crates/iceberg/src/arrow/reader/options.rs
@@ -0,0 +1,84 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Tunables for Parquet file I/O used by `ArrowReader`.
+
+use typed_builder::TypedBuilder;
+
+use super::{
+    DEFAULT_METADATA_SIZE_HINT, DEFAULT_RANGE_COALESCE_BYTES, DEFAULT_RANGE_FETCH_CONCURRENCY,
+};
+
+/// Options for tuning Parquet file I/O; built through `with_`-prefixed builder setters.
+#[derive(Clone, Copy, Debug, TypedBuilder)]
+#[builder(field_defaults(setter(prefix = "with_")))]
+pub(crate) struct ParquetReadOptions {
+    /// Number of bytes to prefetch for parsing the Parquet metadata.
+    ///
+    /// This hint can help reduce the number of fetch requests. For more details see the
+    /// [ParquetMetaDataReader documentation](https://docs.rs/parquet/latest/parquet/file/metadata/struct.ParquetMetaDataReader.html#method.with_prefetch_hint).
+    ///
+    /// Defaults to 512 KiB, matching DataFusion's default `ParquetOptions::metadata_size_hint`.
+    #[builder(default = Some(DEFAULT_METADATA_SIZE_HINT))]
+    pub(crate) metadata_size_hint: Option<usize>,
+    /// Gap threshold, in bytes, for merging nearby byte ranges into a single request.
+    /// Ranges with gaps smaller than this value will be coalesced.
+    ///
+    /// Defaults to 1 MiB, matching object_store's `OBJECT_STORE_COALESCE_DEFAULT`.
+    #[builder(default = DEFAULT_RANGE_COALESCE_BYTES)]
+    pub(crate) range_coalesce_bytes: u64,
+    /// Maximum number of merged byte ranges to fetch concurrently.
+    ///
+    /// Defaults to 10, matching object_store's `OBJECT_STORE_COALESCE_PARALLEL`.
+    #[builder(default = DEFAULT_RANGE_FETCH_CONCURRENCY)]
+    pub(crate) range_fetch_concurrency: usize,
+    /// Whether to preload the column index when reading Parquet metadata. Defaults to `true`.
+    #[builder(default = true)]
+    pub(crate) preload_column_index: bool,
+    /// Whether to preload the offset index when reading Parquet metadata. Defaults to `true`.
+    #[builder(default = true)]
+    pub(crate) preload_offset_index: bool,
+    /// Whether to preload the page index when reading Parquet metadata. Defaults to `false`.
+    #[builder(default = false)]
+    pub(crate) preload_page_index: bool,
+}
+
+impl ParquetReadOptions { // accessors returning a copy of the same-named field
+    pub(crate) fn metadata_size_hint(&self) -> Option<usize> {
+        self.metadata_size_hint // metadata prefetch size in bytes
+    }
+
+    pub(crate) fn range_coalesce_bytes(&self) -> u64 {
+        self.range_coalesce_bytes // gap threshold for merging byte ranges
+    }
+
+    pub(crate) fn range_fetch_concurrency(&self) -> usize {
+        self.range_fetch_concurrency // max merged ranges fetched concurrently
+    }
+
+    pub(crate) fn preload_column_index(&self) -> bool {
+        self.preload_column_index
+    }
+
+    pub(crate) fn preload_offset_index(&self) -> bool {
+        self.preload_offset_index
+    }
+
+    pub(crate) fn preload_page_index(&self) -> bool {
+        self.preload_page_index
+    }
+}
diff --git a/crates/iceberg/src/arrow/reader/pipeline.rs b/crates/iceberg/src/arrow/reader/pipeline.rs
new file mode 100644
index 0000000000..7bb7feb9d3
--- /dev/null
+++ b/crates/iceberg/src/arrow/reader/pipeline.rs
@@ -0,0 +1,1330 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! The main `ArrowReader` pipeline: reading a stream of `FileScanTask`s,
+//! opening Parquet files and resolving schemas, then wiring projection,
+//! predicates, row-group / row selection, and delete handling into a stream
+//! of transformed Arrow `RecordBatch`es.
+
+use std::sync::atomic::AtomicU64;
+use std::sync::{Arc, Mutex};
+
+use arrow_array::RecordBatch;
+use futures::channel::mpsc::channel;
+use futures::{StreamExt, TryStreamExt};
+use parquet::arrow::arrow_reader::{ArrowReaderMetadata, ArrowReaderOptions};
+use parquet::arrow::{PARQUET_FIELD_ID_META_KEY, ParquetRecordBatchStreamBuilder};
+
+use super::{
+    ArrowFileReader, ArrowReader, ParquetReadOptions, add_fallback_field_ids_to_arrow_schema,
+    apply_name_mapping_to_arrow_schema, process_record_batch_stream,
+};
+use crate::arrow::caching_delete_file_loader::CachingDeleteFileLoader;
+use crate::arrow::int96::coerce_int96_timestamps;
+use crate::arrow::record_batch_transformer::{
+    RecordBatchTransformer, RecordBatchTransformerBuilder,
+};
+use crate::arrow::scan_metrics::{CountingFileRead, ScanMetrics, ScanResult};
+use crate::delete_vector::DeleteVector;
+use crate::error::Result;
+use crate::io::{FileIO, FileMetadata, FileRead};
+use crate::metadata_columns::{
+    RESERVED_FIELD_ID_FILE, RESERVED_FIELD_ID_POS, is_metadata_field, row_pos_field,
+};
+use crate::runtime::spawn;
+use crate::scan::{ArrowRecordBatchStream, FileScanTask, FileScanTaskStream};
+use crate::spec::{Datum, NameMapping, PartitionSpec, SchemaRef, Struct};
+use crate::{Error, ErrorKind};
+
+impl ArrowReader {
+    /// Takes a stream of [`FileScanTask`]s and reads all the files they reference.
+    ///
+    /// Returns a [`ScanResult`] containing the record batch stream and scan metrics.
+    ///
+    /// With a data-file concurrency limit of 1, tasks are processed one after another and
+    /// their batches are yielded in task order. Otherwise each task is read on its own
+    /// spawned task and batches arrive through a bounded channel, so batches belonging to
+    /// different files may interleave.
+    ///
+    /// An error yielded by `tasks` itself is reported on the returned stream in both modes,
+    /// wrapped as `ErrorKind::Unexpected` with the message "file scan task generate failed".
+    pub fn read(self, tasks: FileScanTaskStream) -> Result<ScanResult> {
+        let concurrency_limit_data_files = self.concurrency_limit_data_files;
+        let scan_metrics = ScanMetrics::new();
+
+        // Per-scan state shared (by clone) between all tasks of this call.
+        let task_reader = FileScanTaskReader {
+            batch_size: self.batch_size,
+            file_io: self.file_io,
+            delete_file_loader: self
+                .delete_file_loader
+                .with_scan_metrics(scan_metrics.clone()),
+            row_group_filtering_enabled: self.row_group_filtering_enabled,
+            row_selection_enabled: self.row_selection_enabled,
+            parquet_read_options: self.parquet_read_options,
+            scan_metrics: scan_metrics.clone(),
+        };
+
+        // Fast-path for single concurrency to avoid overhead of try_flatten_unordered
+        let stream: ArrowRecordBatchStream = if concurrency_limit_data_files == 1 {
+            Box::pin(
+                tasks
+                    .and_then(move |task| task_reader.clone().process(task))
+                    .map_err(|err| {
+                        Error::new(ErrorKind::Unexpected, "file scan task generate failed")
+                            .with_source(err)
+                    })
+                    .try_flatten(),
+            )
+        } else {
+            // Multi-concurrency path: spawn each file's IO-heavy processing as an independent
+            // tokio task for true parallelism, streaming results through a channel.
+            let (tx, rx) = channel::<Result<RecordBatch>>(concurrency_limit_data_files);
+
+            // Outer spawn: runs the task coordination loop without blocking the caller.
+            spawn(async move {
+                let coordination = tasks
+                    .try_for_each_concurrent(concurrency_limit_data_files, |task| {
+                        let task_reader = task_reader.clone();
+                        let tx = tx.clone();
+
+                        async move {
+                            // Inner spawn: each file's IO operations run on their own tokio task.
+                            spawn(async move {
+                                let record_batch_stream = task_reader.process(task).await;
+                                process_record_batch_stream(
+                                    record_batch_stream,
+                                    tx,
+                                    "failed to read record batch",
+                                )
+                                .await;
+                            })
+                            .await;
+
+                            Ok(())
+                        }
+                    })
+                    .await;
+
+                // The per-task closure always returns `Ok`, so an `Err` here can only come
+                // from the task stream itself. Forward it to the consumer: ending the output
+                // silently would make a failed scan look like a complete one.
+                if let Err(err) = coordination {
+                    let mut tx = tx;
+                    let err = Error::new(ErrorKind::Unexpected, "file scan task generate failed")
+                        .with_source(err);
+                    // Best-effort: the receiver may already have been dropped.
+                    let _ = futures::SinkExt::send(&mut tx, Err(err)).await;
+                }
+            });
+
+            Box::pin(rx) as ArrowRecordBatchStream
+        };
+
+        Ok(ScanResult::new(stream, scan_metrics))
+    }
+}
+
+/// Per-scan state for processing [`FileScanTask`]s. Created once per
+/// [`ArrowReader::read`] call and cloned per task.
+#[derive(Clone)]
+struct FileScanTaskReader {
+    batch_size: Option<usize>, // copied from the `ArrowReader`
+    file_io: FileIO, // copied from the `ArrowReader`
+    delete_file_loader: CachingDeleteFileLoader, // the reader's loader with this scan's metrics attached
+    row_group_filtering_enabled: bool, // copied from the `ArrowReader`
+    row_selection_enabled: bool, // copied from the `ArrowReader`
+    parquet_read_options: ParquetReadOptions, // base options; `process` adjusts a per-task copy
+    scan_metrics: ScanMetrics, // supplies the bytes-read counter for opened files
+}
+
+impl FileScanTaskReader {
+    /// Reads one [`FileScanTask`] and returns its record batches as a stream.
+    ///
+    /// Steps:
+    /// 1. Open the Parquet file while the task's delete files load concurrently.
+    /// 2. Combine the task predicate with the equality-delete predicate (logical AND).
+    /// 3. Apply byte-range, row-group, row-selection and positional-delete filters.
+    /// 4. Apply the projection and wrap the stream so each batch passes through the
+    ///    record batch transformer.
+    ///
+    /// # Errors
+    /// Returns an error if the file cannot be opened, if loading the deletes fails or the
+    /// loader goes away without reporting a result, or if a filter/projection step fails.
+    async fn process(self, task: FileScanTask) -> Result<ArrowRecordBatchStream> {
+        // Preload the page index only for tasks that filter rows by predicate (with row
+        // selection enabled) or that carry delete files.
+        let should_load_page_index =
+            (self.row_selection_enabled && task.predicate.is_some()) || !task.deletes.is_empty();
+        let mut parquet_read_options = self.parquet_read_options;
+        parquet_read_options.preload_page_index = should_load_page_index;
+
+        // Concurrently open the Parquet file and start loading delete files.
+        let open_fut = ArrowReader::open_parquet_stream_builder(
+            &task.data_file_path,
+            task.file_size_in_bytes,
+            self.file_io.clone(),
+            parquet_read_options,
+            ArrowReader::build_virtual_columns(&task.project_field_ids),
+            self.batch_size,
+            task.name_mapping.as_deref(),
+            Some(Arc::clone(self.scan_metrics.bytes_read_counter())),
+            Some(&task.schema),
+        );
+        let delete_filter_rx = self
+            .delete_file_loader
+            .load_deletes(&task.deletes, Arc::clone(&task.schema));
+        // A receive error means the loader dropped its sender without reporting a result.
+        // Surface that as an error instead of panicking inside the scan task.
+        let delete_filter_fut = async {
+            delete_filter_rx.await.map_err(|e| {
+                Error::new(
+                    ErrorKind::Unexpected,
+                    "delete file loader stopped before returning a result",
+                )
+                .with_source(e)
+            })
+        };
+
+        let (open_result, delete_filter) = futures::join!(open_fut, delete_filter_fut);
+
+        let (builder, has_missing_field_ids) = open_result?;
+        // Outer `?`: the loader went away. Inner `?`: the loader reported a failure.
+        let delete_filter = delete_filter??;
+
+        let delete_predicate = delete_filter.build_equality_delete_predicate(&task).await?;
+
+        // In addition to the optional predicate supplied in the `FileScanTask`,
+        // we also have an optional predicate resulting from equality delete files.
+        // If both are present, we logical-AND them together to form a single filter
+        // predicate that we can pass to the `RecordBatchStreamBuilder`.
+        let final_predicate = match (&task.predicate, delete_predicate) {
+            (None, None) => None,
+            (Some(predicate), None) => Some(predicate.clone()),
+            // The delete predicate is owned here, so it can be moved instead of cloned.
+            (None, Some(predicate)) => Some(predicate),
+            (Some(filter_predicate), Some(delete_predicate)) => {
+                Some(filter_predicate.clone().and(delete_predicate))
+            }
+        };
+
+        let positional_deletes = delete_filter.get_delete_vector(&task);
+
+        let builder = ArrowReader::apply_parquet_filters(
+            builder,
+            task.start,
+            task.length,
+            &task.schema,
+            final_predicate.as_ref(),
+            positional_deletes.as_deref(),
+            self.row_group_filtering_enabled,
+            self.row_selection_enabled,
+            false, // use_predicate_projection: projection applied separately via build_projected_record_batch_stream
+            has_missing_field_ids,
+        )?;
+
+        ArrowReader::build_projected_record_batch_stream(
+            builder,
+            &task.project_field_ids,
+            task.schema_ref(),
+            has_missing_field_ids,
+            &task.data_file_path,
+            task.partition_spec,
+            task.partition,
+        )
+    }
+}
+
+impl ArrowReader {
+    /// Opens `data_file_path` through `file_io` and loads its Parquet metadata.
+    ///
+    /// The underlying reader is wrapped in [`CountingFileRead`] so all I/O is accumulated
+    /// into `bytes_read`. Returns the file reader together with the loaded metadata.
+    pub(crate) async fn open_parquet_file(
+        data_file_path: &str,
+        file_io: &FileIO,
+        file_size_in_bytes: u64,
+        parquet_read_options: ParquetReadOptions,
+        bytes_read: &Arc<AtomicU64>,
+    ) -> Result<(ArrowFileReader, ArrowReaderMetadata)> {
+        let input_file = file_io.new_input(data_file_path)?;
+        let raw_reader = input_file.reader().await?;
+        let counted_reader: Box<dyn FileRead> =
+            Box::new(CountingFileRead::new(raw_reader, Arc::clone(bytes_read)));
+
+        Self::build_parquet_reader(counted_reader, file_size_in_bytes, parquet_read_options).await
+    }
+
+    /// Wraps `parquet_reader` in an [`ArrowFileReader`] configured with `parquet_read_options`
+    /// and loads the Arrow reader metadata through it.
+    ///
+    /// `file_size_in_bytes` is handed to the reader as the file's size. A failure to load the
+    /// metadata is reported as `ErrorKind::Unexpected` with the original error as its source.
+    async fn build_parquet_reader(
+        parquet_reader: Box<dyn FileRead>,
+        file_size_in_bytes: u64,
+        parquet_read_options: ParquetReadOptions,
+    ) -> Result<(ArrowFileReader, ArrowReaderMetadata)> {
+        let file_metadata = FileMetadata {
+            size: file_size_in_bytes,
+        };
+        let mut file_reader = ArrowFileReader::new(file_metadata, parquet_reader)
+            .with_parquet_read_options(parquet_read_options);
+
+        let load_result =
+            ArrowReaderMetadata::load_async(&mut file_reader, Default::default()).await;
+        match load_result {
+            Ok(arrow_metadata) => Ok((file_reader, arrow_metadata)),
+            Err(source) => Err(
+                Error::new(ErrorKind::Unexpected, "Failed to load Parquet metadata")
+                    .with_source(source),
+            ),
+        }
+    }
+
+    /// Opens a Parquet file, resolves its schema (name-mapping / field-ID fallback), and
+    /// applies the batch size when one is given. Returns `(builder, has_missing_field_ids)`.
+    ///
+    /// This is the async phase shared by every reading path. Callers that have background
+    /// work to overlap (e.g. delete-file loading) can run this concurrently with that work
+    /// using [`futures::join!`], then pass the result to [`Self::apply_parquet_filters`].
+    ///
+    /// Implements the three-branch schema resolution strategy matching Java's `ReadConf` constructor:
+    /// - Branch 1: file has embedded field IDs → trust them (only virtual columns are added)
+    /// - Branch 2: name_mapping present → apply name mapping to assign correct Iceberg field IDs
+    /// - Branch 3: no name mapping → assign fallback position-based IDs
+    ///
+    /// When `iceberg_schema` is `Some`, INT96 timestamp columns are coerced to the resolution
+    /// specified by the Iceberg schema before building the stream reader.
+    ///
+    /// When `bytes_read` is `Some`, wraps the file reader with [`CountingFileRead`] so all
+    /// I/O bytes are accumulated into the provided counter.
+    #[allow(clippy::too_many_arguments)]
+    pub(crate) async fn open_parquet_stream_builder(
+        data_file_path: &str,
+        file_size_in_bytes: u64,
+        file_io: FileIO,
+        parquet_read_options: ParquetReadOptions,
+        virtual_columns: Vec<Arc<arrow_schema::Field>>,
+        batch_size: Option<usize>,
+        name_mapping: Option<&NameMapping>,
+        bytes_read: Option<Arc<AtomicU64>>,
+        iceberg_schema: Option<&crate::spec::Schema>,
+    ) -> Result<(ParquetRecordBatchStreamBuilder<ArrowFileReader>, bool)> {
+        let parquet_file = file_io.new_input(data_file_path)?;
+        let raw_reader = parquet_file.reader().await?;
+        let boxed_reader: Box<dyn FileRead> = if let Some(counter) = bytes_read {
+            Box::new(CountingFileRead::new(raw_reader, counter))
+        } else {
+            Box::new(raw_reader) // no counter supplied: read without byte accounting
+        };
+        let (file_reader, arrow_metadata) =
+            Self::build_parquet_reader(boxed_reader, file_size_in_bytes, parquet_read_options)
+                .await?;
+
+        // Detect missing field IDs by inspecting only the first top-level field's metadata
+        // (an empty schema counts as "not missing"). Corresponds to Java's ParquetSchemaUtil.hasIds()
+        let has_missing_field_ids = arrow_metadata
+            .schema()
+            .fields()
+            .iter()
+            .next()
+            .is_some_and(|f| f.metadata().get(PARQUET_FIELD_ID_META_KEY).is_none());
+
+        // Three-branch schema resolution strategy matching Java's ReadConf constructor.
+        //
+        // When Parquet files lack field IDs (e.g., Hive/Spark migrations via add_files),
+        // we must assign field IDs BEFORE reading data to enable correct column projection.
+        let arrow_metadata = if has_missing_field_ids {
+            // Parquet file lacks field IDs - must assign them before reading.
+            let arrow_schema = if let Some(nm) = name_mapping {
+                // Branch 2: Apply name mapping to assign correct Iceberg field IDs.
+                // Corresponds to Java's ParquetSchemaUtil.applyNameMapping()
+                apply_name_mapping_to_arrow_schema(Arc::clone(arrow_metadata.schema()), nm)?
+            } else {
+                // Branch 3: No name mapping - use position-based fallback IDs.
+                // Corresponds to Java's ParquetSchemaUtil.addFallbackIds()
+                add_fallback_field_ids_to_arrow_schema(arrow_metadata.schema())
+            };
+            let mut options = ArrowReaderOptions::new().with_schema(arrow_schema);
+            if !virtual_columns.is_empty() {
+                options = options.with_virtual_columns(virtual_columns)?;
+            }
+            ArrowReaderMetadata::try_new(Arc::clone(arrow_metadata.metadata()), options).map_err(
+                |e| {
+                    Error::new(
+                        ErrorKind::Unexpected,
+                        "Failed to create ArrowReaderMetadata with field ID schema",
+                    )
+                    .with_source(e)
+                },
+            )?
+        } else {
+            // Branch 1: File has embedded field IDs - trust them; rebuild only for virtual columns.
+            if !virtual_columns.is_empty() {
+                let options = ArrowReaderOptions::new().with_virtual_columns(virtual_columns)?;
+                ArrowReaderMetadata::try_new(Arc::clone(arrow_metadata.metadata()), options)
+                    .map_err(|e| {
+                        Error::new(
+                            ErrorKind::Unexpected,
+                            "Failed to create ArrowReaderMetadata with virtual columns",
+                        )
+                        .with_source(e)
+                    })?
+            } else {
+                arrow_metadata
+            }
+        };
+
+        // Coerce INT96 timestamps to the Iceberg schema's resolution before the stream reader is
+        // built (avoids i64 overflow in arrow-rs). NOTE(review): options below carry no virtual columns - confirm.
+        let arrow_metadata = if let Some(schema) = iceberg_schema {
+            if let Some(coerced_schema) = coerce_int96_timestamps(arrow_metadata.schema(), schema) {
+                let options = ArrowReaderOptions::new().with_schema(Arc::clone(&coerced_schema));
+                ArrowReaderMetadata::try_new(Arc::clone(arrow_metadata.metadata()), options)
+                    .map_err(|e| {
+                        Error::new(
+                            ErrorKind::Unexpected,
+                            format!(
+                                "Failed to create ArrowReaderMetadata with INT96-coerced schema: {coerced_schema}"
+                            ),
+                        )
+                        .with_source(e)
+                    })?
+            } else {
+                arrow_metadata // `coerce_int96_timestamps` returned `None`: keep the metadata as is
+            }
+        } else {
+            arrow_metadata // no Iceberg schema supplied: skip coercion
+        };
+
+        let mut builder =
+            ParquetRecordBatchStreamBuilder::new_with_metadata(file_reader, arrow_metadata);
+
+        if let Some(batch_size) = batch_size {
+            builder = builder.with_batch_size(batch_size);
+        }
+
+        Ok((builder, has_missing_field_ids))
+    }
+
+    /// Applies all row-level and row-group-level filters to a builder returned by
+    /// [`Self::open_parquet_stream_builder`].
+    ///
+    /// Handles byte-range row group pruning (skipped when `start` and `length` are both 0),
+    /// predicate row filtering (with optional projection), and positional-delete row selection.
+    #[allow(clippy::too_many_arguments)]
+    pub(crate) fn apply_parquet_filters(
+        mut builder: ParquetRecordBatchStreamBuilder<ArrowFileReader>,
+        start: u64,
+        length: u64,
+        schema: &crate::spec::Schema,
+        bound_predicate: Option<&crate::expr::BoundPredicate>,
+        positional_deletes: Option<&Mutex<DeleteVector>>,
+        row_group_filtering_enabled: bool,
+        row_selection_enabled: bool,
+        use_predicate_projection: bool,
+        has_missing_field_ids: bool,
+    ) -> Result<ParquetRecordBatchStreamBuilder<ArrowFileReader>> {
+        let mut selected_row_group_indices = None; // `None` = no row-group restriction
+        let mut row_selection = None; // `None` = no row-level restriction
+
+        if start != 0 || length != 0 {
+            selected_row_group_indices = Some(Self::filter_row_groups_by_byte_range(
+                builder.metadata(),
+                start,
+                length,
+            )?);
+        }
+
+        if let Some(predicate) = bound_predicate {
+            let (iceberg_field_ids, field_id_map) =
+                Self::build_field_id_set_and_map(builder.parquet_schema(), predicate)?;
+
+            if use_predicate_projection {
+                let predicate_field_ids: Vec<i32> = iceberg_field_ids.iter().copied().collect();
+                builder = Self::apply_projection(
+                    builder,
+                    &predicate_field_ids,
+                    schema,
+                    has_missing_field_ids,
+                )?;
+            }
+
+            let row_filter = Self::get_row_filter(
+                predicate,
+                builder.parquet_schema(),
+                &iceberg_field_ids,
+                &field_id_map,
+            )?;
+            builder = builder.with_row_filter(row_filter);
+
+            if row_group_filtering_enabled {
+                let predicate_filtered = Self::get_selected_row_group_indices(
+                    predicate,
+                    builder.metadata(),
+                    &field_id_map,
+                    schema,
+                )?;
+                selected_row_group_indices = Some(match selected_row_group_indices.take() {
+                    Some(existing) => existing // keep only groups chosen by both filters
+                        .into_iter()
+                        .filter(|idx| predicate_filtered.contains(idx))
+                        .collect(),
+                    None => predicate_filtered,
+                });
+            }
+
+            if row_selection_enabled {
+                row_selection = Some(Self::get_row_selection_for_filter_predicate(
+                    predicate,
+                    builder.metadata(),
+                    &selected_row_group_indices,
+                    &field_id_map,
+                    schema,
+                )?);
+            }
+        }
+
+        if let Some(positional_delete_indexes) = positional_deletes {
+            let delete_row_selection = {
+                let guard = positional_delete_indexes.lock().unwrap(); // panics if the mutex is poisoned
+                Self::build_deletes_row_selection(
+                    builder.metadata().row_groups(),
+                    &selected_row_group_indices,
+                    &guard,
+                )
+            }?; // guard is released at the end of the block above
+            row_selection = Some(match row_selection.take() {
+                None => delete_row_selection,
+                Some(prev) => prev.intersection(&delete_row_selection), // rows kept by both
+            });
+        }
+
+        if let Some(sel) = row_selection {
+            builder = builder.with_row_selection(sel);
+        }
+        if let Some(groups) = selected_row_group_indices {
+            builder = builder.with_row_groups(groups);
+        }
+
+        Ok(builder)
+    }
+
+    /// Restricts `builder` to the Parquet columns backing `field_ids`.
+    ///
+    /// Metadata field IDs (e.g. `_file`, `_pos`) are dropped first: they are virtual and do
+    /// not exist as Parquet columns. The remaining IDs are turned into a projection mask by
+    /// `get_arrow_projection_mask`, which is then installed with `with_projection`.
+    fn apply_projection(
+        builder: ParquetRecordBatchStreamBuilder<ArrowFileReader>,
+        field_ids: &[i32],
+        schema: &crate::spec::Schema,
+        has_missing_field_ids: bool,
+    ) -> Result<ParquetRecordBatchStreamBuilder<ArrowFileReader>> {
+        let real_field_ids: Vec<i32> = field_ids
+            .iter()
+            .copied()
+            .filter(|field_id| !is_metadata_field(*field_id))
+            .collect();
+
+        let projection_mask = Self::get_arrow_projection_mask(
+            &real_field_ids,
+            schema,
+            builder.parquet_schema(),
+            builder.schema(),
+            has_missing_field_ids,
+        )?;
+
+        Ok(builder.with_projection(projection_mask))
+    }
+
+    /// Lists the virtual columns the Parquet reader must produce for `project_field_ids`.
+    ///
+    /// Only `_pos` is handled here: when `RESERVED_FIELD_ID_POS` is projected the result holds
+    /// the row-position field, otherwise it is empty.
+    pub(crate) fn build_virtual_columns(
+        project_field_ids: &[i32],
+    ) -> Vec<Arc<arrow_schema::Field>> {
+        let wants_row_pos = project_field_ids.contains(&RESERVED_FIELD_ID_POS);
+        if wants_row_pos {
+            vec![Arc::clone(row_pos_field())]
+        } else {
+            Vec::new()
+        }
+    }
+
+    /// Builds the [`RecordBatchTransformer`] applied to the batches read from a data file.
+    ///
+    /// Up to three optional pieces are configured:
+    /// - a constant `_file` column holding `data_file_path`, only when
+    ///   `RESERVED_FIELD_ID_FILE` is projected
+    /// - the `_pos` virtual field, only when `RESERVED_FIELD_ID_POS` is projected
+    /// - identity-transform partition columns, only when both `partition_spec` and
+    ///   `partition` are present
+    fn build_record_batch_transformer(
+        schema: SchemaRef,
+        project_field_ids: &[i32],
+        data_file_path: &str,
+        partition_spec: Option<Arc<PartitionSpec>>,
+        partition: Option<Struct>,
+    ) -> Result<RecordBatchTransformer> {
+        let wants_file_column = project_field_ids.contains(&RESERVED_FIELD_ID_FILE);
+        let wants_pos_column = project_field_ids.contains(&RESERVED_FIELD_ID_POS);
+
+        let mut transformer = RecordBatchTransformerBuilder::new(schema, project_field_ids);
+        if wants_file_column {
+            transformer =
+                transformer.with_constant(RESERVED_FIELD_ID_FILE, Datum::string(data_file_path));
+        }
+        if wants_pos_column {
+            transformer = transformer.with_virtual_field(Arc::clone(row_pos_field()))?;
+        }
+
+        let transformer = match (partition_spec, partition) {
+            (Some(spec), Some(data)) => transformer.with_partition(spec, data)?,
+            _ => transformer,
+        };
+
+        Ok(transformer.build())
+    }
+
+    /// Final step shared by every data-file reading path: turns a configured builder into
+    /// the stream of transformed record batches.
+    ///
+    /// Applies the projection for `project_field_ids` to `builder`, constructs the
+    /// `RecordBatchTransformer`, builds the Parquet stream, and maps it so every successfully
+    /// read batch is passed through the transformer. Read errors are converted into this
+    /// crate's error type and passed along unchanged otherwise.
+    pub(crate) fn build_projected_record_batch_stream(
+        builder: ParquetRecordBatchStreamBuilder<ArrowFileReader>,
+        project_field_ids: &[i32],
+        schema: SchemaRef,
+        has_missing_field_ids: bool,
+        data_file_path: &str,
+        partition_spec: Option<Arc<PartitionSpec>>,
+        partition: Option<Struct>,
+    ) -> Result<ArrowRecordBatchStream> {
+        let projected_builder =
+            Self::apply_projection(builder, project_field_ids, &schema, has_missing_field_ids)?;
+
+        let mut transformer = Self::build_record_batch_transformer(
+            schema,
+            project_field_ids,
+            data_file_path,
+            partition_spec,
+            partition,
+        )?;
+
+        let parquet_stream = projected_builder.build()?;
+        let transformed_stream = parquet_stream.map(move |read_result| {
+            read_result
+                .map_err(Error::from)
+                .and_then(|batch| transformer.process_record_batch(batch))
+        });
+
+        Ok(Box::pin(transformed_stream) as ArrowRecordBatchStream)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::collections::HashMap;
+    use std::fs::File;
+    use std::sync::Arc;
+
+    use arrow_array::cast::AsArray;
+    use arrow_array::{Array, ArrayRef, RecordBatch};
+    use arrow_schema::{DataType, Field, Schema as ArrowSchema};
+    use futures::TryStreamExt;
+    use parquet::arrow::{ArrowWriter, PARQUET_FIELD_ID_META_KEY};
+    use parquet::basic::Compression;
+    use parquet::file::properties::WriterProperties;
+    use tempfile::TempDir;
+
+    use crate::arrow::ArrowReaderBuilder;
+    use crate::io::FileIO;
+    use crate::scan::{FileScanTask, FileScanTaskStream};
+    use crate::spec::{DataFileFormat, NestedField, PrimitiveType, Schema, SchemaRef, Type};
+
+    // INT96 encoding: [nanos_low_u32, nanos_high_u32, julian_day_u32]
+    // Julian day 2_440_588 = Unix epoch (1970-01-01)
+    const UNIX_EPOCH_JULIAN: i64 = 2_440_588;
+    const MICROS_PER_DAY: i64 = 86_400_000_000;
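+    // Noon on Julian day 2_953_529, labelled "3333" in these tests. NOTE(review): with 1970-01-01 = 2_440_588 this day falls in 3374 (3333-01-01 would be 2_938_414); either way it is outside the i64 nanosecond range (~1677-2262).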
+    const INT96_TEST_NANOS_WITHIN_DAY: u64 = 43_200_000_000_000;
+    const INT96_TEST_JULIAN_DAY: u32 = 2_953_529;
+
+    /// Builds the INT96 test timestamp from `INT96_TEST_NANOS_WITHIN_DAY` and
+    /// `INT96_TEST_JULIAN_DAY`, and returns it together with the same instant expressed as
+    /// microseconds since the Unix epoch.
+    fn make_int96_test_value() -> (parquet::data_type::Int96, i64) {
+        // INT96 layout: low 32 bits of the nanos, high 32 bits of the nanos, Julian day.
+        let nanos_low = (INT96_TEST_NANOS_WITHIN_DAY & 0xFFFFFFFF) as u32;
+        let nanos_high = (INT96_TEST_NANOS_WITHIN_DAY >> 32) as u32;
+        let mut int96 = parquet::data_type::Int96::new();
+        int96.set_data(nanos_low, nanos_high, INT96_TEST_JULIAN_DAY);
+
+        let days_since_epoch = INT96_TEST_JULIAN_DAY as i64 - UNIX_EPOCH_JULIAN;
+        let micros_within_day = (INT96_TEST_NANOS_WITHIN_DAY / 1_000) as i64;
+
+        (int96, days_since_epoch * MICROS_PER_DAY + micros_within_day)
+    }
+
+    /// Reads the Parquet file at `file_path` as a single scan task covering the whole file,
+    /// with no predicate, deletes, partition data or name mapping, and returns every batch
+    /// produced. Panics on any failure.
+    async fn read_int96_batches(
+        file_path: &str,
+        schema: SchemaRef,
+        project_field_ids: Vec<i32>,
+    ) -> Vec<RecordBatch> {
+        let reader = ArrowReaderBuilder::new(FileIO::new_with_fs()).build();
+
+        let file_len = std::fs::metadata(file_path).unwrap().len();
+        let whole_file_task = FileScanTask {
+            file_size_in_bytes: file_len,
+            start: 0,
+            length: file_len,
+            record_count: None,
+            data_file_path: file_path.to_string(),
+            data_file_format: DataFileFormat::Parquet,
+            schema,
+            project_field_ids,
+            predicate: None,
+            deletes: vec![],
+            partition: None,
+            partition_spec: None,
+            name_mapping: None,
+            case_sensitive: false,
+        };
+
+        let task_stream: FileScanTaskStream =
+            Box::pin(futures::stream::iter(vec![Ok(whole_file_task)]));
+        let scan_result = reader.read(task_stream).unwrap();
+
+        scan_result.stream().try_collect().await.unwrap()
+    }
+
+    /// Writes a three-row Parquet file at `{table_location}/{filename}` with an
+    /// OPTIONAL INT96 column `ts` and a REQUIRED INT32 column `id`, returning the
+    /// file path and the expected microsecond timestamp of each row.
+    ///
+    /// `ArrowWriter` cannot write INT96, so `SerializedFileWriter` is used directly.
+    /// With `with_field_ids` set, `ts` and `id` carry Parquet field ids 1 and 2;
+    /// otherwise no field ids are attached.
+    fn write_int96_parquet_file(
+        table_location: &str,
+        filename: &str,
+        with_field_ids: bool,
+    ) -> (String, Vec<i64>) {
+        use parquet::basic::{Repetition, Type as PhysicalType};
+        use parquet::data_type::{Int32Type, Int96, Int96Type};
+        use parquet::file::writer::SerializedFileWriter;
+        use parquet::schema::types::Type as SchemaType;
+
+        const JULIAN_2100: u32 = 2_488_070;
+
+        // One `(nanos within day, Julian day)` pair per row. Dates outside the i64
+        // nanosecond range (~1677-2262) overflow without coercion.
+        // NOTE(review): the fixture day was previously labelled "3333-01-01"; Julian day
+        // 2_953_529 appears to fall in 3374 instead — confirm before relying on the date.
+        let rows: [(u64, u32); 3] = [
+            // Midnight on the far-future fixture day.
+            (0, INT96_TEST_JULIAN_DAY),
+            // Same day, `INT96_TEST_NANOS_WITHIN_DAY` past midnight.
+            (INT96_TEST_NANOS_WITHIN_DAY, INT96_TEST_JULIAN_DAY),
+            // 2100-01-01 00:00:00
+            (0, JULIAN_2100),
+        ];
+
+        let int96_values: Vec<Int96> = rows
+            .iter()
+            .map(|&(nanos, day)| {
+                let mut value = Int96::new();
+                value.set_data((nanos & 0xFFFFFFFF) as u32, (nanos >> 32) as u32, day);
+                value
+            })
+            .collect();
+        let expected_micros: Vec<i64> = rows
+            .iter()
+            .map(|&(nanos, day)| {
+                (day as i64 - UNIX_EPOCH_JULIAN) * MICROS_PER_DAY + (nanos / 1_000) as i64
+            })
+            .collect();
+        let id_values: Vec<i32> = (0..rows.len() as i32).collect();
+
+        // `None` leaves a column without a field id.
+        let field_id = |id: i32| if with_field_ids { Some(id) } else { None };
+        let ts_column = SchemaType::primitive_type_builder("ts", PhysicalType::INT96)
+            .with_repetition(Repetition::OPTIONAL)
+            .with_id(field_id(1))
+            .build()
+            .unwrap();
+        let id_column = SchemaType::primitive_type_builder("id", PhysicalType::INT32)
+            .with_repetition(Repetition::REQUIRED)
+            .with_id(field_id(2))
+            .build()
+            .unwrap();
+        let schema = SchemaType::group_type_builder("schema")
+            .with_fields(vec![Arc::new(ts_column), Arc::new(id_column)])
+            .build()
+            .unwrap();
+
+        let file_path = format!("{table_location}/{filename}");
+        let file = File::create(&file_path).unwrap();
+        let mut writer =
+            SerializedFileWriter::new(file, Arc::new(schema), Default::default()).unwrap();
+
+        let mut row_group = writer.next_row_group().unwrap();
+        {
+            // `ts` is OPTIONAL and present in every row, so each definition level is 1.
+            // Top-level columns need no repetition levels.
+            let present: Vec<i16> = vec![1; rows.len()];
+            let mut ts_writer = row_group.next_column().unwrap().unwrap();
+            ts_writer
+                .typed::<Int96Type>()
+                .write_batch(&int96_values, Some(present.as_slice()), None)
+                .unwrap();
+            ts_writer.close().unwrap();
+        }
+        {
+            // `id` is written with neither definition nor repetition levels.
+            let mut id_writer = row_group.next_column().unwrap().unwrap();
+            id_writer
+                .typed::<Int32Type>()
+                .write_batch(&id_values, None, None)
+                .unwrap();
+            id_writer.close().unwrap();
+        }
+        row_group.close().unwrap();
+        writer.close().unwrap();
+
+        (file_path, expected_micros)
+    }
+
+    /// Reads `file_path` with `read_int96_batches` and asserts that column 0 of the
+    /// single resulting batch is a `TimestampMicrosecondArray` whose rows equal
+    /// `expected_micros` exactly: same length, same order, and no nulls.
+    ///
+    /// Panics (failing the calling test) on any mismatch.
+    async fn assert_int96_read_matches(
+        file_path: &str,
+        schema: SchemaRef,
+        project_field_ids: Vec<i32>,
+        expected_micros: &[i64],
+    ) {
+        use arrow_array::TimestampMicrosecondArray;
+
+        let batches = read_int96_batches(file_path, schema, project_field_ids).await;
+
+        assert_eq!(batches.len(), 1);
+        let ts_array = batches[0]
+            .column(0)
+            .as_any()
+            .downcast_ref::<TimestampMicrosecondArray>()
+            .expect("Expected TimestampMicrosecondArray");
+
+        // Without this check, rows beyond `expected_micros.len()` would go unverified.
+        assert_eq!(
+            ts_array.len(),
+            expected_micros.len(),
+            "Row count mismatch: got {}, expected {}",
+            ts_array.len(),
+            expected_micros.len()
+        );
+
+        // Iterating yields `Option<i64>`, so a null row fails the comparison instead of
+        // being read through `value()` as whatever sits in the values buffer.
+        for (i, (actual, expected)) in ts_array.iter().zip(expected_micros).enumerate() {
+            assert_eq!(
+                actual,
+                Some(*expected),
+                "Row {i}: got {actual:?}, expected {expected}"
+            );
+        }
+    }
+
+    /// With a data-file concurrency limit of 1, all files must be read completely and
+    /// their rows must come back in task order (the single-concurrency fast path).
+    #[tokio::test]
+    async fn test_read_with_concurrency_one() {
+        use arrow_array::Int32Array;
+
+        let id_field = NestedField::required(1, "id", Type::Primitive(PrimitiveType::Int));
+        let file_num_field =
+            NestedField::required(2, "file_num", Type::Primitive(PrimitiveType::Int));
+        let schema = Arc::new(
+            Schema::builder()
+                .with_schema_id(1)
+                .with_fields(vec![id_field.into(), file_num_field.into()])
+                .build()
+                .unwrap(),
+        );
+
+        // Non-nullable Int32 Arrow field tagged with a Parquet field id.
+        let int32_field_with_id = |name: &str, field_id: &str| {
+            Field::new(name, DataType::Int32, false).with_metadata(HashMap::from([(
+                PARQUET_FIELD_ID_META_KEY.to_string(),
+                field_id.to_string(),
+            )]))
+        };
+        let arrow_schema = Arc::new(ArrowSchema::new(vec![
+            int32_field_with_id("id", "1"),
+            int32_field_with_id("file_num", "2"),
+        ]));
+
+        let tmp_dir = TempDir::new().unwrap();
+        let table_location = tmp_dir.path().to_str().unwrap().to_string();
+        let file_path = |file_num: i32| format!("{table_location}/file_{file_num}.parquet");
+
+        // Write three Snappy-compressed files of ten rows each: file N holds
+        // ids N*10..(N+1)*10 and a constant `file_num` of N.
+        let props = WriterProperties::builder()
+            .set_compression(Compression::SNAPPY)
+            .build();
+        for file_num in 0..3 {
+            let ids: ArrayRef = Arc::new(Int32Array::from_iter_values(
+                file_num * 10..(file_num + 1) * 10,
+            ));
+            let file_nums: ArrayRef = Arc::new(Int32Array::from(vec![file_num; 10]));
+            let batch = RecordBatch::try_new(arrow_schema.clone(), vec![ids, file_nums]).unwrap();
+
+            let file = File::create(file_path(file_num)).unwrap();
+            let mut writer =
+                ArrowWriter::try_new(file, batch.schema(), Some(props.clone())).unwrap();
+            writer.write(&batch).expect("Writing batch");
+            writer.close().unwrap();
+        }
+
+        // One scan task per file; only the path and file size differ between them.
+        let scan_task = |file_num: i32| {
+            let path = file_path(file_num);
+            FileScanTask {
+                file_size_in_bytes: std::fs::metadata(&path).unwrap().len(),
+                start: 0,
+                length: 0,
+                record_count: None,
+                data_file_path: path,
+                data_file_format: DataFileFormat::Parquet,
+                schema: schema.clone(),
+                project_field_ids: vec![1, 2],
+                predicate: None,
+                deletes: vec![],
+                partition: None,
+                partition_spec: None,
+                name_mapping: None,
+                case_sensitive: false,
+            }
+        };
+        // Tasks are submitted in a specific order: file_0, file_1, file_2.
+        let tasks = vec![Ok(scan_task(0)), Ok(scan_task(1)), Ok(scan_task(2))];
+        let tasks_stream = Box::pin(futures::stream::iter(tasks)) as FileScanTaskStream;
+
+        // Read with concurrency=1 (fast-path).
+        let reader = ArrowReaderBuilder::new(FileIO::new_with_fs())
+            .with_data_file_concurrency_limit(1)
+            .build();
+        let result = reader
+            .read(tasks_stream)
+            .unwrap()
+            .stream()
+            .try_collect::<Vec<RecordBatch>>()
+            .await
+            .unwrap();
+
+        // All 30 rows (10 from each file) must be present.
+        let total_rows: usize = result.iter().map(|b| b.num_rows()).sum();
+        assert_eq!(total_rows, 30, "Should have 30 total rows");
+
+        // Flatten both columns across batches, preserving arrival order.
+        let mut all_ids = Vec::new();
+        let mut all_file_nums = Vec::new();
+        for batch in &result {
+            let id_col = batch
+                .column(0)
+                .as_primitive::<arrow_array::types::Int32Type>();
+            let file_num_col = batch
+                .column(1)
+                .as_primitive::<arrow_array::types::Int32Type>();
+
+            all_ids.extend((0..batch.num_rows()).map(|row| id_col.value(row)));
+            all_file_nums.extend((0..batch.num_rows()).map(|row| file_num_col.value(row)));
+        }
+        assert_eq!(all_ids.len(), 30);
+        assert_eq!(all_file_nums.len(), 30);
+
+        // Sequential processing means rows arrive grouped by file, in task order:
+        // (row range, expected file_num, file_num failure message, id failure message).
+        let expectations: [(std::ops::Range<usize>, i32, &str, &str); 3] = [
+            (
+                0..10,
+                0,
+                "First 10 rows should be from file_0",
+                "IDs should be 0-9",
+            ),
+            (
+                10..20,
+                1,
+                "Next 10 rows should be from file_1",
+                "IDs should be 10-19",
+            ),
+            (
+                20..30,
+                2,
+                "Last 10 rows should be from file_2",
+                "IDs should be 20-29",
+            ),
+        ];
+        for (rows, file_num, file_msg, id_msg) in expectations {
+            for i in rows {
+                assert_eq!(all_file_nums[i], file_num, "{file_msg}");
+                assert_eq!(all_ids[i], i as i32, "{id_msg}");
+            }
+        }
+    }
+
+    /// INT96 timestamps must read back correctly from a file whose Parquet columns
+    /// carry field ids.
+    #[tokio::test]
+    async fn test_read_int96_timestamps_with_field_ids() {
+        let ts_field = NestedField::optional(1, "ts", Type::Primitive(PrimitiveType::Timestamp));
+        let id_field = NestedField::required(2, "id", Type::Primitive(PrimitiveType::Int));
+        let schema = Arc::new(
+            Schema::builder()
+                .with_schema_id(1)
+                .with_fields(vec![ts_field.into(), id_field.into()])
+                .build()
+                .unwrap(),
+        );
+
+        // `tmp_dir` must stay alive until the read has finished.
+        let tmp_dir = TempDir::new().unwrap();
+        let table_location = tmp_dir.path().to_str().unwrap().to_string();
+
+        // `true`: write the file with field ids attached to its columns.
+        let (file_path, expected_micros) =
+            write_int96_parquet_file(&table_location, "with_ids.parquet", true);
+
+        assert_int96_read_matches(&file_path, schema, vec![1, 2], &expected_micros).await;
+    }
+
+    /// INT96 timestamps must read back correctly from a file whose Parquet columns
+    /// were written without field ids.
+    #[tokio::test]
+    async fn test_read_int96_timestamps_without_field_ids() {
+        let ts_field = NestedField::optional(1, "ts", Type::Primitive(PrimitiveType::Timestamp));
+        let id_field = NestedField::required(2, "id", Type::Primitive(PrimitiveType::Int));
+        let schema = Arc::new(
+            Schema::builder()
+                .with_schema_id(1)
+                .with_fields(vec![ts_field.into(), id_field.into()])
+                .build()
+                .unwrap(),
+        );
+
+        // `tmp_dir` must stay alive until the read has finished.
+        let tmp_dir = TempDir::new().unwrap();
+        let table_location = tmp_dir.path().to_str().unwrap().to_string();
+
+        // `false`: write the file with no field ids on its columns.
+        let (file_path, expected_micros) =
+            write_int96_parquet_file(&table_location, "no_ids.parquet", false);
+
+        assert_int96_read_matches(&file_path, schema, vec![1, 2], &expected_micros).await;
+    }
+
+    /// An INT96 column nested inside a required struct must be read back as
+    /// microsecond timestamps.
+    #[tokio::test]
+    async fn test_read_int96_timestamps_in_struct() {
+        use arrow_array::{StructArray, TimestampMicrosecondArray};
+        use parquet::basic::{Repetition, Type as PhysicalType};
+        use parquet::data_type::Int96Type;
+        use parquet::file::writer::SerializedFileWriter;
+        use parquet::schema::types::Type as SchemaType;
+
+        let (int96_val, expected_micros) = make_int96_test_value();
+
+        // Parquet layout:
+        //   required group data (id 1) {
+        //     optional int96 ts (id 2);
+        //   }
+        let ts_leaf = Arc::new(
+            SchemaType::primitive_type_builder("ts", PhysicalType::INT96)
+                .with_repetition(Repetition::OPTIONAL)
+                .with_id(Some(2))
+                .build()
+                .unwrap(),
+        );
+        let data_group = Arc::new(
+            SchemaType::group_type_builder("data")
+                .with_repetition(Repetition::REQUIRED)
+                .with_id(Some(1))
+                .with_fields(vec![ts_leaf])
+                .build()
+                .unwrap(),
+        );
+        let root = Arc::new(
+            SchemaType::group_type_builder("schema")
+                .with_fields(vec![data_group])
+                .build()
+                .unwrap(),
+        );
+
+        let tmp_dir = TempDir::new().unwrap();
+        let table_location = tmp_dir.path().to_str().unwrap().to_string();
+        let file_path = format!("{table_location}/struct_int96.parquet");
+        let file = File::create(&file_path).unwrap();
+        let mut writer = SerializedFileWriter::new(file, root, Default::default()).unwrap();
+
+        // The REQUIRED struct contributes no definition level; the OPTIONAL `ts` is
+        // present, hence def=1. Nothing is repeated, so no repetition levels.
+        let mut row_group = writer.next_row_group().unwrap();
+        {
+            let mut ts_writer = row_group.next_column().unwrap().unwrap();
+            ts_writer
+                .typed::<Int96Type>()
+                .write_batch(&[int96_val], Some(&[1]), None)
+                .unwrap();
+            ts_writer.close().unwrap();
+        }
+        row_group.close().unwrap();
+        writer.close().unwrap();
+
+        // Iceberg view of the same layout, with matching field ids.
+        let ts_field = NestedField::optional(2, "ts", Type::Primitive(PrimitiveType::Timestamp));
+        let data_field = NestedField::required(
+            1,
+            "data",
+            Type::Struct(crate::spec::StructType::new(vec![ts_field.into()])),
+        );
+        let iceberg_schema = Arc::new(
+            Schema::builder()
+                .with_schema_id(1)
+                .with_fields(vec![data_field.into()])
+                .build()
+                .unwrap(),
+        );
+
+        let batches = read_int96_batches(&file_path, iceberg_schema, vec![1]).await;
+        assert_eq!(batches.len(), 1);
+
+        let struct_array = batches[0]
+            .column(0)
+            .as_any()
+            .downcast_ref::<StructArray>()
+            .expect("Expected StructArray");
+        let ts_array = struct_array
+            .column(0)
+            .as_any()
+            .downcast_ref::<TimestampMicrosecondArray>()
+            .expect("Expected TimestampMicrosecondArray inside struct");
+
+        assert_eq!(
+            ts_array.value(0),
+            expected_micros,
+            "INT96 in struct: got {}, expected {expected_micros}",
+            ts_array.value(0)
+        );
+    }
+
+    /// INT96 elements of a 3-level-encoded LIST must be read back as microsecond
+    /// timestamps.
+    #[tokio::test]
+    async fn test_read_int96_timestamps_in_list() {
+        use arrow_array::{ListArray, TimestampMicrosecondArray};
+        use parquet::basic::{Repetition, Type as PhysicalType};
+        use parquet::data_type::Int96Type;
+        use parquet::file::writer::SerializedFileWriter;
+        use parquet::schema::types::Type as SchemaType;
+
+        let (int96_val, expected_micros) = make_int96_test_value();
+
+        // 3-level LIST encoding:
+        //   optional group timestamps (LIST) {
+        //     repeated group list {
+        //       optional int96 element;
+        //     }
+        //   }
+        let element_leaf = Arc::new(
+            SchemaType::primitive_type_builder("element", PhysicalType::INT96)
+                .with_repetition(Repetition::OPTIONAL)
+                .with_id(Some(2))
+                .build()
+                .unwrap(),
+        );
+        let repeated_group = Arc::new(
+            SchemaType::group_type_builder("list")
+                .with_repetition(Repetition::REPEATED)
+                .with_fields(vec![element_leaf])
+                .build()
+                .unwrap(),
+        );
+        let list_column = Arc::new(
+            SchemaType::group_type_builder("timestamps")
+                .with_repetition(Repetition::OPTIONAL)
+                .with_id(Some(1))
+                .with_logical_type(Some(parquet::basic::LogicalType::List))
+                .with_fields(vec![repeated_group])
+                .build()
+                .unwrap(),
+        );
+        let root = Arc::new(
+            SchemaType::group_type_builder("schema")
+                .with_fields(vec![list_column])
+                .build()
+                .unwrap(),
+        );
+
+        let tmp_dir = TempDir::new().unwrap();
+        let table_location = tmp_dir.path().to_str().unwrap().to_string();
+        let file_path = format!("{table_location}/list_int96.parquet");
+        let file = File::create(&file_path).unwrap();
+        let mut writer = SerializedFileWriter::new(file, root, Default::default()).unwrap();
+
+        // A single row holding a one-element list.
+        // def=3: list present (1) + repeated group (2) + element present (3)
+        // rep=0: start of a new list
+        let mut row_group = writer.next_row_group().unwrap();
+        {
+            let mut element_writer = row_group.next_column().unwrap().unwrap();
+            element_writer
+                .typed::<Int96Type>()
+                .write_batch(&[int96_val], Some(&[3]), Some(&[0]))
+                .unwrap();
+            element_writer.close().unwrap();
+        }
+        row_group.close().unwrap();
+        writer.close().unwrap();
+
+        // Iceberg view of the same layout, with matching field ids.
+        let element_field =
+            NestedField::optional(2, "element", Type::Primitive(PrimitiveType::Timestamp));
+        let list_field = NestedField::optional(
+            1,
+            "timestamps",
+            Type::List(crate::spec::ListType {
+                element_field: element_field.into(),
+            }),
+        );
+        let iceberg_schema = Arc::new(
+            Schema::builder()
+                .with_schema_id(1)
+                .with_fields(vec![list_field.into()])
+                .build()
+                .unwrap(),
+        );
+
+        let batches = read_int96_batches(&file_path, iceberg_schema, vec![1]).await;
+        assert_eq!(batches.len(), 1);
+
+        let list_array = batches[0]
+            .column(0)
+            .as_any()
+            .downcast_ref::<ListArray>()
+            .expect("Expected ListArray");
+        let ts_array = list_array
+            .values()
+            .as_any()
+            .downcast_ref::<TimestampMicrosecondArray>()
+            .expect("Expected TimestampMicrosecondArray inside list");
+
+        assert_eq!(
+            ts_array.value(0),
+            expected_micros,
+            "INT96 in list: got {}, expected {expected_micros}",
+            ts_array.value(0)
+        );
+    }
+
+    /// INT96 values of a MAP column must be read back as microsecond timestamps.
+    ///
+    /// Writes a one-row Parquet file whose only top-level column is a
+    /// `MAP<string, int96>` with a single entry, reads it through
+    /// `read_int96_batches` projecting field id 1, and checks the first map value
+    /// against the expectation from `make_int96_test_value`.
+    #[tokio::test]
+    async fn test_read_int96_timestamps_in_map() {
+        use arrow_array::{MapArray, TimestampMicrosecondArray};
+        use parquet::basic::{Repetition, Type as PhysicalType};
+        use parquet::data_type::{ByteArrayType, Int96Type};
+        use parquet::file::writer::SerializedFileWriter;
+        use parquet::schema::types::Type as SchemaType;
+
+        let tmp_dir = TempDir::new().unwrap();
+        let table_location = tmp_dir.path().to_str().unwrap().to_string();
+        let file_path = format!("{table_location}/map_int96.parquet");
+
+        // MAP encoding:
+        //   optional group ts_map (MAP) {
+        //     repeated group key_value {
+        //       required binary key (UTF8);
+        //       optional int96 value;
+        //     }
+        //   }
+        // Field ids: ts_map = 1, key = 2, value = 3 (the repeated group has none).
+        let key_type = SchemaType::primitive_type_builder("key", PhysicalType::BYTE_ARRAY)
+            .with_repetition(Repetition::REQUIRED)
+            .with_logical_type(Some(parquet::basic::LogicalType::String))
+            .with_id(Some(2))
+            .build()
+            .unwrap();
+
+        let value_type = SchemaType::primitive_type_builder("value", PhysicalType::INT96)
+            .with_repetition(Repetition::OPTIONAL)
+            .with_id(Some(3))
+            .build()
+            .unwrap();
+
+        let key_value_group = SchemaType::group_type_builder("key_value")
+            .with_repetition(Repetition::REPEATED)
+            .with_fields(vec![Arc::new(key_type), Arc::new(value_type)])
+            .build()
+            .unwrap();
+
+        let map_type = SchemaType::group_type_builder("ts_map")
+            .with_repetition(Repetition::OPTIONAL)
+            .with_id(Some(1))
+            .with_logical_type(Some(parquet::basic::LogicalType::Map))
+            .with_fields(vec![Arc::new(key_value_group)])
+            .build()
+            .unwrap();
+
+        let parquet_schema = SchemaType::group_type_builder("schema")
+            .with_fields(vec![Arc::new(map_type)])
+            .build()
+            .unwrap();
+
+        let (int96_val, expected_micros) = make_int96_test_value();
+
+        let file = File::create(&file_path).unwrap();
+        let mut writer =
+            SerializedFileWriter::new(file, Arc::new(parquet_schema), Default::default()).unwrap();
+
+        // Write a single row with a map containing one key-value pair.
+        // rep=0 for both columns: start of a new map.
+        // key def=2: map present (1) + key_value entry present (2), key is REQUIRED.
+        // value def=3: map present (1) + key_value entry present (2) + value present (3).
+        let mut row_group = writer.next_row_group().unwrap();
+        {
+            // First leaf column in schema order: the map key.
+            let mut col = row_group.next_column().unwrap().unwrap();
+            col.typed::<ByteArrayType>()
+                .write_batch(
+                    &[parquet::data_type::ByteArray::from("event_time")],
+                    Some(&[2]),
+                    Some(&[0]),
+                )
+                .unwrap();
+            col.close().unwrap();
+        }
+        {
+            // Second leaf column in schema order: the INT96 map value.
+            let mut col = row_group.next_column().unwrap().unwrap();
+            col.typed::<Int96Type>()
+                .write_batch(&[int96_val], Some(&[3]), Some(&[0]))
+                .unwrap();
+            col.close().unwrap();
+        }
+        row_group.close().unwrap();
+        writer.close().unwrap();
+
+        // Iceberg schema mirroring the Parquet layout, using the same field ids.
+        let iceberg_schema = Arc::new(
+            Schema::builder()
+                .with_schema_id(1)
+                .with_fields(vec![
+                    NestedField::optional(
+                        1,
+                        "ts_map",
+                        Type::Map(crate::spec::MapType {
+                            key_field: NestedField::required(
+                                2,
+                                "key",
+                                Type::Primitive(PrimitiveType::String),
+                            )
+                            .into(),
+                            value_field: NestedField::optional(
+                                3,
+                                "value",
+                                Type::Primitive(PrimitiveType::Timestamp),
+                            )
+                            .into(),
+                        }),
+                    )
+                    .into(),
+                ])
+                .build()
+                .unwrap(),
+        );
+
+        // Project only the top-level map column (field id 1).
+        let batches = read_int96_batches(&file_path, iceberg_schema, vec![1]).await;
+
+        assert_eq!(batches.len(), 1);
+        let map_array = batches[0]
+            .column(0)
+            .as_any()
+            .downcast_ref::<MapArray>()
+            .expect("Expected MapArray");
+        // The map's values child must have been decoded as microsecond timestamps.
+        let ts_array = map_array
+            .values()
+            .as_any()
+            .downcast_ref::<TimestampMicrosecondArray>()
+            .expect("Expected TimestampMicrosecondArray as map values");
+
+        assert_eq!(
+            ts_array.value(0),
+            expected_micros,
+            "INT96 in map: got {}, expected {expected_micros}",
+            ts_array.value(0)
+        );
+    }
+}
diff --git a/crates/iceberg/src/arrow/reader/positional_deletes.rs b/crates/iceberg/src/arrow/reader/positional_deletes.rs
new file mode 100644
index 0000000000..b2993572c5
--- /dev/null
+++ b/crates/iceberg/src/arrow/reader/positional_deletes.rs
@@ -0,0 +1,934 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Positional delete handling for `ArrowReader`: converting a `DeleteVector`
+//! into a Parquet `RowSelection` that skips the deleted rows, while respecting
+//! any row-group selection made by the predicate evaluator.
+
+use parquet::arrow::arrow_reader::{RowSelection, RowSelector};
+use parquet::file::metadata::RowGroupMetaData;
+
+use super::ArrowReader;
+use crate::delete_vector::DeleteVector;
+use crate::error::Result;
+
+impl ArrowReader {
+    /// Translates positional deletes into a Parquet `RowSelection`.
+    ///
+    /// Row groups are visited in file order while tracking the absolute index of each group's
+    /// first row (derived from the `num_rows` in its metadata). For every visited group the
+    /// function emits alternating `select` / `skip` runs so that each row whose position appears
+    /// in `positional_deletes` is skipped and every other row is kept.
+    ///
+    /// When `selected_row_groups` is `Some`, only the listed row groups produce selectors: deletes
+    /// inside an unlisted group are stepped over without output, and the walk stops as soon as
+    /// every listed group has been handled. With `None`, every row group produces selectors.
+    ///
+    /// Selectors describe only the rows of the visited row groups, concatenated in file order.
+    /// No path in this function returns `Err`; the `Result` wrapper is part of the signature.
+    ///
+    /// NOTE(review): listed indices are matched in order against the enumeration of
+    /// `row_group_metadata_list`, so the list is presumably sorted ascending - confirm at callers.
+    pub(super) fn build_deletes_row_selection(
+        row_group_metadata_list: &[RowGroupMetaData],
+        selected_row_groups: &Option<Vec<usize>>,
+        positional_deletes: &DeleteVector,
+    ) -> Result<RowSelection> {
+        let mut selectors: Vec<RowSelector> = Vec::new();
+        let mut deletes = positional_deletes.iter();
+        // One-element look-ahead: the next delete position not yet turned into a selector.
+        let mut pending_delete = deletes.next();
+        // Cursor into `selected_row_groups`; stays at 0 when no selection was supplied.
+        let mut selection_cursor = 0;
+        // Absolute index of the first row of the row group currently being visited.
+        let mut group_start: u64 = 0;
+
+        for (group_idx, group_meta) in row_group_metadata_list.iter().enumerate() {
+            // `num_rows()` is cast to `u64` so it can be added to absolute row positions.
+            let group_len = group_meta.num_rows() as u64;
+            // Exclusive end of this group, i.e. the first row of the following one.
+            let group_end = group_start + group_len;
+
+            if let Some(wanted) = selected_row_groups {
+                // Every requested row group has been handled; nothing is left to emit.
+                if selection_cursor == wanted.len() {
+                    break;
+                }
+
+                if wanted[selection_cursor] != group_idx {
+                    // Unselected group: emit nothing, but move the delete stream past it.
+                    // `advance_to` repositions the iterator at the first delete >= `group_end`;
+                    // the look-ahead is refreshed only when it is stale, i.e. when it points
+                    // into the group being skipped. A look-ahead at or beyond `group_end` was
+                    // already taken from the iterator and must be kept, not replaced.
+                    deletes.advance_to(group_end);
+                    if pending_delete.is_some_and(|pos| pos < group_end) {
+                        pending_delete = deletes.next();
+                    }
+
+                    group_start = group_end;
+                    continue;
+                }
+
+                // Selected group: the next pass should look for the following listed index.
+                selection_cursor += 1;
+            }
+
+            // Fast path: no delete remains, or the next one lies past this group, so the whole
+            // group is kept with a single selector.
+            let mut next_delete = match pending_delete {
+                Some(pos) if pos < group_end => pos,
+                _ => {
+                    selectors.push(RowSelector::select(group_len as usize));
+                    group_start = group_end;
+                    continue;
+                }
+            };
+
+            // First row of this group not yet covered by a selector.
+            let mut cursor = group_start;
+            'runs: while next_delete < group_end {
+                // Keep the live rows that sit in front of the next delete.
+                if cursor < next_delete {
+                    let live_rows = next_delete - cursor;
+                    selectors.push(RowSelector::select(live_rows as usize));
+                    cursor = next_delete;
+                }
+
+                // Fold consecutive deleted rows of this group into one skip.
+                let mut skipped = 0;
+                while next_delete == cursor && next_delete < group_end {
+                    skipped += 1;
+                    cursor += 1;
+
+                    pending_delete = deletes.next();
+                    match pending_delete {
+                        Some(pos) => next_delete = pos,
+                        None => {
+                            // That was the final delete: close the skip here and leave the
+                            // outer loop so the rest of the group is selected below.
+                            selectors.push(RowSelector::skip(skipped));
+                            break 'runs;
+                        }
+                    }
+                }
+                // Flush the run that the loop above just finished counting.
+                if skipped > 0 {
+                    selectors.push(RowSelector::skip(skipped));
+                }
+            }
+
+            // Whatever follows the last delete in this group is kept.
+            if cursor < group_end {
+                let tail_rows = group_end - cursor;
+                selectors.push(RowSelector::select(tail_rows as usize));
+            }
+
+            // The next row group starts where this one ended.
+            group_start = group_end;
+        }
+
+        // `into()` turns the collected selectors into the returned `RowSelection`.
+        Ok(selectors.into())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::collections::HashMap;
+    use std::fs::File;
+    use std::sync::Arc;
+
+    use arrow_array::cast::AsArray;
+    use arrow_array::{RecordBatch, StringArray};
+    use arrow_schema::{DataType, Field, Schema as ArrowSchema};
+    use futures::TryStreamExt;
+    use parquet::arrow::arrow_reader::{RowSelection, RowSelector};
+    use parquet::arrow::{ArrowWriter, PARQUET_FIELD_ID_META_KEY};
+    use parquet::basic::Compression;
+    use parquet::file::metadata::{ColumnChunkMetaData, RowGroupMetaData};
+    use parquet::file::properties::WriterProperties;
+    use parquet::schema::types::{SchemaDescPtr, SchemaDescriptor};
+    use roaring::RoaringTreemap;
+    use tempfile::TempDir;
+
+    use crate::arrow::{ArrowReader, ArrowReaderBuilder};
+    use crate::delete_vector::DeleteVector;
+    use crate::io::FileIO;
+    use crate::scan::{FileScanTask, FileScanTaskDeleteFile, FileScanTaskStream};
+    use crate::spec::{DataContentType, DataFileFormat, NestedField, PrimitiveType, Schema, Type};
+
+    /// Builds synthetic metadata for one row group holding `num_rows` rows at
+    /// position `ordinal`; the total byte size is a fixed placeholder of 2000.
+    fn build_test_row_group_meta(
+        schema_descr: SchemaDescPtr,
+        columns: Vec<ColumnChunkMetaData>,
+        num_rows: i64,
+        ordinal: i16,
+    ) -> RowGroupMetaData {
+        let builder = RowGroupMetaData::builder(schema_descr)
+            .set_num_rows(num_rows)
+            .set_total_byte_size(2000)
+            .set_column_metadata(columns);
+        builder.set_ordinal(ordinal).build().unwrap()
+    }
+
+    /// Returns a descriptor for the two-column schema used by the row-selection
+    /// unit test: a root group named "schema" containing the INT32 leaf columns
+    /// "a" and "b".
+    fn get_test_schema_descr() -> SchemaDescPtr {
+        use parquet::schema::types::Type as SchemaType;
+
+        // Both leaf columns are identical apart from their name.
+        let int32_leaf = |name: &str| {
+            let leaf = SchemaType::primitive_type_builder(name, parquet::basic::Type::INT32)
+                .build()
+                .unwrap();
+            Arc::new(leaf)
+        };
+
+        let root = SchemaType::group_type_builder("schema")
+            .with_fields(vec![int32_leaf("a"), int32_leaf("b")])
+            .build()
+            .unwrap();
+
+        Arc::new(SchemaDescriptor::new(Arc::new(root)))
+    }
+
+    /// Checks `build_deletes_row_selection` against five row groups, first with row groups 1
+    /// and 3 selected and then with row-group selection disabled.
+    ///
+    /// Cases covered: {skip|select} {first|intermediate|last} {one row|multiple rows} in
+    /// {first|intermediate|last} {skipped|selected} row group.
+    #[test]
+    fn test_build_deletes_row_selection() {
+        let skip = RowSelector::skip;
+        let select = RowSelector::select;
+
+        let schema_descr = get_test_schema_descr();
+        let columns: Vec<ColumnChunkMetaData> = schema_descr
+            .columns()
+            .iter()
+            .map(|ptr| ColumnChunkMetaData::builder(ptr.clone()).build().unwrap())
+            .collect();
+
+        // Row groups in file order, with the rows each one covers:
+        //   rg0 0..1000, rg1 1000..1500, rg2 1500..2000, rg3 2000..3000, rg4 3000..3500
+        let row_groups_metadata: Vec<RowGroupMetaData> = [1000, 500, 500, 1000, 500]
+            .into_iter()
+            .zip(0i16..)
+            .map(|(num_rows, ordinal)| {
+                build_test_row_group_meta(schema_descr.clone(), columns.clone(), num_rows, ordinal)
+            })
+            .collect();
+        let selected_row_groups = Some(vec![1, 3]);
+
+        // Deleted positions, written as inclusive runs of consecutive rows.
+        let delete_runs = [
+            1u64..=1, // lone row inside skipped rg0
+            3..=5, // run of three inside skipped rg0
+            998..=999, // run of two closing skipped rg0
+            1000..=1000, // lone row opening selected rg1
+            1010..=1012, // run of three inside selected rg1
+            1498..=1499, // run of two closing selected rg1
+            1500..=1501, // run of two opening skipped rg2
+            1600..=1600, // lone row inside skipped rg2
+            1999..=1999, // lone row closing skipped rg2
+            2000..=2001, // run of two opening selected rg3
+            2100..=2100, // lone row inside selected rg3
+            2200..=2202, // run of three inside selected rg3
+            2999..=2999, // lone row closing selected rg3
+            3000..=3000, // lone row opening skipped rg4
+        ];
+        let positional_deletes =
+            DeleteVector::new(delete_runs.into_iter().flatten().collect::<RoaringTreemap>());
+
+        // Row groups 1 and 3 only. The expectation has the 2-row skip closing rg1 and the
+        // 2-row skip opening rg3 merged into a single skip(4), as skipped rg2 emits nothing.
+        let result = ArrowReader::build_deletes_row_selection(
+            &row_groups_metadata,
+            &selected_row_groups,
+            &positional_deletes,
+        )
+        .unwrap();
+
+        let expected = RowSelection::from(vec![
+            skip(1), // 1000
+            select(9), // 1001..=1009
+            skip(3), // 1010..=1012
+            select(485), // 1013..=1497
+            skip(4), // 1498..=1499 (rg1) + 2000..=2001 (rg3)
+            select(98), // 2002..=2099
+            skip(1), // 2100
+            select(99), // 2101..=2199
+            skip(3), // 2200..=2202
+            select(796), // 2203..=2998
+            skip(1), // 2999
+        ]);
+        assert_eq!(result, expected);
+
+        // Row-group selection disabled: every row group contributes, so runs of deletes
+        // that straddle a row-group boundary show up as one merged skip.
+        let result = ArrowReader::build_deletes_row_selection(
+            &row_groups_metadata,
+            &None,
+            &positional_deletes,
+        )
+        .unwrap();
+
+        let expected = RowSelection::from(vec![
+            select(1), // 0
+            skip(1), // 1
+            select(1), // 2
+            skip(3), // 3..=5
+            select(992), // 6..=997
+            skip(3), // 998..=1000 (rg0 into rg1)
+            select(9), // 1001..=1009
+            skip(3), // 1010..=1012
+            select(485), // 1013..=1497
+            skip(4), // 1498..=1501 (rg1 into rg2)
+            select(98), // 1502..=1599
+            skip(1), // 1600
+            select(398), // 1601..=1998
+            skip(3), // 1999..=2001 (rg2 into rg3)
+            select(98), // 2002..=2099
+            skip(1), // 2100
+            select(99), // 2101..=2199
+            skip(3), // 2200..=2202
+            select(796), // 2203..=2998
+            skip(2), // 2999..=3000 (rg3 into rg4)
+            select(499), // 3001..=3499
+        ]);
+        assert_eq!(result, expected);
+    }
+
+    /// Regression test: a position delete targeting a row in a later row group must be applied.
+    ///
+    /// When a file has multiple row groups and a position delete targets a row in a later
+    /// row group, the `build_deletes_row_selection` function had a bug where it would
+    /// fail to increment `current_row_group_base_idx` when skipping row groups.
+    ///
+    /// This test creates:
+    /// - A data file with 200 rows split into 2 row groups (0-99, 100-199)
+    /// - A position delete file that deletes row 199 (last row in second row group)
+    ///
+    /// Expected behavior: returns 199 rows (the row with id=200 is deleted)
+    /// Bug behavior: returns 200 rows (the delete is not applied)
+    ///
+    /// This bug was discovered while running Apache Spark + Apache Iceberg integration tests
+    /// through DataFusion Comet. The following Iceberg Java tests failed due to this bug:
+    /// - `org.apache.iceberg.spark.extensions.TestMergeOnReadDelete::testDeleteWithMultipleRowGroupsParquet`
+    /// - `org.apache.iceberg.spark.extensions.TestMergeOnReadUpdate::testUpdateWithMultipleRowGroupsParquet`
+    #[tokio::test]
+    async fn test_position_delete_across_multiple_row_groups() {
+        use arrow_array::{Int32Array, Int64Array};
+        use parquet::file::reader::{FileReader, SerializedFileReader};
+
+        // Field IDs attached below to the `file_path` and `pos` columns of the delete file
+        const FIELD_ID_POSITIONAL_DELETE_FILE_PATH: u64 = 2147483546;
+        const FIELD_ID_POSITIONAL_DELETE_POS: u64 = 2147483545;
+
+        let tmp_dir = TempDir::new().unwrap();
+        let table_location = tmp_dir.path().to_str().unwrap().to_string();
+
+        // Iceberg table schema: a single required Int column `id` with field id 1
+        let table_schema = Arc::new(
+            Schema::builder()
+                .with_schema_id(1)
+                .with_fields(vec![
+                    NestedField::required(1, "id", Type::Primitive(PrimitiveType::Int)).into(),
+                ])
+                .build()
+                .unwrap(),
+        );
+
+        let arrow_schema = Arc::new(ArrowSchema::new(vec![
+            Field::new("id", DataType::Int32, false).with_metadata(HashMap::from([(
+                PARQUET_FIELD_ID_META_KEY.to_string(),
+                "1".to_string(),
+            )])),
+        ]));
+
+        // Step 1: Create data file with 200 rows in 2 row groups
+        // Row group 0: rows 0-99 (ids 1-100)
+        // Row group 1: rows 100-199 (ids 101-200)
+        let data_file_path = format!("{table_location}/data.parquet");
+
+        let batch1 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new(
+            Int32Array::from_iter_values(1..=100),
+        )])
+        .unwrap();
+
+        let batch2 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new(
+            Int32Array::from_iter_values(101..=200),
+        )])
+        .unwrap();
+
+        // Cap row groups at 100 rows so each 100-row batch lands in its own row group
+        let props = WriterProperties::builder()
+            .set_compression(Compression::SNAPPY)
+            .set_max_row_group_row_count(Some(100))
+            .build();
+
+        let file = File::create(&data_file_path).unwrap();
+        let mut writer = ArrowWriter::try_new(file, arrow_schema.clone(), Some(props)).unwrap();
+        writer.write(&batch1).expect("Writing batch 1");
+        writer.write(&batch2).expect("Writing batch 2");
+        writer.close().unwrap();
+
+        // Sanity check: the written file really has 2 row groups
+        let verify_file = File::open(&data_file_path).unwrap();
+        let verify_reader = SerializedFileReader::new(verify_file).unwrap();
+        assert_eq!(
+            verify_reader.metadata().num_row_groups(),
+            2,
+            "Should have 2 row groups"
+        );
+
+        // Step 2: Create position delete file that deletes row 199 (id=200, last row in row group 1)
+        let delete_file_path = format!("{table_location}/deletes.parquet");
+
+        let delete_schema = Arc::new(ArrowSchema::new(vec![
+            Field::new("file_path", DataType::Utf8, false).with_metadata(HashMap::from([(
+                PARQUET_FIELD_ID_META_KEY.to_string(),
+                FIELD_ID_POSITIONAL_DELETE_FILE_PATH.to_string(),
+            )])),
+            Field::new("pos", DataType::Int64, false).with_metadata(HashMap::from([(
+                PARQUET_FIELD_ID_META_KEY.to_string(),
+                FIELD_ID_POSITIONAL_DELETE_POS.to_string(),
+            )])),
+        ]));
+
+        // One delete record: position 199 of the data file (0-indexed, i.e. the last row, id=200)
+        let delete_batch = RecordBatch::try_new(delete_schema.clone(), vec![
+            Arc::new(StringArray::from_iter_values(vec![data_file_path.clone()])),
+            Arc::new(Int64Array::from_iter_values(vec![199i64])),
+        ])
+        .unwrap();
+
+        let delete_props = WriterProperties::builder()
+            .set_compression(Compression::SNAPPY)
+            .build();
+
+        let delete_file = File::create(&delete_file_path).unwrap();
+        let mut delete_writer =
+            ArrowWriter::try_new(delete_file, delete_schema, Some(delete_props)).unwrap();
+        delete_writer.write(&delete_batch).unwrap();
+        delete_writer.close().unwrap();
+
+        // Step 3: Read the data file with the delete applied
+        let file_io = FileIO::new_with_fs();
+        let reader = ArrowReaderBuilder::new(file_io).build();
+
+        let task = FileScanTask {
+            file_size_in_bytes: std::fs::metadata(&data_file_path).unwrap().len(),
+            start: 0,
+            length: 0, // presumably 0/0 = no byte-range filter; TODO confirm
+            record_count: Some(200),
+            data_file_path: data_file_path.clone(),
+            data_file_format: DataFileFormat::Parquet,
+            schema: table_schema.clone(),
+            project_field_ids: vec![1],
+            predicate: None,
+            deletes: vec![FileScanTaskDeleteFile {
+                file_size_in_bytes: std::fs::metadata(&delete_file_path).unwrap().len(),
+                file_path: delete_file_path,
+                file_type: DataContentType::PositionDeletes,
+                partition_spec_id: 0,
+                equality_ids: None,
+            }],
+            partition: None,
+            partition_spec: None,
+            name_mapping: None,
+            case_sensitive: false,
+        };
+
+        let tasks = Box::pin(futures::stream::iter(vec![Ok(task)])) as FileScanTaskStream;
+        let result = reader
+            .read(tasks)
+            .unwrap()
+            .stream()
+            .try_collect::<Vec<RecordBatch>>()
+            .await
+            .unwrap();
+
+        // Step 4: Verify we got 199 rows (not 200)
+        let total_rows: usize = result.iter().map(|b| b.num_rows()).sum();
+
+        println!("Total rows read: {total_rows}");
+        println!("Expected: 199 rows (deleted row 199 which had id=200)");
+
+        // Regression guard: fails if the delete in the second row group is not applied
+        assert_eq!(
+            total_rows, 199,
+            "Expected 199 rows after deleting row 199, but got {total_rows} rows. \
+             The bug causes position deletes in later row groups to be ignored."
+        );
+
+        // Flatten all batches into one id list and check the deleted row (id=200) is absent
+        let all_ids: Vec<i32> = result
+            .iter()
+            .flat_map(|batch| {
+                batch
+                    .column(0)
+                    .as_primitive::<arrow_array::types::Int32Type>()
+                    .values()
+                    .iter()
+                    .copied()
+            })
+            .collect();
+
+        assert!(
+            !all_ids.contains(&200),
+            "Row with id=200 should be deleted but was found in results"
+        );
+
+        // Every other id (1-199) must be present, in order
+        let expected_ids: Vec<i32> = (1..=199).collect();
+        assert_eq!(
+            all_ids, expected_ids,
+            "Should have ids 1-199 but got different values"
+        );
+    }
+
+    /// Test for bug where position deletes are lost when skipping unselected row groups.
+    ///
+    /// This is a variant of `test_position_delete_across_multiple_row_groups` that exercises
+    /// the row group selection code path (`selected_row_groups: Some([...])`).
+    ///
+    /// When a file has multiple row groups and only some are selected for reading,
+    /// the `build_deletes_row_selection` function must correctly skip over deletes in
+    /// unselected row groups WITHOUT consuming deletes that belong to selected row groups.
+    ///
+    /// This test creates:
+    /// - A data file with 200 rows split into 2 row groups (0-99, 100-199)
+    /// - A position delete file that deletes row 199 (last row in second row group)
+    /// - Row group selection that reads ONLY row group 1 (rows 100-199)
+    ///
+    /// Expected behavior: Should return 99 rows (with row 199 deleted)
+    /// Bug behavior: Returns 100 rows (delete is lost when skipping row group 0)
+    ///
+    /// The bug occurred when processing row group 0 (unselected) with an unconditional `next()`:
+    /// ```rust
+    /// delete_vector_iter.advance_to(next_row_group_base_idx); // Position at first delete >= 100
+    /// next_deleted_row_idx_opt = delete_vector_iter.next(); // BUG: drops the cached delete at 199!
+    /// ```
+    ///
+    /// The fix is to call `next()` after `advance_to()` only when the cached delete index falls
+    /// inside the skipped row group; a cached index at or beyond its end is still pending.
+    #[tokio::test]
+    async fn test_position_delete_with_row_group_selection() {
+        use arrow_array::{Int32Array, Int64Array};
+        use parquet::file::reader::{FileReader, SerializedFileReader};
+
+        // Field IDs attached below to the `file_path` and `pos` columns of the delete file
+        const FIELD_ID_POSITIONAL_DELETE_FILE_PATH: u64 = 2147483546;
+        const FIELD_ID_POSITIONAL_DELETE_POS: u64 = 2147483545;
+
+        let tmp_dir = TempDir::new().unwrap();
+        let table_location = tmp_dir.path().to_str().unwrap().to_string();
+
+        // Iceberg table schema: a single required Int column `id` with field id 1
+        let table_schema = Arc::new(
+            Schema::builder()
+                .with_schema_id(1)
+                .with_fields(vec![
+                    NestedField::required(1, "id", Type::Primitive(PrimitiveType::Int)).into(),
+                ])
+                .build()
+                .unwrap(),
+        );
+
+        let arrow_schema = Arc::new(ArrowSchema::new(vec![
+            Field::new("id", DataType::Int32, false).with_metadata(HashMap::from([(
+                PARQUET_FIELD_ID_META_KEY.to_string(),
+                "1".to_string(),
+            )])),
+        ]));
+
+        // Step 1: Create data file with 200 rows in 2 row groups
+        // Row group 0: rows 0-99 (ids 1-100)
+        // Row group 1: rows 100-199 (ids 101-200)
+        let data_file_path = format!("{table_location}/data.parquet");
+
+        let batch1 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new(
+            Int32Array::from_iter_values(1..=100),
+        )])
+        .unwrap();
+
+        let batch2 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new(
+            Int32Array::from_iter_values(101..=200),
+        )])
+        .unwrap();
+
+        // Cap row groups at 100 rows so each 100-row batch lands in its own row group
+        let props = WriterProperties::builder()
+            .set_compression(Compression::SNAPPY)
+            .set_max_row_group_row_count(Some(100))
+            .build();
+
+        let file = File::create(&data_file_path).unwrap();
+        let mut writer = ArrowWriter::try_new(file, arrow_schema.clone(), Some(props)).unwrap();
+        writer.write(&batch1).expect("Writing batch 1");
+        writer.write(&batch2).expect("Writing batch 2");
+        writer.close().unwrap();
+
+        // Sanity check: the written file really has 2 row groups
+        let verify_file = File::open(&data_file_path).unwrap();
+        let verify_reader = SerializedFileReader::new(verify_file).unwrap();
+        assert_eq!(
+            verify_reader.metadata().num_row_groups(),
+            2,
+            "Should have 2 row groups"
+        );
+
+        // Step 2: Create position delete file that deletes row 199 (id=200, last row in row group 1)
+        let delete_file_path = format!("{table_location}/deletes.parquet");
+
+        let delete_schema = Arc::new(ArrowSchema::new(vec![
+            Field::new("file_path", DataType::Utf8, false).with_metadata(HashMap::from([(
+                PARQUET_FIELD_ID_META_KEY.to_string(),
+                FIELD_ID_POSITIONAL_DELETE_FILE_PATH.to_string(),
+            )])),
+            Field::new("pos", DataType::Int64, false).with_metadata(HashMap::from([(
+                PARQUET_FIELD_ID_META_KEY.to_string(),
+                FIELD_ID_POSITIONAL_DELETE_POS.to_string(),
+            )])),
+        ]));
+
+        // One delete record: position 199 of the data file (0-indexed, i.e. the last row, id=200)
+        let delete_batch = RecordBatch::try_new(delete_schema.clone(), vec![
+            Arc::new(StringArray::from_iter_values(vec![data_file_path.clone()])),
+            Arc::new(Int64Array::from_iter_values(vec![199i64])),
+        ])
+        .unwrap();
+
+        let delete_props = WriterProperties::builder()
+            .set_compression(Compression::SNAPPY)
+            .build();
+
+        let delete_file = File::create(&delete_file_path).unwrap();
+        let mut delete_writer =
+            ArrowWriter::try_new(delete_file, delete_schema, Some(delete_props)).unwrap();
+        delete_writer.write(&delete_batch).unwrap();
+        delete_writer.close().unwrap();
+
+        // Step 3: Byte range of ONLY row group 1 (rows 100-199), so row group 0 gets skipped.
+        // The offsets assume rg0 starts right after the magic and rg1 directly follows rg0.
+        let metadata_file = File::open(&data_file_path).unwrap();
+        let metadata_reader = SerializedFileReader::new(metadata_file).unwrap();
+        let metadata = metadata_reader.metadata();
+
+        let row_group_0 = metadata.row_group(0);
+        let row_group_1 = metadata.row_group(1);
+
+        let rg0_start = 4u64; // Parquet files start with 4-byte magic "PAR1"
+        let rg1_start = rg0_start + row_group_0.compressed_size() as u64;
+        let rg1_length = row_group_1.compressed_size() as u64;
+
+        println!(
+            "Row group 0: starts at byte {}, {} bytes compressed",
+            rg0_start,
+            row_group_0.compressed_size()
+        );
+        println!(
+            "Row group 1: starts at byte {}, {} bytes compressed",
+            rg1_start,
+            row_group_1.compressed_size()
+        );
+
+        let file_io = FileIO::new_with_fs();
+        let reader = ArrowReaderBuilder::new(file_io).build();
+
+        // Scan task whose `start`/`length` cover only row group 1's bytes
+        let task = FileScanTask {
+            file_size_in_bytes: std::fs::metadata(&data_file_path).unwrap().len(),
+            start: rg1_start,
+            length: rg1_length,
+            record_count: Some(100), // Row group 1 has 100 rows
+            data_file_path: data_file_path.clone(),
+            data_file_format: DataFileFormat::Parquet,
+            schema: table_schema.clone(),
+            project_field_ids: vec![1],
+            predicate: None,
+            deletes: vec![FileScanTaskDeleteFile {
+                file_size_in_bytes: std::fs::metadata(&delete_file_path).unwrap().len(),
+                file_path: delete_file_path,
+                file_type: DataContentType::PositionDeletes,
+                partition_spec_id: 0,
+                equality_ids: None,
+            }],
+            partition: None,
+            partition_spec: None,
+            name_mapping: None,
+            case_sensitive: false,
+        };
+
+        let tasks = Box::pin(futures::stream::iter(vec![Ok(task)])) as FileScanTaskStream;
+        let result = reader
+            .read(tasks)
+            .unwrap()
+            .stream()
+            .try_collect::<Vec<RecordBatch>>()
+            .await
+            .unwrap();
+
+        // Step 4: Verify we got 99 rows (not 100)
+        // Row group 1 has 100 rows (ids 101-200), minus 1 delete (id=200) = 99 rows
+        let total_rows: usize = result.iter().map(|b| b.num_rows()).sum();
+
+        println!("Total rows read from row group 1: {total_rows}");
+        println!("Expected: 99 rows (row group 1 has 100 rows, 1 delete at position 199)");
+
+        // Regression guard: fails if the delete at 199 is dropped while row group 0 is skipped
+        assert_eq!(
+            total_rows, 99,
+            "Expected 99 rows from row group 1 after deleting position 199, but got {total_rows} rows. \
+             The bug causes position deletes to be lost when advance_to() is followed by next() \
+             when skipping unselected row groups."
+        );
+
+        // Flatten all batches into one id list and check the deleted row (id=200) is absent
+        let all_ids: Vec<i32> = result
+            .iter()
+            .flat_map(|batch| {
+                batch
+                    .column(0)
+                    .as_primitive::<arrow_array::types::Int32Type>()
+                    .values()
+                    .iter()
+                    .copied()
+            })
+            .collect();
+
+        assert!(
+            !all_ids.contains(&200),
+            "Row with id=200 should be deleted but was found in results"
+        );
+
+        // Only row group 1's surviving ids (101-199) must be present, in order
+        let expected_ids: Vec<i32> = (101..=199).collect();
+        assert_eq!(
+            all_ids, expected_ids,
+            "Should have ids 101-199 but got different values"
+        );
+    }
+
+    /// Test for bug where stale cached delete causes infinite loop when skipping row groups.
+    ///
+    /// This test exposes the inverse scenario of `test_position_delete_with_row_group_selection`:
+    /// - Position delete targets a row in the SKIPPED row group (not the selected one)
+    /// - After calling advance_to(), the cached delete index is stale
+    /// - Without updating the cache, the code enters an infinite loop
+    ///
+    /// This test creates:
+    /// - A data file with 200 rows split into 2 row groups (0-99, 100-199)
+    /// - A position delete file that deletes row 0 (first row in SKIPPED row group 0)
+    /// - Row group selection that reads ONLY row group 1 (rows 100-199)
+    ///
+    /// The bug occurs when skipping row group 0:
+    /// ```rust,ignore
+    /// let mut next_deleted_row_idx_opt = delete_vector_iter.next(); // Some(0)
+    /// // ... skip to row group 1 ...
+    /// delete_vector_iter.advance_to(100); // Iterator advances past delete at 0
+    /// // BUG: next_deleted_row_idx_opt is still Some(0) - STALE!
+    /// // When processing row group 1:
+    /// //   current_idx = 100, next_deleted_row_idx = 0, next_row_group_base_idx = 200
+    /// //   Loop condition: 0 < 200 (true)
+    /// //   But: current_idx (100) > next_deleted_row_idx (0)
+    /// //   And: current_idx (100) != next_deleted_row_idx (0)
+    /// //   Neither branch executes -> INFINITE LOOP!
+    /// ```
+    ///
+    /// Expected behavior: Should return 100 rows (delete at 0 doesn't affect row group 1)
+    /// Bug behavior: Infinite loop in build_deletes_row_selection
+    #[tokio::test]
+    async fn test_position_delete_in_skipped_row_group() {
+        use arrow_array::{Int32Array, Int64Array};
+        use parquet::file::reader::{FileReader, SerializedFileReader};
+
+        // Field ids written into the `file_path` and `pos` columns of the position delete file
+        const FIELD_ID_POSITIONAL_DELETE_FILE_PATH: u64 = 2147483546;
+        const FIELD_ID_POSITIONAL_DELETE_POS: u64 = 2147483545;
+
+        let tmp_dir = TempDir::new().unwrap();
+        let table_location = tmp_dir.path().to_str().unwrap().to_string();
+
+        // Create table schema with a single 'id' column
+        let table_schema = Arc::new(
+            Schema::builder()
+                .with_schema_id(1)
+                .with_fields(vec![
+                    NestedField::required(1, "id", Type::Primitive(PrimitiveType::Int)).into(),
+                ])
+                .build()
+                .unwrap(),
+        );
+
+        let arrow_schema = Arc::new(ArrowSchema::new(vec![
+            Field::new("id", DataType::Int32, false).with_metadata(HashMap::from([(
+                PARQUET_FIELD_ID_META_KEY.to_string(),
+                "1".to_string(),
+            )])),
+        ]));
+
+        // Step 1: Create data file with 200 rows in 2 row groups
+        // Row group 0: rows 0-99 (ids 1-100)
+        // Row group 1: rows 100-199 (ids 101-200)
+        let data_file_path = format!("{table_location}/data.parquet");
+
+        let batch1 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new(
+            Int32Array::from_iter_values(1..=100),
+        )])
+        .unwrap();
+
+        let batch2 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new(
+            Int32Array::from_iter_values(101..=200),
+        )])
+        .unwrap();
+
+        // Cap row groups at 100 rows so that each 100-row batch lands in its own row group
+        let props = WriterProperties::builder()
+            .set_compression(Compression::SNAPPY)
+            .set_max_row_group_row_count(Some(100))
+            .build();
+
+        let file = File::create(&data_file_path).unwrap();
+        let mut writer = ArrowWriter::try_new(file, arrow_schema.clone(), Some(props)).unwrap();
+        writer.write(&batch1).expect("Writing batch 1");
+        writer.write(&batch2).expect("Writing batch 2");
+        writer.close().unwrap();
+
+        // Verify we created 2 row groups
+        let verify_file = File::open(&data_file_path).unwrap();
+        let verify_reader = SerializedFileReader::new(verify_file).unwrap();
+        assert_eq!(
+            verify_reader.metadata().num_row_groups(),
+            2,
+            "Should have 2 row groups"
+        );
+
+        // Step 2: Create position delete file that deletes row 0 (id=1, first row in row group 0)
+        let delete_file_path = format!("{table_location}/deletes.parquet");
+
+        let delete_schema = Arc::new(ArrowSchema::new(vec![
+            Field::new("file_path", DataType::Utf8, false).with_metadata(HashMap::from([(
+                PARQUET_FIELD_ID_META_KEY.to_string(),
+                FIELD_ID_POSITIONAL_DELETE_FILE_PATH.to_string(),
+            )])),
+            Field::new("pos", DataType::Int64, false).with_metadata(HashMap::from([(
+                PARQUET_FIELD_ID_META_KEY.to_string(),
+                FIELD_ID_POSITIONAL_DELETE_POS.to_string(),
+            )])),
+        ]));
+
+        // Delete row at position 0 (0-indexed, so it's the first row: id=1)
+        let delete_batch = RecordBatch::try_new(delete_schema.clone(), vec![
+            Arc::new(StringArray::from_iter_values(vec![data_file_path.clone()])),
+            Arc::new(Int64Array::from_iter_values(vec![0i64])),
+        ])
+        .unwrap();
+
+        let delete_props = WriterProperties::builder()
+            .set_compression(Compression::SNAPPY)
+            .build();
+
+        let delete_file = File::create(&delete_file_path).unwrap();
+        let mut delete_writer =
+            ArrowWriter::try_new(delete_file, delete_schema, Some(delete_props)).unwrap();
+        delete_writer.write(&delete_batch).unwrap();
+        delete_writer.close().unwrap();
+
+        // Step 3: Compute the byte range of ONLY row group 1 (rows 100-199) so that row group 0
+        // is skipped. NOTE(review): assumes row groups sit back to back after the magic - confirm.
+        let metadata_file = File::open(&data_file_path).unwrap();
+        let metadata_reader = SerializedFileReader::new(metadata_file).unwrap();
+        let metadata = metadata_reader.metadata();
+
+        let row_group_0 = metadata.row_group(0);
+        let row_group_1 = metadata.row_group(1);
+
+        let rg0_start = 4u64; // Parquet files start with 4-byte magic "PAR1"
+        let rg1_start = rg0_start + row_group_0.compressed_size() as u64;
+        let rg1_length = row_group_1.compressed_size() as u64;
+
+        let file_io = FileIO::new_with_fs();
+        let reader = ArrowReaderBuilder::new(file_io).build();
+
+        // Create FileScanTask that reads ONLY row group 1 via byte range filtering
+        let task = FileScanTask {
+            file_size_in_bytes: std::fs::metadata(&data_file_path).unwrap().len(),
+            start: rg1_start,
+            length: rg1_length,
+            record_count: Some(100), // Row group 1 has 100 rows
+            data_file_path: data_file_path.clone(),
+            data_file_format: DataFileFormat::Parquet,
+            schema: table_schema.clone(),
+            project_field_ids: vec![1],
+            predicate: None,
+            deletes: vec![FileScanTaskDeleteFile {
+                file_size_in_bytes: std::fs::metadata(&delete_file_path).unwrap().len(),
+                file_path: delete_file_path,
+                file_type: DataContentType::PositionDeletes,
+                partition_spec_id: 0,
+                equality_ids: None,
+            }],
+            partition: None,
+            partition_spec: None,
+            name_mapping: None,
+            case_sensitive: false,
+        };
+
+        let tasks = Box::pin(futures::stream::iter(vec![Ok(task)])) as FileScanTaskStream;
+        let result = reader
+            .read(tasks)
+            .unwrap()
+            .stream()
+            .try_collect::<Vec<RecordBatch>>()
+            .await
+            .unwrap();
+
+        // Step 4: Verify we got 100 rows (all of row group 1)
+        // The delete at position 0 is in row group 0, which is skipped, so it doesn't affect us
+        let total_rows: usize = result.iter().map(|b| b.num_rows()).sum();
+
+        assert_eq!(
+            total_rows, 100,
+            "Expected 100 rows from row group 1 (delete at position 0 is in skipped row group 0). \
+             If this hangs or fails, it indicates the cached delete index was not updated after advance_to()."
+        );
+
+        // Flatten column 0 of every returned batch and verify we have all ids 101-200 in order
+        let all_ids: Vec<i32> = result
+            .iter()
+            .flat_map(|batch| {
+                batch
+                    .column(0)
+                    .as_primitive::<arrow_array::types::Int32Type>()
+                    .values()
+                    .iter()
+                    .copied()
+            })
+            .collect();
+
+        let expected_ids: Vec<i32> = (101..=200).collect();
+        assert_eq!(
+            all_ids, expected_ids,
+            "Should have ids 101-200 (all of row group 1)"
+        );
+    }
+}
diff --git a/crates/iceberg/src/arrow/reader/predicate_visitor.rs b/crates/iceberg/src/arrow/reader/predicate_visitor.rs
new file mode 100644
index 0000000000..272de49390
--- /dev/null
+++ b/crates/iceberg/src/arrow/reader/predicate_visitor.rs
@@ -0,0 +1,820 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Visitors that translate Iceberg bound predicates into the pieces needed for
+//! Arrow-level evaluation: collecting referenced field IDs and producing
+//! per-record-batch predicate closures.
+
+use std::collections::{HashMap, HashSet};
+use std::sync::Arc;
+
+use arrow_arith::boolean::{and, and_kleene, is_not_null, is_null, not, or, or_kleene};
+use arrow_array::cast::AsArray;
+use arrow_array::types::{Float32Type, Float64Type};
+use arrow_array::{Array, ArrayRef, BooleanArray, Datum as ArrowDatum, RecordBatch, Scalar};
+use arrow_buffer::BooleanBuffer;
+use arrow_cast::cast::cast;
+use arrow_ord::cmp::{eq, gt, gt_eq, lt, lt_eq, neq};
+use arrow_schema::{ArrowError, DataType};
+use arrow_string::like::starts_with;
+use fnv::FnvHashSet;
+use parquet::schema::types::SchemaDescriptor;
+
+use crate::arrow::get_arrow_datum;
+use crate::error::Result;
+use crate::expr::visitors::bound_predicate_visitor::BoundPredicateVisitor;
+use crate::expr::{BoundPredicate, BoundReference};
+use crate::spec::Datum;
+use crate::{Error, ErrorKind};
+
+/// Visitor that gathers the id of every field referenced by a bound predicate.
+pub(super) struct CollectFieldIdVisitor {
+    pub(super) field_ids: HashSet<i32>, // ids seen so far; repeated references collapse
+}
+
+impl CollectFieldIdVisitor {
+    pub(super) fn field_ids(self) -> HashSet<i32> {
+        self.field_ids // consumes the visitor and hands the collected set to the caller
+    }
+}
+
+// Visitor callbacks: every leaf predicate records the id of the field it references.
+impl BoundPredicateVisitor for CollectFieldIdVisitor {
+    type T = ();
+
+    // Constants and logical connectives reference no field of their own.
+    fn always_true(&mut self) -> Result<()> {
+        Ok(())
+    }
+
+    fn always_false(&mut self) -> Result<()> {
+        Ok(())
+    }
+
+    fn and(&mut self, _lhs: (), _rhs: ()) -> Result<()> {
+        Ok(())
+    }
+
+    fn or(&mut self, _lhs: (), _rhs: ()) -> Result<()> {
+        Ok(())
+    }
+
+    fn not(&mut self, _inner: ()) -> Result<()> {
+        Ok(())
+    }
+
+    // Unary predicates.
+    fn is_null(&mut self, reference: &BoundReference, _predicate: &BoundPredicate) -> Result<()> {
+        self.record_reference(reference)
+    }
+
+    fn not_null(&mut self, reference: &BoundReference, _predicate: &BoundPredicate) -> Result<()> {
+        self.record_reference(reference)
+    }
+
+    fn is_nan(&mut self, reference: &BoundReference, _predicate: &BoundPredicate) -> Result<()> {
+        self.record_reference(reference)
+    }
+
+    fn not_nan(&mut self, reference: &BoundReference, _predicate: &BoundPredicate) -> Result<()> {
+        self.record_reference(reference)
+    }
+
+    // Binary predicates: only the reference matters, the literal is ignored.
+    fn less_than(
+        &mut self,
+        reference: &BoundReference,
+        _literal: &Datum,
+        _predicate: &BoundPredicate,
+    ) -> Result<()> {
+        self.record_reference(reference)
+    }
+
+    fn less_than_or_eq(
+        &mut self,
+        reference: &BoundReference,
+        _literal: &Datum,
+        _predicate: &BoundPredicate,
+    ) -> Result<()> {
+        self.record_reference(reference)
+    }
+
+    fn greater_than(
+        &mut self,
+        reference: &BoundReference,
+        _literal: &Datum,
+        _predicate: &BoundPredicate,
+    ) -> Result<()> {
+        self.record_reference(reference)
+    }
+
+    fn greater_than_or_eq(
+        &mut self,
+        reference: &BoundReference,
+        _literal: &Datum,
+        _predicate: &BoundPredicate,
+    ) -> Result<()> {
+        self.record_reference(reference)
+    }
+
+    fn eq(
+        &mut self,
+        reference: &BoundReference,
+        _literal: &Datum,
+        _predicate: &BoundPredicate,
+    ) -> Result<()> {
+        self.record_reference(reference)
+    }
+
+    fn not_eq(
+        &mut self,
+        reference: &BoundReference,
+        _literal: &Datum,
+        _predicate: &BoundPredicate,
+    ) -> Result<()> {
+        self.record_reference(reference)
+    }
+
+    fn starts_with(
+        &mut self,
+        reference: &BoundReference,
+        _literal: &Datum,
+        _predicate: &BoundPredicate,
+    ) -> Result<()> {
+        self.record_reference(reference)
+    }
+
+    fn not_starts_with(
+        &mut self,
+        reference: &BoundReference,
+        _literal: &Datum,
+        _predicate: &BoundPredicate,
+    ) -> Result<()> {
+        self.record_reference(reference)
+    }
+
+    // Set predicates: likewise, only the reference matters.
+    fn r#in(
+        &mut self,
+        reference: &BoundReference,
+        _literals: &FnvHashSet<Datum>,
+        _predicate: &BoundPredicate,
+    ) -> Result<()> {
+        self.record_reference(reference)
+    }
+
+    fn not_in(
+        &mut self,
+        reference: &BoundReference,
+        _literals: &FnvHashSet<Datum>,
+        _predicate: &BoundPredicate,
+    ) -> Result<()> {
+        self.record_reference(reference)
+    }
+}
+
+impl CollectFieldIdVisitor {
+    /// Adds the id of the field behind `reference` to `field_ids`. Shared by every
+    /// leaf-predicate callback above; whether the id was already present is ignored.
+    fn record_reference(&mut self, reference: &BoundReference) -> Result<()> {
+        self.field_ids.insert(reference.field().id);
+        Ok(())
+    }
+}
+
+/// A visitor that converts an Iceberg bound predicate into a boxed per-record-batch closure.
+pub(super) struct PredicateConverter<'a> {
+    /// The Parquet schema descriptor, used to reject leaf columns nested inside a group.
+    pub(super) parquet_schema: &'a SchemaDescriptor,
+    /// The map between field id and leaf column index in Parquet schema.
+    pub(super) column_map: &'a HashMap<i32, usize>,
+    /// The required leaf column indices; a column's position here is its index in the batch.
+    pub(super) column_indices: &'a Vec<usize>,
+}
+
+impl PredicateConverter<'_> {
+    /// Resolves a bound reference to the position of its leaf column inside the
+    /// required column indices, which is used to project the column in the record batch.
+    /// Returns `Ok(None)` if the field id is not found in the column map, which is
+    /// possible due to schema evolution.
+    fn bound_reference(&mut self, reference: &BoundReference) -> Result<Option<usize>> {
+        // The leaf column's index in Parquet schema.
+        let column_idx = match self.column_map.get(&reference.field().id) {
+            Some(&column_idx) => column_idx,
+            None => return Ok(None),
+        };
+
+        if self.parquet_schema.get_column_root(column_idx).is_group() {
+            return Err(Error::new(
+                ErrorKind::DataInvalid,
+                format!(
+                    "Leaf column `{}` in predicates isn't a root column in Parquet schema.",
+                    reference.field().name
+                ),
+            ));
+        }
+
+        // The leaf column's index in the required column indices (error is built lazily).
+        let position = self.column_indices.iter().position(|&idx| idx == column_idx);
+        let index = position.ok_or_else(|| {
+            Error::new(
+                ErrorKind::DataInvalid,
+                format!(
+                    "Leaf column `{}` in predicates cannot be found in the required column indices.",
+                    reference.field().name
+                ),
+            )
+        })?;
+
+        Ok(Some(index))
+    }
+
+    /// Build an Arrow predicate that yields `true` for every row of the batch it is given.
+    fn build_always_true(&self) -> Result<Box<PredicateResult>> {
+        Ok(Box::new(|batch| {
+            Ok(BooleanArray::from(vec![true; batch.num_rows()]))
+        }))
+    }
+
+    /// Build an Arrow predicate that yields `false` for every row of the batch it is given.
+    fn build_always_false(&self) -> Result<Box<PredicateResult>> {
+        Ok(Box::new(|batch| {
+            Ok(BooleanArray::from(vec![false; batch.num_rows()]))
+        }))
+    }
+}
+
+/// Returns the array at `column_idx` of the record batch for predicate evaluation.
+/// Only top-level columns are supported for now; struct columns yield a schema error.
+fn project_column(
+    batch: &RecordBatch,
+    column_idx: usize,
+) -> std::result::Result<ArrayRef, ArrowError> {
+    let array = batch.column(column_idx);
+    if matches!(array.data_type(), DataType::Struct(_)) {
+        return Err(ArrowError::SchemaError(
+            "Does not support struct column yet.".to_string(),
+        ));
+    }
+    // Any non-struct array is handed back as a new handle to the same column.
+    Ok(Arc::clone(array))
+}
+
+/// Element-wise NaN test for `Float32`/`Float64` arrays; null slots yield `false`.
+/// Any other data type returns `ArrowError::ComputeError` instead of panicking.
+fn compute_is_nan(array: &ArrayRef) -> std::result::Result<BooleanArray, ArrowError> {
+    // Test the contiguous values slice first; null slots are fixed up below.
+    let (is_nan, nulls) = match array.data_type() {
+        DataType::Float32 => {
+            let floats = array.as_primitive::<Float32Type>();
+            let bits = BooleanBuffer::from_iter(floats.values().iter().map(|v| v.is_nan()));
+            (bits, floats.nulls())
+        }
+        DataType::Float64 => {
+            let floats = array.as_primitive::<Float64Type>();
+            let bits = BooleanBuffer::from_iter(floats.values().iter().map(|v| v.is_nan()));
+            (bits, floats.nulls())
+        }
+        other => {
+            return Err(ArrowError::ComputeError(format!(
+                "is_nan is only valid for float types, got {other:?}"
+            )));
+        }
+    };
+    // Fold the null bitmap in with a single bitwise AND so that null slots become false.
+    let values = match nulls {
+        Some(nulls) => &is_nan & nulls.inner(),
+        None => is_nan,
+    };
+    Ok(BooleanArray::new(values, None))
+}
+
+pub(super) type PredicateResult =
+    dyn FnMut(RecordBatch) -> std::result::Result<BooleanArray, ArrowError> + Send + 'static;
+
+impl BoundPredicateVisitor for PredicateConverter<'_> {
+    type T = Box<PredicateResult>;
+
+    fn always_true(&mut self) -> Result<Box<PredicateResult>> {
+        self.build_always_true()
+    }
+
+    fn always_false(&mut self) -> Result<Box<PredicateResult>> {
+        self.build_always_false()
+    }
+
+    fn and(
+        &mut self,
+        mut lhs: Box<PredicateResult>,
+        mut rhs: Box<PredicateResult>,
+    ) -> Result<Box<PredicateResult>> {
+        Ok(Box::new(move |batch| {
+            let left = lhs(batch.clone())?;
+            let right = rhs(batch)?;
+            and_kleene(&left, &right) // Kleene logic: `false AND null` is `false`
+        }))
+    }
+
+    fn or(
+        &mut self,
+        mut lhs: Box<PredicateResult>,
+        mut rhs: Box<PredicateResult>,
+    ) -> Result<Box<PredicateResult>> {
+        Ok(Box::new(move |batch| {
+            let left = lhs(batch.clone())?;
+            let right = rhs(batch)?;
+            or_kleene(&left, &right) // Kleene logic: `true OR null` is `true`
+        }))
+    }
+
+    fn not(&mut self, mut inner: Box<PredicateResult>) -> Result<Box<PredicateResult>> {
+        Ok(Box::new(move |batch| {
+            let pred_ret = inner(batch)?;
+            not(&pred_ret)
+        }))
+    }
+
+    fn is_null(
+        &mut self,
+        reference: &BoundReference,
+        _predicate: &BoundPredicate,
+    ) -> Result<Box<PredicateResult>> {
+        if let Some(idx) = self.bound_reference(reference)? {
+            Ok(Box::new(move |batch| {
+                let column = project_column(&batch, idx)?;
+                is_null(&column)
+            }))
+        } else {
+            // A missing column, treating it as null.
+            self.build_always_true()
+        }
+    }
+
+    fn not_null(
+        &mut self,
+        reference: &BoundReference,
+        _predicate: &BoundPredicate,
+    ) -> Result<Box<PredicateResult>> {
+        if let Some(idx) = self.bound_reference(reference)? {
+            Ok(Box::new(move |batch| {
+                let column = project_column(&batch, idx)?;
+                is_not_null(&column)
+            }))
+        } else {
+            // A missing column, treating it as null.
+            self.build_always_false()
+        }
+    }
+
+    fn is_nan(
+        &mut self,
+        reference: &BoundReference,
+        _predicate: &BoundPredicate,
+    ) -> Result<Box<PredicateResult>> {
+        if let Some(idx) = self.bound_reference(reference)? {
+            Ok(Box::new(move |batch| {
+                let column = project_column(&batch, idx)?;
+                compute_is_nan(&column)
+            }))
+        } else {
+            // A missing column, treating it as null.
+            self.build_always_false()
+        }
+    }
+
+    fn not_nan(
+        &mut self,
+        reference: &BoundReference,
+        _predicate: &BoundPredicate,
+    ) -> Result<Box<PredicateResult>> {
+        if let Some(idx) = self.bound_reference(reference)? {
+            Ok(Box::new(move |batch| {
+                let column = project_column(&batch, idx)?;
+                let is_nan = compute_is_nan(&column)?;
+                not(&is_nan)
+            }))
+        } else {
+            // A missing column, treating it as null.
+            self.build_always_true()
+        }
+    }
+
+    fn less_than(
+        &mut self,
+        reference: &BoundReference,
+        literal: &Datum,
+        _predicate: &BoundPredicate,
+    ) -> Result<Box<PredicateResult>> {
+        if let Some(idx) = self.bound_reference(reference)? {
+            let literal = get_arrow_datum(literal)?;
+
+            Ok(Box::new(move |batch| {
+                let left = project_column(&batch, idx)?;
+                let literal = try_cast_literal(&literal, left.data_type())?;
+                lt(&left, literal.as_ref())
+            }))
+        } else {
+            // A missing column, treating it as null.
+            self.build_always_true()
+        }
+    }
+
+    fn less_than_or_eq(
+        &mut self,
+        reference: &BoundReference,
+        literal: &Datum,
+        _predicate: &BoundPredicate,
+    ) -> Result<Box<PredicateResult>> {
+        if let Some(idx) = self.bound_reference(reference)? {
+            let literal = get_arrow_datum(literal)?;
+
+            Ok(Box::new(move |batch| {
+                let left = project_column(&batch, idx)?;
+                let literal = try_cast_literal(&literal, left.data_type())?;
+                lt_eq(&left, literal.as_ref())
+            }))
+        } else {
+            // A missing column, treating it as null.
+            self.build_always_true()
+        }
+    }
+
+    fn greater_than(
+        &mut self,
+        reference: &BoundReference,
+        literal: &Datum,
+        _predicate: &BoundPredicate,
+    ) -> Result<Box<PredicateResult>> {
+        if let Some(idx) = self.bound_reference(reference)? {
+            let literal = get_arrow_datum(literal)?;
+
+            Ok(Box::new(move |batch| {
+                let left = project_column(&batch, idx)?;
+                let literal = try_cast_literal(&literal, left.data_type())?;
+                gt(&left, literal.as_ref())
+            }))
+        } else {
+            // A missing column, treating it as null.
+            self.build_always_false()
+        }
+    }
+
+    fn greater_than_or_eq(
+        &mut self,
+        reference: &BoundReference,
+        literal: &Datum,
+        _predicate: &BoundPredicate,
+    ) -> Result<Box<PredicateResult>> {
+        if let Some(idx) = self.bound_reference(reference)? {
+            let literal = get_arrow_datum(literal)?;
+
+            Ok(Box::new(move |batch| {
+                let left = project_column(&batch, idx)?;
+                let literal = try_cast_literal(&literal, left.data_type())?;
+                gt_eq(&left, literal.as_ref())
+            }))
+        } else {
+            // A missing column, treating it as null.
+            self.build_always_false()
+        }
+    }
+
+    fn eq(
+        &mut self,
+        reference: &BoundReference,
+        literal: &Datum,
+        _predicate: &BoundPredicate,
+    ) -> Result<Box<PredicateResult>> {
+        if let Some(idx) = self.bound_reference(reference)? {
+            let literal = get_arrow_datum(literal)?;
+
+            Ok(Box::new(move |batch| {
+                let left = project_column(&batch, idx)?;
+                let literal = try_cast_literal(&literal, left.data_type())?;
+                eq(&left, literal.as_ref())
+            }))
+        } else {
+            // A missing column, treating it as null.
+            self.build_always_false()
+        }
+    }
+
+    fn not_eq(
+        &mut self,
+        reference: &BoundReference,
+        literal: &Datum,
+        _predicate: &BoundPredicate,
+    ) -> Result<Box<PredicateResult>> {
+        if let Some(idx) = self.bound_reference(reference)? {
+            let literal = get_arrow_datum(literal)?;
+
+            Ok(Box::new(move |batch| {
+                let left = project_column(&batch, idx)?;
+                let literal = try_cast_literal(&literal, left.data_type())?;
+                neq(&left, literal.as_ref())
+            }))
+        } else {
+            // Missing column, treated as null. NOTE(review): `not_in` answers true here - confirm.
+            self.build_always_false()
+        }
+    }
+
+    fn starts_with(
+        &mut self,
+        reference: &BoundReference,
+        literal: &Datum,
+        _predicate: &BoundPredicate,
+    ) -> Result<Box<PredicateResult>> {
+        if let Some(idx) = self.bound_reference(reference)? {
+            let literal = get_arrow_datum(literal)?;
+
+            Ok(Box::new(move |batch| {
+                let left = project_column(&batch, idx)?;
+                let literal = try_cast_literal(&literal, left.data_type())?;
+                starts_with(&left, literal.as_ref())
+            }))
+        } else {
+            // A missing column, treating it as null.
+            self.build_always_false()
+        }
+    }
+
+    fn not_starts_with(
+        &mut self,
+        reference: &BoundReference,
+        literal: &Datum,
+        _predicate: &BoundPredicate,
+    ) -> Result<Box<PredicateResult>> {
+        if let Some(idx) = self.bound_reference(reference)? {
+            let literal = get_arrow_datum(literal)?;
+
+            Ok(Box::new(move |batch| {
+                let left = project_column(&batch, idx)?;
+                let literal = try_cast_literal(&literal, left.data_type())?;
+                // update here if arrow ever adds a native not_starts_with
+                not(&starts_with(&left, literal.as_ref())?)
+            }))
+        } else {
+            // A missing column, treating it as null.
+            self.build_always_true()
+        }
+    }
+
+    fn r#in(
+        &mut self,
+        reference: &BoundReference,
+        literals: &FnvHashSet<Datum>,
+        _predicate: &BoundPredicate,
+    ) -> Result<Box<PredicateResult>> {
+        if let Some(idx) = self.bound_reference(reference)? {
+            // A literal that cannot be converted is reported as an error, not a panic.
+            let literals = literals
+                .iter()
+                .map(get_arrow_datum)
+                .collect::<Result<Vec<_>>>()?;
+            Ok(Box::new(move |batch| {
+                // update this if arrow ever adds a native is_in kernel
+                let left = project_column(&batch, idx)?;
+                // Start from all-false, then OR in one equality mask per literal.
+                let mut acc = BooleanArray::from(vec![false; batch.num_rows()]);
+                for literal in &literals {
+                    let literal = try_cast_literal(literal, left.data_type())?;
+                    acc = or(&acc, &eq(&left, literal.as_ref())?)?
+                }
+
+                Ok(acc)
+            }))
+        } else {
+            // A missing column, treating it as null.
+            self.build_always_false()
+        }
+    }
+
+    fn not_in(
+        &mut self,
+        reference: &BoundReference,
+        literals: &FnvHashSet<Datum>,
+        _predicate: &BoundPredicate,
+    ) -> Result<Box<PredicateResult>> {
+        if let Some(idx) = self.bound_reference(reference)? {
+            // A literal that cannot be converted is reported as an error, not a panic.
+            let literals = literals
+                .iter()
+                .map(get_arrow_datum)
+                .collect::<Result<Vec<_>>>()?;
+            Ok(Box::new(move |batch| {
+                // update this if arrow ever adds a native not_in kernel
+                let left = project_column(&batch, idx)?;
+                let mut acc = BooleanArray::from(vec![true; batch.num_rows()]);
+                for literal in &literals {
+                    let literal = try_cast_literal(literal, left.data_type())?;
+                    acc = and(&acc, &neq(&left, literal.as_ref())?)?
+                }
+
+                Ok(acc)
+            }))
+        } else {
+            // A missing column, treating it as null.
+            self.build_always_true()
+        }
+    }
+}
+
+/// The Parquet reader may hand back an array whose Arrow type differs from the Arrow type
+/// that Iceberg uses for literals although both are effectively the same logical type,
+/// i.e. LargeUtf8 and Utf8 or Utf8View and Utf8 or Utf8View and LargeUtf8.
+///
+/// The Arrow compute kernels that we use must match the type exactly, so the literal is cast
+/// to the column's type first; when the types already agree the literal is shared unchanged.
+fn try_cast_literal(
+    literal: &Arc<dyn ArrowDatum + Send + Sync>,
+    column_type: &DataType,
+) -> std::result::Result<Arc<dyn ArrowDatum + Send + Sync>, ArrowError> {
+    let (literal_array, _) = literal.get();
+    if literal_array.data_type() != column_type {
+        // Types differ: cast, then wrap the result as a scalar again.
+        let converted = cast(literal_array, column_type)?;
+        return Ok(Arc::new(Scalar::new(converted)));
+    }
+
+    // No cast required
+    Ok(Arc::clone(literal))
+}
+
+#[cfg(test)]
+mod tests {
+    use std::collections::{HashMap, HashSet};
+    use std::sync::Arc;
+
+    use arrow_array::{Array, BooleanArray, RecordBatch};
+    use arrow_schema::{DataType, Field, Schema as ArrowSchema};
+    use parquet::schema::parser::parse_message_type;
+    use parquet::schema::types::SchemaDescriptor;
+
+    use super::{CollectFieldIdVisitor, PredicateConverter};
+    use crate::expr::visitors::bound_predicate_visitor::visit;
+    use crate::expr::{Bind, Predicate, Reference};
+    use crate::spec::{NestedField, PrimitiveType, Schema, SchemaRef, Type};
+
+    /// Schema shared by the tests below: `foo`, `bar`, `baz` and `qux` with field ids 1 to 4.
+    fn table_schema_simple() -> SchemaRef {
+        let schema = Schema::builder()
+            .with_schema_id(1)
+            .with_identifier_field_ids(vec![2])
+            .with_fields(vec![
+                NestedField::optional(1, "foo", Type::Primitive(PrimitiveType::String)).into(),
+                NestedField::required(2, "bar", Type::Primitive(PrimitiveType::Int)).into(),
+                NestedField::optional(3, "baz", Type::Primitive(PrimitiveType::Boolean)).into(),
+                NestedField::optional(4, "qux", Type::Primitive(PrimitiveType::Float)).into(),
+            ])
+            .build()
+            .unwrap();
+        Arc::new(schema)
+    }
+
+    #[test]
+    fn test_collect_field_id() {
+        // `qux` is field id 4 in `table_schema_simple`.
+        let predicate = Reference::new("qux").is_null();
+        let bound = predicate.bind(table_schema_simple(), true).unwrap();
+
+        let mut collector = CollectFieldIdVisitor {
+            field_ids: HashSet::new(),
+        };
+        visit(&mut collector, &bound).unwrap();
+
+        // A single leaf predicate must contribute exactly its own field id.
+        let expected = HashSet::from([4_i32]);
+
+        assert_eq!(collector.field_ids, expected);
+    }
+
+    /// Both operands of an `AND` contribute their field ids to the collected set.
+    #[test]
+    fn test_collect_field_id_with_and() {
+        let qux_is_null = Reference::new("qux").is_null();
+        let baz_is_null = Reference::new("baz").is_null();
+        let bound_expr = qux_is_null
+            .and(baz_is_null)
+            .bind(table_schema_simple(), true)
+            .unwrap();
+
+        let mut visitor = CollectFieldIdVisitor {
+            field_ids: HashSet::default(),
+        };
+        visit(&mut visitor, &bound_expr).unwrap();
+
+        // `qux` has field id 4 and `baz` has field id 3.
+        assert_eq!(visitor.field_ids, HashSet::from([4_i32, 3]));
+    }
+
+    /// Both operands of an `OR` contribute their field ids to the collected set.
+    #[test]
+    fn test_collect_field_id_with_or() {
+        let qux_is_null = Reference::new("qux").is_null();
+        let baz_is_null = Reference::new("baz").is_null();
+        let bound_expr = qux_is_null
+            .or(baz_is_null)
+            .bind(table_schema_simple(), true)
+            .unwrap();
+
+        let mut visitor = CollectFieldIdVisitor {
+            field_ids: HashSet::default(),
+        };
+        visit(&mut visitor, &bound_expr).unwrap();
+
+        // `qux` has field id 4 and `baz` has field id 3.
+        assert_eq!(visitor.field_ids, HashSet::from([4_i32, 3]));
+    }
+
+    /// Binds `predicate` against `schema`, converts it with `PredicateConverter`
+    /// over a one-column Parquet schema (`qux`, float, field id 4 at leaf index 0),
+    /// and returns the result of evaluating the converted filter on `batch`.
+    fn apply_predicate_to_batch(
+        predicate: Predicate,
+        schema: SchemaRef,
+        batch: RecordBatch,
+    ) -> BooleanArray {
+        let bound_predicate = predicate.bind(schema, true).unwrap();
+
+        // Minimal Parquet schema: a single optional float column carrying field id 4.
+        let message_type = "
+            message schema {
+              optional float qux = 4;
+            }
+        ";
+        let parquet_schema = SchemaDescriptor::new(Arc::new(
+            parse_message_type(message_type).expect("parse schema"),
+        ));
+
+        // Field id 4 maps to leaf column 0, which is also the only projected column.
+        let field_id_to_leaf = HashMap::from([(4i32, 0usize)]);
+        let projected_leaves = vec![0usize];
+
+        let mut converter = PredicateConverter {
+            parquet_schema: &parquet_schema,
+            column_map: &field_id_to_leaf,
+            column_indices: &projected_leaves,
+        };
+
+        let mut evaluate = visit(&mut converter, &bound_predicate).unwrap();
+        evaluate(batch).unwrap()
+    }
+
+    /// `is_nan` / `is_not_nan` must yield a definite boolean for NULL input rows
+    /// instead of propagating the NULL.
+    #[test]
+    fn test_predicate_converter_nan() {
+        use arrow_array::Float32Array;
+
+        let schema = table_schema_simple();
+        let arrow_schema = Arc::new(ArrowSchema::new(vec![Field::new(
+            "qux",
+            DataType::Float32,
+            true,
+        )]));
+        let values = vec![Some(1.0f32), Some(f32::NAN), None, Some(0.0f32)];
+
+        // Each predicate is evaluated against a fresh batch holding the same four rows.
+        let make_batch = || {
+            RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new(Float32Array::from(
+                values.clone(),
+            ))])
+            .unwrap()
+        };
+
+        // is_nan: non-null-propagating per Java's implementation - NULL → false
+        let is_nan_result = apply_predicate_to_batch(
+            Reference::new("qux").is_nan(),
+            schema.clone(),
+            make_batch(),
+        );
+        let observed: Vec<bool> = (0..4).map(|row| is_nan_result.value(row)).collect();
+        assert_eq!(observed, [false, true, false, false]);
+        assert!(!is_nan_result.is_null(2));
+
+        // not_nan: non-null-propagating per Java's implementation - NULL → true
+        let not_nan_result =
+            apply_predicate_to_batch(Reference::new("qux").is_not_nan(), schema, make_batch());
+        let observed: Vec<bool> = (0..4).map(|row| not_nan_result.value(row)).collect();
+        assert_eq!(observed, [true, false, true, true]);
+        assert!(!not_nan_result.is_null(2));
+    }
+}
diff --git a/crates/iceberg/src/arrow/reader/projection.rs b/crates/iceberg/src/arrow/reader/projection.rs
new file mode 100644
index 0000000000..6d1a0f927d
--- /dev/null
+++ b/crates/iceberg/src/arrow/reader/projection.rs
@@ -0,0 +1,1920 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Column projection for `ArrowReader`: building the Parquet projection mask
+//! from Iceberg field IDs, and mapping field IDs between Iceberg and Parquet
+//! (including fallback handling for files without embedded IDs).
+
+use std::collections::{HashMap, HashSet};
+use std::str::FromStr;
+use std::sync::Arc;
+
+use arrow_schema::{Field, Schema as ArrowSchema, SchemaRef as ArrowSchemaRef};
+use parquet::arrow::{PARQUET_FIELD_ID_META_KEY, ProjectionMask};
+use parquet::schema::types::{SchemaDescriptor, Type as ParquetType};
+
+use super::{ArrowReader, CollectFieldIdVisitor};
+use crate::arrow::arrow_schema_to_schema;
+use crate::error::Result;
+use crate::expr::BoundPredicate;
+use crate::expr::visitors::bound_predicate_visitor::visit;
+use crate::spec::{NameMapping, NestedField, PrimitiveType, Schema, Type};
+use crate::{Error, ErrorKind};
+
+impl ArrowReader {
+    /// Gathers the Iceberg field IDs referenced by `predicate` and builds the map
+    /// from field ID to Parquet column index for `parquet_schema`.
+    ///
+    /// When the Parquet schema carries no embedded field IDs, the map comes from
+    /// the position-based fallback instead.
+    pub(super) fn build_field_id_set_and_map(
+        parquet_schema: &SchemaDescriptor,
+        predicate: &BoundPredicate,
+    ) -> Result<(HashSet<i32>, HashMap<i32, usize>)> {
+        // Walk the bound predicate and record every field ID it references.
+        let mut id_collector = CollectFieldIdVisitor {
+            field_ids: HashSet::default(),
+        };
+        visit(&mut id_collector, predicate)?;
+        let referenced_ids = id_collector.field_ids();
+
+        // Prefer IDs embedded in the file; otherwise fall back to positional IDs.
+        let id_to_column = build_field_id_map(parquet_schema)?
+            .unwrap_or_else(|| build_fallback_field_id_map(parquet_schema));
+
+        Ok((referenced_ids, id_to_column))
+    }
+
+    /// Appends the IDs of every primitive (leaf) field reachable from `field` to
+    /// `field_ids`, in depth-first declaration order.
+    ///
+    /// Parquet projects at the leaf-column level, so struct/list/map fields are
+    /// expanded into the primitive fields they contain.
+    fn include_leaf_field_id(field: &NestedField, field_ids: &mut Vec<i32>) {
+        match &*field.field_type {
+            Type::Primitive(_) => field_ids.push(field.id),
+            Type::Struct(struct_type) => struct_type
+                .fields()
+                .iter()
+                .for_each(|child| Self::include_leaf_field_id(child, field_ids)),
+            Type::List(list_type) => {
+                Self::include_leaf_field_id(&list_type.element_field, field_ids)
+            }
+            Type::Map(map_type) => {
+                // Key leaves come before value leaves.
+                Self::include_leaf_field_id(&map_type.key_field, field_ids);
+                Self::include_leaf_field_id(&map_type.value_field, field_ids);
+            }
+        }
+    }
+
+    /// Builds the Parquet `ProjectionMask` for the requested Iceberg `field_ids`.
+    ///
+    /// * An empty `field_ids` selects every column.
+    /// * With `use_fallback` set, projection is position-based over top-level columns.
+    /// * Otherwise the requested fields are expanded to their leaf field IDs and
+    ///   matched against the field IDs embedded in `arrow_schema`, subject to the
+    ///   type-promotion rules of the nested `type_promotion_is_valid`.
+    pub(super) fn get_arrow_projection_mask(
+        field_ids: &[i32],
+        iceberg_schema_of_task: &Schema,
+        parquet_schema: &SchemaDescriptor,
+        arrow_schema: &ArrowSchemaRef,
+        use_fallback: bool, // Whether file lacks embedded field IDs (e.g., migrated from Hive/Spark)
+    ) -> Result<ProjectionMask> {
+        /// Reports whether a column stored as `file_type` may be read as `projected_type`.
+        /// A missing type on either side is never a valid promotion.
+        fn type_promotion_is_valid(
+            file_type: Option<&PrimitiveType>,
+            projected_type: Option<&PrimitiveType>,
+        ) -> bool {
+            let (Some(file), Some(projected)) = (file_type, projected_type) else {
+                return false;
+            };
+
+            // Identical types always match.
+            if file == projected {
+                return true;
+            }
+
+            match (file, projected) {
+                // Widening numeric promotions.
+                (PrimitiveType::Int, PrimitiveType::Long)
+                | (PrimitiveType::Float, PrimitiveType::Double) => true,
+                // Decimal precision may grow, but the scale must be unchanged.
+                (
+                    PrimitiveType::Decimal {
+                        precision: file_precision,
+                        scale: file_scale,
+                    },
+                    PrimitiveType::Decimal {
+                        precision: requested_precision,
+                        scale: requested_scale,
+                    },
+                ) => requested_precision >= file_precision && file_scale == requested_scale,
+                // Uuid is stored as Fixed(16) in the Parquet file, so it reads back as Fixed(16).
+                (PrimitiveType::Fixed(16), PrimitiveType::Uuid) => true,
+                // Some Parquet writers (e.g. Snowflake) store FIXED_LEN_BYTE_ARRAY as
+                // Arrow Binary rather than FixedSizeBinary. Allow Binary -> Fixed(N)
+                // since the underlying bytes are the same.
+                (PrimitiveType::Binary, PrimitiveType::Fixed(_)) => true,
+                _ => false,
+            }
+        }
+
+        if field_ids.is_empty() {
+            return Ok(ProjectionMask::all());
+        }
+
+        if use_fallback {
+            // Position-based projection necessary because file lacks embedded field IDs
+            return Self::get_arrow_projection_mask_fallback(field_ids, parquet_schema);
+        }
+
+        // Field-ID-based projection using embedded field IDs from Parquet metadata.
+        // Parquet's columnar format requires leaf-level (not top-level struct/list/map)
+        // projection, so expand every requested field into its leaves. IDs unknown to
+        // the task schema are skipped.
+        let mut leaf_field_ids = Vec::new();
+        field_ids
+            .iter()
+            .filter_map(|field_id| iceberg_schema_of_task.field_by_id(*field_id))
+            .for_each(|field| Self::include_leaf_field_id(field, &mut leaf_field_ids));
+
+        Self::get_arrow_projection_mask_with_field_ids(
+            &leaf_field_ids,
+            iceberg_schema_of_task,
+            parquet_schema,
+            arrow_schema,
+            type_promotion_is_valid,
+        )
+    }
+
+    /// Field-ID-based projection using the IDs embedded in the Parquet metadata.
+    /// For iceberg-java compatibility with ParquetSchemaUtil.pruneColumns().
+    ///
+    /// Returns a mask over the leaf columns whose embedded field ID is listed in
+    /// `leaf_field_ids`, is known to `iceberg_schema_of_task`, and passes
+    /// `type_promotion_is_valid`. If no requested leaf qualifies, every column is
+    /// selected. Fails if the pre-projected Arrow schema cannot be converted to an
+    /// Iceberg schema.
+    fn get_arrow_projection_mask_with_field_ids(
+        leaf_field_ids: &[i32],
+        iceberg_schema_of_task: &Schema,
+        parquet_schema: &SchemaDescriptor,
+        arrow_schema: &ArrowSchemaRef,
+        type_promotion_is_valid: fn(Option<&PrimitiveType>, Option<&PrimitiveType>) -> bool,
+    ) -> Result<ProjectionMask> {
+        let arrow_fields = arrow_schema.fields();
+
+        // First pass: remember the field ID of every leaf that carries one, and keep
+        // only the requested leaves. Pre-projecting avoids converting Arrow types of
+        // unselected columns that may not be supported yet.
+        let mut field_id_by_leaf: HashMap<arrow_schema::FieldRef, i32> = HashMap::new();
+        let selected_leaves = arrow_fields.filter_leaves(|_, leaf| {
+            let parsed_id = leaf
+                .metadata()
+                .get(PARQUET_FIELD_ID_META_KEY)
+                .and_then(|raw_id| i32::from_str(raw_id).ok());
+
+            match parsed_id {
+                Some(id) => {
+                    field_id_by_leaf.insert(Arc::clone(leaf), id);
+                    leaf_field_ids.contains(&id)
+                }
+                None => false,
+            }
+        });
+        let selected_arrow_schema =
+            ArrowSchema::new_with_metadata(selected_leaves, arrow_schema.metadata().clone());
+        let file_iceberg_schema = arrow_schema_to_schema(&selected_arrow_schema)?;
+
+        // Second pass: record the leaf index of every selected column whose file type
+        // can be read as the type the task schema expects.
+        let mut leaf_index_by_id = HashMap::new();
+        arrow_fields.filter_leaves(|leaf_idx, leaf| {
+            let Some(&id) = field_id_by_leaf.get(leaf) else {
+                return false;
+            };
+
+            let (Some(task_field), Some(file_field)) = (
+                iceberg_schema_of_task.field_by_id(id),
+                file_iceberg_schema.field_by_id(id),
+            ) else {
+                return false;
+            };
+
+            let promotable = type_promotion_is_valid(
+                file_field.field_type.as_primitive_type(),
+                task_field.field_type.as_primitive_type(),
+            );
+            if promotable {
+                leaf_index_by_id.insert(id, leaf_idx);
+            }
+            promotable
+        });
+
+        // Schema evolution: New columns may not exist in old Parquet files.
+        // We only project existing columns; RecordBatchTransformer adds default/NULL values.
+        let indices: Vec<usize> = leaf_field_ids
+            .iter()
+            .filter_map(|id| leaf_index_by_id.get(id).copied())
+            .collect();
+
+        if indices.is_empty() {
+            // Edge case: All requested columns are new (don't exist in file).
+            // Project all columns so RecordBatchTransformer has a batch to transform.
+            return Ok(ProjectionMask::all());
+        }
+
+        Ok(ProjectionMask::leaves(parquet_schema, indices))
+    }
+
+    /// Fallback projection for Parquet files without field IDs.
+    /// Uses position-based matching: field ID N → column position N-1.
+    /// Projects entire top-level columns (including nested content) for iceberg-java compatibility.
+    ///
+    /// Field IDs with no positional counterpart (non-positive, or beyond the number
+    /// of top-level columns) are skipped; RecordBatchTransformer adds those columns
+    /// with NULL values. If nothing matches, every column is selected.
+    fn get_arrow_projection_mask_fallback(
+        field_ids: &[i32],
+        parquet_schema: &SchemaDescriptor,
+    ) -> Result<ProjectionMask> {
+        let root_count = parquet_schema.root_schema().get_fields().len();
+
+        // Position-based: field_id N → column N-1 (field IDs are 1-indexed).
+        // The conversion is checked so that IDs <= 0 are rejected explicitly rather
+        // than relying on a wrapping cast (and `i32::MIN - 1` can no longer overflow).
+        let root_indices: Vec<usize> = field_ids
+            .iter()
+            .filter_map(|&field_id| {
+                let parquet_pos = usize::try_from(field_id).ok()?.checked_sub(1)?;
+                (parquet_pos < root_count).then_some(parquet_pos)
+            })
+            .collect();
+
+        if root_indices.is_empty() {
+            Ok(ProjectionMask::all())
+        } else {
+            Ok(ProjectionMask::roots(parquet_schema, root_indices))
+        }
+    }
+}
+
+/// Maps each embedded Parquet field ID to the index of its leaf column.
+///
+/// Returns `Ok(None)` as soon as a leaf column without a field ID is encountered
+/// (e.g. files from migrated tables), and an error if a leaf column is reported
+/// as a group type.
+pub(super) fn build_field_id_map(
+    parquet_schema: &SchemaDescriptor,
+) -> Result<Option<HashMap<i32, usize>>> {
+    let mut id_to_leaf = HashMap::new();
+
+    for (leaf_idx, column) in parquet_schema.columns().iter().enumerate() {
+        let field_type = column.self_type();
+
+        // Leaf columns are expected to be primitive; anything else is invalid data.
+        let ParquetType::PrimitiveType { basic_info, .. } = field_type else {
+            return Err(Error::new(
+                ErrorKind::DataInvalid,
+                format!("Leaf column in schema should be primitive type but got {field_type:?}"),
+            ));
+        };
+
+        // A single leaf without an ID means the file cannot be mapped by field ID.
+        if !basic_info.has_id() {
+            return Ok(None);
+        }
+        id_to_leaf.insert(basic_info.id(), leaf_idx);
+    }
+
+    Ok(Some(id_to_leaf))
+}
+
+/// Returns the number of primitive (leaf) columns in a Parquet type, recursing into groups.
+///
+/// A primitive type counts as one leaf; a group counts as the sum of the leaves of
+/// its child fields (so a group with no children counts as zero).
+fn leaf_count(ty: &ParquetType) -> usize {
+    if ty.is_primitive() {
+        1
+    } else {
+        ty.get_fields().iter().map(|f| leaf_count(f)).sum()
+    }
+}
+
+/// Maps position-based fallback field IDs to leaf column indices for Parquet files
+/// without embedded field IDs. Only primitive top-level fields get an entry.
+///
+/// The fallback ID of a top-level field is its 1-based position among the root
+/// fields, matching the ordinal IDs that `add_fallback_field_ids_to_arrow_schema`
+/// assigns to top-level Arrow fields. The mapped value is the index of the field's
+/// leaf column, which runs ahead of the top-level position whenever an earlier
+/// nested field (struct/list/map) expands into several leaf columns.
+///
+/// Mirrors iceberg-java's ParquetSchemaUtil.addFallbackIds() which iterates
+/// fileSchema.getFields() assigning ordinal IDs to top-level fields.
+pub(super) fn build_fallback_field_id_map(
+    parquet_schema: &SchemaDescriptor,
+) -> HashMap<i32, usize> {
+    let root_fields = parquet_schema.root_schema().get_fields();
+
+    let mut id_to_leaf = HashMap::new();
+    // Index of the first leaf column belonging to the root field being visited.
+    let mut first_leaf_of_root = 0;
+
+    for (position, root_field) in root_fields.iter().enumerate() {
+        if root_field.is_primitive() {
+            let fallback_id = (position + 1) as i32;
+            id_to_leaf.insert(fallback_id, first_leaf_of_root);
+        }
+        // Skip past every leaf this root field contributes.
+        first_leaf_of_root += leaf_count(root_field);
+    }
+
+    id_to_leaf
+}
+
+/// Assigns Iceberg field IDs to the top-level fields of an Arrow schema by looking
+/// up each column name in `name_mapping`, for Parquet files lacking field IDs
+/// (e.g., files migrated from Hive/Spark via add_files).
+///
+/// Per Iceberg spec Column Projection rule #2:
+/// "Use schema.name-mapping.default metadata to map field id to columns without field id"
+/// https://iceberg.apache.org/spec/#column-projection
+///
+/// Corresponds to Java's ParquetSchemaUtil.applyNameMapping() and ApplyNameMapping visitor.
+/// The key difference is Java operates on Parquet MessageType, while we operate on Arrow Schema.
+///
+/// # Arguments
+/// * `arrow_schema` - Arrow schema from Parquet file (without field IDs)
+/// * `name_mapping` - Name mapping from table metadata (TableProperties.DEFAULT_NAME_MAPPING)
+///
+/// # Returns
+/// Arrow schema whose fields carry the mapped field ID in their metadata. Fields
+/// that are absent from the mapping, or mapped without an ID, are returned
+/// without a field ID.
+pub(super) fn apply_name_mapping_to_arrow_schema(
+    arrow_schema: ArrowSchemaRef,
+    name_mapping: &NameMapping,
+) -> Result<Arc<ArrowSchema>> {
+    debug_assert!(
+        arrow_schema
+            .fields()
+            .first()
+            .is_none_or(|f| f.metadata().get(PARQUET_FIELD_ID_META_KEY).is_none()),
+        "Schema already has field IDs - name mapping should not be applied"
+    );
+
+    let mut remapped_fields = Vec::with_capacity(arrow_schema.fields().len());
+
+    for field in arrow_schema.fields().iter() {
+        // Take the first mapping entry that lists this column name, then its ID
+        // (if any). Corresponds to Java's ApplyNameMapping visitor which calls
+        // nameMapping.find(currentPath()) and returns field.withId() if found.
+        let column_name = field.name();
+        let mapped_id = name_mapping
+            .fields()
+            .iter()
+            .find(|candidate| candidate.names().iter().any(|alias| alias == column_name))
+            .and_then(|candidate| candidate.field_id());
+
+        // Without a mapped ID the field stays as-is (matching Java's behavior of
+        // returning the field unchanged); projection later filters out fields
+        // that have no ID.
+        let mut metadata = field.metadata().clone();
+        if let Some(field_id) = mapped_id {
+            metadata.insert(PARQUET_FIELD_ID_META_KEY.to_string(), field_id.to_string());
+        }
+
+        remapped_fields.push(
+            Field::new(field.name(), field.data_type().clone(), field.is_nullable())
+                .with_metadata(metadata),
+        );
+    }
+
+    let remapped_schema =
+        ArrowSchema::new_with_metadata(remapped_fields, arrow_schema.metadata().clone());
+    Ok(Arc::new(remapped_schema))
+}
+
+/// Returns a copy of `arrow_schema` in which every top-level field carries a
+/// position-based fallback field ID, for Parquet files lacking embedded IDs.
+/// Enables projection on migrated files (e.g., from Hive/Spark).
+///
+/// Why at schema level (not per-batch): Efficiency - avoids repeated schema modification.
+/// Why only top-level: Nested projection uses leaf column indices, not parent struct IDs.
+/// Why 1-indexed: Compatibility with iceberg-java's ParquetSchemaUtil.addFallbackIds().
+pub(super) fn add_fallback_field_ids_to_arrow_schema(
+    arrow_schema: &ArrowSchemaRef,
+) -> Arc<ArrowSchema> {
+    debug_assert!(
+        arrow_schema
+            .fields()
+            .first()
+            .is_none_or(|f| f.metadata().get(PARQUET_FIELD_ID_META_KEY).is_none()),
+        "Schema already has field IDs"
+    );
+
+    let mut fields_with_ids = Vec::with_capacity(arrow_schema.fields().len());
+
+    for (position, field) in arrow_schema.fields().iter().enumerate() {
+        // 1-indexed for Java compatibility
+        let fallback_id = (position + 1) as i32;
+
+        let mut metadata = field.metadata().clone();
+        metadata.insert(
+            PARQUET_FIELD_ID_META_KEY.to_string(),
+            fallback_id.to_string(),
+        );
+
+        fields_with_ids.push(
+            Field::new(field.name(), field.data_type().clone(), field.is_nullable())
+                .with_metadata(metadata),
+        );
+    }
+
+    let schema_with_ids =
+        ArrowSchema::new_with_metadata(fields_with_ids, arrow_schema.metadata().clone());
+    Arc::new(schema_with_ids)
+}
+
+#[cfg(test)]
+mod tests {
+    use std::collections::HashMap;
+    use std::fs::File;
+    use std::sync::Arc;
+
+    use arrow_array::cast::AsArray;
+    use arrow_array::{
+        Array, ArrayRef, BinaryArray, FixedSizeBinaryArray, Int32Array, RecordBatch, StringArray,
+    };
+    use arrow_schema::{DataType, Field, Schema as ArrowSchema, TimeUnit};
+    use futures::TryStreamExt;
+    use parquet::arrow::{ArrowWriter, PARQUET_FIELD_ID_META_KEY, ProjectionMask};
+    use parquet::basic::Compression;
+    use parquet::file::properties::WriterProperties;
+    use parquet::schema::parser::parse_message_type;
+    use parquet::schema::types::SchemaDescriptor;
+    use tempfile::TempDir;
+
+    use crate::ErrorKind;
+    use crate::arrow::{ArrowReader, ArrowReaderBuilder};
+    use crate::expr::{Bind, Reference};
+    use crate::io::FileIO;
+    use crate::scan::{FileScanTask, FileScanTaskStream};
+    use crate::spec::{DataFileFormat, Datum, NestedField, PrimitiveType, Schema, Type};
+
+    /// Projection must fail only when a *selected* column has an Arrow type that
+    /// cannot be converted, and must succeed when such columns are left out.
+    #[test]
+    fn test_arrow_projection_mask() {
+        // Attaches a Parquet field-id metadata entry to an Arrow field.
+        let with_field_id = |field: Field, id: &str| {
+            field.with_metadata(HashMap::from([(
+                PARQUET_FIELD_ID_META_KEY.to_string(),
+                id.to_string(),
+            )]))
+        };
+
+        let decimal_38_3 = Type::Primitive(PrimitiveType::Decimal {
+            precision: 38,
+            scale: 3,
+        });
+        let schema = Arc::new(
+            Schema::builder()
+                .with_schema_id(1)
+                .with_identifier_field_ids(vec![1])
+                .with_fields(vec![
+                    NestedField::required(1, "c1", Type::Primitive(PrimitiveType::String)).into(),
+                    NestedField::optional(2, "c2", Type::Primitive(PrimitiveType::Int)).into(),
+                    NestedField::optional(3, "c3", decimal_38_3).into(),
+                ])
+                .build()
+                .unwrap(),
+        );
+
+        let arrow_schema = Arc::new(ArrowSchema::new(vec![
+            with_field_id(Field::new("c1", DataType::Utf8, false), "1"),
+            // Type not supported
+            with_field_id(
+                Field::new("c2", DataType::Duration(TimeUnit::Microsecond), true),
+                "2",
+            ),
+            // Precision is beyond the supported range
+            with_field_id(Field::new("c3", DataType::Decimal128(39, 3), true), "3"),
+        ]));
+
+        let message_type = "
+message schema {
+  required binary c1 (STRING) = 1;
+  optional int32 c2 (INTEGER(8,true)) = 2;
+  optional fixed_len_byte_array(17) c3 (DECIMAL(39,3)) = 3;
+}
+    ";
+        let parquet_schema = SchemaDescriptor::new(Arc::new(
+            parse_message_type(message_type).expect("should parse schema"),
+        ));
+
+        // Runs the field-ID-based (non-fallback) projection for the given field IDs.
+        let project = |ids: &[i32]| {
+            ArrowReader::get_arrow_projection_mask(
+                ids,
+                &schema,
+                &parquet_schema,
+                &arrow_schema,
+                false,
+            )
+        };
+
+        // Try projecting the fields c2 and c3 with the unsupported data types
+        let err = project(&[1, 2, 3]).unwrap_err();
+        assert_eq!(err.kind(), ErrorKind::DataInvalid);
+        assert_eq!(
+            err.to_string(),
+            "DataInvalid => Unsupported Arrow data type: Duration(µs)"
+        );
+
+        // Omitting field c2, we still get an error due to c3 being selected
+        let err = project(&[1, 3]).unwrap_err();
+        assert_eq!(err.kind(), ErrorKind::DataInvalid);
+        assert_eq!(
+            err.to_string(),
+            "DataInvalid => Failed to create decimal type, source: DataInvalid => Decimals with precision larger than 38 are not supported: 39"
+        );
+
+        // Finally avoid selecting fields with unsupported data types
+        let mask = project(&[1]).expect("Some ProjectionMask");
+        assert_eq!(mask, ProjectionMask::leaves(&parquet_schema, vec![0]));
+    }
+
+    /// Schema evolution: an old Parquet file containing only column 'a' is read
+    /// with a newer table schema that also declares column 'b'.
+    /// Expectations:
+    /// 1. get_arrow_projection_mask tolerates the column missing from the file
+    /// 2. RecordBatchTransformer fills the missing column 'b' with NULL values
+    #[tokio::test]
+    async fn test_schema_evolution_add_column() {
+        use arrow_array::{Array, Int32Array};
+
+        // Table schema after evolution: 'b' was added later, the file only has 'a'.
+        let evolved_schema = Arc::new(
+            Schema::builder()
+                .with_schema_id(2)
+                .with_fields(vec![
+                    NestedField::required(1, "a", Type::Primitive(PrimitiveType::Int)).into(),
+                    NestedField::optional(2, "b", Type::Primitive(PrimitiveType::Int)).into(),
+                ])
+                .build()
+                .unwrap(),
+        );
+
+        // Arrow schema of the old file: just column 'a', tagged with field id 1.
+        let field_id_meta =
+            HashMap::from([(PARQUET_FIELD_ID_META_KEY.to_string(), "1".to_string())]);
+        let file_arrow_schema = Arc::new(ArrowSchema::new(vec![
+            Field::new("a", DataType::Int32, false).with_metadata(field_id_meta),
+        ]));
+
+        // Write the old file into a temporary table location.
+        let tmp_dir = TempDir::new().unwrap();
+        let table_location = tmp_dir.path().to_str().unwrap().to_string();
+        let file_path = format!("{table_location}/old_file.parquet");
+        let file_io = FileIO::new_with_fs();
+
+        let column_a: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3]));
+        let to_write = RecordBatch::try_new(file_arrow_schema.clone(), vec![column_a]).unwrap();
+
+        let props = WriterProperties::builder()
+            .set_compression(Compression::SNAPPY)
+            .build();
+        let file = File::create(&file_path).unwrap();
+        let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap();
+        writer.write(&to_write).expect("Writing batch");
+        writer.close().unwrap();
+
+        // Read the old file back through a scan task that uses the NEW schema.
+        let reader = ArrowReaderBuilder::new(file_io).build();
+        let task = FileScanTask {
+            file_size_in_bytes: std::fs::metadata(&file_path).unwrap().len(),
+            start: 0,
+            length: 0,
+            record_count: None,
+            data_file_path: file_path,
+            data_file_format: DataFileFormat::Parquet,
+            schema: evolved_schema.clone(),
+            project_field_ids: vec![1, 2], // Request both columns 'a' and 'b'
+            predicate: None,
+            deletes: vec![],
+            partition: None,
+            partition_spec: None,
+            name_mapping: None,
+            case_sensitive: false,
+        };
+        let tasks = Box::pin(futures::stream::iter(vec![Ok(task)])) as FileScanTaskStream;
+
+        let batches = reader
+            .read(tasks)
+            .unwrap()
+            .stream()
+            .try_collect::<Vec<RecordBatch>>()
+            .await
+            .unwrap();
+
+        // A single batch with both columns and all three rows is expected.
+        assert_eq!(batches.len(), 1);
+        let batch = &batches[0];
+        assert_eq!(batch.num_columns(), 2);
+        assert_eq!(batch.num_rows(), 3);
+
+        // Column 'a' should have the original data
+        let col_a = batch
+            .column(0)
+            .as_primitive::<arrow_array::types::Int32Type>();
+        assert_eq!(col_a.values(), &[1, 2, 3]);
+
+        // Column 'b' should be all NULLs (it didn't exist in the old file)
+        let col_b = batch
+            .column(1)
+            .as_primitive::<arrow_array::types::Int32Type>();
+        assert_eq!(col_b.null_count(), 3);
+        assert!((0..3).all(|row| col_b.is_null(row)));
+    }
+
+    /// Test reading a Parquet file written without field ID metadata (e.g., migrated tables).
+    /// Projecting field IDs 1 and 2 must return `name` and `age` (position-based fallback path).
+    ///
+    /// Corresponds to Java's ParquetSchemaUtil.addFallbackIds() + pruneColumnsFallback()
+    /// in /parquet/src/main/java/org/apache/iceberg/parquet/ParquetSchemaUtil.java
+    #[tokio::test]
+    async fn test_read_parquet_file_without_field_ids() {
+        let schema = Arc::new(
+            Schema::builder()
+                .with_schema_id(1)
+                .with_fields(vec![
+                    NestedField::required(1, "name", Type::Primitive(PrimitiveType::String)).into(),
+                    NestedField::required(2, "age", Type::Primitive(PrimitiveType::Int)).into(),
+                ])
+                .build()
+                .unwrap(),
+        );
+
+        // Parquet file as a migrated table would have it: fields carry no field ID metadata
+        let arrow_schema = Arc::new(ArrowSchema::new(vec![
+            Field::new("name", DataType::Utf8, false),
+            Field::new("age", DataType::Int32, false),
+        ]));
+
+        let tmp_dir = TempDir::new().unwrap();
+        let table_location = tmp_dir.path().to_str().unwrap().to_string();
+        let file_io = FileIO::new_with_fs();
+
+        let name_data = vec!["Alice", "Bob", "Charlie"];
+        let age_data = vec![30, 25, 35];
+
+        use arrow_array::Int32Array;
+        let name_col = Arc::new(StringArray::from(name_data.clone())) as ArrayRef;
+        let age_col = Arc::new(Int32Array::from(age_data.clone())) as ArrayRef;
+
+        let to_write = RecordBatch::try_new(arrow_schema.clone(), vec![name_col, age_col]).unwrap();
+
+        let props = WriterProperties::builder()
+            .set_compression(Compression::SNAPPY)
+            .build();
+
+        let file = File::create(format!("{table_location}/1.parquet")).unwrap();
+        let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap();
+
+        writer.write(&to_write).expect("Writing batch");
+        writer.close().unwrap();
+
+        let reader = ArrowReaderBuilder::new(file_io).build();
+
+        let tasks = Box::pin(futures::stream::iter(
+            vec![Ok(FileScanTask {
+                file_size_in_bytes: std::fs::metadata(format!("{table_location}/1.parquet"))
+                    .unwrap()
+                    .len(),
+                start: 0,
+                length: 0,
+                record_count: None,
+                data_file_path: format!("{table_location}/1.parquet"),
+                data_file_format: DataFileFormat::Parquet,
+                schema: schema.clone(),
+                project_field_ids: vec![1, 2],
+                predicate: None,
+                deletes: vec![],
+                partition: None,
+                partition_spec: None,
+                name_mapping: None,
+                case_sensitive: false,
+            })]
+            .into_iter(),
+        )) as FileScanTaskStream;
+
+        let result = reader
+            .read(tasks)
+            .unwrap()
+            .stream()
+            .try_collect::<Vec<RecordBatch>>()
+            .await
+            .unwrap();
+
+        assert_eq!(result.len(), 1);
+        let batch = &result[0];
+        assert_eq!(batch.num_rows(), 3);
+        assert_eq!(batch.num_columns(), 2);
+
+        // Verify position-based mapping: field_id 1 → position 0, field_id 2 → position 1
+        let name_array = batch.column(0).as_string::<i32>();
+        assert_eq!(name_array.value(0), "Alice");
+        assert_eq!(name_array.value(1), "Bob");
+        assert_eq!(name_array.value(2), "Charlie");
+
+        let age_array = batch
+            .column(1)
+            .as_primitive::<arrow_array::types::Int32Type>();
+        assert_eq!(age_array.value(0), 30);
+        assert_eq!(age_array.value(1), 25);
+        assert_eq!(age_array.value(2), 35);
+    }
+
+    /// Test reading a Parquet file without field IDs using a partial projection.
+    /// The file has four columns but only field IDs 1 and 3 are requested; the result must hold
+    /// exactly `col1` and `col3`, in that order, verifying position-based column selection.
+    #[tokio::test]
+    async fn test_read_parquet_without_field_ids_partial_projection() {
+        use arrow_array::Int32Array;
+
+        let schema = Arc::new(
+            Schema::builder()
+                .with_schema_id(1)
+                .with_fields(vec![
+                    NestedField::required(1, "col1", Type::Primitive(PrimitiveType::String)).into(),
+                    NestedField::required(2, "col2", Type::Primitive(PrimitiveType::Int)).into(),
+                    NestedField::required(3, "col3", Type::Primitive(PrimitiveType::String)).into(),
+                    NestedField::required(4, "col4", Type::Primitive(PrimitiveType::Int)).into(),
+                ])
+                .build()
+                .unwrap(),
+        );
+
+        let arrow_schema = Arc::new(ArrowSchema::new(vec![
+            Field::new("col1", DataType::Utf8, false),
+            Field::new("col2", DataType::Int32, false),
+            Field::new("col3", DataType::Utf8, false),
+            Field::new("col4", DataType::Int32, false),
+        ]));
+
+        let tmp_dir = TempDir::new().unwrap();
+        let table_location = tmp_dir.path().to_str().unwrap().to_string();
+        let file_io = FileIO::new_with_fs();
+
+        let col1_data = Arc::new(StringArray::from(vec!["a", "b"])) as ArrayRef;
+        let col2_data = Arc::new(Int32Array::from(vec![10, 20])) as ArrayRef;
+        let col3_data = Arc::new(StringArray::from(vec!["c", "d"])) as ArrayRef;
+        let col4_data = Arc::new(Int32Array::from(vec![30, 40])) as ArrayRef;
+
+        let to_write = RecordBatch::try_new(arrow_schema.clone(), vec![
+            col1_data, col2_data, col3_data, col4_data,
+        ])
+        .unwrap();
+
+        let props = WriterProperties::builder()
+            .set_compression(Compression::SNAPPY)
+            .build();
+
+        let file = File::create(format!("{table_location}/1.parquet")).unwrap();
+        let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap();
+
+        writer.write(&to_write).expect("Writing batch");
+        writer.close().unwrap();
+
+        let reader = ArrowReaderBuilder::new(file_io).build();
+
+        let tasks = Box::pin(futures::stream::iter(
+            vec![Ok(FileScanTask {
+                file_size_in_bytes: std::fs::metadata(format!("{table_location}/1.parquet"))
+                    .unwrap()
+                    .len(),
+                start: 0,
+                length: 0,
+                record_count: None,
+                data_file_path: format!("{table_location}/1.parquet"),
+                data_file_format: DataFileFormat::Parquet,
+                schema: schema.clone(),
+                project_field_ids: vec![1, 3],
+                predicate: None,
+                deletes: vec![],
+                partition: None,
+                partition_spec: None,
+                name_mapping: None,
+                case_sensitive: false,
+            })]
+            .into_iter(),
+        )) as FileScanTaskStream;
+
+        let result = reader
+            .read(tasks)
+            .unwrap()
+            .stream()
+            .try_collect::<Vec<RecordBatch>>()
+            .await
+            .unwrap();
+
+        assert_eq!(result.len(), 1);
+        let batch = &result[0];
+        assert_eq!(batch.num_rows(), 2);
+        assert_eq!(batch.num_columns(), 2);
+
+        let col1_array = batch.column(0).as_string::<i32>();
+        assert_eq!(col1_array.value(0), "a");
+        assert_eq!(col1_array.value(1), "b");
+
+        let col3_array = batch.column(1).as_string::<i32>();
+        assert_eq!(col3_array.value(0), "c");
+        assert_eq!(col3_array.value(1), "d");
+    }
+
+    /// Test reading a Parquet file without field IDs after schema evolution.
+    /// The Iceberg schema has one more field (optional `city`, id 3) than the two-column file;
+    /// the column missing from the file must come back filled with NULLs.
+    #[tokio::test]
+    async fn test_read_parquet_without_field_ids_schema_evolution() {
+        use arrow_array::{Array, Int32Array};
+
+        // Iceberg schema: optional field 3 (`city`) was added after the file was written
+        let schema = Arc::new(
+            Schema::builder()
+                .with_schema_id(1)
+                .with_fields(vec![
+                    NestedField::required(1, "name", Type::Primitive(PrimitiveType::String)).into(),
+                    NestedField::required(2, "age", Type::Primitive(PrimitiveType::Int)).into(),
+                    NestedField::optional(3, "city", Type::Primitive(PrimitiveType::String)).into(),
+                ])
+                .build()
+                .unwrap(),
+        );
+
+        let arrow_schema = Arc::new(ArrowSchema::new(vec![
+            Field::new("name", DataType::Utf8, false),
+            Field::new("age", DataType::Int32, false),
+        ]));
+
+        let tmp_dir = TempDir::new().unwrap();
+        let table_location = tmp_dir.path().to_str().unwrap().to_string();
+        let file_io = FileIO::new_with_fs();
+
+        let name_data = Arc::new(StringArray::from(vec!["Alice", "Bob"])) as ArrayRef;
+        let age_data = Arc::new(Int32Array::from(vec![30, 25])) as ArrayRef;
+
+        let to_write =
+            RecordBatch::try_new(arrow_schema.clone(), vec![name_data, age_data]).unwrap();
+
+        let props = WriterProperties::builder()
+            .set_compression(Compression::SNAPPY)
+            .build();
+
+        let file = File::create(format!("{table_location}/1.parquet")).unwrap();
+        let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap();
+
+        writer.write(&to_write).expect("Writing batch");
+        writer.close().unwrap();
+
+        let reader = ArrowReaderBuilder::new(file_io).build();
+
+        let tasks = Box::pin(futures::stream::iter(
+            vec![Ok(FileScanTask {
+                file_size_in_bytes: std::fs::metadata(format!("{table_location}/1.parquet"))
+                    .unwrap()
+                    .len(),
+                start: 0,
+                length: 0,
+                record_count: None,
+                data_file_path: format!("{table_location}/1.parquet"),
+                data_file_format: DataFileFormat::Parquet,
+                schema: schema.clone(),
+                project_field_ids: vec![1, 2, 3],
+                predicate: None,
+                deletes: vec![],
+                partition: None,
+                partition_spec: None,
+                name_mapping: None,
+                case_sensitive: false,
+            })]
+            .into_iter(),
+        )) as FileScanTaskStream;
+
+        let result = reader
+            .read(tasks)
+            .unwrap()
+            .stream()
+            .try_collect::<Vec<RecordBatch>>()
+            .await
+            .unwrap();
+
+        assert_eq!(result.len(), 1);
+        let batch = &result[0];
+        assert_eq!(batch.num_rows(), 2);
+        assert_eq!(batch.num_columns(), 3);
+
+        let name_array = batch.column(0).as_string::<i32>();
+        assert_eq!(name_array.value(0), "Alice");
+        assert_eq!(name_array.value(1), "Bob");
+
+        let age_array = batch
+            .column(1)
+            .as_primitive::<arrow_array::types::Int32Type>();
+        assert_eq!(age_array.value(0), 30);
+        assert_eq!(age_array.value(1), 25);
+
+        // `city` is not in the file, so it must be NULL for both rows
+        let city_array = batch.column(2).as_string::<i32>();
+        assert_eq!(city_array.null_count(), 2);
+        assert!(city_array.is_null(0));
+        assert!(city_array.is_null(1));
+    }
+
+    /// Test reading a Parquet file without field IDs that is written with multiple row groups.
+    /// All six rows must come back in write order, i.e. the fallback holds across row groups.
+    #[tokio::test]
+    async fn test_read_parquet_without_field_ids_multiple_row_groups() {
+        use arrow_array::Int32Array;
+
+        let schema = Arc::new(
+            Schema::builder()
+                .with_schema_id(1)
+                .with_fields(vec![
+                    NestedField::required(1, "name", Type::Primitive(PrimitiveType::String)).into(),
+                    NestedField::required(2, "value", Type::Primitive(PrimitiveType::Int)).into(),
+                ])
+                .build()
+                .unwrap(),
+        );
+
+        let arrow_schema = Arc::new(ArrowSchema::new(vec![
+            Field::new("name", DataType::Utf8, false),
+            Field::new("value", DataType::Int32, false),
+        ]));
+
+        let tmp_dir = TempDir::new().unwrap();
+        let table_location = tmp_dir.path().to_str().unwrap().to_string();
+        let file_io = FileIO::new_with_fs();
+
+        // Cap row groups at 2 rows so the 6 rows written below span multiple row groups
+        let props = WriterProperties::builder()
+            .set_compression(Compression::SNAPPY)
+            .set_write_batch_size(2)
+            .set_max_row_group_row_count(Some(2))
+            .build();
+
+        let file = File::create(format!("{table_location}/1.parquet")).unwrap();
+        let mut writer = ArrowWriter::try_new(file, arrow_schema.clone(), Some(props)).unwrap();
+
+        // Write 6 rows in 3 batches of 2 (expected to create 3 row groups; not asserted)
+        for batch_num in 0..3 {
+            let name_data = Arc::new(StringArray::from(vec![
+                format!("name_{}", batch_num * 2),
+                format!("name_{}", batch_num * 2 + 1),
+            ])) as ArrayRef;
+            let value_data =
+                Arc::new(Int32Array::from(vec![batch_num * 2, batch_num * 2 + 1])) as ArrayRef;
+
+            let batch =
+                RecordBatch::try_new(arrow_schema.clone(), vec![name_data, value_data]).unwrap();
+            writer.write(&batch).expect("Writing batch");
+        }
+        writer.close().unwrap();
+
+        let reader = ArrowReaderBuilder::new(file_io).build();
+
+        let tasks = Box::pin(futures::stream::iter(
+            vec![Ok(FileScanTask {
+                file_size_in_bytes: std::fs::metadata(format!("{table_location}/1.parquet"))
+                    .unwrap()
+                    .len(),
+                start: 0,
+                length: 0,
+                record_count: None,
+                data_file_path: format!("{table_location}/1.parquet"),
+                data_file_format: DataFileFormat::Parquet,
+                schema: schema.clone(),
+                project_field_ids: vec![1, 2],
+                predicate: None,
+                deletes: vec![],
+                partition: None,
+                partition_spec: None,
+                name_mapping: None,
+                case_sensitive: false,
+            })]
+            .into_iter(),
+        )) as FileScanTaskStream;
+
+        let result = reader
+            .read(tasks)
+            .unwrap()
+            .stream()
+            .try_collect::<Vec<RecordBatch>>()
+            .await
+            .unwrap();
+
+        assert!(!result.is_empty());
+
+        let mut all_names = Vec::new();
+        let mut all_values = Vec::new();
+
+        for batch in &result {
+            let name_array = batch.column(0).as_string::<i32>();
+            let value_array = batch
+                .column(1)
+                .as_primitive::<arrow_array::types::Int32Type>();
+
+            for i in 0..batch.num_rows() {
+                all_names.push(name_array.value(i).to_string());
+                all_values.push(value_array.value(i));
+            }
+        }
+
+        assert_eq!(all_names.len(), 6);
+        assert_eq!(all_values.len(), 6);
+
+        for i in 0..6 {
+            assert_eq!(all_names[i], format!("name_{i}"));
+            assert_eq!(all_values[i], i as i32);
+        }
+    }
+
+    /// Test reading Parquet files without field IDs with nested types (struct).
+    /// Java's pruneColumnsFallback() projects entire top-level columns including nested content.
+    /// Top-level IDs 1 (`id`) and 2 (`person`) are projected; `person` must keep both children.
+    #[tokio::test]
+    async fn test_read_parquet_without_field_ids_with_struct() {
+        use arrow_array::{Int32Array, StructArray};
+        use arrow_schema::Fields;
+
+        let schema = Arc::new(
+            Schema::builder()
+                .with_schema_id(1)
+                .with_fields(vec![
+                    NestedField::required(1, "id", Type::Primitive(PrimitiveType::Int)).into(),
+                    NestedField::required(
+                        2,
+                        "person",
+                        Type::Struct(crate::spec::StructType::new(vec![
+                            NestedField::required(
+                                3,
+                                "name",
+                                Type::Primitive(PrimitiveType::String),
+                            )
+                            .into(),
+                            NestedField::required(4, "age", Type::Primitive(PrimitiveType::Int))
+                                .into(),
+                        ])),
+                    )
+                    .into(),
+                ])
+                .build()
+                .unwrap(),
+        );
+
+        let arrow_schema = Arc::new(ArrowSchema::new(vec![
+            Field::new("id", DataType::Int32, false),
+            Field::new(
+                "person",
+                DataType::Struct(Fields::from(vec![
+                    Field::new("name", DataType::Utf8, false),
+                    Field::new("age", DataType::Int32, false),
+                ])),
+                false,
+            ),
+        ]));
+
+        let tmp_dir = TempDir::new().unwrap();
+        let table_location = tmp_dir.path().to_str().unwrap().to_string();
+        let file_io = FileIO::new_with_fs();
+
+        let id_data = Arc::new(Int32Array::from(vec![1, 2])) as ArrayRef;
+        let name_data = Arc::new(StringArray::from(vec!["Alice", "Bob"])) as ArrayRef;
+        let age_data = Arc::new(Int32Array::from(vec![30, 25])) as ArrayRef;
+        let person_data = Arc::new(StructArray::from(vec![
+            (
+                Arc::new(Field::new("name", DataType::Utf8, false)),
+                name_data,
+            ),
+            (
+                Arc::new(Field::new("age", DataType::Int32, false)),
+                age_data,
+            ),
+        ])) as ArrayRef;
+
+        let to_write =
+            RecordBatch::try_new(arrow_schema.clone(), vec![id_data, person_data]).unwrap();
+
+        let props = WriterProperties::builder()
+            .set_compression(Compression::SNAPPY)
+            .build();
+
+        let file = File::create(format!("{table_location}/1.parquet")).unwrap();
+        let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap();
+
+        writer.write(&to_write).expect("Writing batch");
+        writer.close().unwrap();
+
+        let reader = ArrowReaderBuilder::new(file_io).build();
+
+        let tasks = Box::pin(futures::stream::iter(
+            vec![Ok(FileScanTask {
+                file_size_in_bytes: std::fs::metadata(format!("{table_location}/1.parquet"))
+                    .unwrap()
+                    .len(),
+                start: 0,
+                length: 0,
+                record_count: None,
+                data_file_path: format!("{table_location}/1.parquet"),
+                data_file_format: DataFileFormat::Parquet,
+                schema: schema.clone(),
+                project_field_ids: vec![1, 2],
+                predicate: None,
+                deletes: vec![],
+                partition: None,
+                partition_spec: None,
+                name_mapping: None,
+                case_sensitive: false,
+            })]
+            .into_iter(),
+        )) as FileScanTaskStream;
+
+        let result = reader
+            .read(tasks)
+            .unwrap()
+            .stream()
+            .try_collect::<Vec<RecordBatch>>()
+            .await
+            .unwrap();
+
+        assert_eq!(result.len(), 1);
+        let batch = &result[0];
+        assert_eq!(batch.num_rows(), 2);
+        assert_eq!(batch.num_columns(), 2);
+
+        let id_array = batch
+            .column(0)
+            .as_primitive::<arrow_array::types::Int32Type>();
+        assert_eq!(id_array.value(0), 1);
+        assert_eq!(id_array.value(1), 2);
+
+        let person_array = batch.column(1).as_struct();
+        assert_eq!(person_array.num_columns(), 2);
+
+        let name_array = person_array.column(0).as_string::<i32>();
+        assert_eq!(name_array.value(0), "Alice");
+        assert_eq!(name_array.value(1), "Bob");
+
+        let age_array = person_array
+            .column(1)
+            .as_primitive::<arrow_array::types::Int32Type>();
+        assert_eq!(age_array.value(0), 30);
+        assert_eq!(age_array.value(1), 25);
+    }
+
+    /// Test reading a Parquet file without field IDs when a column was added in the middle.
+    /// The file holds col0 and col1; the schema inserts `newCol` (id 5) between them, so the
+    /// output must be col0, an all-NULL newCol, then col1 (field IDs mapped to output positions).
+    #[tokio::test]
+    async fn test_read_parquet_without_field_ids_schema_evolution_add_column_in_middle() {
+        use arrow_array::{Array, Int32Array};
+
+        let arrow_schema_old = Arc::new(ArrowSchema::new(vec![
+            Field::new("col0", DataType::Int32, true),
+            Field::new("col1", DataType::Int32, true),
+        ]));
+
+        // New column added between existing columns: col0 (id=1), newCol (id=5), col1 (id=2)
+        let schema = Arc::new(
+            Schema::builder()
+                .with_schema_id(1)
+                .with_fields(vec![
+                    NestedField::optional(1, "col0", Type::Primitive(PrimitiveType::Int)).into(),
+                    NestedField::optional(5, "newCol", Type::Primitive(PrimitiveType::Int)).into(),
+                    NestedField::optional(2, "col1", Type::Primitive(PrimitiveType::Int)).into(),
+                ])
+                .build()
+                .unwrap(),
+        );
+
+        let tmp_dir = TempDir::new().unwrap();
+        let table_location = tmp_dir.path().to_str().unwrap().to_string();
+        let file_io = FileIO::new_with_fs();
+
+        let col0_data = Arc::new(Int32Array::from(vec![1, 2])) as ArrayRef;
+        let col1_data = Arc::new(Int32Array::from(vec![10, 20])) as ArrayRef;
+
+        let to_write =
+            RecordBatch::try_new(arrow_schema_old.clone(), vec![col0_data, col1_data]).unwrap();
+
+        let props = WriterProperties::builder()
+            .set_compression(Compression::SNAPPY)
+            .build();
+
+        let file = File::create(format!("{table_location}/1.parquet")).unwrap();
+        let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap();
+        writer.write(&to_write).expect("Writing batch");
+        writer.close().unwrap();
+
+        let reader = ArrowReaderBuilder::new(file_io).build();
+
+        let tasks = Box::pin(futures::stream::iter(
+            vec![Ok(FileScanTask {
+                file_size_in_bytes: std::fs::metadata(format!("{table_location}/1.parquet"))
+                    .unwrap()
+                    .len(),
+                start: 0,
+                length: 0,
+                record_count: None,
+                data_file_path: format!("{table_location}/1.parquet"),
+                data_file_format: DataFileFormat::Parquet,
+                schema: schema.clone(),
+                project_field_ids: vec![1, 5, 2],
+                predicate: None,
+                deletes: vec![],
+                partition: None,
+                partition_spec: None,
+                name_mapping: None,
+                case_sensitive: false,
+            })]
+            .into_iter(),
+        )) as FileScanTaskStream;
+
+        let result = reader
+            .read(tasks)
+            .unwrap()
+            .stream()
+            .try_collect::<Vec<RecordBatch>>()
+            .await
+            .unwrap();
+
+        assert_eq!(result.len(), 1);
+        let batch = &result[0];
+        assert_eq!(batch.num_rows(), 2);
+        assert_eq!(batch.num_columns(), 3);
+
+        let result_col0 = batch
+            .column(0)
+            .as_primitive::<arrow_array::types::Int32Type>();
+        assert_eq!(result_col0.value(0), 1);
+        assert_eq!(result_col0.value(1), 2);
+
+        // New column should be NULL in every row (it doesn't exist in the old file)
+        let result_newcol = batch
+            .column(1)
+            .as_primitive::<arrow_array::types::Int32Type>();
+        assert_eq!(result_newcol.null_count(), 2);
+        assert!(result_newcol.is_null(0));
+        assert!(result_newcol.is_null(1));
+
+        let result_col1 = batch
+            .column(2)
+            .as_primitive::<arrow_array::types::Int32Type>();
+        assert_eq!(result_col1.value(0), 10);
+        assert_eq!(result_col1.value(1), 20);
+    }
+
+    /// Regression test: read a Parquet file without field IDs with a filter matching no rows.
+    /// During development of field ID mapping, we saw a panic when row_selection_enabled=true and
+    /// all row groups are filtered out.
+    #[tokio::test]
+    async fn test_read_parquet_without_field_ids_filter_eliminates_all_rows() {
+        use arrow_array::{Float64Array, Int32Array};
+
+        // Schema with fields that will use fallback IDs 1, 2, 3
+        let schema = Arc::new(
+            Schema::builder()
+                .with_schema_id(1)
+                .with_fields(vec![
+                    NestedField::required(1, "id", Type::Primitive(PrimitiveType::Int)).into(),
+                    NestedField::required(2, "name", Type::Primitive(PrimitiveType::String)).into(),
+                    NestedField::required(3, "value", Type::Primitive(PrimitiveType::Double))
+                        .into(),
+                ])
+                .build()
+                .unwrap(),
+        );
+
+        let arrow_schema = Arc::new(ArrowSchema::new(vec![
+            Field::new("id", DataType::Int32, false),
+            Field::new("name", DataType::Utf8, false),
+            Field::new("value", DataType::Float64, false),
+        ]));
+
+        let tmp_dir = TempDir::new().unwrap();
+        let table_location = tmp_dir.path().to_str().unwrap().to_string();
+        let file_io = FileIO::new_with_fs();
+
+        // Write data where all ids are >= 10, so the `id < 5` filter below can match nothing
+        let id_data = Arc::new(Int32Array::from(vec![10, 11, 12])) as ArrayRef;
+        let name_data = Arc::new(StringArray::from(vec!["a", "b", "c"])) as ArrayRef;
+        let value_data = Arc::new(Float64Array::from(vec![100.0, 200.0, 300.0])) as ArrayRef;
+
+        let to_write =
+            RecordBatch::try_new(arrow_schema.clone(), vec![id_data, name_data, value_data])
+                .unwrap();
+
+        let props = WriterProperties::builder()
+            .set_compression(Compression::SNAPPY)
+            .build();
+
+        let file = File::create(format!("{table_location}/1.parquet")).unwrap();
+        let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap();
+        writer.write(&to_write).expect("Writing batch");
+        writer.close().unwrap();
+
+        // Filter that eliminates all row groups: id < 5
+        let predicate = Reference::new("id").less_than(Datum::int(5));
+
+        // Enable both row_group_filtering and row_selection - this combination triggered the panic
+        let reader = ArrowReaderBuilder::new(file_io)
+            .with_row_group_filtering_enabled(true)
+            .with_row_selection_enabled(true)
+            .build();
+
+        let tasks = Box::pin(futures::stream::iter(
+            vec![Ok(FileScanTask {
+                file_size_in_bytes: std::fs::metadata(format!("{table_location}/1.parquet"))
+                    .unwrap()
+                    .len(),
+                start: 0,
+                length: 0,
+                record_count: None,
+                data_file_path: format!("{table_location}/1.parquet"),
+                data_file_format: DataFileFormat::Parquet,
+                schema: schema.clone(),
+                project_field_ids: vec![1, 2, 3],
+                predicate: Some(predicate.bind(schema, true).unwrap()),
+                deletes: vec![],
+                partition: None,
+                partition_spec: None,
+                name_mapping: None,
+                case_sensitive: false,
+            })]
+            .into_iter(),
+        )) as FileScanTaskStream;
+
+        // Should no longer panic
+        let result = reader
+            .read(tasks)
+            .unwrap()
+            .stream()
+            .try_collect::<Vec<RecordBatch>>()
+            .await
+            .unwrap();
+
+        // Should return empty results: either no batches at all, or only zero-row batches
+        assert!(result.is_empty() || result.iter().all(|batch| batch.num_rows() == 0));
+    }
+
+    /// Test bucket partitioning reads source column from data file (not partition metadata).
+    ///
+    /// This is an integration test verifying the complete ArrowReader pipeline with bucket partitioning.
+    /// It corresponds to TestRuntimeFiltering tests in Iceberg Java (e.g., testRenamedSourceColumnTable).
+    ///
+    /// # Iceberg Spec Requirements
+    ///
+    /// Per the Iceberg spec "Column Projection" section:
+    /// > "Return the value from partition metadata if an **Identity Transform** exists for the field"
+    ///
+    /// This means:
+    /// - Identity transforms (e.g., `identity(dept)`) use constants from partition metadata
+    /// - Non-identity transforms (e.g., `bucket(4, id)`) must read source columns from data files
+    /// - Partition metadata for bucket transforms stores bucket numbers (0-3), NOT source values
+    ///
+    /// Java's PartitionUtil.constantsMap() implements this via:
+    /// ```java
+    /// if (field.transform().isIdentity()) {
+    ///     idToConstant.put(field.sourceId(), converted);
+    /// }
+    /// ```
+    ///
+    /// # What This Test Verifies
+    ///
+    /// This test ensures the full ArrowReader → RecordBatchTransformer pipeline correctly handles
+    /// bucket partitioning when FileScanTask provides partition_spec and partition_data:
+    ///
+    /// - Parquet file has field_id=1 named "id" with actual data [1, 5, 9, 13]
+    /// - FileScanTask specifies partition_spec with bucket(4, id) and partition_data with bucket=1
+    /// - RecordBatchTransformer.constants_map() excludes bucket-partitioned field from constants
+    /// - ArrowReader correctly reads [1, 5, 9, 13] from the data file
+    /// - Values are NOT replaced with constant 1 from partition metadata
+    ///
+    /// # Why This Matters
+    ///
+    /// Without correct handling:
+    /// - Runtime filtering would break (e.g., `WHERE id = 5` would fail)
+    /// - Query results would be incorrect (all rows would have id=1)
+    /// - Bucket partitioning would be unusable for query optimization
+    ///
+    /// # References
+    /// - Iceberg spec: format/spec.md "Column Projection" + "Partition Transforms"
+    /// - Java test: spark/src/test/java/.../TestRuntimeFiltering.java
+    /// - Java impl: core/src/main/java/org/apache/iceberg/util/PartitionUtil.java
+    #[tokio::test]
+    async fn test_bucket_partitioning_reads_source_column_from_file() {
+        use arrow_array::Int32Array;
+
+        use crate::spec::{Literal, PartitionSpec, Struct, Transform};
+
+        // Iceberg schema with id and name columns
+        let schema = Arc::new(
+            Schema::builder()
+                .with_schema_id(0)
+                .with_fields(vec![
+                    NestedField::required(1, "id", Type::Primitive(PrimitiveType::Int)).into(),
+                    NestedField::optional(2, "name", Type::Primitive(PrimitiveType::String)).into(),
+                ])
+                .build()
+                .unwrap(),
+        );
+
+        // Partition spec: bucket(4, id)
+        let partition_spec = Arc::new(
+            PartitionSpec::builder(schema.clone())
+                .with_spec_id(0)
+                .add_partition_field("id", "id_bucket", Transform::Bucket(4))
+                .unwrap()
+                .build()
+                .unwrap(),
+        );
+
+        // Partition data: bucket value is 1
+        let partition_data = Struct::from_iter(vec![Some(Literal::int(1))]);
+
+        // Create Arrow schema with field IDs for Parquet file
+        let arrow_schema = Arc::new(ArrowSchema::new(vec![
+            Field::new("id", DataType::Int32, false).with_metadata(HashMap::from([(
+                PARQUET_FIELD_ID_META_KEY.to_string(),
+                "1".to_string(),
+            )])),
+            Field::new("name", DataType::Utf8, true).with_metadata(HashMap::from([(
+                PARQUET_FIELD_ID_META_KEY.to_string(),
+                "2".to_string(),
+            )])),
+        ]));
+
+        // Write Parquet file with data
+        let tmp_dir = TempDir::new().unwrap();
+        let table_location = tmp_dir.path().to_str().unwrap().to_string();
+        let file_io = FileIO::new_with_fs();
+
+        let id_data = Arc::new(Int32Array::from(vec![1, 5, 9, 13])) as ArrayRef;
+        let name_data =
+            Arc::new(StringArray::from(vec!["Alice", "Bob", "Charlie", "Dave"])) as ArrayRef;
+
+        let to_write =
+            RecordBatch::try_new(arrow_schema.clone(), vec![id_data, name_data]).unwrap();
+
+        let props = WriterProperties::builder()
+            .set_compression(Compression::SNAPPY)
+            .build();
+        let file = File::create(format!("{}/data.parquet", &table_location)).unwrap();
+        let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap();
+        writer.write(&to_write).expect("Writing batch");
+        writer.close().unwrap();
+
+        // Read the Parquet file with partition spec and data
+        let reader = ArrowReaderBuilder::new(file_io).build();
+        let tasks = Box::pin(futures::stream::iter(
+            vec![Ok(FileScanTask {
+                file_size_in_bytes: std::fs::metadata(format!("{table_location}/data.parquet"))
+                    .unwrap()
+                    .len(),
+                start: 0,
+                length: 0,
+                record_count: None,
+                data_file_path: format!("{table_location}/data.parquet"),
+                data_file_format: DataFileFormat::Parquet,
+                schema: schema.clone(),
+                project_field_ids: vec![1, 2],
+                predicate: None,
+                deletes: vec![],
+                partition: Some(partition_data),
+                partition_spec: Some(partition_spec),
+                name_mapping: None,
+                case_sensitive: false,
+            })]
+            .into_iter(),
+        )) as FileScanTaskStream;
+
+        let result = reader
+            .read(tasks)
+            .unwrap()
+            .stream()
+            .try_collect::<Vec<RecordBatch>>()
+            .await
+            .unwrap();
+
+        // Verify we got the correct data
+        assert_eq!(result.len(), 1);
+        let batch = &result[0];
+
+        assert_eq!(batch.num_columns(), 2);
+        assert_eq!(batch.num_rows(), 4);
+
+        // The id column MUST contain actual values from the Parquet file [1, 5, 9, 13],
+        // NOT the constant partition value 1
+        let id_col = batch
+            .column(0)
+            .as_primitive::<arrow_array::types::Int32Type>();
+        assert_eq!(id_col.value(0), 1);
+        assert_eq!(id_col.value(1), 5);
+        assert_eq!(id_col.value(2), 9);
+        assert_eq!(id_col.value(3), 13);
+
+        let name_col = batch.column(1).as_string::<i32>();
+        assert_eq!(name_col.value(0), "Alice");
+        assert_eq!(name_col.value(1), "Bob");
+        assert_eq!(name_col.value(2), "Charlie");
+        assert_eq!(name_col.value(3), "Dave");
+    }
+
+    /// Regression test for <https://github.com/apache/iceberg-rust/issues/2306>.
+    ///
+    /// Applies a predicate to a column that sits after nested columns in a migrated
+    /// file (one written without Parquet field IDs). The struct, list, and map columns
+    /// placed ahead of `id` force the fallback field ID mapping to account for every
+    /// nested type variant before it reaches the predicate target.
+    #[tokio::test]
+    async fn test_predicate_on_migrated_file_with_nested_types() {
+        use serde::{Deserialize, Serialize};
+        use serde_arrow::schema::{SchemaLike, TracingOptions};
+
+        #[derive(Serialize, Deserialize)]
+        struct Person {
+            name: String,
+            age: i32,
+        }
+
+        #[derive(Serialize, Deserialize)]
+        struct Row {
+            person: Person,
+            people: Vec<Person>,
+            props: std::collections::BTreeMap<String, String>,
+            id: i32,
+        }
+
+        // (id, name, age, props key, props value) for each row of the data file.
+        let rows: Vec<Row> = [
+            (1, "Alice", 30, "k1", "v1"),
+            (2, "Bob", 25, "k2", "v2"),
+            (3, "Carol", 40, "k3", "v3"),
+        ]
+        .into_iter()
+        .map(|(id, name, age, key, value)| Row {
+            person: Person {
+                name: name.into(),
+                age,
+            },
+            people: vec![Person {
+                name: name.into(),
+                age,
+            }],
+            props: [(key.into(), value.into())].into(),
+            id,
+        })
+        .collect();
+
+        // Derive the Arrow schema from the Rust types, using the regular (non-large)
+        // string/list encodings and a real map type for `props`.
+        let trace_opts = TracingOptions::default()
+            .map_as_struct(false)
+            .strings_as_large_utf8(false)
+            .sequence_as_large_list(false);
+        let fields = Vec::<arrow_schema::FieldRef>::from_type::<Row>(trace_opts).unwrap();
+        let batch = serde_arrow::to_record_batch(&fields, &rows).unwrap();
+        let arrow_schema = Arc::new(ArrowSchema::new(fields));
+
+        // Struct type shared by `person` and by the elements of `people`; only the
+        // field IDs of its two members differ between the two uses.
+        let person_type = |name_id: i32, age_id: i32| {
+            Type::Struct(crate::spec::StructType::new(vec![
+                NestedField::required(name_id, "name", Type::Primitive(PrimitiveType::String))
+                    .into(),
+                NestedField::required(age_id, "age", Type::Primitive(PrimitiveType::Int)).into(),
+            ]))
+        };
+
+        // Fallback field IDs: person=1, people=2, props=3, id=4
+        let person_field = NestedField::required(1, "person", person_type(5, 6));
+        let people_field = NestedField::required(
+            2,
+            "people",
+            Type::List(crate::spec::ListType {
+                element_field: NestedField::required(7, "element", person_type(8, 9)).into(),
+            }),
+        );
+        let props_field = NestedField::required(
+            3,
+            "props",
+            Type::Map(crate::spec::MapType {
+                key_field: NestedField::required(
+                    10,
+                    "key",
+                    Type::Primitive(PrimitiveType::String),
+                )
+                .into(),
+                value_field: NestedField::required(
+                    11,
+                    "value",
+                    Type::Primitive(PrimitiveType::String),
+                )
+                .into(),
+            }),
+        );
+        let id_field = NestedField::required(4, "id", Type::Primitive(PrimitiveType::Int));
+
+        let iceberg_schema = Arc::new(
+            Schema::builder()
+                .with_schema_id(1)
+                .with_fields(vec![
+                    person_field.into(),
+                    people_field.into(),
+                    props_field.into(),
+                    id_field.into(),
+                ])
+                .build()
+                .unwrap(),
+        );
+
+        // Write the batch to `<tmp>/1.parquet`.
+        let tmp_dir = TempDir::new().unwrap();
+        let table_location = tmp_dir.path().to_str().unwrap().to_string();
+        let file_path = format!("{table_location}/1.parquet");
+
+        let writer_props = WriterProperties::builder()
+            .set_compression(Compression::SNAPPY)
+            .build();
+        let mut writer = ArrowWriter::try_new(
+            File::create(&file_path).unwrap(),
+            arrow_schema,
+            Some(writer_props),
+        )
+        .unwrap();
+        writer.write(&batch).expect("Writing batch");
+        writer.close().unwrap();
+
+        // Scan only `id` (field 4) with `id > 1`, with both pruning stages enabled.
+        let bound_predicate = Reference::new("id")
+            .greater_than(Datum::int(1))
+            .bind(iceberg_schema.clone(), true)
+            .unwrap();
+        let file_size_in_bytes = std::fs::metadata(&file_path).unwrap().len();
+
+        let reader = ArrowReaderBuilder::new(FileIO::new_with_fs())
+            .with_row_group_filtering_enabled(true)
+            .with_row_selection_enabled(true)
+            .build();
+
+        let task = FileScanTask {
+            file_size_in_bytes,
+            start: 0,
+            length: 0,
+            record_count: None,
+            data_file_path: file_path,
+            data_file_format: DataFileFormat::Parquet,
+            schema: iceberg_schema,
+            project_field_ids: vec![4],
+            predicate: Some(bound_predicate),
+            deletes: vec![],
+            partition: None,
+            partition_spec: None,
+            name_mapping: None,
+            case_sensitive: false,
+        };
+        let tasks =
+            Box::pin(futures::stream::iter(vec![Ok(task)].into_iter())) as FileScanTaskStream;
+
+        let result = reader
+            .read(tasks)
+            .unwrap()
+            .stream()
+            .try_collect::<Vec<RecordBatch>>()
+            .await
+            .unwrap();
+
+        // Gather the projected column across all returned batches.
+        let mut ids: Vec<i32> = Vec::new();
+        for read_batch in &result {
+            let id_column = read_batch
+                .column(0)
+                .as_primitive::<arrow_array::types::Int32Type>();
+            ids.extend(id_column.values().iter().copied());
+        }
+        assert_eq!(ids, vec![2, 3]);
+    }
+
+    /// Verifies that a column stored in Parquet as Arrow `Binary` is readable when the
+    /// Iceberg schema declares it as `Fixed(N)`.
+    ///
+    /// Models a real-world case: Snowflake writes `FIXED_LEN_BYTE_ARRAY` columns that
+    /// the Arrow Parquet reader surfaces as `Binary` instead of `FixedSizeBinary(N)`.
+    /// If `type_promotion_is_valid` lacks the `(Binary, Fixed(_))` arm, the column is
+    /// silently dropped from the projection and comes back filled with nulls.
+    #[tokio::test]
+    async fn test_binary_to_fixed_type_promotion() {
+        /// Collects the bytes of every row of `column`. The column may come back as
+        /// either `Binary` or `FixedSizeBinary` (the transformer may cast to match the
+        /// target schema); any other type is a test failure.
+        fn row_bytes(column: &ArrayRef) -> Vec<&[u8]> {
+            if let Some(bin) = column.as_any().downcast_ref::<BinaryArray>() {
+                (0..bin.len()).map(|i| bin.value(i)).collect()
+            } else if let Some(fsb) = column.as_any().downcast_ref::<FixedSizeBinaryArray>() {
+                (0..fsb.len()).map(|i| fsb.value(i)).collect()
+            } else {
+                panic!("uuid_col has unexpected type: {}", column.data_type())
+            }
+        }
+
+        // UUID-like 16-byte values
+        let uuid_bytes: Vec<[u8; 16]> = vec![
+            [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+            [
+                0xA1, 0xB2, 0xC3, 0xD4, 0xE5, 0xF6, 0x07, 0x18, 0x29, 0x3A, 0x4B, 0x5C, 0x6D, 0x7E,
+                0x8F, 0x90,
+            ],
+            [0xFF; 16],
+        ];
+        let int_data = vec![1i32, 2, 3];
+
+        // Iceberg schema: field 1 = Int, field 2 = Fixed(16)
+        let id_field = NestedField::required(1, "id", Type::Primitive(PrimitiveType::Int));
+        let uuid_field =
+            NestedField::optional(2, "uuid_col", Type::Primitive(PrimitiveType::Fixed(16)));
+        let iceberg_schema = Arc::new(
+            Schema::builder()
+                .with_schema_id(1)
+                .with_fields(vec![id_field.into(), uuid_field.into()])
+                .build()
+                .unwrap(),
+        );
+
+        // Arrow schema: uuid_col is deliberately Binary (not FixedSizeBinary), to
+        // simulate what the Arrow Parquet reader yields for some writers (e.g. Snowflake).
+        let field_id_meta =
+            |id: &str| HashMap::from([(PARQUET_FIELD_ID_META_KEY.to_string(), id.to_string())]);
+        let arrow_schema = Arc::new(ArrowSchema::new(vec![
+            Field::new("id", DataType::Int32, false).with_metadata(field_id_meta("1")),
+            Field::new("uuid_col", DataType::Binary, true).with_metadata(field_id_meta("2")),
+        ]));
+
+        let id_col = Arc::new(Int32Array::from(int_data.clone())) as ArrayRef;
+        let uuid_col = Arc::new(BinaryArray::from_vec(
+            uuid_bytes.iter().map(|b| b.as_slice()).collect(),
+        )) as ArrayRef;
+        let batch = RecordBatch::try_new(arrow_schema.clone(), vec![id_col, uuid_col]).unwrap();
+
+        // Write Parquet file
+        let tmp_dir = TempDir::new().unwrap();
+        let table_location = tmp_dir.path().to_str().unwrap().to_string();
+        let parquet_path = format!("{table_location}/1.parquet");
+        let file = File::create(&parquet_path).unwrap();
+        let writer_props = WriterProperties::builder()
+            .set_compression(Compression::SNAPPY)
+            .build();
+        let mut writer = ArrowWriter::try_new(file, batch.schema(), Some(writer_props)).unwrap();
+        writer.write(&batch).unwrap();
+        writer.close().unwrap();
+
+        let file_io = FileIO::new_with_fs();
+        let file_size = std::fs::metadata(&parquet_path).unwrap().len();
+
+        // Builds a single-task stream over the whole file for the given projection.
+        let scan_tasks = |project_field_ids: Vec<i32>| {
+            Box::pin(futures::stream::iter(vec![Ok(FileScanTask {
+                file_size_in_bytes: file_size,
+                start: 0,
+                length: 0,
+                record_count: None,
+                data_file_path: parquet_path.clone(),
+                data_file_format: DataFileFormat::Parquet,
+                schema: iceberg_schema.clone(),
+                project_field_ids,
+                predicate: None,
+                deletes: vec![],
+                partition: None,
+                partition_spec: None,
+                name_mapping: None,
+                case_sensitive: false,
+            })])) as FileScanTaskStream
+        };
+
+        // --- Test 1: Full scan (all columns projected) ---
+        // This is the case that previously failed.
+        {
+            let reader = ArrowReaderBuilder::new(file_io.clone()).build();
+            let batches: Vec<RecordBatch> = reader
+                .read(scan_tasks(vec![1, 2]))
+                .unwrap()
+                .stream()
+                .try_collect()
+                .await
+                .unwrap();
+
+            assert_eq!(batches.len(), 1);
+            let result = &batches[0];
+            assert_eq!(result.num_rows(), 3);
+            assert_eq!(result.num_columns(), 2);
+
+            // Verify id column
+            let id_arr = result
+                .column_by_name("id")
+                .unwrap()
+                .as_any()
+                .downcast_ref::<Int32Array>()
+                .unwrap();
+            assert_eq!(id_arr.values(), &int_data);
+
+            // Verify uuid_col: no nulls, and every byte preserved
+            let uuid_arr = result.column_by_name("uuid_col").unwrap();
+            assert_eq!(uuid_arr.null_count(), 0, "uuid_col should have no nulls");
+            let uuid_values = row_bytes(uuid_arr);
+            for (i, expected) in uuid_bytes.iter().enumerate() {
+                assert_eq!(
+                    uuid_values[i],
+                    expected.as_slice(),
+                    "uuid_col row {i} bytes mismatch"
+                );
+            }
+        }
+
+        // --- Test 2: Projected scan (only uuid_col) ---
+        {
+            let reader2 = ArrowReaderBuilder::new(file_io).build();
+            let batches: Vec<RecordBatch> = reader2
+                .read(scan_tasks(vec![2]))
+                .unwrap()
+                .stream()
+                .try_collect()
+                .await
+                .unwrap();
+
+            assert_eq!(batches.len(), 1);
+            let result = &batches[0];
+            assert_eq!(result.num_rows(), 3);
+            assert_eq!(result.num_columns(), 1);
+
+            let uuid_arr = result.column(0);
+            assert_eq!(uuid_arr.null_count(), 0, "uuid_col should have no nulls");
+            let uuid_values = row_bytes(uuid_arr);
+            for (i, expected) in uuid_bytes.iter().enumerate() {
+                assert_eq!(
+                    uuid_values[i],
+                    expected.as_slice(),
+                    "uuid_col row {i} bytes mismatch in projected scan"
+                );
+            }
+        }
+    }
+}
diff --git a/crates/iceberg/src/arrow/reader/row_filter.rs b/crates/iceberg/src/arrow/reader/row_filter.rs
new file mode 100644
index 0000000000..80432a0437
--- /dev/null
+++ b/crates/iceberg/src/arrow/reader/row_filter.rs
@@ -0,0 +1,619 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Predicate-driven row filtering for `ArrowReader`: constructing Arrow `RowFilter`s
+//! from Iceberg predicates, row-group selection based on column statistics, and
+//! row-selection via the Parquet page index. Also includes byte-range row-group
+//! filtering used for file splitting.
+
+use std::collections::{HashMap, HashSet};
+use std::sync::Arc;
+
+use parquet::arrow::ProjectionMask;
+use parquet::arrow::arrow_reader::{ArrowPredicateFn, RowFilter, RowSelection};
+use parquet::file::metadata::ParquetMetaData;
+use parquet::schema::types::SchemaDescriptor;
+
+use super::{ArrowReader, PredicateConverter};
+use crate::error::Result;
+use crate::expr::BoundPredicate;
+use crate::expr::visitors::bound_predicate_visitor::visit;
+use crate::expr::visitors::page_index_evaluator::PageIndexEvaluator;
+use crate::expr::visitors::row_group_metrics_evaluator::RowGroupMetricsEvaluator;
+use crate::spec::Schema;
+use crate::{Error, ErrorKind};
+
+impl ArrowReader {
+    /// Builds a single-predicate Arrow `RowFilter` from a bound Iceberg predicate.
+    ///
+    /// `iceberg_field_ids` are the field IDs whose columns the filter needs to read,
+    /// and `field_id_map` translates a field ID into a Parquet leaf column index.
+    /// Field IDs with no entry in `field_id_map` are skipped, which is expected after
+    /// schema evolution.
+    ///
+    /// Returns an error if converting `predicates` into an Arrow predicate fails.
+    pub(super) fn get_row_filter(
+        predicates: &BoundPredicate,
+        parquet_schema: &SchemaDescriptor,
+        iceberg_field_ids: &HashSet<i32>,
+        field_id_map: &HashMap<i32, usize>,
+    ) -> Result<RowFilter> {
+        // Resolve field IDs to leaf column indices, dropping IDs the file does not have.
+        let mut column_indices: Vec<usize> = iceberg_field_ids
+            .iter()
+            .filter_map(|field_id| field_id_map.get(field_id).copied())
+            .collect();
+        column_indices.sort_unstable();
+
+        // Visitor that turns the `BoundPredicate` tree into an Arrow predicate closure.
+        let mut converter = PredicateConverter {
+            parquet_schema,
+            column_map: field_id_map,
+            column_indices: &column_indices,
+        };
+
+        // The predicate only has to decode the leaf columns collected above.
+        let projection_mask =
+            ProjectionMask::leaves(parquet_schema, column_indices.iter().copied());
+        let predicate_func = visit(&mut converter, predicates)?;
+
+        Ok(RowFilter::new(vec![Box::new(ArrowPredicateFn::new(
+            projection_mask,
+            predicate_func,
+        ))]))
+    }
+
+    /// Returns the indices, in file order, of the row groups for which
+    /// `RowGroupMetricsEvaluator::eval` reports `true` for `predicate`.
+    ///
+    /// Evaluation stops at the first row group whose evaluation fails, and that error
+    /// is returned.
+    pub(super) fn get_selected_row_group_indices(
+        predicate: &BoundPredicate,
+        parquet_metadata: &Arc<ParquetMetaData>,
+        field_id_map: &HashMap<i32, usize>,
+        snapshot_schema: &Schema,
+    ) -> Result<Vec<usize>> {
+        parquet_metadata
+            .row_groups()
+            .iter()
+            .enumerate()
+            .filter_map(|(idx, row_group_metadata)| {
+                // Ok(true) -> Some(Ok(idx)), Ok(false) -> None, Err(e) -> Some(Err(e));
+                // collecting into `Result` then short-circuits on the first error.
+                RowGroupMetricsEvaluator::eval(
+                    predicate,
+                    row_group_metadata,
+                    field_id_map,
+                    snapshot_schema,
+                )
+                .map(|keep| keep.then_some(idx))
+                .transpose()
+            })
+            .collect()
+    }
+
+    /// Builds a `RowSelection` for `predicate` from the Parquet page index.
+    ///
+    /// When `selected_row_groups` is `Some`, only the listed row groups are evaluated
+    /// and the returned selection is the concatenation of their per-row-group
+    /// selections, in file order; the list is consumed front to back while walking
+    /// the row groups in file order. When it is `None`, every row group is evaluated.
+    ///
+    /// Returns an `Unexpected` error if the metadata has no column index or no offset
+    /// index, and propagates any error from `PageIndexEvaluator::eval`.
+    pub(super) fn get_row_selection_for_filter_predicate(
+        predicate: &BoundPredicate,
+        parquet_metadata: &Arc<ParquetMetaData>,
+        selected_row_groups: &Option<Vec<usize>>,
+        field_id_map: &HashMap<i32, usize>,
+        snapshot_schema: &Schema,
+    ) -> Result<RowSelection> {
+        let column_index = parquet_metadata.column_index().ok_or_else(|| {
+            Error::new(
+                ErrorKind::Unexpected,
+                "Parquet file metadata does not contain a column index",
+            )
+        })?;
+
+        let offset_index = parquet_metadata.offset_index().ok_or_else(|| {
+            Error::new(
+                ErrorKind::Unexpected,
+                "Parquet file metadata does not contain an offset index",
+            )
+        })?;
+
+        // Every row group was already filtered out: select no rows at all.
+        if selected_row_groups
+            .as_ref()
+            .is_some_and(|groups| groups.is_empty())
+        {
+            return Ok(RowSelection::from(Vec::new()));
+        }
+
+        // Cursor over the row groups still to be visited; `None` means "visit all".
+        let mut pending_groups = selected_row_groups
+            .as_ref()
+            .map(|groups| groups.iter().copied().peekable());
+
+        let per_row_group = column_index
+            .iter()
+            .zip(offset_index)
+            .zip(parquet_metadata.row_groups())
+            .enumerate();
+
+        let mut results = Vec::new();
+        for (idx, ((rg_column_index, rg_offset_index), row_group_metadata)) in per_row_group {
+            if let Some(pending) = pending_groups.as_mut() {
+                // Skip row groups that are not the next selected one.
+                if pending.peek() != Some(&idx) {
+                    continue;
+                }
+                pending.next();
+            }
+
+            results.push(PageIndexEvaluator::eval(
+                predicate,
+                rg_column_index,
+                rg_offset_index,
+                row_group_metadata,
+                field_id_map,
+                snapshot_schema,
+            )?);
+
+            // Stop as soon as the last selected row group has been handled.
+            if let Some(pending) = pending_groups.as_mut()
+                && pending.peek().is_none()
+            {
+                break;
+            }
+        }
+
+        Ok(results.into_iter().flatten().collect::<Vec<_>>().into())
+    }
+
+    /// Filters row groups by byte range to support Iceberg's file splitting.
+    ///
+    /// Iceberg splits large files at row group boundaries, so we only read row groups
+    /// whose byte ranges overlap with [start, start+length).
+    ///
+    /// Returns the indices, in file order, of the overlapping row groups. Row group
+    /// positions are not read from the metadata: they are estimated by accumulating
+    /// each row group's `compressed_size()` starting at byte 4.
+    ///
+    /// All offset arithmetic saturates at `u64::MAX`, so an oversized `start + length`
+    /// (or an implausibly large row group size) is treated as "extends to the end of
+    /// the addressable range" instead of panicking in debug builds or wrapping around
+    /// (and silently selecting the wrong row groups) in release builds.
+    pub(super) fn filter_row_groups_by_byte_range(
+        parquet_metadata: &Arc<ParquetMetaData>,
+        start: u64,
+        length: u64,
+    ) -> Result<Vec<usize>> {
+        let row_groups = parquet_metadata.row_groups();
+        let mut selected = Vec::new();
+        // Exclusive end of the requested split; clamp rather than overflow.
+        let end = start.saturating_add(length);
+
+        // Row groups are stored sequentially after the 4-byte magic header.
+        // NOTE(review): this assumes no padding or other bytes between row groups.
+        let mut current_byte_offset = 4u64;
+
+        for (idx, row_group) in row_groups.iter().enumerate() {
+            let row_group_size = row_group.compressed_size() as u64;
+            let row_group_end = current_byte_offset.saturating_add(row_group_size);
+
+            // Half-open interval overlap: [current, row_group_end) vs [start, end).
+            if current_byte_offset < end && start < row_group_end {
+                selected.push(idx);
+            }
+
+            current_byte_offset = row_group_end;
+        }
+
+        Ok(selected)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::collections::HashMap;
+    use std::fs::File;
+    use std::sync::Arc;
+
+    use arrow_array::cast::AsArray;
+    use arrow_array::{ArrayRef, LargeStringArray, RecordBatch, StringArray};
+    use arrow_schema::{DataType, Field, Schema as ArrowSchema};
+    use futures::TryStreamExt;
+    use parquet::arrow::{ArrowWriter, PARQUET_FIELD_ID_META_KEY};
+    use parquet::basic::Compression;
+    use parquet::file::properties::WriterProperties;
+    use tempfile::TempDir;
+
+    use crate::arrow::{ArrowReader, ArrowReaderBuilder};
+    use crate::expr::{Bind, Predicate, Reference};
+    use crate::io::FileIO;
+    use crate::scan::{FileScanTask, FileScanTaskStream};
+    use crate::spec::{DataFileFormat, Datum, NestedField, PrimitiveType, Schema, SchemaRef, Type};
+
+    /// Scans `{table_location}/1.parquet` with `reader`, projecting only field 1 and
+    /// applying `predicate` (bound case-sensitively against `schema`).
+    ///
+    /// Returns the first column of the first returned batch as owned optional strings.
+    /// Panics if the scan fails, yields no batch, or that column is not a string array
+    /// with `i32` offsets.
+    async fn test_perform_read(
+        predicate: Predicate,
+        schema: SchemaRef,
+        table_location: String,
+        reader: ArrowReader,
+    ) -> Vec<Option<String>> {
+        let data_file_path = format!("{table_location}/1.parquet");
+        let file_size_in_bytes = std::fs::metadata(&data_file_path).unwrap().len();
+        let bound_predicate = predicate.bind(schema.clone(), true).unwrap();
+
+        let task = FileScanTask {
+            file_size_in_bytes,
+            start: 0,
+            length: 0,
+            record_count: None,
+            data_file_path,
+            data_file_format: DataFileFormat::Parquet,
+            schema,
+            project_field_ids: vec![1],
+            predicate: Some(bound_predicate),
+            deletes: vec![],
+            partition: None,
+            partition_spec: None,
+            name_mapping: None,
+            case_sensitive: false,
+        };
+        let tasks =
+            Box::pin(futures::stream::iter(vec![Ok(task)].into_iter())) as FileScanTaskStream;
+
+        let batches: Vec<RecordBatch> = reader
+            .read(tasks)
+            .unwrap()
+            .stream()
+            .try_collect()
+            .await
+            .unwrap();
+
+        batches[0]
+            .column(0)
+            .as_string_opt::<i32>()
+            .unwrap()
+            .iter()
+            .map(|value| value.map(str::to_owned))
+            .collect()
+    }
+
+    /// Writes a one-column Parquet file, `1.parquet`, into a fresh temporary directory.
+    ///
+    /// The Iceberg schema has a single optional string field `a` with field ID 1. The
+    /// file stores it with Arrow type `col_a_type`, which must be `Utf8` or
+    /// `LargeUtf8` (anything else panics), holding `data_for_col_a`.
+    ///
+    /// Returns the `FileIO`, the Iceberg schema, the directory path, and the `TempDir`
+    /// guard, which the caller must keep alive for as long as the file is needed.
+    fn setup_kleene_logic(
+        data_for_col_a: Vec<Option<String>>,
+        col_a_type: DataType,
+    ) -> (FileIO, SchemaRef, String, TempDir) {
+        let field_a = NestedField::optional(1, "a", Type::Primitive(PrimitiveType::String));
+        let schema = Arc::new(
+            Schema::builder()
+                .with_schema_id(1)
+                .with_fields(vec![field_a.into()])
+                .build()
+                .unwrap(),
+        );
+
+        // Tag the Arrow field with Parquet field ID 1 so it maps onto Iceberg field `a`.
+        let field_id_meta = HashMap::from([(
+            PARQUET_FIELD_ID_META_KEY.to_string(),
+            "1".to_string(),
+        )]);
+        let arrow_schema = Arc::new(ArrowSchema::new(vec![
+            Field::new("a", col_a_type.clone(), true).with_metadata(field_id_meta),
+        ]));
+
+        let tmp_dir = TempDir::new().unwrap();
+        let table_location = tmp_dir.path().to_str().unwrap().to_string();
+        let file_io = FileIO::new_with_fs();
+
+        let column: ArrayRef = match col_a_type {
+            DataType::Utf8 => Arc::new(StringArray::from(data_for_col_a)),
+            DataType::LargeUtf8 => Arc::new(LargeStringArray::from(data_for_col_a)),
+            _ => panic!("unexpected col_a_type"),
+        };
+        let to_write = RecordBatch::try_new(arrow_schema, vec![column]).unwrap();
+
+        // Write the Parquet file
+        let writer_props = WriterProperties::builder()
+            .set_compression(Compression::SNAPPY)
+            .build();
+        let mut writer = ArrowWriter::try_new(
+            File::create(format!("{table_location}/1.parquet")).unwrap(),
+            to_write.schema(),
+            Some(writer_props),
+        )
+        .unwrap();
+        writer.write(&to_write).expect("Writing batch");
+        // Closing the writer is what writes the footer.
+        writer.close().unwrap();
+
+        (file_io, schema, table_location, tmp_dir)
+    }
+
+    /// `a IS NULL OR a = 'foo'` over `[NULL, "foo", "bar"]` must return the NULL row
+    /// as well as the matching row.
+    #[tokio::test]
+    async fn test_kleene_logic_or_behaviour() {
+        let a_is_null = Reference::new("a").is_null();
+        let a_equals_foo = Reference::new("a").equal_to(Datum::string("foo"));
+        let predicate = a_is_null.or(a_equals_foo);
+
+        // Table data: [NULL, "foo", "bar"]
+        let (file_io, schema, table_location, _temp_dir) = setup_kleene_logic(
+            vec![None, Some("foo".to_string()), Some("bar".to_string())],
+            DataType::Utf8,
+        );
+        let reader = ArrowReaderBuilder::new(file_io).build();
+
+        let result_data = test_perform_read(predicate, schema, table_location, reader).await;
+
+        // Expected: [NULL, "foo"].
+        assert_eq!(result_data, vec![None, Some("foo".to_string())]);
+    }
+
+    /// `a IS NOT NULL AND a != 'foo'` over `[NULL, "foo", "bar"]` must return only
+    /// the `"bar"` row.
+    #[tokio::test]
+    async fn test_kleene_logic_and_behaviour() {
+        let a_is_not_null = Reference::new("a").is_not_null();
+        let a_differs_from_foo = Reference::new("a").not_equal_to(Datum::string("foo"));
+        let predicate = a_is_not_null.and(a_differs_from_foo);
+
+        // Table data: [NULL, "foo", "bar"]
+        let (file_io, schema, table_location, _temp_dir) = setup_kleene_logic(
+            vec![None, Some("foo".to_string()), Some("bar".to_string())],
+            DataType::Utf8,
+        );
+        let reader = ArrowReaderBuilder::new(file_io).build();
+
+        let result_data = test_perform_read(predicate, schema, table_location, reader).await;
+
+        // Expected: ["bar"].
+        assert_eq!(result_data, vec![Some("bar".to_string())]);
+    }
+
+    #[tokio::test]
+    async fn test_predicate_cast_literal() {
+        let predicates = vec![
+            // a == 'foo'
+            (Reference::new("a").equal_to(Datum::string("foo")), vec![
+                Some("foo".to_string()),
+            ]),
+            // a != 'foo'
+            (
+                Reference::new("a").not_equal_to(Datum::string("foo")),
+                vec![Some("bar".to_string())],
+            ),
+            // STARTS_WITH(a, 'f')
+            (Reference::new("a").starts_with(Datum::string("f")), vec![
+                Some("foo".to_string()),
+            ]),
+            // NOT STARTS_WITH(a, 'f')
+            (
+                Reference::new("a").not_starts_with(Datum::string("f")),
+                vec![Some("bar".to_string())],
+            ),
+            // a < 'foo'
+            (Reference::new("a").less_than(Datum::string("foo")), vec![
+                Some("bar".to_string()),
+            ]),
+            // a <= 'foo'
+            (
+                Reference::new("a").less_than_or_equal_to(Datum::string("foo")),
+                vec![Some("foo".to_string()), Some("bar".to_string())],
+            ),
+            // a > 'bar'
+            (
+                Reference::new("a").greater_than(Datum::string("bar")),
+                vec![Some("foo".to_string())],
+            ),
+            // a >= 'foo'
+            (
+                Reference::new("a").greater_than_or_equal_to(Datum::string("foo")),
+                vec![Some("foo".to_string())],
+            ),
+            // a IN ('foo', 'baz')
+            (
+                Reference::new("a").is_in([Datum::string("foo"), Datum::string("baz")]),
+                vec![Some("foo".to_string())],
+            ),
+            // a NOT IN ('foo', 'baz')
+            (
+                Reference::new("a").is_not_in([Datum::string("foo"), Datum::string("baz")]),
+                vec![Some("bar".to_string())],
+            ),
+        ];
+
+        // Table data: ["foo", "bar"]
+        let data_for_col_a = vec![Some("foo".to_string()), Some("bar".to_string())];
+
+        let (file_io, schema, table_location, _temp_dir) =
+            setup_kleene_logic(data_for_col_a, DataType::LargeUtf8);
+        let reader = ArrowReaderBuilder::new(file_io).build();
+
+        for (predicate, expected) in predicates {
+            println!("testing predicate {predicate}");
+            let result_data = test_perform_read(
+                predicate.clone(),
+                schema.clone(),
+                table_location.clone(),
+                reader.clone(),
+            )
+            .await;
+
+            assert_eq!(result_data, expected, "predicate={predicate}");
+        }
+    }
+
+    /// Verifies that file splits respect byte ranges and only read specific row groups.
+    #[tokio::test]
+    async fn test_file_splits_respect_byte_ranges() {
+        use arrow_array::Int32Array;
+        use parquet::file::reader::{FileReader, SerializedFileReader};
+
+        let schema = Arc::new(
+            Schema::builder()
+                .with_schema_id(1)
+                .with_fields(vec![
+                    NestedField::required(1, "id", Type::Primitive(PrimitiveType::Int)).into(),
+                ])
+                .build()
+                .unwrap(),
+        );
+
+        let arrow_schema = Arc::new(ArrowSchema::new(vec![
+            Field::new("id", DataType::Int32, false).with_metadata(HashMap::from([(
+                PARQUET_FIELD_ID_META_KEY.to_string(),
+                "1".to_string(),
+            )])),
+        ]));
+
+        let tmp_dir = TempDir::new().unwrap();
+        let table_location = tmp_dir.path().to_str().unwrap().to_string();
+        let file_path = format!("{table_location}/multi_row_group.parquet");
+
+        // Three 100-row batches; the writer's 100-row cap puts each in its own row group.
+        let batch1 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new(Int32Array::from(
+            (0..100).collect::<Vec<i32>>(),
+        ))])
+        .unwrap();
+        let batch2 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new(Int32Array::from(
+            (100..200).collect::<Vec<i32>>(),
+        ))])
+        .unwrap();
+        let batch3 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new(Int32Array::from(
+            (200..300).collect::<Vec<i32>>(),
+        ))])
+        .unwrap();
+
+        let props = WriterProperties::builder()
+            .set_compression(Compression::SNAPPY)
+            .set_max_row_group_row_count(Some(100))
+            .build();
+
+        let file = File::create(&file_path).unwrap();
+        let mut writer = ArrowWriter::try_new(file, arrow_schema.clone(), Some(props)).unwrap();
+        writer.write(&batch1).expect("Writing batch 1");
+        writer.write(&batch2).expect("Writing batch 2");
+        writer.write(&batch3).expect("Writing batch 3");
+        writer.close().unwrap();
+
+        // Read the file metadata to get row group byte positions
+        let file = File::open(&file_path).unwrap();
+        let reader = SerializedFileReader::new(file).unwrap();
+        let metadata = reader.metadata();
+
+        println!("File has {} row groups", metadata.num_row_groups());
+        assert_eq!(metadata.num_row_groups(), 3, "Expected 3 row groups");
+
+        // Derive row group byte offsets, assuming back-to-back layout right after the magic
+        let row_group_0 = metadata.row_group(0);
+        let row_group_1 = metadata.row_group(1);
+        let row_group_2 = metadata.row_group(2);
+
+        let rg0_start = 4u64; // Parquet files start with 4-byte magic "PAR1"
+        let rg1_start = rg0_start + row_group_0.compressed_size() as u64;
+        let rg2_start = rg1_start + row_group_1.compressed_size() as u64;
+        let file_end = rg2_start + row_group_2.compressed_size() as u64;
+
+        println!(
+            "Row group 0: {} rows, starts at byte {}, {} bytes compressed",
+            row_group_0.num_rows(),
+            rg0_start,
+            row_group_0.compressed_size()
+        );
+        println!(
+            "Row group 1: {} rows, starts at byte {}, {} bytes compressed",
+            row_group_1.num_rows(),
+            rg1_start,
+            row_group_1.compressed_size()
+        );
+        println!(
+            "Row group 2: {} rows, starts at byte {}, {} bytes compressed",
+            row_group_2.num_rows(),
+            rg2_start,
+            row_group_2.compressed_size()
+        );
+
+        let file_io = FileIO::new_with_fs();
+        let reader = ArrowReaderBuilder::new(file_io).build();
+
+        // Task 1: read only the first row group
+        let task1 = FileScanTask {
+            file_size_in_bytes: std::fs::metadata(&file_path).unwrap().len(),
+            start: rg0_start,
+            length: row_group_0.compressed_size() as u64,
+            record_count: Some(100),
+            data_file_path: file_path.clone(),
+            data_file_format: DataFileFormat::Parquet,
+            schema: schema.clone(),
+            project_field_ids: vec![1],
+            predicate: None,
+            deletes: vec![],
+            partition: None,
+            partition_spec: None,
+            name_mapping: None,
+            case_sensitive: false,
+        };
+
+        // Task 2: read the second and third row groups
+        let task2 = FileScanTask {
+            file_size_in_bytes: std::fs::metadata(&file_path).unwrap().len(),
+            start: rg1_start,
+            length: file_end - rg1_start,
+            record_count: Some(200),
+            data_file_path: file_path.clone(),
+            data_file_format: DataFileFormat::Parquet,
+            schema: schema.clone(),
+            project_field_ids: vec![1],
+            predicate: None,
+            deletes: vec![],
+            partition: None,
+            partition_spec: None,
+            name_mapping: None,
+            case_sensitive: false,
+        };
+
+        let tasks1 = Box::pin(futures::stream::iter(vec![Ok(task1)])) as FileScanTaskStream;
+        let result1 = reader
+            .clone()
+            .read(tasks1)
+            .unwrap()
+            .stream()
+            .try_collect::<Vec<RecordBatch>>()
+            .await
+            .unwrap();
+
+        let total_rows_task1: usize = result1.iter().map(|b| b.num_rows()).sum();
+        println!(
+            "Task 1 (bytes {}-{}) returned {} rows",
+            rg0_start,
+            rg0_start + row_group_0.compressed_size() as u64,
+            total_rows_task1
+        );
+
+        let tasks2 = Box::pin(futures::stream::iter(vec![Ok(task2)])) as FileScanTaskStream;
+        let result2 = reader
+            .read(tasks2)
+            .unwrap()
+            .stream()
+            .try_collect::<Vec<RecordBatch>>()
+            .await
+            .unwrap();
+
+        let total_rows_task2: usize = result2.iter().map(|b| b.num_rows()).sum();
+        println!("Task 2 (bytes {rg1_start}-{file_end}) returned {total_rows_task2} rows");
+
+        assert_eq!(
+            total_rows_task1, 100,
+            "Task 1 should read only the first row group (100 rows), but got {total_rows_task1} rows"
+        );
+
+        assert_eq!(
+            total_rows_task2, 200,
+            "Task 2 should read only the second+third row groups (200 rows), but got {total_rows_task2} rows"
+        );
+
+        // Verify the data values as well (the `> 0` guards always hold after the asserts above)
+        if total_rows_task1 > 0 {
+            let first_batch = &result1[0];
+            let id_col = first_batch
+                .column(0)
+                .as_primitive::<arrow_array::types::Int32Type>();
+            let first_val = id_col.value(0);
+            let last_val = id_col.value(id_col.len() - 1);
+            println!("Task 1 data range: {first_val} to {last_val}");
+
+            assert_eq!(first_val, 0, "Task 1 should start with id=0");
+            assert_eq!(last_val, 99, "Task 1 should end with id=99");
+        }
+
+        if total_rows_task2 > 0 {
+            let first_batch = &result2[0];
+            let id_col = first_batch
+                .column(0)
+                .as_primitive::<arrow_array::types::Int32Type>();
+            let first_val = id_col.value(0);
+            println!("Task 2 first value: {first_val}");
+
+            assert_eq!(first_val, 100, "Task 2 should start with id=100, not id=0");
+        }
+    }
+}
diff --git a/crates/iceberg/src/arrow/scan_metrics.rs b/crates/iceberg/src/arrow/scan_metrics.rs
new file mode 100644
index 0000000000..642190c57d
--- /dev/null
+++ b/crates/iceberg/src/arrow/scan_metrics.rs
@@ -0,0 +1,96 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Scan metrics and I/O counting for Parquet data file reads.
+
+use std::ops::Range;
+use std::sync::Arc;
+use std::sync::atomic::{AtomicU64, Ordering};
+
+use bytes::Bytes;
+
+use crate::error::Result;
+use crate::io::FileRead;
+use crate::scan::ArrowRecordBatchStream;
+
+/// Wraps a [`FileRead`], adding each requested range length to a shared atomic counter.
+pub(crate) struct CountingFileRead<F: FileRead> {
+    inner: F,
+    bytes_read: Arc<AtomicU64>,
+}
+
+impl<F: FileRead> CountingFileRead<F> {
+    pub(crate) fn new(inner: F, bytes_read: Arc<AtomicU64>) -> Self {
+        Self { inner, bytes_read }
+    }
+}
+
+#[async_trait::async_trait]
+impl<F: FileRead> FileRead for CountingFileRead<F> {
+    async fn read(&self, range: Range<u64>) -> Result<Bytes> {
+        debug_assert!(range.end >= range.start);
+        let requested = range.end.saturating_sub(range.start); // no wrap on inverted range
+        self.bytes_read.fetch_add(requested, Ordering::Relaxed);
+        self.inner.read(range).await
+    }
+}
+
+/// Counters gathered while an Iceberg scan runs; clones share the same counter.
+#[derive(Clone, Debug)]
+pub struct ScanMetrics {
+    bytes_read: Arc<AtomicU64>,
+}
+
+impl ScanMetrics {
+    /// Total bytes read from storage during this scan, including data files and delete files.
+    pub fn bytes_read(&self) -> u64 {
+        self.bytes_read.load(Ordering::Relaxed)
+    }
+
+    /// Creates metrics whose byte counter starts at zero.
+    pub(crate) fn new() -> Self {
+        let bytes_read = Arc::new(AtomicU64::default());
+        Self { bytes_read }
+    }
+
+    pub(crate) fn bytes_read_counter(&self) -> &Arc<AtomicU64> {
+        &self.bytes_read
+    }
+}
+
+/// What [`ArrowReader::read`](super::ArrowReader::read) hands back: the record
+/// batch stream paired with the metrics gathered for that scan.
+pub struct ScanResult {
+    stream: ArrowRecordBatchStream,
+    metrics: ScanMetrics,
+}
+
+impl ScanResult {
+    pub(crate) fn new(stream: ArrowRecordBatchStream, metrics: ScanMetrics) -> Self {
+        ScanResult { stream, metrics }
+    }
+
+    /// Borrows the metrics associated with this scan.
+    pub fn metrics(&self) -> &ScanMetrics {
+        &self.metrics
+    }
+
+    /// Takes ownership of the result and yields just the record batch stream.
+    pub fn stream(self) -> ArrowRecordBatchStream {
+        self.stream
+    }
+}
diff --git a/crates/iceberg/src/arrow/schema.rs b/crates/iceberg/src/arrow/schema.rs
index 165717f6a0..88d4a07a39 100644
--- a/crates/iceberg/src/arrow/schema.rs
+++ b/crates/iceberg/src/arrow/schema.rs
@@ -199,7 +199,10 @@ fn visit_struct<V: ArrowSchemaVisitor>(fields: &Fields, visitor: &mut V) -> Resu
 }
 
 /// Visit schema in post order.
-fn visit_schema<V: ArrowSchemaVisitor>(schema: &ArrowSchema, visitor: &mut V) -> Result<V::U> {
+pub(crate) fn visit_schema<V: ArrowSchemaVisitor>(
+    schema: &ArrowSchema,
+    visitor: &mut V,
+) -> Result<V::U> {
     let mut results = Vec::with_capacity(schema.fields().len());
     for field in schema.fields() {
         visitor.before_field(field)?;
@@ -759,6 +762,11 @@ pub(crate) fn get_arrow_datum(datum: &Datum) -> Result<Arc<dyn ArrowDatum + Send
             let array = FixedSizeBinaryArray::try_from_iter(vec![bytes].into_iter()).unwrap();
             Ok(Arc::new(Scalar::new(array)))
         }
+        (PrimitiveType::Fixed(_), PrimitiveLiteral::Binary(value)) => {
+            let array = FixedSizeBinaryArray::try_from_iter(std::iter::once(value.as_slice()))
+                .map_err(|e| Error::new(ErrorKind::DataInvalid, e.to_string()))?;
+            Ok(Arc::new(Scalar::new(array)))
+        }
 
         (primitive_type, _) => Err(Error::new(
             ErrorKind::FeatureUnsupported,
@@ -2177,6 +2185,18 @@ mod tests {
             assert!(is_scalar);
             assert_eq!(array.value(0), [66u8; 16]);
         }
+        {
+            let datum = Datum::fixed(vec![1u8, 2, 3, 4, 5, 6, 7, 8]);
+            let arrow_datum = get_arrow_datum(&datum).unwrap();
+            let (array, is_scalar) = arrow_datum.get();
+            let array = array
+                .as_any()
+                .downcast_ref::<FixedSizeBinaryArray>()
+                .unwrap();
+            assert!(is_scalar);
+            assert_eq!(array.value_length(), 8);
+            assert_eq!(array.value(0), &[1u8, 2, 3, 4, 5, 6, 7, 8]);
+        }
     }
 
     #[test]
diff --git a/crates/iceberg/src/catalog/mod.rs b/crates/iceberg/src/catalog/mod.rs
index f296cf2260..43102adec9 100644
--- a/crates/iceberg/src/catalog/mod.rs
+++ b/crates/iceberg/src/catalog/mod.rs
@@ -144,7 +144,6 @@ pub trait CatalogBuilder: Default + Debug + Send + Sync {
     ///
     /// let catalog = MyCatalogBuilder::default()
     ///     .with_storage_factory(Arc::new(OpenDalStorageFactory::S3 {
-    ///         configured_scheme: "s3a".to_string(),
     ///         customized_credential_load: None,
     ///     }))
     ///     .load("my_catalog", props)
diff --git a/crates/iceberg/src/encryption/crypto.rs b/crates/iceberg/src/encryption/crypto.rs
index 0b34580db8..0f6a9eff43 100644
--- a/crates/iceberg/src/encryption/crypto.rs
+++ b/crates/iceberg/src/encryption/crypto.rs
@@ -43,7 +43,7 @@ use crate::{Error, ErrorKind, Result};
 /// containing `SensitiveBytes` can safely derive or implement `Debug`
 /// without risk of leaking key material.
 #[derive(Clone, PartialEq, Eq)]
-struct SensitiveBytes(Zeroizing<Box<[u8]>>);
+pub struct SensitiveBytes(Zeroizing<Box<[u8]>>);
 
 impl SensitiveBytes {
     /// Wraps the given bytes as sensitive material.
@@ -57,13 +57,11 @@ impl SensitiveBytes {
     }
 
     /// Returns the number of bytes.
-    #[allow(dead_code)] // Encryption work is ongoing so currently unused
     pub fn len(&self) -> usize {
         self.0.len()
     }
 
     /// Returns `true` if the byte slice is empty.
-    #[allow(dead_code)] // Encryption work is ongoing so currently unused
     pub fn is_empty(&self) -> bool {
         self.0.is_empty()
     }
@@ -85,9 +83,10 @@ impl fmt::Display for SensitiveBytes {
 ///
 /// The Iceberg spec supports 128, 192, and 256-bit keys for AES-GCM.
 /// See: <https://iceberg.apache.org/gcm-stream-spec/#goals>
-#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
 pub enum AesKeySize {
-    /// 128-bit AES key (16 bytes)
+    /// 128-bit AES key (16 bytes). Default per the Iceberg spec.
+    #[default]
     Bits128 = 128,
     /// 192-bit AES key (24 bytes)
     Bits192 = 192,
diff --git a/crates/iceberg/src/encryption/file_decryptor.rs b/crates/iceberg/src/encryption/file_decryptor.rs
new file mode 100644
index 0000000000..e44c0e1d78
--- /dev/null
+++ b/crates/iceberg/src/encryption/file_decryptor.rs
@@ -0,0 +1,156 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! File-level decryption helper for AGS1 stream-encrypted files.
+
+use std::fmt;
+use std::sync::Arc;
+
+use super::crypto::{AesGcmCipher, SecureKey};
+use super::stream::AesGcmFileRead;
+use crate::Result;
+use crate::io::FileRead;
+
+/// Decryption material (cipher and AAD prefix) for one encrypted file.
+///
+/// Built from a plaintext DEK plus an AAD prefix; afterwards it can wrap a
+/// reader over the encrypted file so that reads return decrypted bytes.
+pub struct AesGcmFileDecryptor {
+    cipher: Arc<AesGcmCipher>,
+    aad_prefix: Box<[u8]>,
+}
+
+impl AesGcmFileDecryptor {
+    /// Builds a decryptor from a plaintext DEK and an AAD prefix.
+    ///
+    /// Returns the error from [`SecureKey::new`] when it rejects `dek`.
+    pub fn new(dek: &[u8], aad_prefix: impl Into<Box<[u8]>>) -> Result<Self> {
+        let secure_key = SecureKey::new(dek)?;
+        let cipher = Arc::new(AesGcmCipher::new(secure_key));
+        let aad_prefix = aad_prefix.into();
+        Ok(Self { cipher, aad_prefix })
+    }
+
+    /// Wraps a raw encrypted-file reader in a decrypting [`AesGcmFileRead`].
+    ///
+    /// The cipher handle and AAD prefix are cloned into the new reader, so one
+    /// decryptor may wrap several readers. Errors from [`AesGcmFileRead::new`]
+    /// are propagated to the caller.
+    pub fn wrap_reader(
+        &self,
+        reader: Box<dyn FileRead>,
+        encrypted_file_length: u64,
+    ) -> Result<Box<dyn FileRead>> {
+        let (cipher, aad_prefix) = (Arc::clone(&self.cipher), self.aad_prefix.clone());
+        let wrapped = AesGcmFileRead::new(reader, cipher, aad_prefix, encrypted_file_length)?;
+        Ok(Box::new(wrapped))
+    }
+
+    /// Derives the plaintext length from the total length of an encrypted file.
+    pub fn plaintext_length(&self, encrypted_file_length: u64) -> Result<u64> {
+        AesGcmFileRead::calculate_plaintext_length(encrypted_file_length)
+    }
+}
+
+impl fmt::Debug for AesGcmFileDecryptor {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let mut out = f.debug_struct("AesGcmFileDecryptor");
+        out.field("aad_prefix_len", &self.aad_prefix.len());
+        out.finish_non_exhaustive()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::ops::Range;
+
+    use bytes::Bytes;
+
+    use super::*;
+    use crate::encryption::AesGcmFileEncryptor;
+    use crate::io::FileWrite;
+
+    struct MemoryFileRead(Bytes);
+
+    #[async_trait::async_trait]
+    impl FileRead for MemoryFileRead {
+        async fn read(&self, range: Range<u64>) -> Result<Bytes> {
+            Ok(self.0.slice(range.start as usize..range.end as usize))
+        }
+    }
+
+    struct MemoryFileWrite {
+        buffer: std::sync::Arc<std::sync::Mutex<Vec<u8>>>,
+    }
+
+    #[async_trait::async_trait]
+    impl FileWrite for MemoryFileWrite {
+        async fn write(&mut self, bs: Bytes) -> Result<()> {
+            self.buffer.lock().unwrap().extend_from_slice(&bs);
+            Ok(())
+        }
+
+        async fn close(&mut self) -> Result<()> {
+            Ok(())
+        }
+    }
+
+    #[tokio::test]
+    async fn test_wrap_reader_roundtrip() {
+        let key = b"0123456789abcdef";
+        let aad_prefix = b"test-aad-prefix!";
+        let plaintext = b"Hello from file decryptor!";
+
+        // Encrypt via the encryptor wrapper
+        let encryptor = AesGcmFileEncryptor::new(key.as_slice(), aad_prefix.as_slice()).unwrap();
+        let buffer = std::sync::Arc::new(std::sync::Mutex::new(Vec::new()));
+        let mut writer = encryptor.wrap_writer(Box::new(MemoryFileWrite {
+            buffer: buffer.clone(),
+        }));
+        writer.write(Bytes::from(plaintext.to_vec())).await.unwrap();
+        writer.close().await.unwrap();
+        let encrypted = buffer.lock().unwrap().clone();
+        let encrypted_len = encrypted.len() as u64;
+
+        // Decrypt via the decryptor wrapper
+        let decryptor = AesGcmFileDecryptor::new(key.as_slice(), aad_prefix.as_slice()).unwrap();
+        let reader = decryptor
+            .wrap_reader(
+                Box::new(MemoryFileRead(Bytes::from(encrypted))),
+                encrypted_len,
+            )
+            .unwrap();
+
+        let result = reader.read(0..plaintext.len() as u64).await.unwrap();
+        assert_eq!(&result[..], plaintext);
+    }
+
+    #[test]
+    fn test_invalid_key_length() {
+        let result = AesGcmFileDecryptor::new(b"too-short", b"aad".as_slice());
+        assert!(result.is_err());
+    }
+
+    #[test]
+    fn test_plaintext_length() {
+        let decryptor = AesGcmFileDecryptor::new(b"0123456789abcdef", b"aad".as_slice()).unwrap();
+        // header(8) + nonce(12) + 10 bytes ciphertext + tag(16) = 46
+        let encrypted_len = 8 + 12 + 10 + 16;
+        let plain_len = decryptor.plaintext_length(encrypted_len).unwrap();
+        assert_eq!(plain_len, 10);
+    }
+}
diff --git a/crates/iceberg/src/encryption/file_encryptor.rs b/crates/iceberg/src/encryption/file_encryptor.rs
new file mode 100644
index 0000000000..773438ad80
--- /dev/null
+++ b/crates/iceberg/src/encryption/file_encryptor.rs
@@ -0,0 +1,138 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! File-level encryption helper for AGS1 stream-encrypted files.
+
+use std::fmt;
+use std::sync::Arc;
+
+use super::crypto::{AesGcmCipher, SecureKey};
+use super::stream::AesGcmFileWrite;
+use crate::Result;
+use crate::io::FileWrite;
+
+/// Encryption material (cipher and AAD prefix) for one encrypted file.
+///
+/// Write-side counterpart of
+/// [`AesGcmFileDecryptor`](super::AesGcmFileDecryptor): built from a
+/// plaintext DEK plus an AAD prefix, it wraps an output writer so that
+/// data is encrypted as it is written.
+pub struct AesGcmFileEncryptor {
+    cipher: Arc<AesGcmCipher>,
+    aad_prefix: Box<[u8]>,
+}
+
+impl AesGcmFileEncryptor {
+    /// Builds an encryptor from a plaintext DEK and an AAD prefix.
+    ///
+    /// Returns the error from [`SecureKey::new`] when it rejects `dek`.
+    pub fn new(dek: &[u8], aad_prefix: impl Into<Box<[u8]>>) -> Result<Self> {
+        let secure_key = SecureKey::new(dek)?;
+        let cipher = Arc::new(AesGcmCipher::new(secure_key));
+        let aad_prefix = aad_prefix.into();
+        Ok(Self { cipher, aad_prefix })
+    }
+
+    /// Wraps a raw output writer in an encrypting [`AesGcmFileWrite`].
+    ///
+    /// The cipher handle and AAD prefix are cloned into the returned writer,
+    /// so the same encryptor can wrap further writers afterwards.
+    pub fn wrap_writer(&self, writer: Box<dyn FileWrite>) -> Box<dyn FileWrite> {
+        let (cipher, aad_prefix) = (Arc::clone(&self.cipher), self.aad_prefix.clone());
+        Box::new(AesGcmFileWrite::new(writer, cipher, aad_prefix))
+    }
+}
+
+impl fmt::Debug for AesGcmFileEncryptor {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let mut out = f.debug_struct("AesGcmFileEncryptor");
+        out.field("aad_prefix_len", &self.aad_prefix.len());
+        out.finish_non_exhaustive()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::ops::Range;
+
+    use bytes::Bytes;
+
+    use super::*;
+    use crate::encryption::AesGcmFileDecryptor;
+    use crate::io::FileRead;
+
+    struct MemoryFileRead(Bytes);
+
+    #[async_trait::async_trait]
+    impl FileRead for MemoryFileRead {
+        async fn read(&self, range: Range<u64>) -> Result<Bytes> {
+            Ok(self.0.slice(range.start as usize..range.end as usize))
+        }
+    }
+
+    struct MemoryFileWrite {
+        buffer: std::sync::Arc<std::sync::Mutex<Vec<u8>>>,
+    }
+
+    #[async_trait::async_trait]
+    impl FileWrite for MemoryFileWrite {
+        async fn write(&mut self, bs: Bytes) -> Result<()> {
+            self.buffer.lock().unwrap().extend_from_slice(&bs);
+            Ok(())
+        }
+
+        async fn close(&mut self) -> Result<()> {
+            Ok(())
+        }
+    }
+
+    #[tokio::test]
+    async fn test_wrap_writer_roundtrip() {
+        let key = b"0123456789abcdef";
+        let aad_prefix = b"test-aad-prefix!";
+        let plaintext = b"Hello from file encryptor!";
+
+        // Encrypt via the encryptor wrapper
+        let encryptor = AesGcmFileEncryptor::new(key.as_slice(), aad_prefix.as_slice()).unwrap();
+        let buffer = std::sync::Arc::new(std::sync::Mutex::new(Vec::new()));
+        let mut writer = encryptor.wrap_writer(Box::new(MemoryFileWrite {
+            buffer: buffer.clone(),
+        }));
+        writer.write(Bytes::from(plaintext.to_vec())).await.unwrap();
+        writer.close().await.unwrap();
+        let encrypted = buffer.lock().unwrap().clone();
+        let encrypted_len = encrypted.len() as u64;
+
+        // Decrypt via the decryptor wrapper
+        let decryptor = AesGcmFileDecryptor::new(key.as_slice(), aad_prefix.as_slice()).unwrap();
+        let reader = decryptor
+            .wrap_reader(
+                Box::new(MemoryFileRead(Bytes::from(encrypted))),
+                encrypted_len,
+            )
+            .unwrap();
+
+        let result = reader.read(0..plaintext.len() as u64).await.unwrap();
+        assert_eq!(&result[..], plaintext);
+    }
+
+    #[test]
+    fn test_invalid_key_length() {
+        let result = AesGcmFileEncryptor::new(b"bad-key", b"aad".as_slice());
+        assert!(result.is_err());
+    }
+}
diff --git a/crates/iceberg/src/encryption/key_metadata.rs b/crates/iceberg/src/encryption/key_metadata.rs
new file mode 100644
index 0000000000..4ef66ce394
--- /dev/null
+++ b/crates/iceberg/src/encryption/key_metadata.rs
@@ -0,0 +1,286 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Avro-serialized key metadata format compatible with Java's
+//! `org.apache.iceberg.encryption.StandardKeyMetadata`.
+
+use std::fmt;
+
+use super::SensitiveBytes;
+use crate::{Error, ErrorKind, Result};
+
+/// Standard key metadata for Iceberg table encryption.
+///
+/// Holds the Data Encryption Key (DEK) plus an optional AAD prefix and an
+/// optional file length. Byte-compatible with Java's `StandardKeyMetadata`
+/// via Avro serialization.
+///
+/// Wire format: `[version byte (0x01)] [Avro binary datum]`
+#[derive(Clone, PartialEq, Eq)]
+pub struct StandardKeyMetadata {
+    encryption_key: SensitiveBytes, // the DEK; always present
+    aad_prefix: Option<Box<[u8]>>,  // optional AAD prefix bytes
+    file_length: Option<u64>,       // optional file length
+}
+
+impl fmt::Debug for StandardKeyMetadata {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        // Show only the AAD prefix length, never its bytes.
+        let aad_summary = self
+            .aad_prefix
+            .as_ref()
+            .map(|prefix| format!("[{} bytes]", prefix.len()));
+
+        let mut out = f.debug_struct("StandardKeyMetadata");
+        out.field("encryption_key", &self.encryption_key);
+        out.field("aad_prefix", &aad_summary);
+        out.field("file_length", &self.file_length);
+        out.finish()
+    }
+}
+
+impl StandardKeyMetadata {
+    /// Creates metadata holding `encryption_key`, with no AAD prefix or file length.
+    pub fn new(encryption_key: &[u8]) -> Self {
+        Self {
+            encryption_key: SensitiveBytes::new(encryption_key),
+            aad_prefix: None,
+            file_length: None,
+        }
+    }
+
+    /// Sets the AAD prefix to a copy of `aad_prefix` and returns the updated metadata.
+    pub fn with_aad_prefix(mut self, aad_prefix: &[u8]) -> Self {
+        self.aad_prefix = Some(aad_prefix.into());
+        self
+    }
+
+    /// Sets the file length and returns the updated metadata.
+    pub fn with_file_length(mut self, length: u64) -> Self {
+        self.file_length = Some(length);
+        self
+    }
+
+    /// Returns the plaintext Data Encryption Key.
+    pub fn encryption_key(&self) -> &SensitiveBytes {
+        &self.encryption_key
+    }
+
+    /// Returns the AAD prefix, or `None` if no prefix is set.
+    pub fn aad_prefix(&self) -> Option<&[u8]> {
+        self.aad_prefix.as_deref()
+    }
+
+    /// Returns the file length, or `None` if no length is set.
+    pub fn file_length(&self) -> Option<u64> {
+        self.file_length
+    }
+
+    /// Encodes to Java-compatible format: `[0x01] [Avro binary datum]`
+    pub fn encode(&self) -> Result<Box<[u8]>> {
+        _serde::StandardKeyMetadataV1::from(self).encode()
+    }
+
+    /// Decodes from the Java-compatible format; the first byte must be the version `0x01`.
+    pub fn decode(bytes: &[u8]) -> Result<Self> {
+        _serde::StandardKeyMetadataV1::decode(bytes).map(Self::from)
+    }
+}
+
+mod _serde {
+    use std::io::Cursor;
+    use std::sync::{Arc, LazyLock};
+
+    use apache_avro::{Schema as AvroSchema, from_avro_datum, from_value, to_avro_datum, to_value};
+    use serde::{Deserialize, Serialize};
+
+    use super::*;
+    use crate::avro::schema_to_avro_schema;
+    use crate::spec::{NestedField, PrimitiveType, Schema, Type};
+
+    pub(super) const V1: u8 = 1; // version byte written before the Avro datum
+
+    /// Avro schema for StandardKeyMetadata V1, built on first use from an Iceberg schema.
+    pub(super) static AVRO_SCHEMA_V1: LazyLock<AvroSchema> = LazyLock::new(|| {
+        let schema = Schema::builder()
+            .with_fields(vec![
+                Arc::new(NestedField::required(
+                    0,
+                    "encryption_key",
+                    Type::Primitive(PrimitiveType::Binary),
+                )),
+                Arc::new(NestedField::optional(
+                    1,
+                    "aad_prefix",
+                    Type::Primitive(PrimitiveType::Binary),
+                )),
+                Arc::new(NestedField::optional(
+                    2,
+                    "file_length",
+                    Type::Primitive(PrimitiveType::Long),
+                )),
+            ])
+            .build()
+            .expect("Failed to build StandardKeyMetadata Iceberg schema");
+
+        schema_to_avro_schema("StandardKeyMetadata", &schema)
+            .expect("Failed to convert StandardKeyMetadata to Avro schema")
+    });
+
+    /// Serde mirror of [`StandardKeyMetadata`] used for V1 Avro (de)serialization.
+    /// Field names must match [`AVRO_SCHEMA_V1`] exactly.
+    #[derive(Serialize, Deserialize)]
+    pub(super) struct StandardKeyMetadataV1 {
+        pub encryption_key: serde_bytes::ByteBuf,
+        pub aad_prefix: Option<serde_bytes::ByteBuf>,
+        pub file_length: Option<u64>,
+    }
+
+    impl StandardKeyMetadataV1 {
+        pub(super) fn encode(&self) -> Result<Box<[u8]>> {
+            let value = to_value(self)
+                .and_then(|v| v.resolve(&AVRO_SCHEMA_V1))
+                .map_err(|e| {
+                    Error::new(ErrorKind::Unexpected, "Failed to encode key metadata")
+                        .with_source(e)
+                })?;
+
+            let datum = to_avro_datum(&AVRO_SCHEMA_V1, value).map_err(|e| {
+                Error::new(ErrorKind::Unexpected, "Failed to encode key metadata").with_source(e)
+            })?;
+
+            let mut result = Vec::with_capacity(1 + datum.len());
+            result.push(V1); // version byte first, then the datum
+            result.extend_from_slice(&datum);
+            Ok(result.into_boxed_slice())
+        }
+
+        pub(super) fn decode(bytes: &[u8]) -> Result<Self> {
+            if bytes.is_empty() {
+                return Err(Error::new(
+                    ErrorKind::DataInvalid,
+                    "Empty key metadata buffer",
+                ));
+            }
+
+            let version = bytes[0];
+            if version != V1 {
+                return Err(Error::new(
+                    ErrorKind::FeatureUnsupported,
+                    format!("Cannot resolve schema for version: {version}"),
+                ));
+            }
+
+            let mut reader = Cursor::new(&bytes[1..]); // datum starts after the version byte
+            let value = from_avro_datum(&AVRO_SCHEMA_V1, &mut reader, None).map_err(|e| {
+                Error::new(ErrorKind::DataInvalid, "Failed to decode key metadata").with_source(e)
+            })?;
+
+            from_value(&value).map_err(|e| {
+                Error::new(
+                    ErrorKind::DataInvalid,
+                    "Failed to decode key metadata fields",
+                )
+                .with_source(e)
+            })
+        }
+    }
+
+    impl From<&StandardKeyMetadata> for StandardKeyMetadataV1 {
+        fn from(metadata: &StandardKeyMetadata) -> Self {
+            Self {
+                encryption_key: serde_bytes::ByteBuf::from(metadata.encryption_key.as_bytes()),
+                aad_prefix: metadata
+                    .aad_prefix
+                    .as_ref()
+                    .map(|b| serde_bytes::ByteBuf::from(b.as_ref())),
+                file_length: metadata.file_length,
+            }
+        }
+    }
+
+    impl From<StandardKeyMetadataV1> for StandardKeyMetadata {
+        fn from(v1: StandardKeyMetadataV1) -> Self {
+            Self {
+                encryption_key: SensitiveBytes::new(v1.encryption_key.into_vec()),
+                aad_prefix: v1.aad_prefix.map(|b| b.into_vec().into_boxed_slice()),
+                file_length: v1.file_length,
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_roundtrip() {
+        let key = b"0123456789012345";
+        let aad = b"1234567890123456";
+
+        let metadata = StandardKeyMetadata::new(key).with_aad_prefix(aad);
+        let serialized = metadata.encode().unwrap();
+        let parsed = StandardKeyMetadata::decode(&serialized).unwrap();
+
+        assert_eq!(parsed.encryption_key().as_bytes(), key);
+        assert_eq!(parsed.aad_prefix(), Some(aad.as_slice()));
+        assert_eq!(parsed.file_length(), None); // never set, so it must stay absent
+    }
+
+    #[test]
+    fn test_roundtrip_with_length() {
+        let key = b"0123456789012345";
+        let aad = b"1234567890123456";
+
+        let file_length = 100_000;
+        let metadata = StandardKeyMetadata::new(key)
+            .with_aad_prefix(aad)
+            .with_file_length(file_length);
+        let serialized = metadata.encode().unwrap();
+        let parsed = StandardKeyMetadata::decode(&serialized).unwrap();
+
+        assert_eq!(parsed.encryption_key().as_bytes(), key);
+        assert_eq!(parsed.aad_prefix(), Some(aad.as_slice()));
+        assert_eq!(parsed.file_length(), Some(file_length));
+    }
+
+    #[test]
+    fn test_unsupported_version() {
+        let result = StandardKeyMetadata::decode(&[0x02]); // only version 0x01 is supported
+        assert!(result.is_err());
+        let err = result.unwrap_err();
+        assert_eq!(err.kind(), ErrorKind::FeatureUnsupported);
+    }
+
+    #[test]
+    fn test_empty_buffer() {
+        let result = StandardKeyMetadata::decode(&[]); // not even a version byte
+        assert!(result.is_err());
+        assert_eq!(result.unwrap_err().kind(), ErrorKind::DataInvalid);
+    }
+
+    #[test]
+    fn test_roundtrip_without_aad() {
+        let metadata = StandardKeyMetadata::new(&[1, 2, 3, 4]);
+        let serialized = metadata.encode().unwrap();
+        let parsed = StandardKeyMetadata::decode(&serialized).unwrap();
+
+        assert_eq!(parsed.encryption_key().as_bytes(), &[1, 2, 3, 4]);
+        assert_eq!(parsed.aad_prefix(), None); // optional field survives as absent
+    }
+}
diff --git a/crates/iceberg/src/encryption/kms/client.rs b/crates/iceberg/src/encryption/kms/client.rs
new file mode 100644
index 0000000000..85cd511758
--- /dev/null
+++ b/crates/iceberg/src/encryption/kms/client.rs
@@ -0,0 +1,98 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Key management client trait for encryption key operations.
+//!
+//! Mirrors the `KeyManagementClient` interface from the Apache Iceberg Java implementation.
+
+use async_trait::async_trait;
+
+use crate::Result;
+use crate::encryption::SensitiveBytes;
+
+/// Result of a server-side key generation operation.
+///
+/// Returned by [`KeyManagementClient::generate_key`] when the KMS supports
+/// atomic key generation and wrapping.
+pub struct GeneratedKey {
+    key: SensitiveBytes,  // plaintext key bytes
+    wrapped_key: Vec<u8>, // wrapped (encrypted) key bytes
+}
+
+impl GeneratedKey {
+    /// Stores `key` and `wrapped_key` as given; no check is made that they correspond.
+    pub fn new(key: SensitiveBytes, wrapped_key: Vec<u8>) -> Self {
+        Self { key, wrapped_key }
+    }
+
+    /// Returns the plaintext key bytes. Zeroized on drop, redacted in Debug.
+    pub fn key(&self) -> &SensitiveBytes {
+        &self.key
+    }
+
+    /// Returns a borrowed view of the wrapped (encrypted) key bytes.
+    pub fn wrapped_key(&self) -> &[u8] {
+        &self.wrapped_key
+    }
+}
+
+/// Pluggable interface for key management systems (AWS KMS, Azure Key Vault, etc.).
+#[async_trait]
+pub trait KeyManagementClient: Send + Sync + std::fmt::Debug {
+    /// Wrap (encrypt) `key` under the KMS-managed wrapping key named by `wrapping_key_id`.
+    async fn wrap_key(&self, key: &[u8], wrapping_key_id: &str) -> Result<Vec<u8>>;
+
+    /// Unwrap (decrypt) `wrapped_key` using the wrapping key named by `wrapping_key_id`.
+    async fn unwrap_key(&self, wrapped_key: &[u8], wrapping_key_id: &str)
+    -> Result<SensitiveBytes>;
+
+    /// Whether this KMS supports server-side key generation.
+    ///
+    /// If `true`, callers can use [`generate_key`](Self::generate_key) for atomic
+    /// key generation and wrapping, which is more secure than generating a key
+    /// locally and then wrapping it.
+    fn supports_key_generation(&self) -> bool;
+
+    /// Generate a new key and wrap it atomically on the server side.
+    ///
+    /// This is only supported when [`supports_key_generation`](Self::supports_key_generation)
+    /// returns `true`.
+    async fn generate_key(&self, wrapping_key_id: &str) -> Result<GeneratedKey>;
+}
+
+#[async_trait]
+impl<T: AsRef<dyn KeyManagementClient> + Send + Sync + std::fmt::Debug> KeyManagementClient for T {
+    async fn wrap_key(&self, key: &[u8], wrapping_key_id: &str) -> Result<Vec<u8>> {
+        self.as_ref().wrap_key(key, wrapping_key_id).await // forward to the inner client
+    }
+
+    async fn unwrap_key(
+        &self,
+        wrapped_key: &[u8],
+        wrapping_key_id: &str,
+    ) -> Result<SensitiveBytes> {
+        self.as_ref().unwrap_key(wrapped_key, wrapping_key_id).await // forward as well
+    }
+
+    fn supports_key_generation(&self) -> bool {
+        self.as_ref().supports_key_generation() // answer comes from the inner client
+    }
+
+    async fn generate_key(&self, wrapping_key_id: &str) -> Result<GeneratedKey> {
+        self.as_ref().generate_key(wrapping_key_id).await // forward as well
+    }
+}
diff --git a/crates/iceberg/src/encryption/kms/memory.rs b/crates/iceberg/src/encryption/kms/memory.rs
new file mode 100644
index 0000000000..65319831dd
--- /dev/null
+++ b/crates/iceberg/src/encryption/kms/memory.rs
@@ -0,0 +1,296 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! In-memory KMS implementation for testing and development.
+//!
+//! **WARNING**: This implementation is NOT suitable for production use.
+//! Keys are stored in memory only and will be lost when the process exits.
+
+use std::collections::HashMap;
+use std::fmt;
+use std::sync::{Arc, RwLock};
+
+use async_trait::async_trait;
+
+use super::KeyManagementClient;
+use crate::encryption::{AesGcmCipher, AesKeySize, SecureKey, SensitiveBytes};
+use crate::error::lock_error;
+use crate::{Error, ErrorKind, Result};
+
+/// In-memory `KeyManagementClient` for tests and development. Not suitable for production use.
+///
+/// ```
+/// use iceberg::encryption::KeyManagementClient;
+/// use iceberg::encryption::kms::MemoryKeyManagementClient;
+///
+/// # async fn example() -> iceberg::Result<()> {
+/// let kms = MemoryKeyManagementClient::new();
+/// kms.add_master_key("my-master-key")?;
+///
+/// let dek = vec![0u8; 16];
+/// let wrapped = kms.wrap_key(&dek, "my-master-key").await?;
+/// let unwrapped = kms.unwrap_key(&wrapped, "my-master-key").await?;
+/// assert_eq!(dek.as_slice(), unwrapped.as_bytes());
+/// # Ok(())
+/// # }
+/// ```
+#[derive(Clone, Default)]
+pub struct MemoryKeyManagementClient {
+    master_keys: Arc<RwLock<HashMap<String, SensitiveBytes>>>, // key ID -> key; shared by clones
+    master_key_size: AesKeySize, // size of keys generated by `add_master_key`
+}
+
+impl fmt::Debug for MemoryKeyManagementClient {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let mut out = f.debug_struct("MemoryKeyManagementClient");
+        out.field("master_key_size", &self.master_key_size);
+        out.field("key_count", &self.key_count());
+        out.finish() // only the size and count are shown, never key material
+    }
+}
+
+impl MemoryKeyManagementClient {
+    /// Creates a new in-memory KMS with 128-bit AES keys.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Creates a new, empty in-memory KMS that generates master keys of `master_key_size`.
+    pub fn with_master_key_size(master_key_size: AesKeySize) -> Self {
+        Self {
+            master_keys: Arc::new(RwLock::new(HashMap::new())),
+            master_key_size,
+        }
+    }
+
+    /// Adds a randomly generated master key under `key_id`; fails if that ID already exists.
+    pub fn add_master_key(&self, key_id: impl Into<String>) -> Result<()> {
+        let key = SecureKey::generate(self.master_key_size);
+        self.insert_key(key_id.into(), SensitiveBytes::new(key.as_bytes()))
+    }
+
+    /// Adds a master key with explicit key bytes; fails if `key_id` already exists.
+    ///
+    /// Use this to seed the KMS with known key material, e.g. for
+    /// cross-language integration tests where both Java and Rust must
+    /// share the same master key bytes.
+    pub fn add_master_key_bytes(
+        &self,
+        key_id: impl Into<String>,
+        key_bytes: SensitiveBytes,
+    ) -> Result<()> {
+        Self::check_key_length(&key_bytes)?;
+        self.insert_key(key_id.into(), key_bytes)
+    }
+
+    /// Checks that the key length is valid by constructing a `SecureKey` from the bytes.
+    fn check_key_length(key_bytes: &SensitiveBytes) -> Result<()> {
+        SecureKey::new(key_bytes.as_bytes())?;
+        Ok(())
+    }
+
+    fn insert_key(&self, key_id: String, key: SensitiveBytes) -> Result<()> {
+        let mut keys = self.master_keys.write().map_err(lock_error)?;
+
+        if keys.contains_key(&key_id) {
+            return Err(Error::new(
+                ErrorKind::DataInvalid,
+                format!("Master key already exists: {key_id}"),
+            ));
+        }
+
+        keys.insert(key_id, key); // reached only when the ID was not already present
+        Ok(())
+    }
+
+    fn get_master_key(&self, key_id: &str) -> Result<SensitiveBytes> {
+        let keys = self.master_keys.read().map_err(lock_error)?; // read lock; returns a clone
+
+        keys.get(key_id).cloned().ok_or_else(|| {
+            Error::new(
+                ErrorKind::DataInvalid,
+                format!("Master key not found: {key_id}"),
+            )
+        })
+    }
+
+    /// Number of master keys stored; reports 0 if the lock cannot be read.
+    pub fn key_count(&self) -> usize {
+        self.master_keys.read().map(|keys| keys.len()).unwrap_or(0)
+    }
+
+    /// Whether a master key with the given ID exists; reports `false` if the lock cannot be read.
+    pub fn has_key(&self, key_id: &str) -> bool {
+        self.master_keys
+            .read()
+            .map(|keys| keys.contains_key(key_id))
+            .unwrap_or(false)
+    }
+}
+
+#[async_trait]
+impl KeyManagementClient for MemoryKeyManagementClient {
+    async fn wrap_key(&self, key: &[u8], wrapping_key_id: &str) -> Result<Vec<u8>> {
+        let master_key_bytes = self.get_master_key(wrapping_key_id)?; // errors on unknown ID
+        let master_key = SecureKey::new(master_key_bytes.as_bytes())?;
+        let cipher = AesGcmCipher::new(master_key);
+
+        cipher.encrypt(key, None)
+    }
+
+    async fn unwrap_key(
+        &self,
+        wrapped_key: &[u8],
+        wrapping_key_id: &str,
+    ) -> Result<SensitiveBytes> {
+        let master_key_bytes = self.get_master_key(wrapping_key_id)?; // errors on unknown ID
+        let master_key = SecureKey::new(master_key_bytes.as_bytes())?;
+        let cipher = AesGcmCipher::new(master_key);
+
+        Ok(SensitiveBytes::new(cipher.decrypt(wrapped_key, None)?))
+    }
+
+    fn supports_key_generation(&self) -> bool {
+        false // `generate_key` on this client always returns an error
+    }
+
+    async fn generate_key(&self, _wrapping_key_id: &str) -> Result<super::GeneratedKey> {
+        Err(Error::new(
+            ErrorKind::FeatureUnsupported,
+            "MemoryKeyManagementClient does not support server-side key generation",
+        ))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[tokio::test]
+    async fn test_wrap_unwrap_roundtrip() {
+        let kms = MemoryKeyManagementClient::new();
+        kms.add_master_key("master-1").unwrap();
+        let dek = vec![0u8; 16];
+
+        let wrapped = kms.wrap_key(&dek, "master-1").await.unwrap();
+        let unwrapped = kms.unwrap_key(&wrapped, "master-1").await.unwrap();
+        assert_eq!(unwrapped.as_bytes(), dek.as_slice());
+    }
+
+    #[tokio::test]
+    async fn test_wrap_unknown_key_fails() {
+        let kms = MemoryKeyManagementClient::new();
+        let dek = vec![0u8; 16];
+
+        let result = kms.wrap_key(&dek, "nonexistent").await; // no master key was added
+        assert!(result.is_err());
+    }
+
+    #[tokio::test]
+    async fn test_wrong_master_key_fails_unwrap() {
+        let kms = MemoryKeyManagementClient::new();
+        kms.add_master_key("master-1").unwrap();
+        kms.add_master_key("master-2").unwrap();
+        let dek = vec![0u8; 16];
+
+        let wrapped = kms.wrap_key(&dek, "master-1").await.unwrap();
+
+        let result = kms.unwrap_key(&wrapped, "master-2").await; // wrapped under master-1
+        assert!(result.is_err());
+    }
+
+    #[tokio::test]
+    async fn test_does_not_support_key_generation() {
+        let kms = MemoryKeyManagementClient::new();
+        assert!(!kms.supports_key_generation());
+
+        let result = kms.generate_key("master-1").await;
+        assert!(result.is_err());
+    }
+
+    #[tokio::test]
+    async fn test_multiple_master_keys() {
+        let kms = MemoryKeyManagementClient::new();
+        kms.add_master_key("master-1").unwrap();
+        kms.add_master_key("master-2").unwrap();
+        let dek1 = vec![1u8; 16];
+        let dek2 = vec![2u8; 16];
+
+        let wrapped1 = kms.wrap_key(&dek1, "master-1").await.unwrap();
+        let wrapped2 = kms.wrap_key(&dek2, "master-2").await.unwrap();
+
+        let unwrapped1 = kms.unwrap_key(&wrapped1, "master-1").await.unwrap();
+        let unwrapped2 = kms.unwrap_key(&wrapped2, "master-2").await.unwrap();
+
+        assert_eq!(unwrapped1.as_bytes(), dek1.as_slice());
+        assert_eq!(unwrapped2.as_bytes(), dek2.as_slice());
+    }
+
+    #[tokio::test]
+    async fn test_add_master_key() {
+        let kms = MemoryKeyManagementClient::new();
+
+        kms.add_master_key("my-key").unwrap();
+        assert!(kms.has_key("my-key"));
+        assert_eq!(kms.key_count(), 1);
+
+        let result = kms.add_master_key("my-key"); // duplicate ID
+        assert!(result.is_err());
+    }
+
+    #[tokio::test]
+    async fn test_add_master_key_bytes() {
+        let kms = MemoryKeyManagementClient::new();
+        let key_bytes = SensitiveBytes::new([42u8; 16]);
+
+        kms.add_master_key_bytes("my-key", key_bytes).unwrap();
+        assert!(kms.has_key("my-key"));
+
+        let dek = vec![7u8; 16];
+        let wrapped = kms.wrap_key(&dek, "my-key").await.unwrap();
+        let unwrapped = kms.unwrap_key(&wrapped, "my-key").await.unwrap();
+        assert_eq!(unwrapped.as_bytes(), dek.as_slice());
+    }
+
+    #[tokio::test]
+    async fn test_add_master_key_bytes_invalid_length() {
+        let kms = MemoryKeyManagementClient::new();
+
+        let result = kms.add_master_key_bytes("my-key", SensitiveBytes::new([0u8; 7])); // 7 bytes
+        assert!(result.is_err());
+    }
+
+    #[tokio::test]
+    async fn test_with_master_key_size() {
+        let kms = MemoryKeyManagementClient::with_master_key_size(AesKeySize::Bits256);
+        kms.add_master_key("master-256").unwrap();
+
+        let dek = vec![0u8; 16];
+        let wrapped = kms.wrap_key(&dek, "master-256").await.unwrap();
+        let unwrapped = kms.unwrap_key(&wrapped, "master-256").await.unwrap();
+        assert_eq!(unwrapped.as_bytes(), dek.as_slice());
+    }
+
+    #[tokio::test]
+    async fn test_clone_shares_state() {
+        let kms1 = MemoryKeyManagementClient::new();
+        let kms2 = kms1.clone();
+
+        kms1.add_master_key("shared-key").unwrap(); // added through the original handle
+        assert!(kms2.has_key("shared-key")); // visible through the clone
+    }
+}
diff --git a/crates/iceberg/src/encryption/kms/mod.rs b/crates/iceberg/src/encryption/kms/mod.rs
new file mode 100644
index 0000000000..160e692550
--- /dev/null
+++ b/crates/iceberg/src/encryption/kms/mod.rs
@@ -0,0 +1,27 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Key Management System trait and implementations.
+//!
+//! This module provides the [`KeyManagementClient`] trait for pluggable KMS
+//! integration and implementations for different key management systems.
+
+mod client;
+mod memory;
+
+pub use client::{GeneratedKey, KeyManagementClient};
+pub use memory::MemoryKeyManagementClient;
diff --git a/crates/iceberg/src/encryption/mod.rs b/crates/iceberg/src/encryption/mod.rs
index 097f4f24e3..773d781d6d 100644
--- a/crates/iceberg/src/encryption/mod.rs
+++ b/crates/iceberg/src/encryption/mod.rs
@@ -17,9 +17,19 @@
 
 //! Encryption module for Apache Iceberg.
 //!
-//! This module provides core cryptographic primitives for encrypting
-//! and decrypting data in Iceberg tables.
+//! This module provides core cryptographic primitives and key management
+//! for encrypting and decrypting data in Iceberg tables.
 
 mod crypto;
+mod file_decryptor;
+mod file_encryptor;
+pub(crate) mod key_metadata;
+pub mod kms;
+mod stream;
 
-pub use crypto::{AesGcmCipher, AesKeySize, SecureKey};
+pub use crypto::{AesGcmCipher, AesKeySize, SecureKey, SensitiveBytes};
+pub use file_decryptor::AesGcmFileDecryptor;
+pub use file_encryptor::AesGcmFileEncryptor;
+pub use key_metadata::StandardKeyMetadata;
+pub use kms::{GeneratedKey, KeyManagementClient};
+pub use stream::{AesGcmFileRead, AesGcmFileWrite};
diff --git a/crates/iceberg/src/encryption/stream.rs b/crates/iceberg/src/encryption/stream.rs
new file mode 100644
index 0000000000..130578f2b1
--- /dev/null
+++ b/crates/iceberg/src/encryption/stream.rs
@@ -0,0 +1,1249 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! AGS1 stream encryption/decryption for Iceberg.
+//!
+//! Implements the block-based AES-GCM stream format used by Iceberg for
+//! encrypting manifest lists and manifest files. The format is
+//! byte-compatible with Java's `AesGcmInputStream` / `AesGcmOutputStream`.
+//!
+//! # AGS1 File Format
+//!
+//! ```text
+//! ┌─────────────────────────────────────────────┐
+//! │ Header (8 bytes)                            │
+//! │   Magic: "AGS1" (4 bytes, ASCII)            │
+//! │   Plain block size: u32 LE (4 bytes)        │
+//! │     Default: 1,048,576 (1 MiB)              │
+//! ├─────────────────────────────────────────────┤
+//! │ Block 0                                     │
+//! │   Nonce (12 bytes)                          │
+//! │   Ciphertext (up to plain_block_size bytes) │
+//! │   GCM Tag (16 bytes)                        │
+//! ├─────────────────────────────────────────────┤
+//! │ Block 1..N (same structure)                 │
+//! ├─────────────────────────────────────────────┤
+//! │ Final block (may be shorter)                │
+//! └─────────────────────────────────────────────┘
+//! ```
+//!
+//! Each block's AAD is: `aad_prefix || block_index (4 bytes, LE)`.
+
+use std::ops::Range;
+use std::sync::Arc;
+
+use bytes::{Bytes, BytesMut};
+
+use super::AesGcmCipher;
+use crate::io::{FileRead, FileWrite};
+use crate::{Error, ErrorKind, Result};
+
+/// Plaintext bytes carried by each full block (1 MiB), matching Java's `Ciphers.PLAIN_BLOCK_SIZE`.
+pub const PLAIN_BLOCK_SIZE: u32 = 1024 * 1024;
+
+/// Length in bytes of the AES-GCM nonce stored at the start of every block.
+pub const NONCE_LENGTH: u32 = 12;
+
+/// Length in bytes of the AES-GCM authentication tag stored at the end of every block.
+pub const GCM_TAG_LENGTH: u32 = 16;
+
+/// On-disk size of a full block: nonce + `PLAIN_BLOCK_SIZE` bytes of ciphertext + GCM tag.
+pub const CIPHER_BLOCK_SIZE: u32 = PLAIN_BLOCK_SIZE + NONCE_LENGTH + GCM_TAG_LENGTH;
+
+/// Magic bytes (ASCII `AGS1`) that open every AGS1 stream.
+pub const GCM_STREAM_MAGIC: [u8; 4] = *b"AGS1";
+
+/// AGS1 header length in bytes: 4-byte magic followed by the 4-byte LE plain block size.
+pub const GCM_STREAM_HEADER_LENGTH: u32 = 8;
+
+/// Smallest stream the writer emits: header plus one block with empty plaintext (test-only).
+#[cfg(test)]
+pub const MIN_STREAM_LENGTH: u32 = GCM_STREAM_HEADER_LENGTH + NONCE_LENGTH + GCM_TAG_LENGTH;
+
+/// Builds the additional authenticated data (AAD) bound to a single AGS1 block.
+///
+/// The result is `aad_prefix` followed by `block_index` encoded as four
+/// little-endian bytes, so blocks with different indices get different AADs.
+/// When `aad_prefix` is empty the AAD consists of the four index bytes alone.
+///
+/// This matches Java's `Ciphers.streamBlockAAD()`.
+///
+/// * `aad_prefix` - per-file prefix shared by all blocks (may be empty)
+/// * `block_index` - zero-based position of the block within the stream
+pub(crate) fn stream_block_aad(aad_prefix: &[u8], block_index: u32) -> Vec<u8> {
+    let index_le = block_index.to_le_bytes();
+    // Concatenating with an empty prefix yields just the index bytes, so no
+    // special case is needed.
+    [aad_prefix, index_le.as_slice()].concat()
+}
+
+/// Transparent decryption of AGS1 stream-encrypted files.
+///
+/// Implements the [`FileRead`] trait, providing random-access reads over
+/// encrypted data. Each `read()` call determines which encrypted blocks
+/// overlap the requested plaintext range, reads and decrypts them, then
+/// returns the requested plaintext bytes.
+///
+/// # Usage
+///
+/// ```ignore
+/// // (ignored: requires async runtime and concrete FileRead/FileWrite impls)
+/// let reader = AesGcmFileRead::new(
+///     inner_reader,          // Box<dyn FileRead> for the encrypted file
+///     cipher,                // Arc<AesGcmCipher> with the DEK
+///     aad_prefix.into(),     // Box<[u8]>, e.g. converted from a &[u8] or Vec<u8>
+///     encrypted_file_length, // u64: total size of the encrypted file in bytes
+/// )?;
+///
+/// // Read plaintext bytes transparently
+/// let plaintext = reader.read(0..1024).await?;
+/// ```
+pub struct AesGcmFileRead {
+    /// The underlying encrypted file reader.
+    inner: Box<dyn FileRead>,
+    /// The AES-GCM cipher holding the DEK.
+    cipher: Arc<AesGcmCipher>,
+    /// AAD prefix from the key metadata; prepended to every block's AAD.
+    aad_prefix: Box<[u8]>,
+    /// Total plaintext stream size in bytes.
+    plain_stream_size: u64,
+    /// Total number of encrypted blocks (0 for a header-only file).
+    num_blocks: u64,
+    /// Size of the last cipher block, nonce and tag included (may be smaller than `CIPHER_BLOCK_SIZE`).
+    last_cipher_block_size: u32,
+}
+
+impl AesGcmFileRead {
+    /// Builds a decrypting reader over an AGS1 stream of known total length.
+    ///
+    /// The plaintext size and block layout are derived purely from
+    /// `encrypted_file_length`; no I/O is performed here. This type never reads
+    /// the 8-byte header, so the magic and block-size field are not checked;
+    /// corrupt block data surfaces later as a decryption error from `read()`.
+    ///
+    /// A header-only file (zero-length body) is accepted and yields an empty
+    /// plaintext stream with no blocks.
+    ///
+    /// # Arguments
+    ///
+    /// * `inner` - Reader for the underlying encrypted file
+    /// * `cipher` - AES-GCM cipher initialized with the file's DEK
+    /// * `aad_prefix` - AAD prefix from the file's `StandardKeyMetadata`
+    /// * `encrypted_file_length` - Total byte length of the encrypted file
+    ///
+    /// # Errors
+    ///
+    /// Returns `ErrorKind::DataInvalid` when the length is shorter than the
+    /// header, when the trailing block is too short to hold a nonce and tag,
+    /// or when the file would need more than `u32::MAX` blocks.
+    pub fn new(
+        inner: Box<dyn FileRead>,
+        cipher: Arc<AesGcmCipher>,
+        aad_prefix: Box<[u8]>,
+        encrypted_file_length: u64,
+    ) -> Result<Self> {
+        // Also validates the length, so the header subtraction below cannot underflow.
+        let plain_stream_size = Self::calculate_plaintext_length(encrypted_file_length)?;
+        let body_len = encrypted_file_length - GCM_STREAM_HEADER_LENGTH as u64;
+        let block_len = CIPHER_BLOCK_SIZE as u64;
+
+        // (block count, byte size of the final cipher block)
+        let (num_blocks, last_cipher_block_size) = if body_len == 0 {
+            // Header-only file: nothing to decrypt.
+            (0, 0)
+        } else {
+            match (body_len % block_len) as u32 {
+                // The body is an exact multiple of the full block size.
+                0 => (body_len / block_len, CIPHER_BLOCK_SIZE),
+                // A shorter trailing block follows the full ones.
+                tail => (body_len / block_len + 1, tail),
+            }
+        };
+
+        // Block indices are passed to the per-block AAD as `u32`, so the block
+        // count must fit in that range.
+        if num_blocks > u32::MAX as u64 {
+            return Err(Error::new(
+                ErrorKind::DataInvalid,
+                format!(
+                    "AGS1 format supports at most {} blocks (~4 TiB per file), but file requires {num_blocks} blocks",
+                    u32::MAX
+                ),
+            ));
+        }
+
+        Ok(Self {
+            inner,
+            cipher,
+            aad_prefix,
+            plain_stream_size,
+            num_blocks,
+            last_cipher_block_size,
+        })
+    }
+
+    /// Returns the total plaintext length in bytes, as derived from the encrypted length in `new`.
+    pub fn plaintext_length(&self) -> u64 {
+        self.plain_stream_size
+    }
+
+    /// Derives the plaintext length of an AGS1 stream from its encrypted length.
+    ///
+    /// Pure arithmetic with no I/O, matching Java's
+    /// `AesGcmInputStream.calculatePlaintextLength()`. Returns
+    /// `ErrorKind::DataInvalid` if the input is shorter than the header or its
+    /// trailing partial block cannot hold a nonce and a GCM tag.
+    pub fn calculate_plaintext_length(encrypted_file_length: u64) -> Result<u64> {
+        let header_len = GCM_STREAM_HEADER_LENGTH as u64;
+        let stream_length = match encrypted_file_length.checked_sub(header_len) {
+            Some(len) => len,
+            None => {
+                return Err(Error::new(
+                    ErrorKind::DataInvalid,
+                    format!(
+                        "Encrypted file too short: {encrypted_file_length} bytes (minimum {GCM_STREAM_HEADER_LENGTH})"
+                    ),
+                ));
+            }
+        };
+
+        let cipher_block = CIPHER_BLOCK_SIZE as u64;
+        let overhead = (NONCE_LENGTH + GCM_TAG_LENGTH) as u64;
+        let full_plain = (stream_length / cipher_block) * PLAIN_BLOCK_SIZE as u64;
+        let tail = stream_length % cipher_block;
+
+        // No trailing partial block (this also covers a header-only file).
+        if tail == 0 {
+            return Ok(full_plain);
+        }
+        if tail < overhead {
+            return Err(Error::new(
+                ErrorKind::DataInvalid,
+                format!(
+                    "Truncated encrypted file: last block is {} bytes (minimum {})",
+                    tail,
+                    NONCE_LENGTH + GCM_TAG_LENGTH
+                ),
+            ));
+        }
+
+        Ok(full_plain + (tail - overhead))
+    }
+
+    /// Maps a zero-based block index to that block's byte offset in the encrypted file.
+    fn encrypted_block_offset(block_index: u64) -> u64 {
+        u64::from(GCM_STREAM_HEADER_LENGTH) + u64::from(CIPHER_BLOCK_SIZE) * block_index
+    }
+
+    /// Returns the on-disk size of a block; only the final block may be shorter.
+    fn cipher_block_size(&self, block_index: u64) -> u32 {
+        // Compare against `block_index + 1`: `num_blocks - 1` underflows when empty.
+        if block_index + 1 == self.num_blocks {
+            return self.last_cipher_block_size;
+        }
+        CIPHER_BLOCK_SIZE
+    }
+}
+
+#[async_trait::async_trait]
+impl FileRead for AesGcmFileRead {
+    /// Reads and decrypts a plaintext byte range from the encrypted AGS1 stream.
+    ///
+    /// `range` is in **plaintext** coordinates. It is mapped onto the blocks
+    /// `first_block..=last_block` (division by `PLAIN_BLOCK_SIZE`); the encrypted
+    /// span covering those blocks is fetched with a single call to the inner
+    /// reader; each block is then decrypted with its own AAD
+    /// (`aad_prefix || block_index`) and only the bytes overlapping `range` are
+    /// copied into the result. An empty range returns empty bytes without I/O.
+    ///
+    /// # Errors
+    ///
+    /// Returns `ErrorKind::DataInvalid` if `range` is inverted, extends past the
+    /// plaintext size, or the inner reader returns fewer bytes than requested.
+    /// Inner-reader and decryption errors are propagated unchanged.
+    async fn read(&self, range: Range<u64>) -> Result<Bytes> {
+        if range.start == range.end {
+            return Ok(Bytes::new());
+        }
+
+        if range.start > range.end {
+            return Err(Error::new(
+                ErrorKind::DataInvalid,
+                format!(
+                    "Invalid read range: start ({}) is greater than end ({})",
+                    range.start, range.end
+                ),
+            ));
+        }
+
+        if range.end > self.plain_stream_size {
+            return Err(Error::new(
+                ErrorKind::DataInvalid,
+                format!(
+                    "Read range {}..{} exceeds plaintext size {}",
+                    range.start, range.end, self.plain_stream_size
+                ),
+            ));
+        }
+
+        if self.num_blocks == 0 {
+            return Ok(Bytes::new());
+        }
+
+        let first_block = range.start / PLAIN_BLOCK_SIZE as u64;
+        let last_block = (range.end - 1) / PLAIN_BLOCK_SIZE as u64;
+
+        // Read all needed encrypted blocks in a single I/O call
+        let encrypted_start = Self::encrypted_block_offset(first_block);
+        let encrypted_end =
+            Self::encrypted_block_offset(last_block) + self.cipher_block_size(last_block) as u64;
+
+        let all_encrypted = self.inner.read(encrypted_start..encrypted_end).await?;
+        // A short read (e.g. the file was truncated after its length was recorded)
+        // must surface as an error rather than as a slice-index panic below.
+        let expected_len = encrypted_end - encrypted_start;
+        if (all_encrypted.len() as u64) < expected_len {
+            return Err(Error::new(
+                ErrorKind::DataInvalid,
+                format!(
+                    "Short read from encrypted file: expected {expected_len} bytes for range {encrypted_start}..{encrypted_end}, got {}",
+                    all_encrypted.len()
+                ),
+            ));
+        }
+
+        // Decrypt each block and extract the requested plaintext range
+        let result_len = (range.end - range.start) as usize;
+        let mut result = BytesMut::with_capacity(result_len);
+        let mut encrypted_offset = 0usize;
+
+        for block_idx in first_block..=last_block {
+            let block_size = self.cipher_block_size(block_idx) as usize;
+            let cipher_block = &all_encrypted[encrypted_offset..encrypted_offset + block_size];
+            encrypted_offset += block_size;
+
+            let aad = stream_block_aad(&self.aad_prefix, block_idx as u32);
+            let decrypted = self.cipher.decrypt(cipher_block, Some(&aad))?;
+
+            // Calculate which slice of this decrypted block we need
+            let block_plain_start = block_idx * PLAIN_BLOCK_SIZE as u64;
+            let slice_start = if block_idx == first_block {
+                (range.start - block_plain_start) as usize
+            } else {
+                0
+            };
+            let slice_end = if block_idx == last_block {
+                (range.end - block_plain_start) as usize
+            } else {
+                decrypted.len()
+            };
+
+            result.extend_from_slice(&decrypted[slice_start..slice_end]);
+        }
+
+        Ok(result.freeze())
+    }
+}
+
+/// Transparent encryption of AGS1 stream-encrypted files.
+///
+/// Implements the [`FileWrite`] trait, buffering plaintext and emitting
+/// encrypted AGS1 blocks. This is the streaming write counterpart to
+/// [`AesGcmFileRead`].
+///
+/// # Usage
+///
+/// ```ignore
+/// // (ignored: requires async runtime and concrete FileRead/FileWrite impls)
+/// let mut writer = AesGcmFileWrite::new( // `mut`: write()/close() take `&mut self`
+///     inner_writer,        // Box<dyn FileWrite> for the output file
+///     cipher,              // Arc<AesGcmCipher> with the DEK
+///     aad_prefix.to_vec(), // anything that is Into<Box<[u8]>>
+/// );
+///
+/// writer.write(plaintext_chunk).await?; // plaintext_chunk: Bytes
+/// writer.close().await?;
+/// ```
+pub struct AesGcmFileWrite {
+    /// The underlying output writer.
+    inner: Box<dyn FileWrite>,
+    /// The AES-GCM cipher holding the DEK.
+    cipher: Arc<AesGcmCipher>,
+    /// AAD prefix from the key metadata; prepended to every block's AAD.
+    aad_prefix: Box<[u8]>,
+    /// Plaintext buffer accumulating data before block encryption.
+    buffer: Vec<u8>,
+    /// Index of the next block to be written; used for AAD construction.
+    block_index: u32,
+    /// Whether the AGS1 header has been written.
+    header_written: bool,
+    /// Whether close() has been called.
+    closed: bool,
+    /// Whether the writer is in a poisoned state due to a failed inner write.
+    /// Once poisoned, all subsequent operations are rejected because the inner
+    /// writer may have received partial data.
+    poisoned: bool,
+}
+
+impl AesGcmFileWrite {
+    /// Wraps `inner` in an encrypting AGS1 writer; no I/O happens until `write()` or `close()`.
+    pub fn new(
+        inner: Box<dyn FileWrite>,
+        cipher: Arc<AesGcmCipher>,
+        aad_prefix: impl Into<Box<[u8]>>,
+    ) -> Self {
+        let aad_prefix = aad_prefix.into();
+        // Fresh state: nothing buffered, nothing emitted, first block index is 0.
+        Self {
+            inner,
+            cipher,
+            aad_prefix,
+            buffer: Vec::new(),
+            block_index: 0,
+            header_written: false,
+            closed: false,
+            poisoned: false,
+        }
+    }
+
+    /// Emits the 8-byte AGS1 header: the magic followed by the plain block size (u32 LE).
+    /// Marks the header as written on success and poisons the writer on failure.
+    async fn write_header(&mut self) -> Result<()> {
+        let block_size_le = PLAIN_BLOCK_SIZE.to_le_bytes();
+        let header = [&GCM_STREAM_MAGIC[..], &block_size_le[..]].concat();
+        let outcome = self.inner.write(Bytes::from(header)).await;
+        match &outcome {
+            Ok(()) => self.header_written = true,
+            Err(_) => self.poisoned = true,
+        }
+        outcome
+    }
+
+    /// Encrypts `block_data` as the next AGS1 block and writes it to the inner
+    /// writer. Poisons the writer if that write fails or if the block index
+    /// would overflow `u32`, since the output can then no longer be extended.
+    async fn encrypt_and_write_block(&mut self, block_data: &[u8]) -> Result<()> {
+        const OVERFLOW: &str =
+            "AGS1 block index overflow: file exceeds the maximum supported size (~4 TiB)";
+        let aad = stream_block_aad(&self.aad_prefix, self.block_index);
+        let encrypted = self.cipher.encrypt(block_data, Some(&aad))?;
+        let written = self.inner.write(Bytes::from(encrypted)).await;
+        let next_index = self.block_index.checked_add(1);
+        // Either failure leaves a stream that must not receive further blocks.
+        self.poisoned |= written.is_err() || next_index.is_none();
+        written?;
+        self.block_index = next_index.ok_or_else(|| Error::new(ErrorKind::DataInvalid, OVERFLOW))?;
+        Ok(())
+    }
+
+    /// Encrypts the first `PLAIN_BLOCK_SIZE` bytes directly from the buffer (no
+    /// temporary 1 MiB plaintext copy), writes the block to the inner writer and
+    /// then removes those bytes. The buffer must hold at least one full block.
+    ///
+    /// Poisons the writer if the inner write fails or the block index overflows
+    /// `u32`, so an already-emitted block can never be written a second time.
+    async fn encrypt_and_drain_block(&mut self) -> Result<()> {
+        const OVERFLOW: &str =
+            "AGS1 block index overflow: file exceeds the maximum supported size (~4 TiB)";
+        let block_len = PLAIN_BLOCK_SIZE as usize;
+        let aad = stream_block_aad(&self.aad_prefix, self.block_index);
+        let encrypted = self.cipher.encrypt(&self.buffer[..block_len], Some(&aad))?;
+        let written = self.inner.write(Bytes::from(encrypted)).await;
+        let next_index = self.block_index.checked_add(1);
+        self.poisoned |= written.is_err() || next_index.is_none();
+        written?;
+        self.block_index = next_index.ok_or_else(|| Error::new(ErrorKind::DataInvalid, OVERFLOW))?;
+        self.buffer.drain(..block_len);
+        Ok(())
+    }
+}
+
+#[async_trait::async_trait]
+impl FileWrite for AesGcmFileWrite {
+    /// Buffers `bs` and emits every complete `PLAIN_BLOCK_SIZE` block as an
+    /// encrypted AGS1 block, writing the header first if it is still pending.
+    /// Fails with `ErrorKind::Unexpected` once the writer is closed or poisoned.
+    async fn write(&mut self, bs: Bytes) -> Result<()> {
+        let refusal = if self.closed {
+            Some("Cannot write to a closed AesGcmFileWrite")
+        } else if self.poisoned {
+            Some("AesGcmFileWrite is in a poisoned state due to a previous write failure")
+        } else {
+            None
+        };
+        if let Some(reason) = refusal {
+            return Err(Error::new(ErrorKind::Unexpected, reason));
+        }
+
+        if !self.header_written {
+            self.write_header().await?;
+        }
+
+        // Leftover bytes short of a full block stay buffered for the next call.
+        self.buffer.extend_from_slice(&bs);
+        let full_block = PLAIN_BLOCK_SIZE as usize;
+        while self.buffer.len() >= full_block {
+            self.encrypt_and_drain_block().await?;
+        }
+        Ok(())
+    }
+
+    /// Writes the remaining plaintext as the final block (an empty stream still gets
+    /// a header and one empty block), then closes the inner writer. A failure while
+    /// emitting that block poisons the writer, since its buffered data is consumed.
+    async fn close(&mut self) -> Result<()> {
+        let refusal = if self.closed {
+            Some("AesGcmFileWrite already closed")
+        } else if self.poisoned {
+            Some("AesGcmFileWrite is in a poisoned state due to a previous write failure")
+        } else {
+            None
+        };
+        if let Some(reason) = refusal {
+            return Err(Error::new(ErrorKind::Unexpected, reason));
+        }
+
+        if !self.header_written {
+            self.write_header().await?;
+        }
+        // Skip the final block only when the plaintext was exactly block-aligned.
+        if !self.buffer.is_empty() || self.block_index == 0 {
+            let final_block = std::mem::take(&mut self.buffer);
+            if let Err(e) = self.encrypt_and_write_block(&final_block).await {
+                self.poisoned = true;
+                return Err(e);
+            }
+        }
+        self.closed = true;
+        self.inner.close().await
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Encrypts plaintext into AGS1 format for testing.
+    ///
+    /// Mirrors Java's `AesGcmOutputStream` behavior:
+    /// - Always writes header + at least one block (even for empty input)
+    /// - Full blocks are `PLAIN_BLOCK_SIZE` bytes; last block may be shorter
+    fn encrypt_ags1(plaintext: &[u8], cipher: &AesGcmCipher, aad_prefix: &[u8]) -> Vec<u8> {
+        let mut result = Vec::new();
+
+        // Write header: "AGS1" + PLAIN_BLOCK_SIZE (LE)
+        result.extend_from_slice(&GCM_STREAM_MAGIC);
+        result.extend_from_slice(&PLAIN_BLOCK_SIZE.to_le_bytes());
+
+        // Write blocks
+        let mut offset = 0;
+        let mut block_index = 0u32;
+
+        loop {
+            let remaining = plaintext.len() - offset;
+            let block_size = std::cmp::min(remaining, PLAIN_BLOCK_SIZE as usize);
+
+            // Block 0 is always written (even if empty); subsequent empty blocks are skipped
+            if block_size == 0 && block_index > 0 {
+                break;
+            }
+
+            let block_data = &plaintext[offset..offset + block_size];
+            let aad = stream_block_aad(aad_prefix, block_index);
+            let encrypted = cipher.encrypt(block_data, Some(&aad)).unwrap();
+            result.extend_from_slice(&encrypted);
+
+            offset += block_size;
+            block_index += 1;
+
+            // A partial block is always the last
+            if block_size < PLAIN_BLOCK_SIZE as usize {
+                break;
+            }
+        }
+
+        result
+    }
+
+    /// Helper to create an AesGcmCipher from raw key bytes.
+    fn make_cipher(key: &[u8]) -> AesGcmCipher {
+        use super::super::SecureKey;
+        let secure_key = SecureKey::new(key).unwrap();
+        AesGcmCipher::new(secure_key)
+    }
+
+    /// Helper to create an in-memory FileRead from bytes.
+    fn memory_reader(data: Vec<u8>) -> Box<dyn FileRead> {
+        Box::new(MemoryFileRead(Bytes::from(data)))
+    }
+
+    /// Simple in-memory FileRead for tests.
+    struct MemoryFileRead(Bytes);
+
+    #[async_trait::async_trait]
+    impl FileRead for MemoryFileRead {
+        async fn read(&self, range: Range<u64>) -> Result<Bytes> {
+            let start = range.start as usize;
+            let end = range.end as usize;
+            if end > self.0.len() {
+                return Err(Error::new(
+                    ErrorKind::DataInvalid,
+                    format!(
+                        "Range {}..{} out of bounds for {} bytes",
+                        start,
+                        end,
+                        self.0.len()
+                    ),
+                ));
+            }
+            Ok(self.0.slice(start..end))
+        }
+    }
+
+    #[tokio::test]
+    async fn test_empty_file_roundtrip() {
+        let key = b"0123456789abcdef";
+        let aad_prefix = b"test-aad-prefix!";
+        let cipher = make_cipher(key);
+
+        let encrypted = encrypt_ags1(b"", &cipher, aad_prefix);
+
+        // Verify minimum length: header(8) + nonce(12) + tag(16) = 36
+        assert_eq!(encrypted.len(), MIN_STREAM_LENGTH as usize);
+
+        let reader = AesGcmFileRead::new(
+            memory_reader(encrypted.clone()),
+            Arc::new(make_cipher(key)),
+            aad_prefix.as_slice().into(),
+            encrypted.len() as u64,
+        )
+        .unwrap();
+
+        assert_eq!(reader.plaintext_length(), 0);
+
+        // Reading empty range should return empty bytes
+        let result = reader.read(0..0).await.unwrap();
+        assert!(result.is_empty());
+    }
+
+    #[tokio::test]
+    async fn test_small_file_roundtrip() {
+        let key = b"0123456789abcdef";
+        let aad_prefix = b"test-aad-prefix!";
+        let plaintext = b"Hello, Iceberg encryption!";
+        let cipher = make_cipher(key);
+
+        let encrypted = encrypt_ags1(plaintext, &cipher, aad_prefix);
+
+        let reader = AesGcmFileRead::new(
+            memory_reader(encrypted.clone()),
+            Arc::new(make_cipher(key)),
+            aad_prefix.as_slice().into(),
+            encrypted.len() as u64,
+        )
+        .unwrap();
+
+        assert_eq!(reader.plaintext_length(), plaintext.len() as u64);
+
+        // Read entire file
+        let result = reader.read(0..plaintext.len() as u64).await.unwrap();
+        assert_eq!(&result[..], plaintext);
+    }
+
+    #[tokio::test]
+    async fn test_partial_read() {
+        let key = b"0123456789abcdef";
+        let aad_prefix = b"aad-prefix-here!";
+        let plaintext = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+        let cipher = make_cipher(key);
+
+        let encrypted = encrypt_ags1(plaintext, &cipher, aad_prefix);
+
+        let reader = AesGcmFileRead::new(
+            memory_reader(encrypted.clone()),
+            Arc::new(make_cipher(key)),
+            aad_prefix.as_slice().into(),
+            encrypted.len() as u64,
+        )
+        .unwrap();
+
+        // Read a slice from the middle
+        let result = reader.read(10..20).await.unwrap();
+        assert_eq!(&result[..], &plaintext[10..20]);
+
+        // Read first byte
+        let result = reader.read(0..1).await.unwrap();
+        assert_eq!(&result[..], &plaintext[0..1]);
+
+        // Read last byte
+        let last = plaintext.len() as u64;
+        let result = reader.read(last - 1..last).await.unwrap();
+        assert_eq!(&result[..], &plaintext[plaintext.len() - 1..]);
+    }
+
+    #[tokio::test]
+    async fn test_multi_block_roundtrip() {
+        let key = b"0123456789abcdef";
+        let aad_prefix = b"multi-block-aad!";
+
+        // 1.5 blocks of data
+        let size = PLAIN_BLOCK_SIZE as usize + PLAIN_BLOCK_SIZE as usize / 2;
+        let plaintext: Vec<u8> = (0..size).map(|i| (i % 256) as u8).collect();
+        let cipher = make_cipher(key);
+
+        let encrypted = encrypt_ags1(&plaintext, &cipher, aad_prefix);
+
+        let reader = AesGcmFileRead::new(
+            memory_reader(encrypted.clone()),
+            Arc::new(make_cipher(key)),
+            aad_prefix.as_slice().into(),
+            encrypted.len() as u64,
+        )
+        .unwrap();
+
+        assert_eq!(reader.plaintext_length(), plaintext.len() as u64);
+
+        // Read entire file
+        let result = reader.read(0..plaintext.len() as u64).await.unwrap();
+        assert_eq!(&result[..], &plaintext[..]);
+    }
+
+    #[tokio::test]
+    async fn test_cross_block_read() {
+        let key = b"0123456789abcdef";
+        let aad_prefix = b"cross-block-aad!";
+
+        // 2.5 blocks of data
+        let size = PLAIN_BLOCK_SIZE as usize * 2 + PLAIN_BLOCK_SIZE as usize / 2;
+        let plaintext: Vec<u8> = (0..size).map(|i| (i % 256) as u8).collect();
+        let cipher = make_cipher(key);
+
+        let encrypted = encrypt_ags1(&plaintext, &cipher, aad_prefix);
+
+        let reader = AesGcmFileRead::new(
+            memory_reader(encrypted.clone()),
+            Arc::new(make_cipher(key)),
+            aad_prefix.as_slice().into(),
+            encrypted.len() as u64,
+        )
+        .unwrap();
+
+        // Read across block boundary (last 100 bytes of block 0 + first 100 bytes of block 1)
+        let boundary = PLAIN_BLOCK_SIZE as u64;
+        let result = reader.read(boundary - 100..boundary + 100).await.unwrap();
+        assert_eq!(
+            &result[..],
+            &plaintext[(boundary - 100) as usize..(boundary + 100) as usize]
+        );
+
+        // Read across two block boundaries (spans blocks 0, 1, and 2)
+        let result = reader.read(boundary - 50..boundary * 2 + 50).await.unwrap();
+        assert_eq!(
+            &result[..],
+            &plaintext[(boundary - 50) as usize..(boundary * 2 + 50) as usize]
+        );
+    }
+
+    #[tokio::test]
+    async fn test_exact_block_size() {
+        let key = b"0123456789abcdef";
+        let aad_prefix = b"exact-block-aad!";
+
+        // Exactly 1 block
+        let plaintext: Vec<u8> = (0..PLAIN_BLOCK_SIZE as usize)
+            .map(|i| (i % 256) as u8)
+            .collect();
+        let cipher = make_cipher(key);
+
+        let encrypted = encrypt_ags1(&plaintext, &cipher, aad_prefix);
+
+        let reader = AesGcmFileRead::new(
+            memory_reader(encrypted.clone()),
+            Arc::new(make_cipher(key)),
+            aad_prefix.as_slice().into(),
+            encrypted.len() as u64,
+        )
+        .unwrap();
+
+        assert_eq!(reader.plaintext_length(), PLAIN_BLOCK_SIZE as u64);
+
+        let result = reader.read(0..PLAIN_BLOCK_SIZE as u64).await.unwrap();
+        assert_eq!(&result[..], &plaintext[..]);
+    }
+
+    #[tokio::test]
+    async fn test_block_size_plus_one() {
+        let key = b"0123456789abcdef";
+        let aad_prefix = b"block-plus-one!!";
+
+        // 1 block + 1 byte
+        let size = PLAIN_BLOCK_SIZE as usize + 1;
+        let plaintext: Vec<u8> = (0..size).map(|i| (i % 256) as u8).collect();
+        let cipher = make_cipher(key);
+
+        let encrypted = encrypt_ags1(&plaintext, &cipher, aad_prefix);
+
+        let reader = AesGcmFileRead::new(
+            memory_reader(encrypted.clone()),
+            Arc::new(make_cipher(key)),
+            aad_prefix.as_slice().into(),
+            encrypted.len() as u64,
+        )
+        .unwrap();
+
+        assert_eq!(reader.plaintext_length(), size as u64);
+
+        // Read the last byte (in block 1)
+        let result = reader.read(size as u64 - 1..size as u64).await.unwrap();
+        assert_eq!(result[0], plaintext[size - 1]);
+
+        // Read all
+        let result = reader.read(0..size as u64).await.unwrap();
+        assert_eq!(&result[..], &plaintext[..]);
+    }
+
+    #[tokio::test]
+    async fn test_block_size_minus_one() {
+        let key = b"0123456789abcdef";
+        let aad_prefix = b"block-minus-one!";
+
+        // 1 block - 1 byte
+        let size = PLAIN_BLOCK_SIZE as usize - 1;
+        let plaintext: Vec<u8> = (0..size).map(|i| (i % 256) as u8).collect();
+        let cipher = make_cipher(key);
+
+        let encrypted = encrypt_ags1(&plaintext, &cipher, aad_prefix);
+
+        let reader = AesGcmFileRead::new(
+            memory_reader(encrypted.clone()),
+            Arc::new(make_cipher(key)),
+            aad_prefix.as_slice().into(),
+            encrypted.len() as u64,
+        )
+        .unwrap();
+
+        assert_eq!(reader.plaintext_length(), size as u64);
+
+        let result = reader.read(0..size as u64).await.unwrap();
+        assert_eq!(&result[..], &plaintext[..]);
+    }
+
+    #[tokio::test]
+    async fn test_wrong_aad_fails() {
+        let key = b"0123456789abcdef";
+        let aad_prefix = b"correct-aad-here";
+        let plaintext = b"sensitive data here";
+        let cipher = make_cipher(key);
+
+        let encrypted = encrypt_ags1(plaintext, &cipher, aad_prefix);
+
+        // Try to decrypt with wrong AAD
+        let mut bad_aad = aad_prefix.to_vec();
+        bad_aad[0] ^= 0xFF;
+
+        let reader = AesGcmFileRead::new(
+            memory_reader(encrypted.clone()),
+            Arc::new(make_cipher(key)),
+            bad_aad.as_slice().into(),
+            encrypted.len() as u64,
+        )
+        .unwrap();
+
+        let result = reader.read(0..plaintext.len() as u64).await;
+        assert!(result.is_err(), "Decryption with wrong AAD should fail");
+    }
+
+    #[tokio::test]
+    async fn test_wrong_key_fails() {
+        let aad = b"test-aad-prefix!";
+        let message = b"sensitive data";
+        let writer_key = b"0123456789abcdef";
+        let reader_key = b"fedcba9876543210";
+
+        // Seal with one key, then try to open with a different one.
+        let sealed = encrypt_ags1(message, &make_cipher(writer_key), aad);
+        let sealed_len = sealed.len() as u64;
+        let reader = AesGcmFileRead::new(
+            memory_reader(sealed),
+            Arc::new(make_cipher(reader_key)),
+            aad.as_slice().into(),
+            sealed_len,
+        )
+        .unwrap();
+
+        let outcome = reader.read(0..message.len() as u64).await;
+        assert!(outcome.is_err(), "Decryption with wrong key should fail");
+    }
+
+    #[tokio::test]
+    async fn test_out_of_bounds_read() {
+        let secret = b"0123456789abcdef";
+        let aad = b"test-aad-prefix!";
+        let message = b"short data";
+
+        let sealed = encrypt_ags1(message, &make_cipher(secret), aad);
+        let sealed_len = sealed.len() as u64;
+        let reader = AesGcmFileRead::new(
+            memory_reader(sealed),
+            Arc::new(make_cipher(secret)),
+            aad.as_slice().into(),
+            sealed_len,
+        )
+        .unwrap();
+
+        let past_end = message.len() as u64 + 1; // one byte beyond the plaintext
+        let outcome = reader.read(0..past_end).await;
+        assert!(outcome.is_err(), "Reading past end should fail");
+    }
+
+    #[test]
+    fn test_calculate_plaintext_length() {
+        // Header only, no blocks (not valid per Java, but handled): zero plaintext bytes
+        assert_eq!(
+            AesGcmFileRead::calculate_plaintext_length(GCM_STREAM_HEADER_LENGTH as u64).unwrap(),
+            0
+        );
+
+        // Empty file with one empty block: header(8) + nonce(12) + tag(16) = 36
+        assert_eq!(
+            AesGcmFileRead::calculate_plaintext_length(MIN_STREAM_LENGTH as u64).unwrap(),
+            0
+        );
+
+        // One full block: header(8) + cipher_block(1048604) = 1048612
+        let one_full = GCM_STREAM_HEADER_LENGTH as u64 + CIPHER_BLOCK_SIZE as u64;
+        assert_eq!(
+            AesGcmFileRead::calculate_plaintext_length(one_full).unwrap(),
+            PLAIN_BLOCK_SIZE as u64
+        );
+
+        // One full block + 1 byte: need partial second block
+        // Second block = nonce(12) + 1 byte ciphertext + tag(16) = 29
+        let one_full_plus_one = one_full + NONCE_LENGTH as u64 + 1 + GCM_TAG_LENGTH as u64;
+        assert_eq!(
+            AesGcmFileRead::calculate_plaintext_length(one_full_plus_one).unwrap(),
+            PLAIN_BLOCK_SIZE as u64 + 1
+        );
+    }
+
+    #[test]
+    fn test_stream_block_aad() {
+        // With prefix: the AAD is the prefix followed by the little-endian u32 block index
+        let aad = stream_block_aad(b"prefix", 0);
+        assert_eq!(&aad[..6], b"prefix");
+        assert_eq!(&aad[6..], &0u32.to_le_bytes());
+
+        let aad = stream_block_aad(b"prefix", 1);
+        assert_eq!(&aad[..6], b"prefix");
+        assert_eq!(&aad[6..], &1u32.to_le_bytes());
+
+        // Without prefix: the AAD is only the little-endian u32 block index
+        let aad = stream_block_aad(b"", 42);
+        assert_eq!(&aad[..], &42u32.to_le_bytes());
+    }
+
+    #[test]
+    fn test_encrypted_file_too_short() {
+        let result = AesGcmFileRead::new(
+            memory_reader(vec![0; 4]), // only four bytes of data
+            Arc::new(make_cipher(b"0123456789abcdef")),
+            [].into(), // empty AAD prefix
+            4,         // file length, matching the 4-byte buffer above
+        );
+        assert!(result.is_err(), "A 4-byte file is too short and should be rejected");
+    }
+
+    // --- AesGcmFileWrite tests ---
+
+    /// Test `FileWrite` that appends every write to a shared, mutex-guarded byte buffer.
+    struct SharedMemoryWrite {
+        buffer: std::sync::Arc<std::sync::Mutex<Vec<u8>>>,
+    }
+
+    /// `FileWrite` stub whose `write` starts returning an error after a fixed number of successes.
+    struct FailingFileWrite {
+        writes_before_failure: usize, // writes accepted before every later one errors
+        write_count: usize, // successful writes so far
+    }
+
+    #[async_trait::async_trait]
+    impl FileWrite for FailingFileWrite {
+        async fn write(&mut self, _bs: Bytes) -> Result<()> {
+            if self.write_count < self.writes_before_failure {
+                self.write_count += 1; // payload is ignored; only the count matters
+                return Ok(());
+            }
+            Err(Error::new(ErrorKind::Unexpected, "simulated write failure"))
+        }
+
+        async fn close(&mut self) -> Result<()> {
+            Ok(()) // closing always succeeds
+        }
+    }
+
+    #[async_trait::async_trait]
+    impl FileWrite for SharedMemoryWrite {
+        async fn write(&mut self, bs: Bytes) -> Result<()> {
+            self.buffer.lock().unwrap().extend_from_slice(&bs); // panics if the mutex is poisoned
+            Ok(())
+        }
+
+        async fn close(&mut self) -> Result<()> {
+            Ok(()) // no-op: always succeeds
+        }
+    }
+
+    /// One-shot helper: encrypts `plaintext` via `AesGcmFileWrite`, returning the bytes written.
+    async fn write_through_ags1(plaintext: &[u8], key: &[u8], aad_prefix: &[u8]) -> Vec<u8> {
+        let captured = std::sync::Arc::new(std::sync::Mutex::new(Vec::new()));
+        let sink: Box<dyn FileWrite> = Box::new(SharedMemoryWrite {
+            buffer: std::sync::Arc::clone(&captured),
+        });
+        let aad = aad_prefix.to_vec();
+        let mut encryptor = AesGcmFileWrite::new(sink, Arc::new(make_cipher(key)), aad);
+
+        encryptor.write(Bytes::from(plaintext.to_vec())).await.unwrap();
+        encryptor.close().await.unwrap();
+
+        captured.lock().unwrap().clone()
+    }
+
+    #[tokio::test]
+    async fn test_write_empty_roundtrip() {
+        let secret = b"0123456789abcdef";
+        let aad = b"test-aad-prefix!";
+
+        let sealed = write_through_ags1(b"", secret, aad).await;
+        let sealed_len = sealed.len();
+
+        // Zero plaintext bytes still yield the header plus a single empty block.
+        assert_eq!(sealed_len, MIN_STREAM_LENGTH as usize);
+        let reader = AesGcmFileRead::new(
+            memory_reader(sealed),
+            Arc::new(make_cipher(secret)),
+            aad.as_slice().into(),
+            sealed_len as u64,
+        )
+        .unwrap();
+
+        assert_eq!(reader.plaintext_length(), 0);
+    }
+
+    #[tokio::test]
+    async fn test_write_small_roundtrip() {
+        let secret = b"0123456789abcdef";
+        let aad = b"test-aad-prefix!";
+        let message = b"Hello, Iceberg encryption!";
+
+        let sealed = write_through_ags1(message, secret, aad).await;
+        let sealed_len = sealed.len() as u64;
+        let reader = AesGcmFileRead::new(
+            memory_reader(sealed),
+            Arc::new(make_cipher(secret)),
+            aad.as_slice().into(),
+            sealed_len,
+        )
+        .unwrap();
+
+        assert_eq!(reader.plaintext_length(), message.len() as u64);
+        let decrypted = reader.read(0..message.len() as u64).await.unwrap();
+        assert_eq!(&decrypted[..], message);
+    }
+
+    #[tokio::test]
+    async fn test_write_multi_block_roundtrip() {
+        let secret = b"0123456789abcdef";
+        let aad = b"multi-block-aad!";
+
+        // One full block followed by half a block.
+        let block = PLAIN_BLOCK_SIZE as usize;
+        let message: Vec<u8> = (0..block + block / 2).map(|i| (i % 256) as u8).collect();
+
+        let sealed = write_through_ags1(&message, secret, aad).await;
+        let sealed_len = sealed.len() as u64;
+        let reader = AesGcmFileRead::new(
+            memory_reader(sealed),
+            Arc::new(make_cipher(secret)),
+            aad.as_slice().into(),
+            sealed_len,
+        )
+        .unwrap();
+
+        assert_eq!(reader.plaintext_length(), message.len() as u64);
+        let decrypted = reader.read(0..message.len() as u64).await.unwrap();
+        assert_eq!(&decrypted[..], &message[..]);
+    }
+
+    #[tokio::test]
+    async fn test_write_cross_block_accumulation() {
+        let secret = b"0123456789abcdef";
+        let aad = b"cross-block-aad!";
+
+        // 1.5 blocks of patterned data, fed to the writer 1000 bytes at a time.
+        let block = PLAIN_BLOCK_SIZE as usize;
+        let message: Vec<u8> = (0..block + block / 2).map(|i| (i % 256) as u8).collect();
+
+        let captured = std::sync::Arc::new(std::sync::Mutex::new(Vec::new()));
+        let sink: Box<dyn FileWrite> = Box::new(SharedMemoryWrite {
+            buffer: std::sync::Arc::clone(&captured),
+        });
+        let cipher = Arc::new(make_cipher(secret));
+        let mut writer = AesGcmFileWrite::new(sink, cipher, aad.to_vec());
+
+        for piece in message.chunks(1000) {
+            writer.write(Bytes::from(piece.to_vec())).await.unwrap();
+        }
+        writer.close().await.unwrap();
+
+        let sealed = captured.lock().unwrap().clone();
+        let sealed_len = sealed.len() as u64;
+        let reader = AesGcmFileRead::new(
+            memory_reader(sealed),
+            Arc::new(make_cipher(secret)),
+            aad.as_slice().into(),
+            sealed_len,
+        )
+        .unwrap();
+
+        assert_eq!(reader.plaintext_length(), message.len() as u64);
+        let decrypted = reader.read(0..message.len() as u64).await.unwrap();
+        assert_eq!(&decrypted[..], &message[..]);
+    }
+
+    #[tokio::test]
+    async fn test_write_exact_block_size() {
+        let secret = b"0123456789abcdef";
+        let aad = b"exact-block-aad!";
+
+        // The payload fills one plaintext block exactly.
+        let block_len = PLAIN_BLOCK_SIZE as u64;
+        let message: Vec<u8> = (0..block_len).map(|i| (i % 256) as u8).collect();
+
+        let sealed = write_through_ags1(&message, secret, aad).await;
+        let sealed_len = sealed.len() as u64;
+
+        let reader = AesGcmFileRead::new(
+            memory_reader(sealed),
+            Arc::new(make_cipher(secret)),
+            aad.as_slice().into(),
+            sealed_len,
+        )
+        .unwrap();
+
+        assert_eq!(reader.plaintext_length(), block_len);
+        let decrypted = reader.read(0..block_len).await.unwrap();
+        assert_eq!(&decrypted[..], &message[..]);
+    }
+
+    #[tokio::test]
+    async fn test_write_block_aligned_no_spurious_empty_block() {
+        let secret = b"0123456789abcdef";
+        let aad = b"block-align-aad!";
+
+        // A payload of exactly one block: close() must NOT append a trailing
+        // empty encrypted block (28 bytes: 12-byte nonce + 16-byte tag).
+        let block_len = PLAIN_BLOCK_SIZE as u64;
+        let message: Vec<u8> = (0..block_len).map(|i| (i % 256) as u8).collect();
+
+        let from_writer = write_through_ags1(&message, secret, aad).await;
+        let from_reference = encrypt_ags1(&message, &make_cipher(secret), aad);
+        let writer_len = from_writer.len();
+
+        // Equal lengths mean no extra 28-byte empty block was emitted.
+        assert_eq!(
+            writer_len,
+            from_reference.len(),
+            "Writer output should match reference encryption length (no spurious trailing block)"
+        );
+
+        // The writer's output must also decrypt back to the input.
+        let reader = AesGcmFileRead::new(
+            memory_reader(from_writer),
+            Arc::new(make_cipher(secret)),
+            aad.as_slice().into(),
+            writer_len as u64,
+        )
+        .unwrap();
+
+        assert_eq!(reader.plaintext_length(), block_len);
+        let decrypted = reader.read(0..block_len).await.unwrap();
+        assert_eq!(&decrypted[..], &message[..]);
+    }
+
+    #[tokio::test]
+    async fn test_write_two_blocks_aligned_no_spurious_empty_block() {
+        let secret = b"0123456789abcdef";
+        let aad = b"2blk-align-aad!!";
+
+        // The payload fills two plaintext blocks with nothing left over.
+        let total = PLAIN_BLOCK_SIZE as u64 * 2;
+        let message: Vec<u8> = (0..total).map(|i| (i % 256) as u8).collect();
+
+        let from_writer = write_through_ags1(&message, secret, aad).await;
+        let from_reference = encrypt_ags1(&message, &make_cipher(secret), aad);
+        let writer_len = from_writer.len();
+        assert_eq!(
+            writer_len,
+            from_reference.len(),
+            "Writer output should match reference encryption length (no spurious trailing block)"
+        );
+
+        let reader = AesGcmFileRead::new(
+            memory_reader(from_writer),
+            Arc::new(make_cipher(secret)),
+            aad.as_slice().into(),
+            writer_len as u64,
+        )
+        .unwrap();
+
+        assert_eq!(reader.plaintext_length(), total);
+        let decrypted = reader.read(0..total).await.unwrap();
+        assert_eq!(&decrypted[..], &message[..]);
+    }
+
+    #[tokio::test]
+    async fn test_write_poisoned_after_inner_write_failure() {
+        // The inner writer accepts one write (the header) and rejects the next (block data).
+        let flaky: Box<dyn FileWrite> = Box::new(FailingFileWrite {
+            writes_before_failure: 1,
+            write_count: 0,
+        });
+        let cipher = Arc::new(make_cipher(b"0123456789abcdef"));
+        let mut writer = AesGcmFileWrite::new(flaky, cipher, b"aad-prefix-here!".to_vec());
+
+        // A full block forces header (succeeds) + block encrypt+write (fails) in one call.
+        let full_block = Bytes::from(vec![0u8; PLAIN_BLOCK_SIZE as usize]);
+        let first = writer.write(full_block).await;
+        assert!(first.is_err());
+
+        // Any later write is refused because the writer is now poisoned.
+        let second = writer.write(Bytes::from(b"more data".to_vec())).await;
+        assert!(second.is_err());
+        assert!(
+            second.unwrap_err().to_string().contains("poisoned"),
+            "expected poisoned error"
+        );
+
+        // close() is refused for the same reason.
+        let closed = writer.close().await;
+        assert!(closed.is_err());
+        assert!(
+            closed.unwrap_err().to_string().contains("poisoned"),
+            "expected poisoned error on close"
+        );
+    }
+}
diff --git a/crates/iceberg/src/error.rs b/crates/iceberg/src/error.rs
index 55e9043d17..ad91473612 100644
--- a/crates/iceberg/src/error.rs
+++ b/crates/iceberg/src/error.rs
@@ -18,6 +18,7 @@
 use std::backtrace::{Backtrace, BacktraceStatus};
 use std::fmt;
 use std::fmt::{Debug, Display, Formatter};
+use std::sync::PoisonError;
 
 use chrono::{DateTime, TimeZone as _, Utc};
 
@@ -447,6 +448,11 @@ define_from_err!(
     "Failure in doing io operation"
 );
 
+/// Maps a [`PoisonError`] from a poisoned lock to an [`Error`] of kind [`ErrorKind::Unexpected`].
+pub(crate) fn lock_error<T>(e: PoisonError<T>) -> Error {
+    Error::new(ErrorKind::Unexpected, format!("Lock poisoned: {e}"))
+}
+
 /// Converts a timestamp in milliseconds to `DateTime<Utc>`, handling errors.
 ///
 /// # Arguments
diff --git a/crates/iceberg/src/expr/visitors/page_index_evaluator.rs b/crates/iceberg/src/expr/visitors/page_index_evaluator.rs
index 96d1c651cd..4cd676dab1 100644
--- a/crates/iceberg/src/expr/visitors/page_index_evaluator.rs
+++ b/crates/iceberg/src/expr/visitors/page_index_evaluator.rs
@@ -793,7 +793,7 @@ mod tests {
     };
     use parquet::file::metadata::{PageIndexPolicy, ParquetMetaData};
     use parquet::file::properties::WriterProperties;
-    use rand::{Rng, thread_rng};
+    use rand::Rng;
     use tempfile::NamedTempFile;
 
     use super::PageIndexEvaluator;
@@ -1284,13 +1284,13 @@ mod tests {
 
     #[test]
     fn eval_in_length_of_set_above_limit_all_rows() -> Result<()> {
-        let mut rng = thread_rng();
+        let mut rng = rand::rng();
         let (metadata, _temp_file) = create_test_parquet_file()?;
         let (column_index, offset_index, row_group_metadata) = get_test_metadata(&metadata);
         let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?;
 
         let filter = Reference::new("col_float")
-            .is_in(std::iter::repeat_with(|| Datum::float(rng.gen_range(0.0..10.0))).take(1000))
+            .is_in(std::iter::repeat_with(|| Datum::float(rng.random_range(0.0..10.0))).take(1000))
             .bind(iceberg_schema_ref.clone(), false)?;
 
         let result = PageIndexEvaluator::eval(
diff --git a/crates/iceberg/src/expr/visitors/row_group_metrics_evaluator.rs b/crates/iceberg/src/expr/visitors/row_group_metrics_evaluator.rs
index 0506b33af0..ad7e19f548 100644
--- a/crates/iceberg/src/expr/visitors/row_group_metrics_evaluator.rs
+++ b/crates/iceberg/src/expr/visitors/row_group_metrics_evaluator.rs
@@ -528,7 +528,7 @@ mod tests {
     use parquet::schema::types::{
         ColumnDescriptor, ColumnPath, SchemaDescriptor, Type as parquetSchemaType,
     };
-    use rand::{Rng, thread_rng};
+    use rand::Rng;
 
     use super::RowGroupMetricsEvaluator;
     use crate::Result;
@@ -1617,7 +1617,7 @@ mod tests {
 
     #[test]
     fn eval_true_for_too_many_literals_filter_is_in() -> Result<()> {
-        let mut rng = thread_rng();
+        let mut rng = rand::rng();
 
         let row_group_metadata = create_row_group_metadata(
             1,
@@ -1636,7 +1636,7 @@ mod tests {
         let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?;
 
         let filter = Reference::new("col_float")
-            .is_in(std::iter::repeat_with(|| Datum::float(rng.gen_range(0.0..10.0))).take(1000))
+            .is_in(std::iter::repeat_with(|| Datum::float(rng.random_range(0.0..10.0))).take(1000))
             .bind(iceberg_schema_ref.clone(), false)?;
 
         let result = RowGroupMetricsEvaluator::eval(
diff --git a/crates/iceberg/src/io/file_io.rs b/crates/iceberg/src/io/file_io.rs
index d00ba1ba6a..6260160f85 100644
--- a/crates/iceberg/src/io/file_io.rs
+++ b/crates/iceberg/src/io/file_io.rs
@@ -280,6 +280,13 @@ pub trait FileRead: Send + Sync + Unpin + 'static {
     async fn read(&self, range: Range<u64>) -> crate::Result<Bytes>;
 }
 
+#[async_trait::async_trait]
+impl<T: AsRef<dyn FileRead> + Send + Sync + Unpin + 'static> FileRead for T {
+    async fn read(&self, range: Range<u64>) -> crate::Result<Bytes> {
+        self.as_ref().read(range).await // forward to the `dyn FileRead` behind `AsRef`
+    }
+}
+
 /// Input file is used for reading from files.
 #[derive(Debug)]
 pub struct InputFile {
diff --git a/crates/iceberg/src/io/storage/config/s3.rs b/crates/iceberg/src/io/storage/config/s3.rs
index fae3a14757..64db47084e 100644
--- a/crates/iceberg/src/io/storage/config/s3.rs
+++ b/crates/iceberg/src/io/storage/config/s3.rs
@@ -69,8 +69,14 @@ pub const S3_DISABLE_CONFIG_LOAD: &str = "s3.disable-config-load";
 ///
 /// This struct contains all the configuration options for connecting to Amazon S3.
 /// Use the builder pattern via `S3Config::builder()` to construct instances.
-/// ```
-#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize, TypedBuilder)]
+///
+/// Defaults follow the Iceberg `S3FileIOProperties` spec (see
+/// [`PATH_STYLE_ACCESS_DEFAULT = false`](https://github.com/apache/iceberg/blob/main/aws/src/main/java/org/apache/iceberg/aws/s3/S3FileIOProperties.java)),
+/// i.e. virtual-host-style addressing is enabled unless
+/// `s3.path-style-access=true` is explicitly set. This matches what
+/// Java clients do out of the box and is required for a number of
+/// S3-compatible stores that do not support path-style URLs.
+#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, TypedBuilder)]
 pub struct S3Config {
     /// S3 endpoint URL.
     #[builder(default, setter(strip_option, into))]
@@ -88,7 +94,9 @@ pub struct S3Config {
     #[builder(default, setter(strip_option, into))]
     pub region: Option<String>,
     /// Enable virtual host style (opposite of path style access).
-    #[builder(default)]
+    ///
+    /// Defaults to `true` to match Iceberg `S3FileIOProperties.PATH_STYLE_ACCESS_DEFAULT = false`.
+    #[builder(default = true)]
     pub enable_virtual_host_style: bool,
     /// Server side encryption type.
     #[builder(default, setter(strip_option, into))]
@@ -125,6 +133,12 @@ pub struct S3Config {
     pub disable_config_load: bool,
 }
 
+impl Default for S3Config {
+    fn default() -> Self {
+        Self::builder().build() // same per-field defaults the builder uses
+    }
+}
+
 impl TryFrom<&StorageConfig> for S3Config {
     type Error = crate::Error;
 
@@ -267,6 +281,17 @@ mod tests {
         assert_eq!(s3_config.region.as_deref(), Some("eu-west-1"));
     }
 
+    #[test]
+    fn test_s3_config_default_is_virtual_host_style() {
+        // Iceberg's S3FileIOProperties.PATH_STYLE_ACCESS_DEFAULT is false, so an
+        // untouched config must come up with virtual-host style enabled.
+        let via_default = S3Config::default();
+        assert!(via_default.enable_virtual_host_style);
+
+        let via_props = S3Config::try_from(&StorageConfig::new()).unwrap();
+        assert!(via_props.enable_virtual_host_style);
+    }
+
     #[test]
     fn test_s3_config_path_style_access() {
         let storage_config = StorageConfig::new().with_prop(S3_PATH_STYLE_ACCESS, "true");
diff --git a/crates/iceberg/src/scan/incremental/mod.rs b/crates/iceberg/src/scan/incremental/mod.rs
index 690f88e8b5..ff1cdd13f9 100644
--- a/crates/iceberg/src/scan/incremental/mod.rs
+++ b/crates/iceberg/src/scan/incremental/mod.rs
@@ -24,8 +24,7 @@ use std::sync::Arc;
 use crate::arrow::caching_delete_file_loader::CachingDeleteFileLoader;
 use crate::arrow::delete_filter::{DeleteFilter, is_equality_delete};
 use crate::arrow::{
-    ArrowReaderBuilder, CombinedIncrementalBatchRecordStream, StreamsInto,
-    UnzippedIncrementalBatchRecordStream,
+    ArrowReaderBuilder, CombinedIncrementalScanResult, StreamsInto, UnzippedIncrementalScanResult,
 };
 use crate::delete_file_index::DeleteFileIndex;
 use crate::io::FileIO;
@@ -733,8 +732,8 @@ impl IncrementalTableScan {
         Ok((append_stream, delete_stream))
     }
 
-    /// Returns an [`CombinedIncrementalBatchRecordStream`] for this incremental table scan.
-    pub async fn to_arrow(&self) -> Result<CombinedIncrementalBatchRecordStream> {
+    /// Returns a [`CombinedIncrementalScanResult`] for this incremental table scan.
+    pub async fn to_arrow(&self) -> Result<CombinedIncrementalScanResult> {
         let mut arrow_reader_builder = ArrowReaderBuilder::new(self.file_io.clone())
             .with_data_file_concurrency_limit(self.concurrency_limit_data_files)
             .with_row_group_filtering_enabled(true)
@@ -749,9 +748,10 @@ impl IncrementalTableScan {
         file_scan_task_stream.stream(arrow_reader)
     }
 
-    /// Returns an [`UnzippedIncrementalBatchRecordStream`] for this incremental table scan.
-    /// This stream will yield separate streams for appended and deleted record batches.
-    pub async fn to_unzipped_arrow(&self) -> Result<UnzippedIncrementalBatchRecordStream> {
+    /// Returns an [`UnzippedIncrementalScanResult`] for this incremental table scan.
+    /// This result contains separate streams for appended and deleted record batches,
+    /// together with scan metrics.
+    pub async fn to_unzipped_arrow(&self) -> Result<UnzippedIncrementalScanResult> {
         let mut arrow_reader_builder = ArrowReaderBuilder::new(self.file_io.clone())
             .with_data_file_concurrency_limit(self.concurrency_limit_data_files)
             .with_row_group_filtering_enabled(true)
diff --git a/crates/iceberg/src/scan/incremental/tests.rs b/crates/iceberg/src/scan/incremental/tests.rs
index 2a53c9ff37..1f23d5c731 100644
--- a/crates/iceberg/src/scan/incremental/tests.rs
+++ b/crates/iceberg/src/scan/incremental/tests.rs
@@ -1565,7 +1565,7 @@ async fn scan_and_verify(
         .build()
         .unwrap();
 
-    let stream = incremental_scan.to_arrow().await.unwrap();
+    let stream = incremental_scan.to_arrow().await.unwrap().stream;
     let batches: Vec<_> = stream.try_collect().await.unwrap();
 
     let append_batches: Vec<_> = batches
@@ -2003,7 +2003,7 @@ async fn test_incremental_scan_builder_options() {
         .build()
         .unwrap();
 
-    let stream = scan.to_arrow().await.unwrap();
+    let stream = scan.to_arrow().await.unwrap().stream;
     let batches: Vec<_> = stream.try_collect().await.unwrap();
 
     // Verify we have both append and delete batches
@@ -2038,7 +2038,7 @@ async fn test_incremental_scan_builder_options() {
         .build()
         .unwrap();
 
-    let stream = scan.to_arrow().await.unwrap();
+    let stream = scan.to_arrow().await.unwrap().stream;
     let batches: Vec<_> = stream.try_collect().await.unwrap();
 
     let append_batches: Vec<_> = batches
@@ -2069,7 +2069,7 @@ async fn test_incremental_scan_builder_options() {
         .build()
         .unwrap();
 
-    let stream = scan.to_arrow().await.unwrap();
+    let stream = scan.to_arrow().await.unwrap().stream;
     let batches: Vec<_> = stream.try_collect().await.unwrap();
 
     let append_batches: Vec<_> = batches
@@ -2097,7 +2097,7 @@ async fn test_incremental_scan_builder_options() {
         .build()
         .unwrap();
 
-    let stream = scan.to_arrow().await.unwrap();
+    let stream = scan.to_arrow().await.unwrap().stream;
     let batches: Vec<_> = stream.try_collect().await.unwrap();
 
     let append_batches = batches
@@ -2123,7 +2123,7 @@ async fn test_incremental_scan_builder_options() {
         .build()
         .unwrap();
 
-    let stream = scan.to_arrow().await.unwrap();
+    let stream = scan.to_arrow().await.unwrap().stream;
     let batches: Vec<_> = stream.try_collect().await.unwrap();
 
     let append_batches = batches
@@ -2145,7 +2145,7 @@ async fn test_incremental_scan_builder_options() {
         .build()
         .unwrap();
 
-    let stream = scan.to_arrow().await.unwrap();
+    let stream = scan.to_arrow().await.unwrap().stream;
     let batches: Vec<_> = stream.try_collect().await.unwrap();
 
     let append_batches: Vec<_> = batches
@@ -2182,7 +2182,7 @@ async fn test_incremental_scan_builder_options() {
         .build()
         .unwrap();
 
-    let stream = scan.to_arrow().await.unwrap();
+    let stream = scan.to_arrow().await.unwrap().stream;
     let batches: Vec<_> = stream.try_collect().await.unwrap();
 
     let delete_batches: Vec<_> = batches
@@ -2772,7 +2772,7 @@ async fn test_incremental_scan_includes_root_when_from_is_none() {
     // Test 2: Scan using table.incremental_scan(None, None) API
     // This should INCLUDE the root snapshot
     let scan = fixture.table.incremental_scan(None, None).build().unwrap();
-    let stream = scan.to_arrow().await.unwrap();
+    let stream = scan.to_arrow().await.unwrap().stream;
     let batches: Vec<_> = stream.try_collect().await.unwrap();
 
     // Collect all appended data
@@ -2867,7 +2867,7 @@ async fn test_incremental_scan_with_file_column() {
         .build()
         .unwrap();
 
-    let stream = scan.to_arrow().await.unwrap();
+    let stream = scan.to_arrow().await.unwrap().stream;
     let batches: Vec<_> = stream.try_collect().await.unwrap();
 
     // Get append batches
@@ -2938,7 +2938,7 @@ async fn test_incremental_select_with_pos_column() {
         .build()
         .unwrap();
 
-    let stream = scan.to_arrow().await.unwrap();
+    let stream = scan.to_arrow().await.unwrap().stream;
     let batches: Vec<_> = stream.try_collect().await.unwrap();
 
     // Get append batches (we're only appending in this test)
@@ -2997,7 +2997,7 @@ async fn test_incremental_select_with_pos_column() {
             .build()
             .unwrap();
 
-        let stream = scan.to_arrow().await.unwrap();
+        let stream = scan.to_arrow().await.unwrap().stream;
         let batches: Vec<_> = stream.try_collect().await.unwrap();
 
         // Get append batches
@@ -3073,7 +3073,7 @@ async fn test_incremental_select_with_pos_and_file_columns() {
         .build()
         .unwrap();
 
-    let stream = scan.to_arrow().await.unwrap();
+    let stream = scan.to_arrow().await.unwrap().stream;
     let batches: Vec<_> = stream.try_collect().await.unwrap();
 
     // Get append batches
@@ -3169,7 +3169,8 @@ async fn test_incremental_scan_with_no_deletes() {
         .unwrap();
 
     // Convert to arrow streams (unzipped into separate append and delete streams)
-    let (append_stream, delete_stream) = scan.to_unzipped_arrow().await.unwrap();
+    let result = scan.to_unzipped_arrow().await.unwrap();
+    let (append_stream, delete_stream) = (result.appends, result.deletes);
 
     // IMPORTANT: Try to collect from delete stream FIRST (without consuming append stream)
     // This is the scenario that previously caused a deadlock because the delete stream
@@ -3233,7 +3234,8 @@ async fn test_incremental_scan_deadlock_with_deletes_and_appends() {
         .unwrap();
 
     // Convert to unzipped streams
-    let (append_stream, delete_stream) = scan.to_unzipped_arrow().await.unwrap();
+    let result = scan.to_unzipped_arrow().await.unwrap();
+    let (append_stream, delete_stream) = (result.appends, result.deletes);
 
     // Read deletes first (this is important for triggering the deadlock)
     eprintln!("Starting to read delete stream...");
diff --git a/crates/iceberg/src/scan/mod.rs b/crates/iceberg/src/scan/mod.rs
index 97d7d8afb1..86caa99309 100644
--- a/crates/iceberg/src/scan/mod.rs
+++ b/crates/iceberg/src/scan/mod.rs
@@ -33,6 +33,7 @@ use futures::{SinkExt, StreamExt, TryStreamExt};
 pub use task::*;
 
 use crate::arrow::ArrowReaderBuilder;
+pub use crate::arrow::{ScanMetrics, ScanResult};
 use crate::delete_file_index::DeleteFileIndex;
 use crate::expr::visitors::inclusive_metrics_evaluator::InclusiveMetricsEvaluator;
 use crate::expr::{Bind, BoundPredicate, Predicate};
@@ -531,7 +532,10 @@ impl TableScan {
             arrow_reader_builder = arrow_reader_builder.with_batch_size(batch_size);
         }
 
-        arrow_reader_builder.build().read(self.plan_files().await?)
+        arrow_reader_builder
+            .build()
+            .read(self.plan_files().await?)
+            .map(|result| result.stream())
     }
 
     /// Returns a reference to the column names of the table scan.
@@ -1454,13 +1458,15 @@ pub mod tests {
         let batch_stream = reader
             .clone()
             .read(Box::pin(stream::iter(vec![Ok(plan_task.remove(0))])))
-            .unwrap();
+            .unwrap()
+            .stream();
         let batch_1: Vec<_> = batch_stream.try_collect().await.unwrap();
 
         let reader = ArrowReaderBuilder::new(fixture.table.file_io().clone()).build();
         let batch_stream = reader
             .read(Box::pin(stream::iter(vec![Ok(plan_task.remove(0))])))
-            .unwrap();
+            .unwrap()
+            .stream();
         let batch_2: Vec<_> = batch_stream.try_collect().await.unwrap();
 
         assert_eq!(batch_1, batch_2);
diff --git a/crates/iceberg/src/spec/snapshot.rs b/crates/iceberg/src/spec/snapshot.rs
index 72b5417c47..3b8a3c934e 100644
--- a/crates/iceberg/src/spec/snapshot.rs
+++ b/crates/iceberg/src/spec/snapshot.rs
@@ -291,6 +291,7 @@ pub(super) mod _serde {
         pub snapshot_id: i64,
         #[serde(skip_serializing_if = "Option::is_none")]
         pub parent_snapshot_id: Option<i64>,
+        #[serde(default)]
         pub sequence_number: i64,
         pub timestamp_ms: i64,
         pub manifest_list: String,
diff --git a/crates/iceberg/src/writer/file_writer/rolling_writer.rs b/crates/iceberg/src/writer/file_writer/rolling_writer.rs
index b86f6a2ea7..b0b2d2f191 100644
--- a/crates/iceberg/src/writer/file_writer/rolling_writer.rs
+++ b/crates/iceberg/src/writer/file_writer/rolling_writer.rs
@@ -399,7 +399,7 @@ mod tests {
             "Kelly", "Larry", "Mallory", "Shawn",
         ];
 
-        let mut rng = rand::thread_rng();
+        let mut rng = rand::rng();
         let batch_num = 10;
         let batch_rows = 100;
         let expected_rows = batch_num * batch_rows;
diff --git a/crates/integration_tests/src/lib.rs b/crates/integration_tests/src/lib.rs
index 4bf8f4d19c..feafa3ae9f 100644
--- a/crates/integration_tests/src/lib.rs
+++ b/crates/integration_tests/src/lib.rs
@@ -18,7 +18,9 @@
 use std::collections::HashMap;
 use std::sync::OnceLock;
 
-use iceberg::io::{S3_ACCESS_KEY_ID, S3_ENDPOINT, S3_REGION, S3_SECRET_ACCESS_KEY};
+use iceberg::io::{
+    S3_ACCESS_KEY_ID, S3_ENDPOINT, S3_PATH_STYLE_ACCESS, S3_REGION, S3_SECRET_ACCESS_KEY,
+};
 use iceberg_catalog_rest::REST_CATALOG_PROP_URI;
 use iceberg_test_utils::{get_minio_endpoint, get_rest_catalog_endpoint, set_up};
 
@@ -45,6 +47,7 @@ impl GlobalTestFixture {
             (S3_ACCESS_KEY_ID.to_string(), "admin".to_string()),
             (S3_SECRET_ACCESS_KEY.to_string(), "password".to_string()),
             (S3_REGION.to_string(), "us-east-1".to_string()),
+            (S3_PATH_STYLE_ACCESS.to_string(), "true".to_string()),
         ]);
 
         GlobalTestFixture { catalog_config }
diff --git a/crates/integration_tests/tests/common/mod.rs b/crates/integration_tests/tests/common/mod.rs
index e49a57465c..b7197a3a46 100644
--- a/crates/integration_tests/tests/common/mod.rs
+++ b/crates/integration_tests/tests/common/mod.rs
@@ -28,7 +28,6 @@ pub async fn random_ns() -> Namespace {
     let fixture = get_test_fixture();
     let rest_catalog = RestCatalogBuilder::default()
         .with_storage_factory(Arc::new(OpenDalStorageFactory::S3 {
-            configured_scheme: "s3".to_string(),
             customized_credential_load: None,
         }))
         .load("rest", fixture.catalog_config.clone())
diff --git a/crates/integration_tests/tests/conflict_commit_test.rs b/crates/integration_tests/tests/conflict_commit_test.rs
index 3b1362b95d..af2c7a7779 100644
--- a/crates/integration_tests/tests/conflict_commit_test.rs
+++ b/crates/integration_tests/tests/conflict_commit_test.rs
@@ -43,7 +43,6 @@ async fn test_append_data_file_conflict() {
     let fixture = get_test_fixture();
     let rest_catalog = RestCatalogBuilder::default()
         .with_storage_factory(Arc::new(OpenDalStorageFactory::S3 {
-            configured_scheme: "s3".to_string(),
             customized_credential_load: None,
         }))
         .load("rest", fixture.catalog_config.clone())
diff --git a/crates/integration_tests/tests/read_evolved_schema.rs b/crates/integration_tests/tests/read_evolved_schema.rs
index ae25a08987..f7416be2d4 100644
--- a/crates/integration_tests/tests/read_evolved_schema.rs
+++ b/crates/integration_tests/tests/read_evolved_schema.rs
@@ -34,7 +34,6 @@ async fn test_evolved_schema() {
     let fixture = get_test_fixture();
     let rest_catalog = RestCatalogBuilder::default()
         .with_storage_factory(Arc::new(OpenDalStorageFactory::S3 {
-            configured_scheme: "s3".to_string(),
             customized_credential_load: None,
         }))
         .load("rest", fixture.catalog_config.clone())
diff --git a/crates/integration_tests/tests/read_positional_deletes.rs b/crates/integration_tests/tests/read_positional_deletes.rs
index d4c4afeaf3..0f79596a12 100644
--- a/crates/integration_tests/tests/read_positional_deletes.rs
+++ b/crates/integration_tests/tests/read_positional_deletes.rs
@@ -30,7 +30,6 @@ async fn test_read_table_with_positional_deletes() {
     let fixture = get_test_fixture();
     let rest_catalog = RestCatalogBuilder::default()
         .with_storage_factory(Arc::new(OpenDalStorageFactory::S3 {
-            configured_scheme: "s3".to_string(),
             customized_credential_load: None,
         }))
         .load("rest", fixture.catalog_config.clone())
diff --git a/crates/integrations/datafusion/src/physical_plan/scan.rs b/crates/integrations/datafusion/src/physical_plan/scan.rs
index 234ab26470..36539ae503 100644
--- a/crates/integrations/datafusion/src/physical_plan/scan.rs
+++ b/crates/integrations/datafusion/src/physical_plan/scan.rs
@@ -196,7 +196,11 @@ impl DisplayAs for IcebergTableScan {
             self.predicates
                 .clone()
                 .map_or(String::from(""), |p| format!("{p}"))
-        )
+        )?;
+        if let Some(limit) = self.limit {
+            write!(f, " limit:[{limit}]")?;
+        }
+        Ok(())
     }
 }
 
diff --git a/crates/sqllogictest/testdata/slts/df_test/basic_queries.slt b/crates/sqllogictest/testdata/slts/df_test/basic_queries.slt
index 5d8889f158..a5ca4de46a 100644
--- a/crates/sqllogictest/testdata/slts/df_test/basic_queries.slt
+++ b/crates/sqllogictest/testdata/slts/df_test/basic_queries.slt
@@ -43,6 +43,18 @@ INSERT INTO default.default.query_test_table VALUES
 ----
 10
 
+# Verify EXPLAIN shows limit is pushed down to IcebergTableScan
+query TT
+EXPLAIN SELECT * FROM default.default.query_test_table LIMIT 3
+----
+logical_plan
+01)Limit: skip=0, fetch=3
+02)--TableScan: default.default.query_test_table projection=[id, name, score, category], fetch=3
+physical_plan
+01)GlobalLimitExec: skip=0, fetch=3
+02)--CooperativeExec
+03)----IcebergTableScan projection:[id,name,score,category] predicate:[] limit:[3]
+
 # Test SELECT * with ORDER BY and LIMIT
 query ITRT
 SELECT * FROM default.default.query_test_table ORDER BY id LIMIT 3
diff --git a/crates/storage/opendal/README.md b/crates/storage/opendal/README.md
index c5092eb97a..a4ad512e17 100644
--- a/crates/storage/opendal/README.md
+++ b/crates/storage/opendal/README.md
@@ -61,7 +61,6 @@ use iceberg_storage_opendal::OpenDalStorageFactory;
 async fn main() -> iceberg::Result<()> {
     let catalog = RestCatalogBuilder::default()
         .with_storage_factory(Arc::new(OpenDalStorageFactory::S3 {
-            configured_scheme: "s3".to_string(),
             customized_credential_load: None,
         }))
         .load(
diff --git a/crates/storage/opendal/src/azdls.rs b/crates/storage/opendal/src/azdls.rs
index f826b0a103..bab414a95c 100644
--- a/crates/storage/opendal/src/azdls.rs
+++ b/crates/storage/opendal/src/azdls.rs
@@ -124,10 +124,9 @@ pub(crate) fn azdls_config_parse(mut properties: HashMap<String, String>) -> Res
 pub(crate) fn azdls_create_operator<'a>(
     absolute_path: &'a str,
     config: &AzdlsConfig,
-    configured_scheme: &AzureStorageScheme,
 ) -> Result<(opendal::Operator, &'a str)> {
     let path = absolute_path.parse::<AzureStoragePath>()?;
-    match_path_with_config(&path, config, configured_scheme)?;
+    match_path_with_config(&path, config)?;
 
     let op = azdls_config_build(config, &path)?;
 
@@ -193,18 +192,7 @@ impl FromStr for AzureStorageScheme {
 }
 
 /// Validates whether the given path matches what's configured for the backend.
-pub(crate) fn match_path_with_config(
-    path: &AzureStoragePath,
-    config: &AzdlsConfig,
-    configured_scheme: &AzureStorageScheme,
-) -> Result<()> {
-    ensure_data_valid!(
-        &path.scheme == configured_scheme,
-        "Storage::Azdls: Scheme mismatch: configured {}, passed {}",
-        configured_scheme,
-        path.scheme
-    );
-
+pub(crate) fn match_path_with_config(path: &AzureStoragePath, config: &AzdlsConfig) -> Result<()> {
     if let Some(ref configured_account_name) = config.account_name {
         ensure_data_valid!(
             &path.account_name == configured_account_name,
@@ -518,7 +506,6 @@ mod tests {
                         endpoint: Some("https://myaccount.dfs.core.windows.net".to_string()),
                         ..Default::default()
                     },
-                    AzureStorageScheme::Abfss,
                 ),
                 Some(("myfs", "/path/to/file.parquet")),
             ),
@@ -531,33 +518,19 @@ mod tests {
                         endpoint: Some("https://myaccount.dfs.core.windows.net".to_string()),
                         ..Default::default()
                     },
-                    AzureStorageScheme::Abfss,
-                ),
-                None,
-            ),
-            (
-                "different scheme",
-                (
-                    "wasbs://myfs@myaccount.dfs.core.windows.net/path/to/file.parquet",
-                    AzdlsConfig {
-                        account_name: Some("myaccount".to_string()),
-                        endpoint: Some("https://myaccount.dfs.core.windows.net".to_string()),
-                        ..Default::default()
-                    },
-                    AzureStorageScheme::Abfss,
                 ),
                 None,
             ),
             (
                 "incompatible scheme for endpoint",
                 (
-                    "abfs://myfs@myaccount.dfs.core.windows.net/path/to/file.parquet",
+                    // `abfss` implies https; configured endpoint is plain http.
+                    "abfss://myfs@myaccount.dfs.core.windows.net/path/to/file.parquet",
                     AzdlsConfig {
                         account_name: Some("myaccount".to_string()),
                         endpoint: Some("http://myaccount.dfs.core.windows.net".to_string()),
                         ..Default::default()
                     },
-                    AzureStorageScheme::Abfss,
                 ),
                 None,
             ),
@@ -570,7 +543,6 @@ mod tests {
                         endpoint: Some("https://myaccount.dfs.core.chinacloudapi.cn".to_string()),
                         ..Default::default()
                     },
-                    AzureStorageScheme::Abfss,
                 ),
                 None,
             ),
@@ -584,7 +556,18 @@ mod tests {
                         endpoint: None,
                         ..Default::default()
                     },
-                    AzureStorageScheme::Abfs,
+                ),
+                Some(("myfs", "/path/to/file.parquet")),
+            ),
+            (
+                "different scheme is accepted",
+                (
+                    "wasbs://myfs@myaccount.blob.core.windows.net/path/to/file.parquet",
+                    AzdlsConfig {
+                        account_name: Some("myaccount".to_string()),
+                        endpoint: Some("https://myaccount.blob.core.windows.net".to_string()),
+                        ..Default::default()
+                    },
                 ),
                 Some(("myfs", "/path/to/file.parquet")),
             ),
@@ -598,14 +581,13 @@ mod tests {
                         account_key: Some("secret".to_string()),
                         ..Default::default()
                     },
-                    AzureStorageScheme::Wasb,
                 ),
                 Some(("testfs", "/path/to/data.parquet")),
             ),
         ];
 
         for (name, input, expected) in test_cases {
-            let result = azdls_create_operator(input.0, &input.1, &input.2);
+            let result = azdls_create_operator(input.0, &input.1);
             match expected {
                 Some((expected_filesystem, expected_path)) => {
                     assert!(result.is_ok(), "Test case {name} failed: {result:?}");
diff --git a/crates/storage/opendal/src/lib.rs b/crates/storage/opendal/src/lib.rs
index 8160680523..7fdf9e6965 100644
--- a/crates/storage/opendal/src/lib.rs
+++ b/crates/storage/opendal/src/lib.rs
@@ -46,7 +46,6 @@ use utils::from_opendal_error;
 cfg_if! {
     if #[cfg(feature = "opendal-azdls")] {
         mod azdls;
-        use azdls::AzureStorageScheme;
         use azdls::*;
         use opendal::services::AzdlsConfig;
     }
@@ -108,9 +107,6 @@ pub enum OpenDalStorageFactory {
     /// S3 storage factory.
     #[cfg(feature = "opendal-s3")]
     S3 {
-        /// s3 storage could have `s3://` and `s3a://`.
-        /// Storing the scheme string here to return the correct path.
-        configured_scheme: String,
         /// Custom AWS credential loader.
         #[serde(skip)]
         customized_credential_load: Option<s3::CustomAwsCredentialLoader>,
@@ -123,10 +119,7 @@ pub enum OpenDalStorageFactory {
     Oss,
     /// Azure Data Lake Storage factory.
     #[cfg(feature = "opendal-azdls")]
-    Azdls {
-        /// The configured Azure storage scheme.
-        configured_scheme: AzureStorageScheme,
-    },
+    Azdls,
 }
 
 #[typetag::serde(name = "OpenDalStorageFactory")]
@@ -142,10 +135,8 @@ impl StorageFactory for OpenDalStorageFactory {
             OpenDalStorageFactory::Fs => Ok(Arc::new(OpenDalStorage::LocalFs)),
             #[cfg(feature = "opendal-s3")]
             OpenDalStorageFactory::S3 {
-                configured_scheme,
                 customized_credential_load,
             } => Ok(Arc::new(OpenDalStorage::S3 {
-                configured_scheme: configured_scheme.clone(),
                 config: s3_config_parse(config.props().clone())?.into(),
                 customized_credential_load: customized_credential_load.clone(),
             })),
@@ -158,12 +149,9 @@ impl StorageFactory for OpenDalStorageFactory {
                 config: oss_config_parse(config.props().clone())?.into(),
             })),
             #[cfg(feature = "opendal-azdls")]
-            OpenDalStorageFactory::Azdls { configured_scheme } => {
-                Ok(Arc::new(OpenDalStorage::Azdls {
-                    configured_scheme: configured_scheme.clone(),
-                    config: azdls_config_parse(config.props().clone())?.into(),
-                }))
-            }
+            OpenDalStorageFactory::Azdls => Ok(Arc::new(OpenDalStorage::Azdls {
+                config: azdls_config_parse(config.props().clone())?.into(),
+            })),
             #[cfg(all(
                 not(feature = "opendal-memory"),
                 not(feature = "opendal-fs"),
@@ -196,11 +184,11 @@ pub enum OpenDalStorage {
     #[cfg(feature = "opendal-fs")]
     LocalFs,
     /// S3 storage variant.
+    ///
+    /// Accepts any S3-family URL (`s3://`, `s3a://`, `s3n://`); the scheme is
+    /// derived from the path at call time.
     #[cfg(feature = "opendal-s3")]
     S3 {
-        /// s3 storage could have `s3://` and `s3a://`.
-        /// Storing the scheme string here to return the correct path.
-        configured_scheme: String,
         /// S3 configuration.
         config: Arc<S3Config>,
         /// Custom AWS credential loader.
@@ -220,16 +208,12 @@ pub enum OpenDalStorage {
         config: Arc<OssConfig>,
     },
     /// Azure Data Lake Storage variant.
-    /// Expects paths of the form
+    ///
+    /// Accepts paths of the form
     /// `abfs[s]://<filesystem>@<account>.dfs.<endpoint-suffix>/<path>` or
     /// `wasb[s]://<container>@<account>.blob.<endpoint-suffix>/<path>`.
     #[cfg(feature = "opendal-azdls")]
-    #[allow(private_interfaces)]
     Azdls {
-        /// The configured Azure storage scheme.
-        /// Because Azdls accepts multiple possible schemes, we store the full
-        /// passed scheme here to later validate schemes passed via paths.
-        configured_scheme: AzureStorageScheme,
         /// Azure DLS configuration.
         config: Arc<AzdlsConfig>,
     },
@@ -274,15 +258,21 @@ impl OpenDalStorage {
             }
             #[cfg(feature = "opendal-s3")]
             OpenDalStorage::S3 {
-                configured_scheme,
                 config,
                 customized_credential_load,
             } => {
                 let op = s3_config_build(config, customized_credential_load, path)?;
                 let op_info = op.info();
 
-                // Check prefix of s3 path.
-                let prefix = format!("{}://{}/", configured_scheme, op_info.name());
+                // Use the URL scheme in the path for prefix matching. This enables
+                // use of S3-compatible storage backends using custom schemes (e.g., `minio://`, `r2://`).
+                let url = url::Url::parse(path).map_err(|e| {
+                    Error::new(
+                        ErrorKind::DataInvalid,
+                        format!("Invalid s3 url: {path}: {e}"),
+                    )
+                })?;
+                let prefix = format!("{}://{}/", url.scheme(), op_info.name());
                 if path.starts_with(&prefix) {
                     (op, &path[prefix.len()..])
                 } else {
@@ -319,10 +309,7 @@ impl OpenDalStorage {
                 }
             }
             #[cfg(feature = "opendal-azdls")]
-            OpenDalStorage::Azdls {
-                configured_scheme,
-                config,
-            } => azdls_create_operator(path, config, configured_scheme)?,
+            OpenDalStorage::Azdls { config } => azdls_create_operator(path, config)?,
             #[cfg(all(
                 not(feature = "opendal-s3"),
                 not(feature = "opendal-fs"),
@@ -357,9 +344,7 @@ impl OpenDalStorage {
             #[cfg(feature = "opendal-fs")]
             OpenDalStorage::LocalFs => Ok(path.strip_prefix("file:/").unwrap_or(&path[1..])),
             #[cfg(feature = "opendal-s3")]
-            OpenDalStorage::S3 {
-                configured_scheme, ..
-            } => {
+            OpenDalStorage::S3 { .. } => {
                 let url = url::Url::parse(path)?;
                 let bucket = url.host_str().ok_or_else(|| {
                     Error::new(
@@ -367,7 +352,7 @@ impl OpenDalStorage {
                         format!("Invalid s3 url: {path}, missing bucket"),
                     )
                 })?;
-                let prefix = format!("{}://{}/", configured_scheme, bucket);
+                let prefix = format!("{}://{}/", url.scheme(), bucket);
                 if path.starts_with(&prefix) {
                     Ok(&path[prefix.len()..])
                 } else {
@@ -416,12 +401,9 @@ impl OpenDalStorage {
                 }
             }
             #[cfg(feature = "opendal-azdls")]
-            OpenDalStorage::Azdls {
-                configured_scheme,
-                config,
-            } => {
+            OpenDalStorage::Azdls { config } => {
                 let azure_path = path.parse::<AzureStoragePath>()?;
-                match_path_with_config(&azure_path, config, configured_scheme)?;
+                match_path_with_config(&azure_path, config)?;
                 let relative_path_len = azure_path.path.len();
                 Ok(&path[path.len() - relative_path_len..])
             }
@@ -631,47 +613,21 @@ mod tests {
     #[test]
     fn test_relativize_path_s3() {
         let storage = OpenDalStorage::S3 {
-            configured_scheme: "s3".to_string(),
             config: Arc::new(S3Config::default()),
             customized_credential_load: None,
         };
 
-        assert_eq!(
-            storage
-                .relativize_path("s3://my-bucket/path/to/file.parquet")
-                .unwrap(),
-            "path/to/file.parquet"
-        );
-
-        // s3a scheme
-        let storage_s3a = OpenDalStorage::S3 {
-            configured_scheme: "s3a".to_string(),
-            config: Arc::new(S3Config::default()),
-            customized_credential_load: None,
-        };
-        assert_eq!(
-            storage_s3a
-                .relativize_path("s3a://my-bucket/path/to/file.parquet")
-                .unwrap(),
-            "path/to/file.parquet"
-        );
-    }
-
-    #[cfg(feature = "opendal-s3")]
-    #[test]
-    fn test_relativize_path_s3_scheme_mismatch() {
-        let storage = OpenDalStorage::S3 {
-            configured_scheme: "s3".to_string(),
-            config: Arc::new(S3Config::default()),
-            customized_credential_load: None,
-        };
-
-        // Scheme mismatch should error
-        assert!(
-            storage
-                .relativize_path("s3a://my-bucket/path/to/file.parquet")
-                .is_err()
-        );
+        // All S3-family schemes are accepted by the same storage instance.
+        // Custom schemes for S3-compatible stores (e.g., `minio://`) are also
+        // accepted because the path's scheme is used as-is for prefix matching.
+        for scheme in ["s3", "s3a", "s3n", "minio"] {
+            assert_eq!(
+                storage
+                    .relativize_path(&format!("{scheme}://my-bucket/path/to/file.parquet"))
+                    .unwrap(),
+                "path/to/file.parquet"
+            );
+        }
     }
 
     #[cfg(feature = "opendal-gcs")]
@@ -736,7 +692,6 @@ mod tests {
     #[test]
     fn test_relativize_path_azdls() {
         let storage = OpenDalStorage::Azdls {
-            configured_scheme: AzureStorageScheme::Abfss,
             config: Arc::new(AzdlsConfig {
                 account_name: Some("myaccount".to_string()),
                 endpoint: Some("https://myaccount.dfs.core.windows.net".to_string()),
@@ -751,24 +706,4 @@ mod tests {
             "/path/to/file.parquet"
         );
     }
-
-    #[cfg(feature = "opendal-azdls")]
-    #[test]
-    fn test_relativize_path_azdls_scheme_mismatch() {
-        let storage = OpenDalStorage::Azdls {
-            configured_scheme: AzureStorageScheme::Abfss,
-            config: Arc::new(AzdlsConfig {
-                account_name: Some("myaccount".to_string()),
-                endpoint: Some("https://myaccount.dfs.core.windows.net".to_string()),
-                ..Default::default()
-            }),
-        };
-
-        // wasbs scheme doesn't match configured abfss
-        assert!(
-            storage
-                .relativize_path("wasbs://myfs@myaccount.dfs.core.windows.net/path/to/file.parquet")
-                .is_err()
-        );
-    }
 }
diff --git a/crates/storage/opendal/src/resolving.rs b/crates/storage/opendal/src/resolving.rs
index 7c06cf96a5..64a16b18d2 100644
--- a/crates/storage/opendal/src/resolving.rs
+++ b/crates/storage/opendal/src/resolving.rs
@@ -70,29 +70,28 @@ fn parse_scheme(scheme: &str) -> Result<Scheme> {
     }
 }
 
-/// Extract the scheme string from a path URL.
-fn extract_scheme(path: &str) -> Result<String> {
+/// Extract the [`Scheme`] family from a path URL.
+fn extract_scheme(path: &str) -> Result<Scheme> {
     let url = Url::parse(path).map_err(|e| {
         Error::new(
             ErrorKind::DataInvalid,
             format!("Invalid path: {path}, failed to parse URL: {e}"),
         )
     })?;
-    Ok(url.scheme().to_string())
+    parse_scheme(url.scheme())
 }
 
 /// Build an [`OpenDalStorage`] variant for the given scheme and config properties.
 fn build_storage_for_scheme(
-    scheme: &str,
+    scheme: Scheme,
     props: &HashMap<String, String>,
     #[cfg(feature = "opendal-s3")] customized_credential_load: &Option<CustomAwsCredentialLoader>,
 ) -> Result<OpenDalStorage> {
-    match parse_scheme(scheme)? {
+    match scheme {
         #[cfg(feature = "opendal-s3")]
         Scheme::S3 => {
             let config = crate::s3::s3_config_parse(props.clone())?;
             Ok(OpenDalStorage::S3 {
-                configured_scheme: scheme.to_string(),
                 config: Arc::new(config),
                 customized_credential_load: customized_credential_load.clone(),
             })
@@ -113,10 +112,8 @@ fn build_storage_for_scheme(
         }
         #[cfg(feature = "opendal-azdls")]
         Scheme::Azdls => {
-            let configured_scheme: crate::azdls::AzureStorageScheme = scheme.parse()?;
             let config = crate::azdls::azdls_config_parse(props.clone())?;
             Ok(OpenDalStorage::Azdls {
-                configured_scheme,
                 config: Arc::new(config),
             })
         }
@@ -196,14 +193,15 @@ impl StorageFactory for OpenDalResolvingStorageFactory {
 /// to the appropriate [`OpenDalStorage`] variant.
 ///
 /// Sub-storages are lazily created on first use for each scheme and cached
-/// for subsequent operations.
+/// for subsequent operations. Scheme aliases like `s3`/`s3a`/`s3n` map to
+/// the same [`Scheme`] variant, so they share a storage instance.
 #[derive(Debug, Serialize, Deserialize)]
 pub struct OpenDalResolvingStorage {
     /// Configuration properties shared across all backends.
     props: HashMap<String, String>,
-    /// Cache of scheme → storage mappings.
+    /// Cache of scheme to storage mappings.
     #[serde(skip, default)]
-    storages: RwLock<HashMap<String, Arc<OpenDalStorage>>>,
+    storages: RwLock<HashMap<Scheme, Arc<OpenDalStorage>>>,
     /// Custom AWS credential loader for S3 storage.
     #[cfg(feature = "opendal-s3")]
     #[serde(skip)]
@@ -239,7 +237,7 @@ impl OpenDalResolvingStorage {
         }
 
         let storage = build_storage_for_scheme(
-            &scheme,
+            scheme,
             &self.props,
             #[cfg(feature = "opendal-s3")]
             &self.customized_credential_load,
@@ -288,7 +286,7 @@ impl Storage for OpenDalResolvingStorage {
     async fn delete_stream(&self, mut paths: BoxStream<'static, String>) -> Result<()> {
         // Group paths by scheme so each resolved storage receives a batch,
         // avoiding repeated operator creation per path.
-        let mut grouped: HashMap<String, Vec<String>> = HashMap::new();
+        let mut grouped: HashMap<Scheme, Vec<String>> = HashMap::new();
         while let Some(path) = paths.next().await {
             let scheme = extract_scheme(&path)?;
             grouped.entry(scheme).or_default().push(path);
@@ -317,3 +315,54 @@ impl Storage for OpenDalResolvingStorage {
         ))
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Builds a resolving storage with empty props, suitable for `resolve()`
+    /// calls that don't actually hit any backend.
+    fn empty_resolving_storage() -> OpenDalResolvingStorage {
+        OpenDalResolvingStorage {
+            props: HashMap::new(),
+            storages: RwLock::new(HashMap::new()),
+            #[cfg(feature = "opendal-s3")]
+            customized_credential_load: None,
+        }
+    }
+
+    #[cfg(feature = "opendal-s3")]
+    #[test]
+    fn test_resolve_s3_aliases_share_instance() {
+        let storage = empty_resolving_storage();
+
+        // All three S3-family schemes must collapse to a single cached
+        // `Arc<OpenDalStorage>` so that catalogs handing the resolver a mix
+        // of `s3://`, `s3a://`, `s3n://` paths don't rebuild operators.
+        let a = storage.resolve("s3://bucket/key").unwrap();
+        let b = storage.resolve("s3a://bucket/key").unwrap();
+        let c = storage.resolve("s3n://bucket/key").unwrap();
+
+        assert!(Arc::ptr_eq(&a, &b), "s3 and s3a should share one instance");
+        assert!(Arc::ptr_eq(&a, &c), "s3 and s3n should share one instance");
+    }
+
+    #[cfg(feature = "opendal-azdls")]
+    #[test]
+    fn test_resolve_azdls_aliases_share_instance() {
+        let storage = empty_resolving_storage();
+
+        let path_for = |scheme: &str| {
+            format!("{scheme}://myfs@myaccount.dfs.core.windows.net/path/to/file.parquet")
+        };
+
+        // Azure scheme aliases (checked here: `abfss` and `abfs`) collapse onto one cached instance.
+        let abfss = storage.resolve(&path_for("abfss")).unwrap();
+        let abfs = storage.resolve(&path_for("abfs")).unwrap();
+
+        assert!(
+            Arc::ptr_eq(&abfss, &abfs),
+            "abfss and abfs should share one instance"
+        );
+    }
+}
diff --git a/crates/storage/opendal/src/s3.rs b/crates/storage/opendal/src/s3.rs
index 7db88d273f..2e21418606 100644
--- a/crates/storage/opendal/src/s3.rs
+++ b/crates/storage/opendal/src/s3.rs
@@ -37,6 +37,12 @@ use crate::utils::{from_opendal_error, is_truthy};
 /// Parse iceberg props to s3 config.
 pub(crate) fn s3_config_parse(mut m: HashMap<String, String>) -> Result<S3Config> {
     let mut cfg = S3Config::default();
+    // Match Iceberg `S3FileIOProperties.PATH_STYLE_ACCESS_DEFAULT = false`:
+    // virtual-host-style addressing is the spec default. opendal's own
+    // default is path-style, which disagrees with the Java SDK and breaks
+    // S3-compatible stores that only accept virtual-hosted-style URLs.
+    // Any explicit `s3.path-style-access` property below overrides this.
+    cfg.enable_virtual_host_style = true;
     if let Some(endpoint) = m.remove(S3_ENDPOINT) {
         cfg.endpoint = Some(endpoint);
     };
@@ -177,3 +183,28 @@ impl AwsCredentialLoad for CustomAwsCredentialLoader {
         self.0.load_credential(client).await
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use std::collections::HashMap;
+
+    use iceberg::io::S3_PATH_STYLE_ACCESS;
+
+    use super::s3_config_parse;
+
+    fn parse_with(prop: Option<&str>) -> bool {
+        let mut props = HashMap::new();
+        if let Some(v) = prop {
+            props.insert(S3_PATH_STYLE_ACCESS.to_string(), v.to_string());
+        }
+        s3_config_parse(props).unwrap().enable_virtual_host_style
+    }
+
+    #[test]
+    fn s3_config_parse_path_style_access() {
+        // Match Iceberg S3FileIOProperties.PATH_STYLE_ACCESS_DEFAULT = false.
+        assert!(parse_with(None));
+        assert!(parse_with(Some("false")));
+        assert!(!parse_with(Some("true")));
+    }
+}
diff --git a/crates/storage/opendal/tests/file_io_s3_test.rs b/crates/storage/opendal/tests/file_io_s3_test.rs
index 207a4454d7..d6dd8a3b45 100644
--- a/crates/storage/opendal/tests/file_io_s3_test.rs
+++ b/crates/storage/opendal/tests/file_io_s3_test.rs
@@ -26,7 +26,8 @@ mod tests {
     use async_trait::async_trait;
     use futures::StreamExt;
     use iceberg::io::{
-        FileIO, FileIOBuilder, S3_ACCESS_KEY_ID, S3_ENDPOINT, S3_REGION, S3_SECRET_ACCESS_KEY,
+        FileIO, FileIOBuilder, S3_ACCESS_KEY_ID, S3_ENDPOINT, S3_PATH_STYLE_ACCESS, S3_REGION,
+        S3_SECRET_ACCESS_KEY,
     };
     use iceberg_storage_opendal::{CustomAwsCredentialLoader, OpenDalStorageFactory};
     use iceberg_test_utils::{get_minio_endpoint, normalize_test_name_with_parts, set_up};
@@ -39,7 +40,6 @@ mod tests {
         let minio_endpoint = get_minio_endpoint();
 
         FileIOBuilder::new(Arc::new(OpenDalStorageFactory::S3 {
-            configured_scheme: "s3".to_string(),
             customized_credential_load: None,
         }))
         .with_props(vec![
@@ -47,6 +47,7 @@ mod tests {
             (S3_ACCESS_KEY_ID, "admin".to_string()),
             (S3_SECRET_ACCESS_KEY, "password".to_string()),
             (S3_REGION, "us-east-1".to_string()),
+            (S3_PATH_STYLE_ACCESS, "true".to_string()),
         ])
         .build()
     }
@@ -132,13 +133,13 @@ mod tests {
 
         // Test that the loader can be used in FileIOBuilder with OpenDalStorageFactory
         let _builder = FileIOBuilder::new(Arc::new(OpenDalStorageFactory::S3 {
-            configured_scheme: "s3".to_string(),
             customized_credential_load: Some(custom_loader),
         }))
         .with_props(vec![
             (S3_ENDPOINT, "http://localhost:9000".to_string()),
             ("bucket", "test-bucket".to_string()),
             (S3_REGION, "us-east-1".to_string()),
+            (S3_PATH_STYLE_ACCESS, "true".to_string()),
         ]);
     }
 
@@ -154,12 +155,12 @@ mod tests {
 
         // Build FileIO with custom credential loader via OpenDalStorageFactory
         let file_io_with_custom_creds = FileIOBuilder::new(Arc::new(OpenDalStorageFactory::S3 {
-            configured_scheme: "s3".to_string(),
             customized_credential_load: Some(custom_loader),
         }))
         .with_props(vec![
             (S3_ENDPOINT, minio_endpoint),
             (S3_REGION, "us-east-1".to_string()),
+            (S3_PATH_STYLE_ACCESS, "true".to_string()),
         ])
         .build();
 
@@ -182,12 +183,12 @@ mod tests {
 
         // Build FileIO with custom credential loader via OpenDalStorageFactory
         let file_io_with_custom_creds = FileIOBuilder::new(Arc::new(OpenDalStorageFactory::S3 {
-            configured_scheme: "s3".to_string(),
             customized_credential_load: Some(custom_loader),
         }))
         .with_props(vec![
             (S3_ENDPOINT, minio_endpoint),
             (S3_REGION, "us-east-1".to_string()),
+            (S3_PATH_STYLE_ACCESS, "true".to_string()),
         ])
         .build();
 
diff --git a/crates/storage/opendal/tests/resolving_storage_test.rs b/crates/storage/opendal/tests/resolving_storage_test.rs
index 4572ad2c2d..c235089508 100644
--- a/crates/storage/opendal/tests/resolving_storage_test.rs
+++ b/crates/storage/opendal/tests/resolving_storage_test.rs
@@ -29,7 +29,8 @@ mod tests {
     use std::sync::Arc;
 
     use iceberg::io::{
-        FileIOBuilder, S3_ACCESS_KEY_ID, S3_ENDPOINT, S3_REGION, S3_SECRET_ACCESS_KEY,
+        FileIOBuilder, S3_ACCESS_KEY_ID, S3_ENDPOINT, S3_PATH_STYLE_ACCESS, S3_REGION,
+        S3_SECRET_ACCESS_KEY,
     };
     use iceberg_storage_opendal::OpenDalResolvingStorageFactory;
     use iceberg_test_utils::{get_minio_endpoint, normalize_test_name_with_parts, set_up};
@@ -45,6 +46,7 @@ mod tests {
                 (S3_ACCESS_KEY_ID, "admin".to_string()),
                 (S3_SECRET_ACCESS_KEY, "password".to_string()),
                 (S3_REGION, "us-east-1".to_string()),
+                (S3_PATH_STYLE_ACCESS, "true".to_string()),
             ])
             .build()
     }
@@ -288,6 +290,7 @@ mod tests {
             .with_props(vec![
                 (S3_ENDPOINT, minio_endpoint),
                 (S3_REGION, "us-east-1".to_string()),
+                (S3_PATH_STYLE_ACCESS, "true".to_string()),
             ])
             .build();
 
diff --git a/website/src/release.md b/website/src/release.md
index 7549b8ef4d..79c2bca8a0 100644
--- a/website/src/release.md
+++ b/website/src/release.md
@@ -108,6 +108,7 @@ Bump all components' version in the project to the new iceberg version.
 Please note that this version is the exact version of the release, not the release candidate version.
 
 - rust core: bump version in `Cargo.toml`
+- python binding: bump version in `bindings/python/Cargo.toml`
 
 ### Update docs
 
@@ -159,6 +160,7 @@ dist
 ├── apache-iceberg-rust-0.2.0.tar.gz.asc
 └── apache-iceberg-rust-0.2.0.tar.gz.sha512
 ```
+It is recommended to verify the artifacts yourself before uploading them to the SVN dist repo; see [How to verify a release](#how-to-verify-a-release).
 
 ### Upload artifacts to the SVN dist repo
 
@@ -175,7 +177,9 @@ svn co https://dist.apache.org/repos/dist/dev/iceberg/ /tmp/iceberg-dist-dev
 
 Then, upload the artifacts:
 
-> The `${release_version}` here should be like `0.2.0-rc.1`
+> The `${release_version}` here should be like `0.2.0-rc1`
+
+An example of the uploaded artifacts can be found at: https://dist.apache.org/repos/dist/dev/iceberg/apache-iceberg-rust-0.9.1-rc3/
 
 ```shell
 # create a directory named by version
@@ -189,7 +193,8 @@ cd /tmp/iceberg-dist-dev/
 
 # check svn status
 svn status
-
+```
+```shell
 # add to svn
 svn add apache-iceberg-rust-${release_version}
 
@@ -219,11 +224,11 @@ Title:
 Content:
 
 ```
-Hello, Apache Iceberg Rust Community,
+Hello Apache Iceberg Rust Community,
 
-This is a call for a vote to release Apache Iceberg rust version ${iceberg_version}.
+This is a call for a vote to release Apache Iceberg Rust version ${iceberg_version}.
 
-The tag to be voted on is v${release_version}.
+The tag to be voted on is: v${release_version}.
 
 The release candidate:
 
@@ -237,30 +242,30 @@ Git tag for the release:
 
 https://github.com/apache/iceberg-rust/releases/tag/v${release_version}
 
-Please download, verify, and test.
+Please download, verify, and test the release candidate.
 
-The VOTE will be open for at least 72 hours and until the necessary
-number of votes are reached.
+This vote will be open for at least 72 hours and will remain open until the required number of votes is reached.
 
-[ ] +1 approve
-[ ] +0 no opinion
-[ ] -1 disapprove with the reason
+Please vote accordingly:
+[ ] +1 Approve
+[ ] +0 No opinion
+[ ] -1 Disapprove (please provide a reason)
 
-To learn more about Apache Iceberg, please see https://rust.iceberg.apache.org/
+To learn more about Apache Iceberg, please visit:
+https://rust.iceberg.apache.org/
 
 Checklist for reference:
-
-[ ] Download links are valid.
-[ ] Checksums and signatures.
-[ ] LICENSE/NOTICE files exist
-[ ] No unexpected binary files
+[ ] Download links are valid
+[ ] Checksums and signatures are correct
+[ ] LICENSE and NOTICE files are present
+[ ] No unexpected binary files are included
 [ ] All source files have ASF headers
-[ ] Can compile from source
+[ ] The project builds successfully from source
 
-More details please refer to https://rust.iceberg.apache.org/release.html#how-to-verify-a-release.
-
-Thanks
+For more details, please refer to:
+https://rust.iceberg.apache.org/release.html#how-to-verify-a-release
 
+Thanks,
 ${name}
 ```
 
@@ -277,7 +282,7 @@ Title:
 Content:
 
 ```
-Hello, Apache Iceberg Rust Community,
+Hello Apache Iceberg Rust Community,
 
 The vote to release Apache Iceberg Rust ${release_version} has passed.
 
@@ -295,8 +300,7 @@ Non-Binding votes:
 
 Vote thread: ${vote_thread_url}
 
-Thanks
-
+Thanks,
 ${name}
 ```