From 64cf451b4729097e264213b16914364dc452a178 Mon Sep 17 00:00:00 2001
From: nicornk <nicornk@users.noreply.github.com>
Date: Thu, 19 Feb 2026 14:19:23 +0100
Subject: [PATCH 1/9] feat: add to_lazy_polars to Dataset

---
 .../foundry_dev_tools/resources/dataset.py    | 40 ++++++++
 tests/integration/resources/test_dataset.py   | 22 +++++
 tests/integration/utils.py                    | 97 +++++++++++++++++++
 3 files changed, 159 insertions(+)

diff --git a/libs/foundry-dev-tools/src/foundry_dev_tools/resources/dataset.py b/libs/foundry-dev-tools/src/foundry_dev_tools/resources/dataset.py
index 8a73187..586dde3 100644
--- a/libs/foundry-dev-tools/src/foundry_dev_tools/resources/dataset.py
+++ b/libs/foundry-dev-tools/src/foundry_dev_tools/resources/dataset.py
@@ -14,6 +14,7 @@
 from foundry_dev_tools.errors.dataset import (
     BranchNotFoundError,
     DatasetHasNoOpenTransactionError,
+    DatasetHasNoTransactionsError,
     DatasetNotFoundError,
     TransactionTypeMismatchError,
 )
@@ -799,6 +800,45 @@ def to_polars(self) -> pl.DataFrame:
         """
         return self.query_foundry_sql("SELECT *", return_type="polars")
 
+    def to_lazy_polars(self) -> pl.LazyFrame:
+        """Get dataset as a :py:class:`polars.LazyFrame`.
+
+        Returns a lazy polars DataFrame that can be queried efficiently using
+        polars' lazy evaluation API. The data is accessed directly from S3
+        without going through FoundrySqlServer.
+
+        Example:
+            >>> ds = ctx.get_dataset_by_path("/path/to/dataset")
+            >>> lf = ds.to_lazy_polars()
+            >>> # Lazy operations - not executed yet
+            >>> result = lf.filter(pl.col("age") > 25).select(["name", "age"])
+            >>> # Execute and collect results
+            >>> df = result.collect()
+
+        Returns:
+            pl.LazyFrame: A lazy polars DataFrame
+
+        Note:
+            This method uses the S3 API to directly access dataset files.
+            For hive-partitioned datasets, polars will automatically read
+            the partition structure.
+        """
+        from foundry_dev_tools._optional.polars import pl
+
+        last_transaction = self.get_last_transaction()
+        if last_transaction is None:
+            msg = f"Dataset has no transactions: {self.path=} {self.rid=}"
+            raise DatasetHasNoTransactionsError(msg)
+
+        bucket_path = f"s3://{self.rid}.{last_transaction['rid']}/"
+
+        storage_options = self._context.s3.get_polars_storage_options()
+
+        return pl.scan_parquet(
+            bucket_path,
+            storage_options=storage_options,
+        )
+
     @contextmanager
     def transaction_context(
         self,
diff --git a/tests/integration/resources/test_dataset.py b/tests/integration/resources/test_dataset.py
index 6e20239..3bb7efb 100644
--- a/tests/integration/resources/test_dataset.py
+++ b/tests/integration/resources/test_dataset.py
@@ -130,3 +130,25 @@ def test_crud_dataset(spark_session, tmp_path):  # noqa: PLR0915
     # # check that deletion was successful
     with pytest.raises(DatasetNotFoundError):
         ds.sync()
+
+
+def test_to_lazy_polars_parquet_dataset():
+    ds = TEST_SINGLETON.iris_parquet
+    lazy_df = ds.to_lazy_polars()
+
+    assert isinstance(lazy_df, pl.LazyFrame)
+
+    df = lazy_df.collect()
+    assert df.shape == (150, 5)
+    assert df.columns == ["sepal_width", "sepal_length", "petal_width", "petal_length", "is_setosa"]
+
+
+def test_to_lazy_polars_hive_partitioned():
+    ds = TEST_SINGLETON.iris_hive_partitioned
+    lazy_df = ds.to_lazy_polars()
+
+    assert isinstance(lazy_df, pl.LazyFrame)
+
+    df = lazy_df.collect()
+    assert df.shape == (150, 5)
+    assert df.columns == ["sepal_width", "sepal_length", "petal_width", "petal_length", "is_setosa"]
diff --git a/tests/integration/utils.py b/tests/integration/utils.py
index 69414a8..e627d55 100644
--- a/tests/integration/utils.py
+++ b/tests/integration/utils.py
@@ -144,6 +144,79 @@
     },
 }
 
+IRIS_SCHEMA_HIVE = {
+    "fieldSchemaList": [
+        {
+            "type": "DOUBLE",
+            "name": "sepal_width",
+            "nullable": None,
+            "userDefinedTypeClass": None,
+            "customMetadata": {},
+            "arraySubtype": None,
+            "precision": None,
+            "scale": None,
+            "mapKeyType": None,
+            "mapValueType": None,
+            "subSchemas": None,
+        },
+        {
+            "type": "DOUBLE",
+            "name": "sepal_length",
+            "nullable": None,
+            "userDefinedTypeClass": None,
+            "customMetadata": {},
+            "arraySubtype": None,
+            "precision": None,
+            "scale": None,
+            "mapKeyType": None,
+            "mapValueType": None,
+            "subSchemas": None,
+        },
+        {
+            "type": "DOUBLE",
+            "name": "petal_width",
+            "nullable": None,
+            "userDefinedTypeClass": None,
+            "customMetadata": {},
+            "arraySubtype": None,
+            "precision": None,
+            "scale": None,
+            "mapKeyType": None,
+            "mapValueType": None,
+            "subSchemas": None,
+        },
+        {
+            "type": "DOUBLE",
+            "name": "petal_length",
+            "nullable": None,
+            "userDefinedTypeClass": None,
+            "customMetadata": {},
+            "arraySubtype": None,
+            "precision": None,
+            "scale": None,
+            "mapKeyType": None,
+            "mapValueType": None,
+            "subSchemas": None,
+        },
+        {
+            "type": "STRING",
+            "name": "is_setosa",
+            "nullable": None,
+            "userDefinedTypeClass": None,
+            "customMetadata": {},
+            "arraySubtype": None,
+            "precision": None,
+            "scale": None,
+            "mapKeyType": None,
+            "mapValueType": None,
+            "subSchemas": None,
+        },
+    ],
+    "primaryKey": None,
+    "dataFrameReaderClass": "com.palantir.foundry.spark.input.ParquetDataFrameReader",
+    "customMetadata": {"format": "parquet"},
+}
+
 FOUNDRY_SCHEMA_COMPLEX_DATASET = {
     "fieldSchemaList": [
         {
@@ -515,6 +588,30 @@ def iris_no_schema(self) -> Dataset:
             )
         return _iris_no_schema
 
+    @cached_property
+    def iris_hive_partitioned(self) -> Dataset:
+        _iris_hive_partitioned = self.ctx.get_dataset_by_path(
+            INTEGRATION_TEST_COMPASS_ROOT_PATH + "/iris_hive_partitioned",
+            create_if_not_exist=True,
+        )
+        if _iris_hive_partitioned.__created__:
+            _ = _iris_hive_partitioned.upload_folder(TEST_FOLDER.joinpath("test_data", "iris", "iris_hive_partitioned"))
+            _iris_hive_partitioned.upload_schema(
+                _iris_hive_partitioned.get_last_transaction()["rid"], schema=IRIS_SCHEMA_HIVE
+            )
+        return _iris_hive_partitioned
+
+    @cached_property
+    def iris_parquet(self) -> Dataset:
+        _iris_parquet = self.ctx.get_dataset_by_path(
+            INTEGRATION_TEST_COMPASS_ROOT_PATH + "/iris_parquet",
+            create_if_not_exist=True,
+        )
+        if _iris_parquet.__created__:
+            _ = _iris_parquet.upload_folder(TEST_FOLDER.joinpath("test_data", "iris", "iris_parquet"))
+            _iris_parquet.upload_schema(_iris_parquet.get_last_transaction()["rid"], schema=IRIS_SCHEMA_HIVE)
+        return _iris_parquet
+
     @cached_property
     def empty_dataset(self) -> Dataset:
         return self.ctx.get_dataset_by_path(

From 08aeb1ae2e4cc756c8a6a4fd464bcd9e5fc8ec81 Mon Sep 17 00:00:00 2001
From: nicornk <nicornk@users.noreply.github.com>
Date: Thu, 19 Feb 2026 14:22:23 +0100
Subject: [PATCH 2/9] remove unnecessary local variables

---
 .../src/foundry_dev_tools/resources/dataset.py           | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/libs/foundry-dev-tools/src/foundry_dev_tools/resources/dataset.py b/libs/foundry-dev-tools/src/foundry_dev_tools/resources/dataset.py
index 586dde3..51b321f 100644
--- a/libs/foundry-dev-tools/src/foundry_dev_tools/resources/dataset.py
+++ b/libs/foundry-dev-tools/src/foundry_dev_tools/resources/dataset.py
@@ -810,7 +810,6 @@ def to_lazy_polars(self) -> pl.LazyFrame:
         Example:
             >>> ds = ctx.get_dataset_by_path("/path/to/dataset")
             >>> lf = ds.to_lazy_polars()
-            >>> # Lazy operations - not executed yet
             >>> result = lf.filter(pl.col("age") > 25).select(["name", "age"])
             >>> # Execute and collect results
             >>> df = result.collect()
@@ -830,13 +829,9 @@ def to_lazy_polars(self) -> pl.LazyFrame:
             msg = f"Dataset has no transactions: {self.path=} {self.rid=}"
             raise DatasetHasNoTransactionsError(msg)
 
-        bucket_path = f"s3://{self.rid}.{last_transaction['rid']}/"
-
-        storage_options = self._context.s3.get_polars_storage_options()
-
         return pl.scan_parquet(
-            bucket_path,
-            storage_options=storage_options,
+            f"s3://{self.rid}.{last_transaction['rid']}/",
+            storage_options=self._context.s3.get_polars_storage_options(),
         )
 
     @contextmanager

From 9294e1107d984e362bf7b9fb9861cdc5b7af3167 Mon Sep 17 00:00:00 2001
From: nicornk <nicornk@users.noreply.github.com>
Date: Thu, 19 Feb 2026 15:48:13 +0100
Subject: [PATCH 3/9] add test data

---
 .../is_setosa=Iris-setosa/file.parquet           | Bin 0 -> 2054 bytes
 .../is_setosa=Iris-versicolor/file.parquet       | Bin 0 -> 2113 bytes
 .../is_setosa=Iris-virginica/file.parquet        | Bin 0 -> 2095 bytes
 .../iris/iris_parquet/spark/iris.parquet         | Bin 0 -> 3170 bytes
 4 files changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 tests/test_data/iris/iris_hive_partitioned/is_setosa=Iris-setosa/file.parquet
 create mode 100644 tests/test_data/iris/iris_hive_partitioned/is_setosa=Iris-versicolor/file.parquet
 create mode 100644 tests/test_data/iris/iris_hive_partitioned/is_setosa=Iris-virginica/file.parquet
 create mode 100644 tests/test_data/iris/iris_parquet/spark/iris.parquet

diff --git a/tests/test_data/iris/iris_hive_partitioned/is_setosa=Iris-setosa/file.parquet b/tests/test_data/iris/iris_hive_partitioned/is_setosa=Iris-setosa/file.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..7ffebed270b897b082f6679e92c07bd340a807ef
GIT binary patch
literal 2054
zcmb_eYe*bd6uvvNxX)xAb;g;xP(j^_Ml_lFNKtpMZ;5Spw=_Qb8g+LyWPRZJScH^~
zf)%<y(jaLxkfzWd#+0BfjVvwg+O!4R6#{8mw@JlTVnK=|@j)6>>Af?%n_Wo@DLKsC
zd+t4FzB%7H=MH0r)`K*lKaQi%69;G;Qsd}2eF4Z+@}eZ0*pNn<005bv<^C<40XcHe
zOAu&nlR1)16XS`dg+PMIkzfKe#V1n9Oj1$;08|3S^K@3Khu|(1Z%jBncBNO|udER!
z_57P>3ThX2ru*Ji$qj~pYNk&3!2i+jBQ3RL!O0s+Zz7w(nmgYaT}c1)myW(GSsL5y
z&)(-7jqK#FQ}^yS+TE>bDaU*it>!X79LH*0_U>lqTLZ^zz3sG?u`xpmW|J@|i~o&?
zM0t7-<%w}D6r$`9g@~fuCkH=<C<CntC=qKFVy)Dgc%4SEFP_qAO^*EzwMpf0#G{+o
zmy_xlx_G$sdQy-)lXzBj*54yv45ZIY7kIYj4xKpi9xHdQreB#}626qB_ow+is+0NT
zS5#Nt-MOEJGjvshFy+?m#i{J_uU8evPmpPjZ)ZO3Q(tpz{cxJGe0SR$=B|~vJMEoq
zkI0XZhILY8|2R$(`>8$bCC<A>R*4FW<c<*yp@P352V>!aZyO!k#)ZJ95!~l@CdU7|
zyMcXo9S(G1*)|?X?H=M@ot5z^N8^PL&j*;JN-rP`;GC{%N7x$vZTN=5owqvo_Ox;F
zo^V02GxzX;;pQpJCxTZP^{WXDv7z#gy%s2D3ofzG3?`;g<?`}Acd)6YFs~(rC?vwv
zHQn~!y=C{1hBbxAT#6=^kHmaY_PADGLCmFW`J$N9B)B7W`o#FWF=*tk`+UA&5G)7h
z=Yu}q{L1{ZKv4UI%%S|To3(_tfok9gf6=o04}O{LCT_{zhdG*8bkDJw@|uPc_NqoG
zy~f#sgzUoS7qhOa*1w+x-^jv*v=X7%T3#j$OPZ)oj*XL}2=37lji|{c8gL8%l37Xx
zVM69`KSZ0wa-$j|b2w?y=5SIX;48+ES<fixO#;0JaC&B)q({h*zNVl(VSPzMUlx66
zf_uG2-qY39ODh4b&LCpWt+W%<%^|0#Du<k+(*zhBr|0Sn4+(uZjZgeJU>?NN3^Syo
zOMziCE`k#iO!&L<-$#o%MbRwd7m?P~5u_*YRAv$b?TSF&{Sl!5(IJc<BYhE%$T6dO
z#%545lbR%4GFA(%)|cyFaJ9DEJDS-=G8fCiwlWA|CAwizKU%vjSVe1b>j;D{SPmhY
zEil`R>;nVXUJGGc$PfEh9LI|eJXGO9dbkk#4~lEixmq$+pveNuY9V?tnkz9r5)l_d
za}Ro`2yHdOvNnu^_sa&F8m#RtRfk<o_4%E)Y7q`T9eh_bKHpq#&1;4gE)+!Wi#RMP
zxJIl(3)(?JSK_$vY{7Xz?D2pk8|QBT@oIyJszutbdCA#YUFB*x<aV}n*bp|Br-NUP
nQxnGLZQV^Z)-DK3N&p*Y?s7KwxVSn!0FeuvCIK*`KY)J#3oAaB

literal 0
HcmV?d00001

diff --git a/tests/test_data/iris/iris_hive_partitioned/is_setosa=Iris-versicolor/file.parquet b/tests/test_data/iris/iris_hive_partitioned/is_setosa=Iris-versicolor/file.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..2302fa861c02983392af0fb798e9834a70effd17
GIT binary patch
literal 2113
zcmb_edrVVT7(czYw6{=fDZQ6o?6#84N(PQcH_;8{9^@@<Er~*5iEX(QmR*EU2D(Qg
zxRE{B!s_g3Wn|g>XU1iOC=a(8D4WAs65<}|7XF!;nr(>|AqJf$JEylTpv=U?FTMAi
z^L^j>-S79E-@U0BR^`xuJ|Uw=h)kMCQyDGOZwBcU2%DbMQ9duWWDYsfPi5V)dBFQp
zaEu_(nwJ1Q=kv1bQIJXHW&#XnO6HteZHkUbHJMU$sq1O2c9X$Ir0qUH*3;H3$xc6~
z@-*dbs=t_W-L}_%UH<u*zaIBUZ{!6l{C#rrvEE<u`o`_g9oA`doUMN%SK|5l5<ilC
z<Y@jvq2T?^>bZP$I^SUJXzb#~Cu@fPEUO!E+sD5xq2t*NK*s74Tn(*uVQ=S4e0vkE
zWO$}q&h(N-D*yjTiS#G#qHhXY0-}#-V&l*ip?^vWo*+f~KCc3?rZ^`di|0(FJdOjD
zBn|+nlr$wcodnt?b%ssGWg5)gmcY;CdBf1GtWI00)+vPK;GU}=ys7w6J#qZ4zw<a-
z+QEw*?Sor|N=Jol<4}=J)5UhtT@ppmd^j<?Zm#g`arxAt-EaI8cuzPG`1JKDe(05J
z!&kn1RQTi0eC=4Y2THLALc_ks<tBJYV~;f4g|)b?EdsH0ZLAB_iY_1#oQ#NICZOSf
z1_>cq%{iU%CY4gHOwy?FbCR^{5>*-vrRJV8^bxH8w1oGnf~@2<iqUj^NS&N>e<vmO
zRP>)=i=>n8YXPTqyN&Gn<ojLcv&%P5`=3TR{j(0C-UlvCE3Lnt%PSQ$HR+?%!WHf8
z)xOUzIBrd<)dwAyzhS@lAwJfCgw}@kHALVRRj?K-k-zUQ70d*%-7bi#AXKodf=8sF
zJ;L94qW~<;1QvbXo1xHBXleeoX~FdG{L+HBf3e_6!%%lx?n1IANL5lNDs0CR`O&(w
zvnev-NL|)D=y)sq7Y^Op?8uZ%(eIwlQS_GBzFPNjxiC6(E;HjMKT>I=dli|Av9UOI
zcC@c|<o7u$(3<hnK=x?t$VJAhk32*e-OBKhuSJASBG~1(8%+^6WO|MUECYZT7JYz2
z;izox4O2KA?drYZPyq@@D~N>k3`O^n=rMrRGd)t;6P>$P_}xl6MBp5nU|%egwzagh
z(-ffNHxR2{bJ4R(^pT_$GKeItY!hI3R?k)%KO*(fHa_szg9#<k46~%7w*%vVJchHX
znCNHd{%;`DqoH-oh#DgeCd7~)T*}NK23iaQb*o}P%9YfkCEZ^k%m_vrPC_LyZ|fP}
zNHK#-Ej}`x5>{2cQ2nf{zNw+vt_zY`*m4>RA%w-~f?GP!dcOrv(K_tf384$_fDnxq
zm|;fo&Q2UJhp;h{hx3bG#*1crE5SE$a~{rrHM|Zdmx`exG+1C^IYbXeV==}@7RH6p
z+=e!7K`tvSY{WR&U)Wh&Q`O`s+3KpTer6wE8is=p2j8_4-(;_@%C^HI7b+ra7cNUI
z&Wfkdf;_0`Vq6y<Ew~SeJ-#8!!2Rn)yc!{*Y7zZq&kOaXC9ay>{eq*JN7&dN4*m|@
nnkc@BZ>=q>YJqUuHlUN4TLgQXi>=h7Z$tD4+#~>I^ylF}nu<U8

literal 0
HcmV?d00001

diff --git a/tests/test_data/iris/iris_hive_partitioned/is_setosa=Iris-virginica/file.parquet b/tests/test_data/iris/iris_hive_partitioned/is_setosa=Iris-virginica/file.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..6864edca67f8dabf03896bfd583124edbaacbd82
GIT binary patch
literal 2095
zcmb`JdrVtZ9LG=FOIv6MwDcbCr5&)bRg@(>8YU>C_b{H}v9?5c%)}Pjg04J5(c<P7
zTEZk55C(x*wm%j(jfwk%MKn(N0~Vu+f?`HuNw(Qw!V)2Bwv6D0i|6#VrI2YdvtQct
z`u*<rp3m=h?#(qIQ#K9gt73Xac!;**p_mqH4uf>*q?cF<#{@?S8~`Bwv&_HE6W~J%
zs6;|Mw+eJDr%UAQ8V)2SbCceLz1+fll7>~oWB?d7WEsug6jq(2PD<o5Q)fNwjpT&r
zLsX9WmG%K&*^QplIj1HTr1Q#Iu%=iaKVGr_UbVN{+HfGN*gWw^{ExD4x$AQ|$19Ib
zecqq%sA=k*ZOr$2ZMo)?*V}TY@(b8DXC}M!^_d>FWBR6)j$sc1G0VuDwoZ%Vb=Mo#
z?p9h3t<WQZqav-e;NQ6T3|!ik0eiq=!x<1nWFVIUSA={9>@k=-iUU$kC!r{eFOQ};
zyIl-`G?o%Xi*+(-%nPX_LiUsU@l1u8a`(qGhFk49hYSC7ClAIgy2d_!_1v<gZ@TuI
ztBGfm9b;2xEqT#th3}`VKNx2l;ObYav!1txzUYn3K7Ge?(VhFAYo<~?{*z{9;9Wsl
zh*{1vJ8bPO+t3?A?I0DPeiF-*srkCAF*U9)HYJCfVBInGbdVY_oLz;%6CtNd!XZkQ
zhALcoHB4X<7?z<`Di!y9>Lt(lFYn2y@7mXX_*Hq~yrOd1T<)1xr!_D4{iZ&UXHFA*
zKV+Y&TJE+y`gHKQtbOL&ao(RTWBq&5qZ;%4O(!PaDIBc#nWulT4g2=j`dzCtLyKp#
zhC<Y`PFwfxN@M6vNkpl4qVUik?J6}!#71FgK2aNbJPkruM?mAkKGrYH18z${=@oFg
zM|z+W2|2K-2i#x!$9loWmOdkO#N)m2K<-o2*Z8T@c;CP*5Gb;4_ZjK=)T!EARd<Jc
zk~98m?%~e9ebMUV^@94|xhof21uwhD_q^#he&B~|vYYp3ugTr4;Ieeic4}#J<gVY-
zc{!>y#4YE!cUF5#s9li-zzNK599AJ@x1Z(kFomN53jv693pnf!otQ{Bud5{P&@qX0
zhjNGAd?XbT*FY&fD#Et`tOgEJ^oB4v-jr;QS7h{7V4W7Sua#3>9Ua}Y6womTg*%>T
zer3mb4+cdXhG0-+ngF!28n#M1Dbj?}_|z{7IIo}~T#BO$fz~4l>+IM}=#BH=4<B4n
z&<q@nCrZD^gq5EDK*NVbXfYskCpIkfKTZf7Rbr)K1>O=I&_JtJ3LnZ;q-2a7nM}v(
zUUD|K+S)9PPn1b4yQK&rRD%B~?`=HqF_Iyk2bL-ka-yRM;ogWc4A}2-5q||jEx|Yv
zUwnd0+DKDM8vf!JN&LycJP=&QyNYpVL`4+{-x&8LL?2r~7a>Czz9<hbHK3vvqC?h;
zT#fan)~3>YXJcJ%yR|GJhn|hTGg4n?sWW9+P_YwlBD0!oi{D%W86qQI!J952`y#!O
z<N?!@23rQnp9^!fAdG6{*UM`h&1I#|`kWp|Q=1iQ6MHuLM@ecz`Z{Z8W4WmVp(95C
YBQ|t6EL~2vN&`Un2S!Bz81SdUKhJDR%K!iX

literal 0
HcmV?d00001

diff --git a/tests/test_data/iris/iris_parquet/spark/iris.parquet b/tests/test_data/iris/iris_parquet/spark/iris.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..eb8b03deb34fa0493c7168514f84af80892baf1f
GIT binary patch
literal 3170
zcmb_f3sh4_8Xf|PkwhSmdvk9HpcUd1H3TRqE%zcm5VQm>%1a64K_n2C0P(oTf}q$g
z*2NX6MDf+?qCBmNKxqr2sK-D>TSU=T3hS<0rBtky+TzYl2!eGzXS<z~d+$H<&;QLo
z|2O~qC*fp-1OZ4t7wNN`ja1N}%s?0-Uof3%ETNhkfib1##vuRz(@)R+HU5n4qKP2G
z&KCA7Xf`T7VJzVhZ6+?E;t~MiNO>Fq%I9+d;BYxoDFQ^evrsDK3oulO;m)|oRq7%V
zxVnm5+?)jhB4<lk_-dnVJgr31H;FUP=|V^6xtnKIv$Wq_t#!V1YTDoZgVinNrl0E{
z@D%dnb<>JVf2_ugO=V|J@3_dzRpEEMlL<9*YQgT?0n6DcuY_dT(S1dy>m*$nhr2sE
z4~Yjaew}`E;)nF>jki@_m9GymJn3ZXfbhMRIW?D83`I6>k38_z`8@A7foL)VywOq9
z+xc(D{7A(^SxiC7r>ji6zTFZ!|D@p_vGwD^T|e==|9b3l-|R1xPm>~;b0h`G;sIhp
zlKS<dg7ya)rRiBSFGW9YTJUt6EFde3w>0k*VUO739)N*kEWI*Ku3cx)C^C`|Hd;YL
zTX|?BUCa#lZ^5?k()w~<IFJ_@f2tAi8R4ak339hM32TnvwxVTs=ollOom0@afww+*
zqeipqig?EHVK@A>_ZUAo4!P$=--y#0X2i&SZVyD;iw+$FHgQ`It#1qQe}|WoTb4Fg
z<q%JsqA)l^PV(=ixp#=KVZF`%7gwZpJ#zYdW@Y_ypScr;OACC5E3eMaZ}(f-?Jv>a
z?2_$$n2J8h&7JaT4=2VOBd&SfQ>@IY;m7XLbcenFj%(o}QF2h#zbbBBbC3&+n{pBt
z6zuJ9d|)$tVrRy_8>}j+O>wqTv3HYl#`^fUHKwzHRVHC~E8lJH0MqkCPw@&6nf+_%
z$-YZpKN43X<&K{%yyL#1^uj{Zx!O-^Cgn-i;zRWQk2~fJHc34LwNnoCRNit>Og`1t
zz_${?wh*CJrmDv<v7aukV~n9<e0(ibJb5`4&ZATe1U{!?H%t+_HANl=svZZnnH~j#
zC(@_JlA#|40T>7zR;h-$1ef4B5h~d1rBX+>6Pv?zqVkQycbMeJ<uWk=DwN^~$`?AL
z7=~jApHJlEml2sp+cX-!V2~5f-qyfBuPN=h+;rxnp)YrLeEYX{n_trHJ}4}`eA_*w
z_2xeU`@`<m<;Pu3czCNV%x#Ujo?V^S-g5D*X6o8!t$sd#Zg94l_J|nv&OFn0h2H!w
z`vc>tb&?CSFW2dY19mL`(IEj<9?tjPIh8NYZszToT^}cz?b}C-;&f*Z<n<a;98y<J
zpSZAzes%M<O|6~i38w>X#>2Zltle%p?h=DAGA@3WvMsGl+BPwrQLof~iBu_6*t5|2
zc_qCv!ORwgEB{`@gH+p(5?BFrD@Uv}WyWuk<qBQg&K^V2__$jrYJ8C*NC<3pkRr={
zRzHTKBTSI6i0|6u;d0RP*yfaH0sCz0=;e|9{Ab@(?@0V9d$r>GQkwdH|D1-F_~t~3
z>cZ4!w|~Z;i1siQmmHd(xs2Iz_)U>#lDOQf$5i59)4P($;WuaVGRo$i&ay3}9XkFU
z_YRR3d@}N^_o~0FulAW``&zG%Rb0RE($QOQR0^-v_C`I>%J$xtWOfxN-|c6y&fjO%
zPb+5&@Ra7*WL{Z;ulkqk6*mS=LCYTRT&j7x<&rA3FEhcNb7Qb(W6X)F36<TkYf-n&
zuir<ThA^8@e&^et_;p{Og}gtw{qz1KTRqPVPX102Jn%(kSRpWcFFbuc%B3sHBhK^s
zq`c;Xn`<Y_bNXBE?@2u(tvx(R(j$+TwTrJ6(ySz~EhM~<C`EK}5zoxR%R-CTAxVff
z3>Fcnr7oq(-YGV~o5q6OkaUgSQ?Ja>>E(`D%5=RZQK!|VJ806?np91qoCbUgrk#tW
zN{|SdV=hKc)?(yfw#~9m#aLsotP45(34oD&qbY#GMnOZ^xDQ}B;n+ltew}4u%gwum
zh8k(&gE+N~S3~0*8veWhLeW|k5rCrrSk#t40Af_lA%3eK2pm;Ak5gOEB~)90?8d6c
z?i9yfaCT%B5z0gw>Chd3i_kg-vfnDVvym%Sxrm1pS>>Zbq{xPvkI-=KQoD4WE(5U#
zh`k4GT$!?rjJq#KCXGqYkx66E3P9Kp>3PsSK$r+F25TNJ!ehnf=pyS8``?Qq`mP&-
zqD>;?O(5=MjRK8xa<sg@`cg;tyCE1lY=+v;9fkTGcMn=iAvL;CPL*M!NWUk6&~^&y
zXERiTv*-^v!suNJvCb6<2k5?VNP?J&b_w{@VZqpBM8sRG=j#)blqqto#%?avd<;P(
zNiKp9>6Zoj8)TFS`{toglGKw6NfLf#<g9S0Hy9{=2uZ3(=DkgQbx=)8DQOvH9|ZOA
zO}*aQ2#x&olxP_=+XXF#eYnC(3KxDOM(~zX^GGt-4;syd_E41!`$1`m690|SYa+4|
zy(qte^_1Q`89E1Bl=&@#Bb1RE+EHJbWyxT(k|bwQ`!j@)L27adwXS)5X^>X2WU+pY
z*9L7uiejGJJok^mUr2?@iZ4~9MK6uejl{zmMCh+29wZ!jMLc||{lbt#zoV$FnYW(k
z6={fCy+M_*Br-Ev3bCnXiTD4DA4Wl0@fBX$WW^FS2{1;uPAgB><FPQq$NoT!9RT6*
H-^G6bm*)1R

literal 0
HcmV?d00001


From d243994e77a07ab93f8e7560c81ff2059b853d09 Mon Sep 17 00:00:00 2001
From: nicornk <nicornk@users.noreply.github.com>
Date: Thu, 19 Feb 2026 15:55:27 +0100
Subject: [PATCH 4/9] add unit test for exception

---
 .../foundry_dev_tools/resources/dataset.py    |  4 ++--
 tests/unit/resources/test_dataset.py          | 24 +++++++++++++++++++
 2 files changed, 26 insertions(+), 2 deletions(-)
 create mode 100644 tests/unit/resources/test_dataset.py

diff --git a/libs/foundry-dev-tools/src/foundry_dev_tools/resources/dataset.py b/libs/foundry-dev-tools/src/foundry_dev_tools/resources/dataset.py
index 51b321f..5fdadae 100644
--- a/libs/foundry-dev-tools/src/foundry_dev_tools/resources/dataset.py
+++ b/libs/foundry-dev-tools/src/foundry_dev_tools/resources/dataset.py
@@ -826,8 +826,8 @@ def to_lazy_polars(self) -> pl.LazyFrame:
 
         last_transaction = self.get_last_transaction()
         if last_transaction is None:
-            msg = f"Dataset has no transactions: {self.path=} {self.rid=}"
-            raise DatasetHasNoTransactionsError(msg)
+            msg = f"Dataset has no transactions: {self.rid=}"
+            raise DatasetHasNoTransactionsError(info=msg)
 
         return pl.scan_parquet(
             f"s3://{self.rid}.{last_transaction['rid']}/",
diff --git a/tests/unit/resources/test_dataset.py b/tests/unit/resources/test_dataset.py
new file mode 100644
index 0000000..bfadcd4
--- /dev/null
+++ b/tests/unit/resources/test_dataset.py
@@ -0,0 +1,24 @@
+from unittest import mock
+
+import pytest
+
+from foundry_dev_tools.errors.dataset import DatasetHasNoTransactionsError
+from foundry_dev_tools.resources.dataset import Dataset
+
+
+def test_to_lazy_polars_no_transaction():
+    with mock.patch.object(Dataset, "__created__", True):
+        ds = Dataset.__new__(Dataset)
+        ds.rid = "ri.foundry.main.dataset.test-dataset"
+        ds.path = "/test/dataset/path"
+
+        # Mock get_last_transaction to return None
+        with mock.patch.object(ds, "get_last_transaction", return_value=None):
+            # Assert that the correct exception is raised with the expected message
+            with pytest.raises(DatasetHasNoTransactionsError) as exc_info:
+                ds.to_lazy_polars()
+
+            # Verify the error message contains the expected information
+            error_message = str(exc_info.value)
+            assert "Dataset has no transactions" in error_message
+            assert ds.rid in error_message

From f8c945a28d7aee579afaf02a44e699b4f86cffda Mon Sep 17 00:00:00 2001
From: nicornk <nicornk@users.noreply.github.com>
Date: Thu, 19 Feb 2026 17:01:16 +0100
Subject: [PATCH 5/9] allow passing arbitrary transaction_rid

---
 .../foundry_dev_tools/resources/dataset.py    | 15 +++++++------
 tests/unit/resources/test_dataset.py          | 21 ++++++++++++++++---
 2 files changed, 27 insertions(+), 9 deletions(-)

diff --git a/libs/foundry-dev-tools/src/foundry_dev_tools/resources/dataset.py b/libs/foundry-dev-tools/src/foundry_dev_tools/resources/dataset.py
index 5fdadae..196e75a 100644
--- a/libs/foundry-dev-tools/src/foundry_dev_tools/resources/dataset.py
+++ b/libs/foundry-dev-tools/src/foundry_dev_tools/resources/dataset.py
@@ -800,7 +800,7 @@ def to_polars(self) -> pl.DataFrame:
         """
         return self.query_foundry_sql("SELECT *", return_type="polars")
 
-    def to_lazy_polars(self) -> pl.LazyFrame:
+    def to_lazy_polars(self, transaction_rid: str | None = None) -> pl.LazyFrame:
         """Get dataset as a :py:class:`polars.LazyFrame`.
 
         Returns a lazy polars DataFrame that can be queried efficiently using
@@ -824,14 +824,17 @@ def to_lazy_polars(self) -> pl.LazyFrame:
         """
         from foundry_dev_tools._optional.polars import pl
 
-        last_transaction = self.get_last_transaction()
-        if last_transaction is None:
-            msg = f"Dataset has no transactions: {self.rid=}"
-            raise DatasetHasNoTransactionsError(info=msg)
+        if transaction_rid is None:
+            maybe_transaction = self.get_last_transaction()
+            if maybe_transaction is None:
+                msg = f"Dataset has no transactions: {self.rid=}"
+                raise DatasetHasNoTransactionsError(info=msg)
+            transaction_rid = maybe_transaction["rid"]
 
         return pl.scan_parquet(
-            f"s3://{self.rid}.{last_transaction['rid']}/",
+            f"s3://{self.rid}.{transaction_rid}/**/*.parquet",
             storage_options=self._context.s3.get_polars_storage_options(),
+            hive_partitioning=True,
         )
 
     @contextmanager
diff --git a/tests/unit/resources/test_dataset.py b/tests/unit/resources/test_dataset.py
index bfadcd4..12236b6 100644
--- a/tests/unit/resources/test_dataset.py
+++ b/tests/unit/resources/test_dataset.py
@@ -12,13 +12,28 @@ def test_to_lazy_polars_no_transaction():
         ds.rid = "ri.foundry.main.dataset.test-dataset"
         ds.path = "/test/dataset/path"
 
-        # Mock get_last_transaction to return None
         with mock.patch.object(ds, "get_last_transaction", return_value=None):
-            # Assert that the correct exception is raised with the expected message
             with pytest.raises(DatasetHasNoTransactionsError) as exc_info:
                 ds.to_lazy_polars()
 
-            # Verify the error message contains the expected information
             error_message = str(exc_info.value)
             assert "Dataset has no transactions" in error_message
             assert ds.rid in error_message
+
+
+def test_to_lazy_polars_transaction_rid_logic():
+    with mock.patch.object(Dataset, "__created__", True):
+        ds = Dataset.__new__(Dataset)
+        ds.rid = "ri.foundry.main.dataset.abc123"
+        ds._context = mock.MagicMock()
+        ds._context.s3.get_polars_storage_options.return_value = {"aws_access_key_id": "test"}
+
+        with mock.patch("foundry_dev_tools._optional.polars.pl.scan_parquet") as mock_scan:
+            mock_scan.return_value = mock.MagicMock()
+            ds.to_lazy_polars(transaction_rid="test")
+
+            mock_scan.assert_called_once()
+            call_args = mock_scan.call_args
+            assert call_args[0][0] == f"s3://{ds.rid}.test/**/*.parquet"
+            assert call_args[1]["storage_options"] == ds._context.s3.get_polars_storage_options()
+            assert call_args[1]["hive_partitioning"] is True

From e0a23b656ab8557b8fe5cdd101e2cb0d28cc92f3 Mon Sep 17 00:00:00 2001
From: nicornk <nicornk@users.noreply.github.com>
Date: Thu, 19 Feb 2026 17:04:16 +0100
Subject: [PATCH 6/9] add doc entry

---
 docs/examples/dataset.md | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/docs/examples/dataset.md b/docs/examples/dataset.md
index f8687b8..fa042a9 100644
--- a/docs/examples/dataset.md
+++ b/docs/examples/dataset.md
@@ -282,6 +282,28 @@ print(df)
 ```
 ````
 
+### Polars LazyFrame with direct S3-compatible API access
+
+Access dataset files directly via the S3-compatible API as a Polars LazyFrame for efficient lazy evaluation. This method bypasses FoundrySqlServer and works with both regular and hive-partitioned parquet datasets.
+
+````{tab} v2
+```python
+from foundry_dev_tools import FoundryContext
+import polars as pl
+
+ctx = FoundryContext()
+ds = ctx.get_dataset_by_path("/path/to/test_dataset")
+lazy_df = ds.to_lazy_polars()
+
+# Perform lazy operations (not executed yet)
+result = lazy_df.filter(pl.col("age") > 25).select(["name", "age"])
+
+# Execute and collect results
+df = result.collect()
+print(df)
+```
+````
+
 ### DuckDB Table from Spark SQL dialect
 
 Queries the Foundry SQL server with Spark SQL dialect, load arrow stream using [duckdb](https://duckdb.org/).

From e6d325d11748e133b01f80ba3ff0fa01e1b68d2b Mon Sep 17 00:00:00 2001
From: nicornk <nicornk@users.noreply.github.com>
Date: Fri, 20 Feb 2026 08:18:04 +0100
Subject: [PATCH 7/9] fix docstring

---
 .../src/foundry_dev_tools/resources/dataset.py  | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/libs/foundry-dev-tools/src/foundry_dev_tools/resources/dataset.py b/libs/foundry-dev-tools/src/foundry_dev_tools/resources/dataset.py
index 196e75a..3d5f05e 100644
--- a/libs/foundry-dev-tools/src/foundry_dev_tools/resources/dataset.py
+++ b/libs/foundry-dev-tools/src/foundry_dev_tools/resources/dataset.py
@@ -804,8 +804,16 @@ def to_lazy_polars(self, transaction_rid: str | None = None) -> pl.LazyFrame:
         """Get dataset as a :py:class:`polars.LazyFrame`.
 
         Returns a lazy polars DataFrame that can be queried efficiently using
-        polars' lazy evaluation API. The data is accessed directly from S3
-        without going through FoundrySqlServer.
+        polars' lazy evaluation API. The data is accessed directly via the
+        S3-compatible API without going through FoundrySqlServer.
+
+        Args:
+            transaction_rid: The transaction RID to read from. If None, uses the
+                last committed transaction. Useful for reading specific historical
+                versions of the dataset.
+
+        Returns:
+            pl.LazyFrame: A lazy polars DataFrame
 
         Example:
             >>> ds = ctx.get_dataset_by_path("/path/to/dataset")
@@ -814,11 +822,8 @@ def to_lazy_polars(self, transaction_rid: str | None = None) -> pl.LazyFrame:
             >>> # Execute and collect results
             >>> df = result.collect()
 
-        Returns:
-            pl.LazyFrame: A lazy polars DataFrame
-
         Note:
-            This method uses the S3 API to directly access dataset files.
+            This method uses the S3-compatible API to directly access dataset files.
             For hive-partitioned datasets, polars will automatically read
             the partition structure.
         """

From 0a930413ea92f27f7ce3ce471ee9c14f01b241cb Mon Sep 17 00:00:00 2001
From: nicornk <nicornk@users.noreply.github.com>
Date: Fri, 20 Feb 2026 08:43:17 +0100
Subject: [PATCH 8/9] fix: to_lazy_polars() now ignores open transactions
 via a new get_last_transaction() parameter

---
 .../src/foundry_dev_tools/resources/dataset.py    | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/libs/foundry-dev-tools/src/foundry_dev_tools/resources/dataset.py b/libs/foundry-dev-tools/src/foundry_dev_tools/resources/dataset.py
index 3d5f05e..4aad2ee 100644
--- a/libs/foundry-dev-tools/src/foundry_dev_tools/resources/dataset.py
+++ b/libs/foundry-dev-tools/src/foundry_dev_tools/resources/dataset.py
@@ -262,11 +262,16 @@ def get_transactions(
             ).json()["values"]
         ]
 
-    def get_last_transaction(self) -> api_types.Transaction | None:
-        """Returns the last transaction or None if there are no transactions."""
+    def get_last_transaction(self, include_open_exclusive_transaction: bool = True) -> api_types.Transaction | None:
+        """Returns the last transaction or None if there are no transactions.
+
+        Args:
+            include_open_exclusive_transaction: If True, an open transaction may be
+                returned as the last transaction. If False, open transactions are skipped.
+        """
         v = self.get_transactions(
             page_size=1,
-            include_open_exclusive_transaction=True,
+            include_open_exclusive_transaction=include_open_exclusive_transaction,
         )
         if v is not None and len(v) > 0:
             return v[0]
@@ -830,9 +835,9 @@ def to_lazy_polars(self, transaction_rid: str | None = None) -> pl.LazyFrame:
         from foundry_dev_tools._optional.polars import pl
 
         if transaction_rid is None:
-            maybe_transaction = self.get_last_transaction()
+            maybe_transaction = self.get_last_transaction(include_open_exclusive_transaction=False)
             if maybe_transaction is None:
-                msg = f"Dataset has no transactions: {self.rid=}"
+                msg = f"Dataset has no committed transactions: {self.rid=}"
                 raise DatasetHasNoTransactionsError(info=msg)
             transaction_rid = maybe_transaction["rid"]
 

From c9cbd5ac4f69b263653c1b3b2ac922acd59563e5 Mon Sep 17 00:00:00 2001
From: nicornk <nicornk@users.noreply.github.com>
Date: Fri, 20 Feb 2026 08:51:31 +0100
Subject: [PATCH 9/9] test with explicit transaction passing

---
 tests/integration/resources/test_dataset.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/tests/integration/resources/test_dataset.py b/tests/integration/resources/test_dataset.py
index 3bb7efb..00ee93d 100644
--- a/tests/integration/resources/test_dataset.py
+++ b/tests/integration/resources/test_dataset.py
@@ -143,6 +143,17 @@ def test_to_lazy_polars_parquet_dataset():
     assert df.columns == ["sepal_width", "sepal_length", "petal_width", "petal_length", "is_setosa"]
 
 
+def test_to_lazy_polars_parquet_dataset_explicit_transaction():
+    ds = TEST_SINGLETON.iris_parquet
+    lazy_df = ds.to_lazy_polars(ds.get_last_transaction()["rid"])
+
+    assert isinstance(lazy_df, pl.LazyFrame)
+
+    df = lazy_df.collect()
+    assert df.shape == (150, 5)
+    assert df.columns == ["sepal_width", "sepal_length", "petal_width", "petal_length", "is_setosa"]
+
+
 def test_to_lazy_polars_hive_partitioned():
     ds = TEST_SINGLETON.iris_hive_partitioned
     lazy_df = ds.to_lazy_polars()