From 85f134482c9d0e709a1d0a02b43f213a6d5b4578 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bernhard=20Sch=C3=A4fer?= <bernhard.schaefer@merckgroup.com>
Date: Fri, 20 Feb 2026 14:59:47 +0100
Subject: [PATCH] docs: restructure Polars examples and cross-link to_polars/to_lazy_polars

---
 docs/examples/dataset.md                      | 32 ++++++++++++++-----
 .../foundry_dev_tools/resources/dataset.py    | 11 +++++--
 2 files changed, 33 insertions(+), 10 deletions(-)

diff --git a/docs/examples/dataset.md b/docs/examples/dataset.md
index fa042a9..9ba851b 100644
--- a/docs/examples/dataset.md
+++ b/docs/examples/dataset.md
@@ -249,9 +249,21 @@ rest_client.download_dataset_files(dataset_rid=rid, output_directory='/paht/to/o
 ```
 ````
 
-### Polars DataFrame from Spark SQL dialect
+### Polars
 
-Queries the Foundry SQL server with Spark SQL dialect, load arrow stream using [polars](https://www.pola.rs/).
+There are three ways to get Polars data from a Foundry dataset. Choose the one that fits your workload:
+
+| Method | Data path | Evaluation | Best for |
+|---|---|---|---|
+| `to_polars()` | FoundrySqlServer | Eager (full dataset) | Quick exploration of small to medium-sized datasets |
+| `query_foundry_sql(..., return_type="polars")` | FoundrySqlServer | Eager (SQL-filtered) | Aggregations, joins, complex SQL queries |
+| `to_lazy_polars()` | Direct parquet scan (S3) | Lazy | Filtering/selection on large datasets; portable code for Foundry transforms |
+
+`to_lazy_polars()` scans parquet files directly via the S3-compatible API using `polars.scan_parquet`. Combined with Polars' lazy evaluation, this enables predicate pushdown: filters applied to the LazyFrame are pushed down to the parquet reader, so only relevant data is read from storage.
+
+The lazy Polars API uses the same syntax as [Foundry lightweight transforms](https://www.palantir.com/docs/foundry/transforms-python/polars-lazy), so code written with `to_lazy_polars()` can be moved into a Foundry transform without rewriting.
+
+#### Eager via FoundrySqlServer
 
 ````{tab} v2
 ```python
@@ -260,7 +272,13 @@ import polars as pl
 
 ctx = FoundryContext()
 ds = ctx.get_dataset_by_path("/path/to/test_dataset")
-df = ds.query_foundry_sql("SELECT *", return_type="polars")
+
+# Fetch the full dataset
+df = ds.to_polars()
+print(df)
+
+# Or use SQL to filter/aggregate server-side
+df = ds.query_foundry_sql("SELECT * WHERE age > 25", return_type="polars")
 print(df)
 ```
 ````
@@ -282,9 +300,7 @@ print(df)
 ```
 ````
 
-### Polars LazyFrame with direct S3-compatible API access
-
-Access dataset files directly via the S3-compatible API as a Polars LazyFrame for efficient lazy evaluation. This method bypasses FoundrySqlServer and works with both regular and hive-partitioned parquet datasets.
+#### Lazy via direct S3 parquet scan
 
 ````{tab} v2
 ```python
@@ -293,10 +309,10 @@ import polars as pl
 
 ctx = FoundryContext()
 ds = ctx.get_dataset_by_path("/path/to/test_dataset")
-lazy_df = ds.to_lazy_polars()
+lazy_df: pl.LazyFrame = ds.to_lazy_polars()
 
 # Perform lazy operations (not executed yet)
-result = lazy_df.filter(pl.col("age") > 25).select(["name", "age"])
+result = lazy_df.filter(pl.col("age") > 25).select("name", "age")
 
 # Execute and collect results
 df = result.collect()
diff --git a/libs/foundry-dev-tools/src/foundry_dev_tools/resources/dataset.py b/libs/foundry-dev-tools/src/foundry_dev_tools/resources/dataset.py
index 4aad2ee..6f58041 100644
--- a/libs/foundry-dev-tools/src/foundry_dev_tools/resources/dataset.py
+++ b/libs/foundry-dev-tools/src/foundry_dev_tools/resources/dataset.py
@@ -801,6 +801,9 @@ def to_pandas(self) -> pandas.core.frame.DataFrame:
     def to_polars(self) -> pl.DataFrame:
         """Get dataset as a :py:class:`polars.DataFrame`.
 
+        Fetches the full dataset via FoundrySqlServer. For lazy evaluation
+        with predicate pushdown on large datasets, see :py:meth:`to_lazy_polars`.
+
         Via :py:meth:`foundry_dev_tools.resources.dataset.Dataset.query_foundry_sql`
         """
         return self.query_foundry_sql("SELECT *", return_type="polars")
@@ -822,8 +825,8 @@ def to_lazy_polars(self, transaction_rid: str | None = None) -> pl.LazyFrame:
 
         Example:
             >>> ds = ctx.get_dataset_by_path("/path/to/dataset")
-            >>> lf = ds.to_lazy_polars()
-            >>> result = lf.filter(pl.col("age") > 25).select(["name", "age"])
+            >>> lazy_df = ds.to_lazy_polars()
+            >>> result = lazy_df.filter(pl.col("age") > 25).select("name", "age")
             >>> # Execute and collect results
             >>> df = result.collect()
 
@@ -831,6 +834,10 @@ def to_lazy_polars(self, transaction_rid: str | None = None) -> pl.LazyFrame:
             This method uses the S3-compatible API to directly access dataset files.
             For hive-partitioned datasets, polars will automatically read
             the partition structure.
+
+        See Also:
+            :py:meth:`to_polars`: Eager alternative via FoundrySqlServer.
+            :py:meth:`query_foundry_sql`: For SQL-based filtering and aggregations.
         """
         from foundry_dev_tools._optional.polars import pl