From c32bd2c6abcc94bec75a07a35a840364512a0a68 Mon Sep 17 00:00:00 2001
From: JoshuaElkind <jelkind1011@gmail.com>
Date: Sat, 21 Feb 2026 03:24:34 -0500
Subject: [PATCH] SNOW-3077601: Fix filter on empty dataframe returning
 schemaless result

When filtering an empty DataFrame with an explicit schema in local testing
mode, pandas df[boolean_mask] can return 0 columns due to an index-alignment
edge case. This fix preserves the schema by using iloc[0:0].copy() when row
filtering incorrectly produces a schemaless result.

Fixes #4076
---
 src/snowflake/snowpark/mock/_plan.py      | 24 ++++++-
 tests/mock/test_empty_dataframe_filter.py | 81 +++++++++++++++++++++++
 2 files changed, 104 insertions(+), 1 deletion(-)
 create mode 100644 tests/mock/test_empty_dataframe_filter.py

diff --git a/src/snowflake/snowpark/mock/_plan.py b/src/snowflake/snowpark/mock/_plan.py
index a0275600ae..af735c7a6b 100644
--- a/src/snowflake/snowpark/mock/_plan.py
+++ b/src/snowflake/snowpark/mock/_plan.py
@@ -136,6 +136,7 @@
 from snowflake.snowpark._internal.analyzer.unary_plan_node import (
     Aggregate,
     CreateViewCommand,
+    Filter,
     Pivot,
     Sample,
     Project,
@@ -1082,6 +1083,18 @@ def execute_mock_plan(
         return table
     if isinstance(source_plan, MockSelectExecutionPlan):
         return execute_mock_plan(source_plan.execution_plan, expr_to_alias)
+    if isinstance(source_plan, Filter):
+        child_df = execute_mock_plan(source_plan.child, expr_to_alias)
+        if child_df is None:
+            return TableEmulator()
+        condition = calculate_expression(
+            source_plan.condition, child_df, analyzer, expr_to_alias
+        )
+        filtered = child_df[condition.fillna(value=False)]
+        # Fix issue #4076: same pandas empty-df indexing edge case as MockSelectStatement where.
+        if len(filtered.columns) == 0 and len(child_df.columns) > 0:
+            filtered = child_df.iloc[0:0].copy()
+        return filtered
     if isinstance(source_plan, MockSelectStatement):
         projection: Optional[List[Expression]] = source_plan.projection or []
         from_: Optional[MockSelectable] = source_plan.from_
@@ -1161,7 +1174,16 @@ def execute_mock_plan(
 
         if where:
             condition = calculate_expression(where, result_df, analyzer, expr_to_alias)
-            result_df = result_df[condition.fillna(value=False)]
+            filtered_result = result_df[condition.fillna(value=False)]
+            # Fix issue #4076: pandas df[boolean_mask] on empty DataFrame can return
+            # 0 columns (schemaless) due to index-alignment edge case. Row filtering
+            # should always preserve schema. We use iloc[0:0] when this happens.
+            if (
+                len(filtered_result.columns) == 0
+                and len(result_df.columns) > 0
+            ):
+                filtered_result = result_df.iloc[0:0].copy()
+            result_df = filtered_result
 
         if order_by:
             result_df = handle_order_by_clause(
diff --git a/tests/mock/test_empty_dataframe_filter.py b/tests/mock/test_empty_dataframe_filter.py
new file mode 100644
index 0000000000..3f2c52d9a6
--- /dev/null
+++ b/tests/mock/test_empty_dataframe_filter.py
@@ -0,0 +1,81 @@
+#
+# Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
+#
+"""Tests for issue #4076: Filter on empty dataframe results in schemaless dataframe."""
+
+from snowflake.snowpark.functions import col, lit
+from snowflake.snowpark.types import IntegerType, StringType, StructField, StructType
+
+
+def test_filter_empty_dataframe_preserves_schema(session):
+    """Filter on empty dataframe should preserve schema (issue #4076)."""
+    empty_schema = StructType([
+        StructField("entry", StringType(), True),
+        StructField("file_path", StringType(), True),
+    ])
+    mock_encounters_df = session.create_dataframe([], schema=empty_schema)
+
+    # Before filter: should have 2 columns
+    assert len(mock_encounters_df.schema.fields) == 2
+    assert [f.name for f in mock_encounters_df.schema.fields] == ["ENTRY", "FILE_PATH"]
+
+    # After filter: should preserve the same schema
+    filtered_df = mock_encounters_df.filter(col("entry").is_null())
+    assert len(filtered_df.schema.fields) == 2, (
+        "Filtered empty dataframe should retain schema (ENTRY, FILE_PATH)"
+    )
+    assert [f.name for f in filtered_df.schema.fields] == ["ENTRY", "FILE_PATH"]
+
+    # Both should be empty
+    assert mock_encounters_df.count() == 0
+    assert filtered_df.count() == 0
+
+
+def test_filter_empty_dataframe_various_conditions(session):
+    """Various filter conditions on empty dataframe should all preserve schema."""
+    empty_schema = StructType([
+        StructField("entry", StringType(), True),
+        StructField("file_path", StringType(), True),
+    ])
+    df = session.create_dataframe([], schema=empty_schema)
+
+    # is_null
+    assert len(df.filter(col("entry").is_null()).schema.fields) == 2
+
+    # is_not_null
+    assert len(df.filter(col("entry").is_not_null()).schema.fields) == 2
+
+    # equals
+    assert len(df.filter(col("entry") == lit("x")).schema.fields) == 2
+
+    # not equals
+    assert len(df.filter(col("entry") != lit("x")).schema.fields) == 2
+
+    # Chain filters
+    chained = df.filter(col("entry").is_null()).filter(col("file_path").is_null())
+    assert len(chained.schema.fields) == 2
+
+
+def test_select_lit_filter_preserves_projection_not_source(session):
+    """select(lit(1)).filter() must keep projected columns only, not original schema."""
+    schema = StructType([
+        StructField("a", IntegerType()),
+        StructField("b", StringType()),
+    ])
+    df = session.create_dataframe([], schema=schema)
+    result = df.select(lit(1).alias("x")).filter(col("x") > 0)
+    assert len(result.schema.fields) == 1
+    assert result.schema.fields[0].name == "X"
+
+
+def test_filter_empty_dataframe_with_three_columns(session):
+    """Filter on empty dataframe with more columns preserves schema."""
+    schema = StructType([
+        StructField("a", IntegerType()),
+        StructField("b", StringType()),
+        StructField("c", IntegerType()),
+    ])
+    df = session.create_dataframe([], schema=schema)
+    filtered = df.filter(col("a") > 0)
+    assert len(filtered.schema.fields) == 3
+    assert [f.name for f in filtered.schema.fields] == ["A", "B", "C"]