Skip to content

Commit 5d7f21d

Browse files
authored
Chore: Enable tests for native DataFrame implementations (#4406)
1 parent ea11179 commit 5d7f21d

22 files changed

Lines changed: 638 additions & 634 deletions

.circleci/continue_config.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -281,7 +281,6 @@ workflows:
281281
- trino
282282
- spark
283283
- clickhouse
284-
- clickhouse-cluster
285284
- risingwave
286285
- engine_tests_cloud:
287286
name: cloud_engine_<< matrix.engine >>

Makefile

Lines changed: 13 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -126,54 +126,51 @@ engine-%-down:
126126
##################
127127

128128
clickhouse-test: engine-clickhouse-up
129-
pytest -n auto -x -m "clickhouse" --retries 3 --junitxml=test-results/junit-clickhouse.xml
130-
131-
clickhouse-cluster-test: engine-clickhouse-up
132-
pytest -n auto -x -m "clickhouse_cluster" --retries 3 --junitxml=test-results/junit-clickhouse-cluster.xml
129+
pytest -n auto -m "clickhouse" --retries 3 --junitxml=test-results/junit-clickhouse.xml
133130

134131
duckdb-test: engine-duckdb-install
135-
pytest -n auto -x -m "duckdb" --retries 3 --junitxml=test-results/junit-duckdb.xml
132+
pytest -n auto -m "duckdb" --retries 3 --junitxml=test-results/junit-duckdb.xml
136133

137134
mssql-test: engine-mssql-up
138-
pytest -n auto -x -m "mssql" --retries 3 --junitxml=test-results/junit-mssql.xml
135+
pytest -n auto -m "mssql" --retries 3 --junitxml=test-results/junit-mssql.xml
139136

140137
mysql-test: engine-mysql-up
141-
pytest -n auto -x -m "mysql" --retries 3 --junitxml=test-results/junit-mysql.xml
138+
pytest -n auto -m "mysql" --retries 3 --junitxml=test-results/junit-mysql.xml
142139

143140
postgres-test: engine-postgres-up
144-
pytest -n auto -x -m "postgres" --retries 3 --junitxml=test-results/junit-postgres.xml
141+
pytest -n auto -m "postgres" --retries 3 --junitxml=test-results/junit-postgres.xml
145142

146143
spark-test: engine-spark-up
147-
pytest -n auto -x -m "spark or pyspark" --retries 3 --junitxml=test-results/junit-spark.xml
144+
pytest -n auto -m "spark" --retries 3 --junitxml=test-results/junit-spark.xml
148145

149146
trino-test: engine-trino-up
150-
pytest -n auto -x -m "trino or trino_iceberg or trino_delta or trino_nessie" --retries 3 --junitxml=test-results/junit-trino.xml
147+
pytest -n auto -m "trino" --retries 3 --junitxml=test-results/junit-trino.xml
151148

152149
risingwave-test: engine-risingwave-up
153-
pytest -n auto -x -m "risingwave" --retries 3 --junitxml=test-results/junit-risingwave.xml
150+
pytest -n auto -m "risingwave" --retries 3 --junitxml=test-results/junit-risingwave.xml
154151

155152
#################
156153
# Cloud Engines #
157154
#################
158155

159156
snowflake-test: guard-SNOWFLAKE_ACCOUNT guard-SNOWFLAKE_WAREHOUSE guard-SNOWFLAKE_DATABASE guard-SNOWFLAKE_USER guard-SNOWFLAKE_PASSWORD engine-snowflake-install
160-
pytest -n auto -x -m "snowflake" --retries 3 --junitxml=test-results/junit-snowflake.xml
157+
pytest -n auto -m "snowflake" --retries 3 --junitxml=test-results/junit-snowflake.xml
161158

162159
bigquery-test: guard-BIGQUERY_KEYFILE engine-bigquery-install
163-
pytest -n auto -x -m "bigquery" --retries 3 --junitxml=test-results/junit-bigquery.xml
160+
pytest -n auto -m "bigquery" --retries 3 --junitxml=test-results/junit-bigquery.xml
164161

165162
databricks-test: guard-DATABRICKS_CATALOG guard-DATABRICKS_SERVER_HOSTNAME guard-DATABRICKS_HTTP_PATH guard-DATABRICKS_ACCESS_TOKEN guard-DATABRICKS_CONNECT_VERSION engine-databricks-install
166163
pip install 'databricks-connect==${DATABRICKS_CONNECT_VERSION}'
167-
pytest -n auto -x -m "databricks" --retries 3 --junitxml=test-results/junit-databricks.xml
164+
pytest -n auto -m "databricks" --retries 3 --junitxml=test-results/junit-databricks.xml
168165

169166
redshift-test: guard-REDSHIFT_HOST guard-REDSHIFT_USER guard-REDSHIFT_PASSWORD guard-REDSHIFT_DATABASE engine-redshift-install
170-
pytest -n auto -x -m "redshift" --retries 3 --junitxml=test-results/junit-redshift.xml
167+
pytest -n auto -m "redshift" --retries 3 --junitxml=test-results/junit-redshift.xml
171168

172169
clickhouse-cloud-test: guard-CLICKHOUSE_CLOUD_HOST guard-CLICKHOUSE_CLOUD_USERNAME guard-CLICKHOUSE_CLOUD_PASSWORD engine-clickhouse-install
173170
pytest -n 1 -m "clickhouse_cloud" --retries 3 --junitxml=test-results/junit-clickhouse-cloud.xml
174171

175172
athena-test: guard-AWS_ACCESS_KEY_ID guard-AWS_SECRET_ACCESS_KEY guard-ATHENA_S3_WAREHOUSE_LOCATION engine-athena-install
176-
pytest -n auto -x -m "athena" --retries 3 --retry-delay 10 --junitxml=test-results/junit-athena.xml
173+
pytest -n auto -m "athena" --retries 3 --retry-delay 10 --junitxml=test-results/junit-athena.xml
177174

178175
vscode_settings:
179176
mkdir -p .vscode

pytest.ini

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,11 @@ markers =
1616
github: test for Github CI/CD bot
1717
jupyter: tests for Jupyter integration
1818
web: tests for web UI
19-
spark_pyspark: test for Spark with PySpark dependency
2019
# Engine Adapters
2120
engine: test all engine adapters
2221
athena: test for Athena
2322
bigquery: test for BigQuery
24-
clickhouse: test for Clickhouse (standalone mode)
25-
clickhouse_cluster: test for Clickhouse (cluster mode)
23+
clickhouse: test for Clickhouse (standalone mode / cluster mode)
2624
clickhouse_cloud: test for Clickhouse (cloud mode)
2725
databricks: test for Databricks
2826
duckdb: test for DuckDB
@@ -33,9 +31,8 @@ markers =
3331
redshift: test for Redshift
3432
snowflake: test for Snowflake
3533
spark: test for Spark
36-
trino: test for Trino (Hive connector)
37-
trino_iceberg: test for Trino (Iceberg connector)
38-
trino_delta: test for Trino (Delta connector)
34+
trino: test for Trino (all connectors)
35+
risingwave: test for Risingwave
3936
addopts = -n 0 --dist=loadgroup
4037

4138
asyncio_default_fixture_loop_scope = session

sqlmesh/core/engine_adapter/base.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1008,6 +1008,8 @@ def create_view(
10081008
if materialized_properties and not materialized:
10091009
raise SQLMeshError("Materialized properties are only supported for materialized views")
10101010

1011+
query_or_df = self._native_df_to_pandas_df(query_or_df)
1012+
10111013
if isinstance(query_or_df, pd.DataFrame):
10121014
values: t.List[t.Tuple[t.Any, ...]] = list(
10131015
query_or_df.itertuples(index=False, name=None)
@@ -2002,6 +2004,19 @@ def _fetch_native_df(
20022004
self.execute(query, quote_identifiers=quote_identifiers)
20032005
return self.cursor.fetchdf()
20042006

2007+
def _native_df_to_pandas_df(
2008+
self,
2009+
query_or_df: QueryOrDF,
2010+
) -> t.Union[Query, pd.DataFrame]:
2011+
"""
2012+
Take a "native" DataFrame (eg Pyspark, Bigframe, Snowpark etc) and convert it to Pandas
2013+
"""
2014+
if isinstance(query_or_df, (exp.Query, exp.DerivedTable, pd.DataFrame)):
2015+
return query_or_df
2016+
2017+
# EngineAdapter subclasses that have native DataFrame types should override this
2018+
raise NotImplementedError(f"Unable to convert {type(query_or_df)} to Pandas")
2019+
20052020
def fetchdf(
20062021
self, query: t.Union[exp.Expression, str], quote_identifiers: bool = False
20072022
) -> pd.DataFrame:

sqlmesh/core/engine_adapter/bigquery.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
from sqlmesh.utils import optional_import
2727
from sqlmesh.utils.date import to_datetime
2828
from sqlmesh.utils.errors import SQLMeshError
29+
from sqlmesh.utils.pandas import columns_to_types_from_dtypes
2930

3031
if t.TYPE_CHECKING:
3132
from google.api_core.retry import Retry
@@ -1107,6 +1108,39 @@ def _normalize_decimal_value(self, col: exp.Expression, precision: int) -> exp.E
11071108
def _normalize_nested_value(self, col: exp.Expression) -> exp.Expression:
11081109
return exp.func("TO_JSON_STRING", col, dialect=self.dialect)
11091110

1111+
@t.overload
1112+
def _columns_to_types(
1113+
self, query_or_df: DF, columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None
1114+
) -> t.Dict[str, exp.DataType]: ...
1115+
1116+
@t.overload
1117+
def _columns_to_types(
1118+
self, query_or_df: Query, columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None
1119+
) -> t.Optional[t.Dict[str, exp.DataType]]: ...
1120+
1121+
def _columns_to_types(
1122+
self, query_or_df: QueryOrDF, columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None
1123+
) -> t.Optional[t.Dict[str, exp.DataType]]:
1124+
if (
1125+
not columns_to_types
1126+
and bigframes
1127+
and isinstance(query_or_df, bigframes.dataframe.DataFrame)
1128+
):
1129+
# using dry_run=True attempts to prevent the DataFrame from being materialized just to read the column types from it
1130+
dtypes = query_or_df.to_pandas(dry_run=True).columnDtypes
1131+
return columns_to_types_from_dtypes(dtypes.items())
1132+
1133+
return super()._columns_to_types(query_or_df, columns_to_types)
1134+
1135+
def _native_df_to_pandas_df(
1136+
self,
1137+
query_or_df: QueryOrDF,
1138+
) -> t.Union[Query, pd.DataFrame]:
1139+
if bigframes and isinstance(query_or_df, bigframes.dataframe.DataFrame):
1140+
return query_or_df.to_pandas()
1141+
1142+
return super()._native_df_to_pandas_df(query_or_df)
1143+
11101144
@property
11111145
def _query_data(self) -> t.Any:
11121146
return self._connection_pool.get_attribute("query_data")

sqlmesh/core/engine_adapter/snowflake.py

Lines changed: 54 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
from sqlmesh.core.schema_diff import SchemaDiffer
2828
from sqlmesh.utils import optional_import
2929
from sqlmesh.utils.errors import SQLMeshError
30+
from sqlmesh.utils.pandas import columns_to_types_from_dtypes
3031

3132
logger = logging.getLogger(__name__)
3233
snowpark = optional_import("snowflake.snowpark")
@@ -298,13 +299,23 @@ def query_factory() -> Query:
298299

299300
if is_snowpark_dataframe:
300301
temp_table.set("catalog", database)
301-
df_renamed = df.rename(
302-
{
303-
col: exp.to_identifier(col).sql(dialect=self.dialect, identify=True)
304-
for col in columns_to_types
305-
}
306-
) # type: ignore
307-
df_renamed.createOrReplaceTempView(
302+
303+
# only quote columns if they aren't already quoted
304+
# if the Snowpark dataframe was created from a Pandas dataframe via snowpark.create_dataframe(pandas_df),
305+
# then they will be quoted already. But if the Snowpark dataframe was created manually by the user, then the
306+
# columns may not be quoted
307+
columns_already_quoted = all(
308+
col.startswith('"') and col.endswith('"') for col in df.columns
309+
)
310+
local_df = df
311+
if not columns_already_quoted:
312+
local_df = df.rename(
313+
{
314+
col: exp.to_identifier(col).sql(dialect=self.dialect, identify=True)
315+
for col in columns_to_types
316+
}
317+
) # type: ignore
318+
local_df.createOrReplaceTempView(
308319
temp_table.sql(dialect=self.dialect, identify=True)
309320
) # type: ignore
310321
elif isinstance(df, pd.DataFrame):
@@ -356,6 +367,11 @@ def query_factory() -> Query:
356367

357368
def cleanup() -> None:
358369
if is_snowpark_dataframe:
370+
if hasattr(df, "table_name"):
371+
if isinstance(df.table_name, str):
372+
# created by the Snowpark library if the Snowpark DataFrame was created from a Pandas DataFrame
373+
# (if the Snowpark DataFrame was created via native means then there is no 'table_name' property and no temp table)
374+
self.drop_table(df.table_name)
359375
self.drop_view(temp_table)
360376
else:
361377
self.drop_table(temp_table)
@@ -381,6 +397,15 @@ def _fetch_native_df(
381397
columns = self.cursor._result_set.batches[0].column_names
382398
return pd.DataFrame([dict(zip(columns, row)) for row in rows])
383399

400+
def _native_df_to_pandas_df(
401+
self,
402+
query_or_df: QueryOrDF,
403+
) -> t.Union[Query, pd.DataFrame]:
404+
if snowpark and isinstance(query_or_df, snowpark.DataFrame):
405+
return query_or_df.to_pandas()
406+
407+
return super()._native_df_to_pandas_df(query_or_df)
408+
384409
def _get_data_objects(
385410
self, schema_name: SchemaName, object_names: t.Optional[t.Set[str]] = None
386411
) -> t.List[DataObject]:
@@ -426,6 +451,10 @@ def _get_data_objects(
426451
if object_names:
427452
query = query.where(exp.column("TABLE_NAME").isin(*object_names))
428453

454+
# exclude SNOWPARK_TEMP_TABLE tables that are managed by the Snowpark library and are an implementation
455+
# detail of dealing with DataFrames
456+
query = query.where(exp.column("TABLE_NAME").like("SNOWPARK_TEMP_TABLE%").not_())
457+
429458
df = self.fetchdf(query, quote_identifiers=True)
430459
if df.empty:
431460
return []
@@ -537,3 +566,21 @@ def clone_table(
537566
clone_kwargs=clone_kwargs,
538567
**kwargs,
539568
)
569+
570+
@t.overload
571+
def _columns_to_types(
572+
self, query_or_df: DF, columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None
573+
) -> t.Dict[str, exp.DataType]: ...
574+
575+
@t.overload
576+
def _columns_to_types(
577+
self, query_or_df: Query, columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None
578+
) -> t.Optional[t.Dict[str, exp.DataType]]: ...
579+
580+
def _columns_to_types(
581+
self, query_or_df: QueryOrDF, columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None
582+
) -> t.Optional[t.Dict[str, exp.DataType]]:
583+
if not columns_to_types and snowpark and isinstance(query_or_df, snowpark.DataFrame):
584+
return columns_to_types_from_dtypes(query_or_df.sample(n=1).to_pandas().dtypes.items())
585+
586+
return super()._columns_to_types(query_or_df, columns_to_types)

sqlmesh/core/engine_adapter/spark.py

Lines changed: 6 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -382,51 +382,14 @@ def create_state_table(
382382
partitioned_by=[exp.column(x) for x in primary_key] if primary_key else None,
383383
)
384384

385-
def create_view(
385+
def _native_df_to_pandas_df(
386386
self,
387-
view_name: TableName,
388387
query_or_df: QueryOrDF,
389-
columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None,
390-
replace: bool = True,
391-
materialized: bool = False,
392-
materialized_properties: t.Optional[t.Dict[str, t.Any]] = None,
393-
table_description: t.Optional[str] = None,
394-
column_descriptions: t.Optional[t.Dict[str, str]] = None,
395-
view_properties: t.Optional[t.Dict[str, exp.Expression]] = None,
396-
**create_kwargs: t.Any,
397-
) -> None:
398-
"""Create a view with a query or dataframe.
399-
400-
If a dataframe is passed in, it will be converted into a literal values statement.
401-
This should only be done if the dataframe is very small!
402-
403-
Args:
404-
view_name: The view name.
405-
query_or_df: A query or dataframe.
406-
columns_to_types: Columns to use in the view statement.
407-
replace: Whether or not to replace an existing view - defaults to True.
408-
materialized: Whether or not the view should be materialized - defaults to False.
409-
materialized_properties: Optional materialized view properties to add to the view.
410-
table_description: Optional table description from MODEL DDL.
411-
column_descriptions: Optional column descriptions from model query.
412-
create_kwargs: Additional kwargs to pass into the Create expression
413-
view_properties: Optional view properties to add to the view.
414-
"""
415-
pyspark_df = self.try_get_pyspark_df(query_or_df)
416-
if pyspark_df:
417-
query_or_df = pyspark_df.toPandas()
418-
super().create_view(
419-
view_name,
420-
query_or_df,
421-
columns_to_types,
422-
replace,
423-
materialized,
424-
materialized_properties,
425-
table_description,
426-
column_descriptions,
427-
view_properties=view_properties,
428-
**create_kwargs,
429-
)
388+
) -> t.Union[Query, pd.DataFrame]:
389+
if pyspark_df := self.try_get_pyspark_df(query_or_df):
390+
return pyspark_df.toPandas()
391+
392+
return super()._native_df_to_pandas_df(query_or_df)
430393

431394
def _create_table(
432395
self,

sqlmesh/utils/pandas.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,13 +27,20 @@
2727
pd.Float32Dtype(): exp.DataType.build("float"),
2828
pd.Float64Dtype(): exp.DataType.build("double"),
2929
pd.StringDtype(): exp.DataType.build("text"), # type: ignore
30+
pd.StringDtype("pyarrow"): exp.DataType.build("text"),
3031
pd.BooleanDtype(): exp.DataType.build("boolean"),
3132
}
3233

3334

3435
def columns_to_types_from_df(df: pd.DataFrame) -> t.Dict[str, exp.DataType]:
36+
return columns_to_types_from_dtypes(df.dtypes.items())
37+
38+
39+
def columns_to_types_from_dtypes(
40+
dtypes: t.Iterable[t.Tuple[t.Hashable, t.Any]],
41+
) -> t.Dict[str, exp.DataType]:
3542
result = {}
36-
for column_name, column_type in df.dtypes.items():
43+
for column_name, column_type in dtypes:
3744
exp_type: t.Optional[exp.DataType] = None
3845
if hasattr(pd, "DatetimeTZDtype") and isinstance(column_type, pd.DatetimeTZDtype):
3946
exp_type = exp.DataType.build("timestamptz")

0 commit comments

Comments
 (0)