Feat: show only relevant columns (as DataFrames) when unit tests fail (#1741)

georgesittas · web-flow · commit 9a5a34dbf5dd · 2023-11-21T17:14:13.000Z
* Feat: show only relevant columns (as DataFrames) when unit tests fail

* Fix bug where astype would fail due to None being passed to non-nullable types

* Formatting

* Add flag to avoid truncating dataframe

* PR feedback

* Fix test

* Update docs
diff --git a/docs/concepts/tests.md b/docs/concepts/tests.md
@@ -66,7 +66,9 @@ In this example, we'll use the `sqlmesh_example.full_model` model, which is prov
 MODEL (
   name sqlmesh_example.full_model,
   kind FULL,
-  cron '@daily'
+  cron '@daily',
+  grain item_id,
+  audits [assert_positive_order_ids],
 );
 
 SELECT
@@ -75,14 +77,15 @@ SELECT
 FROM
     sqlmesh_example.incremental_model
 GROUP BY item_id
+ORDER BY item_id
 ```
 
 Notice how the query of the model definition above references one upstream model: `sqlmesh_example.incremental_model`.
 
 The test definition for this model may look like the following:
 
 ```yaml linenums="1"
-test_full_model:
+test_example_full_model:
   model: sqlmesh_example.full_model
   inputs:
     sqlmesh_example.incremental_model:
@@ -110,7 +113,7 @@ Note that `ds` is redundant in the above test, since it is not referenced in `fu
 Let's also assume that we are only interested in testing the `num_orders` output column, i.e. we only care about the `id` input column of `sqlmesh_example.incremental_model`. Then, we could rewrite the above test more compactly as follows:
 
 ```yaml linenums="1"
-test_full_model:
+test_example_full_model:
   model: sqlmesh_example.full_model
   inputs:
     sqlmesh_example.incremental_model:
@@ -146,12 +149,13 @@ SELECT
 FROM
     filtered_orders_cte
 GROUP BY item_id
+ORDER BY item_id
 ```
 
 Below is the example of a test that verifies individual rows returned by the `filtered_orders_cte` CTE before aggregation takes place:
 
 ```yaml linenums="1" hl_lines="16-22"
-test_full_model:
+test_example_full_model:
   model: sqlmesh_example.full_model
   inputs:
     sqlmesh_example.incremental_model:
@@ -203,27 +207,28 @@ The command returns a non-zero exit code if there are any failures, and reports
 $ sqlmesh test
 F
 ======================================================================
-FAIL: test_full_model (/Users/izeigerman/github/tmp/tests/test_suite.yaml:1)
+FAIL: test_example_full_model (test/tests/test_full_model.yaml)
 ----------------------------------------------------------------------
-AssertionError: Data differs
-- {'item_id': 1, 'num_orders': 3}
-?                              ^
-
-+ {'item_id': 1, 'num_orders': 2}
-?                              ^
+AssertionError: Data differs (exp: expected, act: actual)
 
+  num_orders
+         exp  act
+0        3.0  2.0
 
 ----------------------------------------------------------------------
-Ran 1 test in 0.008s
+Ran 1 test in 0.012s
 
 FAILED (failures=1)
 ```
 
+Note: when there are many differing columns, the corresponding DataFrame will be truncated by default, but it can be fully rendered using the `-v` option (verbose) of the `sqlmesh test` command.
+
 ### Testing for specific models
+
 To run a specific model test, pass in the suite file name followed by `::` and the name of the test:
 
 ```
-sqlmesh test tests/test_suite.yaml::test_full_model
+sqlmesh test tests/test_full_model.yaml::test_example_full_model
 ```
 
 You can also run tests that match a pattern or substring using a glob pathname expansion syntax:
diff --git a/sqlmesh/core/context.py b/sqlmesh/core/context.py
@@ -1047,7 +1047,11 @@ def test(
         stream: t.Optional[t.TextIO] = None,
     ) -> unittest.result.TestResult:
         """Discover and run model tests"""
-        verbosity = 2 if verbose else 1
+        if verbose:
+            pd.set_option("display.max_columns", None)
+            verbosity = 2
+        else:
+            verbosity = 1
 
         try:
             if tests:
diff --git a/sqlmesh/core/test/definition.py b/sqlmesh/core/test/definition.py
@@ -1,6 +1,5 @@
 from __future__ import annotations
 
-import difflib
 import pathlib
 import typing as t
 import unittest
@@ -95,10 +94,12 @@ def assert_equal(self, expected: pd.DataFrame, actual: pd.DataFrame) -> None:
         """Compare two DataFrames"""
         self._add_missing_columns(expected, actual)
 
-        # Two astypes are necessary, pandas converts strings to times as NS, but if the actual
-        # is US, it doesn't take affect until the 2nd try!
+        # Two astype calls are necessary: pandas parses strings into nanosecond (ns) timestamps,
+        # but if `actual` uses microseconds (us), the cast only takes effect on the second call.
         actual_types = actual.dtypes.to_dict()
-        expected = expected.astype(actual_types).astype(actual_types)
+        expected = expected.astype(actual_types, errors="ignore").astype(
+            actual_types, errors="ignore"
+        )
 
         expected = expected.replace({np.nan: None, "nan": None})
         actual = actual.replace({np.nan: None, "nan": None})
@@ -111,13 +112,8 @@ def assert_equal(self, expected: pd.DataFrame, actual: pd.DataFrame) -> None:
                 check_datetimelike_compat=True,
             )
         except AssertionError as e:
-            diff = "\n".join(
-                difflib.ndiff(
-                    [str(x) for x in expected.to_dict("records")],
-                    [str(x) for x in actual.to_dict("records")],
-                )
-            )
-            e.args = (f"Data differs\n{diff}",)
+            diff = expected.compare(actual).rename(columns={"self": "exp", "other": "act"})
+            e.args = (f"Data differs (exp: expected, act: actual)\n\n{diff}",)
             raise e
 
     def runTest(self) -> None:
diff --git a/tests/core/test_test.py b/tests/core/test_test.py
@@ -329,6 +329,30 @@ def test_partial_inputs(sushi_context: Context) -> None:
     assert result and result.wasSuccessful()
 
 
+def test_missing_column_failure(sushi_context: Context, full_model_without_ctes: SqlModel) -> None:
+    model = t.cast(SqlModel, sushi_context.upsert_model(full_model_without_ctes))
+    body = load_yaml(
+        """
+test_foo:
+  model: sushi.foo
+  inputs:
+    raw:
+      - id: 1
+        value: 2
+        ds: 3
+  outputs:
+    query:
+      - id: 1
+        value: null
+            """
+    )
+    result = _create_test(body, "test_foo", model, sushi_context).run()
+    assert result and not result.wasSuccessful()
+
+    expected_msg = "AssertionError: Data differs (exp: expected, act: actual)\n\n  value        ds    \n    exp act   exp act\n0  None   2  None   3\n"
+    assert expected_msg in result.failures[0][1]
+
+
 @pytest.mark.parametrize("full_model_without_ctes", ["snowflake"], indirect=True)
 def test_normalization(full_model_without_ctes: SqlModel) -> None:
     body = load_yaml(
diff --git a/tests/web/test_main.py b/tests/web/test_main.py
@@ -603,13 +603,6 @@ def test_test_failure(project_context: Context) -> None:
         {
             "name": "test_foo",
             "path": "tests/test_foo.yaml",
-            "tb": """AssertionError: Data differs
-- {'ds': 2}
-?        ^
-
-+ {'ds': 1}
-?        ^
-
-""",
+            "tb": "AssertionError: Data differs (exp: expected, act: actual)\n\n   ds    \n  exp act\n0   2   1\n",
         }
     ]

Original file line number	Diff line number	Diff line change
`@@ -603,13 +603,6 @@ def test_test_failure(project_context: Context) -> None:`
`603`	`603`	`{`
`604`	`604`	`"name": "test_foo",`
`605`	`605`	`"path": "tests/test_foo.yaml",`
`606`		`- "tb": """AssertionError: Data differs`
`607`		`-- {'ds': 2}`
`608`		`-?        ^`
`609`		`-`
`610`		`-+ {'ds': 1}`
`611`		`-?        ^`
`612`		`-`
`613`		`-""",`
	`606`	`+ "tb": "AssertionError: Data differs (exp: expected, act: actual)\n\n   ds    \n  exp act\n0   2   1\n",`
`614`	`607`	`}`
`615`	`608`	`]`