From a56bbe837e440a3370ee76d5d7b8e9cf7dc17bd4 Mon Sep 17 00:00:00 2001 From: Shirshanka Das Date: Sat, 30 May 2026 22:26:17 -0700 Subject: [PATCH 1/4] fix: fall back to catalog search when search_business_context returns empty (closes #61) When a dataset exists in DataHub but has no docs, glossary terms, domains, or data products, all four business-context sub-searches return empty and the LLM was incorrectly telling the user the entity does not exist. _search_business_context_impl now detects the all-empty case and automatically runs a general catalog search, returning the results as `catalog_search` so the agent can confirm entity existence before drawing conclusions. Co-Authored-By: Claude Sonnet 4.6 --- .../analytics_agent/skills/datahub_skills.py | 33 ++++ tests/unit/test_search_business_context.py | 141 ++++++++++++++++++ 2 files changed, 174 insertions(+) create mode 100644 tests/unit/test_search_business_context.py diff --git a/backend/src/analytics_agent/skills/datahub_skills.py b/backend/src/analytics_agent/skills/datahub_skills.py index a98ee58..7f0a7af 100644 --- a/backend/src/analytics_agent/skills/datahub_skills.py +++ b/backend/src/analytics_agent/skills/datahub_skills.py @@ -356,6 +356,25 @@ def _save_correction_impl( # --------------------------------------------------------------------------- +def _is_empty_search_result(result: object) -> bool: + """Return True when a search/search_documents result contains no hits.""" + if result is None: + return True + if isinstance(result, dict): + if "error" in result: + return True + # datahub_agent_context search returns {"total": N, "entities": [...]} + # or {"results": [...]} depending on the tool + total = result.get("total", None) + if total is not None: + return int(total) == 0 + entities = result.get("entities") or result.get("results") or [] + return len(entities) == 0 + if isinstance(result, list): + return len(result) == 0 + return False + + def _search_business_context_impl(topic: str) -> dict: """Fan out to DataHub docs, glossary terms, domains, and data products for a topic.""" from analytics_agent.context.datahub import get_datahub_client @@ -394,6 +413,20 @@ def _search_business_context_impl(topic: str) -> dict: except Exception as e: results[label] = {"error": str(e)} + # When no business documentation exists, fall back to a general catalog search so + # the agent can confirm the entity is present before telling the user it's missing. + if all(_is_empty_search_result(v) for v in results.values()): + try: + results["catalog_search"] = search(query=topic, num_results=10) + results["note"] = ( + "No governed documentation, glossary terms, domains, or data products " + "were found for this topic. Catalog search results are included above — " + "the entity may still exist in DataHub without governance metadata. " + "Use get_entities on any matching URN to confirm existence and fetch schema." + ) + except Exception as e: + results["catalog_search"] = {"error": str(e)} + return results diff --git a/tests/unit/test_search_business_context.py b/tests/unit/test_search_business_context.py new file mode 100644 index 0000000..ca6510d --- /dev/null +++ b/tests/unit/test_search_business_context.py @@ -0,0 +1,141 @@ +""" +Unit tests for _search_business_context_impl fallback behaviour (issue #61). + +When a dataset exists in DataHub but has no docs / glossary / domain / data-product +entries, the four business-context sub-searches all return empty. The impl must +automatically fall back to a general catalog search and surface the result so the +agent doesn't incorrectly tell the user the entity "doesn't exist". +""" + +from __future__ import annotations + +from contextlib import ExitStack +from unittest.mock import MagicMock, patch + +import pytest + +from analytics_agent.skills.datahub_skills import ( + _is_empty_search_result, + _search_business_context_impl, +) + +# --------------------------------------------------------------------------- +# _is_empty_search_result +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + "result, expected", + [ + (None, True), + ([], True), + ({}, True), # no recognised keys → no entities found + ({"total": 0, "entities": []}, True), + ({"total": 1, "entities": [{"urn": "urn:li:dataset:(x,y,PROD)"}]}, False), + ({"results": []}, True), + ({"results": [{"urn": "x"}]}, False), + ({"error": "something went wrong"}, True), + ], +) +def test_is_empty_search_result(result, expected): + assert _is_empty_search_result(result) == expected + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +_EMPTY = {"total": 0, "entities": []} +_HIT = { + "total": 1, + "entities": [ + {"urn": "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)"} + ], +} + + +def _mock_datahub_context(): + """Patch DataHubContext so it works as a no-op context manager.""" + ctx_cls = MagicMock() + ctx_cls.return_value.__enter__ = MagicMock(return_value=None) + ctx_cls.return_value.__exit__ = MagicMock(return_value=False) + return patch("datahub_agent_context.context.DataHubContext", ctx_cls) + + +# --------------------------------------------------------------------------- +# _search_business_context_impl — fallback to catalog search +# --------------------------------------------------------------------------- + + +def test_fallback_triggered_when_all_empty(): + """Catalog search is included when all business-context sub-searches are empty.""" + + def _search_side_effect(**kwargs): + # Filtered calls (glossaryTerm, domain, dataProduct) → empty; + # un-filtered fallback call → hit + if kwargs.get("filter"): + return _EMPTY + return _HIT + + mock_client = MagicMock() + with ExitStack() as stack: + stack.enter_context( + patch("analytics_agent.context.datahub.get_datahub_client", return_value=mock_client) + ) + stack.enter_context(_mock_datahub_context()) + stack.enter_context( + patch( + "datahub_agent_context.mcp_tools.documents.search_documents", + return_value=_EMPTY, + ) + ) + stack.enter_context( + patch( + "datahub_agent_context.mcp_tools.search.search", + side_effect=_search_side_effect, + ) + ) + result = _search_business_context_impl("SampleHiveDataset") + + assert "catalog_search" in result, "Fallback catalog_search key must be present" + assert result["catalog_search"] == _HIT + assert "note" in result, "A note explaining the fallback must be present" + + +def test_no_fallback_when_business_context_found(): + """Catalog fallback is NOT added when at least one business-context search has results.""" + + def _search_side_effect(**kwargs): + if "glossaryTerm" in kwargs.get("filter", ""): + return _HIT # glossary found something + return _EMPTY + + mock_client = MagicMock() + with ExitStack() as stack: + stack.enter_context( + patch("analytics_agent.context.datahub.get_datahub_client", return_value=mock_client) + ) + stack.enter_context(_mock_datahub_context()) + stack.enter_context( + patch( + "datahub_agent_context.mcp_tools.documents.search_documents", + return_value=_EMPTY, + ) + ) + stack.enter_context( + patch( + "datahub_agent_context.mcp_tools.search.search", + side_effect=_search_side_effect, + ) + ) + result = _search_business_context_impl("SomeMetric") + + assert "catalog_search" not in result + assert "note" not in result + + +def test_returns_error_when_no_client(): + """Returns error dict immediately when DataHub is not configured.""" + with patch("analytics_agent.context.datahub.get_datahub_client", return_value=None): + result = _search_business_context_impl("anything") + assert result == {"error": "DataHub is not configured."} From e35ca849727c177b69f348913d94c6d1d8479304 Mon Sep 17 00:00:00 2001 From: Shirshanka Das Date: Sat, 30 May 2026 23:28:35 -0700 Subject: [PATCH 2/4] fix: prevent catalog_search fallback from inflating context quality score The assessor LLM could see catalog_search hits in a search_business_context result and score context as Fair when all governance searches (docs, glossary, domains, data products) were actually empty. Add an explicit rule to the assessment prompt clarifying that catalog_search is a last-resort existence check only and must not raise the score. Co-Authored-By: Claude Sonnet 4.6 --- backend/src/analytics_agent/agent/analysis.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/backend/src/analytics_agent/agent/analysis.py b/backend/src/analytics_agent/agent/analysis.py index 0a52b56..9abb995 100644 --- a/backend/src/analytics_agent/agent/analysis.py +++ b/backend/src/analytics_agent/agent/analysis.py @@ -34,11 +34,18 @@ 1 Very Poor — No useful context; agent expressed significant uncertainty, made \ conflicting assumptions, or produced an answer that contradicts available definitions. +**Important:** A `search_business_context` result that contains a `catalog_search` key \ +means ALL governance searches (documentation, glossary, domains, data products) returned \ +empty — the catalog search is a last-resort fallback to confirm entity existence only. \ +Treat this the same as empty results: it does NOT raise the score. Only documentation, \ +glossary definitions, domain membership, or data-product entries count as useful context. + Key signals that push the score DOWN: - Agent says "the definition doesn't cover this" or "I'll interpret this as…" - Agent switches columns, tables, or date anchors not mentioned in the definition - Agent produces a result that varies based on an undocumented assumption - Agent asks the user to clarify something the glossary/docs should have defined +- `search_business_context` result contains `catalog_search` (all governance searches empty) --- CONTEXT TOOL CALLS AND RESULTS --- {context_calls} From ac18906bacec78820f57cc4d730b8fee6b2859b4 Mon Sep 17 00:00:00 2001 From: Shirshanka Das Date: Sat, 30 May 2026 23:31:25 -0700 Subject: [PATCH 3/4] fix: cap context quality at Fair when only catalog fallback was found MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A rich dataset description from catalog_search is genuinely useful, so treating it as "empty" was wrong. The correct rule: no governed definition (glossary, docs, domain, data product) → score cannot exceed 3 (Fair), but within 1-3 the assessor should still use get_entities results to judge how informative the context was. Co-Authored-By: Claude Sonnet 4.6 --- backend/src/analytics_agent/agent/analysis.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/backend/src/analytics_agent/agent/analysis.py b/backend/src/analytics_agent/agent/analysis.py index 9abb995..572e4f0 100644 --- a/backend/src/analytics_agent/agent/analysis.py +++ b/backend/src/analytics_agent/agent/analysis.py @@ -36,16 +36,18 @@ **Important:** A `search_business_context` result that contains a `catalog_search` key \ means ALL governance searches (documentation, glossary, domains, data products) returned \ -empty — the catalog search is a last-resort fallback to confirm entity existence only. \ -Treat this the same as empty results: it does NOT raise the score. Only documentation, \ -glossary definitions, domain membership, or data-product entries count as useful context. +empty. No authoritative business definition exists. This caps the score at 3 (Fair) \ +regardless of what the catalog search found — scores of 4 or 5 require a governed \ +definition (glossary term, domain doc, or data-product entry). Within that 1–3 range, \ +use the dataset description from subsequent `get_entities` calls to judge how useful \ +the context actually was. Key signals that push the score DOWN: - Agent says "the definition doesn't cover this" or "I'll interpret this as…" - Agent switches columns, tables, or date anchors not mentioned in the definition - Agent produces a result that varies based on an undocumented assumption - Agent asks the user to clarify something the glossary/docs should have defined -- `search_business_context` result contains `catalog_search` (all governance searches empty) +- `search_business_context` result contains `catalog_search` (no governed definition → max score 3) --- CONTEXT TOOL CALLS AND RESULTS --- {context_calls} From 2a1455f0c8f125570c30dd9958c495038199af91 Mon Sep 17 00:00:00 2001 From: Shirshanka Das Date: Sat, 30 May 2026 23:53:28 -0700 Subject: [PATCH 4/4] chore: fix import ordering in test_search_business_context Co-Authored-By: Claude Sonnet 4.6 --- tests/unit/test_search_business_context.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/unit/test_search_business_context.py b/tests/unit/test_search_business_context.py index ca6510d..1350d29 100644 --- a/tests/unit/test_search_business_context.py +++ b/tests/unit/test_search_business_context.py @@ -13,7 +13,6 @@ from unittest.mock import MagicMock, patch import pytest - from analytics_agent.skills.datahub_skills import ( _is_empty_search_result, _search_business_context_impl, @@ -48,9 +47,7 @@ def test_is_empty_search_result(result, expected): _EMPTY = {"total": 0, "entities": []} _HIT = { "total": 1, - "entities": [ - {"urn": "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)"} - ], + "entities": [{"urn": "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)"}], }