Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions backend/src/analytics_agent/agent/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,20 @@
1 Very Poor — No useful context; agent expressed significant uncertainty, made \
conflicting assumptions, or produced an answer that contradicts available definitions.

**Important:** A `search_business_context` result that contains a `catalog_search` key \
means ALL governance searches (documentation, glossary, domains, data products) returned \
empty. No authoritative business definition exists. This caps the score at 3 (Fair) \
regardless of what the catalog search found — scores of 4 or 5 require a governed \
definition (glossary term, domain doc, or data-product entry). Within that 1–3 range, \
use the dataset description from subsequent `get_entities` calls to judge how useful \
the context actually was.

Key signals that push the score DOWN:
- Agent says "the definition doesn't cover this" or "I'll interpret this as…"
- Agent switches columns, tables, or date anchors not mentioned in the definition
- Agent produces a result that varies based on an undocumented assumption
- Agent asks the user to clarify something the glossary/docs should have defined
- `search_business_context` result contains `catalog_search` (no governed definition → max score 3)

--- CONTEXT TOOL CALLS AND RESULTS ---
{context_calls}
Expand Down
33 changes: 33 additions & 0 deletions backend/src/analytics_agent/skills/datahub_skills.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,25 @@ def _save_correction_impl(
# ---------------------------------------------------------------------------


def _is_empty_search_result(result: object) -> bool:
"""Return True when a search/search_documents result contains no hits."""
if result is None:
return True
if isinstance(result, dict):
if "error" in result:
return True
# datahub_agent_context search returns {"total": N, "entities": [...]}
# or {"results": [...]} depending on the tool
total = result.get("total", None)
if total is not None:
return int(total) == 0
entities = result.get("entities") or result.get("results") or []
return len(entities) == 0
if isinstance(result, list):
return len(result) == 0
return False


def _search_business_context_impl(topic: str) -> dict:
"""Fan out to DataHub docs, glossary terms, domains, and data products for a topic."""
from analytics_agent.context.datahub import get_datahub_client
Expand Down Expand Up @@ -394,6 +413,20 @@ def _search_business_context_impl(topic: str) -> dict:
except Exception as e:
results[label] = {"error": str(e)}

# When no business documentation exists, fall back to a general catalog search so
# the agent can confirm the entity is present before telling the user it's missing.
if all(_is_empty_search_result(v) for v in results.values()):
try:
results["catalog_search"] = search(query=topic, num_results=10)
results["note"] = (
"No governed documentation, glossary terms, domains, or data products "
"were found for this topic. Catalog search results are included above — "
"the entity may still exist in DataHub without governance metadata. "
"Use get_entities on any matching URN to confirm existence and fetch schema."
)
except Exception as e:
results["catalog_search"] = {"error": str(e)}

return results


Expand Down
138 changes: 138 additions & 0 deletions tests/unit/test_search_business_context.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
"""
Unit tests for _search_business_context_impl fallback behaviour (issue #61).

When a dataset exists in DataHub but has no docs / glossary / domain / data-product
entries, the four business-context sub-searches all return empty. The impl must
automatically fall back to a general catalog search and surface the result so the
agent doesn't incorrectly tell the user the entity "doesn't exist".
"""

from __future__ import annotations

from contextlib import ExitStack
from unittest.mock import MagicMock, patch

import pytest
from analytics_agent.skills.datahub_skills import (
_is_empty_search_result,
_search_business_context_impl,
)

# ---------------------------------------------------------------------------
# _is_empty_search_result
# ---------------------------------------------------------------------------


@pytest.mark.parametrize(
    ("result", "expected"),
    [
        (None, True),
        ([], True),
        ({}, True),  # no recognised keys → no entities found
        ({"total": 0, "entities": []}, True),
        ({"total": 1, "entities": [{"urn": "urn:li:dataset:(x,y,PROD)"}]}, False),
        ({"results": []}, True),
        ({"results": [{"urn": "x"}]}, False),
        ({"error": "something went wrong"}, True),
    ],
)
def test_is_empty_search_result(result, expected):
    """Each payload shape is classified as empty or non-empty as expected."""
    is_empty = _is_empty_search_result(result)
    assert is_empty == expected


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

_EMPTY = {"total": 0, "entities": []}
_HIT = {
"total": 1,
"entities": [{"urn": "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)"}],
}


def _mock_datahub_context():
"""Patch DataHubContext so it works as a no-op context manager."""
ctx_cls = MagicMock()
ctx_cls.return_value.__enter__ = MagicMock(return_value=None)
ctx_cls.return_value.__exit__ = MagicMock(return_value=False)
return patch("datahub_agent_context.context.DataHubContext", ctx_cls)


# ---------------------------------------------------------------------------
# _search_business_context_impl — fallback to catalog search
# ---------------------------------------------------------------------------


def test_fallback_triggered_when_all_empty():
    """Catalog search is included when all business-context sub-searches are empty."""

    def _fake_search(**kwargs):
        # Calls that carry a truthy ``filter`` (the glossaryTerm / domain /
        # dataProduct sub-searches) come back empty; the un-filtered fallback
        # call gets a hit.
        return _EMPTY if kwargs.get("filter") else _HIT

    mock_client = MagicMock()
    patchers = [
        patch("analytics_agent.context.datahub.get_datahub_client", return_value=mock_client),
        _mock_datahub_context(),
        patch(
            "datahub_agent_context.mcp_tools.documents.search_documents",
            return_value=_EMPTY,
        ),
        patch(
            "datahub_agent_context.mcp_tools.search.search",
            side_effect=_fake_search,
        ),
    ]
    with ExitStack() as stack:
        # Enter the patchers in the listed order; ExitStack unwinds them in reverse.
        for patcher in patchers:
            stack.enter_context(patcher)
        result = _search_business_context_impl("SampleHiveDataset")

    assert "catalog_search" in result, "Fallback catalog_search key must be present"
    assert result["catalog_search"] == _HIT
    assert "note" in result, "A note explaining the fallback must be present"


def test_no_fallback_when_business_context_found():
    """Catalog fallback is NOT added when at least one business-context search has results."""

    def _fake_search(**kwargs):
        # Only the call whose filter mentions glossaryTerm finds something.
        glossary_call = "glossaryTerm" in kwargs.get("filter", "")
        return _HIT if glossary_call else _EMPTY

    mock_client = MagicMock()
    patchers = [
        patch("analytics_agent.context.datahub.get_datahub_client", return_value=mock_client),
        _mock_datahub_context(),
        patch(
            "datahub_agent_context.mcp_tools.documents.search_documents",
            return_value=_EMPTY,
        ),
        patch(
            "datahub_agent_context.mcp_tools.search.search",
            side_effect=_fake_search,
        ),
    ]
    with ExitStack() as stack:
        # Enter the patchers in the listed order; ExitStack unwinds them in reverse.
        for patcher in patchers:
            stack.enter_context(patcher)
        result = _search_business_context_impl("SomeMetric")

    # Neither fallback artefact may appear when governed context exists.
    assert "catalog_search" not in result
    assert "note" not in result


def test_returns_error_when_no_client():
    """Returns error dict immediately when DataHub is not configured."""
    # Simulate an unconfigured DataHub: the client factory yields None.
    no_client = patch("analytics_agent.context.datahub.get_datahub_client", return_value=None)
    with no_client:
        outcome = _search_business_context_impl("anything")
    expected = {"error": "DataHub is not configured."}
    assert outcome == expected
Loading