From 86d53f3cb4b8f4bb77cbd1f7045219564e53b4b6 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 6 Apr 2026 22:27:36 +0000 Subject: [PATCH 1/3] Initial plan From d139d82d5bbe4df2e36fe1089d36f6881226ea67 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 6 Apr 2026 22:38:01 +0000 Subject: [PATCH 2/3] feat: surface LLM partial failures as user-visible warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add `llm_warnings` to WorkflowState to accumulate failure messages - Update summary_generation_node, impact_analysis_node, and privacy_compliance_node to append a human-readable warning to `llm_warnings` whenever their LLM call fails - Include `partial_failures` list in scan result API response so consumers can display which sections are unavailable - Show a yellow warning banner in SummaryPanel when `partial_failures` is non-empty (e.g. "Summary unavailable — LLM service temporarily failed") - Add CSS for the warning banner (dark + light theme) - Add 9 unit tests covering warning accumulation across all three nodes" Agent-Logs-Url: https://github.com/Stanzin7/ExtensionShield/sessions/8a1fb240-97cb-498e-aced-9c548c22f375 Co-authored-by: Stanzin7 <109467334+Stanzin7@users.noreply.github.com> --- .../src/components/report/SummaryPanel.jsx | 34 +++ .../src/components/report/SummaryPanel.scss | 41 ++++ src/extension_shield/api/main.py | 2 + src/extension_shield/workflow/nodes.py | 16 +- src/extension_shield/workflow/state.py | 5 + tests/workflow/test_nodes_llm_warnings.py | 199 ++++++++++++++++++ 6 files changed, 296 insertions(+), 1 deletion(-) create mode 100644 tests/workflow/test_nodes_llm_warnings.py diff --git a/frontend/src/components/report/SummaryPanel.jsx b/frontend/src/components/report/SummaryPanel.jsx index 3fda958b..5f2ba911 100644 --- a/frontend/src/components/report/SummaryPanel.jsx +++ b/frontend/src/components/report/SummaryPanel.jsx @@ -29,6 +29,11 @@ const SummaryPanel = ({ // Fallback: highlights (keyPoints) and SAST/engine keyFindings for concerns const { oneLiner, keyPoints } = normalizeHighlights(rawScanResult); + // Partial-failure warnings surfaced from backend LLM nodes + const partialFailures = Array.isArray(rawScanResult?.partial_failures) + ? rawScanResult.partial_failures + : []; + // SAST/engine keyFindings – use for Quick Summary concerns when they add value const engineConcerns = (keyFindings || []) .filter(f => f.severity === 'high' || f.severity === 'medium') @@ -59,6 +64,17 @@ const SummaryPanel = ({ ); }; + const partialFailureBanner = partialFailures.length > 0 ? ( +
+ {partialFailures.map((msg, idx) => ( +
+ ⚠️ + {msg} +
+ ))} +
+ ) : null; + if (showPlaceholder) { return (
@@ -69,6 +85,7 @@ const SummaryPanel = ({ {getDecisionBadge()} + {partialFailureBanner}

Review this extension before installing.

@@ -94,6 +111,20 @@ const SummaryPanel = ({ } if (!hasAnySummary) { + if (partialFailures.length > 0) { + return ( +
+
+

+ + Quick Summary +

+ {getDecisionBadge()} +
+ {partialFailureBanner} +
+ ); + } return null; } @@ -113,6 +144,7 @@ const SummaryPanel = ({ {getDecisionBadge()}
+ {partialFailureBanner}
{/* Headline – short takeaway */} @@ -189,6 +221,7 @@ const SummaryPanel = ({ {getDecisionBadge()}
+ {partialFailureBanner}
{/* Verdict - the headline */} @@ -272,6 +305,7 @@ const SummaryPanel = ({ {getDecisionBadge()}
+ {partialFailureBanner}
{/* One-liner summary */} diff --git a/frontend/src/components/report/SummaryPanel.scss b/frontend/src/components/report/SummaryPanel.scss index cba5dd1b..d78028e2 100644 --- a/frontend/src/components/report/SummaryPanel.scss +++ b/frontend/src/components/report/SummaryPanel.scss @@ -423,6 +423,47 @@ } } +// Partial-failure warning banner (LLM analyses that could not complete) +.summary-panel .summary-partial-failures { + display: flex; + flex-direction: column; + gap: 6px; + margin: 0 0 14px 0; + padding: 12px 16px; + background: rgba(234, 179, 8, 0.08); + border: 1px solid rgba(234, 179, 8, 0.35); + border-radius: 10px; + + .summary-partial-failure-item { + display: flex; + align-items: flex-start; + gap: 8px; + } + + .summary-partial-failure-icon { + flex-shrink: 0; + font-size: 14px; + line-height: 1.5; + } + + .summary-partial-failure-text { + font-size: var(--report-text-sm, 0.875rem); + line-height: 1.5; + color: rgba(253, 224, 71, 0.9); + font-weight: 500; + } +} + +// Light theme overrides for partial-failure banner +.light .summary-panel .summary-partial-failures { + background: rgba(234, 179, 8, 0.06); + border-color: rgba(161, 120, 0, 0.35); + + .summary-partial-failure-text { + color: #92600a; + } +} + // Light theme: decision-badge text WCAG-compliant, matches risk colors on light bg .light .summary-panel .decision-badge { &.decision-badge--allow { diff --git a/src/extension_shield/api/main.py b/src/extension_shield/api/main.py index cbcd50c6..66c79e8c 100644 --- a/src/extension_shield/api/main.py +++ b/src/extension_shield/api/main.py @@ -1562,6 +1562,8 @@ async def run_analysis_workflow(url: str, extension_id: str): "publisher_disclosures": build_publisher_disclosures( metadata, final_state.get("governance_bundle") ), + # Partial-failure warnings from LLM nodes (empty list = all LLM analyses succeeded) + "partial_failures": final_state.get("llm_warnings") or [], } # Final sanitization pass to ensure JSON-serializability diff --git a/src/extension_shield/workflow/nodes.py b/src/extension_shield/workflow/nodes.py index 7f928159..4a15bd15 100644 --- a/src/extension_shield/workflow/nodes.py +++ b/src/extension_shield/workflow/nodes.py @@ -432,9 +432,13 @@ def summary_generation_node(state: WorkflowState) -> Command: logger.warning("Summary generation failed, using fallback: %s", exc) executive_summary = None + warnings = list(state.get("llm_warnings") or []) + if executive_summary is None: + warnings.append("Summary unavailable — LLM service temporarily failed") + return Command( goto=IMPACT_ANALYSIS_NODE, - update={"executive_summary": executive_summary}, + update={"executive_summary": executive_summary, "llm_warnings": warnings}, ) @@ -481,11 +485,16 @@ def impact_analysis_node(state: WorkflowState) -> Command: updated_results = dict(analysis_results) updated_results["impact_analysis"] = impact_analysis + warnings = list(state.get("llm_warnings") or []) + if impact_analysis is None: + warnings.append("Impact analysis unavailable — LLM service temporarily failed") + return Command( goto=PRIVACY_COMPLIANCE_NODE, update={ "analysis_results": updated_results, "impact_analysis": impact_analysis, + "llm_warnings": warnings, }, ) @@ -528,11 +537,16 @@ def privacy_compliance_node(state: WorkflowState) -> Command: updated_results = dict(analysis_results) updated_results["privacy_compliance"] = privacy_compliance + warnings = list(state.get("llm_warnings") or []) + if privacy_compliance is None: + warnings.append("Privacy compliance unavailable — LLM service temporarily failed") + return Command( goto=GOVERNANCE_NODE, update={ "analysis_results": updated_results, "privacy_compliance": privacy_compliance, + "llm_warnings": warnings, }, ) diff --git a/src/extension_shield/workflow/state.py b/src/extension_shield/workflow/state.py index 01eef7f4..2549e07b 100644 --- a/src/extension_shield/workflow/state.py +++ b/src/extension_shield/workflow/state.py @@ -45,6 +45,9 @@ class WorkflowState(TypedDict): start_time (Optional[str]): ISO 8601 formatted start time of the workflow, if available. end_time (Optional[str]): ISO 8601 formatted end time of the workflow, if available. + llm_warnings (Optional[list]): Warning messages collected when LLM nodes fail + partially (e.g. summary or impact analysis unavailable). Each entry is a + human-readable string suitable for surfacing to the user. error (Optional[str]): Error message if the workflow has failed, otherwise None. """ @@ -64,6 +67,8 @@ class WorkflowState(TypedDict): governance_verdict: Optional[str] governance_report: Optional[Dict] governance_error: Optional[str] + # Partial-failure warnings accumulated by LLM nodes + llm_warnings: Optional[list] # Status fields status: WorkflowStatus start_time: Optional[str] diff --git a/tests/workflow/test_nodes_llm_warnings.py b/tests/workflow/test_nodes_llm_warnings.py new file mode 100644 index 00000000..c6005e6f --- /dev/null +++ b/tests/workflow/test_nodes_llm_warnings.py @@ -0,0 +1,199 @@ +""" +Tests for LLM-failure warning accumulation in workflow nodes. + +Verifies that summary_generation_node, impact_analysis_node, and +privacy_compliance_node each append a human-readable warning to the +`llm_warnings` state key when the underlying LLM call raises an exception, +so that partial failures are surfaced to the user via the API response. +""" + +import pytest +from unittest.mock import patch, MagicMock + +from extension_shield.workflow.nodes import ( + summary_generation_node, + impact_analysis_node, + privacy_compliance_node, +) + + +# --------------------------------------------------------------------------- +# Shared fixtures +# --------------------------------------------------------------------------- + +@pytest.fixture +def base_state(): + """Minimal workflow state shared across LLM node tests.""" + return { + "workflow_id": "test-scan-001", + "extension_id": "abcdefghijklmnopabcdefghijklmnop", + "manifest_data": { + "name": "Test Extension", + "version": "1.0.0", + "manifest_version": 3, + "permissions": ["storage"], + }, + "analysis_results": { + "permissions_analysis": {"permissions": ["storage"]}, + }, + "extension_metadata": {"title": "Test Extension"}, + "extension_dir": "/tmp/test_ext", + "llm_warnings": None, + } + + +# --------------------------------------------------------------------------- +# summary_generation_node +# --------------------------------------------------------------------------- + +class TestSummaryGenerationNodeWarnings: + """LLM failure warnings for summary_generation_node.""" + + def test_no_warning_on_success(self, base_state): + """When summary generation succeeds, no warning is added.""" + mock_summary = {"one_liner": "Safe extension", "summary": "All good."} + + with patch( + "extension_shield.workflow.nodes.SummaryGenerator" + ) as MockGen: + MockGen.return_value.generate.return_value = mock_summary + cmd = summary_generation_node(base_state) + + assert cmd.update.get("executive_summary") == mock_summary + assert cmd.update.get("llm_warnings") == [] + + def test_warning_added_on_generic_exception(self, base_state): + """When summary generation raises a generic exception, a warning is appended.""" + with patch( + "extension_shield.workflow.nodes.SummaryGenerator" + ) as MockGen: + MockGen.return_value.generate.side_effect = RuntimeError("timeout") + cmd = summary_generation_node(base_state) + + assert cmd.update.get("executive_summary") is None + warnings = cmd.update.get("llm_warnings", []) + assert len(warnings) == 1 + assert "Summary unavailable" in warnings[0] + assert "LLM service" in warnings[0] + + def test_warning_added_on_llm_fallback_error(self, base_state): + """When LLMFallbackError is raised, the same warning is appended.""" + from extension_shield.llm.clients.fallback import LLMFallbackError + + with patch( + "extension_shield.workflow.nodes.SummaryGenerator" + ) as MockGen: + MockGen.return_value.generate.side_effect = LLMFallbackError( + {"groq": "timeout", "openai": "rate limit"} + ) + cmd = summary_generation_node(base_state) + + assert cmd.update.get("executive_summary") is None + warnings = cmd.update.get("llm_warnings", []) + assert any("Summary unavailable" in w for w in warnings) + + def test_warning_accumulates_with_existing_warnings(self, base_state): + """Warnings from prior nodes are preserved when summary also fails.""" + base_state["llm_warnings"] = ["Impact analysis unavailable — LLM service temporarily failed"] + + with patch( + "extension_shield.workflow.nodes.SummaryGenerator" + ) as MockGen: + MockGen.return_value.generate.side_effect = RuntimeError("oops") + cmd = summary_generation_node(base_state) + + warnings = cmd.update.get("llm_warnings", []) + assert len(warnings) == 2 + assert any("Impact analysis" in w for w in warnings) + assert any("Summary unavailable" in w for w in warnings) + + +# --------------------------------------------------------------------------- +# impact_analysis_node +# --------------------------------------------------------------------------- + +class TestImpactAnalysisNodeWarnings: + """LLM failure warnings for impact_analysis_node.""" + + def test_no_warning_on_success(self, base_state): + """When impact analysis succeeds, no warning is added.""" + mock_impact = {"buckets": []} + + with patch( + "extension_shield.workflow.nodes.ImpactAnalyzer" + ) as MockAnalyzer: + MockAnalyzer.return_value.generate.return_value = mock_impact + cmd = impact_analysis_node(base_state) + + assert cmd.update.get("impact_analysis") == mock_impact + assert cmd.update.get("llm_warnings") == [] + + def test_warning_added_on_exception(self, base_state): + """When impact analysis raises an exception, a warning is appended.""" + with patch( + "extension_shield.workflow.nodes.ImpactAnalyzer" + ) as MockAnalyzer: + MockAnalyzer.return_value.generate.side_effect = RuntimeError("error") + cmd = impact_analysis_node(base_state) + + assert cmd.update.get("impact_analysis") is None + warnings = cmd.update.get("llm_warnings", []) + assert len(warnings) == 1 + assert "Impact analysis unavailable" in warnings[0] + assert "LLM service" in warnings[0] + + +# --------------------------------------------------------------------------- +# privacy_compliance_node +# --------------------------------------------------------------------------- + +class TestPrivacyComplianceNodeWarnings: + """LLM failure warnings for privacy_compliance_node.""" + + def test_no_warning_on_success(self, base_state): + """When privacy compliance analysis succeeds, no warning is added.""" + mock_privacy = {"data_collection": "minimal"} + + with patch( + "extension_shield.workflow.nodes.PrivacyComplianceAnalyzer" + ) as MockAnalyzer: + MockAnalyzer.return_value.generate.return_value = mock_privacy + cmd = privacy_compliance_node(base_state) + + assert cmd.update.get("privacy_compliance") == mock_privacy + assert cmd.update.get("llm_warnings") == [] + + def test_warning_added_on_exception(self, base_state): + """When privacy compliance analysis raises an exception, a warning is appended.""" + with patch( + "extension_shield.workflow.nodes.PrivacyComplianceAnalyzer" + ) as MockAnalyzer: + MockAnalyzer.return_value.generate.side_effect = RuntimeError("error") + cmd = privacy_compliance_node(base_state) + + assert cmd.update.get("privacy_compliance") is None + warnings = cmd.update.get("llm_warnings", []) + assert len(warnings) == 1 + assert "Privacy compliance unavailable" in warnings[0] + assert "LLM service" in warnings[0] + + def test_warnings_accumulate_across_all_three_nodes(self, base_state): + """All three LLM failures accumulate distinct warning messages.""" + # Simulate all three failing in sequence by pre-loading warnings + base_state["llm_warnings"] = [ + "Summary unavailable — LLM service temporarily failed", + "Impact analysis unavailable — LLM service temporarily failed", + ] + + with patch( + "extension_shield.workflow.nodes.PrivacyComplianceAnalyzer" + ) as MockAnalyzer: + MockAnalyzer.return_value.generate.side_effect = RuntimeError("error") + cmd = privacy_compliance_node(base_state) + + warnings = cmd.update.get("llm_warnings", []) + assert len(warnings) == 3 + messages = " ".join(warnings) + assert "Summary" in messages + assert "Impact analysis" in messages + assert "Privacy compliance" in messages From c63ead8eebb7064ab132fc4c32f079f3a7aa99ba Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 6 Apr 2026 22:40:06 +0000 Subject: [PATCH 3/3] refactor: address code review feedback on LLM warnings - Initialize llm_warnings fixture as [] instead of None in tests - Explicitly verify pre-existing warning is preserved unchanged in accumulation test - Improve aria-label on warning banner for better screen reader support Agent-Logs-Url: https://github.com/Stanzin7/ExtensionShield/sessions/8a1fb240-97cb-498e-aced-9c548c22f375 Co-authored-by: Stanzin7 <109467334+Stanzin7@users.noreply.github.com> --- frontend/src/components/report/SummaryPanel.jsx | 2 +- tests/workflow/test_nodes_llm_warnings.py | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/frontend/src/components/report/SummaryPanel.jsx b/frontend/src/components/report/SummaryPanel.jsx index 5f2ba911..b3711c62 100644 --- a/frontend/src/components/report/SummaryPanel.jsx +++ b/frontend/src/components/report/SummaryPanel.jsx @@ -65,7 +65,7 @@ const SummaryPanel = ({ }; const partialFailureBanner = partialFailures.length > 0 ? ( -
+
{partialFailures.map((msg, idx) => (
⚠️ diff --git a/tests/workflow/test_nodes_llm_warnings.py b/tests/workflow/test_nodes_llm_warnings.py index c6005e6f..bb903248 100644 --- a/tests/workflow/test_nodes_llm_warnings.py +++ b/tests/workflow/test_nodes_llm_warnings.py @@ -38,7 +38,7 @@ def base_state(): }, "extension_metadata": {"title": "Test Extension"}, "extension_dir": "/tmp/test_ext", - "llm_warnings": None, + "llm_warnings": [], } @@ -94,7 +94,8 @@ def test_warning_added_on_llm_fallback_error(self, base_state): def test_warning_accumulates_with_existing_warnings(self, base_state): """Warnings from prior nodes are preserved when summary also fails.""" - base_state["llm_warnings"] = ["Impact analysis unavailable — LLM service temporarily failed"] + prior_warning = "Impact analysis unavailable — LLM service temporarily failed" + base_state["llm_warnings"] = [prior_warning] with patch( "extension_shield.workflow.nodes.SummaryGenerator" @@ -104,7 +105,7 @@ def test_warning_accumulates_with_existing_warnings(self, base_state): warnings = cmd.update.get("llm_warnings", []) assert len(warnings) == 2 - assert any("Impact analysis" in w for w in warnings) + assert warnings[0] == prior_warning, "Pre-existing warning must be preserved unchanged" assert any("Summary unavailable" in w for w in warnings)