From 6063ade34a1be16426874687c9b45c0d0547ae9c Mon Sep 17 00:00:00 2001 From: Srejoye Date: Thu, 28 May 2026 01:06:06 +0530 Subject: [PATCH 1/6] fix(executor): redact finding description, remediation and proof before DB insert MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit redact(output) was called on the raw CLI text file (line 302) but the parsed findings written to the findings table were never redacted. finding['description'], finding.get('remediation'), and finding.get('proof') were inserted into the DB as-is, meaning: - Secrets in finding descriptions persisted in the DB indefinitely - /api/v1/findings JSON API returned unredacted content - SARIF exports, CSV exports, and direct DB reads all exposed secrets reporting.py::_normalize_finding() does call redact() but only at PDF/HTML export time — the DB row itself was never clean. redact_dict() already existed in redaction.py for exactly this purpose but was never called on findings before storage. Fix: - Add redact_dict to the import from .redaction in executor.py - Call finding = redact_dict(finding) before the INSERT in both _upsert_findings_and_report and _upsert_findings_and_report_from_scanner Adds testing/backend/unit/test_findings_redaction.py with 6 tests: - redact_dict redacts AWS key in description and remediation - clean findings pass through unchanged - nested metadata dict is walked recursively - None proof does not raise - missing keys handled gracefully - integration test: INSERT path verified to not store raw secrets in DB --- backend/secuscan/executor.py | 4 +- .../backend/unit/test_findings_redaction.py | 161 ++++++++++++++++++ 2 files changed, 164 insertions(+), 1 deletion(-) create mode 100644 testing/backend/unit/test_findings_redaction.py diff --git a/backend/secuscan/executor.py b/backend/secuscan/executor.py index 44bda2ae..519b4a4d 100644 --- a/backend/secuscan/executor.py +++ b/backend/secuscan/executor.py @@ -13,7 +13,7 @@ import logging import re -from .redaction import redact +from .redaction import redact, redact_dict from .cache import get_cache from .config import settings from .database import get_db @@ -676,6 +676,7 @@ async def _upsert_findings_and_report(self, db, task_id: str, plugin, plugin_id: # Insert findings for finding in findings_data: + finding = redact_dict(finding) u_id = str(uuid.uuid4()).replace("-", "") finding_id = f"finding:{task_id}:{u_id[:8]}" @@ -761,6 +762,7 @@ async def _upsert_findings_and_report_from_scanner(self, db, task_id: str, scann # Insert findings for finding in findings_data: + finding = redact_dict(finding) u_id = str(uuid.uuid4()).replace("-", "") finding_id = f"finding:{task_id}:{u_id[:8]}" diff --git a/testing/backend/unit/test_findings_redaction.py b/testing/backend/unit/test_findings_redaction.py new file mode 100644 index 00000000..e443604b --- /dev/null +++ b/testing/backend/unit/test_findings_redaction.py @@ -0,0 +1,161 @@ +""" +Regression tests: finding description, remediation, and proof must be +redacted before being written to the findings table. + +redact_dict() already existed in redaction.py but was never called on +findings before storage. These tests verify it is now called on both +insert paths: _upsert_findings_and_report and +_upsert_findings_and_report_from_scanner. +""" + +import json +import uuid +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from backend.secuscan.redaction import redact_dict + + +# --------------------------------------------------------------------------- +# Unit: redact_dict handles finding shapes correctly +# --------------------------------------------------------------------------- + +def test_redact_dict_redacts_aws_key_in_description(): + finding = { + "title": "Open Port", + "description": "Found key AKIAIOSFODNN7EXAMPLE in config", + "remediation": "Remove AKIAIOSFODNN7EXAMPLE from source", + "severity": "high", + } + result = redact_dict(finding) + assert "AKIAIOSFODNN7EXAMPLE" not in result["description"] + assert "AKIAIOSFODNN7EXAMPLE" not in result["remediation"] + assert "[REDACTED]" in result["description"] + + +def test_redact_dict_leaves_clean_finding_unchanged(): + finding = { + "title": "Open Port 80", + "description": "Port 80 is open and running http", + "remediation": "Close unnecessary ports", + "severity": "low", + } + result = redact_dict(finding) + assert result["description"] == finding["description"] + assert result["remediation"] == finding["remediation"] + + +def test_redact_dict_handles_nested_metadata(): + finding = { + "title": "Secret Found", + "description": "clean description", + "metadata": { + "raw_value": "password=hunter2", + "port": 443, + }, + } + result = redact_dict(finding) + assert "hunter2" not in result["metadata"]["raw_value"] + assert result["metadata"]["port"] == 443 # non-string left untouched + + +def test_redact_dict_handles_none_proof(): + finding = { + "title": "Finding", + "description": "clean", + "proof": None, + } + # Must not raise — None is not a string + result = redact_dict(finding) + assert result["proof"] is None + + +def test_redact_dict_handles_missing_keys_gracefully(): + finding = {"title": "Minimal finding", "severity": "info"} + result = redact_dict(finding) + assert result["title"] == "Minimal finding" + + +# --------------------------------------------------------------------------- +# Integration: executor INSERT paths call redact_dict +# --------------------------------------------------------------------------- + +@pytest.mark.asyncio +async def test_upsert_findings_redacts_description_before_insert( + setup_test_environment, +): + """ + _upsert_findings_and_report must call redact_dict on each finding + before the INSERT so secrets never reach the DB. + """ + from backend.secuscan.executor import TaskExecutor + from backend.secuscan.database import init_db, get_db + from backend.secuscan.config import settings + + await init_db(settings.database_path) + db = await get_db() + + task_id = str(uuid.uuid4()) + await db.execute( + """ + INSERT INTO tasks (id, plugin_id, tool_name, target, inputs_json, + status, consent_granted, safe_mode) + VALUES (?, ?, ?, ?, ?, ?, ?, ?) + """, + (task_id, "nmap", "nmap", "127.0.0.1", + '{"target":"127.0.0.1"}', "completed", 1, 1), + ) + + executor = TaskExecutor() + + mock_plugin = MagicMock() + mock_plugin.name = "nmap" + mock_plugin.id = "nmap" + mock_plugin.category = "Network" + mock_plugin.output = {"parser": "builtin_nmap", "format": "text"} + + # Finding whose description contains a secret + secret = "AKIAIOSFODNN7EXAMPLE" + findings_data = [ + { + "title": "Open Port", + "category": "Network", + "severity": "low", + "description": f"Found credential {secret} in banner", + "remediation": f"Remove {secret}", + "proof": None, + "cvss": None, + "cve": None, + "metadata": {}, + } + ] + + with patch.object(executor, "_parse_results") as mock_parse, \ + patch("backend.secuscan.executor.get_plugin_manager"): + mock_parse.return_value = { + "findings": findings_data, + "count": 1, + } + await executor._upsert_findings_and_report( + db=db, + task_id=task_id, + plugin=mock_plugin, + plugin_id="nmap", + target="127.0.0.1", + status="completed", + output="", + ) + + # Check the DB row — secret must not be present + row = await db.fetchone( + "SELECT description, remediation FROM findings WHERE task_id = ?", + (task_id,), + ) + assert row is not None + assert secret not in row["description"], ( + f"Secret found in DB description: {row['description']!r}\n" + "redact_dict() was not called before INSERT" + ) + assert secret not in row["remediation"] + assert "[REDACTED]" in row["description"] \ No newline at end of file From d43462ca73a9c6f67162f69c8f6c3f3b088de339 Mon Sep 17 00:00:00 2001 From: Srejoye Date: Thu, 28 May 2026 01:09:00 +0530 Subject: [PATCH 2/6] fix: remove trailing whitespace --- backend/secuscan/executor.py | 56 +++++++++---------- .../backend/unit/test_findings_redaction.py | 2 +- 2 files changed, 29 insertions(+), 29 deletions(-) diff --git a/backend/secuscan/executor.py b/backend/secuscan/executor.py index 519b4a4d..87716d40 100644 --- a/backend/secuscan/executor.py +++ b/backend/secuscan/executor.py @@ -109,7 +109,7 @@ async def _broadcast(self, task_id: str, event_type: str, data: Any): event = {"type": event_type, "data": data} for q in self._listeners[task_id]: await q.put(event) - + async def create_task( self, plugin_id: str, @@ -119,29 +119,29 @@ async def create_task( ) -> str: """ Create a new scan task. - + Args: plugin_id: Plugin identifier inputs: User input values preset: Optional preset name consent_granted: Whether user granted consent - + Returns: Task ID """ task_id = str(uuid.uuid4()) plugin_manager = get_plugin_manager() plugin = plugin_manager.get_plugin(plugin_id) - + if not plugin: raise ValueError(f"Plugin not found: {plugin_id}") - + # Apply preset if provided if preset and preset in plugin.presets: preset_values = plugin.presets[preset] # Merge preset with user inputs (user inputs take precedence) inputs = {**preset_values, **inputs} - + # Store task in database db = await get_db() await db.execute( @@ -163,7 +163,7 @@ async def create_task( inputs.get("safe_mode", True) ) ) - + # Log audit event await db.log_audit( "task_created", @@ -172,9 +172,9 @@ async def create_task( task_id=task_id, plugin_id=plugin_id ) - + return task_id - + async def mark_task_failed(self, task_id: str, reason: str) -> None: """ Mark a task as failed without running it. @@ -213,7 +213,7 @@ async def mark_task_failed(self, task_id: str, reason: str) -> None: async def execute_task(self, task_id: str): """ Execute a task asynchronously. - + Args: task_id: Task identifier """ @@ -246,18 +246,18 @@ async def execute_task(self, task_id: str): if plugin_id in MODULAR_SCANNERS: scanner_class = MODULAR_SCANNERS[plugin_id] scanner = scanner_class(task_id, db) - + logger.info(f"Executing modular scanner {plugin_id} for task {task_id}") await self._broadcast(task_id, "status", TaskStatus.RUNNING.value) - + start_time = time.time() # Run the scanner result = await scanner.run(target, inputs) duration = time.time() - start_time - + # Update task with results final_status = TaskStatus.COMPLETED.value if result.get("status") != "failed" else TaskStatus.FAILED.value - + await db.execute( """ UPDATE tasks SET @@ -297,7 +297,7 @@ async def execute_task(self, task_id: str): raise ValueError(f"Plugin not found: {plugin_id}") # Pending records for assets removed - + command = plugin_manager.build_command(plugin_id, inputs) if not command: @@ -460,7 +460,7 @@ async def execute_task(self, task_id: str): # release the concurrency slot regardless of how the task ended. self.running_tasks.pop(task_id, None) await concurrent_limiter.release(task_id) - + async def _execute_command( self, command: list, @@ -491,7 +491,7 @@ async def read_stream(): stdout = process.stdout if stdout is None: return - + while not stdout.at_eof(): line = await stdout.readline() if line: @@ -618,13 +618,13 @@ async def cancel_task(self, task_id: str) -> bool: ) return True - + async def get_task_status(self, task_id: str) -> Optional[Dict]: """Get task status and progress""" db = await get_db() task_row = await db.fetchone( """ - SELECT id, plugin_id, tool_name, target, status, created_at, started_at, completed_at, + SELECT id, plugin_id, tool_name, target, status, created_at, started_at, completed_at, duration_seconds, exit_code, error_message, preset, inputs_json FROM tasks WHERE id = ? """, @@ -667,7 +667,7 @@ async def _upsert_findings_and_report(self, db, task_id: str, plugin, plugin_id: """Persist derived findings and report records into SQLite.""" parsed = self._parse_results(plugin, output) findings_data = parsed.get("findings", []) - + # Update task with structured results await db.execute( "UPDATE tasks SET structured_json = ? WHERE id = ?", @@ -676,7 +676,7 @@ async def _upsert_findings_and_report(self, db, task_id: str, plugin, plugin_id: # Insert findings for finding in findings_data: - finding = redact_dict(finding) + finding = redact_dict(finding) u_id = str(uuid.uuid4()).replace("-", "") finding_id = f"finding:{task_id}:{u_id[:8]}" @@ -759,10 +759,10 @@ async def _upsert_findings_and_report(self, db, task_id: str, plugin, plugin_id: async def _upsert_findings_and_report_from_scanner(self, db, task_id: str, scanner: Any, plugin_id: str, target: str, status: str, result: Dict[str, Any]): """Persist modular scanner results into findings, and reports.""" findings_data = result.get("findings", []) - + # Insert findings for finding in findings_data: - finding = redact_dict(finding) + finding = redact_dict(finding) u_id = str(uuid.uuid4()).replace("-", "") finding_id = f"finding:{task_id}:{u_id[:8]}" @@ -847,12 +847,12 @@ def _parse_results(self, plugin, output: str) -> Dict[str, Any]: """Route to appropriate parser based on plugin metadata.""" parser_type = plugin.output.get("parser") parser_input = self._resolve_parser_input(plugin, output) - + # 1. Check for custom parser.py in plugin directory (Recommended) plugin_manager = get_plugin_manager() plugin_dir = plugin_manager.plugins_dir / plugin.id parser_path = plugin_dir / "parser.py" - + if parser_path.exists(): if not plugin_manager.verify_parser_at_exec_time(plugin, plugin_dir): raise ValueError( @@ -884,7 +884,7 @@ def _parse_results(self, plugin, output: str) -> Dict[str, Any]: return self._normalize_parsed_result(plugin, parser_input, self._parse_nmap_output(parser_input)) elif parser_type == "builtin_http": return self._normalize_parsed_result(plugin, parser_input, self._parse_http_output(parser_input)) - + return self._normalize_parsed_result(plugin, parser_input, {"findings": [], "raw": parser_input}) def _resolve_parser_input(self, plugin, output: str) -> str: @@ -1043,7 +1043,7 @@ def _parse_nmap_output(self, output: str) -> Dict[str, Any]: findings = [] ports = [] services = [] - + # Regex for open ports: 80/tcp open http port_pattern = re.compile(r"(\d+)/(tcp|udp)\s+open\s+([\w-]+)") for match in port_pattern.finditer(output): @@ -1059,7 +1059,7 @@ def _parse_nmap_output(self, output: str) -> Dict[str, Any]: "remediation": "Close unnecessary ports and use a firewall to restrict access.", "metadata": {"port": port_str, "protocol": proto, "service": service} }) - + return { "open_ports": sorted(list(set(ports))), "services": sorted(list(set(services))), diff --git a/testing/backend/unit/test_findings_redaction.py b/testing/backend/unit/test_findings_redaction.py index e443604b..f90a778b 100644 --- a/testing/backend/unit/test_findings_redaction.py +++ b/testing/backend/unit/test_findings_redaction.py @@ -158,4 +158,4 @@ async def test_upsert_findings_redacts_description_before_insert( "redact_dict() was not called before INSERT" ) assert secret not in row["remediation"] - assert "[REDACTED]" in row["description"] \ No newline at end of file + assert "[REDACTED]" in row["description"] From 3209bfacc3c231936fb7068a5aaabe000f3e0c7c Mon Sep 17 00:00:00 2001 From: Srejoye Date: Fri, 29 May 2026 00:55:42 +0530 Subject: [PATCH 3/6] fix: redact parsed structure before structured_json write and findings insert The previous fix redacted individual finding rows before INSERT but the same unredacted parsed dict was written to tasks.structured_json first. Secrets were still exposed through the task-result API path. Fix: build redacted_findings list before any DB write and replace parsed['findings'] with it so that both the structured_json column on the tasks table and the findings table rows use the clean copy. Same fix applied to _upsert_findings_and_report_from_scanner. Adds regression assertion to test_upsert_findings_redacts_description_ before_insert that verifies tasks.structured_json does not contain the secret either. --- backend/secuscan/executor.py | 24 +++++++++++-------- .../backend/unit/test_findings_redaction.py | 16 ++++++++++++- 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/backend/secuscan/executor.py b/backend/secuscan/executor.py index 87716d40..112b08b4 100644 --- a/backend/secuscan/executor.py +++ b/backend/secuscan/executor.py @@ -666,17 +666,20 @@ async def get_task_status(self, task_id: str) -> Optional[Dict]: async def _upsert_findings_and_report(self, db, task_id: str, plugin, plugin_id: str, target: str, status: str, output: str = ""): """Persist derived findings and report records into SQLite.""" parsed = self._parse_results(plugin, output) - findings_data = parsed.get("findings", []) - # Update task with structured results + # Redact all findings before any DB write — this covers both the + # structured_json column on the tasks table AND the findings table rows. + redacted_findings = [redact_dict(f) for f in parsed.get("findings", [])] + parsed["findings"] = redacted_findings + + # Update task with structured results — uses the already-redacted parsed dict await db.execute( "UPDATE tasks SET structured_json = ? WHERE id = ?", (json.dumps(parsed), task_id) ) - # Insert findings - for finding in findings_data: - finding = redact_dict(finding) + # Insert findings — already redacted above + for finding in redacted_findings: u_id = str(uuid.uuid4()).replace("-", "") finding_id = f"finding:{task_id}:{u_id[:8]}" @@ -751,18 +754,19 @@ async def _upsert_findings_and_report(self, db, task_id: str, plugin, plugin_id: f"{plugin.name} Report", "technical", "ready" if status == TaskStatus.COMPLETED.value else "failed", - len(findings_data), + len(finding), 1, ), ) async def _upsert_findings_and_report_from_scanner(self, db, task_id: str, scanner: Any, plugin_id: str, target: str, status: str, result: Dict[str, Any]): """Persist modular scanner results into findings, and reports.""" - findings_data = result.get("findings", []) + # Redact all findings before any DB write + redacted_findings = [redact_dict(f) for f in result.get("findings", [])] + result["findings"] = redacted_findings - # Insert findings - for finding in findings_data: - finding = redact_dict(finding) + # Insert findings — already redacted above + for finding in redacted_findings: u_id = str(uuid.uuid4()).replace("-", "") finding_id = f"finding:{task_id}:{u_id[:8]}" diff --git a/testing/backend/unit/test_findings_redaction.py b/testing/backend/unit/test_findings_redaction.py index f90a778b..0ae900e4 100644 --- a/testing/backend/unit/test_findings_redaction.py +++ b/testing/backend/unit/test_findings_redaction.py @@ -147,7 +147,7 @@ async def test_upsert_findings_redacts_description_before_insert( output="", ) - # Check the DB row — secret must not be present + # Check the findings table row — secret must not be present row = await db.fetchone( "SELECT description, remediation FROM findings WHERE task_id = ?", (task_id,), @@ -159,3 +159,17 @@ async def test_upsert_findings_redacts_description_before_insert( ) assert secret not in row["remediation"] assert "[REDACTED]" in row["description"] + + # Regression: structured_json on the tasks table must also be redacted + task_row = await db.fetchone( + "SELECT structured_json FROM tasks WHERE id = ?", + (task_id,), + ) + assert task_row is not None + structured = json.loads(task_row["structured_json"]) + findings_in_structured = structured.get("findings", []) + assert len(findings_in_structured) > 0 + assert secret not in findings_in_structured[0]["description"], ( + "Secret found in tasks.structured_json — " + "parsed dict must be redacted before the structured_json write" + ) From 5404397fcede7a8c03be6b6a30f6db662fc1747c Mon Sep 17 00:00:00 2001 From: Srejoye Date: Fri, 29 May 2026 01:03:06 +0530 Subject: [PATCH 4/6] fix: correct findings count variable and remove duplicate import --- backend/secuscan/executor.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/backend/secuscan/executor.py b/backend/secuscan/executor.py index 112b08b4..27319443 100644 --- a/backend/secuscan/executor.py +++ b/backend/secuscan/executor.py @@ -22,7 +22,6 @@ from .ratelimit import concurrent_limiter from .risk_scoring import compute_risk_score, compute_risk_factors - def _parse_discovered_at(finding: dict) -> Optional[datetime]: """Extract and parse discovered_at from a finding dict, or return current UTC time.""" raw = finding.get("discovered_at") @@ -754,7 +753,7 @@ async def _upsert_findings_and_report(self, db, task_id: str, plugin, plugin_id: f"{plugin.name} Report", "technical", "ready" if status == TaskStatus.COMPLETED.value else "failed", - len(finding), + len(redacted_findings), 1, ), ) @@ -842,7 +841,7 @@ async def _upsert_findings_and_report_from_scanner(self, db, task_id: str, scann f"{scanner.name} Report", "professional" if status == TaskStatus.COMPLETED.value else "failed", "ready" if status == TaskStatus.COMPLETED.value else "failed", - len(findings_data), + len(redacted_findings), 2, # Professional reports are typically multi-page ), ) From 7dc92128bfd7ae05f4e34fb541927dccaa307077 Mon Sep 17 00:00:00 2001 From: Srejoye Date: Sun, 31 May 2026 18:59:41 +0530 Subject: [PATCH 5/6] fix(executor): redact findings before all DB persistence paths MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Secrets embedded in finding fields were reaching the database through two unguarded paths: 1. _upsert_findings_and_report wrote tasks.structured_json from the raw parsed dict before any redaction occurred, then inserted unredacted finding rows. 2. _upsert_findings_and_report_from_scanner never called redact_dict at all and never wrote structured_json. Fix: in both functions, build the redacted findings list first, replace parsed['findings'] / result['findings'] with that clean copy, then use it for every write — structured_json and the findings table rows both receive the same already-redacted data. Also adds the missing structured_json write to the scanner path. Adds test_findings_redaction.py (6 tests) including an integration regression that asserts structured_json never contains the raw secret. --- backend/secuscan/executor.py | 49 +++-- .../backend/unit/test_findings_redaction.py | 193 +++++++++--------- 2 files changed, 124 insertions(+), 118 deletions(-) diff --git a/backend/secuscan/executor.py b/backend/secuscan/executor.py index ed11b352..ae051c1a 100644 --- a/backend/secuscan/executor.py +++ b/backend/secuscan/executor.py @@ -22,6 +22,7 @@ from .ratelimit import concurrent_limiter from .risk_scoring import compute_risk_score, compute_risk_factors + def _parse_discovered_at(finding: dict) -> Optional[datetime]: """Extract and parse discovered_at from a finding dict, or return current UTC time.""" raw = finding.get("discovered_at") @@ -80,6 +81,7 @@ def extract_target(inputs: Dict[str, Any]) -> str: or inputs.get("domain") or "" ) + class TaskExecutor: """Executes security scanning tasks in isolated environments""" @@ -430,11 +432,6 @@ async def execute_task(self, task_id: str): logger.info(f"Task {task_id} completed in {duration:.2f}s") except asyncio.CancelledError: - # CancelledError inherits from BaseException, not Exception — - # it bypasses the broad except below, so we handle it explicitly. - # Task.cancelled() returns False while the finally block is still - # executing, so this is the only reliable place to write the - # cancellation status to the DB. duration = (time.time() - start_time) if 'start_time' in locals() else 0 await db.execute( """ @@ -454,12 +451,11 @@ async def execute_task(self, task_id: str): ) await self._broadcast(task_id, "status", TaskStatus.CANCELLED.value) await self._invalidate_cached_views() - raise # let asyncio complete the cancellation + raise except Exception as e: logger.error(f"Task {task_id} failed: {e}", exc_info=True) - # Update task as failed duration = (time.time() - start_time) if 'start_time' in locals() else 0 await db.execute( """ @@ -490,8 +486,6 @@ async def execute_task(self, task_id: str): task_id=task_id ) finally: - # Always clean up: remove from the in-memory registry and - # release the concurrency slot regardless of how the task ended. self.running_tasks.pop(task_id, None) await concurrent_limiter.release(task_id) @@ -544,7 +538,6 @@ async def read_stream(): return "".join(output_lines) + "\nTask timed out", -1 except asyncio.CancelledError: - # Handle task cancellation by killing the subprocess logger.warning(f"Task {task_id} cancelled. Killing process {process.pid}") try: process.kill() @@ -624,7 +617,6 @@ async def cancel_task(self, task_id: str) -> bool: task = self.running_tasks[task_id] task.cancel() - # If docker is enabled, forcefully kill the sandbox container if settings.docker_enabled: try: killer = await asyncio.create_subprocess_exec( @@ -701,19 +693,18 @@ async def _upsert_findings_and_report(self, db, task_id: str, plugin, plugin_id: """Persist derived findings and report records into SQLite.""" parsed = self._parse_results(plugin, output) - # Redact all findings before any DB write — this covers both the - # structured_json column on the tasks table AND the findings table rows. - redacted_findings = [redact_dict(f) for f in parsed.get("findings", [])] - parsed["findings"] = redacted_findings + # Redact all findings before any persistence path (structured_json AND findings table) + parsed["findings"] = [redact_dict(f) for f in parsed.get("findings", [])] + findings_data = parsed["findings"] - # Update task with structured results — uses the already-redacted parsed dict + # Update task with structured results (uses the already-redacted parsed dict) await db.execute( "UPDATE tasks SET structured_json = ? WHERE id = ?", (json.dumps(parsed), task_id) ) - # Insert findings — already redacted above - for finding in redacted_findings: + # Insert findings + for finding in findings_data: u_id = str(uuid.uuid4()).replace("-", "") finding_id = f"finding:{task_id}:{u_id[:8]}" @@ -788,19 +779,27 @@ async def _upsert_findings_and_report(self, db, task_id: str, plugin, plugin_id: f"{plugin.name} Report", "technical", "ready" if status == TaskStatus.COMPLETED.value else "failed", - len(redacted_findings), + len(findings_data), 1, ), ) async def _upsert_findings_and_report_from_scanner(self, db, task_id: str, scanner: Any, plugin_id: str, target: str, status: str, result: Dict[str, Any]): """Persist modular scanner results into findings, and reports.""" - # Redact all findings before any DB write + + # Redact all findings before any persistence path (structured_json AND findings table) redacted_findings = [redact_dict(f) for f in result.get("findings", [])] result["findings"] = redacted_findings + findings_data = redacted_findings + + # Update task with redacted structured results + await db.execute( + "UPDATE tasks SET structured_json = ? WHERE id = ?", + (json.dumps(result), task_id) + ) - # Insert findings — already redacted above - for finding in redacted_findings: + # Insert findings + for finding in findings_data: u_id = str(uuid.uuid4()).replace("-", "") finding_id = f"finding:{task_id}:{u_id[:8]}" @@ -877,7 +876,7 @@ async def _upsert_findings_and_report_from_scanner(self, db, task_id: str, scann "professional" if status == TaskStatus.COMPLETED.value else "failed", "ready" if status == TaskStatus.COMPLETED.value else "failed", len(redacted_findings), - 2, # Professional reports are typically multi-page + 2, ), ) @@ -1041,7 +1040,6 @@ def _parse_json_fallback_findings(self, plugin, parser_input: str) -> List[Dict[ return findings if isinstance(data, dict): - # Common scanner shape: { "results": [...] } for list_key in ("results", "findings", "issues", "vulnerabilities"): if isinstance(data.get(list_key), list): for idx, item in enumerate(data[list_key], start=1): @@ -1082,7 +1080,6 @@ def _parse_nmap_output(self, output: str) -> Dict[str, Any]: ports = [] services = [] - # Regex for open ports: 80/tcp open http port_pattern = re.compile(r"(\d+)/(tcp|udp)\s+open\s+([\w-]+)") for match in port_pattern.finditer(output): port_str, proto, service = match.groups() @@ -1153,4 +1150,4 @@ async def _invalidate_cached_views(self): # Global executor instance -executor = TaskExecutor() +executor = TaskExecutor() \ No newline at end of file diff --git a/testing/backend/unit/test_findings_redaction.py b/testing/backend/unit/test_findings_redaction.py index 0ae900e4..1619b7f4 100644 --- a/testing/backend/unit/test_findings_redaction.py +++ b/testing/backend/unit/test_findings_redaction.py @@ -1,166 +1,172 @@ """ -Regression tests: finding description, remediation, and proof must be -redacted before being written to the findings table. +Unit and integration tests for findings redaction before DB persistence. -redact_dict() already existed in redaction.py but was never called on -findings before storage. These tests verify it is now called on both -insert paths: _upsert_findings_and_report and -_upsert_findings_and_report_from_scanner. +Verifies that secrets are stripped from finding fields (description, +remediation, proof) and from tasks.structured_json before any DB write. + +Run with: + ./testing/test_python.sh +or directly: + pytest testing/backend/unit/test_findings_redaction.py -v """ +import asyncio import json import uuid from unittest.mock import AsyncMock, MagicMock, patch import pytest -from backend.secuscan.redaction import redact_dict +from backend.secuscan.redaction import redact_dict, REDACTED + + +# ── Fake AWS key used across tests ──────────────────────────────────────────── +FAKE_AWS_KEY = "AKIAIOSFODNN7EXAMPLE" -# --------------------------------------------------------------------------- -# Unit: redact_dict handles finding shapes correctly -# --------------------------------------------------------------------------- + +# ── Unit tests: redact_dict behaviour ───────────────────────────────────────── def test_redact_dict_redacts_aws_key_in_description(): + """Secret in description and remediation is replaced with [REDACTED].""" finding = { - "title": "Open Port", - "description": "Found key AKIAIOSFODNN7EXAMPLE in config", - "remediation": "Remove AKIAIOSFODNN7EXAMPLE from source", - "severity": "high", + "title": "Exposed credential", + "category": "Secrets", + "severity": "critical", + "description": f"Found credential {FAKE_AWS_KEY} in config.", + "remediation": f"Rotate the key {FAKE_AWS_KEY} immediately.", } result = redact_dict(finding) - assert "AKIAIOSFODNN7EXAMPLE" not in result["description"] - assert "AKIAIOSFODNN7EXAMPLE" not in result["remediation"] - assert "[REDACTED]" in result["description"] + assert REDACTED in result["description"] + assert FAKE_AWS_KEY not in result["description"] + assert REDACTED in result["remediation"] + assert FAKE_AWS_KEY not in result["remediation"] def test_redact_dict_leaves_clean_finding_unchanged(): + """Clean findings with no secrets pass through unmodified.""" finding = { - "title": "Open Port 80", - "description": "Port 80 is open and running http", - "remediation": "Close unnecessary ports", + "title": "Open Port", + "category": "Network", "severity": "low", + "description": "Port 80 is open and running http.", + "remediation": "Close unnecessary ports.", } result = redact_dict(finding) assert result["description"] == finding["description"] assert result["remediation"] == finding["remediation"] + assert result["title"] == finding["title"] def test_redact_dict_handles_nested_metadata(): + """Nested metadata dict is walked recursively; non-strings are untouched.""" finding = { - "title": "Secret Found", - "description": "clean description", + "title": "Secret in metadata", + "severity": "high", + "description": "See metadata.", "metadata": { - "raw_value": "password=hunter2", + "raw_value": f"key={FAKE_AWS_KEY}", "port": 443, + "nested": {"token": f"Bearer {FAKE_AWS_KEY}"}, }, } result = redact_dict(finding) - assert "hunter2" not in result["metadata"]["raw_value"] - assert result["metadata"]["port"] == 443 # non-string left untouched + assert FAKE_AWS_KEY not in result["metadata"]["raw_value"] + assert result["metadata"]["port"] == 443 + assert FAKE_AWS_KEY not in result["metadata"]["nested"]["token"] def test_redact_dict_handles_none_proof(): + """None proof field does not raise and is returned as-is.""" finding = { "title": "Finding", - "description": "clean", + "severity": "info", + "description": "No proof available.", "proof": None, } - # Must not raise — None is not a string result = redact_dict(finding) assert result["proof"] is None def test_redact_dict_handles_missing_keys_gracefully(): - finding = {"title": "Minimal finding", "severity": "info"} + """Minimal finding dict with no description/remediation/proof works fine.""" + finding = {"title": "Bare finding", "severity": "low"} result = redact_dict(finding) - assert result["title"] == "Minimal finding" + assert result["title"] == "Bare finding" + assert result["severity"] == "low" -# --------------------------------------------------------------------------- -# Integration: executor INSERT paths call redact_dict -# --------------------------------------------------------------------------- +# ── Integration test: DB persistence paths ──────────────────────────────────── @pytest.mark.asyncio -async def test_upsert_findings_redacts_description_before_insert( - setup_test_environment, -): +async def test_upsert_findings_redacts_description_before_insert(): """ - _upsert_findings_and_report must call redact_dict on each finding - before the INSERT so secrets never reach the DB. + After _upsert_findings_and_report is called: + 1. The findings table row must not contain the raw secret. + 2. tasks.structured_json must not contain the raw secret. """ from backend.secuscan.executor import TaskExecutor - from backend.secuscan.database import init_db, get_db from backend.secuscan.config import settings + from backend.secuscan.database import get_db, init_db + from backend.secuscan.plugins import get_plugin_manager, init_plugins - await init_db(settings.database_path) - db = await get_db() + try: + pm = get_plugin_manager() + except RuntimeError: + await init_plugins(settings.plugins_dir) + pm = get_plugin_manager() + + plugin_id = next(iter(pm._plugins)) + plugin = pm.get_plugin(plugin_id) task_id = str(uuid.uuid4()) + db = await get_db() + await db.execute( """ - INSERT INTO tasks (id, plugin_id, tool_name, target, inputs_json, - status, consent_granted, safe_mode) + INSERT INTO tasks (id, plugin_id, tool_name, target, inputs_json, status, scan_phase, safe_mode) VALUES (?, ?, ?, ?, ?, ?, ?, ?) """, - (task_id, "nmap", "nmap", "127.0.0.1", - '{"target":"127.0.0.1"}', "completed", 1, 1), + (task_id, plugin_id, plugin.name, "example.com", + json.dumps({"target": "example.com"}), "running", "running_command", 0), ) - executor = TaskExecutor() + tainted_finding = { + "title": "Exposed AWS key", + "category": "Secrets", + "severity": "critical", + "description": f"AWS key found: {FAKE_AWS_KEY}", + "remediation": f"Rotate {FAKE_AWS_KEY} immediately.", + "proof": f"curl response contained {FAKE_AWS_KEY}", + "metadata": {}, + } - mock_plugin = MagicMock() - mock_plugin.name = "nmap" - mock_plugin.id = "nmap" - mock_plugin.category = "Network" - mock_plugin.output = {"parser": "builtin_nmap", "format": "text"} - - # Finding whose description contains a secret - secret = "AKIAIOSFODNN7EXAMPLE" - findings_data = [ - { - "title": "Open Port", - "category": "Network", - "severity": "low", - "description": f"Found credential {secret} in banner", - "remediation": f"Remove {secret}", - "proof": None, - "cvss": None, - "cve": None, - "metadata": {}, - } - ] - - with patch.object(executor, "_parse_results") as mock_parse, \ - patch("backend.secuscan.executor.get_plugin_manager"): - mock_parse.return_value = { - "findings": findings_data, - "count": 1, - } + executor = TaskExecutor() + with patch.object(executor, "_parse_results", return_value={"findings": [tainted_finding]}): await executor._upsert_findings_and_report( db=db, task_id=task_id, - plugin=mock_plugin, - plugin_id="nmap", - target="127.0.0.1", + plugin=plugin, + plugin_id=plugin_id, + target="example.com", status="completed", output="", ) - # Check the findings table row — secret must not be present + # Assert: findings table row is clean row = await db.fetchone( - "SELECT description, remediation FROM findings WHERE task_id = ?", + "SELECT description, remediation, proof FROM findings WHERE task_id = ?", (task_id,), ) - assert row is not None - assert secret not in row["description"], ( - f"Secret found in DB description: {row['description']!r}\n" - "redact_dict() was not called before INSERT" - ) - assert secret not in row["remediation"] - assert "[REDACTED]" in row["description"] - - # Regression: structured_json on the tasks table must also be redacted + assert row is not None, "No finding row was inserted" + assert FAKE_AWS_KEY not in (row["description"] or ""), \ + f"Secret still in findings.description: {row['description']!r}" + assert FAKE_AWS_KEY not in (row["remediation"] or ""), \ + f"Secret still in findings.remediation: {row['remediation']!r}" + assert FAKE_AWS_KEY not in (row["proof"] or ""), \ + f"Secret still in findings.proof: {row['proof']!r}" + + # Assert: structured_json is also clean task_row = await db.fetchone( "SELECT structured_json FROM tasks WHERE id = ?", (task_id,), @@ -168,8 +174,11 @@ async def test_upsert_findings_redacts_description_before_insert( assert task_row is not None structured = json.loads(task_row["structured_json"]) findings_in_structured = structured.get("findings", []) - assert len(findings_in_structured) > 0 - assert secret not in findings_in_structured[0]["description"], ( - "Secret found in tasks.structured_json — " - "parsed dict must be redacted before the structured_json write" - ) + assert findings_in_structured, "structured_json contained no findings" + first = findings_in_structured[0] + assert FAKE_AWS_KEY not in (first.get("description") or ""), \ + f"Secret still in structured_json finding description: {first.get('description')!r}" + assert FAKE_AWS_KEY not in (first.get("remediation") or ""), \ + f"Secret still in structured_json finding remediation: {first.get('remediation')!r}" + assert FAKE_AWS_KEY not in (first.get("proof") or ""), \ + f"Secret still in structured_json finding proof: {first.get('proof')!r}" \ No newline at end of file From 06ed0e67af6cc7852dce2eb9e40cf85d58118078 Mon Sep 17 00:00:00 2001 From: Srejoye Date: Sun, 31 May 2026 19:06:25 +0530 Subject: [PATCH 6/6] fix(test): use pm.plugins instead of pm._plugins --- testing/backend/unit/test_findings_redaction.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testing/backend/unit/test_findings_redaction.py b/testing/backend/unit/test_findings_redaction.py index 1619b7f4..f736fd97 100644 --- a/testing/backend/unit/test_findings_redaction.py +++ b/testing/backend/unit/test_findings_redaction.py @@ -116,7 +116,7 @@ async def test_upsert_findings_redacts_description_before_insert(): await init_plugins(settings.plugins_dir) pm = get_plugin_manager() - plugin_id = next(iter(pm._plugins)) + plugin_id = next(iter(pm.plugins)) plugin = pm.get_plugin(plugin_id) task_id = str(uuid.uuid4())