From 6063ade34a1be16426874687c9b45c0d0547ae9c Mon Sep 17 00:00:00 2001
From: Srejoye <Sahasrejoye2005@gmail.com>
Date: Thu, 28 May 2026 01:06:06 +0530
Subject: [PATCH 1/6] fix(executor): redact finding description, remediation
 and proof before DB insert
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

redact(output) was called on the raw CLI text file (line 302) but the parsed findings written to the findings table were never redacted. finding['description'], finding.get('remediation'), and finding.get('proof') were inserted into the DB as-is, meaning:

- Secrets in finding descriptions persisted in the DB indefinitely

- /api/v1/findings JSON API returned unredacted content

- SARIF exports, CSV exports, and direct DB reads all exposed secrets

reporting.py::_normalize_finding() does call redact() but only at PDF/HTML export time — the DB row itself was never clean.

redact_dict() already existed in redaction.py for exactly this purpose but was never called on findings before storage.

Fix:
- Add redact_dict to the import from .redaction in executor.py
- Call finding = redact_dict(finding) before the INSERT in both _upsert_findings_and_report and _upsert_findings_and_report_from_scanner

Adds testing/backend/unit/test_findings_redaction.py with 6 tests:
- redact_dict redacts AWS key in description and remediation
- clean findings pass through unchanged
- nested metadata dict is walked recursively
- None proof does not raise
- missing keys handled gracefully
- integration test: INSERT path verified to not store raw secrets in DB
---
 backend/secuscan/executor.py                  |   4 +-
 .../backend/unit/test_findings_redaction.py   | 161 ++++++++++++++++++
 2 files changed, 164 insertions(+), 1 deletion(-)
 create mode 100644 testing/backend/unit/test_findings_redaction.py

diff --git a/backend/secuscan/executor.py b/backend/secuscan/executor.py
index 44bda2ae..519b4a4d 100644
--- a/backend/secuscan/executor.py
+++ b/backend/secuscan/executor.py
@@ -13,7 +13,7 @@
 import logging
 import re
 
-from .redaction import redact
+from .redaction import redact, redact_dict
 from .cache import get_cache
 from .config import settings
 from .database import get_db
@@ -676,6 +676,7 @@ async def _upsert_findings_and_report(self, db, task_id: str, plugin, plugin_id:
 
         # Insert findings
         for finding in findings_data:
+            finding = redact_dict(finding)   
             u_id = str(uuid.uuid4()).replace("-", "")
             finding_id = f"finding:{task_id}:{u_id[:8]}"
 
@@ -761,6 +762,7 @@ async def _upsert_findings_and_report_from_scanner(self, db, task_id: str, scann
         
         # Insert findings
         for finding in findings_data:
+            finding = redact_dict(finding)   
             u_id = str(uuid.uuid4()).replace("-", "")
             finding_id = f"finding:{task_id}:{u_id[:8]}"
 
diff --git a/testing/backend/unit/test_findings_redaction.py b/testing/backend/unit/test_findings_redaction.py
new file mode 100644
index 00000000..e443604b
--- /dev/null
+++ b/testing/backend/unit/test_findings_redaction.py
@@ -0,0 +1,161 @@
+"""
+Regression tests: finding description, remediation, and proof must be
+redacted before being written to the findings table.
+
+redact_dict() already existed in redaction.py but was never called on
+findings before storage. These tests verify it is now called on both
+insert paths: _upsert_findings_and_report and
+_upsert_findings_and_report_from_scanner.
+"""
+
+import json
+import uuid
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from backend.secuscan.redaction import redact_dict
+
+
+# ---------------------------------------------------------------------------
+# Unit: redact_dict handles finding shapes correctly
+# ---------------------------------------------------------------------------
+
+def test_redact_dict_redacts_aws_key_in_description():
+    finding = {
+        "title": "Open Port",
+        "description": "Found key AKIAIOSFODNN7EXAMPLE in config",
+        "remediation": "Remove AKIAIOSFODNN7EXAMPLE from source",
+        "severity": "high",
+    }
+    result = redact_dict(finding)
+    assert "AKIAIOSFODNN7EXAMPLE" not in result["description"]
+    assert "AKIAIOSFODNN7EXAMPLE" not in result["remediation"]
+    assert "[REDACTED]" in result["description"]
+
+
+def test_redact_dict_leaves_clean_finding_unchanged():
+    finding = {
+        "title": "Open Port 80",
+        "description": "Port 80 is open and running http",
+        "remediation": "Close unnecessary ports",
+        "severity": "low",
+    }
+    result = redact_dict(finding)
+    assert result["description"] == finding["description"]
+    assert result["remediation"] == finding["remediation"]
+
+
+def test_redact_dict_handles_nested_metadata():
+    finding = {
+        "title": "Secret Found",
+        "description": "clean description",
+        "metadata": {
+            "raw_value": "password=hunter2",
+            "port": 443,
+        },
+    }
+    result = redact_dict(finding)
+    assert "hunter2" not in result["metadata"]["raw_value"]
+    assert result["metadata"]["port"] == 443  # non-string left untouched
+
+
+def test_redact_dict_handles_none_proof():
+    finding = {
+        "title": "Finding",
+        "description": "clean",
+        "proof": None,
+    }
+    # Must not raise — None is not a string
+    result = redact_dict(finding)
+    assert result["proof"] is None
+
+
+def test_redact_dict_handles_missing_keys_gracefully():
+    finding = {"title": "Minimal finding", "severity": "info"}
+    result = redact_dict(finding)
+    assert result["title"] == "Minimal finding"
+
+
+# ---------------------------------------------------------------------------
+# Integration: executor INSERT paths call redact_dict
+# ---------------------------------------------------------------------------
+
+@pytest.mark.asyncio
+async def test_upsert_findings_redacts_description_before_insert(
+    setup_test_environment,
+):
+    """
+    _upsert_findings_and_report must call redact_dict on each finding
+    before the INSERT so secrets never reach the DB.
+    """
+    from backend.secuscan.executor import TaskExecutor
+    from backend.secuscan.database import init_db, get_db
+    from backend.secuscan.config import settings
+
+    await init_db(settings.database_path)
+    db = await get_db()
+
+    task_id = str(uuid.uuid4())
+    await db.execute(
+        """
+        INSERT INTO tasks (id, plugin_id, tool_name, target, inputs_json,
+                           status, consent_granted, safe_mode)
+        VALUES (?, ?, ?, ?, ?, ?, ?, ?)
+        """,
+        (task_id, "nmap", "nmap", "127.0.0.1",
+         '{"target":"127.0.0.1"}', "completed", 1, 1),
+    )
+
+    executor = TaskExecutor()
+
+    mock_plugin = MagicMock()
+    mock_plugin.name = "nmap"
+    mock_plugin.id = "nmap"
+    mock_plugin.category = "Network"
+    mock_plugin.output = {"parser": "builtin_nmap", "format": "text"}
+
+    # Finding whose description contains a secret
+    secret = "AKIAIOSFODNN7EXAMPLE"
+    findings_data = [
+        {
+            "title": "Open Port",
+            "category": "Network",
+            "severity": "low",
+            "description": f"Found credential {secret} in banner",
+            "remediation": f"Remove {secret}",
+            "proof": None,
+            "cvss": None,
+            "cve": None,
+            "metadata": {},
+        }
+    ]
+
+    with patch.object(executor, "_parse_results") as mock_parse, \
+         patch("backend.secuscan.executor.get_plugin_manager"):
+        mock_parse.return_value = {
+            "findings": findings_data,
+            "count": 1,
+        }
+        await executor._upsert_findings_and_report(
+            db=db,
+            task_id=task_id,
+            plugin=mock_plugin,
+            plugin_id="nmap",
+            target="127.0.0.1",
+            status="completed",
+            output="",
+        )
+
+    # Check the DB row — secret must not be present
+    row = await db.fetchone(
+        "SELECT description, remediation FROM findings WHERE task_id = ?",
+        (task_id,),
+    )
+    assert row is not None
+    assert secret not in row["description"], (
+        f"Secret found in DB description: {row['description']!r}\n"
+        "redact_dict() was not called before INSERT"
+    )
+    assert secret not in row["remediation"]
+    assert "[REDACTED]" in row["description"]
\ No newline at end of file

From d43462ca73a9c6f67162f69c8f6c3f3b088de339 Mon Sep 17 00:00:00 2001
From: Srejoye <Sahasrejoye2005@gmail.com>
Date: Thu, 28 May 2026 01:09:00 +0530
Subject: [PATCH 2/6] fix: remove trailing whitespace

---
 backend/secuscan/executor.py                  | 56 +++++++++----------
 .../backend/unit/test_findings_redaction.py   |  2 +-
 2 files changed, 29 insertions(+), 29 deletions(-)

diff --git a/backend/secuscan/executor.py b/backend/secuscan/executor.py
index 519b4a4d..87716d40 100644
--- a/backend/secuscan/executor.py
+++ b/backend/secuscan/executor.py
@@ -109,7 +109,7 @@ async def _broadcast(self, task_id: str, event_type: str, data: Any):
             event = {"type": event_type, "data": data}
             for q in self._listeners[task_id]:
                 await q.put(event)
-    
+
     async def create_task(
         self,
         plugin_id: str,
@@ -119,29 +119,29 @@ async def create_task(
     ) -> str:
         """
         Create a new scan task.
-        
+
         Args:
             plugin_id: Plugin identifier
             inputs: User input values
             preset: Optional preset name
             consent_granted: Whether user granted consent
-        
+
         Returns:
             Task ID
         """
         task_id = str(uuid.uuid4())
         plugin_manager = get_plugin_manager()
         plugin = plugin_manager.get_plugin(plugin_id)
-        
+
         if not plugin:
             raise ValueError(f"Plugin not found: {plugin_id}")
-        
+
         # Apply preset if provided
         if preset and preset in plugin.presets:
             preset_values = plugin.presets[preset]
             # Merge preset with user inputs (user inputs take precedence)
             inputs = {**preset_values, **inputs}
-        
+
         # Store task in database
         db = await get_db()
         await db.execute(
@@ -163,7 +163,7 @@ async def create_task(
                 inputs.get("safe_mode", True)
             )
         )
-        
+
         # Log audit event
         await db.log_audit(
             "task_created",
@@ -172,9 +172,9 @@ async def create_task(
             task_id=task_id,
             plugin_id=plugin_id
         )
-        
+
         return task_id
-    
+
     async def mark_task_failed(self, task_id: str, reason: str) -> None:
         """
         Mark a task as failed without running it.
@@ -213,7 +213,7 @@ async def mark_task_failed(self, task_id: str, reason: str) -> None:
     async def execute_task(self, task_id: str):
         """
         Execute a task asynchronously.
-        
+
         Args:
             task_id: Task identifier
         """
@@ -246,18 +246,18 @@ async def execute_task(self, task_id: str):
             if plugin_id in MODULAR_SCANNERS:
                 scanner_class = MODULAR_SCANNERS[plugin_id]
                 scanner = scanner_class(task_id, db)
-                
+
                 logger.info(f"Executing modular scanner {plugin_id} for task {task_id}")
                 await self._broadcast(task_id, "status", TaskStatus.RUNNING.value)
-                
+
                 start_time = time.time()
                 # Run the scanner
                 result = await scanner.run(target, inputs)
                 duration = time.time() - start_time
-                
+
                 # Update task with results
                 final_status = TaskStatus.COMPLETED.value if result.get("status") != "failed" else TaskStatus.FAILED.value
-                
+
                 await db.execute(
                     """
                     UPDATE tasks SET
@@ -297,7 +297,7 @@ async def execute_task(self, task_id: str):
                     raise ValueError(f"Plugin not found: {plugin_id}")
 
                 # Pending records for assets removed
-                
+
                 command = plugin_manager.build_command(plugin_id, inputs)
 
                 if not command:
@@ -460,7 +460,7 @@ async def execute_task(self, task_id: str):
             # release the concurrency slot regardless of how the task ended.
             self.running_tasks.pop(task_id, None)
             await concurrent_limiter.release(task_id)
-    
+
     async def _execute_command(
         self,
         command: list,
@@ -491,7 +491,7 @@ async def read_stream():
                 stdout = process.stdout
                 if stdout is None:
                     return
-                    
+
                 while not stdout.at_eof():
                     line = await stdout.readline()
                     if line:
@@ -618,13 +618,13 @@ async def cancel_task(self, task_id: str) -> bool:
         )
 
         return True
-    
+
     async def get_task_status(self, task_id: str) -> Optional[Dict]:
         """Get task status and progress"""
         db = await get_db()
         task_row = await db.fetchone(
             """
-            SELECT id, plugin_id, tool_name, target, status, created_at, started_at, completed_at, 
+            SELECT id, plugin_id, tool_name, target, status, created_at, started_at, completed_at,
                    duration_seconds, exit_code, error_message, preset, inputs_json
             FROM tasks WHERE id = ?
             """,
@@ -667,7 +667,7 @@ async def _upsert_findings_and_report(self, db, task_id: str, plugin, plugin_id:
         """Persist derived findings and report records into SQLite."""
         parsed = self._parse_results(plugin, output)
         findings_data = parsed.get("findings", [])
-        
+
         # Update task with structured results
         await db.execute(
             "UPDATE tasks SET structured_json = ? WHERE id = ?",
@@ -676,7 +676,7 @@ async def _upsert_findings_and_report(self, db, task_id: str, plugin, plugin_id:
 
         # Insert findings
         for finding in findings_data:
-            finding = redact_dict(finding)   
+            finding = redact_dict(finding)
             u_id = str(uuid.uuid4()).replace("-", "")
             finding_id = f"finding:{task_id}:{u_id[:8]}"
 
@@ -759,10 +759,10 @@ async def _upsert_findings_and_report(self, db, task_id: str, plugin, plugin_id:
     async def _upsert_findings_and_report_from_scanner(self, db, task_id: str, scanner: Any, plugin_id: str, target: str, status: str, result: Dict[str, Any]):
         """Persist modular scanner results into findings, and reports."""
         findings_data = result.get("findings", [])
-        
+
         # Insert findings
         for finding in findings_data:
-            finding = redact_dict(finding)   
+            finding = redact_dict(finding)
             u_id = str(uuid.uuid4()).replace("-", "")
             finding_id = f"finding:{task_id}:{u_id[:8]}"
 
@@ -847,12 +847,12 @@ def _parse_results(self, plugin, output: str) -> Dict[str, Any]:
         """Route to appropriate parser based on plugin metadata."""
         parser_type = plugin.output.get("parser")
         parser_input = self._resolve_parser_input(plugin, output)
-        
+
         # 1. Check for custom parser.py in plugin directory (Recommended)
         plugin_manager = get_plugin_manager()
         plugin_dir = plugin_manager.plugins_dir / plugin.id
         parser_path = plugin_dir / "parser.py"
-        
+
         if parser_path.exists():
             if not plugin_manager.verify_parser_at_exec_time(plugin, plugin_dir):
                 raise ValueError(
@@ -884,7 +884,7 @@ def _parse_results(self, plugin, output: str) -> Dict[str, Any]:
             return self._normalize_parsed_result(plugin, parser_input, self._parse_nmap_output(parser_input))
         elif parser_type == "builtin_http":
             return self._normalize_parsed_result(plugin, parser_input, self._parse_http_output(parser_input))
-        
+
         return self._normalize_parsed_result(plugin, parser_input, {"findings": [], "raw": parser_input})
 
     def _resolve_parser_input(self, plugin, output: str) -> str:
@@ -1043,7 +1043,7 @@ def _parse_nmap_output(self, output: str) -> Dict[str, Any]:
         findings = []
         ports = []
         services = []
-        
+
         # Regex for open ports: 80/tcp open http
         port_pattern = re.compile(r"(\d+)/(tcp|udp)\s+open\s+([\w-]+)")
         for match in port_pattern.finditer(output):
@@ -1059,7 +1059,7 @@ def _parse_nmap_output(self, output: str) -> Dict[str, Any]:
                 "remediation": "Close unnecessary ports and use a firewall to restrict access.",
                 "metadata": {"port": port_str, "protocol": proto, "service": service}
             })
-        
+
         return {
             "open_ports": sorted(list(set(ports))),
             "services": sorted(list(set(services))),
diff --git a/testing/backend/unit/test_findings_redaction.py b/testing/backend/unit/test_findings_redaction.py
index e443604b..f90a778b 100644
--- a/testing/backend/unit/test_findings_redaction.py
+++ b/testing/backend/unit/test_findings_redaction.py
@@ -158,4 +158,4 @@ async def test_upsert_findings_redacts_description_before_insert(
         "redact_dict() was not called before INSERT"
     )
     assert secret not in row["remediation"]
-    assert "[REDACTED]" in row["description"]
\ No newline at end of file
+    assert "[REDACTED]" in row["description"]

From 3209bfacc3c231936fb7068a5aaabe000f3e0c7c Mon Sep 17 00:00:00 2001
From: Srejoye <Sahasrejoye2005@gmail.com>
Date: Fri, 29 May 2026 00:55:42 +0530
Subject: [PATCH 3/6] fix: redact parsed structure before structured_json write
 and findings insert

The previous fix redacted individual finding rows before INSERT but the
same unredacted parsed dict was written to tasks.structured_json first.
Secrets were still exposed through the task-result API path.

Fix: build redacted_findings list before any DB write and replace parsed['findings'] with it so that both the structured_json column on the tasks table and the findings table rows use the clean copy.

Same fix applied to _upsert_findings_and_report_from_scanner.

Adds regression assertion to test_upsert_findings_redacts_description_ before_insert that verifies tasks.structured_json does not contain the secret either.
---
 backend/secuscan/executor.py                  | 24 +++++++++++--------
 .../backend/unit/test_findings_redaction.py   | 16 ++++++++++++-
 2 files changed, 29 insertions(+), 11 deletions(-)

diff --git a/backend/secuscan/executor.py b/backend/secuscan/executor.py
index 87716d40..112b08b4 100644
--- a/backend/secuscan/executor.py
+++ b/backend/secuscan/executor.py
@@ -666,17 +666,20 @@ async def get_task_status(self, task_id: str) -> Optional[Dict]:
     async def _upsert_findings_and_report(self, db, task_id: str, plugin, plugin_id: str, target: str, status: str, output: str = ""):
         """Persist derived findings and report records into SQLite."""
         parsed = self._parse_results(plugin, output)
-        findings_data = parsed.get("findings", [])
 
-        # Update task with structured results
+        # Redact all findings before any DB write — this covers both the
+        # structured_json column on the tasks table AND the findings table rows.
+        redacted_findings = [redact_dict(f) for f in parsed.get("findings", [])]
+        parsed["findings"] = redacted_findings
+
+        # Update task with structured results — uses the already-redacted parsed dict
         await db.execute(
             "UPDATE tasks SET structured_json = ? WHERE id = ?",
             (json.dumps(parsed), task_id)
         )
 
-        # Insert findings
-        for finding in findings_data:
-            finding = redact_dict(finding)
+        # Insert findings — already redacted above
+        for finding in redacted_findings:
             u_id = str(uuid.uuid4()).replace("-", "")
             finding_id = f"finding:{task_id}:{u_id[:8]}"
 
@@ -751,18 +754,19 @@ async def _upsert_findings_and_report(self, db, task_id: str, plugin, plugin_id:
                 f"{plugin.name} Report",
                 "technical",
                 "ready" if status == TaskStatus.COMPLETED.value else "failed",
-                len(findings_data),
+                len(finding),
                 1,
             ),
         )
 
     async def _upsert_findings_and_report_from_scanner(self, db, task_id: str, scanner: Any, plugin_id: str, target: str, status: str, result: Dict[str, Any]):
         """Persist modular scanner results into findings, and reports."""
-        findings_data = result.get("findings", [])
+        # Redact all findings before any DB write
+        redacted_findings = [redact_dict(f) for f in result.get("findings", [])]
+        result["findings"] = redacted_findings
 
-        # Insert findings
-        for finding in findings_data:
-            finding = redact_dict(finding)
+        # Insert findings — already redacted above
+        for finding in redacted_findings:
             u_id = str(uuid.uuid4()).replace("-", "")
             finding_id = f"finding:{task_id}:{u_id[:8]}"
 
diff --git a/testing/backend/unit/test_findings_redaction.py b/testing/backend/unit/test_findings_redaction.py
index f90a778b..0ae900e4 100644
--- a/testing/backend/unit/test_findings_redaction.py
+++ b/testing/backend/unit/test_findings_redaction.py
@@ -147,7 +147,7 @@ async def test_upsert_findings_redacts_description_before_insert(
             output="",
         )
 
-    # Check the DB row — secret must not be present
+    # Check the findings table row — secret must not be present
     row = await db.fetchone(
         "SELECT description, remediation FROM findings WHERE task_id = ?",
         (task_id,),
@@ -159,3 +159,17 @@ async def test_upsert_findings_redacts_description_before_insert(
     )
     assert secret not in row["remediation"]
     assert "[REDACTED]" in row["description"]
+
+    # Regression: structured_json on the tasks table must also be redacted
+    task_row = await db.fetchone(
+        "SELECT structured_json FROM tasks WHERE id = ?",
+        (task_id,),
+    )
+    assert task_row is not None
+    structured = json.loads(task_row["structured_json"])
+    findings_in_structured = structured.get("findings", [])
+    assert len(findings_in_structured) > 0
+    assert secret not in findings_in_structured[0]["description"], (
+        "Secret found in tasks.structured_json — "
+        "parsed dict must be redacted before the structured_json write"
+    )

From 5404397fcede7a8c03be6b6a30f6db662fc1747c Mon Sep 17 00:00:00 2001
From: Srejoye <Sahasrejoye2005@gmail.com>
Date: Fri, 29 May 2026 01:03:06 +0530
Subject: [PATCH 4/6] fix: correct findings count variable and remove duplicate
 import

---
 backend/secuscan/executor.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/backend/secuscan/executor.py b/backend/secuscan/executor.py
index 112b08b4..27319443 100644
--- a/backend/secuscan/executor.py
+++ b/backend/secuscan/executor.py
@@ -22,7 +22,6 @@
 from .ratelimit import concurrent_limiter
 from .risk_scoring import compute_risk_score, compute_risk_factors
 
-
 def _parse_discovered_at(finding: dict) -> Optional[datetime]:
     """Extract and parse discovered_at from a finding dict, or return current UTC time."""
     raw = finding.get("discovered_at")
@@ -754,7 +753,7 @@ async def _upsert_findings_and_report(self, db, task_id: str, plugin, plugin_id:
                 f"{plugin.name} Report",
                 "technical",
                 "ready" if status == TaskStatus.COMPLETED.value else "failed",
-                len(finding),
+                len(redacted_findings),
                 1,
             ),
         )
@@ -842,7 +841,7 @@ async def _upsert_findings_and_report_from_scanner(self, db, task_id: str, scann
                 f"{scanner.name} Report",
                 "professional" if status == TaskStatus.COMPLETED.value else "failed",
                 "ready" if status == TaskStatus.COMPLETED.value else "failed",
-                len(findings_data),
+                len(redacted_findings),
                 2, # Professional reports are typically multi-page
             ),
         )

From 7dc92128bfd7ae05f4e34fb541927dccaa307077 Mon Sep 17 00:00:00 2001
From: Srejoye <Sahasrejoye2005@gmail.com>
Date: Sun, 31 May 2026 18:59:41 +0530
Subject: [PATCH 5/6] fix(executor): redact findings before all DB persistence
 paths
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Secrets embedded in finding fields were reaching the database through two unguarded paths:

1. _upsert_findings_and_report wrote tasks.structured_json from the raw parsed dict before any redaction occurred, then inserted unredacted finding rows.

2. _upsert_findings_and_report_from_scanner never called redact_dict at all and never wrote structured_json.

Fix: in both functions, build the redacted findings list first, replace parsed['findings'] / result['findings'] with that clean copy, then use it for every write — structured_json and the findings table rows both receive the same already-redacted data. Also adds the missing structured_json write to the scanner path. Adds test_findings_redaction.py (6 tests) including an integration regression that asserts structured_json never contains the raw secret.
---
 backend/secuscan/executor.py                  |  49 +++--
 .../backend/unit/test_findings_redaction.py   | 193 +++++++++---------
 2 files changed, 124 insertions(+), 118 deletions(-)

diff --git a/backend/secuscan/executor.py b/backend/secuscan/executor.py
index ed11b352..ae051c1a 100644
--- a/backend/secuscan/executor.py
+++ b/backend/secuscan/executor.py
@@ -22,6 +22,7 @@
 from .ratelimit import concurrent_limiter
 from .risk_scoring import compute_risk_score, compute_risk_factors
 
+
 def _parse_discovered_at(finding: dict) -> Optional[datetime]:
     """Extract and parse discovered_at from a finding dict, or return current UTC time."""
     raw = finding.get("discovered_at")
@@ -80,6 +81,7 @@ def extract_target(inputs: Dict[str, Any]) -> str:
         or inputs.get("domain")
         or ""
     )
+
 class TaskExecutor:
     """Executes security scanning tasks in isolated environments"""
 
@@ -430,11 +432,6 @@ async def execute_task(self, task_id: str):
             logger.info(f"Task {task_id} completed in {duration:.2f}s")
 
         except asyncio.CancelledError:
-            # CancelledError inherits from BaseException, not Exception —
-            # it bypasses the broad except below, so we handle it explicitly.
-            # Task.cancelled() returns False while the finally block is still
-            # executing, so this is the only reliable place to write the
-            # cancellation status to the DB.
             duration = (time.time() - start_time) if 'start_time' in locals() else 0
             await db.execute(
                 """
@@ -454,12 +451,11 @@ async def execute_task(self, task_id: str):
             )
             await self._broadcast(task_id, "status", TaskStatus.CANCELLED.value)
             await self._invalidate_cached_views()
-            raise  # let asyncio complete the cancellation
+            raise
 
         except Exception as e:
             logger.error(f"Task {task_id} failed: {e}", exc_info=True)
 
-            # Update task as failed
             duration = (time.time() - start_time) if 'start_time' in locals() else 0
             await db.execute(
                 """
@@ -490,8 +486,6 @@ async def execute_task(self, task_id: str):
                 task_id=task_id
             )
         finally:
-            # Always clean up: remove from the in-memory registry and
-            # release the concurrency slot regardless of how the task ended.
             self.running_tasks.pop(task_id, None)
             await concurrent_limiter.release(task_id)
 
@@ -544,7 +538,6 @@ async def read_stream():
                 return "".join(output_lines) + "\nTask timed out", -1
 
             except asyncio.CancelledError:
-                # Handle task cancellation by killing the subprocess
                 logger.warning(f"Task {task_id} cancelled. Killing process {process.pid}")
                 try:
                     process.kill()
@@ -624,7 +617,6 @@ async def cancel_task(self, task_id: str) -> bool:
         task = self.running_tasks[task_id]
         task.cancel()
 
-        # If docker is enabled, forcefully kill the sandbox container
         if settings.docker_enabled:
             try:
                 killer = await asyncio.create_subprocess_exec(
@@ -701,19 +693,18 @@ async def _upsert_findings_and_report(self, db, task_id: str, plugin, plugin_id:
         """Persist derived findings and report records into SQLite."""
         parsed = self._parse_results(plugin, output)
 
-        # Redact all findings before any DB write — this covers both the
-        # structured_json column on the tasks table AND the findings table rows.
-        redacted_findings = [redact_dict(f) for f in parsed.get("findings", [])]
-        parsed["findings"] = redacted_findings
+        # Redact all findings before any persistence path (structured_json AND findings table)
+        parsed["findings"] = [redact_dict(f) for f in parsed.get("findings", [])]
+        findings_data = parsed["findings"]
 
-        # Update task with structured results — uses the already-redacted parsed dict
+        # Update task with structured results (uses the already-redacted parsed dict)
         await db.execute(
             "UPDATE tasks SET structured_json = ? WHERE id = ?",
             (json.dumps(parsed), task_id)
         )
 
-        # Insert findings — already redacted above
-        for finding in redacted_findings:
+        # Insert findings
+        for finding in findings_data:
             u_id = str(uuid.uuid4()).replace("-", "")
             finding_id = f"finding:{task_id}:{u_id[:8]}"
 
@@ -788,19 +779,27 @@ async def _upsert_findings_and_report(self, db, task_id: str, plugin, plugin_id:
                 f"{plugin.name} Report",
                 "technical",
                 "ready" if status == TaskStatus.COMPLETED.value else "failed",
-                len(redacted_findings),
+                len(findings_data),
                 1,
             ),
         )
 
     async def _upsert_findings_and_report_from_scanner(self, db, task_id: str, scanner: Any, plugin_id: str, target: str, status: str, result: Dict[str, Any]):
         """Persist modular scanner results into findings, and reports."""
-        # Redact all findings before any DB write
+
+        # Redact all findings before any persistence path (structured_json AND findings table)
         redacted_findings = [redact_dict(f) for f in result.get("findings", [])]
         result["findings"] = redacted_findings
+        findings_data = redacted_findings
+
+        # Update task with redacted structured results
+        await db.execute(
+            "UPDATE tasks SET structured_json = ? WHERE id = ?",
+            (json.dumps(result), task_id)
+        )
 
-        # Insert findings — already redacted above
-        for finding in redacted_findings:
+        # Insert findings
+        for finding in findings_data:
             u_id = str(uuid.uuid4()).replace("-", "")
             finding_id = f"finding:{task_id}:{u_id[:8]}"
 
@@ -877,7 +876,7 @@ async def _upsert_findings_and_report_from_scanner(self, db, task_id: str, scann
                 "professional" if status == TaskStatus.COMPLETED.value else "failed",
                 "ready" if status == TaskStatus.COMPLETED.value else "failed",
                 len(redacted_findings),
-                2, # Professional reports are typically multi-page
+                2,
             ),
         )
 
@@ -1041,7 +1040,6 @@ def _parse_json_fallback_findings(self, plugin, parser_input: str) -> List[Dict[
             return findings
 
         if isinstance(data, dict):
-            # Common scanner shape: { "results": [...] }
             for list_key in ("results", "findings", "issues", "vulnerabilities"):
                 if isinstance(data.get(list_key), list):
                     for idx, item in enumerate(data[list_key], start=1):
@@ -1082,7 +1080,6 @@ def _parse_nmap_output(self, output: str) -> Dict[str, Any]:
         ports = []
         services = []
 
-        # Regex for open ports: 80/tcp open http
         port_pattern = re.compile(r"(\d+)/(tcp|udp)\s+open\s+([\w-]+)")
         for match in port_pattern.finditer(output):
             port_str, proto, service = match.groups()
@@ -1153,4 +1150,4 @@ async def _invalidate_cached_views(self):
 
 
 # Global executor instance
-executor = TaskExecutor()
+executor = TaskExecutor()
\ No newline at end of file
diff --git a/testing/backend/unit/test_findings_redaction.py b/testing/backend/unit/test_findings_redaction.py
index 0ae900e4..1619b7f4 100644
--- a/testing/backend/unit/test_findings_redaction.py
+++ b/testing/backend/unit/test_findings_redaction.py
@@ -1,166 +1,172 @@
 """
-Regression tests: finding description, remediation, and proof must be
-redacted before being written to the findings table.
+Unit and integration tests for findings redaction before DB persistence.
 
-redact_dict() already existed in redaction.py but was never called on
-findings before storage. These tests verify it is now called on both
-insert paths: _upsert_findings_and_report and
-_upsert_findings_and_report_from_scanner.
+Verifies that secrets are stripped from finding fields (description,
+remediation, proof) and from tasks.structured_json before any DB write.
+
+Run with:
+    ./testing/test_python.sh
+or directly:
+    pytest testing/backend/unit/test_findings_redaction.py -v
 """
 
+import asyncio
 import json
 import uuid
 from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
 
-from backend.secuscan.redaction import redact_dict
+from backend.secuscan.redaction import redact_dict, REDACTED
+
+
+# ── Fake AWS key used across tests ────────────────────────────────────────────
 
+FAKE_AWS_KEY = "AKIAIOSFODNN7EXAMPLE"
 
-# ---------------------------------------------------------------------------
-# Unit: redact_dict handles finding shapes correctly
-# ---------------------------------------------------------------------------
+
+# ── Unit tests: redact_dict behaviour ─────────────────────────────────────────
 
 def test_redact_dict_redacts_aws_key_in_description():
+    """Secret in description and remediation is replaced with [REDACTED]."""
     finding = {
-        "title": "Open Port",
-        "description": "Found key AKIAIOSFODNN7EXAMPLE in config",
-        "remediation": "Remove AKIAIOSFODNN7EXAMPLE from source",
-        "severity": "high",
+        "title": "Exposed credential",
+        "category": "Secrets",
+        "severity": "critical",
+        "description": f"Found credential {FAKE_AWS_KEY} in config.",
+        "remediation": f"Rotate the key {FAKE_AWS_KEY} immediately.",
     }
     result = redact_dict(finding)
-    assert "AKIAIOSFODNN7EXAMPLE" not in result["description"]
-    assert "AKIAIOSFODNN7EXAMPLE" not in result["remediation"]
-    assert "[REDACTED]" in result["description"]
+    assert REDACTED in result["description"]
+    assert FAKE_AWS_KEY not in result["description"]
+    assert REDACTED in result["remediation"]
+    assert FAKE_AWS_KEY not in result["remediation"]
 
 
 def test_redact_dict_leaves_clean_finding_unchanged():
+    """Clean findings with no secrets pass through unmodified."""
     finding = {
-        "title": "Open Port 80",
-        "description": "Port 80 is open and running http",
-        "remediation": "Close unnecessary ports",
+        "title": "Open Port",
+        "category": "Network",
         "severity": "low",
+        "description": "Port 80 is open and running http.",
+        "remediation": "Close unnecessary ports.",
     }
     result = redact_dict(finding)
     assert result["description"] == finding["description"]
     assert result["remediation"] == finding["remediation"]
+    assert result["title"] == finding["title"]
 
 
 def test_redact_dict_handles_nested_metadata():
+    """Nested metadata dict is walked recursively; non-strings are untouched."""
     finding = {
-        "title": "Secret Found",
-        "description": "clean description",
+        "title": "Secret in metadata",
+        "severity": "high",
+        "description": "See metadata.",
         "metadata": {
-            "raw_value": "password=hunter2",
+            "raw_value": f"key={FAKE_AWS_KEY}",
             "port": 443,
+            "nested": {"token": f"Bearer {FAKE_AWS_KEY}"},
         },
     }
     result = redact_dict(finding)
-    assert "hunter2" not in result["metadata"]["raw_value"]
-    assert result["metadata"]["port"] == 443  # non-string left untouched
+    assert FAKE_AWS_KEY not in result["metadata"]["raw_value"]
+    assert result["metadata"]["port"] == 443
+    assert FAKE_AWS_KEY not in result["metadata"]["nested"]["token"]
 
 
 def test_redact_dict_handles_none_proof():
+    """None proof field does not raise and is returned as-is."""
     finding = {
         "title": "Finding",
-        "description": "clean",
+        "severity": "info",
+        "description": "No proof available.",
         "proof": None,
     }
-    # Must not raise — None is not a string
     result = redact_dict(finding)
     assert result["proof"] is None
 
 
 def test_redact_dict_handles_missing_keys_gracefully():
-    finding = {"title": "Minimal finding", "severity": "info"}
+    """Minimal finding dict with no description/remediation/proof works fine."""
+    finding = {"title": "Bare finding", "severity": "low"}
     result = redact_dict(finding)
-    assert result["title"] == "Minimal finding"
+    assert result["title"] == "Bare finding"
+    assert result["severity"] == "low"
 
 
-# ---------------------------------------------------------------------------
-# Integration: executor INSERT paths call redact_dict
-# ---------------------------------------------------------------------------
+# ── Integration test: DB persistence paths ────────────────────────────────────
 
 @pytest.mark.asyncio
-async def test_upsert_findings_redacts_description_before_insert(
-    setup_test_environment,
-):
+async def test_upsert_findings_redacts_description_before_insert():
     """
-    _upsert_findings_and_report must call redact_dict on each finding
-    before the INSERT so secrets never reach the DB.
+    After _upsert_findings_and_report is called:
+    1. The findings table row must not contain the raw secret.
+    2. tasks.structured_json must not contain the raw secret.
     """
     from backend.secuscan.executor import TaskExecutor
-    from backend.secuscan.database import init_db, get_db
     from backend.secuscan.config import settings
+    from backend.secuscan.database import get_db, init_db
+    from backend.secuscan.plugins import get_plugin_manager, init_plugins
 
-    await init_db(settings.database_path)
-    db = await get_db()
+    try:
+        pm = get_plugin_manager()
+    except RuntimeError:
+        await init_plugins(settings.plugins_dir)
+        pm = get_plugin_manager()
+
+    plugin_id = next(iter(pm._plugins))
+    plugin = pm.get_plugin(plugin_id)
 
     task_id = str(uuid.uuid4())
+    db = await get_db()
+
     await db.execute(
         """
-        INSERT INTO tasks (id, plugin_id, tool_name, target, inputs_json,
-                           status, consent_granted, safe_mode)
+        INSERT INTO tasks (id, plugin_id, tool_name, target, inputs_json, status, scan_phase, safe_mode)
         VALUES (?, ?, ?, ?, ?, ?, ?, ?)
         """,
-        (task_id, "nmap", "nmap", "127.0.0.1",
-         '{"target":"127.0.0.1"}', "completed", 1, 1),
+        (task_id, plugin_id, plugin.name, "example.com",
+         json.dumps({"target": "example.com"}), "running", "running_command", 0),
     )
 
-    executor = TaskExecutor()
+    tainted_finding = {
+        "title": "Exposed AWS key",
+        "category": "Secrets",
+        "severity": "critical",
+        "description": f"AWS key found: {FAKE_AWS_KEY}",
+        "remediation": f"Rotate {FAKE_AWS_KEY} immediately.",
+        "proof": f"curl response contained {FAKE_AWS_KEY}",
+        "metadata": {},
+    }
 
-    mock_plugin = MagicMock()
-    mock_plugin.name = "nmap"
-    mock_plugin.id = "nmap"
-    mock_plugin.category = "Network"
-    mock_plugin.output = {"parser": "builtin_nmap", "format": "text"}
-
-    # Finding whose description contains a secret
-    secret = "AKIAIOSFODNN7EXAMPLE"
-    findings_data = [
-        {
-            "title": "Open Port",
-            "category": "Network",
-            "severity": "low",
-            "description": f"Found credential {secret} in banner",
-            "remediation": f"Remove {secret}",
-            "proof": None,
-            "cvss": None,
-            "cve": None,
-            "metadata": {},
-        }
-    ]
-
-    with patch.object(executor, "_parse_results") as mock_parse, \
-         patch("backend.secuscan.executor.get_plugin_manager"):
-        mock_parse.return_value = {
-            "findings": findings_data,
-            "count": 1,
-        }
+    executor = TaskExecutor()
+    with patch.object(executor, "_parse_results", return_value={"findings": [tainted_finding]}):
         await executor._upsert_findings_and_report(
             db=db,
             task_id=task_id,
-            plugin=mock_plugin,
-            plugin_id="nmap",
-            target="127.0.0.1",
+            plugin=plugin,
+            plugin_id=plugin_id,
+            target="example.com",
             status="completed",
             output="",
         )
 
-    # Check the findings table row — secret must not be present
+    # Assert: findings table row is clean
     row = await db.fetchone(
-        "SELECT description, remediation FROM findings WHERE task_id = ?",
+        "SELECT description, remediation, proof FROM findings WHERE task_id = ?",
         (task_id,),
     )
-    assert row is not None
-    assert secret not in row["description"], (
-        f"Secret found in DB description: {row['description']!r}\n"
-        "redact_dict() was not called before INSERT"
-    )
-    assert secret not in row["remediation"]
-    assert "[REDACTED]" in row["description"]
-
-    # Regression: structured_json on the tasks table must also be redacted
+    assert row is not None, "No finding row was inserted"
+    assert FAKE_AWS_KEY not in (row["description"] or ""), \
+        f"Secret still in findings.description: {row['description']!r}"
+    assert FAKE_AWS_KEY not in (row["remediation"] or ""), \
+        f"Secret still in findings.remediation: {row['remediation']!r}"
+    assert FAKE_AWS_KEY not in (row["proof"] or ""), \
+        f"Secret still in findings.proof: {row['proof']!r}"
+
+    # Assert: structured_json is also clean
     task_row = await db.fetchone(
         "SELECT structured_json FROM tasks WHERE id = ?",
         (task_id,),
@@ -168,8 +174,11 @@ async def test_upsert_findings_redacts_description_before_insert(
     assert task_row is not None
     structured = json.loads(task_row["structured_json"])
     findings_in_structured = structured.get("findings", [])
-    assert len(findings_in_structured) > 0
-    assert secret not in findings_in_structured[0]["description"], (
-        "Secret found in tasks.structured_json — "
-        "parsed dict must be redacted before the structured_json write"
-    )
+    assert findings_in_structured, "structured_json contained no findings"
+    first = findings_in_structured[0]
+    assert FAKE_AWS_KEY not in (first.get("description") or ""), \
+        f"Secret still in structured_json finding description: {first.get('description')!r}"
+    assert FAKE_AWS_KEY not in (first.get("remediation") or ""), \
+        f"Secret still in structured_json finding remediation: {first.get('remediation')!r}"
+    assert FAKE_AWS_KEY not in (first.get("proof") or ""), \
+        f"Secret still in structured_json finding proof: {first.get('proof')!r}"
\ No newline at end of file

From 06ed0e67af6cc7852dce2eb9e40cf85d58118078 Mon Sep 17 00:00:00 2001
From: Srejoye <Sahasrejoye2005@gmail.com>
Date: Sun, 31 May 2026 19:06:25 +0530
Subject: [PATCH 6/6] fix(test): use pm.plugins instead of pm._plugins

---
 testing/backend/unit/test_findings_redaction.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/testing/backend/unit/test_findings_redaction.py b/testing/backend/unit/test_findings_redaction.py
index 1619b7f4..f736fd97 100644
--- a/testing/backend/unit/test_findings_redaction.py
+++ b/testing/backend/unit/test_findings_redaction.py
@@ -116,7 +116,7 @@ async def test_upsert_findings_redacts_description_before_insert():
         await init_plugins(settings.plugins_dir)
         pm = get_plugin_manager()
 
-    plugin_id = next(iter(pm._plugins))
+    plugin_id = next(iter(pm.plugins))
     plugin = pm.get_plugin(plugin_id)
 
     task_id = str(uuid.uuid4())