From 955270053dced3003db7d5aaef0765d3784565f4 Mon Sep 17 00:00:00 2001 From: John McChesney TenEyck Jr Date: Sat, 23 May 2026 16:50:31 +0100 Subject: [PATCH 1/3] Harden local metadata secret guardrails --- docs/privacy-redaction-boundaries.md | 18 +++ scripts/check-detect-secrets.sh | 203 +++++++++++++++++---------- src/mailplus_intelligence/sqlite.py | 16 +++ tests/test_runtime.py | 11 ++ tests/test_secret_scan_script.py | 97 +++++++++++++ 5 files changed, 270 insertions(+), 75 deletions(-) create mode 100644 tests/test_secret_scan_script.py diff --git a/docs/privacy-redaction-boundaries.md b/docs/privacy-redaction-boundaries.md index 2bcb829..6dcefbe 100644 --- a/docs/privacy-redaction-boundaries.md +++ b/docs/privacy-redaction-boundaries.md @@ -115,6 +115,24 @@ Forbidden in the repo: - Production selected text caches or semantic output exports that have not passed promotion review. - Machine-local caches, logs, database files, or generated stores that may contain raw message content. +## Local Secret Scan Guardrail + +`scripts/check-detect-secrets.sh` is a fast baseline guardrail for CI and local preflight checks. It is not comprehensive DLP and does not replace operator review before live MailPlus integration, selected-text-cache work, or public release. + +The default CI mode scans tracked files only: + +```bash +bash scripts/check-detect-secrets.sh --all-files +``` + +Before staging or opening a PR that may have generated local artifacts, use the broader local mode: + +```bash +bash scripts/check-detect-secrets.sh --all-files-with-untracked +``` + +The broader mode includes untracked, non-ignored files and checks for common local leak shapes, including `.eml` and `.mbox` mailbox exports, MailPlus metadata/cache database filenames, and live OAuth, reset, magic-login, recovery, checkout, invoice, billing, or payment links with token-like query parameters. Synthetic documentation and fixtures should use reserved domains such as `example.com` and redaction markers such as `[REDACTED_TOKEN]` so the scanner can distinguish examples from live artifacts. + ## Fixture Redaction Rules Fixtures must be synthetic by default. If a real-world shape is needed to reproduce parsing behavior, reduce it to the minimum structure and redact before committing. diff --git a/scripts/check-detect-secrets.sh b/scripts/check-detect-secrets.sh index 897cf20..bec5841 100755 --- a/scripts/check-detect-secrets.sh +++ b/scripts/check-detect-secrets.sh @@ -1,81 +1,134 @@ - #!/usr/bin/env bash - set -euo pipefail - - mode="${1:-"--all-files"}" - ignore_globs=("scripts/check-detect-secrets.sh") - if [[ -f .detect-secrets-ignore ]]; then - while IFS= read -r ignore_glob; do - if [[ -z "$ignore_glob" ]]; then - continue - fi - if [[ "${ignore_glob:0:1}" == "#" ]]; then - continue - fi - ignore_globs+=("$ignore_glob") - done < .detect-secrets-ignore +#!/usr/bin/env bash +set -euo pipefail + +mode="${1:-"--all-files"}" +ignore_globs=("scripts/check-detect-secrets.sh") +if [[ -f .detect-secrets-ignore ]]; then + while IFS= read -r ignore_glob; do + if [[ -z "$ignore_glob" ]]; then + continue + fi + if [[ "${ignore_glob:0:1}" == "#" ]]; then + continue + fi + ignore_globs+=("$ignore_glob") + done < .detect-secrets-ignore +fi + +should_skip_file() { + local candidate="$1" + local ignore_glob + for ignore_glob in "${ignore_globs[@]}"; do + case "$candidate" in + $ignore_glob) + return 0 + ;; + esac + done + return 1 +} + +append_tracked_files() { + while IFS= read -r -d '' file; do + files+=("$file") + done < <(git ls-files -z) +} + +append_staged_files() { + while IFS= read -r -d '' file; do + files+=("$file") + done < <(git diff --cached --name-only --diff-filter=ACMR -z) +} + +append_untracked_files() { + while IFS= read -r -d '' file; do + files+=("$file") + done < <(git ls-files --others --exclude-standard -z) +} + +files=() +case "$mode" in + --staged) + append_staged_files + ;; + --all-files) + append_tracked_files + ;; + --all-files-with-untracked | --local) + append_tracked_files + append_untracked_files + ;; + *) + echo "Usage: $0 [--all-files|--staged|--all-files-with-untracked|--local]" >&2 + exit 2 + ;; +esac + +if [[ "${#files[@]}" -eq 0 ]]; then + echo "No files to scan." + exit 0 +fi + +content_patterns=( + 'ghp_' + 'github_pat_' + 'sk-live-' + 'sk-proj-' + 'AKIA[0-9A-Z]{16}' + 'BEGIN (RSA|OPENSSH|EC) PRIVATE KEY' + 'ANTHROPIC_API_KEY=' + 'OPENAI_API_KEY=' + 'SUDO_PASS=' + 'BW_SESSION=' +) + +mailplus_content_patterns=( + 'https?://[^[:space:])>"]*(oauth|authorize|token)[^[:space:])>"]*(token|code|key|secret|signature|sig|jwt|session|auth)=' + 'https?://[^[:space:])>"]*(reset|recovery|recover-password|password-reset|magic|login)[^[:space:])>"]*(token|code|key|secret|signature|sig|jwt|session|auth)=' + 'https?://[^[:space:])>"]*(pay|payment|checkout|invoice|billing)[^[:space:])>"]*(token|code|key|secret|signature|sig|jwt|session|auth)=' +) + +mailplus_path_patterns=( + '\.eml$' + '\.mbox$' + '(^|/)(mailplus|selected-text|semantic|metadata)[^/]*\.(db|sqlite|sqlite3)$' + '(^|/)(mailplus|selected-text|semantic|metadata)[^/]*\.(cache|log)$' +) + +allowed_synthetic_pattern='(example\.(com|org|net|test)|\[REDACTED_[A-Z_]+\])' + +tmp_file="$(mktemp)" +trap 'rm -f "$tmp_file"' EXIT + +for file in "${files[@]}"; do + if [[ ! -f "$file" ]] || should_skip_file "$file"; then + continue + fi + printf '%s\n' "$file" >>"$tmp_file" +done + +failed=0 +while IFS= read -r file; do + for pattern in "${mailplus_path_patterns[@]}"; do + if [[ "$file" =~ $pattern ]]; then + echo "Potential MailPlus export/cache artifact '$pattern' found at $file" >&2 + failed=1 fi + done - should_skip_file() { - local candidate="$1" - local ignore_glob - for ignore_glob in "${ignore_globs[@]}"; do - case "$candidate" in - $ignore_glob) - return 0 - ;; - esac - done - return 1 - } - - files=() - if [[ "$mode" == "--staged" ]]; then - while IFS= read -r -d '' file; do - files+=("$file") - done < <(git diff --cached --name-only --diff-filter=ACMR -z) - else - while IFS= read -r -d '' file; do - files+=("$file") - done < <(git ls-files -z) + for pattern in "${content_patterns[@]}"; do + if grep -E -n "$pattern" "$file" >/dev/null 2>&1; then + echo "Potential secret pattern '$pattern' found in $file" >&2 + failed=1 fi + done - if [[ "${#files[@]}" -eq 0 ]]; then - echo "No files to scan." - exit 0 + for pattern in "${mailplus_content_patterns[@]}"; do + if grep -E -n "$pattern" "$file" | grep -Ev "$allowed_synthetic_pattern" >/dev/null 2>&1; then + echo "Potential MailPlus link leak pattern '$pattern' found in $file" >&2 + failed=1 fi + done +done <"$tmp_file" - patterns=( - 'ghp_' - 'github_pat_' - 'sk-live-' - 'sk-proj-' - 'AKIA[0-9A-Z]{16}' - 'BEGIN (RSA|OPENSSH|EC) PRIVATE KEY' - 'ANTHROPIC_API_KEY=' - 'OPENAI_API_KEY=' - 'SUDO_PASS=' - 'BW_SESSION=' - ) - - tmp_file="$(mktemp)" - trap 'rm -f "$tmp_file"' EXIT - - for file in "${files[@]}"; do - if [[ ! -f "$file" ]] || should_skip_file "$file"; then - continue - fi - printf '%s -' "$file" >>"$tmp_file" - done - - failed=0 - while IFS= read -r file; do - for pattern in "${patterns[@]}"; do - if grep -E -n "$pattern" "$file" >/dev/null 2>&1; then - echo "Potential secret pattern '$pattern' found in $file" >&2 - failed=1 - fi - done - done <"$tmp_file" - - exit "$failed" +exit "$failed" diff --git a/src/mailplus_intelligence/sqlite.py b/src/mailplus_intelligence/sqlite.py index 715b8b8..0e8a4a0 100644 --- a/src/mailplus_intelligence/sqlite.py +++ b/src/mailplus_intelligence/sqlite.py @@ -2,14 +2,30 @@ from __future__ import annotations +import os import sqlite3 from pathlib import Path +def _create_owner_only_database_file(database: Path) -> None: + if database.exists(): + return + + try: + file_descriptor = os.open(database, os.O_CREAT | os.O_EXCL | os.O_WRONLY, 0o600) + except FileExistsError: + return + + os.close(file_descriptor) + + def connect_sqlite(database: str | Path = ":memory:") -> sqlite3.Connection: """Open a SQLite connection with project defaults for index work.""" database_name = str(database) + if database_name != ":memory:": + _create_owner_only_database_file(Path(database)) + connection = sqlite3.connect(database_name) connection.row_factory = sqlite3.Row connection.execute("PRAGMA foreign_keys = ON") diff --git a/tests/test_runtime.py b/tests/test_runtime.py index b75b6a2..6e78fe0 100644 --- a/tests/test_runtime.py +++ b/tests/test_runtime.py @@ -1,5 +1,6 @@ from __future__ import annotations +import stat import tempfile import unittest from pathlib import Path @@ -40,6 +41,16 @@ def test_sqlite_connection_supports_index_style_round_trip(self) -> None: finally: connection.close() + def test_sqlite_file_is_owner_only_when_created(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + database = Path(tmpdir) / "mailplus-intelligence.db" + connection = connect_sqlite(database) + try: + mode = stat.S_IMODE(database.stat().st_mode) + self.assertEqual(mode, 0o600) + finally: + connection.close() + if __name__ == "__main__": unittest.main() diff --git a/tests/test_secret_scan_script.py b/tests/test_secret_scan_script.py new file mode 100644 index 0000000..69c7fc3 --- /dev/null +++ b/tests/test_secret_scan_script.py @@ -0,0 +1,97 @@ +from __future__ import annotations + +import subprocess +import tempfile +import unittest +from pathlib import Path + + +REPO_ROOT = Path(__file__).resolve().parents[1] +SCRIPT = REPO_ROOT / "scripts" / "check-detect-secrets.sh" + + +class SecretScanScriptTests(unittest.TestCase): + def setUp(self) -> None: + self.tmpdir_context = tempfile.TemporaryDirectory() + self.workspace = Path(self.tmpdir_context.name) + subprocess.run(["git", "init"], cwd=self.workspace, check=True, capture_output=True) + subprocess.run( + ["git", "config", "user.email", "test@example.com"], + cwd=self.workspace, + check=True, + capture_output=True, + ) + subprocess.run( + ["git", "config", "user.name", "MailPlus Test"], + cwd=self.workspace, + check=True, + capture_output=True, + ) + + def tearDown(self) -> None: + self.tmpdir_context.cleanup() + + def run_scan(self, mode: str) -> subprocess.CompletedProcess[str]: + return subprocess.run( + ["bash", str(SCRIPT), mode], + cwd=self.workspace, + check=False, + capture_output=True, + text=True, + ) + + def write_file(self, relative_path: str, content: str) -> Path: + path = self.workspace / relative_path + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(content, encoding="utf-8") + return path + + def git_add(self, *relative_paths: str) -> None: + subprocess.run(["git", "add", *relative_paths], cwd=self.workspace, check=True) + + def test_staged_mode_scans_staged_files(self) -> None: + secret_shape = "OPENAI" + "_API_KEY=not-a-real-test-value\n" + self.write_file("candidate.txt", secret_shape) + self.git_add("candidate.txt") + + result = self.run_scan("--staged") + + self.assertNotEqual(result.returncode, 0) + self.assertIn("OPENAI" + "_API_KEY=", result.stderr) + + def test_local_mode_includes_untracked_non_ignored_files(self) -> None: + self.write_file("tracked.txt", "ordinary synthetic content\n") + self.git_add("tracked.txt") + self.write_file("local/mailplus-export.eml", "From: sender@example.com\n") + + ci_result = self.run_scan("--all-files") + local_result = self.run_scan("--all-files-with-untracked") + + self.assertEqual(ci_result.returncode, 0, ci_result.stderr) + self.assertNotEqual(local_result.returncode, 0) + self.assertIn("mailplus-export.eml", local_result.stderr) + + def test_mailplus_link_leak_is_detected(self) -> None: + live_link = "https://mail.vendor.invalid/reset?" + "token=abc123" + self.write_file("message.txt", f"Reset at {live_link}\n") + self.git_add("message.txt") + + result = self.run_scan("--all-files") + + self.assertNotEqual(result.returncode, 0) + self.assertIn("MailPlus link leak", result.stderr) + + def test_synthetic_redacted_doc_link_is_allowed(self) -> None: + self.write_file( + "docs/example.md", + "Synthetic reset URL: https://example.com/reset?token=[REDACTED_TOKEN]\n", + ) + self.git_add("docs/example.md") + + result = self.run_scan("--all-files") + + self.assertEqual(result.returncode, 0, result.stderr) + + +if __name__ == "__main__": + unittest.main() From 4d6ecb53cbed5159f940c836054a630d4b19a8c8 Mon Sep 17 00:00:00 2001 From: John McChesney TenEyck Jr Date: Sat, 23 May 2026 16:53:28 +0100 Subject: [PATCH 2/3] Finish R1 public security polish --- CONTRIBUTING.md | 7 +++++++ SECURITY.md | 6 ++++++ fixtures/README.md | 9 +++++++++ scripts/check-detect-secrets.sh | 4 ++-- tests/test_secret_scan_script.py | 3 +++ 5 files changed, 27 insertions(+), 2 deletions(-) create mode 100644 fixtures/README.md diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index ecbaa16..0d3d504 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -18,6 +18,13 @@ Run the fast validation gate before opening a pull request: bash scripts/ci/run-fast-checks.sh ``` +Before staging local MailPlus fixtures, generated metadata, or cache-related +changes, run the broader local scan so untracked non-ignored files are checked: + +```bash +bash scripts/check-detect-secrets.sh --all-local +``` + ## Pull Requests - Keep changes focused and reviewable. diff --git a/SECURITY.md b/SECURITY.md index 5e74ee3..7112b64 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -14,6 +14,12 @@ available for this repository. If private reporting is unavailable, contact the repository owner with a minimal description of the issue and the affected version or commit. +Expected response: the maintainer will acknowledge a complete report within +seven calendar days and will provide a remediation or disclosure plan once the +impact is understood. Please include enough reproduction detail to verify the +issue without including live credentials, raw mail, mailbox exports, or other +private payloads. + ## Data Handling Scope MailPlus Intelligence must not store raw mail bodies, attachment payloads, diff --git a/fixtures/README.md b/fixtures/README.md new file mode 100644 index 0000000..4c6c38e --- /dev/null +++ b/fixtures/README.md @@ -0,0 +1,9 @@ +# Fixtures + +All fixtures in this tree are synthetic, metadata-only or derived-output examples. Do not add real MailPlus exports, raw message bodies, attachment payloads, credentials, live links, personal names, real domains, generated databases, caches, or logs. + +Before extending a fixture corpus, review `docs/privacy-redaction-boundaries.md`, use reserved domains such as `example.com` or `example.test`, and run: + +```bash +bash scripts/check-detect-secrets.sh --all-local +``` diff --git a/scripts/check-detect-secrets.sh b/scripts/check-detect-secrets.sh index bec5841..2f2dd6f 100755 --- a/scripts/check-detect-secrets.sh +++ b/scripts/check-detect-secrets.sh @@ -54,12 +54,12 @@ case "$mode" in --all-files) append_tracked_files ;; - --all-files-with-untracked | --local) + --all-files-with-untracked | --all-local | --local) append_tracked_files append_untracked_files ;; *) - echo "Usage: $0 [--all-files|--staged|--all-files-with-untracked|--local]" >&2 + echo "Usage: $0 [--all-files|--staged|--all-files-with-untracked|--all-local|--local]" >&2 exit 2 ;; esac diff --git a/tests/test_secret_scan_script.py b/tests/test_secret_scan_script.py index 69c7fc3..c432a97 100644 --- a/tests/test_secret_scan_script.py +++ b/tests/test_secret_scan_script.py @@ -66,10 +66,13 @@ def test_local_mode_includes_untracked_non_ignored_files(self) -> None: ci_result = self.run_scan("--all-files") local_result = self.run_scan("--all-files-with-untracked") + alias_result = self.run_scan("--all-local") self.assertEqual(ci_result.returncode, 0, ci_result.stderr) self.assertNotEqual(local_result.returncode, 0) self.assertIn("mailplus-export.eml", local_result.stderr) + self.assertNotEqual(alias_result.returncode, 0) + self.assertIn("mailplus-export.eml", alias_result.stderr) def test_mailplus_link_leak_is_detected(self) -> None: live_link = "https://mail.vendor.invalid/reset?" + "token=abc123" From f3c5525ffaa41384fd43e9af7821ca98f3850fce Mon Sep 17 00:00:00 2001 From: Ares Date: Sun, 24 May 2026 20:20:17 +0000 Subject: [PATCH 3/3] Unblock public PR fast CI --- .github/workflows/pr-fast-ci.yml | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/.github/workflows/pr-fast-ci.yml b/.github/workflows/pr-fast-ci.yml index a4f8f45..2b73f44 100644 --- a/.github/workflows/pr-fast-ci.yml +++ b/.github/workflows/pr-fast-ci.yml @@ -23,7 +23,7 @@ defaults: jobs: changes: name: Detect Relevant Changes - runs-on: ['self-hosted', 'synology', 'shell-only', 'public'] + runs-on: ubuntu-latest outputs: app: ${{ steps.filter.outputs.app }} ci: ${{ steps.filter.outputs.ci }} @@ -63,7 +63,7 @@ jobs: fast-checks: name: Fast Checks - runs-on: ['self-hosted', 'synology', 'shell-only', 'public'] + runs-on: ubuntu-latest timeout-minutes: 15 needs: changes if: >- @@ -74,12 +74,16 @@ jobs: with: ref: ${{ github.event.pull_request.head.sha }} + - uses: actions/setup-python@v5 + with: + python-version: ${{ env.PYTHON_VERSION }} + - name: Run fast checks run: bash scripts/ci/run-fast-checks.sh validate-secrets: name: Validate Secrets - runs-on: ['self-hosted', 'synology', 'shell-only', 'public'] + runs-on: ubuntu-latest timeout-minutes: 10 if: github.event.pull_request.draft == false steps: @@ -91,7 +95,7 @@ jobs: ci-gate: name: CI Gate - runs-on: ['self-hosted', 'synology', 'shell-only', 'public'] + runs-on: ubuntu-latest if: always() needs: - changes