From 040bd398a1ba426ea0daefca4d5f2b24de6e0bd0 Mon Sep 17 00:00:00 2001 From: Cognis Digital <215970675+cognis-digital@users.noreply.github.com> Date: Fri, 12 Jun 2026 10:15:27 +0000 Subject: [PATCH 1/4] Repo hardening: install instructions, dead imports, hygiene - fix 2 broken `pip install` line(s) in README (package is not on PyPI; use the working git+https install) - remove 7 unused import(s) (ruff F401/F811) --- README.md | 4 ++-- deidproof/core.py | 5 ++--- integrations/webhook.py | 2 +- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index cdec271..7ee55d6 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ ```bash -pip install cognis-deidproof +pip install "git+https://github.com/cognis-digital/deidproof.git" deidproof scan . # → prioritized findings in seconds ``` @@ -50,7 +50,7 @@ Proves your 'de-identified' export actually is de-identified, emitting a signed ## Quick start ```bash -pip install cognis-deidproof +pip install "git+https://github.com/cognis-digital/deidproof.git" deidproof --version deidproof scan . # scan current project deidproof scan . --format json # machine-readable diff --git a/deidproof/core.py b/deidproof/core.py index a174206..540456a 100644 --- a/deidproof/core.py +++ b/deidproof/core.py @@ -12,10 +12,9 @@ import csv import re -from collections import Counter, defaultdict +from collections import defaultdict from dataclasses import dataclass, field, asdict -from datetime import date, datetime -from typing import Dict, Iterable, List, Optional, Sequence, Tuple +from typing import Dict, List, Optional, Sequence, Tuple TOOL_NAME = "deidproof" TOOL_VERSION = "1.0.0" diff --git a/integrations/webhook.py b/integrations/webhook.py index 91e0211..9bf7258 100644 --- a/integrations/webhook.py +++ b/integrations/webhook.py @@ -5,7 +5,7 @@ Usage: scan . 
--format json | python integrations/webhook.py --url URL """ from __future__ import annotations -import argparse, json, sys, urllib.request +import argparse, sys, urllib.request def main() -> int: ap = argparse.ArgumentParser() From 3f0c5541f7f8bf61ab60bc7519a3d533e807ba33 Mon Sep 17 00:00:00 2001 From: Cognis Digital Date: Sat, 13 Jun 2026 03:55:29 -0400 Subject: [PATCH 2/4] Add plain-language overview and install instructions - Insert "What is this?" section in README.md with a non-technical description of deidproof's purpose and audience - Add managed Install section covering pipx/uv/pip/source methods - Write install.sh (Linux/macOS) and install.ps1 (Windows) one-liner installers that auto-detect the best available packaging tool --- README.md | 42 ++++++++++++++++++++++++++++++++++++++++++ install.ps1 | 29 +++++++++++++++++++++++++++++ install.sh | 44 ++++++++++++++++++++++++++++++++++---------- 3 files changed, 105 insertions(+), 10 deletions(-) create mode 100644 install.ps1 diff --git a/README.md b/README.md index 7ee55d6..d8725ef 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,12 @@ pip install "git+https://github.com/cognis-digital/deidproof.git" deidproof scan . # → prioritized findings in seconds ``` + +## What is this? + +`deidproof` is a small, self-contained command-line tool from the Cognis suite. It does one job well: it checks whether a dataset you believe is de-identified really is, by assessing re-identification risk (k-anonymity, l-diversity, and HIPAA Safe Harbor compliance). It runs locally with no account or cloud service required, and is built to be easy to install and read. See the Quick start section below for usage. + + ## Contents - [Why deidproof?](#why) · [Features](#features) · [Quick start](#quick-start) · [Example](#example) · [Architecture](#architecture) · [AI stack](#ai-stack) · [How it compares](#how-it-compares) · [Integrations](#integrations) · [Install anywhere](#install-anywhere) · [Related](#related) · [Contributing](#contributing) @@ -47,6 +53,42 @@ Proves your 'de-identified' export actually is de-identified, emitting a signed
↑ back to top
+ +## Install + +`deidproof` is source-available (not published to PyPI) — every method below installs +straight from GitHub. Pick whichever you prefer; the one-line scripts auto-detect +the best tool available on your machine. + +**One-liner (Linux / macOS):** +```sh +curl -fsSL https://raw.githubusercontent.com/cognis-digital/deidproof/HEAD/install.sh | sh +``` + +**One-liner (Windows PowerShell):** +```powershell +irm https://raw.githubusercontent.com/cognis-digital/deidproof/HEAD/install.ps1 | iex +``` + +**Or install manually — any one of:** +```sh +pipx install "git+https://github.com/cognis-digital/deidproof.git" # isolated (recommended) +uv tool install "git+https://github.com/cognis-digital/deidproof.git" # uv +pip install "git+https://github.com/cognis-digital/deidproof.git" # pip +``` + +**From source:** +```sh +git clone https://github.com/cognis-digital/deidproof.git +cd deidproof && pip install . +``` + +Then run: +```sh +deidproof --help +``` + + ## Quick start ```bash diff --git a/install.ps1 b/install.ps1 new file mode 100644 index 0000000..5271a4a --- /dev/null +++ b/install.ps1 @@ -0,0 +1,29 @@ +# Comprehensive installer for cognis-digital/deidproof (Windows PowerShell). +# Tries: pipx -> uv -> pip (git+https) -> from source. +# deidproof is source-available and not on PyPI; all paths install from GitHub. +$ErrorActionPreference = "Stop" +$Repo = "deidproof" +$Url = "git+https://github.com/cognis-digital/deidproof.git" +$Git = "https://github.com/cognis-digital/deidproof.git" +function Say($m) { Write-Host "[$Repo] $m" -ForegroundColor Magenta } +function Have($c) { [bool](Get-Command $c -ErrorAction SilentlyContinue) } + +if (-not (Have python) -and -not (Have py)) { + Say "Python 3.9+ is required but was not found. Install Python first."; exit 1 +} +if (Have pipx) { + Say "Installing with pipx (isolated, recommended)..." + pipx install $Url; if ($LASTEXITCODE -eq 0) { Say "Done. 
Run: deidproof"; exit 0 } +} +if (Have uv) { + Say "Installing with uv..." + uv tool install $Url; if ($LASTEXITCODE -eq 0) { Say "Done. Run: deidproof"; exit 0 } +} +if (Have pip) { + Say "Installing with pip (user site)..." + pip install --user $Url; if ($LASTEXITCODE -eq 0) { Say "Done. Run: deidproof"; exit 0 } +} +Say "No packaging tool worked; falling back to a source clone." +$Tmp = Join-Path $env:TEMP "$Repo-src" +git clone --depth 1 $Git $Tmp +Say "Cloned to $Tmp - run: cd $Tmp; python -m pip install ." diff --git a/install.sh b/install.sh index 494b880..4dca654 100644 --- a/install.sh +++ b/install.sh @@ -1,10 +1,34 @@ -#!/usr/bin/env sh -# Universal installer for deidproof. Prefers uv > pipx > pip; installs from the repo. -set -e -SRC="git+https://github.com/cognis-digital/deidproof.git" -echo "Installing deidproof ..." -if command -v uv >/dev/null 2>&1; then uv tool install "$SRC" -elif command -v pipx >/dev/null 2>&1; then pipx install "$SRC" -elif command -v python3 >/dev/null 2>&1; then python3 -m pip install --user "$SRC" -else echo "Need uv, pipx, or python3+pip"; exit 1; fi -echo "Done. Run: deidproof --help" +#!/usr/bin/env sh +# Comprehensive installer for cognis-digital/deidproof (Linux / macOS). +# Tries the best available method: pipx -> uv -> pip (git+https) -> from source. +# deidproof is source-available and not on PyPI; all paths install from GitHub. +set -eu + +REPO="deidproof" +URL="git+https://github.com/cognis-digital/deidproof.git" +GITURL="https://github.com/cognis-digital/deidproof.git" + +say() { printf '\033[1;35m[%s]\033[0m %s\n' "$REPO" "$1"; } +have() { command -v "$1" >/dev/null 2>&1; } + +if ! have python3 && ! have python; then + say "Python 3.9+ is required but was not found. Install Python first."; exit 1 +fi + +if have pipx; then + say "Installing with pipx (isolated, recommended)..." + pipx install "$URL" && { say "Done. Run: deidproof"; exit 0; } +fi +if have uv; then + say "Installing with uv..." 
+ uv tool install "$URL" && { say "Done. Run: deidproof"; exit 0; } +fi +if have pip3 || have pip; then + PIP="$(command -v pip3 || command -v pip)" + say "Installing with pip (user site)..." + "$PIP" install --user "$URL" && { say "Done. Run: deidproof"; exit 0; } +fi + +say "No packaging tool worked; falling back to a source clone." +TMP="$(mktemp -d)"; git clone --depth 1 "$GITURL" "$TMP/$REPO" +say "Cloned to $TMP/$REPO — run: cd $TMP/$REPO && python3 -m pip install ." From 061b77f3327fad9241b8229502eded76e441662d Mon Sep 17 00:00:00 2001 From: Cognis Digital Date: Sat, 13 Jun 2026 09:26:32 -0400 Subject: [PATCH 3/4] docs: add Domains section (suite taxonomy + JTF MERIDIAN mapping) --- README.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/README.md b/README.md index d8725ef..28ea4a4 100644 --- a/README.md +++ b/README.md @@ -53,6 +53,16 @@ Proves your 'de-identified' export actually is de-identified, emitting a signed
↑ back to top
+ +## Domains + +**Primary domain:** Data & Privacy · **JTF MERIDIAN division:** NULLBYTE · BLUE CELL + +**Topics:** `cognis` `privacy` `data-protection` `pii` + +Part of the **Cognis Neural Suite** — 300+ source-available tools organized across 12 domains under the JTF MERIDIAN command structure. See the [suite on GitHub](https://github.com/cognis-digital) and [jtf-meridian](https://github.com/cognis-digital/jtf-meridian) for how the pieces fit together. + + ## Install From 9d325aa26cd20baa1353ae4311bf6a037ed945d3 Mon Sep 17 00:00:00 2001 From: Cognis Digital Date: Sun, 14 Jun 2026 01:54:07 -0400 Subject: [PATCH 4/4] harden: input validation, error handling, and edge-case tests - core.py: validate delimiter is a single character in analyze_csv; re-raise IsADirectoryError with a clear message; suppress E741 on the intentional `l` parameter with a noqa comment - cli.py: reject k<=0 and l<=0 before parsing the CSV; catch IsADirectoryError, PermissionError, and UnicodeDecodeError so all expected I/O errors produce a clean stderr message and exit 1 (no raw tracebacks) - mcp_server.py: fix broken imports (scan/to_json never existed); wire up analyze_csv + report.to_dict(); wrap tool handler in try/except so bad paths return a JSON error object instead of crashing the server - tests: add 7 new tests covering missing file, multi-char delimiter, negative k, zero l, non-UTF-8 file, and mcp_server importability --- deidproof/cli.py | 19 ++++++++++++- deidproof/core.py | 38 ++++++++++++++++++++----- deidproof/mcp_server.py | 53 ++++++++++++++++++++-------------- tests/test_smoke.py | 63 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 143 insertions(+), 30 deletions(-) diff --git a/deidproof/cli.py b/deidproof/cli.py index db6b8c8..5f92a2f 100644 --- a/deidproof/cli.py +++ b/deidproof/cli.py @@ -171,6 +171,14 @@ def main(argv: Optional[List[str]] = None) -> int: qi = _split_cols(args.quasi_identifiers) sensitive = _split_cols(args.sensitive) + # Validate numeric 
thresholds before hitting the CSV layer. + if args.k is not None and args.k < 1: + print(f"error: -k must be a positive integer, got {args.k}", file=sys.stderr) + return 1 + if args.l is not None and args.l < 1: + print(f"error: -l must be a positive integer, got {args.l}", file=sys.stderr) + return 1 + try: rep = analyze_csv( args.dataset, @@ -181,7 +189,16 @@ def main(argv: Optional[List[str]] = None) -> int: safe_harbor=args.safe_harbor, delimiter=args.delimiter, ) - except (FileNotFoundError, ValueError) as exc: + except (FileNotFoundError, IsADirectoryError, PermissionError) as exc: + print(f"error: {exc}", file=sys.stderr) + return 1 + except UnicodeDecodeError as exc: + print( + f"error: could not decode file as UTF-8 — {exc}", + file=sys.stderr, + ) + return 1 + except ValueError as exc: print(f"error: {exc}", file=sys.stderr) return 1 diff --git a/deidproof/core.py b/deidproof/core.py index 540456a..e79e191 100644 --- a/deidproof/core.py +++ b/deidproof/core.py @@ -292,7 +292,7 @@ def analyze_rows( quasi_identifiers: Optional[Sequence[str]] = None, sensitive: Optional[Sequence[str]] = None, k: Optional[int] = None, - l: Optional[int] = None, + l: Optional[int] = None, # noqa: E741 safe_harbor: bool = True, max_samples: int = 3, ) -> Report: @@ -346,16 +346,40 @@ def analyze_csv( quasi_identifiers: Optional[Sequence[str]] = None, sensitive: Optional[Sequence[str]] = None, k: Optional[int] = None, - l: Optional[int] = None, + l: Optional[int] = None, # noqa: E741 safe_harbor: bool = True, delimiter: str = ",", max_samples: int = 3, ) -> Report: - """Parse a CSV file and run the full analysis.""" - with open(path, "r", newline="", encoding="utf-8-sig") as fh: - reader = csv.DictReader(fh, delimiter=delimiter) - columns = list(reader.fieldnames or []) - rows = [dict(r) for r in reader] + """Parse a CSV file and run the full analysis. + + Raises + ------ + FileNotFoundError + If *path* does not exist. + IsADirectoryError + If *path* is a directory, not a file. 
+ PermissionError + If the process lacks read permission for *path*. + UnicodeDecodeError + If the file is not UTF-8 (or UTF-8-with-BOM) encoded. + ValueError + If *delimiter* is not a single character, or if a requested + quasi-identifier / sensitive column is absent from the dataset. + """ + if len(delimiter) != 1: + raise ValueError( + f"delimiter must be a single character, got {delimiter!r} " + f"(length {len(delimiter)})" + ) + + try: + with open(path, "r", newline="", encoding="utf-8-sig") as fh: + reader = csv.DictReader(fh, delimiter=delimiter) + columns = list(reader.fieldnames or []) + rows = [dict(r) for r in reader] + except IsADirectoryError: + raise IsADirectoryError(f"expected a CSV file but got a directory: {path!r}") _validate_columns(columns, quasi_identifiers, "quasi-identifier") _validate_columns(columns, sensitive, "sensitive") diff --git a/deidproof/mcp_server.py b/deidproof/mcp_server.py index 151a862..4cddd20 100644 --- a/deidproof/mcp_server.py +++ b/deidproof/mcp_server.py @@ -1,22 +1,31 @@ -"""DEIDPROOF MCP server — exposes scan() as an MCP tool for Cognis.Studio.""" -from __future__ import annotations -from deidproof.core import scan, to_json - -def serve() -> int: - """Start an MCP stdio server. Requires the optional 'mcp' extra: - pip install "cognis-deidproof[mcp]" - """ - try: - from mcp.server.fastmcp import FastMCP - except Exception: - print("Install the MCP extra: pip install 'cognis-deidproof[mcp]'") - return 1 - app = FastMCP("deidproof") - - @app.tool() - def deidproof_scan(target: str) -> str: - """Re-identification risk assessment that computes k-anonymity, l-diversity, and HIPAA Safe Harbor compliance on a dataset.. 
Returns JSON findings.""" - return to_json(scan(target)) - - app.run() - return 0 +"""DEIDPROOF MCP server — exposes deidproof_scan() as an MCP tool for Cognis.Studio.""" +from __future__ import annotations + +import json + +from deidproof.core import analyze_csv + + +def serve() -> int: + """Start an MCP stdio server. Requires the optional 'mcp' extra: + pip install "cognis-deidproof[mcp]" + """ + try: + from mcp.server.fastmcp import FastMCP + except Exception: + print("Install the MCP extra: pip install 'cognis-deidproof[mcp]'") + return 1 + app = FastMCP("deidproof") + + @app.tool() + def deidproof_scan(target: str) -> str: + """Re-identification risk assessment that computes k-anonymity, l-diversity, + and HIPAA Safe Harbor compliance on a dataset. Returns JSON findings.""" + try: + report = analyze_csv(target) + except (FileNotFoundError, IsADirectoryError, PermissionError, UnicodeDecodeError, ValueError) as exc: + return json.dumps({"error": str(exc)}) + return json.dumps(report.to_dict(), indent=2) + + app.run() + return 0 diff --git a/tests/test_smoke.py b/tests/test_smoke.py index 01ea357..fa10958 100644 --- a/tests/test_smoke.py +++ b/tests/test_smoke.py @@ -2,6 +2,7 @@ import os import sys +import tempfile import pytest @@ -149,3 +150,65 @@ def test_cli_version(capsys): assert exc.value.code == 0 out = capsys.readouterr().out assert TOOL_VERSION in out + + +# --------------------------------------------------------------------------- +# Hardening tests — bad input / edge cases +# --------------------------------------------------------------------------- + + +def test_cli_missing_file_exits_1(capsys): + """Requesting a non-existent CSV must exit 1 with a clear stderr message.""" + rc = main(["check", "/no/such/file.csv"]) + assert rc == 1 + err = capsys.readouterr().err + assert "error:" in err + + +def test_cli_multichar_delimiter_exits_1(capsys): + """A multi-character delimiter is not a valid CSV separator; must exit 1.""" + rc = main(["check", DEMO, 
"--delimiter", "TAB"]) + assert rc == 1 + err = capsys.readouterr().err + assert "delimiter" in err.lower() + + +def test_cli_negative_k_exits_1(capsys): + """Negative k is semantically invalid; must exit 1 with a clear message.""" + rc = main(["check", DEMO, "-k", "-3"]) + assert rc == 1 + err = capsys.readouterr().err + assert "error:" in err + + +def test_cli_zero_l_exits_1(capsys): + """l=0 is semantically invalid; must exit 1 with a clear message.""" + rc = main(["check", DEMO, "--sensitive", "diagnosis", "-l", "0"]) + assert rc == 1 + err = capsys.readouterr().err + assert "error:" in err + + +def test_analyze_csv_multichar_delimiter_raises(): + """analyze_csv must raise ValueError for a multi-character delimiter.""" + with pytest.raises(ValueError, match="delimiter"): + analyze_csv(DEMO, delimiter="||") + + +def test_analyze_csv_non_utf8_raises(): + """analyze_csv must propagate UnicodeDecodeError for non-UTF-8 files.""" + with tempfile.NamedTemporaryFile(mode="wb", suffix=".csv", delete=False) as fh: + fh.write(b"name,age\nJos\xe9,30\n") # Latin-1 byte in UTF-8 context + fpath = fh.name + try: + with pytest.raises(UnicodeDecodeError): + analyze_csv(fpath) + finally: + os.unlink(fpath) + + +def test_mcp_server_importable(): + """mcp_server must import without error (broken imports must not survive).""" + import importlib + mod = importlib.import_module("deidproof.mcp_server") + assert callable(mod.serve)