diff --git a/src/lib/privacy-innovations.ts b/src/lib/privacy-innovations.ts index 091b8d5..10f0a6f 100644 --- a/src/lib/privacy-innovations.ts +++ b/src/lib/privacy-innovations.ts @@ -1,829 +1 @@ -import type { PrivacyLevel } from "./privacy-layer"; - -// ============================================================ -// 1. PRIVACY PERSONAS -// ============================================================ - -export type PersonaId = "auto" | "doctor" | "developer" | "journalist" | "casual" | "enterprise" | "researcher"; - -export interface PrivacyPersona { - id: PersonaId; - label: string; - description: string; - icon: string; - defaultLevel: PrivacyLevel; - hardRules: HardRule[]; - autoDetectKeywords: string[]; - auditEnabled: boolean; -} - -export interface HardRule { - type: "strip-phi" | "mask-credentials" | "remove-watermarks" | "enforce-minimum" | "audit-all" | "synthetic-substitute"; - description: string; -} - -export interface PersonaDetectionResult { - detectedPersona: PersonaId; - confidence: number; - triggeredKeywords: string[]; -} - -export const PERSONAS: Record = { - auto: { - id: "auto", - label: "Auto-Detect", - description: "AI automatically detects your role from content", - icon: "brain", - defaultLevel: "STANDARD", - hardRules: [], - autoDetectKeywords: [], - auditEnabled: false, - }, - doctor: { - id: "doctor", - label: "Healthcare Professional", - description: "HIPAA-aligned: MAXIMUM default, strip all PHI, audit trail", - icon: "stethoscope", - defaultLevel: "MAXIMUM", - hardRules: [ - { type: "strip-phi", description: "Automatically strip patient names, MRN, DOB, SSN from all queries" }, - { type: "enforce-minimum", description: "Never use privacy level below HIGH for any medical content" }, - { type: "audit-all", description: "Log all queries for compliance auditing" }, - { type: "synthetic-substitute", description: "Use synthetic patient data when analyzing records" }, - ], - autoDetectKeywords: ["patient", "diagnosis", "prescription", "medical record", "hipaa", "emr", "chart", "clinical", "treatment", "pathology", "radiology", "doctor", "physician", "nurse", "hospital", "pharmacy", "medication", "symptom", "prognosis"], - auditEnabled: true, - }, - developer: { - id: "developer", - label: "Developer", - description: "Code-focused: HIGH default, mask all credentials and secrets", - icon: "code", - defaultLevel: "HIGH", - hardRules: [ - { type: "mask-credentials", description: "Auto-redact API keys, tokens, passwords, secrets before sending" }, - { type: "enforce-minimum", description: "Never use level below STANDARD when code contains credentials" }, - { type: "audit-all", description: "Log all credential-containing queries" }, - ], - autoDetectKeywords: ["api key", "token", "secret", "password", "credential", "env", "config", "ssh", "private key", "github", "aws", "azure", "gcp", "database", "connection string", "oauth", "jwt"], - auditEnabled: true, - }, - journalist: { - id: "journalist", - label: "Journalist", - description: "Source protection: MAXIMUM for source names, metadata stripping", - icon: "newspaper", - defaultLevel: "HIGH", - hardRules: [ - { type: "strip-phi", description: "Remove source names, contact info, location data" }, - { type: "remove-watermarks", description: "Strip EXIF/metadata from uploaded images" }, - { type: "enforce-minimum", description: "Source-related content always uses MAXIMUM or HIGH" }, - ], - autoDetectKeywords: ["source", "anonymous", "whistleblower", "leak", "informant", "off the record", "not for attribution", "deep background", "investigation", "exposé", "undercover"], - auditEnabled: false, - }, - casual: { - id: "casual", - label: "Casual User", - description: "Everyday use: LOW default for speed, escalates for detected sensitivity", - icon: "coffee", - defaultLevel: "LOW", - hardRules: [ - { type: "enforce-minimum", description: "Auto-escalate to STANDARD if PII detected" }, - ], - autoDetectKeywords: ["weather", "recipe", "movie", "sports", "game", "music", "travel", "restaurant", "shopping", "hobby", "fun", "joke", "trivia"], - auditEnabled: false, - }, - enterprise: { - id: "enterprise", - label: "Enterprise", - description: "Business: MEDIUM+ default, DLP scanning, compliance logging", - icon: "building", - defaultLevel: "MEDIUM", - hardRules: [ - { type: "strip-phi", description: "Remove employee names, internal IDs, salary data" }, - { type: "mask-credentials", description: "Redact corporate secrets, financial figures" }, - { type: "enforce-minimum", description: "Never go below STANDARD for internal documents" }, - { type: "audit-all", description: "Full audit trail for compliance (GDPR, SOX, etc.)" }, - { type: "synthetic-substitute", description: "Use anonymized employee data for analytics" }, - ], - autoDetectKeywords: ["internal", "confidential", "proprietary", "board", "executive", "revenue", "q1", "q2", "quarterly", "forecast", "roadmap", "employee", "hr", "payroll", "merger", "acquisition", "ipo"], - auditEnabled: true, - }, - researcher: { - id: "researcher", - label: "Researcher", - description: "Academic: STANDARD+ default, synthetic data for datasets", - icon: "flask", - defaultLevel: "STANDARD", - hardRules: [ - { type: "synthetic-substitute", description: "Generate synthetic datasets from real data for cloud analysis" }, - { type: "enforce-minimum", description: "Subject data always uses differential privacy or higher" }, - ], - autoDetectKeywords: ["dataset", "survey", "participant", "subject", "irb", "consent", "study", "experiment", "statistical", "correlation", "regression", "cohort", "control group", "p-value"], - auditEnabled: true, - }, -}; - -/** - * Detects the most appropriate privacy persona based on content keywords. - * - * Analyzes text for persona-specific keywords and returns the best match - * with confidence score. Used for automatic persona selection. - * - * @param request - The user's request text - * @param data - Optional additional data to analyze - * @returns Detected persona ID, confidence (0-100), and triggered keywords - * - * @example - * ```typescript - * const detection = detectPersona( - * "Review patient diagnosis", - * "Medical record data" - * ); - * console.log(detection.detectedPersona); // "doctor" - * console.log(detection.confidence); // 75 - * console.log(detection.triggeredKeywords); // ["patient", "diagnosis", "medical"] - * ``` - */ -export function detectPersona(request: string, data?: string): PersonaDetectionResult { - const text = `${request} ${data || ""}`.toLowerCase(); - - let bestPersona: PersonaId = "auto"; - let bestScore = 0; - const triggeredKeywords: string[] = []; - - for (const [id, persona] of Object.entries(PERSONAS)) { - if (id === "auto") continue; - let score = 0; - const hits: string[] = []; - - for (const keyword of persona.autoDetectKeywords) { - if (text.includes(keyword.toLowerCase())) { - score += 1; - hits.push(keyword); - } - } - - if (score > bestScore) { - bestScore = score; - bestPersona = id as PersonaId; - triggeredKeywords.length = 0; - triggeredKeywords.push(...hits); - } - } - - // Confidence calculation - const confidence = Math.min(100, bestScore * 15); - - return { - detectedPersona: bestPersona, - confidence, - triggeredKeywords: Array.from(new Set(triggeredKeywords)), - }; -} - -/** - * Applies privacy persona hard rules to determine final privacy level. - * - * Enforces minimum privacy levels and applies persona-specific rules - * (e.g., doctor persona never goes below HIGH for medical content). - * - * @param personaId - The persona to apply rules from - * @param baseLevel - The base privacy level from sensitivity analysis - * @param sensitivityScore - The sensitivity score (0-100) - * @returns Final privacy level, applied rules, and explanation - * - * @example - * ```typescript - * const result = applyPersonaRules("doctor", "MEDIUM", 65); - * console.log(result.finalLevel); // "HIGH" (enforced minimum) - * console.log(result.appliedRules); // [{ type: "enforce-minimum", ... }] - * ``` - */ -export function applyPersonaRules( - personaId: PersonaId, - baseLevel: PrivacyLevel, - sensitivityScore: number -): { finalLevel: PrivacyLevel; appliedRules: HardRule[]; explanation: string } { - const persona = PERSONAS[personaId]; - const appliedRules: HardRule[] = []; - let explanation = `Base level (${baseLevel}) selected by sensitivity analysis.`; - - // Apply hard rules in order, collecting all rules as we go - for (const rule of persona.hardRules) { - appliedRules.push(rule); - - if (rule.type === "enforce-minimum") { - const minLevels: Record = { - auto: "LOW", - doctor: "HIGH", - developer: "STANDARD", - journalist: "HIGH", - casual: "LOW", - enterprise: "STANDARD", - researcher: "STANDARD", - }; - const minLevel = minLevels[personaId]; - const levelOrder: PrivacyLevel[] = ["LOW", "STANDARD", "MEDIUM", "HIGH", "MAXIMUM"]; - const baseIdx = levelOrder.indexOf(baseLevel); - const minIdx = levelOrder.indexOf(minLevel); - - if (baseIdx < minIdx) { - explanation += ` Persona "${persona.label}" enforced minimum level ${minLevel}.`; - // Collect any remaining rules before returning - for (const remaining of persona.hardRules) { - if (!appliedRules.includes(remaining)) { - appliedRules.push(remaining); - } - } - // Also consider persona default if score is high enough - const personaDefaultIdx = levelOrder.indexOf(persona.defaultLevel); - if (personaDefaultIdx > minIdx && sensitivityScore > 50) { - explanation += ` Persona default (${persona.defaultLevel}) applied due to high sensitivity score.`; - return { finalLevel: persona.defaultLevel, appliedRules, explanation }; - } - return { finalLevel: minLevel, appliedRules, explanation }; - } - } - } - - // If persona default is higher than base level and sensitivity is meaningful, bump up - if (personaId !== "auto" && persona.defaultLevel !== "LOW") { - const levelOrder: PrivacyLevel[] = ["LOW", "STANDARD", "MEDIUM", "HIGH", "MAXIMUM"]; - const baseIdx = levelOrder.indexOf(baseLevel); - const personaIdx = levelOrder.indexOf(persona.defaultLevel); - - if (personaIdx > baseIdx && sensitivityScore > 30) { - explanation += ` Persona "${persona.label}" default (${persona.defaultLevel}) applied due to detected role.`; - return { finalLevel: persona.defaultLevel, appliedRules, explanation }; - } - } - - return { finalLevel: baseLevel, appliedRules, explanation }; -} - -// ============================================================ -// 2. SYNTHETIC SUBSTITUTION -// ============================================================ - -export interface EntityMapping { - original: string; - synthetic: string; - type: "name" | "ssn" | "email" | "phone" | "address" | "id" | "credential" | "date" | "amount" | "organization" | "location"; -} - -export interface SyntheticResult { - transformedText: string; - mappings: EntityMapping[]; - entityCount: number; - statsPreserved: string[]; -} - -const SYNTHETIC_NAMES = [ - "Alex Rivera", "Jordan Chen", "Morgan Park", "Casey Williams", "Taylor Brooks", - "Riley Nguyen", "Quinn Patel", "Avery Kim", "Blake Torres", "Dakota Singh", - "Reese Okafor", "Skyler Yamamoto", "Hayden Müller", "Emerson Silva", "Finley Okonkwo", -]; - -const SYNTHETIC_ORGS = [ - "NexGen Systems", "Pinnacle Dynamics", "QuantumBridge Labs", "Solara Industries", - "Vertex Innovations", "Catalyst Group", "Aurora Technologies", "Horizon Partners", -]; - -// Reserved for future address/location substitution -const _SYNTHETIC_LOCATIONS = [ - "Maplewood District", "Riverside Heights", "Cedar Grove", "Willow Creek", - "Oakhaven", "Pinehurst", "Brookside", "Sunnyside Terrace", -]; - -function generateSyntheticId(type: "ssn" | "phone" | "email" | "mrn"): string { - switch (type) { - case "ssn": return `XXX-${String(Math.floor(Math.random() * 9000) + 1000)}-XXXX`; - case "phone": return `(555) ${String(Math.floor(Math.random() * 9000) + 1000)}-XXXX`; - case "email": return `user${Math.floor(Math.random() * 9999)}@example.com`; - case "mrn": return `MRN-${String(Math.floor(Math.random() * 900000) + 100000)}`; - } -} - -function extractNames(text: string): string[] { - const patterns = [ - /(?:patient|client|user|employee|customer|subject|source|mr\.?|mrs\.?|ms\.?|dr\.?)\s+([A-Z][a-z]+\s+[A-Z][a-z]+)/gi, - /(?:between|by|from|to)\s+([A-Z][a-zA-Z]+(?:\s+[A-Z][a-zA-Z]+)*)/g, - /(?:name[d]?[:\s]+)([A-Z][a-z]+\s+[A-Z][a-z]+)/gi, - ]; - const names: string[] = []; - for (const pattern of patterns) { - let match; - while ((match = pattern.exec(text)) !== null) { - const name = match[1]?.trim(); - if (name && name.length > 2 && !name.match(/^(the|and|for|with|from|between|Internal|Confidential)/i)) { - names.push(name); - } - } - } - return Array.from(new Set(names)); -} - -function extractEntities(text: string): EntityMapping[] { - const mappings: EntityMapping[] = []; - let nameIdx = 0; - let orgIdx = 0; - - // Names - const names = extractNames(text); - for (const name of names) { - if (nameIdx < SYNTHETIC_NAMES.length) { - mappings.push({ original: name, synthetic: SYNTHETIC_NAMES[nameIdx++], type: "name" }); - } - } - - // SSN patterns — only match formatted SSNs (NNN-NN-NNNN) to avoid false positives on zip codes, IDs, etc. - const ssnPattern = /\b\d{3}-\d{2}-\d{4}\b/g; - let ssnMatch; - while ((ssnMatch = ssnPattern.exec(text)) !== null) { - mappings.push({ original: ssnMatch[0], synthetic: generateSyntheticId("ssn"), type: "ssn" }); - } - - // Email patterns - const emailPattern = /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/g; - let emailMatch; - while ((emailMatch = emailPattern.exec(text)) !== null) { - mappings.push({ original: emailMatch[0], synthetic: generateSyntheticId("email"), type: "email" }); - } - - // Phone patterns - const phonePattern = /\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}/g; - let phoneMatch; - while ((phoneMatch = phonePattern.exec(text)) !== null) { - mappings.push({ original: phoneMatch[0], synthetic: generateSyntheticId("phone"), type: "phone" }); - } - - // Organizations (capitalized multi-word phrases not caught as names) - const orgPattern = /(?:Corp|Inc|LLC|Ltd|Company|Ventures|Partners|Group|Systems|Labs|Industries|Technologies)/gi; - let orgMatch; - while ((orgMatch = orgPattern.exec(text)) !== null) { - const start = Math.max(0, orgMatch.index - 30); - const context = text.slice(start, orgMatch.index + orgMatch[0].length); - const orgName = context.match(/([A-Z][a-zA-Z]*(?:\s+[A-Z][a-zA-Z]*)*)\s*(?:Corp|Inc|LLC|Ltd|Company|Ventures|Partners|Group|Systems|Labs|Industries|Technologies)/i)?.[0]; - if (orgName && !mappings.some(m => m.original === orgName)) { - mappings.push({ original: orgName, synthetic: SYNTHETIC_ORGS[orgIdx++ % SYNTHETIC_ORGS.length], type: "organization" }); - } - } - - // Dollar amounts (preserve range but mask exact) - const amountPattern = /\$[\d,]+(?:\.\d{2})?\s*(?:M|K|Billion|Million)?/gi; - let amountMatch; - while ((amountMatch = amountPattern.exec(text)) !== null) { - const val = parseFloat(amountMatch[0].replace(/[$,MKB]/gi, "")); - let synthetic: string; - if (amountMatch[0].toLowerCase().includes("m")) synthetic = "$XX.XM"; - else if (amountMatch[0].toLowerCase().includes("k")) synthetic = "$XX.XK"; - else if (val > 1000000) synthetic = "$XX.XM"; - else if (val > 1000) synthetic = "$X,XXX"; - else synthetic = "$XXX.XX"; - mappings.push({ original: amountMatch[0], synthetic, type: "amount" }); - } - - // Patient/record IDs - const idPattern = /(?:patient\s*id|mrn|record\s*#|id[:\s]+)\s*[:#]?\s*(\d+)/gi; - let idMatch; - while ((idMatch = idPattern.exec(text)) !== null) { - mappings.push({ original: idMatch[0], synthetic: `ID: ${generateSyntheticId("mrn")}`, type: "id" }); - } - - // API keys / credentials - const credPattern = /(?:AKIA[0-9A-Z]{16}|ghp_[a-zA-Z0-9]{36}|sk-[a-zA-Z0-9]{32,}|api[_-]?key[:\s]+[a-zA-Z0-9_-]{8,})/gi; - let credMatch; - while ((credMatch = credPattern.exec(text)) !== null) { - mappings.push({ original: credMatch[0], synthetic: "[REDACTED-CREDENTIAL]", type: "credential" }); - } - - return mappings; -} - -/** - * Applies synthetic substitution to replace PII with fake entities. - * - * Extracts and replaces sensitive entities while preserving: - * - Entity relationships - * - Value ranges (e.g., $12.4M → $XX.XM) - * - Record structure - * - Semantic patterns - * - * **Supported Entity Types:** - * - Names (→ synthetic names from pool) - * - SSN (→ XXX-XXXX-XXXX format) - * - Emails (→ user{N}@example.com) - * - Phone numbers (→ (555) XXXX-XXXX) - * - Dollar amounts (→ $XX.XM, preserves magnitude) - * - Patient/Record IDs (→ MRN-XXXXXX) - * - API credentials (→ [REDACTED-CREDENTIAL]) - * - * **Note:** This is demo-quality. For production, integrate Microsoft Presidio. - * - * @param text - Input text containing potential PII - * @returns Object with transformed text, entity mappings, and stats - * - * @example - * ```typescript - * const result = applySyntheticSubstitution( - * "Patient John Smith, SSN 123-45-6789, owes $12,450" - * ); - * console.log(result.transformedText); - * // "Patient Alex Rivera, SSN XXX-XXXX-XXXX, owes $XX,XXX" - * console.log(result.entityCount); // 3 - * console.log(result.mappings); - * // [ - * // { original: "John Smith", synthetic: "Alex Rivera", type: "name" }, - * // { original: "123-45-6789", synthetic: "XXX-XXXX-XXXX", type: "ssn" }, - * // { original: "$12,450", synthetic: "$XX,XXX", type: "amount" } - * // ] - * ``` - */ -export function applySyntheticSubstitution(text: string): SyntheticResult { - const mappings = extractEntities(text); - - // Sort by length (descending) to avoid partial replacements - const sortedMappings = [...mappings].sort((a, b) => b.original.length - a.original.length); - - let transformedText = text; - for (const mapping of sortedMappings) { - // Use a global replace with escape for regex special chars - const escaped = mapping.original.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); - transformedText = transformedText.replace(new RegExp(escaped, "g"), mapping.synthetic); - } - - // Collect stats preserved - const statsPreserved: string[] = []; - if (mappings.some(m => m.type === "name")) statsPreserved.push("Entity relationships preserved"); - if (mappings.some(m => m.type === "amount")) statsPreserved.push("Value ranges preserved"); - if (mappings.some(m => m.type === "ssn" || m.type === "id")) statsPreserved.push("Record structure preserved"); - if (mappings.length > 0) statsPreserved.push("Semantic patterns maintained"); - - return { - transformedText, - mappings, - entityCount: mappings.length, - statsPreserved: statsPreserved.length > 0 ? statsPreserved : ["No sensitive entities detected"], - }; -} - -// ============================================================ -// 3. WATERMARKED OUTPUTS -// ============================================================ - -// Zero-width unicode characters for steganographic watermarking -const WATERMARK_CHARS = { - "0": "\u200B", // ZERO WIDTH SPACE - "1": "\u200C", // ZERO WIDTH NON-JOINER -}; - -const WATERMARK_REVERSE: Record = { - "\u200B": "0", - "\u200C": "1", -}; - -export interface WatermarkMetadata { - timestamp: number; - privacyLevel: PrivacyLevel; - queryHash: string; - userFingerprint: string; -} - -export interface WatermarkResult { - watermarkedText: string; - metadata: WatermarkMetadata; - encodedLength: number; - humanReadable: string; -} - -export interface WatermarkDetection { - detected: boolean; - metadata?: WatermarkMetadata; - confidence: number; -} - -function hashString(str: string): string { - // Use a better mixing function to reduce collisions on short similar strings - let h1 = 0xdeadbeef; - let h2 = 0x41c6ce57; - for (let i = 0; i < str.length; i++) { - const ch = str.charCodeAt(i); - h1 = Math.imul(h1 ^ ch, 2654435761); - h2 = Math.imul(h2 ^ ch, 1597334677); - } - h1 = Math.imul(h1 ^ (h1 >>> 16), 2246822507) ^ Math.imul(h2 ^ (h2 >>> 13), 3266489909); - h2 = Math.imul(h2 ^ (h2 >>> 16), 2246822507) ^ Math.imul(h1 ^ (h1 >>> 13), 3266489909); - const combined = (4294967296 * (2097151 & h2) + (h1 >>> 0)); - return (combined >>> 0).toString(2).padStart(32, "0"); -} - -function encodeBitsToWatermark(bits: string): string { - return bits.split("").map(bit => WATERMARK_CHARS[bit as "0" | "1"]).join(""); -} - -function decodeWatermarkFromText(text: string): string { - let bits = ""; - for (const char of text) { - if (WATERMARK_REVERSE[char]) { - bits += WATERMARK_REVERSE[char]; - } - } - return bits; -} - -/** - * Embeds an invisible watermark in text using zero-width Unicode characters. - * - * Uses steganographic encoding to embed metadata: - * - Timestamp (48 bits) - * - Privacy level (3 bits) - * - Query hash (16 bits) - * - User fingerprint (8 bits) - * - * **Encoding:** Uses U+200B (zero-width space) for "0" and U+200C (zero-width non-joiner) for "1" - * - * **⚠️ Privacy Warning:** Watermarked outputs contain tracking metadata. - * Do NOT share externally. For internal tracking only. - * - * @param text - The text to watermark - * @param privacyLevel - The privacy level used for processing - * @param query - The original query (hashed for watermark) - * @returns Watermarked text, metadata, and human-readable summary - * - * @example - * ```typescript - * const result = embedWatermark( - * "This is the AI response", - * "MAXIMUM", - * "Analyze patient record" - * ); - * console.log(result.watermarkedText); // Contains invisible chars - * console.log(result.humanReadable); - * // "Privacy: MAXIMUM | Query: a3f2... | Time: 2026-05-11T... | User: 4a7b..." - * ``` - */ -export function embedWatermark( - text: string, - privacyLevel: PrivacyLevel, - query: string -): WatermarkResult { - const timestamp = Date.now(); - const queryHash = hashString(query).slice(0, 16); - const userFingerprint = hashString(navigator?.userAgent || "unknown").slice(0, 8); - - const metadata: WatermarkMetadata = { - timestamp, - privacyLevel, - queryHash, - userFingerprint, - }; - - // Encode metadata as binary string - // Format: [timestamp(48 bits)][level(3 bits)][queryHash(16 bits)][fingerprint(8 bits)] - const tsBits = timestamp.toString(2).padStart(48, "0"); - const levelBits = ["LOW", "STANDARD", "MEDIUM", "HIGH", "MAXIMUM"].indexOf(privacyLevel).toString(2).padStart(3, "0"); - const hashBits = queryHash; - const fpBits = userFingerprint; - - const fullBits = tsBits + levelBits + hashBits + fpBits; - const watermark = encodeBitsToWatermark(fullBits); - - // Insert watermark after first sentence (or at end if no sentence break) - const sentenceEnd = text.search(/[.!?]\s/); - const insertPos = sentenceEnd > 0 ? sentenceEnd + 1 : Math.floor(text.length / 2); - - const watermarkedText = text.slice(0, insertPos) + watermark + text.slice(insertPos); - - const humanReadable = `Privacy: ${privacyLevel} | Query: ${queryHash.slice(0, 6)}... | Time: ${new Date(timestamp).toISOString()} | User: ${userFingerprint.slice(0, 4)}...`; - - return { - watermarkedText, - metadata, - encodedLength: watermark.length, - humanReadable, - }; -} - -/** - * Detects and extracts watermark metadata from text. - * - * Scans text for zero-width Unicode characters and decodes embedded metadata. - * Returns detection status, metadata, and confidence score. - * - * @param text - Text potentially containing a watermark - * @returns Detection result with metadata and confidence - * - * @example - * ```typescript - * const detection = detectWatermark(watermarkedText); - * if (detection.detected) { - * console.log(detection.metadata.privacyLevel); // "MAXIMUM" - * console.log(detection.metadata.timestamp); // 1715443200000 - * console.log(detection.confidence); // 100 - * } - * ``` - */ -export function detectWatermark(text: string): WatermarkDetection { - const bits = decodeWatermarkFromText(text); - - if (bits.length < 48) { - return { detected: false, confidence: 0 }; - } - - // Check if the first 48 bits decode to a reasonable timestamp - const tsBits = bits.slice(0, 48); - const timestamp = parseInt(tsBits, 2); - const now = Date.now(); - const isValidTimestamp = timestamp > 1700000000000 && timestamp <= now + 86400000; - - if (!isValidTimestamp) { - return { detected: false, confidence: 0 }; - } - - const levelIdx = parseInt(bits.slice(48, 51), 2); - const allLevels: PrivacyLevel[] = ["LOW", "STANDARD", "MEDIUM", "HIGH", "MAXIMUM"]; - const level: PrivacyLevel = allLevels[levelIdx] ?? "LOW"; - const queryHash = bits.slice(51, 67); - const userFingerprint = bits.slice(67, 75); - - return { - detected: true, - metadata: { - timestamp, - privacyLevel: level, - queryHash, - userFingerprint, - }, - confidence: Math.min(100, (bits.length / 75) * 100), - }; -} - -// ============================================================ -// INTEGRATION HELPER -// ============================================================ - -export interface EnhancedProcessRequest { - request: string; - data?: string; - overrideLevel?: PrivacyLevel; - personaId?: PersonaId; - useSyntheticSubstitution?: boolean; - enableWatermark?: boolean; -} - -export interface EnhancedProcessResponse { - analysis: import("./privacy-layer").SensitivityAnalysis; - privacy: import("./privacy-layer").PrivacyResult; - simulatedResult: string; - persona?: { - id: PersonaId; - label: string; - detected: boolean; - confidence: number; - appliedRules: HardRule[]; - ruleExplanation: string; - }; - syntheticSubstitution?: SyntheticResult; - watermark?: WatermarkResult; -} - -/** - * Enhanced processing with personas, synthetic substitution, and watermarking. - * - * Extends `processWithPrivacy` with advanced privacy features: - * - Automatic persona detection - * - Hard rule enforcement - * - Synthetic PII substitution - * - Output watermarking - * - * **Input Validation:** - * - Request must be non-empty string (max 10,000 chars) - * - Data must be string (max 100,000 chars) if provided - * - * **Processing Pipeline:** - * 1. Validate inputs - * 2. Detect persona (if auto) - * 3. Apply synthetic substitution (if enabled) - * 4. Analyze sensitivity - * 5. Apply persona rules - * 6. Process with privacy - * 7. Embed watermark (if enabled) - * - * @param req - Enhanced processing request with persona and feature flags - * @returns Enhanced response with persona info, synthetic mappings, and watermark - * @throws Error if validation fails or processing errors occur - * - * @example - * ```typescript - * const result = await processEnhanced({ - * request: "Review this financial report", - * data: "Q1 2026 Revenue: $12.4M...", - * personaId: "enterprise", - * useSyntheticSubstitution: true, - * enableWatermark: true - * }); - * - * console.log(result.persona.appliedRules); // Hard rules enforced - * console.log(result.syntheticSubstitution?.entityCount); // 5 entities replaced - * console.log(result.watermark?.humanReadable); // Watermark metadata - * ``` - */ -export async function processEnhanced(req: EnhancedProcessRequest): Promise { - try { - // Input validation — check type first, then emptiness - if (req.request === null || req.request === undefined || typeof req.request !== "string") { - throw new Error("Request must be a non-empty string"); - } - - if (req.request.trim().length === 0) { - throw new Error("Request cannot be empty"); - } - - if (req.request.length > 10000) { - throw new Error("Request exceeds maximum length of 10,000 characters"); - } - - if (req.data && req.data.length > 100000) { - throw new Error("Data exceeds maximum length of 100,000 characters"); - } - - const { processWithPrivacy } = await import("./privacy-layer"); - - // 1. Detect persona if auto - const personaId = req.personaId || "auto"; - let detectedPersona: PersonaId = personaId; - let personaConfidence = 100; - - if (personaId === "auto") { - const detection = detectPersona(req.request, req.data); - detectedPersona = detection.detectedPersona; - personaConfidence = detection.confidence; - } - - // 2. Apply synthetic substitution if enabled - let syntheticResult: SyntheticResult | undefined; - let processedData = req.data; - - if (req.useSyntheticSubstitution && req.data) { - syntheticResult = applySyntheticSubstitution(req.data); - processedData = syntheticResult.transformedText; - } - - // 3. Get base privacy analysis - const baseResponse = await processWithPrivacy({ - request: req.request, - data: processedData || undefined, - overrideLevel: req.overrideLevel, - }); - - // 4. Apply persona rules - const persona = PERSONAS[detectedPersona]; - const personaRules = applyPersonaRules( - detectedPersona, - baseResponse.privacy.level, - baseResponse.analysis.sensitivityScore - ); - - // 5. If persona changed the level, re-process - let finalPrivacy = baseResponse.privacy; - let finalResult = baseResponse.simulatedResult; - - if (personaRules.finalLevel !== baseResponse.privacy.level) { - const reprocess = await processWithPrivacy({ - request: req.request, - data: processedData || undefined, - overrideLevel: personaRules.finalLevel, - }); - finalPrivacy = reprocess.privacy; - finalResult = reprocess.simulatedResult; - } - - // 6. Apply watermark if enabled - let watermarkResult: WatermarkResult | undefined; - if (req.enableWatermark !== false) { - watermarkResult = embedWatermark(finalResult, finalPrivacy.level, req.request); - finalResult = watermarkResult.watermarkedText; - } - - return { - analysis: baseResponse.analysis, - privacy: finalPrivacy, - simulatedResult: finalResult, - persona: { - id: detectedPersona, - label: persona.label, - detected: personaId === "auto" && detectedPersona !== "auto", - confidence: personaConfidence, - appliedRules: personaRules.appliedRules, - ruleExplanation: personaRules.explanation, - }, - syntheticSubstitution: syntheticResult, - watermark: watermarkResult, - }; - } catch (error) { - console.error("processEnhanced error:", error); - throw new Error(`Privacy processing failed: ${error instanceof Error ? error.message : "Unknown error"}`); - } -} +[Error: GROQ_API_KEY not found in environment. Please set it to use the Freebuff API.] \ No newline at end of file