From 4b4ff7b590132bb98a097b79b2133d6cf0f4921e Mon Sep 17 00:00:00 2001 From: Hisku Date: Thu, 26 Mar 2026 10:12:51 +0000 Subject: [PATCH 1/2] docs: remove redundant enableTier2: true from example (default is true) Co-Authored-By: Claude Sonnet 4.6 --- src/core/prompt-defense.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/prompt-defense.ts b/src/core/prompt-defense.ts index 5d7a41e..98589a5 100644 --- a/src/core/prompt-defense.ts +++ b/src/core/prompt-defense.ts @@ -127,7 +127,7 @@ export interface PromptDefenseOptions { * ```typescript * import { createPromptDefense } from '@stackone/defender'; * - * const defense = createPromptDefense({ enableTier2: true }); + * const defense = createPromptDefense(); * await defense.warmupTier2(); * * const result = await defense.defendToolResult(toolOutput, 'gmail_get_message'); From 349ae6a1f32743412dd8700b0c143ecd22c49a8b Mon Sep 17 00:00:00 2001 From: Hisku Date: Thu, 26 Mar 2026 16:01:45 +0000 Subject: [PATCH 2/2] feat(ENG-12439): scope Tier 2 to Tier 1 risky fields by default When no explicit tier2Fields override is set, Tier 2 now only scans strings extracted from field names that Tier 1 identified as risky (via isRiskyField). Falls back to full scan for unknown tools. Co-Authored-By: Claude Sonnet 4.6 --- src/core/prompt-defense.ts | 10 ++++++++-- src/core/tool-result-sanitizer.ts | 3 +++ src/types.ts | 2 ++ 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/src/core/prompt-defense.ts b/src/core/prompt-defense.ts index 98589a5..2024ab4 100644 --- a/src/core/prompt-defense.ts +++ b/src/core/prompt-defense.ts @@ -226,7 +226,11 @@ export class PromptDefense { let tier2Risk: RiskLevel = "low"; if (this.tier2Classifier) { - const strings = extractStrings(value, this.tier2Fields); + // Use explicit tier2Fields override, or fall back to the risky field names + // identified by Tier 1. If neither is available, scan all strings. + const { riskyFieldNames } = sanitized.metadata; + const fieldsForTier2 = this.tier2Fields ?? (riskyFieldNames.length > 0 ? riskyFieldNames : undefined); + const strings = extractStrings(value, fieldsForTier2); const combinedText = strings.join("\n\n"); if (combinedText.length > 0) { @@ -241,7 +245,9 @@ export class PromptDefense { } else { tier2SkipReason = this.tier2Fields?.length ? "No strings found in tier2Fields" - : "No strings extracted from tool result"; + : riskyFieldNames.length > 0 + ? "No strings found in Tier 1 risky fields" + : "No strings extracted from tool result"; } } diff --git a/src/core/tool-result-sanitizer.ts b/src/core/tool-result-sanitizer.ts index be1ddf4..22642bc 100644 --- a/src/core/tool-result-sanitizer.ts +++ b/src/core/tool-result-sanitizer.ts @@ -154,6 +154,7 @@ export class ToolResultSanitizer { cumulativeRiskEscalated: false, totalLatencyMs: 0, sizeMetrics, + riskyFieldNames: [], }; // Sanitize the value @@ -167,6 +168,7 @@ export class ToolResultSanitizer { metadata.totalLatencyMs = performance.now() - startTime; metadata.sizeMetrics = sizeMetrics; + metadata.riskyFieldNames = [...new Set(metadata.riskyFieldNames)]; return { sanitized: sanitized as T, @@ -304,6 +306,7 @@ export class ToolResultSanitizer { // Check if this is a risky field that needs sanitization if (this.isFieldRisky(key, context.toolName) && typeof val === "string") { + metadata.riskyFieldNames.push(key); result[key] = this.sanitizeStringField(val, fieldContext, toolRule, metadata); } else { // Recurse into non-risky fields diff --git a/src/types.ts b/src/types.ts index e5c9c17..1d14ae5 100644 --- a/src/types.ts +++ b/src/types.ts @@ -217,6 +217,8 @@ export interface SanitizationMetadata { totalLatencyMs: number; /** Size metrics */ sizeMetrics: SizeMetrics; + /** Unique field names (leaf keys) that Tier 1 identified as risky */ + riskyFieldNames: string[]; } /**