diff --git a/src/core/prompt-defense.ts b/src/core/prompt-defense.ts index 5d7a41e..2024ab4 100644 --- a/src/core/prompt-defense.ts +++ b/src/core/prompt-defense.ts @@ -127,7 +127,7 @@ export interface PromptDefenseOptions { * ```typescript * import { createPromptDefense } from '@stackone/defender'; * - * const defense = createPromptDefense({ enableTier2: true }); + * const defense = createPromptDefense(); * await defense.warmupTier2(); * * const result = await defense.defendToolResult(toolOutput, 'gmail_get_message'); @@ -226,7 +226,11 @@ export class PromptDefense { let tier2Risk: RiskLevel = "low"; if (this.tier2Classifier) { - const strings = extractStrings(value, this.tier2Fields); + // Use explicit tier2Fields override, or fall back to the risky field names + // identified by Tier 1. If neither is available, scan all strings. + const { riskyFieldNames } = sanitized.metadata; + const fieldsForTier2 = this.tier2Fields ?? (riskyFieldNames.length > 0 ? riskyFieldNames : undefined); + const strings = extractStrings(value, fieldsForTier2); const combinedText = strings.join("\n\n"); if (combinedText.length > 0) { @@ -241,7 +245,9 @@ export class PromptDefense { } else { tier2SkipReason = this.tier2Fields?.length ? "No strings found in tier2Fields" - : "No strings extracted from tool result"; + : riskyFieldNames.length > 0 + ? "No strings found in Tier 1 risky fields" + : "No strings extracted from tool result"; } } diff --git a/src/core/tool-result-sanitizer.ts b/src/core/tool-result-sanitizer.ts index be1ddf4..22642bc 100644 --- a/src/core/tool-result-sanitizer.ts +++ b/src/core/tool-result-sanitizer.ts @@ -154,6 +154,7 @@ export class ToolResultSanitizer { cumulativeRiskEscalated: false, totalLatencyMs: 0, sizeMetrics, + riskyFieldNames: [], }; // Sanitize the value @@ -167,6 +168,7 @@ export class ToolResultSanitizer { metadata.totalLatencyMs = performance.now() - startTime; metadata.sizeMetrics = sizeMetrics; + metadata.riskyFieldNames = [...new Set(metadata.riskyFieldNames)]; return { sanitized: sanitized as T, @@ -304,6 +306,7 @@ export class ToolResultSanitizer { // Check if this is a risky field that needs sanitization if (this.isFieldRisky(key, context.toolName) && typeof val === "string") { + metadata.riskyFieldNames.push(key); result[key] = this.sanitizeStringField(val, fieldContext, toolRule, metadata); } else { // Recurse into non-risky fields diff --git a/src/types.ts b/src/types.ts index e5c9c17..1d14ae5 100644 --- a/src/types.ts +++ b/src/types.ts @@ -217,6 +217,8 @@ export interface SanitizationMetadata { totalLatencyMs: number; /** Size metrics */ sizeMetrics: SizeMetrics; + /** Unique field names (leaf keys) that Tier 1 identified as risky */ + riskyFieldNames: string[]; } /**