From 0a2745138ef5b980327432148612659f80d87dcc Mon Sep 17 00:00:00 2001 From: Jorben Date: Wed, 4 Mar 2026 11:29:24 +0800 Subject: [PATCH 1/7] =?UTF-8?q?feat(action):=20=E2=9C=A8=20show=20confiden?= =?UTF-8?q?ce=20in=20inline=20comments?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/index.js | 16 +++++++++++++++- test/index.test.js | 3 +++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/src/index.js b/src/index.js index 759fdc5..e0496d0 100644 --- a/src/index.js +++ b/src/index.js @@ -30,6 +30,7 @@ function getTextBundle(language) { return { suggestionLabel: 'Suggestion', riskLabel: 'Risk', + confidenceLabel: 'Confidence', summaryTitle: 'AI Code Review Summary', preferredLanguage: 'Preferred language', overallAssessment: 'Overall Assessment', @@ -80,6 +81,7 @@ function getTextBundle(language) { return { suggestionLabel: '建议', riskLabel: '风险', + confidenceLabel: '置信度', summaryTitle: 'AI 代码审查汇总', preferredLanguage: '指定语言', overallAssessment: '总体评价', @@ -187,6 +189,16 @@ function summarizePlannerBatchesForLog(batches, maxEntries = 12) { }).join(' | '); } +function formatConfidenceValue(confidence) { + const value = Number.parseFloat(String(confidence)); + if (!Number.isFinite(value)) { + return '0.80'; + } + + const clamped = Math.min(1, Math.max(0, value)); + return clamped.toFixed(2); +} + function buildInlineBody(finding, text) { const lines = []; const subAgent = String(finding.sourceDimension || 'general').trim().toLowerCase() || 'general'; @@ -202,8 +214,9 @@ function buildInlineBody(finding, text) { lines.push(`${text.riskLabel}: ${finding.risk}`); } - lines.push(``); + lines.push(`${text.confidenceLabel}: ${formatConfidenceValue(finding.confidence)}`); lines.push(`
${text.fromSubAgentTag(subAgent)}
`); + lines.push(``); return lines.join('\n\n'); } @@ -1026,6 +1039,7 @@ module.exports = { shouldUseSummaryOnlyMode, sanitizePlannedBatches, summarizePlannerBatchesForLog, + formatConfidenceValue, buildInlineBody, summarizeSeverity, summarizeFileConclusions, diff --git a/test/index.test.js b/test/index.test.js index 0e265d3..8a6d372 100644 --- a/test/index.test.js +++ b/test/index.test.js @@ -60,6 +60,7 @@ test('buildInlineBody includes severity, labels, inline key marker, and sub-agen summary: 'The value can be null before property access.', suggestion: 'Add a null check before dereference.', risk: 'Can throw at runtime.', + confidence: 0.93, path: 'src/a.js', side: 'RIGHT', line: 10, @@ -69,8 +70,10 @@ test('buildInlineBody includes severity, labels, inline key marker, and sub-agen assert.match(body, /\*\*\[MEDIUM\] Missing null guard\*\*/); assert.match(body, /Suggestion: Add a null check before dereference\./); assert.match(body, /Risk: Can throw at runtime\./); + assert.match(body, /Confidence: 0.93/); assert.match(body, /ai-code-review-agent:inline-key/); assert.match(body, /\[From SubAgent: security\]/); + assert.ok(body.indexOf('Confidence: 0.93') < body.indexOf('[From SubAgent: security]')); }); test('formatSummaryMarkdown supports unknown severities and degraded reasons', () => { From 196b7ab3ba375d59b1f54245d11ddcf847b86f78 Mon Sep 17 00:00:00 2001 From: Jorben Date: Wed, 4 Mar 2026 12:09:26 +0800 Subject: [PATCH 2/7] =?UTF-8?q?test(action):=20=E2=9C=85=20add=20confidenc?= =?UTF-8?q?e=20edge=20and=20zh=20inline=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/index.test.js | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/test/index.test.js b/test/index.test.js index 8a6d372..4839443 100644 --- a/test/index.test.js +++ b/test/index.test.js @@ -8,6 +8,7 @@ const { chunk, sanitizePlannedBatches, shouldUseSummaryOnlyMode, + formatConfidenceValue, buildInlineBody, formatSummaryMarkdown } = __internal; @@ -76,6 +77,36 @@ test('buildInlineBody includes severity, labels, inline key marker, and sub-agen assert.ok(body.indexOf('Confidence: 0.93') < body.indexOf('[From SubAgent: security]')); }); +test('formatConfidenceValue handles invalid and boundary values predictably', () => { + assert.equal(formatConfidenceValue(undefined), '0.80'); + assert.equal(formatConfidenceValue(null), '0.80'); + assert.equal(formatConfidenceValue('abc'), '0.80'); + assert.equal(formatConfidenceValue(-0.1), '0.00'); + assert.equal(formatConfidenceValue(1.2), '1.00'); + assert.equal(formatConfidenceValue('0.345'), '0.34'); + assert.equal(formatConfidenceValue(0), '0.00'); + assert.equal(formatConfidenceValue(1), '1.00'); +}); + +test('buildInlineBody renders chinese confidence label before sub-agent tag', () => { + const text = getTextBundle('zh-CN'); + const body = buildInlineBody({ + severity: 'low', + title: '缺少日志上下文', + summary: '建议补充必要上下文便于排查。', + confidence: 0.88, + path: 'src/a.js', + side: 'RIGHT', + line: 6, + sourceDimension: 'testing' + }, text); + + assert.match(body, /\*\*\[LOW\] 缺少日志上下文\*\*/); + assert.match(body, /置信度: 0.88/); + assert.match(body, /\[来自 SubAgent:testing\]/); + assert.ok(body.indexOf('置信度: 0.88') < body.indexOf('[来自 SubAgent:testing]')); +}); + test('formatSummaryMarkdown supports unknown severities and degraded reasons', () => { const markdown = formatSummaryMarkdown({ pull: { number: 7, title: 'Add feature' }, From a8a0477810bc2cc8acfa389debbb9545b4de530c Mon Sep 17 00:00:00 2001 From: Jorben Date: Wed, 4 Mar 2026 12:14:54 +0800 Subject: [PATCH 3/7] =?UTF-8?q?feat(action):=20=E2=9C=A8=20support=20expli?= =?UTF-8?q?cit=20missing=20confidence=20policy?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- action.yml | 8 ++++++ src/agents.js | 5 ++-- src/aggregate.js | 38 ++++++++++++++++++++++----- src/config.js | 11 ++++++++ src/index.js | 23 ++++++++++++---- test/aggregate.test.js | 59 +++++++++++++++++++++++++++++++++++++++--- test/config.test.js | 28 ++++++++++++++++++++ test/index.test.js | 24 ++++++++++++++--- 8 files changed, 176 insertions(+), 20 deletions(-) diff --git a/action.yml b/action.yml index 167cf39..4419d89 100644 --- a/action.yml +++ b/action.yml @@ -50,6 +50,14 @@ inputs: description: Minimum confidence (0-1) required for a finding to be kept. required: false default: "0.72" + missing_confidence_policy: + description: Policy when finding confidence is missing/invalid (drop|na|fallback). + required: false + default: "na" + fallback_confidence_value: + description: Fallback confidence (0-1) used only when missing_confidence_policy=fallback. + required: false + default: "0.5" coverage_first_round_primary_only: description: In round 1, run only primary dimension to maximize file coverage under budget. required: false diff --git a/src/agents.js b/src/agents.js index be436fd..a929948 100644 --- a/src/agents.js +++ b/src/agents.js @@ -25,7 +25,7 @@ const findingSchema = z.object({ path: z.string().min(1), side: z.enum(['LEFT', 'RIGHT', 'FILE']).default('RIGHT'), line: z.number().int().positive().nullable().default(null), - confidence: z.number().min(0).max(1).default(0.8), + confidence: z.number().min(0).max(1).nullable().optional().default(null), evidence: z.array(z.string().min(1)).default([]), fingerprint: z.string().max(120).default(''), summary: z.string().min(1), @@ -126,7 +126,8 @@ Rules: - Never emit line numbers that do not appear in the provided anchors. - Do not invent files or line numbers. - Severity must be one of critical/high/medium/low. -- Set confidence in [0,1]. Include at least one concrete evidence item tied to provided diff context. +- Set confidence in [0,1] when you can estimate it; otherwise use null. +- Include at least one concrete evidence item tied to provided diff context. - If confidence is below 0.70, do not emit it as a finding; put it in file-level notes instead. - Use fingerprint as stable short key for same issue across dimensions (e.g. unsafe_openai_base_url, planner_done_ignored). - Keep findings concrete, actionable, and concise. diff --git a/src/aggregate.js b/src/aggregate.js index 03c03b9..fc431e6 100644 --- a/src/aggregate.js +++ b/src/aggregate.js @@ -70,6 +70,10 @@ function jaccardSimilarity(a, b) { return union.size === 0 ? 0 : intersection / union.size; } +function confidenceRank(value) { + return Number.isFinite(value) ? value : -1; +} + function isSemanticallySameIssue(a, b) { if ( a.fingerprint && @@ -102,11 +106,14 @@ function isSemanticallySameIssue(a, b) { } function mergeFinding(base, incoming) { - const preferIncoming = incoming.confidence > base.confidence; + const baseRank = confidenceRank(base.confidence); + const incomingRank = confidenceRank(incoming.confidence); + const preferIncoming = incomingRank > baseRank; const mergedEvidence = [...new Set([...(base.evidence || []), ...(incoming.evidence || [])])].slice(0, 3); const severity = SEVERITY_RANK[incoming.severity] > SEVERITY_RANK[base.severity] ? incoming.severity : base.severity; + const mergedConfidence = incomingRank >= baseRank ? incoming.confidence : base.confidence; return { ...base, ...(preferIncoming @@ -118,7 +125,7 @@ function mergeFinding(base, incoming) { } : {}), severity, - confidence: Math.max(base.confidence, incoming.confidence), + confidence: mergedConfidence, evidence: mergedEvidence, fingerprint: base.fingerprint || incoming.fingerprint, sourceDimension: preferIncoming @@ -130,6 +137,13 @@ function mergeFinding(base, incoming) { function normalizeFindings(findings, allowedPaths, options = {}) { const pathSet = new Set(allowedPaths); const minConfidence = Number.isFinite(options.minConfidence) ? options.minConfidence : 0; + const missingConfidencePolicy = ['drop', 'na', 'fallback'].includes(options.missingConfidencePolicy) + ? options.missingConfidencePolicy + : 'na'; + const fallbackConfidenceValueRaw = Number.parseFloat(String(options.fallbackConfidenceValue ?? '0.5')); + const fallbackConfidenceValue = Number.isFinite(fallbackConfidenceValueRaw) + ? clamp(fallbackConfidenceValueRaw, 0, 1) + : 0.5; const out = []; for (const finding of findings || []) { @@ -143,11 +157,23 @@ function normalizeFindings(findings, allowedPaths, options = {}) { const line = Number.isInteger(finding.line) && finding.line > 0 ? finding.line : null; const title = String(finding.title || '').trim(); const summary = String(finding.summary || '').trim(); - const confidenceRaw = Number.parseFloat(String(finding.confidence ?? '0.8')); - const confidence = Number.isFinite(confidenceRaw) ? clamp(confidenceRaw, 0, 1) : 0.8; + const confidenceRaw = Number.parseFloat(String(finding.confidence)); + let confidence = Number.isFinite(confidenceRaw) ? clamp(confidenceRaw, 0, 1) : null; const evidence = normalizeEvidence(finding.evidence); - if (!title || !summary || evidence.length === 0 || confidence < minConfidence) { + if (confidence === null) { + if (missingConfidencePolicy === 'drop') { + continue; + } + if (missingConfidencePolicy === 'fallback') { + confidence = fallbackConfidenceValue; + } + } + + if (!title || !summary || evidence.length === 0) { + continue; + } + if (Number.isFinite(confidence) && confidence < minConfidence) { continue; } @@ -216,7 +242,7 @@ function dedupeAndSortFindings(findings, maxFindings) { return pathDiff; } - const confidenceDiff = (b.confidence || 0) - (a.confidence || 0); + const confidenceDiff = confidenceRank(b.confidence) - confidenceRank(a.confidence); if (confidenceDiff !== 0) { return confidenceDiff; } diff --git a/src/config.js b/src/config.js index 3c33a94..e57d206 100644 --- a/src/config.js +++ b/src/config.js @@ -75,6 +75,15 @@ function parseFloatRangeInput(name, defaultValue, min, max) { return parsed; } +function parseEnumInput(name, defaultValue, allowedValues) { + const raw = core.getInput(name) || String(defaultValue); + const normalized = String(raw).trim().toLowerCase(); + if (!allowedValues.includes(normalized)) { + throw new Error(`Input ${name} must be one of [${allowedValues.join(', ')}], got: ${raw}`); + } + return normalized; +} + function uniqueLowercase(items) { const seen = new Set(); const out = []; @@ -123,6 +132,8 @@ function loadConfig() { reviewDimensions: normalizedDimensions, reviewLanguage, minFindingConfidence: parseFloatRangeInput('min_finding_confidence', 0.72, 0, 1), + missingConfidencePolicy: parseEnumInput('missing_confidence_policy', 'na', ['drop', 'na', 'fallback']), + fallbackConfidenceValue: parseFloatRangeInput('fallback_confidence_value', 0.5, 0, 1), coverageFirstRoundPrimaryOnly: parseBooleanInput('coverage_first_round_primary_only', true), autoMinimizeOutdatedComments: parseBooleanInput('auto_minimize_outdated_comments', true), maxRounds: parsePositiveIntInput('max_rounds', 8), diff --git a/src/index.js b/src/index.js index e0496d0..38ba3f6 100644 --- a/src/index.js +++ b/src/index.js @@ -31,6 +31,7 @@ function getTextBundle(language) { suggestionLabel: 'Suggestion', riskLabel: 'Risk', confidenceLabel: 'Confidence', + unknownConfidenceValue: 'N/A', summaryTitle: 'AI Code Review Summary', preferredLanguage: 'Preferred language', overallAssessment: 'Overall Assessment', @@ -41,6 +42,7 @@ function getTextBundle(language) { fileLevelCoverage: 'File-Level Coverage Notes', inlineDowngraded: 'Inline Downgraded Items (processed but not inline)', coverageStatus: 'Coverage Status', + unknownConfidenceFindings: 'Findings with unknown confidence (N/A)', uncoveredList: 'Uncovered list', noPatchCoveredList: 'No-patch covered list', runtimeBudget: 'Runtime/Budget', @@ -82,6 +84,7 @@ function getTextBundle(language) { suggestionLabel: '建议', riskLabel: '风险', confidenceLabel: '置信度', + unknownConfidenceValue: 'N/A', summaryTitle: 'AI 代码审查汇总', preferredLanguage: '指定语言', overallAssessment: '总体评价', @@ -92,6 +95,7 @@ function getTextBundle(language) { fileLevelCoverage: '文件级覆盖说明', inlineDowngraded: '无法 inline 的已处理项', coverageStatus: '覆盖状态', + unknownConfidenceFindings: '置信度未知(N/A)的问题数', uncoveredList: '未覆盖文件清单', noPatchCoveredList: '无 patch 文件覆盖清单', runtimeBudget: '轮次与预算', @@ -189,10 +193,10 @@ function summarizePlannerBatchesForLog(batches, maxEntries = 12) { }).join(' | '); } -function formatConfidenceValue(confidence) { +function formatConfidenceValue(confidence, unknownValue = 'N/A') { const value = Number.parseFloat(String(confidence)); if (!Number.isFinite(value)) { - return '0.80'; + return unknownValue; } const clamped = Math.min(1, Math.max(0, value)); @@ -214,7 +218,7 @@ function buildInlineBody(finding, text) { lines.push(`${text.riskLabel}: ${finding.risk}`); } - lines.push(`${text.confidenceLabel}: ${formatConfidenceValue(finding.confidence)}`); + lines.push(`${text.confidenceLabel}: ${formatConfidenceValue(finding.confidence, text.unknownConfidenceValue)}`); lines.push(`
${text.fromSubAgentTag(subAgent)}
`); lines.push(``); @@ -313,6 +317,9 @@ function formatSummaryMarkdown({ const degradedText = degradedSummaryOnly ? `${text.yes}\n\n${text.reasons}:\n${degradedReasons.map((x) => `- ${x}`).join('\n') || '- unknown'}` : text.no; + const unknownConfidenceFindings = Number.isFinite(coverage.unknownConfidenceFindings) + ? coverage.unknownConfidenceFindings + : 0; return [ `## ${text.summaryTitle}`, @@ -346,6 +353,7 @@ function formatSummaryMarkdown({ `- Covered files: ${coverage.covered}`, `- Uncovered files: ${coverage.uncovered}`, `- No-patch/binary covered as file-level: ${coverage.noPatch}`, + `- ${text.unknownConfidenceFindings}: ${unknownConfidenceFindings}`, '', `${text.uncoveredList}:`, uncoveredLines, @@ -787,10 +795,13 @@ async function runAction() { const normalizedFindings = dedupeAndSortFindings( normalizeFindings(rawFindings, targetPaths, { - minConfidence: config.minFindingConfidence + minConfidence: config.minFindingConfidence, + missingConfidencePolicy: config.missingConfidencePolicy, + fallbackConfidenceValue: config.fallbackConfidenceValue }), config.maxFindings ); + const unknownConfidenceFindings = normalizedFindings.filter((finding) => !Number.isFinite(finding.confidence)).length; const diffLineMap = buildDiffLineMaps(patchFiles); const inlineComments = []; @@ -908,7 +919,8 @@ async function runAction() { target: filteredFiles.length, covered: filteredFiles.length - uncovered.length, uncovered: uncovered.length, - noPatch: noPatchCovered.length + noPatch: noPatchCovered.length, + unknownConfidenceFindings }; if (filteredFiles.length === 0) { @@ -960,6 +972,7 @@ async function runAction() { const reviewBody = [ text.reviewCompleted, `- Findings kept: ${normalizedFindings.length}`, + `- Findings with unknown confidence: ${unknownConfidenceFindings}`, `- Inline comments attempted: ${inlineComments.length}`, `- Target files: ${coverage.target}`, `- Covered files: ${coverage.covered}`, diff --git a/test/aggregate.test.js b/test/aggregate.test.js index 23e7e3d..4e3c325 100644 --- a/test/aggregate.test.js +++ b/test/aggregate.test.js @@ -178,8 +178,8 @@ test('normalizeFindings handles confidence/evidence type anomalies predictably', const findings = [ { path: 'src/a.js', - title: 'NaN confidence defaults', - summary: 'NaN confidence should fall back to default 0.8', + title: 'NaN confidence is unknown', + summary: 'NaN confidence should be kept as unknown by default policy', severity: 'LOW', side: 'LEFT', line: -2, @@ -200,12 +200,63 @@ test('normalizeFindings handles confidence/evidence type anomalies predictably', const normalized = normalizeFindings(findings, allowed, { minConfidence: 0.72 }); assert.equal(normalized.length, 1); - assert.equal(normalized[0].title, 'NaN confidence defaults'); - assert.equal(normalized[0].confidence, 0.8); + assert.equal(normalized[0].title, 'NaN confidence is unknown'); + assert.equal(normalized[0].confidence, null); assert.equal(normalized[0].side, 'LEFT'); assert.equal(normalized[0].line, null); }); +test('normalizeFindings drops findings with missing confidence when policy is drop', () => { + const allowed = ['src/a.js']; + const findings = [ + { + path: 'src/a.js', + title: 'Unknown confidence', + summary: 'confidence missing', + severity: 'LOW', + side: 'RIGHT', + line: 2, + evidence: ['e1'] + } + ]; + + const normalized = normalizeFindings(findings, allowed, { + minConfidence: 0.72, + missingConfidencePolicy: 'drop' + }); + assert.equal(normalized.length, 0); +}); + +test('normalizeFindings applies fallback confidence and min threshold when policy is fallback', () => { + const allowed = ['src/a.js']; + const findings = [ + { + path: 'src/a.js', + title: 'Fallback confidence', + summary: 'confidence missing', + severity: 'LOW', + side: 'RIGHT', + line: 2, + evidence: ['e1'] + } + ]; + + const kept = normalizeFindings(findings, allowed, { + minConfidence: 0.72, + missingConfidencePolicy: 'fallback', + fallbackConfidenceValue: 0.85 + }); + assert.equal(kept.length, 1); + assert.equal(kept[0].confidence, 0.85); + + const dropped = normalizeFindings(findings, allowed, { + minConfidence: 0.72, + missingConfidencePolicy: 'fallback', + fallbackConfidenceValue: 0.5 + }); + assert.equal(dropped.length, 0); +}); + test('groupFindingsBySeverity falls back unknown severities to medium', () => { const unknownSeverityFinding = { path: 'src/a.js', diff --git a/test/config.test.js b/test/config.test.js index a1d53cd..4b103b0 100644 --- a/test/config.test.js +++ b/test/config.test.js @@ -44,6 +44,8 @@ test('loadConfig applies defaults for confidence and coverage-first mode', () => }); assert.equal(config.minFindingConfidence, 0.72); + assert.equal(config.missingConfidencePolicy, 'na'); + assert.equal(config.fallbackConfidenceValue, 0.5); assert.equal(config.coverageFirstRoundPrimaryOnly, true); assert.equal(config.autoMinimizeOutdatedComments, true); assert.deepEqual(config.openaiApiBaseAllowlist, ['api.openai.com']); @@ -54,6 +56,8 @@ test('loadConfig parses custom confidence and coverage-first mode', () => { github_token: 'ghs_xxx', openai_api_key: 'sk-test', min_finding_confidence: '0.85', + missing_confidence_policy: 'fallback', + fallback_confidence_value: '0.65', coverage_first_round_primary_only: 'false', auto_minimize_outdated_comments: 'false', openai_api_base: 'https://gateway.example.com/v1', @@ -61,6 +65,8 @@ test('loadConfig parses custom confidence and coverage-first mode', () => { }); assert.equal(config.minFindingConfidence, 0.85); + assert.equal(config.missingConfidencePolicy, 'fallback'); + assert.equal(config.fallbackConfidenceValue, 0.65); assert.equal(config.coverageFirstRoundPrimaryOnly, false); assert.equal(config.autoMinimizeOutdatedComments, false); assert.equal(config.openaiApiBase, 'https://gateway.example.com/v1'); @@ -78,6 +84,28 @@ test('loadConfig rejects invalid confidence range', () => { ); }); +test('loadConfig rejects invalid missing_confidence_policy', () => { + assert.throws( + () => loadConfigWithMockedInputs({ + github_token: 'ghs_xxx', + openai_api_key: 'sk-test', + missing_confidence_policy: 'invalid' + }), + /missing_confidence_policy must be one of \[drop, na, fallback\]/ + ); +}); + +test('loadConfig rejects invalid fallback_confidence_value range', () => { + assert.throws( + () => loadConfigWithMockedInputs({ + github_token: 'ghs_xxx', + openai_api_key: 'sk-test', + fallback_confidence_value: '-0.1' + }), + /fallback_confidence_value must be a number in \[0, 1\]/ + ); +}); + test('loadConfig normalizes and deduplicates review_dimensions while preserving order', () => { const config = loadConfigWithMockedInputs({ github_token: 'ghs_xxx', diff --git a/test/index.test.js b/test/index.test.js index 4839443..7d35099 100644 --- a/test/index.test.js +++ b/test/index.test.js @@ -78,9 +78,9 @@ test('buildInlineBody includes severity, labels, inline key marker, and sub-agen }); test('formatConfidenceValue handles invalid and boundary values predictably', () => { - assert.equal(formatConfidenceValue(undefined), '0.80'); - assert.equal(formatConfidenceValue(null), '0.80'); - assert.equal(formatConfidenceValue('abc'), '0.80'); + assert.equal(formatConfidenceValue(undefined), 'N/A'); + assert.equal(formatConfidenceValue(null), 'N/A'); + assert.equal(formatConfidenceValue('abc'), 'N/A'); assert.equal(formatConfidenceValue(-0.1), '0.00'); assert.equal(formatConfidenceValue(1.2), '1.00'); assert.equal(formatConfidenceValue('0.345'), '0.34'); @@ -107,6 +107,23 @@ test('buildInlineBody renders chinese confidence label before sub-agent tag', () assert.ok(body.indexOf('置信度: 0.88') < body.indexOf('[来自 SubAgent:testing]')); }); +test('buildInlineBody renders N/A for missing confidence', () => { + const text = getTextBundle('English'); + const body = buildInlineBody({ + severity: 'low', + title: 'Unknown confidence finding', + summary: 'Confidence value is unavailable.', + path: 'src/a.js', + side: 'RIGHT', + line: 3, + sourceDimension: 'general' + }, text); + + assert.match(body, /Confidence: N\/A/); + assert.match(body, /\[From SubAgent: general\]/); + assert.ok(body.indexOf('Confidence: N/A') < body.indexOf('[From SubAgent: general]')); +}); + test('formatSummaryMarkdown supports unknown severities and degraded reasons', () => { const markdown = formatSummaryMarkdown({ pull: { number: 7, title: 'Add feature' }, @@ -150,6 +167,7 @@ test('formatSummaryMarkdown supports unknown severities and degraded reasons', ( assert.match(markdown, /## AI Code Review Summary/); assert.match(markdown, /- MEDIUM \(1\)/); assert.match(markdown, /Unknown severity should be grouped/); + assert.match(markdown, /Findings with unknown confidence \(N\/A\): 0/); assert.match(markdown, /Structured-output summary-only degradation: YES/); assert.match(markdown, /planner_structured_output_failed_round_1: unknown_error/); }); From 41651f716b6244382dbce21c32fdeeeec33e8d84 Mon Sep 17 00:00:00 2001 From: Jorben Date: Wed, 4 Mar 2026 12:50:39 +0800 Subject: [PATCH 4/7] =?UTF-8?q?test(action):=20=E2=9C=85=20add=20regressio?= =?UTF-8?q?n=20tests=20for=20confidence=20policies?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/agents.test.js | 89 ++++++++++++++++++++++++++++++++++++++++++ test/aggregate.test.js | 46 ++++++++++++++++++++++ test/config.test.js | 26 ++++++++++++ test/index.test.js | 44 +++++++++++++++++++++ 4 files changed, 205 insertions(+) diff --git a/test/agents.test.js b/test/agents.test.js index 63dd61d..587d822 100644 --- a/test/agents.test.js +++ b/test/agents.test.js @@ -94,6 +94,95 @@ test('runStructuredWithRepair reports wrapped error after repair failure', async assert.match(String(result.error?.message || result.error), /still-invalid/); }); +test('createReviewerAgent schema accepts nullable/omitted confidence and rejects invalid confidence', () => { + const { createReviewerAgent } = loadAgentsWithMockedRuntime(async () => ({ finalOutput: {} })); + const agent = createReviewerAgent({ + dimension: 'general', + model: 'gpt-test', + language: 'English', + projectGuidance: null + }); + + const schema = agent.opts.outputType; + const parsedOmitted = schema.parse({ + overall: 'ok', + findings: [ + { + title: 'No confidence field', + severity: 'low', + path: 'src/a.js', + summary: 'desc', + evidence: ['e1'] + } + ] + }); + assert.equal(parsedOmitted.findings[0].confidence, null); + + const parsedNull = schema.parse({ + overall: 'ok', + findings: [ + { + title: 'Null confidence field', + severity: 'low', + path: 'src/a.js', + summary: 'desc', + confidence: null, + evidence: ['e1'] + } + ] + }); + assert.equal(parsedNull.findings[0].confidence, null); + + const parsedNumeric = schema.parse({ + overall: 'ok', + findings: [ + { + title: 'Numeric confidence', + severity: 'low', + path: 'src/a.js', + summary: 'desc', + confidence: 0.9, + evidence: ['e1'] + } + ] + }); + assert.equal(parsedNumeric.findings[0].confidence, 0.9); + + assert.throws( + () => schema.parse({ + overall: 'ok', + findings: [ + { + title: 'String confidence', + severity: 'low', + path: 'src/a.js', + summary: 'desc', + confidence: '0.9', + evidence: ['e1'] + } + ] + }), + /Expected number, received string/ + ); + + assert.throws( + () => schema.parse({ + overall: 'ok', + findings: [ + { + title: 'Out-of-range confidence', + severity: 'low', + path: 'src/a.js', + summary: 'desc', + confidence: 1.2, + evidence: ['e1'] + } + ] + }), + /Number must be less than or equal to 1/ + ); +}); + test('buildBatchReviewInput keeps additional file with truncation at boundary', () => { const { buildBatchReviewInput } = loadAgentsWithMockedRuntime(async () => ({ finalOutput: {} })); diff --git a/test/aggregate.test.js b/test/aggregate.test.js index 4e3c325..529c7c2 100644 --- a/test/aggregate.test.js +++ b/test/aggregate.test.js @@ -151,6 +151,52 @@ test('dedupeAndSortFindings keeps deterministic order on same severity/confidenc ); }); +test('dedupeAndSortFindings ranks unknown confidence after numeric values', () => { + const findings = [ + { path: 'a.js', side: 'RIGHT', line: 3, severity: 'medium', title: 'T3', summary: 'S3', confidence: null, evidence: ['3'] }, + { path: 'a.js', side: 'RIGHT', line: 2, severity: 'medium', title: 'T2', summary: 'S2', confidence: 0, evidence: ['2'] }, + { path: 'a.js', side: 'RIGHT', line: 1, severity: 'medium', title: 'T1', summary: 'S1', confidence: 0.8, evidence: ['1'] } + ]; + + const result = dedupeAndSortFindings(findings, 10); + assert.deepEqual(result.map((x) => x.line), [1, 2, 3]); +}); + +test('dedupeAndSortFindings merge prefers numeric confidence over unknown and updates sourceDimension', () => { + const findings = [ + { + path: 'a.js', + side: 'RIGHT', + line: 7, + severity: 'medium', + title: 'Issue from unknown confidence', + summary: 'Unknown confidence finding', + confidence: null, + evidence: ['unknown'], + fingerprint: 'same_issue', + sourceDimension: 'general' + }, + { + path: 'a.js', + side: 'RIGHT', + line: 7, + severity: 'medium', + title: 'Issue from numeric confidence', + summary: 'Numeric confidence finding', + confidence: 0.91, + evidence: ['numeric'], + fingerprint: 'same_issue', + sourceDimension: 'security' + } + ]; + + const result = dedupeAndSortFindings(findings, 10); + assert.equal(result.length, 1); + assert.equal(result[0].confidence, 0.91); + assert.equal(result[0].sourceDimension, 'security'); + assert.equal(result[0].title, 'Issue from numeric confidence'); +}); + test('normalizeFindings keeps confidence at threshold and normalizes side/line edge values', () => { const allowed = ['src/a.js']; const findings = [ diff --git a/test/config.test.js b/test/config.test.js index 4b103b0..c0b3b8d 100644 --- a/test/config.test.js +++ b/test/config.test.js @@ -106,6 +106,32 @@ test('loadConfig rejects invalid fallback_confidence_value range', () => { ); }); +test('loadConfig normalizes missing_confidence_policy casing and whitespace', () => { + const config = loadConfigWithMockedInputs({ + github_token: 'ghs_xxx', + openai_api_key: 'sk-test', + missing_confidence_policy: ' Fallback ' + }); + + assert.equal(config.missingConfidencePolicy, 'fallback'); +}); + +test('loadConfig accepts fallback_confidence_value boundaries 0 and 1', () => { + const low = loadConfigWithMockedInputs({ + github_token: 'ghs_xxx', + openai_api_key: 'sk-test', + fallback_confidence_value: '0' + }); + assert.equal(low.fallbackConfidenceValue, 0); + + const high = loadConfigWithMockedInputs({ + github_token: 'ghs_xxx', + openai_api_key: 'sk-test', + fallback_confidence_value: '1' + }); + assert.equal(high.fallbackConfidenceValue, 1); +}); + test('loadConfig normalizes and deduplicates review_dimensions while preserving order', () => { const config = loadConfigWithMockedInputs({ github_token: 'ghs_xxx', diff --git a/test/index.test.js b/test/index.test.js index 7d35099..44a1295 100644 --- a/test/index.test.js +++ b/test/index.test.js @@ -75,17 +75,22 @@ test('buildInlineBody includes severity, labels, inline key marker, and sub-agen assert.match(body, /ai-code-review-agent:inline-key/); assert.match(body, /\[From SubAgent: security\]/); assert.ok(body.indexOf('Confidence: 0.93') < body.indexOf('[From SubAgent: security]')); + assert.ok(body.indexOf('[From SubAgent: security]') < body.indexOf('ai-code-review-agent:inline-key')); + assert.ok(body.trim().endsWith('-->')); + assert.equal((body.match(/ai-code-review-agent:inline-key/g) || []).length, 1); }); test('formatConfidenceValue handles invalid and boundary values predictably', () => { assert.equal(formatConfidenceValue(undefined), 'N/A'); assert.equal(formatConfidenceValue(null), 'N/A'); + assert.equal(formatConfidenceValue(''), 'N/A'); assert.equal(formatConfidenceValue('abc'), 'N/A'); assert.equal(formatConfidenceValue(-0.1), '0.00'); assert.equal(formatConfidenceValue(1.2), '1.00'); assert.equal(formatConfidenceValue('0.345'), '0.34'); assert.equal(formatConfidenceValue(0), '0.00'); assert.equal(formatConfidenceValue(1), '1.00'); + assert.equal(formatConfidenceValue(undefined, 'UNKNOWN'), 'UNKNOWN'); }); test('buildInlineBody renders chinese confidence label before sub-agent tag', () => { @@ -122,6 +127,7 @@ test('buildInlineBody renders N/A for missing confidence', () => { assert.match(body, /Confidence: N\/A/); assert.match(body, /\[From SubAgent: general\]/); assert.ok(body.indexOf('Confidence: N/A') < body.indexOf('[From SubAgent: general]')); + assert.ok(body.trim().endsWith('-->')); }); test('formatSummaryMarkdown supports unknown severities and degraded reasons', () => { @@ -171,3 +177,41 @@ test('formatSummaryMarkdown supports unknown severities and degraded reasons', ( assert.match(markdown, /Structured-output summary-only degradation: YES/); assert.match(markdown, /planner_structured_output_failed_round_1: unknown_error/); }); + +test('formatSummaryMarkdown renders unknown confidence count in zh bundle', () => { + const markdown = formatSummaryMarkdown({ + pull: { number: 8, title: '修复边界' }, + reviewLanguage: 'zh-CN', + findings: [], + fileConclusions: [], + actionableSuggestions: [], + potentialRisks: [], + testSuggestions: [], + downgradedInline: [], + uncovered: [], + noPatchCovered: [], + coverage: { + target: 2, + covered: 2, + uncovered: 0, + noPatch: 0, + unknownConfidenceFindings: 3 + }, + runtime: { + roundsUsed: 1, + maxRounds: 3, + plannedBatches: 1, + executedBatches: 1, + subAgentRuns: 1, + plannerCalls: 1, + reviewerCalls: 1, + modelCalls: 2, + maxModelCalls: 10 + }, + degradedSummaryOnly: false, + degradedReasons: [] + }); + + assert.match(markdown, /## AI 代码审查汇总/); + assert.match(markdown, /置信度未知(N\/A)的问题数: 3/); +}); From 741537e5335924520af209972ffc117d4952ce4b Mon Sep 17 00:00:00 2001 From: Jorben Date: Wed, 4 Mar 2026 12:50:39 +0800 Subject: [PATCH 5/7] =?UTF-8?q?docs(action):=20=F0=9F=93=9D=20document=20n?= =?UTF-8?q?ullable=20confidence=20semantics?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/README.md b/README.md index cda3155..9e8f758 100644 --- a/README.md +++ b/README.md @@ -79,6 +79,8 @@ jobs: review_dimensions: general,security,performance,testing review_language: English min_finding_confidence: 0.72 + missing_confidence_policy: na + fallback_confidence_value: 0.5 coverage_first_round_primary_only: true auto_minimize_outdated_comments: true max_rounds: 8 @@ -104,6 +106,8 @@ jobs: | `review_dimensions` | no | `general,security,performance,testing` | Subagent dimensions | | `review_language` | no | `English` | Preferred language for review comments and summary | | `min_finding_confidence` | no | `0.72` | Keep only findings at or above this confidence (0-1) | +| `missing_confidence_policy` | no | `na` | Handling for missing/invalid confidence: `drop`, `na`, or `fallback` | +| `fallback_confidence_value` | no | `0.5` | Fallback confidence used only when `missing_confidence_policy=fallback` | | `coverage_first_round_primary_only` | no | `true` | Round 1 runs only primary dimension for faster file coverage | | `auto_minimize_outdated_comments` | no | `true` | Best-effort GraphQL minimize for outdated historical inline comments from this action | | `max_rounds` | no | `8` | Max planning/review rounds | @@ -148,6 +152,13 @@ Practical guidance: | `uncovered_files` | Number of uncovered files | | `degraded` | `true` if summary-only degradation was triggered | +## Confidence Semantics + +- Finding `confidence` can be `null` when the model cannot confidently estimate a value. +- Inline comments show unknown values as `N/A`. +- `min_finding_confidence` is applied only when confidence is numeric. +- Use `missing_confidence_policy=fallback` if your downstream expects numeric confidence only. + ## Fork PR Notes - For public fork PRs, repository secrets are typically unavailable on `pull_request`. From 80e199e98aaf74e982f2a7f2c7d85edb88ae21a0 Mon Sep 17 00:00:00 2001 From: Jorben Date: Wed, 4 Mar 2026 12:57:58 +0800 Subject: [PATCH 6/7] =?UTF-8?q?refactor(action):=20=E2=99=BB=EF=B8=8F=20ce?= =?UTF-8?q?ntralize=20review=20body=20formatting=20for=20confidence=20test?= =?UTF-8?q?s?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/index.js | 37 +++++++++++++++++++++-------- test/aggregate.test.js | 49 ++++++++++++++++++++++++++++++++++++++ test/config.test.js | 11 +++++++++ test/index.test.js | 54 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 141 insertions(+), 10 deletions(-) diff --git a/src/index.js b/src/index.js index 38ba3f6..7cae0a3 100644 --- a/src/index.js +++ b/src/index.js @@ -225,6 +225,25 @@ function buildInlineBody(finding, text) { return lines.join('\n\n'); } +function buildReviewBody({ + text, + findingsKept, + unknownConfidenceFindings, + inlineCommentsAttempted, + coverage +}) { + return [ + text.reviewCompleted, + `- Findings kept: ${findingsKept}`, + `- Findings with unknown confidence: ${unknownConfidenceFindings}`, + `- Inline comments attempted: ${inlineCommentsAttempted}`, + `- Target files: ${coverage.target}`, + `- Covered files: ${coverage.covered}`, + `- Uncovered files: ${coverage.uncovered}`, + text.reviewSeeSummary + ].join('\n'); +} + function summarizeSeverity(groups, text, limitEach = 8) { const order = ['critical', 'high', 'medium', 'low']; const lines = []; @@ -969,16 +988,13 @@ async function runAction() { ); if (!degradedSummaryOnly) { - const reviewBody = [ - text.reviewCompleted, - `- Findings kept: ${normalizedFindings.length}`, - `- Findings with unknown confidence: ${unknownConfidenceFindings}`, - `- Inline comments attempted: ${inlineComments.length}`, - `- Target files: ${coverage.target}`, - `- Covered files: ${coverage.covered}`, - `- Uncovered files: ${coverage.uncovered}`, - text.reviewSeeSummary - ].join('\n'); + const reviewBody = buildReviewBody({ + text, + findingsKept: normalizedFindings.length, + unknownConfidenceFindings, + inlineCommentsAttempted: inlineComments.length, + coverage + }); const reviewResult = await createReview(octokit, { owner, @@ -1054,6 +1070,7 @@ module.exports = { summarizePlannerBatchesForLog, formatConfidenceValue, buildInlineBody, + buildReviewBody, summarizeSeverity, summarizeFileConclusions, formatSummaryMarkdown diff --git a/test/aggregate.test.js b/test/aggregate.test.js index 529c7c2..009f0c4 100644 --- a/test/aggregate.test.js +++ b/test/aggregate.test.js @@ -303,6 +303,55 @@ test('normalizeFindings applies fallback confidence and min threshold when polic assert.equal(dropped.length, 0); }); +test('normalizeFindings mixed numeric/null confidence follows na policy semantics', () => { + const allowed = ['src/a.js']; + const findings = [ + { + path: 'src/a.js', + title: 'Unknown confidence kept', + summary: 'unknown confidence should be kept with na policy', + severity: 'LOW', + side: 'RIGHT', + line: 1, + confidence: null, + evidence: ['e1'] + }, + { + path: 'src/a.js', + title: 'High numeric confidence kept', + summary: 'numeric confidence above threshold should be kept', + severity: 'LOW', + side: 'RIGHT', + line: 2, + confidence: 0.9, + evidence: ['e2'] + }, + { + path: 'src/a.js', + title: 'Low numeric confidence dropped', + summary: 'numeric confidence below threshold should be dropped', + severity: 'LOW', + side: 'RIGHT', + line: 3, + confidence: 0.5, + evidence: ['e3'] + } + ]; + + const normalized = normalizeFindings(findings, allowed, { + minConfidence: 0.72, + missingConfidencePolicy: 'na' + }); + assert.equal(normalized.length, 2); + assert.deepEqual( + normalized.map((x) => [x.title, x.confidence]), + [ + ['Unknown confidence kept', null], + ['High numeric confidence kept', 0.9] + ] + ); +}); + test('groupFindingsBySeverity falls back unknown severities to medium', () => { const unknownSeverityFinding = { path: 'src/a.js', diff --git a/test/config.test.js b/test/config.test.js index c0b3b8d..c99bfbd 100644 --- a/test/config.test.js +++ b/test/config.test.js @@ -132,6 +132,17 @@ test('loadConfig accepts fallback_confidence_value boundaries 0 and 1', () => { assert.equal(high.fallbackConfidenceValue, 1); }); +test('loadConfig uses default fallback value when policy is fallback and value is omitted', () => { + const config = loadConfigWithMockedInputs({ + github_token: 'ghs_xxx', + openai_api_key: 'sk-test', + missing_confidence_policy: 'fallback' + }); + + assert.equal(config.missingConfidencePolicy, 'fallback'); + assert.equal(config.fallbackConfidenceValue, 0.5); +}); + test('loadConfig normalizes and deduplicates review_dimensions while preserving order', () => { const config = loadConfigWithMockedInputs({ github_token: 'ghs_xxx', diff --git a/test/index.test.js b/test/index.test.js index 44a1295..6f3accf 100644 --- a/test/index.test.js +++ b/test/index.test.js @@ -10,6 +10,7 @@ const { shouldUseSummaryOnlyMode, formatConfidenceValue, buildInlineBody, + buildReviewBody, formatSummaryMarkdown } = __internal; @@ -88,6 +89,10 @@ test('formatConfidenceValue handles invalid and boundary values predictably', () assert.equal(formatConfidenceValue(-0.1), '0.00'); assert.equal(formatConfidenceValue(1.2), '1.00'); assert.equal(formatConfidenceValue('0.345'), '0.34'); + assert.equal(formatConfidenceValue(0.3449), '0.34'); + assert.equal(formatConfidenceValue(0.345), '0.34'); + assert.equal(formatConfidenceValue(0.3451), '0.35'); + assert.equal(formatConfidenceValue(0.755), '0.76'); assert.equal(formatConfidenceValue(0), '0.00'); assert.equal(formatConfidenceValue(1), '1.00'); assert.equal(formatConfidenceValue(undefined, 'UNKNOWN'), 'UNKNOWN'); @@ -215,3 +220,52 @@ test('formatSummaryMarkdown renders unknown confidence count in zh bundle', () = assert.match(markdown, /## AI 代码审查汇总/); assert.match(markdown, /置信度未知(N\/A)的问题数: 3/); }); + +test('unknown confidence count stays consistent between summary and review body', () => { + const text = getTextBundle('English'); + const coverage = { + target: 5, + covered: 5, + uncovered: 0, + noPatch: 0, + unknownConfidenceFindings: 2 + }; + + const summary = formatSummaryMarkdown({ + pull: { number: 9, title: 'Consistency check' }, + reviewLanguage: 'English', + findings: [], + fileConclusions: [], + actionableSuggestions: [], + potentialRisks: [], + testSuggestions: [], + downgradedInline: [], + uncovered: [], + noPatchCovered: [], + coverage, + runtime: { + roundsUsed: 1, + maxRounds: 3, + plannedBatches: 1, + executedBatches: 1, + subAgentRuns: 1, + plannerCalls: 1, + reviewerCalls: 1, + modelCalls: 2, + maxModelCalls: 10 + }, + degradedSummaryOnly: false, + degradedReasons: [] + }); + + const reviewBody = buildReviewBody({ + text, + findingsKept: 4, + unknownConfidenceFindings: 2, + inlineCommentsAttempted: 3, + coverage + }); + + assert.match(summary, /Findings with unknown confidence \(N\/A\): 2/); + assert.match(reviewBody, /Findings with unknown confidence: 2/); +}); From 5e6ff8e3892177d90e0ee67bba99a8b90cb530fe Mon Sep 17 00:00:00 2001 From: Jorben Date: Wed, 4 Mar 2026 12:57:58 +0800 Subject: [PATCH 7/7] =?UTF-8?q?docs(action):=20=F0=9F=93=9D=20clarify=20fa?= =?UTF-8?q?llback=20confidence=20scope?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 9e8f758..3bebc89 100644 --- a/README.md +++ b/README.md @@ -158,6 +158,7 @@ Practical guidance: - Inline comments show unknown values as `N/A`. - `min_finding_confidence` is applied only when confidence is numeric. - Use `missing_confidence_policy=fallback` if your downstream expects numeric confidence only. +- When `missing_confidence_policy` is `drop` or `na`, `fallback_confidence_value` is ignored. ## Fork PR Notes