diff --git a/README.md b/README.md
index cda3155..3bebc89 100644
--- a/README.md
+++ b/README.md
@@ -79,6 +79,8 @@ jobs:
review_dimensions: general,security,performance,testing
review_language: English
min_finding_confidence: 0.72
+ missing_confidence_policy: na
+ fallback_confidence_value: 0.5
coverage_first_round_primary_only: true
auto_minimize_outdated_comments: true
max_rounds: 8
@@ -104,6 +106,8 @@ jobs:
| `review_dimensions` | no | `general,security,performance,testing` | Subagent dimensions |
| `review_language` | no | `English` | Preferred language for review comments and summary |
| `min_finding_confidence` | no | `0.72` | Keep only findings at or above this confidence (0-1) |
+| `missing_confidence_policy` | no | `na` | Handling for missing/invalid confidence: `drop`, `na`, or `fallback` |
+| `fallback_confidence_value` | no | `0.5` | Fallback confidence (0-1) used only when `missing_confidence_policy=fallback` |
| `coverage_first_round_primary_only` | no | `true` | Round 1 runs only primary dimension for faster file coverage |
| `auto_minimize_outdated_comments` | no | `true` | Best-effort GraphQL minimize for outdated historical inline comments from this action |
| `max_rounds` | no | `8` | Max planning/review rounds |
@@ -148,6 +152,14 @@ Practical guidance:
| `uncovered_files` | Number of uncovered files |
| `degraded` | `true` if summary-only degradation was triggered |
+## Confidence Semantics
+
+- A finding's `confidence` can be `null` when the model cannot estimate a value for it.
+- Inline comments show unknown values as `N/A`.
+- `min_finding_confidence` is applied only when confidence is numeric; findings whose confidence is unknown (`N/A`) are never filtered by the threshold.
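+- Use `missing_confidence_policy=fallback` if your downstream tooling expects numeric confidence only. The fallback value is still checked against `min_finding_confidence`, so with the defaults (`0.5` < `0.72`) those findings are dropped; raise `fallback_confidence_value` to keep them.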
+- When `missing_confidence_policy` is `drop` or `na`, `fallback_confidence_value` is ignored.
+
## Fork PR Notes
- For public fork PRs, repository secrets are typically unavailable on `pull_request`.
diff --git a/action.yml b/action.yml
index 167cf39..4419d89 100644
--- a/action.yml
+++ b/action.yml
@@ -50,6 +50,14 @@ inputs:
description: Minimum confidence (0-1) required for a finding to be kept.
required: false
default: "0.72"
+ missing_confidence_policy:
+ description: Policy when finding confidence is missing/invalid (drop|na|fallback).
+ required: false
+ default: "na"
+ fallback_confidence_value:
+ description: Fallback confidence (0-1) used only when missing_confidence_policy=fallback.
+ required: false
+ default: "0.5"
coverage_first_round_primary_only:
description: In round 1, run only primary dimension to maximize file coverage under budget.
required: false
diff --git a/src/agents.js b/src/agents.js
index be436fd..a929948 100644
--- a/src/agents.js
+++ b/src/agents.js
@@ -25,7 +25,7 @@ const findingSchema = z.object({
path: z.string().min(1),
side: z.enum(['LEFT', 'RIGHT', 'FILE']).default('RIGHT'),
line: z.number().int().positive().nullable().default(null),
- confidence: z.number().min(0).max(1).default(0.8),
+ confidence: z.number().min(0).max(1).nullable().optional().default(null),
evidence: z.array(z.string().min(1)).default([]),
fingerprint: z.string().max(120).default(''),
summary: z.string().min(1),
@@ -126,7 +126,8 @@ Rules:
- Never emit line numbers that do not appear in the provided anchors.
- Do not invent files or line numbers.
- Severity must be one of critical/high/medium/low.
-- Set confidence in [0,1]. Include at least one concrete evidence item tied to provided diff context.
+- Set confidence in [0,1] when you can estimate it; otherwise use null.
+- Include at least one concrete evidence item tied to provided diff context.
- If confidence is below 0.70, do not emit it as a finding; put it in file-level notes instead.
- Use fingerprint as stable short key for same issue across dimensions (e.g. unsafe_openai_base_url, planner_done_ignored).
- Keep findings concrete, actionable, and concise.
diff --git a/src/aggregate.js b/src/aggregate.js
index 03c03b9..fc431e6 100644
--- a/src/aggregate.js
+++ b/src/aggregate.js
@@ -70,6 +70,10 @@ function jaccardSimilarity(a, b) {
return union.size === 0 ? 0 : intersection / union.size;
}
+function confidenceRank(value) { // Sort/merge key for a finding confidence value.
+ return Number.isFinite(value) ? value : -1; // non-finite (null, undefined, NaN) ranks -1, i.e. below any confidence in [0, 1]
+}
+
function isSemanticallySameIssue(a, b) {
if (
a.fingerprint &&
@@ -102,11 +106,14 @@ function isSemanticallySameIssue(a, b) {
}
function mergeFinding(base, incoming) {
- const preferIncoming = incoming.confidence > base.confidence;
+ const baseRank = confidenceRank(base.confidence);
+ const incomingRank = confidenceRank(incoming.confidence);
+ const preferIncoming = incomingRank > baseRank;
const mergedEvidence = [...new Set([...(base.evidence || []), ...(incoming.evidence || [])])].slice(0, 3);
const severity = SEVERITY_RANK[incoming.severity] > SEVERITY_RANK[base.severity]
? incoming.severity
: base.severity;
+ const mergedConfidence = incomingRank >= baseRank ? incoming.confidence : base.confidence;
return {
...base,
...(preferIncoming
@@ -118,7 +125,7 @@ function mergeFinding(base, incoming) {
}
: {}),
severity,
- confidence: Math.max(base.confidence, incoming.confidence),
+ confidence: mergedConfidence,
evidence: mergedEvidence,
fingerprint: base.fingerprint || incoming.fingerprint,
sourceDimension: preferIncoming
@@ -130,6 +137,13 @@ function mergeFinding(base, incoming) {
function normalizeFindings(findings, allowedPaths, options = {}) {
const pathSet = new Set(allowedPaths);
const minConfidence = Number.isFinite(options.minConfidence) ? options.minConfidence : 0;
+ const missingConfidencePolicy = ['drop', 'na', 'fallback'].includes(options.missingConfidencePolicy)
+ ? options.missingConfidencePolicy
+ : 'na';
+ const fallbackConfidenceValueRaw = Number.parseFloat(String(options.fallbackConfidenceValue ?? '0.5'));
+ const fallbackConfidenceValue = Number.isFinite(fallbackConfidenceValueRaw)
+ ? clamp(fallbackConfidenceValueRaw, 0, 1)
+ : 0.5;
const out = [];
for (const finding of findings || []) {
@@ -143,11 +157,23 @@ function normalizeFindings(findings, allowedPaths, options = {}) {
const line = Number.isInteger(finding.line) && finding.line > 0 ? finding.line : null;
const title = String(finding.title || '').trim();
const summary = String(finding.summary || '').trim();
- const confidenceRaw = Number.parseFloat(String(finding.confidence ?? '0.8'));
- const confidence = Number.isFinite(confidenceRaw) ? clamp(confidenceRaw, 0, 1) : 0.8;
+ const confidenceRaw = Number.parseFloat(String(finding.confidence));
+ let confidence = Number.isFinite(confidenceRaw) ? clamp(confidenceRaw, 0, 1) : null;
const evidence = normalizeEvidence(finding.evidence);
- if (!title || !summary || evidence.length === 0 || confidence < minConfidence) {
+ if (confidence === null) {
+ if (missingConfidencePolicy === 'drop') {
+ continue;
+ }
+ if (missingConfidencePolicy === 'fallback') {
+ confidence = fallbackConfidenceValue;
+ }
+ }
+
+ if (!title || !summary || evidence.length === 0) {
+ continue;
+ }
+ if (Number.isFinite(confidence) && confidence < minConfidence) {
continue;
}
@@ -216,7 +242,7 @@ function dedupeAndSortFindings(findings, maxFindings) {
return pathDiff;
}
- const confidenceDiff = (b.confidence || 0) - (a.confidence || 0);
+ const confidenceDiff = confidenceRank(b.confidence) - confidenceRank(a.confidence);
if (confidenceDiff !== 0) {
return confidenceDiff;
}
diff --git a/src/config.js b/src/config.js
index 3c33a94..e57d206 100644
--- a/src/config.js
+++ b/src/config.js
@@ -75,6 +75,15 @@ function parseFloatRangeInput(name, defaultValue, min, max) {
return parsed;
}
+function parseEnumInput(name, defaultValue, allowedValues) { // Reads the named action input and returns it trimmed and lower-cased; throws if it is not in allowedValues.
+ const raw = core.getInput(name) || String(defaultValue); // a falsy (e.g. empty) input falls back to defaultValue
+ const normalized = String(raw).trim().toLowerCase(); // matching is whitespace- and case-insensitive
+ if (!allowedValues.includes(normalized)) {
+ throw new Error(`Input ${name} must be one of [${allowedValues.join(', ')}], got: ${raw}`); // reports the raw value, not the normalized one
+ }
+ return normalized;
+}
+
function uniqueLowercase(items) {
const seen = new Set();
const out = [];
@@ -123,6 +132,8 @@ function loadConfig() {
reviewDimensions: normalizedDimensions,
reviewLanguage,
minFindingConfidence: parseFloatRangeInput('min_finding_confidence', 0.72, 0, 1),
+ missingConfidencePolicy: parseEnumInput('missing_confidence_policy', 'na', ['drop', 'na', 'fallback']),
+ fallbackConfidenceValue: parseFloatRangeInput('fallback_confidence_value', 0.5, 0, 1),
coverageFirstRoundPrimaryOnly: parseBooleanInput('coverage_first_round_primary_only', true),
autoMinimizeOutdatedComments: parseBooleanInput('auto_minimize_outdated_comments', true),
maxRounds: parsePositiveIntInput('max_rounds', 8),
diff --git a/src/index.js b/src/index.js
index 759fdc5..7cae0a3 100644
--- a/src/index.js
+++ b/src/index.js
@@ -30,6 +30,8 @@ function getTextBundle(language) {
return {
suggestionLabel: 'Suggestion',
riskLabel: 'Risk',
+ confidenceLabel: 'Confidence',
+ unknownConfidenceValue: 'N/A',
summaryTitle: 'AI Code Review Summary',
preferredLanguage: 'Preferred language',
overallAssessment: 'Overall Assessment',
@@ -40,6 +42,7 @@ function getTextBundle(language) {
fileLevelCoverage: 'File-Level Coverage Notes',
inlineDowngraded: 'Inline Downgraded Items (processed but not inline)',
coverageStatus: 'Coverage Status',
+ unknownConfidenceFindings: 'Findings with unknown confidence (N/A)',
uncoveredList: 'Uncovered list',
noPatchCoveredList: 'No-patch covered list',
runtimeBudget: 'Runtime/Budget',
@@ -80,6 +83,8 @@ function getTextBundle(language) {
return {
suggestionLabel: '建议',
riskLabel: '风险',
+ confidenceLabel: '置信度',
+ unknownConfidenceValue: 'N/A',
summaryTitle: 'AI 代码审查汇总',
preferredLanguage: '指定语言',
overallAssessment: '总体评价',
@@ -90,6 +95,7 @@ function getTextBundle(language) {
fileLevelCoverage: '文件级覆盖说明',
inlineDowngraded: '无法 inline 的已处理项',
coverageStatus: '覆盖状态',
+ unknownConfidenceFindings: '置信度未知(N/A)的问题数',
uncoveredList: '未覆盖文件清单',
noPatchCoveredList: '无 patch 文件覆盖清单',
runtimeBudget: '轮次与预算',
@@ -187,6 +193,16 @@ function summarizePlannerBatchesForLog(batches, maxEntries = 12) {
}).join(' | ');
}
+function formatConfidenceValue(confidence, unknownValue = 'N/A') { // Renders a confidence for display: a two-decimal string in [0, 1], or unknownValue when it is not numeric.
+ const value = Number.parseFloat(String(confidence)); // numeric strings are accepted; null, undefined and empty string parse to NaN
+ if (!Number.isFinite(value)) {
+ return unknownValue;
+ }
+
+ const clamped = Math.min(1, Math.max(0, value)); // out-of-range input is clamped rather than rejected
+ return clamped.toFixed(2); // toFixed rounds the stored double, so a literal such as 0.345 can render as 0.34
+}
+
function buildInlineBody(finding, text) {
const lines = [];
const subAgent = String(finding.sourceDimension || 'general').trim().toLowerCase() || 'general';
@@ -202,12 +218,32 @@ function buildInlineBody(finding, text) {
lines.push(`${text.riskLabel}: ${finding.risk}`);
}
- lines.push(``);
+ lines.push(`${text.confidenceLabel}: ${formatConfidenceValue(finding.confidence, text.unknownConfidenceValue)}`);
lines.push(`
${text.fromSubAgentTag(subAgent)}
`);
+ lines.push(``);
return lines.join('\n\n');
}
+function buildReviewBody({ // Builds the newline-joined review body: heading line, count bullets, then the see-summary line.
+ text,
+ findingsKept,
+ unknownConfidenceFindings,
+ inlineCommentsAttempted,
+ coverage
+}) {
+ return [
+ text.reviewCompleted,
+ `- Findings kept: ${findingsKept}`, // NOTE(review): bullet labels are hard-coded English; only the first and last entries come from the text bundle
+ `- Findings with unknown confidence: ${unknownConfidenceFindings}`,
+ `- Inline comments attempted: ${inlineCommentsAttempted}`,
+ `- Target files: ${coverage.target}`, // only target, covered and uncovered are read from coverage here
+ `- Covered files: ${coverage.covered}`,
+ `- Uncovered files: ${coverage.uncovered}`,
+ text.reviewSeeSummary
+ ].join('\n');
+}
+
function summarizeSeverity(groups, text, limitEach = 8) {
const order = ['critical', 'high', 'medium', 'low'];
const lines = [];
@@ -300,6 +336,9 @@ function formatSummaryMarkdown({
const degradedText = degradedSummaryOnly
? `${text.yes}\n\n${text.reasons}:\n${degradedReasons.map((x) => `- ${x}`).join('\n') || '- unknown'}`
: text.no;
+ const unknownConfidenceFindings = Number.isFinite(coverage.unknownConfidenceFindings)
+ ? coverage.unknownConfidenceFindings
+ : 0;
return [
`## ${text.summaryTitle}`,
@@ -333,6 +372,7 @@ function formatSummaryMarkdown({
`- Covered files: ${coverage.covered}`,
`- Uncovered files: ${coverage.uncovered}`,
`- No-patch/binary covered as file-level: ${coverage.noPatch}`,
+ `- ${text.unknownConfidenceFindings}: ${unknownConfidenceFindings}`,
'',
`${text.uncoveredList}:`,
uncoveredLines,
@@ -774,10 +814,13 @@ async function runAction() {
const normalizedFindings = dedupeAndSortFindings(
normalizeFindings(rawFindings, targetPaths, {
- minConfidence: config.minFindingConfidence
+ minConfidence: config.minFindingConfidence,
+ missingConfidencePolicy: config.missingConfidencePolicy,
+ fallbackConfidenceValue: config.fallbackConfidenceValue
}),
config.maxFindings
);
+ const unknownConfidenceFindings = normalizedFindings.filter((finding) => !Number.isFinite(finding.confidence)).length;
const diffLineMap = buildDiffLineMaps(patchFiles);
const inlineComments = [];
@@ -895,7 +938,8 @@ async function runAction() {
target: filteredFiles.length,
covered: filteredFiles.length - uncovered.length,
uncovered: uncovered.length,
- noPatch: noPatchCovered.length
+ noPatch: noPatchCovered.length,
+ unknownConfidenceFindings
};
if (filteredFiles.length === 0) {
@@ -944,15 +988,13 @@ async function runAction() {
);
if (!degradedSummaryOnly) {
- const reviewBody = [
- text.reviewCompleted,
- `- Findings kept: ${normalizedFindings.length}`,
- `- Inline comments attempted: ${inlineComments.length}`,
- `- Target files: ${coverage.target}`,
- `- Covered files: ${coverage.covered}`,
- `- Uncovered files: ${coverage.uncovered}`,
- text.reviewSeeSummary
- ].join('\n');
+ const reviewBody = buildReviewBody({
+ text,
+ findingsKept: normalizedFindings.length,
+ unknownConfidenceFindings,
+ inlineCommentsAttempted: inlineComments.length,
+ coverage
+ });
const reviewResult = await createReview(octokit, {
owner,
@@ -1026,7 +1068,9 @@ module.exports = {
shouldUseSummaryOnlyMode,
sanitizePlannedBatches,
summarizePlannerBatchesForLog,
+ formatConfidenceValue,
buildInlineBody,
+ buildReviewBody,
summarizeSeverity,
summarizeFileConclusions,
formatSummaryMarkdown
diff --git a/test/agents.test.js b/test/agents.test.js
index 63dd61d..587d822 100644
--- a/test/agents.test.js
+++ b/test/agents.test.js
@@ -94,6 +94,95 @@ test('runStructuredWithRepair reports wrapped error after repair failure', async
assert.match(String(result.error?.message || result.error), /still-invalid/);
});
+test('createReviewerAgent schema accepts nullable/omitted confidence and rejects invalid confidence', () => {
+ const { createReviewerAgent } = loadAgentsWithMockedRuntime(async () => ({ finalOutput: {} }));
+ const agent = createReviewerAgent({
+ dimension: 'general',
+ model: 'gpt-test',
+ language: 'English',
+ projectGuidance: null
+ });
+
+ const schema = agent.opts.outputType;
+ const parsedOmitted = schema.parse({
+ overall: 'ok',
+ findings: [
+ {
+ title: 'No confidence field',
+ severity: 'low',
+ path: 'src/a.js',
+ summary: 'desc',
+ evidence: ['e1']
+ }
+ ]
+ });
+ assert.equal(parsedOmitted.findings[0].confidence, null);
+
+ const parsedNull = schema.parse({
+ overall: 'ok',
+ findings: [
+ {
+ title: 'Null confidence field',
+ severity: 'low',
+ path: 'src/a.js',
+ summary: 'desc',
+ confidence: null,
+ evidence: ['e1']
+ }
+ ]
+ });
+ assert.equal(parsedNull.findings[0].confidence, null);
+
+ const parsedNumeric = schema.parse({
+ overall: 'ok',
+ findings: [
+ {
+ title: 'Numeric confidence',
+ severity: 'low',
+ path: 'src/a.js',
+ summary: 'desc',
+ confidence: 0.9,
+ evidence: ['e1']
+ }
+ ]
+ });
+ assert.equal(parsedNumeric.findings[0].confidence, 0.9);
+
+ assert.throws(
+ () => schema.parse({
+ overall: 'ok',
+ findings: [
+ {
+ title: 'String confidence',
+ severity: 'low',
+ path: 'src/a.js',
+ summary: 'desc',
+ confidence: '0.9',
+ evidence: ['e1']
+ }
+ ]
+ }),
+ /Expected number, received string/
+ );
+
+ assert.throws(
+ () => schema.parse({
+ overall: 'ok',
+ findings: [
+ {
+ title: 'Out-of-range confidence',
+ severity: 'low',
+ path: 'src/a.js',
+ summary: 'desc',
+ confidence: 1.2,
+ evidence: ['e1']
+ }
+ ]
+ }),
+ /Number must be less than or equal to 1/
+ );
+});
+
test('buildBatchReviewInput keeps additional file with truncation at boundary', () => {
const { buildBatchReviewInput } = loadAgentsWithMockedRuntime(async () => ({ finalOutput: {} }));
diff --git a/test/aggregate.test.js b/test/aggregate.test.js
index 23e7e3d..009f0c4 100644
--- a/test/aggregate.test.js
+++ b/test/aggregate.test.js
@@ -151,6 +151,52 @@ test('dedupeAndSortFindings keeps deterministic order on same severity/confidenc
);
});
+test('dedupeAndSortFindings ranks unknown confidence after numeric values', () => {
+ const findings = [
+ { path: 'a.js', side: 'RIGHT', line: 3, severity: 'medium', title: 'T3', summary: 'S3', confidence: null, evidence: ['3'] },
+ { path: 'a.js', side: 'RIGHT', line: 2, severity: 'medium', title: 'T2', summary: 'S2', confidence: 0, evidence: ['2'] },
+ { path: 'a.js', side: 'RIGHT', line: 1, severity: 'medium', title: 'T1', summary: 'S1', confidence: 0.8, evidence: ['1'] }
+ ];
+
+ const result = dedupeAndSortFindings(findings, 10);
+ assert.deepEqual(result.map((x) => x.line), [1, 2, 3]); // expected order is 0.8, then 0, then null: unknown must sort below an explicit 0
+});
+
+test('dedupeAndSortFindings merge prefers numeric confidence over unknown and updates sourceDimension', () => {
+ const findings = [
+ {
+ path: 'a.js',
+ side: 'RIGHT',
+ line: 7,
+ severity: 'medium',
+ title: 'Issue from unknown confidence',
+ summary: 'Unknown confidence finding',
+ confidence: null,
+ evidence: ['unknown'],
+ fingerprint: 'same_issue',
+ sourceDimension: 'general'
+ },
+ {
+ path: 'a.js',
+ side: 'RIGHT',
+ line: 7,
+ severity: 'medium',
+ title: 'Issue from numeric confidence',
+ summary: 'Numeric confidence finding',
+ confidence: 0.91,
+ evidence: ['numeric'],
+ fingerprint: 'same_issue',
+ sourceDimension: 'security'
+ }
+ ];
+
+ const result = dedupeAndSortFindings(findings, 10);
+ assert.equal(result.length, 1);
+ assert.equal(result[0].confidence, 0.91);
+ assert.equal(result[0].sourceDimension, 'security');
+ assert.equal(result[0].title, 'Issue from numeric confidence');
+});
+
test('normalizeFindings keeps confidence at threshold and normalizes side/line edge values', () => {
const allowed = ['src/a.js'];
const findings = [
@@ -178,8 +224,8 @@ test('normalizeFindings handles confidence/evidence type anomalies predictably',
const findings = [
{
path: 'src/a.js',
- title: 'NaN confidence defaults',
- summary: 'NaN confidence should fall back to default 0.8',
+ title: 'NaN confidence is unknown',
+ summary: 'NaN confidence should be kept as unknown by default policy',
severity: 'LOW',
side: 'LEFT',
line: -2,
@@ -200,12 +246,112 @@ test('normalizeFindings handles confidence/evidence type anomalies predictably',
const normalized = normalizeFindings(findings, allowed, { minConfidence: 0.72 });
assert.equal(normalized.length, 1);
- assert.equal(normalized[0].title, 'NaN confidence defaults');
- assert.equal(normalized[0].confidence, 0.8);
+ assert.equal(normalized[0].title, 'NaN confidence is unknown');
+ assert.equal(normalized[0].confidence, null);
assert.equal(normalized[0].side, 'LEFT');
assert.equal(normalized[0].line, null);
});
+test('normalizeFindings drops findings with missing confidence when policy is drop', () => {
+ const allowed = ['src/a.js'];
+ const findings = [
+ {
+ path: 'src/a.js',
+ title: 'Unknown confidence',
+ summary: 'confidence missing',
+ severity: 'LOW',
+ side: 'RIGHT',
+ line: 2,
+ evidence: ['e1']
+ }
+ ];
+
+ const normalized = normalizeFindings(findings, allowed, {
+ minConfidence: 0.72,
+ missingConfidencePolicy: 'drop'
+ });
+ assert.equal(normalized.length, 0);
+});
+
+test('normalizeFindings applies fallback confidence and min threshold when policy is fallback', () => {
+ const allowed = ['src/a.js'];
+ const findings = [
+ {
+ path: 'src/a.js',
+ title: 'Fallback confidence',
+ summary: 'confidence missing',
+ severity: 'LOW',
+ side: 'RIGHT',
+ line: 2,
+ evidence: ['e1']
+ }
+ ];
+
+ const kept = normalizeFindings(findings, allowed, {
+ minConfidence: 0.72,
+ missingConfidencePolicy: 'fallback',
+ fallbackConfidenceValue: 0.85 // above minConfidence, so the filled-in finding survives
+ });
+ assert.equal(kept.length, 1);
+ assert.equal(kept[0].confidence, 0.85);
+
+ const dropped = normalizeFindings(findings, allowed, {
+ minConfidence: 0.72,
+ missingConfidencePolicy: 'fallback',
+ fallbackConfidenceValue: 0.5 // below minConfidence, so the filled-in finding is then filtered out
+ });
+ assert.equal(dropped.length, 0);
+});
+
+test('normalizeFindings mixed numeric/null confidence follows na policy semantics', () => {
+ const allowed = ['src/a.js'];
+ const findings = [
+ {
+ path: 'src/a.js',
+ title: 'Unknown confidence kept',
+ summary: 'unknown confidence should be kept with na policy',
+ severity: 'LOW',
+ side: 'RIGHT',
+ line: 1,
+ confidence: null,
+ evidence: ['e1']
+ },
+ {
+ path: 'src/a.js',
+ title: 'High numeric confidence kept',
+ summary: 'numeric confidence above threshold should be kept',
+ severity: 'LOW',
+ side: 'RIGHT',
+ line: 2,
+ confidence: 0.9,
+ evidence: ['e2']
+ },
+ {
+ path: 'src/a.js',
+ title: 'Low numeric confidence dropped',
+ summary: 'numeric confidence below threshold should be dropped',
+ severity: 'LOW',
+ side: 'RIGHT',
+ line: 3,
+ confidence: 0.5,
+ evidence: ['e3']
+ }
+ ];
+
+ const normalized = normalizeFindings(findings, allowed, {
+ minConfidence: 0.72,
+ missingConfidencePolicy: 'na'
+ });
+ assert.equal(normalized.length, 2);
+ assert.deepEqual(
+ normalized.map((x) => [x.title, x.confidence]),
+ [
+ ['Unknown confidence kept', null],
+ ['High numeric confidence kept', 0.9]
+ ]
+ );
+});
+
test('groupFindingsBySeverity falls back unknown severities to medium', () => {
const unknownSeverityFinding = {
path: 'src/a.js',
diff --git a/test/config.test.js b/test/config.test.js
index a1d53cd..c99bfbd 100644
--- a/test/config.test.js
+++ b/test/config.test.js
@@ -44,6 +44,8 @@ test('loadConfig applies defaults for confidence and coverage-first mode', () =>
});
assert.equal(config.minFindingConfidence, 0.72);
+ assert.equal(config.missingConfidencePolicy, 'na');
+ assert.equal(config.fallbackConfidenceValue, 0.5);
assert.equal(config.coverageFirstRoundPrimaryOnly, true);
assert.equal(config.autoMinimizeOutdatedComments, true);
assert.deepEqual(config.openaiApiBaseAllowlist, ['api.openai.com']);
@@ -54,6 +56,8 @@ test('loadConfig parses custom confidence and coverage-first mode', () => {
github_token: 'ghs_xxx',
openai_api_key: 'sk-test',
min_finding_confidence: '0.85',
+ missing_confidence_policy: 'fallback',
+ fallback_confidence_value: '0.65',
coverage_first_round_primary_only: 'false',
auto_minimize_outdated_comments: 'false',
openai_api_base: 'https://gateway.example.com/v1',
@@ -61,6 +65,8 @@ test('loadConfig parses custom confidence and coverage-first mode', () => {
});
assert.equal(config.minFindingConfidence, 0.85);
+ assert.equal(config.missingConfidencePolicy, 'fallback');
+ assert.equal(config.fallbackConfidenceValue, 0.65);
assert.equal(config.coverageFirstRoundPrimaryOnly, false);
assert.equal(config.autoMinimizeOutdatedComments, false);
assert.equal(config.openaiApiBase, 'https://gateway.example.com/v1');
@@ -78,6 +84,65 @@ test('loadConfig rejects invalid confidence range', () => {
);
});
+test('loadConfig rejects invalid missing_confidence_policy', () => {
+ assert.throws(
+ () => loadConfigWithMockedInputs({
+ github_token: 'ghs_xxx',
+ openai_api_key: 'sk-test',
+ missing_confidence_policy: 'invalid'
+ }),
+ /missing_confidence_policy must be one of \[drop, na, fallback\]/
+ );
+});
+
+test('loadConfig rejects invalid fallback_confidence_value range', () => {
+ assert.throws(
+ () => loadConfigWithMockedInputs({
+ github_token: 'ghs_xxx',
+ openai_api_key: 'sk-test',
+ fallback_confidence_value: '-0.1'
+ }),
+ /fallback_confidence_value must be a number in \[0, 1\]/
+ );
+});
+
+test('loadConfig normalizes missing_confidence_policy casing and whitespace', () => {
+ const config = loadConfigWithMockedInputs({
+ github_token: 'ghs_xxx',
+ openai_api_key: 'sk-test',
+ missing_confidence_policy: ' Fallback '
+ });
+
+ assert.equal(config.missingConfidencePolicy, 'fallback');
+});
+
+test('loadConfig accepts fallback_confidence_value boundaries 0 and 1', () => {
+ const low = loadConfigWithMockedInputs({
+ github_token: 'ghs_xxx',
+ openai_api_key: 'sk-test',
+ fallback_confidence_value: '0'
+ });
+ assert.equal(low.fallbackConfidenceValue, 0);
+
+ const high = loadConfigWithMockedInputs({
+ github_token: 'ghs_xxx',
+ openai_api_key: 'sk-test',
+ fallback_confidence_value: '1'
+ });
+ assert.equal(high.fallbackConfidenceValue, 1);
+});
+
+test('loadConfig uses default fallback value when policy is fallback and value is omitted', () => {
+ const config = loadConfigWithMockedInputs({
+ github_token: 'ghs_xxx',
+ openai_api_key: 'sk-test',
+ missing_confidence_policy: 'fallback'
+ });
+
+ assert.equal(config.missingConfidencePolicy, 'fallback');
+ assert.equal(config.fallbackConfidenceValue, 0.5);
+});
+
test('loadConfig normalizes and deduplicates review_dimensions while preserving order', () => {
const config = loadConfigWithMockedInputs({
github_token: 'ghs_xxx',
diff --git a/test/index.test.js b/test/index.test.js
index 0e265d3..6f3accf 100644
--- a/test/index.test.js
+++ b/test/index.test.js
@@ -8,7 +8,9 @@ const {
chunk,
sanitizePlannedBatches,
shouldUseSummaryOnlyMode,
+ formatConfidenceValue,
buildInlineBody,
+ buildReviewBody,
formatSummaryMarkdown
} = __internal;
@@ -60,6 +62,7 @@ test('buildInlineBody includes severity, labels, inline key marker, and sub-agen
summary: 'The value can be null before property access.',
suggestion: 'Add a null check before dereference.',
risk: 'Can throw at runtime.',
+ confidence: 0.93,
path: 'src/a.js',
side: 'RIGHT',
line: 10,
@@ -69,8 +72,67 @@ test('buildInlineBody includes severity, labels, inline key marker, and sub-agen
assert.match(body, /\*\*\[MEDIUM\] Missing null guard\*\*/);
assert.match(body, /Suggestion: Add a null check before dereference\./);
assert.match(body, /Risk: Can throw at runtime\./);
+ assert.match(body, /Confidence: 0.93/);
assert.match(body, /ai-code-review-agent:inline-key/);
assert.match(body, /\[From SubAgent: security\]/);
+ assert.ok(body.indexOf('Confidence: 0.93') < body.indexOf('[From SubAgent: security]'));
+ assert.ok(body.indexOf('[From SubAgent: security]') < body.indexOf('ai-code-review-agent:inline-key'));
+ assert.ok(body.trim().endsWith('-->'));
+ assert.equal((body.match(/ai-code-review-agent:inline-key/g) || []).length, 1);
+});
+
+test('formatConfidenceValue handles invalid and boundary values predictably', () => {
+ assert.equal(formatConfidenceValue(undefined), 'N/A');
+ assert.equal(formatConfidenceValue(null), 'N/A');
+ assert.equal(formatConfidenceValue(''), 'N/A');
+ assert.equal(formatConfidenceValue('abc'), 'N/A');
+ assert.equal(formatConfidenceValue(-0.1), '0.00'); // out-of-range values are clamped, not reported as unknown
+ assert.equal(formatConfidenceValue(1.2), '1.00');
+ assert.equal(formatConfidenceValue('0.345'), '0.34'); // numeric strings are parsed; the double nearest 0.345 lies just below it, so it rounds down
+ assert.equal(formatConfidenceValue(0.3449), '0.34');
+ assert.equal(formatConfidenceValue(0.345), '0.34');
+ assert.equal(formatConfidenceValue(0.3451), '0.35');
+ assert.equal(formatConfidenceValue(0.755), '0.76'); // the double nearest 0.755 lies just above it, so it rounds up
+ assert.equal(formatConfidenceValue(0), '0.00');
+ assert.equal(formatConfidenceValue(1), '1.00');
+ assert.equal(formatConfidenceValue(undefined, 'UNKNOWN'), 'UNKNOWN'); // a custom placeholder replaces the default N/A
+});
+
+test('buildInlineBody renders chinese confidence label before sub-agent tag', () => {
+ const text = getTextBundle('zh-CN');
+ const body = buildInlineBody({
+ severity: 'low',
+ title: '缺少日志上下文',
+ summary: '建议补充必要上下文便于排查。',
+ confidence: 0.88,
+ path: 'src/a.js',
+ side: 'RIGHT',
+ line: 6,
+ sourceDimension: 'testing'
+ }, text);
+
+ assert.match(body, /\*\*\[LOW\] 缺少日志上下文\*\*/);
+ assert.match(body, /置信度: 0.88/);
+ assert.match(body, /\[来自 SubAgent:testing\]/);
+ assert.ok(body.indexOf('置信度: 0.88') < body.indexOf('[来自 SubAgent:testing]'));
+});
+
+test('buildInlineBody renders N/A for missing confidence', () => {
+ const text = getTextBundle('English');
+ const body = buildInlineBody({
+ severity: 'low',
+ title: 'Unknown confidence finding',
+ summary: 'Confidence value is unavailable.',
+ path: 'src/a.js',
+ side: 'RIGHT',
+ line: 3,
+ sourceDimension: 'general'
+ }, text);
+
+ assert.match(body, /Confidence: N\/A/);
+ assert.match(body, /\[From SubAgent: general\]/);
+ assert.ok(body.indexOf('Confidence: N/A') < body.indexOf('[From SubAgent: general]'));
+ assert.ok(body.trim().endsWith('-->'));
});
test('formatSummaryMarkdown supports unknown severities and degraded reasons', () => {
@@ -116,6 +178,94 @@ test('formatSummaryMarkdown supports unknown severities and degraded reasons', (
assert.match(markdown, /## AI Code Review Summary/);
assert.match(markdown, /- MEDIUM \(1\)/);
assert.match(markdown, /Unknown severity should be grouped/);
+ assert.match(markdown, /Findings with unknown confidence \(N\/A\): 0/);
assert.match(markdown, /Structured-output summary-only degradation: YES/);
assert.match(markdown, /planner_structured_output_failed_round_1: unknown_error/);
});
+
+test('formatSummaryMarkdown renders unknown confidence count in zh bundle', () => {
+ const markdown = formatSummaryMarkdown({
+ pull: { number: 8, title: '修复边界' },
+ reviewLanguage: 'zh-CN',
+ findings: [],
+ fileConclusions: [],
+ actionableSuggestions: [],
+ potentialRisks: [],
+ testSuggestions: [],
+ downgradedInline: [],
+ uncovered: [],
+ noPatchCovered: [],
+ coverage: {
+ target: 2,
+ covered: 2,
+ uncovered: 0,
+ noPatch: 0,
+ unknownConfidenceFindings: 3
+ },
+ runtime: {
+ roundsUsed: 1,
+ maxRounds: 3,
+ plannedBatches: 1,
+ executedBatches: 1,
+ subAgentRuns: 1,
+ plannerCalls: 1,
+ reviewerCalls: 1,
+ modelCalls: 2,
+ maxModelCalls: 10
+ },
+ degradedSummaryOnly: false,
+ degradedReasons: []
+ });
+
+ assert.match(markdown, /## AI 代码审查汇总/);
+ assert.match(markdown, /置信度未知(N\/A)的问题数: 3/);
+});
+
+test('unknown confidence count stays consistent between summary and review body', () => {
+ const text = getTextBundle('English');
+ const coverage = {
+ target: 5,
+ covered: 5,
+ uncovered: 0,
+ noPatch: 0,
+ unknownConfidenceFindings: 2
+ };
+
+ const summary = formatSummaryMarkdown({
+ pull: { number: 9, title: 'Consistency check' },
+ reviewLanguage: 'English',
+ findings: [],
+ fileConclusions: [],
+ actionableSuggestions: [],
+ potentialRisks: [],
+ testSuggestions: [],
+ downgradedInline: [],
+ uncovered: [],
+ noPatchCovered: [],
+ coverage,
+ runtime: {
+ roundsUsed: 1,
+ maxRounds: 3,
+ plannedBatches: 1,
+ executedBatches: 1,
+ subAgentRuns: 1,
+ plannerCalls: 1,
+ reviewerCalls: 1,
+ modelCalls: 2,
+ maxModelCalls: 10
+ },
+ degradedSummaryOnly: false,
+ degradedReasons: []
+ });
+
+ const reviewBody = buildReviewBody({
+ text,
+ findingsKept: 4,
+ unknownConfidenceFindings: 2,
+ inlineCommentsAttempted: 3,
+ coverage
+ });
+
+ assert.match(summary, /Findings with unknown confidence \(N\/A\): 2/);
+ assert.match(reviewBody, /Findings with unknown confidence: 2/);
+});