-
Notifications
You must be signed in to change notification settings - Fork 0
feat(action): ✨ Add confidence to inline comments #6
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
0a27451
196b7ab
a8a0477
41651f7
741537e
80e199e
5e6ff8e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -25,7 +25,7 @@ const findingSchema = z.object({ | |
| path: z.string().min(1), | ||
| side: z.enum(['LEFT', 'RIGHT', 'FILE']).default('RIGHT'), | ||
| line: z.number().int().positive().nullable().default(null), | ||
| confidence: z.number().min(0).max(1).default(0.8), | ||
| confidence: z.number().min(0).max(1).nullable().optional().default(null), | ||
This comment was marked as outdated.
Sorry, something went wrong.
This comment was marked as outdated.
Sorry, something went wrong. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [MEDIUM] finding schema 从必填数值改为 nullable optional 后缺少兼容性测试 schema 语义变化较大(默认 0.8 -> null),会影响下游过滤/展示逻辑,缺少对应 contract 测试。 建议: 增加 agents/schema 测试:缺失 confidence、显式 null、合法数值、越界数值的解析结果;并验证与 normalize/filter 流程联动行为符合预期。 风险: 上游模型输出轻微变化可能触发解析/过滤行为差异,造成结果数量和质量波动。 置信度: 0.92 [来自 SubAgent:testing]
|
||
| evidence: z.array(z.string().min(1)).default([]), | ||
| fingerprint: z.string().max(120).default(''), | ||
| summary: z.string().min(1), | ||
|
|
@@ -126,7 +126,8 @@ Rules: | |
| - Never emit line numbers that do not appear in the provided anchors. | ||
| - Do not invent files or line numbers. | ||
| - Severity must be one of critical/high/medium/low. | ||
| - Set confidence in [0,1]. Include at least one concrete evidence item tied to provided diff context. | ||
| - Set confidence in [0,1] when you can estimate it; otherwise use null. | ||
| - Include at least one concrete evidence item tied to provided diff context. | ||
| - If confidence is below 0.70, do not emit it as a finding; put it in file-level notes instead. | ||
| - Use fingerprint as stable short key for same issue across dimensions (e.g. unsafe_openai_base_url, planner_done_ignored). | ||
| - Keep findings concrete, actionable, and concise. | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -70,6 +70,10 @@ function jaccardSimilarity(a, b) { | |
| return union.size === 0 ? 0 : intersection / union.size; | ||
| } | ||
|
|
||
| function confidenceRank(value) { | ||
| return Number.isFinite(value) ? value : -1; | ||
| } | ||
|
|
||
| function isSemanticallySameIssue(a, b) { | ||
| if ( | ||
| a.fingerprint && | ||
|
|
@@ -102,11 +106,14 @@ function isSemanticallySameIssue(a, b) { | |
| } | ||
|
|
||
| function mergeFinding(base, incoming) { | ||
| const preferIncoming = incoming.confidence > base.confidence; | ||
| const baseRank = confidenceRank(base.confidence); | ||
This comment was marked as outdated.
Sorry, something went wrong. |
||
| const incomingRank = confidenceRank(incoming.confidence); | ||
| const preferIncoming = incomingRank > baseRank; | ||
| const mergedEvidence = [...new Set([...(base.evidence || []), ...(incoming.evidence || [])])].slice(0, 3); | ||
| const severity = SEVERITY_RANK[incoming.severity] > SEVERITY_RANK[base.severity] | ||
| ? incoming.severity | ||
| : base.severity; | ||
| const mergedConfidence = incomingRank >= baseRank ? incoming.confidence : base.confidence; | ||
This comment was marked as outdated.
Sorry, something went wrong. |
||
| return { | ||
| ...base, | ||
| ...(preferIncoming | ||
|
|
@@ -118,7 +125,7 @@ function mergeFinding(base, incoming) { | |
| } | ||
| : {}), | ||
| severity, | ||
| confidence: Math.max(base.confidence, incoming.confidence), | ||
| confidence: mergedConfidence, | ||
| evidence: mergedEvidence, | ||
| fingerprint: base.fingerprint || incoming.fingerprint, | ||
| sourceDimension: preferIncoming | ||
|
|
@@ -130,6 +137,13 @@ function mergeFinding(base, incoming) { | |
| function normalizeFindings(findings, allowedPaths, options = {}) { | ||
| const pathSet = new Set(allowedPaths); | ||
| const minConfidence = Number.isFinite(options.minConfidence) ? options.minConfidence : 0; | ||
| const missingConfidencePolicy = ['drop', 'na', 'fallback'].includes(options.missingConfidencePolicy) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [MEDIUM] 默认 当 建议: 将默认策略改为 风险: 可能造成安全告警结果被噪声淹没、误导人工审计优先级,形成间接安全运营风险。 置信度: 0.90 [来自 SubAgent:security]
|
||
| ? options.missingConfidencePolicy | ||
| : 'na'; | ||
| const fallbackConfidenceValueRaw = Number.parseFloat(String(options.fallbackConfidenceValue ?? '0.5')); | ||
| const fallbackConfidenceValue = Number.isFinite(fallbackConfidenceValueRaw) | ||
| ? clamp(fallbackConfidenceValueRaw, 0, 1) | ||
| : 0.5; | ||
| const out = []; | ||
|
|
||
| for (const finding of findings || []) { | ||
|
|
@@ -143,11 +157,23 @@ function normalizeFindings(findings, allowedPaths, options = {}) { | |
| const line = Number.isInteger(finding.line) && finding.line > 0 ? finding.line : null; | ||
| const title = String(finding.title || '').trim(); | ||
| const summary = String(finding.summary || '').trim(); | ||
| const confidenceRaw = Number.parseFloat(String(finding.confidence ?? '0.8')); | ||
| const confidence = Number.isFinite(confidenceRaw) ? clamp(confidenceRaw, 0, 1) : 0.8; | ||
| const confidenceRaw = Number.parseFloat(String(finding.confidence)); | ||
This comment was marked as outdated.
Sorry, something went wrong. |
||
| let confidence = Number.isFinite(confidenceRaw) ? clamp(confidenceRaw, 0, 1) : null; | ||
| const evidence = normalizeEvidence(finding.evidence); | ||
|
|
||
| if (!title || !summary || evidence.length === 0 || confidence < minConfidence) { | ||
| if (confidence === null) { | ||
| if (missingConfidencePolicy === 'drop') { | ||
| continue; | ||
| } | ||
| if (missingConfidencePolicy === 'fallback') { | ||
| confidence = fallbackConfidenceValue; | ||
| } | ||
| } | ||
|
|
||
| if (!title || !summary || evidence.length === 0) { | ||
| continue; | ||
| } | ||
| if (Number.isFinite(confidence) && confidence < minConfidence) { | ||
| continue; | ||
| } | ||
|
|
||
|
|
@@ -216,7 +242,7 @@ function dedupeAndSortFindings(findings, maxFindings) { | |
| return pathDiff; | ||
| } | ||
|
|
||
| const confidenceDiff = (b.confidence || 0) - (a.confidence || 0); | ||
| const confidenceDiff = confidenceRank(b.confidence) - confidenceRank(a.confidence); | ||
This comment was marked as outdated.
Sorry, something went wrong. |
||
| if (confidenceDiff !== 0) { | ||
| return confidenceDiff; | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -75,6 +75,15 @@ function parseFloatRangeInput(name, defaultValue, min, max) { | |
| return parsed; | ||
| } | ||
|
|
||
| function parseEnumInput(name, defaultValue, allowedValues) { | ||
This comment was marked as outdated.
Sorry, something went wrong.
This comment was marked as outdated.
Sorry, something went wrong. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [MEDIUM] 配置新增枚举输入解析缺少非法值与大小写归一化测试 新增 parseEnumInput 并接入关键配置,但未见对默认值、大小写、空白、非法值抛错的测试。 建议: 补充 风险: 配置解析异常会导致 action 在运行时直接失败,影响 CI 稳定性。 置信度: 0.96 [来自 SubAgent:testing]
|
||
| const raw = core.getInput(name) || String(defaultValue); | ||
| const normalized = String(raw).trim().toLowerCase(); | ||
| if (!allowedValues.includes(normalized)) { | ||
| throw new Error(`Input ${name} must be one of [${allowedValues.join(', ')}], got: ${raw}`); | ||
| } | ||
| return normalized; | ||
| } | ||
|
|
||
| function uniqueLowercase(items) { | ||
| const seen = new Set(); | ||
| const out = []; | ||
|
|
@@ -123,6 +132,8 @@ function loadConfig() { | |
| reviewDimensions: normalizedDimensions, | ||
| reviewLanguage, | ||
| minFindingConfidence: parseFloatRangeInput('min_finding_confidence', 0.72, 0, 1), | ||
| missingConfidencePolicy: parseEnumInput('missing_confidence_policy', 'na', ['drop', 'na', 'fallback']), | ||
This comment was marked as outdated.
Sorry, something went wrong.
This comment was marked as outdated.
Sorry, something went wrong. |
||
| fallbackConfidenceValue: parseFloatRangeInput('fallback_confidence_value', 0.5, 0, 1), | ||
| coverageFirstRoundPrimaryOnly: parseBooleanInput('coverage_first_round_primary_only', true), | ||
| autoMinimizeOutdatedComments: parseBooleanInput('auto_minimize_outdated_comments', true), | ||
| maxRounds: parsePositiveIntInput('max_rounds', 8), | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -30,6 +30,8 @@ function getTextBundle(language) { | |
| return { | ||
| suggestionLabel: 'Suggestion', | ||
| riskLabel: 'Risk', | ||
| confidenceLabel: 'Confidence', | ||
| unknownConfidenceValue: 'N/A', | ||
| summaryTitle: 'AI Code Review Summary', | ||
| preferredLanguage: 'Preferred language', | ||
| overallAssessment: 'Overall Assessment', | ||
|
|
@@ -40,6 +42,7 @@ function getTextBundle(language) { | |
| fileLevelCoverage: 'File-Level Coverage Notes', | ||
| inlineDowngraded: 'Inline Downgraded Items (processed but not inline)', | ||
| coverageStatus: 'Coverage Status', | ||
| unknownConfidenceFindings: 'Findings with unknown confidence (N/A)', | ||
| uncoveredList: 'Uncovered list', | ||
| noPatchCoveredList: 'No-patch covered list', | ||
| runtimeBudget: 'Runtime/Budget', | ||
|
|
@@ -80,6 +83,8 @@ function getTextBundle(language) { | |
| return { | ||
| suggestionLabel: '建议', | ||
| riskLabel: '风险', | ||
| confidenceLabel: '置信度', | ||
This comment was marked as outdated.
Sorry, something went wrong. |
||
| unknownConfidenceValue: 'N/A', | ||
| summaryTitle: 'AI 代码审查汇总', | ||
| preferredLanguage: '指定语言', | ||
| overallAssessment: '总体评价', | ||
|
|
@@ -90,6 +95,7 @@ function getTextBundle(language) { | |
| fileLevelCoverage: '文件级覆盖说明', | ||
| inlineDowngraded: '无法 inline 的已处理项', | ||
| coverageStatus: '覆盖状态', | ||
| unknownConfidenceFindings: '置信度未知(N/A)的问题数', | ||
| uncoveredList: '未覆盖文件清单', | ||
| noPatchCoveredList: '无 patch 文件覆盖清单', | ||
| runtimeBudget: '轮次与预算', | ||
|
|
@@ -187,6 +193,16 @@ function summarizePlannerBatchesForLog(batches, maxEntries = 12) { | |
| }).join(' | '); | ||
| } | ||
|
|
||
| function formatConfidenceValue(confidence, unknownValue = 'N/A') { | ||
This comment was marked as outdated.
Sorry, something went wrong.
This comment was marked as outdated.
Sorry, something went wrong. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [MEDIUM] 新增置信度格式化与展示路径缺少针对边界值的单元测试 新增格式化函数包含 NaN/非数值、越界截断与保留两位小数逻辑,但未看到对应测试覆盖,容易在显示层出现回归。 建议: 补充 风险: 置信度文案错误会误导审查结果解读,且该字段已进入 inline 评论,影响面较大。 置信度: 0.95 [来自 SubAgent:testing]
|
||
| const value = Number.parseFloat(String(confidence)); | ||
This comment was marked as outdated.
Sorry, something went wrong. |
||
| if (!Number.isFinite(value)) { | ||
| return unknownValue; | ||
| } | ||
|
|
||
| const clamped = Math.min(1, Math.max(0, value)); | ||
| return clamped.toFixed(2); | ||
| } | ||
|
|
||
| function buildInlineBody(finding, text) { | ||
| const lines = []; | ||
| const subAgent = String(finding.sourceDimension || 'general').trim().toLowerCase() || 'general'; | ||
|
|
@@ -202,12 +218,32 @@ function buildInlineBody(finding, text) { | |
| lines.push(`${text.riskLabel}: ${finding.risk}`); | ||
| } | ||
|
|
||
| lines.push(`<!-- ai-code-review-agent:inline-key ${inlineKey} -->`); | ||
| lines.push(`${text.confidenceLabel}: ${formatConfidenceValue(finding.confidence, text.unknownConfidenceValue)}`); | ||
This comment was marked as outdated.
Sorry, something went wrong. |
||
| lines.push(`<div align="right">${text.fromSubAgentTag(subAgent)}</div>`); | ||
| lines.push(`<!-- ai-code-review-agent:inline-key ${inlineKey} -->`); | ||
|
|
||
| return lines.join('\n\n'); | ||
| } | ||
|
|
||
| function buildReviewBody({ | ||
| text, | ||
| findingsKept, | ||
| unknownConfidenceFindings, | ||
| inlineCommentsAttempted, | ||
| coverage | ||
| }) { | ||
| return [ | ||
| text.reviewCompleted, | ||
| `- Findings kept: ${findingsKept}`, | ||
| `- Findings with unknown confidence: ${unknownConfidenceFindings}`, | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [MEDIUM] Review body 新增字段未国际化,中文模式会出现英文文案
建议: 为 review body 的该行新增并使用语言包键(例如 风险: 用户界面语言不一致,降低可读性并影响国际化可维护性;后续新增语言时更易遗漏。 置信度: 0.95 [来自 SubAgent:general]
|
||
| `- Inline comments attempted: ${inlineCommentsAttempted}`, | ||
| `- Target files: ${coverage.target}`, | ||
| `- Covered files: ${coverage.covered}`, | ||
| `- Uncovered files: ${coverage.uncovered}`, | ||
| text.reviewSeeSummary | ||
| ].join('\n'); | ||
| } | ||
|
|
||
| function summarizeSeverity(groups, text, limitEach = 8) { | ||
| const order = ['critical', 'high', 'medium', 'low']; | ||
| const lines = []; | ||
|
|
@@ -300,6 +336,9 @@ function formatSummaryMarkdown({ | |
| const degradedText = degradedSummaryOnly | ||
| ? `${text.yes}\n\n${text.reasons}:\n${degradedReasons.map((x) => `- ${x}`).join('\n') || '- unknown'}` | ||
| : text.no; | ||
| const unknownConfidenceFindings = Number.isFinite(coverage.unknownConfidenceFindings) | ||
This comment was marked as outdated.
Sorry, something went wrong. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [MEDIUM] Review/Summary 新增 unknownConfidenceFindings 统计缺少一致性测试 unknownConfidenceFindings 被同时用于 summary 与 review body,且有默认值兜底分支;缺少测试验证统计口径和展示一致性。 建议: 增加集成/单测:1) findings 含 null/undefined/NaN/数字时计数正确;2) 风险: 统计不一致会降低结果可信度,用户可能误判模型输出质量。 置信度: 0.90 [来自 SubAgent:testing]
|
||
| ? coverage.unknownConfidenceFindings | ||
| : 0; | ||
|
|
||
| return [ | ||
| `## ${text.summaryTitle}`, | ||
|
|
@@ -333,6 +372,7 @@ function formatSummaryMarkdown({ | |
| `- Covered files: ${coverage.covered}`, | ||
| `- Uncovered files: ${coverage.uncovered}`, | ||
| `- No-patch/binary covered as file-level: ${coverage.noPatch}`, | ||
| `- ${text.unknownConfidenceFindings}: ${unknownConfidenceFindings}`, | ||
| '', | ||
| `${text.uncoveredList}:`, | ||
| uncoveredLines, | ||
|
|
@@ -774,10 +814,13 @@ async function runAction() { | |
|
|
||
| const normalizedFindings = dedupeAndSortFindings( | ||
| normalizeFindings(rawFindings, targetPaths, { | ||
| minConfidence: config.minFindingConfidence | ||
| minConfidence: config.minFindingConfidence, | ||
| missingConfidencePolicy: config.missingConfidencePolicy, | ||
| fallbackConfidenceValue: config.fallbackConfidenceValue | ||
| }), | ||
| config.maxFindings | ||
| ); | ||
| const unknownConfidenceFindings = normalizedFindings.filter((finding) => !Number.isFinite(finding.confidence)).length; | ||
This comment was marked as outdated.
Sorry, something went wrong. |
||
|
|
||
| const diffLineMap = buildDiffLineMaps(patchFiles); | ||
| const inlineComments = []; | ||
|
|
@@ -895,7 +938,8 @@ async function runAction() { | |
| target: filteredFiles.length, | ||
| covered: filteredFiles.length - uncovered.length, | ||
| uncovered: uncovered.length, | ||
| noPatch: noPatchCovered.length | ||
| noPatch: noPatchCovered.length, | ||
| unknownConfidenceFindings | ||
| }; | ||
|
|
||
| if (filteredFiles.length === 0) { | ||
|
|
@@ -944,15 +988,13 @@ async function runAction() { | |
| ); | ||
|
|
||
| if (!degradedSummaryOnly) { | ||
| const reviewBody = [ | ||
| text.reviewCompleted, | ||
| `- Findings kept: ${normalizedFindings.length}`, | ||
| `- Inline comments attempted: ${inlineComments.length}`, | ||
| `- Target files: ${coverage.target}`, | ||
| `- Covered files: ${coverage.covered}`, | ||
| `- Uncovered files: ${coverage.uncovered}`, | ||
| text.reviewSeeSummary | ||
| ].join('\n'); | ||
| const reviewBody = buildReviewBody({ | ||
| text, | ||
| findingsKept: normalizedFindings.length, | ||
| unknownConfidenceFindings, | ||
| inlineCommentsAttempted: inlineComments.length, | ||
| coverage | ||
| }); | ||
|
|
||
| const reviewResult = await createReview(octokit, { | ||
| owner, | ||
|
|
@@ -1026,7 +1068,9 @@ module.exports = { | |
| shouldUseSummaryOnlyMode, | ||
| sanitizePlannedBatches, | ||
| summarizePlannerBatchesForLog, | ||
| formatConfidenceValue, | ||
| buildInlineBody, | ||
| buildReviewBody, | ||
| summarizeSeverity, | ||
| summarizeFileConclusions, | ||
| formatSummaryMarkdown | ||
|
|
||
This comment was marked as outdated.
Sorry, something went wrong.
Uh oh!
There was an error while loading. Please reload this page.