diff --git a/app/domains/commit/router.py b/app/domains/commit/router.py index 768cba2..b8ec596 100644 --- a/app/domains/commit/router.py +++ b/app/domains/commit/router.py @@ -265,6 +265,8 @@ async def get_commit_analyze_run( "`repository_ids`에는 팀에 등록된 레포 ID 목록을 전달하며, " "해당 목록 밖의 커밋은 후보에서 제외합니다. " "`top_k`는 레포별 개수가 아니라 적용사항별 최대 추천 개수입니다.\n\n" + "신뢰도 70점 미만 후보는 추천에서 제외합니다. " + "따라서 아직 구현되지 않은 적용사항은 빈 추천 목록으로 반환될 수 있습니다.\n\n" "점수 정책(100점):\n" "- 의미 유사성 50\n" "- 기술 키워드 일치도 30\n" @@ -319,6 +321,7 @@ async def get_commit_analyze_run( "semantic": 44, "keyword": 30, "context": 20, + "type_bonus": 0, "penalty": 0, "total": 94, }, @@ -334,7 +337,10 @@ async def get_commit_analyze_run( ], } ], - "notice": "신뢰도는 AI 분석 기반 추정값입니다.", + "notice": ( + "신뢰도는 AI 분석 기반 추정값이며, " + "70점 미만 후보는 추천에서 제외됩니다." + ), }, } } diff --git a/app/domains/commit/schemas.py b/app/domains/commit/schemas.py index b2a9ac6..2c774fb 100644 --- a/app/domains/commit/schemas.py +++ b/app/domains/commit/schemas.py @@ -117,7 +117,7 @@ class ApplicationCommitMatchRequest(BaseModel): default=5, ge=1, le=30, - description="적용사항별 추천 커밋 상위 K", + description="적용사항별 추천 커밋 최대 K개", ) @@ -155,7 +155,7 @@ class ApplicationCommitMatchItem(BaseModel): application_title: str = Field(description="적용사항 제목") recommended_commits: list[MatchedCommit] = Field( default_factory=list, - description="신뢰도 내림차순 추천 커밋 목록", + description="신뢰도 70점 이상인 추천 커밋 목록(내림차순)", ) @@ -165,11 +165,15 @@ class ApplicationCommitMatchResponse(BaseModel): description="매칭 후보로 사용한 레포지토리 ID 목록" ) total_applications: int = Field(description="조회된 적용사항 문서 수") - matched_applications: int = Field(description="추천 결과가 존재하는 적용사항 수") + matched_applications: int = Field( + description="신뢰도 70점 이상 추천 결과가 존재하는 적용사항 수" + ) applications: list[ApplicationCommitMatchItem] = Field( default_factory=list, description="적용사항 단위 매칭 결과" ) notice: str = Field( - default="신뢰도는 AI 분석 기반 추정값입니다.", + default=( + "신뢰도는 AI 분석 기반 추정값이며, 70점 미만 후보는 추천에서 제외됩니다." + ), description="신뢰도 안내 문구", ) diff --git a/app/domains/commit/services/matching.py b/app/domains/commit/services/matching.py index 9a05996..ebde112 100644 --- a/app/domains/commit/services/matching.py +++ b/app/domains/commit/services/matching.py @@ -25,6 +25,9 @@ ) logger = logging.getLogger(__name__) +MIN_RECOMMENDATION_CONFIDENCE = 70 +MIN_COMMIT_CANDIDATE_POOL_SIZE = 50 +MAX_COMMIT_CANDIDATE_POOL_SIZE = 100 @dataclass(frozen=True) @@ -313,7 +316,7 @@ def _build_match_record( ) ) - if score.total < 50: + if score.total < MIN_RECOMMENDATION_CONFIDENCE: return None matched_commit = MatchedCommit( @@ -370,7 +373,10 @@ async def match_applications_with_commits( applications=[], ) - pool_size = min(100, payload.top_k * 5) + pool_size = min( + MAX_COMMIT_CANDIDATE_POOL_SIZE, + max(MIN_COMMIT_CANDIDATE_POOL_SIZE, payload.top_k * 5), + ) matched_by_application: dict[int, dict[str, MatchRecord]] = { idx: {} for idx in range(len(application_entries)) } diff --git a/app/domains/meeting_analysis/services/matching_scoring.py b/app/domains/meeting_analysis/services/matching_scoring.py index 17a42ab..b7d917a 100644 --- a/app/domains/meeting_analysis/services/matching_scoring.py +++ b/app/domains/meeting_analysis/services/matching_scoring.py @@ -517,8 +517,6 @@ def build_connection_reason( keyword_overlap: set[str] | None = None, module_overlap: set[str] | None = None, ) -> str: - if score.total < 50: - return "신뢰도 임계치 미달로 자동 연결하지 않았습니다." if score.is_opposite_direction: return "의미 방향이 반대여서 자동 연결을 제한했습니다." if score.is_goal_mismatch: diff --git a/tests/test_application_commit_matching.py b/tests/test_application_commit_matching.py index 2032131..fee1de8 100644 --- a/tests/test_application_commit_matching.py +++ b/tests/test_application_commit_matching.py @@ -1,3 +1,4 @@ +from types import SimpleNamespace from unittest.mock import AsyncMock, MagicMock, patch import pytest @@ -14,6 +15,10 @@ CommitAnalyzeRequest, ) from app.domains.commit.services.matching import ( + MIN_COMMIT_CANDIDATE_POOL_SIZE, + MIN_RECOMMENDATION_CONFIDENCE, + ApplicationEntry, + _build_match_record, _to_application_entries, match_applications_with_commits, ) @@ -32,6 +37,45 @@ def _build_match_payload() -> dict: } +def _build_application_entry() -> ApplicationEntry: + return ApplicationEntry( + document_id="meeting-123_application0", + text="title: Redis 알림 안정화 | text: redis cache 안정화", + embedding=[0.1, 0.2], + application_id=101, + application_title="redis cache 안정화", + direction_labels={"modify"}, + keywords={"redis"}, + modules={"cache"}, + ) + + +def _build_commit_metadata() -> dict: + return { + "commit_ref": "commit-1", + "commit_hash": "hash-1", + "repository_id": 1, + "direction_primary": "modify", + "direction_multi_csv": "modify", + "tech_keywords_csv": "redis", + "module_tags_csv": "cache", + "commit_message": "fix: redis cache 안정화", + } + + +def _build_score(total: int) -> SimpleNamespace: + return SimpleNamespace( + semantic=40, + keyword=20, + context=max(0, total - 60), + type_bonus=0, + penalty=0, + total=total, + is_opposite_direction=False, + is_goal_mismatch=False, + ) + + class TestApplicationCommitMatchingService: def test_application_entries_accept_chroma_numpy_embeddings(self): np = pytest.importorskip("numpy") @@ -69,8 +113,41 @@ def test_application_entries_missing_application_id_returns_none(self): assert entries[0].application_id is None + def test_match_record_includes_threshold_boundary(self): + with patch( + "app.domains.commit.services.matching.calculate_match_score", + return_value=_build_score(MIN_RECOMMENDATION_CONFIDENCE), + ): + record = _build_match_record( + application=_build_application_entry(), + application_index=0, + commit_id="commit-1", + commit_document="title: redis cache | text: redis cache 안정화", + metadata=_build_commit_metadata(), + distance=0.1, + ) + + assert record is not None + assert record.commit.confidence == MIN_RECOMMENDATION_CONFIDENCE + + def test_match_record_excludes_below_threshold_boundary(self): + with patch( + "app.domains.commit.services.matching.calculate_match_score", + return_value=_build_score(MIN_RECOMMENDATION_CONFIDENCE - 1), + ): + record = _build_match_record( + application=_build_application_entry(), + application_index=0, + commit_id="commit-1", + commit_document="title: redis cache | text: redis cache 안정화", + metadata=_build_commit_metadata(), + distance=0.1, + ) + + assert record is None + @pytest.mark.asyncio - async def test_returns_recommended_commits_sorted_by_confidence(self): + async def test_returns_confident_recommended_commits_sorted_by_confidence(self): application_collection = MagicMock() application_collection.get.return_value = { "ids": ["meeting-123_application0"], @@ -168,7 +245,7 @@ async def test_returns_recommended_commits_sorted_by_confidence(self): assert result.matched_applications == 1 item = result.applications[0] assert item.application_id == 101 - assert len(item.recommended_commits) == 2 + assert len(item.recommended_commits) == 1 assert item.recommended_commits[0].commit_id == 1 assert item.recommended_commits[0].commit_hash == "h1" assert item.recommended_commits[0].commit_message == ( @@ -180,12 +257,79 @@ async def test_returns_recommended_commits_sorted_by_confidence(self): assert "커밋 타입 가산 +3점" in item.recommended_commits[0].score_detail assert "겹친 키워드" in item.recommended_commits[0].score_detail assert item.recommended_commits[0].score_breakdown.type_bonus == 3 - assert item.recommended_commits[1].commit_hash == "h2" - assert item.recommended_commits[0].confidence >= ( - item.recommended_commits[1].confidence - ) commit_collection.query.assert_called_once() assert commit_collection.query.call_args.kwargs["where"] == {"repository_id": 1} + assert ( + commit_collection.query.call_args.kwargs["n_results"] + == MIN_COMMIT_CANDIDATE_POOL_SIZE + ) + + @pytest.mark.asyncio + async def test_excludes_candidates_below_recommendation_threshold(self): + application_collection = MagicMock() + application_collection.get.return_value = { + "ids": ["meeting-123_application0"], + "documents": [ + ( + "title: Redis 알림 안정화 | text: 적용사항: " + "redis kafka notification 안정화" + ) + ], + "metadatas": [ + { + "application_id": 101, + "application_title": "redis kafka notification 안정화", + } + ], + "embeddings": [[0.11, 0.22, 0.33]], + } + + commit_collection = MagicMock() + commit_collection.query.return_value = { + "ids": [["commit_partial"]], + "documents": [ + [ + ( + "title: repository-1 update | text: 변경요약: redis 설정 수정 " + "| 기술키워드: redis | 변경방향: modify " + "| 파일맥락: cache" + ) + ] + ], + "metadatas": [ + [ + { + "commit_ref": "c-partial", + "commit_hash": "h-partial", + "repository_id": 1, + "direction_primary": "modify", + "direction_multi_csv": "modify", + "tech_keywords_csv": "redis", + "module_tags_csv": "cache", + "commit_message": "update redis config", + } + ] + ], + "distances": [[0.50]], + } + + with ( + patch( + "app.domains.commit.services.matching.get_application_collection", + return_value=application_collection, + ), + patch( + "app.domains.commit.services.matching.get_commit_collection", + return_value=commit_collection, + ), + ): + result = await match_applications_with_commits( + ApplicationCommitMatchRequest(**_build_match_payload()) + ) + + assert result.total_applications == 1 + assert result.matched_applications == 0 + assert result.applications[0].recommended_commits == [] @pytest.mark.asyncio async def test_multiple_repository_ids_query_with_in_filter(self):