Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 75 additions & 0 deletions src/agentready/assessors/base.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Base assessor interface for attribute evaluation."""

from abc import ABC, abstractmethod
from pathlib import Path

from ..models.finding import Finding
from ..models.repository import Repository
Expand Down Expand Up @@ -67,6 +68,80 @@ def is_applicable(self, repository: Repository) -> bool:
"""
return True

# Per-language manifest filenames looked up at the repository root.
# A manifest found there is a strong hint that the language is the project's
# primary one; it is used to break near-ties between per-language file counts.
_LANG_ROOT_MANIFESTS: dict[str, list[str]] = {
    "Go": [
        "go.mod",
    ],
    "Python": [
        "pyproject.toml",
        "setup.py",
        "setup.cfg",
    ],
    "JavaScript": [
        "package.json",
    ],
    "TypeScript": [
        "tsconfig.json",
    ],
}

def _primary_language(
    self,
    repository: Repository,
    candidates: set[str],
) -> str | None:
    """Return the primary programming language among candidates.

    File count is the base signal: the candidate with the most files in
    ``repository.languages`` wins, and exact ties go to the alphabetically
    first name so the result is deterministic.

    When more than one candidate has at least 70% of the top count, a
    root-level project manifest (see ``_LANG_ROOT_MANIFESTS``) acts as the
    tiebreaker — if exactly one of those close candidates has a manifest at
    the repo root, it is returned instead. With zero or several manifest
    matches the file-count winner stands.

    This handles repos like Go operators with a Python SDK subdirectory,
    where Python may have slightly more files but Go owns the root.

    Args:
        repository: Repository whose ``languages`` mapping (language name to
            file count) and ``path`` (repo root) are consulted.
        candidates: Language names to choose between.

    Returns:
        The chosen language name, or ``None`` when no candidate has a
        positive file count.
    """
    # Keep only candidates that actually occur in the repository.
    lang_counts = {}
    for lang in candidates:
        count = repository.languages.get(lang, 0)
        if count > 0:
            lang_counts[lang] = count
    if not lang_counts:
        return None

    # Highest count first; on equal counts prefer the alphabetically first
    # name (max() with a (count, name) key would pick the last one instead).
    top_lang = min(lang_counts, key=lambda k: (-lang_counts[k], k))
    top_count = lang_counts[top_lang]

    # Candidates close enough to the leader to contest it (top_lang included).
    close_langs = sorted(
        lang for lang, count in lang_counts.items() if count >= top_count * 0.7
    )
    if len(close_langs) > 1:
        manifest_winners = [
            lang
            for lang in close_langs
            if any(
                (repository.path / m).exists()
                for m in self._LANG_ROOT_MANIFESTS.get(lang, [])
            )
        ]
        # Only an unambiguous manifest signal overrides the file counts.
        if len(manifest_winners) == 1:
            return manifest_winners[0]

    return top_lang
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Comment on lines +103 to +125
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
sed -n '95,125p' src/agentready/assessors/base.py
python - <<'PY'
lang_counts = {"Go": 10, "Python": 10, "TypeScript": 10}
current = max(lang_counts, key=lambda k: (lang_counts[k], k))
expected = min(lang_counts, key=lambda k: (-lang_counts[k], k))
print({"current": current, "expected": expected})
PY

Repository: ambient-code/agentready

Length of output: 1110


🏁 Script executed:

# Check if there are tests for the _primary_language method
find . -name "*.py" -type f | xargs grep -l "_primary_language\|lang_counts" | head -20

Repository: ambient-code/agentready

Length of output: 295


🏁 Script executed:

# Look for any documentation or comments about language precedence
rg -A5 -B5 "_primary_language|LANG_ROOT_MANIFESTS" --type py

Repository: ambient-code/agentready

Length of output: 8314


🏁 Script executed:

# Check if there are any existing tests for tie scenarios
find . -name "*test*.py" -o -name "test_*.py" | xargs grep -l "primary_language\|lang_counts" 2>/dev/null

Repository: ambient-code/agentready

Length of output: 209


🏁 Script executed:

# Check the test that uses _primary_language to understand expected behavior
sed -n '1,50p' tests/unit/test_fixers.py | head -100

Repository: ambient-code/agentready

Length of output: 1454


🏁 Script executed:

# Search for any test of _primary_language itself
rg -A20 "test_generate_fix_uses_primary_language" tests/

Repository: ambient-code/agentready

Length of output: 1367


🏁 Script executed:

# Check if there are any unit tests specifically for _primary_language
find . -name "*.py" -path "*/tests/*" -exec grep -l "_primary_language" {} \;

Repository: ambient-code/agentready

Length of output: 95


🏁 Script executed:

# Get the full test_generate_fix_uses_primary_language test
rg -A50 "def test_generate_fix_uses_primary_language" tests/unit/test_fixers.py

Repository: ambient-code/agentready

Length of output: 1846


🏁 Script executed:

# Check if there are any unit tests for BaseAssessor._primary_language itself
rg -B5 -A30 "_primary_language" tests/ --type py

Repository: ambient-code/agentready

Length of output: 2198


🏁 Script executed:

# Check the base.py implementation more completely to understand the fallback behavior
sed -n '90,145p' src/agentready/assessors/base.py

Repository: ambient-code/agentready

Length of output: 2058


🏁 Script executed:

# Test what happens with an exact tie scenario - verify the exact behavior
python3 <<'PY'
# Simulate the scenario with exact tie and no clear manifest winner
lang_counts = {"Go": 10, "Python": 10, "TypeScript": 10}
close_langs = {lang for lang, count in lang_counts.items() if count >= 10 * 0.7}
print("Close languages (within 70%):", sorted(close_langs))
print("Current max() selection:", max(lang_counts, key=lambda k: (lang_counts[k], k)))
print("Expected first alphabetical:", min(lang_counts, key=lambda k: (-lang_counts[k], k)))

# Now show the lexicographic ordering behavior
keys = list(lang_counts.keys())
print("\nLexicographic ordering of language names:")
for lang in sorted(keys, reverse=True):
    print(f"  {lang}")
PY

Repository: ambient-code/agentready

Length of output: 266


Exact-count ties still select the reverse alphabetical language.

On line 103, max(lang_counts, key=lambda k: (lang_counts[k], k)) uses reverse lexicographic ordering for ties. When counts are equal (e.g., Go: 10, Python: 10, TypeScript: 10), this picks TypeScript instead of Go. The 70% manifest check mitigates this in many cases, but fails when zero or multiple manifest files exist, falling back to the wrong language for assessment dispatch.

Fix
-        top_lang = max(lang_counts, key=lambda k: (lang_counts[k], k))
+        top_lang = min(lang_counts, key=lambda k: (-lang_counts[k], k))
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@src/agentready/assessors/base.py` around lines 103 - 125, The current
tie-break uses max(lang_counts, key=lambda k: (lang_counts[k], k)) which selects
the reverse-lexicographic language on equal counts; change the logic to first
compute top_count = max(lang_counts.values()) and then build the top candidates
list = [lang for lang, c in lang_counts.items() if c == top_count] and set
top_lang = sorted(top_candidates)[0] (i.e., pick the alphabetically first) so
equal-count ties choose the earliest alphabetical language; keep the subsequent
close_langs, manifest_winners, _LANG_ROOT_MANIFESTS and repository.path checks
intact so the 70% manifest logic still applies.


def _find_go_module_roots(self, repository: Repository) -> list[Path]:
    """Find directories containing go.mod (Go module roots).

    Supports both single-module repos (go.mod at root) and monorepos
    (go.mod in subdirectories at any depth). Modules located under a
    ``vendor`` or ``testdata`` directory inside the repository are excluded.

    The exclusion is evaluated on the path relative to ``repository.path``,
    so a repository that itself lives beneath a directory named ``vendor``
    or ``testdata`` still has its nested modules reported.

    Args:
        repository: Repository whose ``path`` is searched recursively.

    Returns:
        Sorted list of unique directories that contain a ``go.mod``.
    """
    repo_root = repository.path
    excluded_dirs = {"vendor", "testdata"}
    roots: set[Path] = set()

    if (repo_root / "go.mod").exists():
        roots.add(repo_root)

    for gomod in repo_root.rglob("go.mod"):
        if gomod.parent == repo_root:
            # Root module is handled by the explicit check above.
            continue
        # Inspect only the components below the repo root; ancestors of the
        # checkout location must not influence the exclusion.
        if excluded_dirs.intersection(gomod.relative_to(repo_root).parts):
            continue
        roots.add(gomod.parent)

    return sorted(roots)

def calculate_proportional_score(
self,
measured_value: float,
Expand Down
Loading
Loading