Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 79 additions & 0 deletions bench/agents/code_graph_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,85 @@ def note_edit(self, repo: str, path: str) -> dict[str, Any]:
except httpx.HTTPError as exc:
return {"ok": False, "error": str(exc), "path": path}

# ------------------------------------------------------------------
# v2 agent verbs — parity with the MCP transport.
# ------------------------------------------------------------------
# Each hits a /api/v2/* endpoint that wraps the same async function
# the FastMCP server exposes. Output shape is identical between
# transports, so cg / cg-mcp benchmarks measure transport overhead
# rather than API-surface differences.

def search_code(self, project: str, prefix: str, branch: str | None = None,
limit: int = 10) -> list[dict[str, Any]]:
body: dict[str, Any] = {"project": project, "prefix": prefix, "limit": limit}
if branch:
body["branch"] = branch
r = self._client.post("/api/v2/search_code", json=body)
r.raise_for_status()
return r.json()

def get_callers(self, project: str, symbol_id: int, branch: str | None = None,
limit: int = 50) -> list[dict[str, Any]]:
body: dict[str, Any] = {"project": project, "symbol_id": symbol_id, "limit": limit}
if branch:
body["branch"] = branch
r = self._client.post("/api/v2/get_callers", json=body)
r.raise_for_status()
return r.json()

def get_callees(self, project: str, symbol_id: int, branch: str | None = None,
limit: int = 50) -> list[dict[str, Any]]:
body: dict[str, Any] = {"project": project, "symbol_id": symbol_id, "limit": limit}
if branch:
body["branch"] = branch
r = self._client.post("/api/v2/get_callees", json=body)
r.raise_for_status()
return r.json()

def get_dependencies(self, project: str, symbol_id: int, branch: str | None = None,
limit: int = 50) -> list[dict[str, Any]]:
body: dict[str, Any] = {"project": project, "symbol_id": symbol_id, "limit": limit}
if branch:
body["branch"] = branch
r = self._client.post("/api/v2/get_dependencies", json=body)
r.raise_for_status()
return r.json()

def impact_analysis(self, project: str, symbol_id: int,
branch: str | None = None,
direction: str = "IN",
depth: int = 3) -> list[dict[str, Any]]:
body: dict[str, Any] = {
"project": project, "symbol_id": symbol_id,
"direction": direction, "depth": depth,
}
if branch:
body["branch"] = branch
r = self._client.post("/api/v2/impact_analysis", json=body)
r.raise_for_status()
return r.json()

def find_path_v2(self, project: str, source_id: int, dest_id: int,
branch: str | None = None,
max_paths: int = 10) -> list[dict[str, Any]]:
body: dict[str, Any] = {
"project": project, "source_id": source_id, "dest_id": dest_id,
"max_paths": max_paths,
}
if branch:
body["branch"] = branch
r = self._client.post("/api/v2/find_path", json=body)
r.raise_for_status()
return r.json()

def ask_v2(self, project: str, question: str, branch: str | None = None) -> Any:
body: dict[str, Any] = {"project": project, "question": question}
if branch:
body["branch"] = branch
r = self._client.post("/api/v2/ask", json=body)
r.raise_for_status()
return r.json()


# Convenience function aliases — the SWE-agent tool registry expects
# top-level callables. Each spins up a short-lived client; for hot loops
Expand Down
225 changes: 173 additions & 52 deletions bench/cli/cg.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,26 @@
"""`cg` — bash-callable CLI exposing code-graph primitives.

mini-swe-agent only uses bash, so each "tool" we want the agent to have is
just a CLI it can invoke. This script wraps bench/agents/code_graph_adapter
behind a small argparse interface and prints JSON results to stdout, one
JSON document per call.

Usage examples (run inside the agent's bash environment):

cg graph-entities --repo django
cg get-neighbors --repo django --ids 12 14 17
cg find-paths --repo django --src 12 --dst 88
cg auto-complete --repo django --prefix get_user
cg find-symbol --repo django --name get_user_model
cg note-edit --repo django --path src/django/contrib/auth/models.py
"""`cg` — bash-callable CLI exposing code-graph's 8 agent primitives over HTTP.

This is the HTTP-transport sibling of `cg-mcp`. Both CLIs expose the **same
verb surface** (search_code, get_callers, get_callees, get_dependencies,
impact_analysis, find_path, ask, index_repo) over **the same underlying
async tool functions** (api.mcp.tools.structural and api.mcp.tools.ask),
so a benchmark comparison between them measures transport overhead, not
API differences.

The agent calls these via bash:

cg index_repo --path-or-url . [--branch B] [--ignore PAT ...]
cg search_code --project P --prefix STR [--branch B] [--limit N]
cg get_callers --project P --symbol-id ID [--branch B] [--limit N]
cg get_callees --project P --symbol-id ID [--branch B] [--limit N]
cg get_dependencies --project P --symbol-id ID [--branch B] [--limit N]
cg impact_analysis --project P --symbol-id ID [--branch B] [--direction IN|OUT] [--depth N] [--limit N]
cg find_path --project P --source-id ID --dest-id ID [--branch B]
cg ask --project P --question "..." [--branch B]

Legacy verbs (graph-entities, get-neighbors, find-paths, auto-complete,
find-symbol, note-edit) remain for the React UI's backing tests but are
not exposed to the agent preamble.

Required env vars (set by the runner):
CODEGRAPH_URL base URL of the code-graph service
Expand All @@ -29,12 +37,50 @@
from bench.agents.code_graph_adapter import CodeGraphClient


# ---------- Output compaction --------------------------------------------------
# Every byte returned here is re-fed to the LLM on every subsequent turn (the
# context window grows monotonically until the trajectory ends). A neighbors
# call that returns 20 KB of raw JSON costs ~5K tokens, and at 50+ turns that
# compounds badly. The full FastAPI shape is needed by the React frontend, not
# by an agent — strip the noise here so the LLM sees only what it can act on.
# ---------------------------------------------------------------------------
# Output compaction — must match bench/cli/cg_mcp.py exactly for parity.
# ---------------------------------------------------------------------------
#
# Iter2 finding: every node returned by the v2 endpoints has an absolute
# worktree path under `file` (~130 chars). Stripping everything up to and
# including the `/<project>/` segment saves ~100 chars × N entries, which
# compounds badly when the agent re-feeds tool output across 30-50 turns.

def _strip_worktree_prefix(path: Any, project: str | None) -> Any:
if not isinstance(path, str) or not project:
return path
needle = f"/{project}/"
idx = path.find(needle)
if idx < 0:
return path
return path[idx + len(needle):]


def _compact_entry(entry: Any, project: str | None) -> Any:
if not isinstance(entry, dict):
return entry
out: dict[str, Any] = {}
for k, v in entry.items():
if v in (None, "", [], {}):
continue
if k == "file":
v = _strip_worktree_prefix(v, project)
out[k] = v
return out


def _compact_list(items: Any, project: str | None, limit: int | None) -> Any:
if not isinstance(items, list):
return items
compacted = [_compact_entry(x, project) for x in items]
if limit is not None and limit > 0:
compacted = compacted[:limit]
return compacted


# ---------------------------------------------------------------------------
# Legacy UI-verb compaction (kept so existing tests keep passing).
# ---------------------------------------------------------------------------

# Key allow-lists for the legacy compaction helpers. NOTE(review): presumably
# consumed by _compact_node / _compact_edge, whose bodies are not visible in
# this view — confirm before changing either tuple.
_NODE_KEEP = ("id", "label", "labels", "name", "file", "src", "line", "start_line", "end_line")
_EDGE_KEEP = ("id", "src_node", "dest_node", "relation")
Expand Down Expand Up @@ -65,7 +111,6 @@ def _compact_edge(e: Any) -> Any:


def _compact_neighbors(payload: dict[str, Any], limit: int | None) -> dict[str, Any]:
"""Strip empty properties + alias and apply optional limit."""
if not isinstance(payload, dict):
return payload
n = payload.get("neighbors") or payload
Expand All @@ -81,13 +126,6 @@ def _compact_neighbors(payload: dict[str, Any], limit: int | None) -> dict[str,


def _compact_symbols(payload: Any) -> Any:
"""Trim find-symbol / auto-complete records to the fields the agent needs.

The HTTP responses vary in shape:
- find_symbol: ``[node, ...]``
- auto_complete: ``{"branch": ..., "completions": [node, ...]}``
Compact both consistently.
"""
if isinstance(payload, list):
return [_compact_node(x) for x in payload]
if isinstance(payload, dict):
Expand All @@ -100,49 +138,132 @@ def _compact_symbols(payload: Any) -> Any:


def _print(obj: object) -> None:
# Compact separators shave ~30 % off vs the default indented form, which the
# LLM doesn't need (it ignores whitespace).
json.dump(obj, sys.stdout, separators=(",", ":"), sort_keys=True, default=str)
sys.stdout.write("\n")


# ---------------------------------------------------------------------------
# index_repo over HTTP. Hits /api/analyze_folder; for parity we keep the
# same kwargs as cg-mcp.
# ---------------------------------------------------------------------------

def _index_repo(c: CodeGraphClient, path_or_url: str,
branch: str | None, ignore: list[str] | None) -> dict[str, Any]:
"""Mirror cg-mcp index_repo but go through HTTP /api/analyze_folder."""
body: dict[str, Any] = {"path": path_or_url, "ignore": ignore or []}
if branch:
body["branch"] = branch
r = c._client.post("/api/analyze_folder", json=body)
r.raise_for_status()
return r.json()


def main(argv: list[str] | None = None) -> int:
parser = argparse.ArgumentParser(prog="cg", description=__doc__)
sub = parser.add_subparsers(dest="cmd", required=True)

def _add_project(p: argparse.ArgumentParser) -> None:
p.add_argument("--project", required=True)
p.add_argument("--branch", default=None)

def _add_symbol(p: argparse.ArgumentParser) -> None:
p.add_argument("--symbol-id", type=int, required=True, dest="symbol_id")
p.add_argument("--limit", type=int, default=50)

# ---- v2 (MCP-parity) verbs ----
ir = sub.add_parser("index_repo")
ir.add_argument("--path-or-url", required=True, dest="path_or_url")
ir.add_argument("--branch", default=None)
ir.add_argument("--ignore", nargs="*", default=None)

sc = sub.add_parser("search_code")
_add_project(sc)
sc.add_argument("--prefix", required=True)
sc.add_argument("--limit", type=int, default=10)

for name in ("get_callers", "get_callees", "get_dependencies"):
p = sub.add_parser(name)
_add_project(p)
_add_symbol(p)

ia = sub.add_parser("impact_analysis")
_add_project(ia)
ia.add_argument("--symbol-id", type=int, required=True, dest="symbol_id")
ia.add_argument("--direction", choices=["IN", "OUT"], default="IN")
ia.add_argument("--depth", type=int, default=3)
ia.add_argument("--limit", type=int, default=50)

fp2 = sub.add_parser("find_path")
_add_project(fp2)
fp2.add_argument("--source-id", type=int, required=True, dest="source_id")
fp2.add_argument("--dest-id", type=int, required=True, dest="dest_id")

aq = sub.add_parser("ask")
_add_project(aq)
aq.add_argument("--question", required=True)

# ---- legacy UI verbs (kept for existing tests) ----
def add_repo(p: argparse.ArgumentParser) -> None:
p.add_argument("--repo", required=True, help="repository name in the graph")

ge = sub.add_parser("graph-entities")
add_repo(ge)
p.add_argument("--repo", required=True)

gn = sub.add_parser("get-neighbors")
add_repo(gn)
ge = sub.add_parser("graph-entities"); add_repo(ge)
gn = sub.add_parser("get-neighbors"); add_repo(gn)
gn.add_argument("--ids", type=int, nargs="+", required=True)
gn.add_argument("--limit", type=int, default=50,
help="cap nodes/edges in response (default 50, 0 = unlimited)")

fp = sub.add_parser("find-paths")
add_repo(fp)
gn.add_argument("--limit", type=int, default=50)
fp = sub.add_parser("find-paths"); add_repo(fp)
fp.add_argument("--src", type=int, required=True)
fp.add_argument("--dst", type=int, required=True)

ac = sub.add_parser("auto-complete")
add_repo(ac)
ac = sub.add_parser("auto-complete"); add_repo(ac)
ac.add_argument("--prefix", required=True)

fs = sub.add_parser("find-symbol")
add_repo(fs)
fs = sub.add_parser("find-symbol"); add_repo(fs)
fs.add_argument("--name", required=True)

ne = sub.add_parser("note-edit")
add_repo(ne)
ne = sub.add_parser("note-edit"); add_repo(ne)
ne.add_argument("--path", required=True)

args = parser.parse_args(argv)

with CodeGraphClient() as c:
if args.cmd == "graph-entities":
proj = getattr(args, "project", None)
# ---- v2 verbs ----
if args.cmd == "index_repo":
_print(_index_repo(c, args.path_or_url, args.branch, args.ignore))
elif args.cmd == "search_code":
_print(_compact_list(
c.search_code(args.project, args.prefix, branch=args.branch, limit=args.limit),
proj, args.limit,
))
elif args.cmd == "get_callers":
_print(_compact_list(
c.get_callers(args.project, args.symbol_id, branch=args.branch, limit=args.limit),
proj, args.limit,
))
elif args.cmd == "get_callees":
_print(_compact_list(
c.get_callees(args.project, args.symbol_id, branch=args.branch, limit=args.limit),
proj, args.limit,
))
elif args.cmd == "get_dependencies":
_print(_compact_list(
c.get_dependencies(args.project, args.symbol_id, branch=args.branch, limit=args.limit),
proj, args.limit,
))
elif args.cmd == "impact_analysis":
_print(_compact_list(
c.impact_analysis(
args.project, args.symbol_id, branch=args.branch,
direction=args.direction, depth=args.depth,
),
proj, args.limit,
))
elif args.cmd == "find_path":
_print(_compact_entry(
c.find_path_v2(args.project, args.source_id, args.dest_id, branch=args.branch),
proj,
))
elif args.cmd == "ask":
_print(c.ask_v2(args.project, args.question, branch=args.branch))
# ---- legacy verbs ----
elif args.cmd == "graph-entities":
_print(c.graph_entities(args.repo))
elif args.cmd == "get-neighbors":
limit = args.limit if args.limit > 0 else None
Expand Down
Loading