diff --git a/codebase_rag/constants.py b/codebase_rag/constants.py index 14ee184c7..ffa7f632c 100644 --- a/codebase_rag/constants.py +++ b/codebase_rag/constants.py @@ -89,6 +89,7 @@ class FileAction(StrEnum): EXT_IXX = ".ixx" EXT_CPPM = ".cppm" EXT_CCM = ".ccm" +EXT_C = ".c" EXT_CS = ".cs" EXT_PHP = ".php" EXT_LUA = ".lua" @@ -101,6 +102,7 @@ class FileAction(StrEnum): GO_EXTENSIONS = (EXT_GO,) SCALA_EXTENSIONS = (EXT_SCALA, EXT_SC) JAVA_EXTENSIONS = (EXT_JAVA,) +C_EXTENSIONS = (EXT_C,) CPP_EXTENSIONS = ( EXT_CPP, EXT_H, @@ -444,6 +446,7 @@ class SupportedLanguage(StrEnum): GO = "go" SCALA = "scala" JAVA = "java" + C = "c" CPP = "cpp" CSHARP = "c-sharp" PHP = "php" @@ -477,6 +480,11 @@ class LanguageMetadata(NamedTuple): "Interfaces, type aliases, enums, namespaces, ES6/CommonJS modules", "TypeScript", ), + SupportedLanguage.C: LanguageMetadata( + LanguageStatus.DEV, + "Functions, structs, unions, enums, preprocessor includes", + "C", + ), SupportedLanguage.CPP: LanguageMetadata( LanguageStatus.FULL, "Constructors, destructors, operator overloading, templates, lambdas, C++20 modules, namespaces", @@ -742,6 +750,7 @@ class TreeSitterModule(StrEnum): GO = "tree_sitter_go" SCALA = "tree_sitter_scala" JAVA = "tree_sitter_java" + C = "tree_sitter_c" CPP = "tree_sitter_cpp" LUA = "tree_sitter_lua" @@ -2650,6 +2659,13 @@ class MCPParamName(StrEnum): TS_ENUM_SPECIFIER, ) +# (H) Derived node types for _c_get_name +C_NAME_NODE_TYPES = ( + TS_STRUCT_SPECIFIER, + TS_UNION_SPECIFIER, + TS_ENUM_SPECIFIER, +) + # (H) LANGUAGE_SPECS node type tuples for Rust SPEC_RS_FUNCTION_TYPES = ( TS_RS_FUNCTION_ITEM, @@ -2746,6 +2762,26 @@ class MCPParamName(StrEnum): PKG_CONANFILE, ) +# (H) FQN node type tuples for C +FQN_C_SCOPE_TYPES = ( + TS_CPP_TRANSLATION_UNIT, + TS_STRUCT_SPECIFIER, + TS_UNION_SPECIFIER, + TS_ENUM_SPECIFIER, +) +FQN_C_FUNCTION_TYPES = (TS_CPP_FUNCTION_DEFINITION,) + +# (H) LANGUAGE_SPECS node type tuples for C +SPEC_C_FUNCTION_TYPES = (TS_CPP_FUNCTION_DEFINITION,) 
+SPEC_C_CLASS_TYPES = ( + TS_STRUCT_SPECIFIER, + TS_UNION_SPECIFIER, + TS_ENUM_SPECIFIER, +) +SPEC_C_MODULE_TYPES = (TS_CPP_TRANSLATION_UNIT,) +SPEC_C_CALL_TYPES = (TS_CPP_CALL_EXPRESSION,) +SPEC_C_PACKAGE_INDICATORS = (PKG_CMAKE_LISTS, PKG_MAKEFILE) + # (H) LANGUAGE_SPECS node type tuples for C# SPEC_CS_FUNCTION_TYPES = ( TS_CS_DESTRUCTOR_DECLARATION, diff --git a/codebase_rag/language_spec.py b/codebase_rag/language_spec.py index cf550ab08..0681b94d1 100644 --- a/codebase_rag/language_spec.py +++ b/codebase_rag/language_spec.py @@ -97,6 +97,27 @@ def _rust_file_to_module(file_path: Path, repo_root: Path) -> list[str]: return [] +def _c_unwrap_declarator(declarator: Node | None) -> Node | None: + while declarator and declarator.type == cs.CppNodeType.POINTER_DECLARATOR: + declarator = declarator.child_by_field_name(cs.FIELD_DECLARATOR) + return declarator + + +def _c_get_name(node: Node) -> str | None: + if node.type in cs.C_NAME_NODE_TYPES: + name_node = node.child_by_field_name(cs.FIELD_NAME) + if name_node and name_node.text: + return name_node.text.decode(cs.ENCODING_UTF8) + elif node.type == cs.TS_CPP_FUNCTION_DEFINITION: + declarator = node.child_by_field_name(cs.FIELD_DECLARATOR) + declarator = _c_unwrap_declarator(declarator) + if declarator and declarator.type == cs.TS_CPP_FUNCTION_DECLARATOR: + name_node = declarator.child_by_field_name(cs.FIELD_DECLARATOR) + if name_node and name_node.type == cs.TS_IDENTIFIER and name_node.text: + return name_node.text.decode(cs.ENCODING_UTF8) + return _generic_get_name(node) + + def _cpp_get_name(node: Node) -> str | None: if node.type in cs.CPP_NAME_NODE_TYPES: name_node = node.child_by_field_name(cs.FIELD_NAME) @@ -154,6 +175,13 @@ def _cpp_get_name(node: Node) -> str | None: file_to_module_parts=_generic_file_to_module, ) +C_FQN_SPEC = FQNSpec( + scope_node_types=frozenset(cs.FQN_C_SCOPE_TYPES), + function_node_types=frozenset(cs.FQN_C_FUNCTION_TYPES), + get_name=_c_get_name, + 
file_to_module_parts=_generic_file_to_module, +) + LUA_FQN_SPEC = FQNSpec( scope_node_types=frozenset(cs.FQN_LUA_SCOPE_TYPES), function_node_types=frozenset(cs.FQN_LUA_FUNCTION_TYPES), @@ -195,6 +223,7 @@ def _cpp_get_name(node: Node) -> str | None: cs.SupportedLanguage.TS: TS_FQN_SPEC, cs.SupportedLanguage.RUST: RUST_FQN_SPEC, cs.SupportedLanguage.JAVA: JAVA_FQN_SPEC, + cs.SupportedLanguage.C: C_FQN_SPEC, cs.SupportedLanguage.CPP: CPP_FQN_SPEC, cs.SupportedLanguage.LUA: LUA_FQN_SPEC, cs.SupportedLanguage.GO: GO_FQN_SPEC, @@ -343,6 +372,28 @@ def _cpp_get_name(node: Node) -> str | None: type: (type_identifier) @name) @call """, ), + cs.SupportedLanguage.C: LanguageSpec( + language=cs.SupportedLanguage.C, + file_extensions=cs.C_EXTENSIONS, + function_node_types=cs.SPEC_C_FUNCTION_TYPES, + class_node_types=cs.SPEC_C_CLASS_TYPES, + module_node_types=cs.SPEC_C_MODULE_TYPES, + call_node_types=cs.SPEC_C_CALL_TYPES, + import_node_types=cs.IMPORT_NODES_INCLUDE, + import_from_node_types=cs.IMPORT_NODES_INCLUDE, + package_indicators=cs.SPEC_C_PACKAGE_INDICATORS, + function_query=""" + (function_definition) @function + """, + class_query=""" + (struct_specifier) @class + (union_specifier) @class + (enum_specifier) @class + """, + call_query=""" + (call_expression) @call + """, + ), cs.SupportedLanguage.CPP: LanguageSpec( language=cs.SupportedLanguage.CPP, file_extensions=cs.CPP_EXTENSIONS, diff --git a/codebase_rag/parser_loader.py b/codebase_rag/parser_loader.py index e820d3b3f..1b17693f0 100644 --- a/codebase_rag/parser_loader.py +++ b/codebase_rag/parser_loader.py @@ -136,6 +136,12 @@ def _import_language_loaders() -> dict[cs.SupportedLanguage, LanguageLoader]: cs.QUERY_LANGUAGE, cs.SupportedLanguage.JAVA, ), + LanguageImport( + cs.SupportedLanguage.C, + cs.TreeSitterModule.C, + cs.QUERY_LANGUAGE, + cs.SupportedLanguage.C, + ), LanguageImport( cs.SupportedLanguage.CPP, cs.TreeSitterModule.CPP, diff --git a/codebase_rag/tests/test_handler_registry.py 
b/codebase_rag/tests/test_handler_registry.py index 2a9215755..ed6596280 100644 --- a/codebase_rag/tests/test_handler_registry.py +++ b/codebase_rag/tests/test_handler_registry.py @@ -52,6 +52,11 @@ def test_returns_base_handler_for_php(self) -> None: assert isinstance(handler, BaseLanguageHandler) assert type(handler) is BaseLanguageHandler + def test_returns_base_handler_for_c(self) -> None: + handler = get_handler(SupportedLanguage.C) + assert isinstance(handler, BaseLanguageHandler) + assert type(handler) is BaseLanguageHandler + class TestHandlerCaching: def test_same_instance_returned_for_same_language(self) -> None: @@ -84,6 +89,7 @@ class TestHandlerProtocol: SupportedLanguage.PYTHON, SupportedLanguage.GO, SupportedLanguage.PHP, + SupportedLanguage.C, ], ) def test_handler_has_all_protocol_methods( @@ -114,6 +120,7 @@ def test_handler_has_all_protocol_methods( SupportedLanguage.JAVA, SupportedLanguage.LUA, SupportedLanguage.PYTHON, + SupportedLanguage.C, ], ) def test_handler_methods_are_callable(self, language: SupportedLanguage) -> None: diff --git a/codebase_rag/tests/test_language_node_coverage.py b/codebase_rag/tests/test_language_node_coverage.py index 74648125f..4d902abda 100644 --- a/codebase_rag/tests/test_language_node_coverage.py +++ b/codebase_rag/tests/test_language_node_coverage.py @@ -3,6 +3,7 @@ import pytest from codebase_rag.constants import ( + C_EXTENSIONS, CPP_EXTENSIONS, CS_EXTENSIONS, GO_EXTENSIONS, @@ -60,6 +61,7 @@ def test_each_language_has_file_extensions(self, lang: SupportedLanguage) -> Non (SupportedLanguage.GO, GO_EXTENSIONS), (SupportedLanguage.SCALA, SCALA_EXTENSIONS), (SupportedLanguage.JAVA, JAVA_EXTENSIONS), + (SupportedLanguage.C, C_EXTENSIONS), (SupportedLanguage.CPP, CPP_EXTENSIONS), (SupportedLanguage.CSHARP, CS_EXTENSIONS), (SupportedLanguage.PHP, PHP_EXTENSIONS), @@ -87,6 +89,7 @@ def test_language_spec_has_correct_extensions( (".go", SupportedLanguage.GO), (".scala", SupportedLanguage.SCALA), (".java", 
SupportedLanguage.JAVA), + (".c", SupportedLanguage.C), (".cpp", SupportedLanguage.CPP), (".h", SupportedLanguage.CPP), (".hpp", SupportedLanguage.CPP), diff --git a/codebase_rag/tools/semantic_search.py b/codebase_rag/tools/semantic_search.py index e7aa9c5b2..8897a4e5c 100644 --- a/codebase_rag/tools/semantic_search.py +++ b/codebase_rag/tools/semantic_search.py @@ -139,7 +139,7 @@ async def semantic_search_functions(query: str, top_k: int = 5) -> str: return response - return Tool(semantic_search_functions, name=td.AgenticToolName.SEMANTIC_SEARCH) + return Tool(semantic_search_functions, name=td.AgenticToolName.SEMANTIC_SEARCH, description=td.SEMANTIC_SEARCH) def create_get_function_source_tool() -> Tool: @@ -153,4 +153,4 @@ async def get_function_source_by_id(node_id: int) -> str: return cs.MSG_SEMANTIC_SOURCE_FORMAT.format(id=node_id, code=source_code) - return Tool(get_function_source_by_id, name=td.AgenticToolName.GET_FUNCTION_SOURCE) + return Tool(get_function_source_by_id, name=td.AgenticToolName.GET_FUNCTION_SOURCE, description=td.GET_FUNCTION_SOURCE) diff --git a/pyproject.toml b/pyproject.toml index 958996664..341bd21d7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -81,6 +81,7 @@ treesitter-full = [ "tree-sitter-go>=0.23.4", "tree-sitter-scala>=0.24.0", "tree-sitter-java>=0.23.5", + "tree-sitter-c>=0.24.1", "tree-sitter-cpp>=0.23.0", "tree-sitter-lua>=0.0.19", ] diff --git a/uv.lock b/uv.lock index a1572fb5b..42794550f 100644 --- a/uv.lock +++ b/uv.lock @@ -494,7 +494,7 @@ wheels = [ [[package]] name = "code-graph-rag" -version = "0.0.115" +version = "0.0.116" source = { editable = "." 
} dependencies = [ { name = "click" }, @@ -512,6 +512,7 @@ dependencies = [ { name = "rich" }, { name = "toml" }, { name = "tree-sitter" }, + { name = "tree-sitter-c" }, { name = "tree-sitter-python" }, { name = "typer" }, { name = "watchdog" }, @@ -531,6 +532,7 @@ test = [ { name = "testcontainers" }, ] treesitter-full = [ + { name = "tree-sitter-c" }, { name = "tree-sitter-cpp" }, { name = "tree-sitter-go" }, { name = "tree-sitter-java" }, @@ -591,6 +593,8 @@ requires-dist = [ { name = "torch", marker = "extra == 'semantic'", specifier = ">=2.6.0" }, { name = "transformers", marker = "extra == 'semantic'", specifier = ">=4.0.0" }, { name = "tree-sitter", specifier = "==0.25.2" }, + { name = "tree-sitter-c", specifier = ">=0.24.1" }, + { name = "tree-sitter-c", marker = "extra == 'treesitter-full'", specifier = ">=0.24.1" }, { name = "tree-sitter-cpp", marker = "extra == 'treesitter-full'", specifier = ">=0.23.0" }, { name = "tree-sitter-go", marker = "extra == 'treesitter-full'", specifier = ">=0.23.4" }, { name = "tree-sitter-java", marker = "extra == 'treesitter-full'", specifier = ">=0.23.5" }, @@ -4263,6 +4267,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a6/6e/e64621037357acb83d912276ffd30a859ef117f9c680f2e3cb955f47c680/tree_sitter-0.25.2-cp314-cp314-win_arm64.whl", hash = "sha256:b8d4429954a3beb3e844e2872610d2a4800ba4eb42bb1990c6a4b1949b18459f", size = 117470, upload-time = "2025-09-25T17:37:58.431Z" }, ] +[[package]] +name = "tree-sitter-c" +version = "0.24.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f1/f5/ba8cd08d717277551ade8537d3aa2a94b907c6c6e0fbcf4e4d8b1c747fa3/tree_sitter_c-0.24.1.tar.gz", hash = "sha256:7d2d0cda0b8dda428c81440c1e94367f9f13548eedca3f49768bde66b1422ad6", size = 228014, upload-time = "2025-05-24T17:32:58.384Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/15/c7/c817be36306e457c2d36cc324789046390d9d8c555c38772429ffdb7d361/tree_sitter_c-0.24.1-cp310-abi3-macosx_10_9_x86_64.whl", hash = "sha256:9c06ac26a1efdcc8b26a8a6970fbc6997c4071857359e5837d4c42892d45fe1e", size = 80940, upload-time = "2025-05-24T17:32:49.967Z" }, + { url = "https://files.pythonhosted.org/packages/7a/42/283909467290b24fdbc29bb32ee20e409a19a55002b43175d66d091ca1a4/tree_sitter_c-0.24.1-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:942bcd7cbecd810dcf7ca6f8f834391ebf0771a89479646d891ba4ca2fdfdc88", size = 86304, upload-time = "2025-05-24T17:32:51.271Z" }, + { url = "https://files.pythonhosted.org/packages/94/53/fb4f61d4e5f15ec3da85774a4df8e58d3b5b73036cf167f0203b4dd9d158/tree_sitter_c-0.24.1-cp310-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9a74cfd7a11ca5a961fafd4d751892ee65acae667d2818968a6f079397d8d28c", size = 109996, upload-time = "2025-05-24T17:32:52.119Z" }, + { url = "https://files.pythonhosted.org/packages/5e/e8/fc541d34ee81c386c5453c2596c1763e8e9cd7cb0725f39d7dfa2276afa4/tree_sitter_c-0.24.1-cp310-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a6a807705a3978911dc7ee26a7ad36dcfacb6adfc13c190d496660ec9bd66707", size = 98137, upload-time = "2025-05-24T17:32:53.361Z" }, + { url = "https://files.pythonhosted.org/packages/32/c6/d0563319cae0d5b5780a92e2806074b24afea2a07aa4c10599b899bda3ec/tree_sitter_c-0.24.1-cp310-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:789781afcb710df34144f7e2a20cd80e325114b9119e3956c6bd1dd2d365df98", size = 94148, upload-time = "2025-05-24T17:32:54.855Z" }, + { url = "https://files.pythonhosted.org/packages/50/5a/6361df7f3fa2310c53a0d26b4702a261c332da16fa9d801e381e3a86e25f/tree_sitter_c-0.24.1-cp310-abi3-win_amd64.whl", hash = "sha256:290bff0f9c79c966496ebae45042f77543e6e4aea725f40587a8611d566231a8", size = 84703, upload-time = "2025-05-24T17:32:56.084Z" }, + { url = 
"https://files.pythonhosted.org/packages/22/6a/210a302e8025ac492cbaea58d3720d66b7d8034c5d747ac5e4d2d235aa25/tree_sitter_c-0.24.1-cp310-abi3-win_arm64.whl", hash = "sha256:d46bbda06f838c2dcb91daf767813671fd366b49ad84ff37db702129267b46e1", size = 82715, upload-time = "2025-05-24T17:32:57.248Z" }, +] + [[package]] name = "tree-sitter-cpp" version = "0.23.4"