-
Notifications
You must be signed in to change notification settings - Fork 101
Expand file tree
/
Copy pathchunking.py
More file actions
29 lines (20 loc) · 1.12 KB
/
chunking.py
File metadata and controls
29 lines (20 loc) · 1.12 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
"""Public API for writing custom chunkers.
Example usage::
from pathlib import Path
from cocoindex_code.chunking import Chunk, ChunkerFn, TextPosition
def my_chunker(path: Path, content: str) -> tuple[str | None, list[Chunk]]:
pos = TextPosition(byte_offset=0, char_offset=0, line=1, column=0)
return "mylang", [Chunk(text=content, start=pos, end=pos)]
"""
from __future__ import annotations
import pathlib as _pathlib
from collections.abc import Callable as _Callable
import cocoindex as _coco
from cocoindex.resources.chunk import Chunk, TextPosition
# Signature of a custom chunker: (path, content) -> (language_override, chunks).
#   * Declared as a plain Callable alias rather than a Protocol, in line with the
#     codebase style.
#   * Returning None as language_override keeps the language detected by
#     detect_code_language.
#   * The path is handed over unresolved (no syscall is made); call
#     path.resolve() inside the chunker if an absolute path is needed.
ChunkerFn = _Callable[
    [_pathlib.Path, str],
    tuple[str | None, list[Chunk]],
]
# Context key for a dict[str, ChunkerFn] registry of chunkers.
# It is created with tracked=False because callables are not fingerprint-able;
# a daemon restart re-indexes anyway.
CHUNKER_REGISTRY = _coco.ContextKey[dict[str, ChunkerFn]](
    "chunker_registry",
    tracked=False,
)
# Names exported as this module's public API.
__all__ = [
    "Chunk",
    "ChunkerFn",
    "CHUNKER_REGISTRY",
    "TextPosition",
]