diff --git a/src/context_engine/cli.py b/src/context_engine/cli.py
index 8260c14..26bfcca 100644
--- a/src/context_engine/cli.py
+++ b/src/context_engine/cli.py
@@ -623,6 +623,22 @@ def _preflight_check(config) -> None:
     one was picked, and surfaces Ollama status for the separate compression
     path so users know what compression level they will get.
     """
+    # --- SQLite extension support ---
+    import sqlite3 as _sqlite3
+    _test_conn = _sqlite3.connect(":memory:")
+    if not hasattr(_test_conn, "enable_load_extension"):
+        _test_conn.close()
+        raise click.ClickException(
+            "Your Python was compiled without SQLite extension support "
+            "(enable_load_extension is missing).\n"
+            "This is common with python.org installers on macOS.\n\n"
+            "Fix: reinstall CCE under a Python that has extension support:\n\n"
+            " brew install python3\n"
+            " uv tool install --python /opt/homebrew/bin/python3 "
+            "--force code-context-engine\n"
+        )
+    _test_conn.close()
+
     # --- Embedding backend ---
     click.echo(_dim(" Detecting embedding backend") + "...", nl=False)
     from context_engine.config import resolve_ollama_url
diff --git a/src/context_engine/indexer/embedder.py b/src/context_engine/indexer/embedder.py
index 69d340a..9a69221 100644
--- a/src/context_engine/indexer/embedder.py
+++ b/src/context_engine/indexer/embedder.py
@@ -319,16 +319,80 @@ def _ensure_model(self) -> None:
             for _ in resp.iter_lines():
                 pass
 
+    # nomic-embed-text has an 8192-token context. Dense-tokenizing content
+    # (YAML with ${{ }}, Python separator comments) can hit ~1 char/token,
+    # so 3000 chars is a safe ceiling that works for all content types.
+    _MAX_EMBED_CHARS = 3000
+
     def _embed_batch(self, texts: list[str]) -> list[list[float]]:
+        """Embed *texts* via Ollama, returning one vector per input position.
+
+        Blank inputs are not sent and get an empty vector; the rest are cut
+        to ``_MAX_EMBED_CHARS``. A 400 on the batch triggers a per-text retry.
+        """
         import httpx
-        resp = httpx.post(
-            f"{self.base_url}/api/embed",
-            json={"model": self.model_name, "input": texts},
-            timeout=self._timeout,
-        )
-        resp.raise_for_status()
-        data = resp.json()
-        return data.get("embeddings", [])
+        # Truncate oversized texts and skip empty ones
+        safe_texts = []
+        original_indices = []
+        for i, t in enumerate(texts):
+            if not t or not t.strip():
+                continue
+            safe_texts.append(t[:self._MAX_EMBED_CHARS])
+            original_indices.append(i)
+
+        if not safe_texts:
+            return [[] for _ in texts]
+
+        try:
+            resp = httpx.post(
+                f"{self.base_url}/api/embed",
+                json={"model": self.model_name, "input": safe_texts},
+                timeout=self._timeout,
+            )
+            resp.raise_for_status()
+            embeddings = resp.json().get("embeddings", [])
+        except httpx.HTTPStatusError as exc:
+            if exc.response.status_code != 400:
+                raise
+            # Batch failed (possibly one text still too large after truncation).
+            # Fall back to one-at-a-time with halving retry.
+            log.warning("Ollama batch embed failed, retrying one-at-a-time")
+            embeddings = []
+            for text in safe_texts:
+                vec = self._embed_single_with_retry(text)
+                embeddings.append(vec)
+
+        if len(embeddings) != len(safe_texts):
+            # zip() below stops at the shorter list, so missing vectors would
+            # otherwise turn into empty results without any trace.
+            log.warning(
+                "Ollama returned %d embeddings for %d inputs",
+                len(embeddings),
+                len(safe_texts),
+            )
+
+        # Map embeddings back to original positions (empty texts get empty vecs)
+        result: list[list[float]] = [[] for _ in texts]
+        for idx, emb in zip(original_indices, embeddings):
+            result[idx] = emb
+        return result
+
+    def _embed_single_with_retry(self, text: str) -> list[float]:
+        """Embed a single text, halving on context-length errors."""
+        import httpx
+        while text:
+            resp = httpx.post(
+                f"{self.base_url}/api/embed",
+                json={"model": self.model_name, "input": [text]},
+                timeout=self._timeout,
+            )
+            if resp.status_code == 400 and "context length" in resp.text:
+                text = text[:len(text) // 2]
+                continue
+            resp.raise_for_status()
+            vecs = resp.json().get("embeddings", [[]])
+            return vecs[0] if vecs else []
+        return []
 
     def embed_texts(self, texts: list[str], batch_size: int = 64) -> list[list[float]]:
         out: list[list[float]] = []
diff --git a/src/context_engine/memory/db.py b/src/context_engine/memory/db.py
index 7425797..ed81a87 100644
--- a/src/context_engine/memory/db.py
+++ b/src/context_engine/memory/db.py
@@ -281,6 +281,14 @@ def _try_load_vec(conn: sqlite3.Connection) -> bool:
         sqlite_vec.load(conn)
         conn.enable_load_extension(False)
         return True
+    except AttributeError:
+        log.warning(
+            "sqlite-vec load failed; semantic recall disabled. "
+            "Python was compiled without SQLite extension support. "
+            "Reinstall CCE with Homebrew Python: "
+            "uv tool install --python /opt/homebrew/bin/python3 --force code-context-engine"
+        )
+        return False
     except Exception as exc:
         log.warning("sqlite-vec load failed; semantic recall disabled: %s", exc)
         return False
diff --git a/src/context_engine/storage/vector_store.py b/src/context_engine/storage/vector_store.py
index b646dfe..3385dee 100644
--- a/src/context_engine/storage/vector_store.py
+++ b/src/context_engine/storage/vector_store.py
@@ -46,9 +46,29 @@ def __init__(self, db_path: str) -> None:
     def _connect(self) -> sqlite3.Connection:
         import sqlite_vec
         conn = sqlite3.connect(self._db_file, check_same_thread=False)
-        conn.enable_load_extension(True)
-        sqlite_vec.load(conn)
-        conn.enable_load_extension(False)
+        try:
+            conn.enable_load_extension(True)
+            sqlite_vec.load(conn)
+            conn.enable_load_extension(False)
+        except AttributeError:
+            # Don't leak the handle opened above when extensions are unsupported.
+            conn.close()
+            raise RuntimeError(
+                "Your Python was compiled without SQLite extension support "
+                "(enable_load_extension is missing). This is common with "
+                "python.org installers on macOS.\n\n"
+                "Fix: reinstall CCE under a Python that has extension support:\n"
+                " uv tool install --python $(brew --prefix python3)/bin/python3 "
+                "--force code-context-engine\n\n"
+                "Or use Homebrew Python directly:\n"
+                " brew install python3\n"
+                " uv tool install --python /opt/homebrew/bin/python3 "
+                "--force code-context-engine"
+            ) from None
+        except Exception:
+            # Any other load failure still propagates; release the handle first.
+            conn.close()
+            raise
         conn.execute("PRAGMA journal_mode=WAL")
         conn.execute("PRAGMA synchronous=NORMAL")
         return conn