Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@ authors = [
]
dependencies = []

[project.optional-dependencies]
llm = [
"anthropic>=0.40,<1.0",
]

[project.scripts]
mpi = "mailplus_intelligence.cli:main"

Expand Down
20 changes: 20 additions & 0 deletions src/mailplus_intelligence/doctor.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from pathlib import Path

from .fixtures import load_metadata_fixture_corpus
from .llm_extractor import resolve_llm_model
from .runtime import default_runtime_profile
from .schema import apply_schema_v0, current_schema_version
from .sqlite import connect_sqlite
Expand Down Expand Up @@ -107,6 +108,25 @@ def run_fixture_doctor(project_root: str | Path = ".") -> DoctorReport:
)
)

try:
import anthropic # noqa: F401

sdk_available = True
except ImportError:
sdk_available = False

api_key_present = bool(os.environ.get("ANTHROPIC_API_KEY"))
if sdk_available and api_key_present:
llm_status = "ok"
llm_message = f"LLM extraction available; model={resolve_llm_model()}"
elif sdk_available:
llm_status = "gated"
llm_message = f"Anthropic SDK installed; ANTHROPIC_API_KEY missing; model={resolve_llm_model()}"
else:
llm_status = "gated"
llm_message = f"Anthropic SDK not installed; deterministic extraction only; model={resolve_llm_model()}"
checks.append(DoctorCheck("llm", llm_status, llm_message))

return DoctorReport(tuple(checks))


Expand Down
40 changes: 35 additions & 5 deletions src/mailplus_intelligence/llm_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from __future__ import annotations

import json
import os
import uuid
from dataclasses import dataclass, field
from typing import Any
Expand All @@ -30,6 +31,36 @@
)

# Module-local alias for the imported EXTRACTION_LANES.
# NOTE(review): its consumers are outside this hunk — confirm it is still used.
_EXTRACTION_LANES_LLM = EXTRACTION_LANES
# Fallback model name returned by resolve_llm_model() when neither an explicit
# argument nor the MAILPLUS_LLM_MODEL environment variable provides one.
DEFAULT_LLM_MODEL = "claude-opus-4-7"
# Install command embedded in the LLMNotAvailable message raised when the
# Anthropic SDK cannot be imported.
LLM_EXTRA_INSTALL_HINT = "pip install 'mailplus-intelligence[llm]'"


class LLMNotAvailable(RuntimeError):
    """Signal that LLM extraction was requested but cannot run in this environment.

    Raised by the client builder when the Anthropic SDK is not importable or
    when ``ANTHROPIC_API_KEY`` is not set.
    """


def resolve_llm_model(model: str | None = None) -> str:
    """Resolve the LLM model name to use for extraction.

    Precedence, first non-blank value wins:

    1. the explicit ``model`` argument,
    2. the ``MAILPLUS_LLM_MODEL`` environment variable,
    3. ``DEFAULT_LLM_MODEL``.

    Empty and whitespace-only values are treated as "not configured", and
    surrounding whitespace is stripped from the chosen value, so a stray
    space or trailing newline in the environment never becomes the model
    name sent to the API or shown in diagnostics.

    Args:
        model: Optional explicit model name supplied by the caller.

    Returns:
        The resolved, whitespace-trimmed model name.
    """

    candidates = (model, os.environ.get("MAILPLUS_LLM_MODEL"))
    for candidate in candidates:
        # ``candidate`` may be None or ""; both fall through to the next source.
        cleaned = candidate.strip() if candidate else ""
        if cleaned:
            return cleaned
    return DEFAULT_LLM_MODEL


def _build_anthropic_client() -> Any:
    """Create a default Anthropic client, or explain why one cannot be created.

    The SDK import is attempted first and the ``ANTHROPIC_API_KEY`` check
    second, so a missing SDK is reported in preference to a missing key.

    Returns:
        The object produced by ``anthropic.Anthropic()``.

    Raises:
        LLMNotAvailable: If the ``anthropic`` package cannot be imported, or
            if ``ANTHROPIC_API_KEY`` is unset or empty.
    """
    try:
        import anthropic as sdk
    except ImportError as import_error:
        sdk_missing = (
            f"Anthropic SDK is not installed; run {LLM_EXTRA_INSTALL_HINT} to enable LLM extraction."
        )
        raise LLMNotAvailable(sdk_missing) from import_error

    # Happy path: the SDK is importable and a key is configured.
    if os.environ.get("ANTHROPIC_API_KEY"):
        return sdk.Anthropic()

    raise LLMNotAvailable(
        "ANTHROPIC_API_KEY is not set; set it or pass a cassette for offline LLM extraction."
    )


@dataclass
Expand Down Expand Up @@ -116,7 +147,7 @@ def extract_with_llm(
messages: list[dict[str, Any]],
*,
client: Any = None,
model: str = "claude-opus-4-7",
model: str | None = None,
cassette: dict[str, str] | None = None,
usage_stats: LLMUsageStats | None = None,
) -> LLMExtractionResult:
Expand Down Expand Up @@ -150,11 +181,10 @@ def extract_with_llm(
return LLMExtractionResult(candidates=candidates, usage=stats, cassette_hit=True)

if client is None:
import anthropic
client = anthropic.Anthropic()
client = _build_anthropic_client()

response = client.messages.create(
model=model,
model=resolve_llm_model(model),
max_tokens=1024,
thinking={"type": "adaptive"},
system=[
Expand Down Expand Up @@ -200,7 +230,7 @@ def extract_corpus_with_llm(
messages: list[dict[str, Any]],
*,
client: Any = None,
model: str = "claude-opus-4-7",
model: str | None = None,
cassette: dict[str, str] | None = None,
) -> LLMExtractionResult:
"""Run LLM extraction over all threads, sharing usage stats."""
Expand Down
2 changes: 2 additions & 0 deletions tests/test_doctor.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,14 @@ def test_fixture_doctor_passes_with_live_credentials_gated(self) -> None:
self.assertEqual(statuses["fixtures"], "ok")
self.assertEqual(statuses["schema"], "ok")
self.assertEqual(statuses["live-mailplus"], "gated")
self.assertIn(statuses["llm"], {"ok", "gated"})
self.assertTrue(report.ok)

def test_fixture_doctor_output_names_gated_live_access(self) -> None:
output = format_doctor_report(run_fixture_doctor())

self.assertIn("live MailPlus credentials intentionally unavailable", output)
self.assertIn("llm", output)
self.assertIn("result: ok", output)


Expand Down
26 changes: 25 additions & 1 deletion tests/test_llm_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,12 @@
from mailplus_intelligence.extractor import ExtractionCandidate
from mailplus_intelligence.fixtures import load_metadata_fixture_corpus
from mailplus_intelligence.llm_extractor import (
DEFAULT_LLM_MODEL,
LLMNotAvailable,
LLMUsageStats,
extract_corpus_with_llm,
extract_with_llm,
resolve_llm_model,
)
from mailplus_intelligence.threading import reconstruct_fixture_threads

Expand Down Expand Up @@ -45,8 +48,12 @@ def test_cassette_miss_raises_without_credentials(self) -> None:
thread = next(t for t in self.threads if t.thread_id)
# A non-matching cassette key forces a live API call, which fails without
# credentials. Verify the right error is raised rather than a silent pass.
with self.assertRaises((TypeError, Exception)):
with self.assertRaises(LLMNotAvailable) as raised:
extract_with_llm(thread, self.messages, cassette={"other-thread": "[]"})
self.assertTrue(
"mailplus-intelligence[llm]" in str(raised.exception)
or "ANTHROPIC_API_KEY" in str(raised.exception)
)

def test_corpus_cassette_aggregates_across_threads(self) -> None:
cassette: dict[str, str] = {}
Expand Down Expand Up @@ -100,6 +107,23 @@ def test_noise_threads_skipped(self) -> None:
result = extract_with_llm(thread, messages, cassette={})
self.assertEqual(result.candidates, [])

def test_model_resolution_prefers_argument_then_environment(self) -> None:
    """Check the precedence: explicit argument, then env var, then default.

    ``patch.dict`` snapshots ``os.environ`` and restores it on exit, so the
    ambient value of ``MAILPLUS_LLM_MODEL`` neither influences the outcome
    nor is leaked to other tests.
    """
    import os
    from unittest import mock

    with mock.patch.dict(os.environ, {"MAILPLUS_LLM_MODEL": "claude-env"}):
        # The explicit argument must win even while the env override is set.
        self.assertEqual(resolve_llm_model("claude-test"), "claude-test")
        self.assertEqual(resolve_llm_model(), "claude-env")

    with mock.patch.dict(os.environ):
        # Remove the variable so the default fallback is tested regardless
        # of what the surrounding environment defines.
        os.environ.pop("MAILPLUS_LLM_MODEL", None)
        self.assertEqual(resolve_llm_model("claude-test"), "claude-test")
        self.assertEqual(resolve_llm_model(), DEFAULT_LLM_MODEL)


if __name__ == "__main__":
unittest.main()
Loading