From 2a73b51120296cf03c5c50248641aa59b628d708 Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Tue, 24 Feb 2026 14:13:31 +0100 Subject: [PATCH 01/24] chore(deps): normalize versions, reorganize workspace dependency categories Ensure all dependency versions specify major.minor, add tracing-subscriber to workspace dependencies, sort members and internal crates alphabetically, and fix dependency category groupings. Co-Authored-By: Claude Opus 4.6 --- Cargo.toml | 83 +++++++++++++++++++------------------ crates/nvisy-cli/Cargo.toml | 2 +- 2 files changed, 43 insertions(+), 42 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 88b50e4..1a8067a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,18 +3,18 @@ [workspace] resolver = "2" members = [ + "./crates/nvisy-augment", + "./crates/nvisy-cli", + "./crates/nvisy-codec", "./crates/nvisy-core", "./crates/nvisy-engine", - "./crates/nvisy-codec", - "./crates/nvisy-object", - "./crates/nvisy-pattern", "./crates/nvisy-identify", - "./crates/nvisy-augment", + "./crates/nvisy-object", "./crates/nvisy-ontology", - "./crates/nvisy-rig", + "./crates/nvisy-pattern", "./crates/nvisy-python", + "./crates/nvisy-rig", "./crates/nvisy-server", - "./crates/nvisy-cli", ] [workspace.package] @@ -36,57 +36,58 @@ documentation = "https://docs.rs/nvisy-runtime" # See for more details: https://github.com/rust-lang/cargo/issues/11329 # Internal crates +nvisy-augment = { path = "./crates/nvisy-augment", version = "0.1.0" } +nvisy-codec = { path = "./crates/nvisy-codec", version = "0.1.0" } nvisy-core = { path = "./crates/nvisy-core", version = "0.1.0" } nvisy-engine = { path = "./crates/nvisy-engine", version = "0.1.0" } -nvisy-codec = { path = "./crates/nvisy-codec", version = "0.1.0" } -nvisy-object = { path = "./crates/nvisy-object", version = "0.1.0" } -nvisy-pattern = { path = "./crates/nvisy-pattern", version = "0.1.0" } nvisy-identify = { path = "./crates/nvisy-identify", version = "0.1.0" } -nvisy-augment = { path = 
"./crates/nvisy-augment", version = "0.1.0" } +nvisy-object = { path = "./crates/nvisy-object", version = "0.1.0" } nvisy-ontology = { path = "./crates/nvisy-ontology", version = "0.1.0" } -nvisy-rig = { path = "./crates/nvisy-rig", version = "0.1.0" } +nvisy-pattern = { path = "./crates/nvisy-pattern", version = "0.1.0" } nvisy-python = { path = "./crates/nvisy-python", version = "0.1.0" } +nvisy-rig = { path = "./crates/nvisy-rig", version = "0.1.0" } nvisy-server = { path = "./crates/nvisy-server", version = "0.1.0" } # LLM framework rig-core = { version = "0.31", features = [] } # Async runtime -tokio = { version = "1", features = [] } +tokio = { version = "1.0", features = [] } tokio-util = { version = "0.7", features = [] } futures = { version = "0.3", features = [] } async-trait = { version = "0.1", features = [] } # Observability tracing = { version = "0.1", features = ["attributes"] } +tracing-subscriber = { version = "0.3", features = [] } # (De)serialization serde = { version = "1.0", features = ["derive"] } serde_json = { version = "1.0", features = [] } -schemars = { version = "1", features = ["uuid1", "bytes1"] } +schemars = { version = "1.0", features = ["uuid1", "bytes1"] } +csv = { version = "1.0", features = [] } # Derive macros and error handling thiserror = { version = "2.0", features = [] } anyhow = { version = "1.0", features = [] } -derive_more = { version = "2", features = ["display", "from", "into"] } +derive_more = { version = "2.0", features = ["display", "from", "into"] } strum = { version = "0.28", features = ["derive"] } -# Encoding -base64 = { version = "0.22", features = [] } - # Primitive datatypes -uuid = { version = "1", features = ["serde", "v4", "v7"] } -bytes = { version = "1", features = ["serde"] } +uuid = { version = "1.0", features = ["serde", "v4", "v7"] } +bytes = { version = "1.0", features = ["serde"] } +hipstr = { version = "0.6", features = [] } jiff = { version = "0.2", features = ["serde"] } +semver = { version = 
"1.0", features = ["serde"] } + +# Encoding and hashing +base64 = { version = "0.22", features = [] } sha2 = { version = "0.10", features = [] } hex = { version = "0.4", features = [] } -# Text processing -hipstr = { version = "0.6", features = [] } +# Pattern matching regex = { version = "1.0", features = [] } -aho-corasick = { version = "1", features = [] } -csv = { version = "1", features = [] } -include_dir = { version = "0.7", features = [] } +aho-corasick = { version = "1.0", features = [] } # Graph data structures petgraph = { version = "0.8", features = [] } @@ -94,31 +95,31 @@ petgraph = { version = "0.8", features = [] } # File type detection infer = { version = "0.19", features = [] } -# Python interop -pyo3 = { version = "0.24", features = [] } -pyo3-async-runtimes = { version = "0.24", features = ["tokio-runtime"] } -pythonize = { version = "0.24", features = [] } - -# Cloud object storage (S3, Azure Blob, GCS) -object_store = { version = "0.13", default-features = false } - -# Image processing -image = { version = "0.25", default-features = false, features = ["png", "jpeg", "tiff"] } -imageproc = { version = "0.26", features = [] } - # Document parsing pdf-extract = { version = "0.7", features = [] } lopdf = { version = "0.39", features = [] } scraper = { version = "0.22", features = [] } calamine = { version = "0.33", features = [] } -zip = { version = "8", features = [] } +zip = { version = "8.0", features = [] } quick-xml = { version = "0.37", features = [] } -# Semantic versioning -semver = { version = "1", features = ["serde"] } +# Image processing +image = { version = "0.25", default-features = false, features = ["png", "jpeg", "tiff"] } +imageproc = { version = "0.26", features = [] } + +# Compile-time asset embedding +include_dir = { version = "0.7", features = [] } + +# Cloud object storage (S3, Azure Blob, GCS) +object_store = { version = "0.13", default-features = false } + +# Python interop +pyo3 = { version = "0.24", features = [] } 
+pyo3-async-runtimes = { version = "0.24", features = ["tokio-runtime"] } +pythonize = { version = "0.24", features = [] } # CLI -clap = { version = "4", features = [] } +clap = { version = "4.0", features = [] } # HTTP server axum = { version = "0.8", features = [] } @@ -127,7 +128,7 @@ tower = { version = "0.5", features = [] } tower-http = { version = "0.6", features = [] } # Testing -tempfile = { version = "3", features = [] } +tempfile = { version = "3.0", features = [] } # Randomness rand = { version = "0.10", features = [] } diff --git a/crates/nvisy-cli/Cargo.toml b/crates/nvisy-cli/Cargo.toml index eba4379..8e25784 100644 --- a/crates/nvisy-cli/Cargo.toml +++ b/crates/nvisy-cli/Cargo.toml @@ -42,4 +42,4 @@ tokio = { workspace = true, features = ["rt-multi-thread", "macros", "signal"] } # Observability tracing = { workspace = true, features = [] } -tracing-subscriber = { version = "0.3", features = ["env-filter", "json"] } +tracing-subscriber = { workspace = true, features = ["env-filter", "json"] } From c65519c639d73f37f41ee1f0483c13563a433bd0 Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Tue, 24 Feb 2026 14:49:39 +0100 Subject: [PATCH 02/24] refactor(cli): restructure into Cli/ServerConfig, split server modules Rename ServerConfig to Cli as top-level parser, extract ServerConfig into config/server.rs for network binding. Split server/ into listen.rs and shutdown.rs, add shutdown timeout with structured tracing, move init_tracing to Cli, and use anyhow::Result for error propagation. 
Co-Authored-By: Claude Opus 4.6 --- Cargo.lock | 1 + crates/nvisy-cli/Cargo.toml | 3 + crates/nvisy-cli/src/config/mod.rs | 90 ++++++++++++++----------- crates/nvisy-cli/src/config/server.rs | 63 +++++++++++++++++ crates/nvisy-cli/src/main.rs | 65 ++++++++++++------ crates/nvisy-cli/src/server/listen.rs | 40 +++++++++++ crates/nvisy-cli/src/server/mod.rs | 86 ++--------------------- crates/nvisy-cli/src/server/shutdown.rs | 65 ++++++++++++++++++ 8 files changed, 273 insertions(+), 140 deletions(-) create mode 100644 crates/nvisy-cli/src/config/server.rs create mode 100644 crates/nvisy-cli/src/server/listen.rs create mode 100644 crates/nvisy-cli/src/server/shutdown.rs diff --git a/Cargo.lock b/Cargo.lock index 1daed6e..594a858 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2667,6 +2667,7 @@ dependencies = [ name = "nvisy-cli" version = "0.1.0" dependencies = [ + "anyhow", "axum", "clap", "nvisy-core", diff --git a/crates/nvisy-cli/Cargo.toml b/crates/nvisy-cli/Cargo.toml index 8e25784..1b37d29 100644 --- a/crates/nvisy-cli/Cargo.toml +++ b/crates/nvisy-cli/Cargo.toml @@ -34,6 +34,9 @@ nvisy-server = { workspace = true, features = [] } # CLI clap = { workspace = true, features = ["derive", "env"] } +# Error handling +anyhow = { workspace = true, features = [] } + # HTTP framework axum = { workspace = true, features = ["tokio"] } diff --git a/crates/nvisy-cli/src/config/mod.rs b/crates/nvisy-cli/src/config/mod.rs index af70b7c..6aa6dbb 100644 --- a/crates/nvisy-cli/src/config/mod.rs +++ b/crates/nvisy-cli/src/config/mod.rs @@ -1,37 +1,48 @@ -//! CLI configuration parsed from command-line arguments and environment -//! variables via [`clap`]. +//! CLI configuration management. //! -//! All fields have sensible defaults and can be overridden by environment -//! variables (`HOST`, `PORT`, `RUST_LOG`, etc.) or CLI flags. +//! This module defines the complete CLI configuration hierarchy: +//! +//! ```text +//! Cli +//! 
├── server: ServerConfig # Host, port, content directory +//! ├── body_limit_bytes: usize # Extractor body limit (default: 2 MiB) +//! ├── file_body_limit_bytes: usize # Upload body limit (default: 50 MiB) +//! └── request_timeout_secs: u64 # Per-request timeout (default: 300s) +//! ``` +//! +//! All configuration can be provided via CLI arguments or environment variables. +//! Use `--help` to see all available options. +//! +//! # Example +//! +//! ```bash +//! # Configure via CLI flags +//! nvisy-server --host 127.0.0.1 --port 3000 --request-timeout-secs 60 +//! +//! # Or via environment variables +//! HOST=127.0.0.1 PORT=3000 REQUEST_TIMEOUT_SECS=60 nvisy-server +//! ``` -use std::net::{IpAddr, Ipv4Addr, SocketAddr}; -use std::path::PathBuf; +mod server; use clap::Parser; +use tracing_subscriber::EnvFilter; use nvisy_server::middleware::{OpenApiConfig, RecoveryConfig, SecurityConfig}; -/// nvisy API server. -#[derive(Debug, Parser)] -#[command(name = "nvisy-server", version, about)] -pub struct ServerConfig { - /// Address to bind the HTTP listener to. - #[arg(long, env = "HOST", default_value_t = IpAddr::V4(Ipv4Addr::UNSPECIFIED))] - pub host: IpAddr, - - /// Port to bind the HTTP listener to. - #[arg(long, env = "PORT", default_value_t = 8080)] - pub port: u16, - - /// Directory for temporary content storage. - /// - /// Defaults to `$TMPDIR/nvisy-server-content` if not set. - #[arg(long, env = "CONTENT_DIR")] - pub content_dir: Option, +pub use server::ServerConfig; - /// Tracing filter directive (e.g. `info`, `nvisy_server=debug`). - #[arg(long, env = "RUST_LOG", default_value = "info")] - pub log_level: String, +/// Complete CLI configuration. 
+/// +/// Combines all configuration groups for the nvisy server: +/// - [`ServerConfig`]: Network binding and content directory +/// - Middleware settings: Body limits, timeouts, OpenAPI +#[derive(Debug, Parser)] +#[command(name = "nvisy-server", version, about = "nvisy API server")] +pub struct Cli { + /// Server network and lifecycle configuration. + #[command(flatten)] + pub server: ServerConfig, /// Maximum body size in bytes for axum extractors (Json, Form, etc.). #[arg(long, env = "BODY_LIMIT_BYTES", default_value_t = 2 * 1024 * 1024)] @@ -46,19 +57,7 @@ pub struct ServerConfig { pub request_timeout_secs: u64, } -impl ServerConfig { - /// Returns the socket address to bind the listener to. - pub fn socket_addr(&self) -> SocketAddr { - SocketAddr::new(self.host, self.port) - } - - /// Returns the content directory, falling back to a temp directory. - pub fn content_dir(&self) -> PathBuf { - self.content_dir - .clone() - .unwrap_or_else(|| std::env::temp_dir().join("nvisy-server-content")) - } - +impl Cli { /// Builds a [`SecurityConfig`] from the parsed CLI values. pub fn security_config(&self) -> SecurityConfig { SecurityConfig { @@ -78,4 +77,17 @@ impl ServerConfig { pub fn open_api_config(&self) -> OpenApiConfig { OpenApiConfig::default() } + + /// Initializes tracing with environment-based filtering. + /// + /// Uses `RUST_LOG` if set, otherwise defaults to `info`. + pub fn init_tracing() { + let filter = + EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info")); + + tracing_subscriber::fmt() + .with_env_filter(filter) + .json() + .init(); + } } diff --git a/crates/nvisy-cli/src/config/server.rs b/crates/nvisy-cli/src/config/server.rs new file mode 100644 index 0000000..2c688ee --- /dev/null +++ b/crates/nvisy-cli/src/config/server.rs @@ -0,0 +1,63 @@ +//! HTTP server network and lifecycle configuration. +//! +//! # Environment Variables +//! +//! - `HOST` — Server host address (default: `0.0.0.0`) +//! 
- `PORT` — Server port (default: `8080`) +//! - `CONTENT_DIR` — Temporary content storage directory +//! - `SHUTDOWN_TIMEOUT` — Graceful shutdown timeout in seconds (default: `30`) + +use std::net::{IpAddr, Ipv4Addr, SocketAddr}; +use std::path::PathBuf; +use std::time::Duration; + +use clap::Args; + +/// HTTP server network and lifecycle configuration. +/// +/// Controls how the server binds to network interfaces, where +/// temporary content is stored, and graceful shutdown behavior. +#[derive(Debug, Clone, Args)] +pub struct ServerConfig { + /// Host address to bind the server to. + /// + /// Use `127.0.0.1` for localhost only, `0.0.0.0` for all interfaces. + #[arg(long, env = "HOST", default_value_t = IpAddr::V4(Ipv4Addr::UNSPECIFIED))] + pub host: IpAddr, + + /// TCP port number for the server to listen on. + #[arg(short = 'p', long, env = "PORT", default_value_t = 8080)] + pub port: u16, + + /// Directory for temporary content storage. + /// + /// Defaults to `$TMPDIR/nvisy-server-content` if not set. + #[arg(long, env = "CONTENT_DIR")] + pub content_dir: Option, + + /// Maximum time in seconds to wait for graceful shutdown. + /// + /// During shutdown, the server stops accepting new connections and waits + /// for existing requests to complete before forcefully terminating. + #[arg(long, env = "SHUTDOWN_TIMEOUT", default_value_t = 30)] + pub shutdown_timeout: u64, +} + +impl ServerConfig { + /// Returns the socket address for server binding. + pub fn socket_addr(&self) -> SocketAddr { + SocketAddr::new(self.host, self.port) + } + + /// Returns the content directory, falling back to a temp directory. + pub fn content_dir(&self) -> PathBuf { + self.content_dir + .clone() + .unwrap_or_else(|| std::env::temp_dir().join("nvisy-server-content")) + } + + /// Returns the graceful shutdown timeout as a [`Duration`]. 
+ pub fn shutdown_timeout(&self) -> Duration { + Duration::from_secs(self.shutdown_timeout) + } +} diff --git a/crates/nvisy-cli/src/main.rs b/crates/nvisy-cli/src/main.rs index d1e8c0c..8b2125a 100644 --- a/crates/nvisy-cli/src/main.rs +++ b/crates/nvisy-cli/src/main.rs @@ -1,32 +1,55 @@ -//! nvisy API server entry point. -//! -//! Parses CLI arguments, initialises tracing, constructs application state, -//! and starts the HTTP server with graceful shutdown support. - -use clap::Parser; -use nvisy_core::fs::ContentRegistry; -use tracing_subscriber::EnvFilter; +#![forbid(unsafe_code)] +#![cfg_attr(docsrs, feature(doc_cfg))] +#![doc = include_str!("../README.md")] mod config; mod server; -use config::ServerConfig; +use std::process; + +use axum::Router; +use clap::Parser; +use nvisy_core::fs::ContentRegistry; +use nvisy_server::middleware::*; +use nvisy_server::ServiceState; + +use crate::config::Cli; #[tokio::main] async fn main() { - let config = ServerConfig::parse(); + let Err(error) = run().await else { + process::exit(0); + }; + + if tracing::enabled!(tracing::Level::ERROR) { + tracing::error!(error = %error, "application terminated with error"); + } else { + eprintln!("Error: {error:#}"); + } + + process::exit(1); +} + +/// Main application entry point. 
+async fn run() -> anyhow::Result<()> { + let cli = Cli::parse(); + Cli::init_tracing(); - tracing_subscriber::fmt() - .with_env_filter( - EnvFilter::try_from_default_env() - .unwrap_or_else(|_| EnvFilter::new(&config.log_level)), - ) - .json() - .init(); + // Initialize application state + let content_registry = ContentRegistry::new(cli.server.content_dir()); + let state = ServiceState::new(content_registry); - let content_registry = ContentRegistry::new(config.content_dir()); - let state = nvisy_server::ServiceState::new(content_registry); - let app = server::build_router(&config, state); + // Build and run + let router = create_router(&cli, state); + server::run(&cli.server, router).await +} - server::run(&config, app).await; +/// Creates the router with all middleware layers applied. +fn create_router(cli: &Cli, state: ServiceState) -> Router { + nvisy_server::routes() + .with_open_api(&cli.open_api_config()) + .with_recovery(&cli.recovery_config()) + .with_observability() + .with_security(&cli.security_config()) + .with_state(state) } diff --git a/crates/nvisy-cli/src/server/listen.rs b/crates/nvisy-cli/src/server/listen.rs new file mode 100644 index 0000000..207a9f6 --- /dev/null +++ b/crates/nvisy-cli/src/server/listen.rs @@ -0,0 +1,40 @@ +//! TCP listener binding and graceful server lifecycle. + +use std::path::Path; + +use tokio::net::TcpListener; + +use crate::config::ServerConfig; + +use super::shutdown; + +/// Binds a TCP listener, serves the application, and cleans up on shutdown. +/// +/// Blocks until a shutdown signal (SIGINT or SIGTERM) is received. After the +/// server stops, it removes the temporary content directory if one was created. 
+pub async fn run(config: &ServerConfig, app: axum::Router) -> anyhow::Result<()> { + let addr = config.socket_addr(); + let listener = TcpListener::bind(addr).await?; + + tracing::info!(%addr, "listening"); + + let shutdown = shutdown::shutdown_signal(config.shutdown_timeout()); + + axum::serve(listener, app) + .with_graceful_shutdown(shutdown) + .await?; + + cleanup_content_dir(&config.content_dir()); + Ok(()) +} + +/// Removes the temporary content directory after graceful shutdown. +fn cleanup_content_dir(path: &Path) { + if !path.exists() { + return; + } + match std::fs::remove_dir_all(path) { + Ok(()) => tracing::info!(path = %path.display(), "content directory cleaned up"), + Err(e) => tracing::warn!(path = %path.display(), "failed to clean up content directory: {e}"), + } +} diff --git a/crates/nvisy-cli/src/server/mod.rs b/crates/nvisy-cli/src/server/mod.rs index 4bc4527..b0c9e2c 100644 --- a/crates/nvisy-cli/src/server/mod.rs +++ b/crates/nvisy-cli/src/server/mod.rs @@ -1,83 +1,9 @@ -//! Server lifecycle: router construction, TCP listener, and graceful shutdown. +//! Server lifecycle: TCP listener and graceful shutdown. -use std::path::Path; +mod listen; +mod shutdown; -use tokio::net::TcpListener; +/// Tracing target for shutdown events. +pub const TRACING_TARGET_SHUTDOWN: &str = "nvisy_cli::server::shutdown"; -use nvisy_server::middleware::{ - RouterObservabilityExt, RouterOpenApiExt, RouterRecoveryExt, RouterSecurityExt, -}; -use nvisy_server::ServiceState; - -use crate::config::ServerConfig; - -/// Builds the application router with all middleware layers applied. -pub fn build_router(config: &ServerConfig, state: ServiceState) -> axum::Router { - nvisy_server::routes() - .with_open_api(&config.open_api_config()) - .with_recovery(&config.recovery_config()) - .with_observability() - .with_security(&config.security_config()) - .with_state(state) -} - -/// Binds a TCP listener, serves the application, and cleans up on shutdown. 
-/// -/// Blocks until a shutdown signal (SIGINT or SIGTERM) is received. After the -/// server stops, it removes the temporary content directory if one was created. -pub async fn run(config: &ServerConfig, app: axum::Router) { - let addr = config.socket_addr(); - - let listener = TcpListener::bind(addr).await.unwrap_or_else(|e| { - panic!("failed to bind to {addr}: {e}"); - }); - - tracing::info!(%addr, "listening"); - - axum::serve(listener, app) - .with_graceful_shutdown(shutdown_signal()) - .await - .unwrap_or_else(|e| { - panic!("server error: {e}"); - }); - - cleanup_content_dir(&config.content_dir()); -} - -/// Waits for SIGINT (Ctrl+C) or SIGTERM to initiate graceful shutdown. -async fn shutdown_signal() { - let ctrl_c = async { - tokio::signal::ctrl_c() - .await - .expect("failed to install Ctrl+C handler"); - }; - - #[cfg(unix)] - let terminate = async { - tokio::signal::unix::signal(tokio::signal::unix::SignalKind::terminate()) - .expect("failed to install SIGTERM handler") - .recv() - .await; - }; - - #[cfg(not(unix))] - let terminate = std::future::pending::<()>(); - - tokio::select! { - () = ctrl_c => {}, - () = terminate => {}, - } - - tracing::info!("shutdown signal received"); -} - -/// Removes the temporary content directory after graceful shutdown. -fn cleanup_content_dir(path: &Path) { - if !path.exists() { - return; - } - match std::fs::remove_dir_all(path) { - Ok(()) => tracing::info!(path = %path.display(), "content directory cleaned up"), - Err(e) => tracing::warn!(path = %path.display(), "failed to clean up content directory: {e}"), - } -} +pub use listen::run; diff --git a/crates/nvisy-cli/src/server/shutdown.rs b/crates/nvisy-cli/src/server/shutdown.rs new file mode 100644 index 0000000..a6bb00a --- /dev/null +++ b/crates/nvisy-cli/src/server/shutdown.rs @@ -0,0 +1,65 @@ +//! Graceful shutdown signal handling. 
+ +use std::time::Duration; + +use tokio::signal::ctrl_c; +#[cfg(unix)] +use tokio::signal::unix; + +use super::TRACING_TARGET_SHUTDOWN; + +/// Waits for a shutdown signal (SIGTERM or SIGINT/Ctrl+C). +/// +/// Listens for OS termination signals and returns when one is received. +/// The `shutdown_timeout` is logged to inform operators how long cleanup +/// will wait before the process is forcefully terminated. +pub async fn shutdown_signal(shutdown_timeout: Duration) { + let ctrl_c = async { + if let Err(e) = ctrl_c().await { + tracing::error!( + target: TRACING_TARGET_SHUTDOWN, + error = %e, + "failed to install Ctrl+C handler" + ); + } else { + tracing::info!( + target: TRACING_TARGET_SHUTDOWN, + "received Ctrl+C signal, initiating graceful shutdown" + ); + } + }; + + #[cfg(unix)] + let terminate = async { + match unix::signal(unix::SignalKind::terminate()) { + Ok(mut signal) => { + signal.recv().await; + tracing::info!( + target: TRACING_TARGET_SHUTDOWN, + "received SIGTERM signal, initiating graceful shutdown" + ); + } + Err(e) => { + tracing::error!( + target: TRACING_TARGET_SHUTDOWN, + error = %e, + "failed to install SIGTERM handler" + ); + } + } + }; + + #[cfg(not(unix))] + let terminate = std::future::pending::<()>(); + + tokio::select! { + () = ctrl_c => {}, + () = terminate => {}, + } + + tracing::info!( + target: TRACING_TARGET_SHUTDOWN, + timeout_secs = shutdown_timeout.as_secs(), + "graceful shutdown initiated" + ); +} From d30fcc89e602ee35fe3e65573799645ccf87cf0b Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Tue, 24 Feb 2026 20:50:36 +0100 Subject: [PATCH 03/24] refactor(identify): restructure by detection method, extract ocr/asr crates Reorganize nvisy-identify from modality-based layout (text/, image/) to detection-method-based layout (pattern/, ner/, llm/, vision/, audio/, fusion/) so the module structure mirrors identification strategies. 
- Create nvisy-ocr crate: OcrBackend trait, config, parsing, PythonBridge - Create nvisy-asr crate: TranscribeBackend trait, config, parsing, PythonBridge - Add LlmBackend trait and parse_llm_entities to nvisy-rig - Update nvisy-augment to import from nvisy-ocr/nvisy-asr - Add LLM contextual detection layer (llm/detection.rs, llm/prompt.rs) - Add OCR detection layer (vision/ocr.rs) - Add audio transcript+NER composite layer (audio/transcript.rs) - Add ensemble fusion with MaxConfidence/WeightedAverage/NoisyOr strategies - Remove stale nvisy-object workspace references - Sort workspace members, deps, Dockerfile crate lists, and changelog Co-Authored-By: Claude Opus 4.6 --- CHANGELOG.md | 20 +- Cargo.lock | 136 ++----- Cargo.toml | 9 +- crates/nvisy-asr/Cargo.toml | 34 ++ crates/nvisy-asr/README.md | 25 ++ crates/nvisy-asr/src/backend.rs | 34 ++ crates/nvisy-asr/src/bridge.rs | 28 ++ crates/nvisy-asr/src/lib.rs | 10 + crates/nvisy-asr/src/parse.rs | 67 ++++ crates/nvisy-augment/Cargo.toml | 2 + crates/nvisy-augment/README.md | 2 +- crates/nvisy-augment/src/ocr.rs | 96 +---- crates/nvisy-augment/src/transcribe.rs | 119 +----- crates/nvisy-identify/Cargo.toml | 7 +- crates/nvisy-identify/README.md | 12 +- crates/nvisy-identify/src/audio/mod.rs | 5 + crates/nvisy-identify/src/audio/transcript.rs | 198 ++++++++++ .../src/{action => fusion}/dedup.rs | 0 crates/nvisy-identify/src/fusion/ensemble.rs | 209 +++++++++++ .../src/{action => fusion}/manual.rs | 0 .../src/{action => fusion}/mod.rs | 4 +- crates/nvisy-identify/src/lib.rs | 22 +- crates/nvisy-identify/src/llm/detection.rs | 234 ++++++++++++ crates/nvisy-identify/src/llm/mod.rs | 7 + crates/nvisy-identify/src/llm/prompt.rs | 26 ++ crates/nvisy-identify/src/ner/backend.rs | 2 +- .../src/{image/ner.rs => ner/image.rs} | 2 +- crates/nvisy-identify/src/ner/mod.rs | 7 +- .../src/{text/ner.rs => ner/text.rs} | 4 +- .../src/{text/pattern.rs => pattern/mod.rs} | 0 crates/nvisy-identify/src/text/mod.rs | 7 - .../src/{image => 
vision}/face.rs | 0 .../src/{image => vision}/mod.rs | 6 +- .../src/{image => vision}/object.rs | 0 crates/nvisy-identify/src/vision/ocr.rs | 105 ++++++ crates/nvisy-object/Cargo.toml | 46 --- crates/nvisy-object/src/client/get_output.rs | 15 - crates/nvisy-object/src/client/mod.rs | 340 ------------------ crates/nvisy-object/src/client/put_output.rs | 20 -- crates/nvisy-object/src/lib.rs | 12 - crates/nvisy-object/src/prelude.rs | 8 - crates/nvisy-object/src/providers/azure.rs | 76 ---- crates/nvisy-object/src/providers/gcs.rs | 58 --- crates/nvisy-object/src/providers/mod.rs | 11 - crates/nvisy-object/src/providers/provider.rs | 38 -- crates/nvisy-object/src/providers/s3.rs | 86 ----- crates/nvisy-object/src/streams/mod.rs | 11 - .../nvisy-object/src/streams/read_object.rs | 147 -------- .../nvisy-object/src/streams/source_stream.rs | 34 -- .../nvisy-object/src/streams/target_stream.rs | 34 -- .../nvisy-object/src/streams/write_object.rs | 138 ------- crates/nvisy-ocr/Cargo.toml | 34 ++ crates/{nvisy-object => nvisy-ocr}/README.md | 6 +- crates/nvisy-ocr/src/backend.rs | 31 ++ crates/nvisy-ocr/src/bridge.rs | 27 ++ crates/nvisy-ocr/src/lib.rs | 10 + crates/nvisy-ocr/src/parse.rs | 49 +++ crates/nvisy-pattern/src/lib.rs | 5 +- crates/nvisy-rig/Cargo.toml | 4 + crates/nvisy-rig/src/backend.rs | 34 ++ crates/nvisy-rig/src/lib.rs | 6 + crates/nvisy-rig/src/parse.rs | 88 +++++ docker/Dockerfile | 16 +- 63 files changed, 1378 insertions(+), 1445 deletions(-) create mode 100644 crates/nvisy-asr/Cargo.toml create mode 100644 crates/nvisy-asr/README.md create mode 100644 crates/nvisy-asr/src/backend.rs create mode 100644 crates/nvisy-asr/src/bridge.rs create mode 100644 crates/nvisy-asr/src/lib.rs create mode 100644 crates/nvisy-asr/src/parse.rs create mode 100644 crates/nvisy-identify/src/audio/mod.rs create mode 100644 crates/nvisy-identify/src/audio/transcript.rs rename crates/nvisy-identify/src/{action => fusion}/dedup.rs (100%) create mode 100644 
crates/nvisy-identify/src/fusion/ensemble.rs rename crates/nvisy-identify/src/{action => fusion}/manual.rs (100%) rename crates/nvisy-identify/src/{action => fusion}/mod.rs (53%) create mode 100644 crates/nvisy-identify/src/llm/detection.rs create mode 100644 crates/nvisy-identify/src/llm/mod.rs create mode 100644 crates/nvisy-identify/src/llm/prompt.rs rename crates/nvisy-identify/src/{image/ner.rs => ner/image.rs} (98%) rename crates/nvisy-identify/src/{text/ner.rs => ner/text.rs} (98%) rename crates/nvisy-identify/src/{text/pattern.rs => pattern/mod.rs} (100%) delete mode 100644 crates/nvisy-identify/src/text/mod.rs rename crates/nvisy-identify/src/{image => vision}/face.rs (100%) rename crates/nvisy-identify/src/{image => vision}/mod.rs (61%) rename crates/nvisy-identify/src/{image => vision}/object.rs (100%) create mode 100644 crates/nvisy-identify/src/vision/ocr.rs delete mode 100644 crates/nvisy-object/Cargo.toml delete mode 100644 crates/nvisy-object/src/client/get_output.rs delete mode 100644 crates/nvisy-object/src/client/mod.rs delete mode 100644 crates/nvisy-object/src/client/put_output.rs delete mode 100644 crates/nvisy-object/src/lib.rs delete mode 100644 crates/nvisy-object/src/prelude.rs delete mode 100644 crates/nvisy-object/src/providers/azure.rs delete mode 100644 crates/nvisy-object/src/providers/gcs.rs delete mode 100644 crates/nvisy-object/src/providers/mod.rs delete mode 100644 crates/nvisy-object/src/providers/provider.rs delete mode 100644 crates/nvisy-object/src/providers/s3.rs delete mode 100644 crates/nvisy-object/src/streams/mod.rs delete mode 100644 crates/nvisy-object/src/streams/read_object.rs delete mode 100644 crates/nvisy-object/src/streams/source_stream.rs delete mode 100644 crates/nvisy-object/src/streams/target_stream.rs delete mode 100644 crates/nvisy-object/src/streams/write_object.rs create mode 100644 crates/nvisy-ocr/Cargo.toml rename crates/{nvisy-object => nvisy-ocr}/README.md (68%) create mode 100644 
crates/nvisy-ocr/src/backend.rs create mode 100644 crates/nvisy-ocr/src/bridge.rs create mode 100644 crates/nvisy-ocr/src/lib.rs create mode 100644 crates/nvisy-ocr/src/parse.rs create mode 100644 crates/nvisy-rig/src/backend.rs create mode 100644 crates/nvisy-rig/src/parse.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index 796728d..3515684 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,12 +21,18 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Crates -- **nvisy-core** - Domain types, error types, and plugin trait system -- **nvisy-engine** - DAG compiler, executor, and connection routing -- **nvisy-codec** - File-format handlers with span-based content access -- **nvisy-object** - Cloud storage providers and streaming I/O -- **nvisy-pattern** - Detection patterns, dictionaries, and validators -- **nvisy-pipeline** - Detection, redaction, generation actions, and audit trails -- **nvisy-python** - PyO3 bridge for Python NER models +- **nvisy-asr:** ASR/speech-to-text backend trait and provider integration +- **nvisy-augment:** Content augmentation actions (OCR, transcription, synthetic data) +- **nvisy-cli:** CLI entry point for the nvisy API server +- **nvisy-codec:** File-format codecs — read, edit, and write documents +- **nvisy-core:** Domain types, traits, and errors +- **nvisy-engine:** DAG compiler and executor for pipeline graphs +- **nvisy-identify:** Entity ontology types and detection layers +- **nvisy-ocr:** OCR backend trait and provider integration +- **nvisy-ontology:** Domain data types, entity taxonomy, and spatial primitives +- **nvisy-pattern:** Built-in regex patterns and dictionaries for PII/PHI detection +- **nvisy-python:** PyO3 bridge for AI NER/OCR detection via embedded Python +- **nvisy-rig:** LLM/VLM-driven detection, redaction, and OCR backends +- **nvisy-server:** HTTP server exposing the Engine pipeline via REST endpoints [Unreleased]: https://github.com/nvisycom/runtime/commits/main diff 
--git a/Cargo.lock b/Cargo.lock index 594a858..9045465 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -622,7 +622,6 @@ checksum = "fac4744fb15ae8337dc853fee7fb3f4e48c0fbaa23d0afe49c447b4fab126118" dependencies = [ "iana-time-zone", "num-traits", - "serde", "windows-link", ] @@ -1696,12 +1695,6 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" -[[package]] -name = "humantime" -version = "2.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "135b12329e5e3ce057a9f972339ea52bc954fe1e9358ef27f95e89716fbc5424" - [[package]] name = "hyper" version = "1.8.1" @@ -1735,7 +1728,6 @@ dependencies = [ "hyper", "hyper-util", "rustls", - "rustls-native-certs", "rustls-pki-types", "tokio", "tokio-rustls", @@ -2647,14 +2639,27 @@ dependencies = [ "libm", ] +[[package]] +name = "nvisy-asr" +version = "0.1.0" +dependencies = [ + "async-trait", + "nvisy-core", + "nvisy-ontology", + "nvisy-python", + "serde_json", +] + [[package]] name = "nvisy-augment" version = "0.1.0" dependencies = [ "async-trait", "bytes", + "nvisy-asr", "nvisy-codec", "nvisy-core", + "nvisy-ocr", "nvisy-ontology", "nvisy-python", "nvisy-rig", @@ -2756,14 +2761,16 @@ name = "nvisy-identify" version = "0.1.0" dependencies = [ "async-trait", + "bytes", "jiff", + "nvisy-asr", "nvisy-codec", "nvisy-core", + "nvisy-ocr", "nvisy-ontology", "nvisy-pattern", "nvisy-python", "nvisy-rig", - "regex", "schemars", "semver", "serde", @@ -2775,17 +2782,14 @@ dependencies = [ ] [[package]] -name = "nvisy-object" +name = "nvisy-ocr" version = "0.1.0" dependencies = [ "async-trait", - "bytes", - "futures", "nvisy-core", - "object_store", - "serde", - "tokio", - "tracing", + "nvisy-ontology", + "nvisy-python", + "serde_json", ] [[package]] @@ -2838,9 +2842,11 @@ dependencies = [ "async-trait", "nvisy-codec", "nvisy-core", + "nvisy-ontology", "rig-core", "serde", "serde_json", + "tracing", ] 
[[package]] @@ -2862,43 +2868,6 @@ dependencies = [ "uuid", ] -[[package]] -name = "object_store" -version = "0.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2858065e55c148d294a9f3aae3b0fa9458edadb41a108397094566f4e3c0dfb" -dependencies = [ - "async-trait", - "base64", - "bytes", - "chrono", - "form_urlencoded", - "futures", - "http", - "http-body-util", - "httparse", - "humantime", - "hyper", - "itertools", - "md-5", - "parking_lot", - "percent-encoding", - "quick-xml 0.38.4", - "rand 0.9.2", - "reqwest 0.12.28", - "ring", - "rustls-pki-types", - "serde", - "serde_json", - "serde_urlencoded", - "thiserror 2.0.18", - "tokio", - "tracing", - "url", - "wasm-bindgen-futures", - "web-time", -] - [[package]] name = "once_cell" version = "1.21.3" @@ -3363,7 +3332,6 @@ checksum = "b66c2058c55a409d601666cffe35f04333cf1013010882cec174a7467cd4e21c" dependencies = [ "encoding_rs", "memchr", - "serde", ] [[package]] @@ -3663,48 +3631,6 @@ version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a96887878f22d7bad8a3b6dc5b7440e0ada9a245242924394987b21cf2210a4c" -[[package]] -name = "reqwest" -version = "0.12.28" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147" -dependencies = [ - "base64", - "bytes", - "futures-core", - "futures-util", - "h2", - "http", - "http-body", - "http-body-util", - "hyper", - "hyper-rustls", - "hyper-util", - "js-sys", - "log", - "percent-encoding", - "pin-project-lite", - "quinn", - "rustls", - "rustls-native-certs", - "rustls-pki-types", - "serde", - "serde_json", - "serde_urlencoded", - "sync_wrapper", - "tokio", - "tokio-rustls", - "tokio-util", - "tower", - "tower-http", - "tower-service", - "url", - "wasm-bindgen", - "wasm-bindgen-futures", - "wasm-streams 0.4.2", - "web-sys", -] - [[package]] name = "reqwest" version = "0.13.2" @@ -3745,7 +3671,7 @@ dependencies = [ "url", 
"wasm-bindgen", "wasm-bindgen-futures", - "wasm-streams 0.5.0", + "wasm-streams", "web-sys", ] @@ -3776,7 +3702,7 @@ dependencies = [ "nanoid", "ordered-float", "pin-project-lite", - "reqwest 0.13.2", + "reqwest", "rig-derive", "schemars", "serde", @@ -3876,7 +3802,6 @@ checksum = "c665f33d38cea657d9614f766881e4d510e0eda4239891eea56b4cadcf01801b" dependencies = [ "aws-lc-rs", "once_cell", - "ring", "rustls-pki-types", "rustls-webpki", "subtle", @@ -5078,19 +5003,6 @@ dependencies = [ "wasmparser", ] -[[package]] -name = "wasm-streams" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65" -dependencies = [ - "futures-util", - "js-sys", - "wasm-bindgen", - "wasm-bindgen-futures", - "web-sys", -] - [[package]] name = "wasm-streams" version = "0.5.0" diff --git a/Cargo.toml b/Cargo.toml index 1a8067a..5c36cf5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,13 +3,14 @@ [workspace] resolver = "2" members = [ + "./crates/nvisy-asr", "./crates/nvisy-augment", "./crates/nvisy-cli", "./crates/nvisy-codec", "./crates/nvisy-core", "./crates/nvisy-engine", "./crates/nvisy-identify", - "./crates/nvisy-object", + "./crates/nvisy-ocr", "./crates/nvisy-ontology", "./crates/nvisy-pattern", "./crates/nvisy-python", @@ -36,12 +37,13 @@ documentation = "https://docs.rs/nvisy-runtime" # See for more details: https://github.com/rust-lang/cargo/issues/11329 # Internal crates +nvisy-asr = { path = "./crates/nvisy-asr", version = "0.1.0" } nvisy-augment = { path = "./crates/nvisy-augment", version = "0.1.0" } nvisy-codec = { path = "./crates/nvisy-codec", version = "0.1.0" } nvisy-core = { path = "./crates/nvisy-core", version = "0.1.0" } nvisy-engine = { path = "./crates/nvisy-engine", version = "0.1.0" } nvisy-identify = { path = "./crates/nvisy-identify", version = "0.1.0" } -nvisy-object = { path = "./crates/nvisy-object", version = "0.1.0" } +nvisy-ocr = { path = 
"./crates/nvisy-ocr", version = "0.1.0" } nvisy-ontology = { path = "./crates/nvisy-ontology", version = "0.1.0" } nvisy-pattern = { path = "./crates/nvisy-pattern", version = "0.1.0" } nvisy-python = { path = "./crates/nvisy-python", version = "0.1.0" } @@ -110,9 +112,6 @@ imageproc = { version = "0.26", features = [] } # Compile-time asset embedding include_dir = { version = "0.7", features = [] } -# Cloud object storage (S3, Azure Blob, GCS) -object_store = { version = "0.13", default-features = false } - # Python interop pyo3 = { version = "0.24", features = [] } pyo3-async-runtimes = { version = "0.24", features = ["tokio-runtime"] } diff --git a/crates/nvisy-asr/Cargo.toml b/crates/nvisy-asr/Cargo.toml new file mode 100644 index 0000000..b8ff004 --- /dev/null +++ b/crates/nvisy-asr/Cargo.toml @@ -0,0 +1,34 @@ +# https://doc.rust-lang.org/cargo/reference/manifest.html + +[package] +name = "nvisy-asr" +description = "ASR/speech-to-text backend trait and provider integration for Nvisy" +keywords = ["nvisy", "asr", "speech", "transcription"] +categories = ["multimedia::audio"] + +version = { workspace = true } +rust-version = { workspace = true } +edition = { workspace = true } +license = { workspace = true } +publish = { workspace = true } + +authors = { workspace = true } +repository = { workspace = true } +homepage = { workspace = true } +documentation = { workspace = true } + +[package.metadata.docs.rs] +all-features = true +rustdoc-args = ["--cfg", "docsrs"] + +[dependencies] +# Internal crates +nvisy-core = { workspace = true, features = [] } +nvisy-ontology = { workspace = true, features = [] } +nvisy-python = { workspace = true, features = [] } + +# (De)serialization +serde_json = { workspace = true, features = [] } + +# Async runtime +async-trait = { workspace = true, features = [] } diff --git a/crates/nvisy-asr/README.md b/crates/nvisy-asr/README.md new file mode 100644 index 0000000..d725219 --- /dev/null +++ b/crates/nvisy-asr/README.md @@ -0,0 +1,25 
@@ +# nvisy-asr + +[![Build](https://img.shields.io/github/actions/workflow/status/nvisycom/runtime/build.yml?branch=main&label=build%20%26%20test&style=flat-square)](https://github.com/nvisycom/runtime/actions/workflows/build.yml) + +ASR/speech-to-text backend trait and provider integration for the Nvisy runtime. + +Defines the `TranscribeBackend` trait for automatic speech recognition providers, configuration types, result parsing from raw JSON into entity types, and a `PythonBridge` implementation that delegates to the `nvisy_ai` Python module. + +## Documentation + +See [`docs/`](../../docs/) for architecture, security, and API documentation. + +## Changelog + +See [CHANGELOG.md](../../CHANGELOG.md) for release notes and version history. + +## License + +Apache 2.0 License, see [LICENSE.txt](../../LICENSE.txt) + +## Support + +- **Documentation**: [docs.nvisy.com](https://docs.nvisy.com) +- **Issues**: [GitHub Issues](https://github.com/nvisycom/runtime/issues) +- **Email**: [support@nvisy.com](mailto:support@nvisy.com) diff --git a/crates/nvisy-asr/src/backend.rs b/crates/nvisy-asr/src/backend.rs new file mode 100644 index 0000000..8ee4dc7 --- /dev/null +++ b/crates/nvisy-asr/src/backend.rs @@ -0,0 +1,34 @@ +//! Transcription backend trait and configuration. + +use serde_json::Value; + +use nvisy_core::Error; + +/// Configuration passed to a [`TranscribeBackend`] implementation. +#[derive(Debug, Clone)] +pub struct TranscribeConfig { + /// BCP-47 language tag for transcription. + pub language: String, + /// Whether to perform speaker diarization. + pub enable_speaker_diarization: bool, + /// Minimum confidence threshold for results. + pub confidence_threshold: f64, +} + +/// Backend trait for transcription providers. +/// +/// Implementations call an external speech-to-text service and return +/// raw JSON results. Entity construction is handled by the consuming crate. 
+#[async_trait::async_trait] +pub trait TranscribeBackend: Send + Sync + 'static { + /// Transcribe audio bytes, returning raw dicts. + /// + /// Each dict should contain: `text`, `start_time`, `end_time`, `confidence`, + /// and optionally `speaker_id`. + async fn transcribe( + &self, + audio_data: &[u8], + mime_type: &str, + config: &TranscribeConfig, + ) -> Result, Error>; +} diff --git a/crates/nvisy-asr/src/bridge.rs b/crates/nvisy-asr/src/bridge.rs new file mode 100644 index 0000000..8edb249 --- /dev/null +++ b/crates/nvisy-asr/src/bridge.rs @@ -0,0 +1,28 @@ +//! [`TranscribeBackend`] implementation for [`PythonBridge`]. + +use serde_json::Value; + +use nvisy_core::Error; +use nvisy_python::bridge::PythonBridge; +use nvisy_python::transcribe::TranscribeParams; + +use crate::backend::{TranscribeBackend, TranscribeConfig}; + +/// Converts [`TranscribeConfig`] to [`TranscribeParams`] and delegates to +/// `nvisy_python::transcribe`. +#[async_trait::async_trait] +impl TranscribeBackend for PythonBridge { + async fn transcribe( + &self, + audio_data: &[u8], + mime_type: &str, + config: &TranscribeConfig, + ) -> Result, Error> { + let params = TranscribeParams { + language: config.language.clone(), + enable_speaker_diarization: config.enable_speaker_diarization, + confidence_threshold: config.confidence_threshold, + }; + nvisy_python::transcribe::transcribe(self, audio_data, mime_type, ¶ms).await + } +} diff --git a/crates/nvisy-asr/src/lib.rs b/crates/nvisy-asr/src/lib.rs new file mode 100644 index 0000000..7e3e178 --- /dev/null +++ b/crates/nvisy-asr/src/lib.rs @@ -0,0 +1,10 @@ +#![forbid(unsafe_code)] +#![cfg_attr(docsrs, feature(doc_cfg))] +#![doc = include_str!("../README.md")] + +mod backend; +mod bridge; +mod parse; + +pub use backend::{TranscribeBackend, TranscribeConfig}; +pub use parse::parse_transcribe_entities; diff --git a/crates/nvisy-asr/src/parse.rs b/crates/nvisy-asr/src/parse.rs new file mode 100644 index 0000000..b23c8b3 --- /dev/null +++ 
b/crates/nvisy-asr/src/parse.rs @@ -0,0 +1,67 @@ +//! Transcription result parsing. + +use serde_json::Value; + +use nvisy_core::math::TimeSpan; +use nvisy_core::Error; +use nvisy_ontology::entity::{DetectionMethod, Entity, EntityCategory, EntityKind}; +use nvisy_ontology::location::{AudioLocation, Location}; + +/// Parse raw JSON dicts from a transcription backend into [`Entity`] values. +/// +/// Expected dict keys: `text`, `start_time`, `end_time`, `confidence`, +/// and optionally `speaker_id`. +pub fn parse_transcribe_entities(raw: &[Value]) -> Result, Error> { + let mut entities = Vec::new(); + + for item in raw { + let obj = item.as_object().ok_or_else(|| { + Error::python("Expected JSON object in transcription results".to_string()) + })?; + + let text = obj + .get("text") + .and_then(Value::as_str) + .ok_or_else(|| Error::python("Missing 'text' in transcription result".to_string()))?; + + let start_time = obj + .get("start_time") + .and_then(Value::as_f64) + .ok_or_else(|| Error::python("Missing 'start_time'".to_string()))?; + + let end_time = obj + .get("end_time") + .and_then(Value::as_f64) + .ok_or_else(|| Error::python("Missing 'end_time'".to_string()))?; + + let confidence = obj + .get("confidence") + .and_then(Value::as_f64) + .unwrap_or(0.0); + + let speaker_id = obj + .get("speaker_id") + .and_then(Value::as_str) + .map(String::from); + + let entity = Entity::new( + EntityCategory::Pii, + EntityKind::PersonName, + text, + DetectionMethod::SpeechTranscript, + confidence, + ) + .with_location(Location::Audio(AudioLocation { + time_span: TimeSpan { + start_secs: start_time, + end_secs: end_time, + }, + speaker_id, + audio_id: None, + })); + + entities.push(entity); + } + + Ok(entities) +} diff --git a/crates/nvisy-augment/Cargo.toml b/crates/nvisy-augment/Cargo.toml index f543f92..3aa69d9 100644 --- a/crates/nvisy-augment/Cargo.toml +++ b/crates/nvisy-augment/Cargo.toml @@ -28,6 +28,8 @@ nvisy-ontology = { workspace = true, features = [] } nvisy-codec 
= { workspace = true, features = [] } nvisy-python = { workspace = true, features = [] } nvisy-rig = { workspace = true, features = [] } +nvisy-ocr = { workspace = true, features = [] } +nvisy-asr = { workspace = true, features = [] } # (De)serialization serde = { workspace = true, features = ["derive"] } diff --git a/crates/nvisy-augment/README.md b/crates/nvisy-augment/README.md index 9bb6ffb..8fa28f5 100644 --- a/crates/nvisy-augment/README.md +++ b/crates/nvisy-augment/README.md @@ -4,7 +4,7 @@ Content augmentation actions for the Nvisy runtime. -Provides OCR text extraction from images, audio transcription, and synthetic data generation for replacing redacted entities with realistic placeholder values. +Provides OCR text extraction from images (via `nvisy-ocr`), audio transcription (via `nvisy-asr`), and synthetic data generation for replacing redacted entities with realistic placeholder values. ## Documentation diff --git a/crates/nvisy-augment/src/ocr.rs b/crates/nvisy-augment/src/ocr.rs index 3b5eeb8..5eb86bb 100644 --- a/crates/nvisy-augment/src/ocr.rs +++ b/crates/nvisy-augment/src/ocr.rs @@ -2,19 +2,14 @@ //! from image documents. use serde::Deserialize; -use serde_json::Value; use nvisy_codec::document::Document; use nvisy_codec::handler::{Handler, PngHandler, TxtHandler}; -use nvisy_ontology::entity::{EntityCategory, EntityKind}; use nvisy_core::Error; -use nvisy_core::math::BoundingBox; -use nvisy_python::bridge::PythonBridge; -use nvisy_python::ocr::OcrParams; +use nvisy_ontology::entity::Entity; -use nvisy_ontology::entity::{DetectionMethod, Entity}; -use nvisy_ontology::location::{ImageLocation, Location}; +pub use nvisy_ocr::{OcrBackend, OcrConfig, parse_ocr_entities}; fn default_language() -> String { "eng".into() @@ -28,32 +23,6 @@ fn default_confidence() -> f64 { 0.5 } -/// Configuration passed to an [`OcrBackend`] implementation. -#[derive(Debug, Clone)] -pub struct OcrConfig { - /// Language hint (e.g. `"eng"` for English). 
- pub language: String, - /// OCR engine to use (`"tesseract"`, `"google-vision"`, `"aws-textract"`). - pub engine: String, - /// Minimum confidence threshold for OCR results. - pub confidence_threshold: f64, -} - -/// Backend trait for OCR providers. -/// -/// Implementations call an external OCR service and return raw JSON -/// results. Entity construction is handled by [`GenerateOcrAction`]. -#[async_trait::async_trait] -pub trait OcrBackend: Send + Sync + 'static { - /// Run OCR on image bytes, returning raw dicts. - async fn detect_ocr( - &self, - image_data: &[u8], - mime_type: &str, - config: &OcrConfig, - ) -> Result, Error>; -} - /// Typed parameters for [`GenerateOcrAction`]. #[derive(Debug, Deserialize)] #[serde(rename_all = "camelCase")] @@ -139,64 +108,3 @@ impl GenerateOcrAction { }) } } - -/// Parse raw JSON dicts from an OCR backend into [`Entity`] values. -/// -/// Expected dict keys: `text`, `x`, `y`, `width`, `height`, `confidence`. -pub fn parse_ocr_entities(raw: &[Value]) -> Result, Error> { - let mut entities = Vec::new(); - - for item in raw { - let obj = item.as_object().ok_or_else(|| { - Error::python("Expected JSON object in OCR results".to_string()) - })?; - - let text = obj - .get("text") - .and_then(Value::as_str) - .ok_or_else(|| Error::python("Missing 'text' in OCR result".to_string()))?; - - let x = obj.get("x").and_then(Value::as_f64).unwrap_or(0.0); - let y = obj.get("y").and_then(Value::as_f64).unwrap_or(0.0); - let width = obj.get("width").and_then(Value::as_f64).unwrap_or(0.0); - let height = obj.get("height").and_then(Value::as_f64).unwrap_or(0.0); - let confidence = obj.get("confidence").and_then(Value::as_f64).unwrap_or(0.0); - - let entity = Entity::new( - EntityCategory::Pii, - EntityKind::Handwriting, - text, - DetectionMethod::Ocr, - confidence, - ) - .with_location(Location::Image(ImageLocation { - bounding_box: BoundingBox { x, y, width, height }, - image_id: None, - page_number: None, - })); - - entities.push(entity); 
- } - - Ok(entities) -} - -/// [`OcrBackend`] implementation for [`PythonBridge`]. -/// -/// Converts [`OcrConfig`] to [`OcrParams`] and delegates to `nvisy_python::ocr`. -#[async_trait::async_trait] -impl OcrBackend for PythonBridge { - async fn detect_ocr( - &self, - image_data: &[u8], - mime_type: &str, - config: &OcrConfig, - ) -> Result, Error> { - let params = OcrParams { - language: config.language.clone(), - engine: config.engine.clone(), - confidence_threshold: config.confidence_threshold, - }; - nvisy_python::ocr::detect_ocr(self, image_data, mime_type, ¶ms).await - } -} diff --git a/crates/nvisy-augment/src/transcribe.rs b/crates/nvisy-augment/src/transcribe.rs index 3eac6c9..8dee0d5 100644 --- a/crates/nvisy-augment/src/transcribe.rs +++ b/crates/nvisy-augment/src/transcribe.rs @@ -2,18 +2,14 @@ //! locations and transcript documents from audio input. use serde::Deserialize; -use serde_json::Value; use nvisy_codec::document::Document; use nvisy_codec::handler::{Handler, WavHandler, TxtHandler}; -use nvisy_core::math::TimeSpan; use nvisy_core::Error; -use nvisy_ontology::entity::{EntityCategory, EntityKind}; -use nvisy_ontology::entity::{DetectionMethod, Entity}; -use nvisy_ontology::location::{AudioLocation, Location}; -use nvisy_python::bridge::PythonBridge; -use nvisy_python::transcribe::TranscribeParams; +use nvisy_ontology::entity::Entity; + +pub use nvisy_asr::{TranscribeBackend, TranscribeConfig, parse_transcribe_entities}; fn default_language() -> String { "en".into() @@ -23,35 +19,6 @@ fn default_confidence() -> f64 { 0.5 } -/// Configuration passed to a [`TranscribeBackend`] implementation. -#[derive(Debug, Clone)] -pub struct TranscribeConfig { - /// BCP-47 language tag for transcription. - pub language: String, - /// Whether to perform speaker diarization. - pub enable_speaker_diarization: bool, - /// Minimum confidence threshold for results. - pub confidence_threshold: f64, -} - -/// Backend trait for transcription providers. 
-/// -/// Implementations call an external speech-to-text service and return -/// raw JSON results. Entity construction is handled by [`GenerateTranscribeAction`]. -#[async_trait::async_trait] -pub trait TranscribeBackend: Send + Sync + 'static { - /// Transcribe audio bytes, returning raw dicts. - /// - /// Each dict should contain: `text`, `start_time`, `end_time`, `confidence`, - /// and optionally `speaker_id`. - async fn transcribe( - &self, - audio_data: &[u8], - mime_type: &str, - config: &TranscribeConfig, - ) -> Result, Error>; -} - /// Typed parameters for [`GenerateTranscribeAction`]. #[derive(Debug, Deserialize)] #[serde(rename_all = "camelCase")] @@ -138,87 +105,11 @@ impl GenerateTranscribeAction { } } -/// Parse raw JSON dicts from a transcription backend into [`Entity`] values. -/// -/// Expected dict keys: `text`, `start_time`, `end_time`, `confidence`, -/// and optionally `speaker_id`. -pub fn parse_transcribe_entities(raw: &[Value]) -> Result, Error> { - let mut entities = Vec::new(); - - for item in raw { - let obj = item.as_object().ok_or_else(|| { - Error::python("Expected JSON object in transcription results".to_string()) - })?; - - let text = obj - .get("text") - .and_then(Value::as_str) - .ok_or_else(|| Error::python("Missing 'text' in transcription result".to_string()))?; - - let start_time = obj - .get("start_time") - .and_then(Value::as_f64) - .ok_or_else(|| Error::python("Missing 'start_time'".to_string()))?; - - let end_time = obj - .get("end_time") - .and_then(Value::as_f64) - .ok_or_else(|| Error::python("Missing 'end_time'".to_string()))?; - - let confidence = obj - .get("confidence") - .and_then(Value::as_f64) - .unwrap_or(0.0); - - let speaker_id = obj - .get("speaker_id") - .and_then(Value::as_str) - .map(String::from); - - let entity = Entity::new( - EntityCategory::Pii, - EntityKind::PersonName, - text, - DetectionMethod::SpeechTranscript, - confidence, - ) - .with_location(Location::Audio(AudioLocation { - time_span: TimeSpan 
{ - start_secs: start_time, - end_secs: end_time, - }, - speaker_id, - audio_id: None, - })); - - entities.push(entity); - } - - Ok(entities) -} - -/// [`TranscribeBackend`] implementation for [`PythonBridge`]. -#[async_trait::async_trait] -impl TranscribeBackend for PythonBridge { - async fn transcribe( - &self, - audio_data: &[u8], - mime_type: &str, - config: &TranscribeConfig, - ) -> Result, Error> { - let params = TranscribeParams { - language: config.language.clone(), - enable_speaker_diarization: config.enable_speaker_diarization, - confidence_threshold: config.confidence_threshold, - }; - nvisy_python::transcribe::transcribe(self, audio_data, mime_type, ¶ms).await - } -} - #[cfg(test)] mod tests { use super::*; - use serde_json::json; + use nvisy_ontology::entity::DetectionMethod; + use serde_json::{json, Value}; #[test] fn parse_transcribe_entities_basic() { diff --git a/crates/nvisy-identify/Cargo.toml b/crates/nvisy-identify/Cargo.toml index 112facb..bd1733f 100644 --- a/crates/nvisy-identify/Cargo.toml +++ b/crates/nvisy-identify/Cargo.toml @@ -33,6 +33,8 @@ nvisy-codec = { workspace = true, features = [] } nvisy-pattern = { workspace = true, features = [] } nvisy-python = { workspace = true, features = [] } nvisy-rig = { workspace = true, features = [] } +nvisy-ocr = { workspace = true, features = [] } +nvisy-asr = { workspace = true, features = [] } # (De)serialization serde = { workspace = true, features = ["derive"] } @@ -46,18 +48,15 @@ async-trait = { workspace = true, features = [] } # Primitive datatypes uuid = { workspace = true, features = ["v4"] } jiff = { workspace = true, features = [] } +bytes = { workspace = true, features = [] } semver = { workspace = true, features = [] } # Derive macros and error handling strum = { workspace = true, features = ["derive"] } -# Text processing -regex = { workspace = true, features = [] } - # Observability tracing = { workspace = true, features = [] } [dev-dependencies] -regex = { workspace = true, 
features = [] } serde_json = { workspace = true, features = [] } tokio = { workspace = true, features = ["macros", "rt"] } diff --git a/crates/nvisy-identify/README.md b/crates/nvisy-identify/README.md index aac2569..b746fd3 100644 --- a/crates/nvisy-identify/README.md +++ b/crates/nvisy-identify/README.md @@ -2,9 +2,17 @@ [![Build](https://img.shields.io/github/actions/workflow/status/nvisycom/runtime/build.yml?branch=main&label=build%20%26%20test&style=flat-square)](https://github.com/nvisycom/runtime/actions/workflows/build.yml) -Entity ontology types, detection layers, and pattern/dictionary infrastructure for the Nvisy runtime. +Detection orchestration, entity ontology, and policy evaluation for the Nvisy runtime. -Defines the core entity model (`Entity`, `DetectionMethod`, locations), detection traits (`DetectionLayer`, `Detect`), and concrete detection layers for text (regex patterns, Aho-Corasick dictionaries, NER), tabular data (column rules), and documents (checksum validation, manual annotations). +Organized by **detection method** rather than content modality: + +- **pattern/** — Deterministic regex and dictionary matching via `PatternEngine` +- **ner/** — Statistical NLP named-entity recognition (text and image) +- **llm/** — LLM-based contextual entity detection via `LlmBackend` +- **vision/** — Computer vision layers (face, object, OCR detection) +- **audio/** — Audio detection via transcription + NER pipeline +- **fusion/** — Post-detection entity merging, deduplication, and ensemble scoring +- **policy/** — Policy evaluation, governance rules, and audit trails ## Documentation diff --git a/crates/nvisy-identify/src/audio/mod.rs b/crates/nvisy-identify/src/audio/mod.rs new file mode 100644 index 0000000..45004ed --- /dev/null +++ b/crates/nvisy-identify/src/audio/mod.rs @@ -0,0 +1,5 @@ +//! Audio detection layers. 
+ +pub mod transcript; + +pub use transcript::TranscriptNerDetection; diff --git a/crates/nvisy-identify/src/audio/transcript.rs b/crates/nvisy-identify/src/audio/transcript.rs new file mode 100644 index 0000000..0d99d48 --- /dev/null +++ b/crates/nvisy-identify/src/audio/transcript.rs @@ -0,0 +1,198 @@ +//! Composite audio detection: transcription followed by NER. +//! +//! Chains a [`TranscribeBackend`] with an [`NerBackend`] to detect +//! entities in audio content. The ASR stage produces a transcript +//! with time-aligned segments, then NER runs on the combined text +//! and the resulting text-location entities are mapped back to +//! [`AudioLocation`] time spans. + +use bytes::Bytes; + +use nvisy_codec::handler::Span; +use nvisy_core::Error; + +use nvisy_asr::{TranscribeBackend, TranscribeConfig, parse_transcribe_entities}; + +use crate::ner::{NerBackend, NerConfig, parse_ner_entities}; +use crate::{Entity, Location}; +use crate::{ParallelContext, DetectionService}; + +/// Composite audio detection layer: transcription + NER. +/// +/// First transcribes each audio span via [`TranscribeBackend`], then +/// runs [`NerBackend`] on the resulting transcript text. Entities +/// from transcription carry [`AudioLocation`] with time spans; +/// entities from NER carry text locations within the transcript. +pub struct TranscriptNerDetection { + transcribe_backend: T, + transcribe_config: TranscribeConfig, + ner_backend: N, + ner_config: NerConfig, +} + +impl TranscriptNerDetection { + /// Create a new composite detection layer. 
+ pub fn new( + transcribe_backend: T, + transcribe_config: TranscribeConfig, + ner_backend: N, + ner_config: NerConfig, + ) -> Self { + Self { + transcribe_backend, + transcribe_config, + ner_backend, + ner_config, + } + } +} + +#[async_trait::async_trait] +impl DetectionService<(), Bytes> + for TranscriptNerDetection +{ + type Context = ParallelContext; + + async fn detect( + &self, + spans: Vec>, + ) -> Result, Error> { + let mut entities = Vec::new(); + + for span in &spans { + let audio_bytes: &[u8] = &span.data; + + // Step 1: Transcribe audio → time-aligned segments. + let raw_segments = self + .transcribe_backend + .transcribe(audio_bytes, "audio/wav", &self.transcribe_config) + .await?; + + let transcript_entities = parse_transcribe_entities(&raw_segments)?; + + // Collect transcript text for NER. + let transcript_text: String = transcript_entities + .iter() + .map(|e| e.value.as_str()) + .collect::>() + .join(" "); + + // Include the raw transcript entities (audio-located). + for entity in transcript_entities { + entities.push(entity.with_parent(&span.source)); + } + + // Step 2: Run NER on the combined transcript text. + if !transcript_text.is_empty() { + let raw_ner = self + .ner_backend + .detect_text(&transcript_text, &self.ner_config) + .await?; + + for mut entity in parse_ner_entities(&raw_ner)? { + // NER entities from transcript get a text location + // within the transcript. For now we keep them as-is; + // a future enhancement could map text offsets back to + // audio time spans using segment boundaries. 
+ if entity.location.is_none() { + entity.location = Some(Location::Text(Default::default())); + } + entities.push(entity.with_parent(&span.source)); + } + } + } + + Ok(entities) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use nvisy_ontology::entity::DetectionMethod; + use serde_json::{json, Value}; + + struct MockTranscribeBackend; + + #[async_trait::async_trait] + impl TranscribeBackend for MockTranscribeBackend { + async fn transcribe( + &self, + _audio_data: &[u8], + _mime_type: &str, + _config: &TranscribeConfig, + ) -> Result, Error> { + Ok(vec![ + json!({ + "text": "My name is John Doe", + "start_time": 0.0, + "end_time": 2.0, + "confidence": 0.95 + }), + ]) + } + } + + struct MockNerBackend; + + #[async_trait::async_trait] + impl NerBackend for MockNerBackend { + async fn detect_text( + &self, + text: &str, + _config: &NerConfig, + ) -> Result, Error> { + let mut results = Vec::new(); + if let Some(pos) = text.find("John Doe") { + results.push(json!({ + "category": "pii", + "entity_type": "person_name", + "value": "John Doe", + "confidence": 0.9, + "start_offset": pos, + "end_offset": pos + 8 + })); + } + Ok(results) + } + + async fn detect_image( + &self, + _: &[u8], _: &str, _: &NerConfig, + ) -> Result, Error> { + Ok(Vec::new()) + } + } + + #[tokio::test] + async fn transcript_ner_produces_both_entity_types() { + let layer = TranscriptNerDetection::new( + MockTranscribeBackend, + TranscribeConfig { + language: "en".into(), + enable_speaker_diarization: false, + confidence_threshold: 0.5, + }, + MockNerBackend, + NerConfig { + entity_types: vec![], + confidence_threshold: 0.0, + }, + ); + + let audio = Bytes::from_static(b"fake-wav-data"); + let spans = vec![Span::new((), audio)]; + + let entities = layer.detect(spans).await.unwrap(); + // Should have: 1 transcript entity + 1 NER entity + assert_eq!(entities.len(), 2); + + // First entity is from transcription (audio location). 
+ assert_eq!(entities[0].detection_method, DetectionMethod::SpeechTranscript); + assert!(entities[0].location.as_ref().unwrap().as_audio().is_some()); + + // Second entity is from NER (text location). + assert_eq!(entities[1].detection_method, DetectionMethod::Ner); + assert_eq!(entities[1].value, "John Doe"); + } +} diff --git a/crates/nvisy-identify/src/action/dedup.rs b/crates/nvisy-identify/src/fusion/dedup.rs similarity index 100% rename from crates/nvisy-identify/src/action/dedup.rs rename to crates/nvisy-identify/src/fusion/dedup.rs diff --git a/crates/nvisy-identify/src/fusion/ensemble.rs b/crates/nvisy-identify/src/fusion/ensemble.rs new file mode 100644 index 0000000..97ba413 --- /dev/null +++ b/crates/nvisy-identify/src/fusion/ensemble.rs @@ -0,0 +1,209 @@ +//! Ensemble entity fusion — merges entities from multiple detectors +//! using configurable confidence-combination strategies. + +use std::collections::HashMap; + +use crate::{DetectionMethod, Entity, Location}; + +/// Strategy for combining confidence scores from multiple detectors. +#[derive(Debug, Clone)] +pub enum FusionStrategy { + /// Take the maximum confidence across all detectors. + MaxConfidence, + /// Weighted average by detection method. + WeightedAverage { + weights: HashMap, + }, + /// Noisy-OR: `P = 1 − ∏(1 − pᵢ)` for independent detectors. + NoisyOr, +} + +/// Ensemble merge — groups entities by `(kind, value, overlapping location)` +/// then fuses confidence using the configured [`FusionStrategy`]. +pub struct EnsembleMerge { + strategy: FusionStrategy, +} + +impl EnsembleMerge { + /// Create a new ensemble merge with the given strategy. + pub fn new(strategy: FusionStrategy) -> Self { + Self { strategy } + } + + /// Group entities by `(kind, value, overlapping location)` then fuse + /// confidence according to the strategy. 
+ pub fn merge(&self, entities: Vec) -> Vec { + if entities.len() <= 1 { + return entities; + } + + let mut groups: Vec> = Vec::new(); + + for entity in entities { + let group = groups.iter_mut().find(|group| { + let representative = &group[0]; + representative.entity_kind == entity.entity_kind + && representative.value == entity.value + && locations_overlap(&representative.location, &entity.location) + }); + + match group { + Some(g) => g.push(entity), + None => groups.push(vec![entity]), + } + } + + groups + .into_iter() + .map(|group| self.fuse_group(group)) + .collect() + } + + /// Fuse a group of matching entities into a single entity. + fn fuse_group(&self, group: Vec) -> Entity { + debug_assert!(!group.is_empty()); + + if group.len() == 1 { + return group.into_iter().next().unwrap(); + } + + let fused_confidence = match &self.strategy { + FusionStrategy::MaxConfidence => { + group.iter().map(|e| e.confidence).fold(0.0_f64, f64::max) + } + FusionStrategy::WeightedAverage { weights } => { + let mut total_weight = 0.0; + let mut weighted_sum = 0.0; + for e in &group { + let w = weights.get(&e.detection_method).copied().unwrap_or(1.0); + weighted_sum += e.confidence * w; + total_weight += w; + } + if total_weight > 0.0 { + weighted_sum / total_weight + } else { + 0.0 + } + } + FusionStrategy::NoisyOr => { + // P = 1 − ∏(1 − pᵢ) + let product: f64 = group.iter().map(|e| 1.0 - e.confidence).product(); + 1.0 - product + } + }; + + // Use the first entity as the base and update confidence/method. + let mut result = group.into_iter().next().unwrap(); + result.confidence = fused_confidence; + result.detection_method = DetectionMethod::Composite; + result + } +} + +/// Check whether two optional locations overlap. 
+fn locations_overlap(a: &Option<Location>, b: &Option<Location>) -> bool {
+    match (a, b) {
+        (None, None) => true,
+        (Some(Location::Text(a_loc)), Some(Location::Text(b_loc))) => a_loc.overlaps(b_loc),
+        _ => false,
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::TextLocation;
+    use nvisy_ontology::entity::{EntityCategory, EntityKind};
+
+    fn text_entity(
+        value: &str,
+        method: DetectionMethod,
+        confidence: f64,
+        start: usize,
+        end: usize,
+    ) -> Entity {
+        Entity::new(
+            EntityCategory::Pii,
+            EntityKind::PersonName,
+            value,
+            method,
+            confidence,
+        )
+        .with_location(Location::Text(TextLocation {
+            start_offset: start,
+            end_offset: end,
+            ..Default::default()
+        }))
+    }
+
+    #[test]
+    fn max_confidence_strategy() {
+        let merge = EnsembleMerge::new(FusionStrategy::MaxConfidence);
+        let entities = vec![
+            text_entity("John", DetectionMethod::Regex, 0.7, 0, 4),
+            text_entity("John", DetectionMethod::Ner, 0.85, 0, 4),
+        ];
+        let result = merge.merge(entities);
+        assert_eq!(result.len(), 1);
+        assert!((result[0].confidence - 0.85).abs() < f64::EPSILON);
+        assert_eq!(result[0].detection_method, DetectionMethod::Composite);
+    }
+
+    #[test]
+    fn noisy_or_strategy() {
+        let merge = EnsembleMerge::new(FusionStrategy::NoisyOr);
+        let entities = vec![
+            text_entity("John", DetectionMethod::Regex, 0.7, 0, 4),
+            text_entity("John", DetectionMethod::Ner, 0.8, 0, 4),
+        ];
+        let result = merge.merge(entities);
+        assert_eq!(result.len(), 1);
+        // P = 1 − (1 − 0.7)(1 − 0.8) = 1 − (0.3)(0.2) = 0.94
+        assert!((result[0].confidence - 0.94).abs() < 0.001);
+    }
+
+    #[test]
+    fn weighted_average_strategy() {
+        let mut weights = HashMap::new();
+        weights.insert(DetectionMethod::Regex, 1.0);
+        weights.insert(DetectionMethod::Ner, 2.0);
+
+        let merge = EnsembleMerge::new(FusionStrategy::WeightedAverage { weights });
+        let entities = vec![
+            text_entity("John", DetectionMethod::Regex, 0.6, 0, 4),
+            text_entity("John", DetectionMethod::Ner, 0.9, 0, 4),
+        ];
+        let result =
merge.merge(entities); + assert_eq!(result.len(), 1); + // (0.6 * 1.0 + 0.9 * 2.0) / (1.0 + 2.0) = 2.4 / 3.0 = 0.8 + assert!((result[0].confidence - 0.8).abs() < 0.001); + } + + #[test] + fn non_overlapping_not_merged() { + let merge = EnsembleMerge::new(FusionStrategy::NoisyOr); + let entities = vec![ + text_entity("John", DetectionMethod::Regex, 0.7, 0, 4), + text_entity("John", DetectionMethod::Ner, 0.8, 10, 14), + ]; + let result = merge.merge(entities); + assert_eq!(result.len(), 2); + } + + #[test] + fn single_entity_unchanged() { + let merge = EnsembleMerge::new(FusionStrategy::NoisyOr); + let entities = vec![text_entity("John", DetectionMethod::Regex, 0.7, 0, 4)]; + let result = merge.merge(entities); + assert_eq!(result.len(), 1); + assert!((result[0].confidence - 0.7).abs() < f64::EPSILON); + assert_eq!(result[0].detection_method, DetectionMethod::Regex); + } + + #[test] + fn empty_input() { + let merge = EnsembleMerge::new(FusionStrategy::MaxConfidence); + let result = merge.merge(Vec::new()); + assert!(result.is_empty()); + } +} diff --git a/crates/nvisy-identify/src/action/manual.rs b/crates/nvisy-identify/src/fusion/manual.rs similarity index 100% rename from crates/nvisy-identify/src/action/manual.rs rename to crates/nvisy-identify/src/fusion/manual.rs diff --git a/crates/nvisy-identify/src/action/mod.rs b/crates/nvisy-identify/src/fusion/mod.rs similarity index 53% rename from crates/nvisy-identify/src/action/mod.rs rename to crates/nvisy-identify/src/fusion/mod.rs index 988fbc2..4bf91a2 100644 --- a/crates/nvisy-identify/src/action/mod.rs +++ b/crates/nvisy-identify/src/fusion/mod.rs @@ -1,7 +1,9 @@ -//! Post-detection actions. +//! Post-detection entity merging, deduplication, and manual annotations. 
pub mod dedup; +pub mod ensemble; pub mod manual; pub use dedup::DeduplicateAction; +pub use ensemble::{EnsembleMerge, FusionStrategy}; pub use manual::{DetectManualAction, DetectManualParams, Exclusion, ManualOutput, is_excluded}; diff --git a/crates/nvisy-identify/src/lib.rs b/crates/nvisy-identify/src/lib.rs index 6b092ca..cde8d5b 100644 --- a/crates/nvisy-identify/src/lib.rs +++ b/crates/nvisy-identify/src/lib.rs @@ -4,10 +4,12 @@ mod ontology; mod layer; +mod pattern; mod ner; -mod text; -mod image; -mod action; +mod vision; +mod llm; +mod audio; +mod fusion; mod policy; pub mod prelude; @@ -22,13 +24,17 @@ pub use layer::*; pub use ner::{NerBackend, NerConfig}; // --- Detection layers --- -pub use text::{PatternDetection, PatternDetectionParams}; -pub use text::{NerDetection, NerDetectionParams}; -pub use image::{ImageNerDetection, FaceBackend, FaceDetection, ObjectBackend, ObjectDetection}; +pub use pattern::{PatternDetection, PatternDetectionParams}; +pub use ner::{NerDetection, NerDetectionParams}; +pub use ner::ImageNerDetection; +pub use vision::{FaceBackend, FaceDetection, ObjectBackend, ObjectDetection, OcrDetection}; +pub use llm::{LlmDetection, LlmDetectionParams, user_prompt as llm_user_prompt}; +pub use audio::TranscriptNerDetection; // --- Post-detection actions --- -pub use action::{DetectManualAction, DetectManualParams, Exclusion, ManualOutput, is_excluded}; -pub use action::DeduplicateAction; +pub use fusion::{DetectManualAction, DetectManualParams, Exclusion, ManualOutput, is_excluded}; +pub use fusion::DeduplicateAction; +pub use fusion::{EnsembleMerge, FusionStrategy}; // --- Policy & governance --- pub use policy::{ diff --git a/crates/nvisy-identify/src/llm/detection.rs b/crates/nvisy-identify/src/llm/detection.rs new file mode 100644 index 0000000..000c5b5 --- /dev/null +++ b/crates/nvisy-identify/src/llm/detection.rs @@ -0,0 +1,234 @@ +//! LLM contextual detection layer. +//! +//! 
Uses a [`SequentialContext`] so the orchestrator feeds one span at +//! a time, allowing the layer to accumulate prior text for contextual +//! understanding across spans. + +use serde::Deserialize; +use tokio::sync::Mutex; + +use nvisy_codec::handler::{Span, TxtSpan}; +use nvisy_ontology::entity::EntityKind; +use nvisy_core::Error; +use nvisy_rig::{LlmBackend, LlmConfig, parse_llm_entities}; + +use crate::{Entity, Location, ModelInfo, TextLocation}; +use crate::{SequentialContext, DetectionService}; + +use super::prompt; + +fn default_confidence() -> f64 { + 0.5 +} + +/// Typed parameters for [`LlmDetection`]. +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct LlmDetectionParams { + /// Entity kinds to detect (empty = all). + #[serde(rename = "entityTypes", default)] + pub entity_kinds: Vec, + /// Minimum confidence score for returned entities. + #[serde(default = "default_confidence")] + pub confidence_threshold: f64, + /// Optional model info to attach to every LLM-produced entity. + #[serde(skip)] + pub model_info: Option, + /// Optional system prompt override. + #[serde(default)] + pub system_prompt: Option, +} + +/// Accumulated state between sequential span calls. +struct LlmState { + /// Text from previously processed spans (for sliding context). + prior_text: String, +} + +/// LLM contextual detection layer — delegates to an [`LlmBackend`]. +/// +/// Uses [`SequentialContext`]: the orchestrator feeds one span at a +/// time so the layer can carry sliding context between spans. +pub struct LlmDetection { + backend: B, + config: LlmConfig, + model_info: Option, + state: Mutex, +} + +impl LlmDetection { + /// Create a new detection layer with the given backend and params. 
+    pub fn new(backend: B, params: LlmDetectionParams) -> Self {
+        let system_prompt = params.system_prompt.unwrap_or_else(|| {
+            prompt::system_prompt().to_string()
+        });
+        let config = LlmConfig {
+            entity_types: params.entity_kinds.iter().map(|ek| ek.to_string()).collect(),
+            confidence_threshold: params.confidence_threshold,
+            system_prompt: Some(system_prompt),
+        };
+        Self {
+            backend,
+            config,
+            model_info: params.model_info,
+            state: Mutex::new(LlmState {
+                prior_text: String::new(),
+            }),
+        }
+    }
+
+    /// Clear accumulated state between documents.
+    pub async fn reset(&self) {
+        let mut state = self.state.lock().await;
+        state.prior_text.clear();
+    }
+}
+
+#[async_trait::async_trait]
+impl<B: LlmBackend> DetectionService<TxtSpan, String> for LlmDetection<B> {
+    type Context = SequentialContext;
+
+    async fn detect(
+        &self,
+        spans: Vec<Span<TxtSpan, String>>,
+    ) -> Result<Vec<Entity>, Error> {
+        let mut entities = Vec::new();
+
+        for span in &spans {
+            // Build the full text with prior context prepended.
+            let (full_text, context_len) = {
+                let state = self.state.lock().await;
+                if state.prior_text.is_empty() {
+                    (span.data.clone(), 0)
+                } else {
+                    let sep = "\n";
+                    let context_len = state.prior_text.len() + sep.len();
+                    let full = format!("{}{}{}", state.prior_text, sep, span.data);
+                    (full, context_len)
+                }
+            };
+
+            let raw = self
+                .backend
+                .detect_text(&full_text, &self.config)
+                .await?;
+
+            // Filter entities to the current span and adjust offsets.
+            let span_len = span.data.len();
+            for mut e in parse_llm_entities(&raw)?
{ + if let Some(Location::Text(ref loc)) = e.location { + if loc.end_offset <= context_len { + continue; + } + if loc.start_offset < context_len { + continue; + } + if loc.start_offset - context_len >= span_len { + continue; + } + e.location = Some(Location::Text(TextLocation { + start_offset: loc.start_offset - context_len, + end_offset: loc.end_offset - context_len, + element_id: Some(span.id.0.to_string()), + ..Default::default() + })); + } else { + e.location = Some(Location::Text(TextLocation { + element_id: Some(span.id.0.to_string()), + ..Default::default() + })); + } + + if let Some(ref model) = self.model_info { + e.model = Some(model.clone()); + } + + entities.push(e.with_parent(&span.source)); + } + + // Accumulate text for sliding context. + let mut state = self.state.lock().await; + if !state.prior_text.is_empty() { + state.prior_text.push('\n'); + } + state.prior_text.push_str(&span.data); + } + + Ok(entities) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use serde_json::{json, Value}; + + struct MockLlmBackend; + + #[async_trait::async_trait] + impl LlmBackend for MockLlmBackend { + async fn detect_text( + &self, + text: &str, + _config: &LlmConfig, + ) -> Result, Error> { + let mut results = Vec::new(); + if let Some(pos) = text.find("SECRET") { + results.push(json!({ + "category": "credentials", + "entity_type": "api_key", + "value": "SECRET", + "confidence": 0.92, + "start_offset": pos, + "end_offset": pos + 6 + })); + } + Ok(results) + } + } + + #[tokio::test] + async fn llm_detection_basic() { + let params = LlmDetectionParams { + entity_kinds: vec![], + confidence_threshold: 0.0, + model_info: None, + system_prompt: None, + }; + let llm = LlmDetection::new(MockLlmBackend, params); + + let spans = vec![Span::new(TxtSpan(0), "contains SECRET key".into())]; + let entities = llm.detect(spans).await.unwrap(); + assert_eq!(entities.len(), 1); + assert_eq!(entities[0].value, "SECRET"); + + let loc = 
entities[0].location.as_ref().unwrap().as_text().unwrap(); + assert_eq!(loc.start_offset, 9); + assert_eq!(loc.end_offset, 15); + } + + #[tokio::test] + async fn llm_detection_with_context() { + let params = LlmDetectionParams { + entity_kinds: vec![], + confidence_threshold: 0.0, + model_info: None, + system_prompt: None, + }; + let llm = LlmDetection::new(MockLlmBackend, params); + + // First span: no entity. + let span1 = vec![Span::new(TxtSpan(0), "some context".into())]; + let result1 = llm.detect(span1).await.unwrap(); + assert!(result1.is_empty()); + + // Second span: entity in current span. + let span2 = vec![Span::new(TxtSpan(1), "has SECRET here".into())]; + let result2 = llm.detect(span2).await.unwrap(); + assert_eq!(result2.len(), 1); + + let loc = result2[0].location.as_ref().unwrap().as_text().unwrap(); + assert_eq!(loc.start_offset, 4); + assert_eq!(loc.end_offset, 10); + } +} diff --git a/crates/nvisy-identify/src/llm/mod.rs b/crates/nvisy-identify/src/llm/mod.rs new file mode 100644 index 0000000..dbbc0eb --- /dev/null +++ b/crates/nvisy-identify/src/llm/mod.rs @@ -0,0 +1,7 @@ +//! LLM-based contextual entity detection. + +pub mod detection; +pub mod prompt; + +pub use detection::{LlmDetection, LlmDetectionParams}; +pub use prompt::user_prompt; diff --git a/crates/nvisy-identify/src/llm/prompt.rs b/crates/nvisy-identify/src/llm/prompt.rs new file mode 100644 index 0000000..0502e0b --- /dev/null +++ b/crates/nvisy-identify/src/llm/prompt.rs @@ -0,0 +1,26 @@ +//! System and user prompt templates for LLM-based PII/sensitive-data detection. + +/// Default system prompt for LLM-based entity detection. +/// +/// Instructs the model to identify PII and sensitive data, returning +/// structured JSON results. +pub fn system_prompt() -> &'static str { + r#"You are a precise PII and sensitive data detection system. 
Your task is to identify personally identifiable information (PII), protected health information (PHI), financial data, and credentials in the provided text. + +For each entity found, return a JSON object with these fields: +- "category": one of "pii", "phi", "financial", "credentials", or a custom category +- "entity_type": the specific entity type (e.g., "person_name", "email_address", "ssn", "credit_card_number") +- "value": the exact text matched +- "confidence": your confidence score from 0.0 to 1.0 +- "start_offset": character offset where the entity starts in the input text +- "end_offset": character offset where the entity ends in the input text + +Return a JSON array of objects. If no entities are found, return an empty array []. + +Be thorough but precise — prioritize precision over recall. Consider context when assessing whether text constitutes sensitive data."# +} + +/// Build a user prompt from the input text. +pub fn user_prompt(text: &str) -> String { + format!("Detect all PII and sensitive data in the following text:\n\n{text}") +} diff --git a/crates/nvisy-identify/src/ner/backend.rs b/crates/nvisy-identify/src/ner/backend.rs index 4dbc4bf..18b0bba 100644 --- a/crates/nvisy-identify/src/ner/backend.rs +++ b/crates/nvisy-identify/src/ner/backend.rs @@ -8,7 +8,7 @@ use nvisy_core::Error; /// /// Contains only the model-agnostic parameters that every backend needs. /// Provider-specific fields (API key, model name, etc.) belong in the -/// action's [`NerDetectionParams`](super::super::text::ner::NerDetectionParams) +/// action's [`NerDetectionParams`](super::text::NerDetectionParams) /// or the provider's credentials. 
#[derive(Debug, Clone)] pub struct NerConfig { diff --git a/crates/nvisy-identify/src/image/ner.rs b/crates/nvisy-identify/src/ner/image.rs similarity index 98% rename from crates/nvisy-identify/src/image/ner.rs rename to crates/nvisy-identify/src/ner/image.rs index 54526af..afeae94 100644 --- a/crates/nvisy-identify/src/image/ner.rs +++ b/crates/nvisy-identify/src/ner/image.rs @@ -8,7 +8,7 @@ use nvisy_core::Error; use crate::Entity; use crate::{ParallelContext, DetectionService}; -use crate::ner::{NerBackend, NerConfig, parse_image_ner_entity}; +use super::{NerBackend, NerConfig, parse_image_ner_entity}; /// NER detection layer for images. /// diff --git a/crates/nvisy-identify/src/ner/mod.rs b/crates/nvisy-identify/src/ner/mod.rs index 4a82676..4013890 100644 --- a/crates/nvisy-identify/src/ner/mod.rs +++ b/crates/nvisy-identify/src/ner/mod.rs @@ -1,8 +1,13 @@ -//! Cross-modal NER backend trait, configuration, and result parsing. +//! Cross-modal NER backend trait, configuration, detection layers, and +//! result parsing. mod backend; mod bridge; mod parse; +pub mod text; +pub mod image; pub use backend::{NerBackend, NerConfig}; pub use parse::{parse_image_ner_entity, parse_ner_entities}; +pub use text::{NerDetection, NerDetectionParams}; +pub use image::ImageNerDetection; diff --git a/crates/nvisy-identify/src/text/ner.rs b/crates/nvisy-identify/src/ner/text.rs similarity index 98% rename from crates/nvisy-identify/src/text/ner.rs rename to crates/nvisy-identify/src/ner/text.rs index 7bd3b4b..312274a 100644 --- a/crates/nvisy-identify/src/text/ner.rs +++ b/crates/nvisy-identify/src/ner/text.rs @@ -1,4 +1,4 @@ -//! AI-powered named-entity recognition (NER) detection layer. +//! AI-powered named-entity recognition (NER) detection layer for text. //! //! Uses a [`SequentialContext`] so the orchestrator feeds one span at //! 
a time, allowing the layer to accumulate prior text/entities @@ -11,7 +11,7 @@ use nvisy_codec::handler::{Span, TxtSpan}; use nvisy_ontology::entity::EntityKind; use nvisy_core::Error; -use crate::ner::{NerBackend, NerConfig, parse_ner_entities}; +use super::{NerBackend, NerConfig, parse_ner_entities}; use crate::{Entity, Location, ModelInfo, TextLocation}; use crate::{SequentialContext, DetectionService}; diff --git a/crates/nvisy-identify/src/text/pattern.rs b/crates/nvisy-identify/src/pattern/mod.rs similarity index 100% rename from crates/nvisy-identify/src/text/pattern.rs rename to crates/nvisy-identify/src/pattern/mod.rs diff --git a/crates/nvisy-identify/src/text/mod.rs b/crates/nvisy-identify/src/text/mod.rs deleted file mode 100644 index b55134b..0000000 --- a/crates/nvisy-identify/src/text/mod.rs +++ /dev/null @@ -1,7 +0,0 @@ -//! Text detection layers. - -pub mod pattern; -pub mod ner; - -pub use pattern::{PatternDetection, PatternDetectionParams}; -pub use ner::{NerDetection, NerDetectionParams}; diff --git a/crates/nvisy-identify/src/image/face.rs b/crates/nvisy-identify/src/vision/face.rs similarity index 100% rename from crates/nvisy-identify/src/image/face.rs rename to crates/nvisy-identify/src/vision/face.rs diff --git a/crates/nvisy-identify/src/image/mod.rs b/crates/nvisy-identify/src/vision/mod.rs similarity index 61% rename from crates/nvisy-identify/src/image/mod.rs rename to crates/nvisy-identify/src/vision/mod.rs index ca564d5..af91b5d 100644 --- a/crates/nvisy-identify/src/image/mod.rs +++ b/crates/nvisy-identify/src/vision/mod.rs @@ -1,9 +1,9 @@ -//! Image detection layers. +//! Computer vision detection layers. 
-pub mod ner; pub mod face; pub mod object; +pub mod ocr; -pub use ner::ImageNerDetection; pub use face::{FaceBackend, FaceDetection}; pub use object::{ObjectBackend, ObjectDetection}; +pub use ocr::OcrDetection; diff --git a/crates/nvisy-identify/src/image/object.rs b/crates/nvisy-identify/src/vision/object.rs similarity index 100% rename from crates/nvisy-identify/src/image/object.rs rename to crates/nvisy-identify/src/vision/object.rs diff --git a/crates/nvisy-identify/src/vision/ocr.rs b/crates/nvisy-identify/src/vision/ocr.rs new file mode 100644 index 0000000..ce3850f --- /dev/null +++ b/crates/nvisy-identify/src/vision/ocr.rs @@ -0,0 +1,105 @@ +//! OCR detection layer for images. +//! +//! Wraps an [`OcrBackend`] as a [`DetectionService`] that produces entities +//! with [`ImageLocation`] bounding boxes from OCR text extraction. + +use nvisy_codec::handler::{ImageData, Span}; +use nvisy_core::Error; +use nvisy_ocr::{OcrBackend, OcrConfig, parse_ocr_entities}; + +use crate::Entity; +use crate::{ParallelContext, DetectionService}; + +/// OCR detection layer — delegates to an [`OcrBackend`] at runtime. +/// +/// Encodes each image span to PNG and runs OCR to produce text entities +/// with bounding-box locations. +pub struct OcrDetection { + backend: B, + config: OcrConfig, +} + +impl OcrDetection { + /// Create a new OCR detection layer with the given backend and config. + pub fn new(backend: B, config: OcrConfig) -> Self { + Self { backend, config } + } +} + +#[async_trait::async_trait] +impl DetectionService<(), ImageData> for OcrDetection { + type Context = ParallelContext; + + async fn detect( + &self, + spans: Vec>, + ) -> Result, Error> { + let mut entities = Vec::new(); + + for span in &spans { + let png_bytes = span.data.encode_png()?; + + let raw = self + .backend + .detect_ocr(&png_bytes, "image/png", &self.config) + .await?; + + for entity in parse_ocr_entities(&raw)? 
{ + entities.push(entity.with_parent(&span.source)); + } + } + + Ok(entities) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use nvisy_ontology::entity::{DetectionMethod, EntityKind}; + use serde_json::{json, Value}; + + struct MockOcrBackend; + + #[async_trait::async_trait] + impl OcrBackend for MockOcrBackend { + async fn detect_ocr( + &self, + _image_data: &[u8], + _mime_type: &str, + _config: &OcrConfig, + ) -> Result, Error> { + Ok(vec![json!({ + "text": "John Doe", + "x": 10.0, + "y": 20.0, + "width": 100.0, + "height": 30.0, + "confidence": 0.88 + })]) + } + } + + #[tokio::test] + async fn detect_ocr_produces_image_location() { + let config = OcrConfig { + language: "eng".into(), + engine: "tesseract".into(), + confidence_threshold: 0.5, + }; + let layer = OcrDetection::new(MockOcrBackend, config); + + let img = ImageData::new_rgb(200, 100); + let spans = vec![Span::new((), img)]; + + let entities = layer.detect(spans).await.unwrap(); + assert_eq!(entities.len(), 1); + assert_eq!(entities[0].value, "John Doe"); + assert_eq!(entities[0].entity_kind, EntityKind::Handwriting); + assert_eq!(entities[0].detection_method, DetectionMethod::Ocr); + + let loc = entities[0].location.as_ref().unwrap().as_image().unwrap(); + assert!((loc.bounding_box.x - 10.0).abs() < f64::EPSILON); + assert!((loc.bounding_box.width - 100.0).abs() < f64::EPSILON); + } +} diff --git a/crates/nvisy-object/Cargo.toml b/crates/nvisy-object/Cargo.toml deleted file mode 100644 index c642375..0000000 --- a/crates/nvisy-object/Cargo.toml +++ /dev/null @@ -1,46 +0,0 @@ -# https://doc.rust-lang.org/cargo/reference/manifest.html - -[package] -name = "nvisy-object" -description = "Object store providers and streams (S3, Azure, GCS) for Nvisy" -keywords = ["nvisy", "object-store", "s3", "storage"] -categories = ["filesystem"] - -version = { workspace = true } -rust-version = { workspace = true } -edition = { workspace = true } -license = { workspace = true } -publish = { workspace = true } - 
-authors = { workspace = true } -repository = { workspace = true } -homepage = { workspace = true } -documentation = { workspace = true } - -[package.metadata.docs.rs] -all-features = true -rustdoc-args = ["--cfg", "docsrs"] - -[dependencies] -# Internal crates -nvisy-core = { workspace = true, features = [] } - -# (De)serialization -serde = { workspace = true, features = ["derive"] } - -# Async runtime -tokio = { workspace = true, features = ["sync"] } -async-trait = { workspace = true, features = [] } -futures = { workspace = true, features = [] } - -# Primitive datatypes -bytes = { workspace = true, features = [] } - -# Cloud object storage (S3, Azure Blob, GCS) -object_store = { workspace = true, features = ["aws", "azure", "gcp"] } - -# Observability -tracing = { workspace = true, features = [] } - -[dev-dependencies] -tokio = { workspace = true, features = ["macros", "rt"] } diff --git a/crates/nvisy-object/src/client/get_output.rs b/crates/nvisy-object/src/client/get_output.rs deleted file mode 100644 index 6546f2a..0000000 --- a/crates/nvisy-object/src/client/get_output.rs +++ /dev/null @@ -1,15 +0,0 @@ -//! Result type for [`ObjectStoreClient::get`](super::ObjectStoreClient::get). - -use bytes::Bytes; -use object_store::ObjectMeta; - -/// Result of a successful [`ObjectStoreClient::get`](super::ObjectStoreClient::get) call. -#[derive(Debug)] -pub struct GetOutput { - /// Raw bytes of the retrieved object. - pub data: Bytes, - /// MIME content-type, if the backend provides one. - pub content_type: Option, - /// Object metadata (size, etag, last_modified, location). - pub meta: ObjectMeta, -} diff --git a/crates/nvisy-object/src/client/mod.rs b/crates/nvisy-object/src/client/mod.rs deleted file mode 100644 index 7419197..0000000 --- a/crates/nvisy-object/src/client/mod.rs +++ /dev/null @@ -1,340 +0,0 @@ -//! Unified object-store client backed by [`object_store::ObjectStore`]. -//! -//! [`ObjectStoreClient`] is a thin, cloneable wrapper around -//! 
`Arc` that provides convenience methods for the most -//! common operations. Every public method is instrumented with -//! [`tracing`] for observability. - -use std::sync::Arc; - -use bytes::Bytes; -use futures::stream::BoxStream; -use futures::TryStreamExt; -use object_store::path::Path; -use object_store::{ObjectMeta, ObjectStore, ObjectStoreExt, PutMode, PutOptions, PutPayload}; - -use nvisy_core::Error; - -mod get_output; -mod put_output; - -pub use get_output::GetOutput; -pub use put_output::PutOutput; - -/// Cloneable handle to any [`ObjectStore`] backend (S3, Azure, GCS, ...). -/// -/// All methods accept human-readable string keys and convert them to -/// [`object_store::path::Path`] internally. -#[derive(Clone, Debug)] -pub struct ObjectStoreClient(pub Arc); - -impl ObjectStoreClient { - /// Wrap a concrete [`ObjectStore`] implementation. - pub fn new(store: impl ObjectStore) -> Self { - Self(Arc::new(store)) - } - - /// Verify that the backing store is reachable. - /// - /// Issues a HEAD for a probe key — a not-found response is treated as - /// success (the bucket/container exists), any other error is propagated. - #[tracing::instrument(name = "object.verify", skip(self))] - pub async fn verify_reachable(&self) -> Result<(), Error> { - let path = Path::from("_nvisy_verify_probe"); - match self.0.head(&path).await { - Ok(_) => Ok(()), - Err(object_store::Error::NotFound { .. }) => Ok(()), - Err(e) => Err(from_object_store(e)), - } - } - - /// List object keys under `prefix`. - /// - /// Returns all matching keys in a single `Vec`. For lazy iteration, - /// use [`list_stream`](Self::list_stream) instead. 
- #[tracing::instrument(name = "object.list", skip(self), fields(prefix))] - pub async fn list( - &self, - prefix: &str, - ) -> Result, Error> { - let prefix = if prefix.is_empty() { - None - } else { - Some(Path::from(prefix)) - }; - self.0 - .list(prefix.as_ref()) - .try_collect() - .await - .map_err(from_object_store) - } - - /// Lazily stream object metadata under `prefix`. - #[tracing::instrument(name = "object.list_stream", skip(self), fields(prefix))] - pub fn list_stream( - &self, - prefix: &str, - ) -> BoxStream<'_, Result> { - let prefix = if prefix.is_empty() { - None - } else { - Some(Path::from(prefix)) - }; - Box::pin(self.0.list(prefix.as_ref()).map_err(from_object_store)) - } - - /// Retrieve the raw bytes, content-type, and metadata stored at `key`. - #[tracing::instrument(name = "object.get", skip(self), fields(key))] - pub async fn get(&self, key: &str) -> Result { - let path = Path::from(key); - let result = self.0.get(&path).await.map_err(from_object_store)?; - let meta = result.meta.clone(); - let content_type = result - .attributes - .get(&object_store::Attribute::ContentType) - .map(|v| v.to_string()); - let data = result.bytes().await.map_err(from_object_store)?; - Ok(GetOutput { - data, - content_type, - meta, - }) - } - - /// Upload `data` to `key`, optionally setting the content-type. - pub async fn put( - &self, - key: &str, - data: Bytes, - content_type: Option<&str>, - ) -> Result { - self.put_opts(key, data, PutMode::Overwrite, content_type).await - } - - /// Upload `data` to `key` with the specified [`PutMode`]. 
- #[tracing::instrument(name = "object.put_opts", skip(self, data), fields(key, size = data.len()))] - pub async fn put_opts( - &self, - key: &str, - data: Bytes, - mode: PutMode, - content_type: Option<&str>, - ) -> Result { - let path = Path::from(key); - let payload = PutPayload::from(data); - let mut opts = PutOptions { - mode, - ..Default::default() - }; - if let Some(ct) = content_type { - opts.attributes.insert( - object_store::Attribute::ContentType, - ct.to_string().into(), - ); - } - let result = self - .0 - .put_opts(&path, payload, opts) - .await - .map_err(from_object_store)?; - Ok(result.into()) - } - - /// Get object metadata without downloading the body. - #[tracing::instrument(name = "object.head", skip(self), fields(key))] - pub async fn head(&self, key: &str) -> Result { - let path = Path::from(key); - self.0.head(&path).await.map_err(from_object_store) - } - - /// Delete the object at `key`. - #[tracing::instrument(name = "object.delete", skip(self), fields(key))] - pub async fn delete(&self, key: &str) -> Result<(), Error> { - let path = Path::from(key); - self.0.delete(&path).await.map_err(from_object_store) - } - - /// Copy an object from `src` to `dst` within the same store. - #[tracing::instrument(name = "object.copy", skip(self), fields(src, dst))] - pub async fn copy(&self, src: &str, dst: &str) -> Result<(), Error> { - let from = Path::from(src); - let to = Path::from(dst); - self.0.copy(&from, &to).await.map_err(from_object_store) - } -} - -/// Convert an [`object_store::Error`] into a [`nvisy_core::Error`]. -fn from_object_store(err: object_store::Error) -> Error { - let retryable = !matches!( - err, - object_store::Error::NotFound { .. } - | object_store::Error::PermissionDenied { .. } - | object_store::Error::Unauthenticated { .. } - | object_store::Error::AlreadyExists { .. } - | object_store::Error::Precondition { .. 
} - ); - Error::runtime(err.to_string(), "object-store", retryable) - .with_source(err) -} - -#[cfg(test)] -mod tests { - use super::*; - use object_store::memory::InMemory; - - fn test_client() -> ObjectStoreClient { - ObjectStoreClient::new(InMemory::new()) - } - - #[tokio::test] - async fn put_and_get() { - let client = test_client(); - let data = Bytes::from("hello world"); - client - .put("test.txt", data.clone(), Some("text/plain")) - .await - .unwrap(); - - let result = client.get("test.txt").await.unwrap(); - assert_eq!(result.data, data); - assert_eq!(result.content_type.as_deref(), Some("text/plain")); - } - - #[tokio::test] - async fn get_returns_meta() { - let client = test_client(); - let data = Bytes::from("abc"); - client.put("meta.bin", data, None).await.unwrap(); - - let result = client.get("meta.bin").await.unwrap(); - assert_eq!(result.meta.size as usize, 3); - assert_eq!(result.meta.location, Path::from("meta.bin")); - } - - #[tokio::test] - async fn put_returns_result() { - let client = test_client(); - let result = client - .put("etag.bin", Bytes::from("x"), None) - .await - .unwrap(); - assert!(result.e_tag.is_some()); - } - - #[tokio::test] - async fn head() { - let client = test_client(); - client - .put("head.bin", Bytes::from("data"), None) - .await - .unwrap(); - - let meta = client.head("head.bin").await.unwrap(); - assert_eq!(meta.size, 4); - assert_eq!(meta.location, Path::from("head.bin")); - } - - #[tokio::test] - async fn head_not_found() { - let client = test_client(); - let err = client.head("missing").await.unwrap_err(); - assert!(!err.is_retryable()); - } - - #[tokio::test] - async fn delete() { - let client = test_client(); - client - .put("del.bin", Bytes::from("x"), None) - .await - .unwrap(); - client.delete("del.bin").await.unwrap(); - - assert!(client.get("del.bin").await.is_err()); - } - - #[tokio::test] - async fn copy() { - let client = test_client(); - let data = Bytes::from("copy me"); - client.put("src.bin", 
data.clone(), None).await.unwrap(); - client.copy("src.bin", "dst.bin").await.unwrap(); - - let result = client.get("dst.bin").await.unwrap(); - assert_eq!(result.data, data); - } - - #[tokio::test] - async fn list() { - let client = test_client(); - for i in 0..3 { - client - .put( - &format!("dir/file{i}.txt"), - Bytes::from(format!("{i}")), - None, - ) - .await - .unwrap(); - } - - let items = client.list("dir/").await.unwrap(); - assert_eq!(items.len(), 3); - } - - #[tokio::test] - async fn list_stream() { - use futures::StreamExt; - let client = test_client(); - for i in 0..3 { - client - .put( - &format!("stream/f{i}.bin"), - Bytes::from(format!("{i}")), - None, - ) - .await - .unwrap(); - } - - let items: Vec<_> = client - .list_stream("stream/") - .collect::>() - .await - .into_iter() - .collect::, _>>() - .unwrap(); - assert_eq!(items.len(), 3); - } - - #[tokio::test] - async fn put_create_only() { - let client = test_client(); - client - .put_opts( - "unique.bin", - Bytes::from("first"), - PutMode::Create, - None, - ) - .await - .unwrap(); - - let err = client - .put_opts( - "unique.bin", - Bytes::from("second"), - PutMode::Create, - None, - ) - .await - .unwrap_err(); - assert!(!err.is_retryable()); - } - - #[tokio::test] - async fn verify_reachable() { - let client = test_client(); - client.verify_reachable().await.unwrap(); - } -} diff --git a/crates/nvisy-object/src/client/put_output.rs b/crates/nvisy-object/src/client/put_output.rs deleted file mode 100644 index 2550eee..0000000 --- a/crates/nvisy-object/src/client/put_output.rs +++ /dev/null @@ -1,20 +0,0 @@ -//! Result type for [`ObjectStoreClient::put`](super::ObjectStoreClient::put) and -//! [`ObjectStoreClient::put_opts`](super::ObjectStoreClient::put_opts). - -/// Result of a successful put operation. -#[derive(Debug)] -pub struct PutOutput { - /// Unique identifier for the newly created object, if the backend provides one. 
- pub e_tag: Option, - /// A version indicator for the newly created object, if the backend provides one. - pub version: Option, -} - -impl From for PutOutput { - fn from(r: object_store::PutResult) -> Self { - Self { - e_tag: r.e_tag, - version: r.version, - } - } -} diff --git a/crates/nvisy-object/src/lib.rs b/crates/nvisy-object/src/lib.rs deleted file mode 100644 index fb0a72c..0000000 --- a/crates/nvisy-object/src/lib.rs +++ /dev/null @@ -1,12 +0,0 @@ -#![forbid(unsafe_code)] -#![cfg_attr(docsrs, feature(doc_cfg))] -#![doc = include_str!("../README.md")] - -pub mod client; -/// Provider trait and object storage provider factories. -pub mod providers; -/// Streaming traits and object store adapters. -pub mod streams; - -#[doc(hidden)] -pub mod prelude; diff --git a/crates/nvisy-object/src/prelude.rs b/crates/nvisy-object/src/prelude.rs deleted file mode 100644 index f2936a3..0000000 --- a/crates/nvisy-object/src/prelude.rs +++ /dev/null @@ -1,8 +0,0 @@ -//! Convenience re-exports. - -pub use crate::providers::Provider; -pub use crate::streams::{StreamSource, StreamTarget}; - -pub use crate::client::{GetOutput, ObjectStoreClient, PutOutput}; -pub use crate::providers::{AzureProvider, GcsProvider, S3Provider}; -pub use crate::streams::{ObjectReadStream, ObjectWriteStream}; diff --git a/crates/nvisy-object/src/providers/azure.rs b/crates/nvisy-object/src/providers/azure.rs deleted file mode 100644 index dfe4a89..0000000 --- a/crates/nvisy-object/src/providers/azure.rs +++ /dev/null @@ -1,76 +0,0 @@ -//! Azure Blob Storage provider using [`object_store::azure::MicrosoftAzureBuilder`]. - -use object_store::azure::MicrosoftAzureBuilder; -use serde::Deserialize; - -use nvisy_core::Error; -use super::Provider; - -use crate::client::ObjectStoreClient; - -/// Typed credentials for Azure Blob Storage. -#[derive(Debug, Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct AzureCredentials { - /// Azure storage container name. 
- pub container: String, - /// Azure storage account name. - pub account_name: String, - /// Storage account access key. - #[serde(default)] - pub access_key: Option, - /// Shared Access Signature token. - #[serde(default)] - pub sas_token: Option, - /// Custom endpoint URL (for Azure Stack or Azurite). - #[serde(default)] - pub endpoint: Option, -} - -/// Factory that creates [`ObjectStoreClient`] instances backed by Azure Blob Storage. -pub struct AzureProvider; - -#[async_trait::async_trait] -impl Provider for AzureProvider { - type Credentials = AzureCredentials; - type Client = ObjectStoreClient; - - const ID: &str = "azure"; - - async fn verify(creds: &Self::Credentials) -> Result<(), Error> { - let client = Self::connect(creds).await?; - client.verify_reachable().await - } - - async fn connect(creds: &Self::Credentials) -> Result { - let mut builder = MicrosoftAzureBuilder::new() - .with_container_name(&creds.container) - .with_account(&creds.account_name); - - if let Some(key) = &creds.access_key { - builder = builder.with_access_key(key); - } - - if let Some(sas) = &creds.sas_token { - let pairs: Vec<(String, String)> = sas - .trim_start_matches('?') - .split('&') - .filter_map(|pair| { - let mut parts = pair.splitn(2, '='); - Some((parts.next()?.to_string(), parts.next().unwrap_or("").to_string())) - }) - .collect(); - builder = builder.with_sas_authorization(pairs); - } - - if let Some(endpoint) = &creds.endpoint { - builder = builder.with_endpoint(endpoint.clone()); - } - - let store = builder - .build() - .map_err(|e| Error::connection(e.to_string(), "azure", true))?; - - Ok(ObjectStoreClient::new(store)) - } -} diff --git a/crates/nvisy-object/src/providers/gcs.rs b/crates/nvisy-object/src/providers/gcs.rs deleted file mode 100644 index 8002931..0000000 --- a/crates/nvisy-object/src/providers/gcs.rs +++ /dev/null @@ -1,58 +0,0 @@ -//! Google Cloud Storage provider using [`object_store::gcp::GoogleCloudStorageBuilder`]. 
- -use object_store::gcp::GoogleCloudStorageBuilder; -use serde::Deserialize; - -use nvisy_core::Error; -use super::Provider; - -use crate::client::ObjectStoreClient; - -/// Typed credentials for Google Cloud Storage. -#[derive(Debug, Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct GcsCredentials { - /// GCS bucket name. - pub bucket: String, - /// Path to a JSON service account key file. - #[serde(default)] - pub service_account_key: Option, - /// Custom endpoint URL (for testing with a fake GCS server). - #[serde(default)] - pub endpoint: Option, -} - -/// Factory that creates [`ObjectStoreClient`] instances backed by Google Cloud Storage. -pub struct GcsProvider; - -#[async_trait::async_trait] -impl Provider for GcsProvider { - type Credentials = GcsCredentials; - type Client = ObjectStoreClient; - - const ID: &str = "gcs"; - - async fn verify(creds: &Self::Credentials) -> Result<(), Error> { - let client = Self::connect(creds).await?; - client.verify_reachable().await - } - - async fn connect(creds: &Self::Credentials) -> Result { - let mut builder = - GoogleCloudStorageBuilder::new().with_bucket_name(&creds.bucket); - - if let Some(key_path) = &creds.service_account_key { - builder = builder.with_service_account_key(key_path); - } - - if let Some(endpoint) = &creds.endpoint { - builder = builder.with_url(endpoint); - } - - let store = builder - .build() - .map_err(|e| Error::connection(e.to_string(), "gcs", true))?; - - Ok(ObjectStoreClient::new(store)) - } -} diff --git a/crates/nvisy-object/src/providers/mod.rs b/crates/nvisy-object/src/providers/mod.rs deleted file mode 100644 index fe7d0e6..0000000 --- a/crates/nvisy-object/src/providers/mod.rs +++ /dev/null @@ -1,11 +0,0 @@ -//! Provider trait and object storage provider factories. 
- -mod provider; -mod azure; -mod gcs; -mod s3; - -pub use provider::Provider; -pub use azure::{AzureCredentials, AzureProvider}; -pub use gcs::{GcsCredentials, GcsProvider}; -pub use s3::{S3Credentials, S3Provider}; diff --git a/crates/nvisy-object/src/providers/provider.rs b/crates/nvisy-object/src/providers/provider.rs deleted file mode 100644 index 3ed9441..0000000 --- a/crates/nvisy-object/src/providers/provider.rs +++ /dev/null @@ -1,38 +0,0 @@ -//! Provider trait for creating authenticated client connections. - -use std::future::Future; -use std::pin::Pin; - -use serde::de::DeserializeOwned; - -use nvisy_core::Error; - -/// Factory for creating authenticated connections to an external service. -/// -/// Implementations handle credential validation, connectivity verification, -/// and client construction for a specific provider (e.g. S3, OpenAI). -#[async_trait::async_trait] -pub trait Provider: Send + Sync + 'static { - /// Strongly-typed credentials for this provider. - type Credentials: DeserializeOwned + Send; - /// The client type produced by [`connect`](Self::connect). - type Client: Send + 'static; - - /// Unique identifier (e.g. "s3", "openai"). - const ID: &str; - - /// Verify credentials by attempting a lightweight connection. - async fn verify(creds: &Self::Credentials) -> Result<(), Error>; - - /// Create a connected client instance. - async fn connect(creds: &Self::Credentials) -> Result; - - /// Optional async cleanup when the connection is released. - /// - /// Return `None` if no cleanup is needed. The default implementation - /// returns `None`. - #[allow(clippy::type_complexity)] - fn disconnect(_client: Self::Client) -> Option + Send>>> { - None - } -} diff --git a/crates/nvisy-object/src/providers/s3.rs b/crates/nvisy-object/src/providers/s3.rs deleted file mode 100644 index 66ab78d..0000000 --- a/crates/nvisy-object/src/providers/s3.rs +++ /dev/null @@ -1,86 +0,0 @@ -//! S3-compatible provider using [`object_store::aws::AmazonS3Builder`]. 
-//! -//! Works with AWS S3, MinIO, and any S3-compatible service. - -use object_store::aws::AmazonS3Builder; -use serde::Deserialize; - -use nvisy_core::Error; -use super::Provider; - -use crate::client::ObjectStoreClient; - -/// Typed credentials for S3-compatible provider. -#[derive(Debug, Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct S3Credentials { - /// S3 bucket name. - pub bucket: String, - /// AWS region (defaults to `us-east-1`). - #[serde(default = "default_region")] - pub region: String, - /// Endpoint URL (e.g. `http://localhost:9000` for MinIO). - /// Required for non-AWS S3-compatible services. - #[serde(default)] - pub endpoint: Option, - /// Access key ID for static credentials. - #[serde(default)] - pub access_key_id: Option, - /// Secret access key for static credentials. - #[serde(default)] - pub secret_access_key: Option, - /// Session token for temporary credentials. - #[serde(default)] - pub session_token: Option, -} - -fn default_region() -> String { - "us-east-1".to_string() -} - -/// Factory that creates [`ObjectStoreClient`] instances backed by S3. 
-pub struct S3Provider; - -#[async_trait::async_trait] -impl Provider for S3Provider { - type Credentials = S3Credentials; - type Client = ObjectStoreClient; - - const ID: &str = "s3"; - - async fn verify(creds: &Self::Credentials) -> Result<(), Error> { - let client = Self::connect(creds).await?; - client.verify_reachable().await - } - - async fn connect(creds: &Self::Credentials) -> Result { - let mut builder = AmazonS3Builder::new() - .with_bucket_name(&creds.bucket) - .with_region(&creds.region); - - if let Some(endpoint) = &creds.endpoint { - builder = builder.with_endpoint(endpoint); - if endpoint.starts_with("http://") { - builder = builder.with_allow_http(true); - } - } - - if let Some(access_key) = &creds.access_key_id { - builder = builder.with_access_key_id(access_key); - } - - if let Some(secret_key) = &creds.secret_access_key { - builder = builder.with_secret_access_key(secret_key); - } - - if let Some(token) = &creds.session_token { - builder = builder.with_token(token); - } - - let store = builder - .build() - .map_err(|e| Error::connection(e.to_string(), "s3", true))?; - - Ok(ObjectStoreClient::new(store)) - } -} diff --git a/crates/nvisy-object/src/streams/mod.rs b/crates/nvisy-object/src/streams/mod.rs deleted file mode 100644 index 706d836..0000000 --- a/crates/nvisy-object/src/streams/mod.rs +++ /dev/null @@ -1,11 +0,0 @@ -//! Streaming traits and object store adapters. - -mod source_stream; -mod target_stream; -mod read_object; -mod write_object; - -pub use source_stream::StreamSource; -pub use target_stream::StreamTarget; -pub use read_object::{ObjectReadStream, ObjectReadParams}; -pub use write_object::{ObjectWriteStream, ObjectWriteParams}; diff --git a/crates/nvisy-object/src/streams/read_object.rs b/crates/nvisy-object/src/streams/read_object.rs deleted file mode 100644 index 46bae18..0000000 --- a/crates/nvisy-object/src/streams/read_object.rs +++ /dev/null @@ -1,147 +0,0 @@ -//! 
Streaming reader that pulls objects from a cloud object store. - -use futures::StreamExt; -use serde::Deserialize; -use tokio::sync::mpsc; - -use nvisy_core::Error; -use nvisy_core::io::ContentData; -use nvisy_core::path::ContentSource; - -use super::StreamSource; - -use crate::client::ObjectStoreClient; - -/// Typed parameters for [`ObjectReadStream`]. -#[derive(Debug, Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct ObjectReadParams { - /// Object key prefix to filter by. - #[serde(default)] - pub prefix: String, - /// Skip objects whose size exceeds this limit (in bytes). - #[serde(default)] - pub max_size: Option, -} - -/// A [`StreamSource`] that lists and fetches objects from a cloud object store, -/// emitting each object as a [`ContentData`] onto the output channel. -pub struct ObjectReadStream; - -#[async_trait::async_trait] -impl StreamSource for ObjectReadStream { - type Params = ObjectReadParams; - type Client = ObjectStoreClient; - - fn id(&self) -> &str { "read" } - - #[tracing::instrument(name = "object.read", skip_all, fields(prefix = %params.prefix, count))] - async fn read( - &self, - output: mpsc::Sender, - params: Self::Params, - client: Self::Client, - ) -> Result { - let mut stream = client.list_stream(¶ms.prefix); - let mut total = 0u64; - - while let Some(result) = stream.next().await { - let meta = result?; - let key = meta.location.as_ref(); - - if let Some(max) = params.max_size - && meta.size > max - { - tracing::debug!(key, size = meta.size, max_size = max, "skipping oversized object"); - continue; - } - - let source = ContentSource::new(); - tracing::debug!(key, source_id = %source, "fetching object"); - - let result = client.get(key).await?; - - let mut content = ContentData::new(source, result.data); - if let Some(ct) = result.content_type { - content = content.with_content_type(ct); - } - - total += 1; - if output.send(content).await.is_err() { - break; - } - } - - tracing::Span::current().record("count", total); - 
Ok(total) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use bytes::Bytes; - use object_store::memory::InMemory; - - fn test_client() -> ObjectStoreClient { - ObjectStoreClient::new(InMemory::new()) - } - - #[tokio::test] - async fn read_emits_all_objects() { - let client = test_client(); - for i in 0..3 { - client - .put( - &format!("data/file{i}.txt"), - Bytes::from(format!("content-{i}")), - Some("text/plain"), - ) - .await - .unwrap(); - } - - let (tx, mut rx) = mpsc::channel(16); - let stream = ObjectReadStream; - let params = ObjectReadParams { - prefix: "data/".to_string(), - max_size: None, - }; - - let count = stream.read(tx, params, client).await.unwrap(); - assert_eq!(count, 3); - - let mut items = Vec::new(); - while let Some(item) = rx.recv().await { - items.push(item); - } - assert_eq!(items.len(), 3); - } - - #[tokio::test] - async fn read_max_size_filter() { - let client = test_client(); - client - .put("filter/small.bin", Bytes::from("hi"), None) - .await - .unwrap(); - client - .put("filter/big.bin", Bytes::from("this is a much bigger payload"), None) - .await - .unwrap(); - - let (tx, mut rx) = mpsc::channel(16); - let stream = ObjectReadStream; - let params = ObjectReadParams { - prefix: "filter/".to_string(), - max_size: Some(10), - }; - - let count = stream.read(tx, params, client).await.unwrap(); - assert_eq!(count, 1); - - let item = rx.recv().await.unwrap(); - assert_eq!(item.as_bytes(), b"hi"); - assert!(rx.recv().await.is_none()); - } -} diff --git a/crates/nvisy-object/src/streams/source_stream.rs b/crates/nvisy-object/src/streams/source_stream.rs deleted file mode 100644 index 51fe26c..0000000 --- a/crates/nvisy-object/src/streams/source_stream.rs +++ /dev/null @@ -1,34 +0,0 @@ -//! Streaming source trait for pipeline input. -//! -//! [`StreamSource`] reads content from an external system into the pipeline. 
- -use serde::de::DeserializeOwned; -use tokio::sync::mpsc; - -use nvisy_core::Error; -use nvisy_core::io::ContentData; - -/// A source stream that reads content from an external system into the pipeline. -/// -/// Implementations connect to a storage backend (e.g. S3, local filesystem) -/// and emit content data into the pipeline's input channel. -#[async_trait::async_trait] -pub trait StreamSource: Send + Sync + 'static { - /// Strongly-typed parameters for this stream source. - type Params: DeserializeOwned + Send; - /// The client type this stream requires. - type Client: Send + 'static; - - /// Unique identifier for this stream source (e.g. `"read"`). - fn id(&self) -> &str; - - /// Read content from the external system and send it to `output`. - /// - /// Returns the number of items read. - async fn read( - &self, - output: mpsc::Sender, - params: Self::Params, - client: Self::Client, - ) -> Result; -} diff --git a/crates/nvisy-object/src/streams/target_stream.rs b/crates/nvisy-object/src/streams/target_stream.rs deleted file mode 100644 index efc92d0..0000000 --- a/crates/nvisy-object/src/streams/target_stream.rs +++ /dev/null @@ -1,34 +0,0 @@ -//! Streaming target trait for pipeline output. -//! -//! [`StreamTarget`] writes processed content back to an external system. - -use serde::de::DeserializeOwned; -use tokio::sync::mpsc; - -use nvisy_core::Error; -use nvisy_core::io::ContentData; - -/// A target stream that writes content from the pipeline to an external system. -/// -/// Implementations receive processed content data from the pipeline and persist -/// it to a storage backend. -#[async_trait::async_trait] -pub trait StreamTarget: Send + Sync + 'static { - /// Strongly-typed parameters for this stream target. - type Params: DeserializeOwned + Send; - /// The client type this stream requires. - type Client: Send + 'static; - - /// Unique identifier for this stream target (e.g. `"write"`). 
- fn id(&self) -> &str; - - /// Receive content from `input` and write it to the external system. - /// - /// Returns the number of items written. - async fn write( - &self, - input: mpsc::Receiver, - params: Self::Params, - client: Self::Client, - ) -> Result; -} diff --git a/crates/nvisy-object/src/streams/write_object.rs b/crates/nvisy-object/src/streams/write_object.rs deleted file mode 100644 index 409c31b..0000000 --- a/crates/nvisy-object/src/streams/write_object.rs +++ /dev/null @@ -1,138 +0,0 @@ -//! Streaming writer that uploads content to a cloud object store. - -use object_store::PutMode; -use serde::Deserialize; -use tokio::sync::mpsc; - -use nvisy_core::Error; -use nvisy_core::io::ContentData; - -use super::StreamTarget; - -use crate::client::ObjectStoreClient; - -/// Typed parameters for [`ObjectWriteStream`]. -#[derive(Debug, Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct ObjectWriteParams { - /// Key prefix prepended to each content source UUID. - #[serde(default)] - pub prefix: String, - /// When `true`, uses `PutMode::Create` so that writing to an existing - /// key fails with an error. - #[serde(default)] - pub create_only: bool, -} - -/// A [`StreamTarget`] that receives [`ContentData`] from the input channel and -/// uploads each one to a cloud object store. 
-pub struct ObjectWriteStream; - -#[async_trait::async_trait] -impl StreamTarget for ObjectWriteStream { - type Params = ObjectWriteParams; - type Client = ObjectStoreClient; - - fn id(&self) -> &str { "write" } - - #[tracing::instrument(name = "object.write", skip_all, fields(prefix = %params.prefix, count))] - async fn write( - &self, - mut input: mpsc::Receiver, - params: Self::Params, - client: Self::Client, - ) -> Result { - let prefix = ¶ms.prefix; - let mut total = 0u64; - - while let Some(content) = input.recv().await { - let source_id = content.content_source.to_string(); - let key = if prefix.is_empty() { - source_id - } else { - format!("{prefix}{source_id}") - }; - - let mode = if params.create_only { - PutMode::Create - } else { - PutMode::Overwrite - }; - client - .put_opts(&key, content.to_bytes(), mode, content.content_type()) - .await?; - - total += 1; - } - - tracing::Span::current().record("count", total); - Ok(total) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use bytes::Bytes; - use nvisy_core::io::ContentData; - use nvisy_core::path::ContentSource; - use object_store::memory::InMemory; - - fn test_client() -> ObjectStoreClient { - ObjectStoreClient::new(InMemory::new()) - } - - #[tokio::test] - async fn write_uploads_all() { - let client = test_client(); - let (tx, rx) = mpsc::channel(16); - - let sources: Vec = (0..3).map(|_| ContentSource::new()).collect(); - for (i, src) in sources.iter().enumerate() { - let content = ContentData::new(*src, Bytes::from(format!("payload-{i}"))); - tx.send(content).await.unwrap(); - } - drop(tx); - - let stream = ObjectWriteStream; - let params = ObjectWriteParams { - prefix: "out/".to_string(), - create_only: false, - }; - - let count = stream.write(rx, params, client.clone()).await.unwrap(); - assert_eq!(count, 3); - - // Verify all objects were stored - let items = client.list("out/").await.unwrap(); - assert_eq!(items.len(), 3); - } - - #[tokio::test] - async fn write_create_only() { - let 
client = test_client(); - - // Pre-populate an object at a known key - let source = ContentSource::new(); - let key = format!("prefix/{source}"); - client - .put(&key, Bytes::from("existing"), None) - .await - .unwrap(); - - // Try to write the same key with create_only - let (tx, rx) = mpsc::channel(1); - let content = ContentData::new(source, Bytes::from("new")); - tx.send(content).await.unwrap(); - drop(tx); - - let stream = ObjectWriteStream; - let params = ObjectWriteParams { - prefix: "prefix/".to_string(), - create_only: true, - }; - - let result = stream.write(rx, params, client).await; - assert!(result.is_err()); - } -} diff --git a/crates/nvisy-ocr/Cargo.toml b/crates/nvisy-ocr/Cargo.toml new file mode 100644 index 0000000..ec97198 --- /dev/null +++ b/crates/nvisy-ocr/Cargo.toml @@ -0,0 +1,34 @@ +# https://doc.rust-lang.org/cargo/reference/manifest.html + +[package] +name = "nvisy-ocr" +description = "OCR backend trait and provider integration for Nvisy" +keywords = ["nvisy", "ocr", "tesseract", "text-extraction"] +categories = ["text-processing"] + +version = { workspace = true } +rust-version = { workspace = true } +edition = { workspace = true } +license = { workspace = true } +publish = { workspace = true } + +authors = { workspace = true } +repository = { workspace = true } +homepage = { workspace = true } +documentation = { workspace = true } + +[package.metadata.docs.rs] +all-features = true +rustdoc-args = ["--cfg", "docsrs"] + +[dependencies] +# Internal crates +nvisy-core = { workspace = true, features = [] } +nvisy-ontology = { workspace = true, features = [] } +nvisy-python = { workspace = true, features = [] } + +# (De)serialization +serde_json = { workspace = true, features = [] } + +# Async runtime +async-trait = { workspace = true, features = [] } diff --git a/crates/nvisy-object/README.md b/crates/nvisy-ocr/README.md similarity index 68% rename from crates/nvisy-object/README.md rename to crates/nvisy-ocr/README.md index a0040e9..7a4bf7f 
100644 --- a/crates/nvisy-object/README.md +++ b/crates/nvisy-ocr/README.md @@ -1,8 +1,10 @@ -# nvisy-object +# nvisy-ocr [![Build](https://img.shields.io/github/actions/workflow/status/nvisycom/runtime/build.yml?branch=main&label=build%20%26%20test&style=flat-square)](https://github.com/nvisycom/runtime/actions/workflows/build.yml) -Object store plugin for the Nvisy runtime. Provides cloud storage providers (S3, Azure Blob Storage, Google Cloud Storage) and streaming read/write interfaces for ingesting and outputting data through the processing pipeline. +OCR backend trait and provider integration for the Nvisy runtime. + +Defines the `OcrBackend` trait for optical character recognition providers, configuration types, result parsing from raw JSON into entity types, and a `PythonBridge` implementation that delegates to the `nvisy_ai` Python module. ## Documentation diff --git a/crates/nvisy-ocr/src/backend.rs b/crates/nvisy-ocr/src/backend.rs new file mode 100644 index 0000000..c0c2f32 --- /dev/null +++ b/crates/nvisy-ocr/src/backend.rs @@ -0,0 +1,31 @@ +//! OCR backend trait and configuration. + +use serde_json::Value; + +use nvisy_core::Error; + +/// Configuration passed to an [`OcrBackend`] implementation. +#[derive(Debug, Clone)] +pub struct OcrConfig { + /// Language hint (e.g. `"eng"` for English). + pub language: String, + /// OCR engine to use (`"tesseract"`, `"google-vision"`, `"aws-textract"`). + pub engine: String, + /// Minimum confidence threshold for OCR results. + pub confidence_threshold: f64, +} + +/// Backend trait for OCR providers. +/// +/// Implementations call an external OCR service and return raw JSON +/// results. Entity construction is handled by the consuming crate. +#[async_trait::async_trait] +pub trait OcrBackend: Send + Sync + 'static { + /// Run OCR on image bytes, returning raw dicts. 
+ async fn detect_ocr( + &self, + image_data: &[u8], + mime_type: &str, + config: &OcrConfig, + ) -> Result, Error>; +} diff --git a/crates/nvisy-ocr/src/bridge.rs b/crates/nvisy-ocr/src/bridge.rs new file mode 100644 index 0000000..9ea3e5d --- /dev/null +++ b/crates/nvisy-ocr/src/bridge.rs @@ -0,0 +1,27 @@ +//! [`OcrBackend`] implementation for [`PythonBridge`]. + +use serde_json::Value; + +use nvisy_core::Error; +use nvisy_python::bridge::PythonBridge; +use nvisy_python::ocr::OcrParams; + +use crate::backend::{OcrBackend, OcrConfig}; + +/// Converts [`OcrConfig`] to [`OcrParams`] and delegates to `nvisy_python::ocr`. +#[async_trait::async_trait] +impl OcrBackend for PythonBridge { + async fn detect_ocr( + &self, + image_data: &[u8], + mime_type: &str, + config: &OcrConfig, + ) -> Result, Error> { + let params = OcrParams { + language: config.language.clone(), + engine: config.engine.clone(), + confidence_threshold: config.confidence_threshold, + }; + nvisy_python::ocr::detect_ocr(self, image_data, mime_type, ¶ms).await + } +} diff --git a/crates/nvisy-ocr/src/lib.rs b/crates/nvisy-ocr/src/lib.rs new file mode 100644 index 0000000..ae2b5a9 --- /dev/null +++ b/crates/nvisy-ocr/src/lib.rs @@ -0,0 +1,10 @@ +#![forbid(unsafe_code)] +#![cfg_attr(docsrs, feature(doc_cfg))] +#![doc = include_str!("../README.md")] + +mod backend; +mod bridge; +mod parse; + +pub use backend::{OcrBackend, OcrConfig}; +pub use parse::parse_ocr_entities; diff --git a/crates/nvisy-ocr/src/parse.rs b/crates/nvisy-ocr/src/parse.rs new file mode 100644 index 0000000..1c6d0dc --- /dev/null +++ b/crates/nvisy-ocr/src/parse.rs @@ -0,0 +1,49 @@ +//! OCR result parsing. + +use serde_json::Value; + +use nvisy_core::math::BoundingBox; +use nvisy_core::Error; +use nvisy_ontology::entity::{DetectionMethod, Entity, EntityCategory, EntityKind}; +use nvisy_ontology::location::{ImageLocation, Location}; + +/// Parse raw JSON dicts from an OCR backend into [`Entity`] values. 
+/// +/// Expected dict keys: `text`, `x`, `y`, `width`, `height`, `confidence`. +pub fn parse_ocr_entities(raw: &[Value]) -> Result, Error> { + let mut entities = Vec::new(); + + for item in raw { + let obj = item.as_object().ok_or_else(|| { + Error::python("Expected JSON object in OCR results".to_string()) + })?; + + let text = obj + .get("text") + .and_then(Value::as_str) + .ok_or_else(|| Error::python("Missing 'text' in OCR result".to_string()))?; + + let x = obj.get("x").and_then(Value::as_f64).unwrap_or(0.0); + let y = obj.get("y").and_then(Value::as_f64).unwrap_or(0.0); + let width = obj.get("width").and_then(Value::as_f64).unwrap_or(0.0); + let height = obj.get("height").and_then(Value::as_f64).unwrap_or(0.0); + let confidence = obj.get("confidence").and_then(Value::as_f64).unwrap_or(0.0); + + let entity = Entity::new( + EntityCategory::Pii, + EntityKind::Handwriting, + text, + DetectionMethod::Ocr, + confidence, + ) + .with_location(Location::Image(ImageLocation { + bounding_box: BoundingBox { x, y, width, height }, + image_id: None, + page_number: None, + })); + + entities.push(entity); + } + + Ok(entities) +} diff --git a/crates/nvisy-pattern/src/lib.rs b/crates/nvisy-pattern/src/lib.rs index 839d727..53dc740 100644 --- a/crates/nvisy-pattern/src/lib.rs +++ b/crates/nvisy-pattern/src/lib.rs @@ -2,10 +2,10 @@ #![cfg_attr(docsrs, feature(doc_cfg))] #![doc = include_str!("../README.md")] -pub(crate) mod patterns; pub(crate) mod dictionaries; -pub(crate) mod validators; mod engine; +pub(crate) mod patterns; +pub(crate) mod validators; pub use engine::{ AllowList, DenyEntry, DenyList, DetectionSource, PatternEngine, PatternEngineBuilder, @@ -13,4 +13,5 @@ pub use engine::{ }; pub use patterns::ContextRule; +#[doc(hidden)] pub mod prelude; diff --git a/crates/nvisy-rig/Cargo.toml b/crates/nvisy-rig/Cargo.toml index db3b506..03afc94 100644 --- a/crates/nvisy-rig/Cargo.toml +++ b/crates/nvisy-rig/Cargo.toml @@ -23,6 +23,7 @@ rustdoc-args = ["--cfg", "docsrs"] # 
Internal crates nvisy-codec = { workspace = true, features = [] } nvisy-core = { workspace = true, features = [] } +nvisy-ontology = { workspace = true, features = [] } # LLM framework rig-core = { workspace = true, features = ["derive"] } @@ -33,3 +34,6 @@ async-trait = { workspace = true, features = [] } # (De)serialization serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true, features = [] } + +# Observability +tracing = { workspace = true, features = [] } diff --git a/crates/nvisy-rig/src/backend.rs b/crates/nvisy-rig/src/backend.rs new file mode 100644 index 0000000..1bdaee8 --- /dev/null +++ b/crates/nvisy-rig/src/backend.rs @@ -0,0 +1,34 @@ +//! LLM backend trait and configuration. + +use serde_json::Value; + +use nvisy_core::Error; + +/// Configuration passed to an [`LlmBackend`] implementation. +#[derive(Debug, Clone)] +pub struct LlmConfig { + /// Entity type labels to detect (e.g., `["PERSON", "SSN"]`). + pub entity_types: Vec, + /// Minimum confidence score to include a detection (0.0 -- 1.0). + pub confidence_threshold: f64, + /// System prompt override (if empty, the backend uses its default). + pub system_prompt: Option, +} + +/// Backend trait for LLM-based entity detection. +/// +/// Implementations call an LLM service (e.g. via `rig-core`) and return +/// raw JSON results. Entity construction from the raw dicts is handled +/// by the detection layers. +#[async_trait::async_trait] +pub trait LlmBackend: Send + Sync + 'static { + /// Detect entities in text using an LLM, returning raw dicts. + /// + /// Each dict should contain: `category`, `entity_type`, `value`, + /// `confidence`, `start_offset`, `end_offset`. 
+ async fn detect_text( + &self, + text: &str, + config: &LlmConfig, + ) -> Result, Error>; +} diff --git a/crates/nvisy-rig/src/lib.rs b/crates/nvisy-rig/src/lib.rs index 4a9799c..4dc2bfd 100644 --- a/crates/nvisy-rig/src/lib.rs +++ b/crates/nvisy-rig/src/lib.rs @@ -1,3 +1,9 @@ #![forbid(unsafe_code)] #![cfg_attr(docsrs, feature(doc_cfg))] #![doc = include_str!("../README.md")] + +mod backend; +mod parse; + +pub use backend::{LlmBackend, LlmConfig}; +pub use parse::parse_llm_entities; diff --git a/crates/nvisy-rig/src/parse.rs b/crates/nvisy-rig/src/parse.rs new file mode 100644 index 0000000..a104082 --- /dev/null +++ b/crates/nvisy-rig/src/parse.rs @@ -0,0 +1,88 @@ +//! LLM result parsing. + +use std::str::FromStr; + +use serde_json::Value; + +use nvisy_core::Error; +use nvisy_ontology::entity::{DetectionMethod, Entity, EntityCategory, EntityKind}; +use nvisy_ontology::location::{Location, TextLocation}; + +/// Parse raw JSON dicts from an LLM backend into [`Entity`] values. +/// +/// Expected dict keys: `category`, `entity_type`, `value`, `confidence`, +/// and optionally `start_offset` / `end_offset`. 
+pub fn parse_llm_entities(raw: &[Value]) -> Result, Error> { + let mut entities = Vec::new(); + + for item in raw { + let obj = item.as_object().ok_or_else(|| { + Error::validation("Expected JSON object in LLM results".to_string(), "llm-parse") + })?; + + let category_str = obj + .get("category") + .and_then(Value::as_str) + .ok_or_else(|| Error::validation("Missing 'category'".to_string(), "llm-parse"))?; + + let category = match category_str { + "pii" => EntityCategory::Pii, + "phi" => EntityCategory::Phi, + "financial" => EntityCategory::Financial, + "credentials" => EntityCategory::Credentials, + other => EntityCategory::Custom(other.to_string()), + }; + + let entity_type_str = obj + .get("entity_type") + .and_then(Value::as_str) + .ok_or_else(|| Error::validation("Missing 'entity_type'".to_string(), "llm-parse"))?; + + let entity_kind = match EntityKind::from_str(entity_type_str) { + Ok(ek) => ek, + Err(_) => { + tracing::warn!(entity_type = entity_type_str, "unknown entity type from LLM, dropping"); + continue; + } + }; + + let value = obj + .get("value") + .and_then(Value::as_str) + .ok_or_else(|| Error::validation("Missing 'value'".to_string(), "llm-parse"))?; + + let confidence = obj + .get("confidence") + .and_then(Value::as_f64) + .ok_or_else(|| Error::validation("Missing 'confidence'".to_string(), "llm-parse"))?; + + let start_offset = obj + .get("start_offset") + .and_then(Value::as_u64) + .map(|v| v as usize) + .unwrap_or(0); + + let end_offset = obj + .get("end_offset") + .and_then(Value::as_u64) + .map(|v| v as usize) + .unwrap_or(0); + + let entity = Entity::new( + category, + entity_kind, + value, + DetectionMethod::ContextualNlp, + confidence, + ) + .with_location(Location::Text(TextLocation { + start_offset, + end_offset, + ..Default::default() + })); + + entities.push(entity); + } + + Ok(entities) +} diff --git a/docker/Dockerfile b/docker/Dockerfile index 12e14c6..35159f5 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -6,21 +6,29 
@@ WORKDIR /app # Copy manifests first to cache dependency builds COPY Cargo.toml Cargo.lock ./ +COPY crates/nvisy-asr/Cargo.toml crates/nvisy-asr/Cargo.toml +COPY crates/nvisy-augment/Cargo.toml crates/nvisy-augment/Cargo.toml +COPY crates/nvisy-cli/Cargo.toml crates/nvisy-cli/Cargo.toml +COPY crates/nvisy-codec/Cargo.toml crates/nvisy-codec/Cargo.toml COPY crates/nvisy-core/Cargo.toml crates/nvisy-core/Cargo.toml -COPY crates/nvisy-detect/Cargo.toml crates/nvisy-detect/Cargo.toml COPY crates/nvisy-engine/Cargo.toml crates/nvisy-engine/Cargo.toml -COPY crates/nvisy-object/Cargo.toml crates/nvisy-object/Cargo.toml +COPY crates/nvisy-identify/Cargo.toml crates/nvisy-identify/Cargo.toml +COPY crates/nvisy-ocr/Cargo.toml crates/nvisy-ocr/Cargo.toml +COPY crates/nvisy-ontology/Cargo.toml crates/nvisy-ontology/Cargo.toml +COPY crates/nvisy-pattern/Cargo.toml crates/nvisy-pattern/Cargo.toml COPY crates/nvisy-python/Cargo.toml crates/nvisy-python/Cargo.toml +COPY crates/nvisy-rig/Cargo.toml crates/nvisy-rig/Cargo.toml COPY crates/nvisy-server/Cargo.toml crates/nvisy-server/Cargo.toml # Create empty src files to satisfy cargo's manifest checks -RUN for crate in nvisy-core nvisy-detect nvisy-engine nvisy-object nvisy-python; do \ +RUN for crate in nvisy-asr nvisy-augment nvisy-codec nvisy-core nvisy-engine nvisy-identify nvisy-ocr nvisy-ontology nvisy-pattern nvisy-python nvisy-rig; do \ mkdir -p crates/$crate/src && echo "" > crates/$crate/src/lib.rs; \ done && \ + mkdir -p crates/nvisy-cli/src && echo "fn main() {}" > crates/nvisy-cli/src/main.rs && \ mkdir -p crates/nvisy-server/src && echo "fn main() {}" > crates/nvisy-server/src/main.rs # Create stub READMEs for crates that use doc = include_str!("../README.md") -RUN for crate in nvisy-core nvisy-detect nvisy-engine nvisy-object nvisy-python nvisy-server; do \ +RUN for crate in nvisy-asr nvisy-augment nvisy-cli nvisy-codec nvisy-core nvisy-engine nvisy-identify nvisy-ocr nvisy-ontology nvisy-pattern nvisy-python 
nvisy-rig nvisy-server; do \ touch crates/$crate/README.md; \ done From f625ddee5f8e979d67f4290237aac2bc59ad5123 Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Tue, 24 Feb 2026 21:07:11 +0100 Subject: [PATCH 04/24] refactor(pattern): reduce public surface, add per-column dictionary confidence - Narrow nvisy-pattern root exports to only externally-used types (PatternEngine, PatternEngineBuilder, PatternMatch, DetectionSource, ContextRule); move AllowList/DenyList/PatternEngineError/default_engine behind `pub mod engine` for opt-in access - Add `column_confidence` to DictionaryPattern so CSV dictionary columns can have different confidence scores (e.g. full name vs short code) - Track source column index in CsvDictionary via new Dictionary::columns() - Apply column-specific confidence in PatternEngine::scan_dict - Update currencies/cryptocurrencies/languages patterns with per-column confidence (full names 0.85, codes 0.55/0.45) - Remove API Status link from root README Co-Authored-By: Claude Opus 4.6 --- README.md | 1 - .../assets/patterns/cryptocurrencies.json | 3 +- .../assets/patterns/currencies.json | 3 +- .../assets/patterns/languages.json | 3 +- .../src/dictionaries/csv_dictionary.rs | 14 ++++- .../src/dictionaries/dictionary.rs | 9 +++ crates/nvisy-pattern/src/engine/builder.rs | 3 + crates/nvisy-pattern/src/engine/mod.rs | 57 +++++++++++++++++-- crates/nvisy-pattern/src/lib.rs | 7 +-- crates/nvisy-pattern/src/patterns/pattern.rs | 11 +++- crates/nvisy-pattern/src/prelude.rs | 3 +- 11 files changed, 93 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index 9c09b80..ecbad49 100644 --- a/README.md +++ b/README.md @@ -37,4 +37,3 @@ Apache 2.0 License, see [LICENSE.txt](LICENSE.txt) - **Documentation**: [docs.nvisy.com](https://docs.nvisy.com) - **Issues**: [GitHub Issues](https://github.com/nvisycom/runtime/issues) - **Email**: [support@nvisy.com](mailto:support@nvisy.com) -- **API Status**: 
[nvisy.openstatus.dev](https://nvisy.openstatus.dev) diff --git a/crates/nvisy-pattern/assets/patterns/cryptocurrencies.json b/crates/nvisy-pattern/assets/patterns/cryptocurrencies.json index 78bf468..433d2c8 100644 --- a/crates/nvisy-pattern/assets/patterns/cryptocurrencies.json +++ b/crates/nvisy-pattern/assets/patterns/cryptocurrencies.json @@ -3,7 +3,8 @@ "category": "financial", "entity_type": "amount", "dictionary": { - "name": "cryptocurrencies" + "name": "cryptocurrencies", + "column_confidence": [0.85, 0.55] }, "confidence": 0.85 } diff --git a/crates/nvisy-pattern/assets/patterns/currencies.json b/crates/nvisy-pattern/assets/patterns/currencies.json index 4805155..d524c9d 100644 --- a/crates/nvisy-pattern/assets/patterns/currencies.json +++ b/crates/nvisy-pattern/assets/patterns/currencies.json @@ -3,7 +3,8 @@ "category": "financial", "entity_type": "amount", "dictionary": { - "name": "currencies" + "name": "currencies", + "column_confidence": [0.85, 0.55] }, "confidence": 0.85 } diff --git a/crates/nvisy-pattern/assets/patterns/languages.json b/crates/nvisy-pattern/assets/patterns/languages.json index 436dcfc..6d953b9 100644 --- a/crates/nvisy-pattern/assets/patterns/languages.json +++ b/crates/nvisy-pattern/assets/patterns/languages.json @@ -3,7 +3,8 @@ "category": "pii", "entity_type": "demographic", "dictionary": { - "name": "languages" + "name": "languages", + "column_confidence": [0.85, 0.45] }, "confidence": 0.85 } diff --git a/crates/nvisy-pattern/src/dictionaries/csv_dictionary.rs b/crates/nvisy-pattern/src/dictionaries/csv_dictionary.rs index 87c8c25..574eae6 100644 --- a/crates/nvisy-pattern/src/dictionaries/csv_dictionary.rs +++ b/crates/nvisy-pattern/src/dictionaries/csv_dictionary.rs @@ -10,6 +10,8 @@ use super::Dictionary; pub struct CsvDictionary { name: String, entries: Vec, + /// Source column index for each entry (parallel to `entries`). 
+ columns: Vec<usize>, } impl CsvDictionary { @@ -17,10 +19,13 @@ impl CsvDictionary { /// /// `name` identifies this dictionary (e.g. `"currencies"`). /// `text` is the CSV content where each non-empty cell becomes a matchable term. + /// The column index of each cell is preserved so that per-column confidence + /// scores can be applied at detection time. pub fn new(name: impl Into<String>, text: &str) -> Self { let name = name.into(); let mut entries = Vec::new(); + let mut columns = Vec::new(); let mut reader = csv::ReaderBuilder::new() .has_headers(false) .flexible(true) @@ -29,15 +34,16 @@ for result in reader.records() { let record = result.expect("failed to parse CSV record"); - for field in record.iter() { + for (col, field) in record.iter().enumerate() { let trimmed = field.trim(); if !trimmed.is_empty() { entries.push(trimmed.to_owned()); + columns.push(col); } } } - Self { name, entries } + Self { name, entries, columns } } } @@ -49,6 +55,10 @@ impl Dictionary for CsvDictionary { fn entries(&self) -> &[String] { &self.entries } + + fn columns(&self) -> Option<&[usize]> { + Some(&self.columns) + } } #[cfg(test)] diff --git a/crates/nvisy-pattern/src/dictionaries/dictionary.rs b/crates/nvisy-pattern/src/dictionaries/dictionary.rs index 578c8eb..8edc63f 100644 --- a/crates/nvisy-pattern/src/dictionaries/dictionary.rs +++ b/crates/nvisy-pattern/src/dictionaries/dictionary.rs @@ -16,6 +16,15 @@ pub trait Dictionary: Send + Sync { /// All matchable terms produced by this dictionary. fn entries(&self) -> &[String]; + + /// Column index for each entry, parallel to [`entries`](Self::entries). + /// + /// Returns `Some` for CSV dictionaries where each cell tracks its + /// source column. Returns `None` for plain-text dictionaries (all + /// entries are logically in column 0). + fn columns(&self) -> Option<&[usize]> { + None + } } /// Type-erased boxed [`Dictionary`].
diff --git a/crates/nvisy-pattern/src/engine/builder.rs b/crates/nvisy-pattern/src/engine/builder.rs index 9bb6f2e..0fb2998 100644 --- a/crates/nvisy-pattern/src/engine/builder.rs +++ b/crates/nvisy-pattern/src/engine/builder.rs @@ -116,6 +116,7 @@ impl PatternEngineBuilder { if values.is_empty() { continue; } + let columns = dict.columns().map(|c| c.to_vec()); let automaton = aho_corasick::AhoCorasickBuilder::new() .ascii_case_insensitive(!dp.case_sensitive) .build(&values) @@ -130,6 +131,8 @@ impl PatternEngineBuilder { confidence: p.confidence(), automaton, values, + columns, + column_confidence: dp.column_confidence.clone(), context: p.context().cloned(), }); } diff --git a/crates/nvisy-pattern/src/engine/mod.rs b/crates/nvisy-pattern/src/engine/mod.rs index 2573fc3..8a61926 100644 --- a/crates/nvisy-pattern/src/engine/mod.rs +++ b/crates/nvisy-pattern/src/engine/mod.rs @@ -57,9 +57,32 @@ struct DictEntry { automaton: AhoCorasick, /// The terms used to build the automaton, indexed by pattern id. values: Vec, + /// Per-entry column index from the source dictionary (parallel to `values`). + /// `None` for plain-text dictionaries. + columns: Option>, + /// Per-column confidence overrides from the pattern definition. + column_confidence: Option>, context: Option, } +impl DictEntry { + /// Resolve the confidence for the entry at `pattern_index`. + /// + /// If per-column confidence overrides are configured and the entry has + /// a known column, uses the column-specific value. Otherwise falls back + /// to the pattern's base confidence. + fn resolve_confidence(&self, pattern_index: usize) -> f64 { + if let (Some(cols), Some(col_conf)) = (&self.columns, &self.column_confidence) { + if let Some(&col) = cols.get(pattern_index) { + if let Some(&conf) = col_conf.get(col) { + return conf; + } + } + } + self.confidence + } +} + /// Pre-compiled engine that scans text against all registered patterns. 
/// /// Scanning runs in three phases: @@ -176,12 +199,17 @@ impl PatternEngine { /// Phase 2: dictionary matches via Aho-Corasick automata. fn scan_dict(&self, text: &str, results: &mut Vec) { for entry in &self.dict_entries { - if entry.confidence < self.confidence_threshold { - continue; - } - for mat in entry.automaton.find_iter(text) { - let value = &entry.values[mat.pattern().as_usize()]; + let pat_idx = mat.pattern().as_usize(); + let value = &entry.values[pat_idx]; + + // Resolve per-entry confidence: use column override if available, + // otherwise fall back to the pattern's base confidence. + let confidence = entry.resolve_confidence(pat_idx); + + if confidence < self.confidence_threshold { + continue; + } if self.allow_set.contains(value.as_str()) { continue; @@ -194,7 +222,7 @@ impl PatternEngine { value: value.clone(), start: mat.start(), end: mat.end(), - confidence: entry.confidence, + confidence, source: DetectionSource::Dictionary, context: entry.context.clone(), }); @@ -389,6 +417,23 @@ mod tests { assert_eq!(entry.category, EntityCategory::Financial); } + #[test] + fn column_confidence_applies_to_csv_dictionaries() { + let engine = default_engine(); + // "US Dollar" is column 0 (full name), "USD" is column 1 (code). 
+ let matches = engine.scan_text("I paid in US Dollar and also in USD."); + let full_name = matches.iter().find(|m| m.value == "US Dollar"); + let code = matches.iter().find(|m| m.value == "USD"); + assert!(full_name.is_some(), "should match 'US Dollar'"); + assert!(code.is_some(), "should match 'USD'"); + let full_conf = full_name.unwrap().confidence; + let code_conf = code.unwrap().confidence; + assert!( + full_conf > code_conf, + "full name confidence ({full_conf}) should exceed code confidence ({code_conf})" + ); + } + #[test] fn context_rule_passthrough() { let engine = PatternEngine::builder() diff --git a/crates/nvisy-pattern/src/lib.rs b/crates/nvisy-pattern/src/lib.rs index 53dc740..9f50527 100644 --- a/crates/nvisy-pattern/src/lib.rs +++ b/crates/nvisy-pattern/src/lib.rs @@ -3,14 +3,11 @@ #![doc = include_str!("../README.md")] pub(crate) mod dictionaries; -mod engine; +pub mod engine; pub(crate) mod patterns; pub(crate) mod validators; -pub use engine::{ - AllowList, DenyEntry, DenyList, DetectionSource, PatternEngine, PatternEngineBuilder, - PatternEngineError, PatternMatch, default_engine, -}; +pub use engine::{DetectionSource, PatternEngine, PatternEngineBuilder, PatternMatch}; pub use patterns::ContextRule; #[doc(hidden)] diff --git a/crates/nvisy-pattern/src/patterns/pattern.rs b/crates/nvisy-pattern/src/patterns/pattern.rs index 5186e42..d814ace 100644 --- a/crates/nvisy-pattern/src/patterns/pattern.rs +++ b/crates/nvisy-pattern/src/patterns/pattern.rs @@ -30,7 +30,7 @@ pub struct RegexPattern { } /// A dictionary-based match source. -#[derive(Debug, Clone, PartialEq, Eq, Deserialize)] +#[derive(Debug, Clone, PartialEq, Deserialize)] pub struct DictionaryPattern { /// Named dictionary from the [`DictionaryRegistry`]. /// @@ -42,6 +42,13 @@ pub struct DictionaryPattern { /// `ascii_case_insensitive` setting. #[serde(default)] pub case_sensitive: bool, + /// Optional per-column confidence overrides for CSV dictionaries. 
+ /// + /// When present, entries from column `i` use `column_confidence[i]` + /// instead of the pattern's base confidence. Columns beyond the + /// length of this array fall back to the base confidence. + #[serde(default)] + pub column_confidence: Option<Vec<f64>>, } /// How a pattern finds matches in text. @@ -49,7 +56,7 @@ pub struct DictionaryPattern { /// Each pattern uses exactly one source: either a regular expression that /// is compiled and run against text spans, or a named dictionary whose /// entries are matched literally. -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq)] pub enum MatchSource { /// Match via a compiled regular expression. Regex(RegexPattern), diff --git a/crates/nvisy-pattern/src/prelude.rs b/crates/nvisy-pattern/src/prelude.rs index 60f8968..f045dfd 100644 --- a/crates/nvisy-pattern/src/prelude.rs +++ b/crates/nvisy-pattern/src/prelude.rs @@ -5,6 +5,5 @@ //! ``` pub use crate::{ - AllowList, ContextRule, DenyEntry, DenyList, DetectionSource, PatternEngine, - PatternEngineBuilder, PatternEngineError, PatternMatch, default_engine, + ContextRule, DetectionSource, PatternEngine, PatternEngineBuilder, PatternMatch, }; From cd569c43b4ab04feac4af74f42f40a6c95d17d6b Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Tue, 24 Feb 2026 21:48:29 +0100 Subject: [PATCH 05/24] refactor(pattern): move confidence into pattern/dictionary objects MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move confidence from a top-level JSON field into the match source objects so each source type owns its own scoring: - RegexPattern gains a `confidence: f64` field (default 1.0) - DictionaryPattern.confidence accepts a number (uniform) or array (per-column) via DictionaryConfidence enum - Remove Pattern::confidence() from the trait — confidence is now read directly from the match source during engine compilation - Remove top-level `confidence` from all 27 pattern JSON definitions - Rename
`column_confidence` to `confidence` in dictionary patterns Co-Authored-By: Claude Opus 4.6 --- .../assets/patterns/aws_key.json | 6 +- .../assets/patterns/bitcoin_address.json | 6 +- .../assets/patterns/credit_card.json | 4 +- .../assets/patterns/cryptocurrencies.json | 5 +- .../assets/patterns/currencies.json | 5 +- .../assets/patterns/date_of_birth.json | 4 +- .../nvisy-pattern/assets/patterns/email.json | 6 +- .../assets/patterns/ethereum_address.json | 6 +- .../assets/patterns/generic_api_key.json | 6 +- .../assets/patterns/github_token.json | 6 +- .../nvisy-pattern/assets/patterns/iban.json | 4 +- .../nvisy-pattern/assets/patterns/ipv4.json | 6 +- .../nvisy-pattern/assets/patterns/ipv6.json | 6 +- .../assets/patterns/languages.json | 5 +- .../assets/patterns/mac_address.json | 6 +- .../assets/patterns/nationalities.json | 6 +- .../nvisy-pattern/assets/patterns/phone.json | 4 +- .../assets/patterns/private_key.json | 6 +- .../assets/patterns/religions.json | 6 +- crates/nvisy-pattern/assets/patterns/ssn.json | 4 +- .../assets/patterns/stripe_key.json | 6 +- .../assets/patterns/swift_code.json | 6 +- crates/nvisy-pattern/assets/patterns/url.json | 6 +- .../assets/patterns/us_bank_routing.json | 6 +- .../assets/patterns/us_drivers_license.json | 6 +- .../assets/patterns/us_passport.json | 6 +- .../assets/patterns/us_postal_code.json | 6 +- crates/nvisy-pattern/src/engine/builder.rs | 5 +- crates/nvisy-pattern/src/engine/mod.rs | 26 ++---- .../src/patterns/json_pattern.rs | 11 --- crates/nvisy-pattern/src/patterns/mod.rs | 20 ++-- crates/nvisy-pattern/src/patterns/pattern.rs | 91 ++++++++++++++++--- 32 files changed, 174 insertions(+), 128 deletions(-) diff --git a/crates/nvisy-pattern/assets/patterns/aws_key.json b/crates/nvisy-pattern/assets/patterns/aws_key.json index d3fa046..96ec047 100644 --- a/crates/nvisy-pattern/assets/patterns/aws_key.json +++ b/crates/nvisy-pattern/assets/patterns/aws_key.json @@ -3,7 +3,7 @@ "category": "credentials", "entity_type": 
"api_key", "pattern": { - "regex": "\\bAKIA[0-9A-Z]{16}\\b" - }, - "confidence": 0.95 + "regex": "\\bAKIA[0-9A-Z]{16}\\b", + "confidence": 0.95 + } } diff --git a/crates/nvisy-pattern/assets/patterns/bitcoin_address.json b/crates/nvisy-pattern/assets/patterns/bitcoin_address.json index 3d7289a..409b0ef 100644 --- a/crates/nvisy-pattern/assets/patterns/bitcoin_address.json +++ b/crates/nvisy-pattern/assets/patterns/bitcoin_address.json @@ -3,7 +3,7 @@ "category": "financial", "entity_type": "crypto_address", "pattern": { - "regex": "\\b(?:bc1[a-z0-9]{25,39}|[13][a-km-zA-HJ-NP-Z1-9]{25,34})\\b" - }, - "confidence": 0.85 + "regex": "\\b(?:bc1[a-z0-9]{25,39}|[13][a-km-zA-HJ-NP-Z1-9]{25,34})\\b", + "confidence": 0.85 + } } diff --git a/crates/nvisy-pattern/assets/patterns/credit_card.json b/crates/nvisy-pattern/assets/patterns/credit_card.json index 4d9d2ce..0e3f59f 100644 --- a/crates/nvisy-pattern/assets/patterns/credit_card.json +++ b/crates/nvisy-pattern/assets/patterns/credit_card.json @@ -4,9 +4,9 @@ "entity_type": "payment_card", "pattern": { "regex": "\\b(?:\\d[ \\-]*?){13,19}\\b", - "validator": "luhn" + "validator": "luhn", + "confidence": 0.85 }, - "confidence": 0.85, "context": { "keywords": ["card", "credit", "debit", "payment", "visa", "mastercard", "amex"], "window": 3, diff --git a/crates/nvisy-pattern/assets/patterns/cryptocurrencies.json b/crates/nvisy-pattern/assets/patterns/cryptocurrencies.json index 433d2c8..140cc51 100644 --- a/crates/nvisy-pattern/assets/patterns/cryptocurrencies.json +++ b/crates/nvisy-pattern/assets/patterns/cryptocurrencies.json @@ -4,7 +4,6 @@ "entity_type": "amount", "dictionary": { "name": "cryptocurrencies", - "column_confidence": [0.85, 0.55] - }, - "confidence": 0.85 + "confidence": [0.85, 0.55] + } } diff --git a/crates/nvisy-pattern/assets/patterns/currencies.json b/crates/nvisy-pattern/assets/patterns/currencies.json index d524c9d..ab0d016 100644 --- a/crates/nvisy-pattern/assets/patterns/currencies.json +++ 
b/crates/nvisy-pattern/assets/patterns/currencies.json @@ -4,7 +4,6 @@ "entity_type": "amount", "dictionary": { "name": "currencies", - "column_confidence": [0.85, 0.55] - }, - "confidence": 0.85 + "confidence": [0.85, 0.55] + } } diff --git a/crates/nvisy-pattern/assets/patterns/date_of_birth.json b/crates/nvisy-pattern/assets/patterns/date_of_birth.json index fcb8cae..26ecd52 100644 --- a/crates/nvisy-pattern/assets/patterns/date_of_birth.json +++ b/crates/nvisy-pattern/assets/patterns/date_of_birth.json @@ -3,9 +3,9 @@ "category": "pii", "entity_type": "date_of_birth", "pattern": { - "regex": "\\b(?:0[1-9]|1[0-2])[/\\-](?:0[1-9]|[12]\\d|3[01])[/\\-](?:19|20)\\d{2}\\b" + "regex": "\\b(?:0[1-9]|1[0-2])[/\\-](?:0[1-9]|[12]\\d|3[01])[/\\-](?:19|20)\\d{2}\\b", + "confidence": 0.6 }, - "confidence": 0.6, "context": { "keywords": ["birth", "born", "dob", "birthday", "date of birth"], "window": 2, diff --git a/crates/nvisy-pattern/assets/patterns/email.json b/crates/nvisy-pattern/assets/patterns/email.json index 7335061..8748538 100644 --- a/crates/nvisy-pattern/assets/patterns/email.json +++ b/crates/nvisy-pattern/assets/patterns/email.json @@ -3,7 +3,7 @@ "category": "pii", "entity_type": "email_address", "pattern": { - "regex": "\\b[a-zA-Z0-9._%+\\-]+@[a-zA-Z0-9.\\-]+\\.[a-zA-Z]{2,}\\b" - }, - "confidence": 0.95 + "regex": "\\b[a-zA-Z0-9._%+\\-]+@[a-zA-Z0-9.\\-]+\\.[a-zA-Z]{2,}\\b", + "confidence": 0.95 + } } diff --git a/crates/nvisy-pattern/assets/patterns/ethereum_address.json b/crates/nvisy-pattern/assets/patterns/ethereum_address.json index b757995..d9e93de 100644 --- a/crates/nvisy-pattern/assets/patterns/ethereum_address.json +++ b/crates/nvisy-pattern/assets/patterns/ethereum_address.json @@ -3,7 +3,7 @@ "category": "financial", "entity_type": "crypto_address", "pattern": { - "regex": "\\b0x[0-9a-fA-F]{40}\\b" - }, - "confidence": 0.85 + "regex": "\\b0x[0-9a-fA-F]{40}\\b", + "confidence": 0.85 + } } diff --git 
a/crates/nvisy-pattern/assets/patterns/generic_api_key.json b/crates/nvisy-pattern/assets/patterns/generic_api_key.json index 37dc872..cffec52 100644 --- a/crates/nvisy-pattern/assets/patterns/generic_api_key.json +++ b/crates/nvisy-pattern/assets/patterns/generic_api_key.json @@ -3,7 +3,7 @@ "category": "credentials", "entity_type": "api_key", "pattern": { - "regex": "(?i)(?:api[_\\-]?key|api[_\\-]?secret|access[_\\-]?token|secret[_\\-]?key|bearer)\\s*[:=]\\s*[\"']?([a-zA-Z0-9_\\-]{20,})[\"']?" - }, - "confidence": 0.7 + "regex": "(?i)(?:api[_\\-]?key|api[_\\-]?secret|access[_\\-]?token|secret[_\\-]?key|bearer)\\s*[:=]\\s*[\"']?([a-zA-Z0-9_\\-]{20,})[\"']?", + "confidence": 0.7 + } } diff --git a/crates/nvisy-pattern/assets/patterns/github_token.json b/crates/nvisy-pattern/assets/patterns/github_token.json index 77ac9e2..e6cf759 100644 --- a/crates/nvisy-pattern/assets/patterns/github_token.json +++ b/crates/nvisy-pattern/assets/patterns/github_token.json @@ -3,7 +3,7 @@ "category": "credentials", "entity_type": "auth_token", "pattern": { - "regex": "\\bgh[pousr]_[a-zA-Z0-9]{36}\\b" - }, - "confidence": 0.95 + "regex": "\\bgh[pousr]_[a-zA-Z0-9]{36}\\b", + "confidence": 0.95 + } } diff --git a/crates/nvisy-pattern/assets/patterns/iban.json b/crates/nvisy-pattern/assets/patterns/iban.json index c2c3e86..ac3f75f 100644 --- a/crates/nvisy-pattern/assets/patterns/iban.json +++ b/crates/nvisy-pattern/assets/patterns/iban.json @@ -4,9 +4,9 @@ "entity_type": "iban", "pattern": { "regex": "\\b[A-Z]{2}\\d{2}\\s?[A-Z0-9]{4}\\s?(?:\\d{4}\\s?){2,7}\\d{1,4}\\b", - "validator": "iban" + "validator": "iban", + "confidence": 0.85 }, - "confidence": 0.85, "context": { "keywords": ["iban", "bank", "account", "transfer", "swift"], "window": 3, diff --git a/crates/nvisy-pattern/assets/patterns/ipv4.json b/crates/nvisy-pattern/assets/patterns/ipv4.json index 37777bd..971ccd9 100644 --- a/crates/nvisy-pattern/assets/patterns/ipv4.json +++ b/crates/nvisy-pattern/assets/patterns/ipv4.json 
@@ -3,7 +3,7 @@ "category": "pii", "entity_type": "ip_address", "pattern": { - "regex": "\\b(?:(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d?)\\.){3}(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d?)\\b" - }, - "confidence": 0.75 + "regex": "\\b(?:(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d?)\\.){3}(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d?)\\b", + "confidence": 0.75 + } } diff --git a/crates/nvisy-pattern/assets/patterns/ipv6.json b/crates/nvisy-pattern/assets/patterns/ipv6.json index 5462ce2..ce096fd 100644 --- a/crates/nvisy-pattern/assets/patterns/ipv6.json +++ b/crates/nvisy-pattern/assets/patterns/ipv6.json @@ -3,7 +3,7 @@ "category": "pii", "entity_type": "ip_address", "pattern": { - "regex": "\\b(?:[0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}\\b|(?:[0-9a-fA-F]{1,4}:){1,7}:|::(?:[0-9a-fA-F]{1,4}:){0,5}[0-9a-fA-F]{1,4}\\b" - }, - "confidence": 0.75 + "regex": "\\b(?:[0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}\\b|(?:[0-9a-fA-F]{1,4}:){1,7}:|::(?:[0-9a-fA-F]{1,4}:){0,5}[0-9a-fA-F]{1,4}\\b", + "confidence": 0.75 + } } diff --git a/crates/nvisy-pattern/assets/patterns/languages.json b/crates/nvisy-pattern/assets/patterns/languages.json index 6d953b9..5d460af 100644 --- a/crates/nvisy-pattern/assets/patterns/languages.json +++ b/crates/nvisy-pattern/assets/patterns/languages.json @@ -4,7 +4,6 @@ "entity_type": "demographic", "dictionary": { "name": "languages", - "column_confidence": [0.85, 0.45] - }, - "confidence": 0.85 + "confidence": [0.85, 0.45] + } } diff --git a/crates/nvisy-pattern/assets/patterns/mac_address.json b/crates/nvisy-pattern/assets/patterns/mac_address.json index f67a7d5..fd8fe8e 100644 --- a/crates/nvisy-pattern/assets/patterns/mac_address.json +++ b/crates/nvisy-pattern/assets/patterns/mac_address.json @@ -3,7 +3,7 @@ "category": "pii", "entity_type": "mac_address", "pattern": { - "regex": "\\b(?:[0-9A-Fa-f]{2}[:\\-]){5}[0-9A-Fa-f]{2}\\b" - }, - "confidence": 0.85 + "regex": "\\b(?:[0-9A-Fa-f]{2}[:\\-]){5}[0-9A-Fa-f]{2}\\b", + "confidence": 0.85 + } } diff --git 
a/crates/nvisy-pattern/assets/patterns/nationalities.json b/crates/nvisy-pattern/assets/patterns/nationalities.json index dbaf0bb..a32593c 100644 --- a/crates/nvisy-pattern/assets/patterns/nationalities.json +++ b/crates/nvisy-pattern/assets/patterns/nationalities.json @@ -3,7 +3,7 @@ "category": "pii", "entity_type": "demographic", "dictionary": { - "name": "nationalities" - }, - "confidence": 0.85 + "name": "nationalities", + "confidence": 0.85 + } } diff --git a/crates/nvisy-pattern/assets/patterns/phone.json b/crates/nvisy-pattern/assets/patterns/phone.json index 928dde5..5380e94 100644 --- a/crates/nvisy-pattern/assets/patterns/phone.json +++ b/crates/nvisy-pattern/assets/patterns/phone.json @@ -3,9 +3,9 @@ "category": "pii", "entity_type": "phone_number", "pattern": { - "regex": "(?:\\+\\d{1,3}[\\s.\\-]?)?\\(?\\d{2,4}\\)?[\\s.\\-]?\\d{3,4}[\\s.\\-]?\\d{4}\\b" + "regex": "(?:\\+\\d{1,3}[\\s.\\-]?)?\\(?\\d{2,4}\\)?[\\s.\\-]?\\d{3,4}[\\s.\\-]?\\d{4}\\b", + "confidence": 0.8 }, - "confidence": 0.8, "context": { "keywords": ["phone", "call", "mobile", "tel", "fax", "contact"], "window": 2, diff --git a/crates/nvisy-pattern/assets/patterns/private_key.json b/crates/nvisy-pattern/assets/patterns/private_key.json index f52c9b7..138037f 100644 --- a/crates/nvisy-pattern/assets/patterns/private_key.json +++ b/crates/nvisy-pattern/assets/patterns/private_key.json @@ -3,7 +3,7 @@ "category": "credentials", "entity_type": "private_key", "pattern": { - "regex": "-----BEGIN (?:RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----" - }, - "confidence": 0.98 + "regex": "-----BEGIN (?:RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----", + "confidence": 0.98 + } } diff --git a/crates/nvisy-pattern/assets/patterns/religions.json b/crates/nvisy-pattern/assets/patterns/religions.json index 521bf5f..bb3d2f2 100644 --- a/crates/nvisy-pattern/assets/patterns/religions.json +++ b/crates/nvisy-pattern/assets/patterns/religions.json @@ -3,7 +3,7 @@ "category": "pii", "entity_type": "demographic", 
"dictionary": { - "name": "religions" - }, - "confidence": 0.85 + "name": "religions", + "confidence": 0.85 + } } diff --git a/crates/nvisy-pattern/assets/patterns/ssn.json b/crates/nvisy-pattern/assets/patterns/ssn.json index c6b3b52..12aeb75 100644 --- a/crates/nvisy-pattern/assets/patterns/ssn.json +++ b/crates/nvisy-pattern/assets/patterns/ssn.json @@ -4,9 +4,9 @@ "entity_type": "government_id", "pattern": { "regex": "\\b(\\d{3})-(\\d{2})-(\\d{4})\\b", - "validator": "ssn" + "validator": "ssn", + "confidence": 0.9 }, - "confidence": 0.9, "context": { "keywords": ["social security", "ssn", "tax id", "taxpayer identification"], "window": 3, diff --git a/crates/nvisy-pattern/assets/patterns/stripe_key.json b/crates/nvisy-pattern/assets/patterns/stripe_key.json index be89fb2..f2e5c1b 100644 --- a/crates/nvisy-pattern/assets/patterns/stripe_key.json +++ b/crates/nvisy-pattern/assets/patterns/stripe_key.json @@ -3,7 +3,7 @@ "category": "credentials", "entity_type": "api_key", "pattern": { - "regex": "\\bsk_(live|test)_[a-zA-Z0-9]{24,}\\b" - }, - "confidence": 0.95 + "regex": "\\bsk_(live|test)_[a-zA-Z0-9]{24,}\\b", + "confidence": 0.95 + } } diff --git a/crates/nvisy-pattern/assets/patterns/swift_code.json b/crates/nvisy-pattern/assets/patterns/swift_code.json index 3dd9cc0..a28a5e3 100644 --- a/crates/nvisy-pattern/assets/patterns/swift_code.json +++ b/crates/nvisy-pattern/assets/patterns/swift_code.json @@ -3,7 +3,7 @@ "category": "financial", "entity_type": "swift_code", "pattern": { - "regex": "\\b[A-Z]{4}[A-Z]{2}[A-Z0-9]{2}(?:[A-Z0-9]{3})?\\b" - }, - "confidence": 0.7 + "regex": "\\b[A-Z]{4}[A-Z]{2}[A-Z0-9]{2}(?:[A-Z0-9]{3})?\\b", + "confidence": 0.7 + } } diff --git a/crates/nvisy-pattern/assets/patterns/url.json b/crates/nvisy-pattern/assets/patterns/url.json index 02f43bd..d7bebc5 100644 --- a/crates/nvisy-pattern/assets/patterns/url.json +++ b/crates/nvisy-pattern/assets/patterns/url.json @@ -3,7 +3,7 @@ "category": "pii", "entity_type": "url", "pattern": { 
- "regex": "\\bhttps?://[^\\s/$.?#][^\\s]*\\b" - }, - "confidence": 0.9 + "regex": "\\bhttps?://[^\\s/$.?#][^\\s]*\\b", + "confidence": 0.9 + } } diff --git a/crates/nvisy-pattern/assets/patterns/us_bank_routing.json b/crates/nvisy-pattern/assets/patterns/us_bank_routing.json index feca4e3..cb5588c 100644 --- a/crates/nvisy-pattern/assets/patterns/us_bank_routing.json +++ b/crates/nvisy-pattern/assets/patterns/us_bank_routing.json @@ -3,7 +3,7 @@ "category": "financial", "entity_type": "bank_routing", "pattern": { - "regex": "\\b(?:0[1-9]|[12]\\d|3[0-2])\\d{7}\\b" - }, - "confidence": 0.5 + "regex": "\\b(?:0[1-9]|[12]\\d|3[0-2])\\d{7}\\b", + "confidence": 0.5 + } } diff --git a/crates/nvisy-pattern/assets/patterns/us_drivers_license.json b/crates/nvisy-pattern/assets/patterns/us_drivers_license.json index 7ee2664..1c1709a 100644 --- a/crates/nvisy-pattern/assets/patterns/us_drivers_license.json +++ b/crates/nvisy-pattern/assets/patterns/us_drivers_license.json @@ -3,7 +3,7 @@ "category": "pii", "entity_type": "drivers_license", "pattern": { - "regex": "\\b[A-Z]\\d{3}-\\d{4}-\\d{4}\\b" - }, - "confidence": 0.4 + "regex": "\\b[A-Z]\\d{3}-\\d{4}-\\d{4}\\b", + "confidence": 0.4 + } } diff --git a/crates/nvisy-pattern/assets/patterns/us_passport.json b/crates/nvisy-pattern/assets/patterns/us_passport.json index 058dfbf..bf055a8 100644 --- a/crates/nvisy-pattern/assets/patterns/us_passport.json +++ b/crates/nvisy-pattern/assets/patterns/us_passport.json @@ -3,7 +3,7 @@ "category": "pii", "entity_type": "passport_number", "pattern": { - "regex": "\\b[A-Z]\\d{8}\\b" - }, - "confidence": 0.5 + "regex": "\\b[A-Z]\\d{8}\\b", + "confidence": 0.5 + } } diff --git a/crates/nvisy-pattern/assets/patterns/us_postal_code.json b/crates/nvisy-pattern/assets/patterns/us_postal_code.json index d9cc156..b626956 100644 --- a/crates/nvisy-pattern/assets/patterns/us_postal_code.json +++ b/crates/nvisy-pattern/assets/patterns/us_postal_code.json @@ -3,7 +3,7 @@ "category": "pii", 
"entity_type": "postal_code", "pattern": { - "regex": "\\b\\d{5}(?:-\\d{4})?\\b" - }, - "confidence": 0.5 + "regex": "\\b\\d{5}(?:-\\d{4})?\\b", + "confidence": 0.5 + } } diff --git a/crates/nvisy-pattern/src/engine/builder.rs b/crates/nvisy-pattern/src/engine/builder.rs index 0fb2998..96ea1ed 100644 --- a/crates/nvisy-pattern/src/engine/builder.rs +++ b/crates/nvisy-pattern/src/engine/builder.rs @@ -99,7 +99,7 @@ impl PatternEngineBuilder { pattern_name: p.name().to_owned(), category: p.category().clone(), entity_kind: p.entity_kind(), - confidence: p.confidence(), + confidence: rp.confidence, validator_name: rp.validator.clone(), regex: compiled, context: p.context().cloned(), @@ -128,11 +128,10 @@ impl PatternEngineBuilder { pattern_name: p.name().to_owned(), category: p.category().clone(), entity_kind: p.entity_kind(), - confidence: p.confidence(), + confidence: dp.confidence.clone(), automaton, values, columns, - column_confidence: dp.column_confidence.clone(), context: p.context().cloned(), }); } diff --git a/crates/nvisy-pattern/src/engine/mod.rs b/crates/nvisy-pattern/src/engine/mod.rs index 8a61926..b8a8c9e 100644 --- a/crates/nvisy-pattern/src/engine/mod.rs +++ b/crates/nvisy-pattern/src/engine/mod.rs @@ -34,7 +34,7 @@ use regex::{Regex, RegexSet}; use nvisy_ontology::entity::{EntityCategory, EntityKind}; -use crate::patterns::ContextRule; +use crate::patterns::{ContextRule, DictionaryConfidence}; use crate::validators::ValidatorResolver; /// Metadata stored alongside each compiled regex. @@ -53,33 +53,25 @@ struct DictEntry { pattern_name: String, category: EntityCategory, entity_kind: EntityKind, - confidence: f64, + confidence: DictionaryConfidence, automaton: AhoCorasick, /// The terms used to build the automaton, indexed by pattern id. values: Vec, /// Per-entry column index from the source dictionary (parallel to `values`). - /// `None` for plain-text dictionaries. + /// `None` for plain-text dictionaries (all entries are column 0). 
columns: Option>, - /// Per-column confidence overrides from the pattern definition. - column_confidence: Option>, context: Option, } impl DictEntry { /// Resolve the confidence for the entry at `pattern_index`. - /// - /// If per-column confidence overrides are configured and the entry has - /// a known column, uses the column-specific value. Otherwise falls back - /// to the pattern's base confidence. fn resolve_confidence(&self, pattern_index: usize) -> f64 { - if let (Some(cols), Some(col_conf)) = (&self.columns, &self.column_confidence) { - if let Some(&col) = cols.get(pattern_index) { - if let Some(&conf) = col_conf.get(col) { - return conf; - } - } - } - self.confidence + let col = self + .columns + .as_ref() + .and_then(|cols| cols.get(pattern_index).copied()) + .unwrap_or(0); + self.confidence.resolve(col) } } diff --git a/crates/nvisy-pattern/src/patterns/json_pattern.rs b/crates/nvisy-pattern/src/patterns/json_pattern.rs index 67a5f46..7651d8f 100644 --- a/crates/nvisy-pattern/src/patterns/json_pattern.rs +++ b/crates/nvisy-pattern/src/patterns/json_pattern.rs @@ -35,9 +35,6 @@ pub enum JsonPatternWarning { UnknownValidator { pattern: String, validator: String }, } -/// Default confidence score when `"confidence"` is omitted from JSON. -const DEFAULT_CONFIDENCE: f64 = 1.0; - /// A detection pattern deserialized from a JSON definition file. 
/// /// Implements the [`Pattern`] trait and is the only concrete implementation @@ -48,7 +45,6 @@ pub struct JsonPattern { category: EntityCategory, entity_kind: EntityKind, match_source: MatchSource, - confidence: f64, pub(crate) context: Option, } @@ -84,8 +80,6 @@ impl JsonPattern { #[serde(flatten)] source: RawSource, #[serde(default)] - confidence: Option, - #[serde(default)] context: Option, } @@ -118,7 +112,6 @@ impl JsonPattern { category: raw.category, entity_kind: raw.entity_kind, match_source, - confidence: raw.confidence.unwrap_or(DEFAULT_CONFIDENCE), context: raw.context, }; @@ -143,10 +136,6 @@ impl Pattern for JsonPattern { &self.match_source } - fn confidence(&self) -> f64 { - self.confidence - } - fn context(&self) -> Option<&ContextRule> { self.context.as_ref() } diff --git a/crates/nvisy-pattern/src/patterns/mod.rs b/crates/nvisy-pattern/src/patterns/mod.rs index 8433777..878151d 100644 --- a/crates/nvisy-pattern/src/patterns/mod.rs +++ b/crates/nvisy-pattern/src/patterns/mod.rs @@ -19,7 +19,7 @@ mod pattern; pub use context_rule::ContextRule; pub use json_pattern::{JsonPattern, JsonPatternWarning}; -pub use pattern::{BoxPattern, MatchSource, Pattern}; +pub use pattern::{BoxPattern, DictionaryConfidence, MatchSource, Pattern}; use std::collections::BTreeMap; use std::sync::LazyLock; @@ -187,11 +187,18 @@ mod tests { for p in registry().values() { assert!(!p.name().is_empty(), "pattern name is empty"); match p.match_source() { - MatchSource::Regex(rp) => assert!(!rp.regex.is_empty(), "regex is empty for {}", p.name()), - MatchSource::Dictionary(dp) => assert!(!dp.name.is_empty(), "dictionary is empty for {}", p.name()), + MatchSource::Regex(rp) => { + assert!(!rp.regex.is_empty(), "regex is empty for {}", p.name()); + assert!(rp.confidence > 0.0, "confidence is 0 for {}", p.name()); + assert!(rp.confidence <= 1.0, "confidence > 1 for {}", p.name()); + } + MatchSource::Dictionary(dp) => { + assert!(!dp.name.is_empty(), "dictionary is empty for 
{}", p.name()); + let c = dp.confidence.resolve(0); + assert!(c > 0.0, "confidence is 0 for {}", p.name()); + assert!(c <= 1.0, "confidence > 1 for {}", p.name()); + } } - assert!(p.confidence() > 0.0, "confidence is 0 for {}", p.name()); - assert!(p.confidence() <= 1.0, "confidence > 1 for {}", p.name()); } } @@ -229,8 +236,7 @@ mod tests { "name": "test", "category": "pii", "entity_type": "government_id", - "pattern": { "regex": "\\d+" }, - "confidence": 0.9 + "pattern": { "regex": "\\d+", "confidence": 0.9 } }"#; let (pattern, _warnings) = JsonPattern::from_bytes(json).unwrap(); diff --git a/crates/nvisy-pattern/src/patterns/pattern.rs b/crates/nvisy-pattern/src/patterns/pattern.rs index d814ace..724e7f0 100644 --- a/crates/nvisy-pattern/src/patterns/pattern.rs +++ b/crates/nvisy-pattern/src/patterns/pattern.rs @@ -11,7 +11,7 @@ use nvisy_ontology::entity::{EntityCategory, EntityKind}; use super::context_rule::ContextRule; /// A regex-based match source with an optional post-match validator. -#[derive(Debug, Clone, PartialEq, Eq, Deserialize)] +#[derive(Debug, Clone, PartialEq, Deserialize)] pub struct RegexPattern { /// The regular expression string. pub regex: String, @@ -27,6 +27,63 @@ pub struct RegexPattern { /// inline `(?i)` or equivalent flag. #[serde(default)] pub case_sensitive: bool, + /// Confidence score (0.0–1.0) assigned to matches from this pattern. + /// + /// Defaults to `1.0` when not specified. + #[serde(default = "default_confidence")] + pub confidence: f64, +} + +/// Confidence for a dictionary pattern: either a single uniform score +/// or per-column scores for CSV dictionaries. +#[derive(Debug, Clone, PartialEq)] +pub enum DictionaryConfidence { + /// Single confidence score applied to all entries. + Uniform(f64), + /// Per-column confidence scores. Entries from column `i` use index `i`. + /// Columns beyond the length fall back to the last value. 
+    PerColumn(Vec<f64>),
+}
+
+impl DictionaryConfidence {
+    /// Resolve confidence for a given column index.
+    pub fn resolve(&self, column: usize) -> f64 {
+        match self {
+            Self::Uniform(c) => *c,
+            Self::PerColumn(cols) => cols.get(column).copied().unwrap_or_else(|| {
+                cols.last().copied().unwrap_or(DEFAULT_CONFIDENCE)
+            }),
+        }
+    }
+
+}
+
+impl Default for DictionaryConfidence {
+    fn default() -> Self {
+        Self::Uniform(DEFAULT_CONFIDENCE)
+    }
+}
+
+/// Serde helper — accepts either a single number or an array of numbers.
+mod confidence_serde {
+    use super::DictionaryConfidence;
+    use serde::{Deserialize, Deserializer};
+
+    #[derive(Deserialize)]
+    #[serde(untagged)]
+    enum Raw {
+        Uniform(f64),
+        PerColumn(Vec<f64>),
+    }
+
+    pub fn deserialize<'de, D: Deserializer<'de>>(
+        deserializer: D,
+    ) -> Result<DictionaryConfidence, D::Error> {
+        Ok(match Raw::deserialize(deserializer)? {
+            Raw::Uniform(c) => DictionaryConfidence::Uniform(c),
+            Raw::PerColumn(v) => DictionaryConfidence::PerColumn(v),
+        })
+    }
 }
 
 /// A dictionary-based match source.
@@ -42,13 +99,16 @@ pub struct DictionaryPattern {
     /// `ascii_case_insensitive` setting.
     #[serde(default)]
     pub case_sensitive: bool,
-    /// Optional per-column confidence overrides for CSV dictionaries.
+    /// Confidence score(s) for matches from this dictionary.
     ///
-    /// When present, entries from column `i` use `column_confidence[i]`
-    /// instead of the pattern's base confidence. Columns beyond the
-    /// length of this array fall back to the base confidence.
-    #[serde(default)]
-    pub column_confidence: Option<Vec<f64>>,
+    /// A single number applies uniformly to all entries.
+    /// An array assigns per-column confidence for CSV dictionaries
+    /// (e.g. `[0.85, 0.55]` gives column 0 entries 0.85 and column 1
+    /// entries 0.55).
+    ///
+    /// Defaults to `1.0` when not specified.
+    #[serde(default, deserialize_with = "confidence_serde::deserialize")]
+    pub confidence: DictionaryConfidence,
 }
 
 /// How a pattern finds matches in text.
@@ -77,6 +137,13 @@ pub enum MatchSource { /// from the JSON files under `assets/patterns/`. /// /// [`JsonPattern`]: super::JsonPattern +/// Default confidence score when `"confidence"` is omitted from JSON. +pub const DEFAULT_CONFIDENCE: f64 = 1.0; + +fn default_confidence() -> f64 { + DEFAULT_CONFIDENCE +} + pub trait Pattern: Send + Sync { /// Unique name identifying this pattern (e.g. `"ssn"`, `"credit-card"`). fn name(&self) -> &str; @@ -89,15 +156,11 @@ pub trait Pattern: Send + Sync { /// How this pattern matches text: regex or dictionary lookup. /// - /// For regex patterns, the validator (if any) is embedded in the - /// [`MatchSource::Regex`] variant. + /// Confidence scores are embedded in the match source itself: + /// [`RegexPattern::confidence`] for regex, [`DictionaryPattern::confidence`] + /// for dictionaries. fn match_source(&self) -> &MatchSource; - /// Base confidence score (0.0–1.0) assigned to every raw match. - /// - /// Defaults to `1.0` when not specified in the pattern definition. - fn confidence(&self) -> f64; - /// Optional co-occurrence context rule for span-level confidence boosting. fn context(&self) -> Option<&ContextRule> { None From 14f623337d4191b02e1cd974010139921df2a764 Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Wed, 25 Feb 2026 04:21:59 +0100 Subject: [PATCH 06/24] refactor(rig): consolidate 7 top-level modules into 3 Absorb small utility modules (error, retry, metrics, compact) into backend/ and rename structured/ to agent/, reducing module sprawl while keeping all public re-exports intact. 
Co-Authored-By: Claude Opus 4.6 --- crates/nvisy-rig/src/agent/mod.rs | 144 ++++++++++++ crates/nvisy-rig/src/backend/compact.rs | 157 +++++++++++++ crates/nvisy-rig/src/backend/error.rs | 48 ++++ crates/nvisy-rig/src/backend/metrics.rs | 119 ++++++++++ crates/nvisy-rig/src/backend/mod.rs | 44 ++++ crates/nvisy-rig/src/backend/retry.rs | 142 ++++++++++++ crates/nvisy-rig/src/bridge/mod.rs | 131 +++++++++++ crates/nvisy-rig/src/bridge/prompt.rs | 67 ++++++ crates/nvisy-rig/src/bridge/response.rs | 293 ++++++++++++++++++++++++ crates/nvisy-rig/src/lib.rs | 10 +- crates/nvisy-rig/src/prelude.rs | 5 + 11 files changed, 1157 insertions(+), 3 deletions(-) create mode 100644 crates/nvisy-rig/src/agent/mod.rs create mode 100644 crates/nvisy-rig/src/backend/compact.rs create mode 100644 crates/nvisy-rig/src/backend/error.rs create mode 100644 crates/nvisy-rig/src/backend/metrics.rs create mode 100644 crates/nvisy-rig/src/backend/mod.rs create mode 100644 crates/nvisy-rig/src/backend/retry.rs create mode 100644 crates/nvisy-rig/src/bridge/mod.rs create mode 100644 crates/nvisy-rig/src/bridge/prompt.rs create mode 100644 crates/nvisy-rig/src/bridge/response.rs create mode 100644 crates/nvisy-rig/src/prelude.rs diff --git a/crates/nvisy-rig/src/agent/mod.rs b/crates/nvisy-rig/src/agent/mod.rs new file mode 100644 index 0000000..5bca2ee --- /dev/null +++ b/crates/nvisy-rig/src/agent/mod.rs @@ -0,0 +1,144 @@ +//! Structured output backend using rig-core's JSON schema enforcement. 
+ +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use serde_json::Value; + +use std::sync::Arc; + +use rig::agent::{Agent, AgentBuilder}; +use rig::completion::{CompletionModel, TypedPrompt}; + +use nvisy_core::Error; + +use crate::backend::{LlmBackend, LlmConfig}; +use crate::bridge::prompt::PromptBuilder; +use crate::bridge::response::ResponseParser; +use crate::bridge::RigBackendConfig; +use crate::backend::ErrorMapper; +use crate::backend::UsageTracker; + +/// A list of entities returned by structured output. +#[derive(Debug, Deserialize, Serialize, JsonSchema)] +pub struct EntityList { + /// Detected entities. + pub entities: Vec, +} + +/// A single raw entity from structured LLM output. +#[derive(Debug, Deserialize, Serialize, JsonSchema)] +pub struct RawEntity { + /// Broad classification (e.g. "pii", "phi", "financial", "credentials"). + pub category: String, + /// Specific entity type (e.g. "email_address", "person_name"). + pub entity_type: String, + /// The matched text value. + pub value: String, + /// Detection confidence (0.0 -- 1.0). + pub confidence: f64, + /// Start byte offset in the input text. + pub start_offset: usize, + /// End byte offset in the input text. + pub end_offset: usize, +} + +impl RawEntity { + /// Convert this raw entity into a [`serde_json::Value`] dict. + pub fn into_value(self) -> Value { + serde_json::json!({ + "category": self.category, + "entity_type": self.entity_type, + "value": self.value, + "confidence": self.confidence, + "start_offset": self.start_offset, + "end_offset": self.end_offset, + }) + } +} + +/// Backend that uses rig-core's structured output (JSON schema enforcement) +/// for entity detection. +/// +/// Falls back to text-based parsing if structured output fails. +pub struct StructuredBackend { + agent: Agent, + model: Arc, + config: RigBackendConfig, + tracker: UsageTracker, +} + +impl StructuredBackend { + /// Create a new structured backend. 
+ pub fn new(model: M, config: RigBackendConfig) -> Self { + let model = Arc::new(model); + let agent = AgentBuilder::new((*model).clone()) + .temperature(config.temperature) + .max_tokens(config.max_tokens) + .build(); + + Self { + agent, + model, + config, + tracker: UsageTracker::new(), + } + } + + /// Access the usage tracker for this backend. + pub fn tracker(&self) -> &UsageTracker { + &self.tracker + } +} + +#[async_trait::async_trait] +impl LlmBackend for StructuredBackend +where + M: CompletionModel + Send + Sync + 'static, +{ + #[tracing::instrument(skip_all, fields(text_len = text.len(), mode = "structured"))] + async fn detect_text( + &self, + text: &str, + config: &LlmConfig, + ) -> Result, Error> { + let user_prompt = PromptBuilder::new(config).build(text); + + // Try structured output first. + let structured_result: Result = self + .agent + .prompt_typed::(&user_prompt) + .await; + + match structured_result { + Ok(entity_list) => { + tracing::debug!( + count = entity_list.entities.len(), + "structured output succeeded" + ); + Ok(entity_list.entities.into_iter().map(RawEntity::into_value).collect()) + } + Err(structured_err) => { + tracing::warn!( + error = %structured_err, + "structured output failed, falling back to text-based parsing" + ); + + // Fall back to text-based completion using the model directly. 
+ let mut builder = self + .model + .completion_request(&user_prompt) + .temperature(self.config.temperature) + .max_tokens(self.config.max_tokens); + + if let Some(ref preamble) = config.system_prompt { + builder = builder.preamble(preamble.clone()); + } + + let response = builder.send().await.map_err(ErrorMapper::from_completion)?; + let response_text = ResponseParser::extract_text(&response)?; + self.tracker.record(&response.usage, 0); + ResponseParser::parse_entities(&response_text) + } + } + } +} diff --git a/crates/nvisy-rig/src/backend/compact.rs b/crates/nvisy-rig/src/backend/compact.rs new file mode 100644 index 0000000..6e1aca9 --- /dev/null +++ b/crates/nvisy-rig/src/backend/compact.rs @@ -0,0 +1,157 @@ +//! Context window management for LLM token limits. + +/// Manages token budget estimation, splitting, and truncation. +pub struct ContextWindow { + /// Maximum tokens the model supports. + max_tokens: usize, + /// Tokens reserved for the output/completion. + reserve_output: usize, +} + +impl ContextWindow { + /// Create a new context window with the given limits. + pub fn new(max_tokens: usize, reserve_output: usize) -> Self { + Self { + max_tokens, + reserve_output, + } + } + + /// Estimate the number of tokens in a string (~4 chars per token). + pub fn estimate_tokens(text: &str) -> usize { + // Rough heuristic: ~4 characters per token for English text. + (text.len() + 3) / 4 + } + + /// Available input token budget (max minus reserved output). + fn input_budget(&self) -> usize { + self.max_tokens.saturating_sub(self.reserve_output) + } + + /// Check if the text fits within the available input budget. + pub fn fits(&self, text: &str) -> bool { + Self::estimate_tokens(text) <= self.input_budget() + } + + /// Split text into chunks that each fit within the input budget. + /// + /// Splitting respects sentence boundaries (`. ` and `\n`) where possible. 
+ pub fn split_to_fit<'a>(&self, text: &'a str) -> Vec<&'a str> { + if self.fits(text) { + return vec![text]; + } + + let budget = self.input_budget(); + // Approximate char budget from token budget. + let char_budget = budget * 4; + + let mut chunks = Vec::new(); + let mut remaining = text; + + while !remaining.is_empty() { + if Self::estimate_tokens(remaining) <= budget { + chunks.push(remaining); + break; + } + + // Take up to char_budget characters, then find a sentence boundary. + let take = remaining.len().min(char_budget); + let candidate = &remaining[..take]; + + // Try to split at the last sentence boundary within the candidate. + let split_pos = find_last_boundary(candidate).unwrap_or(take); + + let (chunk, rest) = remaining.split_at(split_pos); + if chunk.is_empty() { + // No boundary found within budget; force-split at char_budget. + let forced = remaining.len().min(char_budget); + let (chunk, rest) = remaining.split_at(forced); + chunks.push(chunk); + remaining = rest; + } else { + chunks.push(chunk); + remaining = rest.trim_start_matches(['\n', ' ']); + } + } + + chunks + } + + /// Truncate text to fit, keeping the end (most recent context). + pub fn truncate_to_fit<'a>(&self, text: &'a str) -> &'a str { + if self.fits(text) { + return text; + } + + let budget = self.input_budget(); + let char_budget = budget * 4; + + if text.len() <= char_budget { + return text; + } + + let start = text.len() - char_budget; + // Try to start at a boundary to avoid splitting mid-sentence. + let adjusted = text[start..] + .find(['\n', '.']) + .map(|pos| start + pos + 1) + .unwrap_or(start); + + &text[adjusted.min(text.len())..] + } +} + +/// Find the last sentence boundary (`. ` or `\n`) in the text. +fn find_last_boundary(text: &str) -> Option { + let last_newline = text.rfind('\n'); + let last_period = text.rfind(". 
").map(|p| p + 2); + + match (last_newline, last_period) { + (Some(a), Some(b)) => Some(a.max(b)), + (Some(a), None) => Some(a), + (None, Some(b)) => Some(b), + (None, None) => None, + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn estimate_tokens_basic() { + assert_eq!(ContextWindow::estimate_tokens(""), 0); + assert_eq!(ContextWindow::estimate_tokens("abcd"), 1); + assert_eq!(ContextWindow::estimate_tokens("abcdefgh"), 2); + } + + #[test] + fn fits_within_budget() { + let cw = ContextWindow::new(100, 20); + // Budget = 80 tokens = ~320 chars + let short = "a".repeat(300); + assert!(cw.fits(&short)); + + let long = "a".repeat(400); + assert!(!cw.fits(&long)); + } + + #[test] + fn split_short_text() { + let cw = ContextWindow::new(100, 20); + let text = "hello world"; + let chunks = cw.split_to_fit(text); + assert_eq!(chunks, vec!["hello world"]); + } + + #[test] + fn truncate_keeps_end() { + let cw = ContextWindow::new(10, 2); + // Budget = 8 tokens = ~32 chars + let text = "First sentence. Second sentence. Third sentence. Fourth sentence."; + let truncated = cw.truncate_to_fit(text); + // Should keep the tail end + assert!(truncated.len() <= 32 + 10); // some slack for boundary adjustment + assert!(text.ends_with(truncated) || truncated.contains("sentence")); + } +} diff --git a/crates/nvisy-rig/src/backend/error.rs b/crates/nvisy-rig/src/backend/error.rs new file mode 100644 index 0000000..df7ec35 --- /dev/null +++ b/crates/nvisy-rig/src/backend/error.rs @@ -0,0 +1,48 @@ +//! Error mapping from rig-core errors to nvisy-core errors. + +use rig::completion::CompletionError; + +use nvisy_core::Error; + +/// Maps [`CompletionError`] variants to [`nvisy_core::Error`]. +pub struct ErrorMapper; + +impl ErrorMapper { + /// Convert a rig-core [`CompletionError`] into a [`nvisy_core::Error`]. 
+ pub fn from_completion(err: CompletionError) -> Error { + match err { + CompletionError::HttpError(e) => { + Error::connection(format!("HTTP error: {e}"), "rig", true) + } + CompletionError::JsonError(e) => { + Error::new(nvisy_core::ErrorKind::Serialization, format!("JSON error: {e}")) + .with_component("rig") + } + CompletionError::ProviderError(msg) => { + let retryable = is_retryable_provider_error(&msg); + Error::connection(format!("Provider error: {msg}"), "rig", retryable) + } + CompletionError::ResponseError(msg) => { + Error::runtime(format!("Response error: {msg}"), "rig", false) + } + CompletionError::RequestError(e) => { + Error::validation(format!("Request error: {e}"), "rig") + } + CompletionError::UrlError(e) => { + Error::validation(format!("URL error: {e}"), "rig") + } + } + } +} + +/// Check if a provider error message indicates a retryable condition. +fn is_retryable_provider_error(msg: &str) -> bool { + let lower = msg.to_lowercase(); + lower.contains("rate_limit") + || lower.contains("rate limit") + || lower.contains("overloaded") + || lower.contains("timeout") + || lower.contains("429") + || lower.contains("503") + || lower.contains("529") +} diff --git a/crates/nvisy-rig/src/backend/metrics.rs b/crates/nvisy-rig/src/backend/metrics.rs new file mode 100644 index 0000000..6c1c1a8 --- /dev/null +++ b/crates/nvisy-rig/src/backend/metrics.rs @@ -0,0 +1,119 @@ +//! Token usage tracking and statistics. + +use std::sync::Mutex; + +use rig::completion::Usage; + +/// Tracks cumulative token usage across LLM requests. +pub struct UsageTracker { + inner: Mutex, +} + +/// Snapshot of accumulated usage statistics. +#[derive(Debug, Default, Clone)] +pub struct UsageStats { + /// Total input (prompt) tokens consumed. + pub total_input_tokens: u64, + /// Total output (completion) tokens consumed. + pub total_output_tokens: u64, + /// Total number of LLM requests sent. + pub total_requests: u64, + /// Total number of retries across all requests. 
+ pub total_retries: u64, +} + +impl UsageTracker { + /// Create a new tracker with zeroed counters. + pub fn new() -> Self { + Self { + inner: Mutex::new(UsageStats::default()), + } + } + + /// Record usage from a single request, including retry count. + pub fn record(&self, usage: &Usage, retries: u32) { + let mut stats = self.inner.lock().expect("usage tracker lock poisoned"); + stats.total_input_tokens += usage.input_tokens; + stats.total_output_tokens += usage.output_tokens; + stats.total_requests += 1; + stats.total_retries += u64::from(retries); + } + + /// Take a snapshot of the current accumulated statistics. + pub fn snapshot(&self) -> UsageStats { + self.inner.lock().expect("usage tracker lock poisoned").clone() + } + + /// Reset all counters to zero. + pub fn reset(&self) { + *self.inner.lock().expect("usage tracker lock poisoned") = UsageStats::default(); + } +} + +impl Default for UsageTracker { + fn default() -> Self { + Self::new() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn tracks_usage() { + let tracker = UsageTracker::new(); + + let usage = Usage { + input_tokens: 100, + output_tokens: 50, + total_tokens: 150, + cached_input_tokens: 0, + }; + tracker.record(&usage, 2); + + let snap = tracker.snapshot(); + assert_eq!(snap.total_input_tokens, 100); + assert_eq!(snap.total_output_tokens, 50); + assert_eq!(snap.total_requests, 1); + assert_eq!(snap.total_retries, 2); + } + + #[test] + fn accumulates_across_requests() { + let tracker = UsageTracker::new(); + + let usage = Usage { + input_tokens: 10, + output_tokens: 5, + total_tokens: 15, + cached_input_tokens: 0, + }; + tracker.record(&usage, 0); + tracker.record(&usage, 1); + + let snap = tracker.snapshot(); + assert_eq!(snap.total_input_tokens, 20); + assert_eq!(snap.total_output_tokens, 10); + assert_eq!(snap.total_requests, 2); + assert_eq!(snap.total_retries, 1); + } + + #[test] + fn reset_clears_stats() { + let tracker = UsageTracker::new(); + + let usage = Usage { + 
input_tokens: 100, + output_tokens: 50, + total_tokens: 150, + cached_input_tokens: 0, + }; + tracker.record(&usage, 0); + tracker.reset(); + + let snap = tracker.snapshot(); + assert_eq!(snap.total_input_tokens, 0); + assert_eq!(snap.total_requests, 0); + } +} diff --git a/crates/nvisy-rig/src/backend/mod.rs b/crates/nvisy-rig/src/backend/mod.rs new file mode 100644 index 0000000..d838250 --- /dev/null +++ b/crates/nvisy-rig/src/backend/mod.rs @@ -0,0 +1,44 @@ +//! LLM backend trait and configuration. + +pub mod compact; +pub mod error; +pub mod metrics; +pub mod retry; + +pub use compact::ContextWindow; +pub use error::ErrorMapper; +pub use metrics::{UsageStats, UsageTracker}; +pub use retry::RetryPolicy; + +use serde_json::Value; + +use nvisy_core::Error; + +/// Configuration passed to an [`LlmBackend`] implementation. +#[derive(Debug, Clone)] +pub struct LlmConfig { + /// Entity type labels to detect (e.g., `["PERSON", "SSN"]`). + pub entity_types: Vec, + /// Minimum confidence score to include a detection (0.0 -- 1.0). + pub confidence_threshold: f64, + /// System prompt override (if empty, the backend uses its default). + pub system_prompt: Option, +} + +/// Backend trait for LLM-based entity detection. +/// +/// Implementations call an LLM service (e.g. via `rig-core`) and return +/// raw JSON results. Entity construction from the raw dicts is handled +/// by the detection layers. +#[async_trait::async_trait] +pub trait LlmBackend: Send + Sync + 'static { + /// Detect entities in text using an LLM, returning raw dicts. + /// + /// Each dict should contain: `category`, `entity_type`, `value`, + /// `confidence`, `start_offset`, `end_offset`. + async fn detect_text( + &self, + text: &str, + config: &LlmConfig, + ) -> Result, Error>; +} diff --git a/crates/nvisy-rig/src/backend/retry.rs b/crates/nvisy-rig/src/backend/retry.rs new file mode 100644 index 0000000..ebc262f --- /dev/null +++ b/crates/nvisy-rig/src/backend/retry.rs @@ -0,0 +1,142 @@ +//! 
Retry policy with exponential backoff. + +use std::future::Future; +use std::time::Duration; + +use nvisy_core::Error; + +/// Exponential backoff retry policy. +#[derive(Debug, Clone)] +pub struct RetryPolicy { + /// Maximum number of retries (default: 3). + pub max_retries: u32, + /// Initial backoff duration (default: 300ms). + pub initial_backoff: Duration, + /// Multiplicative backoff factor (default: 2.0). + pub backoff_factor: f64, + /// Maximum backoff duration cap (default: 5s). + pub max_backoff: Duration, +} + +impl Default for RetryPolicy { + fn default() -> Self { + Self::new() + } +} + +impl RetryPolicy { + /// Create a retry policy with default settings. + pub fn new() -> Self { + Self { + max_retries: 3, + initial_backoff: Duration::from_millis(300), + backoff_factor: 2.0, + max_backoff: Duration::from_secs(5), + } + } + + /// Execute an async closure with retry on retryable errors. + pub async fn execute(&self, operation: F) -> Result + where + F: Fn() -> Fut, + Fut: Future>, + { + let mut attempts = 0u32; + let mut backoff = self.initial_backoff; + + loop { + match operation().await { + Ok(val) => return Ok(val), + Err(err) => { + if !err.is_retryable() || attempts >= self.max_retries { + return Err(err); + } + + attempts += 1; + tracing::warn!( + attempt = attempts, + max_retries = self.max_retries, + backoff_ms = backoff.as_millis() as u64, + error = %err, + "retrying after transient error" + ); + + tokio::time::sleep(backoff).await; + + backoff = Duration::from_secs_f64( + (backoff.as_secs_f64() * self.backoff_factor).min(self.max_backoff.as_secs_f64()), + ); + } + } + } + } + + /// Return the number of retries that were consumed during the last + /// [`execute`](Self::execute) call. This is tracked externally by the + /// caller; here we just expose a helper to compute attempts from the + /// backoff state if needed. 
+ pub fn max_retries(&self) -> u32 { + self.max_retries + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::sync::atomic::{AtomicU32, Ordering}; + + #[tokio::test] + async fn succeeds_on_first_try() { + let policy = RetryPolicy::new(); + let result = policy.execute(|| async { Ok::<_, Error>(42) }).await; + assert_eq!(result.unwrap(), 42); + } + + #[tokio::test] + async fn retries_on_retryable_error() { + let counter = AtomicU32::new(0); + let policy = RetryPolicy { + max_retries: 3, + initial_backoff: Duration::from_millis(1), + backoff_factor: 1.0, + max_backoff: Duration::from_millis(1), + }; + + let result = policy + .execute(|| { + let attempt = counter.fetch_add(1, Ordering::SeqCst); + async move { + if attempt < 2 { + Err(Error::connection("transient", "test", true)) + } else { + Ok(42) + } + } + }) + .await; + + assert_eq!(result.unwrap(), 42); + assert_eq!(counter.load(Ordering::SeqCst), 3); + } + + #[tokio::test] + async fn does_not_retry_non_retryable() { + let counter = AtomicU32::new(0); + let policy = RetryPolicy { + max_retries: 3, + initial_backoff: Duration::from_millis(1), + backoff_factor: 1.0, + max_backoff: Duration::from_millis(1), + }; + + let result: Result = policy + .execute(|| { + counter.fetch_add(1, Ordering::SeqCst); + async { Err(Error::validation("bad input", "test")) } + }) + .await; + + assert!(result.is_err()); + assert_eq!(counter.load(Ordering::SeqCst), 1); + } +} diff --git a/crates/nvisy-rig/src/bridge/mod.rs b/crates/nvisy-rig/src/bridge/mod.rs new file mode 100644 index 0000000..eba3185 --- /dev/null +++ b/crates/nvisy-rig/src/bridge/mod.rs @@ -0,0 +1,131 @@ +//! Core bridge between rig-core and the [`LlmBackend`] trait. 
+ +pub mod prompt; +pub mod response; + +pub use prompt::PromptBuilder; +pub use response::{EntityParser, ResponseParser}; + +use std::sync::atomic::{AtomicU32, Ordering}; + +use serde_json::Value; + +use rig::completion::CompletionModel; + +use nvisy_core::Error; + +use crate::backend::{LlmBackend, LlmConfig}; +use crate::backend::ErrorMapper; +use crate::backend::UsageTracker; +use crate::backend::RetryPolicy; + +/// Configuration for a [`RigBackend`]. +#[derive(Debug, Clone)] +pub struct RigBackendConfig { + /// Sampling temperature (default: 0.1). + pub temperature: f64, + /// Maximum output tokens (default: 4096). + pub max_tokens: u64, + /// Retry policy for transient errors. + pub retry: RetryPolicy, +} + +impl Default for RigBackendConfig { + fn default() -> Self { + Self { + temperature: 0.1, + max_tokens: 4096, + retry: RetryPolicy::new(), + } + } +} + +/// Production [`LlmBackend`] implementation wrapping a rig-core +/// [`CompletionModel`]. +pub struct RigBackend { + model: M, + config: RigBackendConfig, + tracker: UsageTracker, +} + +impl RigBackend { + /// Create a new backend with the given model and configuration. + pub fn new(model: M, config: RigBackendConfig) -> Self { + Self { + model, + config, + tracker: UsageTracker::new(), + } + } + + /// Access the usage tracker for this backend. + pub fn tracker(&self) -> &UsageTracker { + &self.tracker + } + + /// Send a single completion request to the model. 
+ async fn send_request( + &self, + user_prompt: &str, + system_prompt: Option<&str>, + ) -> Result<(String, rig::completion::Usage), Error> { + let mut builder = self + .model + .completion_request(user_prompt) + .temperature(self.config.temperature) + .max_tokens(self.config.max_tokens); + + if let Some(preamble) = system_prompt { + builder = builder.preamble(preamble.to_string()); + } + + let response = builder.send().await.map_err(ErrorMapper::from_completion)?; + let text = ResponseParser::extract_text(&response)?; + Ok((text, response.usage)) + } +} + +#[async_trait::async_trait] +impl LlmBackend for RigBackend +where + M: CompletionModel + Send + Sync + 'static, +{ + #[tracing::instrument(skip_all, fields(text_len = text.len()))] + async fn detect_text( + &self, + text: &str, + config: &LlmConfig, + ) -> Result, Error> { + let user_prompt = PromptBuilder::new(config).build(text); + let system_prompt = config.system_prompt.as_deref(); + + let call_count = AtomicU32::new(0); + let result = self + .config + .retry + .execute(|| { + call_count.fetch_add(1, Ordering::Relaxed); + self.send_request(&user_prompt, system_prompt) + }) + .await; + + // Actual retries = total calls - 1 (the first attempt is not a retry). + let actual_retries = call_count.load(Ordering::Relaxed).saturating_sub(1); + + match result { + Ok((response_text, usage)) => { + self.tracker.record(&usage, actual_retries); + + tracing::debug!( + input_tokens = usage.input_tokens, + output_tokens = usage.output_tokens, + retries = actual_retries, + "LLM request completed" + ); + + ResponseParser::parse_entities(&response_text) + } + Err(e) => Err(e), + } + } +} diff --git a/crates/nvisy-rig/src/bridge/prompt.rs b/crates/nvisy-rig/src/bridge/prompt.rs new file mode 100644 index 0000000..d84b6af --- /dev/null +++ b/crates/nvisy-rig/src/bridge/prompt.rs @@ -0,0 +1,67 @@ +//! Prompt construction for LLM entity detection. 
+ +use crate::backend::LlmConfig; + +/// Builds user prompts for entity detection requests. +pub struct PromptBuilder<'a> { + entity_types: &'a [String], + confidence_threshold: f64, +} + +impl<'a> PromptBuilder<'a> { + /// Create a prompt builder from an [`LlmConfig`]. + pub fn new(config: &'a LlmConfig) -> Self { + Self { + entity_types: &config.entity_types, + confidence_threshold: config.confidence_threshold, + } + } + + /// Build the user prompt for the given text. + pub fn build(&self, text: &str) -> String { + let types_hint = if self.entity_types.is_empty() { + "all entity types".to_string() + } else { + self.entity_types.join(", ") + }; + + format!( + "Detect entities of types [{types_hint}] with minimum confidence \ + {threshold:.2} in the following text. Return a JSON array of objects \ + with keys: category, entity_type, value, confidence, start_offset, \ + end_offset.\n\n---\n{text}\n---", + types_hint = types_hint, + threshold = self.confidence_threshold, + text = text, + ) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn builds_prompt_with_entity_types() { + let config = LlmConfig { + entity_types: vec!["PERSON".into(), "SSN".into()], + confidence_threshold: 0.7, + system_prompt: None, + }; + let prompt = PromptBuilder::new(&config).build("Hello world"); + assert!(prompt.contains("PERSON, SSN")); + assert!(prompt.contains("0.70")); + assert!(prompt.contains("Hello world")); + } + + #[test] + fn builds_prompt_without_entity_types() { + let config = LlmConfig { + entity_types: vec![], + confidence_threshold: 0.5, + system_prompt: None, + }; + let prompt = PromptBuilder::new(&config).build("test"); + assert!(prompt.contains("all entity types")); + } +} diff --git a/crates/nvisy-rig/src/bridge/response.rs b/crates/nvisy-rig/src/bridge/response.rs new file mode 100644 index 0000000..fc1bdd7 --- /dev/null +++ b/crates/nvisy-rig/src/bridge/response.rs @@ -0,0 +1,293 @@ +//! Response parsing for LLM completions. 
+ +use std::str::FromStr; + +use serde_json::Value; + +use rig::completion::{AssistantContent, CompletionResponse}; + +use nvisy_core::Error; +use nvisy_ontology::entity::{DetectionMethod, Entity, EntityCategory, EntityKind}; +use nvisy_ontology::location::{Location, TextLocation}; + +/// Extracts text and parses JSON from LLM completion responses. +pub struct ResponseParser; + +impl ResponseParser { + /// Extract the first text content from a completion response. + pub fn extract_text(response: &CompletionResponse) -> Result { + let texts: Vec<&str> = response + .choice + .iter() + .filter_map(|c| match c { + AssistantContent::Text(t) => Some(t.text.as_str()), + _ => None, + }) + .collect(); + + if texts.is_empty() { + return Err(Error::runtime( + "LLM response contained no text content", + "rig", + false, + )); + } + + Ok(texts.join("\n")) + } + + /// Parse a JSON entity array from LLM text output. + /// + /// Handles multiple formats: + /// - Raw JSON array: `[{...}, ...]` + /// - Markdown-fenced: `` ```json\n[...]\n``` `` + /// - Single object: `{...}` (wrapped in array) + /// - Empty / "no entities" / "none": returns empty vec + pub fn parse_entities(text: &str) -> Result, Error> { + let trimmed = text.trim(); + + // Handle empty or "no entities" responses. + if trimmed.is_empty() + || trimmed.eq_ignore_ascii_case("none") + || trimmed.eq_ignore_ascii_case("no entities") + || trimmed == "[]" + { + return Ok(Vec::new()); + } + + // Try to extract JSON from markdown fences. + let json_str = extract_fenced_json(trimmed).unwrap_or(trimmed); + + // Try parsing as array. + if let Ok(Value::Array(arr)) = serde_json::from_str(json_str) { + return Ok(arr); + } + + // Try parsing as single object. + if let Ok(obj @ Value::Object(_)) = serde_json::from_str(json_str) { + return Ok(vec![obj]); + } + + // Try to find embedded JSON array in the text. 
+ if let Some(start) = trimmed.find('[') { + if let Some(end) = trimmed.rfind(']') { + if start < end { + let substr = &trimmed[start..=end]; + if let Ok(Value::Array(arr)) = serde_json::from_str(substr) { + return Ok(arr); + } + } + } + } + + Err(Error::runtime( + format!("Failed to parse LLM response as JSON entities: {}", truncate(trimmed, 200)), + "rig", + false, + )) + } +} + +/// Parse raw JSON dicts from an LLM backend into [`Entity`] values. +/// +/// Moved from the former `parse.rs` free function `parse_llm_entities`. +pub struct EntityParser; + +impl EntityParser { + /// Parse raw JSON dicts into [`Entity`] values. + /// + /// Expected dict keys: `category`, `entity_type`, `value`, `confidence`, + /// and optionally `start_offset` / `end_offset`. + pub fn parse(raw: &[Value]) -> Result, Error> { + let mut entities = Vec::new(); + + for item in raw { + let obj = item.as_object().ok_or_else(|| { + Error::validation("Expected JSON object in LLM results".to_string(), "llm-parse") + })?; + + let category_str = obj + .get("category") + .and_then(Value::as_str) + .ok_or_else(|| Error::validation("Missing 'category'".to_string(), "llm-parse"))?; + + let category = match category_str { + "pii" => EntityCategory::Pii, + "phi" => EntityCategory::Phi, + "financial" => EntityCategory::Financial, + "credentials" => EntityCategory::Credentials, + other => EntityCategory::Custom(other.to_string()), + }; + + let entity_type_str = obj + .get("entity_type") + .and_then(Value::as_str) + .ok_or_else(|| { + Error::validation("Missing 'entity_type'".to_string(), "llm-parse") + })?; + + let entity_kind = match EntityKind::from_str(entity_type_str) { + Ok(ek) => ek, + Err(_) => { + tracing::warn!( + entity_type = entity_type_str, + "unknown entity type from LLM, dropping" + ); + continue; + } + }; + + let value = obj + .get("value") + .and_then(Value::as_str) + .ok_or_else(|| Error::validation("Missing 'value'".to_string(), "llm-parse"))?; + + let confidence = obj + 
.get("confidence") + .and_then(Value::as_f64) + .ok_or_else(|| { + Error::validation("Missing 'confidence'".to_string(), "llm-parse") + })?; + + let start_offset = obj + .get("start_offset") + .and_then(Value::as_u64) + .map(|v| v as usize) + .unwrap_or(0); + + let end_offset = obj + .get("end_offset") + .and_then(Value::as_u64) + .map(|v| v as usize) + .unwrap_or(0); + + let entity = Entity::new( + category, + entity_kind, + value, + DetectionMethod::ContextualNlp, + confidence, + ) + .with_location(Location::Text(TextLocation { + start_offset, + end_offset, + ..Default::default() + })); + + entities.push(entity); + } + + Ok(entities) + } +} + +/// Extract JSON content from markdown fences. +fn extract_fenced_json(text: &str) -> Option<&str> { + // Look for ```json ... ``` or ``` ... ``` + let start_marker = if let Some(pos) = text.find("```json") { + pos + "```json".len() + } else if let Some(pos) = text.find("```") { + pos + "```".len() + } else { + return None; + }; + + let rest = &text[start_marker..]; + // Skip optional newline after opening fence. + let rest = rest.strip_prefix('\n').unwrap_or(rest); + + let end = rest.find("```")?; + let content = rest[..end].trim(); + + if content.is_empty() { + None + } else { + Some(content) + } +} + +/// Truncate a string for display in error messages. 
+fn truncate(s: &str, max_len: usize) -> &str { + if s.len() <= max_len { + s + } else { + // Find a valid char boundary + let mut end = max_len; + while end > 0 && !s.is_char_boundary(end) { + end -= 1; + } + &s[..end] + } +} + +#[cfg(test)] +mod tests { + use super::*; + use serde_json::json; + + #[test] + fn parse_entities_raw_array() { + let text = r#"[{"category":"pii","entity_type":"email_address","value":"a@b.com","confidence":0.9,"start_offset":0,"end_offset":7}]"#; + let result = ResponseParser::parse_entities(text).unwrap(); + assert_eq!(result.len(), 1); + } + + #[test] + fn parse_entities_fenced() { + let text = "```json\n[{\"category\":\"pii\",\"entity_type\":\"email_address\",\"value\":\"a@b.com\",\"confidence\":0.9}]\n```"; + let result = ResponseParser::parse_entities(text).unwrap(); + assert_eq!(result.len(), 1); + } + + #[test] + fn parse_entities_single_object() { + let text = r#"{"category":"pii","entity_type":"email_address","value":"a@b.com","confidence":0.9}"#; + let result = ResponseParser::parse_entities(text).unwrap(); + assert_eq!(result.len(), 1); + } + + #[test] + fn parse_entities_empty() { + assert!(ResponseParser::parse_entities("").unwrap().is_empty()); + assert!(ResponseParser::parse_entities("none").unwrap().is_empty()); + assert!(ResponseParser::parse_entities("[]").unwrap().is_empty()); + assert!(ResponseParser::parse_entities("No entities").unwrap().is_empty()); + } + + #[test] + fn parse_entities_embedded_array() { + let text = "Here are the entities:\n[{\"key\":\"val\"}]\nDone."; + let result = ResponseParser::parse_entities(text).unwrap(); + assert_eq!(result.len(), 1); + } + + #[test] + fn entity_parser_basic() { + let raw = vec![json!({ + "category": "credentials", + "entity_type": "api_key", + "value": "SECRET", + "confidence": 0.92, + "start_offset": 9, + "end_offset": 15 + })]; + + let entities = EntityParser::parse(&raw).unwrap(); + assert_eq!(entities.len(), 1); + assert_eq!(entities[0].value, "SECRET"); + 
assert_eq!(entities[0].confidence, 0.92); + } + + #[test] + fn entity_parser_unknown_type_skipped() { + let raw = vec![json!({ + "category": "pii", + "entity_type": "unknown_thing_xyz", + "value": "test", + "confidence": 0.5 + })]; + + let entities = EntityParser::parse(&raw).unwrap(); + assert!(entities.is_empty()); + } +} diff --git a/crates/nvisy-rig/src/lib.rs b/crates/nvisy-rig/src/lib.rs index 4dc2bfd..10f6b13 100644 --- a/crates/nvisy-rig/src/lib.rs +++ b/crates/nvisy-rig/src/lib.rs @@ -2,8 +2,12 @@ #![cfg_attr(docsrs, feature(doc_cfg))] #![doc = include_str!("../README.md")] -mod backend; -mod parse; +pub mod backend; +pub mod bridge; +pub mod agent; +pub mod prelude; + +// Flat re-exports for ergonomics. pub use backend::{LlmBackend, LlmConfig}; -pub use parse::parse_llm_entities; +pub use bridge::{EntityParser, RigBackend, RigBackendConfig}; diff --git a/crates/nvisy-rig/src/prelude.rs b/crates/nvisy-rig/src/prelude.rs new file mode 100644 index 0000000..9d633b3 --- /dev/null +++ b/crates/nvisy-rig/src/prelude.rs @@ -0,0 +1,5 @@ +//! Convenience re-exports. 
+ +pub use crate::backend::{LlmBackend, LlmConfig, ContextWindow, RetryPolicy, UsageStats, UsageTracker}; +pub use crate::bridge::{EntityParser, RigBackend, RigBackendConfig}; +pub use crate::agent::{EntityList, RawEntity, StructuredBackend}; From 6a747f8260d42204d38f10cf5c7da1833287d628 Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Wed, 25 Feb 2026 16:34:13 +0100 Subject: [PATCH 07/24] refactor(rig): add BaseAgent, NerAgent, tool traits, ResponseParser wrapper Introduce layered agent architecture: - BaseAgent with builder handling rig-core's typestate for tools - NerAgent replacing StructuredAgent with NER-specific prompts - OcrProvider/CvProvider traits in their respective agent modules - ResponseParser as Cow wrapper with extract_text constructor - Stub modules for ocr, cv, and redactor agents Co-Authored-By: Claude Opus 4.6 --- Cargo.lock | 5 +- crates/nvisy-rig/Cargo.toml | 7 +- crates/nvisy-rig/src/agent/base.rs | 212 ++++++++++++++++++ .../{backend/compact.rs => agent/context.rs} | 1 + crates/nvisy-rig/src/agent/cv/mod.rs | 28 +++ crates/nvisy-rig/src/agent/mod.rs | 150 +------------ crates/nvisy-rig/src/agent/ner/mod.rs | 52 +++++ crates/nvisy-rig/src/agent/ner/output.rs | 30 +++ crates/nvisy-rig/src/agent/ner/prompt.rs | 32 +++ crates/nvisy-rig/src/agent/ocr/mod.rs | 17 ++ crates/nvisy-rig/src/agent/redactor/mod.rs | 3 + crates/nvisy-rig/src/backend.rs | 34 --- crates/nvisy-rig/src/backend/error.rs | 49 ++-- crates/nvisy-rig/src/backend/mod.rs | 50 ++--- crates/nvisy-rig/src/backend/retry.rs | 175 ++++++++------- crates/nvisy-rig/src/bridge/mod.rs | 128 +++++------ crates/nvisy-rig/src/bridge/prompt.rs | 75 +++++-- crates/nvisy-rig/src/bridge/response.rs | 120 +++++----- crates/nvisy-rig/src/lib.rs | 10 +- crates/nvisy-rig/src/parse.rs | 88 -------- crates/nvisy-rig/src/prelude.rs | 5 +- 21 files changed, 712 insertions(+), 559 deletions(-) create mode 100644 crates/nvisy-rig/src/agent/base.rs rename crates/nvisy-rig/src/{backend/compact.rs => 
agent/context.rs} (99%) create mode 100644 crates/nvisy-rig/src/agent/cv/mod.rs create mode 100644 crates/nvisy-rig/src/agent/ner/mod.rs create mode 100644 crates/nvisy-rig/src/agent/ner/output.rs create mode 100644 crates/nvisy-rig/src/agent/ner/prompt.rs create mode 100644 crates/nvisy-rig/src/agent/ocr/mod.rs create mode 100644 crates/nvisy-rig/src/agent/redactor/mod.rs delete mode 100644 crates/nvisy-rig/src/backend.rs delete mode 100644 crates/nvisy-rig/src/parse.rs diff --git a/Cargo.lock b/Cargo.lock index 9045465..9a70d91 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2777,6 +2777,7 @@ dependencies = [ "serde_json", "strum", "tokio", + "tower", "tracing", "uuid", ] @@ -2840,12 +2841,14 @@ name = "nvisy-rig" version = "0.1.0" dependencies = [ "async-trait", - "nvisy-codec", "nvisy-core", "nvisy-ontology", "rig-core", + "schemars", "serde", "serde_json", + "tokio", + "tower", "tracing", ] diff --git a/crates/nvisy-rig/Cargo.toml b/crates/nvisy-rig/Cargo.toml index 03afc94..0ab3f6b 100644 --- a/crates/nvisy-rig/Cargo.toml +++ b/crates/nvisy-rig/Cargo.toml @@ -21,7 +21,6 @@ rustdoc-args = ["--cfg", "docsrs"] [dependencies] # Internal crates -nvisy-codec = { workspace = true, features = [] } nvisy-core = { workspace = true, features = [] } nvisy-ontology = { workspace = true, features = [] } @@ -30,10 +29,16 @@ rig-core = { workspace = true, features = ["derive"] } # Async runtime async-trait = { workspace = true, features = [] } +tokio = { workspace = true, features = ["time"] } +tower = { workspace = true, features = ["retry", "timeout", "util"] } # (De)serialization serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true, features = [] } +schemars = { workspace = true, features = [] } # Observability tracing = { workspace = true, features = [] } + +[dev-dependencies] +tokio = { workspace = true, features = ["macros", "rt"] } diff --git a/crates/nvisy-rig/src/agent/base.rs b/crates/nvisy-rig/src/agent/base.rs new file mode 100644 
index 0000000..0c1975d --- /dev/null +++ b/crates/nvisy-rig/src/agent/base.rs @@ -0,0 +1,212 @@ +//! Internal foundation agent wrapping rig-core's `Agent`. + +use std::sync::Arc; + +use rig::agent::{Agent, AgentBuilder}; +use rig::completion::{CompletionModel, TypedPrompt}; +use rig::tool::{Tool, ToolDyn}; +use schemars::JsonSchema; +use serde::de::DeserializeOwned; +use serde::Serialize; + +use nvisy_core::Error; + +use crate::backend::{from_completion, UsageTracker}; +use crate::bridge::ResponseParser; + +use super::context::ContextWindow; + +/// Configuration for a [`BaseAgent`]. +#[derive(Debug, Clone)] +pub struct BaseAgentConfig { + /// Sampling temperature (default: 0.1). + pub temperature: f64, + /// Maximum output tokens (default: 4096). + pub max_tokens: u64, + /// Optional context window for chunking large inputs. + pub context_window: Option, +} + +impl Default for BaseAgentConfig { + fn default() -> Self { + Self { + temperature: 0.1, + max_tokens: 4096, + context_window: None, + } + } +} + +/// Internal foundation agent wrapping rig-core's [`Agent`]. +/// +/// Not exported — specialized agents (e.g. `NerAgent`) compose this. +pub(crate) struct BaseAgent { + agent: Agent, + model: Arc, + config: BaseAgentConfig, + tracker: UsageTracker, +} + +/// Builder for [`BaseAgent`] that handles rig-core's typestate for tools. +pub(crate) struct BaseAgentBuilder { + model: Arc, + config: BaseAgentConfig, + preamble: Option, + tools: Vec>, +} + +impl BaseAgentBuilder { + /// Create a new builder with the given model and config. + pub fn new(model: M, config: BaseAgentConfig) -> Self { + Self { + model: Arc::new(model), + config, + preamble: None, + tools: Vec::new(), + } + } + + /// Set the system prompt (preamble). + pub fn preamble(mut self, preamble: impl Into) -> Self { + self.preamble = Some(preamble.into()); + self + } + + /// Add a tool to the agent. 
+ pub fn tool(mut self, tool: impl Tool + 'static) -> Self { + self.tools.push(Box::new(tool)); + self + } + + /// Build the [`BaseAgent`]. + pub fn build(self) -> BaseAgent { + let agent = if self.tools.is_empty() { + let mut builder = AgentBuilder::new((*self.model).clone()) + .temperature(self.config.temperature) + .max_tokens(self.config.max_tokens); + + if let Some(ref preamble) = self.preamble { + builder = builder.preamble(preamble); + } + + builder.build() + } else { + let mut builder = AgentBuilder::new((*self.model).clone()) + .temperature(self.config.temperature) + .max_tokens(self.config.max_tokens) + .tools(self.tools); + + if let Some(ref preamble) = self.preamble { + builder = builder.preamble(preamble); + } + + builder.build() + }; + + BaseAgent { + agent, + model: self.model, + config: self.config, + tracker: UsageTracker::new(), + } + } +} + +impl BaseAgent { + /// Create a new builder. + pub fn builder(model: M, config: BaseAgentConfig) -> BaseAgentBuilder { + BaseAgentBuilder::new(model, config) + } + + /// Access the usage tracker. + pub fn tracker(&self) -> &UsageTracker { + &self.tracker + } + + /// Access the config. + pub fn config(&self) -> &BaseAgentConfig { + &self.config + } + + /// Structured output prompt: tries `prompt_typed`, falls back to text + + /// `parse_json`. + #[tracing::instrument(skip_all, fields(mode = "structured"))] + pub async fn prompt_structured(&self, prompt: &str, system: Option<&str>) -> Result + where + T: DeserializeOwned + Default + JsonSchema + Serialize + Send + Sync, + { + // Try structured output first. 
+ let structured_result: Result = self.agent.prompt_typed::(prompt).await; + + match structured_result { + Ok(value) => { + tracing::debug!("structured output succeeded"); + Ok(value) + } + Err(structured_err) => { + tracing::warn!( + error = %structured_err, + "structured output failed, falling back to text-based parsing" + ); + self.prompt_text_and_parse(prompt, system).await + } + } + } + + /// Raw text completion, records usage. + #[tracing::instrument(skip_all, fields(mode = "text"))] + pub async fn prompt_text(&self, prompt: &str, system: Option<&str>) -> Result { + let mut builder = self + .model + .completion_request(prompt) + .temperature(self.config.temperature) + .max_tokens(self.config.max_tokens); + + if let Some(preamble) = system { + builder = builder.preamble(preamble.to_string()); + } + + let response = builder.send().await.map_err(from_completion)?; + let parsed = ResponseParser::extract_text(&response)?; + self.tracker.record(&response.usage, 0); + Ok(parsed.as_str().to_owned()) + } + + /// Splits text via [`ContextWindow`], runs `prompt_structured` per chunk, + /// and flattens results. + #[tracing::instrument(skip_all, fields(mode = "chunked"))] + pub async fn prompt_chunked( + &self, + text: &str, + build_prompt: F, + system: Option<&str>, + ) -> Result, Error> + where + T: DeserializeOwned + Default + JsonSchema + Serialize + Send + Sync, + F: Fn(&str) -> String, + Vec: Default, + { + let chunks = match &self.config.context_window { + Some(cw) => cw.split_to_fit(text), + None => vec![text], + }; + + let mut all_results = Vec::new(); + for chunk in chunks { + let prompt = build_prompt(chunk); + let chunk_results: Vec = self.prompt_structured(&prompt, system).await?; + all_results.extend(chunk_results); + } + + Ok(all_results) + } + + /// Text-based fallback: complete → extract text → parse JSON. 
+ async fn prompt_text_and_parse(&self, prompt: &str, system: Option<&str>) -> Result + where + T: DeserializeOwned + Default, + { + let text = self.prompt_text(prompt, system).await?; + ResponseParser::from_text(text.as_str()).parse_json() + } +} diff --git a/crates/nvisy-rig/src/backend/compact.rs b/crates/nvisy-rig/src/agent/context.rs similarity index 99% rename from crates/nvisy-rig/src/backend/compact.rs rename to crates/nvisy-rig/src/agent/context.rs index 6e1aca9..42d22a6 100644 --- a/crates/nvisy-rig/src/backend/compact.rs +++ b/crates/nvisy-rig/src/agent/context.rs @@ -1,6 +1,7 @@ //! Context window management for LLM token limits. /// Manages token budget estimation, splitting, and truncation. +#[derive(Debug, Clone)] pub struct ContextWindow { /// Maximum tokens the model supports. max_tokens: usize, diff --git a/crates/nvisy-rig/src/agent/cv/mod.rs b/crates/nvisy-rig/src/agent/cv/mod.rs new file mode 100644 index 0000000..567aa54 --- /dev/null +++ b/crates/nvisy-rig/src/agent/cv/mod.rs @@ -0,0 +1,28 @@ +//! Computer vision agent for face/plate/signature detection (VLM + CV). +//! +//! Placeholder agent — implementation deferred to a future PR. + +use async_trait::async_trait; + +use nvisy_core::Error; + +/// A single computer-vision detection result. +#[derive(Debug, Clone)] +pub struct CvDetection { + /// Label for the detected object (e.g. "face", "license_plate"). + pub label: String, + /// Detection confidence (0.0 -- 1.0). + pub confidence: f64, + /// Bounding box: `[x, y, width, height]` in pixels. + pub bbox: [f64; 4], +} + +/// Trait for computer-vision capabilities (face/plate/signature detection). +/// +/// Consumers implement this trait to supply object detection from images. +/// No rig-core types leak through this trait. +#[async_trait] +pub trait CvProvider: Send + Sync { + /// Detect objects in an image. 
+ async fn detect_objects(&self, image_data: &[u8]) -> Result, Error>; +} diff --git a/crates/nvisy-rig/src/agent/mod.rs b/crates/nvisy-rig/src/agent/mod.rs index 5bca2ee..4d7548d 100644 --- a/crates/nvisy-rig/src/agent/mod.rs +++ b/crates/nvisy-rig/src/agent/mod.rs @@ -1,144 +1,12 @@ -//! Structured output backend using rig-core's JSON schema enforcement. +//! Agent system: base agent, specialized agents, and tool-provider traits. -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; -use serde_json::Value; +mod base; +mod context; -use std::sync::Arc; +pub mod ner; +pub mod ocr; +pub mod cv; +pub mod redactor; -use rig::agent::{Agent, AgentBuilder}; -use rig::completion::{CompletionModel, TypedPrompt}; - -use nvisy_core::Error; - -use crate::backend::{LlmBackend, LlmConfig}; -use crate::bridge::prompt::PromptBuilder; -use crate::bridge::response::ResponseParser; -use crate::bridge::RigBackendConfig; -use crate::backend::ErrorMapper; -use crate::backend::UsageTracker; - -/// A list of entities returned by structured output. -#[derive(Debug, Deserialize, Serialize, JsonSchema)] -pub struct EntityList { - /// Detected entities. - pub entities: Vec, -} - -/// A single raw entity from structured LLM output. -#[derive(Debug, Deserialize, Serialize, JsonSchema)] -pub struct RawEntity { - /// Broad classification (e.g. "pii", "phi", "financial", "credentials"). - pub category: String, - /// Specific entity type (e.g. "email_address", "person_name"). - pub entity_type: String, - /// The matched text value. - pub value: String, - /// Detection confidence (0.0 -- 1.0). - pub confidence: f64, - /// Start byte offset in the input text. - pub start_offset: usize, - /// End byte offset in the input text. - pub end_offset: usize, -} - -impl RawEntity { - /// Convert this raw entity into a [`serde_json::Value`] dict. 
- pub fn into_value(self) -> Value { - serde_json::json!({ - "category": self.category, - "entity_type": self.entity_type, - "value": self.value, - "confidence": self.confidence, - "start_offset": self.start_offset, - "end_offset": self.end_offset, - }) - } -} - -/// Backend that uses rig-core's structured output (JSON schema enforcement) -/// for entity detection. -/// -/// Falls back to text-based parsing if structured output fails. -pub struct StructuredBackend { - agent: Agent, - model: Arc, - config: RigBackendConfig, - tracker: UsageTracker, -} - -impl StructuredBackend { - /// Create a new structured backend. - pub fn new(model: M, config: RigBackendConfig) -> Self { - let model = Arc::new(model); - let agent = AgentBuilder::new((*model).clone()) - .temperature(config.temperature) - .max_tokens(config.max_tokens) - .build(); - - Self { - agent, - model, - config, - tracker: UsageTracker::new(), - } - } - - /// Access the usage tracker for this backend. - pub fn tracker(&self) -> &UsageTracker { - &self.tracker - } -} - -#[async_trait::async_trait] -impl LlmBackend for StructuredBackend -where - M: CompletionModel + Send + Sync + 'static, -{ - #[tracing::instrument(skip_all, fields(text_len = text.len(), mode = "structured"))] - async fn detect_text( - &self, - text: &str, - config: &LlmConfig, - ) -> Result, Error> { - let user_prompt = PromptBuilder::new(config).build(text); - - // Try structured output first. - let structured_result: Result = self - .agent - .prompt_typed::(&user_prompt) - .await; - - match structured_result { - Ok(entity_list) => { - tracing::debug!( - count = entity_list.entities.len(), - "structured output succeeded" - ); - Ok(entity_list.entities.into_iter().map(RawEntity::into_value).collect()) - } - Err(structured_err) => { - tracing::warn!( - error = %structured_err, - "structured output failed, falling back to text-based parsing" - ); - - // Fall back to text-based completion using the model directly. 
- let mut builder = self - .model - .completion_request(&user_prompt) - .temperature(self.config.temperature) - .max_tokens(self.config.max_tokens); - - if let Some(ref preamble) = config.system_prompt { - builder = builder.preamble(preamble.clone()); - } - - let response = builder.send().await.map_err(ErrorMapper::from_completion)?; - let response_text = ResponseParser::extract_text(&response)?; - self.tracker.record(&response.usage, 0); - ResponseParser::parse_entities(&response_text) - } - } - } -} +pub(crate) use base::{BaseAgent, BaseAgentBuilder, BaseAgentConfig}; +pub(crate) use context::ContextWindow; diff --git a/crates/nvisy-rig/src/agent/ner/mod.rs b/crates/nvisy-rig/src/agent/ner/mod.rs new file mode 100644 index 0000000..50091b1 --- /dev/null +++ b/crates/nvisy-rig/src/agent/ner/mod.rs @@ -0,0 +1,52 @@ +//! NER (Named Entity Recognition) agent for textual PII/entity detection. + +mod output; +mod prompt; + +pub use output::{RawEntities, RawEntity}; + +use rig::completion::CompletionModel; + +use nvisy_core::Error; + +use crate::backend::{DetectionConfig, UsageTracker}; + +use super::base::{BaseAgent, BaseAgentConfig}; +use prompt::{NerPromptBuilder, NER_SYSTEM_PROMPT}; + +/// Agent for textual PII/entity detection using LLM + NER. +/// +/// Wraps [`BaseAgent`] with NER-specific prompts and output types. +pub struct NerAgent { + base: BaseAgent, +} + +impl NerAgent { + /// Create a new NER agent with the given model and config. + pub fn new(model: M, config: BaseAgentConfig) -> Self { + let base = BaseAgent::builder(model, config) + .preamble(NER_SYSTEM_PROMPT) + .build(); + Self { base } + } + + /// Access the usage tracker. + pub fn tracker(&self) -> &UsageTracker { + self.base.tracker() + } + + /// Detect entities in text using structured output with text-based fallback. 
+ #[tracing::instrument(skip_all, fields(text_len = text.len(), mode = "ner"))] + pub async fn detect( + &self, + text: &str, + config: &DetectionConfig, + ) -> Result, Error> { + let prompt = NerPromptBuilder::new(config).build(text); + let result: RawEntities = self + .base + .prompt_structured(&prompt, config.system_prompt.as_deref()) + .await?; + Ok(result.entities) + } +} diff --git a/crates/nvisy-rig/src/agent/ner/output.rs b/crates/nvisy-rig/src/agent/ner/output.rs new file mode 100644 index 0000000..b802490 --- /dev/null +++ b/crates/nvisy-rig/src/agent/ner/output.rs @@ -0,0 +1,30 @@ +//! Structured output types for NER entity detection. + +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; + +use nvisy_ontology::entity::{EntityCategory, EntityKind}; + +/// A list of raw entities returned by structured output. +#[derive(Debug, Default, Deserialize, Serialize, JsonSchema)] +pub struct RawEntities { + /// Detected entities. + pub entities: Vec, +} + +/// A single raw entity from structured LLM output. +#[derive(Debug, Deserialize, Serialize, JsonSchema)] +pub struct RawEntity { + /// Broad classification. + pub category: EntityCategory, + /// Specific entity type. + pub entity_type: EntityKind, + /// The matched text value. + pub value: String, + /// Detection confidence (0.0 -- 1.0). + pub confidence: f64, + /// Start byte offset in the input text. + pub start_offset: usize, + /// End byte offset in the input text. + pub end_offset: usize, +} diff --git a/crates/nvisy-rig/src/agent/ner/prompt.rs b/crates/nvisy-rig/src/agent/ner/prompt.rs new file mode 100644 index 0000000..49ccce1 --- /dev/null +++ b/crates/nvisy-rig/src/agent/ner/prompt.rs @@ -0,0 +1,32 @@ +//! NER-specific prompt construction. + +use crate::backend::DetectionConfig; +use crate::bridge::PromptBuilder; + +/// Builds user prompts for NER entity detection. 
+pub(crate) struct NerPromptBuilder<'a> { + inner: PromptBuilder<'a>, +} + +impl<'a> NerPromptBuilder<'a> { + /// Create a prompt builder from a [`DetectionConfig`]. + pub fn new(config: &'a DetectionConfig) -> Self { + Self { + inner: PromptBuilder::new(config), + } + } + + /// Build the user prompt for the given text. + pub fn build(&self, text: &str) -> String { + self.inner.build(text) + } +} + +/// Default system prompt for NER detection. +pub(super) const NER_SYSTEM_PROMPT: &str = "\ +You are a precise named-entity recognition system. \ +Identify personally identifiable information (PII), protected health information (PHI), \ +financial data, and credentials in the provided text. \ +Return results as a JSON array of objects with keys: \ +category, entity_type, value, confidence, start_offset, end_offset. \ +If no entities are found, return an empty array []."; diff --git a/crates/nvisy-rig/src/agent/ocr/mod.rs b/crates/nvisy-rig/src/agent/ocr/mod.rs new file mode 100644 index 0000000..ec2b015 --- /dev/null +++ b/crates/nvisy-rig/src/agent/ocr/mod.rs @@ -0,0 +1,17 @@ +//! OCR agent for vision + text extraction (VLM + OCR). +//! +//! Placeholder agent — implementation deferred to a future PR. + +use async_trait::async_trait; + +use nvisy_core::Error; + +/// Trait for OCR capabilities that can be provided to VLM agents. +/// +/// Consumers implement this trait to supply text extraction from images. +/// No rig-core types leak through this trait. +#[async_trait] +pub trait OcrProvider: Send + Sync { + /// Extract text from an image. + async fn extract_text(&self, image_data: &[u8]) -> Result; +} diff --git a/crates/nvisy-rig/src/agent/redactor/mod.rs b/crates/nvisy-rig/src/agent/redactor/mod.rs new file mode 100644 index 0000000..74139f8 --- /dev/null +++ b/crates/nvisy-rig/src/agent/redactor/mod.rs @@ -0,0 +1,3 @@ +//! Redactor agent for context-aware semantic redaction. +//! +//! Placeholder — implementation deferred to a future PR. 
diff --git a/crates/nvisy-rig/src/backend.rs b/crates/nvisy-rig/src/backend.rs deleted file mode 100644 index 1bdaee8..0000000 --- a/crates/nvisy-rig/src/backend.rs +++ /dev/null @@ -1,34 +0,0 @@ -//! LLM backend trait and configuration. - -use serde_json::Value; - -use nvisy_core::Error; - -/// Configuration passed to an [`LlmBackend`] implementation. -#[derive(Debug, Clone)] -pub struct LlmConfig { - /// Entity type labels to detect (e.g., `["PERSON", "SSN"]`). - pub entity_types: Vec, - /// Minimum confidence score to include a detection (0.0 -- 1.0). - pub confidence_threshold: f64, - /// System prompt override (if empty, the backend uses its default). - pub system_prompt: Option, -} - -/// Backend trait for LLM-based entity detection. -/// -/// Implementations call an LLM service (e.g. via `rig-core`) and return -/// raw JSON results. Entity construction from the raw dicts is handled -/// by the detection layers. -#[async_trait::async_trait] -pub trait LlmBackend: Send + Sync + 'static { - /// Detect entities in text using an LLM, returning raw dicts. - /// - /// Each dict should contain: `category`, `entity_type`, `value`, - /// `confidence`, `start_offset`, `end_offset`. - async fn detect_text( - &self, - text: &str, - config: &LlmConfig, - ) -> Result, Error>; -} diff --git a/crates/nvisy-rig/src/backend/error.rs b/crates/nvisy-rig/src/backend/error.rs index df7ec35..67790fb 100644 --- a/crates/nvisy-rig/src/backend/error.rs +++ b/crates/nvisy-rig/src/backend/error.rs @@ -4,33 +4,28 @@ use rig::completion::CompletionError; use nvisy_core::Error; -/// Maps [`CompletionError`] variants to [`nvisy_core::Error`]. -pub struct ErrorMapper; - -impl ErrorMapper { - /// Convert a rig-core [`CompletionError`] into a [`nvisy_core::Error`]. 
- pub fn from_completion(err: CompletionError) -> Error { - match err { - CompletionError::HttpError(e) => { - Error::connection(format!("HTTP error: {e}"), "rig", true) - } - CompletionError::JsonError(e) => { - Error::new(nvisy_core::ErrorKind::Serialization, format!("JSON error: {e}")) - .with_component("rig") - } - CompletionError::ProviderError(msg) => { - let retryable = is_retryable_provider_error(&msg); - Error::connection(format!("Provider error: {msg}"), "rig", retryable) - } - CompletionError::ResponseError(msg) => { - Error::runtime(format!("Response error: {msg}"), "rig", false) - } - CompletionError::RequestError(e) => { - Error::validation(format!("Request error: {e}"), "rig") - } - CompletionError::UrlError(e) => { - Error::validation(format!("URL error: {e}"), "rig") - } +/// Convert a rig-core [`CompletionError`] into a [`nvisy_core::Error`]. +pub fn from_completion(err: CompletionError) -> Error { + match err { + CompletionError::HttpError(e) => { + Error::connection(format!("HTTP error: {e}"), "rig", true) + } + CompletionError::JsonError(e) => { + Error::new(nvisy_core::ErrorKind::Serialization, format!("JSON error: {e}")) + .with_component("rig") + } + CompletionError::ProviderError(msg) => { + let retryable = is_retryable_provider_error(&msg); + Error::connection(format!("Provider error: {msg}"), "rig", retryable) + } + CompletionError::ResponseError(msg) => { + Error::runtime(format!("Response error: {msg}"), "rig", false) + } + CompletionError::RequestError(e) => { + Error::validation(format!("Request error: {e}"), "rig") + } + CompletionError::UrlError(e) => { + Error::validation(format!("URL error: {e}"), "rig") } } } diff --git a/crates/nvisy-rig/src/backend/mod.rs b/crates/nvisy-rig/src/backend/mod.rs index d838250..8cf85cc 100644 --- a/crates/nvisy-rig/src/backend/mod.rs +++ b/crates/nvisy-rig/src/backend/mod.rs @@ -1,44 +1,38 @@ -//! LLM backend trait and configuration. +//! LLM backend types, error mapping, and Tower retry policy. 
-pub mod compact; -pub mod error; -pub mod metrics; -pub mod retry; +mod error; +mod metrics; +mod retry; -pub use compact::ContextWindow; -pub use error::ErrorMapper; +pub use error::from_completion; pub use metrics::{UsageStats, UsageTracker}; pub use retry::RetryPolicy; use serde_json::Value; -use nvisy_core::Error; +use nvisy_ontology::entity::EntityKind; -/// Configuration passed to an [`LlmBackend`] implementation. +/// Configuration passed to a detection backend. #[derive(Debug, Clone)] -pub struct LlmConfig { - /// Entity type labels to detect (e.g., `["PERSON", "SSN"]`). - pub entity_types: Vec, +pub struct DetectionConfig { + /// Entity kinds to detect (empty = all). + pub entity_kinds: Vec, /// Minimum confidence score to include a detection (0.0 -- 1.0). pub confidence_threshold: f64, /// System prompt override (if empty, the backend uses its default). pub system_prompt: Option, } -/// Backend trait for LLM-based entity detection. -/// -/// Implementations call an LLM service (e.g. via `rig-core`) and return -/// raw JSON results. Entity construction from the raw dicts is handled -/// by the detection layers. -#[async_trait::async_trait] -pub trait LlmBackend: Send + Sync + 'static { - /// Detect entities in text using an LLM, returning raw dicts. - /// - /// Each dict should contain: `category`, `entity_type`, `value`, - /// `confidence`, `start_offset`, `end_offset`. - async fn detect_text( - &self, - text: &str, - config: &LlmConfig, - ) -> Result, Error>; +/// Request type for the Tower-based detection service. +#[derive(Debug, Clone)] +pub struct DetectionRequest { + pub text: String, + pub config: DetectionConfig, +} + +/// Response type for the Tower-based detection service. 
+#[derive(Debug, Clone)] +pub struct DetectionResponse { + pub entities: Vec, + pub usage: Option, } diff --git a/crates/nvisy-rig/src/backend/retry.rs b/crates/nvisy-rig/src/backend/retry.rs index ebc262f..0a76ed7 100644 --- a/crates/nvisy-rig/src/backend/retry.rs +++ b/crates/nvisy-rig/src/backend/retry.rs @@ -1,11 +1,12 @@ -//! Retry policy with exponential backoff. +//! Tower retry policy with exponential backoff. -use std::future::Future; use std::time::Duration; use nvisy_core::Error; -/// Exponential backoff retry policy. +use super::{DetectionRequest, DetectionResponse}; + +/// Tower retry policy with exponential backoff for retryable errors. #[derive(Debug, Clone)] pub struct RetryPolicy { /// Maximum number of retries (default: 3). @@ -16,6 +17,10 @@ pub struct RetryPolicy { pub backoff_factor: f64, /// Maximum backoff duration cap (default: 5s). pub max_backoff: Duration, + /// Current attempt counter (internal). + attempts: u32, + /// Current backoff (internal). + current_backoff: Duration, } impl Default for RetryPolicy { @@ -32,111 +37,117 @@ impl RetryPolicy { initial_backoff: Duration::from_millis(300), backoff_factor: 2.0, max_backoff: Duration::from_secs(5), + attempts: 0, + current_backoff: Duration::from_millis(300), } } - /// Execute an async closure with retry on retryable errors. 
- pub async fn execute(&self, operation: F) -> Result - where - F: Fn() -> Fut, - Fut: Future>, - { - let mut attempts = 0u32; - let mut backoff = self.initial_backoff; - - loop { - match operation().await { - Ok(val) => return Ok(val), - Err(err) => { - if !err.is_retryable() || attempts >= self.max_retries { - return Err(err); - } - - attempts += 1; - tracing::warn!( - attempt = attempts, - max_retries = self.max_retries, - backoff_ms = backoff.as_millis() as u64, - error = %err, - "retrying after transient error" - ); - - tokio::time::sleep(backoff).await; + pub fn max_retries(&self) -> u32 { + self.max_retries + } +} - backoff = Duration::from_secs_f64( - (backoff.as_secs_f64() * self.backoff_factor).min(self.max_backoff.as_secs_f64()), - ); +impl tower::retry::Policy for RetryPolicy { + type Future = std::pin::Pin + Send>>; + + fn retry( + &mut self, + _req: &mut DetectionRequest, + result: &mut Result, + ) -> Option { + match result { + Ok(_) => None, + Err(err) => { + if !err.is_retryable() || self.attempts >= self.max_retries { + return None; } + + self.attempts += 1; + let backoff = self.current_backoff; + + tracing::warn!( + attempt = self.attempts, + max_retries = self.max_retries, + backoff_ms = backoff.as_millis() as u64, + error = %err, + "retrying after transient error" + ); + + self.current_backoff = Duration::from_secs_f64( + (self.current_backoff.as_secs_f64() * self.backoff_factor) + .min(self.max_backoff.as_secs_f64()), + ); + + Some(Box::pin(async move { + tokio::time::sleep(backoff).await; + })) } } } - /// Return the number of retries that were consumed during the last - /// [`execute`](Self::execute) call. This is tracked externally by the - /// caller; here we just expose a helper to compute attempts from the - /// backoff state if needed. 
- pub fn max_retries(&self) -> u32 { - self.max_retries + fn clone_request(&mut self, req: &DetectionRequest) -> Option { + Some(req.clone()) } } #[cfg(test)] mod tests { use super::*; - use std::sync::atomic::{AtomicU32, Ordering}; - - #[tokio::test] - async fn succeeds_on_first_try() { - let policy = RetryPolicy::new(); - let result = policy.execute(|| async { Ok::<_, Error>(42) }).await; - assert_eq!(result.unwrap(), 42); - } + use tower::retry::Policy; #[tokio::test] async fn retries_on_retryable_error() { - let counter = AtomicU32::new(0); - let policy = RetryPolicy { - max_retries: 3, - initial_backoff: Duration::from_millis(1), - backoff_factor: 1.0, - max_backoff: Duration::from_millis(1), + let mut policy = RetryPolicy::new(); + let mut req = DetectionRequest { + text: "test".into(), + config: crate::backend::DetectionConfig { + entity_kinds: vec![], + confidence_threshold: 0.5, + system_prompt: None, + }, }; + let mut result: Result = + Err(Error::connection("transient", "test", true)); - let result = policy - .execute(|| { - let attempt = counter.fetch_add(1, Ordering::SeqCst); - async move { - if attempt < 2 { - Err(Error::connection("transient", "test", true)) - } else { - Ok(42) - } - } - }) - .await; - - assert_eq!(result.unwrap(), 42); - assert_eq!(counter.load(Ordering::SeqCst), 3); + let fut = policy.retry(&mut req, &mut result); + assert!(fut.is_some()); } #[tokio::test] async fn does_not_retry_non_retryable() { - let counter = AtomicU32::new(0); - let policy = RetryPolicy { - max_retries: 3, - initial_backoff: Duration::from_millis(1), - backoff_factor: 1.0, - max_backoff: Duration::from_millis(1), + let mut policy = RetryPolicy::new(); + let mut req = DetectionRequest { + text: "test".into(), + config: crate::backend::DetectionConfig { + entity_kinds: vec![], + confidence_threshold: 0.5, + system_prompt: None, + }, }; + let mut result: Result = + Err(Error::validation("bad input", "test")); + + let fut = policy.retry(&mut req, &mut result); + 
assert!(fut.is_none()); + } - let result: Result = policy - .execute(|| { - counter.fetch_add(1, Ordering::SeqCst); - async { Err(Error::validation("bad input", "test")) } - }) - .await; + #[tokio::test] + async fn does_not_retry_success() { + let mut policy = RetryPolicy::new(); + let mut req = DetectionRequest { + text: "test".into(), + config: crate::backend::DetectionConfig { + entity_kinds: vec![], + confidence_threshold: 0.5, + system_prompt: None, + }, + }; + let mut result: Result = Ok(DetectionResponse { + entities: vec![], + usage: None, + }); - assert!(result.is_err()); - assert_eq!(counter.load(Ordering::SeqCst), 1); + let fut = policy.retry(&mut req, &mut result); + assert!(fut.is_none()); } } diff --git a/crates/nvisy-rig/src/bridge/mod.rs b/crates/nvisy-rig/src/bridge/mod.rs index eba3185..7579cfa 100644 --- a/crates/nvisy-rig/src/bridge/mod.rs +++ b/crates/nvisy-rig/src/bridge/mod.rs @@ -1,23 +1,22 @@ -//! Core bridge between rig-core and the [`LlmBackend`] trait. +//! Core bridge between rig-core and the Tower-based detection service. -pub mod prompt; -pub mod response; +mod prompt; +mod response; pub use prompt::PromptBuilder; pub use response::{EntityParser, ResponseParser}; -use std::sync::atomic::{AtomicU32, Ordering}; - -use serde_json::Value; +use std::sync::Arc; +use std::task::{Context, Poll}; use rig::completion::CompletionModel; use nvisy_core::Error; -use crate::backend::{LlmBackend, LlmConfig}; -use crate::backend::ErrorMapper; -use crate::backend::UsageTracker; -use crate::backend::RetryPolicy; +use crate::backend::{ + from_completion, DetectionRequest, DetectionResponse, + RetryPolicy, UsageTracker, +}; /// Configuration for a [`RigBackend`]. #[derive(Debug, Clone)] @@ -40,21 +39,22 @@ impl Default for RigBackendConfig { } } -/// Production [`LlmBackend`] implementation wrapping a rig-core -/// [`CompletionModel`]. +/// Production detection service wrapping a rig-core [`CompletionModel`]. +/// +/// Implements `tower::Service`. 
pub struct RigBackend { - model: M, + model: Arc, config: RigBackendConfig, - tracker: UsageTracker, + tracker: Arc, } impl RigBackend { /// Create a new backend with the given model and configuration. pub fn new(model: M, config: RigBackendConfig) -> Self { Self { - model, + model: Arc::new(model), config, - tracker: UsageTracker::new(), + tracker: Arc::new(UsageTracker::new()), } } @@ -62,70 +62,54 @@ impl RigBackend { pub fn tracker(&self) -> &UsageTracker { &self.tracker } - - /// Send a single completion request to the model. - async fn send_request( - &self, - user_prompt: &str, - system_prompt: Option<&str>, - ) -> Result<(String, rig::completion::Usage), Error> { - let mut builder = self - .model - .completion_request(user_prompt) - .temperature(self.config.temperature) - .max_tokens(self.config.max_tokens); - - if let Some(preamble) = system_prompt { - builder = builder.preamble(preamble.to_string()); - } - - let response = builder.send().await.map_err(ErrorMapper::from_completion)?; - let text = ResponseParser::extract_text(&response)?; - Ok((text, response.usage)) - } } -#[async_trait::async_trait] -impl LlmBackend for RigBackend +impl tower::Service for RigBackend where M: CompletionModel + Send + Sync + 'static, { - #[tracing::instrument(skip_all, fields(text_len = text.len()))] - async fn detect_text( - &self, - text: &str, - config: &LlmConfig, - ) -> Result, Error> { - let user_prompt = PromptBuilder::new(config).build(text); - let system_prompt = config.system_prompt.as_deref(); - - let call_count = AtomicU32::new(0); - let result = self - .config - .retry - .execute(|| { - call_count.fetch_add(1, Ordering::Relaxed); - self.send_request(&user_prompt, system_prompt) - }) - .await; + type Response = DetectionResponse; + type Error = Error; + type Future = std::pin::Pin> + Send>>; - // Actual retries = total calls - 1 (the first attempt is not a retry). 
- let actual_retries = call_count.load(Ordering::Relaxed).saturating_sub(1); + fn poll_ready(&mut self, _cx: &mut Context<'_>) -> Poll> { + Poll::Ready(Ok(())) + } - match result { - Ok((response_text, usage)) => { - self.tracker.record(&usage, actual_retries); + fn call(&mut self, req: DetectionRequest) -> Self::Future { + let user_prompt = PromptBuilder::new(&req.config).build(&req.text); + let system_prompt = req.config.system_prompt.clone(); + let model = Arc::clone(&self.model); + let temperature = self.config.temperature; + let max_tokens = self.config.max_tokens; + let tracker = Arc::clone(&self.tracker); + + Box::pin(async move { + let mut builder = model + .completion_request(&user_prompt) + .temperature(temperature) + .max_tokens(max_tokens); + + if let Some(ref preamble) = system_prompt { + builder = builder.preamble(preamble.clone()); + } - tracing::debug!( - input_tokens = usage.input_tokens, - output_tokens = usage.output_tokens, - retries = actual_retries, - "LLM request completed" - ); + let response = builder.send().await.map_err(from_completion)?; + let parsed = ResponseParser::extract_text(&response)?; + let entities = parsed.parse_json()?; - ResponseParser::parse_entities(&response_text) - } - Err(e) => Err(e), - } + tracker.record(&response.usage, 0); + + tracing::debug!( + input_tokens = response.usage.input_tokens, + output_tokens = response.usage.output_tokens, + "LLM request completed" + ); + + Ok(DetectionResponse { + entities, + usage: Some(response.usage), + }) + }) } } diff --git a/crates/nvisy-rig/src/bridge/prompt.rs b/crates/nvisy-rig/src/bridge/prompt.rs index d84b6af..5a6ba88 100644 --- a/crates/nvisy-rig/src/bridge/prompt.rs +++ b/crates/nvisy-rig/src/bridge/prompt.rs @@ -1,38 +1,57 @@ //! Prompt construction for LLM entity detection. -use crate::backend::LlmConfig; +use std::fmt::Display; + +use nvisy_ontology::entity::EntityKind; + +use crate::backend::DetectionConfig; + +/// Instruction prefix for the user prompt. 
+const DETECT_PREFIX: &str = "Detect entities of types"; + +/// Fallback when no specific entity types are requested. +const ALL_TYPES_HINT: &str = "all entity types"; + +/// Suffix describing the expected response format. +const RESPONSE_FORMAT: &str = "\ +Return a JSON array of objects with keys: \ +category, entity_type, value, confidence, start_offset, end_offset."; /// Builds user prompts for entity detection requests. pub struct PromptBuilder<'a> { - entity_types: &'a [String], + entity_kinds: &'a [EntityKind], confidence_threshold: f64, } impl<'a> PromptBuilder<'a> { - /// Create a prompt builder from an [`LlmConfig`]. - pub fn new(config: &'a LlmConfig) -> Self { + /// Create a prompt builder from a [`DetectionConfig`]. + pub fn new(config: &'a DetectionConfig) -> Self { Self { - entity_types: &config.entity_types, + entity_kinds: &config.entity_kinds, confidence_threshold: config.confidence_threshold, } } /// Build the user prompt for the given text. pub fn build(&self, text: &str) -> String { - let types_hint = if self.entity_types.is_empty() { - "all entity types".to_string() + self.build_for(self.entity_kinds, text) + } + + /// Build a prompt using an arbitrary slice of displayable entity labels. + /// + /// This allows callers to pass any `Vec` where `E: Display` — for + /// example custom string labels or [`EntityKind`] variants. + pub fn build_for(&self, entity_types: &[E], text: &str) -> String { + let types_hint = if entity_types.is_empty() { + ALL_TYPES_HINT.to_string() } else { - self.entity_types.join(", ") + entity_types.iter().map(|e| e.to_string()).collect::>().join(", ") }; format!( - "Detect entities of types [{types_hint}] with minimum confidence \ - {threshold:.2} in the following text. 
Return a JSON array of objects \ - with keys: category, entity_type, value, confidence, start_offset, \ - end_offset.\n\n---\n{text}\n---", - types_hint = types_hint, + "{DETECT_PREFIX} [{types_hint}] with minimum confidence \ + {threshold:.2} in the following text. {RESPONSE_FORMAT}\n\n---\n{text}\n---", threshold = self.confidence_threshold, - text = text, ) } } @@ -42,26 +61,40 @@ mod tests { use super::*; #[test] - fn builds_prompt_with_entity_types() { - let config = LlmConfig { - entity_types: vec!["PERSON".into(), "SSN".into()], + fn builds_prompt_with_entity_kinds() { + let config = DetectionConfig { + entity_kinds: vec![EntityKind::PersonName, EntityKind::GovernmentId], confidence_threshold: 0.7, system_prompt: None, }; let prompt = PromptBuilder::new(&config).build("Hello world"); - assert!(prompt.contains("PERSON, SSN")); + assert!(prompt.contains("person_name, government_id")); assert!(prompt.contains("0.70")); assert!(prompt.contains("Hello world")); } #[test] - fn builds_prompt_without_entity_types() { - let config = LlmConfig { - entity_types: vec![], + fn builds_prompt_without_entity_kinds() { + let config = DetectionConfig { + entity_kinds: vec![], confidence_threshold: 0.5, system_prompt: None, }; let prompt = PromptBuilder::new(&config).build("test"); assert!(prompt.contains("all entity types")); } + + #[test] + fn build_for_with_string_labels() { + let config = DetectionConfig { + entity_kinds: vec![], + confidence_threshold: 0.8, + system_prompt: None, + }; + let builder = PromptBuilder::new(&config); + let labels = vec!["PERSON", "SSN"]; + let prompt = builder.build_for(&labels, "some text"); + assert!(prompt.contains("PERSON, SSN")); + assert!(prompt.contains("0.80")); + } } diff --git a/crates/nvisy-rig/src/bridge/response.rs b/crates/nvisy-rig/src/bridge/response.rs index fc1bdd7..3ab7684 100644 --- a/crates/nvisy-rig/src/bridge/response.rs +++ b/crates/nvisy-rig/src/bridge/response.rs @@ -1,7 +1,9 @@ //! 
Response parsing for LLM completions. +use std::borrow::Cow; use std::str::FromStr; +use serde::de::DeserializeOwned; use serde_json::Value; use rig::completion::{AssistantContent, CompletionResponse}; @@ -10,12 +12,16 @@ use nvisy_core::Error; use nvisy_ontology::entity::{DetectionMethod, Entity, EntityCategory, EntityKind}; use nvisy_ontology::location::{Location, TextLocation}; -/// Extracts text and parses JSON from LLM completion responses. -pub struct ResponseParser; +/// Extracted text from an LLM completion response. +/// +/// Wraps the raw text content and provides parsing accessors. +pub struct ResponseParser<'a> { + text: Cow<'a, str>, +} -impl ResponseParser { - /// Extract the first text content from a completion response. - pub fn extract_text(response: &CompletionResponse) -> Result { +impl<'a> ResponseParser<'a> { + /// Extract text content from a completion response. + pub fn extract_text(response: &CompletionResponse) -> Result { let texts: Vec<&str> = response .choice .iter() @@ -33,58 +39,53 @@ impl ResponseParser { )); } - Ok(texts.join("\n")) + Ok(Self { + text: Cow::Owned(texts.join("\n")), + }) + } + + /// Wrap an already-extracted string. + pub fn from_text(text: impl Into>) -> Self { + Self { text: text.into() } + } + + /// The raw text content. + pub fn as_str(&self) -> &str { + &self.text } - /// Parse a JSON entity array from LLM text output. + /// Parse the text as a JSON array. /// - /// Handles multiple formats: - /// - Raw JSON array: `[{...}, ...]` - /// - Markdown-fenced: `` ```json\n[...]\n``` `` - /// - Single object: `{...}` (wrapped in array) - /// - Empty / "no entities" / "none": returns empty vec - pub fn parse_entities(text: &str) -> Result, Error> { - let trimmed = text.trim(); + /// Convenience wrapper around [`parse_json`](Self::parse_json). + pub fn parse_json_array(&self) -> Result, Error> { + self.parse_json::>() + } + + /// Parse the text as JSON into `T`. 
+ /// + /// Strips markdown fences if present, then deserializes. + /// Empty / "no entities" / "none" responses return `T::default()`. + pub fn parse_json(&self) -> Result { + let trimmed = self.text.trim(); // Handle empty or "no entities" responses. if trimmed.is_empty() || trimmed.eq_ignore_ascii_case("none") || trimmed.eq_ignore_ascii_case("no entities") - || trimmed == "[]" { - return Ok(Vec::new()); + return Ok(T::default()); } // Try to extract JSON from markdown fences. let json_str = extract_fenced_json(trimmed).unwrap_or(trimmed); - // Try parsing as array. - if let Ok(Value::Array(arr)) = serde_json::from_str(json_str) { - return Ok(arr); - } - - // Try parsing as single object. - if let Ok(obj @ Value::Object(_)) = serde_json::from_str(json_str) { - return Ok(vec![obj]); - } - - // Try to find embedded JSON array in the text. - if let Some(start) = trimmed.find('[') { - if let Some(end) = trimmed.rfind(']') { - if start < end { - let substr = &trimmed[start..=end]; - if let Ok(Value::Array(arr)) = serde_json::from_str(substr) { - return Ok(arr); - } - } - } - } - - Err(Error::runtime( - format!("Failed to parse LLM response as JSON entities: {}", truncate(trimmed, 200)), - "rig", - false, - )) + serde_json::from_str::(json_str).map_err(|e| { + Error::runtime( + format!("Failed to parse LLM response as JSON: {e}: {}", truncate(trimmed, 200)), + "rig", + false, + ) + }) } } @@ -226,39 +227,40 @@ mod tests { use serde_json::json; #[test] - fn parse_entities_raw_array() { + fn parse_json_raw_array() { let text = r#"[{"category":"pii","entity_type":"email_address","value":"a@b.com","confidence":0.9,"start_offset":0,"end_offset":7}]"#; - let result = ResponseParser::parse_entities(text).unwrap(); + let parser = ResponseParser::from_text(text); + let result = parser.parse_json::>().unwrap(); assert_eq!(result.len(), 1); } #[test] - fn parse_entities_fenced() { + fn parse_json_fenced() { let text = 
"```json\n[{\"category\":\"pii\",\"entity_type\":\"email_address\",\"value\":\"a@b.com\",\"confidence\":0.9}]\n```"; - let result = ResponseParser::parse_entities(text).unwrap(); + let parser = ResponseParser::from_text(text); + let result = parser.parse_json::>().unwrap(); assert_eq!(result.len(), 1); } #[test] - fn parse_entities_single_object() { + fn parse_json_single_object() { let text = r#"{"category":"pii","entity_type":"email_address","value":"a@b.com","confidence":0.9}"#; - let result = ResponseParser::parse_entities(text).unwrap(); - assert_eq!(result.len(), 1); + let parser = ResponseParser::from_text(text); + let result = parser.parse_json::().unwrap(); + assert!(result.is_object()); } #[test] - fn parse_entities_empty() { - assert!(ResponseParser::parse_entities("").unwrap().is_empty()); - assert!(ResponseParser::parse_entities("none").unwrap().is_empty()); - assert!(ResponseParser::parse_entities("[]").unwrap().is_empty()); - assert!(ResponseParser::parse_entities("No entities").unwrap().is_empty()); + fn parse_json_empty() { + assert_eq!(ResponseParser::from_text("").parse_json::>().unwrap(), Vec::::new()); + assert_eq!(ResponseParser::from_text("none").parse_json::>().unwrap(), Vec::::new()); + assert_eq!(ResponseParser::from_text("No entities").parse_json::>().unwrap(), Vec::::new()); } #[test] - fn parse_entities_embedded_array() { - let text = "Here are the entities:\n[{\"key\":\"val\"}]\nDone."; - let result = ResponseParser::parse_entities(text).unwrap(); - assert_eq!(result.len(), 1); + fn as_str_returns_text() { + let parser = ResponseParser::from_text("hello world"); + assert_eq!(parser.as_str(), "hello world"); } #[test] diff --git a/crates/nvisy-rig/src/lib.rs b/crates/nvisy-rig/src/lib.rs index 10f6b13..0370153 100644 --- a/crates/nvisy-rig/src/lib.rs +++ b/crates/nvisy-rig/src/lib.rs @@ -4,10 +4,14 @@ pub mod backend; pub mod bridge; -pub mod agent; +pub(crate) mod agent; +#[doc(hidden)] pub mod prelude; -// Flat re-exports for 
ergonomics. -pub use backend::{LlmBackend, LlmConfig}; +pub use backend::{DetectionConfig, DetectionRequest, DetectionResponse}; pub use bridge::{EntityParser, RigBackend, RigBackendConfig}; + +// Tool-provider traits for consumers to implement. +pub use agent::ocr::OcrProvider; +pub use agent::cv::{CvDetection, CvProvider}; diff --git a/crates/nvisy-rig/src/parse.rs b/crates/nvisy-rig/src/parse.rs deleted file mode 100644 index a104082..0000000 --- a/crates/nvisy-rig/src/parse.rs +++ /dev/null @@ -1,88 +0,0 @@ -//! LLM result parsing. - -use std::str::FromStr; - -use serde_json::Value; - -use nvisy_core::Error; -use nvisy_ontology::entity::{DetectionMethod, Entity, EntityCategory, EntityKind}; -use nvisy_ontology::location::{Location, TextLocation}; - -/// Parse raw JSON dicts from an LLM backend into [`Entity`] values. -/// -/// Expected dict keys: `category`, `entity_type`, `value`, `confidence`, -/// and optionally `start_offset` / `end_offset`. -pub fn parse_llm_entities(raw: &[Value]) -> Result, Error> { - let mut entities = Vec::new(); - - for item in raw { - let obj = item.as_object().ok_or_else(|| { - Error::validation("Expected JSON object in LLM results".to_string(), "llm-parse") - })?; - - let category_str = obj - .get("category") - .and_then(Value::as_str) - .ok_or_else(|| Error::validation("Missing 'category'".to_string(), "llm-parse"))?; - - let category = match category_str { - "pii" => EntityCategory::Pii, - "phi" => EntityCategory::Phi, - "financial" => EntityCategory::Financial, - "credentials" => EntityCategory::Credentials, - other => EntityCategory::Custom(other.to_string()), - }; - - let entity_type_str = obj - .get("entity_type") - .and_then(Value::as_str) - .ok_or_else(|| Error::validation("Missing 'entity_type'".to_string(), "llm-parse"))?; - - let entity_kind = match EntityKind::from_str(entity_type_str) { - Ok(ek) => ek, - Err(_) => { - tracing::warn!(entity_type = entity_type_str, "unknown entity type from LLM, dropping"); - continue; - 
} - }; - - let value = obj - .get("value") - .and_then(Value::as_str) - .ok_or_else(|| Error::validation("Missing 'value'".to_string(), "llm-parse"))?; - - let confidence = obj - .get("confidence") - .and_then(Value::as_f64) - .ok_or_else(|| Error::validation("Missing 'confidence'".to_string(), "llm-parse"))?; - - let start_offset = obj - .get("start_offset") - .and_then(Value::as_u64) - .map(|v| v as usize) - .unwrap_or(0); - - let end_offset = obj - .get("end_offset") - .and_then(Value::as_u64) - .map(|v| v as usize) - .unwrap_or(0); - - let entity = Entity::new( - category, - entity_kind, - value, - DetectionMethod::ContextualNlp, - confidence, - ) - .with_location(Location::Text(TextLocation { - start_offset, - end_offset, - ..Default::default() - })); - - entities.push(entity); - } - - Ok(entities) -} diff --git a/crates/nvisy-rig/src/prelude.rs b/crates/nvisy-rig/src/prelude.rs index 9d633b3..2c35c5c 100644 --- a/crates/nvisy-rig/src/prelude.rs +++ b/crates/nvisy-rig/src/prelude.rs @@ -1,5 +1,6 @@ //! Convenience re-exports. 
-pub use crate::backend::{LlmBackend, LlmConfig, ContextWindow, RetryPolicy, UsageStats, UsageTracker}; +pub use crate::backend::{DetectionConfig, DetectionRequest, DetectionResponse, RetryPolicy, UsageStats, UsageTracker}; pub use crate::bridge::{EntityParser, RigBackend, RigBackendConfig}; -pub use crate::agent::{EntityList, RawEntity, StructuredBackend}; +pub use crate::agent::ocr::OcrProvider; +pub use crate::agent::cv::{CvDetection, CvProvider}; From 0e2c5ad918396e4376fb7655b85a6c62f43258f9 Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Wed, 25 Feb 2026 18:40:52 +0100 Subject: [PATCH 08/24] feat(rig,ontology): implement OcrAgent, CvAgent, RedactorAgent; add RedactionMethod MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement the three remaining stub agents in nvisy-rig: - OcrAgent: VLM agent with OcrProvider-backed tool, extracts text from images and detects entities via OcrPromptBuilder - CvAgent: VLM agent with CvProvider-backed tool, detects faces/plates/ signatures via CvPromptBuilder - RedactorAgent: pure LLM agent that recommends TextRedactionMethod for each detected entity via RedactorPromptBuilder Ontology changes (nvisy-ontology): - Rename spec/ to specification/ - Split mod.rs into input.rs (*RedactionInput enums + RedactorInput) and method.rs (TextRedactionMethod, ImageRedactionMethod, AudioRedactionMethod, RedactionMethod) Rig structural changes (nvisy-rig): - Rename agent dirs: ner→recognize, ocr→extract, cv→detect - Flatten agent/mod.rs re-exports (no pub submodules) - Add PromptBuilder structs for all agents (OcrPromptBuilder, CvPromptBuilder, RedactorPromptBuilder) - Add base64 and thiserror dependencies - Improve docs and tracing across all agents Co-Authored-By: Claude Opus 4.6 --- Cargo.lock | 2 + crates/nvisy-core/src/fs/content_kind.rs | 4 +- crates/nvisy-core/src/fs/document_type.rs | 5 +- crates/nvisy-engine/src/apply/image.rs | 6 +- crates/nvisy-engine/src/apply/tabular.rs | 2 +- 
crates/nvisy-engine/src/apply/text.rs | 2 +- crates/nvisy-identify/Cargo.toml | 1 + crates/nvisy-identify/src/llm/detection.rs | 88 ++++++++----- crates/nvisy-identify/src/policy/audit.rs | 8 +- crates/nvisy-identify/src/policy/evaluate.rs | 2 +- crates/nvisy-identify/src/policy/mod.rs | 2 +- crates/nvisy-identify/src/policy/retention.rs | 8 +- crates/nvisy-identify/src/policy/rule.rs | 2 +- crates/nvisy-identify/src/policy/types.rs | 2 +- .../nvisy-ontology/src/entity/annotation.rs | 6 +- crates/nvisy-ontology/src/entity/mod.rs | 5 +- crates/nvisy-ontology/src/entity/model.rs | 8 +- .../nvisy-ontology/src/entity/sensitivity.rs | 3 +- crates/nvisy-ontology/src/lib.rs | 2 +- crates/nvisy-ontology/src/record/mod.rs | 2 +- crates/nvisy-ontology/src/record/review.rs | 8 +- .../{spec/mod.rs => specification/input.rs} | 39 +++++- .../src/specification/method.rs | 114 +++++++++++++++++ .../nvisy-ontology/src/specification/mod.rs | 27 ++++ crates/nvisy-rig/Cargo.toml | 6 + crates/nvisy-rig/src/agent/cv/mod.rs | 28 ----- crates/nvisy-rig/src/agent/detect/mod.rs | 117 ++++++++++++++++++ crates/nvisy-rig/src/agent/detect/output.rs | 28 +++++ crates/nvisy-rig/src/agent/detect/prompt.rs | 66 ++++++++++ crates/nvisy-rig/src/agent/detect/tool.rs | 66 ++++++++++ crates/nvisy-rig/src/agent/extract/mod.rs | 101 +++++++++++++++ crates/nvisy-rig/src/agent/extract/output.rs | 30 +++++ crates/nvisy-rig/src/agent/extract/prompt.rs | 63 ++++++++++ crates/nvisy-rig/src/agent/extract/tool.rs | 64 ++++++++++ crates/nvisy-rig/src/agent/mod.rs | 17 ++- crates/nvisy-rig/src/agent/ocr/mod.rs | 17 --- .../src/agent/{ner => recognize}/mod.rs | 36 +++++- .../src/agent/{ner => recognize}/output.rs | 0 .../src/agent/{ner => recognize}/prompt.rs | 0 crates/nvisy-rig/src/agent/redactor/mod.rs | 80 +++++++++++- crates/nvisy-rig/src/agent/redactor/output.rs | 26 ++++ crates/nvisy-rig/src/agent/redactor/prompt.rs | 65 ++++++++++ crates/nvisy-rig/src/lib.rs | 9 +- crates/nvisy-rig/src/prelude.rs | 13 +- 44 
files changed, 1043 insertions(+), 137 deletions(-) rename crates/nvisy-ontology/src/{spec/mod.rs => specification/input.rs} (63%) create mode 100644 crates/nvisy-ontology/src/specification/method.rs create mode 100644 crates/nvisy-ontology/src/specification/mod.rs delete mode 100644 crates/nvisy-rig/src/agent/cv/mod.rs create mode 100644 crates/nvisy-rig/src/agent/detect/mod.rs create mode 100644 crates/nvisy-rig/src/agent/detect/output.rs create mode 100644 crates/nvisy-rig/src/agent/detect/prompt.rs create mode 100644 crates/nvisy-rig/src/agent/detect/tool.rs create mode 100644 crates/nvisy-rig/src/agent/extract/mod.rs create mode 100644 crates/nvisy-rig/src/agent/extract/output.rs create mode 100644 crates/nvisy-rig/src/agent/extract/prompt.rs create mode 100644 crates/nvisy-rig/src/agent/extract/tool.rs delete mode 100644 crates/nvisy-rig/src/agent/ocr/mod.rs rename crates/nvisy-rig/src/agent/{ner => recognize}/mod.rs (53%) rename crates/nvisy-rig/src/agent/{ner => recognize}/output.rs (100%) rename crates/nvisy-rig/src/agent/{ner => recognize}/prompt.rs (100%) create mode 100644 crates/nvisy-rig/src/agent/redactor/output.rs create mode 100644 crates/nvisy-rig/src/agent/redactor/prompt.rs diff --git a/Cargo.lock b/Cargo.lock index 9a70d91..d491205 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2841,12 +2841,14 @@ name = "nvisy-rig" version = "0.1.0" dependencies = [ "async-trait", + "base64", "nvisy-core", "nvisy-ontology", "rig-core", "schemars", "serde", "serde_json", + "thiserror 2.0.18", "tokio", "tower", "tracing", diff --git a/crates/nvisy-core/src/fs/content_kind.rs b/crates/nvisy-core/src/fs/content_kind.rs index 288f488..8811f40 100644 --- a/crates/nvisy-core/src/fs/content_kind.rs +++ b/crates/nvisy-core/src/fs/content_kind.rs @@ -12,9 +12,7 @@ use strum::{AsRefStr, Display, EnumIter, EnumString}; /// This enum represents high-level content categories without knowledge /// of specific file extensions or MIME types. 
The engine's format registry /// handles the mapping from extensions/MIME types to content kinds. -#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)] -#[derive(AsRefStr, Display, EnumString, EnumIter)] -#[derive(Serialize, Deserialize)] +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash, AsRefStr, Display, EnumString, EnumIter, Serialize, Deserialize)] #[strum(serialize_all = "lowercase")] #[serde(rename_all = "lowercase")] pub enum ContentKind { diff --git a/crates/nvisy-core/src/fs/document_type.rs b/crates/nvisy-core/src/fs/document_type.rs index ba0cec7..172e4a9 100644 --- a/crates/nvisy-core/src/fs/document_type.rs +++ b/crates/nvisy-core/src/fs/document_type.rs @@ -1,11 +1,12 @@ //! Document format classification. use serde::{Deserialize, Serialize}; +use strum::{Display, EnumString}; /// Document format that content can be classified as. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] -#[derive(schemars::JsonSchema)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Display, EnumString, Serialize, Deserialize, schemars::JsonSchema)] #[serde(rename_all = "snake_case")] +#[strum(serialize_all = "snake_case")] pub enum DocumentType { /// Plain text (`.txt`, `.log`, etc.). Txt, diff --git a/crates/nvisy-engine/src/apply/image.rs b/crates/nvisy-engine/src/apply/image.rs index 9a516f3..e39bf87 100644 --- a/crates/nvisy-engine/src/apply/image.rs +++ b/crates/nvisy-engine/src/apply/image.rs @@ -9,7 +9,7 @@ use nvisy_codec::transform::{ImageRedaction, ImageRedactionOutput, ImageHandler} use nvisy_ontology::entity::Entity; use nvisy_ontology::location::Location; use nvisy_ontology::record::Redaction; -use nvisy_ontology::spec::{ImageRedactionInput, RedactionInput}; +use nvisy_ontology::specification::{ImageRedactionInput, RedactionInput}; use nvisy_core::Error; /// Convert a `RedactionInput::Image` into a codec [`ImageRedactionOutput`]. 
@@ -71,7 +71,7 @@ pub(crate) async fn apply_image_doc( #[cfg(test)] mod tests { use super::*; - use nvisy_ontology::spec::TextRedactionInput; + use nvisy_ontology::specification::TextRedactionInput; #[test] fn image_output_blur() { @@ -123,7 +123,7 @@ mod tests { #[test] fn image_output_audio_spec_returns_none() { - let spec = RedactionInput::Audio(nvisy_ontology::spec::AudioRedactionInput::Silence); + let spec = RedactionInput::Audio(nvisy_ontology::specification::AudioRedactionInput::Silence); assert_eq!(image_output_from_spec(&spec), None); } } diff --git a/crates/nvisy-engine/src/apply/tabular.rs b/crates/nvisy-engine/src/apply/tabular.rs index 3cb8b2d..5525480 100644 --- a/crates/nvisy-engine/src/apply/tabular.rs +++ b/crates/nvisy-engine/src/apply/tabular.rs @@ -8,7 +8,7 @@ use nvisy_codec::document::Document; use nvisy_ontology::entity::Entity; use nvisy_ontology::location::Location; use nvisy_ontology::record::Redaction; -use nvisy_ontology::spec::{RedactionInput, TextRedactionInput}; +use nvisy_ontology::specification::{RedactionInput, TextRedactionInput}; use nvisy_core::Error; pub(crate) async fn apply_tabular_doc( diff --git a/crates/nvisy-engine/src/apply/text.rs b/crates/nvisy-engine/src/apply/text.rs index d02a97d..fa9b210 100644 --- a/crates/nvisy-engine/src/apply/text.rs +++ b/crates/nvisy-engine/src/apply/text.rs @@ -9,7 +9,7 @@ use nvisy_codec::transform::{TextRedaction, TextRedactionOutput, TextHandler}; use nvisy_ontology::entity::Entity; use nvisy_ontology::location::Location; use nvisy_ontology::record::Redaction; -use nvisy_ontology::spec::{RedactionInput, TextRedactionInput}; +use nvisy_ontology::specification::{RedactionInput, TextRedactionInput}; use nvisy_core::Error; /// Convert a `RedactionInput::Text` + replacement string into a codec diff --git a/crates/nvisy-identify/Cargo.toml b/crates/nvisy-identify/Cargo.toml index bd1733f..b32f174 100644 --- a/crates/nvisy-identify/Cargo.toml +++ b/crates/nvisy-identify/Cargo.toml @@ -44,6 +44,7 
@@ schemars = { workspace = true, features = [] } # Async runtime tokio = { workspace = true, features = ["sync"] } async-trait = { workspace = true, features = [] } +tower = { workspace = true, features = ["util"] } # Primitive datatypes uuid = { workspace = true, features = ["v4"] } diff --git a/crates/nvisy-identify/src/llm/detection.rs b/crates/nvisy-identify/src/llm/detection.rs index 000c5b5..1fbb415 100644 --- a/crates/nvisy-identify/src/llm/detection.rs +++ b/crates/nvisy-identify/src/llm/detection.rs @@ -6,11 +6,12 @@ use serde::Deserialize; use tokio::sync::Mutex; +use tower::Service; use nvisy_codec::handler::{Span, TxtSpan}; use nvisy_ontology::entity::EntityKind; use nvisy_core::Error; -use nvisy_rig::{LlmBackend, LlmConfig, parse_llm_entities}; +use nvisy_rig::{DetectionConfig, DetectionRequest, DetectionResponse, EntityParser}; use crate::{Entity, Location, ModelInfo, TextLocation}; use crate::{SequentialContext, DetectionService}; @@ -45,30 +46,33 @@ struct LlmState { prior_text: String, } -/// LLM contextual detection layer — delegates to an [`LlmBackend`]. +/// LLM contextual detection layer — delegates to a Tower [`Service`]. /// /// Uses [`SequentialContext`]: the orchestrator feeds one span at a /// time so the layer can carry sliding context between spans. pub struct LlmDetection { - backend: B, - config: LlmConfig, + backend: Mutex, + config: DetectionConfig, model_info: Option, state: Mutex, } -impl LlmDetection { +impl LlmDetection +where + B: Service + Send + 'static, +{ /// Create a new detection layer with the given backend and params. 
pub fn new(backend: B, params: LlmDetectionParams) -> Self { let system_prompt = params.system_prompt.unwrap_or_else(|| { prompt::system_prompt().to_string() }); - let config = LlmConfig { - entity_types: params.entity_kinds.iter().map(|ek| ek.to_string()).collect(), + let config = DetectionConfig { + entity_kinds: params.entity_kinds, confidence_threshold: params.confidence_threshold, system_prompt: Some(system_prompt), }; Self { - backend, + backend: Mutex::new(backend), config, model_info: params.model_info, state: Mutex::new(LlmState { @@ -85,7 +89,11 @@ impl LlmDetection { } #[async_trait::async_trait] -impl DetectionService for LlmDetection { +impl DetectionService for LlmDetection +where + B: Service + Send + 'static, + B::Future: Send, +{ type Context = SequentialContext; async fn detect( @@ -108,14 +116,18 @@ impl DetectionService for LlmDetection { } }; - let raw = self - .backend - .detect_text(&full_text, &self.config) - .await?; + let response = { + let mut backend = self.backend.lock().await; + let req = DetectionRequest { + text: full_text, + config: self.config.clone(), + }; + backend.call(req).await? + }; // Filter entities to the current span and adjust offsets. let span_len = span.data.len(); - for mut e in parse_llm_entities(&raw)? { + for mut e in EntityParser::parse(&response.entities)? 
{ if let Some(Location::Text(ref loc)) = e.location { if loc.end_offset <= context_len { continue; @@ -162,28 +174,38 @@ impl DetectionService for LlmDetection { mod tests { use super::*; use serde_json::{json, Value}; + use std::task::{Context, Poll}; struct MockLlmBackend; - #[async_trait::async_trait] - impl LlmBackend for MockLlmBackend { - async fn detect_text( - &self, - text: &str, - _config: &LlmConfig, - ) -> Result, Error> { - let mut results = Vec::new(); - if let Some(pos) = text.find("SECRET") { - results.push(json!({ - "category": "credentials", - "entity_type": "api_key", - "value": "SECRET", - "confidence": 0.92, - "start_offset": pos, - "end_offset": pos + 6 - })); - } - Ok(results) + impl Service for MockLlmBackend { + type Response = DetectionResponse; + type Error = Error; + type Future = std::pin::Pin> + Send>>; + + fn poll_ready(&mut self, _cx: &mut Context<'_>) -> Poll> { + Poll::Ready(Ok(())) + } + + fn call(&mut self, req: DetectionRequest) -> Self::Future { + let text = req.text; + Box::pin(async move { + let mut results = Vec::new(); + if let Some(pos) = text.find("SECRET") { + results.push(json!({ + "category": "credentials", + "entity_type": "api_key", + "value": "SECRET", + "confidence": 0.92, + "start_offset": pos, + "end_offset": pos + 6 + })); + } + Ok(DetectionResponse { + entities: results, + usage: None, + }) + }) } } diff --git a/crates/nvisy-identify/src/policy/audit.rs b/crates/nvisy-identify/src/policy/audit.rs index 99f6cf1..0ce4bf9 100644 --- a/crates/nvisy-identify/src/policy/audit.rs +++ b/crates/nvisy-identify/src/policy/audit.rs @@ -5,14 +5,13 @@ use jiff::Timestamp; use serde::{Deserialize, Serialize}; -use strum::Display; +use strum::{Display, EnumString}; use uuid::Uuid; use nvisy_core::path::ContentSource; /// Kind of auditable action recorded in an [`Audit`] entry. 
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Display, Serialize, Deserialize)] -#[derive(schemars::JsonSchema)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Display, EnumString, Serialize, Deserialize, schemars::JsonSchema)] #[serde(rename_all = "snake_case")] #[strum(serialize_all = "snake_case")] pub enum AuditAction { @@ -28,8 +27,7 @@ pub enum AuditAction { /// /// Audit entries are emitted by pipeline actions and form a tamper-evident /// log of all detection, redaction, and policy decisions. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[derive(schemars::JsonSchema)] +#[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)] pub struct Audit { /// Content source identity and lineage. #[serde(flatten)] diff --git a/crates/nvisy-identify/src/policy/evaluate.rs b/crates/nvisy-identify/src/policy/evaluate.rs index 07d624b..6330498 100644 --- a/crates/nvisy-identify/src/policy/evaluate.rs +++ b/crates/nvisy-identify/src/policy/evaluate.rs @@ -4,7 +4,7 @@ use serde::Deserialize; use crate::Entity; use nvisy_ontology::record::Redaction; -use nvisy_ontology::spec::{RedactionInput, TextRedactionInput}; +use nvisy_ontology::specification::{RedactionInput, TextRedactionInput}; use super::rule::PolicyRule; use nvisy_core::Error; diff --git a/crates/nvisy-identify/src/policy/mod.rs b/crates/nvisy-identify/src/policy/mod.rs index 484c517..df7ad1c 100644 --- a/crates/nvisy-identify/src/policy/mod.rs +++ b/crates/nvisy-identify/src/policy/mod.rs @@ -21,7 +21,7 @@ pub use summary::RedactionSummary; // Re-export data types from nvisy-ontology pub use nvisy_ontology::record::Redaction; pub use nvisy_ontology::record::{ReviewDecision, ReviewStatus}; -pub use nvisy_ontology::spec::{ +pub use nvisy_ontology::specification::{ AudioRedactionInput, ImageRedactionInput, RedactionInput, TextRedactionInput, DEFAULT_BLOCK_COLOR, DEFAULT_BLUR_SIGMA, DEFAULT_MASK_CHAR, DEFAULT_PIXELATE_BLOCK_SIZE, }; diff --git a/crates/nvisy-identify/src/policy/retention.rs 
b/crates/nvisy-identify/src/policy/retention.rs index 808ecd3..d7cc21d 100644 --- a/crates/nvisy-identify/src/policy/retention.rs +++ b/crates/nvisy-identify/src/policy/retention.rs @@ -3,11 +3,12 @@ use std::time::Duration; use serde::{Deserialize, Serialize}; +use strum::{Display, EnumString}; /// What class of data a retention policy applies to. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] -#[derive(schemars::JsonSchema)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Display, EnumString, Serialize, Deserialize, schemars::JsonSchema)] #[serde(rename_all = "snake_case")] +#[strum(serialize_all = "snake_case")] pub enum RetentionScope { /// Original ingested content before redaction. OriginalContent, @@ -18,8 +19,7 @@ pub enum RetentionScope { } /// A retention policy governing how long data is kept. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[derive(schemars::JsonSchema)] +#[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)] pub struct RetentionPolicy { /// What class of data this policy applies to. 
pub scope: RetentionScope, diff --git a/crates/nvisy-identify/src/policy/rule.rs b/crates/nvisy-identify/src/policy/rule.rs index 5d8b88e..8daa801 100644 --- a/crates/nvisy-identify/src/policy/rule.rs +++ b/crates/nvisy-identify/src/policy/rule.rs @@ -5,7 +5,7 @@ use strum::Display; use uuid::Uuid; use nvisy_core::fs::DocumentType; -use nvisy_ontology::spec::RedactionInput; +use nvisy_ontology::specification::RedactionInput; use crate::EntitySelector; diff --git a/crates/nvisy-identify/src/policy/types.rs b/crates/nvisy-identify/src/policy/types.rs index db36101..e959eb6 100644 --- a/crates/nvisy-identify/src/policy/types.rs +++ b/crates/nvisy-identify/src/policy/types.rs @@ -6,7 +6,7 @@ use uuid::Uuid; use super::rule::PolicyRule; use super::regulation::RegulationKind; -use nvisy_ontology::spec::RedactionInput; +use nvisy_ontology::specification::RedactionInput; /// A named redaction policy containing an ordered set of rules. #[derive(Debug, Clone, Serialize, Deserialize)] diff --git a/crates/nvisy-ontology/src/entity/annotation.rs b/crates/nvisy-ontology/src/entity/annotation.rs index 53f6f90..0b0f226 100644 --- a/crates/nvisy-ontology/src/entity/annotation.rs +++ b/crates/nvisy-ontology/src/entity/annotation.rs @@ -1,14 +1,14 @@ //! Annotation types for pre-identified regions and classification labels. use serde::{Deserialize, Serialize}; -use strum::Display; +use strum::{Display, EnumString}; use super::{EntityCategory, EntityKind}; use crate::location::Location; /// The kind of annotation applied to a content region. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Display, Serialize, Deserialize)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Display, EnumString, Serialize, Deserialize)] #[serde(rename_all = "snake_case")] #[strum(serialize_all = "snake_case")] pub enum AnnotationKind { @@ -21,7 +21,7 @@ pub enum AnnotationKind { } /// The scope to which an annotation label applies. 
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Display, Serialize, Deserialize)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Display, EnumString, Serialize, Deserialize)] #[serde(rename_all = "snake_case")] #[strum(serialize_all = "snake_case")] pub enum AnnotationScope { diff --git a/crates/nvisy-ontology/src/entity/mod.rs b/crates/nvisy-ontology/src/entity/mod.rs index 701deea..3661f87 100644 --- a/crates/nvisy-ontology/src/entity/mod.rs +++ b/crates/nvisy-ontology/src/entity/mod.rs @@ -20,7 +20,7 @@ pub use sensitivity::EntitySensitivity; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use serde_json::{Map, Value}; -use strum::Display; +use strum::{Display, EnumString}; use uuid::Uuid; use nvisy_core::path::ContentSource; @@ -28,8 +28,7 @@ use nvisy_core::path::ContentSource; use crate::location::Location; /// Method used to detect a sensitive entity. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Display, Serialize, Deserialize)] -#[derive(JsonSchema)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Display, EnumString, Serialize, Deserialize, JsonSchema)] #[serde(rename_all = "snake_case")] #[strum(serialize_all = "snake_case")] pub enum DetectionMethod { diff --git a/crates/nvisy-ontology/src/entity/model.rs b/crates/nvisy-ontology/src/entity/model.rs index c003b32..524ba58 100644 --- a/crates/nvisy-ontology/src/entity/model.rs +++ b/crates/nvisy-ontology/src/entity/model.rs @@ -2,11 +2,12 @@ use schemars::JsonSchema; use serde::{Deserialize, Serialize}; +use strum::{Display, EnumString}; /// Provenance or licensing classification of a detection model. -#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] -#[derive(JsonSchema)] +#[derive(Debug, Clone, PartialEq, Eq, Hash, Display, EnumString, Serialize, Deserialize, JsonSchema)] #[serde(rename_all = "snake_case")] +#[strum(serialize_all = "snake_case")] pub enum ModelKind { /// Open-source model (e.g. spaCy, Hugging Face community models). 
OpenSource, @@ -19,8 +20,7 @@ pub enum ModelKind { } /// Identity and version of the model used for detection. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[derive(JsonSchema)] +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] pub struct ModelInfo { /// Model name (e.g. `"spacy-en-core-web-lg"`, `"gpt-4"`). pub name: String, diff --git a/crates/nvisy-ontology/src/entity/sensitivity.rs b/crates/nvisy-ontology/src/entity/sensitivity.rs index 066b509..310615e 100644 --- a/crates/nvisy-ontology/src/entity/sensitivity.rs +++ b/crates/nvisy-ontology/src/entity/sensitivity.rs @@ -14,8 +14,7 @@ use strum::{Display, EnumString}; /// directly (`Critical > High > Medium > Low`). #[derive( Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, - Display, EnumString, - Serialize, Deserialize, JsonSchema, + Display, EnumString, Serialize, Deserialize, JsonSchema, )] #[serde(rename_all = "snake_case")] #[strum(serialize_all = "snake_case")] diff --git a/crates/nvisy-ontology/src/lib.rs b/crates/nvisy-ontology/src/lib.rs index 309b3bc..0d84f4c 100644 --- a/crates/nvisy-ontology/src/lib.rs +++ b/crates/nvisy-ontology/src/lib.rs @@ -5,4 +5,4 @@ pub mod entity; pub mod location; pub mod record; -pub mod spec; +pub mod specification; diff --git a/crates/nvisy-ontology/src/record/mod.rs b/crates/nvisy-ontology/src/record/mod.rs index a0d8a15..6e0c50a 100644 --- a/crates/nvisy-ontology/src/record/mod.rs +++ b/crates/nvisy-ontology/src/record/mod.rs @@ -10,7 +10,7 @@ use uuid::Uuid; use nvisy_core::path::ContentSource; -use crate::spec::RedactionInput; +use crate::specification::RedactionInput; /// A redaction decision recording how a specific entity was (or will be) redacted. 
/// diff --git a/crates/nvisy-ontology/src/record/review.rs b/crates/nvisy-ontology/src/record/review.rs index c624478..732e523 100644 --- a/crates/nvisy-ontology/src/record/review.rs +++ b/crates/nvisy-ontology/src/record/review.rs @@ -3,11 +3,12 @@ use jiff::Timestamp; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; +use strum::{Display, EnumString}; /// Status of a human review on a redaction decision. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] -#[derive(JsonSchema)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Display, EnumString, Serialize, Deserialize, JsonSchema)] #[serde(rename_all = "snake_case")] +#[strum(serialize_all = "snake_case")] pub enum ReviewStatus { /// Awaiting human review. Pending, @@ -20,8 +21,7 @@ pub enum ReviewStatus { } /// A review decision recorded against a redaction. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[derive(JsonSchema)] +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] pub struct ReviewDecision { /// Outcome of the review. pub status: ReviewStatus, diff --git a/crates/nvisy-ontology/src/spec/mod.rs b/crates/nvisy-ontology/src/specification/input.rs similarity index 63% rename from crates/nvisy-ontology/src/spec/mod.rs rename to crates/nvisy-ontology/src/specification/input.rs index 9d9c0c0..1a6eb71 100644 --- a/crates/nvisy-ontology/src/spec/mod.rs +++ b/crates/nvisy-ontology/src/specification/input.rs @@ -1,9 +1,15 @@ -//! Redaction specifications for all modalities. +//! Redaction input types: configuration-carrying specifications submitted +//! to the redaction engine, and the [`RedactorInput`] context struct +//! passed to LLM agents. use derive_more::From; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; +use crate::entity::{EntityCategory, EntityKind}; + +// ── defaults ──────────────────────────────────────────────────────────── + /// Default mask character for text redaction. 
pub const DEFAULT_MASK_CHAR: char = '*'; @@ -29,6 +35,8 @@ fn default_block_size() -> u32 { DEFAULT_PIXELATE_BLOCK_SIZE } +// ── text / tabular ────────────────────────────────────────────────────── + /// Text redaction specification with method-specific configuration. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize, JsonSchema)] #[serde(tag = "method", rename_all = "snake_case")] @@ -80,6 +88,8 @@ pub enum TextRedactionInput { }, } +// ── image / video ─────────────────────────────────────────────────────── + /// Image redaction specification with method-specific configuration. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize, JsonSchema)] #[serde(tag = "method", rename_all = "snake_case")] @@ -106,6 +116,8 @@ pub enum ImageRedactionInput { Synthesize, } +// ── audio ─────────────────────────────────────────────────────────────── + /// Audio redaction specification. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize, JsonSchema)] #[serde(tag = "method", rename_all = "snake_case")] @@ -118,6 +130,8 @@ pub enum AudioRedactionInput { Synthesize, } +// ── unified ───────────────────────────────────────────────────────────── + /// Unified redaction specification submitted to the engine. /// /// Carries the method to apply and its configuration parameters. @@ -131,3 +145,26 @@ pub enum RedactionInput { /// Audio redaction specification. Audio(AudioRedactionInput), } + +// ── agent input ───────────────────────────────────────────────────────── + +/// Entity passed to a redactor agent for decision-making. +/// +/// Contains the detected entity's classification, matched value, confidence, +/// and byte offsets in the source text. The redactor uses this context to +/// choose an appropriate [`RedactionMethod`](super::RedactionMethod). +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +pub struct RedactorInput { + /// Specific entity type (e.g. `EmailAddress`, `GovernmentId`). + pub entity_type: EntityKind, + /// Broad classification (e.g. 
`Pii`, `Financial`). + pub category: EntityCategory, + /// The matched text value. + pub value: String, + /// Detection confidence (0.0 -- 1.0). + pub confidence: f64, + /// Start byte offset in the input text. + pub start_offset: usize, + /// End byte offset in the input text. + pub end_offset: usize, +} diff --git a/crates/nvisy-ontology/src/specification/method.rs b/crates/nvisy-ontology/src/specification/method.rs new file mode 100644 index 0000000..90a8f09 --- /dev/null +++ b/crates/nvisy-ontology/src/specification/method.rs @@ -0,0 +1,114 @@ +//! Flat redaction method identifiers (no configuration payload). +//! +//! Each [`TextRedactionMethod`], [`ImageRedactionMethod`], and +//! [`AudioRedactionMethod`] names the *kind* of redaction to apply +//! without carrying method-specific parameters. These are the types an +//! LLM agent returns when recommending a redaction strategy; downstream +//! code maps them into the full [`TextRedactionInput`](super::TextRedactionInput), +//! [`ImageRedactionInput`](super::ImageRedactionInput), or +//! [`AudioRedactionInput`](super::AudioRedactionInput) with appropriate +//! defaults. + +use derive_more::From; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use strum::Display; + +/// Text/tabular redaction method. 
+/// +/// | Variant | Effect | +/// |---|---| +/// | `Mask` | Replace characters with a fixed mask character | +/// | `Replace` | Substitute with a type-appropriate placeholder | +/// | `Hash` | Replace with a one-way hash | +/// | `Encrypt` | Encrypt the value (recoverable with key) | +/// | `Remove` | Delete the value entirely | +/// | `Synthesize` | Replace with a realistic synthetic value | +/// | `Pseudonymize` | Replace with a consistent pseudonym | +/// | `Tokenize` | Replace with a vault-backed reversible token | +/// | `Aggregate` | Aggregate into a range or bucket | +/// | `Generalize` | Generalize to a less precise value | +/// | `DateShift` | Shift dates by a consistent offset | +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Display, Serialize, Deserialize, JsonSchema)] +#[serde(rename_all = "snake_case")] +#[strum(serialize_all = "snake_case")] +pub enum TextRedactionMethod { + /// Replace characters with a mask character (e.g. `***`). + Mask, + /// Substitute with a fixed placeholder (e.g. `[EMAIL]`). + Replace, + /// Replace with a one-way hash. + Hash, + /// Encrypt the value; recoverable with a referenced key. + Encrypt, + /// Remove the value entirely. + Remove, + /// Replace with a synthetically generated value. + Synthesize, + /// Replace with a consistent pseudonym. + Pseudonymize, + /// Replace with a vault-backed reversible token. + Tokenize, + /// Aggregate into a range or bucket. + Aggregate, + /// Generalize to a less precise value. + Generalize, + /// Shift dates by a consistent offset. + DateShift, +} + +/// Image/video redaction method. 
+/// +/// | Variant | Effect | +/// |---|---| +/// | `Blur` | Apply a gaussian blur over the region | +/// | `Block` | Overlay an opaque rectangle | +/// | `Pixelate` | Apply pixelation / mosaic effect | +/// | `Synthesize` | Replace with a synthetic region | +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Display, Serialize, Deserialize, JsonSchema)] +#[serde(rename_all = "snake_case")] +#[strum(serialize_all = "snake_case")] +pub enum ImageRedactionMethod { + /// Apply a gaussian blur over the region. + Blur, + /// Overlay an opaque rectangle. + Block, + /// Apply pixelation / mosaic effect. + Pixelate, + /// Replace with a synthetic region. + Synthesize, +} + +/// Audio redaction method. +/// +/// | Variant | Effect | +/// |---|---| +/// | `Silence` | Replace audio segment with silence | +/// | `Remove` | Remove the segment entirely | +/// | `Synthesize` | Replace with synthetic audio | +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Display, Serialize, Deserialize, JsonSchema)] +#[serde(rename_all = "snake_case")] +#[strum(serialize_all = "snake_case")] +pub enum AudioRedactionMethod { + /// Replace audio segment with silence. + Silence, + /// Remove the segment entirely. + Remove, + /// Replace with synthetic audio. + Synthesize, +} + +/// Unified redaction method across all modalities. +/// +/// Mirrors the structure of [`RedactionInput`](super::RedactionInput) but +/// carries only the method name — no configuration payload. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, From, Serialize, Deserialize, JsonSchema)] +#[serde(rename_all = "snake_case")] +pub enum RedactionMethod { + /// Text/tabular redaction method. + Text(TextRedactionMethod), + /// Image/video redaction method. + Image(ImageRedactionMethod), + /// Audio redaction method. 
+ Audio(AudioRedactionMethod), +} diff --git a/crates/nvisy-ontology/src/specification/mod.rs b/crates/nvisy-ontology/src/specification/mod.rs new file mode 100644 index 0000000..451398f --- /dev/null +++ b/crates/nvisy-ontology/src/specification/mod.rs @@ -0,0 +1,27 @@ +//! Redaction specifications for all modalities. +//! +//! This module contains two layers: +//! +//! - **Methods** ([`TextRedactionMethod`], [`ImageRedactionMethod`], +//! [`AudioRedactionMethod`], [`RedactionMethod`]) — flat enums naming +//! *what kind* of redaction to apply, without configuration. These are +//! returned by LLM agents when recommending a strategy. +//! +//! - **Inputs** ([`TextRedactionInput`], [`ImageRedactionInput`], +//! [`AudioRedactionInput`], [`RedactionInput`]) — tagged enums carrying +//! method-specific configuration (mask char, blur sigma, etc.). These +//! are submitted to the redaction engine for execution. +//! +//! The [`RedactorInput`] struct carries entity context passed *into* a +//! redactor agent so it can choose the right method. 
+ +mod input; +mod method; + +pub use input::{ + AudioRedactionInput, ImageRedactionInput, RedactionInput, RedactorInput, TextRedactionInput, + DEFAULT_BLOCK_COLOR, DEFAULT_BLUR_SIGMA, DEFAULT_MASK_CHAR, DEFAULT_PIXELATE_BLOCK_SIZE, +}; +pub use method::{ + AudioRedactionMethod, ImageRedactionMethod, RedactionMethod, TextRedactionMethod, +}; diff --git a/crates/nvisy-rig/Cargo.toml b/crates/nvisy-rig/Cargo.toml index 0ab3f6b..eb18d8c 100644 --- a/crates/nvisy-rig/Cargo.toml +++ b/crates/nvisy-rig/Cargo.toml @@ -32,11 +32,17 @@ async-trait = { workspace = true, features = [] } tokio = { workspace = true, features = ["time"] } tower = { workspace = true, features = ["retry", "timeout", "util"] } +# Encoding +base64 = { workspace = true, features = [] } + # (De)serialization serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true, features = [] } schemars = { workspace = true, features = [] } +# Error handling +thiserror = { workspace = true, features = [] } + # Observability tracing = { workspace = true, features = [] } diff --git a/crates/nvisy-rig/src/agent/cv/mod.rs b/crates/nvisy-rig/src/agent/cv/mod.rs deleted file mode 100644 index 567aa54..0000000 --- a/crates/nvisy-rig/src/agent/cv/mod.rs +++ /dev/null @@ -1,28 +0,0 @@ -//! Computer vision agent for face/plate/signature detection (VLM + CV). -//! -//! Placeholder agent — implementation deferred to a future PR. - -use async_trait::async_trait; - -use nvisy_core::Error; - -/// A single computer-vision detection result. -#[derive(Debug, Clone)] -pub struct CvDetection { - /// Label for the detected object (e.g. "face", "license_plate"). - pub label: String, - /// Detection confidence (0.0 -- 1.0). - pub confidence: f64, - /// Bounding box: `[x, y, width, height]` in pixels. - pub bbox: [f64; 4], -} - -/// Trait for computer-vision capabilities (face/plate/signature detection). -/// -/// Consumers implement this trait to supply object detection from images. 
-/// No rig-core types leak through this trait. -#[async_trait] -pub trait CvProvider: Send + Sync { - /// Detect objects in an image. - async fn detect_objects(&self, image_data: &[u8]) -> Result, Error>; -} diff --git a/crates/nvisy-rig/src/agent/detect/mod.rs b/crates/nvisy-rig/src/agent/detect/mod.rs new file mode 100644 index 0000000..8d5829a --- /dev/null +++ b/crates/nvisy-rig/src/agent/detect/mod.rs @@ -0,0 +1,117 @@ +//! Computer vision agent for face, license plate, and signature detection. +//! +//! [`CvAgent`] wraps a [`BaseAgent`](super::BaseAgent) with a +//! [`CvProvider`]-backed tool. It encodes an image as base64, prompts the +//! VLM to call the CV tool, and returns classified entities with bounding +//! boxes. + +mod output; +mod prompt; +mod tool; + +pub use output::{RawCvEntities, RawCvEntity}; + +use std::sync::Arc; + +use async_trait::async_trait; +use base64::Engine; +use base64::engine::general_purpose::STANDARD; +use rig::completion::CompletionModel; +use serde::Serialize; + +use nvisy_core::Error; + +use crate::backend::{DetectionConfig, UsageTracker}; + +use super::base::{BaseAgent, BaseAgentConfig}; +use prompt::{CvPromptBuilder, CV_SYSTEM_PROMPT}; +use tool::CvRigTool; + +/// A single computer-vision detection result returned by a [`CvProvider`]. +/// +/// This is the raw output from the CV backend before the VLM classifies +/// detections into entity categories. It carries a human-readable label, +/// a confidence score, and a pixel-space bounding box. +#[derive(Debug, Clone, Serialize)] +pub struct CvDetection { + /// Label for the detected object (e.g. `"face"`, `"license_plate"`). + pub label: String, + /// Detection confidence in the range `0.0..=1.0`. + pub confidence: f64, + /// Bounding box as `[x, y, width, height]` in pixels. + pub bbox: [f64; 4], +} + +/// Trait for computer-vision capabilities (face/plate/signature detection). +/// +/// Consumers implement this trait to supply object detection from images. 
+/// The trait is intentionally free of rig-core types so it can be +/// implemented in any crate without pulling in the LLM framework. +#[async_trait] +pub trait CvProvider: Send + Sync { + /// Detect objects in raw image bytes (PNG, JPEG, etc.). + async fn detect_objects(&self, image_data: &[u8]) -> Result, Error>; +} + +/// VLM agent that detects privacy-sensitive objects in images. +/// +/// # Workflow +/// +/// 1. Caller passes raw image bytes to [`detect`](Self::detect). +/// 2. The agent base64-encodes the image and builds a user prompt via +/// [`CvPromptBuilder`]. +/// 3. The VLM is instructed to call the `cv_detect_objects` tool (backed +/// by the [`CvProvider`]) and then classify each detection into an +/// entity category and type. +/// 4. Structured output is parsed into a `Vec`. +pub struct CvAgent { + base: BaseAgent, +} + +impl CvAgent { + /// Create a new CV agent with the given model, config, and CV provider. + pub fn new(model: M, config: BaseAgentConfig, cv: impl CvProvider + 'static) -> Self { + let base = BaseAgent::builder(model, config) + .preamble(CV_SYSTEM_PROMPT) + .tool(CvRigTool(Arc::new(cv))) + .build(); + Self { base } + } + + /// Access the usage tracker for this agent's LLM calls. + pub fn tracker(&self) -> &UsageTracker { + self.base.tracker() + } + + /// Detect privacy-sensitive objects in an image. 
+ #[tracing::instrument( + skip_all, + fields(image_bytes = image_data.len(), agent = "cv"), + )] + pub async fn detect( + &self, + image_data: &[u8], + config: &DetectionConfig, + ) -> Result, Error> { + let image_b64 = STANDARD.encode(image_data); + tracing::debug!( + b64_len = image_b64.len(), + entity_kinds = config.entity_kinds.len(), + "encoded image, building prompt" + ); + + let prompt = CvPromptBuilder::new(config).build(&image_b64); + + let result: RawCvEntities = self + .base + .prompt_structured(&prompt, config.system_prompt.as_deref()) + .await?; + + tracing::info!( + entity_count = result.entities.len(), + "cv detection complete" + ); + + Ok(result.entities) + } +} diff --git a/crates/nvisy-rig/src/agent/detect/output.rs b/crates/nvisy-rig/src/agent/detect/output.rs new file mode 100644 index 0000000..595bdd7 --- /dev/null +++ b/crates/nvisy-rig/src/agent/detect/output.rs @@ -0,0 +1,28 @@ +//! Structured output types for CV detection. + +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; + +use nvisy_ontology::entity::{EntityCategory, EntityKind}; + +/// A single entity detected by computer vision. +#[derive(Debug, Deserialize, Serialize, JsonSchema)] +pub struct RawCvEntity { + /// Broad classification. + pub category: EntityCategory, + /// Specific entity type. + pub entity_type: EntityKind, + /// Label from the CV model (e.g. "face", "license_plate"). + pub label: String, + /// Detection confidence (0.0 -- 1.0). + pub confidence: f64, + /// Bounding box `[x, y, width, height]` in pixels. + pub bbox: [f64; 4], +} + +/// Wrapper for structured output parsing. +#[derive(Debug, Default, Deserialize, Serialize, JsonSchema)] +pub struct RawCvEntities { + /// Detected entities. + pub entities: Vec, +} diff --git a/crates/nvisy-rig/src/agent/detect/prompt.rs b/crates/nvisy-rig/src/agent/detect/prompt.rs new file mode 100644 index 0000000..81c3048 --- /dev/null +++ b/crates/nvisy-rig/src/agent/detect/prompt.rs @@ -0,0 +1,66 @@ +//! 
CV-specific prompt construction. +//! +//! [`CvPromptBuilder`] constructs the user prompt that instructs the VLM +//! to call the CV tool and classify detections into entity categories. + +use crate::backend::DetectionConfig; + +/// Fallback when no specific entity types are requested. +const ALL_TYPES_HINT: &str = "all detectable object types"; + +/// Builds user prompts for CV-based object detection. +/// +/// Encodes entity-kind filters and confidence thresholds into the prompt +/// alongside the base64-encoded image data. +pub(crate) struct CvPromptBuilder<'a> { + config: &'a DetectionConfig, +} + +impl<'a> CvPromptBuilder<'a> { + /// Create a prompt builder from a [`DetectionConfig`]. + pub fn new(config: &'a DetectionConfig) -> Self { + Self { config } + } + + /// Build the user prompt for the given base64-encoded image. + pub fn build(&self, image_b64: &str) -> String { + let entity_hint = if self.config.entity_kinds.is_empty() { + ALL_TYPES_HINT.to_string() + } else { + self.config + .entity_kinds + .iter() + .map(|e| e.to_string()) + .collect::>() + .join(", ") + }; + + format!( + "Detect objects of types [{entity_hint}] with minimum confidence \ + {threshold:.2} in the following base64-encoded image using the \ + cv_detect_objects tool.\n\n\ + Image (base64): {image_b64}", + threshold = self.config.confidence_threshold, + ) + } +} + +/// Default system prompt for the CV agent. +pub(super) const CV_SYSTEM_PROMPT: &str = "\ +You are a vision-language model performing object detection for privacy-sensitive content in images. \ +You have access to a computer vision tool that detects faces, license plates, and signatures.\n\ +\n\ +Your workflow:\n\ +1. Use the cv_detect_objects tool to detect objects in the provided image.\n\ +2. Analyze the detections and classify each into an entity category (pii, phi, etc.) \ + and specific entity type.\n\ +3. 
Return a JSON array of detected entities, each with keys: \ + category, entity_type, label, confidence, bbox ([x, y, width, height] in pixels).\n\ +\n\ +Common entity mappings:\n\ +- face → category: pii, entity_type: biometric_data\n\ +- license_plate → category: pii, entity_type: vehicle_id\n\ +- signature → category: pii, entity_type: biometric_data\n\ +- handwriting → category: pii, entity_type: person_name (if it contains a name)\n\ +\n\ +If no objects are detected, return an empty array []."; diff --git a/crates/nvisy-rig/src/agent/detect/tool.rs b/crates/nvisy-rig/src/agent/detect/tool.rs new file mode 100644 index 0000000..c98ab51 --- /dev/null +++ b/crates/nvisy-rig/src/agent/detect/tool.rs @@ -0,0 +1,66 @@ +//! Internal rig `Tool` wrapper for [`CvProvider`]. + +use std::sync::Arc; + +use base64::Engine; +use base64::engine::general_purpose::STANDARD; +use rig::completion::ToolDefinition; +use rig::tool::Tool; +use serde::Deserialize; +use serde_json::json; + +use super::CvProvider; + +/// Arguments for the CV tool call. +#[derive(Deserialize)] +pub(super) struct CvToolArgs { + /// Base64-encoded image data. + pub image_base64: String, +} + +/// Error returned by the CV tool. +#[derive(Debug, thiserror::Error)] +#[error("{0}")] +pub(super) struct CvToolError(String); + +/// Rig `Tool` wrapper around a [`CvProvider`] implementation. +pub(super) struct CvRigTool(pub Arc); + +impl Tool for CvRigTool { + const NAME: &'static str = "cv_detect_objects"; + + type Error = CvToolError; + type Args = CvToolArgs; + type Output = String; + + async fn definition(&self, _prompt: String) -> ToolDefinition { + ToolDefinition { + name: Self::NAME.to_string(), + description: "Detect objects (faces, license plates, signatures) in an image \ + using computer vision. Pass the image as a base64-encoded string." 
+ .to_string(), + parameters: json!({ + "type": "object", + "properties": { + "image_base64": { + "type": "string", + "description": "Base64-encoded image data" + } + }, + "required": ["image_base64"] + }), + } + } + + async fn call(&self, args: Self::Args) -> Result { + let bytes = STANDARD + .decode(&args.image_base64) + .map_err(|e| CvToolError(format!("invalid base64: {e}")))?; + let detections = self + .0 + .detect_objects(&bytes) + .await + .map_err(|e| CvToolError(e.to_string()))?; + serde_json::to_string(&detections).map_err(|e| CvToolError(e.to_string())) + } +} diff --git a/crates/nvisy-rig/src/agent/extract/mod.rs b/crates/nvisy-rig/src/agent/extract/mod.rs new file mode 100644 index 0000000..547e004 --- /dev/null +++ b/crates/nvisy-rig/src/agent/extract/mod.rs @@ -0,0 +1,101 @@ +//! OCR agent for vision-based text extraction and entity detection. +//! +//! [`OcrAgent`] wraps a [`BaseAgent`](super::BaseAgent) with an +//! [`OcrProvider`]-backed tool. It encodes an image as base64, prompts the +//! VLM to call the OCR tool, and returns extracted text together with any +//! entities found in it. + +mod output; +mod prompt; +mod tool; + +pub use output::{OcrOutput, RawOcrEntity}; + +use std::sync::Arc; + +use async_trait::async_trait; +use base64::Engine; +use base64::engine::general_purpose::STANDARD; +use rig::completion::CompletionModel; + +use nvisy_core::Error; + +use crate::backend::{DetectionConfig, UsageTracker}; + +use super::base::{BaseAgent, BaseAgentConfig}; +use prompt::{OcrPromptBuilder, OCR_SYSTEM_PROMPT}; +use tool::OcrRigTool; + +/// Trait for OCR capabilities that can be provided to VLM agents. +/// +/// Consumers implement this trait to supply text extraction from images. +/// The trait is intentionally free of rig-core types so it can be +/// implemented in any crate without pulling in the LLM framework. +#[async_trait] +pub trait OcrProvider: Send + Sync { + /// Extract text from raw image bytes (PNG, JPEG, etc.). 
+    async fn extract_text(&self, image_data: &[u8]) -> Result<String, Error>;
+}
+
+/// VLM agent that extracts text from images and detects entities in it.
+///
+/// # Workflow
+///
+/// 1. Caller passes raw image bytes to [`extract_and_detect`](Self::extract_and_detect).
+/// 2. The agent base64-encodes the image and builds a user prompt via
+///    [`OcrPromptBuilder`].
+/// 3. The VLM is instructed to call the `ocr_extract_text` tool (backed by
+///    the [`OcrProvider`]) and then analyse the result for PII/PHI entities.
+/// 4. Structured output is parsed into [`OcrOutput`].
+pub struct OcrAgent<M: CompletionModel> {
+    base: BaseAgent<M>,
+}
+
+impl<M: CompletionModel> OcrAgent<M> {
+    /// Create a new OCR agent with the given model, config, and OCR provider.
+    pub fn new(model: M, config: BaseAgentConfig, ocr: impl OcrProvider + 'static) -> Self {
+        let base = BaseAgent::builder(model, config)
+            .preamble(OCR_SYSTEM_PROMPT)
+            .tool(OcrRigTool(Arc::new(ocr)))
+            .build();
+        Self { base }
+    }
+
+    /// Access the usage tracker for this agent's LLM calls.
+    pub fn tracker(&self) -> &UsageTracker {
+        self.base.tracker()
+    }
+
+    /// Extract text from an image and detect entities in the extracted text.
+    #[tracing::instrument(
+        skip_all,
+        fields(image_bytes = image_data.len(), agent = "ocr"),
+    )]
+    pub async fn extract_and_detect(
+        &self,
+        image_data: &[u8],
+        config: &DetectionConfig,
+    ) -> Result<OcrOutput, Error> {
+        let image_b64 = STANDARD.encode(image_data);
+        tracing::debug!(
+            b64_len = image_b64.len(),
+            entity_kinds = config.entity_kinds.len(),
+            "encoded image, building prompt"
+        );
+
+        let prompt = OcrPromptBuilder::new(config).build(&image_b64);
+
+        let output: OcrOutput = self
+            .base
+            .prompt_structured(&prompt, config.system_prompt.as_deref())
+            .await?;
+
+        tracing::info!(
+            text_len = output.extracted_text.len(),
+            entity_count = output.entities.len(),
+            "ocr extraction complete"
+        );
+
+        Ok(output)
+    }
+}
diff --git a/crates/nvisy-rig/src/agent/extract/output.rs b/crates/nvisy-rig/src/agent/extract/output.rs
new file mode 100644
index 0000000..266d096
--- /dev/null
+++ b/crates/nvisy-rig/src/agent/extract/output.rs
@@ -0,0 +1,30 @@
+//! Structured output types for OCR entity detection.
+
+use schemars::JsonSchema;
+use serde::{Deserialize, Serialize};
+
+use nvisy_ontology::entity::{EntityCategory, EntityKind};
+
+/// Top-level output from the OCR agent.
+#[derive(Debug, Default, Deserialize, Serialize, JsonSchema)]
+pub struct OcrOutput {
+    /// Full text extracted from the image.
+    pub extracted_text: String,
+    /// Entities detected in the extracted text.
+    pub entities: Vec<RawOcrEntity>,
+}
+
+/// A single entity detected in OCR-extracted text.
+#[derive(Debug, Deserialize, Serialize, JsonSchema)]
+pub struct RawOcrEntity {
+    /// Broad classification.
+    pub category: EntityCategory,
+    /// Specific entity type.
+    pub entity_type: EntityKind,
+    /// The matched text value.
+    pub value: String,
+    /// Detection confidence (0.0 -- 1.0).
+    pub confidence: f64,
+    /// Optional bounding box `[x, y, width, height]` in pixels.
+ pub bbox: Option<[f64; 4]>, +} diff --git a/crates/nvisy-rig/src/agent/extract/prompt.rs b/crates/nvisy-rig/src/agent/extract/prompt.rs new file mode 100644 index 0000000..7f0d4dc --- /dev/null +++ b/crates/nvisy-rig/src/agent/extract/prompt.rs @@ -0,0 +1,63 @@ +//! OCR-specific prompt construction. +//! +//! [`OcrPromptBuilder`] constructs the user prompt that instructs the VLM +//! to call the OCR tool and then detect entities in the extracted text. + +use crate::backend::DetectionConfig; + +/// Fallback when no specific entity types are requested. +const ALL_TYPES_HINT: &str = "all entity types"; + +/// Builds user prompts for OCR-based entity extraction. +/// +/// Encodes entity-kind filters and confidence thresholds into the prompt +/// alongside the base64-encoded image data. +pub(crate) struct OcrPromptBuilder<'a> { + config: &'a DetectionConfig, +} + +impl<'a> OcrPromptBuilder<'a> { + /// Create a prompt builder from a [`DetectionConfig`]. + pub fn new(config: &'a DetectionConfig) -> Self { + Self { config } + } + + /// Build the user prompt for the given base64-encoded image. + pub fn build(&self, image_b64: &str) -> String { + let entity_hint = if self.config.entity_kinds.is_empty() { + ALL_TYPES_HINT.to_string() + } else { + self.config + .entity_kinds + .iter() + .map(|e| e.to_string()) + .collect::>() + .join(", ") + }; + + format!( + "Extract text from the following base64-encoded image using the \ + ocr_extract_text tool, then detect entities of types [{entity_hint}] \ + with minimum confidence {threshold:.2}.\n\n\ + Image (base64): {image_b64}", + threshold = self.config.confidence_threshold, + ) + } +} + +/// Default system prompt for the OCR agent. +pub(super) const OCR_SYSTEM_PROMPT: &str = "\ +You are a vision-language model performing OCR and entity detection on images. \ +You have access to an OCR tool that extracts text from images. \ +\n\ +Your workflow:\n\ +1. 
Use the ocr_extract_text tool to extract all text from the provided image.\n\ +2. Analyze the extracted text for personally identifiable information (PII), \ + protected health information (PHI), financial data, and credentials.\n\ +3. Return a JSON object with two fields:\n\ + - \"extracted_text\": the full text extracted from the image\n\ + - \"entities\": a JSON array of detected entities, each with keys: \ + category, entity_type, value, confidence, bbox (optional [x, y, w, h] array)\n\ +\n\ +If no entities are found, return an empty array for \"entities\". \ +If OCR produces no text, return an empty string for \"extracted_text\" and an empty array for \"entities\"."; diff --git a/crates/nvisy-rig/src/agent/extract/tool.rs b/crates/nvisy-rig/src/agent/extract/tool.rs new file mode 100644 index 0000000..c29ffea --- /dev/null +++ b/crates/nvisy-rig/src/agent/extract/tool.rs @@ -0,0 +1,64 @@ +//! Internal rig `Tool` wrapper for [`OcrProvider`]. + +use std::sync::Arc; + +use base64::Engine; +use base64::engine::general_purpose::STANDARD; +use rig::completion::ToolDefinition; +use rig::tool::Tool; +use serde::Deserialize; +use serde_json::json; + +use super::OcrProvider; + +/// Arguments for the OCR tool call. +#[derive(Deserialize)] +pub(super) struct OcrToolArgs { + /// Base64-encoded image data. + pub image_base64: String, +} + +/// Error returned by the OCR tool. +#[derive(Debug, thiserror::Error)] +#[error("{0}")] +pub(super) struct OcrToolError(String); + +/// Rig `Tool` wrapper around an [`OcrProvider`] implementation. +pub(super) struct OcrRigTool(pub Arc); + +impl Tool for OcrRigTool { + const NAME: &'static str = "ocr_extract_text"; + + type Error = OcrToolError; + type Args = OcrToolArgs; + type Output = String; + + async fn definition(&self, _prompt: String) -> ToolDefinition { + ToolDefinition { + name: Self::NAME.to_string(), + description: "Extract text from an image using OCR. \ + Pass the image as a base64-encoded string." 
+ .to_string(), + parameters: json!({ + "type": "object", + "properties": { + "image_base64": { + "type": "string", + "description": "Base64-encoded image data" + } + }, + "required": ["image_base64"] + }), + } + } + + async fn call(&self, args: Self::Args) -> Result { + let bytes = STANDARD + .decode(&args.image_base64) + .map_err(|e| OcrToolError(format!("invalid base64: {e}")))?; + self.0 + .extract_text(&bytes) + .await + .map_err(|e| OcrToolError(e.to_string())) + } +} diff --git a/crates/nvisy-rig/src/agent/mod.rs b/crates/nvisy-rig/src/agent/mod.rs index 4d7548d..dc7f0f5 100644 --- a/crates/nvisy-rig/src/agent/mod.rs +++ b/crates/nvisy-rig/src/agent/mod.rs @@ -1,12 +1,19 @@ //! Agent system: base agent, specialized agents, and tool-provider traits. +//! +//! All public types are re-exported here — consumer code should not reach +//! into individual agent submodules. mod base; mod context; - -pub mod ner; -pub mod ocr; -pub mod cv; -pub mod redactor; +mod detect; +mod extract; +mod recognize; +mod redactor; pub(crate) use base::{BaseAgent, BaseAgentBuilder, BaseAgentConfig}; pub(crate) use context::ContextWindow; + +pub use recognize::{NerAgent, RawEntities, RawEntity}; +pub use extract::{OcrAgent, OcrOutput, OcrProvider, RawOcrEntity}; +pub use detect::{CvAgent, CvDetection, CvProvider, RawCvEntities, RawCvEntity}; +pub use redactor::{RawRedaction, RedactorAgent, RedactorOutput}; diff --git a/crates/nvisy-rig/src/agent/ocr/mod.rs b/crates/nvisy-rig/src/agent/ocr/mod.rs deleted file mode 100644 index ec2b015..0000000 --- a/crates/nvisy-rig/src/agent/ocr/mod.rs +++ /dev/null @@ -1,17 +0,0 @@ -//! OCR agent for vision + text extraction (VLM + OCR). -//! -//! Placeholder agent — implementation deferred to a future PR. - -use async_trait::async_trait; - -use nvisy_core::Error; - -/// Trait for OCR capabilities that can be provided to VLM agents. -/// -/// Consumers implement this trait to supply text extraction from images. 
-/// No rig-core types leak through this trait. -#[async_trait] -pub trait OcrProvider: Send + Sync { - /// Extract text from an image. - async fn extract_text(&self, image_data: &[u8]) -> Result; -} diff --git a/crates/nvisy-rig/src/agent/ner/mod.rs b/crates/nvisy-rig/src/agent/recognize/mod.rs similarity index 53% rename from crates/nvisy-rig/src/agent/ner/mod.rs rename to crates/nvisy-rig/src/agent/recognize/mod.rs index 50091b1..d1527fb 100644 --- a/crates/nvisy-rig/src/agent/ner/mod.rs +++ b/crates/nvisy-rig/src/agent/recognize/mod.rs @@ -1,4 +1,8 @@ -//! NER (Named Entity Recognition) agent for textual PII/entity detection. +//! Named Entity Recognition (NER) agent for textual PII/entity detection. +//! +//! [`NerAgent`] wraps a [`BaseAgent`](super::BaseAgent) with NER-specific +//! prompts. It is a pure LLM agent (no tools) that analyses text and +//! returns structured entity detections with byte offsets. mod output; mod prompt; @@ -14,9 +18,15 @@ use crate::backend::{DetectionConfig, UsageTracker}; use super::base::{BaseAgent, BaseAgentConfig}; use prompt::{NerPromptBuilder, NER_SYSTEM_PROMPT}; -/// Agent for textual PII/entity detection using LLM + NER. +/// Agent for textual PII/entity detection using LLM-based NER. /// -/// Wraps [`BaseAgent`] with NER-specific prompts and output types. +/// # Workflow +/// +/// 1. Caller passes text and a [`DetectionConfig`] to +/// [`detect`](Self::detect). +/// 2. The agent builds a user prompt via [`NerPromptBuilder`] that +/// specifies entity types and confidence thresholds. +/// 3. Structured output is parsed into `Vec`. pub struct NerAgent { base: BaseAgent, } @@ -30,23 +40,39 @@ impl NerAgent { Self { base } } - /// Access the usage tracker. + /// Access the usage tracker for this agent's LLM calls. pub fn tracker(&self) -> &UsageTracker { self.base.tracker() } /// Detect entities in text using structured output with text-based fallback. 
- #[tracing::instrument(skip_all, fields(text_len = text.len(), mode = "ner"))] + #[tracing::instrument( + skip_all, + fields(text_len = text.len(), agent = "ner"), + )] pub async fn detect( &self, text: &str, config: &DetectionConfig, ) -> Result, Error> { let prompt = NerPromptBuilder::new(config).build(text); + + tracing::debug!( + prompt_len = prompt.len(), + entity_kinds = config.entity_kinds.len(), + "built ner prompt" + ); + let result: RawEntities = self .base .prompt_structured(&prompt, config.system_prompt.as_deref()) .await?; + + tracing::info!( + entity_count = result.entities.len(), + "ner detection complete" + ); + Ok(result.entities) } } diff --git a/crates/nvisy-rig/src/agent/ner/output.rs b/crates/nvisy-rig/src/agent/recognize/output.rs similarity index 100% rename from crates/nvisy-rig/src/agent/ner/output.rs rename to crates/nvisy-rig/src/agent/recognize/output.rs diff --git a/crates/nvisy-rig/src/agent/ner/prompt.rs b/crates/nvisy-rig/src/agent/recognize/prompt.rs similarity index 100% rename from crates/nvisy-rig/src/agent/ner/prompt.rs rename to crates/nvisy-rig/src/agent/recognize/prompt.rs diff --git a/crates/nvisy-rig/src/agent/redactor/mod.rs b/crates/nvisy-rig/src/agent/redactor/mod.rs index 74139f8..10e1050 100644 --- a/crates/nvisy-rig/src/agent/redactor/mod.rs +++ b/crates/nvisy-rig/src/agent/redactor/mod.rs @@ -1,3 +1,81 @@ //! Redactor agent for context-aware semantic redaction. //! -//! Placeholder — implementation deferred to a future PR. +//! [`RedactorAgent`] is a pure LLM agent (no tools) that takes detected +//! entities and their surrounding text and recommends a +//! [`TextRedactionMethod`](nvisy_ontology::specification::TextRedactionMethod) +//! for each one. It considers sensitivity level, document context, and +//! downstream utility when choosing between masking, replacement, hashing, +//! synthesis, pseudonymisation, and removal. 
+ +mod output; +mod prompt; + +pub use output::{RawRedaction, RedactorOutput}; + +use rig::completion::CompletionModel; + +use nvisy_core::Error; +use nvisy_ontology::specification::RedactorInput; + +use crate::backend::UsageTracker; + +use super::base::{BaseAgent, BaseAgentConfig}; +use prompt::{RedactorPromptBuilder, REDACTOR_SYSTEM_PROMPT}; + +/// Agent for context-aware redaction recommendations. +/// +/// # Workflow +/// +/// 1. Caller passes source text and a slice of [`RedactorInput`] entities +/// to [`recommend`](Self::recommend). +/// 2. The agent serialises the entities as JSON and builds a user prompt +/// via [`RedactorPromptBuilder`]. +/// 3. The LLM returns structured output mapping each entity to a +/// [`TextRedactionMethod`](nvisy_ontology::specification::TextRedactionMethod) +/// with a suggested replacement string. +/// 4. The result is parsed into `Vec`. +pub struct RedactorAgent { + base: BaseAgent, +} + +impl RedactorAgent { + /// Create a new redactor agent with the given model and config. + pub fn new(model: M, config: BaseAgentConfig) -> Self { + let base = BaseAgent::builder(model, config) + .preamble(REDACTOR_SYSTEM_PROMPT) + .build(); + Self { base } + } + + /// Access the usage tracker for this agent's LLM calls. + pub fn tracker(&self) -> &UsageTracker { + self.base.tracker() + } + + /// Recommend redaction methods for detected entities in the given text. 
+    #[tracing::instrument(
+        skip_all,
+        fields(text_len = text.len(), entity_count = entities.len(), agent = "redactor"),
+    )]
+    pub async fn recommend(
+        &self,
+        text: &str,
+        entities: &[RedactorInput],
+    ) -> Result<Vec<RawRedaction>, Error> {
+        let prompt = RedactorPromptBuilder::build(text, entities)?;
+
+        tracing::debug!(
+            prompt_len = prompt.len(),
+            "built redactor prompt"
+        );
+
+        let result: RedactorOutput = self.base.prompt_structured(&prompt, None).await?;
+
+        tracing::info!(
+            redaction_count = result.redactions.len(),
+            "redaction recommendations complete"
+        );
+
+        Ok(result.redactions)
+    }
+}
diff --git a/crates/nvisy-rig/src/agent/redactor/output.rs b/crates/nvisy-rig/src/agent/redactor/output.rs
new file mode 100644
index 0000000..577c054
--- /dev/null
+++ b/crates/nvisy-rig/src/agent/redactor/output.rs
@@ -0,0 +1,26 @@
+//! Structured output types for redaction recommendations.
+
+use schemars::JsonSchema;
+use serde::{Deserialize, Serialize};
+
+use nvisy_ontology::specification::TextRedactionMethod;
+
+/// A single redaction recommendation from the LLM.
+#[derive(Debug, Deserialize, Serialize, JsonSchema)]
+pub struct RawRedaction {
+    /// The original entity text that should be redacted.
+    pub entity_value: String,
+    /// Recommended redaction method.
+    pub method: TextRedactionMethod,
+    /// The suggested replacement text (e.g. `"[EMAIL]"`, `"***"`).
+    pub replacement: String,
+    /// Brief explanation of why this method was chosen.
+    pub reasoning: Option<String>,
+}
+
+/// Top-level structured output wrapper from the redactor agent.
+#[derive(Debug, Default, Deserialize, Serialize, JsonSchema)]
+pub struct RedactorOutput {
+    /// Recommended redactions for each entity.
+    pub redactions: Vec<RawRedaction>,
+}
diff --git a/crates/nvisy-rig/src/agent/redactor/prompt.rs b/crates/nvisy-rig/src/agent/redactor/prompt.rs
new file mode 100644
index 0000000..ab63a56
--- /dev/null
+++ b/crates/nvisy-rig/src/agent/redactor/prompt.rs
@@ -0,0 +1,65 @@
+//! Redactor-specific prompt construction.
+//!
+//! [`RedactorPromptBuilder`] constructs the user prompt that presents +//! detected entities and surrounding text to the LLM for redaction +//! method selection. + +use nvisy_core::Error; +use nvisy_ontology::specification::RedactorInput; + +/// Builds user prompts for redaction recommendations. +/// +/// Serialises the entity list as JSON and wraps the source text in +/// delimiters so the LLM has full context for sensitivity-aware decisions. +pub(crate) struct RedactorPromptBuilder; + +impl RedactorPromptBuilder { + /// Build the user prompt for the given text and entity list. + pub fn build(text: &str, entities: &[RedactorInput]) -> Result { + let entities_json = serde_json::to_string_pretty(entities).map_err(|e| { + Error::runtime( + format!("failed to serialize entities for redactor: {e}"), + "rig", + false, + ) + })?; + + Ok(format!( + "Recommend redaction methods for the following entities found in the \ + text below.\n\n\ + Entities:\n{entities_json}\n\n\ + ---\n{text}\n---" + )) + } +} + +/// Default system prompt for the redactor agent. +pub(super) const REDACTOR_SYSTEM_PROMPT: &str = "\ +You are a context-aware redaction system. Given a text and a list of detected entities, \ +recommend the most appropriate redaction method for each entity.\n\ +\n\ +Available redaction methods:\n\ +- \"mask\": Replace with a fixed mask (e.g. \"***\", \"[REDACTED]\"). Use for highly sensitive data \ + where the original value must not be recoverable.\n\ +- \"replace\": Replace with a type-appropriate placeholder (e.g. \"[EMAIL]\", \"[SSN]\"). Use when \ + the entity type should remain visible but the value hidden.\n\ +- \"hash\": Replace with a deterministic hash. Use when linkability across documents is needed \ + without exposing the original value.\n\ +- \"synthesize\": Replace with a realistic but fake value (e.g. a fake name, fake address). 
Use \ + when preserving data format and statistical properties matters.\n\ +- \"pseudonymize\": Replace with a consistent pseudonym. Use when the same entity should map to \ + the same pseudonym across a document or dataset.\n\ +- \"remove\": Delete the entity entirely. Use for data that adds no analytical value.\n\ +\n\ +For each entity, consider:\n\ +- Sensitivity level (credentials > government IDs > names)\n\ +- Context (medical records need stricter redaction than marketing copy)\n\ +- Downstream utility (will analysts need to correlate redacted values?)\n\ +\n\ +Return a JSON object with a \"redactions\" array. Each element must have:\n\ +- \"entity_value\": the original entity text\n\ +- \"method\": one of the methods above\n\ +- \"replacement\": the suggested replacement text\n\ +- \"reasoning\": brief explanation of why this method was chosen (optional)\n\ +\n\ +If no redactions are needed, return {\"redactions\": []}."; diff --git a/crates/nvisy-rig/src/lib.rs b/crates/nvisy-rig/src/lib.rs index 0370153..5e1300b 100644 --- a/crates/nvisy-rig/src/lib.rs +++ b/crates/nvisy-rig/src/lib.rs @@ -12,6 +12,9 @@ pub mod prelude; pub use backend::{DetectionConfig, DetectionRequest, DetectionResponse}; pub use bridge::{EntityParser, RigBackend, RigBackendConfig}; -// Tool-provider traits for consumers to implement. -pub use agent::ocr::OcrProvider; -pub use agent::cv::{CvDetection, CvProvider}; +pub use agent::{ + CvAgent, CvDetection, CvProvider, NerAgent, + OcrAgent, OcrOutput, OcrProvider, + RawCvEntities, RawCvEntity, RawEntities, RawEntity, + RawOcrEntity, RawRedaction, RedactorAgent, RedactorOutput, +}; diff --git a/crates/nvisy-rig/src/prelude.rs b/crates/nvisy-rig/src/prelude.rs index 2c35c5c..3dcdf65 100644 --- a/crates/nvisy-rig/src/prelude.rs +++ b/crates/nvisy-rig/src/prelude.rs @@ -1,6 +1,13 @@ //! Convenience re-exports. 
-pub use crate::backend::{DetectionConfig, DetectionRequest, DetectionResponse, RetryPolicy, UsageStats, UsageTracker}; +pub use crate::backend::{ + DetectionConfig, DetectionRequest, DetectionResponse, + RetryPolicy, UsageStats, UsageTracker, +}; pub use crate::bridge::{EntityParser, RigBackend, RigBackendConfig}; -pub use crate::agent::ocr::OcrProvider; -pub use crate::agent::cv::{CvDetection, CvProvider}; +pub use crate::agent::{ + CvAgent, CvDetection, CvProvider, NerAgent, + OcrAgent, OcrOutput, OcrProvider, + RawCvEntities, RawCvEntity, RawEntities, RawEntity, + RawOcrEntity, RawRedaction, RedactorAgent, RedactorOutput, +}; From 53cfafb1a479b73b7fdf8923385f3b681e4d7d59 Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Wed, 25 Feb 2026 19:32:06 +0100 Subject: [PATCH 09/24] refactor(rig): route all prompts through built Agent, split base module MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - BaseAgent.prompt_text() now uses agent.completion() instead of building raw requests from the model, so preamble/tools/config are preserved - Remove model: Arc from BaseAgent (agent owns it) - Remove system: Option<&str> param from prompt methods (preamble is on the agent) - Replace BaseAgentConfig field with context_window: Option since temperature/max_tokens are baked into the rig Agent at build time - Split base.rs into base/{agent,builder,context}.rs - Rename redactor/ → redact/ to match action-verb convention - OcrProvider returns Vec with bbox support - Add fn new() constructors to OcrRigTool and CvRigTool - Add from_prompt error mapper for rig::PromptError - Export OcrTextRegion from lib.rs and prelude Co-Authored-By: Claude Opus 4.6 --- crates/nvisy-rig/src/agent/base.rs | 212 ------------------ crates/nvisy-rig/src/agent/base/agent.rs | 125 +++++++++++ crates/nvisy-rig/src/agent/base/builder.rs | 74 ++++++ .../nvisy-rig/src/agent/{ => base}/context.rs | 0 crates/nvisy-rig/src/agent/base/mod.rs | 35 +++ 
crates/nvisy-rig/src/agent/detect/mod.rs | 6 +- crates/nvisy-rig/src/agent/detect/tool.rs | 8 +- crates/nvisy-rig/src/agent/extract/mod.rs | 30 ++- crates/nvisy-rig/src/agent/extract/tool.rs | 18 +- crates/nvisy-rig/src/agent/mod.rs | 8 +- crates/nvisy-rig/src/agent/recognize/mod.rs | 2 +- .../src/agent/{redactor => redact}/mod.rs | 2 +- .../src/agent/{redactor => redact}/output.rs | 0 .../src/agent/{redactor => redact}/prompt.rs | 0 crates/nvisy-rig/src/backend/error.rs | 25 ++- crates/nvisy-rig/src/backend/mod.rs | 2 +- crates/nvisy-rig/src/lib.rs | 2 +- crates/nvisy-rig/src/prelude.rs | 2 +- 18 files changed, 313 insertions(+), 238 deletions(-) delete mode 100644 crates/nvisy-rig/src/agent/base.rs create mode 100644 crates/nvisy-rig/src/agent/base/agent.rs create mode 100644 crates/nvisy-rig/src/agent/base/builder.rs rename crates/nvisy-rig/src/agent/{ => base}/context.rs (100%) create mode 100644 crates/nvisy-rig/src/agent/base/mod.rs rename crates/nvisy-rig/src/agent/{redactor => redact}/mod.rs (99%) rename crates/nvisy-rig/src/agent/{redactor => redact}/output.rs (100%) rename crates/nvisy-rig/src/agent/{redactor => redact}/prompt.rs (100%) diff --git a/crates/nvisy-rig/src/agent/base.rs b/crates/nvisy-rig/src/agent/base.rs deleted file mode 100644 index 0c1975d..0000000 --- a/crates/nvisy-rig/src/agent/base.rs +++ /dev/null @@ -1,212 +0,0 @@ -//! Internal foundation agent wrapping rig-core's `Agent`. - -use std::sync::Arc; - -use rig::agent::{Agent, AgentBuilder}; -use rig::completion::{CompletionModel, TypedPrompt}; -use rig::tool::{Tool, ToolDyn}; -use schemars::JsonSchema; -use serde::de::DeserializeOwned; -use serde::Serialize; - -use nvisy_core::Error; - -use crate::backend::{from_completion, UsageTracker}; -use crate::bridge::ResponseParser; - -use super::context::ContextWindow; - -/// Configuration for a [`BaseAgent`]. -#[derive(Debug, Clone)] -pub struct BaseAgentConfig { - /// Sampling temperature (default: 0.1). 
- pub temperature: f64, - /// Maximum output tokens (default: 4096). - pub max_tokens: u64, - /// Optional context window for chunking large inputs. - pub context_window: Option, -} - -impl Default for BaseAgentConfig { - fn default() -> Self { - Self { - temperature: 0.1, - max_tokens: 4096, - context_window: None, - } - } -} - -/// Internal foundation agent wrapping rig-core's [`Agent`]. -/// -/// Not exported — specialized agents (e.g. `NerAgent`) compose this. -pub(crate) struct BaseAgent { - agent: Agent, - model: Arc, - config: BaseAgentConfig, - tracker: UsageTracker, -} - -/// Builder for [`BaseAgent`] that handles rig-core's typestate for tools. -pub(crate) struct BaseAgentBuilder { - model: Arc, - config: BaseAgentConfig, - preamble: Option, - tools: Vec>, -} - -impl BaseAgentBuilder { - /// Create a new builder with the given model and config. - pub fn new(model: M, config: BaseAgentConfig) -> Self { - Self { - model: Arc::new(model), - config, - preamble: None, - tools: Vec::new(), - } - } - - /// Set the system prompt (preamble). - pub fn preamble(mut self, preamble: impl Into) -> Self { - self.preamble = Some(preamble.into()); - self - } - - /// Add a tool to the agent. - pub fn tool(mut self, tool: impl Tool + 'static) -> Self { - self.tools.push(Box::new(tool)); - self - } - - /// Build the [`BaseAgent`]. 
- pub fn build(self) -> BaseAgent { - let agent = if self.tools.is_empty() { - let mut builder = AgentBuilder::new((*self.model).clone()) - .temperature(self.config.temperature) - .max_tokens(self.config.max_tokens); - - if let Some(ref preamble) = self.preamble { - builder = builder.preamble(preamble); - } - - builder.build() - } else { - let mut builder = AgentBuilder::new((*self.model).clone()) - .temperature(self.config.temperature) - .max_tokens(self.config.max_tokens) - .tools(self.tools); - - if let Some(ref preamble) = self.preamble { - builder = builder.preamble(preamble); - } - - builder.build() - }; - - BaseAgent { - agent, - model: self.model, - config: self.config, - tracker: UsageTracker::new(), - } - } -} - -impl BaseAgent { - /// Create a new builder. - pub fn builder(model: M, config: BaseAgentConfig) -> BaseAgentBuilder { - BaseAgentBuilder::new(model, config) - } - - /// Access the usage tracker. - pub fn tracker(&self) -> &UsageTracker { - &self.tracker - } - - /// Access the config. - pub fn config(&self) -> &BaseAgentConfig { - &self.config - } - - /// Structured output prompt: tries `prompt_typed`, falls back to text + - /// `parse_json`. - #[tracing::instrument(skip_all, fields(mode = "structured"))] - pub async fn prompt_structured(&self, prompt: &str, system: Option<&str>) -> Result - where - T: DeserializeOwned + Default + JsonSchema + Serialize + Send + Sync, - { - // Try structured output first. - let structured_result: Result = self.agent.prompt_typed::(prompt).await; - - match structured_result { - Ok(value) => { - tracing::debug!("structured output succeeded"); - Ok(value) - } - Err(structured_err) => { - tracing::warn!( - error = %structured_err, - "structured output failed, falling back to text-based parsing" - ); - self.prompt_text_and_parse(prompt, system).await - } - } - } - - /// Raw text completion, records usage. 
- #[tracing::instrument(skip_all, fields(mode = "text"))] - pub async fn prompt_text(&self, prompt: &str, system: Option<&str>) -> Result { - let mut builder = self - .model - .completion_request(prompt) - .temperature(self.config.temperature) - .max_tokens(self.config.max_tokens); - - if let Some(preamble) = system { - builder = builder.preamble(preamble.to_string()); - } - - let response = builder.send().await.map_err(from_completion)?; - let parsed = ResponseParser::extract_text(&response)?; - self.tracker.record(&response.usage, 0); - Ok(parsed.as_str().to_owned()) - } - - /// Splits text via [`ContextWindow`], runs `prompt_structured` per chunk, - /// and flattens results. - #[tracing::instrument(skip_all, fields(mode = "chunked"))] - pub async fn prompt_chunked( - &self, - text: &str, - build_prompt: F, - system: Option<&str>, - ) -> Result, Error> - where - T: DeserializeOwned + Default + JsonSchema + Serialize + Send + Sync, - F: Fn(&str) -> String, - Vec: Default, - { - let chunks = match &self.config.context_window { - Some(cw) => cw.split_to_fit(text), - None => vec![text], - }; - - let mut all_results = Vec::new(); - for chunk in chunks { - let prompt = build_prompt(chunk); - let chunk_results: Vec = self.prompt_structured(&prompt, system).await?; - all_results.extend(chunk_results); - } - - Ok(all_results) - } - - /// Text-based fallback: complete → extract text → parse JSON. - async fn prompt_text_and_parse(&self, prompt: &str, system: Option<&str>) -> Result - where - T: DeserializeOwned + Default, - { - let text = self.prompt_text(prompt, system).await?; - ResponseParser::from_text(text.as_str()).parse_json() - } -} diff --git a/crates/nvisy-rig/src/agent/base/agent.rs b/crates/nvisy-rig/src/agent/base/agent.rs new file mode 100644 index 0000000..aac7654 --- /dev/null +++ b/crates/nvisy-rig/src/agent/base/agent.rs @@ -0,0 +1,125 @@ +//! [`BaseAgent`] — internal foundation agent wrapping rig-core's `Agent`. 
+ +use rig::agent::Agent; +use rig::completion::{Completion, CompletionModel, Prompt, TypedPrompt}; +use schemars::JsonSchema; +use serde::de::DeserializeOwned; +use serde::Serialize; + +use nvisy_core::Error; + +use crate::backend::{from_completion, from_prompt, UsageTracker}; +use crate::bridge::ResponseParser; + +use super::{BaseAgentBuilder, BaseAgentConfig}; +use super::context::ContextWindow; + +/// Internal foundation agent wrapping rig-core's [`Agent`]. +/// +/// All prompt methods route through the built `Agent`, which already +/// carries the preamble, temperature, max-tokens, and tools configured +/// via [`BaseAgentBuilder`]. +/// +/// Not exported — specialized agents (e.g. `NerAgent`) compose this. +pub(crate) struct BaseAgent { + pub(super) agent: Agent, + pub(super) context_window: Option, + pub(super) tracker: UsageTracker, +} + +impl BaseAgent { + /// Create a new builder. + pub fn builder(model: M, config: BaseAgentConfig) -> BaseAgentBuilder { + BaseAgentBuilder::new(model, config) + } + + /// Access the usage tracker. + pub fn tracker(&self) -> &UsageTracker { + &self.tracker + } + + /// Structured output prompt: tries `prompt_typed`, falls back to text + + /// `parse_json`. + #[tracing::instrument(skip_all, fields(mode = "structured"))] + pub async fn prompt_structured(&self, prompt: &str) -> Result + where + T: DeserializeOwned + Default + JsonSchema + Serialize + Send + Sync, + { + let structured_result: Result = self.agent.prompt_typed::(prompt).await; + + match structured_result { + Ok(value) => { + tracing::debug!("structured output succeeded"); + Ok(value) + } + Err(structured_err) => { + tracing::warn!( + error = %structured_err, + "structured output failed, falling back to text-based parsing" + ); + self.prompt_text_and_parse(prompt).await + } + } + } + + /// Text completion through the agent, records usage. 
+ #[tracing::instrument(skip_all, fields(mode = "text"))] + pub async fn prompt_text(&self, prompt: &str) -> Result { + let builder = self + .agent + .completion(prompt, vec![]) + .await + .map_err(from_completion)?; + + let response = builder.send().await.map_err(from_completion)?; + let parsed = ResponseParser::extract_text(&response)?; + self.tracker.record(&response.usage, 0); + Ok(parsed.as_str().to_owned()) + } + + /// Plain text completion through the agent (no usage tracking). + /// + /// Uses `Prompt::prompt` which handles tool calls automatically but + /// returns only the final text, not the raw response. + #[tracing::instrument(skip_all, fields(mode = "prompt"))] + pub async fn prompt(&self, prompt: &str) -> Result { + self.agent.prompt(prompt).await.map_err(from_prompt) + } + + /// Splits text via [`ContextWindow`], runs `prompt_structured` per chunk, + /// and flattens results. + #[tracing::instrument(skip_all, fields(mode = "chunked"))] + pub async fn prompt_chunked( + &self, + text: &str, + build_prompt: F, + ) -> Result, Error> + where + T: DeserializeOwned + Default + JsonSchema + Serialize + Send + Sync, + F: Fn(&str) -> String, + Vec: Default, + { + let chunks = match &self.context_window { + Some(cw) => cw.split_to_fit(text), + None => vec![text], + }; + + let mut all_results = Vec::new(); + for chunk in chunks { + let prompt = build_prompt(chunk); + let chunk_results: Vec = self.prompt_structured(&prompt).await?; + all_results.extend(chunk_results); + } + + Ok(all_results) + } + + /// Text-based fallback: complete → extract text → parse JSON. 
+ async fn prompt_text_and_parse(&self, prompt: &str) -> Result + where + T: DeserializeOwned + Default, + { + let text = self.prompt_text(prompt).await?; + ResponseParser::from_text(text.as_str()).parse_json() + } +} diff --git a/crates/nvisy-rig/src/agent/base/builder.rs b/crates/nvisy-rig/src/agent/base/builder.rs new file mode 100644 index 0000000..a940495 --- /dev/null +++ b/crates/nvisy-rig/src/agent/base/builder.rs @@ -0,0 +1,74 @@ +//! [`BaseAgentBuilder`] — builder for [`BaseAgent`] handling rig-core's +//! typestate for optional tools. + +use rig::agent::AgentBuilder; +use rig::completion::CompletionModel; +use rig::tool::{Tool, ToolDyn}; + +use crate::backend::UsageTracker; + +use super::{BaseAgent, BaseAgentConfig}; + +/// Builder for [`BaseAgent`] that handles rig-core's typestate for tools. +pub(crate) struct BaseAgentBuilder { + model: M, + config: BaseAgentConfig, + preamble: Option, + tools: Vec>, +} + +impl BaseAgentBuilder { + /// Create a new builder with the given model and config. + pub fn new(model: M, config: BaseAgentConfig) -> Self { + Self { + model, + config, + preamble: None, + tools: Vec::new(), + } + } + + /// Set the system prompt (preamble). + pub fn preamble(mut self, preamble: impl Into) -> Self { + self.preamble = Some(preamble.into()); + self + } + + /// Add a tool to the agent. + pub fn tool(mut self, tool: impl Tool + 'static) -> Self { + self.tools.push(Box::new(tool)); + self + } + + /// Build the [`BaseAgent`]. 
+ pub fn build(self) -> BaseAgent { + let agent = if self.tools.is_empty() { + let mut builder = AgentBuilder::new(self.model) + .temperature(self.config.temperature) + .max_tokens(self.config.max_tokens); + + if let Some(ref preamble) = self.preamble { + builder = builder.preamble(preamble); + } + + builder.build() + } else { + let mut builder = AgentBuilder::new(self.model) + .temperature(self.config.temperature) + .max_tokens(self.config.max_tokens) + .tools(self.tools); + + if let Some(ref preamble) = self.preamble { + builder = builder.preamble(preamble); + } + + builder.build() + }; + + BaseAgent { + agent, + context_window: self.config.context_window, + tracker: UsageTracker::new(), + } + } +} diff --git a/crates/nvisy-rig/src/agent/context.rs b/crates/nvisy-rig/src/agent/base/context.rs similarity index 100% rename from crates/nvisy-rig/src/agent/context.rs rename to crates/nvisy-rig/src/agent/base/context.rs diff --git a/crates/nvisy-rig/src/agent/base/mod.rs b/crates/nvisy-rig/src/agent/base/mod.rs new file mode 100644 index 0000000..a7aedb9 --- /dev/null +++ b/crates/nvisy-rig/src/agent/base/mod.rs @@ -0,0 +1,35 @@ +//! Internal foundation agent and builder. +//! +//! [`BaseAgent`] wraps rig-core's `Agent` with usage tracking and +//! structured-output fallback. [`BaseAgentBuilder`] handles rig-core's +//! typestate for optional tools. + +mod agent; +mod builder; +pub(crate) mod context; + +pub(crate) use agent::BaseAgent; +pub(crate) use builder::BaseAgentBuilder; + +use context::ContextWindow; + +/// Configuration for a [`BaseAgent`]. +#[derive(Debug, Clone)] +pub struct BaseAgentConfig { + /// Sampling temperature (default: 0.1). + pub temperature: f64, + /// Maximum output tokens (default: 4096). + pub max_tokens: u64, + /// Optional context window for chunking large inputs. 
+ pub context_window: Option, +} + +impl Default for BaseAgentConfig { + fn default() -> Self { + Self { + temperature: 0.1, + max_tokens: 4096, + context_window: None, + } + } +} diff --git a/crates/nvisy-rig/src/agent/detect/mod.rs b/crates/nvisy-rig/src/agent/detect/mod.rs index 8d5829a..4322b41 100644 --- a/crates/nvisy-rig/src/agent/detect/mod.rs +++ b/crates/nvisy-rig/src/agent/detect/mod.rs @@ -11,8 +11,6 @@ mod tool; pub use output::{RawCvEntities, RawCvEntity}; -use std::sync::Arc; - use async_trait::async_trait; use base64::Engine; use base64::engine::general_purpose::STANDARD; @@ -73,7 +71,7 @@ impl CvAgent { pub fn new(model: M, config: BaseAgentConfig, cv: impl CvProvider + 'static) -> Self { let base = BaseAgent::builder(model, config) .preamble(CV_SYSTEM_PROMPT) - .tool(CvRigTool(Arc::new(cv))) + .tool(CvRigTool::new(cv)) .build(); Self { base } } @@ -104,7 +102,7 @@ impl CvAgent { let result: RawCvEntities = self .base - .prompt_structured(&prompt, config.system_prompt.as_deref()) + .prompt_structured(&prompt) .await?; tracing::info!( diff --git a/crates/nvisy-rig/src/agent/detect/tool.rs b/crates/nvisy-rig/src/agent/detect/tool.rs index c98ab51..01a4310 100644 --- a/crates/nvisy-rig/src/agent/detect/tool.rs +++ b/crates/nvisy-rig/src/agent/detect/tool.rs @@ -24,7 +24,13 @@ pub(super) struct CvToolArgs { pub(super) struct CvToolError(String); /// Rig `Tool` wrapper around a [`CvProvider`] implementation. 
-pub(super) struct CvRigTool(pub Arc); +pub(super) struct CvRigTool(Arc); + +impl CvRigTool { + pub fn new(provider: T) -> Self { + Self(Arc::new(provider)) + } +} impl Tool for CvRigTool { const NAME: &'static str = "cv_detect_objects"; diff --git a/crates/nvisy-rig/src/agent/extract/mod.rs b/crates/nvisy-rig/src/agent/extract/mod.rs index 547e004..2f9cf68 100644 --- a/crates/nvisy-rig/src/agent/extract/mod.rs +++ b/crates/nvisy-rig/src/agent/extract/mod.rs @@ -11,12 +11,11 @@ mod tool; pub use output::{OcrOutput, RawOcrEntity}; -use std::sync::Arc; - use async_trait::async_trait; use base64::Engine; use base64::engine::general_purpose::STANDARD; use rig::completion::CompletionModel; +use serde::Serialize; use nvisy_core::Error; @@ -26,15 +25,34 @@ use super::base::{BaseAgent, BaseAgentConfig}; use prompt::{OcrPromptBuilder, OCR_SYSTEM_PROMPT}; use tool::OcrRigTool; +/// A single text region extracted by an OCR provider. +/// +/// Each region represents a contiguous block of text found in the image, +/// together with an optional bounding box and confidence score. +#[derive(Debug, Clone, Serialize)] +pub struct OcrTextRegion { + /// The extracted text content. + pub text: String, + /// Confidence of the OCR extraction (0.0 -- 1.0). + pub confidence: f64, + /// Optional bounding box `[x, y, width, height]` in pixels. + pub bbox: Option<[f64; 4]>, +} + /// Trait for OCR capabilities that can be provided to VLM agents. /// /// Consumers implement this trait to supply text extraction from images. /// The trait is intentionally free of rig-core types so it can be /// implemented in any crate without pulling in the LLM framework. +/// +/// Implementations return a list of [`OcrTextRegion`]s, each carrying the +/// extracted text, a confidence score, and an optional pixel-space bounding +/// box. Returning multiple regions allows the downstream VLM to reason +/// about spatial layout (e.g. headers vs body text, table cells). 
#[async_trait] pub trait OcrProvider: Send + Sync { - /// Extract text from raw image bytes (PNG, JPEG, etc.). - async fn extract_text(&self, image_data: &[u8]) -> Result; + /// Extract text regions from raw image bytes (PNG, JPEG, etc.). + async fn extract_text(&self, image_data: &[u8]) -> Result, Error>; } /// VLM agent that extracts text from images and detects entities in it. @@ -56,7 +74,7 @@ impl OcrAgent { pub fn new(model: M, config: BaseAgentConfig, ocr: impl OcrProvider + 'static) -> Self { let base = BaseAgent::builder(model, config) .preamble(OCR_SYSTEM_PROMPT) - .tool(OcrRigTool(Arc::new(ocr))) + .tool(OcrRigTool::new(ocr)) .build(); Self { base } } @@ -87,7 +105,7 @@ impl OcrAgent { let output: OcrOutput = self .base - .prompt_structured(&prompt, config.system_prompt.as_deref()) + .prompt_structured(&prompt) .await?; tracing::info!( diff --git a/crates/nvisy-rig/src/agent/extract/tool.rs b/crates/nvisy-rig/src/agent/extract/tool.rs index c29ffea..d271ab8 100644 --- a/crates/nvisy-rig/src/agent/extract/tool.rs +++ b/crates/nvisy-rig/src/agent/extract/tool.rs @@ -24,7 +24,13 @@ pub(super) struct OcrToolArgs { pub(super) struct OcrToolError(String); /// Rig `Tool` wrapper around an [`OcrProvider`] implementation. -pub(super) struct OcrRigTool(pub Arc); +pub(super) struct OcrRigTool(Arc); + +impl OcrRigTool { + pub fn new(provider: T) -> Self { + Self(Arc::new(provider)) + } +} impl Tool for OcrRigTool { const NAME: &'static str = "ocr_extract_text"; @@ -36,7 +42,9 @@ impl Tool for OcrRigTool { async fn definition(&self, _prompt: String) -> ToolDefinition { ToolDefinition { name: Self::NAME.to_string(), - description: "Extract text from an image using OCR. \ + description: "Extract text regions from an image using OCR. \ + Returns a JSON array of regions, each with text, \ + confidence, and optional bounding box. \ Pass the image as a base64-encoded string." 
.to_string(), parameters: json!({ @@ -56,9 +64,11 @@ impl Tool for OcrRigTool { let bytes = STANDARD .decode(&args.image_base64) .map_err(|e| OcrToolError(format!("invalid base64: {e}")))?; - self.0 + let regions = self + .0 .extract_text(&bytes) .await - .map_err(|e| OcrToolError(e.to_string())) + .map_err(|e| OcrToolError(e.to_string()))?; + serde_json::to_string(®ions).map_err(|e| OcrToolError(e.to_string())) } } diff --git a/crates/nvisy-rig/src/agent/mod.rs b/crates/nvisy-rig/src/agent/mod.rs index dc7f0f5..6d43412 100644 --- a/crates/nvisy-rig/src/agent/mod.rs +++ b/crates/nvisy-rig/src/agent/mod.rs @@ -4,16 +4,14 @@ //! into individual agent submodules. mod base; -mod context; mod detect; mod extract; mod recognize; -mod redactor; +mod redact; pub(crate) use base::{BaseAgent, BaseAgentBuilder, BaseAgentConfig}; -pub(crate) use context::ContextWindow; pub use recognize::{NerAgent, RawEntities, RawEntity}; -pub use extract::{OcrAgent, OcrOutput, OcrProvider, RawOcrEntity}; +pub use extract::{OcrAgent, OcrOutput, OcrProvider, OcrTextRegion, RawOcrEntity}; pub use detect::{CvAgent, CvDetection, CvProvider, RawCvEntities, RawCvEntity}; -pub use redactor::{RawRedaction, RedactorAgent, RedactorOutput}; +pub use redact::{RawRedaction, RedactorAgent, RedactorOutput}; diff --git a/crates/nvisy-rig/src/agent/recognize/mod.rs b/crates/nvisy-rig/src/agent/recognize/mod.rs index d1527fb..ed2dabd 100644 --- a/crates/nvisy-rig/src/agent/recognize/mod.rs +++ b/crates/nvisy-rig/src/agent/recognize/mod.rs @@ -65,7 +65,7 @@ impl NerAgent { let result: RawEntities = self .base - .prompt_structured(&prompt, config.system_prompt.as_deref()) + .prompt_structured(&prompt) .await?; tracing::info!( diff --git a/crates/nvisy-rig/src/agent/redactor/mod.rs b/crates/nvisy-rig/src/agent/redact/mod.rs similarity index 99% rename from crates/nvisy-rig/src/agent/redactor/mod.rs rename to crates/nvisy-rig/src/agent/redact/mod.rs index 10e1050..ea8c4b1 100644 --- 
a/crates/nvisy-rig/src/agent/redactor/mod.rs +++ b/crates/nvisy-rig/src/agent/redact/mod.rs @@ -69,7 +69,7 @@ impl RedactorAgent { "built redactor prompt" ); - let result: RedactorOutput = self.base.prompt_structured(&prompt, None).await?; + let result: RedactorOutput = self.base.prompt_structured(&prompt).await?; tracing::info!( redaction_count = result.redactions.len(), diff --git a/crates/nvisy-rig/src/agent/redactor/output.rs b/crates/nvisy-rig/src/agent/redact/output.rs similarity index 100% rename from crates/nvisy-rig/src/agent/redactor/output.rs rename to crates/nvisy-rig/src/agent/redact/output.rs diff --git a/crates/nvisy-rig/src/agent/redactor/prompt.rs b/crates/nvisy-rig/src/agent/redact/prompt.rs similarity index 100% rename from crates/nvisy-rig/src/agent/redactor/prompt.rs rename to crates/nvisy-rig/src/agent/redact/prompt.rs diff --git a/crates/nvisy-rig/src/backend/error.rs b/crates/nvisy-rig/src/backend/error.rs index 67790fb..5074944 100644 --- a/crates/nvisy-rig/src/backend/error.rs +++ b/crates/nvisy-rig/src/backend/error.rs @@ -1,9 +1,32 @@ //! Error mapping from rig-core errors to nvisy-core errors. -use rig::completion::CompletionError; +use rig::completion::{CompletionError, PromptError}; use nvisy_core::Error; +/// Convert a rig-core [`PromptError`] into a [`nvisy_core::Error`]. +pub fn from_prompt(err: PromptError) -> Error { + match err { + PromptError::CompletionError(e) => from_completion(e), + PromptError::ToolError(e) => { + Error::runtime(format!("Tool error: {e}"), "rig", false) + } + PromptError::ToolServerError(e) => { + Error::runtime(format!("Tool server error: {e}"), "rig", true) + } + PromptError::MaxTurnsError { max_turns, .. } => { + Error::runtime( + format!("Agent exceeded max turn limit ({max_turns})"), + "rig", + false, + ) + } + PromptError::PromptCancelled { reason, .. 
} => { + Error::runtime(format!("Prompt cancelled: {reason}"), "rig", false) + } + } +} + /// Convert a rig-core [`CompletionError`] into a [`nvisy_core::Error`]. pub fn from_completion(err: CompletionError) -> Error { match err { diff --git a/crates/nvisy-rig/src/backend/mod.rs b/crates/nvisy-rig/src/backend/mod.rs index 8cf85cc..5952588 100644 --- a/crates/nvisy-rig/src/backend/mod.rs +++ b/crates/nvisy-rig/src/backend/mod.rs @@ -4,7 +4,7 @@ mod error; mod metrics; mod retry; -pub use error::from_completion; +pub use error::{from_completion, from_prompt}; pub use metrics::{UsageStats, UsageTracker}; pub use retry::RetryPolicy; diff --git a/crates/nvisy-rig/src/lib.rs b/crates/nvisy-rig/src/lib.rs index 5e1300b..585d52e 100644 --- a/crates/nvisy-rig/src/lib.rs +++ b/crates/nvisy-rig/src/lib.rs @@ -14,7 +14,7 @@ pub use bridge::{EntityParser, RigBackend, RigBackendConfig}; pub use agent::{ CvAgent, CvDetection, CvProvider, NerAgent, - OcrAgent, OcrOutput, OcrProvider, + OcrAgent, OcrOutput, OcrProvider, OcrTextRegion, RawCvEntities, RawCvEntity, RawEntities, RawEntity, RawOcrEntity, RawRedaction, RedactorAgent, RedactorOutput, }; diff --git a/crates/nvisy-rig/src/prelude.rs b/crates/nvisy-rig/src/prelude.rs index 3dcdf65..f6fc160 100644 --- a/crates/nvisy-rig/src/prelude.rs +++ b/crates/nvisy-rig/src/prelude.rs @@ -7,7 +7,7 @@ pub use crate::backend::{ pub use crate::bridge::{EntityParser, RigBackend, RigBackendConfig}; pub use crate::agent::{ CvAgent, CvDetection, CvProvider, NerAgent, - OcrAgent, OcrOutput, OcrProvider, + OcrAgent, OcrOutput, OcrProvider, OcrTextRegion, RawCvEntities, RawCvEntity, RawEntities, RawEntity, RawOcrEntity, RawRedaction, RedactorAgent, RedactorOutput, }; From ef67c1e13957ba504f2565295ad71fefdeef6c0d Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Wed, 25 Feb 2026 19:39:32 +0100 Subject: [PATCH 10/24] feat(rig): add UUIDv7 agent id, generic retry policy, doc fixes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit - Add BaseAgent.id (UUIDv7) for observability; expose id() on all specialized agents and include agent_id in tracing spans - Make RetryPolicy generic over any Req: Clone + Res instead of hardcoding DetectionRequest/DetectionResponse - Use : instead of — as doc separator - Use 0.0..=1.0 range notation in confidence docs Co-Authored-By: Claude Opus 4.6 --- Cargo.lock | 1 + crates/nvisy-rig/Cargo.toml | 3 +++ crates/nvisy-rig/src/agent/base/agent.rs | 21 ++++++++++------ crates/nvisy-rig/src/agent/base/builder.rs | 4 +++- crates/nvisy-rig/src/agent/detect/mod.rs | 15 +++++++----- crates/nvisy-rig/src/agent/detect/output.rs | 2 +- crates/nvisy-rig/src/agent/extract/mod.rs | 17 +++++++------ crates/nvisy-rig/src/agent/extract/output.rs | 2 +- crates/nvisy-rig/src/agent/recognize/mod.rs | 15 +++++++----- .../nvisy-rig/src/agent/recognize/output.rs | 2 +- crates/nvisy-rig/src/agent/redact/mod.rs | 15 +++++++----- crates/nvisy-rig/src/backend/mod.rs | 2 +- crates/nvisy-rig/src/backend/retry.rs | 24 ++++++++++++------- 13 files changed, 77 insertions(+), 46 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d491205..5c704bb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2852,6 +2852,7 @@ dependencies = [ "tokio", "tower", "tracing", + "uuid", ] [[package]] diff --git a/crates/nvisy-rig/Cargo.toml b/crates/nvisy-rig/Cargo.toml index eb18d8c..4b93770 100644 --- a/crates/nvisy-rig/Cargo.toml +++ b/crates/nvisy-rig/Cargo.toml @@ -43,6 +43,9 @@ schemars = { workspace = true, features = [] } # Error handling thiserror = { workspace = true, features = [] } +# Identifiers +uuid = { workspace = true, features = ["v7"] } + # Observability tracing = { workspace = true, features = [] } diff --git a/crates/nvisy-rig/src/agent/base/agent.rs b/crates/nvisy-rig/src/agent/base/agent.rs index aac7654..fd4e63d 100644 --- a/crates/nvisy-rig/src/agent/base/agent.rs +++ b/crates/nvisy-rig/src/agent/base/agent.rs @@ -1,10 +1,11 @@ -//! 
[`BaseAgent`] — internal foundation agent wrapping rig-core's `Agent`. +//! [`BaseAgent`]: internal foundation agent wrapping rig-core's `Agent`. use rig::agent::Agent; use rig::completion::{Completion, CompletionModel, Prompt, TypedPrompt}; use schemars::JsonSchema; use serde::de::DeserializeOwned; use serde::Serialize; +use uuid::Uuid; use nvisy_core::Error; @@ -17,11 +18,12 @@ use super::context::ContextWindow; /// Internal foundation agent wrapping rig-core's [`Agent`]. /// /// All prompt methods route through the built `Agent`, which already -/// carries the preamble, temperature, max-tokens, and tools configured +/// carries the preamble, temperature, max_tokens, and tools configured /// via [`BaseAgentBuilder`]. /// -/// Not exported — specialized agents (e.g. `NerAgent`) compose this. +/// Not exported: specialized agents (e.g. `NerAgent`) compose this. pub(crate) struct BaseAgent { + pub(super) id: Uuid, pub(super) agent: Agent, pub(super) context_window: Option, pub(super) tracker: UsageTracker, @@ -33,6 +35,11 @@ impl BaseAgent { BaseAgentBuilder::new(model, config) } + /// Unique identifier for this agent instance (UUIDv7). + pub fn id(&self) -> Uuid { + self.id + } + /// Access the usage tracker. pub fn tracker(&self) -> &UsageTracker { &self.tracker @@ -40,7 +47,7 @@ impl BaseAgent { /// Structured output prompt: tries `prompt_typed`, falls back to text + /// `parse_json`. - #[tracing::instrument(skip_all, fields(mode = "structured"))] + #[tracing::instrument(skip_all, fields(agent_id = %self.id, mode = "structured"))] pub async fn prompt_structured(&self, prompt: &str) -> Result where T: DeserializeOwned + Default + JsonSchema + Serialize + Send + Sync, @@ -63,7 +70,7 @@ impl BaseAgent { } /// Text completion through the agent, records usage. 
- #[tracing::instrument(skip_all, fields(mode = "text"))] + #[tracing::instrument(skip_all, fields(agent_id = %self.id, mode = "text"))] pub async fn prompt_text(&self, prompt: &str) -> Result { let builder = self .agent @@ -81,14 +88,14 @@ impl BaseAgent { /// /// Uses `Prompt::prompt` which handles tool calls automatically but /// returns only the final text, not the raw response. - #[tracing::instrument(skip_all, fields(mode = "prompt"))] + #[tracing::instrument(skip_all, fields(agent_id = %self.id, mode = "prompt"))] pub async fn prompt(&self, prompt: &str) -> Result { self.agent.prompt(prompt).await.map_err(from_prompt) } /// Splits text via [`ContextWindow`], runs `prompt_structured` per chunk, /// and flattens results. - #[tracing::instrument(skip_all, fields(mode = "chunked"))] + #[tracing::instrument(skip_all, fields(agent_id = %self.id, mode = "chunked"))] pub async fn prompt_chunked( &self, text: &str, diff --git a/crates/nvisy-rig/src/agent/base/builder.rs b/crates/nvisy-rig/src/agent/base/builder.rs index a940495..e6477e3 100644 --- a/crates/nvisy-rig/src/agent/base/builder.rs +++ b/crates/nvisy-rig/src/agent/base/builder.rs @@ -1,9 +1,10 @@ -//! [`BaseAgentBuilder`] — builder for [`BaseAgent`] handling rig-core's +//! [`BaseAgentBuilder`]: builder for [`BaseAgent`] handling rig-core's //! typestate for optional tools. 
use rig::agent::AgentBuilder; use rig::completion::CompletionModel; use rig::tool::{Tool, ToolDyn}; +use uuid::Uuid; use crate::backend::UsageTracker; @@ -66,6 +67,7 @@ impl BaseAgentBuilder { }; BaseAgent { + id: Uuid::now_v7(), agent, context_window: self.config.context_window, tracker: UsageTracker::new(), diff --git a/crates/nvisy-rig/src/agent/detect/mod.rs b/crates/nvisy-rig/src/agent/detect/mod.rs index 4322b41..2f441df 100644 --- a/crates/nvisy-rig/src/agent/detect/mod.rs +++ b/crates/nvisy-rig/src/agent/detect/mod.rs @@ -16,13 +16,14 @@ use base64::Engine; use base64::engine::general_purpose::STANDARD; use rig::completion::CompletionModel; use serde::Serialize; +use uuid::Uuid; use nvisy_core::Error; use crate::backend::{DetectionConfig, UsageTracker}; -use super::base::{BaseAgent, BaseAgentConfig}; -use prompt::{CvPromptBuilder, CV_SYSTEM_PROMPT}; +use super::{BaseAgent, BaseAgentConfig}; +use prompt::{CV_SYSTEM_PROMPT, CvPromptBuilder}; use tool::CvRigTool; /// A single computer-vision detection result returned by a [`CvProvider`]. @@ -76,6 +77,11 @@ impl CvAgent { Self { base } } + /// Unique identifier for this agent instance (UUIDv7). + pub fn id(&self) -> Uuid { + self.base.id() + } + /// Access the usage tracker for this agent's LLM calls. pub fn tracker(&self) -> &UsageTracker { self.base.tracker() @@ -100,10 +106,7 @@ impl CvAgent { let prompt = CvPromptBuilder::new(config).build(&image_b64); - let result: RawCvEntities = self - .base - .prompt_structured(&prompt) - .await?; + let result: RawCvEntities = self.base.prompt_structured(&prompt).await?; tracing::info!( entity_count = result.entities.len(), diff --git a/crates/nvisy-rig/src/agent/detect/output.rs b/crates/nvisy-rig/src/agent/detect/output.rs index 595bdd7..ea1eb2d 100644 --- a/crates/nvisy-rig/src/agent/detect/output.rs +++ b/crates/nvisy-rig/src/agent/detect/output.rs @@ -14,7 +14,7 @@ pub struct RawCvEntity { pub entity_type: EntityKind, /// Label from the CV model (e.g. 
"face", "license_plate"). pub label: String, - /// Detection confidence (0.0 -- 1.0). + /// Detection confidence (0.0..=1.0). pub confidence: f64, /// Bounding box `[x, y, width, height]` in pixels. pub bbox: [f64; 4], diff --git a/crates/nvisy-rig/src/agent/extract/mod.rs b/crates/nvisy-rig/src/agent/extract/mod.rs index 2f9cf68..19e928b 100644 --- a/crates/nvisy-rig/src/agent/extract/mod.rs +++ b/crates/nvisy-rig/src/agent/extract/mod.rs @@ -16,13 +16,14 @@ use base64::Engine; use base64::engine::general_purpose::STANDARD; use rig::completion::CompletionModel; use serde::Serialize; +use uuid::Uuid; use nvisy_core::Error; use crate::backend::{DetectionConfig, UsageTracker}; -use super::base::{BaseAgent, BaseAgentConfig}; -use prompt::{OcrPromptBuilder, OCR_SYSTEM_PROMPT}; +use super::{BaseAgent, BaseAgentConfig}; +use prompt::{OCR_SYSTEM_PROMPT, OcrPromptBuilder}; use tool::OcrRigTool; /// A single text region extracted by an OCR provider. @@ -33,7 +34,7 @@ use tool::OcrRigTool; pub struct OcrTextRegion { /// The extracted text content. pub text: String, - /// Confidence of the OCR extraction (0.0 -- 1.0). + /// Confidence of the OCR extraction (0.0..=1.0). pub confidence: f64, /// Optional bounding box `[x, y, width, height]` in pixels. pub bbox: Option<[f64; 4]>, @@ -79,6 +80,11 @@ impl OcrAgent { Self { base } } + /// Unique identifier for this agent instance (UUIDv7). + pub fn id(&self) -> Uuid { + self.base.id() + } + /// Access the usage tracker for this agent's LLM calls. 
pub fn tracker(&self) -> &UsageTracker { self.base.tracker() @@ -103,10 +109,7 @@ impl OcrAgent { let prompt = OcrPromptBuilder::new(config).build(&image_b64); - let output: OcrOutput = self - .base - .prompt_structured(&prompt) - .await?; + let output: OcrOutput = self.base.prompt_structured(&prompt).await?; tracing::info!( text_len = output.extracted_text.len(), diff --git a/crates/nvisy-rig/src/agent/extract/output.rs b/crates/nvisy-rig/src/agent/extract/output.rs index 266d096..fb8caaa 100644 --- a/crates/nvisy-rig/src/agent/extract/output.rs +++ b/crates/nvisy-rig/src/agent/extract/output.rs @@ -23,7 +23,7 @@ pub struct RawOcrEntity { pub entity_type: EntityKind, /// The matched text value. pub value: String, - /// Detection confidence (0.0 -- 1.0). + /// Detection confidence (0.0..=1.0). pub confidence: f64, /// Optional bounding box `[x, y, width, height]` in pixels. pub bbox: Option<[f64; 4]>, diff --git a/crates/nvisy-rig/src/agent/recognize/mod.rs b/crates/nvisy-rig/src/agent/recognize/mod.rs index ed2dabd..5f89735 100644 --- a/crates/nvisy-rig/src/agent/recognize/mod.rs +++ b/crates/nvisy-rig/src/agent/recognize/mod.rs @@ -10,13 +10,14 @@ mod prompt; pub use output::{RawEntities, RawEntity}; use rig::completion::CompletionModel; +use uuid::Uuid; use nvisy_core::Error; use crate::backend::{DetectionConfig, UsageTracker}; -use super::base::{BaseAgent, BaseAgentConfig}; -use prompt::{NerPromptBuilder, NER_SYSTEM_PROMPT}; +use super::{BaseAgent, BaseAgentConfig}; +use prompt::{NER_SYSTEM_PROMPT, NerPromptBuilder}; /// Agent for textual PII/entity detection using LLM-based NER. /// @@ -40,6 +41,11 @@ impl NerAgent { Self { base } } + /// Unique identifier for this agent instance (UUIDv7). + pub fn id(&self) -> Uuid { + self.base.id() + } + /// Access the usage tracker for this agent's LLM calls. 
pub fn tracker(&self) -> &UsageTracker { self.base.tracker() @@ -63,10 +69,7 @@ impl NerAgent { "built ner prompt" ); - let result: RawEntities = self - .base - .prompt_structured(&prompt) - .await?; + let result: RawEntities = self.base.prompt_structured(&prompt).await?; tracing::info!( entity_count = result.entities.len(), diff --git a/crates/nvisy-rig/src/agent/recognize/output.rs b/crates/nvisy-rig/src/agent/recognize/output.rs index b802490..ae05062 100644 --- a/crates/nvisy-rig/src/agent/recognize/output.rs +++ b/crates/nvisy-rig/src/agent/recognize/output.rs @@ -21,7 +21,7 @@ pub struct RawEntity { pub entity_type: EntityKind, /// The matched text value. pub value: String, - /// Detection confidence (0.0 -- 1.0). + /// Detection confidence (0.0..=1.0). pub confidence: f64, /// Start byte offset in the input text. pub start_offset: usize, diff --git a/crates/nvisy-rig/src/agent/redact/mod.rs b/crates/nvisy-rig/src/agent/redact/mod.rs index ea8c4b1..04683f8 100644 --- a/crates/nvisy-rig/src/agent/redact/mod.rs +++ b/crates/nvisy-rig/src/agent/redact/mod.rs @@ -13,14 +13,15 @@ mod prompt; pub use output::{RawRedaction, RedactorOutput}; use rig::completion::CompletionModel; +use uuid::Uuid; use nvisy_core::Error; use nvisy_ontology::specification::RedactorInput; use crate::backend::UsageTracker; -use super::base::{BaseAgent, BaseAgentConfig}; -use prompt::{RedactorPromptBuilder, REDACTOR_SYSTEM_PROMPT}; +use super::{BaseAgent, BaseAgentConfig}; +use prompt::{REDACTOR_SYSTEM_PROMPT, RedactorPromptBuilder}; /// Agent for context-aware redaction recommendations. /// @@ -47,6 +48,11 @@ impl RedactorAgent { Self { base } } + /// Unique identifier for this agent instance (UUIDv7). + pub fn id(&self) -> Uuid { + self.base.id() + } + /// Access the usage tracker for this agent's LLM calls. 
pub fn tracker(&self) -> &UsageTracker { self.base.tracker() @@ -64,10 +70,7 @@ impl RedactorAgent { ) -> Result, Error> { let prompt = RedactorPromptBuilder::build(text, entities)?; - tracing::debug!( - prompt_len = prompt.len(), - "built redactor prompt" - ); + tracing::debug!(prompt_len = prompt.len(), "built redactor prompt"); let result: RedactorOutput = self.base.prompt_structured(&prompt).await?; diff --git a/crates/nvisy-rig/src/backend/mod.rs b/crates/nvisy-rig/src/backend/mod.rs index 5952588..7982a02 100644 --- a/crates/nvisy-rig/src/backend/mod.rs +++ b/crates/nvisy-rig/src/backend/mod.rs @@ -17,7 +17,7 @@ use nvisy_ontology::entity::EntityKind; pub struct DetectionConfig { /// Entity kinds to detect (empty = all). pub entity_kinds: Vec, - /// Minimum confidence score to include a detection (0.0 -- 1.0). + /// Minimum confidence score to include a detection (0.0..=1.0). pub confidence_threshold: f64, /// System prompt override (if empty, the backend uses its default). pub system_prompt: Option, diff --git a/crates/nvisy-rig/src/backend/retry.rs b/crates/nvisy-rig/src/backend/retry.rs index 0a76ed7..3de416e 100644 --- a/crates/nvisy-rig/src/backend/retry.rs +++ b/crates/nvisy-rig/src/backend/retry.rs @@ -4,9 +4,11 @@ use std::time::Duration; use nvisy_core::Error; -use super::{DetectionRequest, DetectionResponse}; - /// Tower retry policy with exponential backoff for retryable errors. +/// +/// Generic over any request/response types: the request must be `Clone` +/// (so Tower can re-issue it) and the error type is [`nvisy_core::Error`] +/// whose `is_retryable()` flag drives the retry decision. #[derive(Debug, Clone)] pub struct RetryPolicy { /// Maximum number of retries (default: 3). 
@@ -47,13 +49,16 @@ impl RetryPolicy { } } -impl tower::retry::Policy for RetryPolicy { +impl tower::retry::Policy for RetryPolicy +where + Req: Clone, +{ type Future = std::pin::Pin + Send>>; fn retry( &mut self, - _req: &mut DetectionRequest, - result: &mut Result, + _req: &mut Req, + result: &mut Result, ) -> Option { match result { Ok(_) => None, @@ -85,7 +90,7 @@ impl tower::retry::Policy for RetryP } } - fn clone_request(&mut self, req: &DetectionRequest) -> Option { + fn clone_request(&mut self, req: &Req) -> Option { Some(req.clone()) } } @@ -93,6 +98,7 @@ impl tower::retry::Policy for RetryP #[cfg(test)] mod tests { use super::*; + use crate::backend::{DetectionRequest, DetectionResponse, DetectionConfig}; use tower::retry::Policy; #[tokio::test] @@ -100,7 +106,7 @@ mod tests { let mut policy = RetryPolicy::new(); let mut req = DetectionRequest { text: "test".into(), - config: crate::backend::DetectionConfig { + config: DetectionConfig { entity_kinds: vec![], confidence_threshold: 0.5, system_prompt: None, @@ -118,7 +124,7 @@ mod tests { let mut policy = RetryPolicy::new(); let mut req = DetectionRequest { text: "test".into(), - config: crate::backend::DetectionConfig { + config: DetectionConfig { entity_kinds: vec![], confidence_threshold: 0.5, system_prompt: None, @@ -136,7 +142,7 @@ mod tests { let mut policy = RetryPolicy::new(); let mut req = DetectionRequest { text: "test".into(), - config: crate::backend::DetectionConfig { + config: DetectionConfig { entity_kinds: vec![], confidence_threshold: 0.5, system_prompt: None, From 085244a7e197d5a288d534969c29867de0de4407 Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Wed, 25 Feb 2026 20:10:58 +0100 Subject: [PATCH 11/24] fix(rig): UTF-8 safety, usage tracking, API cleanup, ServiceBackend refactor Fix UTF-8 panics in split_to_fit/truncate_to_fit by snapping byte positions to char boundaries. Rewrite prompt_structured to use completion()+output_schema so usage is always recorded. 
Refactor RigBackend into generic ServiceBackend wrapping any inner Tower service with usage tracking and tracing. Export BaseAgentConfig and ContextWindow for external consumers. Add Clone+PartialEq to all public output types. Restrict from_completion/from_prompt to pub(crate). Deduplicate ALL_TYPES_HINT. Remove dead parse_json_array. Co-Authored-By: Claude Opus 4.6 --- crates/nvisy-rig/src/agent/base/agent.rs | 36 +++--- crates/nvisy-rig/src/agent/base/context.rs | 82 +++++++++++- crates/nvisy-rig/src/agent/base/mod.rs | 2 +- crates/nvisy-rig/src/agent/detect/mod.rs | 2 +- crates/nvisy-rig/src/agent/detect/output.rs | 4 +- crates/nvisy-rig/src/agent/extract/mod.rs | 2 +- crates/nvisy-rig/src/agent/extract/output.rs | 4 +- crates/nvisy-rig/src/agent/extract/prompt.rs | 5 +- crates/nvisy-rig/src/agent/mod.rs | 3 +- .../nvisy-rig/src/agent/recognize/output.rs | 4 +- crates/nvisy-rig/src/agent/redact/output.rs | 4 +- crates/nvisy-rig/src/backend/error.rs | 4 +- crates/nvisy-rig/src/backend/mod.rs | 5 +- crates/nvisy-rig/src/bridge/mod.rs | 118 ++++++++++++++---- crates/nvisy-rig/src/bridge/prompt.rs | 5 +- crates/nvisy-rig/src/bridge/response.rs | 7 -- crates/nvisy-rig/src/lib.rs | 3 +- crates/nvisy-rig/src/prelude.rs | 3 +- 18 files changed, 213 insertions(+), 80 deletions(-) diff --git a/crates/nvisy-rig/src/agent/base/agent.rs b/crates/nvisy-rig/src/agent/base/agent.rs index fd4e63d..5bc4501 100644 --- a/crates/nvisy-rig/src/agent/base/agent.rs +++ b/crates/nvisy-rig/src/agent/base/agent.rs @@ -1,7 +1,7 @@ //! [`BaseAgent`]: internal foundation agent wrapping rig-core's `Agent`. use rig::agent::Agent; -use rig::completion::{Completion, CompletionModel, Prompt, TypedPrompt}; +use rig::completion::{Completion, CompletionModel, Prompt}; use schemars::JsonSchema; use serde::de::DeserializeOwned; use serde::Serialize; @@ -45,16 +45,30 @@ impl BaseAgent { &self.tracker } - /// Structured output prompt: tries `prompt_typed`, falls back to text + - /// `parse_json`. 
+ /// Structured output prompt with usage tracking. + /// + /// Uses `agent.completion()` with an `output_schema` so the provider + /// constrains its response to valid JSON matching `T`. Falls back to + /// text-based parsing on deserialization failure. #[tracing::instrument(skip_all, fields(agent_id = %self.id, mode = "structured"))] pub async fn prompt_structured(&self, prompt: &str) -> Result where T: DeserializeOwned + Default + JsonSchema + Serialize + Send + Sync, { - let structured_result: Result = self.agent.prompt_typed::(prompt).await; + let schema = schemars::schema_for!(T); - match structured_result { + let builder = self + .agent + .completion(prompt, vec![]) + .await + .map_err(from_completion)? + .output_schema(schema); + + let response = builder.send().await.map_err(from_completion)?; + let parsed = ResponseParser::extract_text(&response)?; + self.tracker.record(&response.usage, 0); + + match serde_json::from_str::(parsed.as_str()) { Ok(value) => { tracing::debug!("structured output succeeded"); Ok(value) @@ -62,9 +76,9 @@ impl BaseAgent { Err(structured_err) => { tracing::warn!( error = %structured_err, - "structured output failed, falling back to text-based parsing" + "structured JSON parse failed, falling back to text-based parsing" ); - self.prompt_text_and_parse(prompt).await + parsed.parse_json() } } } @@ -121,12 +135,4 @@ impl BaseAgent { Ok(all_results) } - /// Text-based fallback: complete → extract text → parse JSON. 
- async fn prompt_text_and_parse(&self, prompt: &str) -> Result - where - T: DeserializeOwned + Default, - { - let text = self.prompt_text(prompt).await?; - ResponseParser::from_text(text.as_str()).parse_json() - } } diff --git a/crates/nvisy-rig/src/agent/base/context.rs b/crates/nvisy-rig/src/agent/base/context.rs index 42d22a6..2b6c32c 100644 --- a/crates/nvisy-rig/src/agent/base/context.rs +++ b/crates/nvisy-rig/src/agent/base/context.rs @@ -36,7 +36,8 @@ impl ContextWindow { /// Split text into chunks that each fit within the input budget. /// - /// Splitting respects sentence boundaries (`. ` and `\n`) where possible. + /// Splitting respects sentence boundaries (`. ` and `\n`) where possible + /// and is safe for multi-byte UTF-8 input. pub fn split_to_fit<'a>(&self, text: &'a str) -> Vec<&'a str> { if self.fits(text) { return vec![text]; @@ -55,8 +56,8 @@ impl ContextWindow { break; } - // Take up to char_budget characters, then find a sentence boundary. - let take = remaining.len().min(char_budget); + // Take up to char_budget bytes, snapped to a char boundary. + let take = snap_to_boundary(remaining, remaining.len().min(char_budget)); let candidate = &remaining[..take]; // Try to split at the last sentence boundary within the candidate. @@ -65,7 +66,7 @@ impl ContextWindow { let (chunk, rest) = remaining.split_at(split_pos); if chunk.is_empty() { // No boundary found within budget; force-split at char_budget. - let forced = remaining.len().min(char_budget); + let forced = snap_to_boundary(remaining, remaining.len().min(char_budget)); let (chunk, rest) = remaining.split_at(forced); chunks.push(chunk); remaining = rest; @@ -79,6 +80,8 @@ impl ContextWindow { } /// Truncate text to fit, keeping the end (most recent context). + /// + /// Safe for multi-byte UTF-8 input. 
pub fn truncate_to_fit<'a>(&self, text: &'a str) -> &'a str { if self.fits(text) { return text; @@ -91,15 +94,26 @@ impl ContextWindow { return text; } - let start = text.len() - char_budget; + let start = snap_to_boundary(text, text.len() - char_budget); // Try to start at a boundary to avoid splitting mid-sentence. let adjusted = text[start..] .find(['\n', '.']) .map(|pos| start + pos + 1) .unwrap_or(start); - &text[adjusted.min(text.len())..] + let adjusted = snap_to_boundary(text, adjusted.min(text.len())); + &text[adjusted..] + } +} + +/// Snap a byte position to the nearest valid UTF-8 char boundary, +/// walking backward if necessary. +fn snap_to_boundary(text: &str, pos: usize) -> usize { + let mut p = pos.min(text.len()); + while p > 0 && !text.is_char_boundary(p) { + p -= 1; } + p } /// Find the last sentence boundary (`. ` or `\n`) in the text. @@ -155,4 +169,60 @@ mod tests { assert!(truncated.len() <= 32 + 10); // some slack for boundary adjustment assert!(text.ends_with(truncated) || truncated.contains("sentence")); } + + #[test] + fn snap_to_boundary_ascii() { + let text = "hello"; + assert_eq!(super::snap_to_boundary(text, 3), 3); + assert_eq!(super::snap_to_boundary(text, 10), 5); // clamps to len + } + + #[test] + fn snap_to_boundary_multibyte() { + // '🔥' is 4 bytes + let text = "a🔥b"; + // byte 0: 'a', bytes 1-4: '🔥', byte 5: 'b' + assert_eq!(super::snap_to_boundary(text, 1), 1); // valid + assert_eq!(super::snap_to_boundary(text, 2), 1); // mid-emoji → snap back + assert_eq!(super::snap_to_boundary(text, 3), 1); // mid-emoji → snap back + assert_eq!(super::snap_to_boundary(text, 4), 1); // mid-emoji → snap back + assert_eq!(super::snap_to_boundary(text, 5), 5); // valid (after emoji) + } + + #[test] + fn split_to_fit_emoji() { + // Budget: 2 tokens = ~8 bytes. Each emoji is 4 bytes. 
+ let cw = ContextWindow::new(4, 2); + let text = "🔥🔥🔥🔥"; // 16 bytes total + let chunks = cw.split_to_fit(text); + // Should not panic and every chunk must be valid UTF-8 + assert!(chunks.len() >= 2); + for chunk in &chunks { + assert!(!chunk.is_empty()); + } + } + + #[test] + fn split_to_fit_cjk() { + // CJK chars are 3 bytes each + let cw = ContextWindow::new(4, 2); + // Budget: 2 tokens = ~8 bytes → fits 2 CJK chars (6 bytes) + let text = "你好世界测试文字"; // 8 chars × 3 bytes = 24 bytes + let chunks = cw.split_to_fit(text); + assert!(chunks.len() >= 2); + for chunk in &chunks { + assert!(!chunk.is_empty()); + } + } + + #[test] + fn truncate_to_fit_emoji() { + let cw = ContextWindow::new(4, 2); + // Budget: 2 tokens = ~8 bytes + let text = "🔥🔥🔥🔥"; // 16 bytes + let truncated = cw.truncate_to_fit(text); + // Should not panic, should be valid UTF-8, and should be the tail + assert!(!truncated.is_empty()); + assert!(text.ends_with(truncated)); + } } diff --git a/crates/nvisy-rig/src/agent/base/mod.rs b/crates/nvisy-rig/src/agent/base/mod.rs index a7aedb9..2029865 100644 --- a/crates/nvisy-rig/src/agent/base/mod.rs +++ b/crates/nvisy-rig/src/agent/base/mod.rs @@ -11,7 +11,7 @@ pub(crate) mod context; pub(crate) use agent::BaseAgent; pub(crate) use builder::BaseAgentBuilder; -use context::ContextWindow; +pub use context::ContextWindow; /// Configuration for a [`BaseAgent`]. #[derive(Debug, Clone)] diff --git a/crates/nvisy-rig/src/agent/detect/mod.rs b/crates/nvisy-rig/src/agent/detect/mod.rs index 2f441df..4ea4cbf 100644 --- a/crates/nvisy-rig/src/agent/detect/mod.rs +++ b/crates/nvisy-rig/src/agent/detect/mod.rs @@ -31,7 +31,7 @@ use tool::CvRigTool; /// This is the raw output from the CV backend before the VLM classifies /// detections into entity categories. It carries a human-readable label, /// a confidence score, and a pixel-space bounding box. 
-#[derive(Debug, Clone, Serialize)] +#[derive(Debug, Clone, PartialEq, Serialize)] pub struct CvDetection { /// Label for the detected object (e.g. `"face"`, `"license_plate"`). pub label: String, diff --git a/crates/nvisy-rig/src/agent/detect/output.rs b/crates/nvisy-rig/src/agent/detect/output.rs index ea1eb2d..d40cb12 100644 --- a/crates/nvisy-rig/src/agent/detect/output.rs +++ b/crates/nvisy-rig/src/agent/detect/output.rs @@ -6,7 +6,7 @@ use serde::{Deserialize, Serialize}; use nvisy_ontology::entity::{EntityCategory, EntityKind}; /// A single entity detected by computer vision. -#[derive(Debug, Deserialize, Serialize, JsonSchema)] +#[derive(Debug, Clone, PartialEq, Deserialize, Serialize, JsonSchema)] pub struct RawCvEntity { /// Broad classification. pub category: EntityCategory, @@ -21,7 +21,7 @@ pub struct RawCvEntity { } /// Wrapper for structured output parsing. -#[derive(Debug, Default, Deserialize, Serialize, JsonSchema)] +#[derive(Debug, Clone, Default, PartialEq, Deserialize, Serialize, JsonSchema)] pub struct RawCvEntities { /// Detected entities. pub entities: Vec, diff --git a/crates/nvisy-rig/src/agent/extract/mod.rs b/crates/nvisy-rig/src/agent/extract/mod.rs index 19e928b..15a03ba 100644 --- a/crates/nvisy-rig/src/agent/extract/mod.rs +++ b/crates/nvisy-rig/src/agent/extract/mod.rs @@ -30,7 +30,7 @@ use tool::OcrRigTool; /// /// Each region represents a contiguous block of text found in the image, /// together with an optional bounding box and confidence score. -#[derive(Debug, Clone, Serialize)] +#[derive(Debug, Clone, PartialEq, Serialize)] pub struct OcrTextRegion { /// The extracted text content. 
pub text: String, diff --git a/crates/nvisy-rig/src/agent/extract/output.rs b/crates/nvisy-rig/src/agent/extract/output.rs index fb8caaa..0743de9 100644 --- a/crates/nvisy-rig/src/agent/extract/output.rs +++ b/crates/nvisy-rig/src/agent/extract/output.rs @@ -6,7 +6,7 @@ use serde::{Deserialize, Serialize}; use nvisy_ontology::entity::{EntityCategory, EntityKind}; /// Top-level output from the OCR agent. -#[derive(Debug, Default, Deserialize, Serialize, JsonSchema)] +#[derive(Debug, Clone, Default, PartialEq, Deserialize, Serialize, JsonSchema)] pub struct OcrOutput { /// Full text extracted from the image. pub extracted_text: String, @@ -15,7 +15,7 @@ pub struct OcrOutput { } /// A single entity detected in OCR-extracted text. -#[derive(Debug, Deserialize, Serialize, JsonSchema)] +#[derive(Debug, Clone, PartialEq, Deserialize, Serialize, JsonSchema)] pub struct RawOcrEntity { /// Broad classification. pub category: EntityCategory, diff --git a/crates/nvisy-rig/src/agent/extract/prompt.rs b/crates/nvisy-rig/src/agent/extract/prompt.rs index 7f0d4dc..bfb7384 100644 --- a/crates/nvisy-rig/src/agent/extract/prompt.rs +++ b/crates/nvisy-rig/src/agent/extract/prompt.rs @@ -3,10 +3,7 @@ //! [`OcrPromptBuilder`] constructs the user prompt that instructs the VLM //! to call the OCR tool and then detect entities in the extracted text. -use crate::backend::DetectionConfig; - -/// Fallback when no specific entity types are requested. -const ALL_TYPES_HINT: &str = "all entity types"; +use crate::backend::{DetectionConfig, ALL_TYPES_HINT}; /// Builds user prompts for OCR-based entity extraction. 
/// diff --git a/crates/nvisy-rig/src/agent/mod.rs b/crates/nvisy-rig/src/agent/mod.rs index 6d43412..0d1adf6 100644 --- a/crates/nvisy-rig/src/agent/mod.rs +++ b/crates/nvisy-rig/src/agent/mod.rs @@ -9,7 +9,8 @@ mod extract; mod recognize; mod redact; -pub(crate) use base::{BaseAgent, BaseAgentBuilder, BaseAgentConfig}; +pub(crate) use base::BaseAgent; +pub use base::{BaseAgentConfig, ContextWindow}; pub use recognize::{NerAgent, RawEntities, RawEntity}; pub use extract::{OcrAgent, OcrOutput, OcrProvider, OcrTextRegion, RawOcrEntity}; diff --git a/crates/nvisy-rig/src/agent/recognize/output.rs b/crates/nvisy-rig/src/agent/recognize/output.rs index ae05062..63167f3 100644 --- a/crates/nvisy-rig/src/agent/recognize/output.rs +++ b/crates/nvisy-rig/src/agent/recognize/output.rs @@ -6,14 +6,14 @@ use serde::{Deserialize, Serialize}; use nvisy_ontology::entity::{EntityCategory, EntityKind}; /// A list of raw entities returned by structured output. -#[derive(Debug, Default, Deserialize, Serialize, JsonSchema)] +#[derive(Debug, Clone, Default, PartialEq, Deserialize, Serialize, JsonSchema)] pub struct RawEntities { /// Detected entities. pub entities: Vec, } /// A single raw entity from structured LLM output. -#[derive(Debug, Deserialize, Serialize, JsonSchema)] +#[derive(Debug, Clone, PartialEq, Deserialize, Serialize, JsonSchema)] pub struct RawEntity { /// Broad classification. pub category: EntityCategory, diff --git a/crates/nvisy-rig/src/agent/redact/output.rs b/crates/nvisy-rig/src/agent/redact/output.rs index 577c054..7662dae 100644 --- a/crates/nvisy-rig/src/agent/redact/output.rs +++ b/crates/nvisy-rig/src/agent/redact/output.rs @@ -6,7 +6,7 @@ use serde::{Deserialize, Serialize}; use nvisy_ontology::specification::TextRedactionMethod; /// A single redaction recommendation from the LLM. 
-#[derive(Debug, Deserialize, Serialize, JsonSchema)] +#[derive(Debug, Clone, PartialEq, Deserialize, Serialize, JsonSchema)] pub struct RawRedaction { /// The original entity text that should be redacted. pub entity_value: String, @@ -19,7 +19,7 @@ pub struct RawRedaction { } /// Top-level structured output wrapper from the redactor agent. -#[derive(Debug, Default, Deserialize, Serialize, JsonSchema)] +#[derive(Debug, Clone, Default, PartialEq, Deserialize, Serialize, JsonSchema)] pub struct RedactorOutput { /// Recommended redactions for each entity. pub redactions: Vec, diff --git a/crates/nvisy-rig/src/backend/error.rs b/crates/nvisy-rig/src/backend/error.rs index 5074944..54895ac 100644 --- a/crates/nvisy-rig/src/backend/error.rs +++ b/crates/nvisy-rig/src/backend/error.rs @@ -5,7 +5,7 @@ use rig::completion::{CompletionError, PromptError}; use nvisy_core::Error; /// Convert a rig-core [`PromptError`] into a [`nvisy_core::Error`]. -pub fn from_prompt(err: PromptError) -> Error { +pub(crate) fn from_prompt(err: PromptError) -> Error { match err { PromptError::CompletionError(e) => from_completion(e), PromptError::ToolError(e) => { @@ -28,7 +28,7 @@ pub fn from_prompt(err: PromptError) -> Error { } /// Convert a rig-core [`CompletionError`] into a [`nvisy_core::Error`]. 
-pub fn from_completion(err: CompletionError) -> Error { +pub(crate) fn from_completion(err: CompletionError) -> Error { match err { CompletionError::HttpError(e) => { Error::connection(format!("HTTP error: {e}"), "rig", true) diff --git a/crates/nvisy-rig/src/backend/mod.rs b/crates/nvisy-rig/src/backend/mod.rs index 7982a02..a903516 100644 --- a/crates/nvisy-rig/src/backend/mod.rs +++ b/crates/nvisy-rig/src/backend/mod.rs @@ -4,10 +4,13 @@ mod error; mod metrics; mod retry; -pub use error::{from_completion, from_prompt}; +pub(crate) use error::{from_completion, from_prompt}; pub use metrics::{UsageStats, UsageTracker}; pub use retry::RetryPolicy; +/// Fallback hint used in prompts when no specific entity types are requested. +pub(crate) const ALL_TYPES_HINT: &str = "all entity types"; + use serde_json::Value; use nvisy_ontology::entity::EntityKind; diff --git a/crates/nvisy-rig/src/bridge/mod.rs b/crates/nvisy-rig/src/bridge/mod.rs index 7579cfa..5d68a02 100644 --- a/crates/nvisy-rig/src/bridge/mod.rs +++ b/crates/nvisy-rig/src/bridge/mod.rs @@ -18,13 +18,9 @@ use crate::backend::{ RetryPolicy, UsageTracker, }; -/// Configuration for a [`RigBackend`]. +/// Configuration for [`ServiceBackend`] (and its [`RigBackend`] specialisation). #[derive(Debug, Clone)] pub struct RigBackendConfig { - /// Sampling temperature (default: 0.1). - pub temperature: f64, - /// Maximum output tokens (default: 4096). - pub max_tokens: u64, /// Retry policy for transient errors. pub retry: RetryPolicy, } @@ -32,39 +28,92 @@ pub struct RigBackendConfig { impl Default for RigBackendConfig { fn default() -> Self { Self { - temperature: 0.1, - max_tokens: 4096, retry: RetryPolicy::new(), } } } -/// Production detection service wrapping a rig-core [`CompletionModel`]. +/// Generic Tower service adapter. /// -/// Implements `tower::Service`. -pub struct RigBackend { - model: Arc, +/// Wraps any inner service `S` with a retry policy and usage tracking. 
+/// The inner service handles prompt construction and LLM interaction; +/// the wrapper provides observability and resilience. +pub struct ServiceBackend { + inner: S, config: RigBackendConfig, tracker: Arc, } -impl RigBackend { - /// Create a new backend with the given model and configuration. - pub fn new(model: M, config: RigBackendConfig) -> Self { +impl ServiceBackend { + /// Create a new service backend wrapping an arbitrary inner service. + pub fn new(inner: S, config: RigBackendConfig) -> Self { Self { - model: Arc::new(model), + inner, config, tracker: Arc::new(UsageTracker::new()), } } + /// Access the retry policy. + pub fn retry_policy(&self) -> &RetryPolicy { + &self.config.retry + } + /// Access the usage tracker for this backend. pub fn tracker(&self) -> &UsageTracker { &self.tracker } } -impl tower::Service for RigBackend +impl tower::Service for ServiceBackend +where + S: tower::Service, + S::Future: Send + 'static, +{ + type Response = DetectionResponse; + type Error = Error; + type Future = std::pin::Pin> + Send>>; + + fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll> { + self.inner.poll_ready(cx) + } + + fn call(&mut self, req: DetectionRequest) -> Self::Future { + let tracker = Arc::clone(&self.tracker); + let fut = self.inner.call(req); + + Box::pin(async move { + let span = tracing::info_span!("service_backend_call"); + let _enter = span.enter(); + + let response = fut.await?; + + if let Some(ref usage) = response.usage { + tracker.record(usage, 0); + + tracing::debug!( + input_tokens = usage.input_tokens, + output_tokens = usage.output_tokens, + "LLM request completed" + ); + } + + Ok(response) + }) + } +} + +/// Inner service that drives a raw rig-core [`CompletionModel`]. +/// +/// This is the low-level service that constructs prompts and parses +/// responses. Wrap it in [`ServiceBackend`] for retry and usage tracking. 
+pub struct RigBackendInner { + model: Arc, + temperature: f64, + max_tokens: u64, +} + +impl tower::Service for RigBackendInner where M: CompletionModel + Send + Sync + 'static, { @@ -80,11 +129,13 @@ where let user_prompt = PromptBuilder::new(&req.config).build(&req.text); let system_prompt = req.config.system_prompt.clone(); let model = Arc::clone(&self.model); - let temperature = self.config.temperature; - let max_tokens = self.config.max_tokens; - let tracker = Arc::clone(&self.tracker); + let temperature = self.temperature; + let max_tokens = self.max_tokens; Box::pin(async move { + let span = tracing::info_span!("rig_backend_call"); + let _enter = span.enter(); + let mut builder = model .completion_request(&user_prompt) .temperature(temperature) @@ -98,14 +149,6 @@ where let parsed = ResponseParser::extract_text(&response)?; let entities = parsed.parse_json()?; - tracker.record(&response.usage, 0); - - tracing::debug!( - input_tokens = response.usage.input_tokens, - output_tokens = response.usage.output_tokens, - "LLM request completed" - ); - Ok(DetectionResponse { entities, usage: Some(response.usage), @@ -113,3 +156,24 @@ where }) } } + +/// Production detection service wrapping a rig-core [`CompletionModel`]. +/// +/// This is a convenience alias for `ServiceBackend>`. +/// Use [`RigBackend::from_model`] to construct one. +pub type RigBackend = ServiceBackend>; + +impl RigBackend { + /// Create a new backend with the given model and configuration. + /// + /// Temperature and max_tokens are configured on the inner model service. + /// The [`RigBackendConfig`] controls retry policy. 
+ pub fn from_model(model: M, temperature: f64, max_tokens: u64, config: RigBackendConfig) -> Self { + let inner = RigBackendInner { + model: Arc::new(model), + temperature, + max_tokens, + }; + ServiceBackend::new(inner, config) + } +} diff --git a/crates/nvisy-rig/src/bridge/prompt.rs b/crates/nvisy-rig/src/bridge/prompt.rs index 5a6ba88..159025e 100644 --- a/crates/nvisy-rig/src/bridge/prompt.rs +++ b/crates/nvisy-rig/src/bridge/prompt.rs @@ -4,14 +4,11 @@ use std::fmt::Display; use nvisy_ontology::entity::EntityKind; -use crate::backend::DetectionConfig; +use crate::backend::{DetectionConfig, ALL_TYPES_HINT}; /// Instruction prefix for the user prompt. const DETECT_PREFIX: &str = "Detect entities of types"; -/// Fallback when no specific entity types are requested. -const ALL_TYPES_HINT: &str = "all entity types"; - /// Suffix describing the expected response format. const RESPONSE_FORMAT: &str = "\ Return a JSON array of objects with keys: \ diff --git a/crates/nvisy-rig/src/bridge/response.rs b/crates/nvisy-rig/src/bridge/response.rs index 3ab7684..c217373 100644 --- a/crates/nvisy-rig/src/bridge/response.rs +++ b/crates/nvisy-rig/src/bridge/response.rs @@ -54,13 +54,6 @@ impl<'a> ResponseParser<'a> { &self.text } - /// Parse the text as a JSON array. - /// - /// Convenience wrapper around [`parse_json`](Self::parse_json). - pub fn parse_json_array(&self) -> Result, Error> { - self.parse_json::>() - } - /// Parse the text as JSON into `T`. /// /// Strips markdown fences if present, then deserializes. 
diff --git a/crates/nvisy-rig/src/lib.rs b/crates/nvisy-rig/src/lib.rs index 585d52e..b5735f3 100644 --- a/crates/nvisy-rig/src/lib.rs +++ b/crates/nvisy-rig/src/lib.rs @@ -10,9 +10,10 @@ pub(crate) mod agent; pub mod prelude; pub use backend::{DetectionConfig, DetectionRequest, DetectionResponse}; -pub use bridge::{EntityParser, RigBackend, RigBackendConfig}; +pub use bridge::{EntityParser, RigBackend, RigBackendConfig, ServiceBackend}; pub use agent::{ + BaseAgentConfig, ContextWindow, CvAgent, CvDetection, CvProvider, NerAgent, OcrAgent, OcrOutput, OcrProvider, OcrTextRegion, RawCvEntities, RawCvEntity, RawEntities, RawEntity, diff --git a/crates/nvisy-rig/src/prelude.rs b/crates/nvisy-rig/src/prelude.rs index f6fc160..c2b0e41 100644 --- a/crates/nvisy-rig/src/prelude.rs +++ b/crates/nvisy-rig/src/prelude.rs @@ -4,8 +4,9 @@ pub use crate::backend::{ DetectionConfig, DetectionRequest, DetectionResponse, RetryPolicy, UsageStats, UsageTracker, }; -pub use crate::bridge::{EntityParser, RigBackend, RigBackendConfig}; +pub use crate::bridge::{EntityParser, RigBackend, RigBackendConfig, ServiceBackend}; pub use crate::agent::{ + BaseAgentConfig, ContextWindow, CvAgent, CvDetection, CvProvider, NerAgent, OcrAgent, OcrOutput, OcrProvider, OcrTextRegion, RawCvEntities, RawCvEntity, RawEntities, RawEntity, From 04cb4910e1ec05463e5b7d8be691f14128e88793 Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Wed, 25 Feb 2026 21:23:31 +0100 Subject: [PATCH 12/24] refactor(rig): add ContextWindow::compact, move OCR to paddle module, remove RedactorAgent - Add LLM-based compact() on ContextWindow and prompt_compact() on BaseAgent for summarizing text that exceeds the token budget - Delete nvisy-ocr crate; move OcrBackend, OcrConfig, parse_ocr_entities, and PythonBridge impl into nvisy-rig/src/paddle module - Update nvisy-identify and nvisy-augment to import from nvisy_rig::paddle - Remove RedactorAgent, keeping NerAgent, OcrAgent, and CvAgent - Clean up workspace Cargo.toml, 
Dockerfile, and all re-exports Co-Authored-By: Claude Opus 4.6 --- Cargo.lock | 14 +--- Cargo.toml | 2 - crates/nvisy-augment/Cargo.toml | 1 - crates/nvisy-augment/README.md | 2 +- crates/nvisy-augment/src/ocr.rs | 2 +- crates/nvisy-identify/Cargo.toml | 1 - crates/nvisy-identify/src/vision/ocr.rs | 2 +- crates/nvisy-ocr/Cargo.toml | 34 -------- crates/nvisy-ocr/README.md | 25 ------ crates/nvisy-ocr/src/lib.rs | 10 --- crates/nvisy-rig/Cargo.toml | 1 + crates/nvisy-rig/src/agent/base/agent.rs | 12 +++ crates/nvisy-rig/src/agent/base/context.rs | 42 ++++++++++ crates/nvisy-rig/src/agent/mod.rs | 2 - crates/nvisy-rig/src/agent/redact/mod.rs | 84 ------------------- crates/nvisy-rig/src/agent/redact/output.rs | 26 ------ crates/nvisy-rig/src/agent/redact/prompt.rs | 65 -------------- crates/nvisy-rig/src/lib.rs | 3 +- .../src => nvisy-rig/src/paddle}/backend.rs | 0 .../src => nvisy-rig/src/paddle}/bridge.rs | 2 +- crates/nvisy-rig/src/paddle/mod.rs | 11 +++ .../src => nvisy-rig/src/paddle}/parse.rs | 0 crates/nvisy-rig/src/prelude.rs | 3 +- docker/Dockerfile | 5 +- 24 files changed, 77 insertions(+), 272 deletions(-) delete mode 100644 crates/nvisy-ocr/Cargo.toml delete mode 100644 crates/nvisy-ocr/README.md delete mode 100644 crates/nvisy-ocr/src/lib.rs delete mode 100644 crates/nvisy-rig/src/agent/redact/mod.rs delete mode 100644 crates/nvisy-rig/src/agent/redact/output.rs delete mode 100644 crates/nvisy-rig/src/agent/redact/prompt.rs rename crates/{nvisy-ocr/src => nvisy-rig/src/paddle}/backend.rs (100%) rename crates/{nvisy-ocr/src => nvisy-rig/src/paddle}/bridge.rs (94%) create mode 100644 crates/nvisy-rig/src/paddle/mod.rs rename crates/{nvisy-ocr/src => nvisy-rig/src/paddle}/parse.rs (100%) diff --git a/Cargo.lock b/Cargo.lock index 5c704bb..706dbe8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2659,7 +2659,6 @@ dependencies = [ "nvisy-asr", "nvisy-codec", "nvisy-core", - "nvisy-ocr", "nvisy-ontology", "nvisy-python", "nvisy-rig", @@ -2766,7 +2765,6 @@ 
dependencies = [ "nvisy-asr", "nvisy-codec", "nvisy-core", - "nvisy-ocr", "nvisy-ontology", "nvisy-pattern", "nvisy-python", @@ -2782,17 +2780,6 @@ dependencies = [ "uuid", ] -[[package]] -name = "nvisy-ocr" -version = "0.1.0" -dependencies = [ - "async-trait", - "nvisy-core", - "nvisy-ontology", - "nvisy-python", - "serde_json", -] - [[package]] name = "nvisy-ontology" version = "0.1.0" @@ -2844,6 +2831,7 @@ dependencies = [ "base64", "nvisy-core", "nvisy-ontology", + "nvisy-python", "rig-core", "schemars", "serde", diff --git a/Cargo.toml b/Cargo.toml index 5c36cf5..3b7de91 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,6 @@ members = [ "./crates/nvisy-core", "./crates/nvisy-engine", "./crates/nvisy-identify", - "./crates/nvisy-ocr", "./crates/nvisy-ontology", "./crates/nvisy-pattern", "./crates/nvisy-python", @@ -43,7 +42,6 @@ nvisy-codec = { path = "./crates/nvisy-codec", version = "0.1.0" } nvisy-core = { path = "./crates/nvisy-core", version = "0.1.0" } nvisy-engine = { path = "./crates/nvisy-engine", version = "0.1.0" } nvisy-identify = { path = "./crates/nvisy-identify", version = "0.1.0" } -nvisy-ocr = { path = "./crates/nvisy-ocr", version = "0.1.0" } nvisy-ontology = { path = "./crates/nvisy-ontology", version = "0.1.0" } nvisy-pattern = { path = "./crates/nvisy-pattern", version = "0.1.0" } nvisy-python = { path = "./crates/nvisy-python", version = "0.1.0" } diff --git a/crates/nvisy-augment/Cargo.toml b/crates/nvisy-augment/Cargo.toml index 3aa69d9..cdb6f68 100644 --- a/crates/nvisy-augment/Cargo.toml +++ b/crates/nvisy-augment/Cargo.toml @@ -28,7 +28,6 @@ nvisy-ontology = { workspace = true, features = [] } nvisy-codec = { workspace = true, features = [] } nvisy-python = { workspace = true, features = [] } nvisy-rig = { workspace = true, features = [] } -nvisy-ocr = { workspace = true, features = [] } nvisy-asr = { workspace = true, features = [] } # (De)serialization diff --git a/crates/nvisy-augment/README.md b/crates/nvisy-augment/README.md 
index 8fa28f5..f75fcc3 100644 --- a/crates/nvisy-augment/README.md +++ b/crates/nvisy-augment/README.md @@ -4,7 +4,7 @@ Content augmentation actions for the Nvisy runtime. -Provides OCR text extraction from images (via `nvisy-ocr`), audio transcription (via `nvisy-asr`), and synthetic data generation for replacing redacted entities with realistic placeholder values. +Provides OCR text extraction from images (via `nvisy-rig`), audio transcription (via `nvisy-asr`), and synthetic data generation for replacing redacted entities with realistic placeholder values. ## Documentation diff --git a/crates/nvisy-augment/src/ocr.rs b/crates/nvisy-augment/src/ocr.rs index 5eb86bb..09443c6 100644 --- a/crates/nvisy-augment/src/ocr.rs +++ b/crates/nvisy-augment/src/ocr.rs @@ -9,7 +9,7 @@ use nvisy_core::Error; use nvisy_ontology::entity::Entity; -pub use nvisy_ocr::{OcrBackend, OcrConfig, parse_ocr_entities}; +pub use nvisy_rig::paddle::{OcrBackend, OcrConfig, parse_ocr_entities}; fn default_language() -> String { "eng".into() diff --git a/crates/nvisy-identify/Cargo.toml b/crates/nvisy-identify/Cargo.toml index b32f174..779cb60 100644 --- a/crates/nvisy-identify/Cargo.toml +++ b/crates/nvisy-identify/Cargo.toml @@ -33,7 +33,6 @@ nvisy-codec = { workspace = true, features = [] } nvisy-pattern = { workspace = true, features = [] } nvisy-python = { workspace = true, features = [] } nvisy-rig = { workspace = true, features = [] } -nvisy-ocr = { workspace = true, features = [] } nvisy-asr = { workspace = true, features = [] } # (De)serialization diff --git a/crates/nvisy-identify/src/vision/ocr.rs b/crates/nvisy-identify/src/vision/ocr.rs index ce3850f..79664b8 100644 --- a/crates/nvisy-identify/src/vision/ocr.rs +++ b/crates/nvisy-identify/src/vision/ocr.rs @@ -5,7 +5,7 @@ use nvisy_codec::handler::{ImageData, Span}; use nvisy_core::Error; -use nvisy_ocr::{OcrBackend, OcrConfig, parse_ocr_entities}; +use nvisy_rig::paddle::{OcrBackend, OcrConfig, parse_ocr_entities}; use 
crate::Entity; use crate::{ParallelContext, DetectionService}; diff --git a/crates/nvisy-ocr/Cargo.toml b/crates/nvisy-ocr/Cargo.toml deleted file mode 100644 index ec97198..0000000 --- a/crates/nvisy-ocr/Cargo.toml +++ /dev/null @@ -1,34 +0,0 @@ -# https://doc.rust-lang.org/cargo/reference/manifest.html - -[package] -name = "nvisy-ocr" -description = "OCR backend trait and provider integration for Nvisy" -keywords = ["nvisy", "ocr", "tesseract", "text-extraction"] -categories = ["text-processing"] - -version = { workspace = true } -rust-version = { workspace = true } -edition = { workspace = true } -license = { workspace = true } -publish = { workspace = true } - -authors = { workspace = true } -repository = { workspace = true } -homepage = { workspace = true } -documentation = { workspace = true } - -[package.metadata.docs.rs] -all-features = true -rustdoc-args = ["--cfg", "docsrs"] - -[dependencies] -# Internal crates -nvisy-core = { workspace = true, features = [] } -nvisy-ontology = { workspace = true, features = [] } -nvisy-python = { workspace = true, features = [] } - -# (De)serialization -serde_json = { workspace = true, features = [] } - -# Async runtime -async-trait = { workspace = true, features = [] } diff --git a/crates/nvisy-ocr/README.md b/crates/nvisy-ocr/README.md deleted file mode 100644 index 7a4bf7f..0000000 --- a/crates/nvisy-ocr/README.md +++ /dev/null @@ -1,25 +0,0 @@ -# nvisy-ocr - -[![Build](https://img.shields.io/github/actions/workflow/status/nvisycom/runtime/build.yml?branch=main&label=build%20%26%20test&style=flat-square)](https://github.com/nvisycom/runtime/actions/workflows/build.yml) - -OCR backend trait and provider integration for the Nvisy runtime. - -Defines the `OcrBackend` trait for optical character recognition providers, configuration types, result parsing from raw JSON into entity types, and a `PythonBridge` implementation that delegates to the `nvisy_ai` Python module. 
- -## Documentation - -See [`docs/`](../../docs/) for architecture, security, and API documentation. - -## Changelog - -See [CHANGELOG.md](../../CHANGELOG.md) for release notes and version history. - -## License - -Apache 2.0 License, see [LICENSE.txt](../../LICENSE.txt) - -## Support - -- **Documentation**: [docs.nvisy.com](https://docs.nvisy.com) -- **Issues**: [GitHub Issues](https://github.com/nvisycom/runtime/issues) -- **Email**: [support@nvisy.com](mailto:support@nvisy.com) diff --git a/crates/nvisy-ocr/src/lib.rs b/crates/nvisy-ocr/src/lib.rs deleted file mode 100644 index ae2b5a9..0000000 --- a/crates/nvisy-ocr/src/lib.rs +++ /dev/null @@ -1,10 +0,0 @@ -#![forbid(unsafe_code)] -#![cfg_attr(docsrs, feature(doc_cfg))] -#![doc = include_str!("../README.md")] - -mod backend; -mod bridge; -mod parse; - -pub use backend::{OcrBackend, OcrConfig}; -pub use parse::parse_ocr_entities; diff --git a/crates/nvisy-rig/Cargo.toml b/crates/nvisy-rig/Cargo.toml index 4b93770..9cbc64e 100644 --- a/crates/nvisy-rig/Cargo.toml +++ b/crates/nvisy-rig/Cargo.toml @@ -23,6 +23,7 @@ rustdoc-args = ["--cfg", "docsrs"] # Internal crates nvisy-core = { workspace = true, features = [] } nvisy-ontology = { workspace = true, features = [] } +nvisy-python = { workspace = true, features = [] } # LLM framework rig-core = { workspace = true, features = ["derive"] } diff --git a/crates/nvisy-rig/src/agent/base/agent.rs b/crates/nvisy-rig/src/agent/base/agent.rs index 5bc4501..bdb8020 100644 --- a/crates/nvisy-rig/src/agent/base/agent.rs +++ b/crates/nvisy-rig/src/agent/base/agent.rs @@ -107,6 +107,18 @@ impl BaseAgent { self.agent.prompt(prompt).await.map_err(from_prompt) } + /// Summarize text via LLM to fit within the context window's input budget. + /// + /// Delegates to [`ContextWindow::compact`]. Returns the text unchanged if + /// no context window is configured or the text already fits. 
+ #[tracing::instrument(skip_all, fields(agent_id = %self.id, mode = "compact"))] + pub async fn prompt_compact(&self, text: &str) -> Result { + match &self.context_window { + Some(cw) => cw.compact(text, self).await, + None => Ok(text.to_owned()), + } + } + /// Splits text via [`ContextWindow`], runs `prompt_structured` per chunk, /// and flattens results. #[tracing::instrument(skip_all, fields(agent_id = %self.id, mode = "chunked"))] diff --git a/crates/nvisy-rig/src/agent/base/context.rs b/crates/nvisy-rig/src/agent/base/context.rs index 2b6c32c..1680299 100644 --- a/crates/nvisy-rig/src/agent/base/context.rs +++ b/crates/nvisy-rig/src/agent/base/context.rs @@ -1,5 +1,11 @@ //! Context window management for LLM token limits. +use rig::completion::CompletionModel; + +use nvisy_core::Error; + +use super::agent::BaseAgent; + /// Manages token budget estimation, splitting, and truncation. #[derive(Debug, Clone)] pub struct ContextWindow { @@ -79,6 +85,31 @@ impl ContextWindow { chunks } + /// Summarize text via LLM to fit within the input token budget. + /// + /// If the text already fits, returns it unchanged. Otherwise sends a + /// summarization prompt to the given agent and returns the condensed + /// version. + pub(crate) async fn compact( + &self, + text: &str, + agent: &BaseAgent, + ) -> Result { + if self.fits(text) { + return Ok(text.to_owned()); + } + + let budget = self.input_budget(); + let prompt = format!( + "Summarize the following text to fit within {budget} tokens. \ + Preserve all key entities, names, numbers, dates, and facts. \ + Remove redundancy and filler. Return ONLY the condensed text, \ + no preamble.\n\n{text}" + ); + + agent.prompt_text(&prompt).await + } + /// Truncate text to fit, keeping the end (most recent context). /// /// Safe for multi-byte UTF-8 input. 
@@ -225,4 +256,15 @@ mod tests { assert!(!truncated.is_empty()); assert!(text.ends_with(truncated)); } + + #[test] + fn compact_returns_unchanged_when_fits() { + // compact requires async + a real model, so we only test the + // early-return path via `fits` logic. The "already fits" branch + // returns `Ok(text.to_owned())` synchronously — verify the + // prerequisite here. + let cw = ContextWindow::new(100, 20); + let short = "a".repeat(300); // ~75 tokens, budget is 80 + assert!(cw.fits(&short)); + } } diff --git a/crates/nvisy-rig/src/agent/mod.rs b/crates/nvisy-rig/src/agent/mod.rs index 0d1adf6..ed2f21e 100644 --- a/crates/nvisy-rig/src/agent/mod.rs +++ b/crates/nvisy-rig/src/agent/mod.rs @@ -7,7 +7,6 @@ mod base; mod detect; mod extract; mod recognize; -mod redact; pub(crate) use base::BaseAgent; pub use base::{BaseAgentConfig, ContextWindow}; @@ -15,4 +14,3 @@ pub use base::{BaseAgentConfig, ContextWindow}; pub use recognize::{NerAgent, RawEntities, RawEntity}; pub use extract::{OcrAgent, OcrOutput, OcrProvider, OcrTextRegion, RawOcrEntity}; pub use detect::{CvAgent, CvDetection, CvProvider, RawCvEntities, RawCvEntity}; -pub use redact::{RawRedaction, RedactorAgent, RedactorOutput}; diff --git a/crates/nvisy-rig/src/agent/redact/mod.rs b/crates/nvisy-rig/src/agent/redact/mod.rs deleted file mode 100644 index 04683f8..0000000 --- a/crates/nvisy-rig/src/agent/redact/mod.rs +++ /dev/null @@ -1,84 +0,0 @@ -//! Redactor agent for context-aware semantic redaction. -//! -//! [`RedactorAgent`] is a pure LLM agent (no tools) that takes detected -//! entities and their surrounding text and recommends a -//! [`TextRedactionMethod`](nvisy_ontology::specification::TextRedactionMethod) -//! for each one. It considers sensitivity level, document context, and -//! downstream utility when choosing between masking, replacement, hashing, -//! synthesis, pseudonymisation, and removal. 
- -mod output; -mod prompt; - -pub use output::{RawRedaction, RedactorOutput}; - -use rig::completion::CompletionModel; -use uuid::Uuid; - -use nvisy_core::Error; -use nvisy_ontology::specification::RedactorInput; - -use crate::backend::UsageTracker; - -use super::{BaseAgent, BaseAgentConfig}; -use prompt::{REDACTOR_SYSTEM_PROMPT, RedactorPromptBuilder}; - -/// Agent for context-aware redaction recommendations. -/// -/// # Workflow -/// -/// 1. Caller passes source text and a slice of [`RedactorInput`] entities -/// to [`recommend`](Self::recommend). -/// 2. The agent serialises the entities as JSON and builds a user prompt -/// via [`RedactorPromptBuilder`]. -/// 3. The LLM returns structured output mapping each entity to a -/// [`TextRedactionMethod`](nvisy_ontology::specification::TextRedactionMethod) -/// with a suggested replacement string. -/// 4. The result is parsed into `Vec`. -pub struct RedactorAgent { - base: BaseAgent, -} - -impl RedactorAgent { - /// Create a new redactor agent with the given model and config. - pub fn new(model: M, config: BaseAgentConfig) -> Self { - let base = BaseAgent::builder(model, config) - .preamble(REDACTOR_SYSTEM_PROMPT) - .build(); - Self { base } - } - - /// Unique identifier for this agent instance (UUIDv7). - pub fn id(&self) -> Uuid { - self.base.id() - } - - /// Access the usage tracker for this agent's LLM calls. - pub fn tracker(&self) -> &UsageTracker { - self.base.tracker() - } - - /// Recommend redaction methods for detected entities in the given text. 
- #[tracing::instrument( - skip_all, - fields(text_len = text.len(), entity_count = entities.len(), agent = "redactor"), - )] - pub async fn recommend( - &self, - text: &str, - entities: &[RedactorInput], - ) -> Result, Error> { - let prompt = RedactorPromptBuilder::build(text, entities)?; - - tracing::debug!(prompt_len = prompt.len(), "built redactor prompt"); - - let result: RedactorOutput = self.base.prompt_structured(&prompt).await?; - - tracing::info!( - redaction_count = result.redactions.len(), - "redaction recommendations complete" - ); - - Ok(result.redactions) - } -} diff --git a/crates/nvisy-rig/src/agent/redact/output.rs b/crates/nvisy-rig/src/agent/redact/output.rs deleted file mode 100644 index 7662dae..0000000 --- a/crates/nvisy-rig/src/agent/redact/output.rs +++ /dev/null @@ -1,26 +0,0 @@ -//! Structured output types for redaction recommendations. - -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; - -use nvisy_ontology::specification::TextRedactionMethod; - -/// A single redaction recommendation from the LLM. -#[derive(Debug, Clone, PartialEq, Deserialize, Serialize, JsonSchema)] -pub struct RawRedaction { - /// The original entity text that should be redacted. - pub entity_value: String, - /// Recommended redaction method. - pub method: TextRedactionMethod, - /// The suggested replacement text (e.g. `"[EMAIL]"`, `"***"`). - pub replacement: String, - /// Brief explanation of why this method was chosen. - pub reasoning: Option, -} - -/// Top-level structured output wrapper from the redactor agent. -#[derive(Debug, Clone, Default, PartialEq, Deserialize, Serialize, JsonSchema)] -pub struct RedactorOutput { - /// Recommended redactions for each entity. - pub redactions: Vec, -} diff --git a/crates/nvisy-rig/src/agent/redact/prompt.rs b/crates/nvisy-rig/src/agent/redact/prompt.rs deleted file mode 100644 index ab63a56..0000000 --- a/crates/nvisy-rig/src/agent/redact/prompt.rs +++ /dev/null @@ -1,65 +0,0 @@ -//! 
Redactor-specific prompt construction. -//! -//! [`RedactorPromptBuilder`] constructs the user prompt that presents -//! detected entities and surrounding text to the LLM for redaction -//! method selection. - -use nvisy_core::Error; -use nvisy_ontology::specification::RedactorInput; - -/// Builds user prompts for redaction recommendations. -/// -/// Serialises the entity list as JSON and wraps the source text in -/// delimiters so the LLM has full context for sensitivity-aware decisions. -pub(crate) struct RedactorPromptBuilder; - -impl RedactorPromptBuilder { - /// Build the user prompt for the given text and entity list. - pub fn build(text: &str, entities: &[RedactorInput]) -> Result { - let entities_json = serde_json::to_string_pretty(entities).map_err(|e| { - Error::runtime( - format!("failed to serialize entities for redactor: {e}"), - "rig", - false, - ) - })?; - - Ok(format!( - "Recommend redaction methods for the following entities found in the \ - text below.\n\n\ - Entities:\n{entities_json}\n\n\ - ---\n{text}\n---" - )) - } -} - -/// Default system prompt for the redactor agent. -pub(super) const REDACTOR_SYSTEM_PROMPT: &str = "\ -You are a context-aware redaction system. Given a text and a list of detected entities, \ -recommend the most appropriate redaction method for each entity.\n\ -\n\ -Available redaction methods:\n\ -- \"mask\": Replace with a fixed mask (e.g. \"***\", \"[REDACTED]\"). Use for highly sensitive data \ - where the original value must not be recoverable.\n\ -- \"replace\": Replace with a type-appropriate placeholder (e.g. \"[EMAIL]\", \"[SSN]\"). Use when \ - the entity type should remain visible but the value hidden.\n\ -- \"hash\": Replace with a deterministic hash. Use when linkability across documents is needed \ - without exposing the original value.\n\ -- \"synthesize\": Replace with a realistic but fake value (e.g. a fake name, fake address). 
Use \ - when preserving data format and statistical properties matters.\n\ -- \"pseudonymize\": Replace with a consistent pseudonym. Use when the same entity should map to \ - the same pseudonym across a document or dataset.\n\ -- \"remove\": Delete the entity entirely. Use for data that adds no analytical value.\n\ -\n\ -For each entity, consider:\n\ -- Sensitivity level (credentials > government IDs > names)\n\ -- Context (medical records need stricter redaction than marketing copy)\n\ -- Downstream utility (will analysts need to correlate redacted values?)\n\ -\n\ -Return a JSON object with a \"redactions\" array. Each element must have:\n\ -- \"entity_value\": the original entity text\n\ -- \"method\": one of the methods above\n\ -- \"replacement\": the suggested replacement text\n\ -- \"reasoning\": brief explanation of why this method was chosen (optional)\n\ -\n\ -If no redactions are needed, return {\"redactions\": []}."; diff --git a/crates/nvisy-rig/src/lib.rs b/crates/nvisy-rig/src/lib.rs index b5735f3..4353a8b 100644 --- a/crates/nvisy-rig/src/lib.rs +++ b/crates/nvisy-rig/src/lib.rs @@ -4,6 +4,7 @@ pub mod backend; pub mod bridge; +pub mod paddle; pub(crate) mod agent; #[doc(hidden)] @@ -17,5 +18,5 @@ pub use agent::{ CvAgent, CvDetection, CvProvider, NerAgent, OcrAgent, OcrOutput, OcrProvider, OcrTextRegion, RawCvEntities, RawCvEntity, RawEntities, RawEntity, - RawOcrEntity, RawRedaction, RedactorAgent, RedactorOutput, + RawOcrEntity, }; diff --git a/crates/nvisy-ocr/src/backend.rs b/crates/nvisy-rig/src/paddle/backend.rs similarity index 100% rename from crates/nvisy-ocr/src/backend.rs rename to crates/nvisy-rig/src/paddle/backend.rs diff --git a/crates/nvisy-ocr/src/bridge.rs b/crates/nvisy-rig/src/paddle/bridge.rs similarity index 94% rename from crates/nvisy-ocr/src/bridge.rs rename to crates/nvisy-rig/src/paddle/bridge.rs index 9ea3e5d..44b9108 100644 --- a/crates/nvisy-ocr/src/bridge.rs +++ b/crates/nvisy-rig/src/paddle/bridge.rs @@ -6,7 +6,7 @@ 
use nvisy_core::Error; use nvisy_python::bridge::PythonBridge; use nvisy_python::ocr::OcrParams; -use crate::backend::{OcrBackend, OcrConfig}; +use super::backend::{OcrBackend, OcrConfig}; /// Converts [`OcrConfig`] to [`OcrParams`] and delegates to `nvisy_python::ocr`. #[async_trait::async_trait] diff --git a/crates/nvisy-rig/src/paddle/mod.rs b/crates/nvisy-rig/src/paddle/mod.rs new file mode 100644 index 0000000..803eb3b --- /dev/null +++ b/crates/nvisy-rig/src/paddle/mod.rs @@ -0,0 +1,11 @@ +//! PaddleOCR / OCR backend integration. +//! +//! Re-exports the OCR backend trait, configuration, entity parsing, and +//! the [`PythonBridge`] implementation. + +mod backend; +mod bridge; +mod parse; + +pub use backend::{OcrBackend, OcrConfig}; +pub use parse::parse_ocr_entities; diff --git a/crates/nvisy-ocr/src/parse.rs b/crates/nvisy-rig/src/paddle/parse.rs similarity index 100% rename from crates/nvisy-ocr/src/parse.rs rename to crates/nvisy-rig/src/paddle/parse.rs diff --git a/crates/nvisy-rig/src/prelude.rs b/crates/nvisy-rig/src/prelude.rs index c2b0e41..5803e0a 100644 --- a/crates/nvisy-rig/src/prelude.rs +++ b/crates/nvisy-rig/src/prelude.rs @@ -10,5 +10,6 @@ pub use crate::agent::{ CvAgent, CvDetection, CvProvider, NerAgent, OcrAgent, OcrOutput, OcrProvider, OcrTextRegion, RawCvEntities, RawCvEntity, RawEntities, RawEntity, - RawOcrEntity, RawRedaction, RedactorAgent, RedactorOutput, + RawOcrEntity, }; +pub use crate::paddle::{OcrBackend, OcrConfig, parse_ocr_entities}; diff --git a/docker/Dockerfile b/docker/Dockerfile index 35159f5..da4b294 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -13,7 +13,6 @@ COPY crates/nvisy-codec/Cargo.toml crates/nvisy-codec/Cargo.toml COPY crates/nvisy-core/Cargo.toml crates/nvisy-core/Cargo.toml COPY crates/nvisy-engine/Cargo.toml crates/nvisy-engine/Cargo.toml COPY crates/nvisy-identify/Cargo.toml crates/nvisy-identify/Cargo.toml -COPY crates/nvisy-ocr/Cargo.toml crates/nvisy-ocr/Cargo.toml COPY 
crates/nvisy-ontology/Cargo.toml crates/nvisy-ontology/Cargo.toml COPY crates/nvisy-pattern/Cargo.toml crates/nvisy-pattern/Cargo.toml COPY crates/nvisy-python/Cargo.toml crates/nvisy-python/Cargo.toml @@ -21,14 +20,14 @@ COPY crates/nvisy-rig/Cargo.toml crates/nvisy-rig/Cargo.toml COPY crates/nvisy-server/Cargo.toml crates/nvisy-server/Cargo.toml # Create empty src files to satisfy cargo's manifest checks -RUN for crate in nvisy-asr nvisy-augment nvisy-codec nvisy-core nvisy-engine nvisy-identify nvisy-ocr nvisy-ontology nvisy-pattern nvisy-python nvisy-rig; do \ +RUN for crate in nvisy-asr nvisy-augment nvisy-codec nvisy-core nvisy-engine nvisy-identify nvisy-ontology nvisy-pattern nvisy-python nvisy-rig; do \ mkdir -p crates/$crate/src && echo "" > crates/$crate/src/lib.rs; \ done && \ mkdir -p crates/nvisy-cli/src && echo "fn main() {}" > crates/nvisy-cli/src/main.rs && \ mkdir -p crates/nvisy-server/src && echo "fn main() {}" > crates/nvisy-server/src/main.rs # Create stub READMEs for crates that use doc = include_str!("../README.md") -RUN for crate in nvisy-asr nvisy-augment nvisy-cli nvisy-codec nvisy-core nvisy-engine nvisy-identify nvisy-ocr nvisy-ontology nvisy-pattern nvisy-python nvisy-rig nvisy-server; do \ +RUN for crate in nvisy-asr nvisy-augment nvisy-cli nvisy-codec nvisy-core nvisy-engine nvisy-identify nvisy-ontology nvisy-pattern nvisy-python nvisy-rig nvisy-server; do \ touch crates/$crate/README.md; \ done From 89ee0d1924edab2b71335d91468c840ce9ce691e Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Wed, 25 Feb 2026 21:44:46 +0100 Subject: [PATCH 13/24] refactor: extract paddle module from nvisy-rig into standalone nvisy-paddle crate Move OCR backend code out of nvisy-rig/src/paddle/ into a new nvisy-paddle crate so nvisy-rig no longer depends on nvisy-python. Consumers (nvisy-identify, nvisy-augment) now import from nvisy_paddle. 
Co-Authored-By: Claude Opus 4.6 --- Cargo.lock | 14 +++++++- Cargo.toml | 2 ++ crates/nvisy-augment/Cargo.toml | 1 + crates/nvisy-augment/src/ocr.rs | 2 +- crates/nvisy-identify/Cargo.toml | 1 + crates/nvisy-identify/src/vision/ocr.rs | 2 +- crates/nvisy-paddle/Cargo.toml | 34 +++++++++++++++++++ crates/nvisy-paddle/README.md | 3 ++ .../paddle => nvisy-paddle/src}/backend.rs | 0 .../src/paddle => nvisy-paddle/src}/bridge.rs | 2 +- crates/nvisy-paddle/src/lib.rs | 10 ++++++ .../src/paddle => nvisy-paddle/src}/parse.rs | 0 crates/nvisy-rig/Cargo.toml | 1 - crates/nvisy-rig/src/lib.rs | 1 - crates/nvisy-rig/src/paddle/mod.rs | 11 ------ crates/nvisy-rig/src/prelude.rs | 1 - docker/Dockerfile | 5 +-- 17 files changed, 70 insertions(+), 20 deletions(-) create mode 100644 crates/nvisy-paddle/Cargo.toml create mode 100644 crates/nvisy-paddle/README.md rename crates/{nvisy-rig/src/paddle => nvisy-paddle/src}/backend.rs (100%) rename crates/{nvisy-rig/src/paddle => nvisy-paddle/src}/bridge.rs (94%) create mode 100644 crates/nvisy-paddle/src/lib.rs rename crates/{nvisy-rig/src/paddle => nvisy-paddle/src}/parse.rs (100%) delete mode 100644 crates/nvisy-rig/src/paddle/mod.rs diff --git a/Cargo.lock b/Cargo.lock index 706dbe8..2e11dd2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2660,6 +2660,7 @@ dependencies = [ "nvisy-codec", "nvisy-core", "nvisy-ontology", + "nvisy-paddle", "nvisy-python", "nvisy-rig", "serde", @@ -2766,6 +2767,7 @@ dependencies = [ "nvisy-codec", "nvisy-core", "nvisy-ontology", + "nvisy-paddle", "nvisy-pattern", "nvisy-python", "nvisy-rig", @@ -2794,6 +2796,17 @@ dependencies = [ "uuid", ] +[[package]] +name = "nvisy-paddle" +version = "0.1.0" +dependencies = [ + "async-trait", + "nvisy-core", + "nvisy-ontology", + "nvisy-python", + "serde_json", +] + [[package]] name = "nvisy-pattern" version = "0.1.0" @@ -2831,7 +2844,6 @@ dependencies = [ "base64", "nvisy-core", "nvisy-ontology", - "nvisy-python", "rig-core", "schemars", "serde", diff --git a/Cargo.toml 
b/Cargo.toml index 3b7de91..9518f29 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,6 +11,7 @@ members = [ "./crates/nvisy-engine", "./crates/nvisy-identify", "./crates/nvisy-ontology", + "./crates/nvisy-paddle", "./crates/nvisy-pattern", "./crates/nvisy-python", "./crates/nvisy-rig", @@ -43,6 +44,7 @@ nvisy-core = { path = "./crates/nvisy-core", version = "0.1.0" } nvisy-engine = { path = "./crates/nvisy-engine", version = "0.1.0" } nvisy-identify = { path = "./crates/nvisy-identify", version = "0.1.0" } nvisy-ontology = { path = "./crates/nvisy-ontology", version = "0.1.0" } +nvisy-paddle = { path = "./crates/nvisy-paddle", version = "0.1.0" } nvisy-pattern = { path = "./crates/nvisy-pattern", version = "0.1.0" } nvisy-python = { path = "./crates/nvisy-python", version = "0.1.0" } nvisy-rig = { path = "./crates/nvisy-rig", version = "0.1.0" } diff --git a/crates/nvisy-augment/Cargo.toml b/crates/nvisy-augment/Cargo.toml index cdb6f68..222d382 100644 --- a/crates/nvisy-augment/Cargo.toml +++ b/crates/nvisy-augment/Cargo.toml @@ -28,6 +28,7 @@ nvisy-ontology = { workspace = true, features = [] } nvisy-codec = { workspace = true, features = [] } nvisy-python = { workspace = true, features = [] } nvisy-rig = { workspace = true, features = [] } +nvisy-paddle = { workspace = true, features = [] } nvisy-asr = { workspace = true, features = [] } # (De)serialization diff --git a/crates/nvisy-augment/src/ocr.rs b/crates/nvisy-augment/src/ocr.rs index 09443c6..92574e4 100644 --- a/crates/nvisy-augment/src/ocr.rs +++ b/crates/nvisy-augment/src/ocr.rs @@ -9,7 +9,7 @@ use nvisy_core::Error; use nvisy_ontology::entity::Entity; -pub use nvisy_rig::paddle::{OcrBackend, OcrConfig, parse_ocr_entities}; +pub use nvisy_paddle::{OcrBackend, OcrConfig, parse_ocr_entities}; fn default_language() -> String { "eng".into() diff --git a/crates/nvisy-identify/Cargo.toml b/crates/nvisy-identify/Cargo.toml index 779cb60..316e3bb 100644 --- a/crates/nvisy-identify/Cargo.toml +++ 
b/crates/nvisy-identify/Cargo.toml @@ -33,6 +33,7 @@ nvisy-codec = { workspace = true, features = [] } nvisy-pattern = { workspace = true, features = [] } nvisy-python = { workspace = true, features = [] } nvisy-rig = { workspace = true, features = [] } +nvisy-paddle = { workspace = true, features = [] } nvisy-asr = { workspace = true, features = [] } # (De)serialization diff --git a/crates/nvisy-identify/src/vision/ocr.rs b/crates/nvisy-identify/src/vision/ocr.rs index 79664b8..ee55904 100644 --- a/crates/nvisy-identify/src/vision/ocr.rs +++ b/crates/nvisy-identify/src/vision/ocr.rs @@ -5,7 +5,7 @@ use nvisy_codec::handler::{ImageData, Span}; use nvisy_core::Error; -use nvisy_rig::paddle::{OcrBackend, OcrConfig, parse_ocr_entities}; +use nvisy_paddle::{OcrBackend, OcrConfig, parse_ocr_entities}; use crate::Entity; use crate::{ParallelContext, DetectionService}; diff --git a/crates/nvisy-paddle/Cargo.toml b/crates/nvisy-paddle/Cargo.toml new file mode 100644 index 0000000..a570333 --- /dev/null +++ b/crates/nvisy-paddle/Cargo.toml @@ -0,0 +1,34 @@ +# https://doc.rust-lang.org/cargo/reference/manifest.html + +[package] +name = "nvisy-paddle" +description = "PaddleOCR backend trait and provider integration for Nvisy" +keywords = ["nvisy", "ocr", "paddle", "text-extraction"] +categories = ["text-processing"] + +version = { workspace = true } +rust-version = { workspace = true } +edition = { workspace = true } +license = { workspace = true } +publish = { workspace = true } + +authors = { workspace = true } +repository = { workspace = true } +homepage = { workspace = true } +documentation = { workspace = true } + +[package.metadata.docs.rs] +all-features = true +rustdoc-args = ["--cfg", "docsrs"] + +[dependencies] +# Internal crates +nvisy-core = { workspace = true, features = [] } +nvisy-ontology = { workspace = true, features = [] } +nvisy-python = { workspace = true, features = [] } + +# (De)serialization +serde_json = { workspace = true, features = [] } + +# Async 
runtime +async-trait = { workspace = true, features = [] } diff --git a/crates/nvisy-paddle/README.md b/crates/nvisy-paddle/README.md new file mode 100644 index 0000000..bd19cf5 --- /dev/null +++ b/crates/nvisy-paddle/README.md @@ -0,0 +1,3 @@ +# nvisy-paddle + +PaddleOCR backend trait and provider integration for the Nvisy runtime. diff --git a/crates/nvisy-rig/src/paddle/backend.rs b/crates/nvisy-paddle/src/backend.rs similarity index 100% rename from crates/nvisy-rig/src/paddle/backend.rs rename to crates/nvisy-paddle/src/backend.rs diff --git a/crates/nvisy-rig/src/paddle/bridge.rs b/crates/nvisy-paddle/src/bridge.rs similarity index 94% rename from crates/nvisy-rig/src/paddle/bridge.rs rename to crates/nvisy-paddle/src/bridge.rs index 44b9108..9ea3e5d 100644 --- a/crates/nvisy-rig/src/paddle/bridge.rs +++ b/crates/nvisy-paddle/src/bridge.rs @@ -6,7 +6,7 @@ use nvisy_core::Error; use nvisy_python::bridge::PythonBridge; use nvisy_python::ocr::OcrParams; -use super::backend::{OcrBackend, OcrConfig}; +use crate::backend::{OcrBackend, OcrConfig}; /// Converts [`OcrConfig`] to [`OcrParams`] and delegates to `nvisy_python::ocr`. 
#[async_trait::async_trait] diff --git a/crates/nvisy-paddle/src/lib.rs b/crates/nvisy-paddle/src/lib.rs new file mode 100644 index 0000000..ae2b5a9 --- /dev/null +++ b/crates/nvisy-paddle/src/lib.rs @@ -0,0 +1,10 @@ +#![forbid(unsafe_code)] +#![cfg_attr(docsrs, feature(doc_cfg))] +#![doc = include_str!("../README.md")] + +mod backend; +mod bridge; +mod parse; + +pub use backend::{OcrBackend, OcrConfig}; +pub use parse::parse_ocr_entities; diff --git a/crates/nvisy-rig/src/paddle/parse.rs b/crates/nvisy-paddle/src/parse.rs similarity index 100% rename from crates/nvisy-rig/src/paddle/parse.rs rename to crates/nvisy-paddle/src/parse.rs diff --git a/crates/nvisy-rig/Cargo.toml b/crates/nvisy-rig/Cargo.toml index 9cbc64e..4b93770 100644 --- a/crates/nvisy-rig/Cargo.toml +++ b/crates/nvisy-rig/Cargo.toml @@ -23,7 +23,6 @@ rustdoc-args = ["--cfg", "docsrs"] # Internal crates nvisy-core = { workspace = true, features = [] } nvisy-ontology = { workspace = true, features = [] } -nvisy-python = { workspace = true, features = [] } # LLM framework rig-core = { workspace = true, features = ["derive"] } diff --git a/crates/nvisy-rig/src/lib.rs b/crates/nvisy-rig/src/lib.rs index 4353a8b..7510356 100644 --- a/crates/nvisy-rig/src/lib.rs +++ b/crates/nvisy-rig/src/lib.rs @@ -4,7 +4,6 @@ pub mod backend; pub mod bridge; -pub mod paddle; pub(crate) mod agent; #[doc(hidden)] diff --git a/crates/nvisy-rig/src/paddle/mod.rs b/crates/nvisy-rig/src/paddle/mod.rs deleted file mode 100644 index 803eb3b..0000000 --- a/crates/nvisy-rig/src/paddle/mod.rs +++ /dev/null @@ -1,11 +0,0 @@ -//! PaddleOCR / OCR backend integration. -//! -//! Re-exports the OCR backend trait, configuration, entity parsing, and -//! the [`PythonBridge`] implementation. 
- -mod backend; -mod bridge; -mod parse; - -pub use backend::{OcrBackend, OcrConfig}; -pub use parse::parse_ocr_entities; diff --git a/crates/nvisy-rig/src/prelude.rs b/crates/nvisy-rig/src/prelude.rs index 5803e0a..3874b44 100644 --- a/crates/nvisy-rig/src/prelude.rs +++ b/crates/nvisy-rig/src/prelude.rs @@ -12,4 +12,3 @@ pub use crate::agent::{ RawCvEntities, RawCvEntity, RawEntities, RawEntity, RawOcrEntity, }; -pub use crate::paddle::{OcrBackend, OcrConfig, parse_ocr_entities}; diff --git a/docker/Dockerfile b/docker/Dockerfile index da4b294..68f8bfa 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -14,20 +14,21 @@ COPY crates/nvisy-core/Cargo.toml crates/nvisy-core/Cargo.toml COPY crates/nvisy-engine/Cargo.toml crates/nvisy-engine/Cargo.toml COPY crates/nvisy-identify/Cargo.toml crates/nvisy-identify/Cargo.toml COPY crates/nvisy-ontology/Cargo.toml crates/nvisy-ontology/Cargo.toml +COPY crates/nvisy-paddle/Cargo.toml crates/nvisy-paddle/Cargo.toml COPY crates/nvisy-pattern/Cargo.toml crates/nvisy-pattern/Cargo.toml COPY crates/nvisy-python/Cargo.toml crates/nvisy-python/Cargo.toml COPY crates/nvisy-rig/Cargo.toml crates/nvisy-rig/Cargo.toml COPY crates/nvisy-server/Cargo.toml crates/nvisy-server/Cargo.toml # Create empty src files to satisfy cargo's manifest checks -RUN for crate in nvisy-asr nvisy-augment nvisy-codec nvisy-core nvisy-engine nvisy-identify nvisy-ontology nvisy-pattern nvisy-python nvisy-rig; do \ +RUN for crate in nvisy-asr nvisy-augment nvisy-codec nvisy-core nvisy-engine nvisy-identify nvisy-ontology nvisy-paddle nvisy-pattern nvisy-python nvisy-rig; do \ mkdir -p crates/$crate/src && echo "" > crates/$crate/src/lib.rs; \ done && \ mkdir -p crates/nvisy-cli/src && echo "fn main() {}" > crates/nvisy-cli/src/main.rs && \ mkdir -p crates/nvisy-server/src && echo "fn main() {}" > crates/nvisy-server/src/main.rs # Create stub READMEs for crates that use doc = include_str!("../README.md") -RUN for crate in nvisy-asr nvisy-augment 
nvisy-cli nvisy-codec nvisy-core nvisy-engine nvisy-identify nvisy-ontology nvisy-pattern nvisy-python nvisy-rig nvisy-server; do \ +RUN for crate in nvisy-asr nvisy-augment nvisy-cli nvisy-codec nvisy-core nvisy-engine nvisy-identify nvisy-ontology nvisy-paddle nvisy-pattern nvisy-python nvisy-rig nvisy-server; do \ touch crates/$crate/README.md; \ done From 373de8ef7ea54f6f7837b7803bf3328e612c64b5 Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Wed, 25 Feb 2026 21:53:02 +0100 Subject: [PATCH 14/24] refactor(rig): replace from_completion/from_prompt with Error enum in src/error.rs Add a proper Error enum that implements From, From, and Into. Delete the old backend/error.rs helper functions and update all call sites. Co-Authored-By: Claude Opus 4.6 --- crates/nvisy-rig/src/agent/base/agent.rs | 13 +-- crates/nvisy-rig/src/backend/error.rs | 66 ------------- crates/nvisy-rig/src/backend/mod.rs | 2 - crates/nvisy-rig/src/bridge/mod.rs | 8 +- crates/nvisy-rig/src/error.rs | 114 +++++++++++++++++++++++ crates/nvisy-rig/src/lib.rs | 1 + 6 files changed, 125 insertions(+), 79 deletions(-) delete mode 100644 crates/nvisy-rig/src/backend/error.rs create mode 100644 crates/nvisy-rig/src/error.rs diff --git a/crates/nvisy-rig/src/agent/base/agent.rs b/crates/nvisy-rig/src/agent/base/agent.rs index bdb8020..b456267 100644 --- a/crates/nvisy-rig/src/agent/base/agent.rs +++ b/crates/nvisy-rig/src/agent/base/agent.rs @@ -9,7 +9,8 @@ use uuid::Uuid; use nvisy_core::Error; -use crate::backend::{from_completion, from_prompt, UsageTracker}; +use crate::backend::UsageTracker; +use crate::error::Error as RigError; use crate::bridge::ResponseParser; use super::{BaseAgentBuilder, BaseAgentConfig}; @@ -61,10 +62,10 @@ impl BaseAgent { .agent .completion(prompt, vec![]) .await - .map_err(from_completion)? + .map_err(|e| Error::from(RigError::from(e)))? 
.output_schema(schema); - let response = builder.send().await.map_err(from_completion)?; + let response = builder.send().await.map_err(|e| Error::from(RigError::from(e)))?; let parsed = ResponseParser::extract_text(&response)?; self.tracker.record(&response.usage, 0); @@ -90,9 +91,9 @@ impl BaseAgent { .agent .completion(prompt, vec![]) .await - .map_err(from_completion)?; + .map_err(|e| Error::from(RigError::from(e)))?; - let response = builder.send().await.map_err(from_completion)?; + let response = builder.send().await.map_err(|e| Error::from(RigError::from(e)))?; let parsed = ResponseParser::extract_text(&response)?; self.tracker.record(&response.usage, 0); Ok(parsed.as_str().to_owned()) @@ -104,7 +105,7 @@ impl BaseAgent { /// returns only the final text, not the raw response. #[tracing::instrument(skip_all, fields(agent_id = %self.id, mode = "prompt"))] pub async fn prompt(&self, prompt: &str) -> Result { - self.agent.prompt(prompt).await.map_err(from_prompt) + self.agent.prompt(prompt).await.map_err(|e| Error::from(RigError::from(e))) } /// Summarize text via LLM to fit within the context window's input budget. diff --git a/crates/nvisy-rig/src/backend/error.rs b/crates/nvisy-rig/src/backend/error.rs deleted file mode 100644 index 54895ac..0000000 --- a/crates/nvisy-rig/src/backend/error.rs +++ /dev/null @@ -1,66 +0,0 @@ -//! Error mapping from rig-core errors to nvisy-core errors. - -use rig::completion::{CompletionError, PromptError}; - -use nvisy_core::Error; - -/// Convert a rig-core [`PromptError`] into a [`nvisy_core::Error`]. -pub(crate) fn from_prompt(err: PromptError) -> Error { - match err { - PromptError::CompletionError(e) => from_completion(e), - PromptError::ToolError(e) => { - Error::runtime(format!("Tool error: {e}"), "rig", false) - } - PromptError::ToolServerError(e) => { - Error::runtime(format!("Tool server error: {e}"), "rig", true) - } - PromptError::MaxTurnsError { max_turns, .. 
} => { - Error::runtime( - format!("Agent exceeded max turn limit ({max_turns})"), - "rig", - false, - ) - } - PromptError::PromptCancelled { reason, .. } => { - Error::runtime(format!("Prompt cancelled: {reason}"), "rig", false) - } - } -} - -/// Convert a rig-core [`CompletionError`] into a [`nvisy_core::Error`]. -pub(crate) fn from_completion(err: CompletionError) -> Error { - match err { - CompletionError::HttpError(e) => { - Error::connection(format!("HTTP error: {e}"), "rig", true) - } - CompletionError::JsonError(e) => { - Error::new(nvisy_core::ErrorKind::Serialization, format!("JSON error: {e}")) - .with_component("rig") - } - CompletionError::ProviderError(msg) => { - let retryable = is_retryable_provider_error(&msg); - Error::connection(format!("Provider error: {msg}"), "rig", retryable) - } - CompletionError::ResponseError(msg) => { - Error::runtime(format!("Response error: {msg}"), "rig", false) - } - CompletionError::RequestError(e) => { - Error::validation(format!("Request error: {e}"), "rig") - } - CompletionError::UrlError(e) => { - Error::validation(format!("URL error: {e}"), "rig") - } - } -} - -/// Check if a provider error message indicates a retryable condition. -fn is_retryable_provider_error(msg: &str) -> bool { - let lower = msg.to_lowercase(); - lower.contains("rate_limit") - || lower.contains("rate limit") - || lower.contains("overloaded") - || lower.contains("timeout") - || lower.contains("429") - || lower.contains("503") - || lower.contains("529") -} diff --git a/crates/nvisy-rig/src/backend/mod.rs b/crates/nvisy-rig/src/backend/mod.rs index a903516..8952389 100644 --- a/crates/nvisy-rig/src/backend/mod.rs +++ b/crates/nvisy-rig/src/backend/mod.rs @@ -1,10 +1,8 @@ //! LLM backend types, error mapping, and Tower retry policy. 
-mod error; mod metrics; mod retry; -pub(crate) use error::{from_completion, from_prompt}; pub use metrics::{UsageStats, UsageTracker}; pub use retry::RetryPolicy; diff --git a/crates/nvisy-rig/src/bridge/mod.rs b/crates/nvisy-rig/src/bridge/mod.rs index 5d68a02..d393a05 100644 --- a/crates/nvisy-rig/src/bridge/mod.rs +++ b/crates/nvisy-rig/src/bridge/mod.rs @@ -13,10 +13,8 @@ use rig::completion::CompletionModel; use nvisy_core::Error; -use crate::backend::{ - from_completion, DetectionRequest, DetectionResponse, - RetryPolicy, UsageTracker, -}; +use crate::backend::{DetectionRequest, DetectionResponse, RetryPolicy, UsageTracker}; +use crate::error::Error as RigError; /// Configuration for [`ServiceBackend`] (and its [`RigBackend`] specialisation). #[derive(Debug, Clone)] @@ -145,7 +143,7 @@ where builder = builder.preamble(preamble.clone()); } - let response = builder.send().await.map_err(from_completion)?; + let response = builder.send().await.map_err(|e| Error::from(RigError::from(e)))?; let parsed = ResponseParser::extract_text(&response)?; let entities = parsed.parse_json()?; diff --git a/crates/nvisy-rig/src/error.rs b/crates/nvisy-rig/src/error.rs new file mode 100644 index 0000000..f03513a --- /dev/null +++ b/crates/nvisy-rig/src/error.rs @@ -0,0 +1,114 @@ +//! Error types for the rig crate. + +use rig::completion::{CompletionError, PromptError}; + +/// Errors produced by rig-core LLM interactions. +#[derive(Debug, thiserror::Error)] +pub enum Error { + /// An HTTP / network error from the LLM provider. + #[error("HTTP error: {0}")] + Http(String), + + /// A JSON (de)serialization error. + #[error("JSON error: {0}")] + Json(#[from] serde_json::Error), + + /// The LLM provider returned an error response. + #[error("Provider error: {0}")] + Provider(String), + + /// The LLM response was malformed or unexpected. + #[error("Response error: {0}")] + Response(String), + + /// The request could not be constructed. 
+ #[error("Request error: {0}")] + Request(String), + + /// A tool call failed during an agent prompt. + #[error("Tool error: {0}")] + Tool(String), + + /// The agent exceeded its maximum turn limit. + #[error("Agent exceeded max turn limit ({0})")] + MaxTurns(usize), + + /// The prompt was cancelled. + #[error("Prompt cancelled: {0}")] + Cancelled(String), +} + +impl Error { + /// Whether this error is likely transient and safe to retry. + pub fn is_retryable(&self) -> bool { + match self { + Self::Http(_) => true, + Self::Provider(msg) => is_retryable_provider_error(msg), + _ => false, + } + } +} + +impl From for Error { + fn from(err: CompletionError) -> Self { + match err { + CompletionError::HttpError(e) => Self::Http(e.to_string()), + CompletionError::JsonError(e) => Self::Json(e), + CompletionError::ProviderError(msg) => Self::Provider(msg), + CompletionError::ResponseError(msg) => Self::Response(msg), + CompletionError::RequestError(e) => Self::Request(e.to_string()), + CompletionError::UrlError(e) => Self::Request(format!("URL: {e}")), + } + } +} + +impl From for Error { + fn from(err: PromptError) -> Self { + match err { + PromptError::CompletionError(e) => Self::from(e), + PromptError::ToolError(e) => Self::Tool(e.to_string()), + PromptError::ToolServerError(e) => Self::Tool(format!("server: {e}")), + PromptError::MaxTurnsError { max_turns, .. } => Self::MaxTurns(max_turns), + PromptError::PromptCancelled { reason, .. 
} => Self::Cancelled(reason), + } + } +} + +impl From for nvisy_core::Error { + fn from(err: Error) -> Self { + match &err { + Error::Http(_) => { + nvisy_core::Error::connection(err.to_string(), "rig", true) + } + Error::Json(_) => { + nvisy_core::Error::new(nvisy_core::ErrorKind::Serialization, err.to_string()) + .with_component("rig") + } + Error::Provider(msg) => { + let retryable = is_retryable_provider_error(msg); + nvisy_core::Error::connection(err.to_string(), "rig", retryable) + } + Error::Response(_) => { + nvisy_core::Error::runtime(err.to_string(), "rig", false) + } + Error::Request(_) => { + nvisy_core::Error::validation(err.to_string(), "rig") + } + Error::Tool(_) | Error::MaxTurns(_) | Error::Cancelled(_) => { + nvisy_core::Error::runtime(err.to_string(), "rig", false) + } + } + } +} + +/// Check if a provider error message indicates a retryable condition. +fn is_retryable_provider_error(msg: &str) -> bool { + let lower = msg.to_lowercase(); + lower.contains("rate_limit") + || lower.contains("rate limit") + || lower.contains("overloaded") + || lower.contains("timeout") + || lower.contains("429") + || lower.contains("503") + || lower.contains("529") +} diff --git a/crates/nvisy-rig/src/lib.rs b/crates/nvisy-rig/src/lib.rs index 7510356..fe5f2dc 100644 --- a/crates/nvisy-rig/src/lib.rs +++ b/crates/nvisy-rig/src/lib.rs @@ -4,6 +4,7 @@ pub mod backend; pub mod bridge; +pub mod error; pub(crate) mod agent; #[doc(hidden)] From de5eb965f043848338497d59f3bfa0b45e27a6d8 Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Thu, 26 Feb 2026 00:05:24 +0100 Subject: [PATCH 15/24] refactor(rig): remove generics from public API, add Provider enum with plain connection params Replace all CompletionModel generics with a Provider enum holding connection parameters (api_key, base_url). Client construction is deferred to build time via ProviderClient. Agent and backend constructors now return Result to propagate client errors instead of panicking. 
Co-Authored-By: Claude Opus 4.6 --- crates/nvisy-rig/src/agent/base/agent.rs | 81 ++++++------ crates/nvisy-rig/src/agent/base/builder.rs | 101 ++++++++++----- crates/nvisy-rig/src/agent/base/context.rs | 10 +- crates/nvisy-rig/src/agent/base/dispatch.rs | 29 +++++ crates/nvisy-rig/src/agent/base/mod.rs | 9 +- crates/nvisy-rig/src/agent/base/provider.rs | 134 +++++++++++++++++++ crates/nvisy-rig/src/agent/detect/mod.rs | 27 ++-- crates/nvisy-rig/src/agent/extract/mod.rs | 27 ++-- crates/nvisy-rig/src/agent/mod.rs | 4 +- crates/nvisy-rig/src/agent/recognize/mod.rs | 22 ++-- crates/nvisy-rig/src/backend/retry.rs | 15 +-- crates/nvisy-rig/src/bridge/mod.rs | 137 +++++++++++++------- crates/nvisy-rig/src/bridge/response.rs | 33 ++--- crates/nvisy-rig/src/error.rs | 48 ++++++- crates/nvisy-rig/src/lib.rs | 4 +- crates/nvisy-rig/src/prelude.rs | 4 +- 16 files changed, 493 insertions(+), 192 deletions(-) create mode 100644 crates/nvisy-rig/src/agent/base/dispatch.rs create mode 100644 crates/nvisy-rig/src/agent/base/provider.rs diff --git a/crates/nvisy-rig/src/agent/base/agent.rs b/crates/nvisy-rig/src/agent/base/agent.rs index b456267..c926451 100644 --- a/crates/nvisy-rig/src/agent/base/agent.rs +++ b/crates/nvisy-rig/src/agent/base/agent.rs @@ -1,39 +1,36 @@ -//! [`BaseAgent`]: internal foundation agent wrapping rig-core's `Agent`. +//! [`BaseAgent`]: internal foundation agent wrapping rig-core agents. -use rig::agent::Agent; -use rig::completion::{Completion, CompletionModel, Prompt}; +use rig::completion::{Completion, Prompt}; use schemars::JsonSchema; use serde::de::DeserializeOwned; use serde::Serialize; use uuid::Uuid; -use nvisy_core::Error; - use crate::backend::UsageTracker; -use crate::error::Error as RigError; use crate::bridge::ResponseParser; +use crate::error::Error; +use super::dispatch::{Agents, dispatch}; use super::{BaseAgentBuilder, BaseAgentConfig}; use super::context::ContextWindow; -/// Internal foundation agent wrapping rig-core's [`Agent`]. 
+/// Internal foundation agent wrapping a provider-specific rig-core agent. /// -/// All prompt methods route through the built `Agent`, which already -/// carries the preamble, temperature, max_tokens, and tools configured -/// via [`BaseAgentBuilder`]. +/// All prompt methods dispatch to the concrete agent variant held inside +/// [`Agents`]. Specialized agents (e.g. `NerAgent`) compose this type. /// /// Not exported: specialized agents (e.g. `NerAgent`) compose this. -pub(crate) struct BaseAgent { +pub(crate) struct BaseAgent { pub(super) id: Uuid, - pub(super) agent: Agent, + pub(super) inner: Agents, pub(super) context_window: Option, pub(super) tracker: UsageTracker, } -impl BaseAgent { +impl BaseAgent { /// Create a new builder. - pub fn builder(model: M, config: BaseAgentConfig) -> BaseAgentBuilder { - BaseAgentBuilder::new(model, config) + pub fn builder(provider: &crate::agent::Provider, model_name: &str, config: BaseAgentConfig) -> BaseAgentBuilder { + BaseAgentBuilder::new(provider, model_name, config) } /// Unique identifier for this agent instance (UUIDv7). @@ -58,18 +55,22 @@ impl BaseAgent { { let schema = schemars::schema_for!(T); - let builder = self - .agent - .completion(prompt, vec![]) - .await - .map_err(|e| Error::from(RigError::from(e)))? - .output_schema(schema); + let (text, usage) = dispatch!(&self.inner, |agent| { + let builder = agent + .completion(prompt, vec![]) + .await + .map_err(Error::from)? 
+ .output_schema(schema); + + let response = builder.send().await.map_err(Error::from)?; + let parsed = ResponseParser::extract_text(&response)?; + Ok::<_, Error>((parsed.into_string(), response.usage)) + })?; - let response = builder.send().await.map_err(|e| Error::from(RigError::from(e)))?; - let parsed = ResponseParser::extract_text(&response)?; - self.tracker.record(&response.usage, 0); + self.tracker.record(&usage, 0); - match serde_json::from_str::(parsed.as_str()) { + let parser = ResponseParser::from_text(&text); + match serde_json::from_str::(&text) { Ok(value) => { tracing::debug!("structured output succeeded"); Ok(value) @@ -79,7 +80,7 @@ impl BaseAgent { error = %structured_err, "structured JSON parse failed, falling back to text-based parsing" ); - parsed.parse_json() + parser.parse_json() } } } @@ -87,16 +88,19 @@ impl BaseAgent { /// Text completion through the agent, records usage. #[tracing::instrument(skip_all, fields(agent_id = %self.id, mode = "text"))] pub async fn prompt_text(&self, prompt: &str) -> Result { - let builder = self - .agent - .completion(prompt, vec![]) - .await - .map_err(|e| Error::from(RigError::from(e)))?; - - let response = builder.send().await.map_err(|e| Error::from(RigError::from(e)))?; - let parsed = ResponseParser::extract_text(&response)?; - self.tracker.record(&response.usage, 0); - Ok(parsed.as_str().to_owned()) + let (text, usage) = dispatch!(&self.inner, |agent| { + let builder = agent + .completion(prompt, vec![]) + .await + .map_err(Error::from)?; + + let response = builder.send().await.map_err(Error::from)?; + let parsed = ResponseParser::extract_text(&response)?; + Ok::<_, Error>((parsed.into_string(), response.usage)) + })?; + + self.tracker.record(&usage, 0); + Ok(text) } /// Plain text completion through the agent (no usage tracking). @@ -105,7 +109,9 @@ impl BaseAgent { /// returns only the final text, not the raw response. 
#[tracing::instrument(skip_all, fields(agent_id = %self.id, mode = "prompt"))] pub async fn prompt(&self, prompt: &str) -> Result { - self.agent.prompt(prompt).await.map_err(|e| Error::from(RigError::from(e))) + dispatch!(&self.inner, |agent| { + agent.prompt(prompt).await.map_err(Error::from) + }) } /// Summarize text via LLM to fit within the context window's input budget. @@ -147,5 +153,4 @@ impl BaseAgent { Ok(all_results) } - } diff --git a/crates/nvisy-rig/src/agent/base/builder.rs b/crates/nvisy-rig/src/agent/base/builder.rs index e6477e3..b06bb5f 100644 --- a/crates/nvisy-rig/src/agent/base/builder.rs +++ b/crates/nvisy-rig/src/agent/base/builder.rs @@ -2,27 +2,33 @@ //! typestate for optional tools. use rig::agent::AgentBuilder; +use rig::client::CompletionClient; use rig::completion::CompletionModel; use rig::tool::{Tool, ToolDyn}; use uuid::Uuid; use crate::backend::UsageTracker; +use crate::error::Error; +use super::dispatch::Agents; +use super::provider::{Provider, ProviderClient}; use super::{BaseAgent, BaseAgentConfig}; -/// Builder for [`BaseAgent`] that handles rig-core's typestate for tools. -pub(crate) struct BaseAgentBuilder { - model: M, +/// Builder for [`BaseAgent`] that takes a `&Provider` + model name. +pub(crate) struct BaseAgentBuilder { + provider: Provider, + model_name: String, config: BaseAgentConfig, preamble: Option, tools: Vec>, } -impl BaseAgentBuilder { - /// Create a new builder with the given model and config. - pub fn new(model: M, config: BaseAgentConfig) -> Self { +impl BaseAgentBuilder { + /// Create a new builder with the given provider, model name, and config. + pub fn new(provider: &Provider, model_name: &str, config: BaseAgentConfig) -> Self { Self { - model, + provider: provider.clone(), + model_name: model_name.to_owned(), config, preamble: None, tools: Vec::new(), @@ -42,35 +48,72 @@ impl BaseAgentBuilder { } /// Build the [`BaseAgent`]. 
- pub fn build(self) -> BaseAgent { - let agent = if self.tools.is_empty() { - let mut builder = AgentBuilder::new(self.model) - .temperature(self.config.temperature) - .max_tokens(self.config.max_tokens); - - if let Some(ref preamble) = self.preamble { - builder = builder.preamble(preamble); - } + pub fn build(self) -> Result { + let Self { + provider, + model_name, + config, + preamble, + tools, + } = self; - builder.build() - } else { - let mut builder = AgentBuilder::new(self.model) - .temperature(self.config.temperature) - .max_tokens(self.config.max_tokens) - .tools(self.tools); + let preamble_ref = preamble.as_deref(); + let client = ProviderClient::from_provider(&provider)?; - if let Some(ref preamble) = self.preamble { - builder = builder.preamble(preamble); + let inner = match client { + ProviderClient::OpenAi(c) => { + Agents::OpenAi(build_rig_agent(c.completion_model(&model_name), &config, preamble_ref, tools)) + } + ProviderClient::Anthropic(c) => { + Agents::Anthropic(build_rig_agent(c.completion_model(&model_name), &config, preamble_ref, tools)) + } + ProviderClient::Gemini(c) => { + Agents::Gemini(build_rig_agent(c.completion_model(&model_name), &config, preamble_ref, tools)) + } + ProviderClient::Ollama(c) => { + Agents::Ollama(build_rig_agent(c.completion_model(&model_name), &config, preamble_ref, tools)) } - - builder.build() }; - BaseAgent { + Ok(BaseAgent { id: Uuid::now_v7(), - agent, - context_window: self.config.context_window, + inner, + context_window: config.context_window, tracker: UsageTracker::new(), + }) + } +} + +/// Build a concrete rig-core `Agent`. +/// +/// Generic over `M` but only called inside [`BaseAgentBuilder::build`] — +/// the generic never escapes the module boundary. 
+fn build_rig_agent( + model: M, + config: &BaseAgentConfig, + preamble: Option<&str>, + tools: Vec>, +) -> rig::agent::Agent { + if tools.is_empty() { + let mut builder = AgentBuilder::new(model) + .temperature(config.temperature) + .max_tokens(config.max_tokens); + + if let Some(preamble) = preamble { + builder = builder.preamble(preamble); } + + builder.build() + } else { + let mut builder = AgentBuilder::new(model) + .temperature(config.temperature) + .max_tokens(config.max_tokens) + .tools(tools); + + if let Some(preamble) = preamble { + builder = builder.preamble(preamble); + } + + builder.build() } } diff --git a/crates/nvisy-rig/src/agent/base/context.rs b/crates/nvisy-rig/src/agent/base/context.rs index 1680299..17e3cb6 100644 --- a/crates/nvisy-rig/src/agent/base/context.rs +++ b/crates/nvisy-rig/src/agent/base/context.rs @@ -1,8 +1,6 @@ //! Context window management for LLM token limits. -use rig::completion::CompletionModel; - -use nvisy_core::Error; +use crate::error::Error; use super::agent::BaseAgent; @@ -27,7 +25,7 @@ impl ContextWindow { /// Estimate the number of tokens in a string (~4 chars per token). pub fn estimate_tokens(text: &str) -> usize { // Rough heuristic: ~4 characters per token for English text. - (text.len() + 3) / 4 + text.len().div_ceil(4) } /// Available input token budget (max minus reserved output). @@ -90,10 +88,10 @@ impl ContextWindow { /// If the text already fits, returns it unchanged. Otherwise sends a /// summarization prompt to the given agent and returns the condensed /// version. - pub(crate) async fn compact( + pub(crate) async fn compact( &self, text: &str, - agent: &BaseAgent, + agent: &BaseAgent, ) -> Result { if self.fits(text) { return Ok(text.to_owned()); diff --git a/crates/nvisy-rig/src/agent/base/dispatch.rs b/crates/nvisy-rig/src/agent/base/dispatch.rs new file mode 100644 index 0000000..a3f842b --- /dev/null +++ b/crates/nvisy-rig/src/agent/base/dispatch.rs @@ -0,0 +1,29 @@ +//! 
Provider-specific agent variants. +//! +//! [`Agents`] wraps a concrete `rig::agent::Agent` for each +//! supported provider, enabling dispatch without exposing `CompletionModel` +//! generics to the public API. + +use rig::agent::Agent; +use rig::providers::{anthropic, gemini, ollama, openai}; + +pub(crate) enum Agents { + OpenAi(Agent), + Anthropic(Agent), + Gemini(Agent), + Ollama(Agent), +} + +/// Dispatch a call to the concrete agent inside each variant. +macro_rules! dispatch { + ($inner:expr, |$agent:ident| $body:expr) => { + match $inner { + Agents::OpenAi($agent) => $body, + Agents::Anthropic($agent) => $body, + Agents::Gemini($agent) => $body, + Agents::Ollama($agent) => $body, + } + }; +} + +pub(crate) use dispatch; diff --git a/crates/nvisy-rig/src/agent/base/mod.rs b/crates/nvisy-rig/src/agent/base/mod.rs index 2029865..122d6cb 100644 --- a/crates/nvisy-rig/src/agent/base/mod.rs +++ b/crates/nvisy-rig/src/agent/base/mod.rs @@ -1,17 +1,20 @@ //! Internal foundation agent and builder. //! -//! [`BaseAgent`] wraps rig-core's `Agent` with usage tracking and -//! structured-output fallback. [`BaseAgentBuilder`] handles rig-core's -//! typestate for optional tools. +//! [`BaseAgent`] wraps a provider-specific rig-core agent with usage tracking +//! and structured-output fallback. [`BaseAgentBuilder`] takes a `&Provider` + +//! model name and dispatches to the correct concrete agent type internally. mod agent; mod builder; pub(crate) mod context; +pub(crate) mod dispatch; +pub(crate) mod provider; pub(crate) use agent::BaseAgent; pub(crate) use builder::BaseAgentBuilder; pub use context::ContextWindow; +pub use provider::{AuthenticatedProvider, Provider, UnauthenticatedProvider}; /// Configuration for a [`BaseAgent`]. 
#[derive(Debug, Clone)] diff --git a/crates/nvisy-rig/src/agent/base/provider.rs b/crates/nvisy-rig/src/agent/base/provider.rs new file mode 100644 index 0000000..39f93ac --- /dev/null +++ b/crates/nvisy-rig/src/agent/base/provider.rs @@ -0,0 +1,134 @@ +//! LLM provider connection parameters. +//! +//! [`Provider`] is a plain data enum carrying API keys and optional base +//! URLs. Client construction is deferred until an agent or backend is built. + +use rig::client::Nothing; +use rig::providers::{anthropic, gemini, ollama, openai}; + +use crate::error::Error; + +/// Provider that requires an API key (OpenAI, Anthropic, Gemini). +#[derive(Clone)] +pub struct AuthenticatedProvider { + pub api_key: String, + pub base_url: Option, +} + +/// Provider that does not require an API key (Ollama). +#[derive(Clone)] +pub struct UnauthenticatedProvider { + pub base_url: Option, +} + +/// Supported LLM providers. +/// +/// Each variant holds only connection parameters. The actual rig client +/// is constructed lazily when an agent or backend is built. +/// +/// # Example +/// ```rust,ignore +/// let provider = Provider::openai("sk-..."); +/// let agent = NerAgent::new(&provider, "gpt-4o", config); +/// ``` +#[derive(Clone)] +pub enum Provider { + /// OpenAI (GPT-4o, GPT-4, etc.) + OpenAi(AuthenticatedProvider), + /// Anthropic (Claude) + Anthropic(AuthenticatedProvider), + /// Google Gemini + Gemini(AuthenticatedProvider), + /// Ollama (local models) + Ollama(UnauthenticatedProvider), +} + +impl Provider { + /// Create an OpenAI provider from an API key. + pub fn openai(api_key: &str) -> Self { + Self::OpenAi(AuthenticatedProvider { + api_key: api_key.to_owned(), + base_url: None, + }) + } + + /// Create an Anthropic provider from an API key. + pub fn anthropic(api_key: &str) -> Self { + Self::Anthropic(AuthenticatedProvider { + api_key: api_key.to_owned(), + base_url: None, + }) + } + + /// Create a Google Gemini provider from an API key. 
+ pub fn gemini(api_key: &str) -> Self { + Self::Gemini(AuthenticatedProvider { + api_key: api_key.to_owned(), + base_url: None, + }) + } + + /// Create an Ollama provider using the default local URL. + pub fn ollama() -> Self { + Self::Ollama(UnauthenticatedProvider { base_url: None }) + } + + /// Create an Ollama provider with a custom base URL. + pub fn ollama_with_url(url: &str) -> Self { + Self::Ollama(UnauthenticatedProvider { + base_url: Some(url.to_owned()), + }) + } +} + +/// Internal helper — builds a concrete rig client from connection params. +pub(crate) enum ProviderClient { + OpenAi(openai::CompletionsClient), + Anthropic(anthropic::Client), + Gemini(gemini::Client), + Ollama(ollama::Client), +} + +impl ProviderClient { + pub(crate) fn from_provider(provider: &Provider) -> Result { + match provider { + Provider::OpenAi(p) => { + let mut builder = openai::Client::builder().api_key(&p.api_key); + if let Some(url) = &p.base_url { + builder = builder.base_url(url); + } + let client = builder + .build() + .map_err(|e| Error::Client(e.to_string()))?; + Ok(Self::OpenAi(client.completions_api())) + } + Provider::Anthropic(p) => { + let mut builder = anthropic::Client::builder().api_key(&p.api_key); + if let Some(url) = &p.base_url { + builder = builder.base_url(url); + } + Ok(Self::Anthropic( + builder.build().map_err(|e| Error::Client(e.to_string()))?, + )) + } + Provider::Gemini(p) => { + let mut builder = gemini::Client::builder().api_key(&p.api_key); + if let Some(url) = &p.base_url { + builder = builder.base_url(url); + } + Ok(Self::Gemini( + builder.build().map_err(|e| Error::Client(e.to_string()))?, + )) + } + Provider::Ollama(p) => { + let mut builder = ollama::Client::builder().api_key(Nothing); + if let Some(url) = &p.base_url { + builder = builder.base_url(url); + } + Ok(Self::Ollama( + builder.build().map_err(|e| Error::Client(e.to_string()))?, + )) + } + } + } +} diff --git a/crates/nvisy-rig/src/agent/detect/mod.rs 
b/crates/nvisy-rig/src/agent/detect/mod.rs index 4ea4cbf..abae137 100644 --- a/crates/nvisy-rig/src/agent/detect/mod.rs +++ b/crates/nvisy-rig/src/agent/detect/mod.rs @@ -14,15 +14,13 @@ pub use output::{RawCvEntities, RawCvEntity}; use async_trait::async_trait; use base64::Engine; use base64::engine::general_purpose::STANDARD; -use rig::completion::CompletionModel; use serde::Serialize; use uuid::Uuid; -use nvisy_core::Error; - use crate::backend::{DetectionConfig, UsageTracker}; +use crate::error::Error; -use super::{BaseAgent, BaseAgentConfig}; +use super::{BaseAgent, BaseAgentConfig, Provider}; use prompt::{CV_SYSTEM_PROMPT, CvPromptBuilder}; use tool::CvRigTool; @@ -63,18 +61,23 @@ pub trait CvProvider: Send + Sync { /// by the [`CvProvider`]) and then classify each detection into an /// entity category and type. /// 4. Structured output is parsed into a `Vec`. -pub struct CvAgent { - base: BaseAgent, +pub struct CvAgent { + base: BaseAgent, } -impl CvAgent { - /// Create a new CV agent with the given model, config, and CV provider. - pub fn new(model: M, config: BaseAgentConfig, cv: impl CvProvider + 'static) -> Self { - let base = BaseAgent::builder(model, config) +impl CvAgent { + /// Create a new CV agent with the given provider, model name, config, and CV provider. + pub fn new( + provider: &Provider, + model: &str, + config: BaseAgentConfig, + cv: impl CvProvider + 'static, + ) -> Result { + let base = BaseAgent::builder(provider, model, config) .preamble(CV_SYSTEM_PROMPT) .tool(CvRigTool::new(cv)) - .build(); - Self { base } + .build()?; + Ok(Self { base }) } /// Unique identifier for this agent instance (UUIDv7). 
diff --git a/crates/nvisy-rig/src/agent/extract/mod.rs b/crates/nvisy-rig/src/agent/extract/mod.rs index 15a03ba..01194c0 100644 --- a/crates/nvisy-rig/src/agent/extract/mod.rs +++ b/crates/nvisy-rig/src/agent/extract/mod.rs @@ -14,15 +14,13 @@ pub use output::{OcrOutput, RawOcrEntity}; use async_trait::async_trait; use base64::Engine; use base64::engine::general_purpose::STANDARD; -use rig::completion::CompletionModel; use serde::Serialize; use uuid::Uuid; -use nvisy_core::Error; - use crate::backend::{DetectionConfig, UsageTracker}; +use crate::error::Error; -use super::{BaseAgent, BaseAgentConfig}; +use super::{BaseAgent, BaseAgentConfig, Provider}; use prompt::{OCR_SYSTEM_PROMPT, OcrPromptBuilder}; use tool::OcrRigTool; @@ -66,18 +64,23 @@ pub trait OcrProvider: Send + Sync { /// 3. The VLM is instructed to call the `ocr_extract_text` tool (backed by /// the [`OcrProvider`]) and then analyse the result for PII/PHI entities. /// 4. Structured output is parsed into [`OcrOutput`]. -pub struct OcrAgent { - base: BaseAgent, +pub struct OcrAgent { + base: BaseAgent, } -impl OcrAgent { - /// Create a new OCR agent with the given model, config, and OCR provider. - pub fn new(model: M, config: BaseAgentConfig, ocr: impl OcrProvider + 'static) -> Self { - let base = BaseAgent::builder(model, config) +impl OcrAgent { + /// Create a new OCR agent with the given provider, model name, config, and OCR provider. + pub fn new( + provider: &Provider, + model: &str, + config: BaseAgentConfig, + ocr: impl OcrProvider + 'static, + ) -> Result { + let base = BaseAgent::builder(provider, model, config) .preamble(OCR_SYSTEM_PROMPT) .tool(OcrRigTool::new(ocr)) - .build(); - Self { base } + .build()?; + Ok(Self { base }) } /// Unique identifier for this agent instance (UUIDv7). 
diff --git a/crates/nvisy-rig/src/agent/mod.rs b/crates/nvisy-rig/src/agent/mod.rs index ed2f21e..1cbcda2 100644 --- a/crates/nvisy-rig/src/agent/mod.rs +++ b/crates/nvisy-rig/src/agent/mod.rs @@ -3,13 +3,13 @@ //! All public types are re-exported here — consumer code should not reach //! into individual agent submodules. -mod base; +pub(crate) mod base; mod detect; mod extract; mod recognize; pub(crate) use base::BaseAgent; -pub use base::{BaseAgentConfig, ContextWindow}; +pub use base::{AuthenticatedProvider, BaseAgentConfig, ContextWindow, Provider, UnauthenticatedProvider}; pub use recognize::{NerAgent, RawEntities, RawEntity}; pub use extract::{OcrAgent, OcrOutput, OcrProvider, OcrTextRegion, RawOcrEntity}; diff --git a/crates/nvisy-rig/src/agent/recognize/mod.rs b/crates/nvisy-rig/src/agent/recognize/mod.rs index 5f89735..8716da5 100644 --- a/crates/nvisy-rig/src/agent/recognize/mod.rs +++ b/crates/nvisy-rig/src/agent/recognize/mod.rs @@ -9,14 +9,12 @@ mod prompt; pub use output::{RawEntities, RawEntity}; -use rig::completion::CompletionModel; use uuid::Uuid; -use nvisy_core::Error; - use crate::backend::{DetectionConfig, UsageTracker}; +use crate::error::Error; -use super::{BaseAgent, BaseAgentConfig}; +use super::{BaseAgent, BaseAgentConfig, Provider}; use prompt::{NER_SYSTEM_PROMPT, NerPromptBuilder}; /// Agent for textual PII/entity detection using LLM-based NER. @@ -28,17 +26,17 @@ use prompt::{NER_SYSTEM_PROMPT, NerPromptBuilder}; /// 2. The agent builds a user prompt via [`NerPromptBuilder`] that /// specifies entity types and confidence thresholds. /// 3. Structured output is parsed into `Vec`. -pub struct NerAgent { - base: BaseAgent, +pub struct NerAgent { + base: BaseAgent, } -impl NerAgent { - /// Create a new NER agent with the given model and config. 
- pub fn new(model: M, config: BaseAgentConfig) -> Self { - let base = BaseAgent::builder(model, config) +impl NerAgent { + /// Create a new NER agent with the given provider, model name, and config. + pub fn new(provider: &Provider, model: &str, config: BaseAgentConfig) -> Result { + let base = BaseAgent::builder(provider, model, config) .preamble(NER_SYSTEM_PROMPT) - .build(); - Self { base } + .build()?; + Ok(Self { base }) } /// Unique identifier for this agent instance (UUIDv7). diff --git a/crates/nvisy-rig/src/backend/retry.rs b/crates/nvisy-rig/src/backend/retry.rs index 3de416e..18e9892 100644 --- a/crates/nvisy-rig/src/backend/retry.rs +++ b/crates/nvisy-rig/src/backend/retry.rs @@ -1,8 +1,9 @@ //! Tower retry policy with exponential backoff. -use std::time::Duration; +use std::{pin::Pin, time::Duration}; use nvisy_core::Error; +use tower::retry::Policy; /// Tower retry policy with exponential backoff for retryable errors. /// @@ -49,17 +50,13 @@ impl RetryPolicy { } } -impl tower::retry::Policy for RetryPolicy +impl Policy for RetryPolicy where Req: Clone, { - type Future = std::pin::Pin + Send>>; + type Future = Pin + Send>>; - fn retry( - &mut self, - _req: &mut Req, - result: &mut Result, - ) -> Option { + fn retry(&mut self, _req: &mut Req, result: &mut Result) -> Option { match result { Ok(_) => None, Err(err) => { @@ -98,7 +95,7 @@ where #[cfg(test)] mod tests { use super::*; - use crate::backend::{DetectionRequest, DetectionResponse, DetectionConfig}; + use crate::backend::{DetectionConfig, DetectionRequest, DetectionResponse}; use tower::retry::Policy; #[tokio::test] diff --git a/crates/nvisy-rig/src/bridge/mod.rs b/crates/nvisy-rig/src/bridge/mod.rs index d393a05..3c5b7ca 100644 --- a/crates/nvisy-rig/src/bridge/mod.rs +++ b/crates/nvisy-rig/src/bridge/mod.rs @@ -9,28 +9,22 @@ pub use response::{EntityParser, ResponseParser}; use std::sync::Arc; use std::task::{Context, Poll}; +use rig::client::CompletionClient; use 
rig::completion::CompletionModel; +use rig::providers::{anthropic, gemini, ollama, openai}; -use nvisy_core::Error; - +use crate::agent::Provider; +use crate::agent::base::provider::ProviderClient; use crate::backend::{DetectionRequest, DetectionResponse, RetryPolicy, UsageTracker}; -use crate::error::Error as RigError; +use crate::error::Error; /// Configuration for [`ServiceBackend`] (and its [`RigBackend`] specialisation). -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Default)] pub struct RigBackendConfig { /// Retry policy for transient errors. pub retry: RetryPolicy, } -impl Default for RigBackendConfig { - fn default() -> Self { - Self { - retry: RetryPolicy::new(), - } - } -} - /// Generic Tower service adapter. /// /// Wraps any inner service `S` with a retry policy and usage tracking. @@ -65,12 +59,12 @@ impl ServiceBackend { impl tower::Service for ServiceBackend where - S: tower::Service, + S: tower::Service, S::Future: Send + 'static, { type Response = DetectionResponse; - type Error = Error; - type Future = std::pin::Pin> + Send>>; + type Error = nvisy_core::Error; + type Future = std::pin::Pin> + Send>>; fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll> { self.inner.poll_ready(cx) @@ -101,23 +95,49 @@ where } } -/// Inner service that drives a raw rig-core [`CompletionModel`]. +enum InnerModel { + OpenAi(Arc), + Anthropic(Arc), + Gemini(Arc), + Ollama(Arc), +} + +impl InnerModel { + fn clone_arc(&self) -> Self { + match self { + Self::OpenAi(m) => Self::OpenAi(Arc::clone(m)), + Self::Anthropic(m) => Self::Anthropic(Arc::clone(m)), + Self::Gemini(m) => Self::Gemini(Arc::clone(m)), + Self::Ollama(m) => Self::Ollama(Arc::clone(m)), + } + } +} + +macro_rules! 
dispatch_model { + ($inner:expr, |$model:ident| $body:expr) => { + match $inner { + InnerModel::OpenAi($model) => $body, + InnerModel::Anthropic($model) => $body, + InnerModel::Gemini($model) => $body, + InnerModel::Ollama($model) => $body, + } + }; +} + +/// Inner service that drives a rig-core completion model. /// /// This is the low-level service that constructs prompts and parses /// responses. Wrap it in [`ServiceBackend`] for retry and usage tracking. -pub struct RigBackendInner { - model: Arc, +pub struct RigBackendInner { + model: InnerModel, temperature: f64, max_tokens: u64, } -impl tower::Service for RigBackendInner -where - M: CompletionModel + Send + Sync + 'static, -{ +impl tower::Service for RigBackendInner { type Response = DetectionResponse; - type Error = Error; - type Future = std::pin::Pin> + Send>>; + type Error = nvisy_core::Error; + type Future = std::pin::Pin> + Send>>; fn poll_ready(&mut self, _cx: &mut Context<'_>) -> Poll> { Poll::Ready(Ok(())) @@ -126,7 +146,7 @@ where fn call(&mut self, req: DetectionRequest) -> Self::Future { let user_prompt = PromptBuilder::new(&req.config).build(&req.text); let system_prompt = req.config.system_prompt.clone(); - let model = Arc::clone(&self.model); + let model = self.model.clone_arc(); let temperature = self.temperature; let max_tokens = self.max_tokens; @@ -134,44 +154,63 @@ where let span = tracing::info_span!("rig_backend_call"); let _enter = span.enter(); - let mut builder = model - .completion_request(&user_prompt) - .temperature(temperature) - .max_tokens(max_tokens); + let (parsed, usage) = dispatch_model!(&model, |model| { + let mut builder = model + .completion_request(&user_prompt) + .temperature(temperature) + .max_tokens(max_tokens); - if let Some(ref preamble) = system_prompt { - builder = builder.preamble(preamble.clone()); - } + if let Some(ref preamble) = system_prompt { + builder = builder.preamble(preamble.clone()); + } + + let response = builder.send().await.map_err(|e| { + 
nvisy_core::Error::from(Error::from(e)) + })?; + let text = ResponseParser::extract_text(&response) + .map_err(nvisy_core::Error::from)?; + Ok::<_, nvisy_core::Error>((text, response.usage)) + })?; - let response = builder.send().await.map_err(|e| Error::from(RigError::from(e)))?; - let parsed = ResponseParser::extract_text(&response)?; - let entities = parsed.parse_json()?; + let entities = parsed.parse_json().map_err(nvisy_core::Error::from)?; Ok(DetectionResponse { entities, - usage: Some(response.usage), + usage: Some(usage), }) }) } } -/// Production detection service wrapping a rig-core [`CompletionModel`]. +/// Production detection service wrapping a rig-core completion model. /// -/// This is a convenience alias for `ServiceBackend>`. -/// Use [`RigBackend::from_model`] to construct one. -pub type RigBackend = ServiceBackend>; - -impl RigBackend { - /// Create a new backend with the given model and configuration. - /// - /// Temperature and max_tokens are configured on the inner model service. - /// The [`RigBackendConfig`] controls retry policy. - pub fn from_model(model: M, temperature: f64, max_tokens: u64, config: RigBackendConfig) -> Self { +/// This is a convenience alias for `ServiceBackend`. +/// Use [`RigBackend::from_provider`] to construct one. +pub type RigBackend = ServiceBackend; + +impl RigBackend { + /// Create a new backend from a provider, model name, and configuration. 
+ pub fn from_provider( + provider: &Provider, + model_name: &str, + temperature: f64, + max_tokens: u64, + config: RigBackendConfig, + ) -> Result { + let client = ProviderClient::from_provider(provider)?; + let model = match client { + ProviderClient::OpenAi(c) => InnerModel::OpenAi(Arc::new(c.completion_model(model_name))), + ProviderClient::Anthropic(c) => InnerModel::Anthropic(Arc::new(c.completion_model(model_name))), + ProviderClient::Gemini(c) => InnerModel::Gemini(Arc::new(c.completion_model(model_name))), + ProviderClient::Ollama(c) => InnerModel::Ollama(Arc::new(c.completion_model(model_name))), + }; + let inner = RigBackendInner { - model: Arc::new(model), + model, temperature, max_tokens, }; - ServiceBackend::new(inner, config) + + Ok(ServiceBackend::new(inner, config)) } } diff --git a/crates/nvisy-rig/src/bridge/response.rs b/crates/nvisy-rig/src/bridge/response.rs index c217373..275a4ca 100644 --- a/crates/nvisy-rig/src/bridge/response.rs +++ b/crates/nvisy-rig/src/bridge/response.rs @@ -8,10 +8,11 @@ use serde_json::Value; use rig::completion::{AssistantContent, CompletionResponse}; -use nvisy_core::Error; use nvisy_ontology::entity::{DetectionMethod, Entity, EntityCategory, EntityKind}; use nvisy_ontology::location::{Location, TextLocation}; +use crate::error::Error; + /// Extracted text from an LLM completion response. /// /// Wraps the raw text content and provides parsing accessors. @@ -32,10 +33,8 @@ impl<'a> ResponseParser<'a> { .collect(); if texts.is_empty() { - return Err(Error::runtime( - "LLM response contained no text content", - "rig", - false, + return Err(Error::Response( + "LLM response contained no text content".to_string(), )); } @@ -54,6 +53,11 @@ impl<'a> ResponseParser<'a> { &self.text } + /// Consume the parser and return the owned text. + pub fn into_string(self) -> String { + self.text.into_owned() + } + /// Parse the text as JSON into `T`. /// /// Strips markdown fences if present, then deserializes. 
@@ -73,11 +77,10 @@ impl<'a> ResponseParser<'a> { let json_str = extract_fenced_json(trimmed).unwrap_or(trimmed); serde_json::from_str::(json_str).map_err(|e| { - Error::runtime( - format!("Failed to parse LLM response as JSON: {e}: {}", truncate(trimmed, 200)), - "rig", - false, - ) + Error::Response(format!( + "Failed to parse LLM response as JSON: {e}: {}", + truncate(trimmed, 200), + )) }) } } @@ -97,13 +100,13 @@ impl EntityParser { for item in raw { let obj = item.as_object().ok_or_else(|| { - Error::validation("Expected JSON object in LLM results".to_string(), "llm-parse") + Error::Validation("Expected JSON object in LLM results".to_string()) })?; let category_str = obj .get("category") .and_then(Value::as_str) - .ok_or_else(|| Error::validation("Missing 'category'".to_string(), "llm-parse"))?; + .ok_or_else(|| Error::Validation("Missing 'category'".to_string()))?; let category = match category_str { "pii" => EntityCategory::Pii, @@ -117,7 +120,7 @@ impl EntityParser { .get("entity_type") .and_then(Value::as_str) .ok_or_else(|| { - Error::validation("Missing 'entity_type'".to_string(), "llm-parse") + Error::Validation("Missing 'entity_type'".to_string()) })?; let entity_kind = match EntityKind::from_str(entity_type_str) { @@ -134,13 +137,13 @@ impl EntityParser { let value = obj .get("value") .and_then(Value::as_str) - .ok_or_else(|| Error::validation("Missing 'value'".to_string(), "llm-parse"))?; + .ok_or_else(|| Error::Validation("Missing 'value'".to_string()))?; let confidence = obj .get("confidence") .and_then(Value::as_f64) .ok_or_else(|| { - Error::validation("Missing 'confidence'".to_string(), "llm-parse") + Error::Validation("Missing 'confidence'".to_string()) })?; let start_offset = obj diff --git a/crates/nvisy-rig/src/error.rs b/crates/nvisy-rig/src/error.rs index f03513a..b46c970 100644 --- a/crates/nvisy-rig/src/error.rs +++ b/crates/nvisy-rig/src/error.rs @@ -1,6 +1,6 @@ //! Error types for the rig crate. 
-use rig::completion::{CompletionError, PromptError}; +use rig::completion::{CompletionError, PromptError, StructuredOutputError}; /// Errors produced by rig-core LLM interactions. #[derive(Debug, thiserror::Error)] @@ -36,6 +36,22 @@ pub enum Error { /// The prompt was cancelled. #[error("Prompt cancelled: {0}")] Cancelled(String), + + /// A validation or parse failure. + #[error("{0}")] + Validation(String), + + /// Wraps `nvisy_core::Error` from provider implementations. + #[error(transparent)] + Core(#[from] nvisy_core::Error), + + /// Structured output failed (prompt error or deserialization). + #[error("Structured output error: {0}")] + StructuredOutput(String), + + /// Failed to construct a provider client. + #[error("Client error: {0}")] + Client(String), } impl Error { @@ -74,8 +90,30 @@ impl From for Error { } } +impl From for Error { + fn from(err: StructuredOutputError) -> Self { + match err { + StructuredOutputError::PromptError(e) => Self::from(e), + StructuredOutputError::DeserializationError(e) => { + Self::StructuredOutput(e.to_string()) + } + StructuredOutputError::EmptyResponse => { + Self::StructuredOutput("model returned no content".to_string()) + } + } + } +} + impl From for nvisy_core::Error { fn from(err: Error) -> Self { + // Handle the owned `Core` variant first to avoid borrowing issues. 
+ if matches!(&err, Error::Core(_)) { + return match err { + Error::Core(inner) => inner, + _ => unreachable!(), + }; + } + match &err { Error::Http(_) => { nvisy_core::Error::connection(err.to_string(), "rig", true) @@ -88,15 +126,19 @@ impl From for nvisy_core::Error { let retryable = is_retryable_provider_error(msg); nvisy_core::Error::connection(err.to_string(), "rig", retryable) } - Error::Response(_) => { + Error::Response(_) | Error::StructuredOutput(_) => { nvisy_core::Error::runtime(err.to_string(), "rig", false) } - Error::Request(_) => { + Error::Request(_) | Error::Validation(_) => { nvisy_core::Error::validation(err.to_string(), "rig") } Error::Tool(_) | Error::MaxTurns(_) | Error::Cancelled(_) => { nvisy_core::Error::runtime(err.to_string(), "rig", false) } + Error::Client(_) => { + nvisy_core::Error::connection(err.to_string(), "rig", false) + } + Error::Core(_) => unreachable!(), } } } diff --git a/crates/nvisy-rig/src/lib.rs b/crates/nvisy-rig/src/lib.rs index fe5f2dc..824a0f0 100644 --- a/crates/nvisy-rig/src/lib.rs +++ b/crates/nvisy-rig/src/lib.rs @@ -12,9 +12,11 @@ pub mod prelude; pub use backend::{DetectionConfig, DetectionRequest, DetectionResponse}; pub use bridge::{EntityParser, RigBackend, RigBackendConfig, ServiceBackend}; +pub use error::Error; pub use agent::{ - BaseAgentConfig, ContextWindow, + AuthenticatedProvider, BaseAgentConfig, ContextWindow, Provider, + UnauthenticatedProvider, CvAgent, CvDetection, CvProvider, NerAgent, OcrAgent, OcrOutput, OcrProvider, OcrTextRegion, RawCvEntities, RawCvEntity, RawEntities, RawEntity, diff --git a/crates/nvisy-rig/src/prelude.rs b/crates/nvisy-rig/src/prelude.rs index 3874b44..ab04a77 100644 --- a/crates/nvisy-rig/src/prelude.rs +++ b/crates/nvisy-rig/src/prelude.rs @@ -5,8 +5,10 @@ pub use crate::backend::{ RetryPolicy, UsageStats, UsageTracker, }; pub use crate::bridge::{EntityParser, RigBackend, RigBackendConfig, ServiceBackend}; +pub use crate::error::Error; pub use crate::agent::{ - 
BaseAgentConfig, ContextWindow, + AuthenticatedProvider, BaseAgentConfig, ContextWindow, Provider, + UnauthenticatedProvider, CvAgent, CvDetection, CvProvider, NerAgent, OcrAgent, OcrOutput, OcrProvider, OcrTextRegion, RawCvEntities, RawCvEntity, RawEntities, RawEntity, From ed245c3f85236baf1de6fd1aca4d037e5ae27a3e Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Thu, 26 Feb 2026 00:58:46 +0100 Subject: [PATCH 16/24] refactor(rig): remove Tower, use reqwest-middleware for HTTP retries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace Tower service layer with reqwest-middleware + reqwest-retry for transparent HTTP-level retries. Delete ServiceBackend, RigBackend, RetryPolicy, and dispatch_model! macro. Replace tower::Service bound in nvisy-identify with LlmBackend async trait. Rename agent submodules: detect→cv, extract→ocr, recognize→ner. Co-Authored-By: Claude Opus 4.6 --- Cargo.lock | 64 +++++- Cargo.toml | 4 + crates/nvisy-identify/Cargo.toml | 1 - crates/nvisy-identify/src/lib.rs | 2 +- crates/nvisy-identify/src/llm/detection.rs | 82 +++---- crates/nvisy-identify/src/llm/mod.rs | 2 +- crates/nvisy-rig/Cargo.toml | 7 +- crates/nvisy-rig/src/agent/base/builder.rs | 6 +- crates/nvisy-rig/src/agent/base/dispatch.rs | 10 +- crates/nvisy-rig/src/agent/base/provider.rs | 58 ++++- .../nvisy-rig/src/agent/{detect => cv}/mod.rs | 0 .../src/agent/{detect => cv}/output.rs | 0 .../src/agent/{detect => cv}/prompt.rs | 0 .../src/agent/{detect => cv}/tool.rs | 0 crates/nvisy-rig/src/agent/mod.rs | 12 +- .../src/agent/{recognize => ner}/mod.rs | 0 .../src/agent/{recognize => ner}/output.rs | 0 .../src/agent/{recognize => ner}/prompt.rs | 0 .../src/agent/{extract => ocr}/mod.rs | 0 .../src/agent/{extract => ocr}/output.rs | 0 .../src/agent/{extract => ocr}/prompt.rs | 0 .../src/agent/{extract => ocr}/tool.rs | 0 crates/nvisy-rig/src/backend/mod.rs | 8 +- crates/nvisy-rig/src/backend/retry.rs | 156 ------------- 
crates/nvisy-rig/src/bridge/mod.rs | 213 +----------------- crates/nvisy-rig/src/lib.rs | 2 +- crates/nvisy-rig/src/prelude.rs | 4 +- 27 files changed, 182 insertions(+), 449 deletions(-) rename crates/nvisy-rig/src/agent/{detect => cv}/mod.rs (100%) rename crates/nvisy-rig/src/agent/{detect => cv}/output.rs (100%) rename crates/nvisy-rig/src/agent/{detect => cv}/prompt.rs (100%) rename crates/nvisy-rig/src/agent/{detect => cv}/tool.rs (100%) rename crates/nvisy-rig/src/agent/{recognize => ner}/mod.rs (100%) rename crates/nvisy-rig/src/agent/{recognize => ner}/output.rs (100%) rename crates/nvisy-rig/src/agent/{recognize => ner}/prompt.rs (100%) rename crates/nvisy-rig/src/agent/{extract => ocr}/mod.rs (100%) rename crates/nvisy-rig/src/agent/{extract => ocr}/output.rs (100%) rename crates/nvisy-rig/src/agent/{extract => ocr}/prompt.rs (100%) rename crates/nvisy-rig/src/agent/{extract => ocr}/tool.rs (100%) delete mode 100644 crates/nvisy-rig/src/backend/retry.rs diff --git a/Cargo.lock b/Cargo.lock index 2e11dd2..8ac92b9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2777,7 +2777,6 @@ dependencies = [ "serde_json", "strum", "tokio", - "tower", "tracing", "uuid", ] @@ -2844,13 +2843,14 @@ dependencies = [ "base64", "nvisy-core", "nvisy-ontology", + "reqwest-middleware", + "reqwest-retry", "rig-core", "schemars", "serde", "serde_json", "thiserror 2.0.18", "tokio", - "tower", "tracing", "uuid", ] @@ -3681,6 +3681,51 @@ dependencies = [ "web-sys", ] +[[package]] +name = "reqwest-middleware" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "199dda04a536b532d0cc04d7979e39b1c763ea749bf91507017069c00b96056f" +dependencies = [ + "anyhow", + "async-trait", + "http", + "reqwest", + "serde", + "thiserror 2.0.18", + "tower-service", +] + +[[package]] +name = "reqwest-retry" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"fe2412db2af7d2268e7a5406be0431f37d9eb67ff390f35b395716f5f06c2eaa" +dependencies = [ + "anyhow", + "async-trait", + "futures", + "getrandom 0.2.17", + "http", + "hyper", + "reqwest", + "reqwest-middleware", + "retry-policies", + "thiserror 2.0.18", + "tokio", + "tracing", + "wasmtimer", +] + +[[package]] +name = "retry-policies" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46a4bd6027df676bcb752d3724db0ea3c0c5fc1dd0376fec51ac7dcaf9cc69be" +dependencies = [ + "rand 0.9.2", +] + [[package]] name = "rgb" version = "0.8.52" @@ -3709,6 +3754,7 @@ dependencies = [ "ordered-float", "pin-project-lite", "reqwest", + "reqwest-middleware", "rig-derive", "schemars", "serde", @@ -5034,6 +5080,20 @@ dependencies = [ "semver", ] +[[package]] +name = "wasmtimer" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c598d6b99ea013e35844697fc4670d08339d5cda15588f193c6beedd12f644b" +dependencies = [ + "futures", + "js-sys", + "parking_lot", + "pin-utils", + "slab", + "wasm-bindgen", +] + [[package]] name = "web-sys" version = "0.3.85" diff --git a/Cargo.toml b/Cargo.toml index 9518f29..25da0c3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -53,6 +53,10 @@ nvisy-server = { path = "./crates/nvisy-server", version = "0.1.0" } # LLM framework rig-core = { version = "0.31", features = [] } +# HTTP middleware +reqwest-middleware = { version = "0.5" } +reqwest-retry = { version = "0.9" } + # Async runtime tokio = { version = "1.0", features = [] } tokio-util = { version = "0.7", features = [] } diff --git a/crates/nvisy-identify/Cargo.toml b/crates/nvisy-identify/Cargo.toml index 316e3bb..3019af4 100644 --- a/crates/nvisy-identify/Cargo.toml +++ b/crates/nvisy-identify/Cargo.toml @@ -44,7 +44,6 @@ schemars = { workspace = true, features = [] } # Async runtime tokio = { workspace = true, features = ["sync"] } async-trait = { workspace = true, features = [] } -tower = { workspace = true, features = 
["util"] } # Primitive datatypes uuid = { workspace = true, features = ["v4"] } diff --git a/crates/nvisy-identify/src/lib.rs b/crates/nvisy-identify/src/lib.rs index cde8d5b..5825c2a 100644 --- a/crates/nvisy-identify/src/lib.rs +++ b/crates/nvisy-identify/src/lib.rs @@ -28,7 +28,7 @@ pub use pattern::{PatternDetection, PatternDetectionParams}; pub use ner::{NerDetection, NerDetectionParams}; pub use ner::ImageNerDetection; pub use vision::{FaceBackend, FaceDetection, ObjectBackend, ObjectDetection, OcrDetection}; -pub use llm::{LlmDetection, LlmDetectionParams, user_prompt as llm_user_prompt}; +pub use llm::{LlmBackend, LlmDetection, LlmDetectionParams, user_prompt as llm_user_prompt}; pub use audio::TranscriptNerDetection; // --- Post-detection actions --- diff --git a/crates/nvisy-identify/src/llm/detection.rs b/crates/nvisy-identify/src/llm/detection.rs index 1fbb415..28ccbe0 100644 --- a/crates/nvisy-identify/src/llm/detection.rs +++ b/crates/nvisy-identify/src/llm/detection.rs @@ -6,7 +6,6 @@ use serde::Deserialize; use tokio::sync::Mutex; -use tower::Service; use nvisy_codec::handler::{Span, TxtSpan}; use nvisy_ontology::entity::EntityKind; @@ -22,6 +21,13 @@ fn default_confidence() -> f64 { 0.5 } +/// Async backend trait replacing the former `tower::Service` bound. +#[async_trait::async_trait] +pub trait LlmBackend: Send + Sync + 'static { + /// Run a detection request and return the response. + async fn detect(&self, req: DetectionRequest) -> Result; +} + /// Typed parameters for [`LlmDetection`]. #[derive(Debug, Deserialize)] #[serde(rename_all = "camelCase")] @@ -46,21 +52,18 @@ struct LlmState { prior_text: String, } -/// LLM contextual detection layer — delegates to a Tower [`Service`]. +/// LLM contextual detection layer — delegates to an [`LlmBackend`]. /// /// Uses [`SequentialContext`]: the orchestrator feeds one span at a /// time so the layer can carry sliding context between spans. 
pub struct LlmDetection { - backend: Mutex, + backend: B, config: DetectionConfig, model_info: Option, state: Mutex, } -impl LlmDetection -where - B: Service + Send + 'static, -{ +impl LlmDetection { /// Create a new detection layer with the given backend and params. pub fn new(backend: B, params: LlmDetectionParams) -> Self { let system_prompt = params.system_prompt.unwrap_or_else(|| { @@ -72,7 +75,7 @@ where system_prompt: Some(system_prompt), }; Self { - backend: Mutex::new(backend), + backend, config, model_info: params.model_info, state: Mutex::new(LlmState { @@ -89,11 +92,7 @@ where } #[async_trait::async_trait] -impl DetectionService for LlmDetection -where - B: Service + Send + 'static, - B::Future: Send, -{ +impl DetectionService for LlmDetection { type Context = SequentialContext; async fn detect( @@ -116,14 +115,11 @@ where } }; - let response = { - let mut backend = self.backend.lock().await; - let req = DetectionRequest { - text: full_text, - config: self.config.clone(), - }; - backend.call(req).await? + let req = DetectionRequest { + text: full_text, + config: self.config.clone(), }; + let response = self.backend.detect(req).await?; // Filter entities to the current span and adjust offsets. 
let span_len = span.data.len(); @@ -173,38 +169,28 @@ where #[cfg(test)] mod tests { use super::*; - use serde_json::{json, Value}; - use std::task::{Context, Poll}; + use serde_json::json; struct MockLlmBackend; - impl Service for MockLlmBackend { - type Response = DetectionResponse; - type Error = Error; - type Future = std::pin::Pin> + Send>>; - - fn poll_ready(&mut self, _cx: &mut Context<'_>) -> Poll> { - Poll::Ready(Ok(())) - } - - fn call(&mut self, req: DetectionRequest) -> Self::Future { + #[async_trait::async_trait] + impl LlmBackend for MockLlmBackend { + async fn detect(&self, req: DetectionRequest) -> Result { let text = req.text; - Box::pin(async move { - let mut results = Vec::new(); - if let Some(pos) = text.find("SECRET") { - results.push(json!({ - "category": "credentials", - "entity_type": "api_key", - "value": "SECRET", - "confidence": 0.92, - "start_offset": pos, - "end_offset": pos + 6 - })); - } - Ok(DetectionResponse { - entities: results, - usage: None, - }) + let mut results = Vec::new(); + if let Some(pos) = text.find("SECRET") { + results.push(json!({ + "category": "credentials", + "entity_type": "api_key", + "value": "SECRET", + "confidence": 0.92, + "start_offset": pos, + "end_offset": pos + 6 + })); + } + Ok(DetectionResponse { + entities: results, + usage: None, }) } } diff --git a/crates/nvisy-identify/src/llm/mod.rs b/crates/nvisy-identify/src/llm/mod.rs index dbbc0eb..8d40832 100644 --- a/crates/nvisy-identify/src/llm/mod.rs +++ b/crates/nvisy-identify/src/llm/mod.rs @@ -3,5 +3,5 @@ pub mod detection; pub mod prompt; -pub use detection::{LlmDetection, LlmDetectionParams}; +pub use detection::{LlmBackend, LlmDetection, LlmDetectionParams}; pub use prompt::user_prompt; diff --git a/crates/nvisy-rig/Cargo.toml b/crates/nvisy-rig/Cargo.toml index 4b93770..5f1b3f1 100644 --- a/crates/nvisy-rig/Cargo.toml +++ b/crates/nvisy-rig/Cargo.toml @@ -25,12 +25,15 @@ nvisy-core = { workspace = true, features = [] } nvisy-ontology = { workspace = 
true, features = [] } # LLM framework -rig-core = { workspace = true, features = ["derive"] } +rig-core = { workspace = true, features = ["derive", "reqwest-middleware"] } # Async runtime async-trait = { workspace = true, features = [] } tokio = { workspace = true, features = ["time"] } -tower = { workspace = true, features = ["retry", "timeout", "util"] } + +# HTTP middleware (retry) +reqwest-middleware = { workspace = true } +reqwest-retry = { workspace = true } # Encoding base64 = { workspace = true, features = [] } diff --git a/crates/nvisy-rig/src/agent/base/builder.rs b/crates/nvisy-rig/src/agent/base/builder.rs index b06bb5f..1c39521 100644 --- a/crates/nvisy-rig/src/agent/base/builder.rs +++ b/crates/nvisy-rig/src/agent/base/builder.rs @@ -4,6 +4,7 @@ use rig::agent::AgentBuilder; use rig::client::CompletionClient; use rig::completion::CompletionModel; +use rig::providers::gemini; use rig::tool::{Tool, ToolDyn}; use uuid::Uuid; @@ -68,7 +69,10 @@ impl BaseAgentBuilder { Agents::Anthropic(build_rig_agent(c.completion_model(&model_name), &config, preamble_ref, tools)) } ProviderClient::Gemini(c) => { - Agents::Gemini(build_rig_agent(c.completion_model(&model_name), &config, preamble_ref, tools)) + // rig-core 0.31: Gemini's Capabilities impl doesn't propagate H, + // so CompletionClient is unavailable for non-default H. 
+ let model = gemini::completion::CompletionModel::new(c, &model_name); + Agents::Gemini(build_rig_agent(model, &config, preamble_ref, tools)) } ProviderClient::Ollama(c) => { Agents::Ollama(build_rig_agent(c.completion_model(&model_name), &config, preamble_ref, tools)) diff --git a/crates/nvisy-rig/src/agent/base/dispatch.rs b/crates/nvisy-rig/src/agent/base/dispatch.rs index a3f842b..c54da69 100644 --- a/crates/nvisy-rig/src/agent/base/dispatch.rs +++ b/crates/nvisy-rig/src/agent/base/dispatch.rs @@ -7,11 +7,13 @@ use rig::agent::Agent; use rig::providers::{anthropic, gemini, ollama, openai}; +use super::provider::HttpClient; + pub(crate) enum Agents { - OpenAi(Agent), - Anthropic(Agent), - Gemini(Agent), - Ollama(Agent), + OpenAi(Agent>), + Anthropic(Agent>), + Gemini(Agent>), + Ollama(Agent>), } /// Dispatch a call to the concrete agent inside each variant. diff --git a/crates/nvisy-rig/src/agent/base/provider.rs b/crates/nvisy-rig/src/agent/base/provider.rs index 39f93ac..66bc930 100644 --- a/crates/nvisy-rig/src/agent/base/provider.rs +++ b/crates/nvisy-rig/src/agent/base/provider.rs @@ -3,22 +3,34 @@ //! [`Provider`] is a plain data enum carrying API keys and optional base //! URLs. Client construction is deferred until an agent or backend is built. +use reqwest_middleware::ClientBuilder; +use reqwest_retry::{RetryTransientMiddleware, policies::ExponentialBackoff}; use rig::client::Nothing; use rig::providers::{anthropic, gemini, ollama, openai}; use crate::error::Error; +/// HTTP client type used by all rig provider clients. +pub(crate) type HttpClient = reqwest_middleware::ClientWithMiddleware; + +/// Default number of retries for transient HTTP errors. +const DEFAULT_MAX_RETRIES: u32 = 3; + /// Provider that requires an API key (OpenAI, Anthropic, Gemini). #[derive(Clone)] pub struct AuthenticatedProvider { pub api_key: String, pub base_url: Option, + /// Maximum retries for transient HTTP errors. 
+ pub max_retries: u32, } /// Provider that does not require an API key (Ollama). #[derive(Clone)] pub struct UnauthenticatedProvider { pub base_url: Option, + /// Maximum retries for transient HTTP errors. + pub max_retries: u32, } /// Supported LLM providers. @@ -49,6 +61,7 @@ impl Provider { Self::OpenAi(AuthenticatedProvider { api_key: api_key.to_owned(), base_url: None, + max_retries: DEFAULT_MAX_RETRIES, }) } @@ -57,6 +70,7 @@ impl Provider { Self::Anthropic(AuthenticatedProvider { api_key: api_key.to_owned(), base_url: None, + max_retries: DEFAULT_MAX_RETRIES, }) } @@ -65,35 +79,52 @@ impl Provider { Self::Gemini(AuthenticatedProvider { api_key: api_key.to_owned(), base_url: None, + max_retries: DEFAULT_MAX_RETRIES, }) } /// Create an Ollama provider using the default local URL. pub fn ollama() -> Self { - Self::Ollama(UnauthenticatedProvider { base_url: None }) + Self::Ollama(UnauthenticatedProvider { + base_url: None, + max_retries: DEFAULT_MAX_RETRIES, + }) } /// Create an Ollama provider with a custom base URL. pub fn ollama_with_url(url: &str) -> Self { Self::Ollama(UnauthenticatedProvider { base_url: Some(url.to_owned()), + max_retries: DEFAULT_MAX_RETRIES, }) } } +/// Build a `ClientWithMiddleware` with retry middleware. +fn build_http_client(max_retries: u32) -> HttpClient { + let retry_policy = ExponentialBackoff::builder() + .build_with_max_retries(max_retries); + ClientBuilder::new(reqwest_middleware::reqwest::Client::new()) + .with(RetryTransientMiddleware::new_with_policy(retry_policy)) + .build() +} + /// Internal helper — builds a concrete rig client from connection params. 
pub(crate) enum ProviderClient { - OpenAi(openai::CompletionsClient), - Anthropic(anthropic::Client), - Gemini(gemini::Client), - Ollama(ollama::Client), + OpenAi(openai::CompletionsClient), + Anthropic(anthropic::Client), + Gemini(gemini::Client), + Ollama(ollama::Client), } impl ProviderClient { pub(crate) fn from_provider(provider: &Provider) -> Result { match provider { Provider::OpenAi(p) => { - let mut builder = openai::Client::builder().api_key(&p.api_key); + let http_client = build_http_client(p.max_retries); + let mut builder = openai::Client::::builder() + .api_key(&p.api_key) + .http_client(http_client); if let Some(url) = &p.base_url { builder = builder.base_url(url); } @@ -103,7 +134,10 @@ impl ProviderClient { Ok(Self::OpenAi(client.completions_api())) } Provider::Anthropic(p) => { - let mut builder = anthropic::Client::builder().api_key(&p.api_key); + let http_client = build_http_client(p.max_retries); + let mut builder = anthropic::Client::::builder() + .api_key(&p.api_key) + .http_client(http_client); if let Some(url) = &p.base_url { builder = builder.base_url(url); } @@ -112,7 +146,10 @@ impl ProviderClient { )) } Provider::Gemini(p) => { - let mut builder = gemini::Client::builder().api_key(&p.api_key); + let http_client = build_http_client(p.max_retries); + let mut builder = gemini::Client::::builder() + .api_key(&p.api_key) + .http_client(http_client); if let Some(url) = &p.base_url { builder = builder.base_url(url); } @@ -121,7 +158,10 @@ impl ProviderClient { )) } Provider::Ollama(p) => { - let mut builder = ollama::Client::builder().api_key(Nothing); + let http_client = build_http_client(p.max_retries); + let mut builder = ollama::Client::::builder() + .api_key(Nothing) + .http_client(http_client); if let Some(url) = &p.base_url { builder = builder.base_url(url); } diff --git a/crates/nvisy-rig/src/agent/detect/mod.rs b/crates/nvisy-rig/src/agent/cv/mod.rs similarity index 100% rename from crates/nvisy-rig/src/agent/detect/mod.rs rename to 
crates/nvisy-rig/src/agent/cv/mod.rs diff --git a/crates/nvisy-rig/src/agent/detect/output.rs b/crates/nvisy-rig/src/agent/cv/output.rs similarity index 100% rename from crates/nvisy-rig/src/agent/detect/output.rs rename to crates/nvisy-rig/src/agent/cv/output.rs diff --git a/crates/nvisy-rig/src/agent/detect/prompt.rs b/crates/nvisy-rig/src/agent/cv/prompt.rs similarity index 100% rename from crates/nvisy-rig/src/agent/detect/prompt.rs rename to crates/nvisy-rig/src/agent/cv/prompt.rs diff --git a/crates/nvisy-rig/src/agent/detect/tool.rs b/crates/nvisy-rig/src/agent/cv/tool.rs similarity index 100% rename from crates/nvisy-rig/src/agent/detect/tool.rs rename to crates/nvisy-rig/src/agent/cv/tool.rs diff --git a/crates/nvisy-rig/src/agent/mod.rs b/crates/nvisy-rig/src/agent/mod.rs index 1cbcda2..fd2f988 100644 --- a/crates/nvisy-rig/src/agent/mod.rs +++ b/crates/nvisy-rig/src/agent/mod.rs @@ -4,13 +4,13 @@ //! into individual agent submodules. pub(crate) mod base; -mod detect; -mod extract; -mod recognize; +mod cv; +mod ocr; +mod ner; pub(crate) use base::BaseAgent; pub use base::{AuthenticatedProvider, BaseAgentConfig, ContextWindow, Provider, UnauthenticatedProvider}; -pub use recognize::{NerAgent, RawEntities, RawEntity}; -pub use extract::{OcrAgent, OcrOutput, OcrProvider, OcrTextRegion, RawOcrEntity}; -pub use detect::{CvAgent, CvDetection, CvProvider, RawCvEntities, RawCvEntity}; +pub use ner::{NerAgent, RawEntities, RawEntity}; +pub use ocr::{OcrAgent, OcrOutput, OcrProvider, OcrTextRegion, RawOcrEntity}; +pub use cv::{CvAgent, CvDetection, CvProvider, RawCvEntities, RawCvEntity}; diff --git a/crates/nvisy-rig/src/agent/recognize/mod.rs b/crates/nvisy-rig/src/agent/ner/mod.rs similarity index 100% rename from crates/nvisy-rig/src/agent/recognize/mod.rs rename to crates/nvisy-rig/src/agent/ner/mod.rs diff --git a/crates/nvisy-rig/src/agent/recognize/output.rs b/crates/nvisy-rig/src/agent/ner/output.rs similarity index 100% rename from 
crates/nvisy-rig/src/agent/recognize/output.rs rename to crates/nvisy-rig/src/agent/ner/output.rs diff --git a/crates/nvisy-rig/src/agent/recognize/prompt.rs b/crates/nvisy-rig/src/agent/ner/prompt.rs similarity index 100% rename from crates/nvisy-rig/src/agent/recognize/prompt.rs rename to crates/nvisy-rig/src/agent/ner/prompt.rs diff --git a/crates/nvisy-rig/src/agent/extract/mod.rs b/crates/nvisy-rig/src/agent/ocr/mod.rs similarity index 100% rename from crates/nvisy-rig/src/agent/extract/mod.rs rename to crates/nvisy-rig/src/agent/ocr/mod.rs diff --git a/crates/nvisy-rig/src/agent/extract/output.rs b/crates/nvisy-rig/src/agent/ocr/output.rs similarity index 100% rename from crates/nvisy-rig/src/agent/extract/output.rs rename to crates/nvisy-rig/src/agent/ocr/output.rs diff --git a/crates/nvisy-rig/src/agent/extract/prompt.rs b/crates/nvisy-rig/src/agent/ocr/prompt.rs similarity index 100% rename from crates/nvisy-rig/src/agent/extract/prompt.rs rename to crates/nvisy-rig/src/agent/ocr/prompt.rs diff --git a/crates/nvisy-rig/src/agent/extract/tool.rs b/crates/nvisy-rig/src/agent/ocr/tool.rs similarity index 100% rename from crates/nvisy-rig/src/agent/extract/tool.rs rename to crates/nvisy-rig/src/agent/ocr/tool.rs diff --git a/crates/nvisy-rig/src/backend/mod.rs b/crates/nvisy-rig/src/backend/mod.rs index 8952389..455c5b7 100644 --- a/crates/nvisy-rig/src/backend/mod.rs +++ b/crates/nvisy-rig/src/backend/mod.rs @@ -1,10 +1,8 @@ -//! LLM backend types, error mapping, and Tower retry policy. +//! LLM backend types and usage tracking. mod metrics; -mod retry; pub use metrics::{UsageStats, UsageTracker}; -pub use retry::RetryPolicy; /// Fallback hint used in prompts when no specific entity types are requested. pub(crate) const ALL_TYPES_HINT: &str = "all entity types"; @@ -24,14 +22,14 @@ pub struct DetectionConfig { pub system_prompt: Option, } -/// Request type for the Tower-based detection service. +/// Request type for the detection service. 
#[derive(Debug, Clone)] pub struct DetectionRequest { pub text: String, pub config: DetectionConfig, } -/// Response type for the Tower-based detection service. +/// Response type for the detection service. #[derive(Debug, Clone)] pub struct DetectionResponse { pub entities: Vec, diff --git a/crates/nvisy-rig/src/backend/retry.rs b/crates/nvisy-rig/src/backend/retry.rs deleted file mode 100644 index 18e9892..0000000 --- a/crates/nvisy-rig/src/backend/retry.rs +++ /dev/null @@ -1,156 +0,0 @@ -//! Tower retry policy with exponential backoff. - -use std::{pin::Pin, time::Duration}; - -use nvisy_core::Error; -use tower::retry::Policy; - -/// Tower retry policy with exponential backoff for retryable errors. -/// -/// Generic over any request/response types: the request must be `Clone` -/// (so Tower can re-issue it) and the error type is [`nvisy_core::Error`] -/// whose `is_retryable()` flag drives the retry decision. -#[derive(Debug, Clone)] -pub struct RetryPolicy { - /// Maximum number of retries (default: 3). - pub max_retries: u32, - /// Initial backoff duration (default: 300ms). - pub initial_backoff: Duration, - /// Multiplicative backoff factor (default: 2.0). - pub backoff_factor: f64, - /// Maximum backoff duration cap (default: 5s). - pub max_backoff: Duration, - /// Current attempt counter (internal). - attempts: u32, - /// Current backoff (internal). - current_backoff: Duration, -} - -impl Default for RetryPolicy { - fn default() -> Self { - Self::new() - } -} - -impl RetryPolicy { - /// Create a retry policy with default settings. 
- pub fn new() -> Self { - Self { - max_retries: 3, - initial_backoff: Duration::from_millis(300), - backoff_factor: 2.0, - max_backoff: Duration::from_secs(5), - attempts: 0, - current_backoff: Duration::from_millis(300), - } - } - - pub fn max_retries(&self) -> u32 { - self.max_retries - } -} - -impl Policy for RetryPolicy -where - Req: Clone, -{ - type Future = Pin + Send>>; - - fn retry(&mut self, _req: &mut Req, result: &mut Result) -> Option { - match result { - Ok(_) => None, - Err(err) => { - if !err.is_retryable() || self.attempts >= self.max_retries { - return None; - } - - self.attempts += 1; - let backoff = self.current_backoff; - - tracing::warn!( - attempt = self.attempts, - max_retries = self.max_retries, - backoff_ms = backoff.as_millis() as u64, - error = %err, - "retrying after transient error" - ); - - self.current_backoff = Duration::from_secs_f64( - (self.current_backoff.as_secs_f64() * self.backoff_factor) - .min(self.max_backoff.as_secs_f64()), - ); - - Some(Box::pin(async move { - tokio::time::sleep(backoff).await; - })) - } - } - } - - fn clone_request(&mut self, req: &Req) -> Option { - Some(req.clone()) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::backend::{DetectionConfig, DetectionRequest, DetectionResponse}; - use tower::retry::Policy; - - #[tokio::test] - async fn retries_on_retryable_error() { - let mut policy = RetryPolicy::new(); - let mut req = DetectionRequest { - text: "test".into(), - config: DetectionConfig { - entity_kinds: vec![], - confidence_threshold: 0.5, - system_prompt: None, - }, - }; - let mut result: Result = - Err(Error::connection("transient", "test", true)); - - let fut = policy.retry(&mut req, &mut result); - assert!(fut.is_some()); - } - - #[tokio::test] - async fn does_not_retry_non_retryable() { - let mut policy = RetryPolicy::new(); - let mut req = DetectionRequest { - text: "test".into(), - config: DetectionConfig { - entity_kinds: vec![], - confidence_threshold: 0.5, - system_prompt: 
None, - }, - }; - let mut result: Result = - Err(Error::validation("bad input", "test")); - - let fut = policy.retry(&mut req, &mut result); - assert!(fut.is_none()); - } - - #[tokio::test] - async fn does_not_retry_success() { - let mut policy = RetryPolicy::new(); - let mut req = DetectionRequest { - text: "test".into(), - config: DetectionConfig { - entity_kinds: vec![], - confidence_threshold: 0.5, - system_prompt: None, - }, - }; - let mut result: Result = Ok(DetectionResponse { - entities: vec![], - usage: None, - }); - - let fut = policy.retry(&mut req, &mut result); - assert!(fut.is_none()); - } -} diff --git a/crates/nvisy-rig/src/bridge/mod.rs b/crates/nvisy-rig/src/bridge/mod.rs index 3c5b7ca..063d236 100644 --- a/crates/nvisy-rig/src/bridge/mod.rs +++ b/crates/nvisy-rig/src/bridge/mod.rs @@ -1,216 +1,9 @@ -//! Core bridge between rig-core and the Tower-based detection service. +//! Bridge between rig-core and the detection service. +//! +//! Prompt building and response parsing utilities. mod prompt; mod response; pub use prompt::PromptBuilder; pub use response::{EntityParser, ResponseParser}; - -use std::sync::Arc; -use std::task::{Context, Poll}; - -use rig::client::CompletionClient; -use rig::completion::CompletionModel; -use rig::providers::{anthropic, gemini, ollama, openai}; - -use crate::agent::Provider; -use crate::agent::base::provider::ProviderClient; -use crate::backend::{DetectionRequest, DetectionResponse, RetryPolicy, UsageTracker}; -use crate::error::Error; - -/// Configuration for [`ServiceBackend`] (and its [`RigBackend`] specialisation). -#[derive(Debug, Clone, Default)] -pub struct RigBackendConfig { - /// Retry policy for transient errors. - pub retry: RetryPolicy, -} - -/// Generic Tower service adapter. -/// -/// Wraps any inner service `S` with a retry policy and usage tracking. -/// The inner service handles prompt construction and LLM interaction; -/// the wrapper provides observability and resilience. 
-pub struct ServiceBackend { - inner: S, - config: RigBackendConfig, - tracker: Arc, -} - -impl ServiceBackend { - /// Create a new service backend wrapping an arbitrary inner service. - pub fn new(inner: S, config: RigBackendConfig) -> Self { - Self { - inner, - config, - tracker: Arc::new(UsageTracker::new()), - } - } - - /// Access the retry policy. - pub fn retry_policy(&self) -> &RetryPolicy { - &self.config.retry - } - - /// Access the usage tracker for this backend. - pub fn tracker(&self) -> &UsageTracker { - &self.tracker - } -} - -impl tower::Service for ServiceBackend -where - S: tower::Service, - S::Future: Send + 'static, -{ - type Response = DetectionResponse; - type Error = nvisy_core::Error; - type Future = std::pin::Pin> + Send>>; - - fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll> { - self.inner.poll_ready(cx) - } - - fn call(&mut self, req: DetectionRequest) -> Self::Future { - let tracker = Arc::clone(&self.tracker); - let fut = self.inner.call(req); - - Box::pin(async move { - let span = tracing::info_span!("service_backend_call"); - let _enter = span.enter(); - - let response = fut.await?; - - if let Some(ref usage) = response.usage { - tracker.record(usage, 0); - - tracing::debug!( - input_tokens = usage.input_tokens, - output_tokens = usage.output_tokens, - "LLM request completed" - ); - } - - Ok(response) - }) - } -} - -enum InnerModel { - OpenAi(Arc), - Anthropic(Arc), - Gemini(Arc), - Ollama(Arc), -} - -impl InnerModel { - fn clone_arc(&self) -> Self { - match self { - Self::OpenAi(m) => Self::OpenAi(Arc::clone(m)), - Self::Anthropic(m) => Self::Anthropic(Arc::clone(m)), - Self::Gemini(m) => Self::Gemini(Arc::clone(m)), - Self::Ollama(m) => Self::Ollama(Arc::clone(m)), - } - } -} - -macro_rules! 
dispatch_model { - ($inner:expr, |$model:ident| $body:expr) => { - match $inner { - InnerModel::OpenAi($model) => $body, - InnerModel::Anthropic($model) => $body, - InnerModel::Gemini($model) => $body, - InnerModel::Ollama($model) => $body, - } - }; -} - -/// Inner service that drives a rig-core completion model. -/// -/// This is the low-level service that constructs prompts and parses -/// responses. Wrap it in [`ServiceBackend`] for retry and usage tracking. -pub struct RigBackendInner { - model: InnerModel, - temperature: f64, - max_tokens: u64, -} - -impl tower::Service for RigBackendInner { - type Response = DetectionResponse; - type Error = nvisy_core::Error; - type Future = std::pin::Pin> + Send>>; - - fn poll_ready(&mut self, _cx: &mut Context<'_>) -> Poll> { - Poll::Ready(Ok(())) - } - - fn call(&mut self, req: DetectionRequest) -> Self::Future { - let user_prompt = PromptBuilder::new(&req.config).build(&req.text); - let system_prompt = req.config.system_prompt.clone(); - let model = self.model.clone_arc(); - let temperature = self.temperature; - let max_tokens = self.max_tokens; - - Box::pin(async move { - let span = tracing::info_span!("rig_backend_call"); - let _enter = span.enter(); - - let (parsed, usage) = dispatch_model!(&model, |model| { - let mut builder = model - .completion_request(&user_prompt) - .temperature(temperature) - .max_tokens(max_tokens); - - if let Some(ref preamble) = system_prompt { - builder = builder.preamble(preamble.clone()); - } - - let response = builder.send().await.map_err(|e| { - nvisy_core::Error::from(Error::from(e)) - })?; - let text = ResponseParser::extract_text(&response) - .map_err(nvisy_core::Error::from)?; - Ok::<_, nvisy_core::Error>((text, response.usage)) - })?; - - let entities = parsed.parse_json().map_err(nvisy_core::Error::from)?; - - Ok(DetectionResponse { - entities, - usage: Some(usage), - }) - }) - } -} - -/// Production detection service wrapping a rig-core completion model. 
-/// -/// This is a convenience alias for `ServiceBackend`. -/// Use [`RigBackend::from_provider`] to construct one. -pub type RigBackend = ServiceBackend; - -impl RigBackend { - /// Create a new backend from a provider, model name, and configuration. - pub fn from_provider( - provider: &Provider, - model_name: &str, - temperature: f64, - max_tokens: u64, - config: RigBackendConfig, - ) -> Result { - let client = ProviderClient::from_provider(provider)?; - let model = match client { - ProviderClient::OpenAi(c) => InnerModel::OpenAi(Arc::new(c.completion_model(model_name))), - ProviderClient::Anthropic(c) => InnerModel::Anthropic(Arc::new(c.completion_model(model_name))), - ProviderClient::Gemini(c) => InnerModel::Gemini(Arc::new(c.completion_model(model_name))), - ProviderClient::Ollama(c) => InnerModel::Ollama(Arc::new(c.completion_model(model_name))), - }; - - let inner = RigBackendInner { - model, - temperature, - max_tokens, - }; - - Ok(ServiceBackend::new(inner, config)) - } -} diff --git a/crates/nvisy-rig/src/lib.rs b/crates/nvisy-rig/src/lib.rs index 824a0f0..5111c09 100644 --- a/crates/nvisy-rig/src/lib.rs +++ b/crates/nvisy-rig/src/lib.rs @@ -11,7 +11,7 @@ pub(crate) mod agent; pub mod prelude; pub use backend::{DetectionConfig, DetectionRequest, DetectionResponse}; -pub use bridge::{EntityParser, RigBackend, RigBackendConfig, ServiceBackend}; +pub use bridge::EntityParser; pub use error::Error; pub use agent::{ diff --git a/crates/nvisy-rig/src/prelude.rs b/crates/nvisy-rig/src/prelude.rs index ab04a77..8f773ef 100644 --- a/crates/nvisy-rig/src/prelude.rs +++ b/crates/nvisy-rig/src/prelude.rs @@ -2,9 +2,9 @@ pub use crate::backend::{ DetectionConfig, DetectionRequest, DetectionResponse, - RetryPolicy, UsageStats, UsageTracker, + UsageStats, UsageTracker, }; -pub use crate::bridge::{EntityParser, RigBackend, RigBackendConfig, ServiceBackend}; +pub use crate::bridge::EntityParser; pub use crate::error::Error; pub use crate::agent::{ AuthenticatedProvider, 
BaseAgentConfig, ContextWindow, Provider, From 42492dbe1377aa9da5cfdbae12b7bbdd5bdf10de Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Thu, 26 Feb 2026 01:10:03 +0100 Subject: [PATCH 17/24] refactor(rig): extract RetryConfig, remove HttpClient alias, rename Raw* types, move compact to BaseAgent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extract max_retries from provider structs into standalone RetryConfig. Replace HttpClient type alias with ClientWithMiddleware directly. Rename entity types: RawEntity→NerEntity, RawCvEntity→CvEntity, RawOcrEntity→OcrEntity. Move compact logic from ContextWindow to BaseAgent::prompt_compact where it belongs. Co-Authored-By: Claude Opus 4.6 --- crates/nvisy-rig/src/agent/base/agent.rs | 23 ++++++--- crates/nvisy-rig/src/agent/base/builder.rs | 13 ++++- crates/nvisy-rig/src/agent/base/context.rs | 37 +------------- crates/nvisy-rig/src/agent/base/dispatch.rs | 11 ++--- crates/nvisy-rig/src/agent/base/mod.rs | 2 +- crates/nvisy-rig/src/agent/base/provider.rs | 53 ++++++++++----------- crates/nvisy-rig/src/agent/cv/mod.rs | 8 ++-- crates/nvisy-rig/src/agent/cv/output.rs | 6 +-- crates/nvisy-rig/src/agent/mod.rs | 8 ++-- crates/nvisy-rig/src/agent/ner/mod.rs | 8 ++-- crates/nvisy-rig/src/agent/ner/output.rs | 10 ++-- crates/nvisy-rig/src/agent/ocr/mod.rs | 2 +- crates/nvisy-rig/src/agent/ocr/output.rs | 4 +- crates/nvisy-rig/src/lib.rs | 9 ++-- crates/nvisy-rig/src/prelude.rs | 9 ++-- 15 files changed, 92 insertions(+), 111 deletions(-) diff --git a/crates/nvisy-rig/src/agent/base/agent.rs b/crates/nvisy-rig/src/agent/base/agent.rs index c926451..26e091e 100644 --- a/crates/nvisy-rig/src/agent/base/agent.rs +++ b/crates/nvisy-rig/src/agent/base/agent.rs @@ -116,14 +116,25 @@ impl BaseAgent { /// Summarize text via LLM to fit within the context window's input budget. /// - /// Delegates to [`ContextWindow::compact`]. 
Returns the text unchanged if - /// no context window is configured or the text already fits. + /// Returns the text unchanged if no context window is configured or the + /// text already fits. Otherwise sends a summarization prompt and returns + /// the condensed version. #[tracing::instrument(skip_all, fields(agent_id = %self.id, mode = "compact"))] pub async fn prompt_compact(&self, text: &str) -> Result { - match &self.context_window { - Some(cw) => cw.compact(text, self).await, - None => Ok(text.to_owned()), - } + let cw = match &self.context_window { + Some(cw) if !cw.fits(text) => cw, + _ => return Ok(text.to_owned()), + }; + + let budget = cw.input_budget(); + let prompt = format!( + "Summarize the following text to fit within {budget} tokens. \ + Preserve all key entities, names, numbers, dates, and facts. \ + Remove redundancy and filler. Return ONLY the condensed text, \ + no preamble.\n\n{text}" + ); + + self.prompt_text(&prompt).await } /// Splits text via [`ContextWindow`], runs `prompt_structured` per chunk, diff --git a/crates/nvisy-rig/src/agent/base/builder.rs b/crates/nvisy-rig/src/agent/base/builder.rs index 1c39521..3a90d4b 100644 --- a/crates/nvisy-rig/src/agent/base/builder.rs +++ b/crates/nvisy-rig/src/agent/base/builder.rs @@ -12,7 +12,7 @@ use crate::backend::UsageTracker; use crate::error::Error; use super::dispatch::Agents; -use super::provider::{Provider, ProviderClient}; +use super::provider::{Provider, ProviderClient, RetryConfig}; use super::{BaseAgent, BaseAgentConfig}; /// Builder for [`BaseAgent`] that takes a `&Provider` + model name. 
@@ -20,6 +20,7 @@ pub(crate) struct BaseAgentBuilder { provider: Provider, model_name: String, config: BaseAgentConfig, + retry: RetryConfig, preamble: Option, tools: Vec>, } @@ -31,11 +32,18 @@ impl BaseAgentBuilder { provider: provider.clone(), model_name: model_name.to_owned(), config, + retry: RetryConfig::default(), preamble: None, tools: Vec::new(), } } + /// Set retry configuration for transient HTTP errors. + pub fn retry(mut self, retry: RetryConfig) -> Self { + self.retry = retry; + self + } + /// Set the system prompt (preamble). pub fn preamble(mut self, preamble: impl Into) -> Self { self.preamble = Some(preamble.into()); @@ -54,12 +62,13 @@ impl BaseAgentBuilder { provider, model_name, config, + retry, preamble, tools, } = self; let preamble_ref = preamble.as_deref(); - let client = ProviderClient::from_provider(&provider)?; + let client = ProviderClient::from_provider(&provider, &retry)?; let inner = match client { ProviderClient::OpenAi(c) => { diff --git a/crates/nvisy-rig/src/agent/base/context.rs b/crates/nvisy-rig/src/agent/base/context.rs index 17e3cb6..b14ee18 100644 --- a/crates/nvisy-rig/src/agent/base/context.rs +++ b/crates/nvisy-rig/src/agent/base/context.rs @@ -1,9 +1,5 @@ //! Context window management for LLM token limits. -use crate::error::Error; - -use super::agent::BaseAgent; - /// Manages token budget estimation, splitting, and truncation. #[derive(Debug, Clone)] pub struct ContextWindow { @@ -29,7 +25,7 @@ impl ContextWindow { } /// Available input token budget (max minus reserved output). - fn input_budget(&self) -> usize { + pub(crate) fn input_budget(&self) -> usize { self.max_tokens.saturating_sub(self.reserve_output) } @@ -83,31 +79,6 @@ impl ContextWindow { chunks } - /// Summarize text via LLM to fit within the input token budget. - /// - /// If the text already fits, returns it unchanged. Otherwise sends a - /// summarization prompt to the given agent and returns the condensed - /// version. 
- pub(crate) async fn compact( - &self, - text: &str, - agent: &BaseAgent, - ) -> Result { - if self.fits(text) { - return Ok(text.to_owned()); - } - - let budget = self.input_budget(); - let prompt = format!( - "Summarize the following text to fit within {budget} tokens. \ - Preserve all key entities, names, numbers, dates, and facts. \ - Remove redundancy and filler. Return ONLY the condensed text, \ - no preamble.\n\n{text}" - ); - - agent.prompt_text(&prompt).await - } - /// Truncate text to fit, keeping the end (most recent context). /// /// Safe for multi-byte UTF-8 input. @@ -256,11 +227,7 @@ mod tests { } #[test] - fn compact_returns_unchanged_when_fits() { - // compact requires async + a real model, so we only test the - // early-return path via `fits` logic. The "already fits" branch - // returns `Ok(text.to_owned())` synchronously — verify the - // prerequisite here. + fn fits_respects_budget() { let cw = ContextWindow::new(100, 20); let short = "a".repeat(300); // ~75 tokens, budget is 80 assert!(cw.fits(&short)); diff --git a/crates/nvisy-rig/src/agent/base/dispatch.rs b/crates/nvisy-rig/src/agent/base/dispatch.rs index c54da69..12016aa 100644 --- a/crates/nvisy-rig/src/agent/base/dispatch.rs +++ b/crates/nvisy-rig/src/agent/base/dispatch.rs @@ -4,16 +4,15 @@ //! supported provider, enabling dispatch without exposing `CompletionModel` //! generics to the public API. +use reqwest_middleware::ClientWithMiddleware; use rig::agent::Agent; use rig::providers::{anthropic, gemini, ollama, openai}; -use super::provider::HttpClient; - pub(crate) enum Agents { - OpenAi(Agent>), - Anthropic(Agent>), - Gemini(Agent>), - Ollama(Agent>), + OpenAi(Agent>), + Anthropic(Agent>), + Gemini(Agent>), + Ollama(Agent>), } /// Dispatch a call to the concrete agent inside each variant. 
diff --git a/crates/nvisy-rig/src/agent/base/mod.rs b/crates/nvisy-rig/src/agent/base/mod.rs index 122d6cb..5ee9dc1 100644 --- a/crates/nvisy-rig/src/agent/base/mod.rs +++ b/crates/nvisy-rig/src/agent/base/mod.rs @@ -14,7 +14,7 @@ pub(crate) use agent::BaseAgent; pub(crate) use builder::BaseAgentBuilder; pub use context::ContextWindow; -pub use provider::{AuthenticatedProvider, Provider, UnauthenticatedProvider}; +pub use provider::{AuthenticatedProvider, Provider, RetryConfig, UnauthenticatedProvider}; /// Configuration for a [`BaseAgent`]. #[derive(Debug, Clone)] diff --git a/crates/nvisy-rig/src/agent/base/provider.rs b/crates/nvisy-rig/src/agent/base/provider.rs index 66bc930..f34ec63 100644 --- a/crates/nvisy-rig/src/agent/base/provider.rs +++ b/crates/nvisy-rig/src/agent/base/provider.rs @@ -4,33 +4,37 @@ //! URLs. Client construction is deferred until an agent or backend is built. use reqwest_middleware::ClientBuilder; +use reqwest_middleware::ClientWithMiddleware; use reqwest_retry::{RetryTransientMiddleware, policies::ExponentialBackoff}; use rig::client::Nothing; use rig::providers::{anthropic, gemini, ollama, openai}; use crate::error::Error; -/// HTTP client type used by all rig provider clients. -pub(crate) type HttpClient = reqwest_middleware::ClientWithMiddleware; +/// Retry configuration for transient HTTP errors. +#[derive(Debug, Clone)] +pub struct RetryConfig { + /// Maximum retries for transient HTTP errors. + pub max_retries: u32, +} -/// Default number of retries for transient HTTP errors. -const DEFAULT_MAX_RETRIES: u32 = 3; +impl Default for RetryConfig { + fn default() -> Self { + Self { max_retries: 3 } + } +} /// Provider that requires an API key (OpenAI, Anthropic, Gemini). #[derive(Clone)] pub struct AuthenticatedProvider { pub api_key: String, pub base_url: Option, - /// Maximum retries for transient HTTP errors. - pub max_retries: u32, } /// Provider that does not require an API key (Ollama). 
#[derive(Clone)] pub struct UnauthenticatedProvider { pub base_url: Option, - /// Maximum retries for transient HTTP errors. - pub max_retries: u32, } /// Supported LLM providers. @@ -61,7 +65,6 @@ impl Provider { Self::OpenAi(AuthenticatedProvider { api_key: api_key.to_owned(), base_url: None, - max_retries: DEFAULT_MAX_RETRIES, }) } @@ -70,7 +73,6 @@ impl Provider { Self::Anthropic(AuthenticatedProvider { api_key: api_key.to_owned(), base_url: None, - max_retries: DEFAULT_MAX_RETRIES, }) } @@ -79,7 +81,6 @@ impl Provider { Self::Gemini(AuthenticatedProvider { api_key: api_key.to_owned(), base_url: None, - max_retries: DEFAULT_MAX_RETRIES, }) } @@ -87,7 +88,6 @@ impl Provider { pub fn ollama() -> Self { Self::Ollama(UnauthenticatedProvider { base_url: None, - max_retries: DEFAULT_MAX_RETRIES, }) } @@ -95,15 +95,14 @@ impl Provider { pub fn ollama_with_url(url: &str) -> Self { Self::Ollama(UnauthenticatedProvider { base_url: Some(url.to_owned()), - max_retries: DEFAULT_MAX_RETRIES, }) } } /// Build a `ClientWithMiddleware` with retry middleware. -fn build_http_client(max_retries: u32) -> HttpClient { +fn build_http_client(retry: &RetryConfig) -> ClientWithMiddleware { let retry_policy = ExponentialBackoff::builder() - .build_with_max_retries(max_retries); + .build_with_max_retries(retry.max_retries); ClientBuilder::new(reqwest_middleware::reqwest::Client::new()) .with(RetryTransientMiddleware::new_with_policy(retry_policy)) .build() @@ -111,18 +110,19 @@ fn build_http_client(max_retries: u32) -> HttpClient { /// Internal helper — builds a concrete rig client from connection params. 
pub(crate) enum ProviderClient { - OpenAi(openai::CompletionsClient), - Anthropic(anthropic::Client), - Gemini(gemini::Client), - Ollama(ollama::Client), + OpenAi(openai::CompletionsClient), + Anthropic(anthropic::Client), + Gemini(gemini::Client), + Ollama(ollama::Client), } impl ProviderClient { - pub(crate) fn from_provider(provider: &Provider) -> Result { + pub(crate) fn from_provider(provider: &Provider, retry: &RetryConfig) -> Result { + let http_client = build_http_client(retry); + match provider { Provider::OpenAi(p) => { - let http_client = build_http_client(p.max_retries); - let mut builder = openai::Client::::builder() + let mut builder = openai::Client::::builder() .api_key(&p.api_key) .http_client(http_client); if let Some(url) = &p.base_url { @@ -134,8 +134,7 @@ impl ProviderClient { Ok(Self::OpenAi(client.completions_api())) } Provider::Anthropic(p) => { - let http_client = build_http_client(p.max_retries); - let mut builder = anthropic::Client::::builder() + let mut builder = anthropic::Client::::builder() .api_key(&p.api_key) .http_client(http_client); if let Some(url) = &p.base_url { @@ -146,8 +145,7 @@ impl ProviderClient { )) } Provider::Gemini(p) => { - let http_client = build_http_client(p.max_retries); - let mut builder = gemini::Client::::builder() + let mut builder = gemini::Client::::builder() .api_key(&p.api_key) .http_client(http_client); if let Some(url) = &p.base_url { @@ -158,8 +156,7 @@ impl ProviderClient { )) } Provider::Ollama(p) => { - let http_client = build_http_client(p.max_retries); - let mut builder = ollama::Client::::builder() + let mut builder = ollama::Client::::builder() .api_key(Nothing) .http_client(http_client); if let Some(url) = &p.base_url { diff --git a/crates/nvisy-rig/src/agent/cv/mod.rs b/crates/nvisy-rig/src/agent/cv/mod.rs index abae137..f7357e9 100644 --- a/crates/nvisy-rig/src/agent/cv/mod.rs +++ b/crates/nvisy-rig/src/agent/cv/mod.rs @@ -9,7 +9,7 @@ mod output; mod prompt; mod tool; -pub use 
output::{RawCvEntities, RawCvEntity}; +pub use output::{CvEntities, CvEntity}; use async_trait::async_trait; use base64::Engine; @@ -60,7 +60,7 @@ pub trait CvProvider: Send + Sync { /// 3. The VLM is instructed to call the `cv_detect_objects` tool (backed /// by the [`CvProvider`]) and then classify each detection into an /// entity category and type. -/// 4. Structured output is parsed into a `Vec`. +/// 4. Structured output is parsed into a `Vec`. pub struct CvAgent { base: BaseAgent, } @@ -99,7 +99,7 @@ impl CvAgent { &self, image_data: &[u8], config: &DetectionConfig, - ) -> Result, Error> { + ) -> Result, Error> { let image_b64 = STANDARD.encode(image_data); tracing::debug!( b64_len = image_b64.len(), @@ -109,7 +109,7 @@ impl CvAgent { let prompt = CvPromptBuilder::new(config).build(&image_b64); - let result: RawCvEntities = self.base.prompt_structured(&prompt).await?; + let result: CvEntities = self.base.prompt_structured(&prompt).await?; tracing::info!( entity_count = result.entities.len(), diff --git a/crates/nvisy-rig/src/agent/cv/output.rs b/crates/nvisy-rig/src/agent/cv/output.rs index d40cb12..32331b7 100644 --- a/crates/nvisy-rig/src/agent/cv/output.rs +++ b/crates/nvisy-rig/src/agent/cv/output.rs @@ -7,7 +7,7 @@ use nvisy_ontology::entity::{EntityCategory, EntityKind}; /// A single entity detected by computer vision. #[derive(Debug, Clone, PartialEq, Deserialize, Serialize, JsonSchema)] -pub struct RawCvEntity { +pub struct CvEntity { /// Broad classification. pub category: EntityCategory, /// Specific entity type. @@ -22,7 +22,7 @@ pub struct RawCvEntity { /// Wrapper for structured output parsing. #[derive(Debug, Clone, Default, PartialEq, Deserialize, Serialize, JsonSchema)] -pub struct RawCvEntities { +pub struct CvEntities { /// Detected entities. 
- pub entities: Vec, + pub entities: Vec, } diff --git a/crates/nvisy-rig/src/agent/mod.rs b/crates/nvisy-rig/src/agent/mod.rs index fd2f988..21ff4e4 100644 --- a/crates/nvisy-rig/src/agent/mod.rs +++ b/crates/nvisy-rig/src/agent/mod.rs @@ -9,8 +9,8 @@ mod ocr; mod ner; pub(crate) use base::BaseAgent; -pub use base::{AuthenticatedProvider, BaseAgentConfig, ContextWindow, Provider, UnauthenticatedProvider}; +pub use base::{AuthenticatedProvider, BaseAgentConfig, ContextWindow, Provider, RetryConfig, UnauthenticatedProvider}; -pub use ner::{NerAgent, RawEntities, RawEntity}; -pub use ocr::{OcrAgent, OcrOutput, OcrProvider, OcrTextRegion, RawOcrEntity}; -pub use cv::{CvAgent, CvDetection, CvProvider, RawCvEntities, RawCvEntity}; +pub use ner::{NerAgent, NerEntities, NerEntity}; +pub use ocr::{OcrAgent, OcrEntity, OcrOutput, OcrProvider, OcrTextRegion}; +pub use cv::{CvAgent, CvDetection, CvEntities, CvEntity, CvProvider}; diff --git a/crates/nvisy-rig/src/agent/ner/mod.rs b/crates/nvisy-rig/src/agent/ner/mod.rs index 8716da5..6098e96 100644 --- a/crates/nvisy-rig/src/agent/ner/mod.rs +++ b/crates/nvisy-rig/src/agent/ner/mod.rs @@ -7,7 +7,7 @@ mod output; mod prompt; -pub use output::{RawEntities, RawEntity}; +pub use output::{NerEntities, NerEntity}; use uuid::Uuid; @@ -25,7 +25,7 @@ use prompt::{NER_SYSTEM_PROMPT, NerPromptBuilder}; /// [`detect`](Self::detect). /// 2. The agent builds a user prompt via [`NerPromptBuilder`] that /// specifies entity types and confidence thresholds. -/// 3. Structured output is parsed into `Vec`. +/// 3. Structured output is parsed into `Vec`. 
pub struct NerAgent { base: BaseAgent, } @@ -58,7 +58,7 @@ impl NerAgent { &self, text: &str, config: &DetectionConfig, - ) -> Result, Error> { + ) -> Result, Error> { let prompt = NerPromptBuilder::new(config).build(text); tracing::debug!( @@ -67,7 +67,7 @@ impl NerAgent { "built ner prompt" ); - let result: RawEntities = self.base.prompt_structured(&prompt).await?; + let result: NerEntities = self.base.prompt_structured(&prompt).await?; tracing::info!( entity_count = result.entities.len(), diff --git a/crates/nvisy-rig/src/agent/ner/output.rs b/crates/nvisy-rig/src/agent/ner/output.rs index 63167f3..8e2df0e 100644 --- a/crates/nvisy-rig/src/agent/ner/output.rs +++ b/crates/nvisy-rig/src/agent/ner/output.rs @@ -5,16 +5,16 @@ use serde::{Deserialize, Serialize}; use nvisy_ontology::entity::{EntityCategory, EntityKind}; -/// A list of raw entities returned by structured output. +/// A list of NER entities returned by structured output. #[derive(Debug, Clone, Default, PartialEq, Deserialize, Serialize, JsonSchema)] -pub struct RawEntities { +pub struct NerEntities { /// Detected entities. - pub entities: Vec, + pub entities: Vec, } -/// A single raw entity from structured LLM output. +/// A single NER entity from structured LLM output. #[derive(Debug, Clone, PartialEq, Deserialize, Serialize, JsonSchema)] -pub struct RawEntity { +pub struct NerEntity { /// Broad classification. pub category: EntityCategory, /// Specific entity type. 
diff --git a/crates/nvisy-rig/src/agent/ocr/mod.rs b/crates/nvisy-rig/src/agent/ocr/mod.rs index 01194c0..da78764 100644 --- a/crates/nvisy-rig/src/agent/ocr/mod.rs +++ b/crates/nvisy-rig/src/agent/ocr/mod.rs @@ -9,7 +9,7 @@ mod output; mod prompt; mod tool; -pub use output::{OcrOutput, RawOcrEntity}; +pub use output::{OcrOutput, OcrEntity}; use async_trait::async_trait; use base64::Engine; diff --git a/crates/nvisy-rig/src/agent/ocr/output.rs b/crates/nvisy-rig/src/agent/ocr/output.rs index 0743de9..66baf8b 100644 --- a/crates/nvisy-rig/src/agent/ocr/output.rs +++ b/crates/nvisy-rig/src/agent/ocr/output.rs @@ -11,12 +11,12 @@ pub struct OcrOutput { /// Full text extracted from the image. pub extracted_text: String, /// Entities detected in the extracted text. - pub entities: Vec, + pub entities: Vec, } /// A single entity detected in OCR-extracted text. #[derive(Debug, Clone, PartialEq, Deserialize, Serialize, JsonSchema)] -pub struct RawOcrEntity { +pub struct OcrEntity { /// Broad classification. pub category: EntityCategory, /// Specific entity type. 
diff --git a/crates/nvisy-rig/src/lib.rs b/crates/nvisy-rig/src/lib.rs index 5111c09..f26e0dd 100644 --- a/crates/nvisy-rig/src/lib.rs +++ b/crates/nvisy-rig/src/lib.rs @@ -16,9 +16,8 @@ pub use error::Error; pub use agent::{ AuthenticatedProvider, BaseAgentConfig, ContextWindow, Provider, - UnauthenticatedProvider, - CvAgent, CvDetection, CvProvider, NerAgent, - OcrAgent, OcrOutput, OcrProvider, OcrTextRegion, - RawCvEntities, RawCvEntity, RawEntities, RawEntity, - RawOcrEntity, + RetryConfig, UnauthenticatedProvider, + CvAgent, CvDetection, CvEntities, CvEntity, CvProvider, + NerAgent, NerEntities, NerEntity, + OcrAgent, OcrEntity, OcrOutput, OcrProvider, OcrTextRegion, }; diff --git a/crates/nvisy-rig/src/prelude.rs b/crates/nvisy-rig/src/prelude.rs index 8f773ef..4552dda 100644 --- a/crates/nvisy-rig/src/prelude.rs +++ b/crates/nvisy-rig/src/prelude.rs @@ -8,9 +8,8 @@ pub use crate::bridge::EntityParser; pub use crate::error::Error; pub use crate::agent::{ AuthenticatedProvider, BaseAgentConfig, ContextWindow, Provider, - UnauthenticatedProvider, - CvAgent, CvDetection, CvProvider, NerAgent, - OcrAgent, OcrOutput, OcrProvider, OcrTextRegion, - RawCvEntities, RawCvEntity, RawEntities, RawEntity, - RawOcrEntity, + RetryConfig, UnauthenticatedProvider, + CvAgent, CvDetection, CvEntities, CvEntity, CvProvider, + NerAgent, NerEntities, NerEntity, + OcrAgent, OcrEntity, OcrOutput, OcrProvider, OcrTextRegion, }; From 9675410d11ba98ec7809852e78a8fdf0a621e0fb Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Thu, 26 Feb 2026 01:15:34 +0100 Subject: [PATCH 18/24] refactor(rig): delete ProviderClient, move model into Provider, merge RetryConfig into BaseAgentConfig Fold client construction directly into Agents::build(), eliminating the ProviderClient intermediary. Move model_name from a separate parameter into Provider variants so each provider carries its full identity. Merge max_retries into BaseAgentConfig, removing the standalone RetryConfig struct. 
Co-Authored-By: Claude Opus 4.6 --- crates/nvisy-rig/src/agent/base/agent.rs | 4 +- crates/nvisy-rig/src/agent/base/builder.rs | 84 ++------------ crates/nvisy-rig/src/agent/base/dispatch.rs | 103 ++++++++++++++++- crates/nvisy-rig/src/agent/base/mod.rs | 5 +- crates/nvisy-rig/src/agent/base/provider.rs | 122 +++++--------------- crates/nvisy-rig/src/agent/cv/mod.rs | 5 +- crates/nvisy-rig/src/agent/mod.rs | 2 +- crates/nvisy-rig/src/agent/ner/mod.rs | 6 +- crates/nvisy-rig/src/agent/ocr/mod.rs | 5 +- crates/nvisy-rig/src/lib.rs | 2 +- crates/nvisy-rig/src/prelude.rs | 2 +- 11 files changed, 157 insertions(+), 183 deletions(-) diff --git a/crates/nvisy-rig/src/agent/base/agent.rs b/crates/nvisy-rig/src/agent/base/agent.rs index 26e091e..8c250ed 100644 --- a/crates/nvisy-rig/src/agent/base/agent.rs +++ b/crates/nvisy-rig/src/agent/base/agent.rs @@ -29,8 +29,8 @@ pub(crate) struct BaseAgent { impl BaseAgent { /// Create a new builder. - pub fn builder(provider: &crate::agent::Provider, model_name: &str, config: BaseAgentConfig) -> BaseAgentBuilder { - BaseAgentBuilder::new(provider, model_name, config) + pub fn builder(provider: &crate::agent::Provider, config: BaseAgentConfig) -> BaseAgentBuilder { + BaseAgentBuilder::new(provider, config) } /// Unique identifier for this agent instance (UUIDv7). diff --git a/crates/nvisy-rig/src/agent/base/builder.rs b/crates/nvisy-rig/src/agent/base/builder.rs index 3a90d4b..e9820c8 100644 --- a/crates/nvisy-rig/src/agent/base/builder.rs +++ b/crates/nvisy-rig/src/agent/base/builder.rs @@ -1,10 +1,6 @@ //! [`BaseAgentBuilder`]: builder for [`BaseAgent`] handling rig-core's //! typestate for optional tools. 
-use rig::agent::AgentBuilder; -use rig::client::CompletionClient; -use rig::completion::CompletionModel; -use rig::providers::gemini; use rig::tool::{Tool, ToolDyn}; use uuid::Uuid; @@ -12,38 +8,28 @@ use crate::backend::UsageTracker; use crate::error::Error; use super::dispatch::Agents; -use super::provider::{Provider, ProviderClient, RetryConfig}; +use super::provider::Provider; use super::{BaseAgent, BaseAgentConfig}; -/// Builder for [`BaseAgent`] that takes a `&Provider` + model name. +/// Builder for [`BaseAgent`] that takes a `&Provider` + config. pub(crate) struct BaseAgentBuilder { provider: Provider, - model_name: String, config: BaseAgentConfig, - retry: RetryConfig, preamble: Option, tools: Vec>, } impl BaseAgentBuilder { - /// Create a new builder with the given provider, model name, and config. - pub fn new(provider: &Provider, model_name: &str, config: BaseAgentConfig) -> Self { + /// Create a new builder with the given provider and config. + pub fn new(provider: &Provider, config: BaseAgentConfig) -> Self { Self { provider: provider.clone(), - model_name: model_name.to_owned(), config, - retry: RetryConfig::default(), preamble: None, tools: Vec::new(), } } - /// Set retry configuration for transient HTTP errors. - pub fn retry(mut self, retry: RetryConfig) -> Self { - self.retry = retry; - self - } - /// Set the system prompt (preamble). 
pub fn preamble(mut self, preamble: impl Into) -> Self { self.preamble = Some(preamble.into()); @@ -60,33 +46,17 @@ impl BaseAgentBuilder { pub fn build(self) -> Result { let Self { provider, - model_name, config, - retry, preamble, tools, } = self; - let preamble_ref = preamble.as_deref(); - let client = ProviderClient::from_provider(&provider, &retry)?; - - let inner = match client { - ProviderClient::OpenAi(c) => { - Agents::OpenAi(build_rig_agent(c.completion_model(&model_name), &config, preamble_ref, tools)) - } - ProviderClient::Anthropic(c) => { - Agents::Anthropic(build_rig_agent(c.completion_model(&model_name), &config, preamble_ref, tools)) - } - ProviderClient::Gemini(c) => { - // rig-core 0.31: Gemini's Capabilities impl doesn't propagate H, - // so CompletionClient is unavailable for non-default H. - let model = gemini::completion::CompletionModel::new(c, &model_name); - Agents::Gemini(build_rig_agent(model, &config, preamble_ref, tools)) - } - ProviderClient::Ollama(c) => { - Agents::Ollama(build_rig_agent(c.completion_model(&model_name), &config, preamble_ref, tools)) - } - }; + let inner = Agents::build( + &provider, + &config, + preamble.as_deref(), + tools, + )?; Ok(BaseAgent { id: Uuid::now_v7(), @@ -96,37 +66,3 @@ impl BaseAgentBuilder { }) } } - -/// Build a concrete rig-core `Agent`. -/// -/// Generic over `M` but only called inside [`BaseAgentBuilder::build`] — -/// the generic never escapes the module boundary. 
-fn build_rig_agent( - model: M, - config: &BaseAgentConfig, - preamble: Option<&str>, - tools: Vec>, -) -> rig::agent::Agent { - if tools.is_empty() { - let mut builder = AgentBuilder::new(model) - .temperature(config.temperature) - .max_tokens(config.max_tokens); - - if let Some(preamble) = preamble { - builder = builder.preamble(preamble); - } - - builder.build() - } else { - let mut builder = AgentBuilder::new(model) - .temperature(config.temperature) - .max_tokens(config.max_tokens) - .tools(tools); - - if let Some(preamble) = preamble { - builder = builder.preamble(preamble); - } - - builder.build() - } -} diff --git a/crates/nvisy-rig/src/agent/base/dispatch.rs b/crates/nvisy-rig/src/agent/base/dispatch.rs index 12016aa..d891b58 100644 --- a/crates/nvisy-rig/src/agent/base/dispatch.rs +++ b/crates/nvisy-rig/src/agent/base/dispatch.rs @@ -5,8 +5,16 @@ //! generics to the public API. use reqwest_middleware::ClientWithMiddleware; -use rig::agent::Agent; +use rig::agent::{Agent, AgentBuilder}; +use rig::client::CompletionClient; +use rig::completion::CompletionModel; use rig::providers::{anthropic, gemini, ollama, openai}; +use rig::tool::ToolDyn; + +use crate::error::Error; + +use super::BaseAgentConfig; +use super::provider::{Provider, build_http_client}; pub(crate) enum Agents { OpenAi(Agent>), @@ -15,6 +23,99 @@ pub(crate) enum Agents { Ollama(Agent>), } +impl Agents { + /// Build an [`Agents`] variant from provider connection params. 
+ pub(crate) fn build( + provider: &Provider, + config: &BaseAgentConfig, + preamble: Option<&str>, + tools: Vec>, + ) -> Result { + let http_client = build_http_client(config.max_retries); + + match provider { + Provider::OpenAi(p) => { + let mut builder = openai::Client::::builder() + .api_key(&p.api_key) + .http_client(http_client); + if let Some(url) = &p.base_url { + builder = builder.base_url(url); + } + let client = builder.build().map_err(|e| Error::Client(e.to_string()))?; + let model = client.completions_api().completion_model(&p.model); + Ok(Self::OpenAi(build_rig_agent(model, config, preamble, tools))) + } + Provider::Anthropic(p) => { + let mut builder = anthropic::Client::::builder() + .api_key(&p.api_key) + .http_client(http_client); + if let Some(url) = &p.base_url { + builder = builder.base_url(url); + } + let client = builder.build().map_err(|e| Error::Client(e.to_string()))?; + let model = client.completion_model(&p.model); + Ok(Self::Anthropic(build_rig_agent(model, config, preamble, tools))) + } + Provider::Gemini(p) => { + let mut builder = gemini::Client::::builder() + .api_key(&p.api_key) + .http_client(http_client); + if let Some(url) = &p.base_url { + builder = builder.base_url(url); + } + let client = builder.build().map_err(|e| Error::Client(e.to_string()))?; + // rig-core 0.31: Gemini's Capabilities impl doesn't propagate H, + // so CompletionClient is unavailable for non-default H. 
+ let model = gemini::completion::CompletionModel::new(client, &p.model); + Ok(Self::Gemini(build_rig_agent(model, config, preamble, tools))) + } + Provider::Ollama(p) => { + let mut builder = ollama::Client::::builder() + .api_key(rig::client::Nothing) + .http_client(http_client); + if let Some(url) = &p.base_url { + builder = builder.base_url(url); + } + let client = builder.build().map_err(|e| Error::Client(e.to_string()))?; + let model = client.completion_model(&p.model); + Ok(Self::Ollama(build_rig_agent(model, config, preamble, tools))) + } + } + } +} + +/// Build a concrete rig-core `Agent`. +/// +/// Generic over `M` but only called inside [`Agents::build`]: +/// the generic never escapes the module boundary. +fn build_rig_agent( + model: M, + config: &BaseAgentConfig, + preamble: Option<&str>, + tools: Vec>, +) -> Agent { + // AgentBuilder uses typestate: `.tools()` changes the type parameter, + // so the with-tools and without-tools paths cannot share a binding. + if tools.is_empty() { + let mut b = AgentBuilder::new(model) + .temperature(config.temperature) + .max_tokens(config.max_tokens); + if let Some(p) = preamble { + b = b.preamble(p); + } + b.build() + } else { + let mut b = AgentBuilder::new(model) + .temperature(config.temperature) + .max_tokens(config.max_tokens) + .tools(tools); + if let Some(p) = preamble { + b = b.preamble(p); + } + b.build() + } +} + /// Dispatch a call to the concrete agent inside each variant. macro_rules! 
dispatch { ($inner:expr, |$agent:ident| $body:expr) => { diff --git a/crates/nvisy-rig/src/agent/base/mod.rs b/crates/nvisy-rig/src/agent/base/mod.rs index 5ee9dc1..d37fc9d 100644 --- a/crates/nvisy-rig/src/agent/base/mod.rs +++ b/crates/nvisy-rig/src/agent/base/mod.rs @@ -14,7 +14,7 @@ pub(crate) use agent::BaseAgent; pub(crate) use builder::BaseAgentBuilder; pub use context::ContextWindow; -pub use provider::{AuthenticatedProvider, Provider, RetryConfig, UnauthenticatedProvider}; +pub use provider::{AuthenticatedProvider, Provider, UnauthenticatedProvider}; /// Configuration for a [`BaseAgent`]. #[derive(Debug, Clone)] @@ -23,6 +23,8 @@ pub struct BaseAgentConfig { pub temperature: f64, /// Maximum output tokens (default: 4096). pub max_tokens: u64, + /// Maximum retries for transient HTTP errors (default: 3). + pub max_retries: u32, /// Optional context window for chunking large inputs. pub context_window: Option, } @@ -32,6 +34,7 @@ impl Default for BaseAgentConfig { Self { temperature: 0.1, max_tokens: 4096, + max_retries: 3, context_window: None, } } diff --git a/crates/nvisy-rig/src/agent/base/provider.rs b/crates/nvisy-rig/src/agent/base/provider.rs index f34ec63..e199e44 100644 --- a/crates/nvisy-rig/src/agent/base/provider.rs +++ b/crates/nvisy-rig/src/agent/base/provider.rs @@ -6,46 +6,31 @@ use reqwest_middleware::ClientBuilder; use reqwest_middleware::ClientWithMiddleware; use reqwest_retry::{RetryTransientMiddleware, policies::ExponentialBackoff}; -use rig::client::Nothing; -use rig::providers::{anthropic, gemini, ollama, openai}; - -use crate::error::Error; - -/// Retry configuration for transient HTTP errors. -#[derive(Debug, Clone)] -pub struct RetryConfig { - /// Maximum retries for transient HTTP errors. - pub max_retries: u32, -} - -impl Default for RetryConfig { - fn default() -> Self { - Self { max_retries: 3 } - } -} /// Provider that requires an API key (OpenAI, Anthropic, Gemini). 
#[derive(Clone)] pub struct AuthenticatedProvider { pub api_key: String, + pub model: String, pub base_url: Option, } /// Provider that does not require an API key (Ollama). #[derive(Clone)] pub struct UnauthenticatedProvider { + pub model: String, pub base_url: Option, } /// Supported LLM providers. /// -/// Each variant holds only connection parameters. The actual rig client -/// is constructed lazily when an agent or backend is built. +/// Each variant holds connection parameters and the model name. The actual +/// rig client is constructed lazily when an agent is built. /// /// # Example /// ```rust,ignore -/// let provider = Provider::openai("sk-..."); -/// let agent = NerAgent::new(&provider, "gpt-4o", config); +/// let provider = Provider::openai("sk-...", "gpt-4o"); +/// let agent = NerAgent::new(&provider, config); /// ``` #[derive(Clone)] pub enum Provider { @@ -60,112 +45,63 @@ pub enum Provider { } impl Provider { - /// Create an OpenAI provider from an API key. - pub fn openai(api_key: &str) -> Self { + /// Create an OpenAI provider. + pub fn openai(api_key: &str, model: &str) -> Self { Self::OpenAi(AuthenticatedProvider { api_key: api_key.to_owned(), + model: model.to_owned(), base_url: None, }) } - /// Create an Anthropic provider from an API key. - pub fn anthropic(api_key: &str) -> Self { + /// Create an Anthropic provider. + pub fn anthropic(api_key: &str, model: &str) -> Self { Self::Anthropic(AuthenticatedProvider { api_key: api_key.to_owned(), + model: model.to_owned(), base_url: None, }) } - /// Create a Google Gemini provider from an API key. - pub fn gemini(api_key: &str) -> Self { + /// Create a Google Gemini provider. + pub fn gemini(api_key: &str, model: &str) -> Self { Self::Gemini(AuthenticatedProvider { api_key: api_key.to_owned(), + model: model.to_owned(), base_url: None, }) } /// Create an Ollama provider using the default local URL. 
- pub fn ollama() -> Self { + pub fn ollama(model: &str) -> Self { Self::Ollama(UnauthenticatedProvider { + model: model.to_owned(), base_url: None, }) } /// Create an Ollama provider with a custom base URL. - pub fn ollama_with_url(url: &str) -> Self { + pub fn ollama_with_url(model: &str, url: &str) -> Self { Self::Ollama(UnauthenticatedProvider { + model: model.to_owned(), base_url: Some(url.to_owned()), }) } + + /// The model name for this provider. + pub fn model(&self) -> &str { + match self { + Self::OpenAi(p) | Self::Anthropic(p) | Self::Gemini(p) => &p.model, + Self::Ollama(p) => &p.model, + } + } } /// Build a `ClientWithMiddleware` with retry middleware. -fn build_http_client(retry: &RetryConfig) -> ClientWithMiddleware { +pub(crate) fn build_http_client(max_retries: u32) -> ClientWithMiddleware { let retry_policy = ExponentialBackoff::builder() - .build_with_max_retries(retry.max_retries); + .build_with_max_retries(max_retries); ClientBuilder::new(reqwest_middleware::reqwest::Client::new()) .with(RetryTransientMiddleware::new_with_policy(retry_policy)) .build() } - -/// Internal helper — builds a concrete rig client from connection params. 
-pub(crate) enum ProviderClient { - OpenAi(openai::CompletionsClient), - Anthropic(anthropic::Client), - Gemini(gemini::Client), - Ollama(ollama::Client), -} - -impl ProviderClient { - pub(crate) fn from_provider(provider: &Provider, retry: &RetryConfig) -> Result { - let http_client = build_http_client(retry); - - match provider { - Provider::OpenAi(p) => { - let mut builder = openai::Client::::builder() - .api_key(&p.api_key) - .http_client(http_client); - if let Some(url) = &p.base_url { - builder = builder.base_url(url); - } - let client = builder - .build() - .map_err(|e| Error::Client(e.to_string()))?; - Ok(Self::OpenAi(client.completions_api())) - } - Provider::Anthropic(p) => { - let mut builder = anthropic::Client::::builder() - .api_key(&p.api_key) - .http_client(http_client); - if let Some(url) = &p.base_url { - builder = builder.base_url(url); - } - Ok(Self::Anthropic( - builder.build().map_err(|e| Error::Client(e.to_string()))?, - )) - } - Provider::Gemini(p) => { - let mut builder = gemini::Client::::builder() - .api_key(&p.api_key) - .http_client(http_client); - if let Some(url) = &p.base_url { - builder = builder.base_url(url); - } - Ok(Self::Gemini( - builder.build().map_err(|e| Error::Client(e.to_string()))?, - )) - } - Provider::Ollama(p) => { - let mut builder = ollama::Client::::builder() - .api_key(Nothing) - .http_client(http_client); - if let Some(url) = &p.base_url { - builder = builder.base_url(url); - } - Ok(Self::Ollama( - builder.build().map_err(|e| Error::Client(e.to_string()))?, - )) - } - } - } -} diff --git a/crates/nvisy-rig/src/agent/cv/mod.rs b/crates/nvisy-rig/src/agent/cv/mod.rs index f7357e9..2e87887 100644 --- a/crates/nvisy-rig/src/agent/cv/mod.rs +++ b/crates/nvisy-rig/src/agent/cv/mod.rs @@ -66,14 +66,13 @@ pub struct CvAgent { } impl CvAgent { - /// Create a new CV agent with the given provider, model name, config, and CV provider. + /// Create a new CV agent. 
pub fn new( provider: &Provider, - model: &str, config: BaseAgentConfig, cv: impl CvProvider + 'static, ) -> Result { - let base = BaseAgent::builder(provider, model, config) + let base = BaseAgent::builder(provider, config) .preamble(CV_SYSTEM_PROMPT) .tool(CvRigTool::new(cv)) .build()?; diff --git a/crates/nvisy-rig/src/agent/mod.rs b/crates/nvisy-rig/src/agent/mod.rs index 21ff4e4..d98e13e 100644 --- a/crates/nvisy-rig/src/agent/mod.rs +++ b/crates/nvisy-rig/src/agent/mod.rs @@ -9,7 +9,7 @@ mod ocr; mod ner; pub(crate) use base::BaseAgent; -pub use base::{AuthenticatedProvider, BaseAgentConfig, ContextWindow, Provider, RetryConfig, UnauthenticatedProvider}; +pub use base::{AuthenticatedProvider, BaseAgentConfig, ContextWindow, Provider, UnauthenticatedProvider}; pub use ner::{NerAgent, NerEntities, NerEntity}; pub use ocr::{OcrAgent, OcrEntity, OcrOutput, OcrProvider, OcrTextRegion}; diff --git a/crates/nvisy-rig/src/agent/ner/mod.rs b/crates/nvisy-rig/src/agent/ner/mod.rs index 6098e96..9b6b66e 100644 --- a/crates/nvisy-rig/src/agent/ner/mod.rs +++ b/crates/nvisy-rig/src/agent/ner/mod.rs @@ -31,9 +31,9 @@ pub struct NerAgent { } impl NerAgent { - /// Create a new NER agent with the given provider, model name, and config. - pub fn new(provider: &Provider, model: &str, config: BaseAgentConfig) -> Result { - let base = BaseAgent::builder(provider, model, config) + /// Create a new NER agent. + pub fn new(provider: &Provider, config: BaseAgentConfig) -> Result { + let base = BaseAgent::builder(provider, config) .preamble(NER_SYSTEM_PROMPT) .build()?; Ok(Self { base }) diff --git a/crates/nvisy-rig/src/agent/ocr/mod.rs b/crates/nvisy-rig/src/agent/ocr/mod.rs index da78764..2f1b37c 100644 --- a/crates/nvisy-rig/src/agent/ocr/mod.rs +++ b/crates/nvisy-rig/src/agent/ocr/mod.rs @@ -69,14 +69,13 @@ pub struct OcrAgent { } impl OcrAgent { - /// Create a new OCR agent with the given provider, model name, config, and OCR provider. + /// Create a new OCR agent. 
pub fn new( provider: &Provider, - model: &str, config: BaseAgentConfig, ocr: impl OcrProvider + 'static, ) -> Result { - let base = BaseAgent::builder(provider, model, config) + let base = BaseAgent::builder(provider, config) .preamble(OCR_SYSTEM_PROMPT) .tool(OcrRigTool::new(ocr)) .build()?; diff --git a/crates/nvisy-rig/src/lib.rs b/crates/nvisy-rig/src/lib.rs index f26e0dd..797d8a4 100644 --- a/crates/nvisy-rig/src/lib.rs +++ b/crates/nvisy-rig/src/lib.rs @@ -16,7 +16,7 @@ pub use error::Error; pub use agent::{ AuthenticatedProvider, BaseAgentConfig, ContextWindow, Provider, - RetryConfig, UnauthenticatedProvider, + UnauthenticatedProvider, CvAgent, CvDetection, CvEntities, CvEntity, CvProvider, NerAgent, NerEntities, NerEntity, OcrAgent, OcrEntity, OcrOutput, OcrProvider, OcrTextRegion, diff --git a/crates/nvisy-rig/src/prelude.rs b/crates/nvisy-rig/src/prelude.rs index 4552dda..9243656 100644 --- a/crates/nvisy-rig/src/prelude.rs +++ b/crates/nvisy-rig/src/prelude.rs @@ -8,7 +8,7 @@ pub use crate::bridge::EntityParser; pub use crate::error::Error; pub use crate::agent::{ AuthenticatedProvider, BaseAgentConfig, ContextWindow, Provider, - RetryConfig, UnauthenticatedProvider, + UnauthenticatedProvider, CvAgent, CvDetection, CvEntities, CvEntity, CvProvider, NerAgent, NerEntities, NerEntity, OcrAgent, OcrEntity, OcrOutput, OcrProvider, OcrTextRegion, From 610810376cff4f42517c49813be1de9371439a01 Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Thu, 26 Feb 2026 01:59:24 +0100 Subject: [PATCH 19/24] refactor(rig): move base agent into backend/, improve docs, remove trivial tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move agent/base/* files (BaseAgent, BaseAgentBuilder, BaseAgentConfig, ContextWindow, Provider) into backend/ so the agent infrastructure lives alongside usage tracking and detection types. Make the agent module private (was pub(crate)) and re-export public types through backend/. 
Improve module and type documentation across the crate. Remove 9 trivial tests that only verified arithmetic or getters (23 → 14 tests). Co-Authored-By: Claude Opus 4.6 --- crates/nvisy-rig/src/agent/base/builder.rs | 68 -------- crates/nvisy-rig/src/agent/base/dispatch.rs | 131 --------------- crates/nvisy-rig/src/agent/base/mod.rs | 41 ----- crates/nvisy-rig/src/agent/cv/mod.rs | 6 +- crates/nvisy-rig/src/agent/mod.rs | 15 +- crates/nvisy-rig/src/agent/ner/mod.rs | 10 +- crates/nvisy-rig/src/agent/ocr/mod.rs | 6 +- .../src/{agent/base => backend}/agent.rs | 108 +++++++++---- crates/nvisy-rig/src/backend/builder.rs | 150 ++++++++++++++++++ .../src/{agent/base => backend}/context.rs | 114 ++++--------- crates/nvisy-rig/src/backend/metrics.rs | 79 +-------- crates/nvisy-rig/src/backend/mod.rs | 24 ++- .../src/{agent/base => backend}/provider.rs | 5 +- crates/nvisy-rig/src/bridge/mod.rs | 7 +- crates/nvisy-rig/src/bridge/prompt.rs | 6 +- crates/nvisy-rig/src/bridge/response.rs | 74 +++------ crates/nvisy-rig/src/error.rs | 8 +- crates/nvisy-rig/src/lib.rs | 10 +- crates/nvisy-rig/src/prelude.rs | 5 +- 19 files changed, 351 insertions(+), 516 deletions(-) delete mode 100644 crates/nvisy-rig/src/agent/base/builder.rs delete mode 100644 crates/nvisy-rig/src/agent/base/dispatch.rs delete mode 100644 crates/nvisy-rig/src/agent/base/mod.rs rename crates/nvisy-rig/src/{agent/base => backend}/agent.rs (60%) create mode 100644 crates/nvisy-rig/src/backend/builder.rs rename crates/nvisy-rig/src/{agent/base => backend}/context.rs (54%) rename crates/nvisy-rig/src/{agent/base => backend}/provider.rs (94%) diff --git a/crates/nvisy-rig/src/agent/base/builder.rs b/crates/nvisy-rig/src/agent/base/builder.rs deleted file mode 100644 index e9820c8..0000000 --- a/crates/nvisy-rig/src/agent/base/builder.rs +++ /dev/null @@ -1,68 +0,0 @@ -//! [`BaseAgentBuilder`]: builder for [`BaseAgent`] handling rig-core's -//! typestate for optional tools. 
- -use rig::tool::{Tool, ToolDyn}; -use uuid::Uuid; - -use crate::backend::UsageTracker; -use crate::error::Error; - -use super::dispatch::Agents; -use super::provider::Provider; -use super::{BaseAgent, BaseAgentConfig}; - -/// Builder for [`BaseAgent`] that takes a `&Provider` + config. -pub(crate) struct BaseAgentBuilder { - provider: Provider, - config: BaseAgentConfig, - preamble: Option, - tools: Vec>, -} - -impl BaseAgentBuilder { - /// Create a new builder with the given provider and config. - pub fn new(provider: &Provider, config: BaseAgentConfig) -> Self { - Self { - provider: provider.clone(), - config, - preamble: None, - tools: Vec::new(), - } - } - - /// Set the system prompt (preamble). - pub fn preamble(mut self, preamble: impl Into) -> Self { - self.preamble = Some(preamble.into()); - self - } - - /// Add a tool to the agent. - pub fn tool(mut self, tool: impl Tool + 'static) -> Self { - self.tools.push(Box::new(tool)); - self - } - - /// Build the [`BaseAgent`]. - pub fn build(self) -> Result { - let Self { - provider, - config, - preamble, - tools, - } = self; - - let inner = Agents::build( - &provider, - &config, - preamble.as_deref(), - tools, - )?; - - Ok(BaseAgent { - id: Uuid::now_v7(), - inner, - context_window: config.context_window, - tracker: UsageTracker::new(), - }) - } -} diff --git a/crates/nvisy-rig/src/agent/base/dispatch.rs b/crates/nvisy-rig/src/agent/base/dispatch.rs deleted file mode 100644 index d891b58..0000000 --- a/crates/nvisy-rig/src/agent/base/dispatch.rs +++ /dev/null @@ -1,131 +0,0 @@ -//! Provider-specific agent variants. -//! -//! [`Agents`] wraps a concrete `rig::agent::Agent` for each -//! supported provider, enabling dispatch without exposing `CompletionModel` -//! generics to the public API. 
- -use reqwest_middleware::ClientWithMiddleware; -use rig::agent::{Agent, AgentBuilder}; -use rig::client::CompletionClient; -use rig::completion::CompletionModel; -use rig::providers::{anthropic, gemini, ollama, openai}; -use rig::tool::ToolDyn; - -use crate::error::Error; - -use super::BaseAgentConfig; -use super::provider::{Provider, build_http_client}; - -pub(crate) enum Agents { - OpenAi(Agent>), - Anthropic(Agent>), - Gemini(Agent>), - Ollama(Agent>), -} - -impl Agents { - /// Build an [`Agents`] variant from provider connection params. - pub(crate) fn build( - provider: &Provider, - config: &BaseAgentConfig, - preamble: Option<&str>, - tools: Vec>, - ) -> Result { - let http_client = build_http_client(config.max_retries); - - match provider { - Provider::OpenAi(p) => { - let mut builder = openai::Client::::builder() - .api_key(&p.api_key) - .http_client(http_client); - if let Some(url) = &p.base_url { - builder = builder.base_url(url); - } - let client = builder.build().map_err(|e| Error::Client(e.to_string()))?; - let model = client.completions_api().completion_model(&p.model); - Ok(Self::OpenAi(build_rig_agent(model, config, preamble, tools))) - } - Provider::Anthropic(p) => { - let mut builder = anthropic::Client::::builder() - .api_key(&p.api_key) - .http_client(http_client); - if let Some(url) = &p.base_url { - builder = builder.base_url(url); - } - let client = builder.build().map_err(|e| Error::Client(e.to_string()))?; - let model = client.completion_model(&p.model); - Ok(Self::Anthropic(build_rig_agent(model, config, preamble, tools))) - } - Provider::Gemini(p) => { - let mut builder = gemini::Client::::builder() - .api_key(&p.api_key) - .http_client(http_client); - if let Some(url) = &p.base_url { - builder = builder.base_url(url); - } - let client = builder.build().map_err(|e| Error::Client(e.to_string()))?; - // rig-core 0.31: Gemini's Capabilities impl doesn't propagate H, - // so CompletionClient is unavailable for non-default H. 
- let model = gemini::completion::CompletionModel::new(client, &p.model); - Ok(Self::Gemini(build_rig_agent(model, config, preamble, tools))) - } - Provider::Ollama(p) => { - let mut builder = ollama::Client::::builder() - .api_key(rig::client::Nothing) - .http_client(http_client); - if let Some(url) = &p.base_url { - builder = builder.base_url(url); - } - let client = builder.build().map_err(|e| Error::Client(e.to_string()))?; - let model = client.completion_model(&p.model); - Ok(Self::Ollama(build_rig_agent(model, config, preamble, tools))) - } - } - } -} - -/// Build a concrete rig-core `Agent`. -/// -/// Generic over `M` but only called inside [`Agents::build`]: -/// the generic never escapes the module boundary. -fn build_rig_agent( - model: M, - config: &BaseAgentConfig, - preamble: Option<&str>, - tools: Vec>, -) -> Agent { - // AgentBuilder uses typestate: `.tools()` changes the type parameter, - // so the with-tools and without-tools paths cannot share a binding. - if tools.is_empty() { - let mut b = AgentBuilder::new(model) - .temperature(config.temperature) - .max_tokens(config.max_tokens); - if let Some(p) = preamble { - b = b.preamble(p); - } - b.build() - } else { - let mut b = AgentBuilder::new(model) - .temperature(config.temperature) - .max_tokens(config.max_tokens) - .tools(tools); - if let Some(p) = preamble { - b = b.preamble(p); - } - b.build() - } -} - -/// Dispatch a call to the concrete agent inside each variant. -macro_rules! dispatch { - ($inner:expr, |$agent:ident| $body:expr) => { - match $inner { - Agents::OpenAi($agent) => $body, - Agents::Anthropic($agent) => $body, - Agents::Gemini($agent) => $body, - Agents::Ollama($agent) => $body, - } - }; -} - -pub(crate) use dispatch; diff --git a/crates/nvisy-rig/src/agent/base/mod.rs b/crates/nvisy-rig/src/agent/base/mod.rs deleted file mode 100644 index d37fc9d..0000000 --- a/crates/nvisy-rig/src/agent/base/mod.rs +++ /dev/null @@ -1,41 +0,0 @@ -//! Internal foundation agent and builder. -//! 
-//! [`BaseAgent`] wraps a provider-specific rig-core agent with usage tracking -//! and structured-output fallback. [`BaseAgentBuilder`] takes a `&Provider` + -//! model name and dispatches to the correct concrete agent type internally. - -mod agent; -mod builder; -pub(crate) mod context; -pub(crate) mod dispatch; -pub(crate) mod provider; - -pub(crate) use agent::BaseAgent; -pub(crate) use builder::BaseAgentBuilder; - -pub use context::ContextWindow; -pub use provider::{AuthenticatedProvider, Provider, UnauthenticatedProvider}; - -/// Configuration for a [`BaseAgent`]. -#[derive(Debug, Clone)] -pub struct BaseAgentConfig { - /// Sampling temperature (default: 0.1). - pub temperature: f64, - /// Maximum output tokens (default: 4096). - pub max_tokens: u64, - /// Maximum retries for transient HTTP errors (default: 3). - pub max_retries: u32, - /// Optional context window for chunking large inputs. - pub context_window: Option, -} - -impl Default for BaseAgentConfig { - fn default() -> Self { - Self { - temperature: 0.1, - max_tokens: 4096, - max_retries: 3, - context_window: None, - } - } -} diff --git a/crates/nvisy-rig/src/agent/cv/mod.rs b/crates/nvisy-rig/src/agent/cv/mod.rs index 2e87887..3acb3c0 100644 --- a/crates/nvisy-rig/src/agent/cv/mod.rs +++ b/crates/nvisy-rig/src/agent/cv/mod.rs @@ -1,6 +1,6 @@ //! Computer vision agent for face, license plate, and signature detection. //! -//! [`CvAgent`] wraps a [`BaseAgent`](super::BaseAgent) with a +//! [`CvAgent`] wraps a [`BaseAgent`](crate::backend::BaseAgent) with a //! [`CvProvider`]-backed tool. It encodes an image as base64, prompts the //! VLM to call the CV tool, and returns classified entities with bounding //! boxes. 
@@ -17,10 +17,8 @@ use base64::engine::general_purpose::STANDARD; use serde::Serialize; use uuid::Uuid; -use crate::backend::{DetectionConfig, UsageTracker}; +use crate::backend::{BaseAgent, BaseAgentConfig, DetectionConfig, Provider, UsageTracker}; use crate::error::Error; - -use super::{BaseAgent, BaseAgentConfig, Provider}; use prompt::{CV_SYSTEM_PROMPT, CvPromptBuilder}; use tool::CvRigTool; diff --git a/crates/nvisy-rig/src/agent/mod.rs b/crates/nvisy-rig/src/agent/mod.rs index d98e13e..a3be9c6 100644 --- a/crates/nvisy-rig/src/agent/mod.rs +++ b/crates/nvisy-rig/src/agent/mod.rs @@ -1,16 +1,13 @@ -//! Agent system: base agent, specialized agents, and tool-provider traits. +//! Specialized detection agents: NER (text), CV (vision), and OCR (image-to-text). //! -//! All public types are re-exported here — consumer code should not reach -//! into individual agent submodules. +//! Each agent composes a [`BaseAgent`](crate::backend::BaseAgent) with +//! domain-specific prompts and optional tools. Public types are re-exported +//! from [`crate`] — consumer code should not reach into submodules. -pub(crate) mod base; mod cv; -mod ocr; mod ner; +mod ocr; -pub(crate) use base::BaseAgent; -pub use base::{AuthenticatedProvider, BaseAgentConfig, ContextWindow, Provider, UnauthenticatedProvider}; - +pub use cv::{CvAgent, CvDetection, CvEntities, CvEntity, CvProvider}; pub use ner::{NerAgent, NerEntities, NerEntity}; pub use ocr::{OcrAgent, OcrEntity, OcrOutput, OcrProvider, OcrTextRegion}; -pub use cv::{CvAgent, CvDetection, CvEntities, CvEntity, CvProvider}; diff --git a/crates/nvisy-rig/src/agent/ner/mod.rs b/crates/nvisy-rig/src/agent/ner/mod.rs index 9b6b66e..b7c3391 100644 --- a/crates/nvisy-rig/src/agent/ner/mod.rs +++ b/crates/nvisy-rig/src/agent/ner/mod.rs @@ -1,8 +1,8 @@ //! Named Entity Recognition (NER) agent for textual PII/entity detection. //! -//! [`NerAgent`] wraps a [`BaseAgent`](super::BaseAgent) with NER-specific -//! prompts. 
It is a pure LLM agent (no tools) that analyses text and -//! returns structured entity detections with byte offsets. +//! [`NerAgent`] wraps a [`BaseAgent`](crate::backend::BaseAgent) with +//! NER-specific prompts. It is a pure LLM agent (no tools) that analyses +//! text and returns structured entity detections with byte offsets. mod output; mod prompt; @@ -11,10 +11,8 @@ pub use output::{NerEntities, NerEntity}; use uuid::Uuid; -use crate::backend::{DetectionConfig, UsageTracker}; +use crate::backend::{BaseAgent, BaseAgentConfig, DetectionConfig, Provider, UsageTracker}; use crate::error::Error; - -use super::{BaseAgent, BaseAgentConfig, Provider}; use prompt::{NER_SYSTEM_PROMPT, NerPromptBuilder}; /// Agent for textual PII/entity detection using LLM-based NER. diff --git a/crates/nvisy-rig/src/agent/ocr/mod.rs b/crates/nvisy-rig/src/agent/ocr/mod.rs index 2f1b37c..dec1d1c 100644 --- a/crates/nvisy-rig/src/agent/ocr/mod.rs +++ b/crates/nvisy-rig/src/agent/ocr/mod.rs @@ -1,6 +1,6 @@ //! OCR agent for vision-based text extraction and entity detection. //! -//! [`OcrAgent`] wraps a [`BaseAgent`](super::BaseAgent) with an +//! [`OcrAgent`] wraps a [`BaseAgent`](crate::backend::BaseAgent) with an //! [`OcrProvider`]-backed tool. It encodes an image as base64, prompts the //! VLM to call the OCR tool, and returns extracted text together with any //! entities found in it. 
@@ -17,10 +17,8 @@ use base64::engine::general_purpose::STANDARD; use serde::Serialize; use uuid::Uuid; -use crate::backend::{DetectionConfig, UsageTracker}; +use crate::backend::{BaseAgent, BaseAgentConfig, DetectionConfig, Provider, UsageTracker}; use crate::error::Error; - -use super::{BaseAgent, BaseAgentConfig, Provider}; use prompt::{OCR_SYSTEM_PROMPT, OcrPromptBuilder}; use tool::OcrRigTool; diff --git a/crates/nvisy-rig/src/agent/base/agent.rs b/crates/nvisy-rig/src/backend/agent.rs similarity index 60% rename from crates/nvisy-rig/src/agent/base/agent.rs rename to crates/nvisy-rig/src/backend/agent.rs index 8c250ed..f2efb5b 100644 --- a/crates/nvisy-rig/src/agent/base/agent.rs +++ b/crates/nvisy-rig/src/backend/agent.rs @@ -1,53 +1,101 @@ -//! [`BaseAgent`]: internal foundation agent wrapping rig-core agents. +//! Foundation agent that wraps provider-specific rig-core agents. +#[path = "builder.rs"] +mod builder; + +pub(crate) use builder::BaseAgentBuilder; + +use reqwest_middleware::ClientWithMiddleware; +use rig::agent::Agent; use rig::completion::{Completion, Prompt}; +use rig::providers::{anthropic, gemini, ollama, openai}; use schemars::JsonSchema; use serde::de::DeserializeOwned; use serde::Serialize; use uuid::Uuid; -use crate::backend::UsageTracker; +use super::context::ContextWindow; +use super::provider::Provider; +use super::UsageTracker; use crate::bridge::ResponseParser; use crate::error::Error; -use super::dispatch::{Agents, dispatch}; -use super::{BaseAgentBuilder, BaseAgentConfig}; -use super::context::ContextWindow; +/// Sampling, retry, and context-window settings shared by all agents. +#[derive(Debug, Clone)] +pub struct BaseAgentConfig { + /// Sampling temperature (default: 0.1). + pub temperature: f64, + /// Maximum output tokens (default: 4096). + pub max_tokens: u64, + /// Maximum retries for transient HTTP errors (default: 3). + pub max_retries: u32, + /// Context window for chunking large inputs. 
+    pub context_window: Option<ContextWindow>,
+}
 
-/// Internal foundation agent wrapping a provider-specific rig-core agent.
+impl Default for BaseAgentConfig {
+    fn default() -> Self {
+        Self {
+            temperature: 0.1,
+            max_tokens: 4096,
+            max_retries: 3,
+            context_window: None,
+        }
+    }
+}
+
+enum Agents {
+    OpenAi(Agent<openai::CompletionModel<ClientWithMiddleware>>),
+    Anthropic(Agent<anthropic::completion::CompletionModel<ClientWithMiddleware>>),
+    Gemini(Agent<gemini::completion::CompletionModel<ClientWithMiddleware>>),
+    Ollama(Agent<ollama::CompletionModel<ClientWithMiddleware>>),
+}
+
+macro_rules! dispatch {
+    ($inner:expr, |$agent:ident| $body:expr) => {
+        match $inner {
+            Agents::OpenAi($agent) => $body,
+            Agents::Anthropic($agent) => $body,
+            Agents::Gemini($agent) => $body,
+            Agents::Ollama($agent) => $body,
+        }
+    };
+}
+
+/// Internal foundation agent wrapping a provider-specific rig-core agent
+/// with usage tracking and structured-output fallback.
 ///
-/// All prompt methods dispatch to the concrete agent variant held inside
-/// [`Agents`]. Specialized agents (e.g. `NerAgent`) compose this type.
+/// Specialized agents ([`NerAgent`], [`CvAgent`], [`OcrAgent`]) compose this
+/// type rather than inheriting from it.
 ///
-/// Not exported: specialized agents (e.g. `NerAgent`) compose this.
+/// [`NerAgent`]: crate::NerAgent
+/// [`CvAgent`]: crate::CvAgent
+/// [`OcrAgent`]: crate::OcrAgent
 pub(crate) struct BaseAgent {
-    pub(super) id: Uuid,
-    pub(super) inner: Agents,
-    pub(super) context_window: Option<ContextWindow>,
-    pub(super) tracker: UsageTracker,
+    id: Uuid,
+    inner: Agents,
+    context_window: Option<ContextWindow>,
+    tracker: UsageTracker,
 }
 
 impl BaseAgent {
-    /// Create a new builder.
-    pub fn builder(provider: &crate::agent::Provider, config: BaseAgentConfig) -> BaseAgentBuilder {
+    pub fn builder(provider: &Provider, config: BaseAgentConfig) -> BaseAgentBuilder {
         BaseAgentBuilder::new(provider, config)
     }
 
-    /// Unique identifier for this agent instance (UUIDv7).
    pub fn id(&self) -> Uuid {
         self.id
     }
 
-    /// Access the usage tracker.
     pub fn tracker(&self) -> &UsageTracker {
         &self.tracker
     }
 
-    /// Structured output prompt with usage tracking.
+ /// Structured-output prompt with usage tracking and JSON fallback. /// - /// Uses `agent.completion()` with an `output_schema` so the provider - /// constrains its response to valid JSON matching `T`. Falls back to - /// text-based parsing on deserialization failure. + /// Sends a completion request with an `output_schema` so the provider + /// constrains its response to valid JSON matching `T`. On deserialization + /// failure the raw text is re-parsed via [`ResponseParser`]. #[tracing::instrument(skip_all, fields(agent_id = %self.id, mode = "structured"))] pub async fn prompt_structured(&self, prompt: &str) -> Result where @@ -85,7 +133,7 @@ impl BaseAgent { } } - /// Text completion through the agent, records usage. + /// Text completion with usage tracking. #[tracing::instrument(skip_all, fields(agent_id = %self.id, mode = "text"))] pub async fn prompt_text(&self, prompt: &str) -> Result { let (text, usage) = dispatch!(&self.inner, |agent| { @@ -103,10 +151,7 @@ impl BaseAgent { Ok(text) } - /// Plain text completion through the agent (no usage tracking). - /// - /// Uses `Prompt::prompt` which handles tool calls automatically but - /// returns only the final text, not the raw response. + /// Plain text completion (no usage tracking). #[tracing::instrument(skip_all, fields(agent_id = %self.id, mode = "prompt"))] pub async fn prompt(&self, prompt: &str) -> Result { dispatch!(&self.inner, |agent| { @@ -114,11 +159,10 @@ impl BaseAgent { }) } - /// Summarize text via LLM to fit within the context window's input budget. + /// Summarize text to fit within the context window's input budget. /// - /// Returns the text unchanged if no context window is configured or the - /// text already fits. Otherwise sends a summarization prompt and returns - /// the condensed version. + /// Returns the text unchanged when no context window is configured or + /// the text already fits. 
#[tracing::instrument(skip_all, fields(agent_id = %self.id, mode = "compact"))] pub async fn prompt_compact(&self, text: &str) -> Result { let cw = match &self.context_window { @@ -137,8 +181,8 @@ impl BaseAgent { self.prompt_text(&prompt).await } - /// Splits text via [`ContextWindow`], runs `prompt_structured` per chunk, - /// and flattens results. + /// Split text via [`ContextWindow`], run `prompt_structured` per chunk, + /// and flatten results. #[tracing::instrument(skip_all, fields(agent_id = %self.id, mode = "chunked"))] pub async fn prompt_chunked( &self, diff --git a/crates/nvisy-rig/src/backend/builder.rs b/crates/nvisy-rig/src/backend/builder.rs new file mode 100644 index 0000000..0e2be32 --- /dev/null +++ b/crates/nvisy-rig/src/backend/builder.rs @@ -0,0 +1,150 @@ +//! Builder for [`BaseAgent`](super::BaseAgent). + +use reqwest_middleware::ClientWithMiddleware; +use rig::agent::{Agent, AgentBuilder}; +use rig::client::CompletionClient; +use rig::completion::CompletionModel; +use rig::providers::{anthropic, gemini, ollama, openai}; +use rig::tool::{Tool, ToolDyn}; +use uuid::Uuid; + +use super::super::provider::{Provider, build_http_client}; +use super::super::UsageTracker; +use super::{Agents, BaseAgent, BaseAgentConfig}; +use crate::error::Error; + +/// Builder for [`BaseAgent`]. +/// +/// Created via [`BaseAgent::builder`]. Collects a provider reference, config, +/// optional preamble (system prompt), and optional tools, then constructs the +/// concrete rig-core agent on [`build`](Self::build). +pub(crate) struct BaseAgentBuilder { + provider: Provider, + config: BaseAgentConfig, + preamble: Option, + tools: Vec>, +} + +impl BaseAgentBuilder { + pub fn new(provider: &Provider, config: BaseAgentConfig) -> Self { + Self { + provider: provider.clone(), + config, + preamble: None, + tools: Vec::new(), + } + } + + /// Set the system prompt (preamble). 
+ pub fn preamble(mut self, preamble: impl Into) -> Self { + self.preamble = Some(preamble.into()); + self + } + + /// Register a tool the agent can call during prompts. + pub fn tool(mut self, tool: impl Tool + 'static) -> Self { + self.tools.push(Box::new(tool)); + self + } + + /// Build the [`BaseAgent`], constructing the provider-specific rig client. + pub fn build(self) -> Result { + let Self { + provider, + config, + preamble, + tools, + } = self; + + let http_client = build_http_client(config.max_retries); + let preamble = preamble.as_deref(); + + let inner = match &provider { + Provider::OpenAi(p) => { + let mut b = openai::Client::::builder() + .api_key(&p.api_key) + .http_client(http_client); + if let Some(url) = &p.base_url { + b = b.base_url(url); + } + let client = b.build().map_err(|e| Error::Client(e.to_string()))?; + let model = client.completions_api().completion_model(&p.model); + Agents::OpenAi(build_rig_agent(model, &config, preamble, tools)) + } + Provider::Anthropic(p) => { + let mut b = anthropic::Client::::builder() + .api_key(&p.api_key) + .http_client(http_client); + if let Some(url) = &p.base_url { + b = b.base_url(url); + } + let client = b.build().map_err(|e| Error::Client(e.to_string()))?; + let model = client.completion_model(&p.model); + Agents::Anthropic(build_rig_agent(model, &config, preamble, tools)) + } + Provider::Gemini(p) => { + let mut b = gemini::Client::::builder() + .api_key(&p.api_key) + .http_client(http_client); + if let Some(url) = &p.base_url { + b = b.base_url(url); + } + let client = b.build().map_err(|e| Error::Client(e.to_string()))?; + // rig-core 0.31: Gemini's Capabilities doesn't propagate H, + // so CompletionClient is unavailable for non-default H. 
+ let model = gemini::completion::CompletionModel::new(client, &p.model); + Agents::Gemini(build_rig_agent(model, &config, preamble, tools)) + } + Provider::Ollama(p) => { + let mut b = ollama::Client::::builder() + .api_key(rig::client::Nothing) + .http_client(http_client); + if let Some(url) = &p.base_url { + b = b.base_url(url); + } + let client = b.build().map_err(|e| Error::Client(e.to_string()))?; + let model = client.completion_model(&p.model); + Agents::Ollama(build_rig_agent(model, &config, preamble, tools)) + } + }; + + Ok(BaseAgent { + id: Uuid::now_v7(), + inner, + context_window: config.context_window, + tracker: UsageTracker::new(), + }) + } +} + +/// Build a concrete rig-core `Agent`. +/// +/// Generic over `M` but only called inside [`BaseAgentBuilder::build`] — +/// the generic never escapes the module boundary. +fn build_rig_agent( + model: M, + config: &BaseAgentConfig, + preamble: Option<&str>, + tools: Vec>, +) -> Agent { + // AgentBuilder uses typestate: `.tools()` changes the type parameter, + // so the with-tools and without-tools paths cannot share a binding. + if tools.is_empty() { + let mut b = AgentBuilder::new(model) + .temperature(config.temperature) + .max_tokens(config.max_tokens); + if let Some(p) = preamble { + b = b.preamble(p); + } + b.build() + } else { + let mut b = AgentBuilder::new(model) + .temperature(config.temperature) + .max_tokens(config.max_tokens) + .tools(tools); + if let Some(p) = preamble { + b = b.preamble(p); + } + b.build() + } +} diff --git a/crates/nvisy-rig/src/agent/base/context.rs b/crates/nvisy-rig/src/backend/context.rs similarity index 54% rename from crates/nvisy-rig/src/agent/base/context.rs rename to crates/nvisy-rig/src/backend/context.rs index b14ee18..d2f15d1 100644 --- a/crates/nvisy-rig/src/agent/base/context.rs +++ b/crates/nvisy-rig/src/backend/context.rs @@ -1,6 +1,14 @@ -//! Context window management for LLM token limits. - -/// Manages token budget estimation, splitting, and truncation. +//! 
Token budget estimation, text splitting, and truncation. +//! +//! [`ContextWindow`] provides a simple heuristic (~4 chars/token) to decide +//! whether text fits within a model's input budget and, when it doesn't, +//! to split or truncate it at sentence boundaries while staying UTF-8 safe. + +/// Token budget manager for a single model context window. +/// +/// All arithmetic is based on a rough **4 characters ≈ 1 token** heuristic. +/// This is intentionally conservative: over-splitting is harmless while +/// exceeding the real limit causes provider errors. #[derive(Debug, Clone)] pub struct ContextWindow { /// Maximum tokens the model supports. @@ -10,7 +18,6 @@ pub struct ContextWindow { } impl ContextWindow { - /// Create a new context window with the given limits. pub fn new(max_tokens: usize, reserve_output: usize) -> Self { Self { max_tokens, @@ -18,33 +25,31 @@ impl ContextWindow { } } - /// Estimate the number of tokens in a string (~4 chars per token). + /// Rough token count (~4 chars per token for English text). pub fn estimate_tokens(text: &str) -> usize { - // Rough heuristic: ~4 characters per token for English text. text.len().div_ceil(4) } - /// Available input token budget (max minus reserved output). + /// Input token budget (`max_tokens − reserve_output`). pub(crate) fn input_budget(&self) -> usize { self.max_tokens.saturating_sub(self.reserve_output) } - /// Check if the text fits within the available input budget. + /// Whether `text` fits within the input budget. pub fn fits(&self, text: &str) -> bool { Self::estimate_tokens(text) <= self.input_budget() } /// Split text into chunks that each fit within the input budget. /// - /// Splitting respects sentence boundaries (`. ` and `\n`) where possible - /// and is safe for multi-byte UTF-8 input. + /// Prefers sentence boundaries (`. ` and `\n`) and is safe for + /// multi-byte UTF-8. 
pub fn split_to_fit<'a>(&self, text: &'a str) -> Vec<&'a str> { if self.fits(text) { return vec![text]; } let budget = self.input_budget(); - // Approximate char budget from token budget. let char_budget = budget * 4; let mut chunks = Vec::new(); @@ -56,16 +61,12 @@ impl ContextWindow { break; } - // Take up to char_budget bytes, snapped to a char boundary. let take = snap_to_boundary(remaining, remaining.len().min(char_budget)); let candidate = &remaining[..take]; - - // Try to split at the last sentence boundary within the candidate. let split_pos = find_last_boundary(candidate).unwrap_or(take); let (chunk, rest) = remaining.split_at(split_pos); if chunk.is_empty() { - // No boundary found within budget; force-split at char_budget. let forced = snap_to_boundary(remaining, remaining.len().min(char_budget)); let (chunk, rest) = remaining.split_at(forced); chunks.push(chunk); @@ -79,9 +80,9 @@ impl ContextWindow { chunks } - /// Truncate text to fit, keeping the end (most recent context). + /// Truncate text to fit, keeping the **tail** (most recent context). /// - /// Safe for multi-byte UTF-8 input. + /// Safe for multi-byte UTF-8. pub fn truncate_to_fit<'a>(&self, text: &'a str) -> &'a str { if self.fits(text) { return text; @@ -95,7 +96,6 @@ impl ContextWindow { } let start = snap_to_boundary(text, text.len() - char_budget); - // Try to start at a boundary to avoid splitting mid-sentence. let adjusted = text[start..] .find(['\n', '.']) .map(|pos| start + pos + 1) @@ -106,8 +106,7 @@ impl ContextWindow { } } -/// Snap a byte position to the nearest valid UTF-8 char boundary, -/// walking backward if necessary. +/// Snap a byte position to the nearest valid UTF-8 char boundary (walks backward). fn snap_to_boundary(text: &str, pos: usize) -> usize { let mut p = pos.min(text.len()); while p > 0 && !text.is_char_boundary(p) { @@ -116,7 +115,7 @@ fn snap_to_boundary(text: &str, pos: usize) -> usize { p } -/// Find the last sentence boundary (`. ` or `\n`) in the text. 
+/// Last sentence boundary (`. ` or `\n`) in `text`. fn find_last_boundary(text: &str) -> Option { let last_newline = text.rfind('\n'); let last_period = text.rfind(". ").map(|p| p + 2); @@ -133,69 +132,37 @@ fn find_last_boundary(text: &str) -> Option { mod tests { use super::*; - #[test] - fn estimate_tokens_basic() { - assert_eq!(ContextWindow::estimate_tokens(""), 0); - assert_eq!(ContextWindow::estimate_tokens("abcd"), 1); - assert_eq!(ContextWindow::estimate_tokens("abcdefgh"), 2); - } - #[test] fn fits_within_budget() { let cw = ContextWindow::new(100, 20); - // Budget = 80 tokens = ~320 chars - let short = "a".repeat(300); - assert!(cw.fits(&short)); - - let long = "a".repeat(400); - assert!(!cw.fits(&long)); - } - - #[test] - fn split_short_text() { - let cw = ContextWindow::new(100, 20); - let text = "hello world"; - let chunks = cw.split_to_fit(text); - assert_eq!(chunks, vec!["hello world"]); + assert!(cw.fits(&"a".repeat(300))); // ~75 tokens, budget 80 + assert!(!cw.fits(&"a".repeat(400))); // ~100 tokens, budget 80 } #[test] fn truncate_keeps_end() { - let cw = ContextWindow::new(10, 2); - // Budget = 8 tokens = ~32 chars + let cw = ContextWindow::new(10, 2); // budget = 8 tokens ≈ 32 chars let text = "First sentence. Second sentence. Third sentence. 
Fourth sentence."; let truncated = cw.truncate_to_fit(text); - // Should keep the tail end - assert!(truncated.len() <= 32 + 10); // some slack for boundary adjustment + assert!(truncated.len() <= 42); // 32 + slack for boundary assert!(text.ends_with(truncated) || truncated.contains("sentence")); } - #[test] - fn snap_to_boundary_ascii() { - let text = "hello"; - assert_eq!(super::snap_to_boundary(text, 3), 3); - assert_eq!(super::snap_to_boundary(text, 10), 5); // clamps to len - } - #[test] fn snap_to_boundary_multibyte() { - // '🔥' is 4 bytes - let text = "a🔥b"; - // byte 0: 'a', bytes 1-4: '🔥', byte 5: 'b' - assert_eq!(super::snap_to_boundary(text, 1), 1); // valid - assert_eq!(super::snap_to_boundary(text, 2), 1); // mid-emoji → snap back - assert_eq!(super::snap_to_boundary(text, 3), 1); // mid-emoji → snap back - assert_eq!(super::snap_to_boundary(text, 4), 1); // mid-emoji → snap back - assert_eq!(super::snap_to_boundary(text, 5), 5); // valid (after emoji) + let text = "a🔥b"; // byte 0: 'a', bytes 1–4: '🔥', byte 5: 'b' + assert_eq!(snap_to_boundary(text, 1), 1); + assert_eq!(snap_to_boundary(text, 2), 1); // mid-emoji → snap back + assert_eq!(snap_to_boundary(text, 3), 1); + assert_eq!(snap_to_boundary(text, 4), 1); + assert_eq!(snap_to_boundary(text, 5), 5); } #[test] fn split_to_fit_emoji() { - // Budget: 2 tokens = ~8 bytes. Each emoji is 4 bytes. 
- let cw = ContextWindow::new(4, 2); - let text = "🔥🔥🔥🔥"; // 16 bytes total + let cw = ContextWindow::new(4, 2); // budget = 2 tokens ≈ 8 bytes + let text = "🔥🔥🔥🔥"; // 16 bytes let chunks = cw.split_to_fit(text); - // Should not panic and every chunk must be valid UTF-8 assert!(chunks.len() >= 2); for chunk in &chunks { assert!(!chunk.is_empty()); @@ -204,10 +171,8 @@ mod tests { #[test] fn split_to_fit_cjk() { - // CJK chars are 3 bytes each - let cw = ContextWindow::new(4, 2); - // Budget: 2 tokens = ~8 bytes → fits 2 CJK chars (6 bytes) - let text = "你好世界测试文字"; // 8 chars × 3 bytes = 24 bytes + let cw = ContextWindow::new(4, 2); // budget ≈ 8 bytes + let text = "你好世界测试文字"; // 24 bytes (3 bytes × 8 chars) let chunks = cw.split_to_fit(text); assert!(chunks.len() >= 2); for chunk in &chunks { @@ -217,19 +182,10 @@ mod tests { #[test] fn truncate_to_fit_emoji() { - let cw = ContextWindow::new(4, 2); - // Budget: 2 tokens = ~8 bytes + let cw = ContextWindow::new(4, 2); // budget ≈ 8 bytes let text = "🔥🔥🔥🔥"; // 16 bytes let truncated = cw.truncate_to_fit(text); - // Should not panic, should be valid UTF-8, and should be the tail assert!(!truncated.is_empty()); assert!(text.ends_with(truncated)); } - - #[test] - fn fits_respects_budget() { - let cw = ContextWindow::new(100, 20); - let short = "a".repeat(300); // ~75 tokens, budget is 80 - assert!(cw.fits(&short)); - } } diff --git a/crates/nvisy-rig/src/backend/metrics.rs b/crates/nvisy-rig/src/backend/metrics.rs index 6c1c1a8..49e71f7 100644 --- a/crates/nvisy-rig/src/backend/metrics.rs +++ b/crates/nvisy-rig/src/backend/metrics.rs @@ -1,36 +1,33 @@ -//! Token usage tracking and statistics. +//! Cumulative token-usage tracking across LLM requests. use std::sync::Mutex; use rig::completion::Usage; -/// Tracks cumulative token usage across LLM requests. +/// Thread-safe accumulator for LLM token usage. +/// +/// Each agent owns one tracker; callers snapshot it to inspect costs. 
pub struct UsageTracker {
     inner: Mutex<UsageStats>,
 }
 
-/// Snapshot of accumulated usage statistics.
+/// Point-in-time snapshot of accumulated usage counters.
 #[derive(Debug, Default, Clone)]
 pub struct UsageStats {
-    /// Total input (prompt) tokens consumed.
     pub total_input_tokens: u64,
-    /// Total output (completion) tokens consumed.
     pub total_output_tokens: u64,
-    /// Total number of LLM requests sent.
     pub total_requests: u64,
-    /// Total number of retries across all requests.
     pub total_retries: u64,
 }
 
 impl UsageTracker {
-    /// Create a new tracker with zeroed counters.
     pub fn new() -> Self {
         Self {
             inner: Mutex::new(UsageStats::default()),
         }
     }
 
-    /// Record usage from a single request, including retry count.
+    /// Record a single LLM request's token usage and retry count.
     pub fn record(&self, usage: &Usage, retries: u32) {
         let mut stats = self.inner.lock().expect("usage tracker lock poisoned");
         stats.total_input_tokens += usage.input_tokens;
@@ -39,7 +36,7 @@
         stats.total_retries += u64::from(retries);
     }
 
-    /// Take a snapshot of the current accumulated statistics.
+    /// Snapshot the current counters without resetting them.
pub fn snapshot(&self) -> UsageStats { self.inner.lock().expect("usage tracker lock poisoned").clone() } @@ -55,65 +52,3 @@ impl Default for UsageTracker { Self::new() } } - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn tracks_usage() { - let tracker = UsageTracker::new(); - - let usage = Usage { - input_tokens: 100, - output_tokens: 50, - total_tokens: 150, - cached_input_tokens: 0, - }; - tracker.record(&usage, 2); - - let snap = tracker.snapshot(); - assert_eq!(snap.total_input_tokens, 100); - assert_eq!(snap.total_output_tokens, 50); - assert_eq!(snap.total_requests, 1); - assert_eq!(snap.total_retries, 2); - } - - #[test] - fn accumulates_across_requests() { - let tracker = UsageTracker::new(); - - let usage = Usage { - input_tokens: 10, - output_tokens: 5, - total_tokens: 15, - cached_input_tokens: 0, - }; - tracker.record(&usage, 0); - tracker.record(&usage, 1); - - let snap = tracker.snapshot(); - assert_eq!(snap.total_input_tokens, 20); - assert_eq!(snap.total_output_tokens, 10); - assert_eq!(snap.total_requests, 2); - assert_eq!(snap.total_retries, 1); - } - - #[test] - fn reset_clears_stats() { - let tracker = UsageTracker::new(); - - let usage = Usage { - input_tokens: 100, - output_tokens: 50, - total_tokens: 150, - cached_input_tokens: 0, - }; - tracker.record(&usage, 0); - tracker.reset(); - - let snap = tracker.snapshot(); - assert_eq!(snap.total_input_tokens, 0); - assert_eq!(snap.total_requests, 0); - } -} diff --git a/crates/nvisy-rig/src/backend/mod.rs b/crates/nvisy-rig/src/backend/mod.rs index 455c5b7..4626d2e 100644 --- a/crates/nvisy-rig/src/backend/mod.rs +++ b/crates/nvisy-rig/src/backend/mod.rs @@ -1,35 +1,43 @@ -//! LLM backend types and usage tracking. +//! LLM backend: agent infrastructure, provider connections, and usage tracking. 
+mod agent;
+pub(crate) mod context;
 mod metrics;
+pub(crate) mod provider;
 
+pub(crate) use agent::BaseAgent;
+pub use agent::BaseAgentConfig;
+pub use context::ContextWindow;
 pub use metrics::{UsageStats, UsageTracker};
-
-/// Fallback hint used in prompts when no specific entity types are requested.
-pub(crate) const ALL_TYPES_HINT: &str = "all entity types";
+pub use provider::{AuthenticatedProvider, Provider, UnauthenticatedProvider};
 
 use serde_json::Value;
 
 use nvisy_ontology::entity::EntityKind;
 
-/// Configuration passed to a detection backend.
+/// Fallback hint used in prompts when no specific entity types are requested.
+pub(crate) const ALL_TYPES_HINT: &str = "all entity types";
+
+/// Configuration for entity detection: which types to look for and at what
+/// confidence threshold.
 #[derive(Debug, Clone)]
 pub struct DetectionConfig {
     /// Entity kinds to detect (empty = all).
     pub entity_kinds: Vec<EntityKind>,
     /// Minimum confidence score to include a detection (0.0..=1.0).
     pub confidence_threshold: f64,
-    /// System prompt override (if empty, the backend uses its default).
+    /// System prompt override (if set, replaces the agent's default).
     pub system_prompt: Option<String>,
 }
 
-/// Request type for the detection service.
+/// Request payload for the detection service.
 #[derive(Debug, Clone)]
 pub struct DetectionRequest {
     pub text: String,
     pub config: DetectionConfig,
 }
 
-/// Response type for the detection service.
+/// Response from the detection service.
 #[derive(Debug, Clone)]
 pub struct DetectionResponse {
     pub entities: Vec<Value>,
 }
diff --git a/crates/nvisy-rig/src/agent/base/provider.rs b/crates/nvisy-rig/src/backend/provider.rs
similarity index 94%
rename from crates/nvisy-rig/src/agent/base/provider.rs
rename to crates/nvisy-rig/src/backend/provider.rs
index e199e44..c2d8baf 100644
--- a/crates/nvisy-rig/src/agent/base/provider.rs
+++ b/crates/nvisy-rig/src/backend/provider.rs
@@ -1,7 +1,8 @@
 //! LLM provider connection parameters.
 //!
-//! 
[`Provider`] is a plain data enum carrying API keys and optional base -//! URLs. Client construction is deferred until an agent or backend is built. +//! [`Provider`] is a plain enum carrying API keys, model names, and optional +//! base URLs. The actual rig-core client is constructed lazily when a +//! [`BaseAgent`](super::BaseAgent) is built. use reqwest_middleware::ClientBuilder; use reqwest_middleware::ClientWithMiddleware; diff --git a/crates/nvisy-rig/src/bridge/mod.rs b/crates/nvisy-rig/src/bridge/mod.rs index 063d236..9257de3 100644 --- a/crates/nvisy-rig/src/bridge/mod.rs +++ b/crates/nvisy-rig/src/bridge/mod.rs @@ -1,6 +1,9 @@ -//! Bridge between rig-core and the detection service. +//! Prompt construction and LLM response parsing. //! -//! Prompt building and response parsing utilities. +//! [`PromptBuilder`] assembles user prompts with entity-kind filters and +//! confidence thresholds. [`ResponseParser`] extracts and deserializes +//! text from rig-core completion responses. [`EntityParser`] converts raw +//! JSON dicts into [`Entity`](nvisy_ontology::entity::Entity) values. mod prompt; mod response; diff --git a/crates/nvisy-rig/src/bridge/prompt.rs b/crates/nvisy-rig/src/bridge/prompt.rs index 159025e..c0b0f99 100644 --- a/crates/nvisy-rig/src/bridge/prompt.rs +++ b/crates/nvisy-rig/src/bridge/prompt.rs @@ -1,4 +1,8 @@ -//! Prompt construction for LLM entity detection. +//! User-prompt construction for LLM entity detection. +//! +//! [`PromptBuilder`] formats the entity-kind list, confidence threshold, +//! and input text into a single prompt string that agent-specific prompt +//! builders can delegate to. use std::fmt::Display; diff --git a/crates/nvisy-rig/src/bridge/response.rs b/crates/nvisy-rig/src/bridge/response.rs index 275a4ca..2c7f7dc 100644 --- a/crates/nvisy-rig/src/bridge/response.rs +++ b/crates/nvisy-rig/src/bridge/response.rs @@ -1,4 +1,10 @@ -//! Response parsing for LLM completions. +//! LLM completion response parsing. +//! +//! 
[`ResponseParser`] extracts text from rig-core completion responses +//! and deserializes JSON (handling markdown fences and empty responses). +//! [`EntityParser`] converts raw JSON dicts into [`Entity`] values. +//! +//! [`Entity`]: nvisy_ontology::entity::Entity use std::borrow::Cow; use std::str::FromStr; @@ -13,15 +19,13 @@ use nvisy_ontology::location::{Location, TextLocation}; use crate::error::Error; -/// Extracted text from an LLM completion response. -/// -/// Wraps the raw text content and provides parsing accessors. +/// Thin wrapper around text extracted from an LLM completion response. pub struct ResponseParser<'a> { text: Cow<'a, str>, } impl<'a> ResponseParser<'a> { - /// Extract text content from a completion response. + /// Extract the text content blocks from a completion response. pub fn extract_text(response: &CompletionResponse) -> Result { let texts: Vec<&str> = response .choice @@ -48,24 +52,21 @@ impl<'a> ResponseParser<'a> { Self { text: text.into() } } - /// The raw text content. pub fn as_str(&self) -> &str { &self.text } - /// Consume the parser and return the owned text. pub fn into_string(self) -> String { self.text.into_owned() } - /// Parse the text as JSON into `T`. + /// Deserialize the text as JSON into `T`. /// - /// Strips markdown fences if present, then deserializes. - /// Empty / "no entities" / "none" responses return `T::default()`. + /// Strips markdown fences when present. Returns `T::default()` for + /// empty / `"none"` / `"no entities"` responses. pub fn parse_json(&self) -> Result { let trimmed = self.text.trim(); - // Handle empty or "no entities" responses. if trimmed.is_empty() || trimmed.eq_ignore_ascii_case("none") || trimmed.eq_ignore_ascii_case("no entities") @@ -73,7 +74,6 @@ impl<'a> ResponseParser<'a> { return Ok(T::default()); } - // Try to extract JSON from markdown fences. 
let json_str = extract_fenced_json(trimmed).unwrap_or(trimmed); serde_json::from_str::(json_str).map_err(|e| { @@ -85,15 +85,16 @@ impl<'a> ResponseParser<'a> { } } -/// Parse raw JSON dicts from an LLM backend into [`Entity`] values. +/// Convert raw JSON dicts (as returned by an LLM) into [`Entity`] values. /// -/// Moved from the former `parse.rs` free function `parse_llm_entities`. +/// Unknown `entity_type` values are silently dropped — LLMs occasionally +/// hallucinate types that don't exist in the ontology. pub struct EntityParser; impl EntityParser { - /// Parse raw JSON dicts into [`Entity`] values. + /// Parse an array of JSON objects into entities. /// - /// Expected dict keys: `category`, `entity_type`, `value`, `confidence`, + /// Expected keys: `category`, `entity_type`, `value`, `confidence`, /// and optionally `start_offset` / `end_offset`. pub fn parse(raw: &[Value]) -> Result, Error> { let mut entities = Vec::new(); @@ -178,9 +179,8 @@ impl EntityParser { } } -/// Extract JSON content from markdown fences. +/// Extract JSON content from markdown fences (```` ```json ... ``` ````). fn extract_fenced_json(text: &str) -> Option<&str> { - // Look for ```json ... ``` or ``` ... ``` let start_marker = if let Some(pos) = text.find("```json") { pos + "```json".len() } else if let Some(pos) = text.find("```") { @@ -190,9 +190,7 @@ fn extract_fenced_json(text: &str) -> Option<&str> { }; let rest = &text[start_marker..]; - // Skip optional newline after opening fence. let rest = rest.strip_prefix('\n').unwrap_or(rest); - let end = rest.find("```")?; let content = rest[..end].trim(); @@ -203,12 +201,10 @@ fn extract_fenced_json(text: &str) -> Option<&str> { } } -/// Truncate a string for display in error messages. 
fn truncate(s: &str, max_len: usize) -> &str { if s.len() <= max_len { s } else { - // Find a valid char boundary let mut end = max_len; while end > 0 && !s.is_char_boundary(end) { end -= 1; @@ -225,38 +221,23 @@ mod tests { #[test] fn parse_json_raw_array() { let text = r#"[{"category":"pii","entity_type":"email_address","value":"a@b.com","confidence":0.9,"start_offset":0,"end_offset":7}]"#; - let parser = ResponseParser::from_text(text); - let result = parser.parse_json::>().unwrap(); + let result = ResponseParser::from_text(text).parse_json::>().unwrap(); assert_eq!(result.len(), 1); } #[test] fn parse_json_fenced() { let text = "```json\n[{\"category\":\"pii\",\"entity_type\":\"email_address\",\"value\":\"a@b.com\",\"confidence\":0.9}]\n```"; - let parser = ResponseParser::from_text(text); - let result = parser.parse_json::>().unwrap(); + let result = ResponseParser::from_text(text).parse_json::>().unwrap(); assert_eq!(result.len(), 1); } #[test] - fn parse_json_single_object() { - let text = r#"{"category":"pii","entity_type":"email_address","value":"a@b.com","confidence":0.9}"#; - let parser = ResponseParser::from_text(text); - let result = parser.parse_json::().unwrap(); - assert!(result.is_object()); - } - - #[test] - fn parse_json_empty() { - assert_eq!(ResponseParser::from_text("").parse_json::>().unwrap(), Vec::::new()); - assert_eq!(ResponseParser::from_text("none").parse_json::>().unwrap(), Vec::::new()); - assert_eq!(ResponseParser::from_text("No entities").parse_json::>().unwrap(), Vec::::new()); - } - - #[test] - fn as_str_returns_text() { - let parser = ResponseParser::from_text("hello world"); - assert_eq!(parser.as_str(), "hello world"); + fn parse_json_empty_and_sentinel() { + let empty: Vec = vec![]; + assert_eq!(ResponseParser::from_text("").parse_json::>().unwrap(), empty); + assert_eq!(ResponseParser::from_text("none").parse_json::>().unwrap(), empty); + assert_eq!(ResponseParser::from_text("No entities").parse_json::>().unwrap(), empty); } 
#[test] @@ -269,7 +250,6 @@ mod tests { "start_offset": 9, "end_offset": 15 })]; - let entities = EntityParser::parse(&raw).unwrap(); assert_eq!(entities.len(), 1); assert_eq!(entities[0].value, "SECRET"); @@ -284,8 +264,6 @@ mod tests { "value": "test", "confidence": 0.5 })]; - - let entities = EntityParser::parse(&raw).unwrap(); - assert!(entities.is_empty()); + assert!(EntityParser::parse(&raw).unwrap().is_empty()); } } diff --git a/crates/nvisy-rig/src/error.rs b/crates/nvisy-rig/src/error.rs index b46c970..60a689f 100644 --- a/crates/nvisy-rig/src/error.rs +++ b/crates/nvisy-rig/src/error.rs @@ -1,8 +1,12 @@ -//! Error types for the rig crate. +//! Unified error type covering LLM provider, serialization, and tool failures. use rig::completion::{CompletionError, PromptError, StructuredOutputError}; -/// Errors produced by rig-core LLM interactions. +/// Error type for all LLM interactions. +/// +/// Variants map 1:1 to rig-core error categories plus crate-specific +/// additions (`Validation`, `Client`, `Core`). Use [`is_retryable`](Self::is_retryable) +/// to decide whether a failed request should be retried. #[derive(Debug, thiserror::Error)] pub enum Error { /// An HTTP / network error from the LLM provider. 
diff --git a/crates/nvisy-rig/src/lib.rs b/crates/nvisy-rig/src/lib.rs index 797d8a4..b21a161 100644 --- a/crates/nvisy-rig/src/lib.rs +++ b/crates/nvisy-rig/src/lib.rs @@ -5,18 +5,20 @@ pub mod backend; pub mod bridge; pub mod error; -pub(crate) mod agent; +mod agent; #[doc(hidden)] pub mod prelude; -pub use backend::{DetectionConfig, DetectionRequest, DetectionResponse}; +pub use backend::{ + AuthenticatedProvider, BaseAgentConfig, ContextWindow, + DetectionConfig, DetectionRequest, DetectionResponse, + Provider, UnauthenticatedProvider, UsageStats, UsageTracker, +}; pub use bridge::EntityParser; pub use error::Error; pub use agent::{ - AuthenticatedProvider, BaseAgentConfig, ContextWindow, Provider, - UnauthenticatedProvider, CvAgent, CvDetection, CvEntities, CvEntity, CvProvider, NerAgent, NerEntities, NerEntity, OcrAgent, OcrEntity, OcrOutput, OcrProvider, OcrTextRegion, diff --git a/crates/nvisy-rig/src/prelude.rs b/crates/nvisy-rig/src/prelude.rs index 9243656..e527528 100644 --- a/crates/nvisy-rig/src/prelude.rs +++ b/crates/nvisy-rig/src/prelude.rs @@ -1,14 +1,13 @@ //! Convenience re-exports. 
pub use crate::backend::{ + AuthenticatedProvider, BaseAgentConfig, ContextWindow, DetectionConfig, DetectionRequest, DetectionResponse, - UsageStats, UsageTracker, + Provider, UnauthenticatedProvider, UsageStats, UsageTracker, }; pub use crate::bridge::EntityParser; pub use crate::error::Error; pub use crate::agent::{ - AuthenticatedProvider, BaseAgentConfig, ContextWindow, Provider, - UnauthenticatedProvider, CvAgent, CvDetection, CvEntities, CvEntity, CvProvider, NerAgent, NerEntities, NerEntity, OcrAgent, OcrEntity, OcrOutput, OcrProvider, OcrTextRegion, From e476bf5d87201bfb0ea3b2e3d9312a97dd4ebdf0 Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Thu, 26 Feb 2026 02:40:03 +0100 Subject: [PATCH 20/24] refactor(rig,identify): add HTTP tracing/timeout, reorganize modules, delete EntityParser/vision/ontology - Add reqwest-tracing middleware and 120s timeout to HTTP client - Move base agent from backend/agent/ to agent/base/ module - Delete EntityParser from nvisy-rig, inline logic in nvisy-identify - Delete vision/ and ontology/ modules from nvisy-identify - Make all internal modules private, re-export from parent mod.rs - Remove nvisy-paddle dependency from nvisy-identify Co-Authored-By: Claude Opus 4.6 --- Cargo.lock | 18 ++- Cargo.toml | 1 + crates/nvisy-identify/Cargo.toml | 1 - crates/nvisy-identify/src/lib.rs | 13 +- crates/nvisy-identify/src/llm/detection.rs | 90 ++++++++++- crates/nvisy-identify/src/ontology/mod.rs | 7 - crates/nvisy-identify/src/vision/face.rs | 126 --------------- crates/nvisy-identify/src/vision/mod.rs | 9 -- crates/nvisy-identify/src/vision/object.rs | 153 ------------------ crates/nvisy-identify/src/vision/ocr.rs | 105 ------------ crates/nvisy-rig/Cargo.toml | 3 +- .../src/{backend => agent/base}/agent.rs | 23 ++- .../src/{backend => agent/base}/builder.rs | 3 +- crates/nvisy-rig/src/agent/base/mod.rs | 8 + crates/nvisy-rig/src/agent/cv/mod.rs | 3 +- crates/nvisy-rig/src/agent/mod.rs | 10 +- crates/nvisy-rig/src/agent/ner/mod.rs | 
3 +- crates/nvisy-rig/src/agent/ocr/mod.rs | 3 +- crates/nvisy-rig/src/backend/mod.rs | 10 +- crates/nvisy-rig/src/backend/provider.rs | 14 +- crates/nvisy-rig/src/bridge/mod.rs | 5 +- crates/nvisy-rig/src/bridge/response.rs | 130 +-------------- crates/nvisy-rig/src/lib.rs | 4 +- crates/nvisy-rig/src/prelude.rs | 4 +- 24 files changed, 170 insertions(+), 576 deletions(-) delete mode 100644 crates/nvisy-identify/src/ontology/mod.rs delete mode 100644 crates/nvisy-identify/src/vision/face.rs delete mode 100644 crates/nvisy-identify/src/vision/mod.rs delete mode 100644 crates/nvisy-identify/src/vision/object.rs delete mode 100644 crates/nvisy-identify/src/vision/ocr.rs rename crates/nvisy-rig/src/{backend => agent/base}/agent.rs (95%) rename crates/nvisy-rig/src/{backend => agent/base}/builder.rs (98%) create mode 100644 crates/nvisy-rig/src/agent/base/mod.rs diff --git a/Cargo.lock b/Cargo.lock index 8ac92b9..6a1a7eb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2767,7 +2767,6 @@ dependencies = [ "nvisy-codec", "nvisy-core", "nvisy-ontology", - "nvisy-paddle", "nvisy-pattern", "nvisy-python", "nvisy-rig", @@ -2845,6 +2844,7 @@ dependencies = [ "nvisy-ontology", "reqwest-middleware", "reqwest-retry", + "reqwest-tracing", "rig-core", "schemars", "serde", @@ -3717,6 +3717,22 @@ dependencies = [ "wasmtimer", ] +[[package]] +name = "reqwest-tracing" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5c1a1510677d43dce9e9c0c07fc5db8772c0e5a43e4f9cef75a11affa05a578" +dependencies = [ + "anyhow", + "async-trait", + "getrandom 0.2.17", + "http", + "matchit", + "reqwest", + "reqwest-middleware", + "tracing", +] + [[package]] name = "retry-policies" version = "0.5.1" diff --git a/Cargo.toml b/Cargo.toml index 25da0c3..9ff374b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -56,6 +56,7 @@ rig-core = { version = "0.31", features = [] } # HTTP middleware reqwest-middleware = { version = "0.5" } reqwest-retry = { version = "0.9" } 
+reqwest-tracing = { version = "0.7" } # Async runtime tokio = { version = "1.0", features = [] } diff --git a/crates/nvisy-identify/Cargo.toml b/crates/nvisy-identify/Cargo.toml index 3019af4..f98dd64 100644 --- a/crates/nvisy-identify/Cargo.toml +++ b/crates/nvisy-identify/Cargo.toml @@ -33,7 +33,6 @@ nvisy-codec = { workspace = true, features = [] } nvisy-pattern = { workspace = true, features = [] } nvisy-python = { workspace = true, features = [] } nvisy-rig = { workspace = true, features = [] } -nvisy-paddle = { workspace = true, features = [] } nvisy-asr = { workspace = true, features = [] } # (De)serialization diff --git a/crates/nvisy-identify/src/lib.rs b/crates/nvisy-identify/src/lib.rs index 5825c2a..c44deb0 100644 --- a/crates/nvisy-identify/src/lib.rs +++ b/crates/nvisy-identify/src/lib.rs @@ -2,11 +2,9 @@ #![cfg_attr(docsrs, feature(doc_cfg))] #![doc = include_str!("../README.md")] -mod ontology; mod layer; mod pattern; mod ner; -mod vision; mod llm; mod audio; mod fusion; @@ -14,8 +12,14 @@ mod policy; pub mod prelude; -// --- Domain types --- -pub use ontology::*; +// --- Domain types (re-exported from nvisy-ontology) --- +pub use nvisy_ontology::entity::{ + Annotation, AnnotationKind, AnnotationLabel, AnnotationScope, + DetectionMethod, DetectionOutput, Entity, EntitySelector, ModelInfo, ModelKind, +}; +pub use nvisy_ontology::location::{ + AudioLocation, ImageLocation, Location, TabularLocation, TextLocation, VideoLocation, +}; // --- Layer traits --- pub use layer::*; @@ -27,7 +31,6 @@ pub use ner::{NerBackend, NerConfig}; pub use pattern::{PatternDetection, PatternDetectionParams}; pub use ner::{NerDetection, NerDetectionParams}; pub use ner::ImageNerDetection; -pub use vision::{FaceBackend, FaceDetection, ObjectBackend, ObjectDetection, OcrDetection}; pub use llm::{LlmBackend, LlmDetection, LlmDetectionParams, user_prompt as llm_user_prompt}; pub use audio::TranscriptNerDetection; diff --git a/crates/nvisy-identify/src/llm/detection.rs 
b/crates/nvisy-identify/src/llm/detection.rs index 28ccbe0..003fd97 100644 --- a/crates/nvisy-identify/src/llm/detection.rs +++ b/crates/nvisy-identify/src/llm/detection.rs @@ -4,13 +4,16 @@ //! a time, allowing the layer to accumulate prior text for contextual //! understanding across spans. +use std::str::FromStr; + use serde::Deserialize; +use serde_json::Value; use tokio::sync::Mutex; use nvisy_codec::handler::{Span, TxtSpan}; -use nvisy_ontology::entity::EntityKind; +use nvisy_ontology::entity::{DetectionMethod, EntityCategory, EntityKind}; use nvisy_core::Error; -use nvisy_rig::{DetectionConfig, DetectionRequest, DetectionResponse, EntityParser}; +use nvisy_rig::{DetectionConfig, DetectionRequest, DetectionResponse}; use crate::{Entity, Location, ModelInfo, TextLocation}; use crate::{SequentialContext, DetectionService}; @@ -123,7 +126,7 @@ impl DetectionService for LlmDetection { // Filter entities to the current span and adjust offsets. let span_len = span.data.len(); - for mut e in EntityParser::parse(&response.entities)? { + for mut e in parse_entities(&response.entities)? { if let Some(Location::Text(ref loc)) = e.location { if loc.end_offset <= context_len { continue; @@ -166,6 +169,87 @@ impl DetectionService for LlmDetection { } } +/// Parse raw JSON dicts (from an LLM detection response) into [`Entity`] values. +/// +/// Unknown `entity_type` values are silently dropped. 
+fn parse_entities(raw: &[Value]) -> Result, Error> { + let mut entities = Vec::new(); + + for item in raw { + let obj = item + .as_object() + .ok_or_else(|| Error::validation("Expected JSON object in LLM results", "llm"))?; + + let category_str = obj + .get("category") + .and_then(Value::as_str) + .ok_or_else(|| Error::validation("Missing 'category'", "llm"))?; + + let category = match category_str { + "pii" => EntityCategory::Pii, + "phi" => EntityCategory::Phi, + "financial" => EntityCategory::Financial, + "credentials" => EntityCategory::Credentials, + other => EntityCategory::Custom(other.to_string()), + }; + + let entity_type_str = obj + .get("entity_type") + .and_then(Value::as_str) + .ok_or_else(|| Error::validation("Missing 'entity_type'", "llm"))?; + + let entity_kind = match EntityKind::from_str(entity_type_str) { + Ok(ek) => ek, + Err(_) => { + tracing::warn!( + entity_type = entity_type_str, + "unknown entity type from LLM, dropping" + ); + continue; + } + }; + + let value = obj + .get("value") + .and_then(Value::as_str) + .ok_or_else(|| Error::validation("Missing 'value'", "llm"))?; + + let confidence = obj + .get("confidence") + .and_then(Value::as_f64) + .ok_or_else(|| Error::validation("Missing 'confidence'", "llm"))?; + + let start_offset = obj + .get("start_offset") + .and_then(Value::as_u64) + .map(|v| v as usize) + .unwrap_or(0); + + let end_offset = obj + .get("end_offset") + .and_then(Value::as_u64) + .map(|v| v as usize) + .unwrap_or(0); + + let entity = Entity::new( + category, + entity_kind, + value, + DetectionMethod::ContextualNlp, + confidence, + ) + .with_location(Location::Text(TextLocation { + start_offset, + end_offset, + ..Default::default() + })); + + entities.push(entity); + } + + Ok(entities) +} + #[cfg(test)] mod tests { use super::*; diff --git a/crates/nvisy-identify/src/ontology/mod.rs b/crates/nvisy-identify/src/ontology/mod.rs deleted file mode 100644 index 8145ee5..0000000 --- a/crates/nvisy-identify/src/ontology/mod.rs 
+++ /dev/null @@ -1,7 +0,0 @@ -pub use nvisy_ontology::entity::{ - Annotation, AnnotationKind, AnnotationLabel, AnnotationScope, - DetectionMethod, DetectionOutput, Entity, EntitySelector, ModelInfo, ModelKind, -}; -pub use nvisy_ontology::location::{ - AudioLocation, ImageLocation, Location, TabularLocation, TextLocation, VideoLocation, -}; diff --git a/crates/nvisy-identify/src/vision/face.rs b/crates/nvisy-identify/src/vision/face.rs deleted file mode 100644 index 71209ad..0000000 --- a/crates/nvisy-identify/src/vision/face.rs +++ /dev/null @@ -1,126 +0,0 @@ -//! Face detection layer for images. -//! -//! Delegates to a [`FaceBackend`] to detect human faces in images, -//! producing entities with [`ImageLocation`] bounding boxes. - -use serde_json::Value; - -use nvisy_codec::handler::{ImageData, Span}; -use nvisy_core::math::BoundingBox; -use nvisy_core::Error; - -use nvisy_ontology::entity::{EntityCategory, EntityKind}; - -use crate::{DetectionMethod, Entity, ImageLocation, Location}; -use crate::{ParallelContext, DetectionService}; - -/// Backend trait for face detection providers. -#[async_trait::async_trait] -pub trait FaceBackend: Send + Sync + 'static { - /// Detect faces in an image, returning raw JSON dicts. - /// - /// Each dict should contain: `confidence`, `x`, `y`, `width`, `height`. - async fn detect_faces( - &self, - image_data: &[u8], - mime_type: &str, - ) -> Result, Error>; -} - -/// Face detection layer — delegates to a [`FaceBackend`] at runtime. -pub struct FaceDetection { - backend: B, -} - -impl FaceDetection { - /// Create a new face detection layer with the given backend. 
- pub fn new(backend: B) -> Self { - Self { backend } - } -} - -#[async_trait::async_trait] -impl DetectionService<(), ImageData> for FaceDetection { - type Context = ParallelContext; - - async fn detect( - &self, - spans: Vec>, - ) -> Result, Error> { - let mut entities = Vec::new(); - - for span in &spans { - let png_bytes = span.data.encode_png()?; - - let raw = self.backend.detect_faces(&png_bytes, "image/png").await?; - - for item in &raw { - let obj = item.as_object().ok_or_else(|| { - Error::python("Expected JSON object in face detection results".to_string()) - })?; - - let confidence = obj.get("confidence").and_then(Value::as_f64).unwrap_or(0.0); - let x = obj.get("x").and_then(Value::as_f64).unwrap_or(0.0); - let y = obj.get("y").and_then(Value::as_f64).unwrap_or(0.0); - let width = obj.get("width").and_then(Value::as_f64).unwrap_or(0.0); - let height = obj.get("height").and_then(Value::as_f64).unwrap_or(0.0); - - let entity = Entity::new( - EntityCategory::Biometric, - EntityKind::Face, - "face", - DetectionMethod::FaceDetection, - confidence, - ) - .with_location(Location::Image(ImageLocation { - bounding_box: BoundingBox { x, y, width, height }, - image_id: None, - page_number: None, - })) - .with_parent(&span.source); - - entities.push(entity); - } - } - - Ok(entities) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use serde_json::json; - - struct MockFaceBackend; - - #[async_trait::async_trait] - impl FaceBackend for MockFaceBackend { - async fn detect_faces(&self, _: &[u8], _: &str) -> Result, Error> { - Ok(vec![json!({ - "confidence": 0.98, - "x": 50.0, - "y": 30.0, - "width": 120.0, - "height": 150.0 - })]) - } - } - - #[tokio::test] - async fn detect_face_produces_image_location() { - let layer = FaceDetection::new(MockFaceBackend); - - let img = ImageData::new_rgb(200, 200); - let spans = vec![Span::new((), img)]; - - let entities = layer.detect(spans).await.unwrap(); - assert_eq!(entities.len(), 1); - assert_eq!(entities[0].entity_kind, 
EntityKind::Face); - assert_eq!(entities[0].detection_method, DetectionMethod::FaceDetection); - - let loc = entities[0].location.as_ref().unwrap().as_image().unwrap(); - assert!((loc.bounding_box.x - 50.0).abs() < f64::EPSILON); - assert!((loc.bounding_box.width - 120.0).abs() < f64::EPSILON); - } -} diff --git a/crates/nvisy-identify/src/vision/mod.rs b/crates/nvisy-identify/src/vision/mod.rs deleted file mode 100644 index af91b5d..0000000 --- a/crates/nvisy-identify/src/vision/mod.rs +++ /dev/null @@ -1,9 +0,0 @@ -//! Computer vision detection layers. - -pub mod face; -pub mod object; -pub mod ocr; - -pub use face::{FaceBackend, FaceDetection}; -pub use object::{ObjectBackend, ObjectDetection}; -pub use ocr::OcrDetection; diff --git a/crates/nvisy-identify/src/vision/object.rs b/crates/nvisy-identify/src/vision/object.rs deleted file mode 100644 index e21e41c..0000000 --- a/crates/nvisy-identify/src/vision/object.rs +++ /dev/null @@ -1,153 +0,0 @@ -//! Object detection layer for images. -//! -//! Delegates to an [`ObjectBackend`] to detect objects in images, -//! producing entities with [`ImageLocation`] bounding boxes. - -use std::str::FromStr; - -use serde_json::Value; - -use nvisy_codec::handler::{ImageData, Span}; -use nvisy_ontology::entity::{EntityCategory, EntityKind}; -use nvisy_core::math::BoundingBox; -use nvisy_core::Error; - -use crate::{DetectionMethod, Entity, ImageLocation, Location}; -use crate::{ParallelContext, DetectionService}; - -/// Backend trait for object detection providers. -#[async_trait::async_trait] -pub trait ObjectBackend: Send + Sync + 'static { - /// Detect objects in an image, returning raw JSON dicts. - /// - /// Each dict should contain: `label`, `confidence`, `x`, `y`, `width`, `height`, - /// and optionally `category` and `entity_type`. - async fn detect_objects( - &self, - image_data: &[u8], - mime_type: &str, - ) -> Result, Error>; -} - -/// Object detection layer — delegates to an [`ObjectBackend`] at runtime. 
-pub struct ObjectDetection { - backend: B, -} - -impl ObjectDetection { - /// Create a new object detection layer with the given backend. - pub fn new(backend: B) -> Self { - Self { backend } - } -} - -#[async_trait::async_trait] -impl DetectionService<(), ImageData> for ObjectDetection { - type Context = ParallelContext; - - async fn detect( - &self, - spans: Vec>, - ) -> Result, Error> { - let mut entities = Vec::new(); - - for span in &spans { - let png_bytes = span.data.encode_png()?; - - let raw = self.backend.detect_objects(&png_bytes, "image/png").await?; - - for item in &raw { - let obj = item.as_object().ok_or_else(|| { - Error::python("Expected JSON object in object detection results".to_string()) - })?; - - let label = obj - .get("label") - .and_then(Value::as_str) - .unwrap_or("unknown"); - - let entity_kind = obj - .get("entity_type") - .and_then(Value::as_str) - .and_then(|s| EntityKind::from_str(s).ok()) - .unwrap_or(EntityKind::Logo); - - let category = obj - .get("category") - .and_then(Value::as_str) - .map(|s| match s { - "pii" => EntityCategory::Pii, - "phi" => EntityCategory::Phi, - "biometric" => EntityCategory::Biometric, - other => EntityCategory::Custom(other.to_string()), - }) - .unwrap_or(EntityCategory::Pii); - - let confidence = obj.get("confidence").and_then(Value::as_f64).unwrap_or(0.0); - let x = obj.get("x").and_then(Value::as_f64).unwrap_or(0.0); - let y = obj.get("y").and_then(Value::as_f64).unwrap_or(0.0); - let width = obj.get("width").and_then(Value::as_f64).unwrap_or(0.0); - let height = obj.get("height").and_then(Value::as_f64).unwrap_or(0.0); - - let entity = Entity::new( - category, - entity_kind, - label, - DetectionMethod::ObjectDetection, - confidence, - ) - .with_location(Location::Image(ImageLocation { - bounding_box: BoundingBox { x, y, width, height }, - image_id: None, - page_number: None, - })) - .with_parent(&span.source); - - entities.push(entity); - } - } - - Ok(entities) - } -} - -#[cfg(test)] -mod tests { - 
use super::*; - use serde_json::json; - - struct MockObjectBackend; - - #[async_trait::async_trait] - impl ObjectBackend for MockObjectBackend { - async fn detect_objects(&self, _: &[u8], _: &str) -> Result, Error> { - Ok(vec![json!({ - "label": "license_plate", - "entity_type": "license_plate", - "category": "pii", - "confidence": 0.88, - "x": 100.0, - "y": 200.0, - "width": 80.0, - "height": 30.0 - })]) - } - } - - #[tokio::test] - async fn detect_object_produces_image_location() { - let layer = ObjectDetection::new(MockObjectBackend); - - let img = ImageData::new_rgb(400, 300); - let spans = vec![Span::new((), img)]; - - let entities = layer.detect(spans).await.unwrap(); - assert_eq!(entities.len(), 1); - assert_eq!(entities[0].entity_kind, EntityKind::LicensePlate); - assert_eq!(entities[0].detection_method, DetectionMethod::ObjectDetection); - assert_eq!(entities[0].value, "license_plate"); - - let loc = entities[0].location.as_ref().unwrap().as_image().unwrap(); - assert!((loc.bounding_box.x - 100.0).abs() < f64::EPSILON); - } -} diff --git a/crates/nvisy-identify/src/vision/ocr.rs b/crates/nvisy-identify/src/vision/ocr.rs deleted file mode 100644 index ee55904..0000000 --- a/crates/nvisy-identify/src/vision/ocr.rs +++ /dev/null @@ -1,105 +0,0 @@ -//! OCR detection layer for images. -//! -//! Wraps an [`OcrBackend`] as a [`DetectionService`] that produces entities -//! with [`ImageLocation`] bounding boxes from OCR text extraction. - -use nvisy_codec::handler::{ImageData, Span}; -use nvisy_core::Error; -use nvisy_paddle::{OcrBackend, OcrConfig, parse_ocr_entities}; - -use crate::Entity; -use crate::{ParallelContext, DetectionService}; - -/// OCR detection layer — delegates to an [`OcrBackend`] at runtime. -/// -/// Encodes each image span to PNG and runs OCR to produce text entities -/// with bounding-box locations. 
-pub struct OcrDetection { - backend: B, - config: OcrConfig, -} - -impl OcrDetection { - /// Create a new OCR detection layer with the given backend and config. - pub fn new(backend: B, config: OcrConfig) -> Self { - Self { backend, config } - } -} - -#[async_trait::async_trait] -impl DetectionService<(), ImageData> for OcrDetection { - type Context = ParallelContext; - - async fn detect( - &self, - spans: Vec>, - ) -> Result, Error> { - let mut entities = Vec::new(); - - for span in &spans { - let png_bytes = span.data.encode_png()?; - - let raw = self - .backend - .detect_ocr(&png_bytes, "image/png", &self.config) - .await?; - - for entity in parse_ocr_entities(&raw)? { - entities.push(entity.with_parent(&span.source)); - } - } - - Ok(entities) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use nvisy_ontology::entity::{DetectionMethod, EntityKind}; - use serde_json::{json, Value}; - - struct MockOcrBackend; - - #[async_trait::async_trait] - impl OcrBackend for MockOcrBackend { - async fn detect_ocr( - &self, - _image_data: &[u8], - _mime_type: &str, - _config: &OcrConfig, - ) -> Result, Error> { - Ok(vec![json!({ - "text": "John Doe", - "x": 10.0, - "y": 20.0, - "width": 100.0, - "height": 30.0, - "confidence": 0.88 - })]) - } - } - - #[tokio::test] - async fn detect_ocr_produces_image_location() { - let config = OcrConfig { - language: "eng".into(), - engine: "tesseract".into(), - confidence_threshold: 0.5, - }; - let layer = OcrDetection::new(MockOcrBackend, config); - - let img = ImageData::new_rgb(200, 100); - let spans = vec![Span::new((), img)]; - - let entities = layer.detect(spans).await.unwrap(); - assert_eq!(entities.len(), 1); - assert_eq!(entities[0].value, "John Doe"); - assert_eq!(entities[0].entity_kind, EntityKind::Handwriting); - assert_eq!(entities[0].detection_method, DetectionMethod::Ocr); - - let loc = entities[0].location.as_ref().unwrap().as_image().unwrap(); - assert!((loc.bounding_box.x - 10.0).abs() < f64::EPSILON); - 
assert!((loc.bounding_box.width - 100.0).abs() < f64::EPSILON); - } -} diff --git a/crates/nvisy-rig/Cargo.toml b/crates/nvisy-rig/Cargo.toml index 5f1b3f1..7ad6990 100644 --- a/crates/nvisy-rig/Cargo.toml +++ b/crates/nvisy-rig/Cargo.toml @@ -31,9 +31,10 @@ rig-core = { workspace = true, features = ["derive", "reqwest-middleware"] } async-trait = { workspace = true, features = [] } tokio = { workspace = true, features = ["time"] } -# HTTP middleware (retry) +# HTTP middleware (retry, tracing) reqwest-middleware = { workspace = true } reqwest-retry = { workspace = true } +reqwest-tracing = { workspace = true } # Encoding base64 = { workspace = true, features = [] } diff --git a/crates/nvisy-rig/src/backend/agent.rs b/crates/nvisy-rig/src/agent/base/agent.rs similarity index 95% rename from crates/nvisy-rig/src/backend/agent.rs rename to crates/nvisy-rig/src/agent/base/agent.rs index f2efb5b..a61237e 100644 --- a/crates/nvisy-rig/src/backend/agent.rs +++ b/crates/nvisy-rig/src/agent/base/agent.rs @@ -1,10 +1,5 @@ //! Foundation agent that wraps provider-specific rig-core agents. -#[path = "builder.rs"] -mod builder; - -pub(crate) use builder::BaseAgentBuilder; - use reqwest_middleware::ClientWithMiddleware; use rig::agent::Agent; use rig::completion::{Completion, Prompt}; @@ -14,12 +9,12 @@ use serde::de::DeserializeOwned; use serde::Serialize; use uuid::Uuid; -use super::context::ContextWindow; -use super::provider::Provider; -use super::UsageTracker; +use crate::backend::{ContextWindow, Provider, UsageTracker}; use crate::bridge::ResponseParser; use crate::error::Error; +use super::BaseAgentBuilder; + /// Sampling, retry, and context-window settings shared by all agents. #[derive(Debug, Clone)] pub struct BaseAgentConfig { @@ -44,7 +39,7 @@ impl Default for BaseAgentConfig { } } -enum Agents { +pub(crate) enum Agents { OpenAi(Agent>), Anthropic(Agent>), Gemini(Agent>), @@ -71,13 +66,15 @@ macro_rules! 
dispatch { /// [`NerAgent`]: crate::NerAgent /// [`CvAgent`]: crate::CvAgent /// [`OcrAgent`]: crate::OcrAgent +#[allow(dead_code)] pub(crate) struct BaseAgent { - id: Uuid, - inner: Agents, - context_window: Option, - tracker: UsageTracker, + pub(super) id: Uuid, + pub(super) inner: Agents, + pub(super) context_window: Option, + pub(super) tracker: UsageTracker, } +#[allow(dead_code)] impl BaseAgent { pub fn builder(provider: &Provider, config: BaseAgentConfig) -> BaseAgentBuilder { BaseAgentBuilder::new(provider, config) diff --git a/crates/nvisy-rig/src/backend/builder.rs b/crates/nvisy-rig/src/agent/base/builder.rs similarity index 98% rename from crates/nvisy-rig/src/backend/builder.rs rename to crates/nvisy-rig/src/agent/base/builder.rs index 0e2be32..7046c64 100644 --- a/crates/nvisy-rig/src/backend/builder.rs +++ b/crates/nvisy-rig/src/agent/base/builder.rs @@ -8,8 +8,7 @@ use rig::providers::{anthropic, gemini, ollama, openai}; use rig::tool::{Tool, ToolDyn}; use uuid::Uuid; -use super::super::provider::{Provider, build_http_client}; -use super::super::UsageTracker; +use crate::backend::{Provider, UsageTracker, build_http_client}; use super::{Agents, BaseAgent, BaseAgentConfig}; use crate::error::Error; diff --git a/crates/nvisy-rig/src/agent/base/mod.rs b/crates/nvisy-rig/src/agent/base/mod.rs new file mode 100644 index 0000000..914639e --- /dev/null +++ b/crates/nvisy-rig/src/agent/base/mod.rs @@ -0,0 +1,8 @@ +//! Foundation agent and builder shared by all specialized agents. 
+ +mod agent; +mod builder; + +pub use agent::BaseAgentConfig; +pub(crate) use agent::{Agents, BaseAgent}; +pub(crate) use builder::BaseAgentBuilder; diff --git a/crates/nvisy-rig/src/agent/cv/mod.rs b/crates/nvisy-rig/src/agent/cv/mod.rs index 3acb3c0..6f58326 100644 --- a/crates/nvisy-rig/src/agent/cv/mod.rs +++ b/crates/nvisy-rig/src/agent/cv/mod.rs @@ -17,7 +17,8 @@ use base64::engine::general_purpose::STANDARD; use serde::Serialize; use uuid::Uuid; -use crate::backend::{BaseAgent, BaseAgentConfig, DetectionConfig, Provider, UsageTracker}; +use crate::backend::{DetectionConfig, Provider, UsageTracker}; +use super::{BaseAgent, BaseAgentConfig}; use crate::error::Error; use prompt::{CV_SYSTEM_PROMPT, CvPromptBuilder}; use tool::CvRigTool; diff --git a/crates/nvisy-rig/src/agent/mod.rs b/crates/nvisy-rig/src/agent/mod.rs index a3be9c6..a8ee93d 100644 --- a/crates/nvisy-rig/src/agent/mod.rs +++ b/crates/nvisy-rig/src/agent/mod.rs @@ -1,13 +1,17 @@ //! Specialized detection agents: NER (text), CV (vision), and OCR (image-to-text). //! -//! Each agent composes a [`BaseAgent`](crate::backend::BaseAgent) with -//! domain-specific prompts and optional tools. Public types are re-exported -//! from [`crate`] — consumer code should not reach into submodules. +//! Each agent composes a [`BaseAgent`](base::BaseAgent) with domain-specific +//! prompts and optional tools. Public types are re-exported from [`crate`] — +//! consumer code should not reach into submodules. 
+mod base; mod cv; mod ner; mod ocr; +pub use base::BaseAgentConfig; +pub(crate) use base::BaseAgent; + pub use cv::{CvAgent, CvDetection, CvEntities, CvEntity, CvProvider}; pub use ner::{NerAgent, NerEntities, NerEntity}; pub use ocr::{OcrAgent, OcrEntity, OcrOutput, OcrProvider, OcrTextRegion}; diff --git a/crates/nvisy-rig/src/agent/ner/mod.rs b/crates/nvisy-rig/src/agent/ner/mod.rs index b7c3391..bd11060 100644 --- a/crates/nvisy-rig/src/agent/ner/mod.rs +++ b/crates/nvisy-rig/src/agent/ner/mod.rs @@ -11,7 +11,8 @@ pub use output::{NerEntities, NerEntity}; use uuid::Uuid; -use crate::backend::{BaseAgent, BaseAgentConfig, DetectionConfig, Provider, UsageTracker}; +use crate::backend::{DetectionConfig, Provider, UsageTracker}; +use super::{BaseAgent, BaseAgentConfig}; use crate::error::Error; use prompt::{NER_SYSTEM_PROMPT, NerPromptBuilder}; diff --git a/crates/nvisy-rig/src/agent/ocr/mod.rs b/crates/nvisy-rig/src/agent/ocr/mod.rs index dec1d1c..29e1a25 100644 --- a/crates/nvisy-rig/src/agent/ocr/mod.rs +++ b/crates/nvisy-rig/src/agent/ocr/mod.rs @@ -17,7 +17,8 @@ use base64::engine::general_purpose::STANDARD; use serde::Serialize; use uuid::Uuid; -use crate::backend::{BaseAgent, BaseAgentConfig, DetectionConfig, Provider, UsageTracker}; +use crate::backend::{DetectionConfig, Provider, UsageTracker}; +use super::{BaseAgent, BaseAgentConfig}; use crate::error::Error; use prompt::{OCR_SYSTEM_PROMPT, OcrPromptBuilder}; use tool::OcrRigTool; diff --git a/crates/nvisy-rig/src/backend/mod.rs b/crates/nvisy-rig/src/backend/mod.rs index 4626d2e..07660c0 100644 --- a/crates/nvisy-rig/src/backend/mod.rs +++ b/crates/nvisy-rig/src/backend/mod.rs @@ -1,15 +1,13 @@ -//! LLM backend: agent infrastructure, provider connections, and usage tracking. +//! LLM backend: provider connections, context windowing, and usage tracking. 
-mod agent; -pub(crate) mod context; +mod context; mod metrics; -pub(crate) mod provider; +mod provider; -pub(crate) use agent::BaseAgent; -pub use agent::BaseAgentConfig; pub use context::ContextWindow; pub use metrics::{UsageStats, UsageTracker}; pub use provider::{AuthenticatedProvider, Provider, UnauthenticatedProvider}; +pub(crate) use provider::build_http_client; use serde_json::Value; diff --git a/crates/nvisy-rig/src/backend/provider.rs b/crates/nvisy-rig/src/backend/provider.rs index c2d8baf..be98030 100644 --- a/crates/nvisy-rig/src/backend/provider.rs +++ b/crates/nvisy-rig/src/backend/provider.rs @@ -4,9 +4,12 @@ //! base URLs. The actual rig-core client is constructed lazily when a //! [`BaseAgent`](super::BaseAgent) is built. +use std::time::Duration; + use reqwest_middleware::ClientBuilder; use reqwest_middleware::ClientWithMiddleware; use reqwest_retry::{RetryTransientMiddleware, policies::ExponentialBackoff}; +use reqwest_tracing::TracingMiddleware; /// Provider that requires an API key (OpenAI, Anthropic, Gemini). #[derive(Clone)] @@ -98,11 +101,18 @@ impl Provider { } } -/// Build a `ClientWithMiddleware` with retry middleware. +/// Build a `ClientWithMiddleware` with timeout, retry, and tracing middleware. pub(crate) fn build_http_client(max_retries: u32) -> ClientWithMiddleware { let retry_policy = ExponentialBackoff::builder() .build_with_max_retries(max_retries); - ClientBuilder::new(reqwest_middleware::reqwest::Client::new()) + + let client = reqwest_middleware::reqwest::Client::builder() + .timeout(Duration::from_secs(120)) + .build() + .expect("failed to build reqwest client"); + + ClientBuilder::new(client) + .with(TracingMiddleware::default()) .with(RetryTransientMiddleware::new_with_policy(retry_policy)) .build() } diff --git a/crates/nvisy-rig/src/bridge/mod.rs b/crates/nvisy-rig/src/bridge/mod.rs index 9257de3..4f2b725 100644 --- a/crates/nvisy-rig/src/bridge/mod.rs +++ b/crates/nvisy-rig/src/bridge/mod.rs @@ -2,11 +2,10 @@ //! //! 
[`PromptBuilder`] assembles user prompts with entity-kind filters and //! confidence thresholds. [`ResponseParser`] extracts and deserializes -//! text from rig-core completion responses. [`EntityParser`] converts raw -//! JSON dicts into [`Entity`](nvisy_ontology::entity::Entity) values. +//! text from rig-core completion responses. mod prompt; mod response; pub use prompt::PromptBuilder; -pub use response::{EntityParser, ResponseParser}; +pub use response::ResponseParser; diff --git a/crates/nvisy-rig/src/bridge/response.rs b/crates/nvisy-rig/src/bridge/response.rs index 2c7f7dc..7a28e20 100644 --- a/crates/nvisy-rig/src/bridge/response.rs +++ b/crates/nvisy-rig/src/bridge/response.rs @@ -2,21 +2,13 @@ //! //! [`ResponseParser`] extracts text from rig-core completion responses //! and deserializes JSON (handling markdown fences and empty responses). -//! [`EntityParser`] converts raw JSON dicts into [`Entity`] values. -//! -//! [`Entity`]: nvisy_ontology::entity::Entity use std::borrow::Cow; -use std::str::FromStr; use serde::de::DeserializeOwned; -use serde_json::Value; use rig::completion::{AssistantContent, CompletionResponse}; -use nvisy_ontology::entity::{DetectionMethod, Entity, EntityCategory, EntityKind}; -use nvisy_ontology::location::{Location, TextLocation}; - use crate::error::Error; /// Thin wrapper around text extracted from an LLM completion response. @@ -85,100 +77,6 @@ impl<'a> ResponseParser<'a> { } } -/// Convert raw JSON dicts (as returned by an LLM) into [`Entity`] values. -/// -/// Unknown `entity_type` values are silently dropped — LLMs occasionally -/// hallucinate types that don't exist in the ontology. -pub struct EntityParser; - -impl EntityParser { - /// Parse an array of JSON objects into entities. - /// - /// Expected keys: `category`, `entity_type`, `value`, `confidence`, - /// and optionally `start_offset` / `end_offset`. 
- pub fn parse(raw: &[Value]) -> Result, Error> { - let mut entities = Vec::new(); - - for item in raw { - let obj = item.as_object().ok_or_else(|| { - Error::Validation("Expected JSON object in LLM results".to_string()) - })?; - - let category_str = obj - .get("category") - .and_then(Value::as_str) - .ok_or_else(|| Error::Validation("Missing 'category'".to_string()))?; - - let category = match category_str { - "pii" => EntityCategory::Pii, - "phi" => EntityCategory::Phi, - "financial" => EntityCategory::Financial, - "credentials" => EntityCategory::Credentials, - other => EntityCategory::Custom(other.to_string()), - }; - - let entity_type_str = obj - .get("entity_type") - .and_then(Value::as_str) - .ok_or_else(|| { - Error::Validation("Missing 'entity_type'".to_string()) - })?; - - let entity_kind = match EntityKind::from_str(entity_type_str) { - Ok(ek) => ek, - Err(_) => { - tracing::warn!( - entity_type = entity_type_str, - "unknown entity type from LLM, dropping" - ); - continue; - } - }; - - let value = obj - .get("value") - .and_then(Value::as_str) - .ok_or_else(|| Error::Validation("Missing 'value'".to_string()))?; - - let confidence = obj - .get("confidence") - .and_then(Value::as_f64) - .ok_or_else(|| { - Error::Validation("Missing 'confidence'".to_string()) - })?; - - let start_offset = obj - .get("start_offset") - .and_then(Value::as_u64) - .map(|v| v as usize) - .unwrap_or(0); - - let end_offset = obj - .get("end_offset") - .and_then(Value::as_u64) - .map(|v| v as usize) - .unwrap_or(0); - - let entity = Entity::new( - category, - entity_kind, - value, - DetectionMethod::ContextualNlp, - confidence, - ) - .with_location(Location::Text(TextLocation { - start_offset, - end_offset, - ..Default::default() - })); - - entities.push(entity); - } - - Ok(entities) - } -} - /// Extract JSON content from markdown fences (```` ```json ... ``` ````). 
fn extract_fenced_json(text: &str) -> Option<&str> { let start_marker = if let Some(pos) = text.find("```json") { @@ -216,7 +114,7 @@ fn truncate(s: &str, max_len: usize) -> &str { #[cfg(test)] mod tests { use super::*; - use serde_json::json; + use serde_json::Value; #[test] fn parse_json_raw_array() { @@ -240,30 +138,4 @@ mod tests { assert_eq!(ResponseParser::from_text("No entities").parse_json::>().unwrap(), empty); } - #[test] - fn entity_parser_basic() { - let raw = vec![json!({ - "category": "credentials", - "entity_type": "api_key", - "value": "SECRET", - "confidence": 0.92, - "start_offset": 9, - "end_offset": 15 - })]; - let entities = EntityParser::parse(&raw).unwrap(); - assert_eq!(entities.len(), 1); - assert_eq!(entities[0].value, "SECRET"); - assert_eq!(entities[0].confidence, 0.92); - } - - #[test] - fn entity_parser_unknown_type_skipped() { - let raw = vec![json!({ - "category": "pii", - "entity_type": "unknown_thing_xyz", - "value": "test", - "confidence": 0.5 - })]; - assert!(EntityParser::parse(&raw).unwrap().is_empty()); - } } diff --git a/crates/nvisy-rig/src/lib.rs b/crates/nvisy-rig/src/lib.rs index b21a161..c7c2a98 100644 --- a/crates/nvisy-rig/src/lib.rs +++ b/crates/nvisy-rig/src/lib.rs @@ -10,12 +10,12 @@ mod agent; #[doc(hidden)] pub mod prelude; +pub use agent::BaseAgentConfig; pub use backend::{ - AuthenticatedProvider, BaseAgentConfig, ContextWindow, + AuthenticatedProvider, ContextWindow, DetectionConfig, DetectionRequest, DetectionResponse, Provider, UnauthenticatedProvider, UsageStats, UsageTracker, }; -pub use bridge::EntityParser; pub use error::Error; pub use agent::{ diff --git a/crates/nvisy-rig/src/prelude.rs b/crates/nvisy-rig/src/prelude.rs index e527528..8f68602 100644 --- a/crates/nvisy-rig/src/prelude.rs +++ b/crates/nvisy-rig/src/prelude.rs @@ -1,11 +1,11 @@ //! Convenience re-exports. 
+pub use crate::agent::BaseAgentConfig; pub use crate::backend::{ - AuthenticatedProvider, BaseAgentConfig, ContextWindow, + AuthenticatedProvider, ContextWindow, DetectionConfig, DetectionRequest, DetectionResponse, Provider, UnauthenticatedProvider, UsageStats, UsageTracker, }; -pub use crate::bridge::EntityParser; pub use crate::error::Error; pub use crate::agent::{ CvAgent, CvDetection, CvEntities, CvEntity, CvProvider, From 82bef6ae9bab61934e542f236146c566d789cda9 Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Thu, 26 Feb 2026 02:51:53 +0100 Subject: [PATCH 21/24] fix(engine): fix truncated import path; refactor(rig): derive JsonSchema for tool args MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix nvisy_ontology::spec → nvisy_ontology::specification in engine test - Replace hand-written json!() tool schemas with schemars::schema_for!() - Add Debug, Clone, JsonSchema derives to CvToolArgs and OcrToolArgs Co-Authored-By: Claude Opus 4.6 --- crates/nvisy-engine/src/apply/text.rs | 2 +- crates/nvisy-rig/src/agent/cv/tool.rs | 16 ++++------------ crates/nvisy-rig/src/agent/ocr/tool.rs | 16 ++++------------ 3 files changed, 9 insertions(+), 25 deletions(-) diff --git a/crates/nvisy-engine/src/apply/text.rs b/crates/nvisy-engine/src/apply/text.rs index fa9b210..c5678b6 100644 --- a/crates/nvisy-engine/src/apply/text.rs +++ b/crates/nvisy-engine/src/apply/text.rs @@ -129,7 +129,7 @@ pub(crate) async fn apply_text_doc( #[cfg(test)] mod tests { use super::*; - use nvisy_ontology::spec::ImageRedactionInput; + use nvisy_ontology::specification::ImageRedactionInput; #[test] fn text_output_remove_empty_replacement() { diff --git a/crates/nvisy-rig/src/agent/cv/tool.rs b/crates/nvisy-rig/src/agent/cv/tool.rs index 01a4310..bfc0ef1 100644 --- a/crates/nvisy-rig/src/agent/cv/tool.rs +++ b/crates/nvisy-rig/src/agent/cv/tool.rs @@ -6,13 +6,13 @@ use base64::Engine; use base64::engine::general_purpose::STANDARD; use 
rig::completion::ToolDefinition; use rig::tool::Tool; +use schemars::JsonSchema; use serde::Deserialize; -use serde_json::json; use super::CvProvider; /// Arguments for the CV tool call. -#[derive(Deserialize)] +#[derive(Debug, Clone, Deserialize, JsonSchema)] pub(super) struct CvToolArgs { /// Base64-encoded image data. pub image_base64: String, @@ -45,16 +45,8 @@ impl Tool for CvRigTool { description: "Detect objects (faces, license plates, signatures) in an image \ using computer vision. Pass the image as a base64-encoded string." .to_string(), - parameters: json!({ - "type": "object", - "properties": { - "image_base64": { - "type": "string", - "description": "Base64-encoded image data" - } - }, - "required": ["image_base64"] - }), + parameters: serde_json::to_value(schemars::schema_for!(CvToolArgs)) + .unwrap_or_default(), } } diff --git a/crates/nvisy-rig/src/agent/ocr/tool.rs b/crates/nvisy-rig/src/agent/ocr/tool.rs index d271ab8..66fd3b2 100644 --- a/crates/nvisy-rig/src/agent/ocr/tool.rs +++ b/crates/nvisy-rig/src/agent/ocr/tool.rs @@ -6,13 +6,13 @@ use base64::Engine; use base64::engine::general_purpose::STANDARD; use rig::completion::ToolDefinition; use rig::tool::Tool; +use schemars::JsonSchema; use serde::Deserialize; -use serde_json::json; use super::OcrProvider; /// Arguments for the OCR tool call. -#[derive(Deserialize)] +#[derive(Debug, Clone, Deserialize, JsonSchema)] pub(super) struct OcrToolArgs { /// Base64-encoded image data. pub image_base64: String, @@ -47,16 +47,8 @@ impl Tool for OcrRigTool { confidence, and optional bounding box. \ Pass the image as a base64-encoded string." 
.to_string(), - parameters: json!({ - "type": "object", - "properties": { - "image_base64": { - "type": "string", - "description": "Base64-encoded image data" - } - }, - "required": ["image_base64"] - }), + parameters: serde_json::to_value(schemars::schema_for!(OcrToolArgs)) + .unwrap_or_default(), } } From 6ede686c98358fcdf1741cbb88e6a71e2163d5a1 Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Thu, 26 Feb 2026 02:55:47 +0100 Subject: [PATCH 22/24] chore: add features=[] to workspace deps, remove re-exports from nvisy-server - Add missing features = [] to reqwest-middleware, reqwest-retry, reqwest-tracing in workspace Cargo.toml - Remove pub use re-exports (routes, ServiceState) from nvisy-server - Update nvisy-cli to use full module paths Co-Authored-By: Claude Opus 4.6 --- Cargo.toml | 6 +++--- crates/nvisy-cli/src/main.rs | 4 ++-- crates/nvisy-server/src/lib.rs | 2 -- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 9ff374b..03eb266 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -54,9 +54,9 @@ nvisy-server = { path = "./crates/nvisy-server", version = "0.1.0" } rig-core = { version = "0.31", features = [] } # HTTP middleware -reqwest-middleware = { version = "0.5" } -reqwest-retry = { version = "0.9" } -reqwest-tracing = { version = "0.7" } +reqwest-middleware = { version = "0.5", features = [] } +reqwest-retry = { version = "0.9", features = [] } +reqwest-tracing = { version = "0.7", features = [] } # Async runtime tokio = { version = "1.0", features = [] } diff --git a/crates/nvisy-cli/src/main.rs b/crates/nvisy-cli/src/main.rs index 8b2125a..ffef9db 100644 --- a/crates/nvisy-cli/src/main.rs +++ b/crates/nvisy-cli/src/main.rs @@ -11,7 +11,7 @@ use axum::Router; use clap::Parser; use nvisy_core::fs::ContentRegistry; use nvisy_server::middleware::*; -use nvisy_server::ServiceState; +use nvisy_server::service::ServiceState; use crate::config::Cli; @@ -46,7 +46,7 @@ async fn run() -> anyhow::Result<()> { /// Creates the 
router with all middleware layers applied. fn create_router(cli: &Cli, state: ServiceState) -> Router { - nvisy_server::routes() + nvisy_server::handler::routes() .with_open_api(&cli.open_api_config()) .with_recovery(&cli.recovery_config()) .with_observability() diff --git a/crates/nvisy-server/src/lib.rs b/crates/nvisy-server/src/lib.rs index c3e7bc3..1f91167 100644 --- a/crates/nvisy-server/src/lib.rs +++ b/crates/nvisy-server/src/lib.rs @@ -6,5 +6,3 @@ pub mod handler; pub mod middleware; pub mod service; -pub use handler::routes; -pub use service::ServiceState; From ebd73892851a288d6857e41e061926c75bcf6613 Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Thu, 26 Feb 2026 05:28:10 +0100 Subject: [PATCH 23/24] feat(rig): add NER coreference resolution with entity_id, context-based offset resolution, and KnownNerEntity accumulation Move preamble into BaseAgentConfig so specialized agents set it via config. Redesign NerEntity with entity_id for coreference, optional category/entity_type/confidence, context snippet for deterministic offset resolution, and LLM-produced description. Add KnownNerEntity for lightweight cross-chunk context, NerContext with merge/set_text for accumulating surface forms and descriptions across calls, and ResolvedOffsets with type-safe resolve_offsets tied to the source NerContext. 
Co-Authored-By: Claude Opus 4.6 --- crates/nvisy-rig/src/agent/base/agent.rs | 5 +- crates/nvisy-rig/src/agent/base/builder.rs | 15 +- crates/nvisy-rig/src/agent/cv/mod.rs | 4 +- crates/nvisy-rig/src/agent/mod.rs | 2 +- crates/nvisy-rig/src/agent/ner/context.rs | 168 +++++++++++++++++++++ crates/nvisy-rig/src/agent/ner/mod.rs | 28 ++-- crates/nvisy-rig/src/agent/ner/output.rs | 160 ++++++++++++++++++-- crates/nvisy-rig/src/agent/ner/prompt.rs | 47 +++++- crates/nvisy-rig/src/agent/ocr/mod.rs | 4 +- crates/nvisy-rig/src/bridge/prompt.rs | 2 +- crates/nvisy-rig/src/lib.rs | 2 +- crates/nvisy-rig/src/prelude.rs | 2 +- crates/nvisy-server/src/lib.rs | 1 - 13 files changed, 393 insertions(+), 47 deletions(-) create mode 100644 crates/nvisy-rig/src/agent/ner/context.rs diff --git a/crates/nvisy-rig/src/agent/base/agent.rs b/crates/nvisy-rig/src/agent/base/agent.rs index a61237e..18a951f 100644 --- a/crates/nvisy-rig/src/agent/base/agent.rs +++ b/crates/nvisy-rig/src/agent/base/agent.rs @@ -15,7 +15,7 @@ use crate::error::Error; use super::BaseAgentBuilder; -/// Sampling, retry, and context-window settings shared by all agents. +/// Sampling, retry, context-window, and preamble settings shared by all agents. #[derive(Debug, Clone)] pub struct BaseAgentConfig { /// Sampling temperature (default: 0.1). @@ -26,6 +26,8 @@ pub struct BaseAgentConfig { pub max_retries: u32, /// Context window for chunking large inputs. pub context_window: Option, + /// System prompt (preamble) for the agent. 
+ pub preamble: Option, } impl Default for BaseAgentConfig { @@ -35,6 +37,7 @@ impl Default for BaseAgentConfig { max_tokens: 4096, max_retries: 3, context_window: None, + preamble: None, } } } diff --git a/crates/nvisy-rig/src/agent/base/builder.rs b/crates/nvisy-rig/src/agent/base/builder.rs index 7046c64..5cae5ba 100644 --- a/crates/nvisy-rig/src/agent/base/builder.rs +++ b/crates/nvisy-rig/src/agent/base/builder.rs @@ -15,12 +15,11 @@ use crate::error::Error; /// Builder for [`BaseAgent`]. /// /// Created via [`BaseAgent::builder`]. Collects a provider reference, config, -/// optional preamble (system prompt), and optional tools, then constructs the -/// concrete rig-core agent on [`build`](Self::build). +/// and optional tools, then constructs the concrete rig-core agent on +/// [`build`](Self::build). pub(crate) struct BaseAgentBuilder { provider: Provider, config: BaseAgentConfig, - preamble: Option, tools: Vec>, } @@ -29,17 +28,10 @@ impl BaseAgentBuilder { Self { provider: provider.clone(), config, - preamble: None, tools: Vec::new(), } } - /// Set the system prompt (preamble). - pub fn preamble(mut self, preamble: impl Into) -> Self { - self.preamble = Some(preamble.into()); - self - } - /// Register a tool the agent can call during prompts. pub fn tool(mut self, tool: impl Tool + 'static) -> Self { self.tools.push(Box::new(tool)); @@ -51,12 +43,11 @@ impl BaseAgentBuilder { let Self { provider, config, - preamble, tools, } = self; let http_client = build_http_client(config.max_retries); - let preamble = preamble.as_deref(); + let preamble = config.preamble.as_deref(); let inner = match &provider { Provider::OpenAi(p) => { diff --git a/crates/nvisy-rig/src/agent/cv/mod.rs b/crates/nvisy-rig/src/agent/cv/mod.rs index 6f58326..8f42f79 100644 --- a/crates/nvisy-rig/src/agent/cv/mod.rs +++ b/crates/nvisy-rig/src/agent/cv/mod.rs @@ -68,11 +68,11 @@ impl CvAgent { /// Create a new CV agent. 
pub fn new( provider: &Provider, - config: BaseAgentConfig, + mut config: BaseAgentConfig, cv: impl CvProvider + 'static, ) -> Result { + config.preamble.get_or_insert_with(|| CV_SYSTEM_PROMPT.into()); let base = BaseAgent::builder(provider, config) - .preamble(CV_SYSTEM_PROMPT) .tool(CvRigTool::new(cv)) .build()?; Ok(Self { base }) diff --git a/crates/nvisy-rig/src/agent/mod.rs b/crates/nvisy-rig/src/agent/mod.rs index a8ee93d..2415c84 100644 --- a/crates/nvisy-rig/src/agent/mod.rs +++ b/crates/nvisy-rig/src/agent/mod.rs @@ -13,5 +13,5 @@ pub use base::BaseAgentConfig; pub(crate) use base::BaseAgent; pub use cv::{CvAgent, CvDetection, CvEntities, CvEntity, CvProvider}; -pub use ner::{NerAgent, NerEntities, NerEntity}; +pub use ner::{KnownNerEntity, NerAgent, NerContext, NerEntities, NerEntity, ResolvedOffsets}; pub use ocr::{OcrAgent, OcrEntity, OcrOutput, OcrProvider, OcrTextRegion}; diff --git a/crates/nvisy-rig/src/agent/ner/context.rs b/crates/nvisy-rig/src/agent/ner/context.rs new file mode 100644 index 0000000..7a6490a --- /dev/null +++ b/crates/nvisy-rig/src/agent/ner/context.rs @@ -0,0 +1,168 @@ +//! Input context for NER detection calls. + +use super::{KnownNerEntity, NerEntity}; + +/// Input context for a single NER detection call. +/// +/// Bundles the text to analyse together with any previously identified +/// entities so the LLM can assign consistent `entity_id` values across +/// chunks or sequential calls. +/// +/// Use [`merge`](Self::merge) to accumulate entities from successive +/// detection calls, then update the text with [`set_text`](Self::set_text) +/// before the next call. +pub struct NerContext<'a> { + /// The text to analyse. + pub text: &'a str, + /// Accumulated known entities from prior detection calls. + pub known_entities: Vec, +} + +impl<'a> NerContext<'a> { + /// Create a context with no known entities. 
+ pub fn new(text: &'a str) -> Self { + Self { + text, + known_entities: Vec::new(), + } + } + + /// Create a context with previously identified entities. + pub fn with_known(text: &'a str, known_entities: Vec) -> Self { + Self { + text, + known_entities, + } + } + + /// Set the text to analyse, keeping accumulated known entities. + pub fn set_text(&mut self, text: &'a str) { + self.text = text; + } + + /// Merge newly detected entities into the known set. + /// + /// For each entity: if a [`KnownNerEntity`] with the same `entity_id` + /// already exists, its `values` list is extended with any new surface + /// forms and new descriptions are appended. Otherwise a new + /// `KnownNerEntity` is created. + pub fn merge(&mut self, entities: Vec) { + for entity in entities { + if let Some(known) = self + .known_entities + .iter_mut() + .find(|k| k.entity_id == entity.entity_id) + { + // Add new surface form if not already present. + if !known.values.iter().any(|v| v == &entity.value) { + known.values.push(entity.value); + } + + // Append new description if not already present. + if let Some(desc) = entity.description + && !known.descriptions.iter().any(|d| d == &desc) + { + known.descriptions.push(desc); + } + + // Fill in entity_type if it was previously unknown. 
+ if known.entity_type.is_none() { + known.entity_type = entity.entity_type; + } + } else { + self.known_entities.push(KnownNerEntity { + entity_id: entity.entity_id, + entity_type: entity.entity_type, + values: vec![entity.value], + descriptions: entity.description.into_iter().collect(), + }); + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use nvisy_ontology::entity::EntityKind; + + fn ner_entity(id: &str, value: &str, desc: Option<&str>) -> NerEntity { + NerEntity { + entity_id: id.into(), + category: None, + entity_type: Some(EntityKind::PersonName), + value: value.into(), + confidence: None, + context: None, + description: desc.map(Into::into), + } + } + + #[test] + fn merge_creates_new_known_entity() { + let mut ctx = NerContext::new(""); + ctx.merge(vec![ner_entity("person_1", "John Smith", Some("the CEO"))]); + + assert_eq!(ctx.known_entities.len(), 1); + assert_eq!(ctx.known_entities[0].entity_id, "person_1"); + assert_eq!(ctx.known_entities[0].values, vec!["John Smith"]); + assert_eq!(ctx.known_entities[0].descriptions, vec!["the CEO"]); + } + + #[test] + fn merge_accumulates_surface_forms() { + let mut ctx = NerContext::new(""); + ctx.merge(vec![ner_entity("person_1", "John Smith", None)]); + ctx.merge(vec![ner_entity("person_1", "John", None)]); + ctx.merge(vec![ner_entity("person_1", "Mr. Smith", None)]); + // Duplicate value should not be added. + ctx.merge(vec![ner_entity("person_1", "John", None)]); + + assert_eq!(ctx.known_entities.len(), 1); + assert_eq!( + ctx.known_entities[0].values, + vec!["John Smith", "John", "Mr. 
Smith"], + ); + } + + #[test] + fn merge_accumulates_descriptions() { + let mut ctx = NerContext::new(""); + ctx.merge(vec![ner_entity("person_1", "Alice", Some("the CEO"))]); + ctx.merge(vec![ner_entity("person_1", "Alice", Some("signed the contract on Jan 5"))]); + + assert_eq!( + ctx.known_entities[0].descriptions, + vec!["the CEO", "signed the contract on Jan 5"], + ); + } + + #[test] + fn merge_deduplicates_descriptions() { + let mut ctx = NerContext::new(""); + ctx.merge(vec![ner_entity("person_1", "Alice", Some("the CEO"))]); + ctx.merge(vec![ner_entity("person_1", "Alice", Some("the CEO"))]); + + assert_eq!(ctx.known_entities[0].descriptions, vec!["the CEO"]); + } + + #[test] + fn merge_no_description() { + let mut ctx = NerContext::new(""); + ctx.merge(vec![ner_entity("person_1", "Alice", None)]); + + assert!(ctx.known_entities[0].descriptions.is_empty()); + } + + #[test] + fn merge_fills_missing_entity_type() { + let mut ctx = NerContext::new(""); + let mut e = ner_entity("org_1", "Acme", None); + e.entity_type = None; + ctx.merge(vec![e]); + assert!(ctx.known_entities[0].entity_type.is_none()); + + ctx.merge(vec![ner_entity("org_1", "Acme Corp", None)]); + assert_eq!(ctx.known_entities[0].entity_type, Some(EntityKind::PersonName)); + } +} diff --git a/crates/nvisy-rig/src/agent/ner/mod.rs b/crates/nvisy-rig/src/agent/ner/mod.rs index bd11060..bbccfe5 100644 --- a/crates/nvisy-rig/src/agent/ner/mod.rs +++ b/crates/nvisy-rig/src/agent/ner/mod.rs @@ -2,12 +2,14 @@ //! //! [`NerAgent`] wraps a [`BaseAgent`](crate::backend::BaseAgent) with //! NER-specific prompts. It is a pure LLM agent (no tools) that analyses -//! text and returns structured entity detections with byte offsets. +//! text and returns structured entity detections. 
+mod context; mod output; mod prompt; -pub use output::{NerEntities, NerEntity}; +pub use context::NerContext; +pub use output::{KnownNerEntity, NerEntities, NerEntity, ResolvedOffsets}; use uuid::Uuid; @@ -20,10 +22,10 @@ use prompt::{NER_SYSTEM_PROMPT, NerPromptBuilder}; /// /// # Workflow /// -/// 1. Caller passes text and a [`DetectionConfig`] to +/// 1. Caller passes a [`NerContext`] and a [`DetectionConfig`] to /// [`detect`](Self::detect). /// 2. The agent builds a user prompt via [`NerPromptBuilder`] that -/// specifies entity types and confidence thresholds. +/// specifies entity types, confidence thresholds, and known entities. /// 3. Structured output is parsed into `Vec`. pub struct NerAgent { base: BaseAgent, @@ -31,10 +33,9 @@ pub struct NerAgent { impl NerAgent { /// Create a new NER agent. - pub fn new(provider: &Provider, config: BaseAgentConfig) -> Result { - let base = BaseAgent::builder(provider, config) - .preamble(NER_SYSTEM_PROMPT) - .build()?; + pub fn new(provider: &Provider, mut config: BaseAgentConfig) -> Result { + config.preamble.get_or_insert_with(|| NER_SYSTEM_PROMPT.into()); + let base = BaseAgent::builder(provider, config).build()?; Ok(Self { base }) } @@ -49,20 +50,25 @@ impl NerAgent { } /// Detect entities in text using structured output with text-based fallback. + /// + /// When [`NerContext::known_entities`] is non-empty the LLM is + /// instructed to reuse their `entity_id` values for coreferent + /// mentions, enabling cross-chunk coreference resolution. 
#[tracing::instrument( skip_all, - fields(text_len = text.len(), agent = "ner"), + fields(text_len = ctx.text.len(), agent = "ner"), )] pub async fn detect( &self, - text: &str, + ctx: &NerContext<'_>, config: &DetectionConfig, ) -> Result, Error> { - let prompt = NerPromptBuilder::new(config).build(text); + let prompt = NerPromptBuilder::new(config, &ctx.known_entities).build(ctx.text); tracing::debug!( prompt_len = prompt.len(), entity_kinds = config.entity_kinds.len(), + known = ctx.known_entities.len(), "built ner prompt" ); diff --git a/crates/nvisy-rig/src/agent/ner/output.rs b/crates/nvisy-rig/src/agent/ner/output.rs index 8e2df0e..3a87845 100644 --- a/crates/nvisy-rig/src/agent/ner/output.rs +++ b/crates/nvisy-rig/src/agent/ner/output.rs @@ -5,6 +5,8 @@ use serde::{Deserialize, Serialize}; use nvisy_ontology::entity::{EntityCategory, EntityKind}; +use super::NerContext; + /// A list of NER entities returned by structured output. #[derive(Debug, Clone, Default, PartialEq, Deserialize, Serialize, JsonSchema)] pub struct NerEntities { @@ -15,16 +17,156 @@ pub struct NerEntities { /// A single NER entity from structured LLM output. #[derive(Debug, Clone, PartialEq, Deserialize, Serialize, JsonSchema)] pub struct NerEntity { - /// Broad classification. - pub category: EntityCategory, - /// Specific entity type. - pub entity_type: EntityKind, + /// Stable identifier for the real-world entity this mention refers to. + /// + /// All mentions of the same person, organisation, etc. share the same + /// `entity_id` (e.g. `"person_1"`). When known entities are provided + /// as context, the LLM reuses their IDs for coreferent mentions. + pub entity_id: String, + /// Broad classification (may be absent for coreferent mentions like pronouns). + pub category: Option, + /// Specific entity type (may be absent for coreferent mentions like pronouns). + pub entity_type: Option, /// The matched text value. pub value: String, /// Detection confidence (0.0..=1.0). 
- pub confidence: f64, - /// Start byte offset in the input text. - pub start_offset: usize, - /// End byte offset in the input text. - pub end_offset: usize, + pub confidence: Option, + /// A short snippet of surrounding text that uniquely locates this mention + /// within the input. Used to compute byte offsets deterministically by + /// finding `context` in the span, then `value` within the `context`. + pub context: Option, + /// Brief description of the real-world entity (e.g. "CEO of Acme Corp, + /// mentioned as the signatory"). Carried forward via [`KnownNerEntity`] so + /// the LLM can disambiguate entities across chunks. + pub description: Option, +} + +/// A previously identified entity carried as context between detection calls. +/// +/// Lighter than [`NerEntity`] — holds only the information the LLM needs to +/// recognise and reuse an existing `entity_id`. Created via +/// [`NerContext::merge`]. +#[derive(Debug, Clone, PartialEq)] +pub struct KnownNerEntity { + /// Stable identifier (e.g. `"person_1"`). + pub entity_id: String, + /// Entity type, if known. + pub entity_type: Option, + /// All surface forms seen so far (e.g. `["John Smith", "John", "Mr. Smith"]`). + pub values: Vec, + /// Accumulated descriptions from successive detection calls. + pub descriptions: Vec, +} + +/// Resolved byte offsets for an entity mention within its source text. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct ResolvedOffsets { + /// Start byte offset in the source text. + pub start: usize, + /// End byte offset (exclusive) in the source text. + pub end: usize, +} + +impl NerEntity { + /// Resolve byte offsets of this entity's `value` within the text + /// from the [`NerContext`] that produced it. + /// + /// When `context` is present, first locates the context snippet in + /// the source text, then finds `value` within it. Falls back to + /// searching for `value` directly in the source text when `context` + /// is absent or not found. 
+ /// + /// Returns `None` if the value cannot be located. + pub fn resolve_offsets(&self, ctx: &NerContext<'_>) -> Option { + let text = ctx.text; + + if let Some(ref context) = self.context + && let Some(ctx_start) = text.find(context.as_str()) + && let Some(val_offset) = context.find(&self.value) + { + let start = ctx_start + val_offset; + return Some(ResolvedOffsets { + start, + end: start + self.value.len(), + }); + } + + // Fallback: search for value directly in the source text. + let start = text.find(&self.value)?; + Some(ResolvedOffsets { + start, + end: start + self.value.len(), + }) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn entity(value: &str, context: Option<&str>) -> NerEntity { + NerEntity { + entity_id: "test_1".into(), + category: None, + entity_type: None, + value: value.into(), + confidence: None, + context: context.map(Into::into), + description: None, + } + } + + #[test] + fn resolve_with_context() { + let text = "Alice met Bob. Later Alice called him."; + let ctx = NerContext::new(text); + let e = entity("Alice", Some("Later Alice called")); + + let offsets = e.resolve_offsets(&ctx).unwrap(); + assert_eq!(offsets.start, 21); + assert_eq!(offsets.end, 26); + assert_eq!(&text[offsets.start..offsets.end], "Alice"); + } + + #[test] + fn resolve_without_context_finds_first() { + let text = "Alice met Bob. 
Later Alice called him."; + let ctx = NerContext::new(text); + let e = entity("Alice", None); + + let offsets = e.resolve_offsets(&ctx).unwrap(); + assert_eq!(offsets.start, 0); + assert_eq!(offsets.end, 5); + } + + #[test] + fn resolve_missing_value_returns_none() { + let text = "No match here."; + let ctx = NerContext::new(text); + let e = entity("Charlie", Some("with Charlie")); + + assert!(e.resolve_offsets(&ctx).is_none()); + } + + #[test] + fn resolve_context_not_found_falls_back() { + let text = "Alice is here."; + let ctx = NerContext::new(text); + let e = entity("Alice", Some("stale context from another chunk")); + + let offsets = e.resolve_offsets(&ctx).unwrap(); + assert_eq!(offsets.start, 0); + assert_eq!(offsets.end, 5); + } + + #[test] + fn resolve_disambiguates_duplicate_values() { + let text = "He went home. She said he was tired."; + let ctx = NerContext::new(text); + + let e1 = entity("he", Some("said he was")); + let offsets = e1.resolve_offsets(&ctx).unwrap(); + assert_eq!(&text[offsets.start..offsets.end], "he"); + assert_eq!(offsets.start, 23); + } } diff --git a/crates/nvisy-rig/src/agent/ner/prompt.rs b/crates/nvisy-rig/src/agent/ner/prompt.rs index 49ccce1..27c54f6 100644 --- a/crates/nvisy-rig/src/agent/ner/prompt.rs +++ b/crates/nvisy-rig/src/agent/ner/prompt.rs @@ -3,22 +3,48 @@ use crate::backend::DetectionConfig; use crate::bridge::PromptBuilder; +use super::KnownNerEntity; + /// Builds user prompts for NER entity detection. pub(crate) struct NerPromptBuilder<'a> { inner: PromptBuilder<'a>, + known_entities: &'a [KnownNerEntity], } impl<'a> NerPromptBuilder<'a> { /// Create a prompt builder from a [`DetectionConfig`]. - pub fn new(config: &'a DetectionConfig) -> Self { + pub fn new(config: &'a DetectionConfig, known_entities: &'a [KnownNerEntity]) -> Self { Self { inner: PromptBuilder::new(config), + known_entities, } } /// Build the user prompt for the given text. 
pub fn build(&self, text: &str) -> String { - self.inner.build(text) + let mut prompt = self.inner.build(text); + + if !self.known_entities.is_empty() { + prompt.push_str("\n\nPreviously identified entities (reuse their entity_id for coreferent mentions):\n"); + for e in self.known_entities { + let type_str = match &e.entity_type { + Some(t) => t.to_string(), + None => "unknown".to_string(), + }; + let values = e.values.iter().map(|v| format!("\"{v}\"")).collect::>().join(", "); + prompt.push_str(&format!( + "- entity_id={}, type={}, values=[{}]", + e.entity_id, type_str, values, + )); + if !e.descriptions.is_empty() { + let descs = e.descriptions.join("; "); + prompt.push_str(&format!(", description=\"{descs}\"")); + } + prompt.push('\n'); + } + } + + prompt } } @@ -27,6 +53,17 @@ pub(super) const NER_SYSTEM_PROMPT: &str = "\ You are a precise named-entity recognition system. \ Identify personally identifiable information (PII), protected health information (PHI), \ financial data, and credentials in the provided text. \ -Return results as a JSON array of objects with keys: \ -category, entity_type, value, confidence, start_offset, end_offset. \ -If no entities are found, return an empty array []."; +Return results as a JSON object with an \"entities\" key containing an array of objects with keys: \ +entity_id, category (optional), entity_type (optional), value, confidence (optional), \ +context (optional), description (optional). \ +Assign a stable entity_id (e.g. \"person_1\", \"org_1\") to each unique real-world entity. \ +All mentions of the same entity must share the same entity_id. \ +When previously identified entities are provided, reuse their entity_id for any coreferent mentions. \ +The \"context\" field should be a short surrounding snippet of text that uniquely locates this \ +mention within the input. Include enough words before and after the value so that the context \ +string appears exactly once in the input text. 
This is especially important when the same value \ +(e.g. \"he\") appears multiple times. \ +The \"description\" field should be a brief description of the real-world entity \ +(e.g. \"CEO of Acme Corp\", \"patient's home address\"). Provide it for the first mention \ +of each entity or when additional context becomes available. \ +If no entities are found, return {\"entities\": []}."; diff --git a/crates/nvisy-rig/src/agent/ocr/mod.rs b/crates/nvisy-rig/src/agent/ocr/mod.rs index 29e1a25..539a034 100644 --- a/crates/nvisy-rig/src/agent/ocr/mod.rs +++ b/crates/nvisy-rig/src/agent/ocr/mod.rs @@ -71,11 +71,11 @@ impl OcrAgent { /// Create a new OCR agent. pub fn new( provider: &Provider, - config: BaseAgentConfig, + mut config: BaseAgentConfig, ocr: impl OcrProvider + 'static, ) -> Result { + config.preamble.get_or_insert_with(|| OCR_SYSTEM_PROMPT.into()); let base = BaseAgent::builder(provider, config) - .preamble(OCR_SYSTEM_PROMPT) .tool(OcrRigTool::new(ocr)) .build()?; Ok(Self { base }) diff --git a/crates/nvisy-rig/src/bridge/prompt.rs b/crates/nvisy-rig/src/bridge/prompt.rs index c0b0f99..efe095d 100644 --- a/crates/nvisy-rig/src/bridge/prompt.rs +++ b/crates/nvisy-rig/src/bridge/prompt.rs @@ -16,7 +16,7 @@ const DETECT_PREFIX: &str = "Detect entities of types"; /// Suffix describing the expected response format. const RESPONSE_FORMAT: &str = "\ Return a JSON array of objects with keys: \ -category, entity_type, value, confidence, start_offset, end_offset."; +entity_id, category, entity_type, value, confidence, context."; /// Builds user prompts for entity detection requests. 
pub struct PromptBuilder<'a> { diff --git a/crates/nvisy-rig/src/lib.rs b/crates/nvisy-rig/src/lib.rs index c7c2a98..edb522d 100644 --- a/crates/nvisy-rig/src/lib.rs +++ b/crates/nvisy-rig/src/lib.rs @@ -20,6 +20,6 @@ pub use error::Error; pub use agent::{ CvAgent, CvDetection, CvEntities, CvEntity, CvProvider, - NerAgent, NerEntities, NerEntity, + KnownNerEntity, NerAgent, NerContext, NerEntities, NerEntity, ResolvedOffsets, OcrAgent, OcrEntity, OcrOutput, OcrProvider, OcrTextRegion, }; diff --git a/crates/nvisy-rig/src/prelude.rs b/crates/nvisy-rig/src/prelude.rs index 8f68602..c626bbb 100644 --- a/crates/nvisy-rig/src/prelude.rs +++ b/crates/nvisy-rig/src/prelude.rs @@ -9,6 +9,6 @@ pub use crate::backend::{ pub use crate::error::Error; pub use crate::agent::{ CvAgent, CvDetection, CvEntities, CvEntity, CvProvider, - NerAgent, NerEntities, NerEntity, + KnownNerEntity, NerAgent, NerContext, NerEntities, NerEntity, ResolvedOffsets, OcrAgent, OcrEntity, OcrOutput, OcrProvider, OcrTextRegion, }; diff --git a/crates/nvisy-server/src/lib.rs b/crates/nvisy-server/src/lib.rs index 1f91167..322d894 100644 --- a/crates/nvisy-server/src/lib.rs +++ b/crates/nvisy-server/src/lib.rs @@ -5,4 +5,3 @@ pub mod handler; pub mod middleware; pub mod service; - From 03ace7f3d5b50770389300054d01205123d28e03 Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Thu, 26 Feb 2026 22:19:15 +0100 Subject: [PATCH 24/24] refactor(identify): replace ner/, llm/, pattern/, audio/ with method/ adapters Delete the old detection modules that duplicated logic now provided by nvisy-rig and nvisy-pattern. Replace them with thin adapter structs in a new method/ module: NerMethod (wraps NerAgent), CvMethod (wraps CvAgent), and PatternDetection (migrated as-is). Remove nvisy-python and bytes deps that were only needed by the deleted code. 
Co-Authored-By: Claude Opus 4.6 --- Cargo.lock | 2 - crates/nvisy-identify/Cargo.toml | 2 - crates/nvisy-identify/src/audio/mod.rs | 5 - crates/nvisy-identify/src/audio/transcript.rs | 198 ---------- crates/nvisy-identify/src/lib.rs | 16 +- crates/nvisy-identify/src/llm/detection.rs | 326 ----------------- crates/nvisy-identify/src/llm/mod.rs | 7 - crates/nvisy-identify/src/llm/prompt.rs | 26 -- crates/nvisy-identify/src/method/cv.rs | 75 ++++ crates/nvisy-identify/src/method/mod.rs | 14 + crates/nvisy-identify/src/method/ner.rs | 176 +++++++++ .../src/{pattern/mod.rs => method/pattern.rs} | 0 crates/nvisy-identify/src/ner/backend.rs | 42 --- crates/nvisy-identify/src/ner/bridge.rs | 38 -- crates/nvisy-identify/src/ner/image.rs | 112 ------ crates/nvisy-identify/src/ner/mod.rs | 13 - crates/nvisy-identify/src/ner/parse.rs | 150 -------- crates/nvisy-identify/src/ner/text.rs | 339 ------------------ crates/nvisy-identify/src/prelude.rs | 4 +- 19 files changed, 271 insertions(+), 1274 deletions(-) delete mode 100644 crates/nvisy-identify/src/audio/mod.rs delete mode 100644 crates/nvisy-identify/src/audio/transcript.rs delete mode 100644 crates/nvisy-identify/src/llm/detection.rs delete mode 100644 crates/nvisy-identify/src/llm/mod.rs delete mode 100644 crates/nvisy-identify/src/llm/prompt.rs create mode 100644 crates/nvisy-identify/src/method/cv.rs create mode 100644 crates/nvisy-identify/src/method/mod.rs create mode 100644 crates/nvisy-identify/src/method/ner.rs rename crates/nvisy-identify/src/{pattern/mod.rs => method/pattern.rs} (100%) delete mode 100644 crates/nvisy-identify/src/ner/backend.rs delete mode 100644 crates/nvisy-identify/src/ner/bridge.rs delete mode 100644 crates/nvisy-identify/src/ner/image.rs delete mode 100644 crates/nvisy-identify/src/ner/mod.rs delete mode 100644 crates/nvisy-identify/src/ner/parse.rs delete mode 100644 crates/nvisy-identify/src/ner/text.rs diff --git a/Cargo.lock b/Cargo.lock index 6a1a7eb..e69a15f 100644 --- a/Cargo.lock 
+++ b/Cargo.lock @@ -2761,14 +2761,12 @@ name = "nvisy-identify" version = "0.1.0" dependencies = [ "async-trait", - "bytes", "jiff", "nvisy-asr", "nvisy-codec", "nvisy-core", "nvisy-ontology", "nvisy-pattern", - "nvisy-python", "nvisy-rig", "schemars", "semver", diff --git a/crates/nvisy-identify/Cargo.toml b/crates/nvisy-identify/Cargo.toml index f98dd64..e51ce04 100644 --- a/crates/nvisy-identify/Cargo.toml +++ b/crates/nvisy-identify/Cargo.toml @@ -31,7 +31,6 @@ nvisy-core = { workspace = true, features = [] } nvisy-ontology = { workspace = true, features = [] } nvisy-codec = { workspace = true, features = [] } nvisy-pattern = { workspace = true, features = [] } -nvisy-python = { workspace = true, features = [] } nvisy-rig = { workspace = true, features = [] } nvisy-asr = { workspace = true, features = [] } @@ -47,7 +46,6 @@ async-trait = { workspace = true, features = [] } # Primitive datatypes uuid = { workspace = true, features = ["v4"] } jiff = { workspace = true, features = [] } -bytes = { workspace = true, features = [] } semver = { workspace = true, features = [] } # Derive macros and error handling diff --git a/crates/nvisy-identify/src/audio/mod.rs b/crates/nvisy-identify/src/audio/mod.rs deleted file mode 100644 index 45004ed..0000000 --- a/crates/nvisy-identify/src/audio/mod.rs +++ /dev/null @@ -1,5 +0,0 @@ -//! Audio detection layers. - -pub mod transcript; - -pub use transcript::TranscriptNerDetection; diff --git a/crates/nvisy-identify/src/audio/transcript.rs b/crates/nvisy-identify/src/audio/transcript.rs deleted file mode 100644 index 0d99d48..0000000 --- a/crates/nvisy-identify/src/audio/transcript.rs +++ /dev/null @@ -1,198 +0,0 @@ -//! Composite audio detection: transcription followed by NER. -//! -//! Chains a [`TranscribeBackend`] with an [`NerBackend`] to detect -//! entities in audio content. The ASR stage produces a transcript -//! with time-aligned segments, then NER runs on the combined text -//! 
and the resulting text-location entities are mapped back to -//! [`AudioLocation`] time spans. - -use bytes::Bytes; - -use nvisy_codec::handler::Span; -use nvisy_core::Error; - -use nvisy_asr::{TranscribeBackend, TranscribeConfig, parse_transcribe_entities}; - -use crate::ner::{NerBackend, NerConfig, parse_ner_entities}; -use crate::{Entity, Location}; -use crate::{ParallelContext, DetectionService}; - -/// Composite audio detection layer: transcription + NER. -/// -/// First transcribes each audio span via [`TranscribeBackend`], then -/// runs [`NerBackend`] on the resulting transcript text. Entities -/// from transcription carry [`AudioLocation`] with time spans; -/// entities from NER carry text locations within the transcript. -pub struct TranscriptNerDetection { - transcribe_backend: T, - transcribe_config: TranscribeConfig, - ner_backend: N, - ner_config: NerConfig, -} - -impl TranscriptNerDetection { - /// Create a new composite detection layer. - pub fn new( - transcribe_backend: T, - transcribe_config: TranscribeConfig, - ner_backend: N, - ner_config: NerConfig, - ) -> Self { - Self { - transcribe_backend, - transcribe_config, - ner_backend, - ner_config, - } - } -} - -#[async_trait::async_trait] -impl DetectionService<(), Bytes> - for TranscriptNerDetection -{ - type Context = ParallelContext; - - async fn detect( - &self, - spans: Vec>, - ) -> Result, Error> { - let mut entities = Vec::new(); - - for span in &spans { - let audio_bytes: &[u8] = &span.data; - - // Step 1: Transcribe audio → time-aligned segments. - let raw_segments = self - .transcribe_backend - .transcribe(audio_bytes, "audio/wav", &self.transcribe_config) - .await?; - - let transcript_entities = parse_transcribe_entities(&raw_segments)?; - - // Collect transcript text for NER. - let transcript_text: String = transcript_entities - .iter() - .map(|e| e.value.as_str()) - .collect::>() - .join(" "); - - // Include the raw transcript entities (audio-located). 
- for entity in transcript_entities { - entities.push(entity.with_parent(&span.source)); - } - - // Step 2: Run NER on the combined transcript text. - if !transcript_text.is_empty() { - let raw_ner = self - .ner_backend - .detect_text(&transcript_text, &self.ner_config) - .await?; - - for mut entity in parse_ner_entities(&raw_ner)? { - // NER entities from transcript get a text location - // within the transcript. For now we keep them as-is; - // a future enhancement could map text offsets back to - // audio time spans using segment boundaries. - if entity.location.is_none() { - entity.location = Some(Location::Text(Default::default())); - } - entities.push(entity.with_parent(&span.source)); - } - } - } - - Ok(entities) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use nvisy_ontology::entity::DetectionMethod; - use serde_json::{json, Value}; - - struct MockTranscribeBackend; - - #[async_trait::async_trait] - impl TranscribeBackend for MockTranscribeBackend { - async fn transcribe( - &self, - _audio_data: &[u8], - _mime_type: &str, - _config: &TranscribeConfig, - ) -> Result, Error> { - Ok(vec![ - json!({ - "text": "My name is John Doe", - "start_time": 0.0, - "end_time": 2.0, - "confidence": 0.95 - }), - ]) - } - } - - struct MockNerBackend; - - #[async_trait::async_trait] - impl NerBackend for MockNerBackend { - async fn detect_text( - &self, - text: &str, - _config: &NerConfig, - ) -> Result, Error> { - let mut results = Vec::new(); - if let Some(pos) = text.find("John Doe") { - results.push(json!({ - "category": "pii", - "entity_type": "person_name", - "value": "John Doe", - "confidence": 0.9, - "start_offset": pos, - "end_offset": pos + 8 - })); - } - Ok(results) - } - - async fn detect_image( - &self, - _: &[u8], _: &str, _: &NerConfig, - ) -> Result, Error> { - Ok(Vec::new()) - } - } - - #[tokio::test] - async fn transcript_ner_produces_both_entity_types() { - let layer = TranscriptNerDetection::new( - MockTranscribeBackend, - TranscribeConfig { - 
language: "en".into(), - enable_speaker_diarization: false, - confidence_threshold: 0.5, - }, - MockNerBackend, - NerConfig { - entity_types: vec![], - confidence_threshold: 0.0, - }, - ); - - let audio = Bytes::from_static(b"fake-wav-data"); - let spans = vec![Span::new((), audio)]; - - let entities = layer.detect(spans).await.unwrap(); - // Should have: 1 transcript entity + 1 NER entity - assert_eq!(entities.len(), 2); - - // First entity is from transcription (audio location). - assert_eq!(entities[0].detection_method, DetectionMethod::SpeechTranscript); - assert!(entities[0].location.as_ref().unwrap().as_audio().is_some()); - - // Second entity is from NER (text location). - assert_eq!(entities[1].detection_method, DetectionMethod::Ner); - assert_eq!(entities[1].value, "John Doe"); - } -} diff --git a/crates/nvisy-identify/src/lib.rs b/crates/nvisy-identify/src/lib.rs index c44deb0..f7b4905 100644 --- a/crates/nvisy-identify/src/lib.rs +++ b/crates/nvisy-identify/src/lib.rs @@ -3,10 +3,7 @@ #![doc = include_str!("../README.md")] mod layer; -mod pattern; -mod ner; -mod llm; -mod audio; +mod method; mod fusion; mod policy; @@ -24,15 +21,8 @@ pub use nvisy_ontology::location::{ // --- Layer traits --- pub use layer::*; -// --- NER backend --- -pub use ner::{NerBackend, NerConfig}; - -// --- Detection layers --- -pub use pattern::{PatternDetection, PatternDetectionParams}; -pub use ner::{NerDetection, NerDetectionParams}; -pub use ner::ImageNerDetection; -pub use llm::{LlmBackend, LlmDetection, LlmDetectionParams, user_prompt as llm_user_prompt}; -pub use audio::TranscriptNerDetection; +// --- Detection methods --- +pub use method::{NerMethod, NerMethodParams, CvMethod, PatternDetection, PatternDetectionParams}; // --- Post-detection actions --- pub use fusion::{DetectManualAction, DetectManualParams, Exclusion, ManualOutput, is_excluded}; diff --git a/crates/nvisy-identify/src/llm/detection.rs b/crates/nvisy-identify/src/llm/detection.rs deleted file mode 100644 
index 003fd97..0000000 --- a/crates/nvisy-identify/src/llm/detection.rs +++ /dev/null @@ -1,326 +0,0 @@ -//! LLM contextual detection layer. -//! -//! Uses a [`SequentialContext`] so the orchestrator feeds one span at -//! a time, allowing the layer to accumulate prior text for contextual -//! understanding across spans. - -use std::str::FromStr; - -use serde::Deserialize; -use serde_json::Value; -use tokio::sync::Mutex; - -use nvisy_codec::handler::{Span, TxtSpan}; -use nvisy_ontology::entity::{DetectionMethod, EntityCategory, EntityKind}; -use nvisy_core::Error; -use nvisy_rig::{DetectionConfig, DetectionRequest, DetectionResponse}; - -use crate::{Entity, Location, ModelInfo, TextLocation}; -use crate::{SequentialContext, DetectionService}; - -use super::prompt; - -fn default_confidence() -> f64 { - 0.5 -} - -/// Async backend trait replacing the former `tower::Service` bound. -#[async_trait::async_trait] -pub trait LlmBackend: Send + Sync + 'static { - /// Run a detection request and return the response. - async fn detect(&self, req: DetectionRequest) -> Result; -} - -/// Typed parameters for [`LlmDetection`]. -#[derive(Debug, Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct LlmDetectionParams { - /// Entity kinds to detect (empty = all). - #[serde(rename = "entityTypes", default)] - pub entity_kinds: Vec, - /// Minimum confidence score for returned entities. - #[serde(default = "default_confidence")] - pub confidence_threshold: f64, - /// Optional model info to attach to every LLM-produced entity. - #[serde(skip)] - pub model_info: Option, - /// Optional system prompt override. - #[serde(default)] - pub system_prompt: Option, -} - -/// Accumulated state between sequential span calls. -struct LlmState { - /// Text from previously processed spans (for sliding context). - prior_text: String, -} - -/// LLM contextual detection layer — delegates to an [`LlmBackend`]. 
-/// -/// Uses [`SequentialContext`]: the orchestrator feeds one span at a -/// time so the layer can carry sliding context between spans. -pub struct LlmDetection { - backend: B, - config: DetectionConfig, - model_info: Option, - state: Mutex, -} - -impl LlmDetection { - /// Create a new detection layer with the given backend and params. - pub fn new(backend: B, params: LlmDetectionParams) -> Self { - let system_prompt = params.system_prompt.unwrap_or_else(|| { - prompt::system_prompt().to_string() - }); - let config = DetectionConfig { - entity_kinds: params.entity_kinds, - confidence_threshold: params.confidence_threshold, - system_prompt: Some(system_prompt), - }; - Self { - backend, - config, - model_info: params.model_info, - state: Mutex::new(LlmState { - prior_text: String::new(), - }), - } - } - - /// Clear accumulated state between documents. - pub async fn reset(&self) { - let mut state = self.state.lock().await; - state.prior_text.clear(); - } -} - -#[async_trait::async_trait] -impl DetectionService for LlmDetection { - type Context = SequentialContext; - - async fn detect( - &self, - spans: Vec>, - ) -> Result, Error> { - let mut entities = Vec::new(); - - for span in &spans { - // Build the full text with prior context prepended. - let (full_text, context_len) = { - let state = self.state.lock().await; - if state.prior_text.is_empty() { - (span.data.clone(), 0) - } else { - let sep = "\n"; - let context_len = state.prior_text.len() + sep.len(); - let full = format!("{}{}{}", state.prior_text, sep, span.data); - (full, context_len) - } - }; - - let req = DetectionRequest { - text: full_text, - config: self.config.clone(), - }; - let response = self.backend.detect(req).await?; - - // Filter entities to the current span and adjust offsets. - let span_len = span.data.len(); - for mut e in parse_entities(&response.entities)? 
{ - if let Some(Location::Text(ref loc)) = e.location { - if loc.end_offset <= context_len { - continue; - } - if loc.start_offset < context_len { - continue; - } - if loc.start_offset - context_len >= span_len { - continue; - } - e.location = Some(Location::Text(TextLocation { - start_offset: loc.start_offset - context_len, - end_offset: loc.end_offset - context_len, - element_id: Some(span.id.0.to_string()), - ..Default::default() - })); - } else { - e.location = Some(Location::Text(TextLocation { - element_id: Some(span.id.0.to_string()), - ..Default::default() - })); - } - - if let Some(ref model) = self.model_info { - e.model = Some(model.clone()); - } - - entities.push(e.with_parent(&span.source)); - } - - // Accumulate text for sliding context. - let mut state = self.state.lock().await; - if !state.prior_text.is_empty() { - state.prior_text.push('\n'); - } - state.prior_text.push_str(&span.data); - } - - Ok(entities) - } -} - -/// Parse raw JSON dicts (from an LLM detection response) into [`Entity`] values. -/// -/// Unknown `entity_type` values are silently dropped. 
-fn parse_entities(raw: &[Value]) -> Result, Error> { - let mut entities = Vec::new(); - - for item in raw { - let obj = item - .as_object() - .ok_or_else(|| Error::validation("Expected JSON object in LLM results", "llm"))?; - - let category_str = obj - .get("category") - .and_then(Value::as_str) - .ok_or_else(|| Error::validation("Missing 'category'", "llm"))?; - - let category = match category_str { - "pii" => EntityCategory::Pii, - "phi" => EntityCategory::Phi, - "financial" => EntityCategory::Financial, - "credentials" => EntityCategory::Credentials, - other => EntityCategory::Custom(other.to_string()), - }; - - let entity_type_str = obj - .get("entity_type") - .and_then(Value::as_str) - .ok_or_else(|| Error::validation("Missing 'entity_type'", "llm"))?; - - let entity_kind = match EntityKind::from_str(entity_type_str) { - Ok(ek) => ek, - Err(_) => { - tracing::warn!( - entity_type = entity_type_str, - "unknown entity type from LLM, dropping" - ); - continue; - } - }; - - let value = obj - .get("value") - .and_then(Value::as_str) - .ok_or_else(|| Error::validation("Missing 'value'", "llm"))?; - - let confidence = obj - .get("confidence") - .and_then(Value::as_f64) - .ok_or_else(|| Error::validation("Missing 'confidence'", "llm"))?; - - let start_offset = obj - .get("start_offset") - .and_then(Value::as_u64) - .map(|v| v as usize) - .unwrap_or(0); - - let end_offset = obj - .get("end_offset") - .and_then(Value::as_u64) - .map(|v| v as usize) - .unwrap_or(0); - - let entity = Entity::new( - category, - entity_kind, - value, - DetectionMethod::ContextualNlp, - confidence, - ) - .with_location(Location::Text(TextLocation { - start_offset, - end_offset, - ..Default::default() - })); - - entities.push(entity); - } - - Ok(entities) -} - -#[cfg(test)] -mod tests { - use super::*; - use serde_json::json; - - struct MockLlmBackend; - - #[async_trait::async_trait] - impl LlmBackend for MockLlmBackend { - async fn detect(&self, req: DetectionRequest) -> Result { - let text 
= req.text; - let mut results = Vec::new(); - if let Some(pos) = text.find("SECRET") { - results.push(json!({ - "category": "credentials", - "entity_type": "api_key", - "value": "SECRET", - "confidence": 0.92, - "start_offset": pos, - "end_offset": pos + 6 - })); - } - Ok(DetectionResponse { - entities: results, - usage: None, - }) - } - } - - #[tokio::test] - async fn llm_detection_basic() { - let params = LlmDetectionParams { - entity_kinds: vec![], - confidence_threshold: 0.0, - model_info: None, - system_prompt: None, - }; - let llm = LlmDetection::new(MockLlmBackend, params); - - let spans = vec![Span::new(TxtSpan(0), "contains SECRET key".into())]; - let entities = llm.detect(spans).await.unwrap(); - assert_eq!(entities.len(), 1); - assert_eq!(entities[0].value, "SECRET"); - - let loc = entities[0].location.as_ref().unwrap().as_text().unwrap(); - assert_eq!(loc.start_offset, 9); - assert_eq!(loc.end_offset, 15); - } - - #[tokio::test] - async fn llm_detection_with_context() { - let params = LlmDetectionParams { - entity_kinds: vec![], - confidence_threshold: 0.0, - model_info: None, - system_prompt: None, - }; - let llm = LlmDetection::new(MockLlmBackend, params); - - // First span: no entity. - let span1 = vec![Span::new(TxtSpan(0), "some context".into())]; - let result1 = llm.detect(span1).await.unwrap(); - assert!(result1.is_empty()); - - // Second span: entity in current span. - let span2 = vec![Span::new(TxtSpan(1), "has SECRET here".into())]; - let result2 = llm.detect(span2).await.unwrap(); - assert_eq!(result2.len(), 1); - - let loc = result2[0].location.as_ref().unwrap().as_text().unwrap(); - assert_eq!(loc.start_offset, 4); - assert_eq!(loc.end_offset, 10); - } -} diff --git a/crates/nvisy-identify/src/llm/mod.rs b/crates/nvisy-identify/src/llm/mod.rs deleted file mode 100644 index 8d40832..0000000 --- a/crates/nvisy-identify/src/llm/mod.rs +++ /dev/null @@ -1,7 +0,0 @@ -//! LLM-based contextual entity detection. 
- -pub mod detection; -pub mod prompt; - -pub use detection::{LlmBackend, LlmDetection, LlmDetectionParams}; -pub use prompt::user_prompt; diff --git a/crates/nvisy-identify/src/llm/prompt.rs b/crates/nvisy-identify/src/llm/prompt.rs deleted file mode 100644 index 0502e0b..0000000 --- a/crates/nvisy-identify/src/llm/prompt.rs +++ /dev/null @@ -1,26 +0,0 @@ -//! System and user prompt templates for LLM-based PII/sensitive-data detection. - -/// Default system prompt for LLM-based entity detection. -/// -/// Instructs the model to identify PII and sensitive data, returning -/// structured JSON results. -pub fn system_prompt() -> &'static str { - r#"You are a precise PII and sensitive data detection system. Your task is to identify personally identifiable information (PII), protected health information (PHI), financial data, and credentials in the provided text. - -For each entity found, return a JSON object with these fields: -- "category": one of "pii", "phi", "financial", "credentials", or a custom category -- "entity_type": the specific entity type (e.g., "person_name", "email_address", "ssn", "credit_card_number") -- "value": the exact text matched -- "confidence": your confidence score from 0.0 to 1.0 -- "start_offset": character offset where the entity starts in the input text -- "end_offset": character offset where the entity ends in the input text - -Return a JSON array of objects. If no entities are found, return an empty array []. - -Be thorough but precise — prioritize precision over recall. Consider context when assessing whether text constitutes sensitive data."# -} - -/// Build a user prompt from the input text. -pub fn user_prompt(text: &str) -> String { - format!("Detect all PII and sensitive data in the following text:\n\n{text}") -} diff --git a/crates/nvisy-identify/src/method/cv.rs b/crates/nvisy-identify/src/method/cv.rs new file mode 100644 index 0000000..3d9a697 --- /dev/null +++ b/crates/nvisy-identify/src/method/cv.rs @@ -0,0 +1,75 @@ +//! 
Computer-vision detection adapter wrapping [`CvAgent`] from `nvisy-rig`. +//! +//! Detects entities in image spans by delegating to the CvAgent's +//! object-detection + LLM-classification pipeline. + +use nvisy_codec::handler::{ImageData, Span}; +use nvisy_core::Error; +use nvisy_rig::{CvAgent, CvEntity, DetectionConfig}; + +use crate::{DetectionMethod, Entity, ImageLocation, Location}; +use crate::{ParallelContext, DetectionService}; +use nvisy_core::math::BoundingBox; + +/// Computer-vision detection method — thin adapter around [`CvAgent`]. +pub struct CvMethod { + agent: CvAgent, + config: DetectionConfig, +} + +impl CvMethod { + /// Create a new CV method from a pre-built agent and detection config. + pub fn from_agent(agent: CvAgent, config: DetectionConfig) -> Self { + Self { agent, config } + } +} + +#[async_trait::async_trait] +impl DetectionService<(), ImageData> for CvMethod { + type Context = ParallelContext; + + async fn detect( + &self, + spans: Vec>, + ) -> Result, Error> { + let mut entities = Vec::new(); + + for span in &spans { + let png_bytes = span.data.encode_png()?; + + let cv_entities = self + .agent + .detect(&png_bytes, &self.config) + .await + .map_err(|e| Error::runtime(e.to_string(), "cv-agent", e.is_retryable()))?; + + for cv_entity in &cv_entities { + let entity = map_cv_entity(cv_entity); + entities.push(entity.with_parent(&span.source)); + } + } + + Ok(entities) + } +} + +/// Convert a [`CvEntity`] to an [`Entity`] with [`ImageLocation`]. 
+fn map_cv_entity(cv: &CvEntity) -> Entity { + Entity::new( + cv.category.clone(), + cv.entity_type, + &cv.label, + DetectionMethod::ObjectDetection, + cv.confidence, + ) + .with_location(Location::Image(ImageLocation { + bounding_box: BoundingBox { + x: cv.bbox[0], + y: cv.bbox[1], + width: cv.bbox[2], + height: cv.bbox[3], + }, + image_id: None, + page_number: None, + })) +} diff --git a/crates/nvisy-identify/src/method/mod.rs b/crates/nvisy-identify/src/method/mod.rs new file mode 100644 index 0000000..e3d5197 --- /dev/null +++ b/crates/nvisy-identify/src/method/mod.rs @@ -0,0 +1,14 @@ +//! Detection method adapters wrapping external crates. +//! +//! Each sub-module provides a thin struct that holds an agent or engine +//! from `nvisy-rig` / `nvisy-pattern` and implements the +//! [`DetectionLayer`](crate::DetectionLayer) / +//! [`DetectionService`](crate::DetectionService) traits. + +mod ner; +mod cv; +mod pattern; + +pub use ner::{NerMethod, NerMethodParams}; +pub use cv::CvMethod; +pub use pattern::{PatternDetection, PatternDetectionParams}; diff --git a/crates/nvisy-identify/src/method/ner.rs b/crates/nvisy-identify/src/method/ner.rs new file mode 100644 index 0000000..8bd7b5d --- /dev/null +++ b/crates/nvisy-identify/src/method/ner.rs @@ -0,0 +1,176 @@ +//! NER detection adapter wrapping [`NerAgent`] from `nvisy-rig`. +//! +//! Uses a [`SequentialContext`] so the orchestrator feeds one span at +//! a time, allowing the adapter to accumulate known entities between +//! spans for coreference resolution. 
+ +use serde::Deserialize; +use tokio::sync::Mutex; + +use nvisy_codec::handler::{Span, TxtSpan}; +use nvisy_ontology::entity::EntityKind; +use nvisy_core::Error; +use nvisy_ontology::entity::EntityCategory; +use nvisy_rig::{ + BaseAgentConfig, DetectionConfig, KnownNerEntity, NerAgent, NerContext, Provider, +}; + +use crate::{DetectionMethod, Entity, Location, TextLocation}; +use crate::{SequentialContext, DetectionLayer, DetectionService}; + +fn default_confidence() -> f64 { + 0.5 +} + +/// Typed parameters for [`NerMethod`]. +#[derive(Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct NerMethodParams { + /// Entity kinds to detect (empty = all). + #[serde(rename = "entityTypes", default)] + pub entity_kinds: Vec, + /// Minimum confidence score for returned entities. + #[serde(default = "default_confidence")] + pub confidence_threshold: f64, + /// Provider configuration for the NER agent. + #[serde(skip)] + pub provider: Option, + /// Optional agent config overrides. + #[serde(skip)] + pub agent_config: Option, +} + +/// Accumulated state between sequential span calls. +struct NerState { + /// Known entities from prior detection calls (for coreference). + known_entities: Vec, +} + +/// NER detection method — thin adapter around [`NerAgent`]. +/// +/// Uses [`SequentialContext`]: the orchestrator feeds one span at a +/// time so the adapter can carry known-entity context between spans. +pub struct NerMethod { + agent: NerAgent, + config: DetectionConfig, + state: Mutex, +} + +impl NerMethod { + /// Create a new NER method from a pre-built agent and detection config. + pub fn from_agent(agent: NerAgent, config: DetectionConfig) -> Self { + Self { + agent, + config, + state: Mutex::new(NerState { + known_entities: Vec::new(), + }), + } + } + + /// Clear accumulated state between documents. 
+ pub async fn reset(&self) { + let mut state = self.state.lock().await; + state.known_entities.clear(); + } +} + +#[async_trait::async_trait] +impl DetectionLayer for NerMethod { + type Params = NerMethodParams; + + async fn connect(params: Self::Params) -> Result { + let provider = params.provider.ok_or_else(|| { + Error::validation("NerMethod requires a provider", "ner-method") + })?; + let agent_config = params.agent_config.unwrap_or_default(); + let agent = NerAgent::new(&provider, agent_config).map_err(|e| { + Error::validation(e.to_string(), "ner-method") + })?; + let config = DetectionConfig { + entity_kinds: params.entity_kinds, + confidence_threshold: params.confidence_threshold, + system_prompt: None, + }; + Ok(Self::from_agent(agent, config)) + } +} + +#[async_trait::async_trait] +impl DetectionService for NerMethod { + type Context = SequentialContext; + + async fn detect( + &self, + spans: Vec>, + ) -> Result, Error> { + let mut entities = Vec::new(); + + for span in &spans { + // Build NER context with accumulated known entities. + let known = { + let state = self.state.lock().await; + state.known_entities.clone() + }; + let ctx = NerContext::with_known(&span.data, known); + + let ner_entities = self + .agent + .detect(&ctx, &self.config) + .await + .map_err(|e| Error::runtime(e.to_string(), "ner-agent", e.is_retryable()))?; + + // Convert NerEntity → Entity with resolved offsets. + for ner_entity in &ner_entities { + let category: EntityCategory = match ner_entity.category { + Some(ref c) => c.clone(), + None => continue, + }; + let entity_kind = match ner_entity.entity_type { + Some(ek) => ek, + None => continue, + }; + let confidence = ner_entity.confidence.unwrap_or(0.0); + if confidence < self.config.confidence_threshold { + continue; + } + + let mut entity = Entity::new( + category, + entity_kind, + &ner_entity.value, + DetectionMethod::Ner, + confidence, + ); + + // Resolve offsets within the current span text. 
+ if let Some(offsets) = ner_entity.resolve_offsets(&ctx) { + entity = entity.with_location(Location::Text(TextLocation { + start_offset: offsets.start, + end_offset: offsets.end, + element_id: Some(span.id.0.to_string()), + ..Default::default() + })); + } else { + entity = entity.with_location(Location::Text(TextLocation { + element_id: Some(span.id.0.to_string()), + ..Default::default() + })); + } + + entities.push(entity.with_parent(&span.source)); + } + + // Accumulate known entities for coreference across spans. + let mut state = self.state.lock().await; + let mut merge_ctx = NerContext::with_known( + &span.data, + std::mem::take(&mut state.known_entities), + ); + merge_ctx.merge(ner_entities); + state.known_entities = merge_ctx.known_entities; + } + + Ok(entities) + } +} diff --git a/crates/nvisy-identify/src/pattern/mod.rs b/crates/nvisy-identify/src/method/pattern.rs similarity index 100% rename from crates/nvisy-identify/src/pattern/mod.rs rename to crates/nvisy-identify/src/method/pattern.rs diff --git a/crates/nvisy-identify/src/ner/backend.rs b/crates/nvisy-identify/src/ner/backend.rs deleted file mode 100644 index 18b0bba..0000000 --- a/crates/nvisy-identify/src/ner/backend.rs +++ /dev/null @@ -1,42 +0,0 @@ -//! NER backend trait and configuration. - -use serde_json::Value; - -use nvisy_core::Error; - -/// Configuration passed to an [`NerBackend`] implementation. -/// -/// Contains only the model-agnostic parameters that every backend needs. -/// Provider-specific fields (API key, model name, etc.) belong in the -/// action's [`NerDetectionParams`](super::text::NerDetectionParams) -/// or the provider's credentials. -#[derive(Debug, Clone)] -pub struct NerConfig { - /// Entity type labels to detect (e.g., `["PERSON", "SSN"]`). - pub entity_types: Vec, - /// Minimum confidence score to include a detection (0.0 -- 1.0). - pub confidence_threshold: f64, -} - -/// Backend trait for NER providers. -/// -/// Implementations call an external NER service (e.g. 
via Python, HTTP) -/// and return raw JSON results. Entity construction from the raw dicts -/// is handled by the detection layers. -#[async_trait::async_trait] -pub trait NerBackend: Send + Sync + 'static { - /// Detect entities in text, returning raw dicts. - async fn detect_text( - &self, - text: &str, - config: &NerConfig, - ) -> Result, Error>; - - /// Detect entities in an image, returning raw dicts. - async fn detect_image( - &self, - image_data: &[u8], - mime_type: &str, - config: &NerConfig, - ) -> Result, Error>; -} diff --git a/crates/nvisy-identify/src/ner/bridge.rs b/crates/nvisy-identify/src/ner/bridge.rs deleted file mode 100644 index a4ef000..0000000 --- a/crates/nvisy-identify/src/ner/bridge.rs +++ /dev/null @@ -1,38 +0,0 @@ -//! [`NerBackend`] implementation for [`PythonBridge`]. - -use serde_json::Value; - -use nvisy_core::Error; -use nvisy_python::bridge::PythonBridge; -use nvisy_python::ner::NerParams; - -use super::backend::{NerBackend, NerConfig}; - -/// Converts [`NerConfig`] to [`NerParams`] and delegates to `nvisy_python::ner`. -#[async_trait::async_trait] -impl NerBackend for PythonBridge { - async fn detect_text( - &self, - text: &str, - config: &NerConfig, - ) -> Result, Error> { - let params = NerParams { - entity_types: config.entity_types.clone(), - confidence_threshold: config.confidence_threshold, - }; - nvisy_python::ner::detect_ner(self, text, ¶ms).await - } - - async fn detect_image( - &self, - image_data: &[u8], - mime_type: &str, - config: &NerConfig, - ) -> Result, Error> { - let params = NerParams { - entity_types: config.entity_types.clone(), - confidence_threshold: config.confidence_threshold, - }; - nvisy_python::ner::detect_ner_image(self, image_data, mime_type, ¶ms).await - } -} diff --git a/crates/nvisy-identify/src/ner/image.rs b/crates/nvisy-identify/src/ner/image.rs deleted file mode 100644 index afeae94..0000000 --- a/crates/nvisy-identify/src/ner/image.rs +++ /dev/null @@ -1,112 +0,0 @@ -//! 
NER detection on images via [`NerBackend::detect_image`]. -//! -//! Encodes the image to PNG, sends it to the NER backend, and returns -//! entities with [`ImageLocation`] bounding boxes. - -use nvisy_codec::handler::{ImageData, Span}; -use nvisy_core::Error; - -use crate::Entity; -use crate::{ParallelContext, DetectionService}; -use super::{NerBackend, NerConfig, parse_image_ner_entity}; - -/// NER detection layer for images. -/// -/// Encodes each image span to PNG and delegates to an [`NerBackend`] -/// for entity recognition. -pub struct ImageNerDetection { - backend: B, - config: NerConfig, -} - -impl ImageNerDetection { - /// Create a new image NER detection layer. - pub fn new(backend: B, config: NerConfig) -> Self { - Self { backend, config } - } -} - -#[async_trait::async_trait] -impl DetectionService<(), ImageData> for ImageNerDetection { - type Context = ParallelContext; - - async fn detect( - &self, - spans: Vec>, - ) -> Result, Error> { - let mut entities = Vec::new(); - - for span in &spans { - let png_bytes = span.data.encode_png()?; - - let raw = self - .backend - .detect_image(&png_bytes, "image/png", &self.config) - .await?; - - for item in &raw { - if let Some(entity) = parse_image_ner_entity(item)? 
{ - entities.push(entity.with_parent(&span.source)); - } - } - } - - Ok(entities) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::DetectionMethod; - use serde_json::{json, Value}; - - struct MockImageNerBackend; - - #[async_trait::async_trait] - impl NerBackend for MockImageNerBackend { - async fn detect_text(&self, _: &str, _: &NerConfig) -> Result, Error> { - Ok(Vec::new()) - } - - async fn detect_image( - &self, - _image_data: &[u8], - _mime_type: &str, - _config: &NerConfig, - ) -> Result, Error> { - Ok(vec![json!({ - "category": "pii", - "entity_type": "person_name", - "value": "John Doe", - "confidence": 0.92, - "x": 10.0, - "y": 20.0, - "width": 100.0, - "height": 30.0 - })]) - } - } - - #[tokio::test] - async fn detect_image_produces_image_location() { - let config = NerConfig { - entity_types: vec![], - confidence_threshold: 0.0, - }; - let layer = ImageNerDetection::new(MockImageNerBackend, config); - - // Create a tiny 1x1 image. - let img = ImageData::new_rgb(1, 1); - let spans = vec![Span::new((), img)]; - - let entities = layer.detect(spans).await.unwrap(); - assert_eq!(entities.len(), 1); - assert_eq!(entities[0].value, "John Doe"); - assert_eq!(entities[0].detection_method, DetectionMethod::Ner); - - let loc = entities[0].location.as_ref().unwrap().as_image().unwrap(); - assert!((loc.bounding_box.x - 10.0).abs() < f64::EPSILON); - assert!((loc.bounding_box.y - 20.0).abs() < f64::EPSILON); - } -} diff --git a/crates/nvisy-identify/src/ner/mod.rs b/crates/nvisy-identify/src/ner/mod.rs deleted file mode 100644 index 4013890..0000000 --- a/crates/nvisy-identify/src/ner/mod.rs +++ /dev/null @@ -1,13 +0,0 @@ -//! Cross-modal NER backend trait, configuration, detection layers, and -//! result parsing. 
- -mod backend; -mod bridge; -mod parse; -pub mod text; -pub mod image; - -pub use backend::{NerBackend, NerConfig}; -pub use parse::{parse_image_ner_entity, parse_ner_entities}; -pub use text::{NerDetection, NerDetectionParams}; -pub use image::ImageNerDetection; diff --git a/crates/nvisy-identify/src/ner/parse.rs b/crates/nvisy-identify/src/ner/parse.rs deleted file mode 100644 index e37fd1b..0000000 --- a/crates/nvisy-identify/src/ner/parse.rs +++ /dev/null @@ -1,150 +0,0 @@ -//! NER result parsing for text and image modalities. - -use std::str::FromStr; - -use serde_json::Value; - -use nvisy_ontology::entity::{EntityCategory, EntityKind}; -use nvisy_core::math::BoundingBox; -use nvisy_core::Error; - -use crate::{DetectionMethod, Entity, ImageLocation, Location, TextLocation}; - -/// Parse raw JSON dicts from an NER backend into [`Entity`] values. -/// -/// Expected dict keys: `category`, `entity_type`, `value`, `confidence`, -/// and optionally `start_offset` / `end_offset`. -pub fn parse_ner_entities(raw: &[Value]) -> Result, Error> { - let mut entities = Vec::new(); - - for item in raw { - let obj = item.as_object().ok_or_else(|| { - Error::python("Expected JSON object in NER results".to_string()) - })?; - - let category_str = obj - .get("category") - .and_then(Value::as_str) - .ok_or_else(|| Error::python("Missing 'category'".to_string()))?; - - let category = match category_str { - "pii" => EntityCategory::Pii, - "phi" => EntityCategory::Phi, - "financial" => EntityCategory::Financial, - "credentials" => EntityCategory::Credentials, - other => EntityCategory::Custom(other.to_string()), - }; - - let entity_type_str = obj - .get("entity_type") - .and_then(Value::as_str) - .ok_or_else(|| Error::python("Missing 'entity_type'".to_string()))?; - - let entity_kind = match EntityKind::from_str(entity_type_str) { - Ok(ek) => ek, - Err(_) => { - tracing::warn!(entity_type = entity_type_str, "unknown entity type from NER, dropping"); - continue; - } - }; - - let value 
= obj - .get("value") - .and_then(Value::as_str) - .ok_or_else(|| Error::python("Missing 'value'".to_string()))?; - - let confidence = obj - .get("confidence") - .and_then(Value::as_f64) - .ok_or_else(|| Error::python("Missing 'confidence'".to_string()))?; - - let start_offset = obj - .get("start_offset") - .and_then(Value::as_u64) - .map(|v| v as usize) - .unwrap_or(0); - - let end_offset = obj - .get("end_offset") - .and_then(Value::as_u64) - .map(|v| v as usize) - .unwrap_or(0); - - let entity = Entity::new( - category, - entity_kind, - value, - DetectionMethod::Ner, - confidence, - ) - .with_location(Location::Text(TextLocation { - start_offset, - end_offset, - ..Default::default() - })); - - entities.push(entity); - } - - Ok(entities) -} - -/// Parse a single NER result dict into an [`Entity`] with [`ImageLocation`]. -/// -/// Expected keys: `category`, `entity_type`, `value`, `confidence`, -/// and optionally bounding box fields `x`, `y`, `width`, `height`. -pub fn parse_image_ner_entity(item: &Value) -> Result, Error> { - let obj = item.as_object().ok_or_else(|| { - Error::python("Expected JSON object in image NER results".to_string()) - })?; - - let category_str = obj - .get("category") - .and_then(Value::as_str) - .ok_or_else(|| Error::python("Missing 'category'".to_string()))?; - - let category = match category_str { - "pii" => EntityCategory::Pii, - "phi" => EntityCategory::Phi, - "financial" => EntityCategory::Financial, - "credentials" => EntityCategory::Credentials, - other => EntityCategory::Custom(other.to_string()), - }; - - let entity_type_str = obj - .get("entity_type") - .and_then(Value::as_str) - .ok_or_else(|| Error::python("Missing 'entity_type'".to_string()))?; - - let entity_kind = match EntityKind::from_str(entity_type_str) { - Ok(ek) => ek, - Err(_) => { - tracing::warn!(entity_type = entity_type_str, "unknown entity type from image NER, dropping"); - return Ok(None); - } - }; - - let value = obj - .get("value") - .and_then(Value::as_str) 
- .ok_or_else(|| Error::python("Missing 'value'".to_string()))?; - - let confidence = obj - .get("confidence") - .and_then(Value::as_f64) - .ok_or_else(|| Error::python("Missing 'confidence'".to_string()))?; - - let x = obj.get("x").and_then(Value::as_f64).unwrap_or(0.0); - let y = obj.get("y").and_then(Value::as_f64).unwrap_or(0.0); - let width = obj.get("width").and_then(Value::as_f64).unwrap_or(0.0); - let height = obj.get("height").and_then(Value::as_f64).unwrap_or(0.0); - - let entity = Entity::new(category, entity_kind, value, DetectionMethod::Ner, confidence) - .with_location(Location::Image(ImageLocation { - bounding_box: BoundingBox { x, y, width, height }, - image_id: None, - page_number: None, - })); - - Ok(Some(entity)) -} diff --git a/crates/nvisy-identify/src/ner/text.rs b/crates/nvisy-identify/src/ner/text.rs deleted file mode 100644 index 312274a..0000000 --- a/crates/nvisy-identify/src/ner/text.rs +++ /dev/null @@ -1,339 +0,0 @@ -//! AI-powered named-entity recognition (NER) detection layer for text. -//! -//! Uses a [`SequentialContext`] so the orchestrator feeds one span at -//! a time, allowing the layer to accumulate prior text/entities -//! between spans via interior mutability. - -use serde::Deserialize; -use tokio::sync::Mutex; - -use nvisy_codec::handler::{Span, TxtSpan}; -use nvisy_ontology::entity::EntityKind; -use nvisy_core::Error; - -use super::{NerBackend, NerConfig, parse_ner_entities}; -use crate::{Entity, Location, ModelInfo, TextLocation}; -use crate::{SequentialContext, DetectionService}; - -fn default_confidence() -> f64 { - 0.5 -} - -/// Typed parameters for [`NerDetection`]. -#[derive(Debug, Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct NerDetectionParams { - /// Entity kinds to detect (empty = all). - #[serde(rename = "entityTypes", default)] - pub entity_kinds: Vec, - /// Minimum confidence score for returned entities. 
- #[serde(default = "default_confidence")] - pub confidence_threshold: f64, - /// Optional model info to attach to every NER-produced entity. - #[serde(skip)] - pub model_info: Option, -} - -/// Accumulated state between sequential span calls. -struct NerState { - /// Text from previously processed spans (for sliding context). - prior_text: String, -} - -/// AI NER detection layer — delegates to an [`NerBackend`] at runtime. -/// -/// Uses [`SequentialContext`]: the orchestrator feeds one span at a -/// time so the layer can carry sliding context between spans. -pub struct NerDetection { - backend: B, - config: NerConfig, - model_info: Option, - state: Mutex, -} - -impl NerDetection { - /// Create a new detection layer with the given backend and params. - pub fn new(backend: B, params: NerDetectionParams) -> Self { - let config = NerConfig { - entity_types: params.entity_kinds.iter().map(|ek| ek.to_string()).collect(), - confidence_threshold: params.confidence_threshold, - }; - Self { - backend, - config, - model_info: params.model_info, - state: Mutex::new(NerState { - prior_text: String::new(), - }), - } - } - - /// Clear accumulated state between documents. - pub async fn reset(&self) { - let mut state = self.state.lock().await; - state.prior_text.clear(); - } -} - -#[async_trait::async_trait] -impl DetectionService for NerDetection { - type Context = SequentialContext; - - async fn detect( - &self, - spans: Vec>, - ) -> Result, Error> { - let mut entities = Vec::new(); - - for span in &spans { - // Build the full text with prior context prepended. 
- let (full_text, context_len) = { - let state = self.state.lock().await; - if state.prior_text.is_empty() { - (span.data.clone(), 0) - } else { - let sep = "\n"; - let context_len = state.prior_text.len() + sep.len(); - let full = format!("{}{}{}", state.prior_text, sep, span.data); - (full, context_len) - } - }; - - let raw = self - .backend - .detect_text(&full_text, &self.config) - .await?; - - // Filter entities to the current span and adjust offsets. - let span_len = span.data.len(); - for mut e in parse_ner_entities(&raw)? { - if let Some(Location::Text(ref loc)) = e.location { - // Skip entities that fall entirely within the prior context. - if loc.end_offset <= context_len { - continue; - } - // Skip entities that start before the current span. - if loc.start_offset < context_len { - continue; - } - // Skip entities that extend beyond the current span. - if loc.start_offset - context_len >= span_len { - continue; - } - // Adjust offsets to be relative to the current span. - e.location = Some(Location::Text(TextLocation { - start_offset: loc.start_offset - context_len, - end_offset: loc.end_offset - context_len, - element_id: Some(span.id.0.to_string()), - ..Default::default() - })); - } else { - // Non-text entity: set element_id via a new text location. - e.location = Some(Location::Text(TextLocation { - element_id: Some(span.id.0.to_string()), - ..Default::default() - })); - } - - // Attach model info if provided. - if let Some(ref model) = self.model_info { - e.model = Some(model.clone()); - } - - entities.push(e.with_parent(&span.source)); - } - - // Accumulate text for sliding context. 
- let mut state = self.state.lock().await; - if !state.prior_text.is_empty() { - state.prior_text.push('\n'); - } - state.prior_text.push_str(&span.data); - } - - Ok(entities) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use serde_json::{json, Value}; - - #[test] - fn parse_ner_entities_basic() { - let raw = vec![json!({ - "category": "pii", - "entity_type": "person_name", - "value": "John Doe", - "confidence": 0.95, - "start_offset": 10, - "end_offset": 18 - })]; - let entities = parse_ner_entities(&raw).unwrap(); - assert_eq!(entities.len(), 1); - assert_eq!(entities[0].value, "John Doe"); - assert_eq!(entities[0].entity_kind, EntityKind::PersonName); - let loc = entities[0].location.as_ref().unwrap().as_text().unwrap(); - assert_eq!(loc.start_offset, 10); - assert_eq!(loc.end_offset, 18); - } - - #[test] - fn parse_ner_entities_sets_element_id_none_by_default() { - let raw = vec![json!({ - "category": "pii", - "entity_type": "email_address", - "value": "a@b.com", - "confidence": 0.9, - "start_offset": 0, - "end_offset": 7 - })]; - let entities = parse_ner_entities(&raw).unwrap(); - let loc = entities[0].location.as_ref().unwrap().as_text().unwrap(); - assert!(loc.element_id.is_none()); - } - - /// Mock NER backend that returns entities relative to the full text it receives. - struct MockNerBackend; - - #[async_trait::async_trait] - impl NerBackend for MockNerBackend { - async fn detect_text( - &self, - text: &str, - _config: &NerConfig, - ) -> Result, Error> { - // Find "ENTITY" in the text and report its position. 
- let mut results = Vec::new(); - if let Some(pos) = text.find("ENTITY") { - results.push(json!({ - "category": "pii", - "entity_type": "person_name", - "value": "ENTITY", - "confidence": 0.95, - "start_offset": pos, - "end_offset": pos + 6 - })); - } - Ok(results) - } - - async fn detect_image( - &self, - _image_data: &[u8], - _mime_type: &str, - _config: &NerConfig, - ) -> Result, Error> { - Ok(Vec::new()) - } - } - - #[tokio::test] - async fn sliding_context_prepended_and_offsets_adjusted() { - let params = NerDetectionParams { - entity_kinds: vec![], - confidence_threshold: 0.0, - model_info: None, - }; - let ner = NerDetection::new(MockNerBackend, params); - - // First span: no entity, just context. - let span1 = vec![Span::new(TxtSpan(0), "some context text".into())]; - let result1 = ner.detect(span1).await.unwrap(); - assert!(result1.is_empty()); - - // Second span: entity in current span. Backend sees prior + current. - let span2 = vec![Span::new(TxtSpan(1), "has ENTITY here".into())]; - let result2 = ner.detect(span2).await.unwrap(); - assert_eq!(result2.len(), 1); - - // Offsets should be adjusted to current span (relative). 
- let loc = result2[0].location.as_ref().unwrap().as_text().unwrap(); - assert_eq!(loc.start_offset, 4); // "has " = 4 chars - assert_eq!(loc.end_offset, 10); // "has ENTITY" = 10 chars - assert_eq!(loc.element_id.as_deref(), Some("1")); - } - - #[tokio::test] - async fn element_id_set_from_span() { - let params = NerDetectionParams { - entity_kinds: vec![], - confidence_threshold: 0.0, - model_info: None, - }; - let ner = NerDetection::new(MockNerBackend, params); - - let spans = vec![Span::new(TxtSpan(42), "ENTITY".into())]; - let entities = ner.detect(spans).await.unwrap(); - assert_eq!(entities.len(), 1); - let loc = entities[0].location.as_ref().unwrap().as_text().unwrap(); - assert_eq!(loc.element_id.as_deref(), Some("42")); - } - - #[tokio::test] - async fn model_info_attached_when_provided() { - let model = ModelInfo { - name: "test-model".into(), - kind: crate::ModelKind::OpenSource, - version: "1.0".into(), - }; - let params = NerDetectionParams { - entity_kinds: vec![], - confidence_threshold: 0.0, - model_info: Some(model.clone()), - }; - let ner = NerDetection::new(MockNerBackend, params); - - let spans = vec![Span::new(TxtSpan(0), "ENTITY".into())]; - let entities = ner.detect(spans).await.unwrap(); - assert_eq!(entities.len(), 1); - assert_eq!(entities[0].model.as_ref().unwrap().name, "test-model"); - } - - #[tokio::test] - async fn entities_in_prior_context_are_filtered_out() { - // Backend that always returns an entity at position 0..6. 
- struct AlwaysFirstBackend; - - #[async_trait::async_trait] - impl NerBackend for AlwaysFirstBackend { - async fn detect_text( - &self, - _text: &str, - _config: &NerConfig, - ) -> Result, Error> { - Ok(vec![json!({ - "category": "pii", - "entity_type": "person_name", - "value": "ENTITY", - "confidence": 0.95, - "start_offset": 0, - "end_offset": 6 - })]) - } - - async fn detect_image( - &self, - _: &[u8], _: &str, _: &NerConfig, - ) -> Result, Error> { - Ok(Vec::new()) - } - } - - let params = NerDetectionParams { - entity_kinds: vec![], - confidence_threshold: 0.0, - model_info: None, - }; - let ner = NerDetection::new(AlwaysFirstBackend, params); - - // First span — entity at 0..6 in current span: should be included. - let result1 = ner.detect(vec![Span::new(TxtSpan(0), "ENTITY here".into())]).await.unwrap(); - assert_eq!(result1.len(), 1); - - // Second span — entity at 0..6 is now in the prior context, should be filtered. - let result2 = ner.detect(vec![Span::new(TxtSpan(1), "no entity".into())]).await.unwrap(); - assert!(result2.is_empty()); - } -} diff --git a/crates/nvisy-identify/src/prelude.rs b/crates/nvisy-identify/src/prelude.rs index 3a6e834..aa4f302 100644 --- a/crates/nvisy-identify/src/prelude.rs +++ b/crates/nvisy-identify/src/prelude.rs @@ -7,5 +7,7 @@ pub use crate::{ Annotation, AnnotationKind, AnnotationLabel, AnnotationScope, DetectionContext, ParallelContext, SequentialContext, DetectionLayer, DetectionService, - NerBackend, NerConfig, + NerMethod, NerMethodParams, + CvMethod, + PatternDetection, PatternDetectionParams, };