diff --git a/CMakeLists.txt b/CMakeLists.txt
index ce9ebbe5..54006c93 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -722,6 +722,7 @@ list(APPEND DAWN_SOURCES
     src/auth/auth_db_migrations_v64.c
     src/auth/auth_db_migrations_v65.c
     src/auth/auth_db_migrations_v66.c
+    src/auth/auth_db_migrations_v67.c
     src/auth/auth_db_statements.c
     # DB-layer tables depended on by the always-compiled memory / OTA / tool
     # subsystems (extraction, summaries, history, satellite OTA, image tools), so
@@ -783,6 +784,7 @@ if(ENABLE_AUTH)
         src/auth/admin_socket_memory.c
         src/auth/admin_socket_memory_entity.c
         src/auth/admin_socket_messaging.c
+        src/auth/admin_socket_music.c
         src/auth/admin_socket_ota.c
         src/auth/auth_db_session.c
         src/auth/auth_db_rate_limit.c
diff --git a/cmake/DawnTools.cmake b/cmake/DawnTools.cmake
index 45110a08..71c1b8c6 100644
--- a/cmake/DawnTools.cmake
+++ b/cmake/DawnTools.cmake
@@ -156,6 +156,14 @@ else()
     message(STATUS "DAWN: Memory tool DISABLED")
 endif()
 
+# Unified cross-source recall tool — aggregates the focus adapters
+# (memory/notes/documents/calendar) via focus_compose_ex.  Always compiled;
+# recall_is_available() gates at runtime on the embedding engine.
+list(APPEND TOOL_SOURCES
+    src/tools/recall_tool.c
+    src/tools/recall_format.c)
+message(STATUS "DAWN: Recall tool ENABLED")
+
 # DateTime Tools (date and time)
 if(DAWN_ENABLE_DATETIME_TOOL)
     add_definitions(-DDAWN_ENABLE_DATETIME_TOOL)
diff --git a/docs/LCM_DESIGN.md b/docs/LCM_DESIGN.md
index b8769d85..b8a899b0 100644
--- a/docs/LCM_DESIGN.md
+++ b/docs/LCM_DESIGN.md
@@ -94,7 +94,7 @@ Compaction is non-destructive: the summary embeds a structured `[COMPACTED conv=
 - **`conv_db_get_message_ids()`** — returns ordered array of message IDs for a conversation. Dynamic array with realloc growth.
 - **`conv_db_get_messages_by_range()`** — retrieves messages filtered by ID range with explicit ownership pre-check returning `AUTH_DB_FORBIDDEN` (not silent empty result like the JOIN-only approach).
 - **`context_expand` tool** — new modular tool registered via `cmake/DawnTools.cmake`. All params optional: use `start_id`/`end_id` for raw messages, or `node_id` alone for hierarchical summaries (Phase 4). Token budget hardcoded at 4000. Range cap at 500 messages.
-- **Continuation handling**: `conversation_id` from the `[COMPACTED]` tag points to the parent conversation. If omitted, the tool checks `continued_from` on the current conversation to find the parent.
+- **Continuation handling**: `conversation_id` from the `[COMPACTED]` tag points to the parent conversation. If omitted, the tool checks `continued_from` on the current conversation to find the parent. *(Legacy as of Phase 5: new conversations are no longer forked, so the marker's `conv=N` is the same conversation and the `continued_from` fallback only fires for pre-v67 chains.)*
 - **`note_len` buffer** increased to `strlen(summary) + 256` for the longer COMPACTED prefix.
 
 ### Files modified
@@ -134,7 +134,7 @@ CREATE TABLE summary_nodes (
 
 ### Key implementation details
 
-- **Node creation** in `llm_context_compact()` after message IDs are resolved and summary text is generated. Queries `summary_node_get_latest(conv_id)` for the prior node; if not found in current conversation, traverses `continued_from` chain to find prior nodes from parent conversations.
+- **Node creation** in `llm_context_compact()` after message IDs are resolved and summary text is generated. Queries `summary_node_get_latest(conv_id)` for the prior node; if not found in current conversation, traverses `continued_from` chain to find prior nodes from parent conversations. *(Phase 5: with single continuous conversations the prior node is always in the same conversation, so `summary_node_get_latest(conv_id)` hits directly and the `continued_from` traversal is legacy-only.)*
 - **CRUD functions**: `summary_node_create()`, `summary_node_get()`, `summary_node_get_latest()`, `summary_node_free()` — all ad-hoc queries (not prepared statements) since compaction is infrequent.
 - **`[COMPACTED]` tag** includes `node=Z depth=D` when node creation succeeds. Falls back to node-less format on DB failure (graceful degradation).
 - **`context_expand` node_id path**: when `node_id` is provided (no other params needed), retrieves the node and its prior node's summary. Returns both summaries with metadata (depth, level, message range, conversation ID). Buffer right-sized to `summary_len + prior_len + 512` (not fixed 16KB).
@@ -164,15 +164,47 @@ The model can drill down iteratively: expand node 2 → see both summaries → e
 
 ---
 
+## Phase 5: Compaction Watermark — Single Continuous Conversations (Shipped)
+
+Replaces fork-on-compaction. Previously, compaction archived the conversation (`is_archived=1`, read-only) and created a new continuation row (`continued_from` → parent) seeded with the summary — the fork was what bounded *reload* context. That produced a user-visible read-only hierarchy plus a "both-locked" bug (a duplicate `continue_conversation` archived the continuation child too, with no idempotency anywhere). Now compaction records an in-conversation **watermark** on the same row; the conversation stays single and always writable, and reload is bounded by the watermark instead of by the fork.
+
+### Schema (v67)
+
+- `conversations.context_watermark_msg_id INTEGER NOT NULL DEFAULT 0` — last compacted message id. `0` = never compacted: all messages are loaded. This default is the zero-risk gate, so un-compacted conversations reload byte-identically to pre-v67.
+- One-time migration unlocks legacy split-archived conversations (`UPDATE conversations SET is_archived=0 WHERE is_archived=1` — the continuation split was the only writer of that flag).
+
+### Key implementation details
+
+- **Persist** — `conv_db_set_compaction_watermark()`, called from `llm_context_compact()` right after `summary_node_create`. A single atomic UPDATE writes `compaction_summary` + `context_watermark_msg_id` on the same row, guarded `WHERE id=? AND user_id=? AND ? >= context_watermark_msg_id` so a stale async compaction is a harmless no-op (never rewinds). No archive, no continuation row. Skipped (never writes 0) when the message id is unresolved (voice path with no command-context user).
+- **Bounded restore** (gated on `watermark > 0`) — `conv_db_get_messages_after(conv_id, user_id, after_id, ...)` (full tool/reasoning columns, same ownership JOIN as `conv_db_get_messages`) loads only post-watermark messages; the summary is prepended. The WebUI **display** load stays full (the user sees the entire transcript); only the **LLM-context** restore is bounded. The same gate is applied inside the messaging forever-conversation loader (`memory_history_load_from_db`), so every reload path is bounded by one shared check.
+- **Marker re-injection across reloads** — the summary is injected as an **assistant** message carrying a reconstructed `[COMPACTED conv=N msgs=X-Y node=Z depth=D]` marker (built by `conv_db_format_compaction_context()` from the latest `summary_node`), so the LLM keeps a `context_expand` handle to the originals *after a reload*, not just in the live session. Assistant role (not system) is required: `session_update_system_messages` rebuilds the leading context into exactly two system messages every turn and drops any other system message — so a system-role summary was silently lost. This was a latent bug since the prompt-cache two-system-message refactor (reloaded continuation summaries were also being dropped); fixed here by matching the live marker's assistant role.
+- **Continuation machinery is now legacy.** `continued_from` is retained as a breadcrumb but is never written for new conversations; the Phase 3/4 `continued_from` chain-walk fires only for pre-v67 conversations. The old split path (`conv_db_create_continuation`, `handle_continue_conversation`, the `continue_conversation` WS message + the client's split trigger) is left **dormant** and logs a WARNING if invoked, pending removal after a production soak.
+
+### Files modified
+
+`include/auth/auth_db_internal.h`, `include/auth/auth_db.h`, `src/auth/auth_db_schema.c`, `src/auth/auth_db_migrations.c`, `src/auth/auth_db_migrations_v67.c` (new), `src/auth/auth_db_statements.c`, `src/auth/auth_db_conv.c`, `src/llm/llm_context.c`, `src/webui/webui_server.c`, `src/webui/webui_history.c`, `src/memory/memory_history_loader.c`, `www/js/ui/history.js`, `tests/test_auth_db.c`, `CMakeLists.txt`
+
+### Live test results (June 16, 2026)
+
+- conv 845: watermark set and advanced (17874 → 17891) across 8+ compactions, `is_archived=0`, no continuation child — one writable conversation throughout.
+- Bounded reload restored 46 post-watermark messages + the summary; the full 94-message transcript still displayed in the UI.
+- After reload, the `[COMPACTED conv=845 msgs=17834-17891 node=N depth=N]` marker was visible to the LLM (assistant role survived the per-turn rebuild); the model called `context_expand` on its own — `expanding msgs 17834-17891 from conv 845` — and retrieved the verbatim originals (recalled the user's actual first message).
+- All previously-archived conversations unlocked (0 archived post-migration).
+- Build clean, 88/88 CI, +2 unit tests (monotonic guard, bounded fetch); five-agent review applied.
+
+---
+
 ## Implementation Order
 
 ```
 Phase 1 (escalation) ──┐
-                        ├──> Phase 3 (lossless pointers) ──> Phase 4 (DAG)
+                        ├──> Phase 3 (lossless pointers) ──> Phase 4 (DAG) ──> Phase 5 (watermark)
 Phase 2 (async)  ───────┘
-         ✓ shipped       ✓ shipped                    ✓ shipped
+         ✓ shipped       ✓ shipped                    ✓ shipped        ✓ shipped
 ```
 
+Phase 5 (June 2026) retired fork-on-compaction in favor of the in-conversation watermark — conversations stay single and writable; the continuation/`continued_from` model from Phases 3–4 is now legacy (dormant, fires only for pre-v67 conversations).
+
 ---
 
 ## Follow-up Optimizations
diff --git a/include/auth/admin_socket_internal.h b/include/auth/admin_socket_internal.h
index 11c87a37..3d2ddde6 100644
--- a/include/auth/admin_socket_internal.h
+++ b/include/auth/admin_socket_internal.h
@@ -102,6 +102,13 @@ int handle_ota_push_all_cmd(int client_fd, const char *payload, uint16_t payload
 int handle_ota_rollout_status_cmd(int client_fd);
 int handle_ota_rollout_abort_cmd(int client_fd);
 
+/* Music-DB handlers (admin_socket_music.c).  Dispatched from handle_client()
+ * in admin_socket.c against ADMIN_MSG_MUSIC_* opcodes. */
+int admin_handle_music_stats(int client_fd);
+int admin_handle_music_search(int client_fd, const char *payload, uint16_t len);
+int admin_handle_music_list(int client_fd, const char *payload, uint16_t len);
+int admin_handle_music_rescan(int client_fd);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/include/auth/auth_db.h b/include/auth/auth_db.h
index 3ac3be0b..d0e2face 100644
--- a/include/auth/auth_db.h
+++ b/include/auth/auth_db.h
@@ -1036,10 +1036,11 @@ typedef struct {
    time_t updated_at;
    int message_count;
    bool is_archived;
-   int context_tokens;       /**< Last known context token count */
-   int context_max;          /**< Context window size */
-   int64_t continued_from;   /**< Parent conversation ID (0 = none) */
-   char *compaction_summary; /**< Summary from parent (NULL if not a continuation) */
+   int context_tokens;               /**< Last known context token count */
+   int context_max;                  /**< Context window size */
+   int64_t continued_from;           /**< Parent conversation ID (0 = none) */
+   char *compaction_summary;         /**< Compaction summary, or NULL (pre-v67: from parent) */
+   int64_t context_watermark_msg_id; /**< v67: last compacted msg id; 0 = none (load all) */
    /* Per-conversation LLM settings (v11) - empty string means use defaults */
    char llm_type[16];         /**< "local" or "cloud" */
    char cloud_provider[16];   /**< "openai" or "claude" */
@@ -1351,6 +1352,44 @@ int conv_db_set_title_locked(int64_t conv_id, int user_id, int locked);
  */
 int conv_db_update_context(int64_t conv_id, int user_id, int context_tokens, int context_max);
 
+/**
+ * @brief Persist a compaction watermark + summary on a conversation (v67).
+ *
+ * Replaces fork-on-compaction: records @watermark_msg_id (the last compacted
+ * message id) and @summary on the SAME conversation row. Reload then bounds
+ * context to messages with id > watermark + the summary. Single atomic UPDATE
+ * with a monotonic guard (a stale watermark < the stored one is a no-op).
+ *
+ * @param conv_id Conversation id (> 0).
+ * @param user_id Owner id (ownership-checked in the UPDATE).
+ * @param summary Latest compaction summary (may be NULL).
+ * @param watermark_msg_id Last compacted message id (> 0; <= 0 returns AUTH_DB_INVALID).
+ * @return AUTH_DB_SUCCESS (incl. benign no-op), AUTH_DB_INVALID, or AUTH_DB_FAILURE.
+ */
+int conv_db_set_compaction_watermark(int64_t conv_id,
+                                     int user_id,
+                                     const char *summary,
+                                     int64_t watermark_msg_id);
+
+/**
+ * @brief Format the reload context line for a (watermarked) conversation.
+ *
+ * Writes a `[COMPACTED conv=N msgs=X-Y node=Z depth=D] Previous conversation
+ * context (summarized): <summary>` marker into @out when a summary node exists
+ * (so a reloaded LLM keeps a context_expand handle to the compacted originals),
+ * else a plain summary line. @out is always NUL-terminated. Empty @summary
+ * yields an empty string.
+ *
+ * @param conv_id Conversation id (for summary-node lookup + the marker).
+ * @param summary The conversation's compaction_summary text (may be NULL).
+ * @param out Output buffer.
+ * @param out_len Size of @out.
+ */
+void conv_db_format_compaction_context(int64_t conv_id,
+                                       const char *summary,
+                                       char *out,
+                                       size_t out_len);
+
 /**
  * @brief Lock LLM settings for a conversation
  *
@@ -1505,6 +1544,26 @@ int conv_db_add_message_with_tools(int64_t conv_id,
  */
 int conv_db_get_messages(int64_t conv_id, int user_id, message_callback_t callback, void *ctx);
 
+/**
+ * @brief Like conv_db_get_messages but only messages with id > @after_id.
+ *
+ * The compaction-watermark restore path (v67): load only post-watermark messages
+ * into the LLM context. Same full column set / ownership check / chronological
+ * order as conv_db_get_messages. @after_id = 0 returns all messages.
+ *
+ * @param conv_id Conversation ID
+ * @param user_id User ID (for authorization check)
+ * @param after_id Exclusive lower bound on message id (0 = all)
+ * @param callback Function called for each message
+ * @param ctx User-provided context passed to callback
+ * @return AUTH_DB_SUCCESS, AUTH_DB_INVALID, or AUTH_DB_FAILURE
+ */
+int conv_db_get_messages_after(int64_t conv_id,
+                               int user_id,
+                               int64_t after_id,
+                               message_callback_t callback,
+                               void *ctx);
+
 /**
  * @brief Get messages for a conversation (admin only, no ownership check)
  *
diff --git a/include/auth/auth_db_internal.h b/include/auth/auth_db_internal.h
index 3979c9af..8bb383bb 100644
--- a/include/auth/auth_db_internal.h
+++ b/include/auth/auth_db_internal.h
@@ -56,7 +56,7 @@
  * DAWN_ENABLE_MCP_BRIDGE_TOOL / DAWN_ENABLE_CODE_PROJECTS. Gating them on a
  * feature flag would fork the schema timeline across binaries; do not do it.
  * (arch-A2) */
-#define AUTH_DB_SCHEMA_VERSION 66
+#define AUTH_DB_SCHEMA_VERSION 67
 
 /* Retention periods */
 #define LOGIN_ATTEMPT_RETENTION_SEC (7 * 24 * 60 * 60) /* 7 days */
@@ -129,9 +129,11 @@ typedef struct {
    sqlite3_stmt *stmt_conv_count;
    sqlite3_stmt *stmt_msg_add;
    sqlite3_stmt *stmt_msg_get;
+   sqlite3_stmt *stmt_msg_get_after;
    sqlite3_stmt *stmt_msg_get_admin;
    sqlite3_stmt *stmt_conv_update_meta;
    sqlite3_stmt *stmt_conv_update_context;
+   sqlite3_stmt *stmt_conv_set_watermark;
    sqlite3_stmt *stmt_conv_create_origin;
    sqlite3_stmt *stmt_conv_reassign;
 
@@ -504,6 +506,14 @@ int auth_db_migrations_v65(sqlite3 *db);
  */
 int auth_db_migrations_v66(sqlite3 *db);
 
+/**
+ * @brief v67 migration: add conversations.context_watermark_msg_id (compaction
+ *        watermark, replacing fork-on-compaction) and one-time unlock of legacy
+ *        split-archived conversations. Idempotent (probes PRAGMA table_info).
+ * @return AUTH_DB_SUCCESS or AUTH_DB_FAILURE.
+ */
+int auth_db_migrations_v67(sqlite3 *db);
+
 /**
  * @brief Prepare every cached sqlite3_stmt* in s_db.
  *
diff --git a/include/config/dawn_config.h b/include/config/dawn_config.h
index 4191ce9f..0d5fd46f 100644
--- a/include/config/dawn_config.h
+++ b/include/config/dawn_config.h
@@ -572,6 +572,22 @@ typedef struct {
    } dominant_token_heuristic;
 } focus_injection_config_t;
 
+/* Unified cross-source `recall` tool (docs/CROSS_TOOL_RECALL_DESIGN.md).
+ * Deep on-demand gather across all focus sources at a budget LARGER than the
+ * per-turn injection, kept in a SEPARATE block so tuning the deep path never
+ * disturbs the tuned per-turn `focus_injection` values.  recall_tool copies
+ * top_k/min_score/budget_bytes into a focus_limits_t via DESIGNATED initializers
+ * (mapping is by-name, not positional — field order here need not match
+ * focus_limits_t); per_source_max rides focus_compose_ex's existing param. */
+typedef struct {
+   int top_k;          /* Max candidates retained after ranking (deep gather) */
+   int budget_bytes;   /* Byte cap on the assembled recall result text         */
+   float min_score;    /* Floor — lower than per-turn so weaker hits surface    */
+   int per_source_max; /* Per-adapter fan-out cap before ranking.
+                        * INVARIANT: per_source_max * MAX_FOCUS_SOURCES <= 256
+                        * or the dominant-token heuristic self-disables. */
+} recall_config_t;
+
 typedef struct {
    bool enabled;                         /* Enable memory system */
    int context_budget_tokens;            /* Max tokens for memory context (~800) */
@@ -802,6 +818,10 @@ typedef struct {
     * `docs/DYNAMIC_CONTEXT_INJECTION_DESIGN.md` §"Phase 1 — Per-Turn Focus". */
    focus_injection_config_t focus_injection;
 
+   /* Unified cross-source `recall` tool — deep gather at a larger budget than
+    * the per-turn focus injection above.  See recall_config_t. */
+   recall_config_t recall;
+
    /* Phase 2 entity-merge auto-merge gate.  Fires after each extraction
     * completes, evaluates was_created entities against existing canonicals
     * via the resolver cascade, and routes by composite score:
diff --git a/include/core/focus/focus_candidate_helpers.h b/include/core/focus/focus_candidate_helpers.h
index 24f86067..38a5eed0 100644
--- a/include/core/focus/focus_candidate_helpers.h
+++ b/include/core/focus/focus_candidate_helpers.h
@@ -74,6 +74,20 @@ extern "C" {
  * document_chunk / calendar_occ variants all fit within 64. */
 #define FOCUS_ITEM_ID_BUFLEN 64
 
+/**
+ * @brief Largest byte length <= @p max_bytes that does not split a UTF-8
+ *        character in @p text.
+ *
+ * Truncating multi-byte UTF-8 text at a raw byte offset can leave a partial
+ * character at the end, which is invalid UTF-8 and breaks any consumer that
+ * requires valid UTF-8 (notably WebSocket text frames — the browser rejects the
+ * frame and drops the connection). This returns a cut length that lands on a
+ * character boundary: `strlen(text)` when it already fits, otherwise @p
+ * max_bytes backed up past any trailing UTF-8 continuation bytes. Returns 0 on
+ * NULL @p text.
+ */
+size_t focus_utf8_safe_cap(const char *text, size_t max_bytes);
+
 /**
  * @brief Free `text` and `item_id` on a single candidate (failure path).
  *
diff --git a/include/core/focus/focus_source.h b/include/core/focus/focus_source.h
index ef0117b7..7ee079dd 100644
--- a/include/core/focus/focus_source.h
+++ b/include/core/focus/focus_source.h
@@ -271,6 +271,23 @@ typedef struct focus_compose_result_s {
    int rejection_count;
 } focus_compose_result_t;
 
+/* =============================================================================
+ * Per-call trim-limit overrides (focus_compose_ex)
+ *
+ * The per-turn focus path (build_focus_block) uses the config-driven trim limits
+ * in `g_config.memory.focus_injection`.  The unified `recall` tool needs a
+ * larger, separate budget for a deep cross-source gather.  `focus_limits_t`
+ * overrides ONLY the three trim values; a NULL pointer or a zero/negative field
+ * falls back to the config value for that field.  Ranking WEIGHTS are NOT
+ * overridable here — they stay config-sourced and shared, so recall changes how
+ * MUCH is kept, not HOW it is ranked.
+ * ============================================================================= */
+typedef struct {
+   int top_k;        /* > 0 overrides focus_injection.top_k              */
+   float min_score;  /* >= 0 overrides focus_injection.min_score         */
+   int budget_bytes; /* > 0 overrides focus_injection.focus_budget_bytes */
+} focus_limits_t;
+
 /* =============================================================================
  * Public API
  * ============================================================================= */
@@ -348,6 +365,32 @@ int focus_compose(int user_id,
                   int per_source_max_candidates,
                   focus_compose_result_t *out_result);
 
+/**
+ * @brief Like focus_compose(), but with per-call trim-limit overrides.
+ *
+ * Identical pipeline to focus_compose(); the only difference is steps 3-5 (the
+ * min_score / top_k / byte-budget trim) consult `limits` first, falling back to
+ * `g_config.memory.focus_injection` per field when `limits` is NULL or a field
+ * is zero/negative.  Ranking (step 2) is unchanged — weights are always config.
+ *
+ * The unified `recall` tool uses this for a deep cross-source gather at a budget
+ * larger than the per-turn injection.  focus_compose() is a thin wrapper passing
+ * `limits = NULL`.
+ *
+ * @param limits Per-call trim overrides, or NULL to use config for all three.
+ * @see focus_compose for all other parameter semantics.
+ * @return SUCCESS on success, FAILURE on error (same contract as focus_compose).
+ */
+int focus_compose_ex(int user_id,
+                     bool include_private,
+                     const char *query_text,
+                     const float *query_embedding,
+                     size_t embed_dim,
+                     time_t now,
+                     int per_source_max_candidates,
+                     const focus_limits_t *limits,
+                     focus_compose_result_t *out_result);
+
 /**
  * @brief Release everything `focus_compose()` allocated into `result`.
  *
diff --git a/include/tools/mcp_bridge.h b/include/tools/mcp_bridge.h
index b8874c91..1102b336 100644
--- a/include/tools/mcp_bridge.h
+++ b/include/tools/mcp_bridge.h
@@ -125,4 +125,25 @@ int mcp_bridge_call_tool(const char *server_alias,
  */
 int mcp_bridge_server_connected(const char *server_alias);
 
+/**
+ * @brief Ensure an upstream server is connected, reconnecting it if it wasn't
+ *        ready at startup.
+ *
+ * Active counterpart to @ref mcp_bridge_server_connected: callers that gate a
+ * code path on a server being usable (e.g. the code-graph provider's
+ * availability check) should use this so a server that came up after DAWN's
+ * startup connect attempt self-heals on first use instead of staying unavailable until a
+ * daemon restart. Blocks for the connect handshake; call off the main loop.
+ *
+ * Restores the connection only — it does NOT register the server's LLM-facing
+ * tools (those are registered at startup against the still-unlocked registry; a
+ * server first reached after init exposes its tools on the next restart). This
+ * is sufficient for direct programmatic callers such as @ref mcp_bridge_call_tool.
+ *
+ * @param server_alias Configured alias of the upstream server to connect.
+ * @return SUCCESS if the alias is connected (already or after reconnect);
+ *         FAILURE if it is not configured or the reconnect failed.
+ */
+int mcp_bridge_ensure_connected(const char *server_alias);
+
 #endif /* MCP_BRIDGE_H */
diff --git a/include/tools/recall_format.h b/include/tools/recall_format.h
new file mode 100644
index 00000000..0b7f843d
--- /dev/null
+++ b/include/tools/recall_format.h
@@ -0,0 +1,65 @@
+/*
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ * By contributing to this project, you agree to license your contributions
+ * under the GPLv3 (or any later version) or any future licenses chosen by
+ * the project author(s).
+ *
+ * Recall tool result formatter — renders a focus_compose result into a
+ * grouped, source-tagged "here's what we know" block with per-item
+ * read-pointers.  Split into its own translation unit so it is unit-testable
+ * without the daemon and so the grouping/pointer logic can grow (Phase 2
+ * scope filter, entity/relation pointers) without bloating recall_tool.c.
+ */
+
+#ifndef TOOLS_RECALL_FORMAT_H
+#define TOOLS_RECALL_FORMAT_H
+
+#include "core/focus/focus_source.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @brief Render a composed cross-source recall result for the LLM.
+ *
+ * Groups `result->candidates` (already ranked descending) by source family —
+ * memory facts/relationships, conversation summaries, notes & documents,
+ * calendar — and renders each as a bulleted line carrying a pointer to where
+ * the exact/full text lives (document_read "<label>", memory get <id>, etc.).
+ *
+ * Dedup-vs-injection (design §4.2a): a candidate whose `item_id` appears in
+ * `injected_ids` is marked "already in current context" rather than dropped,
+ * so the LLM still sees the linkage but the byte budget favours fresh hits.
+ * Pass `injected_ids = NULL` (and `injected_count = 0`) when the caller cannot
+ * supply the turn's injected set — the formatter then appends a brief overlap
+ * note instead (the v1 fallback).
+ *
+ * @param query          The user's recall query (echoed in the header).
+ * @param result         Composed focus result (candidates + breakdowns).
+ * @param injected_ids   Item-ids already injected this turn, or NULL.
+ * @param injected_count Length of injected_ids (0 when NULL).
+ * @return Newly-allocated string the caller owns and frees, or NULL on OOM.
+ */
+char *recall_format_result(const char *query,
+                           const focus_compose_result_t *result,
+                           const char *const *injected_ids,
+                           int injected_count);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* TOOLS_RECALL_FORMAT_H */
diff --git a/include/tools/recall_tool.h b/include/tools/recall_tool.h
new file mode 100644
index 00000000..5c262d0d
--- /dev/null
+++ b/include/tools/recall_tool.h
@@ -0,0 +1,43 @@
+/*
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ * By contributing to this project, you agree to license your contributions
+ * under the GPLv3 (or any later version) or any future licenses chosen by
+ * the project author(s).
+ *
+ * Unified cross-source recall tool — one high-level "gather everything known
+ * about X" call that fans out across all focus sources (memory facts/entities/
+ * relations/summaries, notes, documents, calendar) via the focus engine at a
+ * larger budget than the per-turn injection.  See
+ * docs/CROSS_TOOL_RECALL_DESIGN.md.
+ */
+
+#ifndef TOOLS_RECALL_TOOL_H
+#define TOOLS_RECALL_TOOL_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @brief Register the `recall` tool with the tool registry.
+ * @return SUCCESS or FAILURE (per tool_registry_register).
+ */
+int recall_tool_register(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* TOOLS_RECALL_TOOL_H */
diff --git a/include/tools/tool_registry.h b/include/tools/tool_registry.h
index 531b0393..1914fe55 100644
--- a/include/tools/tool_registry.h
+++ b/include/tools/tool_registry.h
@@ -46,11 +46,14 @@ extern "C" {
  * Constants
  * ============================================================================= */
 
-#define TOOL_MAX_REGISTERED 64        /* Max tools in registry */
-#define TOOL_NAME_MAX 64              /* Max length of tool name */
-#define TOOL_DESC_MAX 512             /* Max length of description */
-#define TOOL_TOPIC_MAX 32             /* Max length of MQTT topic */
-#define TOOL_PARAM_MAX 12             /* Max parameters per tool */
+#define TOOL_MAX_REGISTERED 64 /* Max tools in registry */
+#define TOOL_NAME_MAX 64       /* Max length of tool name */
+#define TOOL_DESC_MAX 512      /* Max length of description */
+#define TOOL_TOPIC_MAX 32      /* Max length of MQTT topic */
+/* Max parameters per tool. No array is sized by this — it's a validation/hardening
+ * cap (also the MCP bridge's property limit). 20 admits real MCP tools like cbm's
+ * search_graph (14 params) while still bounding an untrusted upstream schema. */
+#define TOOL_PARAM_MAX 20
 #define TOOL_PARAM_ENUM_MAX 16        /* Max enum values per parameter */
 #define TOOL_ALIAS_MAX 8              /* Max aliases per tool */
 #define TOOL_DEVICE_MAP_MAX 8         /* Max device map entries for meta-tools */
diff --git a/services/dawn-server/dawn-server.service b/services/dawn-server/dawn-server.service
index eec69f77..59fe57a7 100644
--- a/services/dawn-server/dawn-server.service
+++ b/services/dawn-server/dawn-server.service
@@ -1,8 +1,15 @@
 [Unit]
 Description=DAWN Voice Assistant Server
 Documentation=https://github.com/The-OASIS-Project/dawn
-After=network-online.target mosquitto.service
+After=network-online.target mosquitto.service cbm-mcp.service
 Wants=network-online.target
+# Weak dependency on / ordering after the code-graph MCP server (coding-harness builds only).
+# Wants= (not Requires=) lets DAWN start even when cbm-mcp.service fails or is not installed
+# (the directives are then ignored); a merely *disabled* cbm-mcp is still pulled in — mask it
+# to opt out. NOTE: cbm-mcp is Type=simple, so After= only guarantees it has *started*, not
+# that its stdio child is *ready* — it shrinks the boot race window but does not close it.
+# The MCP bridge's lazy reconnect + handshake retry is the actual fix.
+Wants=cbm-mcp.service
 
 [Service]
 Type=simple
diff --git a/src/auth/admin_socket.c b/src/auth/admin_socket.c
index c2058885..96e9d2a6 100644
--- a/src/auth/admin_socket.c
+++ b/src/auth/admin_socket.c
@@ -2100,137 +2100,6 @@ static int handle_delete_conversation(int client_fd, const char *payload, uint16
    return send_response(client_fd, ADMIN_RESP_SUCCESS);
 }
 
-/* =============================================================================
- * Music Database Handlers
- * =============================================================================
- */
-
-static int handle_music_stats(int client_fd) {
-   if (!music_db_is_initialized()) {
-      return send_text_response(client_fd, ADMIN_RESP_SERVICE_ERROR,
-                                "Music database not initialized");
-   }
-
-   music_db_stats_t stats;
-   if (music_db_get_stats(&stats) != 0) {
-      return send_text_response(client_fd, ADMIN_RESP_SERVICE_ERROR, "Failed to get music stats");
-   }
-
-   bool scanner_running = music_scanner_is_running();
-   bool initial_complete = music_scanner_initial_scan_complete();
-
-   char response[ADMIN_MSG_CONTENT_MAX];
-   snprintf(response, sizeof(response),
-            "Music Database Statistics\n"
-            "-------------------------\n"
-            "Tracks:  %d\n"
-            "Artists: %d\n"
-            "Albums:  %d\n"
-            "Scanner: %s\n"
-            "Status:  %s",
-            stats.track_count, stats.artist_count, stats.album_count,
-            scanner_running ? "running" : "stopped", initial_complete ? "ready" : "indexing");
-
-   return send_text_response(client_fd, ADMIN_RESP_SUCCESS, response);
-}
-
-static int handle_music_search(int client_fd, const char *payload, uint16_t len) {
-   if (len == 0 || len > 200) {
-      return send_text_response(client_fd, ADMIN_RESP_FAILURE, "Invalid search query");
-   }
-
-   if (!music_db_is_initialized()) {
-      return send_text_response(client_fd, ADMIN_RESP_SERVICE_ERROR,
-                                "Music database not initialized");
-   }
-
-   /* Allocate results on heap */
-   music_search_result_t *results = malloc(50 * sizeof(music_search_result_t));
-   if (!results) {
-      return send_text_response(client_fd, ADMIN_RESP_SERVICE_ERROR, "Memory allocation failed");
-   }
-
-   int count = 0;
-   if (music_db_search(payload, results, 50, &count) != SUCCESS) {
-      free(results);
-      return send_text_response(client_fd, ADMIN_RESP_SERVICE_ERROR, "Search failed");
-   }
-
-   if (count == 0) {
-      free(results);
-      return send_text_response(client_fd, ADMIN_RESP_SUCCESS, "No results found");
-   }
-
-   /* Build response */
-   char response[ADMIN_MSG_CONTENT_MAX];
-   int offset = snprintf(response, sizeof(response), "Found %d result(s):\n", count);
-
-   for (int i = 0; i < count && offset < (int)sizeof(response) - 100; i++) {
-      offset += snprintf(response + offset, sizeof(response) - offset, "%d. %s\n", i + 1,
-                         results[i].display_name);
-   }
-
-   free(results);
-   return send_text_response(client_fd, ADMIN_RESP_SUCCESS, response);
-}
-
-static int handle_music_list(int client_fd, const char *payload, uint16_t len) {
-   if (!music_db_is_initialized()) {
-      return send_text_response(client_fd, ADMIN_RESP_SERVICE_ERROR,
-                                "Music database not initialized");
-   }
-
-   /* Parse limit from payload (0 or empty = all tracks) */
-   int limit = 0;
-   if (len > 0) {
-      limit = atoi(payload);
-   }
-   /* 0 means show all, cap at reasonable max for response size */
-   if (limit <= 0) {
-      limit = 1000;
-   } else if (limit > 1000) {
-      limit = 1000;
-   }
-
-   /* List all tracks (no pattern filtering) */
-   music_search_result_t *results = malloc(limit * sizeof(music_search_result_t));
-   if (!results) {
-      return send_text_response(client_fd, ADMIN_RESP_SERVICE_ERROR, "Memory allocation failed");
-   }
-
-   int count = 0;
-   if (music_db_list(results, limit, &count) != SUCCESS) {
-      free(results);
-      return send_text_response(client_fd, ADMIN_RESP_SERVICE_ERROR, "List failed");
-   }
-
-   if (count == 0) {
-      free(results);
-      return send_text_response(client_fd, ADMIN_RESP_SUCCESS, "No tracks in database");
-   }
-
-   /* Build response */
-   char response[ADMIN_MSG_CONTENT_MAX];
-   int offset = snprintf(response, sizeof(response), "Showing %d track(s):\n", count);
-
-   for (int i = 0; i < count && offset < (int)sizeof(response) - 100; i++) {
-      offset += snprintf(response + offset, sizeof(response) - offset, "%d. %s\n", i + 1,
-                         results[i].display_name);
-   }
-
-   free(results);
-   return send_text_response(client_fd, ADMIN_RESP_SUCCESS, response);
-}
-
-static int handle_music_rescan(int client_fd) {
-   if (!music_scanner_is_running()) {
-      return send_text_response(client_fd, ADMIN_RESP_SERVICE_ERROR, "Music scanner not running");
-   }
-
-   music_scanner_trigger_rescan();
-   return send_text_response(client_fd, ADMIN_RESP_SUCCESS, "Rescan triggered");
-}
-
 /* =============================================================================
  * Client Handler
  * =============================================================================
@@ -2355,16 +2224,16 @@ static int handle_client(int client_fd) {
 
       /* Phase 5: Music Database */
       case ADMIN_MSG_MUSIC_STATS:
-         return handle_music_stats(client_fd);
+         return admin_handle_music_stats(client_fd);
 
       case ADMIN_MSG_MUSIC_SEARCH:
-         return handle_music_search(client_fd, payload, header.payload_len);
+         return admin_handle_music_search(client_fd, payload, header.payload_len);
 
       case ADMIN_MSG_MUSIC_LIST:
-         return handle_music_list(client_fd, payload, header.payload_len);
+         return admin_handle_music_list(client_fd, payload, header.payload_len);
 
       case ADMIN_MSG_MUSIC_RESCAN:
-         return handle_music_rescan(client_fd);
+         return admin_handle_music_rescan(client_fd);
 
       /* Phase 6: Memory */
       case ADMIN_MSG_MEMORY_RECATEGORIZE:
diff --git a/src/auth/admin_socket_music.c b/src/auth/admin_socket_music.c
new file mode 100644
index 00000000..fcd717ab
--- /dev/null
+++ b/src/auth/admin_socket_music.c
@@ -0,0 +1,160 @@
+/*
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ * By contributing to this project, you agree to license your contributions
+ * under the GPLv3 (or any later version) or any future licenses chosen by
+ * the project author(s).
+ *
+ * Music-database admin handlers (stats/search/list/rescan) for the dawn-admin
+ * CLI.  Extracted from admin_socket.c; dispatched from handle_client() against
+ * the ADMIN_MSG_MUSIC_* opcodes.
+ */
+
+#define ADMIN_SOCKET_INTERNAL_ALLOWED
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "audio/music_db.h"
+#include "audio/music_scanner.h"
+#include "auth/admin_socket_internal.h"
+#include "dawn_error.h"
+
+/* ADMIN_MSG_MUSIC_STATS: report track/artist/album counts and scanner state. */
+int admin_handle_music_stats(int client_fd) {
+   music_db_stats_t counts;
+   const char *failure = NULL;
+
+   /* The else-if keeps music_db_get_stats() from running before the DB is initialized. */
+   if (!music_db_is_initialized()) {
+      failure = "Music database not initialized";
+   } else if (music_db_get_stats(&counts) != 0) {
+      failure = "Failed to get music stats";
+   }
+   if (failure != NULL) {
+      return send_text_response(client_fd, ADMIN_RESP_SERVICE_ERROR, failure);
+   }
+
+   const char *scanner_text = music_scanner_is_running() ? "running" : "stopped";
+   const char *status_text = music_scanner_initial_scan_complete() ? "ready" : "indexing";
+
+   char text[ADMIN_MSG_CONTENT_MAX];
+   snprintf(text, sizeof(text),
+            "Music Database Statistics\n-------------------------\n"
+            "Tracks:  %d\nArtists: %d\nAlbums:  %d\n"
+            "Scanner: %s\nStatus:  %s",
+            counts.track_count, counts.artist_count, counts.album_count, scanner_text,
+            status_text);
+
+   return send_text_response(client_fd, ADMIN_RESP_SUCCESS, text);
+}
+
+/* ADMIN_MSG_MUSIC_SEARCH: run a library search for the payload text and list the matches. */
+int admin_handle_music_search(int client_fd, const char *payload, uint16_t len) {
+   enum { MAX_HITS = 50 }; /* result-row cap handed to music_db_search() */
+   if (len < 1 || len > 200) {
+      return send_text_response(client_fd, ADMIN_RESP_FAILURE, "Invalid search query");
+   }
+   if (!music_db_is_initialized()) {
+      return send_text_response(client_fd, ADMIN_RESP_SERVICE_ERROR,
+                                "Music database not initialized");
+   }
+   /* Result rows live on the heap; only the reply text is built on the stack. */
+   music_search_result_t *hits = malloc(MAX_HITS * sizeof(music_search_result_t));
+   if (hits == NULL) {
+      return send_text_response(client_fd, ADMIN_RESP_SERVICE_ERROR, "Memory allocation failed");
+   }
+
+   int found = 0;
+   if (music_db_search(payload, hits, MAX_HITS, &found) != SUCCESS) {
+      free(hits);
+      return send_text_response(client_fd, ADMIN_RESP_SERVICE_ERROR, "Search failed");
+   }
+   if (found == 0) {
+      free(hits);
+      return send_text_response(client_fd, ADMIN_RESP_SUCCESS, "No results found");
+   }
+
+   /* Append numbered lines until fewer than 100 bytes of headroom remain. */
+   char text[ADMIN_MSG_CONTENT_MAX];
+   int used = snprintf(text, sizeof(text), "Found %d result(s):\n", found);
+   int idx = 0;
+   while (idx < found && used < (int)sizeof(text) - 100) {
+      used += snprintf(text + used, sizeof(text) - used, "%d. %s\n", idx + 1,
+                       hits[idx].display_name);
+      idx++;
+   }
+
+   free(hits);
+   return send_text_response(client_fd, ADMIN_RESP_SUCCESS, text);
+}
+
+/*
+ * ADMIN_MSG_MUSIC_LIST: reply with a numbered listing of tracks from the music DB.
+ *
+ * payload/len: optional decimal row limit as text.  Empty, non-numeric, zero,
+ *              negative or >1000 values all select the 1000-row ceiling.
+ * Returns whatever send_text_response() returns for the chosen reply.
+ */
+int admin_handle_music_list(int client_fd, const char *payload, uint16_t len) {
+   if (!music_db_is_initialized()) {
+      return send_text_response(client_fd, ADMIN_RESP_SERVICE_ERROR,
+                                "Music database not initialized");
+   }
+
+   /* strtol() instead of atoi(): atoi() is undefined on out-of-range input, while
+    * strtol() saturates at LONG_MIN/LONG_MAX and the clamp below absorbs that. */
+   long requested = (len > 0) ? strtol(payload, NULL, 10) : 0;
+   int limit = (requested <= 0 || requested > 1000) ? 1000 : (int)requested;
+
+   /* List all tracks (no pattern filtering) */
+   music_search_result_t *results = malloc(limit * sizeof(music_search_result_t));
+   if (!results) {
+      return send_text_response(client_fd, ADMIN_RESP_SERVICE_ERROR, "Memory allocation failed");
+   }
+
+   int count = 0;
+   if (music_db_list(results, limit, &count) != SUCCESS) {
+      free(results);
+      return send_text_response(client_fd, ADMIN_RESP_SERVICE_ERROR, "List failed");
+   }
+
+   if (count == 0) {
+      free(results);
+      return send_text_response(client_fd, ADMIN_RESP_SUCCESS, "No tracks in database");
+   }
+
+   /* Build response; stop adding lines once fewer than 100 bytes of headroom remain */
+   char response[ADMIN_MSG_CONTENT_MAX];
+   int offset = snprintf(response, sizeof(response), "Showing %d track(s):\n", count);
+
+   for (int i = 0; i < count && offset < (int)sizeof(response) - 100; i++) {
+      offset += snprintf(response + offset, sizeof(response) - offset, "%d. %s\n", i + 1,
+                         results[i].display_name);
+   }
+
+   free(results);
+   return send_text_response(client_fd, ADMIN_RESP_SUCCESS, response);
+}
+
+/* ADMIN_MSG_MUSIC_RESCAN: ask the running scanner for a rescan; error if it is stopped. */
+int admin_handle_music_rescan(int client_fd) {
+   if (music_scanner_is_running()) {
+      music_scanner_trigger_rescan();
+      return send_text_response(client_fd, ADMIN_RESP_SUCCESS, "Rescan triggered");
+   }
+   return send_text_response(client_fd, ADMIN_RESP_SERVICE_ERROR, "Music scanner not running");
+}
diff --git a/src/auth/auth_db_conv.c b/src/auth/auth_db_conv.c
index 4ebfe0cc..34506703 100644
--- a/src/auth/auth_db_conv.c
+++ b/src/auth/auth_db_conv.c
@@ -350,6 +350,9 @@ int conv_db_get(int64_t conv_id, int user_id, conversation_t *conv_out) {
       conv_out->reasoning_effort[sizeof(conv_out->reasoning_effort) - 1] = '\0';
    }
 
+   /* Compaction watermark (schema v67+) */
+   conv_out->context_watermark_msg_id = sqlite3_column_int64(s_db.stmt_conv_get, 19);
+
    sqlite3_reset(s_db.stmt_conv_get);
    AUTH_DB_UNLOCK();
 
@@ -1161,6 +1164,75 @@ int conv_db_update_context(int64_t conv_id, int user_id, int context_tokens, int
    return (changes > 0) ? AUTH_DB_SUCCESS : AUTH_DB_NOT_FOUND;
 }
 
+/*
+ * Store @summary (NULL clears it) and advance the compaction watermark to
+ * @watermark_msg_id on the conversation row owned by @user_id.
+ *
+ * Returns AUTH_DB_INVALID for non-positive ids, AUTH_DB_FAILURE if the step does
+ * not end in SQLITE_DONE, else AUTH_DB_SUCCESS — including the zero-row case
+ * (unknown row, wrong owner, or a stale watermark rejected by the guard).
+ */
+int conv_db_set_compaction_watermark(int64_t conv_id,
+                                     int user_id,
+                                     const char *summary,
+                                     int64_t watermark_msg_id) {
+   if (conv_id <= 0 || watermark_msg_id <= 0) {
+      return AUTH_DB_INVALID;
+   }
+
+   AUTH_DB_LOCK_OR_FAIL();
+   sqlite3_stmt *stmt = s_db.stmt_conv_set_watermark;
+   sqlite3_reset(stmt);
+
+   /* Params 1-2 are the SET values, 3-4 select the row, 5 feeds the
+    * `? >= context_watermark_msg_id` guard that blocks a watermark rewind. */
+   if (summary != NULL) {
+      sqlite3_bind_text(stmt, 1, summary, -1, SQLITE_TRANSIENT);
+   } else {
+      sqlite3_bind_null(stmt, 1);
+   }
+   sqlite3_bind_int64(stmt, 2, watermark_msg_id);
+   sqlite3_bind_int64(stmt, 3, conv_id);
+   sqlite3_bind_int(stmt, 4, user_id);
+   sqlite3_bind_int64(stmt, 5, watermark_msg_id);
+
+   const int step_rc = sqlite3_step(stmt);
+   sqlite3_reset(stmt);
+   AUTH_DB_UNLOCK();
+   return (step_rc == SQLITE_DONE) ? AUTH_DB_SUCCESS : AUTH_DB_FAILURE;
+}
+
+/* Write the reload-time context line for a compacted conversation into @out ("" when
+ * @summary is NULL/empty; untouched without a buffer).  A latest summary node with
+ * positive msg-id bounds adds the [COMPACTED ...] context_expand handle, which must stay
+ * recognizable alongside the live marker built in llm_context.c; else plain text only. */
+void conv_db_format_compaction_context(int64_t conv_id,
+                                       const char *summary,
+                                       char *out,
+                                       size_t out_len) {
+   if (out == NULL || out_len == 0) {
+      return;
+   }
+   out[0] = '\0';
+   if (summary == NULL || summary[0] == '\0') {
+      return;
+   }
+
+   summary_node_t last = { 0 };
+   const int have_range = summary_node_get_latest(conv_id, &last) == AUTH_DB_SUCCESS &&
+                          last.msg_id_start > 0 && last.msg_id_end > 0;
+   if (!have_range) {
+      snprintf(out, out_len, "Previous conversation context (summarized): %s", summary);
+   } else {
+      snprintf(out, out_len,
+               "[COMPACTED conv=%lld msgs=%lld-%lld node=%lld depth=%d] "
+               "Previous conversation context (summarized): %s",
+               (long long)conv_id, (long long)last.msg_id_start, (long long)last.msg_id_end,
+               (long long)last.id, last.depth, summary);
+   }
+   summary_node_free(&last);
+}
+
 int conv_db_lock_llm_settings(int64_t conv_id,
                               int user_id,
                               const char *llm_type,
@@ -1427,6 +1499,50 @@ int conv_db_get_messages(int64_t conv_id, int user_id, message_callback_t callba
    return AUTH_DB_SUCCESS;
 }
 
+/* Stream the messages of @conv_id with id > @after_id (ascending id) to @callback; a
+ * non-zero callback return stops early.  A conversation not owned by @user_id simply
+ * yields no rows.  Returns AUTH_DB_INVALID on bad arguments, AUTH_DB_FAILURE when
+ * sqlite3_step() ends in anything other than SQLITE_DONE (rows already delivered may
+ * then be incomplete), AUTH_DB_SUCCESS otherwise. */
+int conv_db_get_messages_after(int64_t conv_id,
+                               int user_id,
+                               int64_t after_id,
+                               message_callback_t callback,
+                               void *ctx) {
+   if (conv_id <= 0 || !callback) {
+      return AUTH_DB_INVALID;
+   }
+
+   AUTH_DB_LOCK_OR_FAIL();
+   /* Ownership check via JOIN; bounded to messages after the compaction watermark. */
+   sqlite3_reset(s_db.stmt_msg_get_after);
+   sqlite3_bind_int64(s_db.stmt_msg_get_after, 1, conv_id);
+   sqlite3_bind_int(s_db.stmt_msg_get_after, 2, user_id);
+   sqlite3_bind_int64(s_db.stmt_msg_get_after, 3, after_id);
+
+   int rc;
+   while ((rc = sqlite3_step(s_db.stmt_msg_get_after)) == SQLITE_ROW) {
+      conversation_message_t msg = { 0 };
+      msg.id = sqlite3_column_int64(s_db.stmt_msg_get_after, 0);
+      msg.conversation_id = sqlite3_column_int64(s_db.stmt_msg_get_after, 1);
+      const char *role = (const char *)sqlite3_column_text(s_db.stmt_msg_get_after, 2);
+      if (role) {
+         strncpy(msg.role, role, CONV_ROLE_MAX - 1);
+         msg.role[CONV_ROLE_MAX - 1] = '\0';
+      }
+      /* Column pointers are only valid during the callback */
+      msg_read_columns(&msg, s_db.stmt_msg_get_after);
+      if (callback(&msg, ctx) != 0) {
+         break;
+      }
+   }
+
+   sqlite3_reset(s_db.stmt_msg_get_after);
+   AUTH_DB_UNLOCK();
+   /* rc is still SQLITE_ROW only when the callback stopped the walk — not an error. */
+   return (rc == SQLITE_ROW || rc == SQLITE_DONE) ? AUTH_DB_SUCCESS : AUTH_DB_FAILURE;
+}
+
 int conv_db_get_messages_admin(int64_t conv_id, message_callback_t callback, void *ctx) {
    if (conv_id <= 0 || !callback) {
       return AUTH_DB_INVALID;
diff --git a/src/auth/auth_db_migrations.c b/src/auth/auth_db_migrations.c
index be369b1e..041e804f 100644
--- a/src/auth/auth_db_migrations.c
+++ b/src/auth/auth_db_migrations.c
@@ -2695,6 +2695,18 @@ int auth_db_apply_migrations(int current_version, const char *db_path) {
       }
    }
 
+   /* v67 — conversations.context_watermark_msg_id (compaction watermark, replaces
+    * fork-on-compaction) + one-time unlock of legacy split-archived conversations.
+    * Idempotent ALTER (probes PRAGMA table_info). */
+   bool v67_ok = (current_version >= 67);
+   if (current_version < 67) {
+      if (auth_db_migrations_v67(s_db.db) == AUTH_DB_SUCCESS) {
+         v67_ok = true;
+      } else {
+         OLOG_ERROR("auth_db: v67 migration (compaction watermark) failed");
+      }
+   }
+
    /* Log migration if upgrading from an older version */
    if (current_version > 0 && current_version < AUTH_DB_SCHEMA_VERSION) {
       OLOG_INFO("auth_db: migrated schema from v%d to v%d", current_version,
@@ -2715,7 +2727,7 @@ int auth_db_apply_migrations(int current_version, const char *db_path) {
     * Never downgrade — prevents old code from corrupting a newer DB. */
    const bool ready_to_bump = v48_ok && v49_ok && v50_ok && v51_ok && v52_ok && v53_ok && v54_ok &&
                               v55_ok && v56_ok && v57_ok && v58_ok && v59_ok && v60_ok && v61_ok &&
-                              v62_ok && v63_ok && v64_ok && v65_ok && v66_ok;
+                              v62_ok && v63_ok && v64_ok && v65_ok && v66_ok && v67_ok;
    if (current_version < AUTH_DB_SCHEMA_VERSION && ready_to_bump) {
       rc = sqlite3_exec(s_db.db, "DELETE FROM schema_version", NULL, NULL, &errmsg);
       if (rc != SQLITE_OK) {
diff --git a/src/auth/auth_db_migrations_v67.c b/src/auth/auth_db_migrations_v67.c
new file mode 100644
index 00000000..7736e679
--- /dev/null
+++ b/src/auth/auth_db_migrations_v67.c
@@ -0,0 +1,102 @@
+/*
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ * By contributing to this project, you agree to license your contributions
+ * under the GPLv3 (or any later version) or any future licenses chosen by
+ * the project author(s).
+ *
+ * Schema migration v67: add conversations.context_watermark_msg_id and unlock
+ * legacy split-archived conversations.
+ *
+ * The compaction-watermark model replaces fork-on-compaction: instead of
+ * archiving a conversation and creating a continuation row, compaction now
+ * records a watermark (the last compacted message id) on the SAME row, and
+ * reload bounds context to messages after the watermark.  This migration:
+ *   1. Adds `context_watermark_msg_id INTEGER NOT NULL DEFAULT 0` (0 = never
+ *      compacted -> load all, the pre-existing behavior; the zero-risk gate).
+ *   2. One-time unlocks every conversation that the old split path archived.
+ *      `is_archived = 1` was only ever written by conv_db_create_continuation
+ *      (the split), so clearing it makes those (now read-only) conversations
+ *      writable again.  `continued_from` is left intact as a historical
+ *      breadcrumb for sidebar chain rendering.
+ *
+ * ALTER TABLE ADD COLUMN is NOT idempotent (it errors on a duplicate column),
+ * and the migration ladder hard-gates the schema_version bump on this function
+ * returning AUTH_DB_SUCCESS, so we probe PRAGMA table_info first and skip the
+ * ALTER when the column already exists.  A literal DEFAULT keeps SQLite on the
+ * fast ALTER path (no table rewrite).
+ */
+
+#define AUTH_DB_INTERNAL_ALLOWED
+
+#include <sqlite3.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "auth/auth_db.h"
+#include "auth/auth_db_internal.h"
+#include "logging.h"
+
+/* Report whether @table has a column named @col (false also when the probe cannot
+ * be prepared).  @table is a fixed literal, never user input: PRAGMA arguments
+ * cannot be bound, so the name is formatted straight into the statement text. */
+static bool conv_column_exists(sqlite3 *db, const char *table, const char *col) {
+   char pragma[128];
+   sqlite3_stmt *probe = NULL;
+   snprintf(pragma, sizeof(pragma), "PRAGMA table_info(%s)", table);
+   if (sqlite3_prepare_v2(db, pragma, -1, &probe, NULL) != SQLITE_OK) {
+      return false;
+   }
+
+   bool present = false;
+   while (!present && sqlite3_step(probe) == SQLITE_ROW) {
+      /* Result column 1 of table_info is the column name. */
+      const char *name = (const char *)sqlite3_column_text(probe, 1);
+      present = (name != NULL) && (strcmp(name, col) == 0);
+   }
+   sqlite3_finalize(probe);
+   return present;
+}
+
+/* Apply schema v67 to @db: add conversations.context_watermark_msg_id when it is
+ * missing, then clear is_archived on every archived conversation.  Returns
+ * AUTH_DB_SUCCESS, or AUTH_DB_FAILURE for a NULL handle / failed statement. */
+int auth_db_migrations_v67(sqlite3 *db) {
+   static const char add_col_sql[] = "ALTER TABLE conversations ADD COLUMN "
+                                     "context_watermark_msg_id INTEGER NOT NULL DEFAULT 0";
+   static const char unlock_sql[] =
+       "UPDATE conversations SET is_archived = 0 WHERE is_archived = 1";
+   char *err = NULL;
+
+   if (db == NULL) {
+      return AUTH_DB_FAILURE;
+   }
+   /* ADD COLUMN fails on a duplicate column, so only run it when the probe misses. */
+   if (!conv_column_exists(db, "conversations", "context_watermark_msg_id") &&
+       sqlite3_exec(db, add_col_sql, NULL, NULL, &err) != SQLITE_OK) {
+      OLOG_ERROR("auth_db: v67 ALTER (context_watermark_msg_id) failed: %s",
+                 err ? err : "unknown");
+      sqlite3_free(err);
+      return AUTH_DB_FAILURE;
+   }
+
+   /* Unlock pass: re-running matches no rows once the flags are cleared. */
+   if (sqlite3_exec(db, unlock_sql, NULL, NULL, &err) != SQLITE_OK) {
+      OLOG_ERROR("auth_db: v67 unlock (is_archived) failed: %s", err ? err : "unknown");
+      sqlite3_free(err);
+      return AUTH_DB_FAILURE;
+   }
+   return AUTH_DB_SUCCESS;
+}
diff --git a/src/auth/auth_db_schema.c b/src/auth/auth_db_schema.c
index 6dfe564b..75a9a146 100644
--- a/src/auth/auth_db_schema.c
+++ b/src/auth/auth_db_schema.c
@@ -153,6 +153,10 @@ static const char *SCHEMA_SQL =
     "   context_max INTEGER DEFAULT 0,"
     "   continued_from INTEGER DEFAULT NULL,"
     "   compaction_summary TEXT DEFAULT NULL,"
+    /* context_watermark_msg_id (v67): last compacted message id.  0 = never
+     * compacted -> reload loads all messages (pre-watermark behavior).  When > 0,
+     * context restore is bounded to messages with id > watermark + the summary. */
+    "   context_watermark_msg_id INTEGER NOT NULL DEFAULT 0,"
     "   llm_type TEXT DEFAULT NULL,"
     "   cloud_provider TEXT DEFAULT NULL,"
     "   model TEXT DEFAULT NULL,"
diff --git a/src/auth/auth_db_statements.c b/src/auth/auth_db_statements.c
index 0eb47701..53d5d241 100644
--- a/src/auth/auth_db_statements.c
+++ b/src/auth/auth_db_statements.c
@@ -233,7 +233,7 @@ int auth_db_prepare_statements(void) {
        "SELECT id, user_id, title, created_at, updated_at, message_count, is_archived, "
        "context_tokens, context_max, continued_from, compaction_summary, "
        "llm_type, cloud_provider, model, tools_mode, thinking_mode, is_private, origin, "
-       "reasoning_effort "
+       "reasoning_effort, context_watermark_msg_id "
        "FROM conversations WHERE id = ?",
        -1, &s_db.stmt_conv_get, NULL);
    if (rc != SQLITE_OK) {
@@ -348,6 +348,21 @@ int auth_db_prepare_statements(void) {
       return AUTH_DB_FAILURE;
    }
 
+   /* v67: same as stmt_msg_get but bounded to id > ? — the compaction-watermark
+    * restore path (load only post-watermark messages). Full column set so tool /
+    * reasoning rehydration works identically to the unbounded load. */
+   rc = sqlite3_prepare_v2(
+       s_db.db,
+       "SELECT m.id, m.conversation_id, m.role, m.content, m.tool_calls, m.tool_call_id, "
+       "m.reasoning, m.created_at FROM messages m "
+       "INNER JOIN conversations c ON m.conversation_id = c.id "
+       "WHERE m.conversation_id = ? AND c.user_id = ? AND m.id > ? ORDER BY m.id ASC",
+       -1, &s_db.stmt_msg_get_after, NULL);
+   if (rc != SQLITE_OK) {
+      OLOG_ERROR("auth_db: prepare msg_get_after failed: %s", sqlite3_errmsg(s_db.db));
+      return AUTH_DB_FAILURE;
+   }
+
    /* Admin-only: get messages without user ownership check */
    rc = sqlite3_prepare_v2(
        s_db.db,
@@ -377,6 +392,19 @@ int auth_db_prepare_statements(void) {
       return AUTH_DB_FAILURE;
    }
 
+   /* v67: compaction watermark + summary, on the same conversation row (no fork).
+    * The trailing `? >= context_watermark_msg_id` is a monotonic guard so a stale
+    * async compaction can't rewind a watermark already advanced by a later pass. */
+   rc = sqlite3_prepare_v2(
+       s_db.db,
+       "UPDATE conversations SET compaction_summary = ?, context_watermark_msg_id = ? "
+       "WHERE id = ? AND user_id = ? AND ? >= context_watermark_msg_id",
+       -1, &s_db.stmt_conv_set_watermark, NULL);
+   if (rc != SQLITE_OK) {
+      OLOG_ERROR("auth_db: prepare conv_set_watermark failed: %s", sqlite3_errmsg(s_db.db));
+      return AUTH_DB_FAILURE;
+   }
+
    rc = sqlite3_prepare_v2(
        s_db.db,
        "INSERT INTO conversations (user_id, title, created_at, updated_at, origin, anchor_date) "
@@ -2295,12 +2323,16 @@ void auth_db_finalize_statements(void) {
       sqlite3_finalize(s_db.stmt_msg_add);
    if (s_db.stmt_msg_get)
       sqlite3_finalize(s_db.stmt_msg_get);
+   if (s_db.stmt_msg_get_after)
+      sqlite3_finalize(s_db.stmt_msg_get_after);
    if (s_db.stmt_msg_get_admin)
       sqlite3_finalize(s_db.stmt_msg_get_admin);
    if (s_db.stmt_conv_update_meta)
       sqlite3_finalize(s_db.stmt_conv_update_meta);
    if (s_db.stmt_conv_update_context)
       sqlite3_finalize(s_db.stmt_conv_update_context);
+   if (s_db.stmt_conv_set_watermark)
+      sqlite3_finalize(s_db.stmt_conv_set_watermark);
    if (s_db.stmt_conv_create_origin)
       sqlite3_finalize(s_db.stmt_conv_create_origin);
    if (s_db.stmt_conv_reassign)
diff --git a/src/config/config_defaults.c b/src/config/config_defaults.c
index faff7ab6..8faef730 100644
--- a/src/config/config_defaults.c
+++ b/src/config/config_defaults.c
@@ -495,6 +495,19 @@ void config_set_defaults(dawn_config_t *config) {
    config->memory.focus_injection.dominant_token_heuristic.threshold = 0.60f;
    config->memory.focus_injection.dominant_token_heuristic.base_penalty = 0.40f;
 
+   /* Unified cross-source `recall` tool (docs/CROSS_TOOL_RECALL_DESIGN.md §4.5).
+    * Deep gather: bigger than per-turn (top_k 12 / 10240 / 0.4) but bounded —
+    * 24 KB ≈ 6 K tokens fed back, deliberately NOT the 64 KB cap (no reranker to
+    * defend a long tail).  min_score lower so weaker-but-relevant hits surface.
+    * per_source_max 16 keeps any one source from crowding out the rest; at the
+    * current 6 live adapters that is a 96-candidate pool, and 16*MAX_FOCUS_SOURCES
+    * (16) = 256 keeps the dominant-token heuristic alive even at a full registry
+    * (see the validation cap in config_validate.c). */
+   config->memory.recall.top_k = 40;
+   config->memory.recall.budget_bytes = 24576;
+   config->memory.recall.min_score = 0.25f;
+   config->memory.recall.per_source_max = 16;
+
    /* Phase 2 entity-merge auto-merge gate.  auto_threshold=0.90 stays
     * conservative because auto-merges land with no human approval.
     * review_threshold=0.50 is permissive — proposals go through the
diff --git a/src/config/config_env.c b/src/config/config_env.c
index 9e17d5f1..21679c53 100644
--- a/src/config/config_env.c
+++ b/src/config/config_env.c
@@ -2280,15 +2280,11 @@ int config_write_toml(const dawn_config_t *config, const char *path) {
    fprintf(fp, "bm25_enabled = %s\n", config->memory.bm25_enabled ? "true" : "false");
    fprintf(fp, "category_threshold = %.2f\n", config->memory.category_threshold);
    fprintf(fp, "search_score_floor = %.2f\n", config->memory.search_score_floor);
-
-   fprintf(fp, "\n[memory.graph_retrieval]\n");
-   fprintf(fp, "enabled = %s\n", config->memory.graph_retrieval.enabled ? "true" : "false");
-   fprintf(fp, "entity_grounding_bonus = %.2f\n",
-           config->memory.graph_retrieval.entity_grounding_bonus);
-   fprintf(fp, "max_facts_per_query = %d\n", config->memory.graph_retrieval.max_facts_per_query);
-   fprintf(fp, "use_query_scoring = %s\n",
-           config->memory.graph_retrieval.use_query_scoring ? "true" : "false");
-   fprintf(fp, "entity_bonus = %.2f\n", config->memory.graph_retrieval.entity_bonus);
+   /* backfill_on_startup / model_id / recompute_* belong to the embeddings
+    * sub-table: the parser reads them from [memory.embeddings] (config_parser.c),
+    * NOT graph_retrieval.  Writing them here keeps writer + parser in the same
+    * section so a saved config round-trips instead of emitting "unknown key"
+    * warnings and silently falling back to defaults. */
    fprintf(fp, "backfill_on_startup = %s\n",
            config->memory.embedding_backfill_on_startup ? "true" : "false");
    if (config->memory.model_id[0]) {
@@ -2301,6 +2297,15 @@ int config_write_toml(const dawn_config_t *config, const char *path) {
    fprintf(fp, "recompute_batch_size = %d\n", config->memory.recompute_batch_size);
    fprintf(fp, "recompute_batch_sleep_ms = %d\n", config->memory.recompute_batch_sleep_ms);
 
+   fprintf(fp, "\n[memory.graph_retrieval]\n");
+   fprintf(fp, "enabled = %s\n", config->memory.graph_retrieval.enabled ? "true" : "false");
+   fprintf(fp, "entity_grounding_bonus = %.2f\n",
+           config->memory.graph_retrieval.entity_grounding_bonus);
+   fprintf(fp, "max_facts_per_query = %d\n", config->memory.graph_retrieval.max_facts_per_query);
+   fprintf(fp, "use_query_scoring = %s\n",
+           config->memory.graph_retrieval.use_query_scoring ? "true" : "false");
+   fprintf(fp, "entity_bonus = %.2f\n", config->memory.graph_retrieval.entity_bonus);
+
    fprintf(fp, "\n[memory.recovery]\n");
    fprintf(fp, "enabled = %s\n", config->memory.recovery_enabled ? "true" : "false");
    fprintf(fp, "idle_threshold_seconds = %d\n", config->memory.recovery_idle_threshold_seconds);
diff --git a/src/config/config_parser.c b/src/config/config_parser.c
index 529885df..d5880104 100644
--- a/src/config/config_parser.c
+++ b/src/config/config_parser.c
@@ -1145,6 +1145,7 @@ static void parse_memory(toml_table_t *table, memory_config_t *config) {
                                              "extraction_timeout_ms",
                                              "paraphrase_dedup_enabled",
                                              "paraphrase_dedup_threshold",
+                                             "note_extraction_guard",
                                              "pruning_enabled",
                                              "prune_superseded_days",
                                              "prune_stale_days",
@@ -1515,6 +1516,22 @@ static void parse_memory(toml_table_t *table, memory_config_t *config) {
       }
    }
 
+   /* Parse [memory.recall] sub-table — unified cross-source recall tool
+    * (docs/CROSS_TOOL_RECALL_DESIGN.md).  Deep-gather trim limits, separate
+    * from the per-turn focus_injection block above. */
+   toml_table_t *recall = toml_table_in(table, "recall");
+   if (recall) {
+      static const char *const recall_keys[] = { "top_k", "budget_bytes", "min_score",
+                                                 "per_source_max", NULL };
+      warn_unknown_keys(recall, "memory.recall", recall_keys);
+
+      recall_config_t *rc = &config->recall;
+      PARSE_INT(recall, "top_k", rc->top_k);
+      PARSE_INT(recall, "budget_bytes", rc->budget_bytes);
+      PARSE_DOUBLE(recall, "min_score", rc->min_score);
+      PARSE_INT(recall, "per_source_max", rc->per_source_max);
+   }
+
    /* Parse [memory.entity_merge] sub-table — Phase 2 auto-merge gate. */
    toml_table_t *emerge = toml_table_in(table, "entity_merge");
    if (emerge) {
diff --git a/src/config/config_validate.c b/src/config/config_validate.c
index 9654a6ee..a2322010 100644
--- a/src/config/config_validate.c
+++ b/src/config/config_validate.c
@@ -187,6 +187,14 @@ int config_validate(const dawn_config_t *config,
    VALIDATE_RANGE_INT("memory.focus_injection.top_k", config->memory.focus_injection.top_k, 1, 64);
    VALIDATE_RANGE_INT("memory.focus_injection.summary_max_scan",
                       config->memory.focus_injection.summary_max_scan, 256, 16384);
+   /* Unified recall tool deep-gather limits.  per_source_max upper bound 16 keeps
+    * 16 * MAX_FOCUS_SOURCES(16) = 256 — the dominant-token heuristic pool ceiling
+    * — even in the worst case of a full adapter registry. */
+   VALIDATE_RANGE_INT("memory.recall.top_k", config->memory.recall.top_k, 1, 64);
+   VALIDATE_RANGE_INT("memory.recall.budget_bytes", config->memory.recall.budget_bytes, 1024,
+                      65536);
+   VALIDATE_RANGE_INT("memory.recall.per_source_max", config->memory.recall.per_source_max, 1, 16);
+   VALIDATE_RANGE_FLOAT("memory.recall.min_score", config->memory.recall.min_score, 0.0f, 1.0f);
    VALIDATE_RANGE_FLOAT("memory.focus_injection.min_score",
                         config->memory.focus_injection.min_score, 0.0f, 1.0f);
    VALIDATE_RANGE_FLOAT("memory.focus_injection.weight_semantic",
diff --git a/src/core/focus/focus_candidate_helpers.c b/src/core/focus/focus_candidate_helpers.c
index 6bd9a1ff..7095c5f7 100644
--- a/src/core/focus/focus_candidate_helpers.c
+++ b/src/core/focus/focus_candidate_helpers.c
@@ -28,6 +28,23 @@
 #include "dawn_error.h"
 #include "logging.h"
 
+size_t focus_utf8_safe_cap(const char *text, size_t max_bytes) {
+   /* Byte length of the longest prefix of `text` that fits in `max_bytes` without
+    * ending inside a multi-byte UTF-8 character (0 for NULL). A split character
+    * is invalid UTF-8, which breaks a WebSocket text frame downstream. */
+   const size_t len = (text != NULL) ? strlen(text) : 0;
+   if (len <= max_bytes)
+      return len; /* NULL, or the whole string already fits. */
+   /* text[max_bytes] is the first dropped byte: while it is a continuation byte
+    * (0b10xxxxxx) step back to the lead byte. Valid input needs at most 3 steps. */
+   size_t end = max_bytes;
+   for (; end > 0; end--) {
+      if (((unsigned char)text[end] & 0xC0) != 0x80)
+         break;
+   }
+   return end;
+}
+
 void focus_candidate_cleanup_on_failure(focus_candidate_t *c) {
    if (c == NULL)
       return;
@@ -74,7 +91,11 @@ int focus_candidate_init(focus_candidate_t *c,
    c->item_timestamp = item_timestamp;
 
    const size_t text_len = strlen(text);
-   const size_t copy_len = (text_len > FOCUS_TEXT_MAX_BYTES) ? FOCUS_TEXT_MAX_BYTES : text_len;
+   /* UTF-8-safe truncation: a raw byte cut at FOCUS_TEXT_MAX_BYTES could split a
+    * multi-byte character, yielding invalid UTF-8 that later breaks the
+    * context_injection WebSocket text frame (browser rejects it, drops the
+    * connection). Back the cut up to a character boundary. */
+   const size_t copy_len = focus_utf8_safe_cap(text, FOCUS_TEXT_MAX_BYTES);
    c->text = malloc(copy_len + 1);
    if (c->text == NULL) {
       focus_candidate_cleanup_on_failure(c);
diff --git a/src/core/focus/focus_source.c b/src/core/focus/focus_source.c
index 5d20ae7b..4d4e9c61 100644
--- a/src/core/focus/focus_source.c
+++ b/src/core/focus/focus_source.c
@@ -227,14 +227,15 @@ static int candidate_byte_cost(const char *text) {
  * focus_compose — pipeline
  * ============================================================================= */
 
-int focus_compose(int user_id,
-                  bool include_private,
-                  const char *query_text,
-                  const float *query_embedding,
-                  size_t embed_dim,
-                  time_t now,
-                  int per_source_max_candidates,
-                  focus_compose_result_t *out_result) {
+int focus_compose_ex(int user_id,
+                     bool include_private,
+                     const char *query_text,
+                     const float *query_embedding,
+                     size_t embed_dim,
+                     time_t now,
+                     int per_source_max_candidates,
+                     const focus_limits_t *limits,
+                     focus_compose_result_t *out_result) {
    if (out_result == NULL)
       return FAILURE;
 
@@ -459,9 +460,18 @@ int focus_compose(int user_id,
     * Trim — min_score, top_k, token-budget
     * ===================================================================== */
    const focus_injection_config_t *fi = &g_config.memory.focus_injection;
+   /* Per-call overrides (recall tool) win over config.  NULL limits, a zero/negative
+    * budget_bytes or top_k, or a NEGATIVE min_score falls back to the per-turn config
+    * value (min_score 0.0 is a valid override).  Only the THREE trim values are
+    * overridable — ranking weights stay config-sourced and shared with the per-turn path. */
+   const int cfg_budget = (limits && limits->budget_bytes > 0) ? limits->budget_bytes
+                                                               : fi->focus_budget_bytes;
+   const int cfg_top_k = (limits && limits->top_k > 0) ? limits->top_k : fi->top_k;
+   const float min_score = (limits && limits->min_score >= 0.0f) ? limits->min_score
+                                                                 : fi->min_score;
    int kept = 0;
-   int budget_left = fi->focus_budget_bytes;
-   const int top_k = (fi->top_k > 0) ? fi->top_k : pool_count;
+   int budget_left = cfg_budget;
+   const int top_k = (cfg_top_k > 0) ? cfg_top_k : pool_count;
 
    /* `keep[]` marks pool indices that survive trimming. */
    bool *keep = calloc((size_t)pool_count, sizeof(*keep));
@@ -478,7 +488,7 @@ int focus_compose(int user_id,
 
    for (int rank = 0; rank < pool_count && kept < top_k; rank++) {
       const ranker_entry_t *e = &order[rank];
-      if (e->score < fi->min_score)
+      if (e->score < min_score)
          break; /* Sorted desc — once below, all rest are below. */
 
       const int cost = candidate_byte_cost(pool[e->idx].text);
@@ -586,6 +596,19 @@ int focus_compose(int user_id,
    return SUCCESS;
 }
 
+int focus_compose(int user_id,
+                  bool include_private,
+                  const char *query_text,
+                  const float *query_embedding,
+                  size_t embed_dim,
+                  time_t now,
+                  int per_source_max_candidates,
+                  focus_compose_result_t *out_result) {
+   const focus_limits_t *no_overrides = NULL; /* NULL => config-driven trim limits. */
+   return focus_compose_ex(user_id, include_private, query_text, query_embedding, embed_dim, now,
+                           per_source_max_candidates, no_overrides, out_result);
+}
+
 void focus_result_free(focus_compose_result_t *result) {
    if (result == NULL)
       return;
diff --git a/src/dawn.c b/src/dawn.c
index c668ccc2..66924a75 100644
--- a/src/dawn.c
+++ b/src/dawn.c
@@ -95,6 +95,7 @@
 #include "tools/mcp_bridge.h"
 #endif
 #ifdef DAWN_ENABLE_CODE_PROJECTS
+#include "tools/code_project_namemap.h"
 #include "tools/code_project_service.h"
 #endif
 #include "tts/text_to_speech.h"
@@ -2398,6 +2399,19 @@ int main(int argc, char *argv[]) {
          auth_db_mcp_grant_all_admins(g_config.mcp.servers[i].alias);
       }
    }
+
+#ifdef DAWN_ENABLE_CODE_PROJECTS
+   /* Build the cbm name-translation map (clean project name <-> cbm path slug)
+    * now that the DB is open AND cbm is connected (it came up during
+    * tools_register_all). This is the REAL startup capture: the call inside
+    * mcp_bridge_init() runs before auth_db_init(), so code_project_db can't be
+    * read there and the map would stay empty until the first index — leaving the
+    * LLM to use raw cbm slugs after a plain restart. Harmless no-op if cbm wasn't
+    * up at boot; mcp_bridge_ensure_connected() rebuilds it on lazy reconnect. */
+   if (auth_db_ready) {
+      code_project_namemap_capture();
+   }
+#endif
 #endif
 
    /* OTA subsystem (after the DB is ready — it reconciles device state).
diff --git a/src/llm/llm_command_parser.c b/src/llm/llm_command_parser.c
index 44655b7a..17bd0f0b 100644
--- a/src/llm/llm_command_parser.c
+++ b/src/llm/llm_command_parser.c
@@ -67,7 +67,14 @@ static const char *NATIVE_TOOLS_RULES =
    "5. Search results include snippets with key information. Answer from snippets directly.\n"
    "   Only fetch a URL if the user asks for details about a specific article.\n"
    "6. Do NOT lead responses with weather, time, or location info unless explicitly asked.\n"
-   "   Vary your greetings and openers. The user's context below is for tool use only.\n";
+   "   Vary your greetings and openers. The user's context below is for tool use only.\n"
+   "7. For \"what do we know / what's the status / where do things stand / tell me about\" "
+   "questions about a topic, person, or project, call `recall` FIRST. It gathers across memory, "
+   "notes, documents, and calendar in one pass and points you to the exact sources.\n"
+   "   Do NOT jump straight to a single memory or document search for these — that misses "
+   "cross-source context. Example: \"what's my wrist status?\" → call `recall` first, then drill "
+   "into a specific source only if needed. Go direct to one source only when you already know "
+   "exactly which item holds the answer.\n";
 
 // clang-format off
 static const char *PLAN_EXECUTOR_PROMPT =
diff --git a/src/llm/llm_context.c b/src/llm/llm_context.c
index de2ca7cb..6520425d 100644
--- a/src/llm/llm_context.c
+++ b/src/llm/llm_context.c
@@ -164,6 +164,19 @@ static const model_context_entry_t s_gemini_models[] = {
 /* Re-query local context size every 5 minutes (matches model list TTL) */
 #define LLM_CONTEXT_LOCAL_TTL 300
 
+/* OpenRouter model catalog is stable (vendors rarely change context_length on a
+ * shipped model), so refresh it on a longer cadence than the local /props poll. */
+#define LLM_CONTEXT_OPENROUTER_TTL 3600 /* Re-fetch /api/v1/models hourly */
+/* Cache capacity: sized above the ~400-entry catalog so no slug is dropped by the cap. */
+#define LLM_CONTEXT_OPENROUTER_MAX_MODELS 512
+#define LLM_CONTEXT_OPENROUTER_SLUG_MAX 128 /* Max "vendor/model" slug length stored */
+
+/* One cached OpenRouter catalog entry, mapping a model slug to its context_length. */
+typedef struct {
+   char slug[LLM_CONTEXT_OPENROUTER_SLUG_MAX]; /* "vendor/model" id, copied via safe_strncpy */
+   int context_length;                         /* Catalog context_length; stored only when > 0 */
+} openrouter_model_entry_t;
+
 static struct {
    bool initialized;
    int local_context_size;            /* Cached context size */
@@ -174,7 +187,15 @@ static struct {
    uint32_t local_context_generation; /* Incremented on invalidation, detects stale writes */
    int last_prompt_tokens;            /* Last known prompt tokens (for WebUI) */
    int last_context_size;             /* Last known context size (for WebUI) */
-   pthread_mutex_t mutex;             /* Protects state */
+
+   /* OpenRouter /api/v1/models catalog cache (used only under gateway mode) */
+   openrouter_model_entry_t or_models[LLM_CONTEXT_OPENROUTER_MAX_MODELS];
+   int or_model_count;   /* Number of valid entries in or_models */
+   bool or_queried;      /* True once a fetch has populated (or attempted to populate) the cache */
+   bool or_querying;     /* True while a thread is fetching the catalog (single-flight) */
+   time_t or_queried_at; /* When the catalog was last fetched (for TTL) */
+
+   pthread_mutex_t mutex; /* Protects state */
 } s_state = {
    .initialized = false,
    .local_context_size = LLM_CONTEXT_DEFAULT_LOCAL,
@@ -185,6 +206,10 @@ static struct {
    .local_context_generation = 0,
    .last_prompt_tokens = 0,
    .last_context_size = 0,
+   .or_model_count = 0,
+   .or_queried = false,
+   .or_querying = false,
+   .or_queried_at = 0,
 };
 
 /* Per-session token tracking */
@@ -206,6 +231,12 @@ static int s_session_token_count = 0;
 
 #define LLM_CONTEXT_MAX_RESPONSE_SIZE (64 * 1024) /* 64KB max response */
 
+/* The OpenRouter /api/v1/models catalog is large (hundreds of KB — ~400 models
+ * with rich metadata), so it needs a much bigger cap than the local /props poll
+ * or the body would be truncated and json-c parse would fail. */
+#define LLM_CONTEXT_OPENROUTER_MAX_RESPONSE_SIZE (2 * 1024 * 1024) /* 2MB max catalog response */
+#define LLM_CONTEXT_OPENROUTER_MODELS_URL "https://openrouter.ai/api/v1/models"
+
 /* =============================================================================
  * Lifecycle Functions
  * ============================================================================= */
@@ -363,6 +394,151 @@ void llm_context_refresh_local(void) {
    OLOG_INFO("llm_context: Local context cache invalidated, will re-query on next use");
 }
 
+/* =============================================================================
+ * OpenRouter Model Catalog (gateway mode — exact context_length per slug)
+ * ============================================================================= */
+
+/**
+ * @brief Fetch the OpenRouter model catalog and populate the slug->context cache.
+ *
+ * GETs https://openrouter.ai/api/v1/models, parses data[].id + data[].context_length
+ * with json-c, and fills s_state.or_models[]. The Bearer key is optional for this
+ * endpoint and is sent only when configured. On curl error, parse failure, or zero
+ * usable entries it returns FAILURE and leaves the cache as it was; callers then fall
+ * back to the offline vendor-strip probe, so there is no regression when offline.
+ *
+ * Must be called WITHOUT s_state.mutex held (does network I/O); it takes the mutex only
+ * to swap in the results. Not reentrant (function-static scratch table): serialize calls.
+ */
+static int llm_context_query_openrouter_models(void) {
+   /* Key is optional for /models, but pass it if we have one. */
+   const char *api_key = g_secrets.openrouter_api_key[0] != '\0' ? g_secrets.openrouter_api_key
+                                                                 : NULL;
+
+   CURL *curl = curl_easy_init();
+   if (!curl) {
+      OLOG_WARNING("llm_context: Failed to init CURL for OpenRouter /models query");
+      return FAILURE;
+   }
+
+   curl_buffer_t response;
+   curl_buffer_init_with_max(&response, LLM_CONTEXT_OPENROUTER_MAX_RESPONSE_SIZE);
+
+   struct curl_slist *headers = NULL;
+   char auth_header[CONFIG_API_KEY_MAX + 32];
+   if (api_key) {
+      snprintf(auth_header, sizeof(auth_header), "Authorization: Bearer %s", api_key);
+      headers = curl_slist_append(headers, auth_header);
+   }
+
+   curl_easy_setopt(curl, CURLOPT_URL, LLM_CONTEXT_OPENROUTER_MODELS_URL);
+   if (headers) {
+      curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
+   }
+   curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, curl_buffer_write_callback);
+   curl_easy_setopt(curl, CURLOPT_WRITEDATA, &response);
+   curl_easy_setopt(curl, CURLOPT_TIMEOUT, 10L);
+   curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, 5L);
+
+   CURLcode res = curl_easy_perform(curl);
+   if (headers) {
+      curl_slist_free_all(headers);
+   }
+   curl_easy_cleanup(curl);
+
+   if (res != CURLE_OK) {
+      OLOG_WARNING("llm_context: Failed to query OpenRouter /models: %s", curl_easy_strerror(res));
+      curl_buffer_free(&response);
+      return FAILURE;
+   }
+
+   if (!response.data) {
+      curl_buffer_free(&response);
+      return FAILURE;
+   }
+
+   struct json_object *root = json_tokener_parse(response.data);
+   curl_buffer_free(&response);
+
+   if (!root) {
+      OLOG_WARNING("llm_context: Failed to parse OpenRouter /models response");
+      return FAILURE;
+   }
+
+   struct json_object *data = NULL;
+   if (!json_object_object_get_ex(root, "data", &data) ||
+       !json_object_is_type(data, json_type_array)) {
+      OLOG_WARNING("llm_context: OpenRouter /models response missing 'data' array");
+      json_object_put(root);
+      return FAILURE;
+   }
+
+   /* Parse into a function-static scratch table first, then swap in under the mutex. */
+   static openrouter_model_entry_t parsed[LLM_CONTEXT_OPENROUTER_MAX_MODELS];
+   int parsed_count = 0;
+
+   int n = json_object_array_length(data);
+   for (int i = 0; i < n && parsed_count < LLM_CONTEXT_OPENROUTER_MAX_MODELS; i++) {
+      struct json_object *entry = json_object_array_get_idx(data, i);
+      struct json_object *id_obj = NULL;
+      struct json_object *ctx_obj = NULL;
+
+      if (!json_object_object_get_ex(entry, "id", &id_obj)) {
+         continue;
+      }
+      if (!json_object_object_get_ex(entry, "context_length", &ctx_obj)) {
+         continue;
+      }
+
+      const char *id = json_object_get_string(id_obj);
+      int ctx_len = json_object_get_int(ctx_obj);
+      if (!id || id[0] == '\0' || ctx_len <= 0) {
+         continue;
+      }
+
+      safe_strncpy(parsed[parsed_count].slug, id, sizeof(parsed[parsed_count].slug));
+      parsed[parsed_count].context_length = ctx_len;
+      parsed_count++;
+   }
+
+   json_object_put(root);
+
+   if (parsed_count == 0) {
+      OLOG_WARNING("llm_context: OpenRouter /models returned no usable entries");
+      return FAILURE;
+   }
+
+   /* Swap the parsed catalog into the shared cache under the mutex. */
+   pthread_mutex_lock(&s_state.mutex);
+   memcpy(s_state.or_models, parsed, sizeof(openrouter_model_entry_t) * parsed_count);
+   s_state.or_model_count = parsed_count;
+   pthread_mutex_unlock(&s_state.mutex);
+
+   OLOG_INFO("llm_context: Cached %d OpenRouter model context sizes", parsed_count);
+   return SUCCESS;
+}
+
+/**
+ * @brief Resolve a slug to its cached OpenRouter context_length.
+ *
+ * Scans s_state.or_models[] in order, comparing the full "vendor/model" slug
+ * case-insensitively; the first match wins. Caller must hold s_state.mutex.
+ *
+ * @param slug OpenRouter model id (e.g. "deepseek/deepseek-chat"); may be NULL
+ * @return context_length in tokens, or 0 for a NULL/empty slug or a cache miss.
+ */
+static int openrouter_lookup_context(const char *slug) {
+   if (slug == NULL || *slug == '\0')
+      return 0;
+
+   for (int idx = 0; idx < s_state.or_model_count; idx++) {
+      const openrouter_model_entry_t *entry = &s_state.or_models[idx];
+      if (strcasecmp(slug, entry->slug) == 0)
+         return entry->context_length;
+   }
+   return 0;
+}
+
 int llm_context_get_size(llm_type_t type, cloud_provider_t provider, const char *model) {
    if (type == LLM_LOCAL) {
       pthread_mutex_lock(&s_state.mutex);
@@ -441,21 +617,53 @@ int llm_context_get_size(llm_type_t type, cloud_provider_t provider, const char
          size = LLM_CONTEXT_DEFAULT_GEMINI;
       }
    } else if (provider == CLOUD_PROVIDER_OPENROUTER) {
-      /* OpenRouter IDs are "vendor/model".  Best-effort: strip the vendor prefix
-       * and probe the known tables (many OpenRouter models map to names we know).
-       * Otherwise use a conservative default.  Phase 2 will supply the exact
-       * context_length from the /api/v1/models catalog. */
-      const char *bare = model ? strrchr(model, '/') : NULL;
-      bare = bare ? bare + 1 : model;
-      size = lookup_model_context(s_openai_models, bare);
-      if (size == 0) {
-         size = lookup_model_context(s_claude_models, bare);
-      }
-      if (size == 0) {
-         size = lookup_model_context(s_gemini_models, bare);
+      /* OpenRouter IDs are "vendor/model".  PRIMARY: look the exact slug up in the
+       * fetched /api/v1/models catalog (querying/refreshing it on a TTL).  This is
+       * the only source that's correct for OpenRouter-only vendors (mistralai/
+       * deepseek/qwen/meta-llama) and any slug that doesn't prefix-match our direct
+       * tables. */
+      pthread_mutex_lock(&s_state.mutex);
+
+      time_t now = time(NULL);
+      bool ttl_expired = s_state.or_queried &&
+                         (now - s_state.or_queried_at) >= LLM_CONTEXT_OPENROUTER_TTL;
+      bool need_query = !s_state.or_queried || ttl_expired;
+
+      /* Single-flight guard: only one thread fetches the (large) catalog at a time. */
+      if (need_query && !s_state.or_querying) {
+         s_state.or_querying = true;
+         pthread_mutex_unlock(&s_state.mutex);
+
+         /* Network I/O without the mutex held; the fetch swaps results in under it. */
+         (void)llm_context_query_openrouter_models();
+
+         pthread_mutex_lock(&s_state.mutex);
+         s_state.or_querying = false;
+         /* Mark queried regardless of outcome so a hard failure doesn't stampede
+          * every turn; the TTL gates the next retry. */
+         s_state.or_queried = true;
+         s_state.or_queried_at = now;
       }
+
+      size = openrouter_lookup_context(model);
+      pthread_mutex_unlock(&s_state.mutex);
+
+      /* FALLBACK (offline / cache miss / pre-fetch): strip the vendor prefix and
+       * probe the known direct tables, then a conservative default.  Preserves the
+       * old behavior so there's no regression when the catalog is unavailable. */
       if (size == 0) {
-         size = LLM_CONTEXT_DEFAULT_OPENAI; /* conservative 128K default */
+         const char *bare = model ? strrchr(model, '/') : NULL;
+         bare = bare ? bare + 1 : model;
+         size = lookup_model_context(s_openai_models, bare);
+         if (size == 0) {
+            size = lookup_model_context(s_claude_models, bare);
+         }
+         if (size == 0) {
+            size = lookup_model_context(s_gemini_models, bare);
+         }
+         if (size == 0) {
+            size = LLM_CONTEXT_DEFAULT_OPENAI; /* conservative 128K default */
+         }
       }
    } else {
       size = LLM_CONTEXT_DEFAULT_OPENAI; /* Fallback for unknown providers */
@@ -1407,6 +1615,27 @@ int llm_context_compact(uint32_t session_id,
       summary_node_create(&node, &node_id);
    }
 
+   /* Persist the compaction watermark on the same conversation (v67 — replaces
+    * fork-on-compaction).  Reload bounds context to messages after the watermark
+    * + the summary, so no archive / no continuation row is needed.  Skip when
+    * last_msg_id is unresolved (e.g. voice path with no command-context user) —
+    * never write 0; the monotonic guard would reject it anyway. */
+   if (conv_id > 0 && last_msg_id > 0) {
+      int wm_user_id = 0;
+      session_t *wm_session = session_get_command_context();
+      if (wm_session) {
+         wm_user_id = wm_session->metrics.user_id;
+      }
+      if (wm_user_id > 0) {
+         if (conv_db_set_compaction_watermark(conv_id, wm_user_id, summary, last_msg_id) !=
+             AUTH_DB_SUCCESS) {
+            OLOG_WARNING("llm_context: failed to persist compaction watermark for conv %lld; "
+                         "next reload will load full history",
+                         (long long)conv_id);
+         }
+      }
+   }
+
    /* Add summary as assistant message with dynamic buffer */
    struct json_object *summary_msg = json_object_new_object();
    json_object_object_add(summary_msg, "role", json_object_new_string("assistant"));
diff --git a/src/memory/memory_history_loader.c b/src/memory/memory_history_loader.c
index e3bc4b9f..5d232314 100644
--- a/src/memory/memory_history_loader.c
+++ b/src/memory/memory_history_loader.c
@@ -106,7 +106,37 @@ struct json_object *memory_history_load_from_db(int64_t conv_id,
       return NULL;
    }
 
-   int rc = conv_db_get_messages(conv_id, user_id, append_message_to_history, &ctx);
+   /* v67: if the conversation carries a compaction watermark, bound the reload to
+    * post-watermark messages and prepend the summary — mirrors the WebUI restore
+    * funnel (webui_restore_conversation_context) so any loader, including the
+    * messaging forever-conversation path, stays context-bounded.  watermark == 0
+    * (never compacted) keeps the original full-history behavior. */
+   int64_t watermark = 0;
+   conversation_t conv = { 0 };
+   if (conv_db_get(conv_id, user_id, &conv) == AUTH_DB_SUCCESS) {
+      watermark = conv.context_watermark_msg_id;
+      if (watermark > 0 && conv.compaction_summary && conv.compaction_summary[0]) {
+         struct json_object *summary_msg = json_object_new_object();
+         if (summary_msg) {
+            char note[CONV_SUMMARY_MAX];
+            /* Same reconstructed [COMPACTED ...] marker as the WebUI restore path, so a
+             * reloaded messaging session also keeps a context_expand handle.  ASSISTANT
+             * role (not system): the per-turn two-system-message rebuild drops extra
+             * system messages — matches the live compaction marker so it survives. */
+            conv_db_format_compaction_context(conv_id, conv.compaction_summary, note, sizeof(note));
+            json_object_object_add(summary_msg, "role", json_object_new_string("assistant"));
+            json_object_object_add(summary_msg, "content", json_object_new_string(note));
+            json_object_array_add(ctx.array, summary_msg);
+            ctx.total_text_len += strlen(note);
+         }
+      }
+   }
+   conv_free(&conv);
+
+   int rc = (watermark > 0)
+                ? conv_db_get_messages_after(conv_id, user_id, watermark, append_message_to_history,
+                                             &ctx)
+                : conv_db_get_messages(conv_id, user_id, append_message_to_history, &ctx);
    if (rc != AUTH_DB_SUCCESS) {
       json_object_put(ctx.array);
       return NULL;
diff --git a/src/tools/code_graph_provider_cbm.c b/src/tools/code_graph_provider_cbm.c
index 3452241f..5c69522f 100644
--- a/src/tools/code_graph_provider_cbm.c
+++ b/src/tools/code_graph_provider_cbm.c
@@ -107,7 +107,12 @@ static int cbm_delete_project(const char *graph_name) {
 }
 
 static int cbm_is_available(void) {
-   return mcp_bridge_server_connected(CBM_ALIAS);
+   /* Active check: if cbm wasn't ready when DAWN started (e.g. mcp-proxy still
+    * spawning its stdio child at boot), try to reconnect now rather than reporting
+    * it permanently absent. This turns the boot race into a transparent reconnect
+    * instead of a "no code server connected" error that needs a daemon restart.
+    * The reconnect restores the connection only; LLM-facing tools register at boot. */
+   return mcp_bridge_ensure_connected(CBM_ALIAS);
 }
 
 const code_graph_provider_t code_graph_provider_cbm = {
diff --git a/src/tools/code_project_service.c b/src/tools/code_project_service.c
index dcf9f277..207f9455 100644
--- a/src/tools/code_project_service.c
+++ b/src/tools/code_project_service.c
@@ -591,6 +591,14 @@ int code_project_import(int64_t requester_user_id,
    if (cfg == NULL || !cfg->code_projects.enabled) {
       return FAILURE;
    }
+   /* An import with no owning user (operator/CLI path, requester 0) must be
+    * global. Otherwise the row is written user_id=NULL/is_global=0 — owned by no
+    * one and shared with no one — and is invisible to every code_project_db_list_visible()
+    * caller (the LLM tool, and every non-admin user); only admins see it via the
+    * WebUI's list_all fallback. "Ownerless" therefore means "shared". */
+   if (requester_user_id <= 0) {
+      global = true;
+   }
    if (!valid_name(desired_name)) {
       OLOG_WARNING("code_project: invalid project name");
       return FAILURE;
diff --git a/src/tools/document_grep_tool.c b/src/tools/document_grep_tool.c
index ef762e72..3617f1df 100644
--- a/src/tools/document_grep_tool.c
+++ b/src/tools/document_grep_tool.c
@@ -113,10 +113,12 @@ static const tool_metadata_t doc_grep_metadata = {
    .name = "document_grep",
    .device_string = "document grep",
    .description = "Find an EXACT string in the user's saved documents and notes and return each "
-                  "match with its neighboring chunks. Use this instead of document_search when "
-                  "you need literal/exact matches — IDs, codes, exact field values, or a specific "
-                  "phrase — especially in structured data (YAML/CSV/logs) where a record spans "
-                  "chunks. Deterministic: no ranking, no embeddings. Paginated via offset.",
+                  "match with its neighboring chunks. A TARGETED follow-up: use it when you "
+                  "already know you need a literal/exact match in a document — IDs, codes, exact "
+                  "field values, or a specific phrase — especially in structured data "
+                  "(YAML/CSV/logs) where a record spans chunks. For a BROAD 'what do we know "
+                  "about X' question, call 'recall' FIRST; use grep to drill in for an exact "
+                  "string. Deterministic: no ranking, no embeddings. Paginated via offset.",
    .params = doc_grep_params,
    .param_count = 4,
    .device_type = TOOL_DEVICE_TYPE_GETTER,
diff --git a/src/tools/document_search.c b/src/tools/document_search.c
index 1d596861..3c819a50 100644
--- a/src/tools/document_search.c
+++ b/src/tools/document_search.c
@@ -80,13 +80,15 @@ static const treg_param_t doc_search_params[] = {
 static const tool_metadata_t doc_search_metadata = {
    .name = "document_search",
    .device_string = "document search",
-   .description = "Search the user's saved documents and notes (hybrid keyword + semantic). "
-                  "Use this for content they've uploaded (PDFs, manuals) OR authored reference "
-                  "text they filed under a label — a bio, an elevator pitch, an address, a saved "
-                  "note. Results rank EXACT label matches first, so to pull back a specific saved "
-                  "item ask for its label (e.g. 'public bio'). For the verbatim full text of a "
-                  "known note, prefer document_read with its exact label. Returns excerpts with "
-                  "source citations. Do NOT use this for general web searches.",
+   .description = "Search ONLY the user's saved documents and notes (hybrid keyword + semantic). "
+                  "This is a TARGETED follow-up: use it when you already know the answer lives in "
+                  "an uploaded file or filed note. For a BROAD 'what do we know about X' question "
+                  "that could also touch memory or the calendar, call 'recall' FIRST (it spans all "
+                  "sources) and use this only to drill into documents specifically. "
+                  "Results rank EXACT label matches first, so to pull back a specific saved item "
+                  "ask for its label (e.g. 'public bio'). For the verbatim full text of a known "
+                  "note, prefer document_read with its exact label. Returns excerpts with source "
+                  "citations. Do NOT use this for general web searches.",
    .params = doc_search_params,
    .param_count = 1,
    .device_type = TOOL_DEVICE_TYPE_GETTER,
diff --git a/src/tools/mcp_bridge_schema.c b/src/tools/mcp_bridge_schema.c
index 63cf9042..d0e0e5e9 100644
--- a/src/tools/mcp_bridge_schema.c
+++ b/src/tools/mcp_bridge_schema.c
@@ -30,8 +30,11 @@
 #include "logging.h"
 #include "tools/tool_registry.h"
 
-/* Property cap is the registry's documented per-tool parameter limit (12), NOT
- * the design's original 32: TOOL_PARAM_MAX is the binding contract here. */
+/* Property cap = the registry's documented per-tool parameter limit
+ * (TOOL_PARAM_MAX). It's a validation/hardening bound (reject an untrusted
+ * upstream that declares an absurd number of properties), not a storage size —
+ * nothing is arrayed by it. Kept tied to TOOL_PARAM_MAX so the bridge and the
+ * registry agree on one number; raise that define if real MCP tools need more. */
 #define MCP_SCHEMA_MAX_PROPERTIES TOOL_PARAM_MAX
 #define MCP_SCHEMA_MAX_ENUM TOOL_PARAM_ENUM_MAX
 #define MCP_SCHEMA_MAX_ONEOF_DEPTH 2
diff --git a/src/tools/mcp_bridge_tool.c b/src/tools/mcp_bridge_tool.c
index ad1713ff..ddd7f531 100644
--- a/src/tools/mcp_bridge_tool.c
+++ b/src/tools/mcp_bridge_tool.c
@@ -86,7 +86,14 @@ static mcp_slot_t s_slots[MCP_BRIDGE_MAX_SLOTS];
 static int s_slot_count;
 static pthread_mutex_t s_slots_mutex = PTHREAD_MUTEX_INITIALIZER;
 
-/* One connected upstream server; owns the client for the bridge's lifetime. */
+/* One configured upstream server; owns the client for the bridge's lifetime.
+ * The slot is kept even when the startup connect fails so the server can be
+ * reconnected lazily on first use (e.g. a proxy still spawning its child at
+ * boot). NOTE: a server's LLM-facing tools are registered only at startup, while
+ * the tool registry is still unlocked (it locks after tools_register_all). A
+ * lazy reconnect therefore restores the *connection* — enough for direct callers
+ * like the code-graph provider — but cannot add tools to the locked registry;
+ * those appear on the next daemon restart. */
 typedef struct {
    char alias[MCP_SERVER_ALIAS_MAX];
    mcp_client_t *client;
@@ -309,7 +316,13 @@ int mcp_bridge_register_tool(mcp_client_t *client,
    snprintf(slot->upstream_tool_name, sizeof(slot->upstream_tool_name), "%s", upstream_tool_name);
    snprintf(slot->dawn_tool_name, sizeof(slot->dawn_tool_name), "%s", dawn_tool_name);
    slot->description = mcp_schema_wrap_description(server_alias, description);
-   slot->params = *params; /* move ownership */
+   /* Move ownership: null the source set immediately (not just on success) so the
+    * failure path below frees it exactly once. Otherwise both the slot and the
+    * caller free the same array -> double free (only reachable when the registry
+    * rejects the insert, e.g. locked/full/duplicate). */
+   slot->params = *params;
+   params->params = NULL;
+   params->param_count = 0;
 
    tool_metadata_t meta;
    memset(&meta, 0, sizeof(meta));
@@ -343,10 +356,6 @@ int mcp_bridge_register_tool(mcp_client_t *client,
    s_slot_count++;
    pthread_mutex_unlock(&s_slots_mutex);
 
-   /* Caller's set was moved into the slot; clear it so their free is a no-op. */
-   params->params = NULL;
-   params->param_count = 0;
-
    OLOG_INFO("MCP bridge: registered tool '%s' (server '%s'%s)", dawn_tool_name, server_alias,
              dangerous ? ", dangerous" : "");
    return SUCCESS;
@@ -485,35 +494,46 @@ int mcp_bridge_init(void) {
          OLOG_WARNING("MCP bridge: failed to create client for server '%s'", srv->alias);
          continue;
       }
-      if (mcp_client_connect(client) != SUCCESS) {
-         OLOG_WARNING("MCP bridge: could not connect to server '%s' (%s); skipping", srv->alias,
-                      srv->url);
-         mcp_client_destroy(client);
-         continue;
-      }
 
-      snprintf(s_servers[s_server_count].alias, sizeof(s_servers[s_server_count].alias), "%s",
-               srv->alias);
-      s_servers[s_server_count].client = client;
-      s_servers[s_server_count].in_use = true;
+      /* Register the slot up front, regardless of the connect outcome: a server
+       * that isn't ready at startup (e.g. mcp-proxy still spawning its stdio
+       * child) must remain reconnectable. Destroying it here would lose it until
+       * a full daemon restart and make `dawn-admin mcp reset` a no-op for it. */
+      int idx = s_server_count;
+      snprintf(s_servers[idx].alias, sizeof(s_servers[idx].alias), "%s", srv->alias);
+      s_servers[idx].client = client;
+      s_servers[idx].in_use = true;
       s_server_count++;
 
-      register_server_tools(srv->alias, client);
+      /* Register tools only here, at startup, while the registry is still
+       * unlocked. A later lazy reconnect cannot add tools (see mcp_server_entry_t
+       * note), so this is the one chance to expose this server's LLM tools. */
+      if (mcp_client_connect(client) == SUCCESS) {
+         register_server_tools(srv->alias, client);
+      } else {
+         OLOG_WARNING("MCP bridge: server '%s' (%s) not ready at startup; will connect on "
+                      "first use",
+                      srv->alias, srv->url);
+      }
       /* Admin access bootstrap (auth_db_mcp_grant_all_admins) intentionally does
        * NOT run here: mcp_bridge_init() executes during tools_register_all(),
        * before auth_db_init(), so the grant would hit a closed DB. dawn.c runs it
        * after auth_db_init() for every configured server instead. */
    }
 
-   OLOG_INFO("MCP bridge: initialized with %d connected server(s)", s_server_count);
-
-#ifdef DAWN_ENABLE_CODE_PROJECTS
-   /* Capture cbm's path-derived graph-name prefix now that the client is
-    * connected, so the name-translation boundary works on the first cbm tool
-    * call after a restart (projects already indexed). Refreshed post-index. */
-   code_project_namemap_capture();
-#endif
+   int connected = 0;
+   for (int i = 0; i < s_server_count; i++) {
+      if (s_servers[i].in_use && mcp_client_state(s_servers[i].client) == MCP_STATE_CONNECTED) {
+         connected++;
+      }
+   }
+   OLOG_INFO("MCP bridge: %d configured server(s), %d connected", s_server_count, connected);
 
+   /* The cbm name-translation map is NOT captured here: mcp_bridge_init() runs
+    * during tools_register_all(), before auth_db_init(), so code_project_db is
+    * not open yet and the capture would silently fail. dawn.c captures it after
+    * auth_db_init() instead; a later lazy reconnect refreshes it (see
+    * mcp_bridge_ensure_connected). */
    return SUCCESS;
 }
 
@@ -593,7 +613,9 @@ int mcp_bridge_status_text(char *out, size_t out_len, int *bytes_written_out) {
 int mcp_bridge_reconnect(int *connected_out) {
    /* Snapshot the client pointers under the lock so a concurrent shutdown or
     * call_tool can't tear the table out from under us (sec-S4), then run the
-    * blocking reset/connect without holding the lock. */
+    * blocking reset/connect without holding the lock. Restores connectivity for
+    * servers that were down at startup; it does not (re)register tools — the
+    * registry is locked post-init (see mcp_server_entry_t note). */
    mcp_client_t *clients[MCP_SERVERS_MAX];
    int n = 0;
    pthread_mutex_lock(&s_slots_mutex);
@@ -617,6 +639,51 @@ int mcp_bridge_reconnect(int *connected_out) {
    return SUCCESS;
 }
 
+/* Lazily (re)connect one configured server. SUCCESS when it is already
+ * connected or the connect succeeds; FAILURE for a NULL/unknown alias or a
+ * failed connect attempt. */
+int mcp_bridge_ensure_connected(const char *server_alias) {
+   if (server_alias == NULL) {
+      return FAILURE;
+   }
+   if (mcp_bridge_server_connected(server_alias) == SUCCESS) {
+      return SUCCESS; /* fast path: already connected, no blocking work */
+   }
+
+   /* Slow path: not connected (or never connected at startup). Look the client
+    * up under the slot lock, then run the blocking connect with the lock
+    * released. mcp_client_connect() is internally serialized, so a racing caller
+    * on the same server is safe. Only connectivity is restored; tools cannot be
+    * (re)registered against the registry once it is locked after startup. */
+   mcp_client_t *client = NULL;
+   pthread_mutex_lock(&s_slots_mutex);
+   for (int i = 0; i < s_server_count; i++) {
+      if (s_servers[i].in_use && strcmp(s_servers[i].alias, server_alias) == 0) {
+         client = s_servers[i].client;
+         break;
+      }
+   }
+   pthread_mutex_unlock(&s_slots_mutex);
+   if (client == NULL) {
+      return FAILURE; /* not a configured server */
+   }
+   if (mcp_client_connect(client) != SUCCESS) {
+      return FAILURE;
+   }
+
+#ifdef DAWN_ENABLE_CODE_PROJECTS
+   /* A cbm that comes up via lazy reconnect (down at boot) needs its
+    * name-translation map rebuilt — the dawn.c startup capture only fired once,
+    * when cbm wasn't up. Without this the map stays empty and the LLM must use
+    * raw cbm graph slugs. Only the slow path reaches here, so this runs per
+    * reconnect, not per call; capture's nested call_tool hits the fast path. */
+   if (strcmp(server_alias, "cbm") == 0) {
+      code_project_namemap_capture();
+   }
+#endif
+   return SUCCESS;
+}
+
 int mcp_bridge_server_connected(const char *server_alias) {
    if (server_alias == NULL) {
       return FAILURE;
@@ -647,6 +714,15 @@ int mcp_bridge_call_tool(const char *server_alias,
       return FAILURE;
    }
 
+   /* Self-heal: reconnect a server that wasn't ready at startup before the call
+    * (mcp_client_call also lazily reconnects; doing it here surfaces a genuinely
+    * down server as the clean "no connected server" failure below). The return
+    * is intentionally not checked — a failed reconnect leaves the server
+    * disconnected, which the lookup below catches and reports. Note: this restores the
+    * connection only; it does NOT register the server's tools (the registry is
+    * locked post-init — see mcp_bridge_ensure_connected). */
+   mcp_bridge_ensure_connected(server_alias);
+
    mcp_client_t *client = NULL;
    pthread_mutex_lock(&s_slots_mutex);
    for (int i = 0; i < s_server_count; i++) {
diff --git a/src/tools/mcp_client.c b/src/tools/mcp_client.c
index 0e176adf..edafb9f6 100644
--- a/src/tools/mcp_client.c
+++ b/src/tools/mcp_client.c
@@ -49,6 +49,16 @@
 #define MCP_DEFAULT_TIMEOUT_MS 30000
 #define MCP_DEFAULT_CONNECT_WAIT_MS 10000
 
+/* Handshake tools/list retry: a proxy (e.g. mcp-proxy fronting a stdio child)
+ * can answer `initialize` before its upstream child is ready to serve
+ * `tools/list`, returning a transient JSON-RPC error (observed: -32602) during
+ * the cold-start window. Retry the read-only tools/list a few times before
+ * failing the handshake, so a server that comes up moments after DAWN connects
+ * still completes on the first attempt rather than being deferred to a lazy
+ * reconnect. */
+#define MCP_TOOLS_LIST_RETRIES 3
+#define MCP_TOOLS_LIST_RETRY_DELAY_MS 300
+
 /** One in-flight request awaiting a response. Address is stable until removed. */
 typedef struct {
    uint64_t id;
@@ -436,6 +446,12 @@ static int do_request(mcp_client_t *c,
       *result_out = result;
       result = NULL;
    }
+   /* Surface the JSON-RPC error body the RPC-error path otherwise discards, so a
+    * failed call shows the server's real message + code instead of a bare error
+    * number. Keeps handshake and tool-call failures diagnosable from the log. */
+   if (status == MCP_ERR_RPC && result != NULL) {
+      OLOG_WARNING("MCP %s: RPC error for '%s': %s", c->name, method, result);
+   }
    free(result);
    pending_destroy(p);
    return status;
@@ -483,7 +499,10 @@ static int client_handshake(mcp_client_t *c) {
    char *init_res = NULL;
    int rc = do_request(c, "initialize", init_params, c->request_timeout_ms, NULL, NULL, &init_res);
    if (rc != SUCCESS) {
-      OLOG_ERROR("MCP %s: initialize failed (%d)", c->name, rc);
+      /* WARNING not ERROR: the bridge treats a failed handshake as non-fatal (an
+       * optional MCP server may simply not be up yet; its slot is kept and retried
+       * lazily) and re-reports the user-facing outcome at WARN.  Don't throw red. */
+      OLOG_WARNING("MCP %s: initialize failed (%d)", c->name, rc);
       free(init_res);
       return FAILURE;
    }
@@ -495,10 +514,26 @@ static int client_handshake(mcp_client_t *c) {
 
    send_notification(c, "notifications/initialized", NULL);
 
+   /* Retry tools/list across the proxy cold-start window (see the retry-constant
+    * comment above). Only RPC errors and timeouts are retried — those are the
+    * "server up but not ready yet" signatures; a dropped/disabled transport is
+    * not retryable here and falls through to fail the handshake. */
    char *tools = NULL;
    rc = do_request(c, "tools/list", NULL, c->request_timeout_ms, NULL, NULL, &tools);
+   for (int attempt = 1;
+        attempt <= MCP_TOOLS_LIST_RETRIES && (rc == MCP_ERR_RPC || rc == MCP_ERR_TIMEOUT);
+        attempt++) {
+      OLOG_INFO("MCP %s: tools/list not ready (%d), retry %d/%d", c->name, rc, attempt,
+                MCP_TOOLS_LIST_RETRIES);
+      free(tools);
+      tools = NULL;
+      struct timespec delay = { .tv_sec = MCP_TOOLS_LIST_RETRY_DELAY_MS / 1000,
+                                .tv_nsec = (MCP_TOOLS_LIST_RETRY_DELAY_MS % 1000) * 1000000L };
+      nanosleep(&delay, NULL);
+      rc = do_request(c, "tools/list", NULL, c->request_timeout_ms, NULL, NULL, &tools);
+   }
    if (rc != SUCCESS) {
-      OLOG_ERROR("MCP %s: tools/list failed (%d)", c->name, rc);
+      OLOG_WARNING("MCP %s: tools/list failed (%d)", c->name, rc);
       free(tools);
       return FAILURE;
    }
diff --git a/src/tools/mcp_transport_http_sse.c b/src/tools/mcp_transport_http_sse.c
index b9c7b04b..573b9bdb 100644
--- a/src/tools/mcp_transport_http_sse.c
+++ b/src/tools/mcp_transport_http_sse.c
@@ -181,7 +181,10 @@ static void sse_event(const char *event_type, const char *event_data, void *user
       pthread_mutex_unlock(&t->ep_mtx);
 
       if (ep != NULL) {
-         OLOG_INFO("MCP transport: endpoint ready");
+         /* Log the resolved endpoint (incl. session_id): a second endpoint event
+          * with a different session mid-handshake would repoint POSTs at a fresh,
+          * un-initialized session — worth being able to see in the log. */
+         OLOG_INFO("MCP transport: endpoint ready -> %s", ep);
          if (t->on_state != NULL) {
             t->on_state(t->user, MCP_TRANSPORT_CONNECTED);
          }
diff --git a/src/tools/memory_tool.c b/src/tools/memory_tool.c
index 83fc250f..bf169b65 100644
--- a/src/tools/memory_tool.c
+++ b/src/tools/memory_tool.c
@@ -247,8 +247,11 @@ static const tool_metadata_t memory_metadata = {
    .name = "memory",
    .device_string = "memory",
    .topic = "dawn",
-   .aliases = { "remember", "recall" },
-   .alias_count = 2,
+   /* 'recall' was an alias here; it is now the dedicated unified cross-source
+    * recall tool (recall_tool.c).  Registry rejects duplicate names/aliases,
+    * so the alias was removed.  'remember' stays. */
+   .aliases = { "remember" },
+   .alias_count = 1,
 
    .description = "Store and retrieve persistent memories about the user. "
                   "Use 'remember' to store facts (preferences, information shared by user). "
@@ -271,8 +274,11 @@ static const tool_metadata_t memory_metadata = {
                   "address, a saved answer — use document_manage (save_note / edit) instead, NOT "
                   "remember. "
                   "Call 'remember' directly; it cannot be used inside execute_plan. "
-                  "Use 'search' to find relevant stored memories (optionally filtered by "
-                  "time_range like '24h', '7d', '2w'). "
+                  "Use 'search' for a TARGETED lookup of stored memory facts (optionally filtered "
+                  "by time_range like '24h', '7d', '2w'). For a BROAD 'what do we know about X' / "
+                  "'how do things stand' question, call the 'recall' tool FIRST instead — it spans "
+                  "memory, notes, documents, and the calendar in one pass; reach for memory "
+                  "'search' when you specifically need memory facts and recall isn't warranted. "
                   "Use 'forget' to remove memories by numeric ID (you MUST use 'search', 'recent', "
                   "or 'find_duplicates' first to find the ID); pass a comma-separated list for "
                   "several. 'forget' has TWO modes — pick deliberately: (1) DELETE (default, "
diff --git a/src/tools/recall_format.c b/src/tools/recall_format.c
new file mode 100644
index 00000000..f23530d6
--- /dev/null
+++ b/src/tools/recall_format.c
@@ -0,0 +1,271 @@
+/*
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ * By contributing to this project, you agree to license your contributions
+ * under the GPLv3 (or any later version) or any future licenses chosen by
+ * the project author(s).
+ *
+ * Recall tool result formatter — see recall_format.h.
+ */
+
+#include "tools/recall_format.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "core/strbuf.h"
+#include "dawn_error.h"
+
+/* Per-candidate one-liner cap, in bytes — keeps a single candidate from
+ * dominating the engine-bounded budget; the read-pointer says where the full
+ * text lives. */
+#define RECALL_LINE_TEXT_MAX 240
+
+/* Stack buffer for a parsed document/note label.  DOC_FILENAME_MAX is 256 in
+ * document_db.h; 300 leaves margin without coupling this file to that header. */
+#define RECALL_FNAME_MAX 300
+
+/* Source families, in render order; also the index into k_family_title. */
+typedef enum {
+   FAM_MEMORY = 0, /* memory_fact / memory_entity / memory_relation */
+   FAM_SUMMARY,    /* memory_summary */
+   FAM_DOC,        /* document_chunk (notes + documents) */
+   FAM_CALENDAR,   /* calendar_event */
+   FAM_OTHER,      /* anything else / future sources */
+   FAM_COUNT
+} recall_family_t;
+
+static const char *const k_family_title[FAM_COUNT] = {
+   "MEMORY — facts & relationships",
+   "MEMORY — past conversation summaries",
+   "NOTES & DOCUMENTS",
+   "CALENDAR",
+   "OTHER",
+};
+
+/* Map an adapter source_id to its render family; NULL/unknown -> FAM_OTHER. */
+static recall_family_t family_of(const char *source_id) {
+   static const struct {
+      const char *id;
+      recall_family_t fam;
+   } k_map[] = { { "memory_fact", FAM_MEMORY },     { "memory_entity", FAM_MEMORY },
+                 { "memory_relation", FAM_MEMORY }, { "memory_summary", FAM_SUMMARY },
+                 { "document_chunk", FAM_DOC },     { "calendar_event", FAM_CALENDAR } };
+   for (size_t i = 0; source_id != NULL && i < sizeof(k_map) / sizeof(k_map[0]); i++) {
+      if (strcmp(source_id, k_map[i].id) == 0)
+         return k_map[i].fam;
+   }
+   return FAM_OTHER;
+}
+
+/* item_id is "<prefix>:<numeric id>" (focus_candidate_format_item_id).  Returns
+ * the id substring; NULL if item_id is NULL, lacks ':', or ends at the ':'. */
+static const char *id_after_colon(const char *item_id) {
+   const char *sep = (item_id != NULL) ? strchr(item_id, ':') : NULL;
+   if (sep == NULL || sep[1] == '\0')
+      return NULL;
+   return sep + 1;
+}
+
+/* Document/note candidate text is rendered "[<filename>] <chunk text>".  Copy
+ * the bracketed label into buf (truncated to fit); true if a label was found. */
+static bool filename_from_text(const char *text, char *buf, size_t buflen) {
+   if (text == NULL || text[0] != '[' || buf == NULL || buflen == 0)
+      return false; /* buflen 0 would underflow the `buflen - 1` clamp below */
+   const char *close = strchr(text, ']');
+   if (close == NULL || close == text + 1)
+      return false; /* unterminated "[" or empty "[]" */
+   size_t n = (size_t)(close - (text + 1));
+   if (n >= buflen)
+      n = buflen - 1;
+   memcpy(buf, text + 1, n);
+   buf[n] = '\0';
+   return true;
+}
+
+/* Append `text` as a single line: collapse any CR/LF/tab to spaces and cap at
+ * RECALL_LINE_TEXT_MAX bytes ("..." when truncated, never mid UTF-8 sequence). */
+static void append_oneline(strbuf_t *sb, const char *text) {
+   if (text == NULL) {
+      (void)strbuf_append(sb, "(no text)");
+      return;
+   }
+   char line[RECALL_LINE_TEXT_MAX + 4];
+   size_t w = 0;
+   bool prev_space = false;
+   const char *p = text;
+   for (; *p && w < RECALL_LINE_TEXT_MAX; p++) {
+      char c = (*p == '\n' || *p == '\r' || *p == '\t') ? ' ' : *p;
+      if (c == ' ' && prev_space)
+         continue; /* collapse runs of whitespace */
+      prev_space = (c == ' ');
+      line[w++] = c;
+   }
+   /* `*p` non-NUL ⇒ the loop stopped at the cap, not the terminator: truncated.
+    * If the next byte is a UTF-8 continuation (10xxxxxx) the cap split a
+    * multi-byte character: drop its partial tail and lead byte to stay valid. */
+   if (*p && ((unsigned char)*p & 0xC0) == 0x80) {
+      while (w > 0 && ((unsigned char)line[w - 1] & 0xC0) == 0x80)
+         w--;
+      if (w > 0 && ((unsigned char)line[w - 1] & 0xC0) == 0xC0)
+         w--;
+   }
+   line[w] = '\0';
+   (void)strbuf_append(sb, line);
+   if (*p)
+      (void)strbuf_append(sb, "...");
+}
+
+/* True when item_id matches one of the n entries in injected_ids (NULL-safe). */
+static bool is_injected(const char *item_id, const char *const *injected_ids, int n) {
+   bool found = false;
+   if (item_id != NULL && injected_ids != NULL) {
+      for (int k = 0; k < n && !found; k++)
+         found = (injected_ids[k] != NULL) && (strcmp(injected_ids[k], item_id) == 0);
+   }
+   return found;
+}
+
+/* Emit one candidate's bullet line (without leading marker), including its
+ * source-appropriate read-pointer. */
+static void append_candidate_line(strbuf_t *sb, const focus_candidate_t *c, recall_family_t fam) {
+   append_oneline(sb, c->text);
+
+   if (fam == FAM_MEMORY) {
+      /* Only facts carry a directly-fetchable id; entities/relations are
+       * self-describing context, no precise fetch verb. */
+      const bool is_fact = (c->source_id != NULL) && (strcmp(c->source_id, "memory_fact") == 0);
+      const char *fact_id = is_fact ? id_after_colon(c->item_id) : NULL;
+      if (fact_id != NULL)
+         (void)strbuf_appendf(sb, "   [memory id %s]", fact_id);
+      return;
+   }
+
+   if (fam == FAM_DOC) {
+      /* Point at document_read using the bracketed label parsed from the text. */
+      char label[RECALL_FNAME_MAX];
+      if (filename_from_text(c->text, label, sizeof(label)))
+         (void)strbuf_appendf(sb, "   -> document_read \"%s\"", label);
+      return;
+   }
+
+   if (fam == FAM_CALENDAR) {
+      (void)strbuf_append(sb, "   -> calendar (query by date/title)");
+      return;
+   }
+
+   /* FAM_SUMMARY (and anything else): no fetch pointer.  Summary item_ids are
+    * `summary:<id>`, a different id space than facts, and `memory get`
+    * resolves only fact ids — a `[memory id N]` here would dead-end.  Summaries
+    * are self-describing narrative context, like entities/relations above. */
+}
+
+char *recall_format_result(const char *query,
+                           const focus_compose_result_t *result,
+                           const char *const *injected_ids,
+                           int injected_count) {
+   const char *topic = (query != NULL) ? query : "";
+   const int total = (result != NULL) ? result->candidate_count : 0;
+
+   /* Zero-result: be explicit so the LLM doesn't hallucinate coverage. */
+   if (total <= 0) {
+      strbuf_t none;
+      strbuf_init(&none, 160);
+      (void)strbuf_appendf(&none,
+                           "I have nothing on file about \"%s\" in memory, notes, documents, "
+                           "or the calendar.",
+                           topic);
+      char *msg = strbuf_steal(&none);
+      strbuf_free(&none);
+      return msg ? msg : strdup("(recall: nothing found)");
+   }
+
+   /* Tally candidates per family up front: section headers print the count and
+    * the footer lists the families that came back empty. */
+   int per_family[FAM_COUNT] = { 0 };
+   for (int i = 0; i < total; i++)
+      per_family[family_of(result->candidates[i].source_id)]++;
+
+   strbuf_t sb;
+   strbuf_init(&sb, 2048);
+   (void)strbuf_appendf(&sb, "What I know about \"%s\":\n", topic);
+
+   int injected_shown = 0;
+   for (int fam = 0; fam < FAM_COUNT; fam++) {
+      if (per_family[fam] == 0)
+         continue;
+      (void)strbuf_appendf(&sb, "\n%s (%d)\n", k_family_title[fam], per_family[fam]);
+
+      for (int i = 0; i < total; i++) {
+         const focus_candidate_t *cand = &result->candidates[i];
+         if (family_of(cand->source_id) != (recall_family_t)fam)
+            continue;
+         /* `·` (instead of `•`) marks an item the caller listed as injected. */
+         const bool already = is_injected(cand->item_id, injected_ids, injected_count);
+         (void)strbuf_append(&sb, already ? "  · " : "  • ");
+         append_candidate_line(&sb, cand, (recall_family_t)fam);
+         if (already) {
+            (void)strbuf_append(&sb, "   (already in current context)");
+            injected_shown++;
+         }
+         (void)strbuf_append(&sb, "\n");
+      }
+   }
+
+   /* Footer: preserve-specifics instruction (Phase 1.5 — counters the
+    * observed failure where the broad gather makes the model write a vague
+    * high-level summary that drops the specific facts it just retrieved),
+    * then how to get exact/full text, and which sources were empty. */
+   (void)strbuf_append(&sb,
+                       "\nWhen you answer from this, KEEP the specific facts, dates, counts, "
+                       "names, and statuses above — list them, don't flatten them into a vague "
+                       "summary. If the user asked \"how are we looking / where do things stand\", "
+                       "lead with the concrete items (e.g. \"4 of 8 tasks done\", exact dates), "
+                       "not just a percentage or a headline.\n");
+   (void)strbuf_append(&sb,
+                       "For exact/full text, follow a pointer: document_read \"<label>\" for a "
+                       "note/document, or memory get <id> for a fact.\n");
+
+   /* Name the known source families (FAM_OTHER excluded) that had no hits. */
+   strbuf_t missing;
+   strbuf_init(&missing, 64);
+   bool any_missing = false;
+   for (int fam = 0; fam < FAM_OTHER; fam++) {
+      if (per_family[fam] != 0)
+         continue;
+      (void)strbuf_appendf(&missing, "%s%s", any_missing ? ", " : "", k_family_title[fam]);
+      any_missing = true;
+   }
+   if (any_missing)
+      (void)strbuf_appendf(&sb, "Nothing found in: %s.\n", strbuf_str(&missing));
+   strbuf_free(&missing);
+
+   /* Dedup fallback note (design §4.2a): when the caller can't supply the
+    * turn's injected-id set, flag the likely overlap rather than silently
+    * re-stating context the LLM already has. */
+   if (injected_ids == NULL)
+      (void)strbuf_append(&sb,
+                          "(Some top items may already be in this turn's injected context.)\n");
+   else if (injected_shown > 0)
+      (void)strbuf_appendf(&sb, "(%d item(s) marked · were already in your current context.)\n",
+                           injected_shown);
+
+   if (strbuf_oom(&sb)) {
+      strbuf_free(&sb);
+      return strdup("recall: result too large to format.");
+   }
+   char *out = strbuf_steal(&sb);
+   strbuf_free(&sb);
+   return out ? out : strdup("recall: allocation failed.");
+}
diff --git a/src/tools/recall_tool.c b/src/tools/recall_tool.c
new file mode 100644
index 00000000..8714d7fd
--- /dev/null
+++ b/src/tools/recall_tool.c
@@ -0,0 +1,161 @@
+/*
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ * By contributing to this project, you agree to license your contributions
+ * under the GPLv3 (or any later version) or any future licenses chosen by
+ * the project author(s).
+ *
+ * Unified cross-source recall tool — see recall_tool.h and
+ * docs/CROSS_TOOL_RECALL_DESIGN.md.
+ */
+
+#include "tools/recall_tool.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include "config/dawn_config.h"
+#include "core/embedding_engine.h"
+#include "core/focus/focus_source.h"
+#include "dawn_error.h"
+#include "logging.h"
+#include "tools/recall_format.h"
+#include "tools/tool_registry.h"
+
+/* =============================================================================
+ * Forward declarations
+ * ============================================================================= */
+
+static char *recall_callback(const char *action, char *value, int *should_respond);
+static bool recall_is_available(void);
+
+/* =============================================================================
+ * Tool metadata
+ * ============================================================================= */
+
+static const treg_param_t recall_params[] = {
+   {
+       .name = "query",
+       .description = "What to gather context about — a topic, person, project, or status "
+                      "(a few words, e.g. 'Open Sauce 2026 prep' or 'wrist recovery').",
+       .type = TOOL_PARAM_TYPE_STRING,
+       .required = true,
+       .maps_to = TOOL_MAPS_TO_VALUE,
+   },
+};
+
+static const tool_metadata_t recall_metadata = {
+   .name = "recall",
+   .device_string = "recall",
+   .description =
+       "Your FIRST move for any \"what do we know / what's the status / how does X stand / tell me "
+       "about\" question answered from the user's own knowledge. Gathers EVERYTHING known about a "
+       "topic, person, project, or status in ONE call — searches memory (facts, summaries, "
+       "relationships), saved notes, uploaded documents, and the calendar together, ranked by "
+       "relevance (hybrid keyword + semantic). Prefer this over jumping to a single per-source "
+       "search, which misses cross-source context — UNLESS you already know the exact note label, "
+       "document, or memory id to fetch (then use document_read / memory get directly). "
+       "Returns results grouped by source, each with a pointer to where to read the full or exact "
+       "text. This is the high-level context tool; the per-source search tools are for targeted "
+       "follow-ups.",
+   .params = recall_params,
+   .param_count = 1,
+   .device_type = TOOL_DEVICE_TYPE_GETTER,
+   .capabilities = 0,
+   .is_getter = true,
+   .is_available = recall_is_available,
+   .callback = recall_callback,
+};
+
+/* =============================================================================
+ * Registration / availability
+ * ============================================================================= */
+
+/* Register the recall tool's static metadata with the tool registry. */
+int recall_tool_register(void) {
+   return tool_registry_register(&recall_metadata);
+}
+
+static bool recall_is_available(void) {
+   /* Most sources (documents, calendar, summary/entity adapters) need an
+    * embedding to rank; gate on the engine like document_search.  This hides
+    * the tool entirely when the engine is DOWN, so the callback's keyword-only
+    * degrade (qptr=NULL) covers only a transient per-query embed FAILURE. */
+   return embedding_engine_available();
+}
+
+/* =============================================================================
+ * Callback
+ * ============================================================================= */
+
+static char *recall_callback(const char *action, char *value, int *should_respond) {
+   (void)action; /* single-action tool: the query arrives in `value` */
+   *should_respond = 1;
+
+   if (value == NULL || *value == '\0')
+      return strdup("Error: recall needs a query (a topic, person, project, or status).");
+
+   const int user_id = tool_get_current_user_id();
+   const int dims = embedding_engine_dims();
+   if (dims <= 0)
+      return strdup("Error: embedding engine not initialized.");
+
+   float *vec = malloc((size_t)dims * sizeof(float));
+   if (vec == NULL)
+      return strdup("Error: memory allocation failed.");
+
+   /* Graceful degrade: on an embed failure (or a dimension mismatch) pass a NULL
+    * vector so keyword-capable adapters still run; the framework skips
+    * embedding-only adapters (documents/calendar) when the vector is NULL. */
+   int got = 0;
+   const bool embedded = embedding_engine_embed(value, vec, dims, &got) == 0 && got == dims;
+   if (!embedded) {
+      OLOG_WARNING("recall: query embedding failed — proceeding keyword-only");
+   }
+
+   const recall_config_t *cfg = &g_config.memory.recall;
+   const focus_limits_t limits = {
+      .top_k = cfg->top_k,
+      .min_score = cfg->min_score,
+      .budget_bytes = cfg->budget_bytes,
+   };
+
+   focus_compose_result_t result;
+   memset(&result, 0, sizeof(result));
+   /* include_private=false mirrors the per-turn focus path.  The flag is a
+    * no-op today (every adapter ignores it — the "1f gap"), so this changes
+    * nothing yet; it means that if private-conversation filtering is ever
+    * implemented, recall inherits the SAFE default rather than surfacing
+    * private content into a possibly-shared session. */
+   const int compose_rc = focus_compose_ex(user_id, /*include_private=*/false, value,
+                                           embedded ? vec : NULL, (size_t)dims, time(NULL),
+                                           cfg->per_source_max, &limits, &result);
+   free(vec);
+
+   if (compose_rc != SUCCESS) {
+      focus_result_free(&result);
+      return strdup("Error: couldn't gather context (recall failed). Try a targeted memory or "
+                    "document search instead.");
+   }
+
+   OLOG_INFO("recall: user=%d query='%.40s' candidates=%d", user_id, value, result.candidate_count);
+
+   /* v1: the turn's injected-id set isn't wired through yet (design §4.2a
+    * fallback) — pass NULL so the formatter notes the likely overlap instead
+    * of silently re-stating already-injected context. */
+   char *formatted = recall_format_result(value, &result, NULL, 0);
+   focus_result_free(&result);
+   return formatted ? formatted : strdup("recall: failed to format result.");
+}
diff --git a/src/tools/tools_init.c b/src/tools/tools_init.c
index 809e5087..924431ab 100644
--- a/src/tools/tools_init.c
+++ b/src/tools/tools_init.c
@@ -123,6 +123,7 @@
 #endif
 
 #include "tools/plan_executor.h"
+#include "tools/recall_tool.h"
 
 /* ========== Registration ========== */
 
@@ -178,6 +179,13 @@ int tools_register_all(void) {
    }
 #endif
 
+   /* Unified cross-source recall — aggregates whatever focus adapters are
+    * registered (memory/notes/documents/calendar).  Registered unconditionally;
+    * recall_is_available() gates at runtime on the embedding engine. */
+   if (recall_tool_register() != 0) {
+      OLOG_WARNING("Failed to register recall tool");
+   }
+
 #ifdef DAWN_ENABLE_DATETIME_TOOL
    if (date_tool_register() != 0) {
       OLOG_WARNING("Failed to register date tool");
diff --git a/src/webui/webui_auth_helpers.c b/src/webui/webui_auth_helpers.c
index 6ace5970..fc3d5ec4 100644
--- a/src/webui/webui_auth_helpers.c
+++ b/src/webui/webui_auth_helpers.c
@@ -287,6 +287,22 @@ static const char k_tool_call_discipline_footer[] =
     "is the worst failure mode here — the user trusts the confirmation and finds out later "
     "that nothing happened.\n";
 
+/* Context-gathering routing nudge.  Lives in the stable prefix (cached, always
+ * emitted) per docs/CROSS_TOOL_RECALL_DESIGN.md §4.6.  Phase-0 baseline showed
+ * the model answers broad "what do we know / where do things stand" questions
+ * from a single (often wrong) source instead of fanning out; Phase-1 live test
+ * confirmed the tool-description demotion alone didn't lift `recall` invocation.
+ * This short steer is the reserved system-prompt lever that does. */
+static const char k_recall_routing_footer[] =
+    "\n\nCONTEXT GATHERING:\n"
+    "- When the user asks what is known / stored / remembered about a topic, person, project, or "
+    "situation, how something stands, or for a summary of context, call the 'recall' tool FIRST. "
+    "It gathers across memory, notes, documents, and the calendar in one pass and points you to "
+    "where the exact text lives.\n"
+    "- Go straight to a single per-source tool (document_read, document_search, document_grep, "
+    "memory search/get) only when you already know exactly which source and item holds the "
+    "answer.\n";
+
 /* Strip the TOOL DEFAULTS section from @p src into a fresh allocation.
  * `get_remote_command_prompt()` always emits TOOL DEFAULTS (location /
  * room / units / timezone fallback for unauthenticated callers).  For
@@ -384,13 +400,15 @@ static char *append_tool_discipline_footer(char *base) {
    if (base == NULL)
       return NULL;
    const size_t base_len = strlen(base);
-   const size_t footer_len = sizeof(k_tool_call_discipline_footer) - 1;
-   char *combined = malloc(base_len + footer_len + 1);
+   const size_t disc_len = sizeof(k_tool_call_discipline_footer) - 1;
+   const size_t recall_len = sizeof(k_recall_routing_footer) - 1;
+   char *combined = malloc(base_len + disc_len + recall_len + 1);
    if (combined == NULL)
       return base;
    memcpy(combined, base, base_len);
-   memcpy(combined + base_len, k_tool_call_discipline_footer, footer_len);
-   combined[base_len + footer_len] = '\0';
+   memcpy(combined + base_len, k_tool_call_discipline_footer, disc_len);
+   memcpy(combined + base_len + disc_len, k_recall_routing_footer, recall_len);
+   combined[base_len + disc_len + recall_len] = '\0';
    free(base);
    return combined;
 }
diff --git a/src/webui/webui_broadcasts.c b/src/webui/webui_broadcasts.c
index 51bb3798..36a9419d 100644
--- a/src/webui/webui_broadcasts.c
+++ b/src/webui/webui_broadcasts.c
@@ -639,7 +639,11 @@ static const char *capped_text_view(const char *text, char **owned, bool *out_ow
    const size_t n = strlen(text);
    if (n <= FOCUS_TEXT_MAX_BYTES)
       return text;
-   *owned = malloc(FOCUS_TEXT_MAX_BYTES + 1);
+   /* UTF-8-safe cut: a raw truncation at FOCUS_TEXT_MAX_BYTES can split a
+    * multi-byte character, making this payload's text frame invalid UTF-8
+    * (the browser then drops the WebSocket). Back the cut up to a char boundary. */
+   const size_t cap = focus_utf8_safe_cap(text, FOCUS_TEXT_MAX_BYTES);
+   *owned = malloc(cap + 1);
    if (*owned == NULL) {
       /* OOM — drop text content rather than fail the broadcast.  Log
        * the failure (security audit LOW): under sustained memory
@@ -650,8 +654,8 @@ static const char *capped_text_view(const char *text, char **owned, bool *out_ow
                    n);
       return "";
    }
-   memcpy(*owned, text, FOCUS_TEXT_MAX_BYTES);
-   (*owned)[FOCUS_TEXT_MAX_BYTES] = '\0';
+   memcpy(*owned, text, cap);
+   (*owned)[cap] = '\0';
    *out_owned = true;
    return *owned;
 }
diff --git a/src/webui/webui_history.c b/src/webui/webui_history.c
index 7ebd1ee4..42000555 100644
--- a/src/webui/webui_history.c
+++ b/src/webui/webui_history.c
@@ -506,6 +506,15 @@ void handle_continue_conversation(ws_connection_t *conn, struct json_object *pay
       return;
    }
 
+   /* DORMANT as of v67: compaction now records an in-conversation watermark instead
+    * of forking (see llm_context_compact + conv_db_set_compaction_watermark). The
+    * WebUI client no longer sends `continue_conversation` on compaction. This handler
+    * is retained only for backward-compat; if it ever fires, the (archiving) split
+    * path is still live — log it so we can confirm the client is the only caller
+    * before removing this code. */
+   OLOG_WARNING("WebUI: dormant continue_conversation handler invoked (v67 uses watermarks; "
+                "this still archives — investigate the caller)");
+
    json_object *response = json_object_new_object();
    json_object_object_add(response, "type",
                           json_object_new_string("continue_conversation_response"));
@@ -721,7 +730,13 @@ void handle_load_conversation(ws_connection_t *conn, struct json_object *payload
              * previous conversation's history for the LLM while the UI displayed
              * the newly-loaded one — messages then went to the wrong thread. */
             if (existing_count <= 1 || conn->active_conversation_id != conv_id) {
-               int restored = webui_restore_conversation_context(conn, &conv, conv_id, all_msgs);
+               /* v67: when a compaction watermark is set, the display array (all_msgs)
+                * is the FULL transcript, but the LLM context must be bounded to
+                * post-watermark messages.  Pass NULL so restore does its own bounded
+                * fetch (conv_db_get_messages_after); display stays full. */
+               json_object *restore_msgs = (conv.context_watermark_msg_id > 0) ? NULL : all_msgs;
+               int restored = webui_restore_conversation_context(conn, &conv, conv_id,
+                                                                 restore_msgs);
                if (restored >= 0) {
                   OLOG_INFO("WebUI: Restored %d messages to session %u context (conv %lld)",
                             restored, conn->session->session_id, (long long)conv_id);
diff --git a/src/webui/webui_server.c b/src/webui/webui_server.c
index 5121a111..3f0d0c5e 100644
--- a/src/webui/webui_server.c
+++ b/src/webui/webui_server.c
@@ -2861,8 +2861,18 @@ int webui_restore_conversation_context(ws_connection_t *conn,
    } else {
       all_msgs = json_object_new_array();
       owns_msgs = true;
-      int rc = conv_db_get_messages(conv_id, conn->auth_user_id, webui_session_restore_msg_cb,
-                                    all_msgs);
+      int rc;
+      if (conv->context_watermark_msg_id > 0) {
+         /* v67: bound restored context to messages after the compaction watermark;
+          * the injected summary (below) stands in for the compacted prefix. The
+          * full transcript is still shown in the UI (display load is unbounded). */
+         rc = conv_db_get_messages_after(conv_id, conn->auth_user_id,
+                                         conv->context_watermark_msg_id,
+                                         webui_session_restore_msg_cb, all_msgs);
+      } else {
+         rc = conv_db_get_messages(conv_id, conn->auth_user_id, webui_session_restore_msg_cb,
+                                   all_msgs);
+      }
       if (rc != AUTH_DB_SUCCESS) {
          json_object_put(all_msgs);
          return LWS_CLOSE_CONNECTION;
@@ -2895,10 +2905,20 @@ int webui_restore_conversation_context(ws_connection_t *conn,
    }
 
    if (conv->compaction_summary && strlen(conv->compaction_summary) > 0) {
-      char summary[4096];
-      snprintf(summary, sizeof(summary), "Previous conversation context (summarized): %s",
-               conv->compaction_summary);
-      session_add_message(conn->session, "system", summary);
+      /* v67: prepend a reconstructed [COMPACTED ...] marker (when a summary node
+       * exists) so the reloaded LLM keeps a context_expand handle to the
+       * compacted originals — not just the summary text.
+       *
+       * ASSISTANT role, NOT system: session_update_system_messages rebuilds the
+       * leading context into exactly two system messages (stable prefix + volatile
+       * focus block) every turn and DROPS any other system message — so a
+       * system-role summary never reaches the LLM. The live compaction marker
+       * (llm_context.c) is an assistant message for the same reason; matching it
+       * here makes the summary survive the per-turn rebuild. */
+      char summary[CONV_SUMMARY_MAX];
+      conv_db_format_compaction_context(conv_id, conv->compaction_summary, summary,
+                                        sizeof(summary));
+      session_add_message(conn->session, "assistant", summary);
    }
 
    for (int i = 0; i < count; i++) {
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index ea58cdad..faa76a0f 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -96,6 +96,7 @@ set(AUTH_DB_SOURCES
     ${SRC_ROOT}/src/auth/auth_db_migrations_v64.c
     ${SRC_ROOT}/src/auth/auth_db_migrations_v65.c
     ${SRC_ROOT}/src/auth/auth_db_migrations_v66.c
+    ${SRC_ROOT}/src/auth/auth_db_migrations_v67.c
     ${SRC_ROOT}/src/auth/auth_db_statements.c
     ${SRC_ROOT}/src/auth/auth_db_user.c
     ${SRC_ROOT}/src/auth/auth_db_session.c
@@ -530,6 +531,7 @@ dawn_add_unit_test(test_focus_source
    SOURCES test_focus_source.c
            ${SRC_ROOT}/src/core/focus/focus_source.c
            ${SRC_ROOT}/src/core/focus/focus_dominant_token.c
+           ${SRC_ROOT}/src/core/focus/focus_candidate_helpers.c
            ${SRC_ROOT}/src/core/memory_filter.c
    LIBS    dawn_common unity pthread
    LABEL   ci)
@@ -545,6 +547,15 @@ dawn_add_unit_test(test_focus_dominant_token
    LIBS    dawn_common unity pthread
    LABEL   ci)
 
+# Recall result formatter — pure rendering of a focus_compose result into the
+# grouped, source-tagged "here's what we know" block.  No DB / no engine.
+dawn_add_unit_test(test_recall_format
+   SOURCES test_recall_format.c
+           ${SRC_ROOT}/src/tools/recall_format.c
+           ${SRC_ROOT}/src/core/strbuf.c
+   LIBS    dawn_common unity pthread
+   LABEL   ci)
+
 # Phase 1c: extracted fact-search helper.  Links the production helper
 # against programmable memory_db_*/memory_embeddings_* stubs so the
 # pipeline's tokenize → multi-token-keyword → hybrid re-rank flow can
diff --git a/tests/test_auth_db.c b/tests/test_auth_db.c
index 866ca0ac..11cda881 100644
--- a/tests/test_auth_db.c
+++ b/tests/test_auth_db.c
@@ -483,6 +483,86 @@ static void test_get_messages_by_range_returns_private_when_opted_in(void) {
    TEST_ASSERT_EQUAL_INT(1, ctx.count);
 }
 
+/* ============================================================================
+ * Compaction watermark (v67) Tests
+ * ============================================================================ */
+
+static int wm_count_cb(const conversation_message_t *msg, void *ctx) {
+   (void)msg;
+   (*(int *)ctx)++;
+   return 0;
+}
+
+/* conv_db_set_compaction_watermark: advances forward, treats a stale rewind as a
+ * benign no-op (watermark AND summary unchanged), and rejects a watermark <= 0. */
+static void test_compaction_watermark_monotonic(void) {
+   int user_id = create_and_get_id("wm_mono", "hash", false);
+   int64_t conv_id = 0;
+   TEST_ASSERT_EQUAL_INT(AUTH_DB_SUCCESS, conv_db_create(user_id, "Watermark mono", &conv_id));
+
+   conversation_t conv; /* re-zeroed before each read-back, released by conv_free */
+
+   TEST_ASSERT_EQUAL_INT(AUTH_DB_SUCCESS,
+                         conv_db_set_compaction_watermark(conv_id, user_id, "summary v1", 100));
+   memset(&conv, 0, sizeof(conv));
+   TEST_ASSERT_EQUAL_INT(AUTH_DB_SUCCESS, conv_db_get(conv_id, user_id, &conv));
+   TEST_ASSERT_EQUAL_INT64(100, conv.context_watermark_msg_id);
+   TEST_ASSERT_EQUAL_STRING("summary v1", conv.compaction_summary);
+   conv_free(&conv);
+
+   TEST_ASSERT_EQUAL_INT(AUTH_DB_SUCCESS,
+                         conv_db_set_compaction_watermark(conv_id, user_id, "summary v2", 200));
+   memset(&conv, 0, sizeof(conv));
+   TEST_ASSERT_EQUAL_INT(AUTH_DB_SUCCESS, conv_db_get(conv_id, user_id, &conv));
+   TEST_ASSERT_EQUAL_INT64(200, conv.context_watermark_msg_id);
+   conv_free(&conv);
+
+   /* Stale rewind: 150 < 200 -> benign no-op (success), watermark + summary unchanged. */
+   TEST_ASSERT_EQUAL_INT(AUTH_DB_SUCCESS,
+                         conv_db_set_compaction_watermark(conv_id, user_id, "summary STALE", 150));
+   memset(&conv, 0, sizeof(conv));
+   TEST_ASSERT_EQUAL_INT(AUTH_DB_SUCCESS, conv_db_get(conv_id, user_id, &conv));
+   TEST_ASSERT_EQUAL_INT64(200, conv.context_watermark_msg_id);
+   TEST_ASSERT_EQUAL_STRING("summary v2", conv.compaction_summary);
+   conv_free(&conv);
+
+   /* Invalid watermark (<= 0). */
+   TEST_ASSERT_EQUAL_INT(AUTH_DB_INVALID,
+                         conv_db_set_compaction_watermark(conv_id, user_id, "x", 0));
+}
+
+/* conv_db_get_messages_after: after_id=0 returns all; bounding excludes <= after_id. */
+static void test_get_messages_after_bounds(void) {
+   int user_id = create_and_get_id("wm_after", "hash", false);
+   int64_t conv_id = 0;
+   conv_db_create(user_id, "Watermark after", &conv_id);
+   conv_db_add_message(conv_id, user_id, "user", "m1");
+   conv_db_add_message(conv_id, user_id, "assistant", "m2");
+   conv_db_add_message(conv_id, user_id, "user", "m3");
+
+   int n = 0;
+   TEST_ASSERT_EQUAL_INT(AUTH_DB_SUCCESS,
+                         conv_db_get_messages_after(conv_id, user_id, 0, wm_count_cb, &n));
+   TEST_ASSERT_EQUAL_INT(3, n);
+
+   int64_t *ids = NULL;
+   int idc = 0;
+   TEST_ASSERT_EQUAL_INT(AUTH_DB_SUCCESS, conv_db_get_message_ids(conv_id, user_id, &ids, &idc));
+   TEST_ASSERT_EQUAL_INT(3, idc);
+
+   n = 0;
+   TEST_ASSERT_EQUAL_INT(AUTH_DB_SUCCESS,
+                         conv_db_get_messages_after(conv_id, user_id, ids[0], wm_count_cb, &n));
+   TEST_ASSERT_EQUAL_INT(2, n); /* only m2, m3 (id > ids[0]) */
+
+   n = 0;
+   TEST_ASSERT_EQUAL_INT(AUTH_DB_SUCCESS,
+                         conv_db_get_messages_after(conv_id, user_id, ids[2], wm_count_cb, &n));
+   TEST_ASSERT_EQUAL_INT(0, n); /* nothing after the last message */
+
+   free(ids);
+}
+
 /* ============================================================================
  * User Settings Tests
  * ============================================================================ */
@@ -843,6 +923,8 @@ int main(void) {
    RUN_TEST(test_message_reasoning_null);
    RUN_TEST(test_get_messages_by_range_filters_private_by_default);
    RUN_TEST(test_get_messages_by_range_returns_private_when_opted_in);
+   RUN_TEST(test_compaction_watermark_monotonic);
+   RUN_TEST(test_get_messages_after_bounds);
 
    /* User Settings */
    RUN_TEST(test_user_settings_defaults);
diff --git a/tests/test_config_validate.c b/tests/test_config_validate.c
index 533e84d3..8db70e77 100644
--- a/tests/test_config_validate.c
+++ b/tests/test_config_validate.c
@@ -96,6 +96,12 @@ static void set_valid_defaults(void) {
     * comment about silent-no-op-at-zero. */
    s_config.memory.focus_injection.dominant_token_heuristic.threshold = 0.6f;
    s_config.memory.focus_injection.dominant_token_heuristic.base_penalty = 0.5f;
+   /* memory.recall.* added with the unified recall tool — needs valid defaults
+    * so the baseline config validates clean (mirrors focus_injection above). */
+   s_config.memory.recall.top_k = 40;
+   s_config.memory.recall.budget_bytes = 24576;
+   s_config.memory.recall.min_score = 0.25f;
+   s_config.memory.recall.per_source_max = 16;
    s_config.mqtt.port = 1883;
    s_config.network.workers = 4;
    s_config.network.session_timeout_sec = 3600;
diff --git a/tests/test_focus_source.c b/tests/test_focus_source.c
index f11a7267..e48f31f0 100644
--- a/tests/test_focus_source.c
+++ b/tests/test_focus_source.c
@@ -32,6 +32,7 @@
 #include <unistd.h>
 
 #include "config/dawn_config.h"
+#include "core/focus/focus_candidate_helpers.h"
 #include "core/focus/focus_source.h"
 #include "core/focus/focus_source_internal.h"
 #include "dawn_error.h"
@@ -555,6 +556,21 @@ static void test_byte_budget_truncation(void) {
    focus_result_free(&result);
 }
 
+/* 8b. UTF-8-safe truncation — focus_utf8_safe_cap must never cut inside a
+ *     multi-byte character, or the truncated text is invalid UTF-8 and breaks
+ *     the context_injection WebSocket text frame (browser drops the socket). */
+static void test_utf8_safe_cap(void) {
+   /* "AAA" + em-dash U+2014 (bytes E2 80 94) + "XYZ" — em-dash at offsets 3-5. */
+   const char *t = "AAA\xE2\x80\x94XYZ";
+   TEST_ASSERT_EQUAL_INT(3, (int)focus_utf8_safe_cap(t, 3));          /* boundary before em-dash */
+   TEST_ASSERT_EQUAL_INT(3, (int)focus_utf8_safe_cap(t, 4));          /* mid em-dash -> back to 3 */
+   TEST_ASSERT_EQUAL_INT(3, (int)focus_utf8_safe_cap(t, 5));          /* mid em-dash -> back to 3 */
+   TEST_ASSERT_EQUAL_INT(6, (int)focus_utf8_safe_cap(t, 6));          /* full em-dash kept */
+   TEST_ASSERT_EQUAL_INT(9, (int)focus_utf8_safe_cap(t, 99));         /* whole string fits */
+   TEST_ASSERT_EQUAL_INT(5, (int)focus_utf8_safe_cap("abcdefgh", 5)); /* ASCII: exact cut ok */
+   TEST_ASSERT_EQUAL_INT(0, (int)focus_utf8_safe_cap(NULL, 10));      /* NULL guard */
+}
+
 /* 9. memory ownership — register/compose/free cycle 1000× and confirm the
  *    counter-based proxy returns to zero each iteration.  Smoke-test;
  *    valgrind locally is the real audit. */
@@ -770,6 +786,7 @@ int main(void) {
    RUN_TEST(test_filter_on_retrieval_skips_external);
    RUN_TEST(test_requires_embedding_skipped_without_query);
    RUN_TEST(test_byte_budget_truncation);
+   RUN_TEST(test_utf8_safe_cap);
    RUN_TEST(test_memory_ownership_cycle);
    RUN_TEST(test_double_register_same_source_id_fails);
    RUN_TEST(test_na_score_contributes_zero);
diff --git a/tests/test_recall_format.c b/tests/test_recall_format.c
new file mode 100644
index 00000000..0c68eb8a
--- /dev/null
+++ b/tests/test_recall_format.c
@@ -0,0 +1,160 @@
+/*
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ * By contributing to this project, you agree to license your contributions
+ * under the GPLv3 (or any later version) or any future licenses chosen by
+ * the project author(s).
+ *
+ * Unit tests for the recall result formatter (src/tools/recall_format.c).
+ * Drives recall_format_result() against synthetic focus_compose results — no
+ * daemon, no DB, no embedding engine.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "core/focus/focus_source.h"
+#include "tools/recall_format.h"
+#include "unity.h"
+
+void setUp(void) {
+}
+void tearDown(void) {
+}
+
+/* Build a candidate that owns nothing the formatter frees (the formatter only
+ * reads).  We strdup so there are no -Wwrite-strings issues and free after. */
+static focus_candidate_t mk(const char *src, const char *item_id, const char *text) {
+   focus_candidate_t c;
+   memset(&c, 0, sizeof(c));
+   c.source_id = src; /* const string literal — fine, framework owns in prod */
+   c.item_id = strdup(item_id);
+   c.text = strdup(text);
+   return c;
+}
+
+static void free_candidates(focus_candidate_t *arr, int n) {
+   for (int i = 0; i < n; i++) {
+      free(arr[i].item_id);
+      free(arr[i].text);
+   }
+}
+
+static void test_zero_results_is_explicit(void) {
+   focus_compose_result_t res;
+   memset(&res, 0, sizeof(res));
+   char *out = recall_format_result("project zephyr", &res, NULL, 0);
+   TEST_ASSERT_NOT_NULL(out);
+   TEST_ASSERT_NOT_NULL(strstr(out, "nothing on file"));
+   TEST_ASSERT_NOT_NULL(strstr(out, "project zephyr"));
+   free(out);
+}
+
+static void test_grouping_and_pointers(void) {
+   focus_candidate_t cands[4];
+   cands[0] = mk("memory_fact", "fact:7858", "Open Sauce booth accepted on 2026-02-13.");
+   cands[1] = mk("memory_summary", "summary:42", "Discussed slipping the beta to June.");
+   cands[2] = mk("document_chunk", "document_chunk:903", "[spec_v3.pdf] Section 4 lists criteria.");
+   cands[3] = mk("calendar_event", "calendar_occ:11", "Design review");
+
+   focus_compose_result_t res;
+   memset(&res, 0, sizeof(res));
+   res.candidates = cands;
+   res.candidate_count = 4;
+
+   char *out = recall_format_result("open sauce status", &res, NULL, 0);
+   TEST_ASSERT_NOT_NULL(out);
+
+   /* Section headers present. */
+   TEST_ASSERT_NOT_NULL(strstr(out, "MEMORY — facts & relationships"));
+   TEST_ASSERT_NOT_NULL(strstr(out, "MEMORY — past conversation summaries"));
+   TEST_ASSERT_NOT_NULL(strstr(out, "NOTES & DOCUMENTS"));
+   TEST_ASSERT_NOT_NULL(strstr(out, "CALENDAR"));
+
+   /* Read-pointers: fact gets a [memory id], document gets document_read. */
+   TEST_ASSERT_NOT_NULL(strstr(out, "[memory id 7858]"));
+   TEST_ASSERT_NOT_NULL(strstr(out, "document_read \"spec_v3.pdf\""));
+   TEST_ASSERT_NOT_NULL(strstr(out, "calendar"));
+   /* Summary text is shown but gets NO fetch pointer — `memory get` can't
+    * resolve a summary id, so `[memory id 42]` must NOT appear. */
+   TEST_ASSERT_NOT_NULL(strstr(out, "slipping the beta"));
+   TEST_ASSERT_NULL(strstr(out, "[memory id 42]"));
+
+   free(out);
+   free_candidates(cands, 4);
+}
+
+static void test_empty_families_listed(void) {
+   focus_candidate_t cands[1];
+   cands[0] = mk("memory_fact", "fact:1", "Only a memory fact here.");
+
+   focus_compose_result_t res;
+   memset(&res, 0, sizeof(res));
+   res.candidates = cands;
+   res.candidate_count = 1;
+
+   char *out = recall_format_result("q", &res, NULL, 0);
+   TEST_ASSERT_NOT_NULL(out);
+   /* Families with no hits are named so absence is legible to the LLM. */
+   TEST_ASSERT_NOT_NULL(strstr(out, "Nothing found in:"));
+   TEST_ASSERT_NOT_NULL(strstr(out, "NOTES & DOCUMENTS"));
+   TEST_ASSERT_NOT_NULL(strstr(out, "CALENDAR"));
+   free(out);
+   free_candidates(cands, 1);
+}
+
+static void test_null_injected_emits_overlap_note(void) {
+   focus_candidate_t cands[1];
+   cands[0] = mk("memory_fact", "fact:1", "A fact.");
+   focus_compose_result_t res;
+   memset(&res, 0, sizeof(res));
+   res.candidates = cands;
+   res.candidate_count = 1;
+
+   char *out = recall_format_result("q", &res, NULL, 0);
+   TEST_ASSERT_NOT_NULL(out);
+   TEST_ASSERT_NOT_NULL(strstr(out, "in this turn's injected context"));
+   free(out);
+   free_candidates(cands, 1);
+}
+
+static void test_injected_id_is_marked(void) {
+   focus_candidate_t cands[2];
+   cands[0] = mk("memory_fact", "fact:100", "Already-injected fact.");
+   cands[1] = mk("memory_fact", "fact:200", "Fresh fact.");
+   focus_compose_result_t res;
+   memset(&res, 0, sizeof(res));
+   res.candidates = cands;
+   res.candidate_count = 2;
+
+   const char *injected[] = { "fact:100" };
+   char *out = recall_format_result("q", &res, injected, 1);
+   TEST_ASSERT_NOT_NULL(out);
+   /* The injected entry carries the already-in-context marker (footer not asserted). */
+   TEST_ASSERT_NOT_NULL(strstr(out, "already in current context"));
+   /* The NULL-fallback overlap note must NOT appear when a set was supplied. */
+   TEST_ASSERT_NULL(strstr(out, "in this turn's injected context"));
+   free(out);
+   free_candidates(cands, 2);
+}
+
+int main(void) {
+   UNITY_BEGIN();
+   RUN_TEST(test_zero_results_is_explicit);
+   RUN_TEST(test_grouping_and_pointers);
+   RUN_TEST(test_empty_families_listed);
+   RUN_TEST(test_null_injected_emits_overlap_note);
+   RUN_TEST(test_injected_id_is_marked);
+   return UNITY_END();
+}
diff --git a/tool_instructions/render_visual/chart.md b/tool_instructions/render_visual/chart.md
index 40b5274d..b9649ac6 100644
--- a/tool_instructions/render_visual/chart.md
+++ b/tool_instructions/render_visual/chart.md
@@ -6,7 +6,7 @@ Use Chart.js for data visualization. Load from the local server inside HTML type
 ```html
 <script src="/js/vendor/chart.umd.js"></script>
 
-<canvas id="myChart" style="max-height:400px"></canvas>
+<canvas id="myChart"></canvas>
 <script>
 const ctx = document.getElementById('myChart');
 new Chart(ctx, {
@@ -14,12 +14,21 @@ new Chart(ctx, {
    data: { ... },
    options: {
       responsive: true,
+      maintainAspectRatio: false,
       plugins: { legend: { position: 'top' } }
    }
 });
 </script>
 ```
 
+## Sizing (IMPORTANT)
+The visual is rendered in a frame with a fixed height. Always set
+`maintainAspectRatio: false` and do NOT put a `height` or `max-height` on the
+`<canvas>`. With `maintainAspectRatio: true` (the Chart.js default) plus a
+`max-height`-only canvas, the chart resolves its height from a content-sized
+container, collapses to 0×0, and renders as a black box. `responsive: true` +
+`maintainAspectRatio: false` lets the chart fill the frame correctly.
+
 ## Theming
 Read CSS variables and apply to Chart.js:
 ```javascript
diff --git a/www/index.html b/www/index.html
index f379af4a..bb7404f0 100644
--- a/www/index.html
+++ b/www/index.html
@@ -3251,6 +3251,7 @@ <h3 id="camera-modal-title">Camera Capture</h3>
       <script defer src="/js/ui/export.js"></script>
       <script defer src="/js/ui/history.js"></script>
       <script defer src="/js/ui/memory_aliases.js"></script>
+      <script defer src="/js/ui/memory_import.js"></script>
       <script defer src="/js/ui/memory.js"></script>
       <script defer src="/js/ui/silent-observe.js"></script>
       <script defer src="/js/ui/context-injection.js"></script>
diff --git a/www/js/dawn.js b/www/js/dawn.js
index 05465c83..59a68c91 100644
--- a/www/js/dawn.js
+++ b/www/js/dawn.js
@@ -139,6 +139,19 @@
                               DawnState.streamingState.content = '';
                               DawnState.streamingState.textElement = newTextEl;
                            }
+                        } else {
+                           /* No active streaming entry: the visual arrived during a tool
+                            * phase before the answer streamed (a diagram-only turn). Render
+                            * it as its own assistant entry so it shows live — otherwise it
+                            * would only appear on reload, since finalizeStream() drains
+                            * pending visuals into the save payload, not the live view.
+                            * Mirrors addNormalEntry's visual-segment entry structure. */
+                           var vEntry = document.createElement('div');
+                           vEntry.className = 'transcript-entry assistant';
+                           vEntry.innerHTML =
+                              '<div class="role">assistant</div><div class="text"></div>';
+                           DawnElements.transcript.appendChild(vEntry);
+                           DawnVisualRender.renderVisuals(vEntry, extracted.visuals);
                         }
                         DawnElements.transcript.scrollTop = DawnElements.transcript.scrollHeight;
                      }
diff --git a/www/js/ui/history.js b/www/js/ui/history.js
index 1e4e5275..90d01c6e 100644
--- a/www/js/ui/history.js
+++ b/www/js/ui/history.js
@@ -735,26 +735,22 @@
    function handleContextCompacted(payload) {
       console.log('Context compacted:', payload);
 
-      if (!historyState.activeConversationId) {
-         console.log('No active conversation to continue');
-         return;
-      }
-
-      const summary = payload.summary || '';
       const tokensBefore = payload.tokens_before || 0;
       const tokensAfter = payload.tokens_after || 0;
-
       console.log(`Compaction: ${tokensBefore} -> ${tokensAfter} tokens`);
-      // Existing per-turn context blocks reference message IDs that may have
-      // been folded into the new snapshot.  Reset the trust-tier surfaces so
-      // stale provenance and turn dedup don't carry across the boundary.
+
+      // v67: compaction no longer splits the conversation. The server records an
+      // in-conversation watermark + summary and bounds context on reload, so the
+      // conversation stays single and writable — no continuation, no archive/lock.
+      // We only reset the per-turn trust-tier surfaces so stale provenance and turn
+      // dedup don't carry across the compaction boundary. (requestContinueConversation
+      // / handleContinueConversationResponse remain for legacy already-split convs.)
       if (typeof DawnContextInjection !== 'undefined') {
          DawnContextInjection.reset();
       }
       if (typeof DawnSilentObserve !== 'undefined') {
          DawnSilentObserve.reset();
       }
-      requestContinueConversation(historyState.activeConversationId, summary);
    }
 
    function handleContinueConversationResponse(payload) {
diff --git a/www/js/ui/memory.js b/www/js/ui/memory.js
index 1b2651e9..06a1931e 100644
--- a/www/js/ui/memory.js
+++ b/www/js/ui/memory.js
@@ -48,10 +48,6 @@
    // Focus management state
    let focusTrapCleanup = null;
    let triggerElement = null;
-   /* Separate focus-trap cleanup for the import modal — it's a child
-    * dialog of the memory popover, so it needs its own trap that
-    * cycles Tab within the modal instead of the parent popover. */
-   let importFocusTrapCleanup = null;
 
    /* Phase 2 entity-merge: one-shot flag so auto-route-to-Graph fires
     * on the FIRST open per page-load when proposals are pending, then
@@ -81,13 +77,10 @@
       statContacts: null,
    };
 
-   /* Import modal state */
-   let importState = {
-      source: 'paste', // 'paste' or 'file'
-      fileData: null, // Parsed JSON from file, or null
-      fileName: null,
-      previewData: null, // Server preview response
-   };
+   /* Import/Export surface (modals + their private state) lives in
+    * www/js/ui/memory_import.js — see DawnMemoryImport.  The
+    * DawnMemory.handleExportResponse / handleImportResponse thin-
+    * forwarders below preserve the surface dawn.js dispatches against. */
 
    /* =============================================================================
     * API Requests
@@ -268,6 +261,16 @@
       if (window.DawnMemoryAliases) DawnMemoryAliases.handleProposalResolveResponse(payload);
    }
 
+   /* Import/Export response handlers — thin forwarders that preserve the
+    * DawnMemory.handle*Response surface dawn.js dispatches against, while
+    * the real work lives in www/js/ui/memory_import.js. */
+   function handleExportResponse(payload) {
+      if (window.DawnMemoryImport) DawnMemoryImport.handleExportResponse(payload);
+   }
+   function handleImportResponse(payload) {
+      if (window.DawnMemoryImport) DawnMemoryImport.handleImportResponse(payload);
+   }
+
    function handleFactsResponse(payload) {
       memoryState.loading = false;
 
@@ -1413,478 +1416,11 @@
    }
 
    /* =============================================================================
-    * Export Handling
-    * ============================================================================= */
-
-   function handleExport() {
-      if (typeof DawnWS === 'undefined' || !DawnWS.isConnected()) return;
-      const modal = document.getElementById('memory-export-modal');
-      if (modal) modal.classList.remove('hidden');
-   }
-
-   function closeExportModal() {
-      const modal = document.getElementById('memory-export-modal');
-      if (modal) modal.classList.add('hidden');
-   }
-
-   function doExport(format) {
-      closeExportModal();
-      DawnWS.send({
-         type: 'export_memories',
-         payload: { format: format },
-      });
-   }
-
-   function initExportModal() {
-      const modal = document.getElementById('memory-export-modal');
-      if (!modal) return;
-
-      const textBtn = document.getElementById('memory-export-text-btn');
-      const jsonBtn = document.getElementById('memory-export-json-btn');
-      const cancelBtn = document.getElementById('memory-export-cancel-btn');
-
-      if (textBtn) textBtn.addEventListener('click', () => doExport('text'));
-      if (jsonBtn) jsonBtn.addEventListener('click', () => doExport('json'));
-      if (cancelBtn) cancelBtn.addEventListener('click', closeExportModal);
-
-      modal.addEventListener('click', (e) => {
-         if (e.target === modal) closeExportModal();
-      });
-   }
-
-   function handleExportResponse(payload) {
-      if (!payload || !payload.success) {
-         if (typeof DawnToast !== 'undefined') {
-            DawnToast.show(payload?.error || 'Export failed', 'error');
-         }
-         return;
-      }
-
-      let blob, filename;
-      if (payload.format === 'text') {
-         blob = new Blob([payload.data], { type: 'text/plain' });
-         filename = `dawn-memories-${new Date().toISOString().slice(0, 10)}.txt`;
-      } else {
-         const jsonStr = JSON.stringify(payload.data, null, 2);
-         blob = new Blob([jsonStr], { type: 'application/json' });
-         filename = `dawn-memories-${new Date().toISOString().slice(0, 10)}.json`;
-      }
-
-      // Trigger download
-      const url = URL.createObjectURL(blob);
-      const a = document.createElement('a');
-      a.href = url;
-      a.download = filename;
-      document.body.appendChild(a);
-      a.click();
-      document.body.removeChild(a);
-      URL.revokeObjectURL(url);
-
-      const total = (payload.fact_count || 0) + (payload.pref_count || 0);
-      if (typeof DawnToast !== 'undefined') {
-         DawnToast.show(`Exported ${total} memories`, 'success');
-      }
-   }
-
-   /* =============================================================================
-    * Import Handling
+    * Export / Import — moved to www/js/ui/memory_import.js (DawnMemoryImport).
+    * The DawnMemory.handleExportResponse / handleImportResponse thin-forwarders
+    * (defined above; exported in the public surface below) keep dawn.js's dispatch unchanged.
     * ============================================================================= */
 
-   function openImportModal() {
-      const modal = document.getElementById('memory-import-modal');
-      if (!modal) return;
-      modal.classList.remove('hidden');
-      resetImportState();
-      /* Focus the textarea for immediate input.  Defer via
-       * setTimeout(0) so the .hidden-removed visibility transition
-       * settles before focus moves — consistent with the same
-       * pattern in music.js and scheduler-queue.js. */
-      const textArea = document.getElementById('memory-import-text');
-      if (textArea) setTimeout(() => textArea.focus(), 0);
-      /* Trap Tab/Shift+Tab within the modal so it doesn't escape to
-       * the parent memory popover.  skipInitialFocus because the
-       * textarea focus above (deferred via setTimeout) is the
-       * intended initial focus target. */
-      const M = window.DawnSettingsModals;
-      if (M && typeof M.trapFocus === 'function') {
-         importFocusTrapCleanup = M.trapFocus(modal, { skipInitialFocus: true });
-      }
-   }
-
-   function closeImportModal() {
-      const modal = document.getElementById('memory-import-modal');
-      if (modal) modal.classList.add('hidden');
-      resetImportState();
-      if (importFocusTrapCleanup) {
-         importFocusTrapCleanup();
-         importFocusTrapCleanup = null;
-      }
-      // Return focus to trigger element
-      if (memoryElements.importBtn) memoryElements.importBtn.focus();
-   }
-
-   function resetImportState() {
-      importState.source = 'paste';
-      importState.fileData = null;
-      importState.fileName = null;
-      importState.previewData = null;
-
-      const textArea = document.getElementById('memory-import-text');
-      if (textArea) textArea.value = '';
-
-      const filenameEl = document.getElementById('memory-import-filename');
-      if (filenameEl) {
-         filenameEl.textContent = '';
-         filenameEl.classList.add('hidden');
-      }
-
-      const preview = document.getElementById('memory-import-preview');
-      if (preview) preview.classList.add('hidden');
-
-      const previewBtn = document.getElementById('memory-import-preview-btn');
-      if (previewBtn) {
-         previewBtn.disabled = true;
-         previewBtn.classList.remove('hidden');
-      }
-
-      const commitBtn = document.getElementById('memory-import-commit-btn');
-      if (commitBtn) commitBtn.classList.add('hidden');
-
-      // Reset tab state — tablist.sync() reads importState.source
-      // (just set to 'paste' above) and re-applies markup.
-      if (importTablist) importTablist.sync();
-      const pastePanel = document.getElementById('memory-import-paste');
-      const filePanel = document.getElementById('memory-import-file');
-      const helpPanel = document.getElementById('memory-import-help');
-      if (pastePanel) pastePanel.classList.remove('hidden');
-      if (filePanel) filePanel.classList.add('hidden');
-      if (helpPanel) helpPanel.classList.add('hidden');
-   }
-
-   /* Import-modal tab strip — bound to the shared DawnTablist helper
-    * via importTablist below.  attr='source' because the HTML uses
-    * data-source="paste|file" instead of data-tab. */
-   let importTablist = null;
-
-   function switchImportSource(source) {
-      importState.source = source;
-      if (importTablist) importTablist.sync();
-      const pastePanel = document.getElementById('memory-import-paste');
-      const filePanel = document.getElementById('memory-import-file');
-      const helpPanel = document.getElementById('memory-import-help');
-      if (pastePanel) pastePanel.classList.toggle('hidden', source !== 'paste');
-      if (filePanel) filePanel.classList.toggle('hidden', source !== 'file');
-      if (helpPanel) helpPanel.classList.add('hidden');
-      updateImportPreviewBtn();
-   }
-
-   function updateImportPreviewBtn() {
-      const previewBtn = document.getElementById('memory-import-preview-btn');
-      if (!previewBtn) return;
-
-      if (importState.source === 'paste') {
-         const textArea = document.getElementById('memory-import-text');
-         previewBtn.disabled = !textArea || textArea.value.trim().length < 3;
-      } else {
-         previewBtn.disabled = !importState.fileData;
-      }
-   }
-
-   function handleImportFileSelect(e) {
-      const file = e.target.files[0];
-      if (!file) return;
-
-      if (file.size > 256 * 1024) {
-         if (typeof DawnToast !== 'undefined') {
-            DawnToast.show('File too large (256KB max)', 'error');
-         }
-         e.target.value = '';
-         return;
-      }
-
-      const filenameEl = document.getElementById('memory-import-filename');
-      const reader = new FileReader();
-
-      reader.onload = function (evt) {
-         const content = evt.target.result;
-
-         if (file.name.endsWith('.json')) {
-            try {
-               importState.fileData = JSON.parse(content);
-               importState.fileName = file.name;
-               if (filenameEl) {
-                  filenameEl.textContent = file.name;
-                  filenameEl.classList.remove('hidden');
-               }
-            } catch {
-               if (typeof DawnToast !== 'undefined') {
-                  DawnToast.show('Invalid JSON file', 'error');
-               }
-               return;
-            }
-         } else {
-            // Plain text file - treat as paste
-            importState.fileData = content;
-            importState.fileName = file.name;
-            if (filenameEl) {
-               filenameEl.textContent = file.name;
-               filenameEl.classList.remove('hidden');
-            }
-         }
-         updateImportPreviewBtn();
-      };
-
-      reader.readAsText(file);
-   }
-
-   /**
-    * Build the import WebSocket message from current input state
-    */
-   function buildImportMessage(commit) {
-      let payload;
-      if (importState.source === 'paste') {
-         const textArea = document.getElementById('memory-import-text');
-         const text = textArea ? textArea.value.trim() : '';
-         if (!text) return null;
-
-         // Auto-detect: if it starts with { it's likely JSON
-         if (text.startsWith('{')) {
-            try {
-               const parsed = JSON.parse(text);
-               payload = { format: 'json', data: parsed };
-            } catch {
-               payload = { format: 'text', text: text };
-            }
-         } else {
-            payload = { format: 'text', text: text };
-         }
-      } else {
-         if (!importState.fileData) return null;
-         if (typeof importState.fileData === 'string') {
-            payload = { format: 'text', text: importState.fileData };
-         } else {
-            payload = { format: 'json', data: importState.fileData };
-         }
-      }
-
-      payload.commit = commit;
-      return { type: 'import_memories', payload };
-   }
-
-   function handleImportPreview() {
-      if (typeof DawnWS === 'undefined' || !DawnWS.isConnected()) return;
-
-      const msg = buildImportMessage(false);
-      if (!msg) return;
-
-      const previewBtn = document.getElementById('memory-import-preview-btn');
-      if (previewBtn) {
-         previewBtn.disabled = true;
-         previewBtn.textContent = 'Analyzing...';
-      }
-
-      DawnWS.send(msg);
-   }
-
-   function handleImportCommit() {
-      if (typeof DawnWS === 'undefined' || !DawnWS.isConnected()) return;
-      if (!importState.previewData) return;
-
-      const msg = buildImportMessage(true);
-      if (!msg) return;
-
-      const commitBtn = document.getElementById('memory-import-commit-btn');
-      if (commitBtn) {
-         commitBtn.disabled = true;
-         commitBtn.textContent = 'Importing...';
-      }
-
-      DawnWS.send(msg);
-   }
-
-   /**
-    * Reset to preview mode when content changes after a preview
-    */
-   function onImportContentChanged() {
-      updateImportPreviewBtn();
-      if (importState.previewData) {
-         importState.previewData = null;
-         const preview = document.getElementById('memory-import-preview');
-         if (preview) preview.classList.add('hidden');
-         const previewBtn = document.getElementById('memory-import-preview-btn');
-         const commitBtn = document.getElementById('memory-import-commit-btn');
-         if (previewBtn) {
-            previewBtn.classList.remove('hidden');
-            previewBtn.textContent = 'Preview';
-         }
-         if (commitBtn) commitBtn.classList.add('hidden');
-      }
-   }
-
-   function handleImportResponse(payload) {
-      if (!payload) return;
-
-      if (!payload.success) {
-         if (typeof DawnToast !== 'undefined') {
-            DawnToast.show(payload.error || 'Import failed', 'error');
-         }
-         const previewBtn = document.getElementById('memory-import-preview-btn');
-         if (previewBtn) {
-            previewBtn.disabled = false;
-            previewBtn.textContent = 'Preview';
-         }
-         return;
-      }
-
-      if (payload.committed) {
-         // Import complete
-         const total = (payload.imported_facts || 0) + (payload.imported_prefs || 0);
-         if (typeof DawnToast !== 'undefined') {
-            DawnToast.show(
-               `Imported ${total} memories` +
-                  (payload.skipped_dupes ? ` (${payload.skipped_dupes} duplicates skipped)` : ''),
-               'success'
-            );
-         }
-         closeImportModal();
-         // Refresh memory data
-         requestStats();
-         switchTab(memoryState.activeTab);
-         return;
-      }
-
-      // Preview mode - show results
-      importState.previewData = payload;
-      const preview = document.getElementById('memory-import-preview');
-      const summaryEl = preview?.querySelector('.memory-import-summary');
-      const listEl = preview?.querySelector('.memory-import-list');
-
-      if (!preview || !summaryEl || !listEl) return;
-
-      const totalNew = (payload.imported_facts || 0) + (payload.imported_prefs || 0);
-      summaryEl.innerHTML =
-         `<span>New: <span class="count">${totalNew}</span></span>` +
-         `<span>Duplicates skipped: <span class="count">${payload.skipped_dupes || 0}</span></span>` +
-         (payload.skipped_empty
-            ? `<span>Empty skipped: <span class="count">${payload.skipped_empty}</span></span>`
-            : '');
-
-      // Render preview items (max 50 in UI)
-      listEl.innerHTML = '';
-      const items = payload.preview || [];
-      const maxShow = Math.min(items.length, 50);
-      for (let i = 0; i < maxShow; i++) {
-         const item = items[i];
-         const div = document.createElement('div');
-         div.className = 'preview-item';
-         if (item.type === 'preference') {
-            div.innerHTML =
-               `<span class="preview-type">pref</span>` +
-               `${escapeHtml(item.category)}: ${escapeHtml(item.value)}`;
-         } else {
-            div.innerHTML = `<span class="preview-type">fact</span>${escapeHtml(item.text)}`;
-         }
-         listEl.appendChild(div);
-      }
-      if (items.length > maxShow) {
-         const more = document.createElement('div');
-         more.className = 'preview-item';
-         more.style.color = 'var(--text-secondary)';
-         more.textContent = `... and ${items.length - maxShow} more`;
-         listEl.appendChild(more);
-      }
-
-      preview.classList.remove('hidden');
-
-      // Switch buttons: hide preview, show commit
-      const previewBtn = document.getElementById('memory-import-preview-btn');
-      const commitBtn = document.getElementById('memory-import-commit-btn');
-      if (previewBtn) previewBtn.classList.add('hidden');
-      if (commitBtn) {
-         commitBtn.classList.remove('hidden');
-         commitBtn.disabled = totalNew === 0;
-         commitBtn.textContent = totalNew > 0 ? `Import ${totalNew} Memories` : 'Nothing to Import';
-      }
-   }
-
-   function initImportModal() {
-      const closeBtn = document.getElementById('memory-import-close');
-      const cancelBtn = document.getElementById('memory-import-cancel');
-      const previewBtn = document.getElementById('memory-import-preview-btn');
-      const commitBtn = document.getElementById('memory-import-commit-btn');
-      const fileInput = document.getElementById('memory-import-file-input');
-      const textArea = document.getElementById('memory-import-text');
-
-      if (closeBtn) closeBtn.addEventListener('click', closeImportModal);
-      if (cancelBtn) cancelBtn.addEventListener('click', closeImportModal);
-      if (previewBtn) previewBtn.addEventListener('click', handleImportPreview);
-      if (commitBtn) commitBtn.addEventListener('click', handleImportCommit);
-      if (fileInput) fileInput.addEventListener('change', handleImportFileSelect);
-      if (textArea) textArea.addEventListener('input', onImportContentChanged);
-
-      // Help popup toggle
-      const helpBtn = document.getElementById('memory-import-help-btn');
-      const helpPanel = document.getElementById('memory-import-help');
-      const helpClose = document.getElementById('memory-import-help-close');
-      const pastePanel = document.getElementById('memory-import-paste');
-
-      if (helpBtn && helpPanel && pastePanel) {
-         helpBtn.addEventListener('click', () => {
-            pastePanel.classList.add('hidden');
-            helpPanel.classList.remove('hidden');
-         });
-      }
-      if (helpClose && helpPanel && pastePanel) {
-         helpClose.addEventListener('click', () => {
-            helpPanel.classList.add('hidden');
-            pastePanel.classList.remove('hidden');
-         });
-      }
-
-      // Copy prompt button (SVG icon swap)
-      const ICON_COPY =
-         '<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" ' +
-         'stroke-width="2" stroke-linecap="round" stroke-linejoin="round">' +
-         '<rect x="9" y="9" width="13" height="13" rx="2" ry="2"/>' +
-         '<path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"/></svg>';
-      const ICON_CHECK =
-         '<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" ' +
-         'stroke-width="2.5" stroke-linecap="round" stroke-linejoin="round">' +
-         '<polyline points="20 6 9 17 4 12"/></svg>';
-
-      const copyPromptBtn = document.getElementById('memory-import-copy-prompt');
-      if (copyPromptBtn) {
-         copyPromptBtn.addEventListener('click', () => {
-            const promptText = document.getElementById('memory-import-prompt-text');
-            if (!promptText) return;
-            navigator.clipboard.writeText(promptText.textContent).then(() => {
-               copyPromptBtn.innerHTML = ICON_CHECK;
-               setTimeout(() => (copyPromptBtn.innerHTML = ICON_COPY), 2000);
-            });
-         });
-      }
-
-      // Tab switching — shared DawnTablist helper, attr='source'.
-      const importTabs = document.querySelectorAll('.memory-import-tab');
-      if (window.DawnTablist && importTabs.length > 0) {
-         importTablist = window.DawnTablist.bind({
-            tabs: importTabs,
-            attr: 'source',
-            getActive: () => importState.source,
-            onActivate: (name) => switchImportSource(name),
-         });
-         importTablist.sync(); /* initial markup → matches importState.source */
-      }
-
-      // Close on overlay click
-      const modal = document.getElementById('memory-import-modal');
-      if (modal) {
-         modal.addEventListener('click', (e) => {
-            if (e.target === modal) closeImportModal();
-         });
-      }
-   }
-
    /* =============================================================================
     * Utility Functions
     * ============================================================================= */
@@ -1985,6 +1521,20 @@
          });
       }
 
+      /* Import/Export surface (modals + buttons) — owned by
+       * www/js/ui/memory_import.js.  It wires its own buttons/modals;
+       * onMemoriesChanged is the post-commit refresh (matches the old
+       * inline handleImportResponse: re-pull stats + reload active tab). */
+      if (window.DawnMemoryImport) {
+         DawnMemoryImport.init({
+            escapeHtml: escapeHtml,
+            onMemoriesChanged: () => {
+               requestStats();
+               switchTab(memoryState.activeTab);
+            },
+         });
+      }
+
       // Set up event delegation for delete buttons (single listener)
       setupDeleteDelegation();
 
@@ -2019,17 +1569,7 @@
          memoryElements.forgetAllBtn.addEventListener('click', handleForgetAll);
       }
 
-      // Export/Import handlers
-      if (memoryElements.exportBtn) {
-         memoryElements.exportBtn.addEventListener('click', handleExport);
-      }
-      if (memoryElements.importBtn) {
-         memoryElements.importBtn.addEventListener('click', openImportModal);
-      }
-
-      // Initialize import modal
-      initImportModal();
-      initExportModal();
+      // Export/Import buttons + modals are wired by DawnMemoryImport.init() above.
 
       // Source modal close button, overlay click, and ESC key
       const srcClose = document.getElementById('memory-source-close');
diff --git a/www/js/ui/memory_import.js b/www/js/ui/memory_import.js
new file mode 100644
index 00000000..26c47295
--- /dev/null
+++ b/www/js/ui/memory_import.js
@@ -0,0 +1,571 @@
+/**
+ * DAWN Memory — Import / Export surface
+ *
+ * Owns the memory Export modal and the Import modal (paste / file /
+ * preview / commit), their private state, and the two WebSocket
+ * response handlers (handleExportResponse / handleImportResponse).
+ * Split out of memory.js on 2026-06-16 because that file exceeded the
+ * 1500-line JS soft limit.
+ *
+ * Loaded BEFORE memory.js in index.html so memory.js's init() can call
+ * DawnMemoryImport.init(ctx) once the parent module's state is ready.
+ * The DawnMemory.handleExportResponse / handleImportResponse surface
+ * is preserved via thin forwarders in memory.js so dawn.js's dispatch
+ * switch stays pointed at DawnMemory.*.
+ */
+(function () {
+   'use strict';
+
+   /* =============================================================================
+    * Module state — owned here, NOT shared with memory.js's memoryState.
+    * ============================================================================= */
+
+   /* Import modal state */
+   let importState = {
+      source: 'paste', // 'paste' or 'file'
+      fileData: null, // Parsed JSON (.json file) or raw text (any other file), or null
+      fileName: null,
+      previewData: null, // Server preview response
+   };
+
+   /* Import-modal tab strip — bound to the shared DawnTablist helper
+    * via importTablist below.  attr='source' because the HTML uses
+    * data-source="paste|file" instead of data-tab. */
+   let importTablist = null;
+
+   /* Separate focus-trap cleanup for the import modal — it's a child
+    * dialog of the memory popover, so it needs its own trap that
+    * cycles Tab within the modal instead of the parent popover. */
+   let importFocusTrapCleanup = null;
+
+   /* Injected at init() time — references to parent module helpers.
+    * memory.js passes { escapeHtml, onMemoriesChanged } (the post-import
+    * refresh path).  The export/import buttons are NOT routed through
+    * ctx — this module looks them up by element id itself. */
+   let ctx = null;
+
+   /* Local escapeHtml shim — uses the parent's escaper if init() injected one,
+    * else a real HTML-entity escaper. The output feeds innerHTML, so the
+    * fallback must never return the raw string (that would be an XSS sink if
+    * ctx is missing/failed to init). */
+   function escapeHtml(s) {
+      if (ctx && ctx.escapeHtml) return ctx.escapeHtml(s);
+      return String(s == null ? '' : s)
+         .replace(/&/g, '&amp;')
+         .replace(/</g, '&lt;')
+         .replace(/>/g, '&gt;')
+         .replace(/"/g, '&quot;')
+         .replace(/'/g, '&#39;');
+   }
+
+   /* =============================================================================
+    * Init
+    * ============================================================================= */
+
+   function init(injectedCtx) {
+      ctx = injectedCtx;
+
+      // Export/Import button handlers.
+      const exportBtn = document.getElementById('memory-export');
+      if (exportBtn) {
+         exportBtn.addEventListener('click', handleExport);
+      }
+      const importBtn = document.getElementById('memory-import');
+      if (importBtn) {
+         importBtn.addEventListener('click', openImportModal);
+      }
+
+      // Wire the modals.
+      initImportModal();
+      initExportModal();
+   }
+
+   /* =============================================================================
+    * Export Handling
+    * ============================================================================= */
+
+   function handleExport() {
+      if (typeof DawnWS === 'undefined' || !DawnWS.isConnected()) return;
+      const modal = document.getElementById('memory-export-modal');
+      if (modal) modal.classList.remove('hidden');
+   }
+
+   function closeExportModal() {
+      const modal = document.getElementById('memory-export-modal');
+      if (modal) modal.classList.add('hidden');
+   }
+
+   function doExport(format) {
+      closeExportModal();
+      DawnWS.send({
+         type: 'export_memories',
+         payload: { format: format },
+      });
+   }
+
+   function initExportModal() {
+      const modal = document.getElementById('memory-export-modal');
+      if (!modal) return;
+
+      const textBtn = document.getElementById('memory-export-text-btn');
+      const jsonBtn = document.getElementById('memory-export-json-btn');
+      const cancelBtn = document.getElementById('memory-export-cancel-btn');
+
+      if (textBtn) textBtn.addEventListener('click', () => doExport('text'));
+      if (jsonBtn) jsonBtn.addEventListener('click', () => doExport('json'));
+      if (cancelBtn) cancelBtn.addEventListener('click', closeExportModal);
+
+      modal.addEventListener('click', (e) => {
+         if (e.target === modal) closeExportModal();
+      });
+   }
+
+   function handleExportResponse(payload) {
+      if (!payload || !payload.success) {
+         if (typeof DawnToast !== 'undefined') {
+            DawnToast.show(payload?.error || 'Export failed', 'error');
+         }
+         return;
+      }
+
+      let blob, filename;
+      if (payload.format === 'text') {
+         blob = new Blob([payload.data], { type: 'text/plain' });
+         filename = `dawn-memories-${new Date().toISOString().slice(0, 10)}.txt`;
+      } else {
+         const jsonStr = JSON.stringify(payload.data, null, 2);
+         blob = new Blob([jsonStr], { type: 'application/json' });
+         filename = `dawn-memories-${new Date().toISOString().slice(0, 10)}.json`;
+      }
+
+      // Trigger download
+      const url = URL.createObjectURL(blob);
+      const a = document.createElement('a');
+      a.href = url;
+      a.download = filename;
+      document.body.appendChild(a);
+      a.click();
+      document.body.removeChild(a);
+      URL.revokeObjectURL(url);
+
+      const total = (payload.fact_count || 0) + (payload.pref_count || 0);
+      if (typeof DawnToast !== 'undefined') {
+         DawnToast.show(`Exported ${total} memories`, 'success');
+      }
+   }
+
+   /* =============================================================================
+    * Import Handling
+    * ============================================================================= */
+
+   function openImportModal() {
+      const modal = document.getElementById('memory-import-modal');
+      if (!modal) return;
+      modal.classList.remove('hidden');
+      resetImportState();
+      /* Focus the textarea for immediate input.  Defer via
+       * setTimeout(0) so the .hidden-removed visibility transition
+       * settles before focus moves — consistent with the same
+       * pattern in music.js and scheduler-queue.js. */
+      const textArea = document.getElementById('memory-import-text');
+      if (textArea) setTimeout(() => textArea.focus(), 0);
+      /* Trap Tab/Shift+Tab within the modal so it doesn't escape to
+       * the parent memory popover.  skipInitialFocus because the
+       * textarea focus above (deferred via setTimeout) is the
+       * intended initial focus target. */
+      const M = window.DawnSettingsModals;
+      if (M && typeof M.trapFocus === 'function') {
+         /* Release a prior trap first — a double-open (button clicked while the
+          * modal is already up) would otherwise leak the old cleanup handle and
+          * leave two competing traps installed. */
+         if (importFocusTrapCleanup) {
+            importFocusTrapCleanup();
+            importFocusTrapCleanup = null;
+         }
+         importFocusTrapCleanup = M.trapFocus(modal, { skipInitialFocus: true });
+      }
+   }
+
+   function closeImportModal() {
+      const modal = document.getElementById('memory-import-modal');
+      if (modal) modal.classList.add('hidden');
+      resetImportState();
+      if (importFocusTrapCleanup) {
+         importFocusTrapCleanup();
+         importFocusTrapCleanup = null;
+      }
+      // Return focus to trigger element
+      const importBtn = document.getElementById('memory-import');
+      if (importBtn) importBtn.focus();
+   }
+
+   function resetImportState() {
+      importState.source = 'paste';
+      importState.fileData = null;
+      importState.fileName = null;
+      importState.previewData = null;
+
+      const textArea = document.getElementById('memory-import-text');
+      if (textArea) textArea.value = '';
+
+      const filenameEl = document.getElementById('memory-import-filename');
+      if (filenameEl) {
+         filenameEl.textContent = '';
+         filenameEl.classList.add('hidden');
+      }
+
+      const preview = document.getElementById('memory-import-preview');
+      if (preview) preview.classList.add('hidden');
+
+      const previewBtn = document.getElementById('memory-import-preview-btn');
+      if (previewBtn) {
+         previewBtn.disabled = true;
+         previewBtn.classList.remove('hidden');
+      }
+
+      const commitBtn = document.getElementById('memory-import-commit-btn');
+      if (commitBtn) commitBtn.classList.add('hidden');
+
+      // Reset tab state — tablist.sync() reads importState.source
+      // (just set to 'paste' above) and re-applies markup.
+      if (importTablist) importTablist.sync();
+      const pastePanel = document.getElementById('memory-import-paste');
+      const filePanel = document.getElementById('memory-import-file');
+      const helpPanel = document.getElementById('memory-import-help');
+      if (pastePanel) pastePanel.classList.remove('hidden');
+      if (filePanel) filePanel.classList.add('hidden');
+      if (helpPanel) helpPanel.classList.add('hidden');
+   }
+
+   function switchImportSource(source) {
+      importState.source = source;
+      if (importTablist) importTablist.sync();
+      const pastePanel = document.getElementById('memory-import-paste');
+      const filePanel = document.getElementById('memory-import-file');
+      const helpPanel = document.getElementById('memory-import-help');
+      if (pastePanel) pastePanel.classList.toggle('hidden', source !== 'paste');
+      if (filePanel) filePanel.classList.toggle('hidden', source !== 'file');
+      if (helpPanel) helpPanel.classList.add('hidden');
+      updateImportPreviewBtn();
+   }
+
+   function updateImportPreviewBtn() {
+      const previewBtn = document.getElementById('memory-import-preview-btn');
+      if (!previewBtn) return;
+
+      if (importState.source === 'paste') {
+         const textArea = document.getElementById('memory-import-text');
+         previewBtn.disabled = !textArea || textArea.value.trim().length < 3;
+      } else {
+         previewBtn.disabled = !importState.fileData;
+      }
+   }
+
+   function handleImportFileSelect(e) {
+      const file = e.target.files[0];
+      if (!file) return;
+
+      if (file.size > 256 * 1024) {
+         if (typeof DawnToast !== 'undefined') {
+            DawnToast.show('File too large (256KB max)', 'error');
+         }
+         e.target.value = '';
+         return;
+      }
+
+      const filenameEl = document.getElementById('memory-import-filename');
+      const reader = new FileReader();
+
+      reader.onload = function (evt) {
+         const content = evt.target.result;
+
+         if (file.name.endsWith('.json')) {
+            try {
+               importState.fileData = JSON.parse(content);
+               importState.fileName = file.name;
+               if (filenameEl) {
+                  filenameEl.textContent = file.name;
+                  filenameEl.classList.remove('hidden');
+               }
+            } catch {
+               if (typeof DawnToast !== 'undefined') {
+                  DawnToast.show('Invalid JSON file', 'error');
+               }
+               return;
+            }
+         } else {
+            // Plain text file - treat as paste
+            importState.fileData = content;
+            importState.fileName = file.name;
+            if (filenameEl) {
+               filenameEl.textContent = file.name;
+               filenameEl.classList.remove('hidden');
+            }
+         }
+         updateImportPreviewBtn();
+      };
+
+      reader.readAsText(file);
+   }
+
+   /**
+    * Build the import WebSocket message from current input state
+    */
+   function buildImportMessage(commit) {
+      let payload;
+      if (importState.source === 'paste') {
+         const textArea = document.getElementById('memory-import-text');
+         const text = textArea ? textArea.value.trim() : '';
+         if (!text) return null;
+
+         // Auto-detect: if it starts with { it's likely JSON
+         if (text.startsWith('{')) {
+            try {
+               const parsed = JSON.parse(text);
+               payload = { format: 'json', data: parsed };
+            } catch {
+               payload = { format: 'text', text: text };
+            }
+         } else {
+            payload = { format: 'text', text: text };
+         }
+      } else {
+         if (!importState.fileData) return null;
+         if (typeof importState.fileData === 'string') {
+            payload = { format: 'text', text: importState.fileData };
+         } else {
+            payload = { format: 'json', data: importState.fileData };
+         }
+      }
+
+      payload.commit = commit;
+      return { type: 'import_memories', payload };
+   }
+
+   function handleImportPreview() {
+      if (typeof DawnWS === 'undefined' || !DawnWS.isConnected()) return;
+
+      const msg = buildImportMessage(false);
+      if (!msg) return;
+
+      const previewBtn = document.getElementById('memory-import-preview-btn');
+      if (previewBtn) {
+         previewBtn.disabled = true;
+         previewBtn.textContent = 'Analyzing...';
+      }
+
+      DawnWS.send(msg);
+   }
+
+   function handleImportCommit() {
+      if (typeof DawnWS === 'undefined' || !DawnWS.isConnected()) return;
+      if (!importState.previewData) return;
+
+      const msg = buildImportMessage(true);
+      if (!msg) return;
+
+      const commitBtn = document.getElementById('memory-import-commit-btn');
+      if (commitBtn) {
+         commitBtn.disabled = true;
+         commitBtn.textContent = 'Importing...';
+      }
+
+      DawnWS.send(msg);
+   }
+
+   /**
+    * Reset to preview mode when content changes after a preview
+    */
+   function onImportContentChanged() {
+      updateImportPreviewBtn();
+      if (importState.previewData) {
+         importState.previewData = null;
+         const preview = document.getElementById('memory-import-preview');
+         if (preview) preview.classList.add('hidden');
+         const previewBtn = document.getElementById('memory-import-preview-btn');
+         const commitBtn = document.getElementById('memory-import-commit-btn');
+         if (previewBtn) {
+            previewBtn.classList.remove('hidden');
+            previewBtn.textContent = 'Preview';
+         }
+         if (commitBtn) commitBtn.classList.add('hidden');
+      }
+   }
+
+   function handleImportResponse(payload) {
+      if (!payload) return;
+
+      if (!payload.success) {
+         if (typeof DawnToast !== 'undefined') {
+            DawnToast.show(payload.error || 'Import failed', 'error');
+         }
+         const previewBtn = document.getElementById('memory-import-preview-btn');
+         if (previewBtn) {
+            previewBtn.disabled = false;
+            previewBtn.textContent = 'Preview';
+         }
+         return;
+      }
+
+      if (payload.committed) {
+         // Import complete
+         const total = (payload.imported_facts || 0) + (payload.imported_prefs || 0);
+         if (typeof DawnToast !== 'undefined') {
+            DawnToast.show(
+               `Imported ${total} memories` +
+                  (payload.skipped_dupes ? ` (${payload.skipped_dupes} duplicates skipped)` : ''),
+               'success'
+            );
+         }
+         closeImportModal();
+         // Refresh memory data
+         if (ctx && typeof ctx.onMemoriesChanged === 'function') ctx.onMemoriesChanged();
+         return;
+      }
+
+      // Preview mode - show results
+      importState.previewData = payload;
+      const preview = document.getElementById('memory-import-preview');
+      const summaryEl = preview?.querySelector('.memory-import-summary');
+      const listEl = preview?.querySelector('.memory-import-list');
+
+      if (!preview || !summaryEl || !listEl) return;
+
+      const totalNew = (payload.imported_facts || 0) + (payload.imported_prefs || 0);
+      summaryEl.innerHTML =
+         `<span>New: <span class="count">${totalNew}</span></span>` +
+         `<span>Duplicates skipped: <span class="count">${payload.skipped_dupes || 0}</span></span>` +
+         (payload.skipped_empty
+            ? `<span>Empty skipped: <span class="count">${payload.skipped_empty}</span></span>`
+            : '');
+
+      // Render preview items (max 50 in UI)
+      listEl.innerHTML = '';
+      const items = payload.preview || [];
+      const maxShow = Math.min(items.length, 50);
+      for (let i = 0; i < maxShow; i++) {
+         const item = items[i];
+         const div = document.createElement('div');
+         div.className = 'preview-item';
+         if (item.type === 'preference') {
+            div.innerHTML =
+               `<span class="preview-type">pref</span>` +
+               `${escapeHtml(item.category)}: ${escapeHtml(item.value)}`;
+         } else {
+            div.innerHTML = `<span class="preview-type">fact</span>${escapeHtml(item.text)}`;
+         }
+         listEl.appendChild(div);
+      }
+      if (items.length > maxShow) {
+         const more = document.createElement('div');
+         more.className = 'preview-item';
+         more.style.color = 'var(--text-secondary)';
+         more.textContent = `... and ${items.length - maxShow} more`;
+         listEl.appendChild(more);
+      }
+
+      preview.classList.remove('hidden');
+
+      // Switch buttons: hide preview, show commit
+      const previewBtn = document.getElementById('memory-import-preview-btn');
+      const commitBtn = document.getElementById('memory-import-commit-btn');
+      if (previewBtn) previewBtn.classList.add('hidden');
+      if (commitBtn) {
+         commitBtn.classList.remove('hidden');
+         commitBtn.disabled = totalNew === 0;
+         commitBtn.textContent = totalNew > 0 ? `Import ${totalNew} Memories` : 'Nothing to Import';
+      }
+   }
+
+   function initImportModal() {
+      const closeBtn = document.getElementById('memory-import-close');
+      const cancelBtn = document.getElementById('memory-import-cancel');
+      const previewBtn = document.getElementById('memory-import-preview-btn');
+      const commitBtn = document.getElementById('memory-import-commit-btn');
+      const fileInput = document.getElementById('memory-import-file-input');
+      const textArea = document.getElementById('memory-import-text');
+
+      if (closeBtn) closeBtn.addEventListener('click', closeImportModal);
+      if (cancelBtn) cancelBtn.addEventListener('click', closeImportModal);
+      if (previewBtn) previewBtn.addEventListener('click', handleImportPreview);
+      if (commitBtn) commitBtn.addEventListener('click', handleImportCommit);
+      if (fileInput) fileInput.addEventListener('change', handleImportFileSelect);
+      if (textArea) textArea.addEventListener('input', onImportContentChanged);
+
+      // Help popup toggle
+      const helpBtn = document.getElementById('memory-import-help-btn');
+      const helpPanel = document.getElementById('memory-import-help');
+      const helpClose = document.getElementById('memory-import-help-close');
+      const pastePanel = document.getElementById('memory-import-paste');
+
+      if (helpBtn && helpPanel && pastePanel) {
+         helpBtn.addEventListener('click', () => {
+            pastePanel.classList.add('hidden');
+            helpPanel.classList.remove('hidden');
+         });
+      }
+      if (helpClose && helpPanel && pastePanel) {
+         helpClose.addEventListener('click', () => {
+            helpPanel.classList.add('hidden');
+            pastePanel.classList.remove('hidden');
+         });
+      }
+
+      // Copy prompt button (SVG icon swap)
+      const ICON_COPY =
+         '<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" ' +
+         'stroke-width="2" stroke-linecap="round" stroke-linejoin="round">' +
+         '<rect x="9" y="9" width="13" height="13" rx="2" ry="2"/>' +
+         '<path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"/></svg>';
+      const ICON_CHECK =
+         '<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" ' +
+         'stroke-width="2.5" stroke-linecap="round" stroke-linejoin="round">' +
+         '<polyline points="20 6 9 17 4 12"/></svg>';
+
+      const copyPromptBtn = document.getElementById('memory-import-copy-prompt');
+      if (copyPromptBtn) {
+         copyPromptBtn.addEventListener('click', () => {
+            const promptText = document.getElementById('memory-import-prompt-text');
+            if (!promptText) return;
+            navigator.clipboard.writeText(promptText.textContent).then(() => {
+               copyPromptBtn.innerHTML = ICON_CHECK;
+               setTimeout(() => (copyPromptBtn.innerHTML = ICON_COPY), 2000);
+            });
+         });
+      }
+
+      // Tab switching — shared DawnTablist helper, attr='source'.
+      const importTabs = document.querySelectorAll('.memory-import-tab');
+      if (window.DawnTablist && importTabs.length > 0) {
+         importTablist = window.DawnTablist.bind({
+            tabs: importTabs,
+            attr: 'source',
+            getActive: () => importState.source,
+            onActivate: (name) => switchImportSource(name),
+         });
+         importTablist.sync(); /* initial markup → matches importState.source */
+      }
+
+      // Close on overlay click
+      const modal = document.getElementById('memory-import-modal');
+      if (modal) {
+         modal.addEventListener('click', (e) => {
+            if (e.target === modal) closeImportModal();
+         });
+      }
+   }
+
+   /* =============================================================================
+    * Public surface - exposed to other scripts as window.DawnMemoryImport
+    * ============================================================================= */
+
+   window.DawnMemoryImport = {
+      init,
+      /* WebSocket response handlers (dispatched from dawn.js via the
+       * DawnMemory.handle*Response thin-forwarders in memory.js). */
+      handleExportResponse,
+      handleImportResponse,
+      /* Opened from the memory popover's Import button (wired in init). */
+      openImportModal,
+   };
+})();
diff --git a/www/js/ui/visual-render.js b/www/js/ui/visual-render.js
index cf275dba..44560cf5 100644
--- a/www/js/ui/visual-render.js
+++ b/www/js/ui/visual-render.js
@@ -342,13 +342,100 @@
             iframe.style.height = '400px';
          }
       } else {
+         /* Gate the Chart.js sizing fix on an actual Chart.js construction, not
+          * a bare <canvas>: a non-chart canvas visual (custom 2D drawing) must
+          * not get the wrapper + overflow:hidden + forced fill, which would
+          * distort it. `new Chart(` is present in every generated chart. */
+         var isChartJs = code.indexOf('<canvas') !== -1 && /\bnew\s+Chart\s*\(/.test(code);
+
+         /* --- Chart.js responsive sizing in an opaque-origin srcdoc iframe ---
+          *
+          * Chart.js sizes a `responsive:true` chart to its canvas's PARENT box.
+          * Generated charts put a bare <canvas> directly in <body> (often with
+          * only `max-height`), and `maintainAspectRatio:true` locks the chart's
+          * aspect ratio to the canvas's DEFAULT 300x150 (ratio 2.0) at
+          * construction — the cached `_aspectRatio`. From then on every
+          * getMaximumSize() derives height = width / 2 instead of measuring the
+          * real container, so the canvas stays 300x150. A manual resize() can't
+          * rescue it: a freshly-built chart is mid entrance-animation, and
+          * `resize()` while `Chart.animator.running(chart)` is true only stashes
+          * `_resizeBeforeDraw` and returns (a true no-op — matches the measured
+          * "resize(663,400) does nothing").
+          *
+          * Canonical Chart.js responsive pattern: wrap the canvas in a
+          * position:relative element with a DEFINITE size and let the chart fill
+          * it. We do exactly that — inject a `.dawn-chart-box` wrapper (regex,
+          * since we can't change the stored markup), and at runtime flip every
+          * chart to `maintainAspectRatio:false` then resize() so it adopts the
+          * wrapper's real height on its own ResizeObserver. The wrapper's fixed
+          * pixel height (clamped by the canvas's own max-height when present) is
+          * the source of truth — no reliance on body-box measurement or on
+          * resize() timing relative to the entrance animation. */
+         var canvasCSS = '';
+         var chartFix = '';
+         if (isChartJs) {
+            /* Resolve a definite wrapper height: honor a stored canvas
+             * max-height (cap it to the frame) else use the default frame box. */
+            var FRAME_H = 380; /* px; matches the 400px iframe minus container chrome */
+            var maxH = code.match(/max-height\s*:\s*(\d+)px/i);
+            var boxH = maxH ? Math.min(parseInt(maxH[1], 10), FRAME_H) : FRAME_H;
+
+            canvasCSS =
+               'html, body { height: 100%; }\n' +
+               'body { margin: 0; padding: 0; overflow: hidden; }\n' +
+               /* The wrapper IS the container Chart.js measures: definite size,
+                * position:relative so the canvas can absolutely fill it. */
+               '.dawn-chart-box { position: relative; width: 100%; height: ' +
+               boxH +
+               'px; }\n' +
+               /* Override the stored inline max-height/margin so the canvas
+                * fills the wrapper instead of fighting it. */
+               '.dawn-chart-box > canvas { display: block !important;' +
+               ' width: 100% !important; height: 100% !important;' +
+               ' max-width: none !important; max-height: none !important;' +
+               ' margin: 0 !important; }\n';
+
+            /* Wrap each <canvas ...></canvas> in the sizing box. Self-closing
+             * <canvas .../> is invalid HTML (canvas needs a closing tag), so
+             * matching the open+close pair covers the generated markup. */
+            code = code.replace(
+               /(<canvas\b[^>]*>\s*<\/canvas>)/gi,
+               '<div class="dawn-chart-box">$1</div>'
+            );
+
+            /* Runtime: disable maintainAspectRatio (kills the cached 2.0 ratio)
+             * and resize so each chart fills its wrapper. maintainAspectRatio
+             * false makes getMaximumSize() use the container height directly.
+             * Run after the entrance animation settles so resize() isn't
+             * swallowed by the `running()` guard. Re-running on the chart's own
+             * 'resize' event is unnecessary — Chart.js keeps it filled once the
+             * ratio lock is removed and the wrapper has a definite size. */
+            chartFix =
+               '<script>\n' +
+               'function _dawnFillCharts(){\n' +
+               '  if(!window.Chart)return;var m=Chart.instances||{};\n' +
+               '  Object.keys(m).forEach(function(k){var c=m[k];try{\n' +
+               '    c.options.maintainAspectRatio=false;\n' +
+               '    c.options.responsive=true;\n' +
+               '    c.resize();\n' +
+               '  }catch(e){}});\n' +
+               '}\n' +
+               /* First pass after layout; second after the default ~1s entrance
+                * animation so any resize stashed by the running()-guard applies. */
+               'requestAnimationFrame(function(){requestAnimationFrame(_dawnFillCharts)});\n' +
+               'setTimeout(_dawnFillCharts,1100);\n' +
+               '</' +
+               'script>\n';
+         }
          content =
             '<!DOCTYPE html><html><head><style>' +
             themeCSS +
             visualClasses +
+            canvasCSS +
             '</style></head><body>\n' +
             bridgeScript +
             code +
+            chartFix +
             '</body></html>';
          iframe.style.height = '400px';
       }