diff --git a/CMakeLists.txt b/CMakeLists.txt index ce9ebbe5..54006c93 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -722,6 +722,7 @@ list(APPEND DAWN_SOURCES src/auth/auth_db_migrations_v64.c src/auth/auth_db_migrations_v65.c src/auth/auth_db_migrations_v66.c + src/auth/auth_db_migrations_v67.c src/auth/auth_db_statements.c # DB-layer tables depended on by the always-compiled memory / OTA / tool # subsystems (extraction, summaries, history, satellite OTA, image tools), so @@ -783,6 +784,7 @@ if(ENABLE_AUTH) src/auth/admin_socket_memory.c src/auth/admin_socket_memory_entity.c src/auth/admin_socket_messaging.c + src/auth/admin_socket_music.c src/auth/admin_socket_ota.c src/auth/auth_db_session.c src/auth/auth_db_rate_limit.c diff --git a/cmake/DawnTools.cmake b/cmake/DawnTools.cmake index 45110a08..71c1b8c6 100644 --- a/cmake/DawnTools.cmake +++ b/cmake/DawnTools.cmake @@ -156,6 +156,14 @@ else() message(STATUS "DAWN: Memory tool DISABLED") endif() +# Unified cross-source recall tool — aggregates the focus adapters +# (memory/notes/documents/calendar) via focus_compose_ex. Always compiled; +# recall_is_available() gates at runtime on the embedding engine. +list(APPEND TOOL_SOURCES + src/tools/recall_tool.c + src/tools/recall_format.c) +message(STATUS "DAWN: Recall tool ENABLED") + # DateTime Tools (date and time) if(DAWN_ENABLE_DATETIME_TOOL) add_definitions(-DDAWN_ENABLE_DATETIME_TOOL) diff --git a/docs/LCM_DESIGN.md b/docs/LCM_DESIGN.md index b8769d85..b8a899b0 100644 --- a/docs/LCM_DESIGN.md +++ b/docs/LCM_DESIGN.md @@ -94,7 +94,7 @@ Compaction is non-destructive: the summary embeds a structured `[COMPACTED conv= - **`conv_db_get_message_ids()`** — returns ordered array of message IDs for a conversation. Dynamic array with realloc growth. - **`conv_db_get_messages_by_range()`** — retrieves messages filtered by ID range with explicit ownership pre-check returning `AUTH_DB_FORBIDDEN` (not silent empty result like the JOIN-only approach). 
- **`context_expand` tool** — new modular tool registered via `cmake/DawnTools.cmake`. All params optional: use `start_id`/`end_id` for raw messages, or `node_id` alone for hierarchical summaries (Phase 4). Token budget hardcoded at 4000. Range cap at 500 messages. -- **Continuation handling**: `conversation_id` from the `[COMPACTED]` tag points to the parent conversation. If omitted, the tool checks `continued_from` on the current conversation to find the parent. +- **Continuation handling**: `conversation_id` from the `[COMPACTED]` tag points to the parent conversation. If omitted, the tool checks `continued_from` on the current conversation to find the parent. *(Legacy as of Phase 5: new conversations are no longer forked, so the marker's `conv=N` is the same conversation and the `continued_from` fallback only fires for pre-v67 chains.)* - **`note_len` buffer** increased to `strlen(summary) + 256` for the longer COMPACTED prefix. ### Files modified @@ -134,7 +134,7 @@ CREATE TABLE summary_nodes ( ### Key implementation details -- **Node creation** in `llm_context_compact()` after message IDs are resolved and summary text is generated. Queries `summary_node_get_latest(conv_id)` for the prior node; if not found in current conversation, traverses `continued_from` chain to find prior nodes from parent conversations. +- **Node creation** in `llm_context_compact()` after message IDs are resolved and summary text is generated. Queries `summary_node_get_latest(conv_id)` for the prior node; if not found in current conversation, traverses `continued_from` chain to find prior nodes from parent conversations. 
*(Phase 5: with single continuous conversations the prior node is always in the same conversation, so `summary_node_get_latest(conv_id)` hits directly and the `continued_from` traversal is legacy-only.)* - **CRUD functions**: `summary_node_create()`, `summary_node_get()`, `summary_node_get_latest()`, `summary_node_free()` — all ad-hoc queries (not prepared statements) since compaction is infrequent. - **`[COMPACTED]` tag** includes `node=Z depth=D` when node creation succeeds. Falls back to node-less format on DB failure (graceful degradation). - **`context_expand` node_id path**: when `node_id` is provided (no other params needed), retrieves the node and its prior node's summary. Returns both summaries with metadata (depth, level, message range, conversation ID). Buffer right-sized to `summary_len + prior_len + 512` (not fixed 16KB). @@ -164,15 +164,47 @@ The model can drill down iteratively: expand node 2 → see both summaries → e --- +## Phase 5: Compaction Watermark — Single Continuous Conversations (Shipped) + +Replaces fork-on-compaction. Previously, compaction archived the conversation (`is_archived=1`, read-only) and created a new continuation row (`continued_from` → parent) seeded with the summary — the fork was what bounded *reload* context. That produced a user-visible read-only hierarchy plus a "both-locked" bug (a duplicate `continue_conversation` archived the continuation child too, with no idempotency anywhere). Now compaction records an in-conversation **watermark** on the same row; the conversation stays single and always writable, and reload is bounded by the watermark instead of by the fork. + +### Schema (v67) + +- `conversations.context_watermark_msg_id INTEGER NOT NULL DEFAULT 0` — last compacted message id. `0` = never compacted (load all; the zero-risk gate so un-compacted conversations are byte-identical to pre-v67). 
+- One-time migration unlocks legacy split-archived conversations (`UPDATE conversations SET is_archived=0 WHERE is_archived=1` — the continuation split was the only writer of that flag). + +### Key implementation details + +- **Persist** — `conv_db_set_compaction_watermark()`, called from `llm_context_compact()` right after `summary_node_create`. A single atomic UPDATE writes `compaction_summary` + `context_watermark_msg_id` on the same row, guarded `WHERE id=? AND user_id=? AND ? >= context_watermark_msg_id` so a stale async compaction is a harmless no-op (never rewinds). No archive, no continuation row. Skipped (never writes 0) when the message id is unresolved (voice path with no command-context user). +- **Bounded restore** (gated on `watermark > 0`) — `conv_db_get_messages_after(conv_id, user_id, after_id, ...)` (full tool/reasoning columns, same ownership JOIN as `conv_db_get_messages`) loads only post-watermark messages; the summary is prepended. The WebUI **display** load stays full (the user sees the entire transcript); only the **LLM-context** restore is bounded. The same gate is applied inside the messaging forever-conversation loader (`memory_history_load_from_db`), so every reload path is bounded by one shared check. +- **Marker re-injection across reloads** — the summary is injected as an **assistant** message carrying a reconstructed `[COMPACTED conv=N msgs=X-Y node=Z depth=D]` marker (built by `conv_db_format_compaction_context()` from the latest `summary_node`), so the LLM keeps a `context_expand` handle to the originals *after a reload*, not just in the live session. Assistant role (not system) is required: `session_update_system_messages` rebuilds the leading context into exactly two system messages every turn and drops any other system message — so a system-role summary was silently lost. 
This was a latent bug since the prompt-cache two-system-message refactor (reloaded continuation summaries were also being dropped); fixed here by matching the live marker's assistant role. +- **Continuation machinery is now legacy.** `continued_from` is retained as a breadcrumb but is never written for new conversations; the Phase 3/4 `continued_from` chain-walk fires only for pre-v67 conversations. The old split path (`conv_db_create_continuation`, `handle_continue_conversation`, the `continue_conversation` WS message + the client's split trigger) is left **dormant** and logs a WARNING if invoked, pending removal after a production soak. + +### Files modified + +`include/auth/auth_db_internal.h`, `include/auth/auth_db.h`, `src/auth/auth_db_schema.c`, `src/auth/auth_db_migrations.c`, `src/auth/auth_db_migrations_v67.c` (new), `src/auth/auth_db_statements.c`, `src/auth/auth_db_conv.c`, `src/llm/llm_context.c`, `src/webui/webui_server.c`, `src/webui/webui_history.c`, `src/memory/memory_history_loader.c`, `www/js/ui/history.js`, `tests/test_auth_db.c` + +### Live test results (June 16, 2026) + +- conv 845: watermark set and advanced (17874 → 17891) across 8+ compactions, `is_archived=0`, no continuation child — one writable conversation throughout. +- Bounded reload restored 46 post-watermark messages + the summary; the full 94-message transcript still displayed in the UI. +- After reload, the `[COMPACTED conv=845 msgs=17834-17891 node=N depth=N]` marker was visible to the LLM (assistant role survived the per-turn rebuild); the model called `context_expand` on its own — `expanding msgs 17834-17891 from conv 845` — and retrieved the verbatim originals (recalled the user's actual first message). +- All previously-archived conversations unlocked (0 archived post-migration). +- Build clean, 88/88 CI, +2 unit tests (monotonic guard, bounded fetch); five-agent review applied. 
+ +--- + ## Implementation Order ``` Phase 1 (escalation) ──┐ - ├──> Phase 3 (lossless pointers) ──> Phase 4 (DAG) + ├──> Phase 3 (lossless pointers) ──> Phase 4 (DAG) ──> Phase 5 (watermark) Phase 2 (async) ───────┘ - ✓ shipped ✓ shipped ✓ shipped + ✓ shipped ✓ shipped ✓ shipped ✓ shipped ``` +Phase 5 (June 2026) retired fork-on-compaction in favor of the in-conversation watermark — conversations stay single and writable; the continuation/`continued_from` model from Phases 3–4 is now legacy (dormant, fires only for pre-v67 conversations). + --- ## Follow-up Optimizations diff --git a/include/auth/admin_socket_internal.h b/include/auth/admin_socket_internal.h index 11c87a37..3d2ddde6 100644 --- a/include/auth/admin_socket_internal.h +++ b/include/auth/admin_socket_internal.h @@ -102,6 +102,13 @@ int handle_ota_push_all_cmd(int client_fd, const char *payload, uint16_t payload int handle_ota_rollout_status_cmd(int client_fd); int handle_ota_rollout_abort_cmd(int client_fd); +/* Music-DB handlers (admin_socket_music.c). Dispatched from handle_client() + * in admin_socket.c against ADMIN_MSG_MUSIC_* opcodes. 
*/ +int admin_handle_music_stats(int client_fd); +int admin_handle_music_search(int client_fd, const char *payload, uint16_t len); +int admin_handle_music_list(int client_fd, const char *payload, uint16_t len); +int admin_handle_music_rescan(int client_fd); + #ifdef __cplusplus } #endif diff --git a/include/auth/auth_db.h b/include/auth/auth_db.h index 3ac3be0b..d0e2face 100644 --- a/include/auth/auth_db.h +++ b/include/auth/auth_db.h @@ -1036,10 +1036,11 @@ typedef struct { time_t updated_at; int message_count; bool is_archived; - int context_tokens; /**< Last known context token count */ - int context_max; /**< Context window size */ - int64_t continued_from; /**< Parent conversation ID (0 = none) */ - char *compaction_summary; /**< Summary from parent (NULL if not a continuation) */ + int context_tokens; /**< Last known context token count */ + int context_max; /**< Context window size */ + int64_t continued_from; /**< Parent conversation ID (0 = none) */ + char *compaction_summary; /**< Summary from parent (NULL if not a continuation) */ + int64_t context_watermark_msg_id; /**< v67: last compacted msg id; 0 = none (load all) */ /* Per-conversation LLM settings (v11) - empty string means use defaults */ char llm_type[16]; /**< "local" or "cloud" */ char cloud_provider[16]; /**< "openai" or "claude" */ @@ -1351,6 +1352,44 @@ int conv_db_set_title_locked(int64_t conv_id, int user_id, int locked); */ int conv_db_update_context(int64_t conv_id, int user_id, int context_tokens, int context_max); +/** + * @brief Persist a compaction watermark + summary on a conversation (v67). + * + * Replaces fork-on-compaction: records @watermark_msg_id (the last compacted + * message id) and @summary on the SAME conversation row. Reload then bounds + * context to messages with id > watermark + the summary. Single atomic UPDATE + * with a monotonic guard (a stale watermark < the stored one is a no-op). + * + * @param conv_id Conversation id (> 0). 
+ * @param user_id Owner id (ownership-checked in the UPDATE). + * @param summary Latest compaction summary (may be NULL). + * @param watermark_msg_id Last compacted message id (> 0; <= 0 returns AUTH_DB_INVALID). + * @return AUTH_DB_SUCCESS (incl. benign no-op), AUTH_DB_INVALID, or AUTH_DB_FAILURE. + */ +int conv_db_set_compaction_watermark(int64_t conv_id, + int user_id, + const char *summary, + int64_t watermark_msg_id); + +/** + * @brief Format the reload context line for a (watermarked) conversation. + * + * Writes a `[COMPACTED conv=N msgs=X-Y node=Z depth=D] Previous conversation + * context (summarized): ` marker into @out when a summary node exists + * (so a reloaded LLM keeps a context_expand handle to the compacted originals), + * else a plain summary line. @out is always NUL-terminated. Empty @summary + * yields an empty string. + * + * @param conv_id Conversation id (for summary-node lookup + the marker). + * @param summary The conversation's compaction_summary text (may be NULL). + * @param out Output buffer. + * @param out_len Size of @out. + */ +void conv_db_format_compaction_context(int64_t conv_id, + const char *summary, + char *out, + size_t out_len); + /** * @brief Lock LLM settings for a conversation * @@ -1505,6 +1544,26 @@ int conv_db_add_message_with_tools(int64_t conv_id, */ int conv_db_get_messages(int64_t conv_id, int user_id, message_callback_t callback, void *ctx); +/** + * @brief Like conv_db_get_messages but only messages with id > @after_id. + * + * The compaction-watermark restore path (v67): load only post-watermark messages + * into the LLM context. Same full column set / ownership check / chronological + * order as conv_db_get_messages. @after_id = 0 returns all messages. 
+ * + * @param conv_id Conversation ID + * @param user_id User ID (for authorization check) + * @param after_id Exclusive lower bound on message id (0 = all) + * @param callback Function called for each message + * @param ctx User-provided context passed to callback + * @return AUTH_DB_SUCCESS, AUTH_DB_INVALID, or AUTH_DB_FAILURE + */ +int conv_db_get_messages_after(int64_t conv_id, + int user_id, + int64_t after_id, + message_callback_t callback, + void *ctx); + /** * @brief Get messages for a conversation (admin only, no ownership check) * diff --git a/include/auth/auth_db_internal.h b/include/auth/auth_db_internal.h index 3979c9af..8bb383bb 100644 --- a/include/auth/auth_db_internal.h +++ b/include/auth/auth_db_internal.h @@ -56,7 +56,7 @@ * DAWN_ENABLE_MCP_BRIDGE_TOOL / DAWN_ENABLE_CODE_PROJECTS. Gating them on a * feature flag would fork the schema timeline across binaries; do not do it. * (arch-A2) */ -#define AUTH_DB_SCHEMA_VERSION 66 +#define AUTH_DB_SCHEMA_VERSION 67 /* Retention periods */ #define LOGIN_ATTEMPT_RETENTION_SEC (7 * 24 * 60 * 60) /* 7 days */ @@ -129,9 +129,11 @@ typedef struct { sqlite3_stmt *stmt_conv_count; sqlite3_stmt *stmt_msg_add; sqlite3_stmt *stmt_msg_get; + sqlite3_stmt *stmt_msg_get_after; sqlite3_stmt *stmt_msg_get_admin; sqlite3_stmt *stmt_conv_update_meta; sqlite3_stmt *stmt_conv_update_context; + sqlite3_stmt *stmt_conv_set_watermark; sqlite3_stmt *stmt_conv_create_origin; sqlite3_stmt *stmt_conv_reassign; @@ -504,6 +506,14 @@ int auth_db_migrations_v65(sqlite3 *db); */ int auth_db_migrations_v66(sqlite3 *db); +/** + * @brief v67 migration: add conversations.context_watermark_msg_id (compaction + * watermark, replacing fork-on-compaction) and one-time unlock of legacy + * split-archived conversations. Idempotent (probes PRAGMA table_info). + * @return AUTH_DB_SUCCESS or AUTH_DB_FAILURE. + */ +int auth_db_migrations_v67(sqlite3 *db); + /** * @brief Prepare every cached sqlite3_stmt* in s_db. 
* diff --git a/include/config/dawn_config.h b/include/config/dawn_config.h index 4191ce9f..0d5fd46f 100644 --- a/include/config/dawn_config.h +++ b/include/config/dawn_config.h @@ -572,6 +572,22 @@ typedef struct { } dominant_token_heuristic; } focus_injection_config_t; +/* Unified cross-source `recall` tool (docs/CROSS_TOOL_RECALL_DESIGN.md). + * Deep on-demand gather across all focus sources at a budget LARGER than the + * per-turn injection, kept in a SEPARATE block so tuning the deep path never + * disturbs the tuned per-turn `focus_injection` values. recall_tool copies + * top_k/min_score/budget_bytes into a focus_limits_t via DESIGNATED initializers + * (mapping is by-name, not positional — field order here need not match + * focus_limits_t); per_source_max rides focus_compose_ex's existing param. */ +typedef struct { + int top_k; /* Max candidates retained after ranking (deep gather) */ + int budget_bytes; /* Byte cap on the assembled recall result text */ + float min_score; /* Floor — lower than per-turn so weaker hits surface */ + int per_source_max; /* Per-adapter fan-out cap before ranking. + * INVARIANT: per_source_max * MAX_FOCUS_SOURCES <= 256 + * or the dominant-token heuristic self-disables. */ +} recall_config_t; + typedef struct { bool enabled; /* Enable memory system */ int context_budget_tokens; /* Max tokens for memory context (~800) */ @@ -802,6 +818,10 @@ typedef struct { * `docs/DYNAMIC_CONTEXT_INJECTION_DESIGN.md` §"Phase 1 — Per-Turn Focus". */ focus_injection_config_t focus_injection; + /* Unified cross-source `recall` tool — deep gather at a larger budget than + * the per-turn focus injection above. See recall_config_t. */ + recall_config_t recall; + /* Phase 2 entity-merge auto-merge gate. 
Fires after each extraction * completes, evaluates was_created entities against existing canonicals * via the resolver cascade, and routes by composite score: diff --git a/include/core/focus/focus_candidate_helpers.h b/include/core/focus/focus_candidate_helpers.h index 24f86067..38a5eed0 100644 --- a/include/core/focus/focus_candidate_helpers.h +++ b/include/core/focus/focus_candidate_helpers.h @@ -74,6 +74,20 @@ extern "C" { * document_chunk / calendar_occ variants all fit within 64. */ #define FOCUS_ITEM_ID_BUFLEN 64 +/** + * @brief Largest byte length <= @p max_bytes that does not split a UTF-8 + * character in @p text. + * + * Truncating multi-byte UTF-8 text at a raw byte offset can leave a partial + * character at the end, which is invalid UTF-8 and breaks any consumer that + * requires valid UTF-8 (notably WebSocket text frames — the browser rejects the + * frame and drops the connection). This returns a cut length that lands on a + * character boundary: `strlen(text)` when it already fits, otherwise @p + * max_bytes backed up past any trailing UTF-8 continuation bytes. Returns 0 on + * NULL @p text. + */ +size_t focus_utf8_safe_cap(const char *text, size_t max_bytes); + /** * @brief Free `text` and `item_id` on a single candidate (failure path). * diff --git a/include/core/focus/focus_source.h b/include/core/focus/focus_source.h index ef0117b7..7ee079dd 100644 --- a/include/core/focus/focus_source.h +++ b/include/core/focus/focus_source.h @@ -271,6 +271,23 @@ typedef struct focus_compose_result_s { int rejection_count; } focus_compose_result_t; +/* ============================================================================= + * Per-call trim-limit overrides (focus_compose_ex) + * + * The per-turn focus path (build_focus_block) uses the config-driven trim limits + * in `g_config.memory.focus_injection`. The unified `recall` tool needs a + * larger, separate budget for a deep cross-source gather. 
`focus_limits_t` + * overrides ONLY the three trim values; a NULL pointer or a zero/negative field + * falls back to the config value for that field. Ranking WEIGHTS are NOT + * overridable here — they stay config-sourced and shared, so recall changes how + * MUCH is kept, not HOW it is ranked. + * ============================================================================= */ +typedef struct { + int top_k; /* > 0 overrides focus_injection.top_k */ + float min_score; /* >= 0 overrides focus_injection.min_score */ + int budget_bytes; /* > 0 overrides focus_injection.focus_budget_bytes */ +} focus_limits_t; + /* ============================================================================= * Public API * ============================================================================= */ @@ -348,6 +365,32 @@ int focus_compose(int user_id, int per_source_max_candidates, focus_compose_result_t *out_result); +/** + * @brief Like focus_compose(), but with per-call trim-limit overrides. + * + * Identical pipeline to focus_compose(); the only difference is steps 3-5 (the + * min_score / top_k / byte-budget trim) consult `limits` first, falling back to + * `g_config.memory.focus_injection` per field when `limits` is NULL or a field + * is zero/negative. Ranking (step 2) is unchanged — weights are always config. + * + * The unified `recall` tool uses this for a deep cross-source gather at a budget + * larger than the per-turn injection. focus_compose() is a thin wrapper passing + * `limits = NULL`. + * + * @param limits Per-call trim overrides, or NULL to use config for all three. + * @see focus_compose for all other parameter semantics. + * @return SUCCESS on success, FAILURE on error (same contract as focus_compose). 
+ */ +int focus_compose_ex(int user_id, + bool include_private, + const char *query_text, + const float *query_embedding, + size_t embed_dim, + time_t now, + int per_source_max_candidates, + const focus_limits_t *limits, + focus_compose_result_t *out_result); + /** * @brief Release everything `focus_compose()` allocated into `result`. * diff --git a/include/tools/mcp_bridge.h b/include/tools/mcp_bridge.h index b8874c91..1102b336 100644 --- a/include/tools/mcp_bridge.h +++ b/include/tools/mcp_bridge.h @@ -125,4 +125,25 @@ int mcp_bridge_call_tool(const char *server_alias, */ int mcp_bridge_server_connected(const char *server_alias); +/** + * @brief Ensure an upstream server is connected, reconnecting it if it wasn't + * ready at startup. + * + * Active counterpart to @ref mcp_bridge_server_connected: callers that gate a + * code path on a server being usable (e.g. the code-graph provider's + * availability check) should use this so a server that came up after DAWN + * connects self-heals on first use instead of staying unavailable until a + * daemon restart. Blocks for the connect handshake; call off the main loop. + * + * Restores the connection only — it does NOT register the server's LLM-facing + * tools (those are registered at startup against the still-unlocked registry; a + * server first reached after init exposes its tools on the next restart). This + * is sufficient for direct programmatic callers such as @ref mcp_bridge_call_tool. + * + * @param server_alias Configured alias of the upstream server to connect. + * @return SUCCESS if the alias is connected (already or after reconnect); + * FAILURE if it is not configured or the reconnect failed. 
+ */ +int mcp_bridge_ensure_connected(const char *server_alias); + #endif /* MCP_BRIDGE_H */ diff --git a/include/tools/recall_format.h b/include/tools/recall_format.h new file mode 100644 index 00000000..0b7f843d --- /dev/null +++ b/include/tools/recall_format.h @@ -0,0 +1,65 @@ +/* + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + * + * By contributing to this project, you agree to license your contributions + * under the GPLv3 (or any later version) or any future licenses chosen by + * the project author(s). + * + * Recall tool result formatter — renders a focus_compose result into a + * grouped, source-tagged "here's what we know" block with per-item + * read-pointers. Split into its own translation unit so it is unit-testable + * without the daemon and so the grouping/pointer logic can grow (Phase 2 + * scope filter, entity/relation pointers) without bloating recall_tool.c. + */ + +#ifndef TOOLS_RECALL_FORMAT_H +#define TOOLS_RECALL_FORMAT_H + +#include "core/focus/focus_source.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @brief Render a composed cross-source recall result for the LLM. + * + * Groups `result->candidates` (already ranked descending) by source family — + * memory facts/relationships, conversation summaries, notes & documents, + * calendar — and renders each as a bulleted line carrying a pointer to where + * the exact/full text lives (document_read "