diff --git a/internal/memory/indexer.go b/internal/memory/indexer.go index 9938ff653..b1b511833 100644 --- a/internal/memory/indexer.go +++ b/internal/memory/indexer.go @@ -22,6 +22,7 @@ import ( const ( DefaultMinScore = 0.5 DefaultValidDurationDays = 60 + maxMergeSections = 3 ) // MemoryIndex is an in-memory HNSW index for memory chunks. @@ -407,7 +408,14 @@ func (idx *MemoryIndex) PruneHashes(validHashes map[string]struct{}) int { return pruned } -// ParseDailyFile parses a daily markdown file into MemoryEntry chunks. + +type rawSection struct { + timestamp string + userID string + lines []string +} + +// ParseDailyFile parses a daily markdown file into MemoryEntry chunks and merge if from same user up to maxMergeSections. func ParseDailyFile(path string) ([]MemoryEntry, error) { content, err := os.ReadFile(path) if err != nil { @@ -418,50 +426,88 @@ func ParseDailyFile(path string) ([]MemoryEntry, error) { dateStem := strings.TrimSuffix(base, filepath.Ext(base)) datePrefix := "[Date: " + dateStem + "]" - userTagRe := regexp.MustCompile(`^\[User: (.+)\]$`) + sections := parseSections(string(content)) + if len(sections) == 0 { + return nil, nil + } var chunks []MemoryEntry - var currentChunk strings.Builder - var currentUserID string - - lines := strings.Split(string(content), "\n") - for _, line := range lines { - if strings.HasPrefix(line, "## ") && currentChunk.Len() > 0 { - text := strings.TrimSpace(currentChunk.String()) - if text != "" { - meta := map[string]string{"source": base} - if currentUserID != "" { - meta["user_id"] = currentUserID - } - chunks = append(chunks, MemoryEntry{ - Text: datePrefix + "\n" + text, - Metadata: meta, - }) + i := 0 + for i < len(sections) { + userID := sections[i].userID + start := i + for i < len(sections) && sections[i].userID == userID && (i-start) < maxMergeSections { + i++ + } + merged := sections[start:i] + + var sb strings.Builder + sb.WriteString(datePrefix) + sb.WriteString("\n[User: ") + sb.WriteString(userID) + sb.WriteString("]\n") + for _, sec := range merged { + for _, line := range sec.lines { + fmt.Fprintf(&sb, "[%s] %s\n", sec.timestamp, convertLine(line)) } - currentChunk.Reset() - currentUserID = "" } - - if m := userTagRe.FindStringSubmatch(line); m != nil { - currentUserID = strings.ToLower(strings.TrimSpace(m[1])) + text := strings.TrimSpace(sb.String()) + if text != "" { + meta := map[string]string{"source": base} + if userID != "" { + meta["user_id"] = userID + } + chunks = append(chunks, MemoryEntry{Text: text, Metadata: meta}) } - currentChunk.WriteString(line) - currentChunk.WriteByte('\n') } + return chunks, nil +} + +// parseSections splits daily file content into rawSection structs. +func parseSections(content string) []rawSection { + headerRe := regexp.MustCompile(`^## (\d{2}:\d{2}:\d{2})`) + userTagRe := regexp.MustCompile(`^\[User: (.+)\]$`) + + var sections []rawSection + var cur *rawSection - // Flush last chunk. - if text := strings.TrimSpace(currentChunk.String()); text != "" { - meta := map[string]string{"source": base} - if currentUserID != "" { - meta["user_id"] = currentUserID + for _, line := range strings.Split(content, "\n") { + if m := headerRe.FindStringSubmatch(line); m != nil { + if cur != nil && len(cur.lines) > 0 { + sections = append(sections, *cur) + } + cur = &rawSection{timestamp: m[1]} + continue } - chunks = append(chunks, MemoryEntry{ - Text: datePrefix + "\n" + text, - Metadata: meta, - }) + if cur == nil { + continue + } + if m := userTagRe.FindStringSubmatch(line); m != nil { + cur.userID = strings.ToLower(strings.TrimSpace(m[1])) + continue + } + trimmed := strings.TrimSpace(line) + if trimmed != "" { + cur.lines = append(cur.lines, trimmed) + } + } + if cur != nil && len(cur.lines) > 0 { + sections = append(sections, *cur) } + return sections +} - return chunks, nil +func convertLine(line string) string { + if after, ok := strings.CutPrefix(line, "- **User**: "); ok { + return "User: " + after + } + if after, ok := strings.CutPrefix(line, "- **Robot**: "); ok { + return "Robot: " + after + } + if after, ok := strings.CutPrefix(line, "- "); ok { + return after + } + return line } // BuildIndex populates the given MemoryIndex from recent daily markdown files. diff --git a/internal/memory/indexer_test.go b/internal/memory/indexer_test.go index 13510d6fa..7bb18639f 100644 --- a/internal/memory/indexer_test.go +++ b/internal/memory/indexer_test.go @@ -43,14 +43,14 @@ func TestParseDailyFile(t *testing.T) { chunks, err := ParseDailyFile(path) require.NoError(t, err) - require.Len(t, chunks, 2) + require.Len(t, chunks, 2, "different users should produce separate chunks") require.Contains(t, chunks[0].Text, "[Date: 2026-06-03]") - require.Contains(t, chunks[0].Text, "Hello, how are you?") + require.Contains(t, chunks[0].Text, "[14:18:28] User: Hello, how are you?") require.Equal(t, "alice", chunks[0].Metadata["user_id"]) require.Equal(t, "2026-06-03.md", chunks[0].Metadata["source"]) - require.Contains(t, chunks[1].Text, "What is your name?") + require.Contains(t, chunks[1].Text, "[14:19:00] User: What is your name?") require.Equal(t, "bob", chunks[1].Metadata["user_id"]) } @@ -79,6 +79,68 @@ func TestParseDailyFile_SingleChunk(t *testing.T) { require.Equal(t, "dave", chunks[0].Metadata["user_id"]) } +func TestParseDailyFile_SameUserMerge(t *testing.T) { + dir := t.TempDir() + content := ` +## 14:00:00 +[User: alice] +- **User**: First message + +## 14:01:00 +[User: alice] +- **User**: Second message + +## 14:02:00 +[User: bob] +- **User**: Bob here + +## 14:03:00 +[User: alice] +- **User**: Alice again +` + path := filepath.Join(dir, "2026-06-05.md") + require.NoError(t, os.WriteFile(path, []byte(content), 0o644)) + + chunks, err := ParseDailyFile(path) + require.NoError(t, err) + require.Len(t, chunks, 3, "alice(2) + bob(1) + alice(1) = 3 chunks") + + require.Contains(t, chunks[0].Text, "[14:00:00] User: First message") + require.Contains(t, chunks[0].Text, "[14:01:00] User: Second message") + require.Equal(t, "alice", chunks[0].Metadata["user_id"]) + + require.Equal(t, "bob", chunks[1].Metadata["user_id"]) + + require.Contains(t, chunks[2].Text, "[14:03:00] User: Alice again") + require.Equal(t, "alice", chunks[2].Metadata["user_id"]) +} + +func TestParseDailyFile_WithRobotReply(t *testing.T) { + dir := t.TempDir() + content := ` +## 14:00:00 +[User: alice] +- **User**: Hello +- **Robot**: Hi there! + +## 14:01:00 +[User: alice] +- **User**: What's your name? +- **Robot**: I'm OM1! +` + path := filepath.Join(dir, "2026-06-06.md") + require.NoError(t, os.WriteFile(path, []byte(content), 0o644)) + + chunks, err := ParseDailyFile(path) + require.NoError(t, err) + require.Len(t, chunks, 1, "same user alice → merged into 1 chunk") + + require.Contains(t, chunks[0].Text, "[14:00:00] User: Hello") + require.Contains(t, chunks[0].Text, "[14:00:00] Robot: Hi there!") + require.Contains(t, chunks[0].Text, "[14:01:00] User: What's your name?") + require.Contains(t, chunks[0].Text, "[14:01:00] Robot: I'm OM1!") +} + func TestMemoryIndex_AddChunkAndSearch(t *testing.T) { idx := NewMemoryIndex(&mockEmbedder{dim: 32}, testLogger()) diff --git a/internal/memory/manager.go b/internal/memory/manager.go index 20f05d62f..73b8e0516 100644 --- a/internal/memory/manager.go +++ b/internal/memory/manager.go @@ -84,22 +84,27 @@ func (m *Manager) SearchAndFormat(ctx context.Context, query string, uuid string return m.reader.FormatContext(results, 0, uuid) } -// RecordInteraction writes the user message to the daily log and hot-updates the index. -func (m *Manager) RecordInteraction(ctx context.Context, voiceInput, uuid, name string) { +// RecordInteraction writes the user message and robot reply to the daily log and hot-updates the index. +func (m *Manager) RecordInteraction(ctx context.Context, voiceInput, robotReply, uuid, name string) { if m.writer == nil { return } - m.writer.AppendInteraction(voiceInput, uuid, name) + m.writer.AppendInteraction(voiceInput, robotReply, uuid, name) if m.reader.IndexReady() { - m.writer.AppendToIndex(ctx, m.reader.Index(), voiceInput, uuid) + m.writer.AppendToIndex(ctx, m.reader.Index(), voiceInput, robotReply, uuid) } } -// MaybeSummarize triggers background summarization. +// MaybeSummarize triggers background summarization func (m *Manager) Summarize(ctx context.Context) { if m.summarizer != nil && m.summarizer.CheckEligibility() { go func() { m.summarizer.Run(ctx) + if err := m.reader.RebuildIndex(ctx); err != nil { + m.log.Warn("index rebuild failed, keeping old index", zap.Error(err)) + return + } + m.log.Info("index rebuilt after summarization", zap.Int("chunks", m.reader.Index().Size())) if err := m.reader.Index().SaveToDisk(m.indexDir); err != nil { m.log.Warn("failed to persist index", zap.Error(err)) } diff --git a/internal/memory/reader.go b/internal/memory/reader.go index 033234063..d30402bb8 100644 --- a/internal/memory/reader.go +++ b/internal/memory/reader.go @@ -68,6 +68,17 @@ func (r *Reader) IndexReady() bool { return r.indexReady } +// RebuildIndex rebuilds the index from daily files. On failure the old index is kept. +func (r *Reader) RebuildIndex(ctx context.Context) error { + newIdx := NewMemoryIndex(r.index.embedder, r.log) + if err := BuildIndex(ctx, newIdx, r.dailyDir, DefaultValidDurationDays); err != nil { + return err + } + r.index = newIdx + r.indexReady = true + return nil +} + // SearchDaily searches daily logs using hybrid retrieval (embedding + BM25). func (r *Reader) SearchDaily(ctx context.Context, queryText string, topK int, userID string) ([]MemoryEntry, error) { if strings.TrimSpace(queryText) == "" { diff --git a/internal/memory/writer.go b/internal/memory/writer.go index 320a42c85..e3e7f0ae3 100644 --- a/internal/memory/writer.go +++ b/internal/memory/writer.go @@ -56,7 +56,7 @@ func NewWriter(memoryRoot string, log *zap.Logger) (*Writer, error) { } // AppendInteraction writes a user message to today's daily log. -func (w *Writer) AppendInteraction(userMsg, uuid, name string) { +func (w *Writer) AppendInteraction(userMsg, robotReply, uuid, name string) { if strings.TrimSpace(userMsg) == "" { return } @@ -74,7 +74,11 @@ func (w *Writer) AppendInteraction(userMsg, uuid, name string) { tag = uuid } - entry := fmt.Sprintf("\n## %s\n[User: %s]\n- **User**: %s\n", ts, tag, strings.TrimSpace(userMsg)) + var sb strings.Builder + fmt.Fprintf(&sb, "\n## %s\n[User: %s]\n- **User**: %s\n", ts, tag, strings.TrimSpace(userMsg)) + if reply := strings.TrimSpace(robotReply); reply != "" { + fmt.Fprintf(&sb, "- **Robot**: %s\n", reply) + } f, err := os.OpenFile(dailyPath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o644) if err != nil { @@ -83,13 +87,13 @@ func (w *Writer) AppendInteraction(userMsg, uuid, name string) { } defer func() { _ = f.Close() }() - if _, err := f.WriteString(entry); err != nil { + if _, err := f.WriteString(sb.String()); err != nil { w.log.Error("failed to write interaction", zap.Error(err)) } } // AppendToIndex embeds and inserts a new user message into the given index. -func (w *Writer) AppendToIndex(ctx context.Context, idx *MemoryIndex, userMsg, uuid string) { +func (w *Writer) AppendToIndex(ctx context.Context, idx *MemoryIndex, userMsg, robotReply, uuid string) { if strings.TrimSpace(userMsg) == "" || idx == nil { return } @@ -101,14 +105,16 @@ func (w *Writer) AppendToIndex(ctx context.Context, idx *MemoryIndex, userMsg, u if uuid != "" { tag = uuid } - userTag := fmt.Sprintf("[User: %s]\n", tag) - text := fmt.Sprintf("[Date: %s]\n## %s\n%s- **User**: %s", - dateStr, ts, userTag, strings.TrimSpace(userMsg)) + var sb strings.Builder + fmt.Fprintf(&sb, "[Date: %s]\n[User: %s]\n[%s] User: %s", dateStr, tag, ts, strings.TrimSpace(userMsg)) + if reply := strings.TrimSpace(robotReply); reply != "" { + fmt.Fprintf(&sb, "\n[%s] Robot: %s", ts, reply) + } meta := map[string]string{"source": dateStr + ".md", "user_id": uuid} - if _, err := idx.AddChunk(ctx, MemoryEntry{Text: text, Metadata: meta}); err != nil { + if _, err := idx.AddChunk(ctx, MemoryEntry{Text: sb.String(), Metadata: meta}); err != nil { w.log.Warn("write-through index failed", zap.Error(err)) } } diff --git a/internal/memory/writer_test.go b/internal/memory/writer_test.go index b29002674..4da481817 100644 --- a/internal/memory/writer_test.go +++ b/internal/memory/writer_test.go @@ -19,8 +19,8 @@ func TestWriter_AppendInteraction(t *testing.T) { w, err := NewWriter(dir, log) require.NoError(t, err) - w.AppendInteraction("Hello robot", testUUID, "alice") - w.AppendInteraction("What is your name?", testUUID, "alice") + w.AppendInteraction("Hello robot", "", testUUID, "alice") + w.AppendInteraction("What is your name?", "", testUUID, "alice") dailyPath := w.dailyPath() content, err := os.ReadFile(dailyPath) @@ -35,8 +35,8 @@ func TestWriter_AppendInteraction_EmptyMessage(t *testing.T) { w, err := NewWriter(dir, testLogger()) require.NoError(t, err) - w.AppendInteraction("", testUUID, "alice") - w.AppendInteraction(" ", testUUID, "alice") + w.AppendInteraction("", "", testUUID, "alice") + w.AppendInteraction(" ", "", testUUID, "alice") dailyPath := w.dailyPath() _, err = os.Stat(dailyPath) @@ -48,7 +48,7 @@ func TestWriter_AppendInteraction_UnknownUser(t *testing.T) { w, err := NewWriter(dir, testLogger()) require.NoError(t, err) - w.AppendInteraction("Hello", "", "") + w.AppendInteraction("Hello", "", "", "") content, err := os.ReadFile(w.dailyPath()) require.NoError(t, err) @@ -60,7 +60,7 @@ func TestWriter_EnsureUserDir(t *testing.T) { w, err := NewWriter(dir, testLogger()) require.NoError(t, err) - w.AppendInteraction("Hi", testUUID, "bob") + w.AppendInteraction("Hi", "", testUUID, "bob") // Profile should be created with UUID. profilePath := filepath.Join(dir, "users", testUUID, "profile.json") @@ -88,8 +88,8 @@ func TestWriter_UpdateUserProfile_VisitCount(t *testing.T) { require.NoError(t, err) // Multiple interactions in same session = 1 visit. - w.AppendInteraction("Hi", testUUID, "carol") - w.AppendInteraction("Bye", testUUID, "carol") + w.AppendInteraction("Hi", "", testUUID, "carol") + w.AppendInteraction("Bye", "", testUUID, "carol") profilePath := filepath.Join(dir, "users", testUUID, "profile.json") raw, err := os.ReadFile(profilePath) @@ -106,8 +106,8 @@ func TestWriter_MultipleNames(t *testing.T) { w, err := NewWriter(dir, testLogger()) require.NoError(t, err) - w.AppendInteraction("Hi", testUUID, "anon_73d0a4") - w.AppendInteraction("Hi again", testUUID, "sean") + w.AppendInteraction("Hi", "", testUUID, "anon_73d0a4") + w.AppendInteraction("Hi again", "", testUUID, "sean") profilePath := filepath.Join(dir, "users", testUUID, "profile.json") raw, err := os.ReadFile(profilePath) diff --git a/internal/runtime/runtime.go b/internal/runtime/runtime.go index b4bdd76f9..a1be87396 100644 --- a/internal/runtime/runtime.go +++ b/internal/runtime/runtime.go @@ -502,7 +502,7 @@ func (rt *Runtime) tick(ctx context.Context, current *modeState, tickStart time. if voice != nil && voice.Input != "" && voice.Tick == rt.ioProvider.TickCounter() { uuid, _ := rt.ioProvider.GetDynamicVar("current_user_id") name, _ := rt.ioProvider.GetDynamicVar("current_user_name") - current.memory.RecordInteraction(ctx, strings.TrimSpace(voice.Input), uuid, name) + current.memory.RecordInteraction(ctx, strings.TrimSpace(voice.Input), response.TextContent, uuid, name) } current.memory.Summarize(ctx) }