diff --git a/cmd/agentsview/main.go b/cmd/agentsview/main.go index 2f4e6195..a78c7d83 100644 --- a/cmd/agentsview/main.go +++ b/cmd/agentsview/main.go @@ -194,6 +194,7 @@ func runServe(args []string) { AgentDirs: cfg.AgentDirs, Machine: "local", BlockedResultCategories: cfg.ResultContentBlockedCategories, + CursorStateDB: cfg.CursorStateDB, }) if database.NeedsResync() { diff --git a/cmd/agentsview/sync.go b/cmd/agentsview/sync.go index 6bf8a197..92505043 100644 --- a/cmd/agentsview/sync.go +++ b/cmd/agentsview/sync.go @@ -93,8 +93,9 @@ func runSync(args []string) { cleanResyncTemp(appCfg.DBPath) engine := sync.NewEngine(database, sync.EngineConfig{ - AgentDirs: appCfg.AgentDirs, - Machine: "local", + AgentDirs: appCfg.AgentDirs, + Machine: "local", + CursorStateDB: appCfg.CursorStateDB, }) ctx := context.Background() diff --git a/internal/config/config.go b/internal/config/config.go index eb4bd35d..45ab9778 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -67,6 +67,13 @@ type Config struct { RemoteAccess bool `json:"remote_access"` WriteTimeout time.Duration `json:"-"` + // CursorStateDB is the path to Cursor's global + // state.vscdb SQLite database used as primary source + // for tool calls and rich session metadata. + // Set CURSOR_STATE_DB to override. Default: + // ~/.config/Cursor/User/globalStorage/state.vscdb + CursorStateDB string `json:"-"` + // AgentDirs maps each AgentType to its configured // directories. Single-dir agents store a one-element // slice; unconfigured agents use nil. 
@@ -130,11 +137,15 @@ func Default() (Config, error) { } return Config{ - Host: "127.0.0.1", - Port: 8080, - DataDir: dataDir, - DBPath: filepath.Join(dataDir, "sessions.db"), - WriteTimeout: 30 * time.Second, + Host: "127.0.0.1", + Port: 8080, + DataDir: dataDir, + DBPath: filepath.Join(dataDir, "sessions.db"), + WriteTimeout: 30 * time.Second, + CursorStateDB: filepath.Join( + home, + ".config/Cursor/User/globalStorage/state.vscdb", + ), AgentDirs: agentDirs, agentDirSource: agentDirSource, WatchExcludePatterns: []string{".git", "node_modules", "__pycache__", ".venv", "venv", "vendor", ".next"}, @@ -322,6 +333,9 @@ func (c *Config) loadEnv() { if v := os.Getenv("AGENT_VIEWER_DATA_DIR"); v != "" { c.DataDir = v } + if v := os.Getenv("CURSOR_STATE_DB"); v != "" { + c.CursorStateDB = v + } } type stringListFlag []string diff --git a/internal/parser/cursor_vscdb.go b/internal/parser/cursor_vscdb.go new file mode 100644 index 00000000..b91c9a86 --- /dev/null +++ b/internal/parser/cursor_vscdb.go @@ -0,0 +1,568 @@ +package parser + +import ( + "database/sql" + "encoding/json" + "fmt" + "log" + "net/url" + "os" + "path/filepath" + "strings" + "time" +) + +// CursorVscdbMeta is lightweight session metadata from state.vscdb, +// used by the sync engine to detect changes without parsing messages. +type CursorVscdbMeta struct { + SessionID string + VirtualPath string + FileMtime int64 // lastUpdatedAt in nanoseconds (millis * 1e6) + Project string + Name string + SubComposerIDs []string + CreatedAt int64 // unix millis + LastUpdatedAt int64 // unix millis +} + +// ListCursorVscdbSessions returns metadata for all Cursor sessions +// found in the global state.vscdb. Returns nil without error if the +// file does not exist. Project names are resolved by scanning the +// workspaceStorage directory adjacent to globalStorage. 
+func ListCursorVscdbSessions(
+	dbPath string,
+) ([]CursorVscdbMeta, error) {
+	// A missing database is not an error: there is simply
+	// nothing to list.
+	if _, err := os.Stat(dbPath); os.IsNotExist(err) {
+		return nil, nil
+	}
+
+	db, err := openCursorVscdb(dbPath)
+	if err != nil {
+		return nil, err
+	}
+	defer db.Close()
+
+	projects, err := loadCursorWorkspaceProjects(dbPath)
+	if err != nil {
+		log.Printf("cursor vscdb: loading workspace projects: %v", err)
+		// Non-fatal; sessions get "unknown" project. projects is
+		// nil here, and indexing a nil map safely yields "".
+	}
+
+	rows, err := db.Query(
+		"SELECT key, value FROM cursorDiskKV WHERE key LIKE 'composerData:%'",
+	)
+	if err != nil {
+		return nil, fmt.Errorf(
+			"listing cursor vscdb sessions: %w", err,
+		)
+	}
+	defer rows.Close()
+
+	var metas []CursorVscdbMeta
+	for rows.Next() {
+		var key string
+		var rawVal []byte
+		if err := rows.Scan(&key, &rawVal); err != nil {
+			return nil, fmt.Errorf(
+				"scanning cursor vscdb row: %w", err,
+			)
+		}
+
+		sessionID, ok := strings.CutPrefix(key, "composerData:")
+		if !ok || sessionID == "" {
+			continue
+		}
+
+		// Best effort: one undecodable row must not hide the
+		// remaining sessions.
+		var cd cursorComposerData
+		if err := json.Unmarshal(rawVal, &cd); err != nil {
+			continue
+		}
+
+		// Skip sessions with no conversation content.
+		if len(cd.FullConversationHeadersOnly) == 0 {
+			continue
+		}
+
+		project := projects[sessionID]
+		if project == "" {
+			project = "unknown"
+		}
+
+		// Both ID lists describe child composers; report them as
+		// one list.
+		subIDs := cd.SubComposerIDs
+		if len(cd.SubagentComposerIDs) > 0 {
+			subIDs = append(subIDs, cd.SubagentComposerIDs...)
+		}
+
+		metas = append(metas, CursorVscdbMeta{
+			SessionID:   sessionID,
+			VirtualPath: dbPath + "#" + sessionID,
+			// lastUpdatedAt is unix millis; scale to nanoseconds.
+			FileMtime:      cd.LastUpdatedAt * 1_000_000,
+			Project:        project,
+			Name:           cd.Name,
+			SubComposerIDs: subIDs,
+			CreatedAt:      cd.CreatedAt,
+			LastUpdatedAt:  cd.LastUpdatedAt,
+		})
+	}
+	if err := rows.Err(); err != nil {
+		// Iteration stopped early. A truncated list must not be
+		// mistaken for the complete set, so return no results.
+		return nil, fmt.Errorf(
+			"iterating cursor vscdb sessions: %w", err,
+		)
+	}
+	return metas, nil
+}
+
+// ParseCursorVscdbSession parses a single Cursor session from
+// state.vscdb. Returns nil without error for empty sessions.
+func ParseCursorVscdbSession(
+	dbPath, sessionID, project, machine string,
+) (*ParsedSession, []ParsedMessage, error) {
+	// Unlike ListCursorVscdbSessions, a missing database is an
+	// error here.
+	if _, err := os.Stat(dbPath); os.IsNotExist(err) {
+		return nil, nil, fmt.Errorf(
+			"cursor vscdb not found: %s", dbPath,
+		)
+	}
+
+	db, err := openCursorVscdb(dbPath)
+	if err != nil {
+		return nil, nil, err
+	}
+	defer db.Close()
+
+	// Load session metadata.
+	var rawVal []byte
+	err = db.QueryRow(
+		"SELECT value FROM cursorDiskKV WHERE key = ?",
+		"composerData:"+sessionID,
+	).Scan(&rawVal)
+	// An unknown session ID is reported as "nothing to parse",
+	// not as an error.
+	if err == sql.ErrNoRows {
+		return nil, nil, nil
+	}
+	if err != nil {
+		return nil, nil, fmt.Errorf(
+			"loading cursor vscdb session %s: %w",
+			sessionID, err,
+		)
+	}
+
+	var cd cursorComposerData
+	if err := json.Unmarshal(rawVal, &cd); err != nil {
+		return nil, nil, fmt.Errorf(
+			"parsing cursor vscdb composerData %s: %w",
+			sessionID, err,
+		)
+	}
+
+	// No bubble headers means no conversation; skip the bubble
+	// query entirely.
+	if len(cd.FullConversationHeadersOnly) == 0 {
+		return nil, nil, nil
+	}
+
+	// Load all bubbles for this session.
+	bubbles, err := loadCursorBubbles(db, sessionID)
+	if err != nil {
+		return nil, nil, fmt.Errorf(
+			"loading cursor vscdb bubbles %s: %w",
+			sessionID, err,
+		)
+	}
+
+	// The headers give the conversation order; the bubbles map
+	// supplies the content for each header.
+	msgs := buildCursorVscdbMessages(
+		cd.FullConversationHeadersOnly, bubbles,
+	)
+
+	if len(msgs) == 0 {
+		return nil, nil, nil
+	}
+
+	// Count user messages and take the first non-empty one,
+	// flattened to a single line and truncated, as the preview.
+	firstMsg := ""
+	userCount := 0
+	for _, m := range msgs {
+		if m.Role == RoleUser {
+			userCount++
+			if firstMsg == "" && m.Content != "" {
+				firstMsg = truncate(
+					strings.ReplaceAll(m.Content, "\n", " "),
+					300,
+				)
+			}
+		}
+	}
+
+	// A session without any user message is treated as empty.
+	if userCount == 0 {
+		return nil, nil, nil
+	}
+
+	startedAt := millisToTime(cd.CreatedAt)
+	endedAt := millisToTime(cd.LastUpdatedAt)
+
+	if project == "" {
+		project = "unknown"
+	}
+
+	sess := &ParsedSession{
+		ID:               "cursor:" + sessionID,
+		Project:          project,
+		Machine:          machine,
+		Agent:            AgentCursor,
+		FirstMessage:     firstMsg,
+		StartedAt:        startedAt,
+		EndedAt:          endedAt,
+		MessageCount:     len(msgs),
+		UserMessageCount: userCount,
+		File: FileInfo{
+			// Same virtual path and mtime scheme that
+			// ListCursorVscdbSessions reports: "<db>#<session>"
+			// and lastUpdatedAt scaled from millis to nanos.
+			Path:  dbPath + "#" + sessionID,
+			Mtime: cd.LastUpdatedAt * 1_000_000,
+		},
+	}
+
+	return sess, msgs, nil
+}
+
+// cursorComposerData is the JSON structure stored under
+// composerData:<composerId> in the cursorDiskKV table.
+type cursorComposerData struct {
+	ComposerID                  string               `json:"composerId"`
+	Name                        string               `json:"name"`
+	CreatedAt                   int64                `json:"createdAt"`     // unix millis
+	LastUpdatedAt               int64                `json:"lastUpdatedAt"` // unix millis
+	FullConversationHeadersOnly []cursorBubbleHeader `json:"fullConversationHeadersOnly"`
+	SubComposerIDs              []string             `json:"subComposerIds"`
+	SubagentComposerIDs         []string             `json:"subagentComposerIds"`
+	Status                      string               `json:"status"`
+	UnifiedMode                 string               `json:"unifiedMode"`
+}
+
+// cursorBubbleHeader is one entry in fullConversationHeadersOnly.
+type cursorBubbleHeader struct {
+	BubbleID string `json:"bubbleId"`
+	Type     int    `json:"type"` // 1=user, 2=assistant
+}
+
+// cursorBubble is the JSON structure stored under
+// bubbleId:<composerId>:<bubbleId> in cursorDiskKV.
+type cursorBubble struct {
+	BubbleID       string                `json:"bubbleId"`
+	Type           int                   `json:"type"` // 1=user, 2=assistant
+	Text           string                `json:"text"`
+	CreatedAt      string                `json:"createdAt"` // ISO 8601 string
+	ToolFormerData *cursorToolFormerData `json:"toolFormerData"`
+}
+
+// cursorToolFormerData holds tool call information embedded in
+// an assistant bubble.
+type cursorToolFormerData struct {
+	Name       string `json:"name"`
+	ToolCallID string `json:"toolCallId"`
+	Status     string `json:"status"`
+	// Params and Result are kept raw. They may be JSON-encoded
+	// strings or nested objects; normalizeCursorParamsJSON
+	// handles both forms for Params.
+	Params json.RawMessage `json:"params"`
+	Result json.RawMessage `json:"result"`
+}
+
+// openCursorVscdb opens Cursor's state.vscdb strictly read-only.
+// The database belongs to another application, so nothing in the
+// DSN may cause a write to it.
+func openCursorVscdb(dbPath string) (*sql.DB, error) {
+	// mode=ro is a SQLite URI parameter and is only honored when
+	// the DSN is a "file:" URI; on a bare path the query string
+	// is dropped and the database is opened read-write. The path
+	// is escaped so that '?', '#' and '%' in it are not read as
+	// URI syntax. No journal mode is requested: switching it
+	// would rewrite a database this process must never modify.
+	dsn := "file:" + (&url.URL{Path: dbPath}).EscapedPath() +
+		"?mode=ro&_busy_timeout=3000"
+	db, err := sql.Open("sqlite3", dsn)
+	if err != nil {
+		return nil, fmt.Errorf(
+			"opening cursor vscdb %s: %w", dbPath, err,
+		)
+	}
+	return db, nil
+}
+
+// loadCursorWorkspaceProjects scans workspaceStorage directories
+// adjacent to globalStorage and returns a map of
+// composerId → project name.
+func loadCursorWorkspaceProjects(
+	globalDbPath string,
+) (map[string]string, error) {
+	// <User>/globalStorage/state.vscdb → <User>/workspaceStorage
+	wsRoot := filepath.Join(
+		filepath.Dir(filepath.Dir(globalDbPath)),
+		"workspaceStorage",
+	)
+
+	dirEntries, err := os.ReadDir(wsRoot)
+	switch {
+	case err == nil:
+		// Directory listed; fall through to the scan below.
+	case os.IsNotExist(err):
+		// No workspaceStorage at all: no mapping, no error.
+		return nil, nil
+	default:
+		return nil, fmt.Errorf(
+			"reading workspaceStorage: %w", err,
+		)
+	}
+
+	byComposer := make(map[string]string)
+	for _, entry := range dirEntries {
+		if !entry.IsDir() {
+			continue
+		}
+		wsPath := filepath.Join(wsRoot, entry.Name())
+
+		// Resolve the project first; a workspace without one is
+		// skipped before its database is opened.
+		name := extractWorkspaceProject(wsPath)
+		if name == "" {
+			continue
+		}
+		for _, composerID := range extractWorkspaceComposerIDs(wsPath) {
+			if composerID == "" {
+				continue
+			}
+			byComposer[composerID] = name
+		}
+	}
+	return byComposer, nil
+}
+
+// extractWorkspaceProject reads the project path from
+// workspaceStorage/<workspace-id>/workspace.json. It returns ""
+// when the file is missing, unreadable, malformed, or has no
+// folder entry.
+func extractWorkspaceProject(dirPath string) string {
+	raw, err := os.ReadFile(filepath.Join(dirPath, "workspace.json"))
+	if err != nil {
+		return ""
+	}
+	var meta struct {
+		Folder string `json:"folder"`
+	}
+	if json.Unmarshal(raw, &meta) != nil || meta.Folder == "" {
+		return ""
+	}
+
+	// folder is a file:// URL, e.g. "file:///home/user/proj";
+	// anything that does not parse is passed on unchanged.
+	folder := meta.Folder
+	if strings.HasPrefix(folder, "file://") {
+		if parsed, perr := url.Parse(folder); perr == nil {
+			folder = parsed.Path
+		}
+	}
+	return ExtractProjectFromCwd(folder)
+}
+
+// extractWorkspaceComposerIDs reads composer IDs from
+// workspaceStorage/<workspace-id>/state.vscdb ItemTable.
+func extractWorkspaceComposerIDs(dirPath string) []string {
+	dbPath := filepath.Join(dirPath, "state.vscdb")
+	if _, err := os.Stat(dbPath); os.IsNotExist(err) {
+		return nil
+	}
+
+	// mode=ro is only honored on a "file:" URI; on a bare path
+	// the query string is dropped and the workspace database
+	// would be opened read-write. The path is escaped so that
+	// '?', '#' and '%' in it are not read as URI syntax.
+	dsn := "file:" + (&url.URL{Path: dbPath}).EscapedPath() +
+		"?mode=ro&_busy_timeout=3000"
+	db, err := sql.Open("sqlite3", dsn)
+	if err != nil {
+		return nil
+	}
+	defer db.Close()
+
+	// Best effort: any failure below (no ItemTable, no row, bad
+	// JSON) simply means this workspace contributes no IDs.
+	var rawVal []byte
+	err = db.QueryRow(
+		"SELECT value FROM ItemTable WHERE key = 'composer.composerData'",
+	).Scan(&rawVal)
+	if err != nil {
+		return nil
+	}
+
+	var cd struct {
+		AllComposers []struct {
+			ComposerID string `json:"composerId"`
+		} `json:"allComposers"`
+	}
+	if err := json.Unmarshal(rawVal, &cd); err != nil {
+		return nil
+	}
+
+	ids := make([]string, 0, len(cd.AllComposers))
+	for _, c := range cd.AllComposers {
+		if c.ComposerID != "" {
+			ids = append(ids, c.ComposerID)
+		}
+	}
+	return ids
+}
+
+// loadCursorBubbles fetches all bubble data for a session,
+// keyed by bubble ID. Rows whose value is not valid JSON are
+// skipped. On a query or iteration error the map is nil.
+func loadCursorBubbles(
+	db *sql.DB, sessionID string,
+) (map[string]cursorBubble, error) {
+	// key = "bubbleId:<sessionID>:<bubbleID>"
+	prefix := "bubbleId:" + sessionID + ":"
+	rows, err := db.Query(
+		"SELECT key, value FROM cursorDiskKV WHERE key LIKE ?",
+		prefix+"%",
+	)
+	if err != nil {
+		return nil, err
+	}
+	defer rows.Close()
+
+	bubbles := make(map[string]cursorBubble)
+	for rows.Next() {
+		var key string
+		var rawVal []byte
+		if err := rows.Scan(&key, &rawVal); err != nil {
+			return nil, err
+		}
+
+		// LIKE treats '_' and '%' in the session ID as
+		// wildcards and ignores ASCII case, so confirm the
+		// exact prefix before trusting the row. Cutting the
+		// prefix also keeps a bubble ID intact if it contains
+		// a ':' of its own.
+		bubbleID, ok := strings.CutPrefix(key, prefix)
+		if !ok {
+			continue
+		}
+
+		var b cursorBubble
+		if err := json.Unmarshal(rawVal, &b); err != nil {
+			continue
+		}
+		bubbles[bubbleID] = b
+	}
+	if err := rows.Err(); err != nil {
+		return nil, err
+	}
+	return bubbles, nil
+}
+
+// buildCursorVscdbMessages reconstructs ParsedMessages from bubble
+// headers and bubble data. Consecutive assistant bubbles (text +
+// tool calls) are merged into a single assistant ParsedMessage.
+func buildCursorVscdbMessages(
+	headers []cursorBubbleHeader,
+	bubbles map[string]cursorBubble,
+) []ParsedMessage {
+	var msgs []ParsedMessage
+	ordinal := 0
+
+	// Tracks the current assistant message being assembled.
+	var curAsst *ParsedMessage
+
+	// flushAssistant emits the pending assistant message if it
+	// has text or a tool call, then clears it. An ordinal is
+	// only consumed when a message is actually emitted.
+	flushAssistant := func() {
+		if curAsst == nil {
+			return
+		}
+		if strings.TrimSpace(curAsst.Content) != "" ||
+			curAsst.HasToolUse {
+			msgs = append(msgs, *curAsst)
+			ordinal++
+		}
+		curAsst = nil
+	}
+
+	for _, h := range headers {
+		// Headers without stored bubble data are ignored.
+		b, ok := bubbles[h.BubbleID]
+		if !ok {
+			continue
+		}
+
+		switch h.Type {
+		case 1: // user
+			// A user bubble ends the current assistant run.
+			flushAssistant()
+			text := strings.TrimSpace(b.Text)
+			if text == "" {
+				continue
+			}
+			msgs = append(msgs, ParsedMessage{
+				Ordinal:       ordinal,
+				Role:          RoleUser,
+				Content:       text,
+				Timestamp:     parseCursorBubbleTime(b.CreatedAt),
+				ContentLength: len(text),
+			})
+			ordinal++
+
+		case 2: // assistant
+			isToolCall := b.ToolFormerData != nil &&
+				b.ToolFormerData.Name != ""
+
+			// The merged message takes the timestamp of the
+			// first bubble in the run.
+			if curAsst == nil {
+				ts := parseCursorBubbleTime(b.CreatedAt)
+				curAsst = &ParsedMessage{
+					Ordinal:   ordinal,
+					Role:      RoleAssistant,
+					Timestamp: ts,
+				}
+			}
+
+			if isToolCall {
+				tc := buildCursorToolCall(b.ToolFormerData)
+				curAsst.ToolCalls = append(
+					curAsst.ToolCalls, tc,
+				)
+				curAsst.HasToolUse = true
+			} else {
+				text := strings.TrimSpace(b.Text)
+				if text != "" {
+					if curAsst.Content != "" {
+						curAsst.Content += "\n"
+					}
+					curAsst.Content += text
+				}
+			}
+		}
+	}
+
+	flushAssistant()
+
+	// Update ContentLength on all messages. Assistant content
+	// is only final once its run has been flushed.
+	for i := range msgs {
+		msgs[i].ContentLength = len(msgs[i].Content)
+	}
+
+	return msgs
+}
+
+// buildCursorToolCall converts a cursorToolFormerData into a
+// ParsedToolCall using the vscdb tool name taxonomy.
+func buildCursorToolCall(
+	tf *cursorToolFormerData,
+) ParsedToolCall {
+	if tf == nil {
+		return ParsedToolCall{}
+	}
+
+	inputJSON := ""
+	if len(tf.Params) > 0 {
+		// params may be a JSON string (needs unquoting) or
+		// a JSON object — normalize to object form.
+		inputJSON = normalizeCursorParamsJSON(tf.Params)
+	}
+
+	return ParsedToolCall{
+		ToolUseID: tf.ToolCallID,
+		ToolName:  tf.Name,
+		Category:  NormalizeToolCategory(tf.Name),
+		InputJSON: inputJSON,
+	}
+}
+
+// normalizeCursorParamsJSON handles the case where params is
+// stored as a JSON-encoded string (a string containing JSON)
+// rather than a JSON object directly. The string is unwrapped
+// only when its content is itself valid JSON (or empty), so
+// the result is never invalid JSON: a plain-text string is
+// returned in its original quoted form.
+func normalizeCursorParamsJSON(raw json.RawMessage) string {
+	if len(raw) == 0 {
+		return ""
+	}
+	// If it's a JSON string wrapping JSON, unwrap it.
+	if raw[0] == '"' {
+		var s string
+		if err := json.Unmarshal(raw, &s); err == nil &&
+			(s == "" || json.Valid([]byte(s))) {
+			return s
+		}
+	}
+	// Already a JSON object or array, or a string that is not
+	// JSON inside; keep the raw encoding.
+	return string(raw)
+}
+
+// parseCursorBubbleTime parses the ISO 8601 createdAt string
+// used in Cursor bubbles. Returns zero time on parse failure.
+func parseCursorBubbleTime(s string) time.Time {
+	if s == "" {
+		return time.Time{}
+	}
+	// Tried in order; the first layout that parses wins.
+	formats := []string{
+		time.RFC3339Nano,
+		time.RFC3339,
+		"2006-01-02T15:04:05.999Z",
+		"2006-01-02T15:04:05Z",
+	}
+	for _, f := range formats {
+		if t, err := time.Parse(f, s); err == nil {
+			return t
+		}
+	}
+	return time.Time{}
+}
diff --git a/internal/parser/cursor_vscdb_test.go b/internal/parser/cursor_vscdb_test.go
new file mode 100644
index 00000000..fafd96bd
--- /dev/null
+++ b/internal/parser/cursor_vscdb_test.go
@@ -0,0 +1,498 @@
+package parser
+
+import (
+	"database/sql"
+	"encoding/json"
+	"os"
+	"path/filepath"
+	"testing"
+
+	_ "github.com/mattn/go-sqlite3"
+)
+
+// createTestVscdb creates a minimal Cursor state.vscdb SQLite
+// database at path with the cursorDiskKV table.
+func createTestVscdb(t *testing.T, path string) *sql.DB {
+	t.Helper()
+	if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
+		t.Fatalf("mkdir: %v", err)
+	}
+	db, err := sql.Open("sqlite3", path)
+	if err != nil {
+		t.Fatalf("open vscdb: %v", err)
+	}
+	_, err = db.Exec(`
+		CREATE TABLE cursorDiskKV (
+			key TEXT UNIQUE ON CONFLICT REPLACE,
+			value BLOB
+		)
+	`)
+	if err != nil {
+		t.Fatalf("create table: %v", err)
+	}
+	return db
+}
+
+// insertComposerData inserts a composerData entry.
+func insertComposerData(
+	t *testing.T, db *sql.DB,
+	sessionID string, data cursorComposerData,
+) {
+	t.Helper()
+	raw, err := json.Marshal(data)
+	if err != nil {
+		t.Fatalf("marshal composerData: %v", err)
+	}
+	_, err = db.Exec(
+		"INSERT INTO cursorDiskKV (key, value) VALUES (?, ?)",
+		"composerData:"+sessionID, raw,
+	)
+	if err != nil {
+		t.Fatalf("insert composerData: %v", err)
+	}
+}
+
+// insertBubble inserts a bubbleId entry.
+func insertBubble(
+	t *testing.T, db *sql.DB,
+	sessionID, bubbleID string, bubble cursorBubble,
+) {
+	t.Helper()
+	raw, err := json.Marshal(bubble)
+	if err != nil {
+		t.Fatalf("marshal bubble: %v", err)
+	}
+	_, err = db.Exec(
+		"INSERT INTO cursorDiskKV (key, value) VALUES (?, ?)",
+		"bubbleId:"+sessionID+":"+bubbleID, raw,
+	)
+	if err != nil {
+		t.Fatalf("insert bubble: %v", err)
+	}
+}
+
+func TestListCursorVscdbSessions_NonExistent(t *testing.T) {
+	metas, err := ListCursorVscdbSessions(
+		"/nonexistent/state.vscdb",
+	)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if metas != nil {
+		t.Errorf("expected nil for nonexistent db, got %v", metas)
+	}
+}
+
+func TestListCursorVscdbSessions_Empty(t *testing.T) {
+	dir := t.TempDir()
+	dbPath := filepath.Join(dir, "state.vscdb")
+	db := createTestVscdb(t, dbPath)
+	db.Close()
+
+	metas, err := ListCursorVscdbSessions(dbPath)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if len(metas) != 0 {
+		t.Errorf("expected 0 metas, got %d", len(metas))
+	}
+}
+
+func TestListCursorVscdbSessions_SkipsEmpty(t *testing.T) {
+	dir := t.TempDir()
+	dbPath := filepath.Join(dir, "state.vscdb")
+	db := createTestVscdb(t, dbPath)
+	defer db.Close()
+
+	// Session with no headers — should be skipped.
+	insertComposerData(t, db, "session-empty", cursorComposerData{
+		ComposerID:                  "session-empty",
+		CreatedAt:                   1000000,
+		LastUpdatedAt:               2000000,
+		FullConversationHeadersOnly: nil,
+	})
+
+	// Session with headers — should appear.
+	insertComposerData(t, db, "session-ok", cursorComposerData{
+		ComposerID:    "session-ok",
+		Name:          "Test session",
+		CreatedAt:     1000000,
+		LastUpdatedAt: 2000000,
+		FullConversationHeadersOnly: []cursorBubbleHeader{
+			{BubbleID: "b1", Type: 1},
+		},
+	})
+
+	db.Close()
+
+	metas, err := ListCursorVscdbSessions(dbPath)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	// Fatalf, not Errorf: metas[0] is indexed below and would
+	// panic on an empty result instead of reporting a failure.
+	if len(metas) != 1 {
+		t.Fatalf("expected 1 meta, got %d", len(metas))
+	}
+	if metas[0].SessionID != "session-ok" {
+		t.Errorf("got session %q, want session-ok", metas[0].SessionID)
+	}
+	if metas[0].Name != "Test session" {
+		t.Errorf("got name %q, want 'Test session'", metas[0].Name)
+	}
+	if metas[0].FileMtime != 2000000*1_000_000 {
+		t.Errorf(
+			"FileMtime = %d, want %d",
+			metas[0].FileMtime, 2000000*1_000_000,
+		)
+	}
+	if metas[0].VirtualPath != dbPath+"#session-ok" {
+		t.Errorf(
+			"VirtualPath = %q, want %q",
+			metas[0].VirtualPath, dbPath+"#session-ok",
+		)
+	}
+}
+
+func TestListCursorVscdbSessions_SubComposerIDs(t *testing.T) {
+	dir := t.TempDir()
+	dbPath := filepath.Join(dir, "state.vscdb")
+	db := createTestVscdb(t, dbPath)
+
+	insertComposerData(t, db, "parent-session", cursorComposerData{
+		ComposerID:     "parent-session",
+		CreatedAt:      1000000,
+		LastUpdatedAt:  2000000,
+		SubComposerIDs: []string{"child-1", "child-2"},
+		FullConversationHeadersOnly: []cursorBubbleHeader{
+			{BubbleID: "b1", Type: 1},
+		},
+	})
+	db.Close()
+
+	metas, err := ListCursorVscdbSessions(dbPath)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if len(metas) != 1 {
+		t.Fatalf("expected 1 meta, got %d", len(metas))
+	}
+	if len(metas[0].SubComposerIDs) != 2 {
+		t.Errorf(
+			"SubComposerIDs len = %d, want 2",
+			len(metas[0].SubComposerIDs),
+		)
+	}
+}
+
+func TestParseCursorVscdbSession_NonExistent(t *testing.T) {
+	sess, msgs, err := ParseCursorVscdbSession(
+		"/nonexistent/state.vscdb",
+		"some-id", "myproject", "local",
+	)
+	if err == nil {
+		t.Fatal("expected error for nonexistent db")
+	}
+	if sess != nil || msgs != nil {
+		t.Error("expected nil session and messages")
+	}
+}
+
+func TestParseCursorVscdbSession_BasicTextOnly(t *testing.T) {
+	dir := t.TempDir()
+	dbPath := filepath.Join(dir, "state.vscdb")
+	db := createTestVscdb(t, dbPath)
+
+	sessionID := "test-session-1"
+	bubble1 := "bubble-user-1"
+	bubble2 := "bubble-asst-1"
+
+	insertComposerData(t, db, sessionID, cursorComposerData{
+		ComposerID:    sessionID,
+		Name:          "My test session",
+		CreatedAt:     1000000,
+		LastUpdatedAt: 2000000,
+		FullConversationHeadersOnly: []cursorBubbleHeader{
+			{BubbleID: bubble1, Type: 1},
+			{BubbleID: bubble2, Type: 2},
+		},
+	})
+
+	insertBubble(t, db, sessionID, bubble1, cursorBubble{
+		BubbleID:  bubble1,
+		Type:      1,
+		Text:      "Hello, can you help me?",
+		CreatedAt: "2025-01-01T10:00:00.000Z",
+	})
+	insertBubble(t, db, sessionID, bubble2, cursorBubble{
+		BubbleID:  bubble2,
+		Type:      2,
+		Text:      "Of course! What do you need?",
+		CreatedAt: "2025-01-01T10:00:01.000Z",
+	})
+
+	db.Close()
+
+	sess, msgs, err := ParseCursorVscdbSession(
+		dbPath, sessionID, "myproject", "local",
+	)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if sess == nil {
+		t.Fatal("expected non-nil session")
+	}
+
+	assertEq(t, "ID", sess.ID, "cursor:"+sessionID)
+	assertEq(t, "Project", sess.Project, "myproject")
+	assertEq(t, "Machine", sess.Machine, "local")
+	assertEq(t, "Agent", string(sess.Agent), "cursor")
+	assertEq(t, "MessageCount", sess.MessageCount, 2)
+	assertEq(t, "UserMessageCount", sess.UserMessageCount, 1)
+	if sess.FirstMessage == "" {
+		t.Error("expected non-empty FirstMessage")
+	}
+
+	if len(msgs) != 2 {
+		t.Fatalf("expected 2 messages, got %d", len(msgs))
+	}
+	assertEq(t, "msgs[0].Role", string(msgs[0].Role), "user")
+	assertEq(t, "msgs[0].Content", msgs[0].Content, "Hello, can you help me?")
+	assertEq(t, "msgs[1].Role", string(msgs[1].Role), "assistant")
+	assertEq(t, "msgs[1].Content", msgs[1].Content, "Of course! What do you need?")
+}
+
+func TestParseCursorVscdbSession_WithToolCall(t *testing.T) {
+	dir := t.TempDir()
+	dbPath := filepath.Join(dir, "state.vscdb")
+	db := createTestVscdb(t, dbPath)
+
+	sessionID := "tool-session"
+	b1 := "b-user"
+	b2 := "b-tool"
+	b3 := "b-text"
+
+	params := json.RawMessage(`{"pattern":"foo","path":"/src"}`)
+
+	insertComposerData(t, db, sessionID, cursorComposerData{
+		ComposerID:    sessionID,
+		CreatedAt:     1000000,
+		LastUpdatedAt: 2000000,
+		FullConversationHeadersOnly: []cursorBubbleHeader{
+			{BubbleID: b1, Type: 1},
+			{BubbleID: b2, Type: 2},
+			{BubbleID: b3, Type: 2},
+		},
+	})
+
+	insertBubble(t, db, sessionID, b1, cursorBubble{
+		BubbleID:  b1,
+		Type:      1,
+		Text:      "Search for foo in /src",
+		CreatedAt: "2025-01-01T10:00:00.000Z",
+	})
+	insertBubble(t, db, sessionID, b2, cursorBubble{
+		BubbleID:  b2,
+		Type:      2,
+		CreatedAt: "2025-01-01T10:00:01.000Z",
+		ToolFormerData: &cursorToolFormerData{
+			Name:       "grep",
+			ToolCallID: "call-001",
+			Status:     "completed",
+			Params:     params,
+		},
+	})
+	insertBubble(t, db, sessionID, b3, cursorBubble{
+		BubbleID:  b3,
+		Type:      2,
+		Text:      "Found 3 matches.",
+		CreatedAt: "2025-01-01T10:00:02.000Z",
+	})
+
+	db.Close()
+
+	sess, msgs, err := ParseCursorVscdbSession(
+		dbPath, sessionID, "myproject", "local",
+	)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if sess == nil {
+		t.Fatal("expected non-nil session")
+	}
+	// User message + one merged assistant message.
+	if len(msgs) != 2 {
+		t.Fatalf("expected 2 messages, got %d", len(msgs))
+	}
+
+	asstMsg := msgs[1]
+	assertEq(t, "asstMsg.Role", string(asstMsg.Role), "assistant")
+	assertEq(t, "asstMsg.HasToolUse", asstMsg.HasToolUse, true)
+	assertEq(t, "asstMsg.Content", asstMsg.Content, "Found 3 matches.")
+	if len(asstMsg.ToolCalls) != 1 {
+		t.Fatalf(
+			"expected 1 tool call, got %d",
+			len(asstMsg.ToolCalls),
+		)
+	}
+	tc := asstMsg.ToolCalls[0]
+	assertEq(t, "tc.ToolName", tc.ToolName, "grep")
+	assertEq(t, "tc.Category", tc.Category, "Grep")
+	assertEq(t, "tc.ToolUseID", tc.ToolUseID, "call-001")
+	if tc.InputJSON == "" {
+		t.Error("expected non-empty InputJSON")
+	}
+}
+
+func TestParseCursorVscdbSession_EmptySession(t *testing.T) {
+	dir := t.TempDir()
+	dbPath := filepath.Join(dir, "state.vscdb")
+	db := createTestVscdb(t, dbPath)
+
+	// Session with headers but no matching bubble data.
+	insertComposerData(t, db, "empty-session", cursorComposerData{
+		ComposerID:    "empty-session",
+		CreatedAt:     1000000,
+		LastUpdatedAt: 2000000,
+		FullConversationHeadersOnly: []cursorBubbleHeader{
+			{BubbleID: "missing-bubble", Type: 1},
+		},
+	})
+	db.Close()
+
+	sess, msgs, err := ParseCursorVscdbSession(
+		dbPath, "empty-session", "proj", "local",
+	)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if sess != nil {
+		t.Errorf("expected nil session for empty content, got %+v", sess)
+	}
+	if msgs != nil {
+		t.Errorf("expected nil messages, got %v", msgs)
+	}
+}
+
+func TestNormalizeCursorVscdbTool(t *testing.T) {
+	tests := []struct {
+		name string
+		want string
+	}{
+		{"run_terminal_command_v2", "Bash"},
+		{"run_terminal_cmd", "Bash"},
+		{"read_file_v2", "Read"},
+		{"edit_file_v2", "Edit"},
+		{"search_replace", "Edit"},
+		{"apply_patch", "Edit"},
+		{"ripgrep_raw_search", "Grep"},
+		{"rg", "Grep"},
+		{"glob_file_search", "Glob"},
+		{"file_search", "Glob"},
+		{"task_v2", "Task"},
+		{"delete_file", "Write"},
+		{"list_dir_v2", "Read"},
+		{"list_dir", "Read"},
+		{"read_lints", "Read"},
+		{"todo_write", "Tool"},
+		{"create_plan", "Tool"},
+		{"ask_question", "Tool"},
+		{"switch_mode", "Tool"},
+		{"codebase_search", "Tool"},
+		{"semantic_search_full", "Tool"},
+		{"web_search", "Tool"},
+		{"web_fetch", "Tool"},
+		{"mcp-github", "Tool"},
+		{"mcp-linear-search", "Tool"},
+		{"grep", "Grep"},
+		{"shell", "Bash"},
+		{"unknown_tool_xyz", "Other"},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got := NormalizeToolCategory(tt.name)
+			if got != tt.want {
+				t.Errorf(
+					"NormalizeToolCategory(%q) = %q, want %q",
+					tt.name, got, tt.want,
+				)
+			}
+		})
+	}
+}
+
+func TestBuildCursorVscdbMessages_GroupsConsecutiveAssistant(t *testing.T) {
+	headers := []cursorBubbleHeader{
+		{BubbleID: "u1", Type: 1},
+		{BubbleID: "a1", Type: 2}, // tool call
+		{BubbleID: "a2", Type: 2}, // text
+		{BubbleID: "u2", Type: 1},
+		{BubbleID: "a3", Type: 2}, // text
+	}
+	params := json.RawMessage(`{"path":"/foo"}`)
+	bubbles := map[string]cursorBubble{
+		"u1": {BubbleID: "u1", Type: 1, Text: "First question"},
+		"a1": {
+			BubbleID:  "a1",
+			Type:      2,
+			CreatedAt: "2025-01-01T10:00:00Z",
+			ToolFormerData: &cursorToolFormerData{
+				Name:   "read_file_v2",
+				Status: "completed",
+				Params: params,
+			},
+		},
+		"a2": {BubbleID: "a2", Type: 2, Text: "Here is the content."},
+		"u2": {BubbleID: "u2", Type: 1, Text: "Second question"},
+		"a3": {BubbleID: "a3", Type: 2, Text: "Another response."},
+	}
+
+	msgs := buildCursorVscdbMessages(headers, bubbles)
+
+	// Expect: user, assistant(tool+text), user, assistant(text)
+	if len(msgs) != 4 {
+		t.Fatalf("expected 4 messages, got %d", len(msgs))
+	}
+
+	assertEq(t, "msgs[0].Role", string(msgs[0].Role), "user")
+	assertEq(t, "msgs[1].Role", string(msgs[1].Role), "assistant")
+	assertEq(t, "msgs[1].HasToolUse", msgs[1].HasToolUse, true)
+	assertEq(t, "msgs[1].Content", msgs[1].Content, "Here is the content.")
+	if len(msgs[1].ToolCalls) != 1 {
+		t.Errorf("expected 1 tool call, got %d", len(msgs[1].ToolCalls))
+	}
+	assertEq(t, "msgs[2].Role", string(msgs[2].Role), "user")
+	assertEq(t, "msgs[3].Role", string(msgs[3].Role), "assistant")
+	assertEq(t, "msgs[3].Content", msgs[3].Content, "Another response.")
+}
+
+func TestParseCursorParamsJSON(t *testing.T) {
+	tests := []struct {
+		name  string
+		input json.RawMessage
+		want  string
+	}{
+		{
+			name:  "object",
+			input: json.RawMessage(`{"key":"value"}`),
+			want:  `{"key":"value"}`,
+		},
+		{
+			name:  "string wrapping json",
+			input: json.RawMessage(`"{\"key\":\"value\"}"`),
+			want:  `{"key":"value"}`,
+		},
+		{
+			name:  "empty",
+			input: json.RawMessage(nil),
+			want:  "",
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got := normalizeCursorParamsJSON(tt.input)
+			if got != tt.want {
+				t.Errorf("got %q, want %q", got, tt.want)
+			}
+		})
+	}
+}
diff --git a/internal/parser/discovery.go b/internal/parser/discovery.go index 2c0e2383..1f1311e1 100644 --- a/internal/parser/discovery.go +++ b/internal/parser/discovery.go @@ -538,12 +538,46 @@ func DiscoverCursorSessions( // Collect valid transcripts, deduping by basename // stem. When both .jsonl and .txt exist for the // same session, prefer .jsonl. 
+ // + // Two formats are supported: + // Old: agent-transcripts/.{txt,jsonl} + // New: agent-transcripts//.{txt,jsonl} seen := make(map[string]string) // stem -> path + addToSeen := func(stem, fullPath string) { + if prev, ok := seen[stem]; ok { + if strings.HasSuffix(prev, ".txt") && + strings.HasSuffix(fullPath, ".jsonl") { + seen[stem] = fullPath + } + return + } + seen[stem] = fullPath + } for _, sf := range transcripts { + name := sf.Name() if sf.IsDir() { + // New format: /.{jsonl,txt} + if !IsValidSessionID(name) { + continue + } + subDir := filepath.Join(transcriptsDir, name) + resolvedSub, err := filepath.EvalSymlinks(subDir) + if err != nil || !isContainedIn(resolvedSub, resolvedRoot) { + continue + } + for _, ext := range []string{".jsonl", ".txt"} { + candidate := filepath.Join(subDir, name+ext) + if !IsRegularFile(candidate) { + continue + } + resolvedFile, err := filepath.EvalSymlinks(candidate) + if err != nil || !isContainedIn(resolvedFile, resolvedRoot) { + continue + } + addToSeen(name, candidate) + } continue } - name := sf.Name() if !IsCursorTranscriptExt(name) { continue } @@ -556,15 +590,7 @@ func DiscoverCursorSessions( stem := strings.TrimSuffix( name, filepath.Ext(name), ) - if prev, ok := seen[stem]; ok { - // .jsonl wins over .txt - if strings.HasSuffix(prev, ".txt") && - strings.HasSuffix(name, ".jsonl") { - seen[stem] = fullPath - } - continue - } - seen[stem] = fullPath + addToSeen(stem, fullPath) } for _, path := range seen { files = append(files, DiscoveredFile{ @@ -606,28 +632,28 @@ func FindCursorSourceFile( if !entry.IsDir() { continue } - candidate := filepath.Join( + // Old format: agent-transcripts/.{jsonl,txt} + flat := filepath.Join( projectsDir, entry.Name(), "agent-transcripts", target, ) - if !IsRegularFile(candidate) { - continue - } - resolved, err := filepath.EvalSymlinks( - candidate, - ) - if err != nil { - continue + if IsRegularFile(flat) { + resolved, err := filepath.EvalSymlinks(flat) + if err == nil && 
isContainedIn(resolved, resolvedRoot) { + return flat + } } - rel, err := filepath.Rel( - resolvedRoot, resolved, + // New format: agent-transcripts//.{jsonl,txt} + nested := filepath.Join( + projectsDir, entry.Name(), + "agent-transcripts", sessionID, target, ) - sep := string(filepath.Separator) - if err != nil || rel == ".." || - strings.HasPrefix(rel, ".."+sep) { - continue + if IsRegularFile(nested) { + resolved, err := filepath.EvalSymlinks(nested) + if err == nil && isContainedIn(resolved, resolvedRoot) { + return nested + } } - return candidate } } return "" diff --git a/internal/parser/discovery_test.go b/internal/parser/discovery_test.go index a880c8d8..5f748199 100644 --- a/internal/parser/discovery_test.go +++ b/internal/parser/discovery_test.go @@ -1167,6 +1167,40 @@ func TestDiscoverCursorSessions(t *testing.T) { }, wantCount: 0, }, + { + // New Cursor format: agent-transcripts//.jsonl + name: "NewFormatSubdirJsonl", + files: map[string]string{ + filepath.Join(cursorTranscripts, "5b84cf99-8f9f-4bbe-b07b-cbbce91a32b9", "5b84cf99-8f9f-4bbe-b07b-cbbce91a32b9.jsonl"): `{"role":"user"}`, + }, + wantCount: 1, + }, + { + // New format with .txt fallback + name: "NewFormatSubdirTxt", + files: map[string]string{ + filepath.Join(cursorTranscripts, "6dc705fb-a849-4ad5-a20b-e2ca975d0f22", "6dc705fb-a849-4ad5-a20b-e2ca975d0f22.txt"): "user:\nhi", + }, + wantCount: 1, + }, + { + // Mix of old and new format sessions in the same project + name: "MixedFormats", + files: map[string]string{ + filepath.Join(cursorTranscripts, "flat-session.txt"): "user:\nhi", + filepath.Join(cursorTranscripts, "75d6894d-4317-4d3d-8324-60daa434dff4", "75d6894d-4317-4d3d-8324-60daa434dff4.jsonl"): `{"role":"user"}`, + }, + wantCount: 2, + }, + { + // New format: when both .jsonl and .txt exist inside subdir, prefer .jsonl + name: "NewFormatSubdirDedupPrefersJsonl", + files: map[string]string{ + filepath.Join(cursorTranscripts, "a1b2c3d4-0000-0000-0000-000000000001", 
"a1b2c3d4-0000-0000-0000-000000000001.txt"): "user:\nold", + filepath.Join(cursorTranscripts, "a1b2c3d4-0000-0000-0000-000000000001", "a1b2c3d4-0000-0000-0000-000000000001.jsonl"): `{"role":"user"}`, + }, + wantCount: 1, + }, } for _, tt := range tests { @@ -1257,6 +1291,32 @@ func TestFindCursorSourceFile(t *testing.T) { } }) + t.Run("FindsNewFormatJsonl", func(t *testing.T) { + dir := t.TempDir() + setupFileSystem(t, dir, map[string]string{ + filepath.Join(cursorTranscripts, "sess4", "sess4.jsonl"): "{}", + }) + got := FindCursorSourceFile(dir, "sess4") + if got == "" { + t.Fatal("expected to find new-format .jsonl file") + } + if !strings.HasSuffix(got, ".jsonl") { + t.Errorf("expected .jsonl path, got %q", got) + } + }) + + t.Run("PrefersNewFormatJsonlOverFlatTxt", func(t *testing.T) { + dir := t.TempDir() + setupFileSystem(t, dir, map[string]string{ + filepath.Join(cursorTranscripts, "sess5.txt"): "old", + filepath.Join(cursorTranscripts, "sess5", "sess5.jsonl"): "new", + }) + got := FindCursorSourceFile(dir, "sess5") + if !strings.HasSuffix(got, ".jsonl") { + t.Errorf("expected .jsonl path, got %q", got) + } + }) + t.Run("NotFound", func(t *testing.T) { dir := t.TempDir() got := FindCursorSourceFile(dir, "nonexistent") diff --git a/internal/parser/taxonomy.go b/internal/parser/taxonomy.go index dd8d55a5..838927ff 100644 --- a/internal/parser/taxonomy.go +++ b/internal/parser/taxonomy.go @@ -1,5 +1,7 @@ package parser +import "strings" + // NormalizeToolCategory maps a raw tool name to a normalized // category. Categories: Read, Edit, Write, Bash, Grep, Glob, // Task, Tool, Other. 
@@ -65,7 +67,7 @@ func NormalizeToolCategory(rawName string) string { case "report_intent": return "Tool" - // Cursor tools + // Cursor (file-based JSONL) tools case "Shell": return "Bash" case "StrReplace": @@ -73,6 +75,32 @@ func NormalizeToolCategory(rawName string) string { case "LS": return "Read" + // Cursor vscdb tool names + // Note: "apply_patch" is handled above (Codex section). + // Note: "web_search", "web_fetch" handled below (OpenClaw). + case "run_terminal_command_v2", "run_terminal_cmd": + return "Bash" + case "read_file_v2": + return "Read" + case "edit_file_v2", "search_replace": + return "Edit" + case "ripgrep_raw_search", "rg": + return "Grep" + case "glob_file_search", "file_search": + return "Glob" + case "task_v2": + return "Task" + case "delete_file": + return "Write" + case "list_dir_v2", "list_dir": + return "Read" + case "read_lints": + return "Read" + case "todo_write", "create_plan", "ask_question", + "switch_mode", "codebase_search", + "semantic_search_full": + return "Tool" + // Amp tools (not already covered above) // Note: "create_file" is also used by Pi. case "create_file": @@ -113,6 +141,10 @@ func NormalizeToolCategory(rawName string) string { return "Task" default: + // MCP tool invocations are prefixed with "mcp-". + if strings.HasPrefix(rawName, "mcp-") { + return "Tool" + } return "Other" } } diff --git a/internal/sync/engine.go b/internal/sync/engine.go index c87a107a..ae354883 100644 --- a/internal/sync/engine.go +++ b/internal/sync/engine.go @@ -29,6 +29,10 @@ type EngineConfig struct { AgentDirs map[parser.AgentType][]string Machine string BlockedResultCategories []string + // CursorStateDB is the path to Cursor's global + // state.vscdb SQLite database. Empty string disables + // vscdb-based Cursor sync. + CursorStateDB string } // Engine orchestrates session file discovery and sync. 
@@ -37,6 +41,7 @@ type Engine struct { agentDirs map[parser.AgentType][]string machine string blockedResultCategories map[string]bool + cursorStateDB string syncMu gosync.Mutex // serializes all sync operations mu gosync.RWMutex lastSync time.Time @@ -48,6 +53,12 @@ type Engine struct { // retried when its mtime changes. skipMu gosync.RWMutex skipCache map[string]int64 + + // cursorVscdbSynced is the set of "cursor:" session + // IDs synced from vscdb in the current sync cycle. It is + // populated before file workers start and cleared after. + // Read-only during worker execution; no lock needed. + cursorVscdbSynced map[string]bool } // NewEngine creates a sync engine. It pre-populates the @@ -73,6 +84,7 @@ func NewEngine( agentDirs: dirs, machine: cfg.Machine, blockedResultCategories: blockedCategorySet(cfg.BlockedResultCategories), + cursorStateDB: cfg.CursorStateDB, skipCache: skipCache, } } @@ -347,23 +359,28 @@ func (e *Engine) classifyOnePath( } } - // Cursor: //agent-transcripts/.{txt,jsonl} + // Cursor old format: //agent-transcripts/.{txt,jsonl} + // Cursor new format: //agent-transcripts//.{txt,jsonl} for _, cursorDir := range e.agentDirs[parser.AgentCursor] { if cursorDir == "" { continue } if rel, ok := isUnder(cursorDir, path); ok { parts := strings.Split(rel, sep) - if len(parts) != 3 { - continue - } - if parts[1] != "agent-transcripts" { - continue - } - if !parser.IsCursorTranscriptExt(parts[2]) { + var projPart string + switch { + case len(parts) == 3 && + parts[1] == "agent-transcripts" && + parser.IsCursorTranscriptExt(parts[2]): + projPart = parts[0] + case len(parts) == 4 && + parts[1] == "agent-transcripts" && + parser.IsCursorTranscriptExt(parts[3]): + projPart = parts[0] + default: continue } - project := parser.DecodeCursorProjectDir(parts[0]) + project := parser.DecodeCursorProjectDir(projPart) if project == "" { project = "unknown" } @@ -860,11 +877,43 @@ func (e *Engine) syncAllLocked( }) } + // Sync Cursor vscdb sessions before file 
workers so that + // file-based cursor sync can skip already-handled IDs. + tCV := time.Now() + cvPending, cvSynced := e.syncCursorVscdb() + e.cursorVscdbSynced = cvSynced + cvCount := len(cvPending) + for _, pw := range cvPending { + e.writeSessionFull(pw) + } + if verbose && cvCount > 0 { + log.Printf( + "cursor vscdb write: %d sessions in %s", + cvCount, + time.Since(tCV).Round(time.Millisecond), + ) + } + if verbose { + log.Printf( + "cursor vscdb sync: %s", + time.Since(tCV).Round(time.Millisecond), + ) + } + tWorkers := time.Now() results := e.startWorkers(ctx, all) stats := e.collectAndBatch( ctx, results, len(all), onProgress, ) + // Clear vscdb synced set after workers complete. + e.cursorVscdbSynced = nil + + // Fold cursor vscdb stats into the combined stats. + if cvCount > 0 { + stats.TotalSessions += cvCount + stats.RecordSynced(cvCount) + } + if verbose { log.Printf( "file sync: %d synced, %d skipped in %s", @@ -1019,6 +1068,87 @@ func (e *Engine) syncOneOpenCode( return pending } +// syncCursorVscdb syncs sessions from Cursor's global state.vscdb. +// Returns pending writes and the set of synced session IDs (with +// "cursor:" prefix) so the file-based sync can skip duplicates. +func (e *Engine) syncCursorVscdb() ( + []pendingWrite, map[string]bool, +) { + dbPath := e.cursorStateDB + if dbPath == "" { + return nil, nil + } + + metas, err := parser.ListCursorVscdbSessions(dbPath) + if err != nil { + log.Printf("sync cursor vscdb: %v", err) + return nil, nil + } + if len(metas) == 0 { + return nil, nil + } + + // Build child→parent map from subComposerIds. 
+ childToParent := make(map[string]string) + for _, m := range metas { + for _, childID := range m.SubComposerIDs { + if childID != "" { + childToParent[childID] = m.SessionID + } + } + } + + syncedIDs := make(map[string]bool, len(metas)) + + var changed []parser.CursorVscdbMeta + for _, m := range metas { + _, storedMtime, ok := + e.db.GetFileInfoByPath(m.VirtualPath) + if ok && storedMtime == m.FileMtime { + // Unchanged: still mark as synced to suppress + // file-based sync overwriting with text-only data. + syncedIDs["cursor:"+m.SessionID] = true + continue + } + changed = append(changed, m) + } + + if len(changed) == 0 { + return nil, syncedIDs + } + + var pending []pendingWrite + for _, m := range changed { + sess, msgs, err := parser.ParseCursorVscdbSession( + dbPath, m.SessionID, m.Project, e.machine, + ) + if err != nil { + log.Printf( + "cursor vscdb session %s: %v", + m.SessionID, err, + ) + continue + } + if sess == nil { + continue + } + + // Wire up parent-child relationship. + if parentID, ok := childToParent[m.SessionID]; ok { + sess.ParentSessionID = "cursor:" + parentID + sess.RelationshipType = parser.RelSubagent + } + + syncedIDs["cursor:"+m.SessionID] = true + pending = append(pending, pendingWrite{ + sess: *sess, + msgs: msgs, + }) + } + + return pending, syncedIDs +} + // startWorkers fans out file processing across a worker pool // and returns a channel of results. When ctx is cancelled, // workers skip remaining jobs with a context error instead @@ -1682,6 +1812,11 @@ func (e *Engine) processCursor( sessionID := parser.CursorSessionID(file.Path) + // Skip if already synced from vscdb (richer data source). 
+ if e.cursorVscdbSynced[sessionID] { + return processResult{skip: true} + } + if e.shouldSkipFile(sessionID, info) { return processResult{skip: true} } @@ -2146,11 +2281,17 @@ func (e *Engine) SyncSingleSession(sessionID string) error { file.Project = filepath.Base(filepath.Dir(path)) } case parser.AgentCursor: - // path is //agent-transcripts/.txt - // Extract project dir name from two levels up - projDir := filepath.Base( - filepath.Dir(filepath.Dir(path)), - ) + // Old format: //agent-transcripts/.txt + // New format: //agent-transcripts//.jsonl + // Detect by checking whether the immediate parent of the + // file's directory is "agent-transcripts". + dir := filepath.Dir(path) + var projDir string + if filepath.Base(dir) == "agent-transcripts" { + projDir = filepath.Base(filepath.Dir(dir)) + } else { + projDir = filepath.Base(filepath.Dir(filepath.Dir(dir))) + } file.Project = parser.DecodeCursorProjectDir(projDir) case parser.AgentIflow: // path is //session-.jsonl diff --git a/internal/sync/engine_integration_test.go b/internal/sync/engine_integration_test.go index e4c61e30..09ba29d4 100644 --- a/internal/sync/engine_integration_test.go +++ b/internal/sync/engine_integration_test.go @@ -3,6 +3,7 @@ package sync_test import ( "context" "database/sql" + "encoding/json" "fmt" "os" "path/filepath" @@ -1751,6 +1752,53 @@ func TestSyncEngineMultiCursorDir(t *testing.T) { } } +// TestSyncEngineCursorNewFormat verifies that sessions stored in +// Cursor's newer /.jsonl directory layout are +// discovered and synced correctly. 
+func TestSyncEngineCursorNewFormat(t *testing.T) { + env := setupTestEnv(t) + const uuid = "5b84cf99-8f9f-4bbe-b07b-cbbce91a32b9" + + transcript := "user:\nHello from new format\nassistant:\nHi!\n" + + // New format: //agent-transcripts//.txt + path := env.writeCursorSession( + t, env.cursorDir, + "Users-alice-code-proj", + filepath.Join(uuid, uuid+".txt"), + transcript, + ) + + runSyncAndAssert(t, env.engine, sync.SyncStats{ + TotalSessions: 1, Synced: 1, Skipped: 0, + }) + + sessionID := "cursor:" + uuid + assertSessionState(t, env.db, sessionID, + func(sess *db.Session) { + if sess.Agent != "cursor" { + t.Errorf("agent = %q, want cursor", sess.Agent) + } + if sess.Project != "proj" { + t.Errorf("project = %q, want proj", sess.Project) + } + }, + ) + assertSessionMessageCount(t, env.db, sessionID, 2) + + // SyncPaths must also handle the new-format path. + updated := transcript + "user:\nFollowup\nassistant:\nYep.\n" + os.WriteFile(path, []byte(updated), 0o644) + env.engine.SyncPaths([]string{path}) + assertSessionMessageCount(t, env.db, sessionID, 4) + + // FindSourceFile should resolve the new-format path. + src := env.engine.FindSourceFile(sessionID) + if src == "" { + t.Error("FindSourceFile returned empty for new-format session") + } +} + func TestSyncForkDetection(t *testing.T) { env := setupTestEnv(t) @@ -3202,3 +3250,476 @@ func TestSyncSingleSessionOpenCodeExcludedIsNoOp( ) } } + +// --- Cursor vscdb integration tests --- + +// createCursorVscdbHelper creates a minimal Cursor state.vscdb at +// the given path and returns a helper for inserting test data. 
+func createCursorVscdbHelper( + t *testing.T, dbPath string, +) *cursorVscdbHelper { + t.Helper() + if err := os.MkdirAll(filepath.Dir(dbPath), 0o755); err != nil { + t.Fatalf("mkdir: %v", err) + } + d, err := sql.Open("sqlite3", dbPath) + if err != nil { + t.Fatalf("opening cursor vscdb: %v", err) + } + t.Cleanup(func() { d.Close() }) + + if _, err := d.Exec(` + CREATE TABLE cursorDiskKV ( + key TEXT UNIQUE ON CONFLICT REPLACE, + value BLOB + ) + `); err != nil { + t.Fatalf("creating vscdb schema: %v", err) + } + return &cursorVscdbHelper{db: d, path: dbPath} +} + +type cursorVscdbHelper struct { + db *sql.DB + path string +} + +func (h *cursorVscdbHelper) addSession( + t *testing.T, + sessionID, name string, + createdAt, lastUpdatedAt int64, + bubbles []string, // ordered bubble IDs +) { + t.Helper() + headers := make([]map[string]any, 0, len(bubbles)) + for _, bid := range bubbles { + // type 1 for odd positions, 2 for even (alternating) + btype := 1 + if len(headers)%2 != 0 { + btype = 2 + } + headers = append(headers, map[string]any{ + "bubbleId": bid, + "type": btype, + }) + } + data := map[string]any{ + "composerId": sessionID, + "name": name, + "createdAt": createdAt, + "lastUpdatedAt": lastUpdatedAt, + "fullConversationHeadersOnly": headers, + } + raw, err := json.Marshal(data) + if err != nil { + t.Fatalf("marshal composerData: %v", err) + } + if _, err := h.db.Exec( + "INSERT INTO cursorDiskKV (key, value) VALUES (?, ?)", + "composerData:"+sessionID, raw, + ); err != nil { + t.Fatalf("insert composerData: %v", err) + } +} + +func (h *cursorVscdbHelper) addSessionWithSubComposers( + t *testing.T, + sessionID, name string, + createdAt, lastUpdatedAt int64, + bubbles []string, + subIDs []string, +) { + t.Helper() + headers := make([]map[string]any, 0, len(bubbles)) + for _, bid := range bubbles { + btype := 1 + if len(headers)%2 != 0 { + btype = 2 + } + headers = append(headers, map[string]any{ + "bubbleId": bid, + "type": btype, + }) + } + data := 
map[string]any{ + "composerId": sessionID, + "name": name, + "createdAt": createdAt, + "lastUpdatedAt": lastUpdatedAt, + "fullConversationHeadersOnly": headers, + "subComposerIds": subIDs, + } + raw, err := json.Marshal(data) + if err != nil { + t.Fatalf("marshal composerData: %v", err) + } + if _, err := h.db.Exec( + "INSERT INTO cursorDiskKV (key, value) VALUES (?, ?)", + "composerData:"+sessionID, raw, + ); err != nil { + t.Fatalf("insert composerData: %v", err) + } +} + +func (h *cursorVscdbHelper) addUserBubble( + t *testing.T, sessionID, bubbleID, text string, +) { + t.Helper() + data := map[string]any{ + "bubbleId": bubbleID, + "type": 1, + "text": text, + "createdAt": "2024-01-01T10:00:00.000Z", + } + raw, _ := json.Marshal(data) + if _, err := h.db.Exec( + "INSERT INTO cursorDiskKV (key, value) VALUES (?, ?)", + "bubbleId:"+sessionID+":"+bubbleID, raw, + ); err != nil { + t.Fatalf("insert user bubble: %v", err) + } +} + +func (h *cursorVscdbHelper) addAssistantBubble( + t *testing.T, sessionID, bubbleID, text string, +) { + t.Helper() + data := map[string]any{ + "bubbleId": bubbleID, + "type": 2, + "text": text, + "createdAt": "2024-01-01T10:00:01.000Z", + } + raw, _ := json.Marshal(data) + if _, err := h.db.Exec( + "INSERT INTO cursorDiskKV (key, value) VALUES (?, ?)", + "bubbleId:"+sessionID+":"+bubbleID, raw, + ); err != nil { + t.Fatalf("insert assistant bubble: %v", err) + } +} + +func (h *cursorVscdbHelper) addToolBubble( + t *testing.T, + sessionID, bubbleID, toolName, callID string, + params []byte, +) { + t.Helper() + data := map[string]any{ + "bubbleId": bubbleID, + "type": 2, + "createdAt": "2024-01-01T10:00:01.000Z", + "toolFormerData": map[string]any{ + "name": toolName, + "toolCallId": callID, + "status": "completed", + "params": json.RawMessage(params), + }, + } + raw, _ := json.Marshal(data) + if _, err := h.db.Exec( + "INSERT INTO cursorDiskKV (key, value) VALUES (?, ?)", + "bubbleId:"+sessionID+":"+bubbleID, raw, + ); err != nil { + 
t.Fatalf("insert tool bubble: %v", err) + } +} + +func (h *cursorVscdbHelper) updateLastUpdatedAt( + t *testing.T, sessionID string, newTime int64, +) { + t.Helper() + var rawVal []byte + if err := h.db.QueryRow( + "SELECT value FROM cursorDiskKV WHERE key = ?", + "composerData:"+sessionID, + ).Scan(&rawVal); err != nil { + t.Fatalf("read composerData: %v", err) + } + var data map[string]any + if err := json.Unmarshal(rawVal, &data); err != nil { + t.Fatalf("unmarshal composerData: %v", err) + } + data["lastUpdatedAt"] = newTime + raw, _ := json.Marshal(data) + if _, err := h.db.Exec( + "INSERT INTO cursorDiskKV (key, value) VALUES (?, ?)", + "composerData:"+sessionID, raw, + ); err != nil { + t.Fatalf("update composerData: %v", err) + } +} + +// TestSyncCursorVscdbBasic verifies that SyncAll discovers and +// stores Cursor sessions from state.vscdb. +func TestSyncCursorVscdbBasic(t *testing.T) { + if testing.Short() { + t.Skip("skipping integration test") + } + + database := dbtest.OpenTestDB(t) + dir := t.TempDir() + dbPath := filepath.Join( + dir, "globalStorage", "state.vscdb", + ) + vscdb := createCursorVscdbHelper(t, dbPath) + + sessionID := "vscdb-session-001" + vscdb.addSession( + t, sessionID, "My Vscdb Session", + 1704067200000, 1704067205000, + []string{"b-user", "b-asst"}, + ) + vscdb.addUserBubble(t, sessionID, "b-user", "What is the answer?") + vscdb.addAssistantBubble(t, sessionID, "b-asst", "42.") + + engine := sync.NewEngine(database, sync.EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentCursor: {t.TempDir()}, + }, + Machine: "local", + CursorStateDB: dbPath, + }) + + stats := engine.SyncAll(context.Background(), nil) + if stats.Synced < 1 { + t.Fatalf("Synced = %d, want >= 1", stats.Synced) + } + + agentviewID := "cursor:" + sessionID + assertSessionState(t, database, agentviewID, + func(sess *db.Session) { + if sess.Agent != "cursor" { + t.Errorf( + "agent = %q, want cursor", + sess.Agent, + ) + } + if sess.Project == "" { 
+ t.Error("expected non-empty project") + } + }, + ) + assertSessionMessageCount(t, database, agentviewID, 2) +} + +// TestSyncCursorVscdbChangeDetection verifies that unchanged +// sessions are not re-parsed but updated ones are. +func TestSyncCursorVscdbChangeDetection(t *testing.T) { + if testing.Short() { + t.Skip("skipping integration test") + } + + database := dbtest.OpenTestDB(t) + dir := t.TempDir() + dbPath := filepath.Join( + dir, "globalStorage", "state.vscdb", + ) + vscdb := createCursorVscdbHelper(t, dbPath) + + sessionID := "vscdb-change-001" + vscdb.addSession( + t, sessionID, "Change Test", + 1704067200000, 1704067205000, + []string{"b1", "b2"}, + ) + vscdb.addUserBubble(t, sessionID, "b1", "original question") + vscdb.addAssistantBubble(t, sessionID, "b2", "original answer") + + engine := sync.NewEngine(database, sync.EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentCursor: {t.TempDir()}, + }, + Machine: "local", + CursorStateDB: dbPath, + }) + + // First sync. + stats1 := engine.SyncAll(context.Background(), nil) + if stats1.Synced < 1 { + t.Fatalf("first sync: Synced = %d, want >= 1", stats1.Synced) + } + + // Second sync with no changes: should not re-parse. + stats2 := engine.SyncAll(context.Background(), nil) + if stats2.Synced != 0 { + t.Errorf( + "second sync (no change): Synced = %d, want 0", + stats2.Synced, + ) + } + + // Update lastUpdatedAt and re-sync. + vscdb.updateLastUpdatedAt(t, sessionID, 1704067210000) + stats3 := engine.SyncAll(context.Background(), nil) + if stats3.Synced < 1 { + t.Fatalf( + "third sync (after update): Synced = %d, want >= 1", + stats3.Synced, + ) + } +} + +// TestSyncCursorVscdbDedup verifies that sessions present in +// vscdb are not overwritten by the file-based cursor sync. 
+func TestSyncCursorVscdbDedup(t *testing.T) { + if testing.Short() { + t.Skip("skipping integration test") + } + + database := dbtest.OpenTestDB(t) + dir := t.TempDir() + dbPath := filepath.Join( + dir, "globalStorage", "state.vscdb", + ) + vscdb := createCursorVscdbHelper(t, dbPath) + + sessionID := "dedup-session-001" + vscdb.addSession( + t, sessionID, "Dedup Test", + 1704067200000, 1704067205000, + []string{"b-user", "b-tool", "b-asst"}, + ) + vscdb.addUserBubble(t, sessionID, "b-user", "Do something") + vscdb.addToolBubble( + t, sessionID, "b-tool", + "read_file_v2", "call-1", + []byte(`{"path":"/foo.txt"}`), + ) + vscdb.addAssistantBubble(t, sessionID, "b-asst", "Done.") + + // Create a cursor projects directory with a JSONL file + // for the same session (text-only, no tool calls). + cursorDir := t.TempDir() + jsonlDir := filepath.Join( + cursorDir, "myproject", "agent-transcripts", + sessionID, + ) + if err := os.MkdirAll(jsonlDir, 0o755); err != nil { + t.Fatalf("mkdir: %v", err) + } + // Minimal JSONL with only text (no tool calls). + jsonlContent := `{"role":"user","message":{"content":[{"type":"text","text":"file-based text only"}]}}` + "\n" + if err := os.WriteFile( + filepath.Join(jsonlDir, sessionID+".jsonl"), + []byte(jsonlContent), 0o644, + ); err != nil { + t.Fatalf("write jsonl: %v", err) + } + + engine := sync.NewEngine(database, sync.EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentCursor: {cursorDir}, + }, + Machine: "local", + CursorStateDB: dbPath, + }) + + engine.SyncAll(context.Background(), nil) + + agentviewID := "cursor:" + sessionID + // Session should be present (from vscdb). + assertSessionState(t, database, agentviewID, + func(sess *db.Session) { + if sess.Agent != "cursor" { + t.Errorf("agent = %q, want cursor", sess.Agent) + } + }, + ) + + // Should have messages with tool use from vscdb. + // The file-based JSONL should NOT have replaced the vscdb data. 
+ msgs, err := database.GetMessages( + context.Background(), agentviewID, 0, 100, true, + ) + if err != nil { + t.Fatalf("GetMessages: %v", err) + } + hasToolUse := false + for _, m := range msgs { + if m.HasToolUse { + hasToolUse = true + break + } + } + if !hasToolUse { + t.Error( + "expected vscdb data (with tool call) to win over " + + "file-based text-only JSONL", + ) + } +} + +// TestSyncCursorVscdbSubagentLinking verifies that sessions +// with subComposerIds get parent-child relationships set. +func TestSyncCursorVscdbSubagentLinking(t *testing.T) { + if testing.Short() { + t.Skip("skipping integration test") + } + + database := dbtest.OpenTestDB(t) + dir := t.TempDir() + dbPath := filepath.Join( + dir, "globalStorage", "state.vscdb", + ) + vscdb := createCursorVscdbHelper(t, dbPath) + + parentID := "parent-session-001" + childID := "child-session-001" + + vscdb.addSessionWithSubComposers( + t, parentID, "Parent Session", + 1704067200000, 1704067205000, + []string{"pb1", "pb2"}, + []string{childID}, + ) + vscdb.addUserBubble(t, parentID, "pb1", "parent question") + vscdb.addAssistantBubble(t, parentID, "pb2", "parent answer") + + vscdb.addSession( + t, childID, "Child Session", + 1704067201000, 1704067206000, + []string{"cb1", "cb2"}, + ) + vscdb.addUserBubble(t, childID, "cb1", "child question") + vscdb.addAssistantBubble(t, childID, "cb2", "child answer") + + engine := sync.NewEngine(database, sync.EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentCursor: {t.TempDir()}, + }, + Machine: "local", + CursorStateDB: dbPath, + }) + + engine.SyncAll(context.Background(), nil) + + parentAvID := "cursor:" + parentID + childAvID := "cursor:" + childID + + assertSessionState(t, database, parentAvID, + func(sess *db.Session) { + if sess.ParentSessionID != nil { + t.Errorf( + "parent: ParentSessionID = %q, want nil", + *sess.ParentSessionID, + ) + } + }, + ) + assertSessionState(t, database, childAvID, + func(sess *db.Session) { + if 
sess.ParentSessionID == nil || + *sess.ParentSessionID != parentAvID { + got := "" + if sess.ParentSessionID != nil { + got = *sess.ParentSessionID + } + t.Errorf( + "child: ParentSessionID = %q, want %q", + got, parentAvID, + ) + } + }, + ) +}