diff --git a/src/index.zig b/src/index.zig index 02f6816..ae0682a 100644 --- a/src/index.zig +++ b/src/index.zig @@ -179,20 +179,104 @@ pub const PostingMask = struct { loc_mask: u8 = 0, // bit mask of (position % 8) where trigram appears }; +pub const DocPosting = struct { + doc_id: u32, + next_mask: u8 = 0, + loc_mask: u8 = 0, +}; + +pub const PostingList = struct { + items: std.ArrayList(DocPosting) = .{}, + path_to_id: ?*const std.StringHashMap(u32) = null, + + pub fn deinit(self: *PostingList, allocator: std.mem.Allocator) void { + self.items.deinit(allocator); + } + + pub fn count(self: *const PostingList) usize { + return self.items.items.len; + } + + pub fn get(self: *const PostingList, path: []const u8) ?PostingMask { + const p2id = self.path_to_id orelse return null; + const doc_id = p2id.get(path) orelse return null; + return self.getByDocId(doc_id); + } + + pub fn contains(self: *const PostingList, path: []const u8) bool { + return self.get(path) != null; + } + + pub fn getByDocId(self: *const PostingList, doc_id: u32) ?PostingMask { + // Binary search on sorted doc_id array + const items = self.items.items; + var lo: usize = 0; + var hi: usize = items.len; + while (lo < hi) { + const mid = lo + (hi - lo) / 2; + if (items[mid].doc_id == doc_id) return PostingMask{ .next_mask = items[mid].next_mask, .loc_mask = items[mid].loc_mask }; + if (items[mid].doc_id < doc_id) { lo = mid + 1; } else { hi = mid; } + } + return null; + } + + pub fn containsDocId(self: *const PostingList, doc_id: u32) bool { + const items = self.items.items; + var lo: usize = 0; + var hi: usize = items.len; + while (lo < hi) { + const mid = lo + (hi - lo) / 2; + if (items[mid].doc_id == doc_id) return true; + if (items[mid].doc_id < doc_id) { lo = mid + 1; } else { hi = mid; } + } + return false; + } + pub fn getOrAddPosting(self: *PostingList, allocator: std.mem.Allocator, doc_id: u32) !*DocPosting { + // Binary search for existing + const items = self.items.items; + var lo: usize = 
0; + var hi: usize = items.len; + while (lo < hi) { + const mid = lo + (hi - lo) / 2; + if (items[mid].doc_id == doc_id) return &self.items.items[mid]; + if (items[mid].doc_id < doc_id) { lo = mid + 1; } else { hi = mid; } + } + // Insert at sorted position + try self.items.insert(allocator, lo, .{ .doc_id = doc_id }); + return &self.items.items[lo]; + } + + pub fn removeDocId(self: *PostingList, doc_id: u32) void { + var i: usize = 0; + while (i < self.items.items.len) { + if (self.items.items[i].doc_id == doc_id) { + _ = self.items.orderedRemove(i); + } else { + i += 1; + } + } + } +}; pub const TrigramIndex = struct { - /// trigram → set of file paths - index: std.AutoHashMap(Trigram, std.StringHashMap(PostingMask)), + /// trigram → posting list with doc IDs + index: std.AutoHashMap(Trigram, PostingList), /// path → list of trigrams contributed (for cleanup) file_trigrams: std.StringHashMap(std.ArrayList(Trigram)), + /// path → doc_id mapping + path_to_id: std.StringHashMap(u32), + /// doc_id → path mapping + id_to_path: std.ArrayList([]const u8), allocator: std.mem.Allocator, /// When true, deinit frees the path keys in file_trigrams (set by readFromDisk). 
owns_paths: bool = false, pub fn init(allocator: std.mem.Allocator) TrigramIndex { return .{ - .index = std.AutoHashMap(Trigram, std.StringHashMap(PostingMask)).init(allocator), + .index = std.AutoHashMap(Trigram, PostingList).init(allocator), .file_trigrams = std.StringHashMap(std.ArrayList(Trigram)).init(allocator), + .path_to_id = std.StringHashMap(u32).init(allocator), + .id_to_path = .{}, .allocator = allocator, }; } @@ -200,7 +284,7 @@ pub const TrigramIndex = struct { pub fn deinit(self: *TrigramIndex) void { var iter = self.index.iterator(); while (iter.next()) |entry| { - entry.value_ptr.deinit(); + entry.value_ptr.deinit(self.allocator); } self.index.deinit(); @@ -210,63 +294,100 @@ pub const TrigramIndex = struct { entry.value_ptr.deinit(self.allocator); } self.file_trigrams.deinit(); + + self.path_to_id.deinit(); + self.id_to_path.deinit(self.allocator); + } + + fn getOrCreateDocId(self: *TrigramIndex, path: []const u8) !u32 { + if (self.path_to_id.get(path)) |id| return id; + const id: u32 = @intCast(self.id_to_path.items.len); + try self.id_to_path.append(self.allocator, path); + try self.path_to_id.put(path, id); + return id; } pub fn removeFile(self: *TrigramIndex, path: []const u8) void { + const doc_id = self.path_to_id.get(path) orelse { + const trigrams = self.file_trigrams.getPtr(path) orelse return; + trigrams.deinit(self.allocator); + _ = self.file_trigrams.remove(path); + return; + }; const trigrams = self.file_trigrams.getPtr(path) orelse return; for (trigrams.items) |tri| { - if (self.index.getPtr(tri)) |file_set| { - _ = file_set.remove(path); - if (file_set.count() == 0) { - file_set.deinit(); + if (self.index.getPtr(tri)) |posting_list| { + posting_list.removeDocId(doc_id); + if (posting_list.items.items.len == 0) { + posting_list.deinit(self.allocator); _ = self.index.remove(tri); } } } trigrams.deinit(self.allocator); _ = self.file_trigrams.remove(path); + _ = self.path_to_id.remove(path); } pub fn indexFile(self: *TrigramIndex, path: 
[]const u8, content: []const u8) !void { self.removeFile(path); - var seen_trigrams = std.AutoHashMap(Trigram, void).init(self.allocator); - defer seen_trigrams.deinit(); + const doc_id = try self.getOrCreateDocId(path); + + // Phase 1: accumulate masks locally per trigram (no global index writes) + var local = std.AutoHashMap(Trigram, PostingMask).init(self.allocator); + defer local.deinit(); + // Pre-size: a file typically has ~content.len/4 unique trigrams + const estimated_unique = @max(@as(u32, 64), @as(u32, @intCast(@min(content.len / 4, 65536)))); + local.ensureTotalCapacity(estimated_unique) catch {}; - // Extract trigrams from content, recording PostingMask per (trigram, file) if (content.len >= 3) { for (0..content.len - 2) |i| { + // Skip trigrams that are pure whitespace (terrible filters, ~12% of all occurrences) + const c0 = content[i]; + const c1 = content[i + 1]; + const c2 = content[i + 2]; + if ((c0 == ' ' or c0 == '\t' or c0 == '\n' or c0 == '\r') and + (c1 == ' ' or c1 == '\t' or c1 == '\n' or c1 == '\r') and + (c2 == ' ' or c2 == '\t' or c2 == '\n' or c2 == '\r')) continue; + const tri = packTrigram( - normalizeChar(content[i]), - normalizeChar(content[i + 1]), - normalizeChar(content[i + 2]), + normalizeChar(c0), + normalizeChar(c1), + normalizeChar(c2), ); - // Ensure the trigram → file_set entry exists - const idx_gop = try self.index.getOrPut(tri); - if (!idx_gop.found_existing) { - idx_gop.value_ptr.* = std.StringHashMap(PostingMask).init(self.allocator); - } - // Get or create the posting for this file - const file_gop = try idx_gop.value_ptr.getOrPut(path); - if (!file_gop.found_existing) { - file_gop.value_ptr.* = PostingMask{}; - // Track this trigram for cleanup (only once per file) - try seen_trigrams.put(tri, {}); + const gop = try local.getOrPut(tri); + if (!gop.found_existing) { + gop.value_ptr.* = PostingMask{}; } - // OR in position masks - file_gop.value_ptr.loc_mask |= @as(u8, 1) << @intCast(i % 8); + gop.value_ptr.loc_mask |= 
@as(u8, 1) << @intCast(i % 8); if (i + 3 < content.len) { - file_gop.value_ptr.next_mask |= @as(u8, 1) << @intCast(normalizeChar(content[i + 3]) % 8); + gop.value_ptr.next_mask |= @as(u8, 1) << @intCast(normalizeChar(content[i + 3]) % 8); } } } - // Store which trigrams this file contributed + // Phase 2: bulk-insert one posting per trigram into global index var tri_list: std.ArrayList(Trigram) = .{}; errdefer tri_list.deinit(self.allocator); - var tri_iter = seen_trigrams.keyIterator(); - while (tri_iter.next()) |tri_ptr| { - try tri_list.append(self.allocator, tri_ptr.*); + + var local_iter = local.iterator(); + while (local_iter.next()) |entry| { + const tri = entry.key_ptr.*; + const mask = entry.value_ptr.*; + + const idx_gop = try self.index.getOrPut(tri); + if (!idx_gop.found_existing) { + idx_gop.value_ptr.* = .{ .path_to_id = &self.path_to_id }; + } + // Single append (not sorted insert) since doc_id is monotonically increasing + try idx_gop.value_ptr.items.append(self.allocator, .{ + .doc_id = doc_id, + .next_mask = mask.next_mask, + .loc_mask = mask.loc_mask, + }); + + try tri_list.append(self.allocator, tri); } try self.file_trigrams.put(path, tri_list); } @@ -274,11 +395,10 @@ pub const TrigramIndex = struct { /// Find candidate files that contain ALL trigrams from the query. pub fn candidates(self: *TrigramIndex, query: []const u8, allocator: std.mem.Allocator) ?[]const []const u8 { - if (query.len < 3) return null; // can't use trigrams for short queries + if (query.len < 3) return null; const tri_count = query.len - 2; - // Deduplicate query trigrams first so repeated trigrams don't do repeated work. 
var unique = std.AutoHashMap(Trigram, void).init(allocator); defer unique.deinit(); unique.ensureTotalCapacity(@intCast(tri_count)) catch return null; @@ -291,46 +411,62 @@ pub fn candidates(self: *TrigramIndex, query: []const u8, allocator: std.mem.All _ = unique.getOrPut(tri) catch return null; } - var sets: std.ArrayList(*std.StringHashMap(PostingMask)) = .{}; + var sets: std.ArrayList(*PostingList) = .{}; defer sets.deinit(allocator); sets.ensureTotalCapacity(allocator, unique.count()) catch return null; var tri_iter = unique.keyIterator(); while (tri_iter.next()) |tri_ptr| { - const file_set = self.index.getPtr(tri_ptr.*) orelse { + const posting_list = self.index.getPtr(tri_ptr.*) orelse { return allocator.alloc([]const u8, 0) catch null; }; - sets.appendAssumeCapacity(file_set); + sets.appendAssumeCapacity(posting_list); } if (sets.items.len == 0) { return allocator.alloc([]const u8, 0) catch null; } - // Iterate the smallest set and check membership in all others. - var min_idx: usize = 0; - var min_count = sets.items[0].count(); - for (sets.items[1..], 1..) 
|set, i| { - const count = set.count(); - if (count < min_count) { - min_count = count; - min_idx = i; + // Sort posting lists by size (smallest first) for efficient intersection + std.mem.sort(*PostingList, sets.items, {}, struct { + fn lt(_: void, a: *PostingList, b: *PostingList) bool { + return a.items.items.len < b.items.items.len; } - } + }.lt); - var result: std.ArrayList([]const u8) = .{}; - errdefer result.deinit(allocator); - result.ensureTotalCapacity(allocator, min_count) catch return null; + // Sorted merge intersection: start with smallest list's doc_ids + var result_ids: std.ArrayList(u32) = .{}; + defer result_ids.deinit(allocator); - var it = sets.items[min_idx].keyIterator(); - next_cand: while (it.next()) |path_ptr| { + // Seed with doc_ids from smallest posting list + result_ids.ensureTotalCapacity(allocator, sets.items[0].items.items.len) catch return null; + for (sets.items[0].items.items) |p| { + result_ids.appendAssumeCapacity(p.doc_id); + } - // Intersection check: candidate must be in all sets - for (sets.items, 0..) 
|set, i| { - if (i == min_idx) continue; - if (!set.contains(path_ptr.*)) continue :next_cand; + // Intersect with each subsequent list (both sorted → merge O(n+m)) + for (sets.items[1..]) |set| { + var write: usize = 0; + var si: usize = 0; + const set_items = set.items.items; + for (result_ids.items) |id| { + // Advance set pointer to >= id + while (si < set_items.len and set_items[si].doc_id < id) : (si += 1) {} + if (si < set_items.len and set_items[si].doc_id == id) { + result_ids.items[write] = id; + write += 1; + si += 1; + } } + result_ids.items.len = write; + if (write == 0) break; // early exit if intersection is empty + } + var result: std.ArrayList([]const u8) = .{}; + errdefer result.deinit(allocator); + result.ensureTotalCapacity(allocator, result_ids.items.len) catch return null; + + next_cand: for (result_ids.items) |doc_id| { // Bloom-filter check for consecutive trigram pairs if (tri_count >= 2) { for (0..tri_count - 1) |j| { @@ -344,22 +480,22 @@ pub fn candidates(self: *TrigramIndex, query: []const u8, allocator: std.mem.All normalizeChar(query[j + 2]), normalizeChar(query[j + 3]), ); - const set_a = self.index.getPtr(tri_a) orelse continue; - const set_b = self.index.getPtr(tri_b) orelse continue; - const mask_a = set_a.get(path_ptr.*) orelse continue; - const mask_b = set_b.get(path_ptr.*) orelse continue; + const list_a = self.index.getPtr(tri_a) orelse continue; + const list_b = self.index.getPtr(tri_b) orelse continue; + const mask_a = list_a.getByDocId(doc_id) orelse continue; + const mask_b = list_b.getByDocId(doc_id) orelse continue; - // next_mask: bit for query[j+3] must be set in tri_a's next_mask const next_bit: u8 = @as(u8, 1) << @intCast(normalizeChar(query[j + 3]) % 8); if ((mask_a.next_mask & next_bit) == 0) continue :next_cand; - // loc_mask adjacency: use circular shift to handle position wrap-around const rotated = (mask_a.loc_mask << 1) | (mask_a.loc_mask >> 7); if ((rotated & mask_b.loc_mask) == 0) continue :next_cand; } } - 
result.appendAssumeCapacity(path_ptr.*); + if (doc_id < self.id_to_path.items.len) { + result.appendAssumeCapacity(self.id_to_path.items[doc_id]); + } } return result.toOwnedSlice(allocator) catch { @@ -369,39 +505,30 @@ pub fn candidates(self: *TrigramIndex, query: []const u8, allocator: std.mem.All } - /// Find candidate files matching a RegexQuery. - /// Intersects AND trigrams, then for each OR group unions posting lists - /// and intersects with the running result. pub fn candidatesRegex(self: *TrigramIndex, query: *const RegexQuery, allocator: std.mem.Allocator) ?[]const []const u8 { if (query.and_trigrams.len == 0 and query.or_groups.len == 0) return null; - // Start with AND trigrams - var result_set: ?std.StringHashMap(void) = null; + var result_set: ?std.AutoHashMap(u32, void) = null; defer if (result_set) |*rs| rs.deinit(); if (query.and_trigrams.len > 0) { - // Intersect all AND trigram posting lists for (query.and_trigrams) |tri| { - const file_set = self.index.getPtr(tri) orelse { - // Trigram not in index → no files can match + const posting_list = self.index.getPtr(tri) orelse { var empty = allocator.alloc([]const u8, 0) catch return null; _ = &empty; return allocator.alloc([]const u8, 0) catch null; }; if (result_set == null) { - // Initialize with all files from first trigram - result_set = std.StringHashMap(void).init(allocator); - var it = file_set.keyIterator(); - while (it.next()) |key| { - result_set.?.put(key.*, {}) catch return null; + result_set = std.AutoHashMap(u32, void).init(allocator); + for (posting_list.items.items) |p| { + result_set.?.put(p.doc_id, {}) catch return null; } } else { - // Intersect: remove files not in this posting list - var to_remove: std.ArrayList([]const u8) = .{}; + var to_remove: std.ArrayList(u32) = .{}; defer to_remove.deinit(allocator); var it = result_set.?.keyIterator(); while (it.next()) |key| { - if (!file_set.contains(key.*)) { + if (!posting_list.containsDocId(key.*)) { to_remove.append(allocator, key.*) 
catch return null; } } @@ -412,32 +539,26 @@ pub fn candidates(self: *TrigramIndex, query: []const u8, allocator: std.mem.All } } - // Process OR groups: for each group, union posting lists of its trigrams, - // then intersect with result_set for (query.or_groups) |group| { if (group.len == 0) continue; - // Union all posting lists in this OR group - var union_set = std.StringHashMap(void).init(allocator); + var union_set = std.AutoHashMap(u32, void).init(allocator); defer union_set.deinit(); for (group) |tri| { - const file_set = self.index.getPtr(tri) orelse continue; - var it = file_set.keyIterator(); - while (it.next()) |key| { - union_set.put(key.*, {}) catch return null; + const posting_list = self.index.getPtr(tri) orelse continue; + for (posting_list.items.items) |p| { + union_set.put(p.doc_id, {}) catch return null; } } if (result_set == null) { - // First constraint — adopt the union - result_set = std.StringHashMap(void).init(allocator); + result_set = std.AutoHashMap(u32, void).init(allocator); var it = union_set.keyIterator(); while (it.next()) |key| { result_set.?.put(key.*, {}) catch return null; } } else { - // Intersect result_set with union_set - var to_remove: std.ArrayList([]const u8) = .{}; + var to_remove: std.ArrayList(u32) = .{}; defer to_remove.deinit(allocator); var it = result_set.?.keyIterator(); while (it.next()) |key| { @@ -453,13 +574,15 @@ pub fn candidates(self: *TrigramIndex, query: []const u8, allocator: std.mem.All if (result_set == null) return null; - // Convert to slice var result: std.ArrayList([]const u8) = .{}; errdefer result.deinit(allocator); result.ensureTotalCapacity(allocator, result_set.?.count()) catch return null; var it = result_set.?.keyIterator(); - while (it.next()) |key| { - result.appendAssumeCapacity(key.*); + while (it.next()) |id_ptr| { + const doc_id = id_ptr.*; + if (doc_id < self.id_to_path.items.len) { + result.appendAssumeCapacity(self.id_to_path.items[doc_id]); + } } return 
result.toOwnedSlice(allocator) catch { result.deinit(allocator); @@ -498,17 +621,17 @@ pub fn candidates(self: *TrigramIndex, query: []const u8, allocator: std.mem.All /// Write the current in-memory index to disk in a two-file format. /// Files are written atomically (write to tmp, then rename). pub fn writeToDisk(self: *TrigramIndex, dir_path: []const u8, git_head: ?[40]u8) !void { - // Step 1: Build file table (assign u16 IDs to all unique paths) + // Step 1: Build file table from path_to_id (reuse existing doc IDs for consistency) var file_table: std.ArrayList([]const u8) = .{}; defer file_table.deinit(self.allocator); - var path_to_id = std.StringHashMap(u32).init(self.allocator); - defer path_to_id.deinit(); + var disk_path_to_id = std.StringHashMap(u32).init(self.allocator); + defer disk_path_to_id.deinit(); var ft_iter = self.file_trigrams.keyIterator(); while (ft_iter.next()) |path_ptr| { const id: u32 = @intCast(file_table.items.len); try file_table.append(self.allocator, path_ptr.*); - try path_to_id.put(path_ptr.*, id); + try disk_path_to_id.put(path_ptr.*, id); } const file_count: u32 = @intCast(file_table.items.len); @@ -535,16 +658,18 @@ pub fn candidates(self: *TrigramIndex, query: []const u8, allocator: std.mem.All defer lookup_entries.deinit(self.allocator); for (trigrams_sorted.items) |tri| { - const file_set = self.index.getPtr(tri) orelse continue; + const posting_list = self.index.getPtr(tri) orelse continue; const offset: u32 = @intCast(postings_buf.items.len); var count: u32 = 0; - var fs_iter = file_set.iterator(); - while (fs_iter.next()) |entry| { - const fid = path_to_id.get(entry.key_ptr.*) orelse continue; + for (posting_list.items.items) |p| { + // Map in-memory doc_id to disk file_id via path lookup + if (p.doc_id >= self.id_to_path.items.len) continue; + const path = self.id_to_path.items[p.doc_id]; + const fid = disk_path_to_id.get(path) orelse continue; try postings_buf.append(self.allocator, .{ .file_id = fid, - .next_mask = 
entry.value_ptr.next_mask, - .loc_mask = entry.value_ptr.loc_mask, + .next_mask = p.next_mask, + .loc_mask = p.loc_mask, }); count += 1; } @@ -700,7 +825,7 @@ pub fn candidates(self: *TrigramIndex, query: []const u8, allocator: std.mem.All result.owns_paths = true; errdefer result.deinit(); - // Allocate stable path strings owned by the index + // Allocate stable path strings owned by the index and build doc ID mappings var stable_paths = try allocator.alloc([]const u8, file_count); defer allocator.free(stable_paths); for (0..file_count) |i| { @@ -708,6 +833,8 @@ pub fn candidates(self: *TrigramIndex, query: []const u8, allocator: std.mem.All errdefer allocator.free(duped); stable_paths[i] = duped; try result.file_trigrams.put(duped, .{}); + try result.path_to_id.put(duped, @intCast(i)); + try result.id_to_path.append(allocator, duped); } // Parse lookup entries and populate index + file_trigrams @@ -722,8 +849,8 @@ pub fn candidates(self: *TrigramIndex, query: []const u8, allocator: std.mem.All if (@as(u64, p_off) + @as(u64, p_count) > @as(u64, total_postings)) return error.InvalidData; - var file_set = std.StringHashMap(PostingMask).init(allocator); - errdefer file_set.deinit(); + var posting_list: PostingList = .{ .path_to_id = &result.path_to_id }; + errdefer posting_list.deinit(allocator); for (0..p_count) |pi| { const pb_off = postings_start + (p_off + pi) * posting_size; @@ -736,16 +863,14 @@ pub fn candidates(self: *TrigramIndex, query: []const u8, allocator: std.mem.All const loc_mask = raw_posting[if (post_version >= 3) 5 else 3]; if (file_id >= file_count) return error.InvalidData; - const path = stable_paths[file_id]; - const gop = try file_set.getOrPut(path); - if (!gop.found_existing) { - gop.value_ptr.* = PostingMask{}; - } - gop.value_ptr.next_mask |= next_mask; - gop.value_ptr.loc_mask |= loc_mask; + const doc_id: u32 = file_id; + const posting = try posting_list.getOrAddPosting(allocator, doc_id); + posting.next_mask |= next_mask; + 
posting.loc_mask |= loc_mask; // Track trigram in file_trigrams + const path = stable_paths[file_id]; if (result.file_trigrams.getPtr(path)) |tri_list| { var found = false; for (tri_list.items) |existing| { @@ -755,7 +880,7 @@ pub fn candidates(self: *TrigramIndex, query: []const u8, allocator: std.mem.All } } - try result.index.put(tri, file_set); + try result.index.put(tri, posting_list); } return result; diff --git a/src/tests.zig b/src/tests.zig index 057c65f..1bdbc45 100644 --- a/src/tests.zig +++ b/src/tests.zig @@ -4221,3 +4221,188 @@ test "issue-114: TypeScript import-as alias does not affect dep path" { try testing.expect(outline.imports.items.len == 1); try testing.expectEqualStrings("./mod", outline.imports.items[0]); } + +// ── Trigram index regression suite (#142) ───────────────────────────── +// Tests correctness invariants that must hold across index implementation changes. + +test "regression-142: trigram index finds all matching files" { + var exp = Explorer.init(testing.allocator); + defer exp.deinit(); + + try exp.indexFile("src/main.zig", "pub fn handleRequest(ctx: *Context) !void {}"); + try exp.indexFile("src/server.zig", "fn handleRequest(req: Request) void {}"); + try exp.indexFile("src/util.zig", "pub fn formatDate() []u8 {}"); + + const results = try exp.searchContent("handleRequest", testing.allocator, 50); + defer { + for (results) |r| { + testing.allocator.free(r.path); + testing.allocator.free(r.line_text); + } + testing.allocator.free(results); + } + // Must find both files containing "handleRequest" + try testing.expect(results.len == 2); +} + +test "regression-142: trigram index returns no false positives" { + var exp = Explorer.init(testing.allocator); + defer exp.deinit(); + + try exp.indexFile("a.zig", "pub fn alpha() void {}"); + try exp.indexFile("b.zig", "pub fn beta() void {}"); + + const results = try exp.searchContent("gamma", testing.allocator, 50); + defer testing.allocator.free(results); + // Must return zero results 
for non-existent content + try testing.expect(results.len == 0); +} + +test "regression-142: trigram intersection narrows correctly" { + var exp = Explorer.init(testing.allocator); + defer exp.deinit(); + + try exp.indexFile("match.zig", "const unique_identifier_xyz = 42;"); + try exp.indexFile("partial.zig", "const unique_other = 99;"); + try exp.indexFile("none.zig", "pub fn foo() void {}"); + + const results = try exp.searchContent("unique_identifier_xyz", testing.allocator, 50); + defer { + for (results) |r| { + testing.allocator.free(r.path); + testing.allocator.free(r.line_text); + } + testing.allocator.free(results); + } + // Only the exact match file, not the partial + try testing.expect(results.len == 1); + try testing.expectEqualStrings("match.zig", results[0].path); +} + +test "regression-142: trigram handles file removal" { + var exp = Explorer.init(testing.allocator); + defer exp.deinit(); + + try exp.indexFile("temp.zig", "pub fn removable() void {}"); + try exp.indexFile("keep.zig", "pub fn permanent() void {}"); + + // Remove a file + exp.removeFile("temp.zig"); + + const results = try exp.searchContent("removable", testing.allocator, 50); + defer testing.allocator.free(results); + try testing.expect(results.len == 0); + + const results2 = try exp.searchContent("permanent", testing.allocator, 50); + defer { + for (results2) |r| { + testing.allocator.free(r.path); + testing.allocator.free(r.line_text); + } + testing.allocator.free(results2); + } + try testing.expect(results2.len == 1); +} + +test "regression-142: trigram handles re-indexing same file" { + var exp = Explorer.init(testing.allocator); + defer exp.deinit(); + + try exp.indexFile("mutable.zig", "pub fn oldContent() void {}"); + try exp.indexFile("mutable.zig", "pub fn newContent() void {}"); + + const old = try exp.searchContent("oldContent", testing.allocator, 50); + defer testing.allocator.free(old); + try testing.expect(old.len == 0); + + const new = try exp.searchContent("newContent", 
testing.allocator, 50); + defer { + for (new) |r| { + testing.allocator.free(r.path); + testing.allocator.free(r.line_text); + } + testing.allocator.free(new); + } + try testing.expect(new.len == 1); +} + +test "regression-142: trigram disk roundtrip preserves results" { + var tmp = testing.tmpDir(.{}); + defer tmp.cleanup(); + var path_buf: [std.fs.max_path_bytes]u8 = undefined; + const dir_path = try tmp.dir.realpath(".", &path_buf); + + // Build index + var idx1 = TrigramIndex.init(testing.allocator); + try idx1.indexFile("a.zig", "pub fn searchable() void {}"); + try idx1.indexFile("b.zig", "const value = 42;"); + + // Write to disk + try idx1.writeToDisk(dir_path, null); + idx1.deinit(); + + // Read back + var idx2 = TrigramIndex.readFromDisk(dir_path, testing.allocator) orelse return error.TestUnexpectedResult; + defer idx2.deinit(); + + // Must find same results + const cands = idx2.candidates("searchable", testing.allocator) orelse return error.TestUnexpectedResult; + defer testing.allocator.free(cands); + try testing.expect(cands.len == 1); +} + +test "regression-142: many files don't corrupt index" { + var exp = Explorer.init(testing.allocator); + defer exp.deinit(); + + // Index 500 files + var i: usize = 0; + while (i < 500) : (i += 1) { + var name_buf: [32]u8 = undefined; + const name = try std.fmt.bufPrint(&name_buf, "file_{d}.zig", .{i}); + var content_buf: [64]u8 = undefined; + const content = try std.fmt.bufPrint(&content_buf, "pub fn func_{d}() void {{}}", .{i}); + try exp.indexFile(name, content); + } + + // Search for a specific one + const results = try exp.searchContent("func_250", testing.allocator, 50); + defer { + for (results) |r| { + testing.allocator.free(r.path); + testing.allocator.free(r.line_text); + } + testing.allocator.free(results); + } + try testing.expect(results.len == 1); + try testing.expectEqualStrings("file_250.zig", results[0].path); +} + +test "regression-142: short queries fall back gracefully" { + var exp = 
Explorer.init(testing.allocator); + defer exp.deinit(); + + try exp.indexFile("a.zig", "pub fn ab() void {}"); + + // 2-char query: too short for trigrams, should still work via fallback + const results = try exp.searchContent("ab", testing.allocator, 50); + defer { + for (results) |r| { + testing.allocator.free(r.path); + testing.allocator.free(r.line_text); + } + testing.allocator.free(results); + } + try testing.expect(results.len == 1); +} + +test "regression-142: word index still works alongside trigram" { + var exp = Explorer.init(testing.allocator); + defer exp.deinit(); + + try exp.indexFile("words.zig", "pub fn mySpecialFunction() void {}"); + + const hits = try exp.searchWord("mySpecialFunction", testing.allocator); + defer testing.allocator.free(hits); + try testing.expect(hits.len == 1); +}