Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 20 additions & 15 deletions src/compaction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,37 +27,42 @@ bool compact_sstables(const std::string &older_path,
newer_state.reader.seek_to_first();
older_state.advance();
newer_state.advance();
bool wrote_live_entry = false;
bool wrote_any_entry = false;

// Convert ReaderState's std::optional<std::string> value to the
// std::optional<std::string_view> that SSTableWriter::add_entry takes,
// preserving tombstones (nullopt) rather than dropping them.
auto as_sv = [](const std::optional<std::string> &v)
-> std::optional<std::string_view> {
return v.has_value() ? std::optional<std::string_view>(*v) : std::nullopt;
};

while (newer_state.has_entry || older_state.has_entry) {
if (newer_state.has_entry && older_state.has_entry &&
newer_state.key == older_state.key) {
// Same key - newer shadows older. Skip the older entry entirely.
if (newer_state.value.has_value()) {
writer.add_entry(newer_state.key, std::string_view(*newer_state.value));
wrote_live_entry = true;
}
// Same key - newer shadows older. Skip the older entry entirely and
// write the newer entry, including tombstones: a dropped tombstone
// would leave older L1 files free to surface the deleted key on
// read, since this engine has no L2+ to drain the tombstone into.
writer.add_entry(newer_state.key, as_sv(newer_state.value));
wrote_any_entry = true;
newer_state.advance();
older_state.advance();
} else if (older_state.has_entry &&
(!newer_state.has_entry || older_state.key < newer_state.key)) {
// Older has the smaller key, or newer is exhausted.
if (older_state.value.has_value()) {
writer.add_entry(older_state.key, std::string_view(*older_state.value));
wrote_live_entry = true;
}
writer.add_entry(older_state.key, as_sv(older_state.value));
wrote_any_entry = true;
older_state.advance();
} else {
// Newer has the smaller key, or older is exhausted.
if (newer_state.value.has_value()) {
writer.add_entry(newer_state.key, std::string_view(*newer_state.value));
wrote_live_entry = true;
}
writer.add_entry(newer_state.key, as_sv(newer_state.value));
wrote_any_entry = true;
newer_state.advance();
}
}

writer.finalise();
return wrote_live_entry;
return wrote_any_entry;
}
} // namespace kv
10 changes: 7 additions & 3 deletions src/engine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -224,9 +224,13 @@ void Engine::compact_level_zero() {
// any shared in-memory state.
std::string accumulator = l0_paths[0];
std::vector<std::string> temp_paths;
// Track whether the merge chain has produced any live entries. If a step
// produces none, skip merging with the empty result and adopt the next L0
// file directly - this avoids opening an empty SSTable as a reader.
// Track whether the merge chain has produced any entries (live values or
// tombstones). If a step produces none, skip merging with the empty
// result and adopt the next L0 file directly - this avoids opening an
// empty SSTable as a reader. Tombstone-only files DO count as "has
// entries" because they're load-bearing: they shadow older L1 files'
// values for the same key (compact_sstables preserves tombstones since
// L1 is the bottom level in this engine).
bool accumulator_has_entries = true;
for (std::size_t i = 1; i < l0_paths.size(); ++i) {
if (!accumulator_has_entries) {
Expand Down
16 changes: 13 additions & 3 deletions tests/compaction_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,11 @@ TEST(CompactionTest, NewSSTableOverridesOlderSSTableForSameKeys) {
std::remove(output_path.c_str());
}

TEST(CompactionTest, NewSSTableDeletesKeys) {
TEST(CompactionTest, NewSSTableTombstonesShadowOlderValues) {
// Tombstones in the newer SSTable must be PRESERVED in the merged
// output, not dropped. Without this, an older L1 file containing the
// pre-delete value would surface the deleted key on read (since the
// engine has no L2+ for the tombstone to drain into).
const std::string older_path = "/tmp/kv_compaction_test_older";
const std::string newer_path = "/tmp/kv_compaction_test_newer";
const std::string output_path = "/tmp/kv_compaction_test_output";
Expand All @@ -95,8 +99,14 @@ TEST(CompactionTest, NewSSTableDeletesKeys) {
compact_sstables(older_path, newer_path, output_path);
EXPECT_TRUE(std::filesystem::exists(output_path));
auto output_reader = SSTableReader(output_path);
EXPECT_EQ(output_reader.get("key1"), std::nullopt);
EXPECT_EQ(output_reader.get("key2"), std::nullopt);
// Both keys present in the output AS tombstones (outer optional has
// value = "key is in this file"; inner optional is nullopt = tombstone).
auto result1 = output_reader.get("key1");
ASSERT_TRUE(result1.has_value()) << "key1 should be in the output";
EXPECT_FALSE(result1->has_value()) << "key1 should be a tombstone";
auto result2 = output_reader.get("key2");
ASSERT_TRUE(result2.has_value()) << "key2 should be in the output";
EXPECT_FALSE(result2->has_value()) << "key2 should be a tombstone";

std::remove(older_path.c_str());
std::remove(newer_path.c_str());
Expand Down
71 changes: 71 additions & 0 deletions tests/engine_integration_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,77 @@ TEST(EngineIntegrationTest, DataSurvivesMultipleReopens) {
std::filesystem::remove_all(temp_dir);
}

TEST(EngineIntegrationTest, TombstoneSurvivesL0ToL1Compaction) {
// Regression test for a tombstone-drop bug in compact_sstables. The bug:
// 1. put("doomed", "ghost") -> L0 -> compaction merges into L1 file A
// (contains doomed=ghost)
// 2. remove("doomed") -> tombstone flushed to a new L0 file
// 3. More puts trigger a second L0->L1 compaction. The merge gates
// every write on `if (value.has_value())`, so the tombstone is
// dropped from the output. New L1 file B has no `doomed` entry
// 4. L1 now has file A (doomed=ghost) and file B (no doomed). get
// walks L1 newest-first; B says "not in this file, try older",
// falls through to A, returns "ghost". Deleted key reappears
// The fix is to preserve tombstones through L0->L1 compaction, since L1
// is the bottom level in this engine. With no L2+ to drain into, a
// dropped tombstone permanently fails to shadow older L1 entries for
// the same key.
std::string temp_dir =
std::filesystem::temp_directory_path() /
std::filesystem::path("kv_engine_integration_tombstone_l0_l1_compaction");
std::filesystem::remove_all(temp_dir);
std::filesystem::create_directories(temp_dir);

auto count_l1 = [&]() {
std::size_t count = 0;
for (const auto &entry : std::filesystem::directory_iterator(temp_dir)) {
auto filename = entry.path().filename().string();
if (filename.starts_with("sstable_1_") &&
entry.path().extension() == ".dat") {
++count;
}
}
return count;
};

Engine engine(temp_dir, 1); // Tiny memtable so every put flushes to L0.

// Phase 1: write doomed=ghost, fill L0 to 4 files, wait for the first
// L0->L1 compaction to produce L1 file A containing doomed=ghost.
engine.put("doomed", "ghost");
engine.put("a", "1");
engine.put("b", "2");
engine.put("c", "3");
ASSERT_TRUE(wait_for_l1_sstable(temp_dir))
<< "First L0->L1 compaction did not produce an L1 file";
ASSERT_EQ(engine.get("doomed"), "ghost");
const std::size_t l1_count_after_phase1 = count_l1();

// Phase 2: remove doomed, fill L0 again, wait for the second L0->L1
// compaction. The tombstone is in one of the merged L0 files. With the
// bug, the merge drops the tombstone from L1 file B.
engine.remove("doomed");
engine.put("d", "4");
engine.put("e", "5");
engine.put("f", "6");

const auto deadline =
std::chrono::steady_clock::now() + std::chrono::seconds(5);
while (count_l1() <= l1_count_after_phase1 &&
std::chrono::steady_clock::now() < deadline) {
std::this_thread::sleep_for(std::chrono::milliseconds(10));
}
ASSERT_GT(count_l1(), l1_count_after_phase1)
<< "Second L0->L1 compaction did not produce an additional L1 file";

// The bug surfaces here: with the tombstone dropped from the newer L1
// file, get falls through to the older L1 file and returns "ghost".
// After the fix, the tombstone is preserved and get returns nullopt.
EXPECT_EQ(engine.get("doomed"), std::nullopt);

std::filesystem::remove_all(temp_dir);
}

TEST(EngineIntegrationTest, TombstoneInNewerSSTableShadowsLiveValueInOlder) {
// put -> flush, remove -> flush (tombstone written to a newer SSTable), then
// reopen and verify the tombstone shadows the older value.
Expand Down
56 changes: 15 additions & 41 deletions tests/engine_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,32 +30,6 @@ bool wait_for_l1_sstable(
return false;
}

// Polls for all SSTable and compaction temp files to disappear. Used by tests
// that compact away every live key, where successful compaction leaves no .dat
// files behind at all.
bool wait_for_no_sstable_files(
const std::string &dir,
std::chrono::milliseconds timeout = std::chrono::seconds(5)) {
const auto deadline = std::chrono::steady_clock::now() + timeout;
while (std::chrono::steady_clock::now() < deadline) {
bool found_sstable = false;
for (const auto &entry : std::filesystem::directory_iterator(dir)) {
auto filename = entry.path().filename().string();
if ((filename.starts_with("sstable_") ||
filename.starts_with("compact_tmp_")) &&
entry.path().extension() == ".dat") {
found_sstable = true;
break;
}
}
if (!found_sstable) {
return true;
}
std::this_thread::sleep_for(std::chrono::milliseconds(10));
}
return false;
}

TEST(EngineTest, PutAndGet) {
std::string temp_dir = std::filesystem::temp_directory_path() /
std::filesystem::path("kv_engine_test_put_get");
Expand Down Expand Up @@ -300,11 +274,18 @@ TEST(EngineTest, RepeatedFlushesDoNotLoseNewerLevelZeroFiles) {
std::filesystem::remove_all(temp_dir);
}

TEST(EngineTest, CompactionThatDeletesAllKeysDoesNotPublishEmptyLevelOneFile) {
TEST(EngineTest, CompactionWithOnlyTombstonesPublishesL1File) {
// Previously asserted that an L1 file is NOT published when compaction
// produces only tombstones - that pinned a buggy optimisation. After
// the tombstone-preservation fix in `compact_sstables`, a tombstone-
// only compaction DOES publish an L1 file: the tombstones are load-
// bearing because they shadow same-key values in any older L1 files.
// The engine can't tell at compaction time whether an older L1 file
// exists with the same key, so it preserves the tombstones to be safe.
std::string temp_dir =
std::filesystem::temp_directory_path() /
std::filesystem::path(
"kv_engine_test_compaction_deletes_all_keys_without_empty_l1");
"kv_engine_test_compaction_with_only_tombstones_publishes_l1");
std::filesystem::remove_all(temp_dir);
std::filesystem::create_directories(temp_dir);

Expand All @@ -315,23 +296,16 @@ TEST(EngineTest, CompactionThatDeletesAllKeysDoesNotPublishEmptyLevelOneFile) {
engine.remove("key1");
engine.remove("key1");

ASSERT_TRUE(wait_for_no_sstable_files(temp_dir))
<< "Compaction did not remove all SSTable files within the timeout";
ASSERT_TRUE(wait_for_l1_sstable(temp_dir))
<< "Compaction did not produce an L1 file within the timeout";
// The L1 file contains a tombstone for key1; the engine reads through
// it correctly and returns nullopt.
EXPECT_EQ(engine.get("key1"), std::nullopt);

bool sstable_found = false;
for (const auto &entry : std::filesystem::directory_iterator(temp_dir)) {
auto filename = entry.path().filename().string();
if (filename.starts_with("sstable_") &&
entry.path().extension() == ".dat") {
sstable_found = true;
break;
}
}
EXPECT_FALSE(sstable_found);
}

{
// After reopen the same L1 file is rehydrated; the tombstone still
// shadows correctly.
Engine reopened_engine(temp_dir, 1);
EXPECT_EQ(reopened_engine.get("key1"), std::nullopt);
}
Expand Down
Loading