Skip to content

Commit bd7cff4

Browse files
wskozlowskiWojtek
andauthored
Feature/584 2 (#596)
* WIP * copy-prefix fixes * Diff decode validation --------- Co-authored-by: Wojtek <wskozlowski@dbzero.io>
1 parent 259590b commit bd7cff4

11 files changed

Lines changed: 61 additions & 34 deletions

File tree

dbzero/dbzero/dbzero.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ def load_dynamic(name, path):
1010

1111
def __bootstrap__():
1212
global __bootstrap__, __loader__, __file__
13-
paths = [os.path.join(os.path.split(__file__)[0]), "/src/dev/build/debug", "/usr/local/lib/python3/dist-packages/dbzero/"]
13+
paths = [os.path.join(os.path.split(__file__)[0]), "/src/dev/build/release", "/usr/local/lib/python3/dist-packages/dbzero/"]
1414
__file__ = None
1515
for path in paths:
1616
if os.path.isdir(path):

python_tests/conftest.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,8 @@ def db0_fixture(request):
3535
db0.open(
3636
"my-test-prefix",
3737
# use custom page_io_step_size if specified in request.param
38-
page_io_step_size=__extract_param(request, "page_io_step_size", None)
38+
page_io_step_size=__extract_param(request, "page_io_step_size", None),
39+
autocommit=__extract_param(request, "autocommit", True)
3940
)
4041
yield db0
4142
gc.collect()

python_tests/test_copy_prefix.py

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -225,9 +225,7 @@ def validate_copy(copy_id, expected_len = None, expected_min_len = None):
225225
epoch_count = 2
226226
for epoch in range(epoch_count):
227227
print(f"=== Epoch {epoch} ===")
228-
# obj_count = 5000
229-
# commit_count = 100
230-
obj_count = 500
228+
obj_count = 5000
231229
commit_count = 100
232230
# start the writer process for a long run
233231
p = multiprocessing.Process(target=writer_process, args=(px_name, obj_count, commit_count, True))
@@ -327,12 +325,13 @@ def modify_prefix():
327325
assert len(root.value) == total_len
328326

329327

328+
@pytest.mark.parametrize("db0_fixture", [{"autocommit": False}], indirect=True)
330329
def test_copy_prefix_of_recovered_copy(db0_fixture):
331330
file_name = "./test-copy.db0"
332331
# remove file if it exists
333332
if os.path.exists(file_name):
334333
os.remove(file_name)
335-
334+
336335
px_name = db0.get_current_prefix().name
337336
px_path = os.path.join(DB0_DIR, px_name + ".db0")
338337
root = MemoTestSingleton([])
@@ -355,18 +354,18 @@ def validate(expected_len):
355354
c = charset[i % len(charset)]
356355
assert item.value == c * 1024
357356
assert len(root.value) == expected_len
358-
359-
total_len += modify_prefix(150)
357+
358+
total_len += modify_prefix(5150)
360359
db0.copy_prefix(file_name, page_io_step_size=64 << 10)
361360
db0.close()
362361

363362
# drop original file and replace with copy
364363
os.remove(px_path)
365364
os.rename(file_name, px_path)
366365

367-
# open recovered prefix for update
366+
# open recovered prefix for update
368367
db0.init(DB0_DIR, prefix=px_name, read_write=True)
369-
total_len += modify_prefix(100)
368+
total_len += modify_prefix(1350)
370369

371370
db0.close()
372371
db0.init(DB0_DIR, prefix=px_name, read_write=True)
@@ -377,7 +376,7 @@ def validate(expected_len):
377376
# restore copy of a restored and modified copy
378377
os.remove(px_path)
379378
os.rename(file_name, px_path)
380-
379+
381380
# open prefix from recovered and modified copy of a copy
382381
db0.init(DB0_DIR, prefix=px_name, read_write=False)
383382
validate(total_len)

src/dbzero/core/memory/diff_utils.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -371,16 +371,16 @@ namespace db0
371371
std::byte *dp_result, const std::byte *dp_end)
372372
{
373373
const std::byte *dp_in = static_cast<const std::byte *>(in_buffer);
374-
for (auto it = diffs.begin(); it != diffs.end(); ) {
374+
for (auto it = diffs.begin(); it != diffs.end();) {
375375
auto diff_size = *it;
376376
++it;
377377
if (diff_size > 0) {
378378
assert(dp_result + diff_size <= dp_end);
379-
std::memcpy(dp_result, dp_in, diff_size);
380-
dp_result += diff_size;
381-
if (dp_result > dp_end) {
379+
if (dp_result + diff_size > dp_end) {
382380
THROWF(db0::IOException) << "applyDiffs: diff application exceeds buffer size";
383381
}
382+
std::memcpy(dp_result, dp_in, diff_size);
383+
dp_result += diff_size;
384384
dp_in += diff_size;
385385
}
386386
if (it == diffs.end()) {

src/dbzero/core/storage/BDevStorage.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -300,12 +300,12 @@ namespace db0
300300

301301
// query.first yields the full-DP (if it exists)
302302
std::uint64_t page_io_id = query.first();
303-
if (page_io_id) {
303+
if (page_io_id) {
304304
if (!!m_ext_space) {
305305
// convert relative page number back to absolute
306306
page_io_id = m_ext_space.getAbsolute(page_io_id);
307307
}
308-
// read full DP
308+
// read full DP
309309
m_page_io.read(page_io_id, read_buf);
310310
} else {
311311
// requesting a diff-DP only encoded page, use zero buffer as a base

src/dbzero/core/storage/Diff_IO.cpp

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,8 @@ DB0_PACKED_END
6565
{
6666
public:
6767
// buffer is 2 pages long
68-
DiffReader(Page_IO &, std::uint64_t page_num, std::byte *begin, std::byte *end);
68+
DiffReader(Page_IO &, std::uint64_t page_num, std::byte *begin, std::byte *end,
69+
const std::function<void()> &decode_fault);
6970

7071
// appy diffs from a specific page / state number into a provided data buffer
7172
// if underflow occurs then next page needs to be fetched and apply repeated
@@ -84,6 +85,7 @@ DB0_PACKED_END
8485
std::byte const *m_end;
8586
// the number of objects remaining to be read
8687
unsigned int m_size = 0;
88+
const std::function<void()> &m_decode_fault;
8789
};
8890

8991
DiffWriter::DiffWriter(Page_IO &page_io, std::byte *begin, std::byte *end)
@@ -166,18 +168,23 @@ DB0_PACKED_END
166168
return m_header.m_size == 0 && m_header.m_offset == 0;
167169
}
168170

169-
DiffReader::DiffReader(Page_IO &page_io, std::uint64_t page_num, std::byte *begin, std::byte *end)
171+
DiffReader::DiffReader(Page_IO &page_io, std::uint64_t page_num, std::byte *begin, std::byte *end,
172+
const std::function<void()> &decode_fault)
170173
: m_page_io(page_io)
171174
, m_page_size(page_io.getPageSize())
172175
, m_page_num(page_num)
173176
, m_begin(begin)
174177
, m_current(begin + m_page_size)
175178
, m_end(end)
179+
, m_decode_fault(decode_fault)
176180
{
177181
page_io.read(page_num, m_begin + m_page_size);
178182
m_size = o_diff_header::__const_ref(m_current).m_size;
179183
// position at the first diff block
180-
m_current += o_diff_header::sizeOf()+ o_diff_header::__const_ref(m_current).m_offset;
184+
m_current += o_diff_header::sizeOf() + o_diff_header::__const_ref(m_current).m_offset;
185+
if (m_current > m_end) {
186+
m_decode_fault();
187+
}
181188
}
182189

183190
bool DiffReader::apply(std::byte *dp_data, std::pair<std::uint64_t, std::uint32_t> page_and_state,
@@ -197,8 +204,11 @@ DB0_PACKED_END
197204
underflow = true;
198205
return false;
199206
}
200-
201-
o_diff_buffer::__const_ref(m_current).apply(dp_data, dp_data + m_page_size);
207+
208+
auto &diff_buf = o_diff_buffer::__safe_const_ref(
209+
const_bounded_buf_t(m_decode_fault, m_current, m_end)
210+
);
211+
diff_buf.apply(dp_data, dp_data + m_page_size);
202212
m_current += diff_buf_size;
203213
--m_size;
204214
return true;
@@ -234,12 +244,18 @@ DB0_PACKED_END
234244
, m_writer(std::make_unique<DiffWriter>(
235245
reinterpret_cast<Page_IO&>(*this), m_write_buf.data(), m_write_buf.data() + m_write_buf.size())
236246
)
247+
, m_decode_fault([]() {
248+
THROWF(db0::IOException) << "Diff_IO: decode fault - corrupt diff data";
249+
})
237250
{
238251
}
239252

240253
Diff_IO::Diff_IO(std::size_t header_size, CFile &file, std::uint32_t page_size)
241254
: Page_IO(header_size, file, page_size)
242255
, m_read_buf(page_size * 2)
256+
, m_decode_fault([]() {
257+
THROWF(db0::IOException) << "Diff_IO: decode fault - corrupt diff data";
258+
})
243259
{
244260
}
245261

@@ -294,7 +310,7 @@ DB0_PACKED_END
294310
{
295311
// must lock because the read-buffer is shared
296312
std::unique_lock<std::mutex> lock(m_mx_read);
297-
DiffReader reader((Page_IO&)*this, page_num, m_read_buf.data(), m_read_buf.data() + m_read_buf.size());
313+
DiffReader reader((Page_IO&)*this, page_num, m_read_buf.data(), m_read_buf.data() + m_read_buf.size(), m_decode_fault);
298314
for (;;) {
299315
bool underflow = false;
300316
if (reader.apply((std::byte*)buffer, page_and_state, underflow)) {
@@ -305,7 +321,7 @@ DB0_PACKED_END
305321
reader.loadNext();
306322
continue;
307323
}
308-
THROWF(db0::InternalException) << "Diff block not found";
324+
THROWF(db0::InternalException) << "Diff block not found";
309325
}
310326
}
311327

src/dbzero/core/storage/Diff_IO.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,8 @@ namespace db0
6767
std::size_t m_full_dp_bytes_written = 0;
6868
// total bytes written using the diff mechanism
6969
std::size_t m_diff_bytes_written = 0;
70+
// function throwing an exception on decode fault (corrupt diff data)
71+
std::function<void()> m_decode_fault;
7072
};
7173

7274
}

src/dbzero/core/storage/REL_Index.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -188,14 +188,15 @@ namespace db0
188188

189189
std::uint64_t REL_Index::assignRelative(std::uint64_t storage_page_num, bool is_first_in_step)
190190
{
191-
if (is_first_in_step) {
191+
assert(storage_page_num >= m_last_storage_page_num);
192+
// prevent adding duplicate mapping (e.g. might be called multiple times after appendDiff)
193+
if (is_first_in_step && (storage_page_num != m_last_storage_page_num)) {
192194
super_t::insert({ ++m_max_rel_page_num, storage_page_num });
193195
assert(storage_page_num > m_last_storage_page_num);
194196
m_last_storage_page_num = storage_page_num;
195197
m_rel_page_num = m_max_rel_page_num;
196198
}
197199

198-
assert(storage_page_num >= m_last_storage_page_num);
199200
auto result = m_rel_page_num + (storage_page_num - m_last_storage_page_num);
200201
if (result > m_max_rel_page_num) {
201202
m_max_rel_page_num = result;
@@ -208,6 +209,9 @@ namespace db0
208209
{
209210
assert(storage_page_num >= m_last_storage_page_num);
210211
assert(rel_page_num >= m_rel_page_num);
212+
213+
m_max_rel_page_num = rel_page_num;
214+
m_last_storage_page_num = storage_page_num;
211215
if (!this->empty()) {
212216
// check if the mapping is already valid
213217
if (storage_page_num - m_last_storage_page_num == rel_page_num - m_rel_page_num) {
@@ -218,8 +222,6 @@ namespace db0
218222

219223
// register the new mapping
220224
super_t::insert({ rel_page_num, storage_page_num });
221-
m_max_rel_page_num = rel_page_num;
222-
m_last_storage_page_num = storage_page_num;
223225
m_rel_page_num = rel_page_num;
224226
}
225227

src/dbzero/core/storage/REL_Index.hpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -213,11 +213,11 @@ DB0_PACKED_END
213213
std::uint64_t size() const;
214214

215215
const_iterator cbegin() const;
216-
216+
217217
private:
218218
// values maintained in-sync with the tree
219-
std::uint64_t m_last_storage_page_num = 0;
220-
std::uint64_t m_rel_page_num = 0;
219+
std::uint64_t m_last_storage_page_num = 0;
220+
std::uint64_t m_rel_page_num = 0; // key of the last inserted item
221221
std::uint64_t m_max_rel_page_num = 0;
222222
};
223223

src/dbzero/core/storage/diff_buffer.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,13 +61,20 @@ namespace db0
6161
auto diff_size = o_packed_int<std::uint16_t>::read(at);
6262
if (diff_size > 0) {
6363
assert(dp_result + diff_size <= dp_end);
64+
// this check prevents processing of corrupt diff data
65+
if (dp_result + diff_size > dp_end) {
66+
THROWF(db0::IOException) << "o_diff_buffer::apply: corrupt diff data";
67+
}
6468
std::memcpy(dp_result, at, diff_size);
6569
dp_result += diff_size;
6670
at += diff_size;
6771
}
6872
if (at < end) {
6973
auto identical_size = o_packed_int<std::uint16_t>::read(at);
7074
dp_result += identical_size;
75+
if (dp_result > dp_end) {
76+
THROWF(db0::IOException) << "o_diff_buffer::apply: corrupt diff data";
77+
}
7178
// zero-fill when the indicator is present (special 0,0 indicator)
7279
if (!diff_size && !identical_size) {
7380
// make sure the indicator is only present at the beginning

0 commit comments

Comments
 (0)