Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions python_tests/test_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,16 @@ def rand_array(max_bytes):


def test_cache_size_can_be_updated_at_runtime(db0_fixture):
cache_0 = db0.get_cache_stats()
cache_0 = db0.get_cache_stats()
# create object instances to populate cache
buf = []
for _ in range(1000):
buf.append(MemoTestClass(get_string(1024)))
cache_1 = db0.get_cache_stats()
cache_1 = db0.get_cache_stats()
diff_1 = cache_1["size"] - cache_0["size"]
# reduce cache size so that only 1/2 of objects can fit
db0.set_cache_size(512 * 1024)
cache_2 = db0.get_cache_stats()
cache_2 = db0.get_cache_stats()
# make sure cache size / capacity was adjusted with at least 95% accuracy
assert abs(1.0 - (512 * 1024) / cache_2["size"]) < 0.05
assert abs(1.0 - cache_2["capacity"] / cache_2["size"]) < 0.05
Expand Down
16 changes: 15 additions & 1 deletion python_tests/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from dbzero import find
from datetime import timedelta, datetime
import random
import time


def test_index_instance_can_be_created_without_arguments(db0_fixture):
Expand Down Expand Up @@ -714,4 +715,17 @@ def test_find_in_index_range_issue_1(db0_fixture):
index.add(3, test_obj)
assert test_obj in set(index.range())
assert list(db0.find(index.range(), test_obj)) == [test_obj]



@pytest.mark.stress_test
def test_insert_1M_keys_to_index(db0_no_autocommit):
    """Stress test: time the insertion of one million random integer keys into a new index.

    Every key is paired with an object picked at random from a pool of 25000
    MemoTestClass instances. Afterwards the test checks that the length of the
    index equals the number of add() calls and prints the elapsed wall-clock time.
    """
    key_count = 1_000_000
    cut = db0.index()
    objects = [MemoTestClass(0) for _ in range(25000)]
    start = time.perf_counter()
    for _ in range(key_count):
        # add random int
        cut.add(random.randint(0, 100_000_000), random.choice(objects))
    # NOTE(review): the query result is intentionally discarded; presumably the
    # select is only here so that the index processes the added keys before the
    # clock is stopped - confirm. Its placement after the loop (rather than
    # inside it) was inferred from a listing that had lost its indentation.
    list(cut.select(0, 1))
    end = time.perf_counter()
    assert len(cut) == key_count
    print(f"Inserted 1M keys to index in {end - start:.2f} seconds")
101 changes: 101 additions & 0 deletions python_tests/test_issues_12.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
import dbzero as db0
import pytest
from .conftest import DB0_DIR
from datetime import datetime
from .memo_test_types import MemoBlob
from dataclasses import dataclass
import random
import time
from typing import Dict, List


# Record describing one invoice issuer, used by the stress test in this file.
#   tax_id    - the issuer's tax identification number
#   inv_list  - list the test appends this issuer's invoices to
#   inv_index - db0 index the test adds the same invoices to, keyed by a datetime
# NOTE(review): @db0.memo presumably makes instances dbzero-managed objects - confirm.
@db0.memo
@dataclass
class Issuer:
    tax_id: int
    inv_list: List
    inv_index: db0.index


# Record describing a single invoice, created in bulk by the stress test in this file.
#   tax_id   - tax identification number copied from the issuing Issuer
#   issue_dt - timestamp taken when the invoice object is created
#   data     - raw payload bytes of varying length
# NOTE(review): no_cache=True presumably keeps instances out of the dbzero
# object cache - confirm against the db0.memo documentation.
@db0.memo(no_cache=True)
@dataclass
class Invoice:
    tax_id: int
    issue_dt: datetime
    data: bytes


def get_random_tax_id(tax_ids_set=None):
    """Draw a random 10-digit tax id that is not yet present in ``tax_ids_set``.

    Args:
        tax_ids_set: set of tax ids already handed out. The newly drawn id is
            added to it in place, so passing the same set to successive calls
            yields ids that are unique within that set. When omitted, a fresh
            empty set is used for this call only.

    Returns:
        int: a value in the inclusive range 1000000000..9999999999 that was not
        in ``tax_ids_set`` before the call.
    """
    # A mutable default (``set()``) would be created once and shared by every
    # call that omits the argument, silently accumulating ids across callers.
    # Compare against None explicitly: an empty set passed by the caller must
    # still be the one that gets updated.
    if tax_ids_set is None:
        tax_ids_set = set()
    while True:
        tax_id = random.randint(1000000000, 9999999999)
        if tax_id not in tax_ids_set:
            break
    tax_ids_set.add(tax_id)
    return tax_id


@pytest.mark.stress_test
@pytest.mark.parametrize("db0_slab_size", [{"slab_size": 64 << 20, "autocommit": False}], indirect=True)
def test_no_cache_allocator_issue(db0_slab_size):
    """Stress scenario: keep creating Invoice objects for a fixed time window.

    Creates 25000 Issuer objects with unique tax ids, then for about
    ``execution_time`` seconds creates Invoice objects with payloads of varying
    size and attaches each one to a randomly chosen issuer's list and index.
    About every ``report_interval`` seconds the pending work is committed and
    throughput, storage and language-cache statistics are printed.

    The test contains no assertions; it passes when the loop runs to the end of
    the time window without raising.
    """
    db0.set_cache_size(8 << 30)
    # create 25 k unique tax_id numbers
    tax_id_count = 25000
    tax_id_numbers = set()
    print(f"Generating {tax_id_count} unique tax_id numbers")
    for i in range(tax_id_count):
        if i % 5000 == 0:
            print(f"Generated {i} tax_id numbers so far")
        get_random_tax_id(tax_id_numbers)
    tax_id_list = list(tax_id_numbers)

    issuers = {}

    for i, tax_id in enumerate(tax_id_list):
        if i % 5000 == 0:
            print(f"Created {i} issuers so far")
        new_issuer = Issuer(tax_id=tax_id, inv_list=[], inv_index=db0.index())
        issuers[tax_id] = new_issuer

    # length of the benchmark loop and spacing of commits / reports, in seconds
    execution_time = 15
    report_interval = 3
    # 66000 bytes of filler; every payload below is a prefix of this buffer
    RANDOM_BYTES = b'DB0'*22000
    total_size = 0
    count_of_objects = 0
    new_objects = 0
    db0.commit()
    print("Starting benchmark loop")
    last_report = time.perf_counter()
    start = last_report
    while True:
        # get random number between 0 and 100
        random_number = random.randint(0, 100)
        if random_number < 90:
            # the common case: a small payload
            data_size = random.randint(500, 2000)
        else:
            # the rare case: a large payload
            data_size = random.randint(8000, 64000)

        random_tax_id = random.choice(tax_id_list)
        issuer = issuers[random_tax_id]
        # take the timestamp once so that the key stored in the index is
        # exactly the invoice's own issue_dt
        issue_dt = datetime.now()
        invoice = Invoice(tax_id=issuer.tax_id, issue_dt=issue_dt, data=RANDOM_BYTES[:data_size])
        issuer.inv_list.append(invoice)
        issuer.inv_index.add(issue_dt, invoice)
        count_of_objects += 1
        new_objects += 1

        total_size += data_size
        # report every 3 seconds
        now = time.perf_counter()
        if (now - last_report) >= report_interval:
            commit_start = time.perf_counter()
            db0.commit()
            commit_end = time.perf_counter()
            print(f"Commit time: {(commit_end - commit_start)} seconds")

            # re-read the clock so the reported rate includes the commit time
            now = time.perf_counter()
            print(f"Objects / sec {float(new_objects) / (now - last_report)}, Total objects: {count_of_objects}, Total size: {total_size} bytes")
            print(db0.get_storage_stats())
            print(db0.get_lang_cache_stats())
            new_objects = 0
            last_report = now

        if (now - start) > execution_time:
            break

26 changes: 13 additions & 13 deletions python_tests/test_memo_no_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,15 +53,15 @@ def test_memo_no_cache_issue1(db0_fixture):
del obj


def test_excluding_no_cache_instances_from_dbzero_cache(db0_fixture):
def test_excluding_no_cache_instances_from_P0_cache(db0_fixture):
buf = db0.list()
initial_cache_size = db0.get_cache_stats()["size"]
initial_cache_size = db0.get_cache_stats()["P_size"]["P0"]
for _ in range(100):
obj = MemoNoCacheClass()
obj = MemoNoCacheClass()
buf.append(obj)

gc.collect()
final_cache_size = db0.get_cache_stats()["size"]
gc.collect()
final_cache_size = db0.get_cache_stats()["P_size"]["P0"]
# make sure cache utilization is low
assert abs(final_cache_size - initial_cache_size) < (350 << 10)

Expand All @@ -80,15 +80,15 @@ def test_fetching_no_cache_objects(db0_fixture):
db0.open(px_name, "r")

# now fetch objects by uuid
initial_cache_size = db0.get_cache_stats()["size"]
initial_cache_size = db0.get_cache_stats()["P_size"]["P0"]
total_len = 0
for id in uuid_list:
# NOTE: must fetch with type, otherwise no_cache flag may not be honored
obj = db0.fetch(MemoNoCacheClass, id)
# this forces data retrieval
total_len += len(obj.data)

final_cache_size = db0.get_cache_stats()["size"]
final_cache_size = db0.get_cache_stats()["P_size"]["P0"]
# make sure cache utilization is low
assert abs(final_cache_size - initial_cache_size) < (300 << 10)

Expand All @@ -105,14 +105,14 @@ def test_find_no_cache_objects(db0_fixture):
db0.open(px_name, "r")

# now retrieve objects using db0.find
initial_cache_size = db0.get_cache_stats()["size"]
initial_cache_size = db0.get_cache_stats()["P_size"]["P0"]
total_len = 0
for obj in db0.find(MemoNoCacheClass):
# this forces data retrieval (but not caching)
total_len += len(obj.data)

assert total_len > 0
final_cache_size = db0.get_cache_stats()["size"]
final_cache_size = db0.get_cache_stats()["P_size"]["P0"]
# make sure cache utilization is low
assert abs(final_cache_size - initial_cache_size) < (300 << 10)

Expand All @@ -131,15 +131,15 @@ def test_fetching_no_cache_objects(db0_fixture):
db0.open(px_name, "r")

# now fetch objects by uuid
initial_cache_size = db0.get_cache_stats()["size"]
initial_cache_size = db0.get_cache_stats()["P_size"]["P0"]
total_len = 0
for id in uuid_list:
# NOTE: must fetch with type, otherwise no_cache flag may not be honored
obj = db0.fetch(MemoNoCacheClass, id)
# this forces data retrieval
total_len += len(obj.data)

final_cache_size = db0.get_cache_stats()["size"]
final_cache_size = db0.get_cache_stats()["P_size"]["P0"]
# make sure cache utilization is low
assert abs(final_cache_size - initial_cache_size) < (300 << 10)

Expand All @@ -156,13 +156,13 @@ def test_find_no_cache_objects(db0_fixture):
db0.open(px_name, "r")

# now retrieve objects using db0.find
initial_cache_size = db0.get_cache_stats()["size"]
initial_cache_size = db0.get_cache_stats()["P_size"]["P0"]
total_len = 0
for obj in db0.find(MemoNoCacheClass):
# this forces data retrieval (but not caching)
total_len += len(obj.data)

assert total_len > 0
final_cache_size = db0.get_cache_stats()["size"]
final_cache_size = db0.get_cache_stats()["P_size"]["P0"]
# make sure cache utilization is low
assert abs(final_cache_size - initial_cache_size) < (350 << 10)
8 changes: 4 additions & 4 deletions python_tests/test_object_stress.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,15 +63,15 @@ def read_value(value) -> int:
total_bytes += len(buf[-1].value)
count += 1
if total_bytes > report_bytes:
pre_commit = datetime.now()
flush = datetime.now()
print("*** next transaction ***")
db0.commit()
storage_stats = db0.get_storage_stats()
print(f"Total bytes: {total_bytes}")
print(f"Rand DRAM I/O ops: {storage_stats['dram_io_rand_ops'] - rand_dram_io}")
print(f"Rand file write ops: {storage_stats['file_rand_write_ops'] - rand_file_write_ops}")
print(f"File bytes written: {storage_stats['file_bytes_written'] - bytes_written}")
print(f"Commit took: {datetime.now() - pre_commit}\n")
print(f"Commit took: {datetime.now() - flush}\n")
rand_dram_io = storage_stats["dram_io_rand_ops"]
rand_file_write_ops = storage_stats["file_rand_write_ops"]
bytes_written = storage_stats["file_bytes_written"]
Expand Down Expand Up @@ -107,7 +107,7 @@ def rand_string(max_len):
total_bytes += len(buf[-1].value)
count += 1
if total_bytes > report_bytes:
pre_commit = datetime.now()
flush = datetime.now()
# NOTE: with each commit the size of GC0 is increasing due to large
# number of objects referenced only from python
db0.commit()
Expand All @@ -116,7 +116,7 @@ def rand_string(max_len):
print(f"Rand DRAM I/O ops: {storage_stats['dram_io_rand_ops'] - rand_dram_io}")
print(f"Rand file write ops: {storage_stats['file_rand_write_ops'] - rand_file_write_ops}")
print(f"File bytes written: {storage_stats['file_bytes_written'] - bytes_written}")
print(f"Commit took: {datetime.now() - pre_commit}")
print(f"Commit took: {datetime.now() - flush}")
rand_dram_io = storage_stats["dram_io_rand_ops"]
rand_file_write_ops = storage_stats["file_rand_write_ops"]
bytes_written = storage_stats["file_bytes_written"]
Expand Down
4 changes: 3 additions & 1 deletion src/dbzero/bindings/python/Memo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,7 @@ namespace db0::python
{
using Class = db0::object_model::Class;
using TagIndex = db0::object_model::TagIndex;
using ExtT = typename MemoImplT::ExtT;

PY_API_FUNC
// the instance may already exist (e.g. if this is a singleton)
Expand All @@ -230,7 +231,8 @@ namespace db0::python
auto type = self->ext().getClassPtr();
if (type->isExistingSingleton(fixture_uuid)) {
// drop existing instance
self->ext().destroy();
// NOTE: may use ext() because destroy does not mutate the instance itself
const_cast<ExtT&>(self->ext()).destroy();
// unload singleton from a different fixture
if (!type->unloadSingleton(&self->modifyExt(), fixture_uuid)) {
PyErr_SetString(PyExc_RuntimeError, "Unloading singleton failed");
Expand Down
14 changes: 14 additions & 0 deletions src/dbzero/bindings/python/PyAPI.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,20 @@ namespace db0::python
}

PySafeDict_SetItemString(*dict, "size", Py_OWN(PyLong_FromLong(cache_recycler.size())));

{
    // Publish the cache size broken down by priority: one "P<n>" entry per
    // element returned by getDetailedSize(), numbered in iteration order.
    const std::vector<std::size_t> detailed_size = cache_recycler.getDetailedSize();
    auto detailed_size_dict = Py_OWN(PyDict_New());
    unsigned int priority_index = 0;
    for (const std::size_t size : detailed_size) {
        std::stringstream key_str;
        key_str << "P" << priority_index++;
        // std::size_t is unsigned and can be wider than long (long is 32-bit on
        // 64-bit Windows), so convert with PyLong_FromSize_t instead of
        // PyLong_FromLong to avoid narrowing large sizes
        PySafeDict_SetItemString(*detailed_size_dict, key_str.str().c_str(), Py_OWN(PyLong_FromSize_t(size)));
    }
    // cache size with a by-priority breakdown
    PySafeDict_SetItemString(*dict, "P_size", detailed_size_dict);
}

PySafeDict_SetItemString(*dict, "capacity", Py_OWN(PyLong_FromLong(cache_recycler.getCapacity())));
PySafeDict_SetItemString(*dict, "deferred_free_count", Py_OWN(PyLong_FromLong(deferred_free_count)));
PySafeDict_SetItemString(*dict, "lang_cache_size", Py_OWN(PyLong_FromLong(lang_cache_size)));
Expand Down
8 changes: 4 additions & 4 deletions src/dbzero/bindings/python/PyToolkit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,7 @@ namespace db0::python
}
return shared_py_cast<PyObject*>(std::move(list_object));
}

PyToolkit::ObjectSharedPtr PyToolkit::unloadByteArray(db0::swine_ptr<Fixture> fixture,
Address address, AccessFlags access_mode)
{
Expand All @@ -281,9 +281,9 @@ namespace db0::python
return shared_py_cast<PyObject*>(std::move(byte_array_object));
}

PyToolkit::ObjectSharedPtr PyToolkit::unloadIndex(db0::swine_ptr<Fixture> fixture,
PyToolkit::ObjectSharedPtr PyToolkit::unloadIndex(db0::swine_ptr<Fixture> fixture,
Address address, std::uint16_t, AccessFlags access_mode)
{
{
// try pulling from cache first
auto &lang_cache = fixture->getLangCache();
auto object_ptr = lang_cache.get(address);
Expand All @@ -292,7 +292,7 @@ namespace db0::python
return object_ptr;
}

auto index_object = IndexDefaultObject_new();
auto index_object = Py_OWN(IndexDefaultObject_new());
// retrieve actual dbzero instance
index_object->unload(fixture, address, access_mode);

Expand Down
12 changes: 6 additions & 6 deletions src/dbzero/bindings/python/collections/PyIndex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ namespace db0::python
return reinterpret_cast<IndexObject*>(type->tp_alloc(type, 0));
}

shared_py_object<IndexObject*> IndexDefaultObject_new() {
return { IndexObject_new(&IndexObjectType, NULL, NULL), false };
IndexObject *IndexDefaultObject_new() {
return IndexObject_new(&IndexObjectType, NULL, NULL);
}

void PyAPI_IndexObject_del(IndexObject* index_obj)
Expand All @@ -67,12 +67,12 @@ namespace db0::python
IndexObject *tryMakeIndex(PyObject *self, PyObject *const *args, Py_ssize_t nargs)
{
// make actual dbzero instance, use default fixture
auto index_object = IndexDefaultObject_new();
auto py_index = Py_OWN(IndexDefaultObject_new());
db0::FixtureLock lock(PyToolkit::getPyWorkspace().getWorkspace().getCurrentFixture());
index_object->makeNew(*lock);
auto &index = py_index->makeNew(*lock);
// register newly created index with py-object cache
lock->getLangCache().add(index_object.get()->ext().getAddress(), index_object.get());
return index_object.steal();
lock->getLangCache().add(index.getAddress(), py_index.get());
return py_index.steal();
}

IndexObject *PyAPI_makeIndex(PyObject *self, PyObject *const *args, Py_ssize_t nargs)
Expand Down
2 changes: 1 addition & 1 deletion src/dbzero/bindings/python/collections/PyIndex.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ namespace db0::python
using IndexObject = PyWrapper<db0::object_model::Index>;

IndexObject *IndexObject_new(PyTypeObject *type, PyObject *, PyObject *);
shared_py_object<IndexObject*> IndexDefaultObject_new();
IndexObject* IndexDefaultObject_new();
void PyAPI_IndexObject_del(IndexObject* self);
Py_ssize_t PyAPI_IndexObject_len(IndexObject *);

Expand Down
Loading