From ef7f496f37185777d5ae9ad84f64b8c18a7ae7c5 Mon Sep 17 00:00:00 2001
From: Ally Heev <allyheev@gmail.com>
Date: Sat, 30 May 2026 15:31:40 +0530
Subject: [PATCH 1/3] fix max_db_size

---
 src_py/_lbug_capi.py | 2 +-
 src_py/database.py   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src_py/_lbug_capi.py b/src_py/_lbug_capi.py
index c20c4b3..69779f3 100644
--- a/src_py/_lbug_capi.py
+++ b/src_py/_lbug_capi.py
@@ -1195,7 +1195,7 @@ def __init__(
         max_num_threads: int = 0,
         compression: bool = True,
         read_only: bool = False,
-        max_db_size: int = (1 << 30),
+        max_db_size: int = 0xFFFFFFFF,
         auto_checkpoint: bool = True,
         checkpoint_threshold: int = -1,
         throw_on_wal_replay_failure: bool = True,
diff --git a/src_py/database.py b/src_py/database.py
index 84eef91..4ea04d1 100644
--- a/src_py/database.py
+++ b/src_py/database.py
@@ -39,7 +39,7 @@ def __init__(
         compression: bool = True,
         lazy_init: bool = False,
         read_only: bool = False,
-        max_db_size: int = (1 << 30),
+        max_db_size: int = 0xFFFFFFFF,
         auto_checkpoint: bool = True,
         checkpoint_threshold: int = -1,
         throw_on_wal_replay_failure: bool = True,

From d06f594eebbf2b85468023162dc8c8fcf73d4a06 Mon Sep 17 00:00:00 2001
From: Ally Heev <allyheev@gmail.com>
Date: Sat, 30 May 2026 15:58:40 +0530
Subject: [PATCH 2/3] fix max_db_size issue in tests

---
 test/conftest.py       | 10 +++++++++-
 test/test_mvcc_bank.py |  6 ++++--
 test/test_wal.py       |  6 +++---
 3 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/test/conftest.py b/test/conftest.py
index a8a6329..f1774e8 100644
--- a/test/conftest.py
+++ b/test/conftest.py
@@ -181,6 +181,9 @@ def init_movie_serial(conn: lb.Connection) -> None:
 
 
 _POOL_SIZE_: int = 256 * 1024 * 1024
+# Use 1GB max DB size for tests to avoid exhausting virtual address space
+# when many databases are open simultaneously (CI runners may have tight VA limits)
+_MAX_DB_SIZE_: int = 1 << 30
 
 
 def get_db_file_path(tmp_path: Path) -> Path:
@@ -228,7 +231,12 @@ def _close_cached_readonly_state() -> None:
 
 def create_conn_db(path: Path, *, read_only: bool) -> ConnDB:
     """Return a new connection and database."""
-    db = lb.Database(path, buffer_pool_size=_POOL_SIZE_, read_only=read_only)
+    db = lb.Database(
+        path,
+        buffer_pool_size=_POOL_SIZE_,
+        read_only=read_only,
+        max_db_size=_MAX_DB_SIZE_,
+    )
     conn = lb.Connection(db, num_threads=4)
     return conn, db
 
diff --git a/test/test_mvcc_bank.py b/test/test_mvcc_bank.py
index 22cc68e..c6d7962 100644
--- a/test/test_mvcc_bank.py
+++ b/test/test_mvcc_bank.py
@@ -246,10 +246,12 @@ def run_bank_test(
     edges = build_edges(N_ACCOUNTS, EDGE_PROB, rng)
 
     try:
-        db = lb.Database(str(db_path), enable_multi_writes=enable_multi_writes)
+        db = lb.Database(
+            str(db_path), enable_multi_writes=enable_multi_writes, max_db_size=1 << 30
+        )
     except TypeError:
         # Fallback if binding patch is not applied
-        db = lb.Database(str(db_path))
+        db = lb.Database(str(db_path), max_db_size=1 << 30)
 
     setup_db(db, N_ACCOUNTS, edges)
 
diff --git a/test/test_wal.py b/test/test_wal.py
index 85025b6..755218f 100644
--- a/test/test_wal.py
+++ b/test/test_wal.py
@@ -15,7 +15,7 @@ def run_query_in_new_process(tmp_path: Path, build_dir: Path, queries: str):
         sys.path.append(r"{build_dir!s}")
 
         import ladybug as lb
-        db = lb.Database(r"{db_path!s}")
+        db = lb.Database(r"{db_path!s}", max_db_size=1 << 30)
         """) + queries
     return subprocess.Popen([sys.executable, "-c", code])
 
@@ -40,7 +40,7 @@ def test_replay_after_kill(tmp_path: Path, build_dir: Path) -> None:
     """)
     run_query_then_kill(tmp_path, build_dir, queries)
     db_path = get_db_file_path(tmp_path)
-    with lb.Database(db_path) as db, lb.Connection(db) as conn:
+    with lb.Database(db_path, max_db_size=1 << 30) as db, lb.Connection(db) as conn:
         # previously committed queries should be valid after replaying WAL
         result = conn.execute("CALL show_tables() RETURN *")
         assert result.has_next()
@@ -64,7 +64,7 @@ def test_replay_with_exception(tmp_path: Path, build_dir: Path) -> None:
     """)
     run_query_then_kill(tmp_path, build_dir, queries)
     db_path = get_db_file_path(tmp_path)
-    with lb.Database(db_path) as db, lb.Connection(db) as conn:
+    with lb.Database(db_path, max_db_size=1 << 30) as db, lb.Connection(db) as conn:
         # previously committed queries should be valid after replaying WAL
         result = conn.execute("match (t:tab) where t.id <= 5 return t.id")
         assert result.get_num_tuples() == 5

From ea77c97e9fbf4a020d8bf991913b9e739f59af36 Mon Sep 17 00:00:00 2001
From: Ally Heev <allyheev@gmail.com>
Date: Sun, 31 May 2026 10:06:04 +0530
Subject: [PATCH 3/3] add mx_db_size fixture

---
 src_cpp/py_database.cpp |  2 +-
 src_py/_lbug_capi.py    |  7 +++++--
 src_py/database.py      | 40 ++++++++++++++++++++++------------------
 test/conftest.py        | 11 ++++++++++-
 test/test_iteration.py  |  9 ---------
 test/test_mvcc_bank.py  | 16 +++++++++++-----
 test/test_wal.py        | 26 ++++++++++++++++----------
 7 files changed, 65 insertions(+), 46 deletions(-)

diff --git a/src_cpp/py_database.cpp b/src_cpp/py_database.cpp
index bf7083e..8a5c1ae 100644
--- a/src_cpp/py_database.cpp
+++ b/src_cpp/py_database.cpp
@@ -16,7 +16,7 @@ void PyDatabase::initialize(py::handle& m) {
                  bool, bool, bool>(),
             py::arg("database_path"), py::arg("buffer_pool_size") = 0,
             py::arg("max_num_threads") = 0, py::arg("compression") = true,
-            py::arg("read_only") = false, py::arg("max_db_size") = (uint64_t)1 << 43,
+            py::arg("read_only") = false, py::arg("max_db_size") = -1u,
             py::arg("auto_checkpoint") = true, py::arg("checkpoint_threshold") = -1,
             py::arg("throw_on_wal_replay_failure") = true, py::arg("enable_checksums") = true,
             py::arg("enable_multi_writes") = false)
diff --git a/src_py/_lbug_capi.py b/src_py/_lbug_capi.py
index 69779f3..a3ed42c 100644
--- a/src_py/_lbug_capi.py
+++ b/src_py/_lbug_capi.py
@@ -1195,7 +1195,7 @@ def __init__(
         max_num_threads: int = 0,
         compression: bool = True,
         read_only: bool = False,
-        max_db_size: int = 0xFFFFFFFF,
+        max_db_size: int | None = None,
         auto_checkpoint: bool = True,
         checkpoint_threshold: int = -1,
         throw_on_wal_replay_failure: bool = True,
@@ -1208,7 +1208,10 @@ def __init__(
         config.max_num_threads = max_num_threads
         config.enable_compression = compression
         config.read_only = read_only
-        config.max_db_size = max_db_size
+
+        if max_db_size is not None:
+            config.max_db_size = max_db_size
+
         config.auto_checkpoint = auto_checkpoint
         if checkpoint_threshold >= 0:
             config.checkpoint_threshold = checkpoint_threshold
diff --git a/src_py/database.py b/src_py/database.py
index 4ea04d1..f0e2025 100644
--- a/src_py/database.py
+++ b/src_py/database.py
@@ -39,7 +39,7 @@ def __init__(
         compression: bool = True,
         lazy_init: bool = False,
         read_only: bool = False,
-        max_db_size: int = 0xFFFFFFFF,
+        max_db_size: int | None = None,
         auto_checkpoint: bool = True,
         checkpoint_threshold: int = -1,
         throw_on_wal_replay_failure: bool = True,
@@ -77,12 +77,11 @@ def __init__(
             database path.
             Default to False.
 
-        max_db_size : int
+        max_db_size : int, optional
             The maximum size of the database in bytes. Note that this is introduced
             temporarily for now to get around with the default 8TB mmap address
-             space limit some environment. This will be removed once we implemente
-             a better solution later. The value is default to 1 << 43 (8TB) under 64-bit
-             environment and 1GB under 32-bit one.
+             space limit some environment. This will be removed once we implement
+             a better solution later. If not specified, the backend's default is used.
 
         auto_checkpoint: bool
             If true, the database will automatically checkpoint when the size of
@@ -242,19 +241,24 @@ def init_pybind_database(self) -> Any | None:
         if pybind_module is None:
             return None
         if self._pybind_database is None:
-            self._pybind_database = pybind_module.Database(
-                self.database_path,
-                self.buffer_pool_size,
-                self.max_num_threads,
-                self.compression,
-                self.read_only,
-                self.max_db_size,
-                self.auto_checkpoint,
-                self.checkpoint_threshold,
-                self.throw_on_wal_replay_failure,
-                self.enable_checksums,
-                self.enable_multi_writes,
-            )
+            kwargs = {
+                "database_path": self.database_path,
+                "buffer_pool_size": self.buffer_pool_size,
+                "max_num_threads": self.max_num_threads,
+                "compression": self.compression,
+                "read_only": self.read_only,
+                "auto_checkpoint": self.auto_checkpoint,
+                "checkpoint_threshold": self.checkpoint_threshold,
+                "throw_on_wal_replay_failure": self.throw_on_wal_replay_failure,
+                "enable_checksums": self.enable_checksums,
+                "enable_multi_writes": self.enable_multi_writes,
+            }
+
+            if self.max_db_size is not None:
+                kwargs["max_db_size"] = self.max_db_size
+
+            self._pybind_database = pybind_module.Database(**kwargs)
+
         return self._pybind_database
 
     def get_torch_geometric_remote_backend(
diff --git a/test/conftest.py b/test/conftest.py
index f1774e8..0776ae3 100644
--- a/test/conftest.py
+++ b/test/conftest.py
@@ -298,11 +298,20 @@ def conn_db_empty(tmp_path: Path) -> ConnDB:
         db.close()
 
 
+@pytest.fixture(scope="session")
+def max_db_size() -> int:
+    """Return the maximum database size used across tests."""
+    return _MAX_DB_SIZE_
+
+
 @pytest.fixture
 def conn_db_in_mem() -> ConnDB:
     """Return a new in-memory connection and database."""
     db = lb.Database(
-        database_path=":memory:", buffer_pool_size=_POOL_SIZE_, read_only=False
+        database_path=":memory:",
+        buffer_pool_size=_POOL_SIZE_,
+        read_only=False,
+        max_db_size=_MAX_DB_SIZE_,
     )
     conn = lb.Connection(db, num_threads=4)
     try:
diff --git a/test/test_iteration.py b/test/test_iteration.py
index b26aaf5..182020f 100644
--- a/test/test_iteration.py
+++ b/test/test_iteration.py
@@ -1,6 +1,5 @@
 from __future__ import annotations
 
-import ladybug as lb
 from type_aliases import ConnDB
 
 
@@ -35,11 +34,7 @@ def test_iteration_loop(conn_db_in_mem: ConnDB) -> None:
 
 def test_get_all(conn_db_in_mem: ConnDB) -> None:
     conn, _ = conn_db_in_mem
-    db = lb.Database(database_path=":memory:")
-    assert not db.is_closed
-    assert db._database is not None
 
-    conn = lb.Connection(db)
     conn.execute("CREATE NODE TABLE person(name STRING, age INT64, PRIMARY KEY(name));")
     conn.execute("CREATE (:person {name: 'Alice', age: 30});")
     conn.execute("CREATE (:person {name: 'Bob', age: 40});")
@@ -54,11 +49,7 @@ def test_get_all(conn_db_in_mem: ConnDB) -> None:
 
 def test_get_n(conn_db_in_mem: ConnDB) -> None:
     conn, _ = conn_db_in_mem
-    db = lb.Database(database_path=":memory:")
-    assert not db.is_closed
-    assert db._database is not None
 
-    conn = lb.Connection(db)
     conn.execute("CREATE NODE TABLE person(name STRING, age INT64, PRIMARY KEY(name));")
     conn.execute("CREATE (:person {name: 'Alice', age: 30});")
     conn.execute("CREATE (:person {name: 'Bob', age: 40});")
diff --git a/test/test_mvcc_bank.py b/test/test_mvcc_bank.py
index c6d7962..cac0764 100644
--- a/test/test_mvcc_bank.py
+++ b/test/test_mvcc_bank.py
@@ -240,6 +240,7 @@ def run_bank_test(
     n_readers: int,
     duration: int,
     enable_multi_writes: bool,
+    max_db_size: int,
     seed: int = 42,
 ) -> Stats:
     rng = random.Random(seed)
@@ -247,11 +248,13 @@ def run_bank_test(
 
     try:
         db = lb.Database(
-            str(db_path), enable_multi_writes=enable_multi_writes, max_db_size=1 << 30
+            str(db_path),
+            enable_multi_writes=enable_multi_writes,
+            max_db_size=max_db_size,
         )
     except TypeError:
         # Fallback if binding patch is not applied
-        db = lb.Database(str(db_path), max_db_size=1 << 30)
+        db = lb.Database(str(db_path), max_db_size=max_db_size)
 
     setup_db(db, N_ACCOUNTS, edges)
 
@@ -289,7 +292,7 @@ def run_bank_test(
 # ---------------------------------------------------------------------------
 # Tests
 # ---------------------------------------------------------------------------
-def test_single_writer_no_anomalies(tmp_path: Path) -> None:
+def test_single_writer_no_anomalies(tmp_path: Path, max_db_size: int) -> None:
     """Baseline: single writer, no concurrent write transactions."""
     stats = run_bank_test(
         tmp_path / "bank_single.lbdb",
@@ -297,12 +300,13 @@ def test_single_writer_no_anomalies(tmp_path: Path) -> None:
         n_readers=2,
         duration=DURATION_SINGLE_WRITER,
         enable_multi_writes=False,
+        max_db_size=max_db_size,
     )
     assert stats.anomalies == [], f"MVCC anomalies detected: {stats.anomalies}"
     assert stats.reads_failed == 0, f"Reader errors: {stats.reads_failed}"
 
 
-def test_multi_writer_no_anomalies(tmp_path: Path) -> None:
+def test_multi_writer_no_anomalies(tmp_path: Path, max_db_size: int) -> None:
     """
     Four concurrent writers with enable_multi_writes=True.
 
@@ -315,6 +319,7 @@ def test_multi_writer_no_anomalies(tmp_path: Path) -> None:
         n_readers=2,
         duration=DURATION_MULTI_WRITER,
         enable_multi_writes=True,
+        max_db_size=max_db_size,
     )
     assert stats.anomalies == [], f"MVCC anomalies detected: {stats.anomalies}"
     assert stats.reads_failed == 0, f"Reader errors: {stats.reads_failed}"
@@ -323,7 +328,7 @@ def test_multi_writer_no_anomalies(tmp_path: Path) -> None:
 
 
 @pytest.mark.slow
-def test_multi_writer_stress_no_anomalies(tmp_path: Path) -> None:
+def test_multi_writer_stress_no_anomalies(tmp_path: Path, max_db_size: int) -> None:
     """
     Stress: 8 writers / 4 readers for 60 s (matches adsharma README example).
 
@@ -335,5 +340,6 @@ def test_multi_writer_stress_no_anomalies(tmp_path: Path) -> None:
         n_readers=4,
         duration=60,
         enable_multi_writes=True,
+        max_db_size=max_db_size,
     )
     assert stats.anomalies == [], f"MVCC anomalies detected: {stats.anomalies}"
diff --git a/test/test_wal.py b/test/test_wal.py
index 755218f..af8a344 100644
--- a/test/test_wal.py
+++ b/test/test_wal.py
@@ -8,20 +8,24 @@
 from conftest import get_db_file_path
 
 
-def run_query_in_new_process(tmp_path: Path, build_dir: Path, queries: str):
+def run_query_in_new_process(
+    tmp_path: Path, build_dir: Path, queries: str, max_db_size: int
+):
     db_path = get_db_file_path(tmp_path)
     code = dedent(f"""
         import sys
         sys.path.append(r"{build_dir!s}")
 
         import ladybug as lb
-        db = lb.Database(r"{db_path!s}", max_db_size=1 << 30)
+        db = lb.Database(r"{db_path!s}", max_db_size={max_db_size})
         """) + queries
     return subprocess.Popen([sys.executable, "-c", code])
 
 
-def run_query_then_kill(tmp_path: Path, build_dir: Path, queries: str):
-    proc = run_query_in_new_process(tmp_path, build_dir, queries)
+def run_query_then_kill(
+    tmp_path: Path, build_dir: Path, queries: str, max_db_size: int
+):
+    proc = run_query_in_new_process(tmp_path, build_dir, queries, max_db_size)
     time.sleep(5)
     proc.kill()
     proc.wait(5)
@@ -32,15 +36,15 @@ def run_query_then_kill(tmp_path: Path, build_dir: Path, queries: str):
 
 # Kill the database while it's in the middle of executing a long persistent query
 # When we reload the database we will replay from the WAL (which will be incomplete)
-def test_replay_after_kill(tmp_path: Path, build_dir: Path) -> None:
+def test_replay_after_kill(tmp_path: Path, build_dir: Path, max_db_size: int) -> None:
     queries = dedent("""
     conn = lb.Connection(db)
     conn.execute("CREATE NODE TABLE tab (id INT64, PRIMARY KEY (id));")
     conn.execute("UNWIND RANGE(1,100000) AS x UNWIND RANGE(1, 100000) AS y CREATE (:tab {id: x * 100000 + y});")
     """)
-    run_query_then_kill(tmp_path, build_dir, queries)
+    run_query_then_kill(tmp_path, build_dir, queries, max_db_size)
     db_path = get_db_file_path(tmp_path)
-    with lb.Database(db_path, max_db_size=1 << 30) as db, lb.Connection(db) as conn:
+    with lb.Database(db_path, max_db_size=max_db_size) as db, lb.Connection(db) as conn:
         # previously committed queries should be valid after replaying WAL
         result = conn.execute("CALL show_tables() RETURN *")
         assert result.has_next()
@@ -49,7 +53,9 @@ def test_replay_after_kill(tmp_path: Path, build_dir: Path) -> None:
         result.close()
 
 
-def test_replay_with_exception(tmp_path: Path, build_dir: Path) -> None:
+def test_replay_with_exception(
+    tmp_path: Path, build_dir: Path, max_db_size: int
+) -> None:
     queries = dedent("""
     conn = lb.Connection(db)
     conn.execute("CREATE NODE TABLE tab (id INT64, PRIMARY KEY (id));")
@@ -62,9 +68,9 @@ def test_replay_with_exception(tmp_path: Path, build_dir: Path) -> None:
             assert i % 2 == 1
     conn.execute("UNWIND RANGE(1,100000) AS x UNWIND RANGE(1, 100000) AS y CREATE (:tab {id: x * 100000 + y});")
     """)
-    run_query_then_kill(tmp_path, build_dir, queries)
+    run_query_then_kill(tmp_path, build_dir, queries, max_db_size)
     db_path = get_db_file_path(tmp_path)
-    with lb.Database(db_path, max_db_size=1 << 30) as db, lb.Connection(db) as conn:
+    with lb.Database(db_path, max_db_size=max_db_size) as db, lb.Connection(db) as conn:
         # previously committed queries should be valid after replaying WAL
         result = conn.execute("match (t:tab) where t.id <= 5 return t.id")
         assert result.get_num_tuples() == 5