From 354ac6b42bc2976c6f0f0cf42e7c1948d9aeee6f Mon Sep 17 00:00:00 2001 From: Adrian Zawadzki Date: Sat, 6 Dec 2025 00:56:03 +0100 Subject: [PATCH 1/3] fix(cache): add exponential backoff for failed cache flush attempts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement rate limiting for cache recycler flush operations to prevent performance degradation when cache limits are reached and memory cannot be freed. When a flush attempt fails to release sufficient memory (no more than 10% of the target flush size), the system now applies exponential backoff starting at 1 nanosecond and roughly doubling (new delay = 2 × previous delay + 1 ns) up to a maximum of 1 second between retry attempts. This prevents the cache recycler from hammering the system with continuous flush attempts that cannot succeed. Key changes: - Add m_next_flush_time timestamp to track the next moment a flush is allowed - Add m_current_flush_delay for exponential backoff tracking - Flush attempts only execute if current time >= m_next_flush_time - Success threshold: flushed size > 10% of target flush size - On success: reset delay to zero for immediate future flushes - On failure: set delay to 2 × delay + 1 ns (1ns → 3ns → 7ns → ...
→ 1s max) Fixes #539 --- src/dbzero/core/memory/CacheRecycler.cpp | 27 +++++++++++++++++++----- src/dbzero/core/memory/CacheRecycler.hpp | 19 +++++++++++------ 2 files changed, 34 insertions(+), 12 deletions(-) diff --git a/src/dbzero/core/memory/CacheRecycler.cpp b/src/dbzero/core/memory/CacheRecycler.cpp index c58e1702..ea7c88ca 100644 --- a/src/dbzero/core/memory/CacheRecycler.cpp +++ b/src/dbzero/core/memory/CacheRecycler.cpp @@ -129,11 +129,28 @@ namespace db0 } m_current_size[priority] += lock_size; if (getCurrentSize() > m_capacity) { - // try reducing cache utilization to capacity minus flush size - auto flush_size = std::min(m_capacity >> 1, m_flush_size); - updateSize(lock, m_capacity - flush_size); - flushed = true; - flush_result = m_current_size[priority] <= (m_capacity - flush_size); + auto now = std::chrono::high_resolution_clock::now(); + if (now >= m_next_flush_time) { + // try reducing cache utilization to capacity minus flush size + auto flush_size = std::min(m_capacity >> 1, m_flush_size); + auto size_before_flush = getCurrentSize(); + updateSize(lock, m_capacity - flush_size); + flushed = true; + flush_result = m_current_size[priority] <= (m_capacity - flush_size); + // Update backoff state based on flush result(need to flush more than 10 % of flush size) + if ((size_before_flush - getCurrentSize()) > (flush_size/10)) { + // Success: reset delay + m_current_flush_delay = std::chrono::nanoseconds{0}; + m_next_flush_time = std::chrono::high_resolution_clock::time_point{}; + } else { + // Failure: apply exponential backoff + // adding +1 to avoid condition for zero delay + auto new_delay = std::min(m_current_flush_delay.count() * 2 + 1 , MAX_FLUSH_DELAY_NS); + m_current_flush_delay = std::chrono::nanoseconds{new_delay}; + now = std::chrono::high_resolution_clock::now(); + m_next_flush_time = now + m_current_flush_delay; + } + } } // resize is a costly operation but cannot be avoided if the number of locked // resources exceeds the assumed limit 
diff --git a/src/dbzero/core/memory/CacheRecycler.hpp b/src/dbzero/core/memory/CacheRecycler.hpp index a31ee650..7ea405ec 100644 --- a/src/dbzero/core/memory/CacheRecycler.hpp +++ b/src/dbzero/core/memory/CacheRecycler.hpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -21,6 +22,8 @@ namespace db0 { public: static constexpr std::size_t DEFAULT_FLUSH_SIZE = 256u << 20; + static constexpr std::int64_t INITIAL_FLUSH_DELAY_NS = 1'000; // 1us + static constexpr std::int64_t MAX_FLUSH_DELAY_NS = 1'000'000'000; // 1 second /** * Holds resource locks and recycles based on LRU policy @@ -90,13 +93,15 @@ namespace db0 // number of locks to be flushed at once std::size_t m_flush_size; mutable std::mutex m_mutex; - std::function m_flush_dirty; - std::function m_flush_callback; - std::pair m_last_flush_callback_result = {true, false}; - - void resize(std::unique_lock &, std::size_t new_size, int priority); - - /** + std::function m_flush_dirty; + std::function m_flush_callback; + std::pair m_last_flush_callback_result = {true, false}; + + // Flush rate limiting + std::chrono::high_resolution_clock::time_point m_next_flush_time{}; + std::chrono::nanoseconds m_current_flush_delay{0}; + + void resize(std::unique_lock &, std::size_t new_size, int priority); /** * Adjusts cache size after updates, collect locks to unlock (can be unlocked off main thread) * @param released_locks locks to be released * @param release_size total number of bytes to be released From b64699cfc9d664c2ebe7a06ec48cedfec3f1fa83 Mon Sep 17 00:00:00 2001 From: Adrian Zawadzki Date: Sat, 6 Dec 2025 20:39:37 +0100 Subject: [PATCH 2/3] refactor(CacheRecycler): Extract flush logic into dedicated _flush method Extracted cache eviction and backoff logic from CacheRecycler::update() into a new private _flush() helper method to improve code maintainability and reduce method complexity. 
--- src/dbzero/core/memory/CacheRecycler.cpp | 52 +++++++++++++----------- 1 file changed, 29 insertions(+), 23 deletions(-) diff --git a/src/dbzero/core/memory/CacheRecycler.cpp b/src/dbzero/core/memory/CacheRecycler.cpp index ea7c88ca..d9f118be 100644 --- a/src/dbzero/core/memory/CacheRecycler.cpp +++ b/src/dbzero/core/memory/CacheRecycler.cpp @@ -129,28 +129,9 @@ namespace db0 } m_current_size[priority] += lock_size; if (getCurrentSize() > m_capacity) { - auto now = std::chrono::high_resolution_clock::now(); - if (now >= m_next_flush_time) { - // try reducing cache utilization to capacity minus flush size - auto flush_size = std::min(m_capacity >> 1, m_flush_size); - auto size_before_flush = getCurrentSize(); - updateSize(lock, m_capacity - flush_size); - flushed = true; - flush_result = m_current_size[priority] <= (m_capacity - flush_size); - // Update backoff state based on flush result(need to flush more than 10 % of flush size) - if ((size_before_flush - getCurrentSize()) > (flush_size/10)) { - // Success: reset delay - m_current_flush_delay = std::chrono::nanoseconds{0}; - m_next_flush_time = std::chrono::high_resolution_clock::time_point{}; - } else { - // Failure: apply exponential backoff - // adding +1 to avoid condition for zero delay - auto new_delay = std::min(m_current_flush_delay.count() * 2 + 1 , MAX_FLUSH_DELAY_NS); - m_current_flush_delay = std::chrono::nanoseconds{new_delay}; - now = std::chrono::high_resolution_clock::now(); - m_next_flush_time = now + m_current_flush_delay; - } - } + auto flush_returned_values = _flush(lock, priority); + flushed = flush_returned_values.first; + flush_result = flush_returned_values.second; } // resize is a costly operation but cannot be avoided if the number of locked // resources exceeds the assumed limit @@ -264,5 +245,30 @@ namespace db0 std::unique_lock lock(m_mutex); return { m_current_size[0], m_current_size[1] }; } - + + std::pair CacheRecycler::_flush(std::unique_lock &lock, int priority) + { + auto 
now = std::chrono::high_resolution_clock::now(); + if (now >= m_next_flush_time) { + // try reducing cache utilization to capacity minus flush size + auto flush_size = std::min(m_capacity >> 1, m_flush_size); + auto size_before_flush = getCurrentSize(); + updateSize(lock, m_capacity - flush_size); + // Update backoff state based on flush result(need to flush more than 10 % of flush size) + if ((size_before_flush - getCurrentSize()) > (flush_size/10)) { + // Success: reset delay + m_current_flush_delay = std::chrono::nanoseconds{0}; + m_next_flush_time = std::chrono::high_resolution_clock::time_point{}; + } else { + // Failure: apply exponential backoff + // adding +1 to avoid condition for zero delay + auto new_delay = std::min(m_current_flush_delay.count() * 2 + 1 , MAX_FLUSH_DELAY_NS); + m_current_flush_delay = std::chrono::nanoseconds{new_delay}; + now = std::chrono::high_resolution_clock::now(); + m_next_flush_time = now + m_current_flush_delay; + } + return { true, m_current_size[priority] <= (m_capacity - flush_size) }; + } + return { false, false }; + } } \ No newline at end of file From a72637112c6c11beb407a5ca60b27ce9d766a7b1 Mon Sep 17 00:00:00 2001 From: Adrian Zawadzki Date: Sat, 6 Dec 2025 21:10:17 +0100 Subject: [PATCH 3/3] fix(CacheRecycler): added modified header --- src/dbzero/core/memory/CacheRecycler.hpp | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/src/dbzero/core/memory/CacheRecycler.hpp b/src/dbzero/core/memory/CacheRecycler.hpp index 7ea405ec..84faab7f 100644 --- a/src/dbzero/core/memory/CacheRecycler.hpp +++ b/src/dbzero/core/memory/CacheRecycler.hpp @@ -93,15 +93,16 @@ namespace db0 // number of locks to be flushed at once std::size_t m_flush_size; mutable std::mutex m_mutex; - std::function m_flush_dirty; - std::function m_flush_callback; - std::pair m_last_flush_callback_result = {true, false}; - - // Flush rate limiting - std::chrono::high_resolution_clock::time_point m_next_flush_time{}; - 
std::chrono::nanoseconds m_current_flush_delay{0}; - - void resize(std::unique_lock &, std::size_t new_size, int priority); /** + std::function m_flush_dirty; + std::function m_flush_callback; + std::pair m_last_flush_callback_result = {true, false}; + + // Flush rate limiting + std::chrono::high_resolution_clock::time_point m_next_flush_time{}; + std::chrono::nanoseconds m_current_flush_delay{0}; + + void resize(std::unique_lock &, std::size_t new_size, int priority); + /** * Adjusts cache size after updates, collect locks to unlock (can be unlocked off main thread) * @param released_locks locks to be released * @param release_size total number of bytes to be released @@ -116,6 +117,8 @@ namespace db0 inline std::size_t getCurrentSize() const { return m_current_size[0] + m_current_size[1]; } + + std::pair _flush(std::unique_lock &, int priority); }; } \ No newline at end of file