class CullHandler:
    """
    Decide, call by call, whether the cache should run its cull check so
    that the check happens exactly once in every cycle of ``n`` calls.

    Each cycle draws the numbers 1..n in random order without replacement;
    the call that draws ``n`` is the one that culls. Independent random
    draws (selection with replacement) wouldn't ensure a proportional
    outcome: for an n of 5 the cull could occur anywhere from 0 to 3 times
    in 5 calls.

    The position inside the cycle is randomized rather than fixed so that
    several processes sharing one cache table (for example an application
    running in 5 containers) do not all cull on the same call.

    ``n`` values below 1 are treated as 1, i.e. cull on every call.
    """

    def __init__(self, n):
        # range(1, n + 1) is empty for n < 1, which would make check() pop
        # from an empty list. Fall back to the "every call" behavior.
        self.n = max(n, 1)
        self.elements = []
        self.set()

    def set(self):
        """Start a new cycle: refill the pool with 1..n in random order."""
        elements = list(range(1, self.n + 1))
        random.shuffle(elements)
        self.elements = elements

    def check(self):
        """Return True if the caller should run the cull check now."""
        if self.n == 1:
            return True
        if not self.elements:
            # The pool was emptied from outside; start a fresh cycle rather
            # than raising IndexError on pop().
            self.set()
        n = self.elements.pop()
        if not self.elements:
            # Cycle exhausted: prepare the next one for the following call.
            self.set()
        return n == self.n
+ +:mod:`django.contrib.sitemaps` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +* ... + +:mod:`django.contrib.sites` +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +* ... + +:mod:`django.contrib.staticfiles` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +* ... + +:mod:`django.contrib.syndication` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +* ... + +Asynchronous views +~~~~~~~~~~~~~~~~~~ + +* ... + +Cache +~~~~~ + +* Subclasses of ``BaseDatabaseCache`` now support culling entries every ``N`` + write operations as an optimization. To use this feature, set the + ``CULL_EVERY_N`` option in the cache's ``OPTIONS``. CSP ~~~ diff --git a/docs/topics/cache.txt b/docs/topics/cache.txt index 37384c3b8aed..9fa60a7d8e80 100644 --- a/docs/topics/cache.txt +++ b/docs/topics/cache.txt @@ -252,9 +252,17 @@ In this example, the cache table's name is ``my_cache_table``:: } } + Unlike other cache backends, the database cache does not support automatic culling of expired entries at the database level. Instead, expired cache -entries are culled each time ``add()``, ``set()``, or ``touch()`` is called. +entries are culled when ``add()``, ``set()``, or ``touch()`` is called. + +.. versionadded:: 6.1 + + Since the cull operation can be expensive for a large cache, you may + control how often this check occurs by setting ``CULL_EVERY_N`` to an + integer value. The cull occurs once every ``N`` such calls; ``1`` is the + default. .. _database-caching-creating-the-table: @@ -499,6 +507,12 @@ behavior. These arguments are provided as additional keys in the On some backends (``database`` in particular) this makes culling *much* faster at the expense of more cache misses. + .. versionadded:: 6.1 + + The database backend supports an option ``CULL_EVERY_N`` that determines + how often expired entries are culled. The cull operation will occur every + ``N`` write operations. This option should be an integer and defaults to ``1``. 
def test_db_cull_optimized_off(self):
    # With the cache's existing cull handler, a write should trigger the
    # cull once the entry limit is exceeded. A limit of -1 guarantees that.
    saved_limit = cache._max_entries
    cache._max_entries = -1
    try:
        with mock.patch.object(cache, "_cull") as cull_mock:
            cache.set("key_foo", "foo")
    finally:
        cache._max_entries = saved_limit
    cull_mock.assert_called_once()

def test_db_cull_optimized_on(self):
    # With a handler for n=2 whose next draw is 2, the write must cull.
    saved_handler = cache._cull_handler
    saved_limit = cache._max_entries
    handler = CullHandler(2)
    handler.elements = [2]
    cache._max_entries = -1
    cache._cull_handler = handler
    try:
        with mock.patch.object(cache, "_cull") as cull_mock:
            cache.set("key_foo", "foo")
    finally:
        cache._max_entries = saved_limit
        cache._cull_handler = saved_handler
    cull_mock.assert_called_once()

def test_no_query_without_check(self):
    # When the handler's draw is not n, the write must not issue the
    # COUNT query at all.
    saved_handler = cache._cull_handler
    handler = CullHandler(2)
    handler.elements = [9, 9]
    cache._cull_handler = handler
    try:
        with CaptureQueriesContext(connection) as captured_queries:
            cache.set("shouldnt_cull", "value")
    finally:
        cache._cull_handler = saved_handler
    count_queries = [
        query for query in captured_queries if "COUNT" in query["sql"]
    ]
    self.assertEqual(len(count_queries), 0)