Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions AUTHORS
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,7 @@ answer newbie questions, and generally made Django that much better:
dusk@woofle.net
Dustyn Gibson <miigotu@gmail.com>
Ed Morley <https://github.com/edmorley>
eevelweezel <eevel.weezel@gmail.com>
Egidijus Macijauskas <e.macijauskas@outlook.com>
eibaan@gmail.com
elky <http://elky.me/>
Expand Down
47 changes: 43 additions & 4 deletions django/core/cache/backends/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import base64
import pickle
import random
from datetime import UTC, datetime

from django.conf import settings
Expand Down Expand Up @@ -29,10 +30,47 @@ def __init__(self, table):
self.swapped = False


class CullHandler:
    """
    Decide which calls should trigger the (potentially expensive) cull check.

    Every cycle of ``n`` calls to ``check()`` consumes a freshly shuffled
    permutation of ``1..n``; only the call that draws ``n`` returns ``True``.
    Drawing without replacement guarantees exactly one positive result per
    cycle, which independent random draws (selection with replacement) would
    not: those could yield zero or several positives in the same ``n`` calls.

    The position within the cycle is randomized rather than fixed so that
    several processes sharing one cache table (e.g. an application running
    in multiple containers) aren't all driven to cull on the same call count.

    ``n`` values below 1 are treated as 1, i.e. every call returns ``True``.
    """

    def __init__(self, n):
        # range(1, n + 1) is empty for n < 1, which would make check() raise
        # IndexError on pop(). Fall back to "check on every call" instead.
        self.n = max(n, 1)
        self.elements = []
        self.set()

    def set(self):
        """Begin a new cycle with a fresh random permutation of 1..n."""
        elements = list(range(1, self.n + 1))
        random.shuffle(elements)
        self.elements = elements

    def check(self):
        """Return True if the caller should perform the cull check now."""
        if self.n == 1:
            return True
        drawn = self.elements.pop()
        # Refill eagerly so the next call always has a value to draw.
        if not self.elements:
            self.set()
        return drawn == self.n


class BaseDatabaseCache(BaseCache):
def __init__(self, table, params):
super().__init__(params)
self._table = table
options = params.get("OPTIONS", {})
try:
cull_n = int(options.get("CULL_EVERY_N", 1))
except (ValueError, TypeError):
cull_n = 1

self._cull_handler = CullHandler(n=cull_n)

class CacheEntry:
_meta = Options(table)
Expand Down Expand Up @@ -118,8 +156,6 @@ def _base_set(self, mode, key, value, timeout=DEFAULT_TIMEOUT):
table = quote_name(self._table)

with connection.cursor() as cursor:
cursor.execute("SELECT COUNT(*) FROM %s" % table)
num = cursor.fetchone()[0]
now = tz_now()
now = now.replace(microsecond=0)
if timeout is None:
Expand All @@ -128,8 +164,11 @@ def _base_set(self, mode, key, value, timeout=DEFAULT_TIMEOUT):
tz = UTC if settings.USE_TZ else None
exp = datetime.fromtimestamp(timeout, tz=tz)
exp = exp.replace(microsecond=0)
if num > self._max_entries:
self._cull(db, cursor, now, num)
if self._cull_handler.check():
cursor.execute("SELECT COUNT(*) FROM %s" % table)
num = cursor.fetchone()[0]
if num > self._max_entries:
self._cull(db, cursor, now, num)
pickled = pickle.dumps(value, self.pickle_protocol)
# The DB column is expecting a string, so make sure the value is a
# string, not bytes. Refs #19274.
Expand Down
33 changes: 33 additions & 0 deletions docs/releases/6.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,39 @@ Minor features
* :class:`~django.contrib.sessions.backends.base.SessionBase` now supports
boolean evaluation via
:meth:`~django.contrib.sessions.backends.base.SessionBase.__bool__`.
* ...

:mod:`django.contrib.sitemaps`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

* ...

:mod:`django.contrib.sites`
~~~~~~~~~~~~~~~~~~~~~~~~~~~

* ...

:mod:`django.contrib.staticfiles`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

* ...

:mod:`django.contrib.syndication`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

* ...

Asynchronous views
~~~~~~~~~~~~~~~~~~

* ...

Cache
~~~~~

* Subclasses of ``BaseDatabaseCache`` can now check whether entries need to be
culled only once every ``N`` write operations, rather than on every write. To
enable this optimization, set ``CULL_EVERY_N`` in the cache's
:setting:`OPTIONS <CACHES-OPTIONS>`.

CSP
~~~
Expand Down
16 changes: 15 additions & 1 deletion docs/topics/cache.txt
Original file line number Diff line number Diff line change
Expand Up @@ -252,9 +252,17 @@ In this example, the cache table's name is ``my_cache_table``::
}
}


Unlike other cache backends, the database cache does not support automatic
culling of expired entries at the database level. Instead, expired cache
entries are culled each time ``add()``, ``set()``, or ``touch()`` is called.
entries are culled when ``add()``, ``set()``, or ``touch()`` is called.

.. versionadded:: 6.1

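Because checking whether the cache needs culling can be expensive for a large
cache, you can control how often the check runs by setting ``CULL_EVERY_N``
to an integer in :setting:`OPTIONS <CACHES-OPTIONS>`. The check then runs
once in every ``N`` calls to ``add()``, ``set()``, or ``touch()``. The
default is ``1``, which checks on every call.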

.. _database-caching-creating-the-table:

Expand Down Expand Up @@ -499,6 +507,12 @@ behavior. These arguments are provided as additional keys in the
On some backends (``database`` in particular) this makes culling *much*
faster at the expense of more cache misses.

.. versionadded:: 6.1

The database backend supports a ``CULL_EVERY_N`` option that determines
how often the cache is checked for culling. The check runs once in every
``N`` write operations. The value should be an integer and defaults to ``1``.

The Memcached and Redis backends pass the contents of :setting:`OPTIONS
<CACHES-OPTIONS>` as keyword arguments to the client constructors, allowing
for more advanced control of client behavior. For example usage, see below.
Expand Down
40 changes: 40 additions & 0 deletions tests/cache/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
caches,
)
from django.core.cache.backends.base import BaseCache, InvalidCacheBackendError
from django.core.cache.backends.db import CullHandler
from django.core.cache.backends.redis import RedisCacheClient
from django.core.cache.utils import make_template_fragment_key
from django.db import close_old_connections, connection, connections
Expand Down Expand Up @@ -1309,6 +1310,45 @@ def test_cull_queries(self):
if "cache_key" in sql:
self.assertIn(connection.ops.quote_name("cache_key"), sql)

def test_db_cull_optimized_off(self):
    # The cull check runs on this write and, with the limit forced below any
    # possible row count, hands off to _cull() exactly once.
    with (
        mock.patch.object(cache, "_cull") as cull_mock,
        mock.patch.object(cache, "_max_entries", -1),
    ):
        cache.set("key_foo", "foo")
    cull_mock.assert_called_once()

def test_db_cull_optimized_on(self):
    # Prime an every-2nd-call handler so that its next draw is the one that
    # triggers the check; the write must then reach _cull() exactly once.
    primed_handler = CullHandler(2)
    primed_handler.elements = [2]
    with (
        mock.patch.object(cache, "_cull") as cull_mock,
        mock.patch.object(cache, "_max_entries", -1),
        mock.patch.object(cache, "_cull_handler", primed_handler),
    ):
        cache.set("key_foo", "foo")
    cull_mock.assert_called_once()

def test_no_query_without_check(self):
    # When the handler declines the check, the write must not issue the
    # COUNT query at all.
    declining_handler = CullHandler(2)
    # Neither queued value equals n, so the next draw cannot trigger a check.
    declining_handler.elements = [9, 9]
    with (
        mock.patch.object(cache, "_cull_handler", declining_handler),
        CaptureQueriesContext(connection) as captured_queries,
    ):
        cache.set("shouldnt_cull", "value")
    count_queries = [
        query for query in captured_queries if "COUNT" in query["sql"]
    ]
    self.assertEqual(len(count_queries), 0)

def test_delete_cursor_rowcount(self):
"""
The rowcount attribute should not be checked on a closed cursor.
Expand Down