From fd6026cbaf543e9cc7f7383ec65c006e7d252eb7 Mon Sep 17 00:00:00 2001 From: Steve Jalim Date: Tue, 14 Jan 2025 12:35:39 +0000 Subject: [PATCH 1/6] Add helpers to support a 'hybrid cache' option that uses locmem + DB cache In order to balance the need for a distributed cache with the speed of a local-memory cache, we've come up with a couple of helper functions that wrap the following behaviour: Getting values: * If it's in the local-memory cache, return that immediately. * If it's not, fall back to the DB cache, and if the key exists there, return that, cacheing it in local memory again on the way through * If the local memory cache and DB cache both miss, just return the default value for the helper function Setting values: * Set the value in the local memory cache and DB cache at (almost) the same time * If the DB cache is not reachable (eg the DB is a read-only replica), log this loudly, as it's a sign the helper has not been used appropriately, but still set the local-memory version for now, to prevent total failure. IMPORTANT: before this can be used in production, we need to create the cache table in the database with ./manage.py createcachetable AFTER this code has been deployed. This sounds a bit chicken-and-egg but we hopefully can do it via direct DB connection in the worst case. --- bedrock/base/cache.py | 92 +++++++++++++++++ .../base/tests/test_hybrid_cache_helpers.py | 98 +++++++++++++++++++ bedrock/settings/base.py | 8 +- 3 files changed, 197 insertions(+), 1 deletion(-) create mode 100644 bedrock/base/tests/test_hybrid_cache_helpers.py diff --git a/bedrock/base/cache.py b/bedrock/base/cache.py index acddcb389c5..a7b64e20c03 100644 --- a/bedrock/base/cache.py +++ b/bedrock/base/cache.py @@ -2,10 +2,16 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at https://mozilla.org/MPL/2.0/. +import logging + +from django.conf import settings +from django.core.cache import caches from django.core.cache.backends.locmem import DEFAULT_TIMEOUT, LocMemCache from bedrock.base import metrics +logger = logging.getLogger(__name__) + class SimpleDictCache(LocMemCache): """A local memory cache that doesn't pickle values. @@ -70,3 +76,89 @@ def incr(self, key, delta=1, version=None): with self._lock: self._cache[key] = new_value return new_value + + +# === HYBRID CACHE BEHAVIOUR === +# +# Our "hybrid cache" approach uses SimpleDictCache as a local, read-through +# cache on the pod, which falls back to get values from a distributed +# DB-backed cache. +# The DB-backed cache is NOT a read-through cache and only has its values +# set explicitly. + +local_cache = caches["default"] # This is the SimpleDictCache +db_cache = caches["db"] + + +def get_from_hybrid_cache(key, default=None): + """ + Retrieve a value from the hybrid cache. First checks local cache, then falls + back to DB cache. + + If found in DB cache, the value is added to local cache for faster subsequent + access. + + This can be called from any code, because it does not require write access to + the DB. + + :param key: The cache key to retrieve. + :param default: Default value to return if the key is not found in either cache. + + :return: The cached value, or the default if the key is not found. + """ + # Check local cache + value = local_cache.get(key) + if value is not None: + return value + + # Check DB cache and if it has a value, pop it into + # the local cache en route to returning the value + value = db_cache.get(key) + if value is not None: + local_cache.set( + key, + value, + timeout=settings.CACHE_TIME_SHORT, + ) + return value + + return default + + +def set_in_hybrid_cache(key, value, timeout=None): + """ + Set a value in the hybrid cache. + + Writes to both the local cache and the DB cache. + + IMPORTANT: this should only be called from somewhere with DB-write access - + i.e. the CMS deployment pod – if it is called from a Web deployment pod, it + will only set the local-memory cache and also log an exception, because + there will be unpredictable results if you're tryint to cache cache + something that should be available across pods -- and if you're not you + should just use the regular 'default' local-memory cache directly. + + :param key: The cache key to set. + :param value: The value to cache. + :param timeout: Timeout for DB cache in seconds (local cache will use a shorter timeout by default). + """ + # Set in DB cache first, with the provided optional timeout. + # In settings we have a timeout of None, so it will never expire + # but still can be replaced (via this helper function) + + try: + db_cache.set( + key, + value, + timeout=timeout, + ) + except Exception as ex: + # Cope with the DB cache not being available - eg + logger.exception(f"Could not set value in DB-backed cache: {ex}") + + # Set in local cache with a short timeout + local_cache.set( + key, + value, + timeout=settings.CACHE_TIME_SHORT, + ) diff --git a/bedrock/base/tests/test_hybrid_cache_helpers.py b/bedrock/base/tests/test_hybrid_cache_helpers.py new file mode 100644 index 00000000000..3641bfc6617 --- /dev/null +++ b/bedrock/base/tests/test_hybrid_cache_helpers.py @@ -0,0 +1,98 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. + +from django.conf import settings +from django.core.cache import caches + +import pytest + +from bedrock.base.cache import get_from_hybrid_cache, set_in_hybrid_cache + +pytestmark = [ + pytest.mark.django_db, +] + +local_cache = caches["default"] +db_cache = caches["db"] + + +@pytest.fixture(autouse=True) +def clear_caches(): + print("REMOVE PRINT STATEMENT") + local_cache.clear() + db_cache.clear() + + +def test_hybrid_cache_get(): + key = "test_key" + value = "test_value" + + local_cache.set( + key, + value, + timeout=settings.CACHE_TIME_SHORT, + ) + db_cache.set( + key, + value, + timeout=settings.CACHE_TIME_SHORT, + ) + + # Test getting from local cache directly + assert get_from_hybrid_cache(key) == value + + # Test falling back to db cache and populating local cache + local_cache.clear() + assert local_cache.get(key) is None + assert db_cache.get(key) == value + + assert get_from_hybrid_cache(key) == value + assert local_cache.get(key) == value + + +def test_hybrid_cache_get_no_values_in_local_or_db_cache(): + key = "test_key" + + assert local_cache.get(key) is None + assert db_cache.get(key) is None + assert get_from_hybrid_cache(key) is None + + +def test_hybrid_cache_get__default_value(): + # Test getting default value when key is not found + assert ( + get_from_hybrid_cache( + "non_existent_key", + default="default_value", + ) + == "default_value" + ) + + +def test_hybrid_cache_set(): + key = "test_key" + value = "test_value" + set_in_hybrid_cache(key, value) + + assert local_cache.get(key) == value + assert db_cache.get(key) == value + + +def test_set_in_hybrid_cache_db_cache_failure(caplog, mocker): + key = "test_key_db_failure" + value = "test_value_db_failure" + timeout = 60 + + mocker.patch.object( + db_cache, + "set", + side_effect=Exception("Faked DB cache failure"), + ) + + set_in_hybrid_cache(key, value, timeout) + + assert local_cache.get(key) == value + assert db_cache.get(key) is None + + assert caplog.records[0].msg == "Could not set value in DB-backed cache: Faked DB cache failure" diff --git a/bedrock/settings/base.py b/bedrock/settings/base.py index 510c1399b61..b622ee5055d 100644 --- a/bedrock/settings/base.py +++ b/bedrock/settings/base.py @@ -90,7 +90,6 @@ def data_path(*args): CACHE_TIME_MED = 60 * 60 # 1 hour CACHE_TIME_LONG = 60 * 60 * 6 # 6 hours - CACHES = { "default": { "BACKEND": "bedrock.base.cache.SimpleDictCache", @@ -101,6 +100,13 @@ def data_path(*args): "CULL_FREQUENCY": 4, # 1/4 entries deleted if max reached }, }, + "db": { + # Intended for use as a slower – but distributed - cache + # See bedrock.base.cache.get_from_hybrid_cache and set_in_hybrid_cache + "LOCATION": "hybrid_cache_db_table", # name of DB table to be used + "BACKEND": "django.core.cache.backends.db.DatabaseCache", + "TIMEOUT": None, # cached items will not expire + }, } # Logging From 9d295b299b99f4c8c9b60f06d63852f646c7e6df Mon Sep 17 00:00:00 2001 From: Steve Jalim Date: Tue, 14 Jan 2025 14:55:34 +0000 Subject: [PATCH 2/6] Add support to the sqlite export command to boostrap the DB cache table, to keep demos and other sqlite-powered things happy --- bin/export-db-to-sqlite.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/bin/export-db-to-sqlite.sh b/bin/export-db-to-sqlite.sh index a29a1288960..7468087744c 100755 --- a/bin/export-db-to-sqlite.sh +++ b/bin/export-db-to-sqlite.sh @@ -196,6 +196,11 @@ PROD_DETAILS_STORAGE=product_details.storage.PDFileStorage \ check_status_and_handle_failure "Running Django migrations" +PROD_DETAILS_STORAGE=product_details.storage.PDFileStorage \ + python manage.py createcachetable || all_well=false + +check_status_and_handle_failure "Creating cache table migrations" + # We want to use all the data from the JSON, so let's drop the rows # that have been automatically populated during migrate, including all the Wagtail # ones, except for wagtailsearch's tables because there's a virtual table that From f247a5d5d3a6a4f13acb12534340467ab026d729 Mon Sep 17 00:00:00 2001 From: Steve Jalim Date: Wed, 15 Jan 2025 14:53:03 +0000 Subject: [PATCH 3/6] Remove debugging print statement --- bedrock/base/tests/test_hybrid_cache_helpers.py | 1 - 1 file changed, 1 deletion(-) diff --git a/bedrock/base/tests/test_hybrid_cache_helpers.py b/bedrock/base/tests/test_hybrid_cache_helpers.py index 3641bfc6617..cbe9a724286 100644 --- a/bedrock/base/tests/test_hybrid_cache_helpers.py +++ b/bedrock/base/tests/test_hybrid_cache_helpers.py @@ -19,7 +19,6 @@ @pytest.fixture(autouse=True) def clear_caches(): - print("REMOVE PRINT STATEMENT") local_cache.clear() db_cache.clear() From 8838dc60dca7cf7536b5dd6c75bebd60253c9174 Mon Sep 17 00:00:00 2001 From: Steve Jalim Date: Wed, 15 Jan 2025 14:53:21 +0000 Subject: [PATCH 4/6] Comment fixups --- bedrock/base/cache.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bedrock/base/cache.py b/bedrock/base/cache.py index a7b64e20c03..138aa4dda28 100644 --- a/bedrock/base/cache.py +++ b/bedrock/base/cache.py @@ -132,9 +132,9 @@ def set_in_hybrid_cache(key, value, timeout=None): Writes to both the local cache and the DB cache. IMPORTANT: this should only be called from somewhere with DB-write access - - i.e. the CMS deployment pod – if it is called from a Web deployment pod, it + i.e. the CMS deployment pod - if it is called from a Web deployment pod, it will only set the local-memory cache and also log an exception, because - there will be unpredictable results if you're tryint to cache cache + there will be unpredictable results if you're trying to cache something that should be available across pods -- and if you're not you should just use the regular 'default' local-memory cache directly. From 42c700f765be97b2257ac8eb9ff2f8c61f65670a Mon Sep 17 00:00:00 2001 From: Steve Jalim Date: Wed, 15 Jan 2025 15:10:44 +0000 Subject: [PATCH 5/6] Improve configurability of hybrid cache timeouts + add relevant tests --- bedrock/base/cache.py | 11 +++-- .../base/tests/test_hybrid_cache_helpers.py | 40 +++++++++++++++++++ 2 files changed, 48 insertions(+), 3 deletions(-) diff --git a/bedrock/base/cache.py b/bedrock/base/cache.py index 138aa4dda28..dbd20a7d710 100644 --- a/bedrock/base/cache.py +++ b/bedrock/base/cache.py @@ -125,7 +125,12 @@ def get_from_hybrid_cache(key, default=None): return default -def set_in_hybrid_cache(key, value, timeout=None): +def set_in_hybrid_cache( + key, + value, + db_cache_timeout=None, + locmem_cache_timeout=settings.CACHE_TIME_SHORT, +): """ Set a value in the hybrid cache. @@ -150,7 +155,7 @@ def set_in_hybrid_cache(key, value, timeout=None): db_cache.set( key, value, - timeout=timeout, + timeout=db_cache_timeout, ) except Exception as ex: # Cope with the DB cache not being available - eg @@ -160,5 +165,5 @@ def set_in_hybrid_cache(key, value, timeout=None): local_cache.set( key, value, - timeout=settings.CACHE_TIME_SHORT, + timeout=locmem_cache_timeout, ) diff --git a/bedrock/base/tests/test_hybrid_cache_helpers.py b/bedrock/base/tests/test_hybrid_cache_helpers.py index cbe9a724286..aa7f0386559 100644 --- a/bedrock/base/tests/test_hybrid_cache_helpers.py +++ b/bedrock/base/tests/test_hybrid_cache_helpers.py @@ -95,3 +95,43 @@ def test_set_in_hybrid_cache_db_cache_failure(caplog, mocker): assert db_cache.get(key) is None assert caplog.records[0].msg == "Could not set value in DB-backed cache: Faked DB cache failure" + + +def test_set_in_hybrid_cache_default_timeouts(mocker): + key = "test_key" + value = "test_value" + + mock_db_set = mocker.patch.object(caches["db"], "set") + mock_local_set = mocker.patch.object(caches["default"], "set") + + set_in_hybrid_cache(key, value) + + mock_db_set.assert_called_once_with(key, value, timeout=None) + mock_local_set.assert_called_once_with(key, value, timeout=settings.CACHE_TIME_SHORT) + + +def test_set_in_hybrid_cache_custom_db_cache_timeout(mocker): + key = "test_key" + value = "test_value" + custom_db_cache_timeout = 120 + + mock_db_set = mocker.patch.object(caches["db"], "set") + mock_local_set = mocker.patch.object(caches["default"], "set") + set_in_hybrid_cache(key, value, db_cache_timeout=custom_db_cache_timeout) + + mock_db_set.assert_called_once_with(key, value, timeout=custom_db_cache_timeout) + mock_local_set.assert_called_once_with(key, value, timeout=settings.CACHE_TIME_SHORT) + + +def test_set_in_hybrid_cache_custom_locmem_cache_timeout(mocker): + key = "test_key" + value = "test_value" + custom_locmem_cache_timeout = 42 + + mock_db_set = mocker.patch.object(caches["db"], "set") + mock_local_set = mocker.patch.object(caches["default"], "set") + + set_in_hybrid_cache(key, value, locmem_cache_timeout=custom_locmem_cache_timeout) + + mock_db_set.assert_called_once_with(key, value, timeout=None) + mock_local_set.assert_called_once_with(key, value, timeout=custom_locmem_cache_timeout) From b00eec201c83ac032b0d87959b80de37be21e8ce Mon Sep 17 00:00:00 2001 From: Steve Jalim Date: Wed, 15 Jan 2025 22:41:42 +0000 Subject: [PATCH 6/6] Set a very long default cache time for the DB-backed cache, which can still be set to None/no-expiry when needed --- bedrock/settings/base.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/bedrock/settings/base.py b/bedrock/settings/base.py index b622ee5055d..55489a7753c 100644 --- a/bedrock/settings/base.py +++ b/bedrock/settings/base.py @@ -89,6 +89,7 @@ def data_path(*args): CACHE_TIME_SHORT = 60 * 10 # 10 mins CACHE_TIME_MED = 60 * 60 # 1 hour CACHE_TIME_LONG = 60 * 60 * 6 # 6 hours +CACHE_TIME_VERY_VERY_LONG = 60 * 60 * 24 * 365 # 1 year CACHES = { "default": { @@ -103,9 +104,9 @@ def data_path(*args): "db": { # Intended for use as a slower – but distributed - cache # See bedrock.base.cache.get_from_hybrid_cache and set_in_hybrid_cache - "LOCATION": "hybrid_cache_db_table", # name of DB table to be used + "LOCATION": "hybrid_cache_db_table", # name of DB table to be used - must be pre-created once with manage.py createcachetable "BACKEND": "django.core.cache.backends.db.DatabaseCache", - "TIMEOUT": None, # cached items will not expire + "TIMEOUT": CACHE_TIME_VERY_VERY_LONG, }, }