diff --git a/bedrock/base/cache.py b/bedrock/base/cache.py index acddcb389c5..dbd20a7d710 100644 --- a/bedrock/base/cache.py +++ b/bedrock/base/cache.py @@ -2,10 +2,16 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at https://mozilla.org/MPL/2.0/. +import logging + +from django.conf import settings +from django.core.cache import caches from django.core.cache.backends.locmem import DEFAULT_TIMEOUT, LocMemCache from bedrock.base import metrics +logger = logging.getLogger(__name__) + class SimpleDictCache(LocMemCache): """A local memory cache that doesn't pickle values. @@ -70,3 +76,94 @@ def incr(self, key, delta=1, version=None): with self._lock: self._cache[key] = new_value return new_value + + +# === HYBRID CACHE BEHAVIOUR === +# +# Our "hybrid cache" approach uses SimpleDictCache as a local, read-through +# cache on the pod, which falls back to get values from a distributed +# DB-backed cache. +# The DB-backed cache is NOT a read-through cache and only has its values +# set explicitly. + +local_cache = caches["default"] # This is the SimpleDictCache +db_cache = caches["db"] + + +def get_from_hybrid_cache(key, default=None): + """ + Retrieve a value from the hybrid cache. First checks local cache, then falls + back to DB cache. + + If found in DB cache, the value is added to local cache for faster subsequent + access. + + This can be called from any code, because it does not require write access to + the DB. + + :param key: The cache key to retrieve. + :param default: Default value to return if the key is not found in either cache. + + :return: The cached value, or the default if the key is not found. + """ + # Check local cache + value = local_cache.get(key) + if value is not None: + return value + + # Check DB cache and if it has a value, pop it into + # the local cache en route to returning the value + value = db_cache.get(key) + if value is not None: + local_cache.set( + key, + value, + timeout=settings.CACHE_TIME_SHORT, + ) + return value + + return default + + +def set_in_hybrid_cache( + key, + value, + db_cache_timeout=None, + locmem_cache_timeout=settings.CACHE_TIME_SHORT, +): + """ + Set a value in the hybrid cache. + + Writes to both the local cache and the DB cache. + + IMPORTANT: this should only be called from somewhere with DB-write access - + i.e. the CMS deployment pod - if it is called from a Web deployment pod, it + will only set the local-memory cache and also log an exception, because + there will be unpredictable results if you're trying to cache + something that should be available across pods -- and if you're not you + should just use the regular 'default' local-memory cache directly. + + :param key: The cache key to set. + :param value: The value to cache. + :param timeout: Timeout for DB cache in seconds (local cache will use a shorter timeout by default). + """ + # Set in DB cache first, with the provided optional timeout. + # In settings we have a timeout of None, so it will never expire + # but still can be replaced (via this helper function) + + try: + db_cache.set( + key, + value, + timeout=db_cache_timeout, + ) + except Exception as ex: + # Cope with the DB cache not being available - eg + logger.exception(f"Could not set value in DB-backed cache: {ex}") + + # Set in local cache with a short timeout + local_cache.set( + key, + value, + timeout=locmem_cache_timeout, + ) diff --git a/bedrock/base/tests/test_hybrid_cache_helpers.py b/bedrock/base/tests/test_hybrid_cache_helpers.py new file mode 100644 index 00000000000..aa7f0386559 --- /dev/null +++ b/bedrock/base/tests/test_hybrid_cache_helpers.py @@ -0,0 +1,137 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. + +from django.conf import settings +from django.core.cache import caches + +import pytest + +from bedrock.base.cache import get_from_hybrid_cache, set_in_hybrid_cache + +pytestmark = [ + pytest.mark.django_db, +] + +local_cache = caches["default"] +db_cache = caches["db"] + + +@pytest.fixture(autouse=True) +def clear_caches(): + local_cache.clear() + db_cache.clear() + + +def test_hybrid_cache_get(): + key = "test_key" + value = "test_value" + + local_cache.set( + key, + value, + timeout=settings.CACHE_TIME_SHORT, + ) + db_cache.set( + key, + value, + timeout=settings.CACHE_TIME_SHORT, + ) + + # Test getting from local cache directly + assert get_from_hybrid_cache(key) == value + + # Test falling back to db cache and populating local cache + local_cache.clear() + assert local_cache.get(key) is None + assert db_cache.get(key) == value + + assert get_from_hybrid_cache(key) == value + assert local_cache.get(key) == value + + +def test_hybrid_cache_get_no_values_in_local_or_db_cache(): + key = "test_key" + + assert local_cache.get(key) is None + assert db_cache.get(key) is None + assert get_from_hybrid_cache(key) is None + + +def test_hybrid_cache_get__default_value(): + # Test getting default value when key is not found + assert ( + get_from_hybrid_cache( + "non_existent_key", + default="default_value", + ) + == "default_value" + ) + + +def test_hybrid_cache_set(): + key = "test_key" + value = "test_value" + set_in_hybrid_cache(key, value) + + assert local_cache.get(key) == value + assert db_cache.get(key) == value + + +def test_set_in_hybrid_cache_db_cache_failure(caplog, mocker): + key = "test_key_db_failure" + value = "test_value_db_failure" + timeout = 60 + + mocker.patch.object( + db_cache, + "set", + side_effect=Exception("Faked DB cache failure"), + ) + + set_in_hybrid_cache(key, value, timeout) + + assert local_cache.get(key) == value + assert db_cache.get(key) is None + + assert caplog.records[0].msg == "Could not set value in DB-backed cache: Faked DB cache failure" + + +def test_set_in_hybrid_cache_default_timeouts(mocker): + key = "test_key" + value = "test_value" + + mock_db_set = mocker.patch.object(caches["db"], "set") + mock_local_set = mocker.patch.object(caches["default"], "set") + + set_in_hybrid_cache(key, value) + + mock_db_set.assert_called_once_with(key, value, timeout=None) + mock_local_set.assert_called_once_with(key, value, timeout=settings.CACHE_TIME_SHORT) + + +def test_set_in_hybrid_cache_custom_db_cache_timeout(mocker): + key = "test_key" + value = "test_value" + custom_db_cache_timeout = 120 + + mock_db_set = mocker.patch.object(caches["db"], "set") + mock_local_set = mocker.patch.object(caches["default"], "set") + set_in_hybrid_cache(key, value, db_cache_timeout=custom_db_cache_timeout) + + mock_db_set.assert_called_once_with(key, value, timeout=custom_db_cache_timeout) + mock_local_set.assert_called_once_with(key, value, timeout=settings.CACHE_TIME_SHORT) + + +def test_set_in_hybrid_cache_custom_locmem_cache_timeout(mocker): + key = "test_key" + value = "test_value" + custom_locmem_cache_timeout = 42 + + mock_db_set = mocker.patch.object(caches["db"], "set") + mock_local_set = mocker.patch.object(caches["default"], "set") + + set_in_hybrid_cache(key, value, locmem_cache_timeout=custom_locmem_cache_timeout) + + mock_db_set.assert_called_once_with(key, value, timeout=None) + mock_local_set.assert_called_once_with(key, value, timeout=custom_locmem_cache_timeout) diff --git a/bedrock/settings/base.py b/bedrock/settings/base.py index 510c1399b61..55489a7753c 100644 --- a/bedrock/settings/base.py +++ b/bedrock/settings/base.py @@ -89,7 +89,7 @@ def data_path(*args): CACHE_TIME_SHORT = 60 * 10 # 10 mins CACHE_TIME_MED = 60 * 60 # 1 hour CACHE_TIME_LONG = 60 * 60 * 6 # 6 hours - +CACHE_TIME_VERY_VERY_LONG = 60 * 60 * 24 * 365 # 1 year CACHES = { "default": { @@ -101,6 +101,13 @@ def data_path(*args): "CULL_FREQUENCY": 4, # 1/4 entries deleted if max reached }, }, + "db": { + # Intended for use as a slower – but distributed - cache + # See bedrock.base.cache.get_from_hybrid_cache and set_in_hybrid_cache + "LOCATION": "hybrid_cache_db_table", # name of DB table to be used - must be pre-created once with manage.py createcachetable + "BACKEND": "django.core.cache.backends.db.DatabaseCache", + "TIMEOUT": CACHE_TIME_VERY_VERY_LONG, + }, } # Logging diff --git a/bin/export-db-to-sqlite.sh b/bin/export-db-to-sqlite.sh index a29a1288960..7468087744c 100755 --- a/bin/export-db-to-sqlite.sh +++ b/bin/export-db-to-sqlite.sh @@ -196,6 +196,11 @@ PROD_DETAILS_STORAGE=product_details.storage.PDFileStorage \ check_status_and_handle_failure "Running Django migrations" +PROD_DETAILS_STORAGE=product_details.storage.PDFileStorage \ + python manage.py createcachetable || all_well=false + +check_status_and_handle_failure "Creating cache table migrations" + # We want to use all the data from the JSON, so let's drop the rows # that have been automatically populated during migrate, including all the Wagtail # ones, except for wagtailsearch's tables because there's a virtual table that