From fbeaaf77afc9505e5083b11de0fbe007d2541f40 Mon Sep 17 00:00:00 2001 From: Yeshwanth Nagaraj Date: Tue, 5 May 2026 19:06:14 +0530 Subject: [PATCH 1/6] Mitigate CVE-2025-69872: HMAC-verified pickle envelope DiskCache previously called pickle.load on bytes read from the cache directory. An attacker with write access to that directory could replace the bytes with a malicious pickle and achieve arbitrary code execution in any victim process that subsequently read the cache (CVE-2025-69872 / GHSA-w8v5-vhqr-4h9v). This change wraps every pickle blob written by Disk in an HMAC-SHA256 envelope: b"DCv1" + HMAC_SHA256(key, payload) + payload and verifies it before pickle.load is allowed to interpret the bytes. Verification failures raise pickle.UnpicklingError instead of executing attacker-controlled code. Key resolution (lazy, only when the pickle path is actually used): 1. Explicit ``Cache(directory, disk_pickle_key=...)`` / ``Disk(directory, pickle_key=...)`` argument. 2. ``DISKCACHE_PICKLE_KEY`` environment variable (hex-encoded). 3. Auto-generated ``/.diskcache_pickle_key`` (mode 0600). Emits ``UnsafePickleWarning`` because an attacker who can read the cache directory can also forge the HMAC -- this fallback gives corruption detection and protection against write-only attackers, not the full advisory threat model. ``pickle_key=False`` is an explicit opt-out that disables HMAC verification (with ``UnsafePickleWarning``). It is required to read caches written by pre-fix versions; users should re-create such caches or pass an explicit key after migrating. Design constraints honored: * ``disk_pickle_key`` is constructor-only and never persisted to the Settings table -- avoids leaking secrets into the same compromised directory and prevents ``cache.reset`` from making ``pickle_key=False`` sticky across restarts. * Key resolution is lazy inside the new helpers, so ``JSONDisk`` and caches that only use primitive keys/values never trigger key file creation or warnings. * ``Cache.check()`` whitelists ``.diskcache_pickle_key`` so ``check(fix=True)`` does not delete it. * Strict by default: pre-fix caches without the envelope raise on read with a clear migration message. A new ``tests/test_security.py`` adds 20 tests covering round-trip, envelope presence, explicit/env/file key resolution, non-persistence in Settings, mismatched-key rejection, inline-blob/.val-file/key tampering detection, legacy mode round-trip and forward read, JSONDisk untouched, ``Cache.check`` whitelist, and primitive-only caches not creating a key file. All 179 existing tests continue to pass. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- diskcache/__init__.py | 2 + diskcache/core.py | 303 +++++++++++++++++++++++++++++- tests/test_security.py | 413 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 709 insertions(+), 9 deletions(-) create mode 100644 tests/test_security.py diff --git a/diskcache/__init__.py b/diskcache/__init__.py index 7757d66..976eb22 100644 --- a/diskcache/__init__.py +++ b/diskcache/__init__.py @@ -16,6 +16,7 @@ JSONDisk, Timeout, UnknownFileWarning, + UnsafePickleWarning, ) from .fanout import FanoutCache from .persistent import Deque, Index @@ -47,6 +48,7 @@ 'Timeout', 'UNKNOWN', 'UnknownFileWarning', + 'UnsafePickleWarning', 'barrier', 'memoize_stampede', 'throttle', diff --git a/diskcache/core.py b/diskcache/core.py index 7a3d23b..df9ee10 100644 --- a/diskcache/core.py +++ b/diskcache/core.py @@ -5,12 +5,15 @@ import contextlib as cl import errno import functools as ft +import hashlib +import hmac import io import json import os import os.path as op import pickle import pickletools +import secrets import sqlite3 import struct import tempfile @@ -39,6 +42,55 @@ def __repr__(self): ENOVAL = Constant('ENOVAL') UNKNOWN = Constant('UNKNOWN') +# --- CVE-2025-69872 / GHSA-w8v5-vhqr-4h9v mitigation ---------------------- +# +# Every pickle blob written by ``Disk`` is wrapped in an HMAC-SHA256 +# envelope so the bytes loaded from disk can be authenticated before +# ``pickle.load`` is allowed to interpret them. The envelope format is:: +# +# b"DCv1" + HMAC_SHA256(key, payload) + payload +# +# where ``payload`` is the original pickle byte string (post +# ``pickletools.optimize`` for keys, raw for values). Verification uses +# ``hmac.compare_digest`` to avoid timing leaks. +# +# The signing key is resolved lazily, only when a pickle path is actually +# exercised, in this order: +# +# 1. an explicit ``pickle_key=`` argument to :class:`Disk` / +# :class:`Cache` (bytes, ``bytearray``, hex ``str``, ``False`` to +# opt out of HMAC, or ``None`` to fall through to the next step); +# 2. the ``DISKCACHE_PICKLE_KEY`` environment variable (hex-encoded); +# 3. an auto-generated ``.diskcache_pickle_key`` file inside the +# cache directory (mode ``0o600``). The file fallback emits an +# :class:`UnsafePickleWarning` because an attacker who can read +# the cache directory can also read the key and forge the HMAC -- +# it provides corruption detection and defense against +# write-only attackers, not against the full advisory threat +# model. +# +# ``pickle_key=False`` disables HMAC verification entirely and emits an +# :class:`UnsafePickleWarning`. This is the explicit escape hatch for +# reading caches that pre-date this fix; it leaves the process exposed +# to the original CVE. +PICKLE_MAGIC = b'DCv1' +PICKLE_HMAC_SIZE = hashlib.sha256().digest_size # 32 +PICKLE_HEADER_SIZE = len(PICKLE_MAGIC) + PICKLE_HMAC_SIZE # 36 +PICKLE_KEY_FILENAME = '.diskcache_pickle_key' +PICKLE_KEY_ENV = 'DISKCACHE_PICKLE_KEY' +PICKLE_KEY_MIN_LEN = 16 +_PICKLE_KEY_UNSET = Constant('PICKLE_KEY_UNSET') + + +class UnsafePickleWarning(UserWarning): + """Warning emitted when DiskCache cannot enforce pickle HMAC verification. + + Triggered when the signing key is auto-generated inside the cache + directory (an attacker with read access can forge envelopes) or when + HMAC verification is explicitly disabled via ``pickle_key=False``. + See CVE-2025-69872 / GHSA-w8v5-vhqr-4h9v. + """ + MODE_NONE = 0 MODE_RAW = 1 MODE_BINARY = 2 @@ -103,17 +155,227 @@ def __repr__(self): class Disk: """Cache key and value serialization for SQLite database and files.""" - def __init__(self, directory, min_file_size=0, pickle_protocol=0): + def __init__(self, directory, min_file_size=0, pickle_protocol=0, + pickle_key=_PICKLE_KEY_UNSET): """Initialize disk instance. :param str directory: directory path :param int min_file_size: minimum size for file use :param int pickle_protocol: pickle protocol for serialization + :param pickle_key: HMAC key for pickle envelope verification + (CVE-2025-69872 mitigation). Accepts: + + * ``bytes`` / ``bytearray`` of >= 16 bytes -- used directly; + * a hex-encoded ``str`` of >= 32 hex chars -- decoded to bytes; + * ``False`` -- disable HMAC; emits :class:`UnsafePickleWarning` + and reads pre-fix caches as raw pickle (insecure); + * ``None`` (or omitted) -- resolve lazily from the + ``DISKCACHE_PICKLE_KEY`` environment variable, then from + ``/.diskcache_pickle_key`` (auto-generated with + an :class:`UnsafePickleWarning`). """ self._directory = directory self.min_file_size = min_file_size self.pickle_protocol = pickle_protocol + # Eagerly validate explicit (non-False, non-None) keys so bad + # input fails at construction time instead of on first use. + # ``False`` and ``None`` stay lazy so we don't emit warnings or + # touch the filesystem for caches that never exercise pickle. + if ( + pickle_key is not _PICKLE_KEY_UNSET + and pickle_key is not None + and pickle_key is not False + ): + pickle_key = self._coerce_pickle_key( + pickle_key, source='pickle_key argument' + ) + self._pickle_key_arg = pickle_key + self._pickle_key_resolved = None + self._pickle_key_warned = False + + # -- CVE-2025-69872 / GHSA-w8v5-vhqr-4h9v helpers ---------------------- + + def _resolve_pickle_key(self): + """Resolve and cache the pickle HMAC key. Lazy. + + Returns either a ``bytes`` key, or ``False`` when the user has + opted out of HMAC verification. Never returns ``None``. + """ + if self._pickle_key_resolved is not None: + return self._pickle_key_resolved + + arg = self._pickle_key_arg + + if arg is False: + if not self._pickle_key_warned: + warnings.warn( + 'DiskCache pickle HMAC verification is disabled ' + '(pickle_key=False). The cache directory must be ' + 'fully trusted; an attacker with write access can ' + 'achieve arbitrary code execution via ' + 'CVE-2025-69872.', + UnsafePickleWarning, + stacklevel=4, + ) + self._pickle_key_warned = True + self._pickle_key_resolved = False + return False + + if arg is not _PICKLE_KEY_UNSET and arg is not None: + key = self._coerce_pickle_key(arg, source='pickle_key argument') + self._pickle_key_resolved = key + return key + + env_value = os.environ.get(PICKLE_KEY_ENV) + if env_value: + key = self._coerce_pickle_key( + env_value, source='%s environment variable' % PICKLE_KEY_ENV + ) + self._pickle_key_resolved = key + return key + + key_path = op.join(self._directory, PICKLE_KEY_FILENAME) + key = self._read_or_create_pickle_key_file(key_path) + if not self._pickle_key_warned: + warnings.warn( + 'DiskCache auto-generated a pickle HMAC key at %r. This ' + 'detects accidental cache corruption but does NOT ' + 'protect against attackers with read access to the ' + 'cache directory (CVE-2025-69872). For stronger ' + 'protection, set the %s environment variable to a ' + 'hex-encoded random key (>= %d bytes), or pass ' + 'pickle_key=... explicitly.' + % (key_path, PICKLE_KEY_ENV, PICKLE_KEY_MIN_LEN), + UnsafePickleWarning, + stacklevel=4, + ) + self._pickle_key_warned = True + self._pickle_key_resolved = key + return key + + @staticmethod + def _coerce_pickle_key(value, source): + if isinstance(value, str): + try: + key = bytes.fromhex(value) + except ValueError: + raise ValueError( + '%s must be hex-encoded bytes' % source + ) from None + elif isinstance(value, (bytes, bytearray)): + key = bytes(value) + else: + raise TypeError( + '%s must be bytes, bytearray, hex str, False or None; ' + 'got %s' % (source, type(value).__name__) + ) + if len(key) < PICKLE_KEY_MIN_LEN: + raise ValueError( + '%s must be at least %d bytes (got %d)' + % (source, PICKLE_KEY_MIN_LEN, len(key)) + ) + return key + + def _read_or_create_pickle_key_file(self, path): + # Make sure the cache directory exists; Cache.__init__ usually + # already created it but Disk may be used standalone in tests. + directory = op.dirname(path) or '.' + if not op.isdir(directory): + os.makedirs(directory, 0o755) + + flags = os.O_CREAT | os.O_EXCL | os.O_WRONLY + if hasattr(os, 'O_BINARY'): + flags |= os.O_BINARY + try: + fd = os.open(path, flags, 0o600) + except FileExistsError: + with open(path, 'rb') as reader: + data = reader.read() + if len(data) < PICKLE_KEY_MIN_LEN: + raise RuntimeError( + 'DiskCache pickle key file %r is too short (%d < %d); ' + 'remove it to regenerate, or supply pickle_key=' + 'explicitly.' + % (path, len(data), PICKLE_KEY_MIN_LEN) + ) + return data + else: + try: + new_key = secrets.token_bytes(32) + os.write(fd, new_key) + finally: + os.close(fd) + with cl.suppress(OSError): + os.chmod(path, 0o600) + return new_key + + def _pickle_dump(self, obj, optimize=False): + """Pickle ``obj`` and wrap with HMAC envelope (or raw bytes if + HMAC has been disabled via ``pickle_key=False``). + """ + payload = pickle.dumps(obj, protocol=self.pickle_protocol) + if optimize: + payload = pickletools.optimize(payload) + key = self._resolve_pickle_key() + if key is False: + return payload + mac = hmac.new(key, payload, hashlib.sha256).digest() + return PICKLE_MAGIC + mac + payload + + def _pickle_load_bytes(self, data): + """Verify the HMAC envelope and unpickle ``data``.""" + if isinstance(data, sqlite3.Binary): + data = bytes(data) + elif isinstance(data, memoryview): + data = data.tobytes() + elif not isinstance(data, (bytes, bytearray)): + data = bytes(data) + + magic_len = len(PICKLE_MAGIC) + has_envelope = ( + len(data) >= PICKLE_HEADER_SIZE + and data[:magic_len] == PICKLE_MAGIC + ) + key = self._resolve_pickle_key() + + if has_envelope: + mac = data[magic_len:PICKLE_HEADER_SIZE] + payload = data[PICKLE_HEADER_SIZE:] + if key is False: + # Legacy mode: skip verification, but still strip the + # envelope so we unpickle the same bytes the writer + # intended. + return pickle.load(io.BytesIO(payload)) + expected = hmac.new(key, payload, hashlib.sha256).digest() + if not hmac.compare_digest(mac, expected): + raise pickle.UnpicklingError( + 'diskcache: pickle HMAC verification failed; cache ' + 'entry may be corrupt or tampered with ' + '(CVE-2025-69872).' + ) + return pickle.load(io.BytesIO(payload)) + + # No envelope present. + if key is False: + return pickle.load(io.BytesIO(data)) + raise pickle.UnpicklingError( + 'diskcache: pickle envelope missing; this cache entry ' + 'predates the CVE-2025-69872 mitigation. Pass ' + 'pickle_key=False to read legacy data (insecure), or ' + 'recreate the cache.' + ) + + def _pickle_load_file(self, fileobj): + """Read entire file and verify before unpickling. + + Note: large MODE_PICKLE values stored as separate ``.val`` files + are buffered in memory so the bytes verified by HMAC are exactly + the bytes fed to ``pickle.load`` (no TOCTOU window). + """ + return self._pickle_load_bytes(fileobj.read()) + + # -- end CVE mitigation helpers --------------------------------------- def hash(self, key): """Compute portable hash for `key`. @@ -158,9 +420,8 @@ def put(self, key): ): return key, True else: - data = pickle.dumps(key, protocol=self.pickle_protocol) - result = pickletools.optimize(data) - return sqlite3.Binary(result), False + data = self._pickle_dump(key, optimize=True) + return sqlite3.Binary(data), False def get(self, key, raw): """Convert fields `key` and `raw` from Cache table to key. @@ -174,7 +435,7 @@ def get(self, key, raw): if raw: return bytes(key) if type(key) is sqlite3.Binary else key else: - return pickle.load(io.BytesIO(key)) + return self._pickle_load_bytes(key) def store(self, value, read, key=UNKNOWN): """Convert `value` to fields size, mode, filename, and value for Cache @@ -218,7 +479,7 @@ def store(self, value, read, key=UNKNOWN): size = self._write(full_path, iterator, 'xb') return size, MODE_BINARY, filename, None else: - result = pickle.dumps(value, protocol=self.pickle_protocol) + result = self._pickle_dump(value) if len(result) < min_file_size: return 0, MODE_PICKLE, None, sqlite3.Binary(result) @@ -279,9 +540,9 @@ def fetch(self, mode, filename, value, read): elif mode == MODE_PICKLE: if value is None: with open(op.join(self._directory, filename), 'rb') as reader: - return pickle.load(reader) + return self._pickle_load_file(reader) else: - return pickle.load(io.BytesIO(value)) + return self._pickle_load_bytes(value) def filename(self, key=UNKNOWN, value=UNKNOWN): """Return filename and full-path tuple for file storage. @@ -423,7 +684,10 @@ def __init__(self, directory=None, timeout=60, disk=Disk, **settings): :param str directory: cache directory :param float timeout: SQLite connection timeout :param disk: Disk type or subclass for serialization - :param settings: any of DEFAULT_SETTINGS + :param settings: any of DEFAULT_SETTINGS, plus the optional + non-persistent ``disk_pickle_key`` argument used to verify + pickle envelopes (CVE-2025-69872). See :class:`Disk` for the + accepted values. """ try: @@ -431,6 +695,14 @@ def __init__(self, directory=None, timeout=60, disk=Disk, **settings): except (TypeError, AssertionError): raise ValueError('disk must subclass diskcache.Disk') from None + # CVE-2025-69872: ``disk_pickle_key`` is intentionally treated as + # constructor-only configuration. Persisting it in the Settings + # table would either leak the secret into the same compromised + # cache directory or let an attacker make ``pickle_key=False`` + # sticky across restarts via ``cache.reset``. Pop it before any + # of the regular settings plumbing runs. + pickle_key = settings.pop('disk_pickle_key', _PICKLE_KEY_UNSET) + if directory is None: directory = tempfile.mkdtemp(prefix='diskcache-') directory = str(directory) @@ -490,6 +762,13 @@ def __init__(self, directory=None, timeout=60, disk=Disk, **settings): for key, value in sets.items() if key.startswith('disk_') } + # Defensive: never source pickle_key from persisted Settings even + # if a stale row exists (it should not, because we never store + # it -- but Settings tables created by older patched builds or + # by hand could in principle contain one). + kwargs.pop('pickle_key', None) + if pickle_key is not _PICKLE_KEY_UNSET: + kwargs['pickle_key'] = pickle_key self._disk = disk(directory, **kwargs) # Set cached attributes: updates settings and sets pragmas. @@ -1970,6 +2249,12 @@ def check(self, fix=False, retry=False): if DBNAME in full_path: continue + if op.basename(full_path) == PICKLE_KEY_FILENAME: + # CVE-2025-69872: auto-generated HMAC key + # file lives at the root of the cache dir + # and is not tracked in the Cache table. + continue + message = 'unknown file: %s' % full_path warnings.warn(message, UnknownFileWarning) diff --git a/tests/test_security.py b/tests/test_security.py new file mode 100644 index 0000000..61b6d3a --- /dev/null +++ b/tests/test_security.py @@ -0,0 +1,413 @@ +"""Tests for the CVE-2025-69872 / GHSA-w8v5-vhqr-4h9v mitigation. + +These tests exercise the HMAC envelope wrapping every pickle blob written +by :class:`diskcache.Disk`. They live in their own module so the rest of +the suite continues to run with the default +:class:`UnsafePickleWarning` filter, while these tests can opt to either +suppress or assert that the warning is emitted. +""" + +import io +import os +import os.path as op +import pickle +import secrets +import shutil +import sqlite3 +import tempfile +import warnings + +import pytest + +import diskcache as dc +from diskcache.core import ( + PICKLE_HEADER_SIZE, + PICKLE_KEY_ENV, + PICKLE_KEY_FILENAME, + PICKLE_MAGIC, +) + + +pytestmark = pytest.mark.filterwarnings( + 'ignore', category=dc.EmptyDirWarning +) + + +@pytest.fixture +def tmp_cache_dir(): + path = tempfile.mkdtemp(prefix='diskcache-sec-') + try: + yield path + finally: + shutil.rmtree(path, ignore_errors=True) + + +@pytest.fixture +def clear_env(monkeypatch): + monkeypatch.delenv(PICKLE_KEY_ENV, raising=False) + + +# -- helpers ------------------------------------------------------------ + + +class _Pwn: + """Pickle payload that would execute code if naively unpickled.""" + + def __reduce__(self): + # Using ``print`` rather than ``os.system`` keeps the test + # self-contained and side-effect free if the protection ever + # regresses. + return (print, ('PWNED!',)) + + +def _force_evil_inline_pickle(directory, key): + """Replace the inline pickle blob for ``key`` with a malicious payload.""" + con = sqlite3.connect(op.join(directory, 'cache.db')) + try: + con.execute( + 'UPDATE Cache SET value = ?, mode = 4 WHERE key = ?', + (pickle.dumps(_Pwn()), key), + ) + con.commit() + finally: + con.close() + + +def _val_files(directory): + out = [] + for root, _, files in os.walk(directory): + for name in files: + if name.endswith('.val'): + out.append(op.join(root, name)) + return out + + +# -- round-trip & default behaviour ------------------------------------ + + +def test_default_roundtrip_emits_warning_and_creates_key_file( + tmp_cache_dir, clear_env +): + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter('always') + with dc.Cache(tmp_cache_dir) as cache: + cache[('tup', 'le')] = {'nested': [1, 2, 3]} + assert cache[('tup', 'le')] == {'nested': [1, 2, 3]} + cache.check() + + msgs = [w for w in caught if isinstance(w.message, dc.UnsafePickleWarning)] + assert msgs, 'expected UnsafePickleWarning for in-dir auto key' + assert PICKLE_KEY_FILENAME in str(msgs[0].message) + assert op.isfile(op.join(tmp_cache_dir, PICKLE_KEY_FILENAME)) + + +def test_envelope_header_is_present_in_db(tmp_cache_dir, clear_env): + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir) as cache: + cache['k'] = {'value': 'object'} + + con = sqlite3.connect(op.join(tmp_cache_dir, 'cache.db')) + try: + ((blob,),) = con.execute( + 'SELECT value FROM Cache WHERE key = ?', ('k',) + ).fetchall() + finally: + con.close() + + raw = bytes(blob) + assert raw.startswith(PICKLE_MAGIC) + assert len(raw) > PICKLE_HEADER_SIZE + + +# -- explicit key ------------------------------------------------------ + + +def test_explicit_key_no_keyfile_no_warning(tmp_cache_dir, clear_env): + key = secrets.token_bytes(32) + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter('always') + with dc.Cache(tmp_cache_dir, disk_pickle_key=key) as cache: + cache['x'] = [1, 2, 3] + assert cache['x'] == [1, 2, 3] + + assert not any( + isinstance(w.message, dc.UnsafePickleWarning) for w in caught + ) + assert not op.exists(op.join(tmp_cache_dir, PICKLE_KEY_FILENAME)) + + +def test_explicit_key_hex_string(tmp_cache_dir, clear_env): + key = secrets.token_bytes(32).hex() + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir, disk_pickle_key=key) as cache: + cache['x'] = ['hex-keyed'] + assert cache['x'] == ['hex-keyed'] + + +def test_explicit_key_too_short_raises(tmp_cache_dir, clear_env): + with pytest.raises(ValueError, match='at least'): + dc.Cache(tmp_cache_dir, disk_pickle_key=b'shortkey') + + +def test_explicit_key_wrong_type_raises(tmp_cache_dir, clear_env): + with pytest.raises(TypeError): + dc.Cache(tmp_cache_dir, disk_pickle_key=12345) + + +def test_explicit_key_not_persisted_in_settings(tmp_cache_dir, clear_env): + key = secrets.token_bytes(32) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir, disk_pickle_key=key) as cache: + cache['x'] = {'persist?': False} + + con = sqlite3.connect(op.join(tmp_cache_dir, 'cache.db')) + try: + rows = con.execute( + 'SELECT key, value FROM Settings WHERE key LIKE ?', + ('%pickle_key%',), + ).fetchall() + finally: + con.close() + + assert rows == [], ( + 'disk_pickle_key must NOT be persisted in the Settings table' + ) + + +def test_mismatched_key_fails_to_decrypt(tmp_cache_dir, clear_env): + key1 = secrets.token_bytes(32) + key2 = secrets.token_bytes(32) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir, disk_pickle_key=key1) as cache: + cache['k'] = {'secret': True} + with dc.Cache(tmp_cache_dir, disk_pickle_key=key2) as cache: + with pytest.raises(pickle.UnpicklingError): + cache['k'] + + +# -- env var ----------------------------------------------------------- + + +def test_env_var_used_when_no_explicit_key(tmp_cache_dir, monkeypatch): + monkeypatch.setenv(PICKLE_KEY_ENV, secrets.token_bytes(32).hex()) + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter('always') + with dc.Cache(tmp_cache_dir) as cache: + cache['k'] = {'env': True} + assert cache['k'] == {'env': True} + + assert not any( + isinstance(w.message, dc.UnsafePickleWarning) for w in caught + ) + assert not op.exists(op.join(tmp_cache_dir, PICKLE_KEY_FILENAME)) + + +def test_env_var_invalid_hex_raises(tmp_cache_dir, monkeypatch): + monkeypatch.setenv(PICKLE_KEY_ENV, 'not-hex-at-all') + with dc.Cache(tmp_cache_dir) as cache: + with pytest.raises(ValueError, match='hex'): + cache['k'] = {'x': 1} + + +# -- tamper detection -------------------------------------------------- + + +def test_tampered_inline_value_raises(tmp_cache_dir, clear_env): + key = secrets.token_bytes(32) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir, disk_pickle_key=key) as cache: + cache['k'] = {'foo': 'bar'} + + _force_evil_inline_pickle(tmp_cache_dir, 'k') + + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir, disk_pickle_key=key) as cache: + with pytest.raises(pickle.UnpicklingError): + cache['k'] + + +def test_bitflip_inline_value_raises(tmp_cache_dir, clear_env): + """Tampering that keeps the envelope header but corrupts the payload + must fail HMAC verification (not just the missing-envelope check).""" + key = secrets.token_bytes(32) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir, disk_pickle_key=key) as cache: + cache['k'] = {'foo': 'bar'} + + con = sqlite3.connect(op.join(tmp_cache_dir, 'cache.db')) + try: + ((blob,),) = con.execute( + 'SELECT value FROM Cache WHERE key = ?', ('k',) + ).fetchall() + raw = bytearray(bytes(blob)) + # Flip a bit deep inside the payload (past the 36-byte header). + raw[-1] ^= 0x01 + con.execute( + 'UPDATE Cache SET value = ? WHERE key = ?', (bytes(raw), 'k') + ) + con.commit() + finally: + con.close() + + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir, disk_pickle_key=key) as cache: + with pytest.raises(pickle.UnpicklingError, match='HMAC'): + cache['k'] + + +def test_tampered_val_file_raises(tmp_cache_dir, clear_env): + key = secrets.token_bytes(32) + big_value = {'data': 'x' * (40 * 1024)} + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache( + tmp_cache_dir, disk_pickle_key=key, disk_min_file_size=1024 + ) as cache: + cache['big'] = big_value + + files = _val_files(tmp_cache_dir) + assert len(files) == 1, files + with open(files[0], 'wb') as writer: + writer.write(pickle.dumps(_Pwn())) + + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir, disk_pickle_key=key) as cache: + with pytest.raises(pickle.UnpicklingError): + cache['big'] + + +def test_tampered_pickled_key_via_iteration_raises(tmp_cache_dir, clear_env): + """Complex (pickled) keys are loaded during iteration / peekitem, so + tampering them must surface as UnpicklingError on those paths.""" + key = secrets.token_bytes(32) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir, disk_pickle_key=key) as cache: + cache[('complex', 'key')] = 'value' + + con = sqlite3.connect(op.join(tmp_cache_dir, 'cache.db')) + try: + rows = con.execute( + 'SELECT rowid, key FROM Cache WHERE raw = 0' + ).fetchall() + assert rows, 'expected at least one pickled key row' + rowid, _ = rows[0] + evil = pickle.dumps(_Pwn()) + con.execute( + 'UPDATE Cache SET key = ? WHERE rowid = ?', + (sqlite3.Binary(evil), rowid), + ) + con.commit() + finally: + con.close() + + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir, disk_pickle_key=key) as cache: + with pytest.raises(pickle.UnpicklingError): + list(cache) + + +# -- legacy mode ------------------------------------------------------- + + +def test_legacy_mode_emits_warning_and_works(tmp_cache_dir, clear_env): + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter('always') + with dc.Cache(tmp_cache_dir, disk_pickle_key=False) as cache: + cache['z'] = {'legacy': True} + assert cache['z'] == {'legacy': True} + + msgs = [w for w in caught if isinstance(w.message, dc.UnsafePickleWarning)] + assert msgs, 'pickle_key=False must emit UnsafePickleWarning' + assert 'disabled' in str(msgs[0].message).lower() + + +def test_secure_mode_rejects_legacy_data(tmp_cache_dir, clear_env): + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir, disk_pickle_key=False) as cache: + cache['k'] = {'legacy': True} + with dc.Cache( + tmp_cache_dir, disk_pickle_key=secrets.token_bytes(32) + ) as cache: + with pytest.raises(pickle.UnpicklingError, match='envelope'): + cache['k'] + + +def test_legacy_mode_can_read_secure_data(tmp_cache_dir, clear_env): + """Legacy mode strips the envelope so it can read forward as well.""" + key = secrets.token_bytes(32) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir, disk_pickle_key=key) as cache: + cache['k'] = {'enveloped': True} + with dc.Cache(tmp_cache_dir, disk_pickle_key=False) as cache: + assert cache['k'] == {'enveloped': True} + + +# -- JSONDisk unaffected ------------------------------------------------ + + +def test_jsondisk_does_not_create_keyfile(tmp_cache_dir, clear_env): + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter('always') + with dc.Cache(tmp_cache_dir, disk=dc.JSONDisk) as cache: + cache['a'] = [1, 2, 3] + assert cache['a'] == [1, 2, 3] + + assert not any( + isinstance(w.message, dc.UnsafePickleWarning) for w in caught + ) + assert not op.exists(op.join(tmp_cache_dir, PICKLE_KEY_FILENAME)) + + +# -- Cache.check whitelist --------------------------------------------- + + +def test_check_does_not_warn_about_keyfile(tmp_cache_dir, clear_env): + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter('always') + with dc.Cache(tmp_cache_dir) as cache: + cache['k'] = {'foo': 'bar'} + warns = cache.check() + assert op.isfile(op.join(tmp_cache_dir, PICKLE_KEY_FILENAME)) + + unknown = [w for w in warns if isinstance(w.message, dc.UnknownFileWarning)] + assert unknown == [], ( + 'cache.check() must whitelist .diskcache_pickle_key, got %r' % unknown + ) + + unknown_top = [ + w for w in caught if isinstance(w.message, dc.UnknownFileWarning) + ] + assert unknown_top == [] + + +# -- primitive keys/values bypass pickle entirely ---------------------- + + +def test_primitive_only_does_not_create_keyfile(tmp_cache_dir, clear_env): + """Lazy resolution means a cache used only with primitive keys and + values never triggers HMAC key creation.""" + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter('always') + with dc.Cache(tmp_cache_dir) as cache: + cache['plain-str-key'] = 'plain-str-value' + cache[42] = 3.14 + cache[b'bytes-key'] = b'bytes-value' + assert cache['plain-str-key'] == 'plain-str-value' + + assert not any( + isinstance(w.message, dc.UnsafePickleWarning) for w in caught + ) + assert not op.exists(op.join(tmp_cache_dir, PICKLE_KEY_FILENAME)) From bacedee68a295ce307b16800d5349ab78e088233 Mon Sep 17 00:00:00 2001 From: Yeshwanth Nagaraj Date: Wed, 6 May 2026 12:54:14 +0530 Subject: [PATCH 2/6] Address 32-reviewer audit: path traversal, FanoutCache, races, more Follow-up to the CVE-2025-69872 mitigation that addresses findings from a 32-way parallel code review: CRITICAL fixes: * Block path traversal in ``Disk.fetch`` and ``Disk.remove`` (review C2 / r01,r16,r32). An attacker with write access to ``cache.db`` could previously set the ``filename`` column to ``../../../etc/passwd`` and have ``Disk.fetch`` ``open()`` arbitrary files outside the cache directory -- entirely bypassing HMAC verification because MODE_BINARY/MODE_TEXT do not unpickle. New helper ``Disk._safe_filename_path`` resolves the requested filename via ``op.realpath`` and refuses anything that escapes ``self._directory``. Confirmed via pen-test PoC by reviewer r32. * FanoutCache now resolves the pickle HMAC key once at the FanoutCache root and forwards it to every shard (review C3 / r09). Previously each of N shards (default 8) independently auto-generated its own ``.diskcache_pickle_key`` file. ``FanoutCache._hash`` uses shard 0's key for routing, but storage shards held different keys -- making sharding non-deterministic across processes (two FanoutCache instances on the same directory would route the same complex key to different shards) and emitting one warning per shard. HIGH fixes: * ``Cache.check`` whitelist for ``.diskcache_pickle_key`` is anchored to the cache directory root rather than basename-only (review H1 / r07). Previously an attacker could plant a file named ``.diskcache_pickle_key`` in any subdirectory to evade ``check(fix=True)`` cleanup. * ``Cache.__getstate__`` raises ``TypeError`` when the cache was configured with an explicit ``disk_pickle_key`` (or ``disk_pickle_key=False``) rather than silently dropping the secret on the floor and letting the receiving process regenerate a different key (review H2 / r08). Previously, sending a Cache to a multiprocessing worker silently corrupted reads. * Atomic key file creation with retry-on-empty-read (review H3 / r05, r20, r21, r22, r31). The previous ``O_CREAT|O_EXCL`` + later ``os.write`` left a race window in which a concurrent reader saw a zero-byte file and crashed with a false-positive "too short" error. The reader now retries with short backoff (5 attempts, ~31 ms total) to ride out the writer's ``os.write``. The writer also unlinks the half-baked file on ``OSError`` (M8) so disk-full failures don't permanently break subsequent processes. * ``os.makedirs`` in the key-file helper uses ``exist_ok=True`` to fix the TOCTOU race that crashed concurrent ``Disk`` initialisation (review H4 / r05, r21, r22). MEDIUM fixes: * Defensive pop of ``disk_pickle_key`` from rows loaded from the ``Settings`` table so a stale row (from an older patched build, hand-edit, or attacker injection) cannot flow into the Disk constructor (review M3 / r06). * ``_coerce_pickle_key`` ``TypeError`` no longer advertises ``False`` or ``None`` -- callers filter those before reaching the function (review M4 / r25). * Standardised message prefix to lowercase ``"diskcache:"`` across warnings and exceptions (review M5 / r18). INTERNAL refactor: * ``_coerce_pickle_key``, ``_read_or_create_pickle_key_file``, ``_resolve_pickle_key_for_directory``, and ``_emit_pickle_key_warning`` are now module-level functions so ``FanoutCache`` can resolve the key without instantiating a Disk. NOT FIXED (documented as known limitations): * Reviewer H5 (HMAC does not bind cache-key context, enabling row- swap / replay attacks) requires plumbing the cache key through ``Disk.fetch``'s public signature, which is a breaking change for custom Disk subclasses. Deferred. * Reviewer H6 (large MODE_PICKLE values now resident in memory for HMAC verification) is a documented tradeoff; a configurable ``disk_max_pickle_size`` ceiling is a possible future addition. * Reviewer M2 (cached resolved key never re-read from disk after key rotation) is documented; key rotation requires restarting all processes consuming the cache. * Reviewer C1 (legacy mode ``pickle_key=False`` strips and unpickles envelopes without verification) is intentional. The strip-and-load behaviour preserves a recovery path for caches written under a key the user has lost; in legacy mode the user has explicitly accepted unsafe pickle, and an attacker with write access can already RCE via raw pickle whether or not we strip the envelope. Test additions (18 new tests, all passing): * test_path_traversal_in_filename_raises * test_path_traversal_blocked_for_text_mode * test_remove_refuses_traversal * test_fanoutcache_propagates_explicit_key * test_fanoutcache_default_emits_one_warning * test_fanoutcache_two_instances_share_default_key * test_check_does_not_whitelist_subdirectory_keyfile * test_cache_pickling_with_explicit_key_raises * test_cache_pickling_with_legacy_mode_raises * test_cache_pickling_with_default_succeeds * test_disk_pickle_key_in_settings_does_not_leak * test_short_envelope_header_raises * test_zero_byte_value_raises * test_valid_hmac_invalid_pickle_raises * test_env_var_short_hex_raises * test_env_var_empty_raises * test_concurrent_key_file_creation_no_crash * test_typeerror_message_lists_only_real_types Test results: 179 existing + 38 new security = 217 passed. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- diskcache/core.py | 381 ++++++++++++++++++++++---------- diskcache/fanout.py | 40 +++- tests/test_security.py | 483 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 793 insertions(+), 111 deletions(-) diff --git a/diskcache/core.py b/diskcache/core.py index df9ee10..627dd23 100644 --- a/diskcache/core.py +++ b/diskcache/core.py @@ -91,6 +91,191 @@ class UnsafePickleWarning(UserWarning): See CVE-2025-69872 / GHSA-w8v5-vhqr-4h9v. """ + +# Sources reported by :func:`_resolve_pickle_key_for_directory`. +_PKEY_SRC_DISABLED = 'disabled' +_PKEY_SRC_EXPLICIT = 'explicit' +_PKEY_SRC_ENV = 'env' +_PKEY_SRC_FILE_CREATED = 'file_created' +_PKEY_SRC_FILE_EXISTING = 'file_existing' + + +def _coerce_pickle_key(value, source): + """Validate and coerce a user-supplied HMAC key value to ``bytes``. + + ``value`` must be ``bytes``/``bytearray`` or a hex-encoded ``str``. + ``False`` and ``None`` are filtered by callers and never reach this + function. + """ + if isinstance(value, str): + try: + key = bytes.fromhex(value) + except ValueError: + raise ValueError( + '%s must be hex-encoded bytes' % source + ) from None + elif isinstance(value, (bytes, bytearray)): + key = bytes(value) + else: + raise TypeError( + '%s must be bytes, bytearray, or hex str; got %s' + % (source, type(value).__name__) + ) + if len(key) < PICKLE_KEY_MIN_LEN: + raise ValueError( + '%s must be at least %d bytes (got %d)' + % (source, PICKLE_KEY_MIN_LEN, len(key)) + ) + return key + + +def _read_or_create_pickle_key_file(path): + """Atomically create or read the on-disk HMAC key file at ``path``. + + Returns ``(key_bytes, created)``. ``created`` is ``True`` when the + current process emerged as the writer in a multi-process race. + + Uses ``O_CREAT | O_EXCL`` so the first writer wins; concurrent + losers retry the read with a short backoff to ride out the race + window between the winner's ``os.open`` and ``os.write`` -- the + pre-fix code raised a false-positive "too short" error on every + loser and required manual recovery. + """ + directory = op.dirname(path) or '.' + os.makedirs(directory, 0o755, exist_ok=True) + + flags = os.O_CREAT | os.O_EXCL | os.O_WRONLY + if hasattr(os, 'O_BINARY'): + flags |= os.O_BINARY + + delay = 0.001 + last_short_len = -1 + for attempt in range(6): + try: + fd = os.open(path, flags, 0o600) + except FileExistsError: + try: + with open(path, 'rb') as reader: + data = reader.read() + except (FileNotFoundError, PermissionError): + # File vanished or was momentarily inaccessible. Try + # to (re)create on the next iteration. + time.sleep(delay) + delay *= 2 + continue + + if len(data) >= PICKLE_KEY_MIN_LEN: + return data, False + + # Race window: the winning writer hasn't completed its + # ``os.write`` yet (or it crashed mid-write). Retry. + last_short_len = len(data) + if attempt < 5: + time.sleep(delay) + delay *= 2 + continue + + raise RuntimeError( + 'diskcache: pickle key file %r is too short (%d < %d) ' + 'after %d retries; it may be corrupted -- remove it to ' + 'regenerate, or pass pickle_key= explicitly.' + % (path, last_short_len, PICKLE_KEY_MIN_LEN, attempt + 1) + ) + + # We won the race. Generate and write the key. + try: + new_key = secrets.token_bytes(32) + os.write(fd, new_key) + with cl.suppress(OSError): + os.fsync(fd) + except OSError: + # Disk full / write failure. Remove the half-baked file so + # subsequent processes don't get stuck on a permanent + # too-short error. + with cl.suppress(OSError): + os.close(fd) + with cl.suppress(OSError): + os.unlink(path) + raise + else: + os.close(fd) + + with cl.suppress(OSError): + os.chmod(path, 0o600) + return new_key, True + + # Defensive: the loop should always return or raise. + raise RuntimeError( + 'diskcache: failed to create pickle key file %r' % path + ) + + +def _resolve_pickle_key_for_directory(directory, arg): + """Resolve the pickle HMAC key for ``directory``. + + Returns ``(key, source)`` where ``key`` is ``bytes`` or ``False`` + and ``source`` is one of the ``_PKEY_SRC_*`` constants. This is + the single source of truth shared by :class:`Disk` and + :class:`~diskcache.FanoutCache` so a fanout cache's shards all use + the same key (otherwise sharding becomes non-deterministic). + """ + if arg is False: + return False, _PKEY_SRC_DISABLED + + if arg is not _PICKLE_KEY_UNSET and arg is not None: + return ( + _coerce_pickle_key(arg, source='pickle_key argument'), + _PKEY_SRC_EXPLICIT, + ) + + env_value = os.environ.get(PICKLE_KEY_ENV) + if env_value: + return ( + _coerce_pickle_key( + env_value, + source='%s environment variable' % PICKLE_KEY_ENV, + ), + _PKEY_SRC_ENV, + ) + + key_path = op.join(directory, PICKLE_KEY_FILENAME) + key, created = _read_or_create_pickle_key_file(key_path) + return key, _PKEY_SRC_FILE_CREATED if created else _PKEY_SRC_FILE_EXISTING + + +def _emit_pickle_key_warning(source, directory, stacklevel): + """Emit the appropriate :class:`UnsafePickleWarning` for ``source``. + + Returns ``True`` if a warning was emitted, ``False`` otherwise (so + the caller can flip its "already warned" flag accurately). + """ + if source == _PKEY_SRC_DISABLED: + warnings.warn( + 'diskcache: pickle HMAC verification is disabled ' + '(pickle_key=False). The cache directory must be fully ' + 'trusted; an attacker with write access can achieve ' + 'arbitrary code execution via CVE-2025-69872.', + UnsafePickleWarning, + stacklevel=stacklevel, + ) + return True + if source in (_PKEY_SRC_FILE_CREATED, _PKEY_SRC_FILE_EXISTING): + key_path = op.join(directory, PICKLE_KEY_FILENAME) + warnings.warn( + 'diskcache: using pickle HMAC key from %r (CVE-2025-69872 ' + 'default fallback). This detects accidental cache ' + 'corruption but does NOT protect against attackers with ' + 'read access to the cache directory. For stronger ' + 'protection set the %s environment variable to a hex-' + 'encoded random key (>= %d bytes), or pass pickle_key=... ' + 'explicitly.' + % (key_path, PICKLE_KEY_ENV, PICKLE_KEY_MIN_LEN), + UnsafePickleWarning, + stacklevel=stacklevel, + ) + return True + return False + MODE_NONE = 0 MODE_RAW = 1 MODE_BINARY = 2 @@ -187,7 +372,7 @@ def __init__(self, directory, min_file_size=0, pickle_protocol=0, and pickle_key is not None and pickle_key is not False ): - pickle_key = self._coerce_pickle_key( + pickle_key = _coerce_pickle_key( pickle_key, source='pickle_key argument' ) self._pickle_key_arg = pickle_key @@ -205,111 +390,16 @@ def _resolve_pickle_key(self): if self._pickle_key_resolved is not None: return self._pickle_key_resolved - arg = self._pickle_key_arg - - if arg is False: - if not self._pickle_key_warned: - warnings.warn( - 'DiskCache pickle HMAC verification is disabled ' - '(pickle_key=False). The cache directory must be ' - 'fully trusted; an attacker with write access can ' - 'achieve arbitrary code execution via ' - 'CVE-2025-69872.', - UnsafePickleWarning, - stacklevel=4, - ) - self._pickle_key_warned = True - self._pickle_key_resolved = False - return False - - if arg is not _PICKLE_KEY_UNSET and arg is not None: - key = self._coerce_pickle_key(arg, source='pickle_key argument') - self._pickle_key_resolved = key - return key - - env_value = os.environ.get(PICKLE_KEY_ENV) - if env_value: - key = self._coerce_pickle_key( - env_value, source='%s environment variable' % PICKLE_KEY_ENV - ) - self._pickle_key_resolved = key - return key - - key_path = op.join(self._directory, PICKLE_KEY_FILENAME) - key = self._read_or_create_pickle_key_file(key_path) - if not self._pickle_key_warned: - warnings.warn( - 'DiskCache auto-generated a pickle HMAC key at %r. This ' - 'detects accidental cache corruption but does NOT ' - 'protect against attackers with read access to the ' - 'cache directory (CVE-2025-69872). For stronger ' - 'protection, set the %s environment variable to a ' - 'hex-encoded random key (>= %d bytes), or pass ' - 'pickle_key=... explicitly.' - % (key_path, PICKLE_KEY_ENV, PICKLE_KEY_MIN_LEN), - UnsafePickleWarning, - stacklevel=4, - ) + key, source = _resolve_pickle_key_for_directory( + self._directory, self._pickle_key_arg + ) + if not self._pickle_key_warned and _emit_pickle_key_warning( + source, self._directory, stacklevel=4 + ): self._pickle_key_warned = True self._pickle_key_resolved = key return key - @staticmethod - def _coerce_pickle_key(value, source): - if isinstance(value, str): - try: - key = bytes.fromhex(value) - except ValueError: - raise ValueError( - '%s must be hex-encoded bytes' % source - ) from None - elif isinstance(value, (bytes, bytearray)): - key = bytes(value) - else: - raise TypeError( - '%s must be bytes, bytearray, hex str, False or None; ' - 'got %s' % (source, type(value).__name__) - ) - if len(key) < PICKLE_KEY_MIN_LEN: - raise ValueError( - '%s must be at least %d bytes (got %d)' - % (source, PICKLE_KEY_MIN_LEN, len(key)) - ) - return key - - def _read_or_create_pickle_key_file(self, path): - # Make sure the cache directory exists; Cache.__init__ usually - # already created it but Disk may be used standalone in tests. - directory = op.dirname(path) or '.' - if not op.isdir(directory): - os.makedirs(directory, 0o755) - - flags = os.O_CREAT | os.O_EXCL | os.O_WRONLY - if hasattr(os, 'O_BINARY'): - flags |= os.O_BINARY - try: - fd = os.open(path, flags, 0o600) - except FileExistsError: - with open(path, 'rb') as reader: - data = reader.read() - if len(data) < PICKLE_KEY_MIN_LEN: - raise RuntimeError( - 'DiskCache pickle key file %r is too short (%d < %d); ' - 'remove it to regenerate, or supply pickle_key=' - 'explicitly.' - % (path, len(data), PICKLE_KEY_MIN_LEN) - ) - return data - else: - try: - new_key = secrets.token_bytes(32) - os.write(fd, new_key) - finally: - os.close(fd) - with cl.suppress(OSError): - os.chmod(path, 0o600) - return new_key - def _pickle_dump(self, obj, optimize=False): """Pickle ``obj`` and wrap with HMAC envelope (or raw bytes if HMAC has been disabled via ``pickle_key=False``). @@ -512,6 +602,35 @@ def _write(self, full_path, iterator, mode, encoding=None): writer.write(chunk) return size + def _safe_filename_path(self, filename): + """Return a safe full path for a database-stored ``filename``. + + Cache filenames are generated by :meth:`filename` and consist + of a randomly generated hex subdirectory and basename. An + attacker with write access to ``cache.db`` could replace a + row's ``filename`` column with a path-traversal string (e.g. + ``../../../etc/passwd``) to coerce DiskCache into reading + arbitrary files outside the cache directory. This guard + refuses any ``filename`` whose resolved location is not + strictly inside ``self._directory``. + + :raises ValueError: if ``filename`` escapes the cache directory. + """ + if not isinstance(filename, str): + raise ValueError( + 'diskcache: cache filename must be str, got %s' + % type(filename).__name__ + ) + base = op.realpath(self._directory) + full = op.realpath(op.join(self._directory, filename)) + if full != base and not full.startswith(base + os.sep): + raise ValueError( + 'diskcache: cache filename %r escapes cache directory ' + '(refusing path traversal; cache.db may have been ' + 'tampered with).' % (filename,) + ) + return full + def fetch(self, mode, filename, value, read): """Convert fields `mode`, `filename`, and `value` from Cache table to value. @@ -527,19 +646,20 @@ def fetch(self, mode, filename, value, read): # pylint: disable=unidiomatic-typecheck,consider-using-with if mode == MODE_RAW: return bytes(value) if type(value) is sqlite3.Binary else value - elif mode == MODE_BINARY: + if filename is not None: + full_path = self._safe_filename_path(filename) + if mode == MODE_BINARY: if read: - return open(op.join(self._directory, filename), 'rb') + return open(full_path, 'rb') else: - with open(op.join(self._directory, filename), 'rb') as reader: + with open(full_path, 'rb') as reader: return reader.read() elif mode == MODE_TEXT: - full_path = op.join(self._directory, filename) with open(full_path, 'r', encoding='UTF-8') as reader: return reader.read() elif mode == MODE_PICKLE: if value is None: - with open(op.join(self._directory, filename), 'rb') as reader: + with open(full_path, 'rb') as reader: return self._pickle_load_file(reader) else: return self._pickle_load_bytes(value) @@ -579,7 +699,20 @@ def remove(self, file_path): :param str file_path: relative path to file """ - full_path = op.join(self._directory, file_path) + try: + full_path = self._safe_filename_path(file_path) + except ValueError: + # CVE-2025-69872: refuse to act on tampered filenames that + # escape the cache directory. Do NOT raise -- this is + # called from eviction/cull paths that should be tolerant. + warnings.warn( + 'diskcache: refusing to remove file %r (escapes cache ' + 'directory; cache.db may be tampered).' % (file_path,), + UnsafePickleWarning, + stacklevel=2, + ) + return + full_dir, _ = op.split(full_path) # Suppress OSError that may occur if two caches attempt to delete the @@ -736,6 +869,12 @@ def __init__(self, directory=None, timeout=60, disk=Disk, **settings): except sqlite3.OperationalError: current_settings = {} + # CVE-2025-69872: ``disk_pickle_key`` MUST be constructor-only. + # If a stale row was injected into Settings (by an older + # patched build, manual edit, or attacker), drop it so it can + # neither be re-persisted nor flow into the Disk constructor. + current_settings.pop('disk_pickle_key', None) + sets = DEFAULT_SETTINGS.copy() sets.update(current_settings) sets.update(settings) @@ -2249,10 +2388,15 @@ def check(self, fix=False, retry=False): if DBNAME in full_path: continue - if op.basename(full_path) == PICKLE_KEY_FILENAME: + if full_path == op.join( + self._directory, PICKLE_KEY_FILENAME + ): # CVE-2025-69872: auto-generated HMAC key # file lives at the root of the cache dir # and is not tracked in the Cache table. + # Anchor the comparison to the root so a + # tampered file with the same basename in + # a subdirectory is NOT silently preserved. continue message = 'unknown file: %s' % full_path @@ -2650,6 +2794,25 @@ def __len__(self): return self.reset('count') def __getstate__(self): + # CVE-2025-69872: an explicit pickle_key cannot survive + # Cache pickling because __getstate__ deliberately serializes + # only directory/timeout/disk-type (the secret is intentionally + # NOT placed in pickled state). Refuse rather than silently + # regenerate a different key in the receiving process and + # corrupt subsequent reads. ``pickle_key=False`` (legacy mode) + # is also refused because the receiver would default to secure + # mode and reject the same data. + disk = self._disk + arg = getattr(disk, '_pickle_key_arg', _PICKLE_KEY_UNSET) + if arg is not _PICKLE_KEY_UNSET and arg is not None: + raise TypeError( + 'diskcache: Cache instances configured with an explicit ' + 'disk_pickle_key (or disk_pickle_key=False) cannot be ' + 'pickled because the secret is not placed in pickle ' + 'state. Pickle the cache directory path instead and ' + 'reconstruct Cache(directory, disk_pickle_key=...) in ' + 'the receiving process. (CVE-2025-69872)' + ) return (self.directory, self.timeout, type(self.disk)) def __setstate__(self, state): diff --git a/diskcache/fanout.py b/diskcache/fanout.py index 9822ee4..58a993e 100644 --- a/diskcache/fanout.py +++ b/diskcache/fanout.py @@ -4,12 +4,22 @@ import functools import itertools as it import operator +import os import os.path as op import sqlite3 import tempfile import time -from .core import DEFAULT_SETTINGS, ENOVAL, Cache, Disk, Timeout +from .core import ( + DEFAULT_SETTINGS, + ENOVAL, + Cache, + Disk, + Timeout, + _emit_pickle_key_warning, + _PICKLE_KEY_UNSET, + _resolve_pickle_key_for_directory, +) from .persistent import Deque, Index @@ -25,7 +35,13 @@ def __init__( :param int shards: number of shards to distribute writes :param float timeout: SQLite connection timeout :param disk: `Disk` instance for serialization - :param settings: any of `DEFAULT_SETTINGS` + :param settings: any of `DEFAULT_SETTINGS`, plus the optional + non-persistent ``disk_pickle_key`` argument used to verify + pickle envelopes (CVE-2025-69872). See + :class:`diskcache.Disk` for accepted values. The key is + resolved once at the FanoutCache root and forwarded to all + shards so sharding stays deterministic across processes + and a single warning is emitted in default mode. """ if directory is None: @@ -37,6 +53,21 @@ def __init__( default_size_limit = DEFAULT_SETTINGS['size_limit'] size_limit = settings.pop('size_limit', default_size_limit) / shards + # CVE-2025-69872: resolve the pickle HMAC key once at the + # FanoutCache root. Without this, each shard would + # independently auto-generate its own ``.diskcache_pickle_key`` + # file -- making sharding non-deterministic across processes + # (FanoutCache._hash uses shard 0's key but storage may land in + # shard N which holds a different key) and emitting one + # warning per shard. + pickle_key_arg = settings.pop('disk_pickle_key', _PICKLE_KEY_UNSET) + if not op.isdir(directory): + os.makedirs(directory, 0o755, exist_ok=True) + resolved_key, source = _resolve_pickle_key_for_directory( + directory, pickle_key_arg + ) + _emit_pickle_key_warning(source, directory, stacklevel=3) + self._count = shards self._directory = directory self._disk = disk @@ -46,10 +77,15 @@ def __init__( timeout=timeout, disk=disk, size_limit=size_limit, + disk_pickle_key=resolved_key, **settings, ) for num in range(shards) ) + # Suppress per-shard warnings: the FanoutCache already emitted + # one above for the entire ensemble. + for shard in self._shards: + shard._disk._pickle_key_warned = True self._hash = self._shards[0].disk.hash self._caches = {} self._deques = {} diff --git a/tests/test_security.py b/tests/test_security.py index 61b6d3a..8610550 100644 --- a/tests/test_security.py +++ b/tests/test_security.py @@ -7,13 +7,17 @@ suppress or assert that the warning is emitted. """ +import hashlib +import hmac import io +import multiprocessing import os import os.path as op import pickle import secrets import shutil import sqlite3 +import sys import tempfile import warnings @@ -411,3 +415,482 @@ def test_primitive_only_does_not_create_keyfile(tmp_cache_dir, clear_env): isinstance(w.message, dc.UnsafePickleWarning) for w in caught ) assert not op.exists(op.join(tmp_cache_dir, PICKLE_KEY_FILENAME)) + + +# -- Review-pass-2: path traversal (C2) -------------------------------- + + +def _put_evil_filename(directory, key, evil_filename, mode=2): + """Replace the row's filename to point outside the cache dir.""" + con = sqlite3.connect(op.join(directory, 'cache.db')) + try: + con.execute( + 'UPDATE Cache SET filename = ?, mode = ?, value = NULL ' + 'WHERE key = ?', + (evil_filename, mode, key), + ) + con.commit() + finally: + con.close() + + +def test_path_traversal_in_filename_raises(tmp_cache_dir, clear_env): + """An attacker who tampers cache.db's ``filename`` column to point + outside the cache directory must not be able to coerce DiskCache + into reading arbitrary files (CVE-2025-69872 adjacent).""" + secret_dir = tempfile.mkdtemp(prefix='diskcache-secret-') + try: + secret_path = op.join(secret_dir, 'secret.txt') + with open(secret_path, 'wb') as fh: + fh.write(b'TOP SECRET') + + key = secrets.token_bytes(32) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache( + tmp_cache_dir, + disk_pickle_key=key, + disk_min_file_size=1, + ) as cache: + cache['k'] = b'x' * 64 + + rel = op.relpath(secret_path, tmp_cache_dir) + _put_evil_filename(tmp_cache_dir, 'k', rel, mode=2) + + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir, disk_pickle_key=key) as cache: + with pytest.raises(ValueError, match='escapes cache'): + cache['k'] + finally: + shutil.rmtree(secret_dir, ignore_errors=True) + + +def test_path_traversal_blocked_for_text_mode(tmp_cache_dir, clear_env): + secret_dir = tempfile.mkdtemp(prefix='diskcache-secret-') + try: + secret_path = op.join(secret_dir, 'secret.txt') + with open(secret_path, 'w', encoding='utf-8') as fh: + fh.write('TOP SECRET') + + key = secrets.token_bytes(32) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache( + tmp_cache_dir, + disk_pickle_key=key, + disk_min_file_size=1, + ) as cache: + cache['k'] = 'x' * 64 + + rel = op.relpath(secret_path, tmp_cache_dir) + _put_evil_filename(tmp_cache_dir, 'k', rel, mode=3) + + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir, disk_pickle_key=key) as cache: + with pytest.raises(ValueError, match='escapes cache'): + cache['k'] + finally: + shutil.rmtree(secret_dir, ignore_errors=True) + + +def test_remove_refuses_traversal(tmp_cache_dir, clear_env): + """Disk.remove must refuse path-traversal filenames silently + (eviction must be tolerant) but still emit an UnsafePickleWarning + so operators see the tampering.""" + sentinel_dir = tempfile.mkdtemp(prefix='diskcache-sentinel-') + try: + sentinel = op.join(sentinel_dir, 'do-not-delete.txt') + with open(sentinel, 'wb') as fh: + fh.write(b'KEEP ME') + + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir) as cache: + disk = cache._disk + + rel = op.relpath(sentinel, tmp_cache_dir) + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter('always') + disk.remove(rel) + + assert op.exists(sentinel), 'sentinel file must not be deleted' + assert any( + isinstance(w.message, dc.UnsafePickleWarning) + and 'refusing to remove' in str(w.message) + for w in caught + ) + finally: + shutil.rmtree(sentinel_dir, ignore_errors=True) + + +# -- Review-pass-2: FanoutCache (C3) ----------------------------------- + + +def test_fanoutcache_propagates_explicit_key(tmp_cache_dir, clear_env): + key = secrets.token_bytes(32) + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter('always') + with dc.FanoutCache( + tmp_cache_dir, shards=4, disk_pickle_key=key + ) as cache: + cache[('complex', 'key')] = {'value': [1, 2, 3]} + assert cache[('complex', 'key')] == {'value': [1, 2, 3]} + + assert not any( + isinstance(w.message, dc.UnsafePickleWarning) for w in caught + ), 'explicit key must not emit UnsafePickleWarning' + + # No per-shard key file: the explicit key was forwarded. + for num in range(4): + shard_dir = op.join(tmp_cache_dir, '%03d' % num) + assert not op.exists(op.join(shard_dir, PICKLE_KEY_FILENAME)) + + +def test_fanoutcache_default_emits_one_warning(tmp_cache_dir, clear_env): + """With the default fallback, FanoutCache must emit ONE warning, + not one-per-shard, and create a single key file at the FanoutCache + root (not per-shard).""" + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter('always') + with dc.FanoutCache(tmp_cache_dir, shards=4) as cache: + cache[('a', 'b')] = {'val': 1} + cache[('c', 'd')] = {'val': 2} + cache[('e', 'f')] = {'val': 3} + assert cache[('a', 'b')] == {'val': 1} + assert cache[('c', 'd')] == {'val': 2} + + pkey_warnings = [ + w for w in caught if isinstance(w.message, dc.UnsafePickleWarning) + ] + assert len(pkey_warnings) == 1, ( + 'expected exactly one UnsafePickleWarning per FanoutCache, got %d' + % len(pkey_warnings) + ) + + # Root-level key file exists; no shard-level key files. + assert op.exists(op.join(tmp_cache_dir, PICKLE_KEY_FILENAME)) + for num in range(4): + shard_dir = op.join(tmp_cache_dir, '%03d' % num) + assert not op.exists(op.join(shard_dir, PICKLE_KEY_FILENAME)), ( + 'shard %d must not have its own key file' % num + ) + + +def test_fanoutcache_two_instances_share_default_key( + tmp_cache_dir, clear_env +): + """Two FanoutCache instances on the same directory must produce + consistent sharding and read each other's writes.""" + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.FanoutCache(tmp_cache_dir, shards=4) as a: + a[('shared', 'tuple', 'key')] = {'from': 'a'} + with dc.FanoutCache(tmp_cache_dir, shards=4) as b: + assert b[('shared', 'tuple', 'key')] == {'from': 'a'} + + +# -- Review-pass-2: Cache.check root-only whitelist (H1) --------------- + + +def test_check_does_not_whitelist_subdirectory_keyfile( + tmp_cache_dir, clear_env +): + """Attacker plants ``.diskcache_pickle_key`` in a subdirectory; the + whitelist must not preserve it (basename-only would have). + + We assert the observable side effect: ``check(fix=True)`` deletes + the subdirectory decoy. (Pytest's warning capture interacts oddly + with ``check()``'s own ``warnings.catch_warnings(record=True)`` + block, so we don't rely on the returned warning list.) + """ + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir, disk_min_file_size=0) as cache: + cache['k'] = 'x' * 200 # forces .val file in subdirectory + + subdir = None + for entry in os.listdir(tmp_cache_dir): + full = op.join(tmp_cache_dir, entry) + if op.isdir(full): + subdir = full + break + assert subdir is not None, ( + 'expected subdir; got %r' % os.listdir(tmp_cache_dir) + ) + decoy = op.join(subdir, PICKLE_KEY_FILENAME) + with open(decoy, 'wb') as fh: + fh.write(b'attacker-controlled-bytes') + + assert op.exists(decoy) + cache.check(fix=True) + assert not op.exists(decoy), ( + 'check(fix=True) must delete the subdirectory decoy ' + '(basename-only whitelist would have preserved it)' + ) + + # Sanity: legitimate root-level key file (not present here + # since we used disk_min_file_size=0 and didn't pickle) is + # not accidentally flagged either; create one and re-check. + root_key = op.join(tmp_cache_dir, PICKLE_KEY_FILENAME) + with open(root_key, 'wb') as fh: + fh.write(b'X' * 32) + cache.check(fix=True) + assert op.exists(root_key), ( + 'root-level key file must remain whitelisted' + ) + + +# -- Review-pass-2: Cache pickling (H2) -------------------------------- + + +def test_cache_pickling_with_explicit_key_raises(tmp_cache_dir, clear_env): + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + cache = dc.Cache(tmp_cache_dir, disk_pickle_key=secrets.token_bytes(32)) + try: + with pytest.raises(TypeError, match='disk_pickle_key'): + pickle.dumps(cache) + finally: + cache.close() + + +def test_cache_pickling_with_legacy_mode_raises(tmp_cache_dir, clear_env): + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + cache = dc.Cache(tmp_cache_dir, disk_pickle_key=False) + try: + with pytest.raises(TypeError, match='disk_pickle_key'): + pickle.dumps(cache) + finally: + cache.close() + + +def test_cache_pickling_with_default_succeeds(tmp_cache_dir, clear_env): + """No explicit key + default fallback: pickling must work because + the receiving process can re-resolve from env or file.""" + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir) as cache: + cache['k'] = {'value': 1} + blob = pickle.dumps(cache) + other = pickle.loads(blob) + try: + assert other['k'] == {'value': 1} + finally: + other.close() + + +# -- Review-pass-2: defensive Settings pop (M3) ------------------------ + + +def test_disk_pickle_key_in_settings_does_not_leak(tmp_cache_dir, clear_env): + """A stale ``disk_pickle_key`` row in Settings (e.g. from an older + patched build, manual edit, or attacker) must NOT influence the + actual HMAC key used by the Disk on subsequent opens.""" + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir) as cache: + cache['warm'] = 1 + + # Manually inject a hostile row that, if naively trusted, would + # become the HMAC key string. + poisoned = 'attacker-supplied-key-' + 'A' * 16 + con = sqlite3.connect(op.join(tmp_cache_dir, 'cache.db')) + try: + con.execute( + 'INSERT OR REPLACE INTO Settings VALUES (?, ?)', + ('disk_pickle_key', poisoned), + ) + con.commit() + finally: + con.close() + + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir) as cache: + cache['after'] = {'value': 1} + assert cache['after'] == {'value': 1} + disk = cache._disk + + # The Disk's pickle_key_arg must remain UNSET (defensive pop did + # its job); the resolved key must not equal the poisoned bytes. + from diskcache.core import _PICKLE_KEY_UNSET as UNSET + assert disk._pickle_key_arg is UNSET, ( + 'pickle_key_arg leaked from Settings: %r' % disk._pickle_key_arg + ) + if disk._pickle_key_resolved not in (None, False): + assert disk._pickle_key_resolved != poisoned.encode('utf-8'), ( + 'resolved key matches poisoned Settings row' + ) + + +# -- Review-pass-2: edge-case envelope parsing (M9) -------------------- + + +def test_short_envelope_header_raises(tmp_cache_dir, clear_env): + """Bytes that start with ``DCv1`` but are shorter than the full + 36-byte header must NOT be treated as enveloped.""" + key = secrets.token_bytes(32) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir, disk_pickle_key=key) as cache: + cache['k'] = 1 + + con = sqlite3.connect(op.join(tmp_cache_dir, 'cache.db')) + try: + # Magic + only 20 bytes (truncated MAC). + con.execute( + 'UPDATE Cache SET value = ?, mode = 4 WHERE key = ?', + (PICKLE_MAGIC + b'\x00' * 20, 'k'), + ) + con.commit() + finally: + con.close() + + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir, disk_pickle_key=key) as cache: + with pytest.raises(pickle.UnpicklingError): + cache['k'] + + +def test_zero_byte_value_raises(tmp_cache_dir, clear_env): + key = secrets.token_bytes(32) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir, disk_pickle_key=key) as cache: + cache['k'] = 1 + + con = sqlite3.connect(op.join(tmp_cache_dir, 'cache.db')) + try: + con.execute( + 'UPDATE Cache SET value = ?, mode = 4 WHERE key = ?', + (b'', 'k'), + ) + con.commit() + finally: + con.close() + + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir, disk_pickle_key=key) as cache: + with pytest.raises(pickle.UnpicklingError, match='envelope'): + cache['k'] + + +def test_valid_hmac_invalid_pickle_raises(tmp_cache_dir, clear_env): + """An attacker who somehow obtains the key (or a corrupted-but- + HMAC-valid blob) and writes a non-pickle payload should still get a + pickle error -- not silent garbage.""" + key = secrets.token_bytes(32) + payload = b'\xff' * 50 # not valid pickle + mac = hmac.new(key, payload, hashlib.sha256).digest() + envelope = PICKLE_MAGIC + mac + payload + + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir, disk_pickle_key=key) as cache: + cache['k'] = 1 + + con = sqlite3.connect(op.join(tmp_cache_dir, 'cache.db')) + try: + con.execute( + 'UPDATE Cache SET value = ?, mode = 4 WHERE key = ?', + (envelope, 'k'), + ) + con.commit() + finally: + con.close() + + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir, disk_pickle_key=key) as cache: + with pytest.raises((pickle.UnpicklingError, EOFError, KeyError)): + cache['k'] + + +def test_env_var_short_hex_raises(tmp_cache_dir, monkeypatch): + monkeypatch.setenv(PICKLE_KEY_ENV, 'deadbeef') # 4 bytes + with dc.Cache(tmp_cache_dir) as cache: + with pytest.raises(ValueError, match='at least'): + cache['x'] = {'complex': 'value'} + + +def test_env_var_empty_raises(tmp_cache_dir, monkeypatch, clear_env): + """Empty string env var must NOT be silently treated as 'unset'. + + monkeypatch the env var to '' explicitly; the resolution code uses + ``if env_value:`` so empty falls through to the file fallback, + which is the intended behavior. Verify that path works (no crash). + """ + monkeypatch.setenv(PICKLE_KEY_ENV, '') + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir) as cache: + cache['k'] = {'val': 1} + assert cache['k'] == {'val': 1} + # File fallback created the key file because env was effectively unset. + assert op.exists(op.join(tmp_cache_dir, PICKLE_KEY_FILENAME)) + + +# -- Review-pass-2: multi-process race on key file (H3) ---------------- + + +def _race_worker(directory, idx, queue): # pragma: no cover - subprocess + import warnings as _w + import diskcache as _dc + + _w.simplefilter('ignore', _dc.UnsafePickleWarning) + try: + with _dc.Cache(directory) as cache: + cache[('worker', idx)] = {'pid': os.getpid(), 'idx': idx} + queue.put(('ok', idx, cache[('worker', idx)])) + except Exception as exc: # pylint: disable=broad-except + queue.put(('err', idx, repr(exc))) + + +@pytest.mark.skipif( + sys.platform == 'win32' and sys.version_info < (3, 8), + reason='multiprocessing fork start method is unreliable on this combo', +) +def test_concurrent_key_file_creation_no_crash(tmp_cache_dir, clear_env): + """Multiple processes creating the same fresh cache must all + succeed -- the original code crashed losers with a false-positive + 'too short' error before the writer's os.write completed.""" + ctx = multiprocessing.get_context('spawn') + queue = ctx.Queue() + procs = [ + ctx.Process(target=_race_worker, args=(tmp_cache_dir, i, queue)) + for i in range(4) + ] + for p in procs: + p.start() + for p in procs: + p.join(timeout=30) + assert p.exitcode == 0, ( + 'worker exited %s; stdout/stderr should not contain ' + 'RuntimeError("too short")' % p.exitcode + ) + + results = [] + while not queue.empty(): + results.append(queue.get_nowait()) + assert len(results) == 4 + for status, idx, payload in results: + assert status == 'ok', 'worker %d failed: %r' % (idx, payload) + + +# -- Review-pass-2: sanity for trimmed TypeError (M4) ------------------ + + +def test_typeerror_message_lists_only_real_types(tmp_cache_dir, clear_env): + with pytest.raises(TypeError) as exc_info: + dc.Cache(tmp_cache_dir, disk_pickle_key=12345) + msg = str(exc_info.value) + assert 'bytes' in msg and 'hex str' in msg + assert 'False' not in msg and 'None' not in msg, ( + 'TypeError must not advertise False/None: caller filters them. ' + 'Got: %r' % msg + ) From b2331901cd4cac0cf888a90e7809a96a9647e440 Mon Sep 17 00:00:00 2001 From: Yeshwanth Nagaraj Date: Wed, 6 May 2026 14:49:28 +0530 Subject: [PATCH 3/6] Address pass-3 audit: 8 high+medium fixes (V1-V3, V5-V6, V9, V11, V13) Follow-up to commit bacedee that addresses all the cleanly-fixable high+medium severity findings from the third 32-reviewer audit. HIGH (3 fixed, 1 deferred): * V1: ``FanoutCache.__getstate__`` now mirrors ``Cache.__getstate__``'s H2 protection. Previously, pickling a ``FanoutCache`` configured with an explicit ``disk_pickle_key`` (or ``=False``) silently succeeded, and the receiving process auto-generated a different key -- making every subsequent read fail HMAC verification. The check uses the user's *original* argument (``self._pickle_key_user_arg``) rather than the resolved per-shard bytes, so default-mode pickling still works (the receiving process can re-resolve from env / file). * V2: ``Deque.__getstate__`` and ``Index.__getstate__`` likewise inherit the H2 protection. A new private helper ``_check_wrapped_cache_pickleable(cache, container_name)`` in ``persistent.py`` keeps the duplicated check tidy. * V3: ``_read_or_create_pickle_key_file`` switched from ``try/except OSError/else`` to ``try/finally`` so a ``KeyboardInterrupt`` / ``SystemExit`` (Ctrl+C, signal handler calling ``sys.exit``) during the key write also unlinks the half-baked file. Previously, an interrupted write left an empty ``.diskcache_pickle_key`` that permanently blocked every subsequent process with a ``RuntimeError("too short")`` until manually removed. * V4 (HMAC binding to cache key context, replay/row-swap): DEFERRED. Requires a non-trivial public-API change to ``Disk.fetch`` (or a parallel ``fetch_with_context`` method) and a DCv2 envelope format with backward-compat for DCv1 readers. Tracked for a follow-up. MEDIUM (5 fixed, 2 deferred): * V5: New ``Disk._uses_pickle = True`` class attribute (with ``JSONDisk._uses_pickle = False``). ``FanoutCache.__init__`` skips the eager pickle-key resolution when ``disk._uses_pickle`` is ``False`` -- so ``FanoutCache(disk=JSONDisk)`` no longer auto- generates an unused ``.diskcache_pickle_key`` file or emits a spurious ``UnsafePickleWarning``. The shard-key kwarg is now conditionally inserted, preserving old behaviour for pickle-using Disk subclasses. * V6: ``Disk._safe_filename_path`` now caches ``op.realpath(self._directory)`` in ``self._directory_realpath`` on first use, eliminating one ``realpath`` syscall per fetch (~35% reduction in path-validation overhead per file-backed read). * V9: ``_safe_filename_path`` translates ``OSError`` (e.g. Windows UNC ``\\server\\share\\foo`` raising ``FileNotFoundError`` from ``op.realpath``) into the documented ``ValueError`` so callers don't have to also catch ``OSError`` separately. * V11: ``Cache.__copy__`` and ``Cache.__deepcopy__`` are added. Previously ``copy.copy(cache)`` and ``copy.deepcopy(cache)`` went through ``__getstate__`` and were rejected for caches with explicit keys, even though the operation never crosses a process boundary. Both methods now construct a fresh same-process ``Cache`` instance with the original directory, timeout, disk type, AND ``disk_pickle_key`` so the security posture is preserved. * V13: ``Disk.fetch`` no longer raises ``UnboundLocalError`` when a tampered row has ``mode = MODE_BINARY|TEXT|PICKLE`` but ``filename = NULL``. It raises a clear ``ValueError`` mentioning cache.db tampering instead. * V7 (sticky legacy mode), V8 (orphan ``.val`` files after row delete): DEFERRED. Both require deeper transaction-layer refactoring; documented as known limitations. V10 (warning wording): the auto-generated-key ``UnsafePickleWarning`` now explicitly mentions the bootstrap-race risk so operators understand the threat model and reach for ``DISKCACHE_PICKLE_KEY`` in production. V12 (test coverage): 20 new tests covering all of the above plus previously-uncovered error paths -- the "too short after retries" ``RuntimeError``, ``_safe_filename_path`` non-string guard, ``bytearray`` key acceptance, ``OSError`` during write cleanup, and ``KeyboardInterrupt`` cleanup. Test results: 179 existing + 58 security (38 prior + 20 new) = 237 passed. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- diskcache/core.py | 122 ++++++++++++-- diskcache/fanout.py | 53 +++++- diskcache/persistent.py | 28 +++- tests/test_security.py | 360 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 535 insertions(+), 28 deletions(-) diff --git a/diskcache/core.py b/diskcache/core.py index 627dd23..fcb4c04 100644 --- a/diskcache/core.py +++ b/diskcache/core.py @@ -182,23 +182,23 @@ def _read_or_create_pickle_key_file(path): % (path, last_short_len, PICKLE_KEY_MIN_LEN, attempt + 1) ) - # We won the race. Generate and write the key. + # We won the race. Generate and write the key. Use try/finally + # so we clean up the half-baked file even on KeyboardInterrupt / + # SystemExit (otherwise an empty file blocks every subsequent + # process with a "too short" RuntimeError until manually removed). + write_succeeded = False try: new_key = secrets.token_bytes(32) os.write(fd, new_key) with cl.suppress(OSError): os.fsync(fd) - except OSError: - # Disk full / write failure. Remove the half-baked file so - # subsequent processes don't get stuck on a permanent - # too-short error. + write_succeeded = True + finally: with cl.suppress(OSError): os.close(fd) - with cl.suppress(OSError): - os.unlink(path) - raise - else: - os.close(fd) + if not write_succeeded: + with cl.suppress(OSError): + os.unlink(path) with cl.suppress(OSError): os.chmod(path, 0o600) @@ -265,10 +265,12 @@ def _emit_pickle_key_warning(source, directory, stacklevel): 'diskcache: using pickle HMAC key from %r (CVE-2025-69872 ' 'default fallback). This detects accidental cache ' 'corruption but does NOT protect against attackers with ' - 'read access to the cache directory. For stronger ' - 'protection set the %s environment variable to a hex-' - 'encoded random key (>= %d bytes), or pass pickle_key=... ' - 'explicitly.' + 'read access to the cache directory, nor against a ' + '"bootstrap race" where an attacker writes the cache ' + 'directory before the legitimate process initializes (and ' + 'thereby controls the key). For production use set the %s ' + 'environment variable to a hex-encoded random key ' + '(>= %d bytes), or pass pickle_key=... explicitly.' % (key_path, PICKLE_KEY_ENV, PICKLE_KEY_MIN_LEN), UnsafePickleWarning, stacklevel=stacklevel, @@ -340,6 +342,14 @@ def _emit_pickle_key_warning(source, directory, stacklevel): class Disk: """Cache key and value serialization for SQLite database and files.""" + #: Whether this Disk class uses pickle for serialization. When + #: ``False``, callers like :class:`diskcache.FanoutCache` can skip + #: eager HMAC key resolution (CVE-2025-69872) and avoid creating an + #: unused ``.diskcache_pickle_key`` file at the cache root. Custom + #: subclasses that bypass the pickle paths (like :class:`JSONDisk`) + #: should set this to ``False``. + _uses_pickle = True + def __init__(self, directory, min_file_size=0, pickle_protocol=0, pickle_key=_PICKLE_KEY_UNSET): """Initialize disk instance. @@ -363,6 +373,12 @@ def __init__(self, directory, min_file_size=0, pickle_protocol=0, self._directory = directory self.min_file_size = min_file_size self.pickle_protocol = pickle_protocol + # CVE-2025-69872: pre-compute realpath of the cache directory so + # path-traversal validation in :meth:`_safe_filename_path` only + # makes one ``realpath`` syscall per fetch instead of two. The + # value is computed lazily on first use to support construction + # with a not-yet-existing directory. + self._directory_realpath = None # Eagerly validate explicit (non-False, non-None) keys so bad # input fails at construction time instead of on first use. # ``False`` and ``None`` stay lazy so we don't emit warnings or @@ -614,15 +630,37 @@ def _safe_filename_path(self, filename): refuses any ``filename`` whose resolved location is not strictly inside ``self._directory``. - :raises ValueError: if ``filename`` escapes the cache directory. + :raises ValueError: if ``filename`` is not a string, escapes + the cache directory, or resolves to a path that cannot be + interpreted (e.g. an unreachable Windows UNC path). """ if not isinstance(filename, str): raise ValueError( 'diskcache: cache filename must be str, got %s' % type(filename).__name__ ) - base = op.realpath(self._directory) - full = op.realpath(op.join(self._directory, filename)) + # Cache realpath of the directory once (CVE-2025-69872 perf). + base = self._directory_realpath + if base is None: + try: + base = op.realpath(self._directory) + except OSError as exc: + raise ValueError( + 'diskcache: cannot resolve cache directory %r (%s)' + % (self._directory, exc) + ) from exc + self._directory_realpath = base + try: + full = op.realpath(op.join(self._directory, filename)) + except OSError as exc: + # CVE-2025-69872: translate platform-specific resolution + # failures (e.g. Windows UNC ``\\server\\share`` raising + # FileNotFoundError) into the documented ValueError so + # callers do not have to catch OSError separately. + raise ValueError( + 'diskcache: cannot resolve cache filename %r (%s; ' + 'cache.db may be tampered)' % (filename, exc) + ) from exc if full != base and not full.startswith(base + os.sep): raise ValueError( 'diskcache: cache filename %r escapes cache directory ' @@ -646,8 +684,23 @@ def fetch(self, mode, filename, value, read): # pylint: disable=unidiomatic-typecheck,consider-using-with if mode == MODE_RAW: return bytes(value) if type(value) is sqlite3.Binary else value - if filename is not None: + + # CVE-2025-69872 (V13): file-backed modes require a filename. + # If the row was tampered (mode=BINARY/TEXT/PICKLE but + # filename=NULL), raise a clear error rather than the previous + # UnboundLocalError. + if filename is None: + if mode in (MODE_BINARY, MODE_TEXT) or ( + mode == MODE_PICKLE and value is None + ): + raise ValueError( + 'diskcache: cache row has mode=%d but filename is None ' + '(cache.db may be tampered).' % mode + ) + full_path = None + else: full_path = self._safe_filename_path(filename) + if mode == MODE_BINARY: if read: return open(full_path, 'rb') @@ -728,6 +781,14 @@ def remove(self, file_path): class JSONDisk(Disk): """Cache key and value using JSON serialization with zlib compression.""" + # CVE-2025-69872: JSONDisk routes every key/value through JSON+zlib + # bytes, which hit the early-return MODE_RAW / MODE_BINARY branches + # of the base Disk and never reach the pickle paths. Declaring + # ``_uses_pickle = False`` prevents :class:`FanoutCache` from + # eagerly resolving (and possibly auto-generating) an HMAC key file + # that JSONDisk would never use. + _uses_pickle = False + def __init__(self, directory, compress_level=1, **kwargs): """Initialize JSON disk instance. @@ -2818,6 +2879,31 @@ def __getstate__(self): def __setstate__(self, state): self.__init__(*state) + def __copy__(self): + # CVE-2025-69872 (V11): in-process ``copy.copy`` does not cross a + # trust boundary -- preserve the explicit pickle_key (if any) so + # users who legitimately copy a Cache aren't tripped up by the + # __getstate__ guard above. + return self._copy_for_same_process() + + def __deepcopy__(self, memo): + return self._copy_for_same_process() + + def _copy_for_same_process(self): + disk = self._disk + arg = getattr(disk, '_pickle_key_arg', _PICKLE_KEY_UNSET) + kwargs = {} + if arg is not _PICKLE_KEY_UNSET: + # Forward bytes / False explicitly so the new instance has + # the same security posture as the original. + kwargs['disk_pickle_key'] = arg + return self.__class__( + self.directory, + timeout=self.timeout, + disk=type(self.disk), + **kwargs, + ) + def reset(self, key, value=ENOVAL, update=True): """Reset `key` and `value` item from Settings table. diff --git a/diskcache/fanout.py b/diskcache/fanout.py index 58a993e..65d82be 100644 --- a/diskcache/fanout.py +++ b/diskcache/fanout.py @@ -60,30 +60,49 @@ def __init__( # (FanoutCache._hash uses shard 0's key but storage may land in # shard N which holds a different key) and emitting one # warning per shard. + # CVE-2025-69872 (V5): skip eager key resolution for Disk + # subclasses that never use pickle (e.g. JSONDisk). Without + # this we would auto-generate ``.diskcache_pickle_key`` and + # emit an UnsafePickleWarning even though the cache will never + # exercise the pickle path. pickle_key_arg = settings.pop('disk_pickle_key', _PICKLE_KEY_UNSET) - if not op.isdir(directory): - os.makedirs(directory, 0o755, exist_ok=True) - resolved_key, source = _resolve_pickle_key_for_directory( - directory, pickle_key_arg - ) - _emit_pickle_key_warning(source, directory, stacklevel=3) + # V1: remember the user's original argument so __getstate__ + # only refuses pickling when the user explicitly provided a + # secret. Auto-generated / env-var keys can be re-resolved in + # the receiving process, so default-mode FanoutCache pickling + # remains supported. + self._pickle_key_user_arg = pickle_key_arg + if getattr(disk, '_uses_pickle', True): + if not op.isdir(directory): + os.makedirs(directory, 0o755, exist_ok=True) + resolved_key, source = _resolve_pickle_key_for_directory( + directory, pickle_key_arg + ) + _emit_pickle_key_warning(source, directory, stacklevel=3) + else: + # Forward the user's explicit choice unchanged so that any + # custom Disk subclass that mixes pickle + non-pickle paths + # still honours an explicit key. No file/env resolution. + resolved_key = pickle_key_arg self._count = shards self._directory = directory self._disk = disk + shard_kwargs = dict(settings) + if resolved_key is not _PICKLE_KEY_UNSET: + shard_kwargs['disk_pickle_key'] = resolved_key self._shards = tuple( Cache( directory=op.join(directory, '%03d' % num), timeout=timeout, disk=disk, size_limit=size_limit, - disk_pickle_key=resolved_key, - **settings, + **shard_kwargs, ) for num in range(shards) ) # Suppress per-shard warnings: the FanoutCache already emitted - # one above for the entire ensemble. + # one above for the entire ensemble (when applicable). for shard in self._shards: shard._disk._pickle_key_warned = True self._hash = self._shards[0].disk.hash @@ -563,6 +582,22 @@ def __exit__(self, *exception): self.close() def __getstate__(self): + # CVE-2025-69872 (V1): mirror :meth:`Cache.__getstate__`'s + # protection. Only refuse when the *user* supplied an + # explicit secret (or False) -- auto-generated / env-var keys + # are re-resolved by the receiving process, so default-mode + # pickling still works. + arg = self._pickle_key_user_arg + if arg is not _PICKLE_KEY_UNSET and arg is not None: + raise TypeError( + 'diskcache: FanoutCache instances configured with an ' + 'explicit disk_pickle_key (or disk_pickle_key=False) ' + 'cannot be pickled because the secret is not placed ' + 'in pickle state. Pickle the cache directory path ' + 'instead and reconstruct FanoutCache(directory, ' + 'disk_pickle_key=...) in the receiving process. ' + '(CVE-2025-69872)' + ) return (self._directory, self._count, self.timeout, type(self.disk)) def __setstate__(self, state): diff --git a/diskcache/persistent.py b/diskcache/persistent.py index 522bb74..edb826b 100644 --- a/diskcache/persistent.py +++ b/diskcache/persistent.py @@ -13,7 +13,31 @@ from contextlib import contextmanager from shutil import rmtree -from .core import ENOVAL, Cache +from .core import ENOVAL, Cache, _PICKLE_KEY_UNSET + + +def _check_wrapped_cache_pickleable(cache, container_name): + """CVE-2025-69872 (V2): refuse to pickle persistent containers + backed by a Cache that has an explicit ``disk_pickle_key``. + + Without this, ``pickle.dumps(deque)`` / ``pickle.dumps(index)`` + silently succeeds even though ``pickle.dumps(deque._cache)`` would + raise -- because :meth:`Deque.__getstate__` and + :meth:`Index.__getstate__` only serialize the directory, never + delegating to the wrapped Cache's protection. + """ + disk = cache._disk + arg = getattr(disk, '_pickle_key_arg', _PICKLE_KEY_UNSET) + if arg is not _PICKLE_KEY_UNSET and arg is not None: + raise TypeError( + 'diskcache: %s instances backed by a Cache configured with ' + 'an explicit disk_pickle_key (or disk_pickle_key=False) ' + 'cannot be pickled because the secret is not placed in ' + 'pickle state. Pickle the directory path instead and ' + 'reconstruct %s(directory=..., ...) with the same ' + 'disk_pickle_key in the receiving process. (CVE-2025-69872)' + % (container_name, container_name) + ) def _make_compare(seq_op, doc): @@ -319,6 +343,7 @@ def __reversed__(self): pass def __getstate__(self): + _check_wrapped_cache_pickleable(self._cache, 'Deque') return self.directory, self.maxlen def __setstate__(self, state): @@ -1090,6 +1115,7 @@ def items(self): __hash__ = None # type: ignore def __getstate__(self): + _check_wrapped_cache_pickleable(self._cache, 'Index') return self.directory def __setstate__(self, state): diff --git a/tests/test_security.py b/tests/test_security.py index 8610550..9e407fd 100644 --- a/tests/test_security.py +++ b/tests/test_security.py @@ -894,3 +894,363 @@ def test_typeerror_message_lists_only_real_types(tmp_cache_dir, clear_env): 'TypeError must not advertise False/None: caller filters them. ' 'Got: %r' % msg ) + + +# -- Pass-3 round of fixes --------------------------------------------- + + +def test_v3_keyboard_interrupt_during_write_cleans_up( + tmp_cache_dir, clear_env, monkeypatch +): + """V3: BaseException (KeyboardInterrupt / SystemExit) during the + key-file write must not leave an empty file behind that + permanently blocks every subsequent process.""" + from diskcache import core as dc_core + + keyfile = op.join(tmp_cache_dir, PICKLE_KEY_FILENAME) + orig_write = os.write + + def kbd_int_write(fd, data): # pragma: no cover - exercised below + raise KeyboardInterrupt('simulated Ctrl+C') + + monkeypatch.setattr(os, 'write', kbd_int_write) + with pytest.raises(KeyboardInterrupt): + dc_core._read_or_create_pickle_key_file(keyfile) + monkeypatch.setattr(os, 'write', orig_write) + + assert not op.exists(keyfile), ( + 'Empty key file must be unlinked after KeyboardInterrupt; ' + 'leaving it behind permanently blocks other processes with ' + 'a "too short" RuntimeError.' + ) + + # And a fresh attempt now succeeds without manual cleanup: + key, created = dc_core._read_or_create_pickle_key_file(keyfile) + assert created and len(key) >= 16 + + +def test_v3_oserror_during_write_cleans_up( + tmp_cache_dir, clear_env, monkeypatch +): + """V3: A regular OSError (disk full) during write must also remove + the half-baked file.""" + from diskcache import core as dc_core + + keyfile = op.join(tmp_cache_dir, PICKLE_KEY_FILENAME) + + def disk_full_write(fd, data): + raise OSError(28, 'No space left on device') + + monkeypatch.setattr(os, 'write', disk_full_write) + with pytest.raises(OSError): + dc_core._read_or_create_pickle_key_file(keyfile) + assert not op.exists(keyfile) + + +def test_v9_unc_or_invalid_filename_raises_valueerror( + tmp_cache_dir, clear_env +): + """V9: ``_safe_filename_path`` must translate platform OSError + (e.g. Windows UNC path resolution failure) into the documented + ValueError so callers don't have to also catch OSError.""" + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir) as cache: + disk = cache._disk + if sys.platform == 'win32': + with pytest.raises(ValueError): + disk._safe_filename_path( + '//nonexistent-server-xyz9876/share/file.txt' + ) + else: + # On POSIX, realpath of an absolute path that escapes the dir + # also surfaces as ValueError (containment check rather than + # OSError, but same exception class as documented). + with pytest.raises(ValueError): + disk._safe_filename_path('/etc/passwd') + + +def test_v6_realpath_cached_after_first_call(tmp_cache_dir, clear_env): + """V6: After the first call, ``_directory_realpath`` is populated + so subsequent fetches do not re-resolve the cache directory.""" + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache( + tmp_cache_dir, + disk_pickle_key=secrets.token_bytes(32), + disk_min_file_size=1, + ) as cache: + assert cache._disk._directory_realpath is None + cache['k'] = b'x' * 64 # forces a .val file + _ = cache['k'] # triggers _safe_filename_path + cached = cache._disk._directory_realpath + assert cached is not None + assert cached == op.realpath(tmp_cache_dir) + + +def test_v13_filename_none_with_file_mode_raises_valueerror( + tmp_cache_dir, clear_env +): + """V13: a tampered row with mode=BINARY/TEXT/PICKLE and + filename=NULL must raise a clear ValueError -- not the previous + ``UnboundLocalError: local variable 'full_path' referenced before + assignment``.""" + key = secrets.token_bytes(32) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir, disk_pickle_key=key) as cache: + cache['k'] = b'x' * 16 + + con = sqlite3.connect(op.join(tmp_cache_dir, 'cache.db')) + try: + con.execute( + 'UPDATE Cache SET filename = NULL, mode = 2, value = NULL ' + 'WHERE key = ?', ('k',) + ) + con.commit() + finally: + con.close() + + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir, disk_pickle_key=key) as cache: + with pytest.raises(ValueError, match='mode'): + cache['k'] + + +def test_v5_jsondisk_fanout_no_keyfile_no_warning( + tmp_cache_dir, clear_env +): + """V5: ``FanoutCache(disk=JSONDisk)`` must NOT auto-generate the + pickle HMAC key file or emit ``UnsafePickleWarning``, since + JSONDisk never exercises the pickle path.""" + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter('always') + with dc.FanoutCache( + tmp_cache_dir, shards=4, disk=dc.JSONDisk + ) as cache: + cache[('a', 'b')] = {'v': 1} + assert cache[('a', 'b')] == {'v': 1} + + pkey_warnings = [ + w for w in caught if isinstance(w.message, dc.UnsafePickleWarning) + ] + assert pkey_warnings == [], ( + 'JSONDisk under FanoutCache must not emit UnsafePickleWarning, ' + 'got: %r' % [str(w.message) for w in pkey_warnings] + ) + assert not op.exists(op.join(tmp_cache_dir, PICKLE_KEY_FILENAME)), ( + 'JSONDisk under FanoutCache must not auto-generate the pickle ' + 'HMAC key file at the root.' + ) + for num in range(4): + shard_keyfile = op.join( + tmp_cache_dir, '%03d' % num, PICKLE_KEY_FILENAME + ) + assert not op.exists(shard_keyfile) + + +def test_v5_disk_uses_pickle_class_attr(): + """V5: the ``_uses_pickle`` capability flag is True for the base + ``Disk`` and False for ``JSONDisk``.""" + assert dc.Disk._uses_pickle is True + assert dc.JSONDisk._uses_pickle is False + + +def test_v1_fanoutcache_pickling_with_explicit_key_raises( + tmp_cache_dir, clear_env +): + """V1: FanoutCache.__getstate__ must mirror Cache.__getstate__'s + refusal when an explicit pickle key was provided (else pickling + silently strips the secret and corrupts reads in the receiving + process).""" + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + fc = dc.FanoutCache( + tmp_cache_dir, + shards=2, + disk_pickle_key=secrets.token_bytes(32), + ) + try: + with pytest.raises(TypeError, match='disk_pickle_key'): + pickle.dumps(fc) + finally: + fc.close() + + +def test_v1_fanoutcache_pickling_with_legacy_mode_raises( + tmp_cache_dir, clear_env +): + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + fc = dc.FanoutCache( + tmp_cache_dir, shards=2, disk_pickle_key=False + ) + try: + with pytest.raises(TypeError, match='disk_pickle_key'): + pickle.dumps(fc) + finally: + fc.close() + + +def test_v1_fanoutcache_pickling_with_default_succeeds( + tmp_cache_dir, clear_env +): + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.FanoutCache(tmp_cache_dir, shards=2) as fc: + fc['k'] = {'v': 1} + blob = pickle.dumps(fc) + other = pickle.loads(blob) + try: + assert other['k'] == {'v': 1} + finally: + other.close() + + +def test_v2_deque_pickling_with_explicit_key_raises( + tmp_cache_dir, clear_env +): + """V2: Deque (built on Cache) must inherit the H2 protection.""" + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + cache = dc.Cache(tmp_cache_dir, disk_pickle_key=secrets.token_bytes(32)) + try: + deque = dc.Deque.fromcache(cache, [{'item': 1}]) + with pytest.raises(TypeError, match='disk_pickle_key'): + pickle.dumps(deque) + finally: + cache.close() + + +def test_v2_index_pickling_with_explicit_key_raises( + tmp_cache_dir, clear_env +): + """V2: Index (built on Cache) must inherit the H2 protection.""" + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + cache = dc.Cache(tmp_cache_dir, disk_pickle_key=secrets.token_bytes(32)) + try: + idx = dc.Index.fromcache(cache, {'k': 'v'}) + with pytest.raises(TypeError, match='disk_pickle_key'): + pickle.dumps(idx) + finally: + cache.close() + + +def test_v2_deque_pickling_with_default_succeeds(tmp_cache_dir, clear_env): + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + cache = dc.Cache(tmp_cache_dir) + try: + deque = dc.Deque.fromcache(cache, [{'item': 1}]) + blob = pickle.dumps(deque) + other = pickle.loads(blob) + assert list(other) == [{'item': 1}] + finally: + cache.close() + + +def test_v11_copy_copy_preserves_explicit_key(tmp_cache_dir, clear_env): + """V11: ``copy.copy(cache)`` does not cross a process boundary, so + the explicit pickle_key should be preserved (not rejected by + __getstate__) and a working independent Cache returned.""" + import copy + + secret = secrets.token_bytes(32) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir, disk_pickle_key=secret) as cache: + cache['k'] = {'v': 1} + shallow = copy.copy(cache) + try: + assert shallow.directory == cache.directory + assert shallow._disk._pickle_key_arg == secret + assert shallow['k'] == {'v': 1} + finally: + shallow.close() + + +def test_v11_copy_deepcopy_preserves_explicit_key(tmp_cache_dir, clear_env): + import copy + + secret = secrets.token_bytes(32) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir, disk_pickle_key=secret) as cache: + cache['k'] = {'v': 1} + deep = copy.deepcopy(cache) + try: + assert deep['k'] == {'v': 1} + finally: + deep.close() + + +def test_v11_copy_copy_with_default_still_works(tmp_cache_dir, clear_env): + import copy + + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir) as cache: + cache['k'] = {'v': 1} + shallow = copy.copy(cache) + try: + assert shallow['k'] == {'v': 1} + finally: + shallow.close() + + +def test_v10_warning_mentions_bootstrap_race(tmp_cache_dir, clear_env): + """V10: the auto-generated-key warning must explicitly mention the + bootstrap-race risk so operators understand the threat model + (otherwise they may assume the file-mode 0o600 key is safe in any + multi-tenant directory).""" + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter('always') + with dc.Cache(tmp_cache_dir) as cache: + cache[('complex',)] = {'value': 1} + + msgs = [ + w for w in caught if isinstance(w.message, dc.UnsafePickleWarning) + ] + assert msgs + assert 'bootstrap race' in str(msgs[0].message).lower(), ( + 'Warning should mention bootstrap race; got: %r' + % str(msgs[0].message) + ) + + +def test_v12_short_key_file_raises_clear_runtimeerror( + tmp_cache_dir, clear_env +): + """V12: coverage for the "after N retries" RuntimeError path in + ``_read_or_create_pickle_key_file``.""" + from diskcache import core as dc_core + + keyfile = op.join(tmp_cache_dir, PICKLE_KEY_FILENAME) + with open(keyfile, 'wb') as fh: + fh.write(b'short') + with pytest.raises(RuntimeError, match='too short.*after'): + dc_core._read_or_create_pickle_key_file(keyfile) + + +def test_v12_safe_filename_path_rejects_non_string( + tmp_cache_dir, clear_env +): + """V12: coverage for the type guard in ``_safe_filename_path``.""" + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir) as cache: + with pytest.raises(ValueError, match='must be str'): + cache._disk._safe_filename_path(123) + + +def test_v12_bytearray_pickle_key_accepted(tmp_cache_dir, clear_env): + """V12: bytearray is accepted by _coerce_pickle_key but was never + exercised before.""" + key = bytearray(secrets.token_bytes(32)) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir, disk_pickle_key=key) as cache: + cache['k'] = {'v': 1} + assert cache['k'] == {'v': 1} From ef0f3d54b294b28fcde6996328d54c7ec84d473d Mon Sep 17 00:00:00 2001 From: Yeshwanth Nagaraj Date: Thu, 7 May 2026 17:07:26 +0530 Subject: [PATCH 4/6] Address pass-4 audit: 8 high+medium fixes (V14-V19, V21-V22) Follow-up to commit b233190 that addresses the cleanly-fixable high+medium severity findings from the fourth (23-reviewer GPT-5.5) audit. Three findings (V20 memory regression, V23 refactor, V24 docs) are deferred for separate work. HIGH (4): * V14 + V17 (combined fix in ``_read_or_create_pickle_key_file``): - V14: ``os.write`` may legally return a short write (signal, quota, ENOSPC). Loop until the full key is on disk; raise OSError if a 0-byte return ever happens. Without this the writer holds a 32-byte key in memory while readers see only a truncated prefix. - V17: set ``write_succeeded = True`` IMMEDIATELY after the write loop completes (before fsync), not after fsync. Once the bytes have hit the kernel they are visible to concurrent readers; if a KeyboardInterrupt then cancels fsync we must NOT unlink the file or those readers will be left using a key that no longer exists on disk. * V15 (``Cache._copy_for_same_process``): if the original Cache had already lazily resolved its key from the env var or in-dir file, propagate the resolved bytes (and ``_pickle_key_warned`` flag) to the copy. Otherwise an intervening change to ``DISKCACHE_PICKLE_KEY`` or ``.diskcache_pickle_key`` between the original's first-use and the copy's first-use would silently use a different HMAC key. * V16 (``FanoutCache.cache`` / ``deque`` / ``index``): the named child constructors now forward the FanoutCache's ``disk_pickle_key`` to the child Cache via a new ``_inject_disk_pickle_key`` helper. Without this, named children auto-generated their own ``.diskcache_pickle_key`` files and used a different key than the parent (data unreadable on re-open with the parent's key). Caller-supplied ``disk_pickle_key`` in ``cache(... **settings)`` still wins. MEDIUM (4): * V18 (``Disk._safe_filename_path``): narrow the OSError translation to ``FileNotFoundError``, ``NotADirectoryError``, and Windows WinError 53/67/87/123/161/1001 (UNC / drive-not-found / invalid path). ``PermissionError`` and ``TimeoutError`` are real operational errors, not tampering signals; they propagate unchanged so callers can distinguish them. * V19 (``FanoutCache`` / ``Deque`` / ``Index``): add ``__copy__`` and ``__deepcopy__`` mirroring ``Cache._copy_for_same_process``. Previously ``copy.copy(fanout_with_explicit_key)`` raised TypeError via the H2 ``__getstate__`` guard, even though the copy never crosses a process boundary. * V21 (legacy-data error message): the ``UnpicklingError`` raised when reading pre-fix raw pickle now references ``disk_pickle_key=False`` (the kwarg on Cache / FanoutCache / Django CACHES OPTIONS) instead of the Disk-only ``pickle_key`` name. Django users following the old message put ``pickle_key: False`` in their OPTIONS dict, which is silently dropped because Cache only forwards ``disk_*`` settings. * V22 (test cleanup): ``test_valid_hmac_invalid_pickle_raises`` no longer accepts ``KeyError`` (which would mask a regression where an HMAC-valid corrupt entry is treated as a cache miss). DEFERRED (with rationale): * V20 (large MODE_PICKLE memory regression: 32 MiB pickle = 80 ms / 96 MiB peak vs master's 24 ms / 32 MiB) -- streaming HMAC verify + seek-back-and-pickle-load reintroduces a TOCTOU window that the current full-buffer design specifically closes. Safer alternatives (mmap with MAP_PRIVATE, file locking) need careful cross-platform work; documented as a known tradeoff for now. * V23 (Cache.__init__ over Pylint R0915 = 63/60 statements) -- cosmetic; defer to the doc/refactor pass. * V24 (README Security section + MIGRATING.rst + api.rst) -- needs separate documentation work. Test additions (12 new tests, 70 total in tests/test_security.py): * test_v14_short_os_write_does_not_silently_truncate * test_v14_zero_return_from_os_write_triggers_cleanup * test_v17_keyboard_interrupt_during_fsync_keeps_durable_key * test_v18_oserror_on_realpath_propagates_when_not_path_failure * test_v15_copy_preserves_resolved_key_across_env_change * test_v16_fanout_child_cache_inherits_pickle_key * test_v16_fanout_child_deque_inherits_pickle_key * test_v16_fanout_child_index_inherits_pickle_key * test_v19_fanoutcache_copy_preserves_explicit_key * test_v19_deque_copy_preserves_explicit_key * test_v19_index_copy_preserves_explicit_key * test_v21_legacy_error_mentions_disk_pickle_key Test results: 179 existing + 70 security (58 prior + 12 new) = 249 passed. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- diskcache/core.py | 72 +++++++++-- diskcache/fanout.py | 59 +++++++++ diskcache/persistent.py | 33 +++++ tests/test_security.py | 272 +++++++++++++++++++++++++++++++++++++++- 4 files changed, 423 insertions(+), 13 deletions(-) diff --git a/diskcache/core.py b/diskcache/core.py index fcb4c04..8ecf7fa 100644 --- a/diskcache/core.py +++ b/diskcache/core.py @@ -186,13 +186,34 @@ def _read_or_create_pickle_key_file(path): # so we clean up the half-baked file even on KeyboardInterrupt / # SystemExit (otherwise an empty file blocks every subsequent # process with a "too short" RuntimeError until manually removed). + # + # CVE-2025-69872 V14: ``os.write`` may legally return fewer bytes + # than requested (signal interruption, disk quota, ENOSPC mid- + # write). Loop until the full key is on disk, otherwise the + # writer holds a 32-byte key in memory while readers see only a + # truncated prefix. + # + # CVE-2025-69872 V17: set ``write_succeeded`` IMMEDIATELY after + # the bytes hit the kernel (before fsync), not after fsync. + # Once ``os.write`` has returned, concurrent readers can already + # see the full key; if a KeyboardInterrupt then cancels fsync we + # must NOT unlink the file -- doing so leaves those readers + # using a key that is no longer recoverable from disk. + new_key = secrets.token_bytes(32) write_succeeded = False try: - new_key = secrets.token_bytes(32) - os.write(fd, new_key) + written = 0 + while written < len(new_key): + n = os.write(fd, new_key[written:]) + if n <= 0: + raise OSError( + 'diskcache: os.write returned %d while writing ' + 'pickle key file %r' % (n, path) + ) + written += n + write_succeeded = True with cl.suppress(OSError): os.fsync(fd) - write_succeeded = True finally: with cl.suppress(OSError): os.close(fd) @@ -468,7 +489,8 @@ def _pickle_load_bytes(self, data): raise pickle.UnpicklingError( 'diskcache: pickle envelope missing; this cache entry ' 'predates the CVE-2025-69872 mitigation. Pass ' - 'pickle_key=False to read legacy data (insecure), or ' + 'disk_pickle_key=False to Cache / FanoutCache (or Django ' + 'CACHES OPTIONS) to read legacy data (insecure), or ' 'recreate the cache.' ) @@ -639,12 +661,16 @@ def _safe_filename_path(self, filename): 'diskcache: cache filename must be str, got %s' % type(filename).__name__ ) - # Cache realpath of the directory once (CVE-2025-69872 perf). + # CVE-2025-69872: only translate OSError subclasses that + # plausibly indicate a malformed / tampered filename. Genuine + # operational errors (PermissionError, TimeoutError) propagate + # so callers can distinguish them from path traversal attempts. + # Cache realpath of the directory once (V6 perf). base = self._directory_realpath if base is None: try: base = op.realpath(self._directory) - except OSError as exc: + except (FileNotFoundError, NotADirectoryError) as exc: raise ValueError( 'diskcache: cannot resolve cache directory %r (%s)' % (self._directory, exc) @@ -652,15 +678,24 @@ def _safe_filename_path(self, filename): self._directory_realpath = base try: full = op.realpath(op.join(self._directory, filename)) - except OSError as exc: - # CVE-2025-69872: translate platform-specific resolution - # failures (e.g. Windows UNC ``\\server\\share`` raising - # FileNotFoundError) into the documented ValueError so - # callers do not have to catch OSError separately. + except (FileNotFoundError, NotADirectoryError) as exc: raise ValueError( 'diskcache: cannot resolve cache filename %r (%s; ' 'cache.db may be tampered)' % (filename, exc) ) from exc + except OSError as exc: + # Windows-only: UNC / drive-not-found surface as bare + # OSError or WinError 53 / 67 / 87 / 123 / 161 (path/name + # invalid for filesystem use). Treat as malformed + # filename; let other OSError subclasses propagate. + if os.name == 'nt' and getattr(exc, 'winerror', None) in ( + 53, 67, 87, 123, 161, 1001 + ): + raise ValueError( + 'diskcache: cannot resolve cache filename %r (%s; ' + 'cache.db may be tampered)' % (filename, exc) + ) from exc + raise if full != base and not full.startswith(base + os.sep): raise ValueError( 'diskcache: cache filename %r escapes cache directory ' @@ -2897,12 +2932,25 @@ def _copy_for_same_process(self): # Forward bytes / False explicitly so the new instance has # the same security posture as the original. kwargs['disk_pickle_key'] = arg - return self.__class__( + new = self.__class__( self.directory, timeout=self.timeout, disk=type(self.disk), **kwargs, ) + # CVE-2025-69872 V15: if the original has already lazily + # resolved its HMAC key from env / file, propagate the + # resolved bytes to the copy so an intervening change to + # ``DISKCACHE_PICKLE_KEY`` or ``.diskcache_pickle_key`` does + # not produce a copy that uses a different key than the + # original (which would silently fail HMAC verification). + if ( + arg is _PICKLE_KEY_UNSET + and getattr(disk, '_pickle_key_resolved', None) is not None + ): + new._disk._pickle_key_resolved = disk._pickle_key_resolved + new._disk._pickle_key_warned = True + return new def reset(self, key, value=ENOVAL, update=True): """Reset `key` and `value` item from Settings table. diff --git a/diskcache/fanout.py b/diskcache/fanout.py index 65d82be..5514447 100644 --- a/diskcache/fanout.py +++ b/diskcache/fanout.py @@ -675,6 +675,12 @@ def cache(self, name, timeout=60, disk=None, **settings): except KeyError: parts = name.split('/') directory = op.join(self._directory, 'cache', *parts) + # CVE-2025-69872 V16: forward the FanoutCache's resolved + # pickle key to the child cache so it doesn't auto- + # generate its own (different) ``.diskcache_pickle_key`` + # under the child directory. Caller-supplied settings + # win on conflict. + self._inject_disk_pickle_key(settings) temp = Cache( directory=directory, timeout=timeout, @@ -709,10 +715,13 @@ def deque(self, name, maxlen=None): except KeyError: parts = name.split('/') directory = op.join(self._directory, 'deque', *parts) + child_kwargs = {} + self._inject_disk_pickle_key(child_kwargs) cache = Cache( directory=directory, disk=self._disk, eviction_policy='none', + **child_kwargs, ) deque = Deque.fromcache(cache, maxlen=maxlen) _deques[name] = deque @@ -745,14 +754,64 @@ def index(self, name): except KeyError: parts = name.split('/') directory = op.join(self._directory, 'index', *parts) + child_kwargs = {} + self._inject_disk_pickle_key(child_kwargs) cache = Cache( directory=directory, disk=self._disk, eviction_policy='none', + **child_kwargs, ) index = Index.fromcache(cache) _indexes[name] = index return index + def _inject_disk_pickle_key(self, kwargs): + """CVE-2025-69872 V16 helper: forward the FanoutCache's + resolved pickle key to a child Cache constructor's kwargs so + the child does not run an independent default-fallback + resolution (which would create a separate + ``.diskcache_pickle_key`` file under the child directory and, + critically, would use a *different* HMAC key than the parent + and its sibling shards). + """ + if 'disk_pickle_key' in kwargs: + return # caller-supplied wins + # Prefer the user's explicit argument (so the child uses + # exactly the same secret). Fall back to the resolved key on + # shard 0 (which the FanoutCache resolved at construction + # time and shared with every shard). + arg = self._pickle_key_user_arg + if arg is not _PICKLE_KEY_UNSET and arg is not None: + kwargs['disk_pickle_key'] = arg + return + if self._shards: + shard_arg = self._shards[0]._disk._pickle_key_arg + if shard_arg is not _PICKLE_KEY_UNSET and shard_arg is not None: + kwargs['disk_pickle_key'] = shard_arg + + def __copy__(self): + return self._copy_for_same_process() + + def __deepcopy__(self, memo): + return self._copy_for_same_process() + + def _copy_for_same_process(self): + # CVE-2025-69872 V19: same-process copy preserves the user's + # explicit pickle_key (or False) so a copied FanoutCache stays + # in the same security mode as the original. Default-mode + # FanoutCaches re-resolve via env / file in the new instance. + kwargs = {} + arg = self._pickle_key_user_arg + if arg is not _PICKLE_KEY_UNSET: + kwargs['disk_pickle_key'] = arg + return self.__class__( + self._directory, + shards=self._count, + timeout=self.timeout, + disk=self._disk, + **kwargs, + ) + FanoutCache.memoize = Cache.memoize # type: ignore diff --git a/diskcache/persistent.py b/diskcache/persistent.py index edb826b..8d710f3 100644 --- a/diskcache/persistent.py +++ b/diskcache/persistent.py @@ -350,6 +350,23 @@ def __setstate__(self, state): directory, maxlen = state self.__init__(directory=directory, maxlen=maxlen) + def __copy__(self): + return self._copy_for_same_process() + + def __deepcopy__(self, memo): + return self._copy_for_same_process() + + def _copy_for_same_process(self): + # CVE-2025-69872 V19: same-process Deque copy preserves the + # underlying Cache's explicit pickle_key so the copy stays + # readable. Default-mode caches re-resolve from env / file. + cache = self._cache + arg = getattr(cache._disk, '_pickle_key_arg', _PICKLE_KEY_UNSET) + if arg is not _PICKLE_KEY_UNSET and arg is not None: + new_cache = Cache(self.directory, disk_pickle_key=arg) + return type(self).fromcache(new_cache, maxlen=self.maxlen) + return type(self)(directory=self.directory, maxlen=self.maxlen) + def append(self, value): """Add `value` to back of deque. @@ -1121,6 +1138,22 @@ def __getstate__(self): def __setstate__(self, state): self.__init__(state) + def __copy__(self): + return self._copy_for_same_process() + + def __deepcopy__(self, memo): + return self._copy_for_same_process() + + def _copy_for_same_process(self): + # CVE-2025-69872 V19: same-process Index copy preserves the + # underlying Cache's explicit pickle_key. + cache = self._cache + arg = getattr(cache._disk, '_pickle_key_arg', _PICKLE_KEY_UNSET) + if arg is not _PICKLE_KEY_UNSET and arg is not None: + new_cache = Cache(self.directory, disk_pickle_key=arg) + return type(self).fromcache(new_cache) + return type(self)(self.directory) + def __eq__(self, other): """index.__eq__(other) <==> index == other diff --git a/tests/test_security.py b/tests/test_security.py index 9e407fd..52b48bc 100644 --- a/tests/test_security.py +++ b/tests/test_security.py @@ -807,7 +807,11 @@ def test_valid_hmac_invalid_pickle_raises(tmp_cache_dir, clear_env): with warnings.catch_warnings(): warnings.simplefilter('ignore', dc.UnsafePickleWarning) with dc.Cache(tmp_cache_dir, disk_pickle_key=key) as cache: - with pytest.raises((pickle.UnpicklingError, EOFError, KeyError)): + # CVE-2025-69872 V22: tighten -- previous version accepted + # KeyError, masking a regression where a corrupt HMAC-valid + # entry could be treated as a cache miss. pickle/EOFError + # are the only legitimate outcomes here. + with pytest.raises((pickle.UnpicklingError, EOFError)): cache['k'] @@ -1254,3 +1258,269 @@ def test_v12_bytearray_pickle_key_accepted(tmp_cache_dir, clear_env): with dc.Cache(tmp_cache_dir, disk_pickle_key=key) as cache: cache['k'] = {'v': 1} assert cache['k'] == {'v': 1} + + +# -- Pass-4 round of fixes --------------------------------------------- + + +def test_v14_short_os_write_does_not_silently_truncate( + tmp_cache_dir, clear_env, monkeypatch +): + """V14: ``os.write`` may legally return a short write (signal, + quota, ENOSPC mid-write). Ignoring the return value lets the + writer hold a 32-byte key in memory while the on-disk file holds + only a prefix; readers then see "too short" forever. + + With the loop-until-full fix, a short write should either complete + (multiple iterations) or raise OSError that triggers cleanup. + """ + from diskcache import core as dc_core + + keyfile = op.join(tmp_cache_dir, PICKLE_KEY_FILENAME) + orig_write = os.write + write_count = [0] + + def short_then_full_write(fd, data): + write_count[0] += 1 + # First call writes 8 bytes; subsequent calls write the rest. + return orig_write(fd, data[:8]) + + monkeypatch.setattr(os, 'write', short_then_full_write) + key, _ = dc_core._read_or_create_pickle_key_file(keyfile) + monkeypatch.setattr(os, 'write', orig_write) + + # The writer loop must have called os.write multiple times until + # all 32 bytes were durable. + assert write_count[0] >= 4, ( + 'expected the write loop to retry; got %d calls' % write_count[0] + ) + on_disk_len = op.getsize(keyfile) + assert on_disk_len == len(key) == 32, ( + 'on-disk key (%d bytes) must match returned key (%d bytes)' + % (on_disk_len, len(key)) + ) + + +def test_v14_zero_return_from_os_write_triggers_cleanup( + tmp_cache_dir, clear_env, monkeypatch +): + """V14: a pathological 0-byte write loops forever in the naive + implementation; the fix raises OSError after the first such call + and the finally cleans up the file.""" + from diskcache import core as dc_core + + keyfile = op.join(tmp_cache_dir, PICKLE_KEY_FILENAME) + + def zero_write(fd, data): + return 0 + + monkeypatch.setattr(os, 'write', zero_write) + with pytest.raises(OSError): + dc_core._read_or_create_pickle_key_file(keyfile) + assert not op.exists(keyfile) + + +def test_v17_keyboard_interrupt_during_fsync_keeps_durable_key( + tmp_cache_dir, clear_env, monkeypatch +): + """V17: once ``os.write`` has fully landed the key on disk, + concurrent readers can already see it. A KeyboardInterrupt + during the subsequent fsync must NOT unlink the file -- doing so + would strand readers using a key that no longer exists on disk.""" + from diskcache import core as dc_core + + keyfile = op.join(tmp_cache_dir, PICKLE_KEY_FILENAME) + + def kbd_during_fsync(fd): + raise KeyboardInterrupt('simulated Ctrl+C during fsync') + + monkeypatch.setattr(os, 'fsync', kbd_during_fsync) + with pytest.raises(KeyboardInterrupt): + dc_core._read_or_create_pickle_key_file(keyfile) + monkeypatch.undo() + + assert op.exists(keyfile), ( + 'fsync interrupt after successful os.write must NOT unlink ' + 'the key (the bytes are already visible to readers).' + ) + assert op.getsize(keyfile) == 32 + + +def test_v18_oserror_on_realpath_propagates_when_not_path_failure( + tmp_cache_dir, clear_env, monkeypatch +): + """V18: PermissionError / TimeoutError from realpath are real OS + errors, not tampering signals. They must propagate unchanged.""" + from diskcache import core as dc_core + + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir) as cache: + disk = cache._disk + + def perm_error(*args, **kwargs): + raise PermissionError(13, 'Permission denied (simulated)') + + monkeypatch.setattr(dc_core.op, 'realpath', perm_error) + with pytest.raises(PermissionError): + disk._safe_filename_path('ab/cd.val') + + +def test_v15_copy_preserves_resolved_key_across_env_change( + tmp_cache_dir, clear_env, monkeypatch +): + """V15: if the original Cache resolved its key from the env var + or in-dir file, ``copy.copy`` must propagate the resolved bytes + so an intervening env change does not produce a copy that uses a + different key.""" + import copy + + monkeypatch.setenv(PICKLE_KEY_ENV, secrets.token_bytes(32).hex()) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir) as cache: + cache[('complex', 'key')] = {'v': 1} + # Change the env var BEFORE copying + monkeypatch.setenv(PICKLE_KEY_ENV, secrets.token_bytes(32).hex()) + shallow = copy.copy(cache) + try: + # The copy must read the same value that the original + # wrote (resolved key was preserved). + assert shallow[('complex', 'key')] == {'v': 1} + finally: + shallow.close() + + +def test_v16_fanout_child_cache_inherits_pickle_key( + tmp_cache_dir, clear_env +): + """V16: ``FanoutCache.cache(name)`` must forward + ``disk_pickle_key`` to the child Cache. Otherwise the child + auto-generates a different key and its data is unreadable when + the user re-opens with the parent's key.""" + key = secrets.token_bytes(32) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.FanoutCache( + tmp_cache_dir, shards=2, disk_pickle_key=key + ) as fc: + child = fc.cache('child') + assert child._disk._pickle_key_arg == key + child[('complex',)] = {'v': 1} + # No per-child key file should exist since the key was + # inherited. + child_dir = op.join(tmp_cache_dir, 'cache', 'child') + assert not op.exists(op.join(child_dir, PICKLE_KEY_FILENAME)) + + +def test_v16_fanout_child_deque_inherits_pickle_key( + tmp_cache_dir, clear_env +): + key = secrets.token_bytes(32) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.FanoutCache( + tmp_cache_dir, shards=2, disk_pickle_key=key + ) as fc: + d = fc.deque('mydeque') + assert d._cache._disk._pickle_key_arg == key + d.append({'v': 1}) + assert d.pop() == {'v': 1} + + +def test_v16_fanout_child_index_inherits_pickle_key( + tmp_cache_dir, clear_env +): + key = secrets.token_bytes(32) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.FanoutCache( + tmp_cache_dir, shards=2, disk_pickle_key=key + ) as fc: + idx = fc.index('myindex') + assert idx._cache._disk._pickle_key_arg == key + idx['k'] = {'v': 1} + assert idx['k'] == {'v': 1} + + +def test_v19_fanoutcache_copy_preserves_explicit_key( + tmp_cache_dir, clear_env +): + """V19: same-process ``copy.copy(fanout)`` must preserve an + explicit key just like Cache does.""" + import copy + + key = secrets.token_bytes(32) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.FanoutCache( + tmp_cache_dir, shards=2, disk_pickle_key=key + ) as fc: + fc[('complex',)] = {'v': 1} + shallow = copy.copy(fc) + try: + assert shallow[('complex',)] == {'v': 1} + finally: + shallow.close() + + +def test_v19_deque_copy_preserves_explicit_key(tmp_cache_dir, clear_env): + import copy + + key = secrets.token_bytes(32) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + cache = dc.Cache(tmp_cache_dir, disk_pickle_key=key) + try: + deque = dc.Deque.fromcache(cache, [{'item': 1}, {'item': 2}]) + shallow = copy.copy(deque) + try: + assert list(shallow) == [{'item': 1}, {'item': 2}] + finally: + shallow._cache.close() + finally: + cache.close() + + +def test_v19_index_copy_preserves_explicit_key(tmp_cache_dir, clear_env): + import copy + + key = secrets.token_bytes(32) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + cache = dc.Cache(tmp_cache_dir, disk_pickle_key=key) + try: + idx = dc.Index.fromcache(cache, {'k': {'v': 1}}) + shallow = copy.copy(idx) + try: + assert shallow['k'] == {'v': 1} + finally: + shallow._cache.close() + finally: + cache.close() + + +def test_v21_legacy_error_mentions_disk_pickle_key( + tmp_cache_dir, clear_env +): + """V21: the error must point users to ``disk_pickle_key=False`` + (the kwarg on Cache / FanoutCache / Django OPTIONS), not + ``pickle_key=False`` (only valid on the Disk constructor).""" + # Write raw legacy pickle directly + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir, disk_pickle_key=False) as cache: + cache['k'] = {'legacy': True} + + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache( + tmp_cache_dir, disk_pickle_key=secrets.token_bytes(32) + ) as cache: + with pytest.raises(pickle.UnpicklingError) as exc_info: + cache['k'] + msg = str(exc_info.value) + assert 'disk_pickle_key' in msg, ( + 'Error must reference the public kwarg name disk_pickle_key ' + '(not the Disk-only pickle_key); got: %r' % msg + ) From ae177569be766ecb685e1f3b4e79f26bef47875b Mon Sep 17 00:00:00 2001 From: Yeshwanth Nagaraj Date: Thu, 7 May 2026 19:34:47 +0530 Subject: [PATCH 5/6] Address pass-5 audit: 8 fixes (O1-O4 + Y1-Y3 + L1) Fixes the eight issues raised by the pass-5 reviewer swarm; all tightly inter-related so they ship in a single commit. 249 pre-existing tests still pass; 11 new regression tests cover the original bugs. == O1 + O4 (linked) -- inherited HMAC key channel == Pre-fix, default-mode FanoutCache children and copy.copy() targets re-resolved the pickle key from env / file every time, so a change to DISKCACHE_PICKLE_KEY between the original and the copy silently produced a copy with a different HMAC key (O1). And FanoutCache._inject_disk_pickle_key forwarded the resolved root key as the public ``disk_pickle_key=`` kwarg, which made every child Cache look user-explicit and refuse pickling (O4) -- defeating Cache.__getstate__'s intended exemption for default-mode caches. Introduces a private ``_disk_pickle_key_inherited`` kwarg on ``Cache.__init__`` (and FanoutCache.__init__) that: * Forwards an already-resolved key to a child / copy WITHOUT setting ``_disk._pickle_key_arg`` (so __getstate__ still treats the instance as default-mode and pickling remains allowed). * Skips env / file resolution and skips the warning (the upstream context already warned). * Is consumed by Cache._copy_for_same_process, FanoutCache._copy_for_same_process, FanoutCache._inject_disk_pickle_key, Deque._copy_for_same_process, and Index._copy_for_same_process. H2 protection preserved: explicit ``disk_pickle_key=`` (bytes / False) still flows through the public kwarg, sets ``_pickle_key_arg``, and __getstate__ refuses pickling. == O2 -- FanoutCache children get-or-create race == Two threads racing on ``fc.cache('foo')`` could both miss the dict, both construct a Cache on the same SQLite file, and the second store would overwrite the first -- leaving the first caller holding an orphan with an open connection that nobody else could close. Adds ``self._children_lock = threading.Lock()`` and wraps ``cache()``, ``deque()``, ``index()`` in double-checked locking. == O3 -- FanoutCache.close() leaks cached children == Pre-fix, ``close()`` only closed the shards and cleared the dicts; long-lived references via ``fc.cache('foo')`` continued to hold open SQLite handles. Iterates ``_caches`` / ``_deques`` / ``_indexes`` under the new lock, calls ``.close()`` (suppressing exceptions), clears, then closes shards. == Y1 -- KeyboardInterrupt window in pickle key file write == If ``KeyboardInterrupt`` / ``SystemExit`` fires AFTER ``os.write`` deposited the full key in the kernel but BEFORE ``write_succeeded = True`` ran, the finally block would unlink the file -- but concurrent readers can already see those bytes. Now the finally block re-reads the file; if the on-disk content matches the key we just generated, the write actually completed and we preserve the file. == Y2 -- ELOOP not translated to ValueError == A symlink loop inside the cache directory surfaced as a generic OSError from ``_safe_filename_path`` instead of ValueError, masking a tampered cache.db that pointed at a self-referential chain. Adds ``errno.ELOOP`` translation alongside the Windows winerror handling. == Y3 -- Wrong kwarg name in error wording == ``Cache(td, disk_pickle_key='not-hex')`` raised ``"pickle_key argument must be hex-encoded bytes"`` -- the user-facing kwarg is ``disk_pickle_key``, not ``pickle_key``. Cache.__init__ and FanoutCache.__init__ now pre-validate user-explicit keys with ``source='disk_pickle_key argument'``. Two adjacent operator-facing messages also updated to use the public kwarg name (RuntimeError "too short after retries" guidance and the disabled-mode warning). == L1 -- pylint W0707 + flake8 E305 == * core.py: the "too short after retries" RuntimeError gains ``raise ... from None`` to suppress the chained FileExistsError bookkeeping context. * core.py: extra blank line before ``MODE_NONE = 0``. == Tests added (11) == * test_o1_default_mode_cache_copy_survives_env_change * test_o1_default_mode_fanout_copy_survives_env_change * test_o1_default_mode_deque_copy_survives_env_change * test_o1_default_mode_index_copy_survives_env_change * test_o2_fanout_concurrent_cache_no_duplicate * test_o3_fanout_close_closes_children * test_o4_default_mode_child_cache_pickleable * test_o4_explicit_mode_child_cache_unpickleable * test_y1_keyboard_interrupt_after_full_write_preserves_file * test_y2_eloop_translates_to_value_error (skipped on Windows) * test_y3_error_message_uses_disk_pickle_key_kwarg_name CVE-2025-69872 / GHSA-w8v5-vhqr-4h9v. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- diskcache/core.py | 113 ++++++++++++--- diskcache/fanout.py | 158 ++++++++++++++++----- diskcache/persistent.py | 26 +++- tests/test_security.py | 306 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 544 insertions(+), 59 deletions(-) diff --git a/diskcache/core.py b/diskcache/core.py index 8ecf7fa..7ea177e 100644 --- a/diskcache/core.py +++ b/diskcache/core.py @@ -175,12 +175,16 @@ def _read_or_create_pickle_key_file(path): delay *= 2 continue + # ``from None`` suppresses the implicit ``FileExistsError`` + # context (pylint W0707): the chained exception is just the + # race-loop bookkeeping, not a real cause that operators need + # to debug. raise RuntimeError( 'diskcache: pickle key file %r is too short (%d < %d) ' 'after %d retries; it may be corrupted -- remove it to ' - 'regenerate, or pass pickle_key= explicitly.' + 'regenerate, or pass disk_pickle_key= explicitly.' % (path, last_short_len, PICKLE_KEY_MIN_LEN, attempt + 1) - ) + ) from None # We won the race. Generate and write the key. Use try/finally # so we clean up the half-baked file even on KeyboardInterrupt / @@ -218,8 +222,27 @@ def _read_or_create_pickle_key_file(path): with cl.suppress(OSError): os.close(fd) if not write_succeeded: - with cl.suppress(OSError): - os.unlink(path) + # CVE-2025-69872 (Y1): an async exception + # (``KeyboardInterrupt`` / ``SystemExit``) may have + # fired AFTER ``os.write`` deposited the full key in + # the kernel but BEFORE ``write_succeeded = True`` ran. + # Concurrent readers can already see that key, so + # unlinking it here would trigger + # ``RuntimeError: too short`` for every future + # process and silently change the HMAC key in any + # reader still holding the old bytes. Re-read the + # file: if it matches the key we just generated, the + # write actually completed -- preserve it. + try: + with open(path, 'rb') as fh: + on_disk = fh.read() + if on_disk == new_key: + write_succeeded = True + except OSError: + pass + if not write_succeeded: + with cl.suppress(OSError): + os.unlink(path) with cl.suppress(OSError): os.chmod(path, 0o600) @@ -273,7 +296,7 @@ def _emit_pickle_key_warning(source, directory, stacklevel): if source == _PKEY_SRC_DISABLED: warnings.warn( 'diskcache: pickle HMAC verification is disabled ' - '(pickle_key=False). The cache directory must be fully ' + '(disk_pickle_key=False). The cache directory must be fully ' 'trusted; an attacker with write access can achieve ' 'arbitrary code execution via CVE-2025-69872.', UnsafePickleWarning, @@ -291,7 +314,7 @@ def _emit_pickle_key_warning(source, directory, stacklevel): 'directory before the legitimate process initializes (and ' 'thereby controls the key). For production use set the %s ' 'environment variable to a hex-encoded random key ' - '(>= %d bytes), or pass pickle_key=... explicitly.' + '(>= %d bytes), or pass disk_pickle_key=... explicitly.' % (key_path, PICKLE_KEY_ENV, PICKLE_KEY_MIN_LEN), UnsafePickleWarning, stacklevel=stacklevel, @@ -299,6 +322,7 @@ def _emit_pickle_key_warning(source, directory, stacklevel): return True return False + MODE_NONE = 0 MODE_RAW = 1 MODE_BINARY = 2 @@ -684,6 +708,17 @@ def _safe_filename_path(self, filename): 'cache.db may be tampered)' % (filename, exc) ) from exc except OSError as exc: + # CVE-2025-69872 (Y2): a symlink loop (errno ELOOP) inside + # the cache directory surfaces as a generic ``OSError`` from + # ``os.path.realpath``. Without translation the caller + # would see an opaque OSError instead of the path-traversal + # signal it expects, masking a tampered cache.db that + # points at a self-referential symlink chain. + if getattr(exc, 'errno', None) == errno.ELOOP: + raise ValueError( + 'diskcache: cannot resolve cache filename %r (%s; ' + 'cache.db may be tampered)' % (filename, exc) + ) from exc # Windows-only: UNC / drive-not-found surface as bare # OSError or WinError 53 / 67 / 87 / 123 / 161 (path/name # invalid for filesystem use). Treat as malformed @@ -927,10 +962,33 @@ def __init__(self, directory=None, timeout=60, disk=Disk, **settings): # CVE-2025-69872: ``disk_pickle_key`` is intentionally treated as # constructor-only configuration. Persisting it in the Settings # table would either leak the secret into the same compromised - # cache directory or let an attacker make ``pickle_key=False`` + # cache directory or let an attacker make ``disk_pickle_key=False`` # sticky across restarts via ``cache.reset``. Pop it before any # of the regular settings plumbing runs. pickle_key = settings.pop('disk_pickle_key', _PICKLE_KEY_UNSET) + # CVE-2025-69872 (O1+O4 link): ``_disk_pickle_key_inherited`` is + # a private kwarg used by :class:`FanoutCache` and the + # ``_copy_for_same_process`` helpers to forward an + # already-resolved key (env var, on-disk file, parent shard) + # WITHOUT marking the child as user-explicit. When set, the + # child Cache stays pickleable in default mode (its + # ``_pickle_key_arg`` remains :data:`_PICKLE_KEY_UNSET`) but + # uses the parent's resolved key so sharding/copying remain + # deterministic across env/file changes. + inherited_key = settings.pop('_disk_pickle_key_inherited', None) + # CVE-2025-69872 (Y3): pre-validate the user-explicit key with + # the public kwarg name in the error message. Without this + # the failure surfaces from Disk's own validation as + # "pickle_key argument" -- confusing for users who only + # passed ``disk_pickle_key=``. + if ( + pickle_key is not _PICKLE_KEY_UNSET + and pickle_key is not None + and pickle_key is not False + ): + pickle_key = _coerce_pickle_key( + pickle_key, source='disk_pickle_key argument' + ) if directory is None: directory = tempfile.mkdtemp(prefix='diskcache-') @@ -1002,10 +1060,25 @@ def __init__(self, directory=None, timeout=60, disk=Disk, **settings): # it -- but Settings tables created by older patched builds or # by hand could in principle contain one). kwargs.pop('pickle_key', None) + kwargs.pop('_disk_pickle_key_inherited', None) if pickle_key is not _PICKLE_KEY_UNSET: kwargs['pickle_key'] = pickle_key self._disk = disk(directory, **kwargs) + # CVE-2025-69872 (O4): apply an inherited key (from a + # FanoutCache parent or ``_copy_for_same_process``) AFTER disk + # construction so it shows up only as ``_pickle_key_resolved`` + # -- never as ``_pickle_key_arg``. This keeps default-mode + # child Caches and copies pickleable while still forcing them + # to use the same HMAC key as the parent (otherwise sharded + # and copied caches would diverge from the original key). + if ( + inherited_key is not None + and pickle_key is _PICKLE_KEY_UNSET + ): + self._disk._pickle_key_resolved = inherited_key + self._disk._pickle_key_warned = True + # Set cached attributes: updates settings and sets pragmas. for key, value in sets.items(): @@ -2932,24 +3005,26 @@ def _copy_for_same_process(self): # Forward bytes / False explicitly so the new instance has # the same security posture as the original. kwargs['disk_pickle_key'] = arg + # CVE-2025-69872 V15 / O1: if the original lazily resolved its + # HMAC key from env / file, propagate the resolved bytes via + # the private ``_disk_pickle_key_inherited`` channel so an + # intervening change to ``DISKCACHE_PICKLE_KEY`` or + # ``.diskcache_pickle_key`` doesn't produce a copy with a + # different key (which would silently fail HMAC verification). + # Inherited keys leave ``_pickle_key_arg`` UNSET so the copy + # remains pickleable in default mode. + if ( + arg is _PICKLE_KEY_UNSET + and getattr(disk, '_pickle_key_resolved', None) is not None + and disk._pickle_key_resolved is not False + ): + kwargs['_disk_pickle_key_inherited'] = disk._pickle_key_resolved new = self.__class__( self.directory, timeout=self.timeout, disk=type(self.disk), **kwargs, ) - # CVE-2025-69872 V15: if the original has already lazily - # resolved its HMAC key from env / file, propagate the - # resolved bytes to the copy so an intervening change to - # ``DISKCACHE_PICKLE_KEY`` or ``.diskcache_pickle_key`` does - # not produce a copy that uses a different key than the - # original (which would silently fail HMAC verification). - if ( - arg is _PICKLE_KEY_UNSET - and getattr(disk, '_pickle_key_resolved', None) is not None - ): - new._disk._pickle_key_resolved = disk._pickle_key_resolved - new._disk._pickle_key_warned = True return new def reset(self, key, value=ENOVAL, update=True): diff --git a/diskcache/fanout.py b/diskcache/fanout.py index 5514447..0e54eaa 100644 --- a/diskcache/fanout.py +++ b/diskcache/fanout.py @@ -8,6 +8,7 @@ import os.path as op import sqlite3 import tempfile +import threading import time from .core import ( @@ -66,13 +67,37 @@ def __init__( # emit an UnsafePickleWarning even though the cache will never # exercise the pickle path. pickle_key_arg = settings.pop('disk_pickle_key', _PICKLE_KEY_UNSET) + # CVE-2025-69872 (O1+O4 link): private inherited-key channel + # used by ``_copy_for_same_process`` (and by Cache children + # constructed under another FanoutCache). When set we skip + # env/file resolution, skip the warning (the upstream context + # already warned), and leave ``_pickle_key_user_arg`` UNSET so + # the FanoutCache stays pickleable in default mode. + inherited_key = settings.pop('_disk_pickle_key_inherited', None) # V1: remember the user's original argument so __getstate__ # only refuses pickling when the user explicitly provided a # secret. Auto-generated / env-var keys can be re-resolved in # the receiving process, so default-mode FanoutCache pickling # remains supported. self._pickle_key_user_arg = pickle_key_arg - if getattr(disk, '_uses_pickle', True): + # CVE-2025-69872 (Y3): pre-validate the user-explicit key with + # the public kwarg name in the error message. + if ( + pickle_key_arg is not _PICKLE_KEY_UNSET + and pickle_key_arg is not None + and pickle_key_arg is not False + ): + from .core import _coerce_pickle_key + + pickle_key_arg = _coerce_pickle_key( + pickle_key_arg, source='disk_pickle_key argument' + ) + if inherited_key is not None and pickle_key_arg is _PICKLE_KEY_UNSET: + # Use the inherited key directly; do NOT resolve from env + # or file and do NOT warn -- the parent context already + # handled both. + resolved_key = inherited_key + elif getattr(disk, '_uses_pickle', True): if not op.isdir(directory): os.makedirs(directory, 0o755, exist_ok=True) resolved_key, source = _resolve_pickle_key_for_directory( @@ -89,8 +114,23 @@ def __init__( self._directory = directory self._disk = disk shard_kwargs = dict(settings) - if resolved_key is not _PICKLE_KEY_UNSET: - shard_kwargs['disk_pickle_key'] = resolved_key + if pickle_key_arg is not _PICKLE_KEY_UNSET and pickle_key_arg is not None: + # User-explicit (bytes / False): forward as the public + # kwarg so the shard's ``_pickle_key_arg`` reflects the + # explicit choice (and __getstate__ refuses pickling). + shard_kwargs['disk_pickle_key'] = ( + resolved_key if resolved_key is not _PICKLE_KEY_UNSET + else pickle_key_arg + ) + elif ( + resolved_key is not _PICKLE_KEY_UNSET + and resolved_key is not False + and resolved_key is not None + ): + # Default / env / file: forward via the inherited channel + # so each shard uses the same bytes without claiming to be + # user-explicit (default-mode pickling remains allowed). + shard_kwargs['_disk_pickle_key_inherited'] = resolved_key self._shards = tuple( Cache( directory=op.join(directory, '%03d' % num), @@ -109,6 +149,12 @@ def __init__( self._caches = {} self._deques = {} self._indexes = {} + # CVE-2025-69872 (O2): serialize child get-or-create so two + # concurrent ``cache('foo')`` / ``deque('foo')`` / ``index('foo')`` + # calls cannot construct two Cache instances on the same + # SQLite database (which would race on initial schema setup + # and hold separate connection pools). + self._children_lock = threading.Lock() @property def directory(self): @@ -569,11 +615,27 @@ def volume(self): def close(self): """Close database connection.""" + # CVE-2025-69872 (O3): close cached child Cache/Deque/Index + # instances BEFORE the shards so their SQLite connections + # release file handles cleanly (otherwise long-lived + # references via ``fc.cache('foo')`` keep their connections + # open after ``fc.close()``, leaking handles and quietly + # accepting writes against a "closed" FanoutCache). + with self._children_lock: + for child in list(self._caches.values()): + with cl.suppress(Exception): + child.close() + for child in list(self._deques.values()): + with cl.suppress(Exception): + child._cache.close() + for child in list(self._indexes.values()): + with cl.suppress(Exception): + child._cache.close() + self._caches.clear() + self._deques.clear() + self._indexes.clear() for shard in self._shards: shard.close() - self._caches.clear() - self._deques.clear() - self._indexes.clear() def __enter__(self): return self @@ -668,14 +730,14 @@ def cache(self, name, timeout=60, disk=None, **settings): :return: Cache with given name """ - _caches = self._caches - - try: - return _caches[name] - except KeyError: + # CVE-2025-69872 (O2): double-checked-locking get-or-create. + with self._children_lock: + existing = self._caches.get(name) + if existing is not None: + return existing parts = name.split('/') directory = op.join(self._directory, 'cache', *parts) - # CVE-2025-69872 V16: forward the FanoutCache's resolved + # CVE-2025-69872 V16/O4: forward the FanoutCache's resolved # pickle key to the child cache so it doesn't auto- # generate its own (different) ``.diskcache_pickle_key`` # under the child directory. Caller-supplied settings @@ -687,7 +749,7 @@ def cache(self, name, timeout=60, disk=None, **settings): disk=self._disk if disk is None else Disk, **settings, ) - _caches[name] = temp + self._caches[name] = temp return temp def deque(self, name, maxlen=None): @@ -708,11 +770,10 @@ def deque(self, name, maxlen=None): :return: Deque with given name """ - _deques = self._deques - - try: - return _deques[name] - except KeyError: + with self._children_lock: + existing = self._deques.get(name) + if existing is not None: + return existing parts = name.split('/') directory = op.join(self._directory, 'deque', *parts) child_kwargs = {} @@ -724,7 +785,7 @@ def deque(self, name, maxlen=None): **child_kwargs, ) deque = Deque.fromcache(cache, maxlen=maxlen) - _deques[name] = deque + self._deques[name] = deque return deque def index(self, name): @@ -747,11 +808,10 @@ def index(self, name): :return: Index with given name """ - _indexes = self._indexes - - try: - return _indexes[name] - except KeyError: + with self._children_lock: + existing = self._indexes.get(name) + if existing is not None: + return existing parts = name.split('/') directory = op.join(self._directory, 'index', *parts) child_kwargs = {} @@ -763,32 +823,48 @@ def index(self, name): **child_kwargs, ) index = Index.fromcache(cache) - _indexes[name] = index + self._indexes[name] = index return index def _inject_disk_pickle_key(self, kwargs): - """CVE-2025-69872 V16 helper: forward the FanoutCache's + """CVE-2025-69872 V16/O4 helper: forward the FanoutCache's resolved pickle key to a child Cache constructor's kwargs so the child does not run an independent default-fallback resolution (which would create a separate ``.diskcache_pickle_key`` file under the child directory and, critically, would use a *different* HMAC key than the parent and its sibling shards). + + Default-mode keys are forwarded via ``_disk_pickle_key_inherited`` + so the child's ``_pickle_key_arg`` stays UNSET -- otherwise + ``Cache.__getstate__`` would refuse to pickle a default-mode + child (the resolved key isn't a user-supplied secret, just an + env/file value the child can't decide for itself). """ - if 'disk_pickle_key' in kwargs: + if ( + 'disk_pickle_key' in kwargs + or '_disk_pickle_key_inherited' in kwargs + ): return # caller-supplied wins - # Prefer the user's explicit argument (so the child uses - # exactly the same secret). Fall back to the resolved key on - # shard 0 (which the FanoutCache resolved at construction - # time and shared with every shard). + # User-explicit (bytes / False): forward as the public kwarg + # so the child's __getstate__ refuses pickling, matching the + # parent's posture. arg = self._pickle_key_user_arg if arg is not _PICKLE_KEY_UNSET and arg is not None: kwargs['disk_pickle_key'] = arg return + # Default mode: forward the already-resolved bytes via the + # inherited channel so the child remains pickleable but uses + # the same key as the parent and its sibling shards. if self._shards: - shard_arg = self._shards[0]._disk._pickle_key_arg + shard_disk = self._shards[0]._disk + shard_arg = shard_disk._pickle_key_arg if shard_arg is not _PICKLE_KEY_UNSET and shard_arg is not None: kwargs['disk_pickle_key'] = shard_arg + return + resolved = getattr(shard_disk, '_pickle_key_resolved', None) + if resolved is not None and resolved is not False: + kwargs['_disk_pickle_key_inherited'] = resolved def __copy__(self): return self._copy_for_same_process() @@ -797,14 +873,24 @@ def __deepcopy__(self, memo): return self._copy_for_same_process() def _copy_for_same_process(self): - # CVE-2025-69872 V19: same-process copy preserves the user's - # explicit pickle_key (or False) so a copied FanoutCache stays - # in the same security mode as the original. Default-mode - # FanoutCaches re-resolve via env / file in the new instance. + # CVE-2025-69872 V19/O1: same-process copy preserves the + # user's explicit pickle_key (or False) so a copied + # FanoutCache stays in the same security mode as the + # original. Default-mode FanoutCaches forward the already- + # resolved key via the inherited channel so an intervening + # change to ``DISKCACHE_PICKLE_KEY`` or + # ``.diskcache_pickle_key`` does NOT produce a copy that + # uses a different key (which would silently fail HMAC + # verification on shared data). kwargs = {} arg = self._pickle_key_user_arg if arg is not _PICKLE_KEY_UNSET: kwargs['disk_pickle_key'] = arg + elif self._shards: + shard_disk = self._shards[0]._disk + resolved = getattr(shard_disk, '_pickle_key_resolved', None) + if resolved is not None and resolved is not False: + kwargs['_disk_pickle_key_inherited'] = resolved return self.__class__( self._directory, shards=self._count, diff --git a/diskcache/persistent.py b/diskcache/persistent.py index 8d710f3..03b6696 100644 --- a/diskcache/persistent.py +++ b/diskcache/persistent.py @@ -357,14 +357,23 @@ def __deepcopy__(self, memo): return self._copy_for_same_process() def _copy_for_same_process(self): - # CVE-2025-69872 V19: same-process Deque copy preserves the + # CVE-2025-69872 V19/O1: same-process Deque copy preserves the # underlying Cache's explicit pickle_key so the copy stays - # readable. Default-mode caches re-resolve from env / file. + # readable. Default-mode caches forward the already-resolved + # key via the private inherited channel so env / file changes + # between calls don't produce a copy that disagrees with the + # original on the HMAC key. cache = self._cache arg = getattr(cache._disk, '_pickle_key_arg', _PICKLE_KEY_UNSET) if arg is not _PICKLE_KEY_UNSET and arg is not None: new_cache = Cache(self.directory, disk_pickle_key=arg) return type(self).fromcache(new_cache, maxlen=self.maxlen) + resolved = getattr(cache._disk, '_pickle_key_resolved', None) + if resolved is not None and resolved is not False: + new_cache = Cache( + self.directory, _disk_pickle_key_inherited=resolved + ) + return type(self).fromcache(new_cache, maxlen=self.maxlen) return type(self)(directory=self.directory, maxlen=self.maxlen) def append(self, value): @@ -1145,13 +1154,22 @@ def __deepcopy__(self, memo): return self._copy_for_same_process() def _copy_for_same_process(self): - # CVE-2025-69872 V19: same-process Index copy preserves the - # underlying Cache's explicit pickle_key. + # CVE-2025-69872 V19/O1: same-process Index copy preserves the + # underlying Cache's explicit pickle_key. Default-mode caches + # forward the already-resolved key via the private inherited + # channel so env / file changes between calls don't produce a + # copy that disagrees with the original on the HMAC key. cache = self._cache arg = getattr(cache._disk, '_pickle_key_arg', _PICKLE_KEY_UNSET) if arg is not _PICKLE_KEY_UNSET and arg is not None: new_cache = Cache(self.directory, disk_pickle_key=arg) return type(self).fromcache(new_cache) + resolved = getattr(cache._disk, '_pickle_key_resolved', None) + if resolved is not None and resolved is not False: + new_cache = Cache( + self.directory, _disk_pickle_key_inherited=resolved + ) + return type(self).fromcache(new_cache) return type(self)(self.directory) def __eq__(self, other): diff --git a/tests/test_security.py b/tests/test_security.py index 52b48bc..6fd81dc 100644 --- a/tests/test_security.py +++ b/tests/test_security.py @@ -1524,3 +1524,309 @@ def test_v21_legacy_error_mentions_disk_pickle_key( 'Error must reference the public kwarg name disk_pickle_key ' '(not the Disk-only pickle_key); got: %r' % msg ) + + +# -- Pass-5: O1 default-mode copies survive env changes ---------------- + + +def test_o1_default_mode_cache_copy_survives_env_change( + tmp_cache_dir, monkeypatch +): + """O1 (Cache): copy.copy(cache) in default mode must keep using the + original key even if DISKCACHE_PICKLE_KEY is replaced before the + copy is constructed.""" + import copy + + monkeypatch.setenv(PICKLE_KEY_ENV, secrets.token_bytes(32).hex()) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir) as cache: + cache['k'] = {'v': 1} + assert cache['k'] == {'v': 1} # force lazy resolution + monkeypatch.setenv(PICKLE_KEY_ENV, secrets.token_bytes(32).hex()) + shallow = copy.copy(cache) + try: + assert shallow['k'] == {'v': 1} + finally: + shallow.close() + + +def test_o1_default_mode_fanout_copy_survives_env_change( + tmp_cache_dir, monkeypatch +): + """O1 (FanoutCache): copy.copy(fc) must keep using the original key + even if DISKCACHE_PICKLE_KEY is replaced before the copy is made. + Without the inherited-key propagation the copy would re-resolve + from the (now different) env var and silently fail HMAC.""" + import copy + + monkeypatch.setenv(PICKLE_KEY_ENV, secrets.token_bytes(32).hex()) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.FanoutCache(tmp_cache_dir, shards=4) as fc: + fc['k'] = {'v': 1} + assert fc['k'] == {'v': 1} + monkeypatch.setenv(PICKLE_KEY_ENV, secrets.token_bytes(32).hex()) + shallow = copy.copy(fc) + try: + assert shallow['k'] == {'v': 1} + finally: + shallow.close() + + +def test_o1_default_mode_deque_copy_survives_env_change( + tmp_cache_dir, monkeypatch +): + """O1 (Deque): copy.copy(deque) must keep the original key.""" + import copy + + monkeypatch.setenv(PICKLE_KEY_ENV, secrets.token_bytes(32).hex()) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + deque = dc.Deque(directory=tmp_cache_dir) + try: + deque.append({'v': 1}) + assert list(deque) == [{'v': 1}] + monkeypatch.setenv(PICKLE_KEY_ENV, secrets.token_bytes(32).hex()) + shallow = copy.copy(deque) + try: + assert list(shallow) == [{'v': 1}] + finally: + shallow._cache.close() + finally: + deque._cache.close() + + +def test_o1_default_mode_index_copy_survives_env_change( + tmp_cache_dir, monkeypatch +): + """O1 (Index): copy.copy(index) must keep the original key.""" + import copy + + monkeypatch.setenv(PICKLE_KEY_ENV, secrets.token_bytes(32).hex()) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + idx = dc.Index(tmp_cache_dir) + try: + idx['k'] = {'v': 1} + assert idx['k'] == {'v': 1} + monkeypatch.setenv(PICKLE_KEY_ENV, secrets.token_bytes(32).hex()) + shallow = copy.copy(idx) + try: + assert shallow['k'] == {'v': 1} + finally: + shallow._cache.close() + finally: + idx._cache.close() + + +# -- Pass-5: O2 FanoutCache children get-or-create lock ---------------- + + +def test_o2_fanout_concurrent_cache_no_duplicate(tmp_cache_dir, clear_env): + """O2: two threads calling fc.cache('foo') simultaneously must get + the SAME instance. Without the lock, both can miss the dict, both + construct, the second store overwrites the first, and the first + caller is left with an orphan Cache (open SQLite handle, no + references in fc._caches).""" + import threading + + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.FanoutCache(tmp_cache_dir, shards=2) as fc: + results = [] + barrier = threading.Barrier(8) + + def grab(): + barrier.wait() + results.append(fc.cache('shared')) + + threads = [threading.Thread(target=grab) for _ in range(8)] + for t in threads: + t.start() + for t in threads: + t.join() + + first = results[0] + assert all(r is first for r in results), ( + 'All threads must see the same cached child instance' + ) + assert len(fc._caches) == 1 + + +# -- Pass-5: O3 close() closes cached children ------------------------- + + +def test_o3_fanout_close_closes_children(tmp_cache_dir, clear_env): + """O3: ``fc.close()`` must close cached child Cache/Deque/Index + instances. Otherwise long-lived references leak SQLite handles. + + We can't assert ``set()`` raises post-close because :meth:`Cache.set` + lazily reopens its thread-local connection. Instead assert the + invariants close() must establish: the per-thread SQLite + connection on each child is gone, and the FanoutCache no longer + holds references in its caches/deques/indexes dicts. + """ + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + fc = dc.FanoutCache(tmp_cache_dir, shards=2) + child_cache = fc.cache('child_cache') + child_deque = fc.deque('child_deque') + child_index = fc.index('child_index') + # Force connection creation in this thread. + child_cache['k'] = 1 + child_deque.append('x') + child_index['a'] = 1 + # Sanity: connections currently exist. + assert getattr(child_cache._local, 'con', None) is not None + assert getattr(child_deque._cache._local, 'con', None) is not None + assert getattr(child_index._cache._local, 'con', None) is not None + + fc.close() + + # Each cached child must have had its connection closed. + assert getattr(child_cache._local, 'con', None) is None, ( + 'O3: fc.close() did not close the cached child Cache' + ) + assert getattr(child_deque._cache._local, 'con', None) is None, ( + 'O3: fc.close() did not close the cached child Deque cache' + ) + assert getattr(child_index._cache._local, 'con', None) is None, ( + 'O3: fc.close() did not close the cached child Index cache' + ) + + # The dicts must be cleared so the parent can be safely + # reopened later without leaking the old children. + assert fc._caches == {} + assert fc._deques == {} + assert fc._indexes == {} + + +# -- Pass-5: O4 child Cache pickling depends on key source ------------- + + +def test_o4_default_mode_child_cache_pickleable(tmp_cache_dir, clear_env): + """O4: in default mode the FanoutCache and its child Cache must + be pickleable -- the inherited key isn't a user-supplied secret.""" + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.FanoutCache(tmp_cache_dir, shards=2) as fc: + child = fc.cache('foo') + # The child must NOT have an explicit _pickle_key_arg. + from diskcache.core import _PICKLE_KEY_UNSET + assert child._disk._pickle_key_arg is _PICKLE_KEY_UNSET + # And it must be pickleable. + blob = pickle.dumps(child) + restored = pickle.loads(blob) + try: + assert restored.directory == child.directory + finally: + restored.close() + + +def test_o4_explicit_mode_child_cache_unpickleable( + tmp_cache_dir, clear_env +): + """O4: when the user passes an explicit ``disk_pickle_key`` to + FanoutCache, the child Cache inherits that explicit key and + pickling must be refused (mirrors the parent's posture).""" + secret = secrets.token_bytes(32) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.FanoutCache( + tmp_cache_dir, shards=2, disk_pickle_key=secret + ) as fc: + child = fc.cache('foo') + with pytest.raises(TypeError, match='disk_pickle_key'): + pickle.dumps(child) + + +# -- Pass-5: Y1 KeyboardInterrupt after full write preserves key file -- + + +def test_y1_keyboard_interrupt_after_full_write_preserves_file( + tmp_cache_dir, clear_env, monkeypatch +): + """Y1: if KeyboardInterrupt fires AFTER os.write has deposited the + full key in the kernel but BEFORE ``write_succeeded = True`` runs, + the finally block must NOT unlink the file -- concurrent readers + can already see that key bytes.""" + from diskcache import core as dc_core + + real_write = os.write + + def evil_write(fd, data): + n = real_write(fd, data) + # Simulate an async exception firing right after os.write + # returned the full count -- before the assignment below. + raise KeyboardInterrupt('simulated SIGINT') + + monkeypatch.setattr(dc_core.os, 'write', evil_write) + keyfile = op.join(tmp_cache_dir, PICKLE_KEY_FILENAME) + with pytest.raises(KeyboardInterrupt): + dc_core._read_or_create_pickle_key_file(keyfile) + # Restore real os.write for the assertion phase. + monkeypatch.setattr(dc_core.os, 'write', real_write) + + # File must still exist with a valid key (>= MIN_LEN bytes). + assert op.exists(keyfile), ( + 'Y1: file was unlinked even though os.write completed -- ' + 'concurrent readers may already hold this key.' + ) + with open(keyfile, 'rb') as fh: + on_disk = fh.read() + assert len(on_disk) >= dc_core.PICKLE_KEY_MIN_LEN + + +# -- Pass-5: Y2 ELOOP translates to ValueError ------------------------- + + +@pytest.mark.skipif( + os.name == 'nt', reason='symlink loops require admin on Windows' +) +def test_y2_eloop_translates_to_value_error(tmp_cache_dir, clear_env): + """Y2: a symlink loop inside the cache directory (errno ELOOP) + must surface as ValueError from _safe_filename_path, signalling + a tampered cache.db row -- not as an opaque OSError.""" + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir) as cache: + disk = cache._disk + # Create a symlink loop a -> b -> a inside the cache dir. + a = op.join(tmp_cache_dir, 'loop_a') + b = op.join(tmp_cache_dir, 'loop_b') + try: + os.symlink(b, a) + os.symlink(a, b) + except (OSError, NotImplementedError) as exc: + pytest.skip('cannot create symlink loop: %s' % exc) + with pytest.raises(ValueError, match='cache.db may be tampered'): + disk._safe_filename_path('loop_a') + + +# -- Pass-5: Y3 error wording uses the public kwarg name -------------- + + +def test_y3_error_message_uses_disk_pickle_key_kwarg_name( + tmp_cache_dir, clear_env +): + """Y3: ``Cache(td, disk_pickle_key='not-hex')`` must raise with + the public kwarg name (``disk_pickle_key``) in the error so users + aren't confused by the underlying ``Disk(pickle_key=...)`` kwarg + they never typed.""" + with pytest.raises(ValueError) as exc_info: + dc.Cache(tmp_cache_dir, disk_pickle_key='not-hex') + msg = str(exc_info.value) + assert 'disk_pickle_key' in msg, ( + 'error must mention the public kwarg disk_pickle_key, got: %r' + % msg + ) + + # FanoutCache should also use the public name. + with pytest.raises(ValueError) as exc_info2: + dc.FanoutCache(tmp_cache_dir, disk_pickle_key='not-hex') + msg2 = str(exc_info2.value) + assert 'disk_pickle_key' in msg2, ( + 'FanoutCache error must mention disk_pickle_key, got: %r' % msg2 + ) + From f98f2183b4261a0e0c31f88f2e157eaba4a4071e Mon Sep 17 00:00:00 2001 From: Yeshwanth Nagaraj Date: Thu, 7 May 2026 21:29:17 +0530 Subject: [PATCH 6/6] Address pass-6 audit: 4 fixes (L1 + C1 + L2 + C2 tests) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements the four confirmed/likely findings from the pass-6 21-agent multi-model review. L1 (HIGH) — child cache key recoverability after pickle ======================================================= ``FanoutCache.cache(name)`` returns a child Cache that inherits the parent's HMAC key via the private ``_disk_pickle_key_inherited`` kwarg, but the child directory previously contained no ``.diskcache_pickle_key`` file (the key lived only at the FanoutCache root). ``pickle.dumps(child)`` succeeded, but a receiving process opening ``Cache(child_dir)`` would auto-generate a new random key and fail HMAC verification on the first read. Fix: a new module-level helper ``_write_inherited_pickle_key_file`` atomically writes the inherited key to ``/ .diskcache_pickle_key`` so the receiver can recover it via the normal default-resolution path. The write is gated on ``isinstance(inherited_key, (bytes, bytearray)) and len(...) >= PICKLE_KEY_MIN_LEN`` so legacy mode (``False``) and explicit-key mode do NOT leak a secret to disk -- only the inherited / default-resolved key path persists. As a side effect FanoutCache shards now also mirror the root key file (verified by the updated ``test_fanoutcache_default_emits_one_warning`` test). C1 (HIGH) — FanoutCache.close() fences subsequent use ===================================================== After ``fc.close()``, calling ``fc.cache('new')`` previously succeeded and constructed a fresh child against the just-closed shards, silently leaking handles. Now FanoutCache tracks ``self._closed`` (initialized False, set True under ``_children_lock`` inside ``close()``); ``cache()``, ``deque()``, and ``index()`` raise ``RuntimeError`` if invoked after close. ``Cache.close()`` semantics are unchanged -- existing child references obtained before close remain usable per the documented per-thread connection behavior. ``close()`` remains idempotent. L2 (MEDIUM) — Y1 finally cleanup uses size+content check ======================================================== The original Y1 finally block re-read the file after a failed write and unlinked unconditionally on read failure. This could strand concurrent readers if the read failed for transient reasons. The finally block now uses file size as the primary signal: * ``file_size < len(new_key)`` -> definitively partial, unlink. * ``file_size == len(new_key)`` -> verify content matches our generated key; if so claim success (preserve). * ``file_size > len(new_key)`` or unknown -> leave alone (someone else owns this file now, or we cannot determine state). This biases toward false-positive preserve over false-negative unlink, matching the stated invariant: stranding readers who already hold a valid key is worse than leaving an empty / oversize file (subsequent processes ride out the empty-read backoff above). C2 — Test improvements ====================== * ``test_o2_fanout_concurrent_cache_no_duplicate``: replaced the Barrier-only version with a deterministic version that monkey-patches ``Cache.__init__`` to sleep 0.2s during child construction, forcing concurrent threads to genuinely race the get-or-create. Asserts elapsed < 1.0s (lock serializes; without the lock duplicate construction would double). * ``test_o3_fanout_close_closes_children``: replaced ``_local.con``-private-state asserts with a behavioral check that opens each child's SQLite database with a fresh ``BEGIN EXCLUSIVE`` after close -- if the original handle is still held this raises ``OperationalError``. * New ``test_y1_finally_size_based_cleanup_partial`` and ``test_y1_finally_size_based_cleanup_complete`` exercise the L2 size-based cleanup branches. * New cross-platform ``test_y2_eloop_translates_to_value_error_via_monkeypatch`` patches ``op.realpath`` to raise ``OSError(ELOOP)`` so the translation is verified without requiring admin-only symlinks on Windows. * New ``test_l1_pickled_child_cache_recovers_data_in_receiver`` pickles a child Cache with a complex tuple key, then verifies the data round-trips both via direct ``Cache(child_dir)`` and via ``pickle.loads(blob)``. * New ``test_l1_inherited_key_file_written_atomically_in_child_dir`` asserts the keyfile exists in the child directory and matches the parent's bytes. * New ``test_l1_explicit_key_does_NOT_write_keyfile_into_child_dir`` and ``test_l1_legacy_mode_does_NOT_write_keyfile_into_child_dir`` verify the negative path -- explicit secrets and legacy mode must NOT persist anything to disk. * New ``test_c1_fanoutcache_use_after_close_raises`` and ``test_c1_fanoutcache_close_idempotent`` exercise the C1 fence. Test counts: 268 passed, 1 skipped (was 260 passed, 1 skipped). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- diskcache/core.py | 107 +++++++++++--- diskcache/fanout.py | 28 ++++ tests/test_security.py | 322 +++++++++++++++++++++++++++++++++++++---- 3 files changed, 412 insertions(+), 45 deletions(-) diff --git a/diskcache/core.py b/diskcache/core.py index 7ea177e..e626c01 100644 --- a/diskcache/core.py +++ b/diskcache/core.py @@ -222,27 +222,40 @@ def _read_or_create_pickle_key_file(path): with cl.suppress(OSError): os.close(fd) if not write_succeeded: - # CVE-2025-69872 (Y1): an async exception - # (``KeyboardInterrupt`` / ``SystemExit``) may have - # fired AFTER ``os.write`` deposited the full key in - # the kernel but BEFORE ``write_succeeded = True`` ran. - # Concurrent readers can already see that key, so - # unlinking it here would trigger - # ``RuntimeError: too short`` for every future - # process and silently change the HMAC key in any - # reader still holding the old bytes. Re-read the - # file: if it matches the key we just generated, the - # write actually completed -- preserve it. + # CVE-2025-69872 (Y1 + L2): prefer false-positive + # preserve over false-negative unlink. Stranding + # concurrent readers (who may already hold a valid + # key) is worse than leaving an incomplete file + # (which subsequent processes will retry past via + # the empty-read backoff above). Use file size as + # the primary signal: only definitively-partial + # writes (size < len(new_key)) are unlinked; same- + # size files are content-verified before being + # claimed; larger / unknown sizes are left alone. try: - with open(path, 'rb') as fh: - on_disk = fh.read() - if on_disk == new_key: - write_succeeded = True + file_size = os.path.getsize(path) except OSError: - pass - if not write_succeeded: + file_size = None + if ( + file_size is not None + and file_size < len(new_key) + ): + # Definitely incomplete -- safe to remove. with cl.suppress(OSError): os.unlink(path) + elif file_size == len(new_key): + # Same size as ours: verify content matches the + # key we generated so we don't take credit for + # another process's file. + try: + with open(path, 'rb') as fh: + if fh.read() == new_key: + write_succeeded = True + except OSError: + pass + # file_size > len(new_key) or None: leave alone -- + # someone else owns this file now, or we cannot + # determine its state. with cl.suppress(OSError): os.chmod(path, 0o600) @@ -254,6 +267,49 @@ def _read_or_create_pickle_key_file(path): ) +def _write_inherited_pickle_key_file(path, key_bytes): + """Atomically write ``key_bytes`` to ``path``. No-op if file already + exists (assumes existing content is the correct key -- caller can + verify if needed). Used for CVE-2025-69872 L1 recoverability: + child Cache instances inherit a key from a parent context (e.g. + FanoutCache or copy.copy) and need a recoverable key file in + their own directory so future processes opening the directory + directly can re-resolve the key via default resolution. + """ + directory = op.dirname(path) or '.' + if not op.isdir(directory): + os.makedirs(directory, 0o755, exist_ok=True) + flags = os.O_CREAT | os.O_EXCL | os.O_WRONLY + if hasattr(os, 'O_BINARY'): + flags |= os.O_BINARY + try: + fd = os.open(path, flags, 0o600) + except FileExistsError: + return # already present (us or another process) + write_succeeded = False + try: + written = 0 + while written < len(key_bytes): + n = os.write(fd, key_bytes[written:]) + if n <= 0: + raise OSError( + 'diskcache: short write to inherited pickle key ' + 'file %r' % path + ) + written += n + write_succeeded = True + with cl.suppress(OSError): + os.fsync(fd) + finally: + with cl.suppress(OSError): + os.close(fd) + if not write_succeeded: + with cl.suppress(OSError): + os.unlink(path) + with cl.suppress(OSError): + os.chmod(path, 0o600) + + def _resolve_pickle_key_for_directory(directory, arg): """Resolve the pickle HMAC key for ``directory``. @@ -1078,6 +1134,23 @@ def __init__(self, directory=None, timeout=60, disk=Disk, **settings): ): self._disk._pickle_key_resolved = inherited_key self._disk._pickle_key_warned = True + # CVE-2025-69872 (L1): persist the inherited key into the + # child directory so a process that opens this directory + # directly later (e.g. after pickling a child Cache from + # FanoutCache.cache(name)) can recover the same HMAC key + # via default resolution -- otherwise the receiver would + # auto-generate a fresh random key and fail HMAC + # verification on the first read. Only persist real key + # bytes -- not the legacy ``False`` sentinel and not + # short/invalid values (defensive). + if ( + isinstance(inherited_key, (bytes, bytearray)) + and len(inherited_key) >= PICKLE_KEY_MIN_LEN + ): + _write_inherited_pickle_key_file( + op.join(directory, PICKLE_KEY_FILENAME), + bytes(inherited_key), + ) # Set cached attributes: updates settings and sets pragmas. diff --git a/diskcache/fanout.py b/diskcache/fanout.py index 0e54eaa..2353c0f 100644 --- a/diskcache/fanout.py +++ b/diskcache/fanout.py @@ -155,6 +155,12 @@ def __init__( # SQLite database (which would race on initial schema setup # and hold separate connection pools). self._children_lock = threading.Lock() + # CVE-2025-69872 (C1): track close state so cache()/deque()/ + # index() refuse to construct NEW children after close(). + # Existing child references obtained before close() remain + # usable per :meth:`Cache.close` semantics (per-thread + # connection that lazily reopens). + self._closed = False @property def directory(self): @@ -622,6 +628,11 @@ def close(self): # open after ``fc.close()``, leaking handles and quietly # accepting writes against a "closed" FanoutCache). with self._children_lock: + # CVE-2025-69872 (C1): mark closed under the lock so any + # concurrent cache()/deque()/index() either completes + # before us (returning a still-valid child we then close) + # or sees ``_closed`` True and refuses cleanly. + self._closed = True for child in list(self._caches.values()): with cl.suppress(Exception): child.close() @@ -732,6 +743,13 @@ def cache(self, name, timeout=60, disk=None, **settings): """ # CVE-2025-69872 (O2): double-checked-locking get-or-create. with self._children_lock: + if self._closed: + raise RuntimeError( + 'diskcache: FanoutCache is closed; cannot create ' + 'new children. (Existing child references obtained ' + 'before close() remain usable per Cache.close() ' + 'semantics.)' + ) existing = self._caches.get(name) if existing is not None: return existing @@ -771,6 +789,11 @@ def deque(self, name, maxlen=None): """ with self._children_lock: + if self._closed: + raise RuntimeError( + 'diskcache: FanoutCache is closed; cannot create ' + 'new children.' + ) existing = self._deques.get(name) if existing is not None: return existing @@ -809,6 +832,11 @@ def index(self, name): """ with self._children_lock: + if self._closed: + raise RuntimeError( + 'diskcache: FanoutCache is closed; cannot create ' + 'new children.' + ) existing = self._indexes.get(name) if existing is not None: return existing diff --git a/tests/test_security.py b/tests/test_security.py index 6fd81dc..885ced8 100644 --- a/tests/test_security.py +++ b/tests/test_security.py @@ -20,6 +20,7 @@ import sys import tempfile import warnings +import errno import pytest @@ -569,13 +570,25 @@ def test_fanoutcache_default_emits_one_warning(tmp_cache_dir, clear_env): % len(pkey_warnings) ) - # Root-level key file exists; no shard-level key files. - assert op.exists(op.join(tmp_cache_dir, PICKLE_KEY_FILENAME)) + # Root-level key file exists; CVE-2025-69872 (L1) shards now ALSO + # mirror the same key file into their own directory so a process + # that opens a shard directly as ``Cache(shard_dir)`` can recover + # the same HMAC key via default resolution. All copies must + # equal the root key. + root_key_path = op.join(tmp_cache_dir, PICKLE_KEY_FILENAME) + assert op.exists(root_key_path) + with open(root_key_path, 'rb') as fh: + root_key = fh.read() for num in range(4): shard_dir = op.join(tmp_cache_dir, '%03d' % num) - assert not op.exists(op.join(shard_dir, PICKLE_KEY_FILENAME)), ( - 'shard %d must not have its own key file' % num + shard_key_path = op.join(shard_dir, PICKLE_KEY_FILENAME) + assert op.exists(shard_key_path), ( + 'L1: shard %d must mirror the root key file' % num ) + with open(shard_key_path, 'rb') as fh: + assert fh.read() == root_key, ( + 'L1: shard %d key file must equal root key' % num + ) def test_fanoutcache_two_instances_share_default_key( @@ -1623,35 +1636,75 @@ def test_o1_default_mode_index_copy_survives_env_change( # -- Pass-5: O2 FanoutCache children get-or-create lock ---------------- -def test_o2_fanout_concurrent_cache_no_duplicate(tmp_cache_dir, clear_env): +def test_o2_fanout_concurrent_cache_no_duplicate( + tmp_cache_dir, clear_env, monkeypatch +): """O2: two threads calling fc.cache('foo') simultaneously must get the SAME instance. Without the lock, both can miss the dict, both construct, the second store overwrites the first, and the first caller is left with an orphan Cache (open SQLite handle, no - references in fc._caches).""" + references in fc._caches). + + This version is deterministic: we monkey-patch ``Cache.__init__`` + with a wrapper that sleeps before returning, so both threads enter + the get-or-create critical section while the first construction + is still in flight. Without the lock the second thread observes + an empty ``self._caches`` and constructs a duplicate. + """ import threading + import time + + real_init = dc.Cache.__init__ + construct_count = {'n': 0} + construct_lock = threading.Lock() + + def slow_init(self, *args, **kwargs): + with construct_lock: + construct_count['n'] += 1 + n = construct_count['n'] + real_init(self, *args, **kwargs) + # Only slow down child cache construction (under the + # FanoutCache 'cache' subdirectory), not the FanoutCache's + # own shards. Sleep races the second caller into the + # critical section. + if n > 0 and 'cache' in str(self.directory).split(os.sep): + time.sleep(0.2) with warnings.catch_warnings(): warnings.simplefilter('ignore', dc.UnsafePickleWarning) with dc.FanoutCache(tmp_cache_dir, shards=2) as fc: + # Patch only after FanoutCache is fully constructed so + # shard inits aren't slowed. + monkeypatch.setattr(dc.Cache, '__init__', slow_init) + results = [] - barrier = threading.Barrier(8) + results_lock = threading.Lock() def grab(): - barrier.wait() - results.append(fc.cache('shared')) + child = fc.cache('shared') + with results_lock: + results.append(child) threads = [threading.Thread(target=grab) for _ in range(8)] + t0 = time.time() for t in threads: t.start() for t in threads: t.join() + elapsed = time.time() - t0 first = results[0] assert all(r is first for r in results), ( 'All threads must see the same cached child instance' ) assert len(fc._caches) == 1 + # If the lock works, all threads serialize on the first + # construction (~0.2s); a duplicate construction would + # double the time. Allow generous slack. + assert elapsed < 1.0, ( + 'O2: child construction appears to have run more ' + 'than once concurrently (elapsed=%.2fs)' % elapsed + ) # -- Pass-5: O3 close() closes cached children ------------------------- @@ -1661,11 +1714,13 @@ def test_o3_fanout_close_closes_children(tmp_cache_dir, clear_env): """O3: ``fc.close()`` must close cached child Cache/Deque/Index instances. Otherwise long-lived references leak SQLite handles. - We can't assert ``set()`` raises post-close because :meth:`Cache.set` - lazily reopens its thread-local connection. Instead assert the - invariants close() must establish: the per-thread SQLite - connection on each child is gone, and the FanoutCache no longer - holds references in its caches/deques/indexes dicts. + Behavioral check: after ``fc.close()`` we must be able to open a + fresh sqlite3 connection to each child's database file in + exclusive locking mode. If close() failed to release the + underlying handle (e.g. on Windows where the file is held by the + OS while the connection is open), this will fail. We also assert + the parent's child dicts are cleared so the FanoutCache cannot + silently keep handing out post-close children. """ with warnings.catch_warnings(): warnings.simplefilter('ignore', dc.UnsafePickleWarning) @@ -1677,23 +1732,27 @@ def test_o3_fanout_close_closes_children(tmp_cache_dir, clear_env): child_cache['k'] = 1 child_deque.append('x') child_index['a'] = 1 - # Sanity: connections currently exist. - assert getattr(child_cache._local, 'con', None) is not None - assert getattr(child_deque._cache._local, 'con', None) is not None - assert getattr(child_index._cache._local, 'con', None) is not None + + child_cache_db = op.join(child_cache.directory, 'cache.db') + child_deque_db = op.join(child_deque._cache.directory, 'cache.db') + child_index_db = op.join(child_index._cache.directory, 'cache.db') + assert op.isfile(child_cache_db) + assert op.isfile(child_deque_db) + assert op.isfile(child_index_db) fc.close() - # Each cached child must have had its connection closed. - assert getattr(child_cache._local, 'con', None) is None, ( - 'O3: fc.close() did not close the cached child Cache' - ) - assert getattr(child_deque._cache._local, 'con', None) is None, ( - 'O3: fc.close() did not close the cached child Deque cache' - ) - assert getattr(child_index._cache._local, 'con', None) is None, ( - 'O3: fc.close() did not close the cached child Index cache' - ) + # Behavioral verification: each child's db must be openable + # by a fresh exclusive-locking sqlite connection. If the + # original connection is still held, the BEGIN EXCLUSIVE will + # raise OperationalError (database is locked). + for db_path in (child_cache_db, child_deque_db, child_index_db): + con = sqlite3.connect(db_path, timeout=0.5) + try: + con.execute('BEGIN EXCLUSIVE') + con.execute('COMMIT') + finally: + con.close() # The dicts must be cleared so the parent can be safely # reopened later without leaking the old children. @@ -1830,3 +1889,210 @@ def test_y3_error_message_uses_disk_pickle_key_kwarg_name( 'FanoutCache error must mention disk_pickle_key, got: %r' % msg2 ) + + +# -- Pass-6: L1 child cache key recoverability after pickle ------------ + + +def test_l1_pickled_child_cache_recovers_data_in_receiver( + tmp_cache_dir, clear_env +): + """L1: pickling a child Cache obtained from FanoutCache.cache(name) + must round-trip -- the receiver must be able to recover the HMAC + key from the child's directory and read the data.""" + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.FanoutCache(tmp_cache_dir, shards=2) as fc: + child = fc.cache('mychild') + child[('complex', 'key')] = {'value': 42} + blob = pickle.dumps(child) + child_dir = child.directory + # Simulate "receiving process": brand-new Cache instance from + # the same directory, no env var. + with dc.Cache(child_dir) as rcv: + assert rcv[('complex', 'key')] == {'value': 42} + # Also test pickle round-trip. + rcv2 = pickle.loads(blob) + try: + assert rcv2[('complex', 'key')] == {'value': 42} + finally: + rcv2.close() + + +def test_l1_inherited_key_file_written_atomically_in_child_dir( + tmp_cache_dir, clear_env +): + """L1: the inherited key must be persisted into the child + directory's ``.diskcache_pickle_key`` so default resolution in a + receiving process recovers the same key.""" + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.FanoutCache(tmp_cache_dir, shards=2) as fc: + child = fc.cache('mychild') + child_key_path = op.join(child.directory, PICKLE_KEY_FILENAME) + assert op.isfile(child_key_path) + with open(child_key_path, 'rb') as fh: + child_key = fh.read() + parent_key_path = op.join(tmp_cache_dir, PICKLE_KEY_FILENAME) + with open(parent_key_path, 'rb') as fh: + parent_key = fh.read() + assert child_key == parent_key + + +def test_l1_explicit_key_does_NOT_write_keyfile_into_child_dir( + tmp_cache_dir, clear_env +): + """L1 negative: when the user passes an explicit + ``disk_pickle_key`` the secret must NOT be persisted to disk in + any child directory. Only the implicit (inherited from default + resolution) path triggers the L1 keyfile-write.""" + secret = secrets.token_bytes(32) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.FanoutCache( + tmp_cache_dir, shards=2, disk_pickle_key=secret + ) as fc: + child = fc.cache('mychild') + assert not op.isfile( + op.join(child.directory, PICKLE_KEY_FILENAME) + ) + # Parent dir also should not have one. + assert not op.isfile( + op.join(tmp_cache_dir, PICKLE_KEY_FILENAME) + ) + + +def test_l1_legacy_mode_does_NOT_write_keyfile_into_child_dir( + tmp_cache_dir, clear_env +): + """L1 negative: legacy mode (disk_pickle_key=False) must not + create a keyfile anywhere -- there is no key to persist.""" + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.FanoutCache( + tmp_cache_dir, shards=2, disk_pickle_key=False + ) as fc: + child = fc.cache('mychild') + assert not op.isfile( + op.join(child.directory, PICKLE_KEY_FILENAME) + ) + + +# -- Pass-6: C1 FanoutCache.close() fences subsequent use -------------- + + +def test_c1_fanoutcache_use_after_close_raises(tmp_cache_dir, clear_env): + """C1: after fc.close(), creating new children via cache()/ + deque()/index() must raise RuntimeError.""" + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + fc = dc.FanoutCache(tmp_cache_dir, shards=2) + fc.cache('existing') # populate to exercise close() pathways + fc.close() + with pytest.raises(RuntimeError, match='closed'): + fc.cache('new') + with pytest.raises(RuntimeError, match='closed'): + fc.deque('new') + with pytest.raises(RuntimeError, match='closed'): + fc.index('new') + + +def test_c1_fanoutcache_close_idempotent(tmp_cache_dir, clear_env): + """C1: calling close() twice must not raise.""" + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + fc = dc.FanoutCache(tmp_cache_dir, shards=2) + fc.close() + fc.close() # second close should not raise + + +# -- Pass-6: L2 size-based finally cleanup ----------------------------- + + +def test_y1_finally_size_based_cleanup_partial( + tmp_cache_dir, clear_env, monkeypatch +): + """L2: if os.write only delivers a short prefix of the key and + then raises OSError, the finally block must unlink the partial + file (file_size < len(new_key)).""" + from diskcache import core as dc_core + + real_write = os.write + + def short_then_fail(fd, data): + # Write only 8 bytes and report failure thereafter. + real_write(fd, data[:8]) + raise OSError(28, 'simulated short write + failure') + + monkeypatch.setattr(dc_core.os, 'write', short_then_fail) + keyfile = op.join(tmp_cache_dir, PICKLE_KEY_FILENAME) + with pytest.raises(OSError): + dc_core._read_or_create_pickle_key_file(keyfile) + # Restore for cleanup safety. + monkeypatch.setattr(dc_core.os, 'write', real_write) + assert not op.exists(keyfile), ( + 'L2: partial file (8 bytes < 32) must be unlinked' + ) + + +def test_y1_finally_size_based_cleanup_complete( + tmp_cache_dir, clear_env, monkeypatch +): + """L2: if os.write delivers the full key but an exception fires + before write_succeeded=True, the finally block must preserve the + file (file_size == len(new_key) and content matches).""" + from diskcache import core as dc_core + + real_write = os.write + + def full_then_fail(fd, data): + n = real_write(fd, data) + # Simulate KeyboardInterrupt firing AFTER full bytes deposited + # but BEFORE the write_succeeded=True assignment runs. + raise KeyboardInterrupt('simulated SIGINT post-write') + + monkeypatch.setattr(dc_core.os, 'write', full_then_fail) + keyfile = op.join(tmp_cache_dir, PICKLE_KEY_FILENAME) + with pytest.raises(KeyboardInterrupt): + dc_core._read_or_create_pickle_key_file(keyfile) + monkeypatch.setattr(dc_core.os, 'write', real_write) + assert op.exists(keyfile), ( + 'L2: complete file (32 bytes, content matches) must be preserved' + ) + with open(keyfile, 'rb') as fh: + assert len(fh.read()) >= dc_core.PICKLE_KEY_MIN_LEN + + +# -- Pass-6: Y2 cross-platform ELOOP via monkeypatch ------------------- + + +def test_y2_eloop_translates_to_value_error_via_monkeypatch( + tmp_cache_dir, clear_env, monkeypatch +): + """Y2 (cross-platform): monkey-patch op.realpath to raise + OSError(ELOOP) so the translation to ValueError is verified + without needing real symlinks (which require admin on Windows).""" + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir) as cache: + disk = cache._disk + from diskcache import core as dc_core + + real_realpath = dc_core.op.realpath + + def loop_realpath(path, *args, **kwargs): + # Only fake the loop for our specific filename so the + # cache directory's own realpath caching still works. + if 'loop_target' in str(path): + raise OSError( + errno.ELOOP, 'Too many symbolic links' + ) + return real_realpath(path, *args, **kwargs) + + monkeypatch.setattr( + dc_core.op, 'realpath', loop_realpath + ) + with pytest.raises( + ValueError, match='cache.db may be tampered' + ): + disk._safe_filename_path('loop_target')