diff --git a/diskcache/__init__.py b/diskcache/__init__.py index 7757d66..976eb22 100644 --- a/diskcache/__init__.py +++ b/diskcache/__init__.py @@ -16,6 +16,7 @@ JSONDisk, Timeout, UnknownFileWarning, + UnsafePickleWarning, ) from .fanout import FanoutCache from .persistent import Deque, Index @@ -47,6 +48,7 @@ 'Timeout', 'UNKNOWN', 'UnknownFileWarning', + 'UnsafePickleWarning', 'barrier', 'memoize_stampede', 'throttle', diff --git a/diskcache/core.py b/diskcache/core.py index 7a3d23b..e626c01 100644 --- a/diskcache/core.py +++ b/diskcache/core.py @@ -5,12 +5,15 @@ import contextlib as cl import errno import functools as ft +import hashlib +import hmac import io import json import os import os.path as op import pickle import pickletools +import secrets import sqlite3 import struct import tempfile @@ -39,6 +42,343 @@ def __repr__(self): ENOVAL = Constant('ENOVAL') UNKNOWN = Constant('UNKNOWN') +# --- CVE-2025-69872 / GHSA-w8v5-vhqr-4h9v mitigation ---------------------- +# +# Every pickle blob written by ``Disk`` is wrapped in an HMAC-SHA256 +# envelope so the bytes loaded from disk can be authenticated before +# ``pickle.load`` is allowed to interpret them. The envelope format is:: +# +# b"DCv1" + HMAC_SHA256(key, payload) + payload +# +# where ``payload`` is the original pickle byte string (post +# ``pickletools.optimize`` for keys, raw for values). Verification uses +# ``hmac.compare_digest`` to avoid timing leaks. +# +# The signing key is resolved lazily, only when a pickle path is actually +# exercised, in this order: +# +# 1. an explicit ``pickle_key=`` argument to :class:`Disk` / +# :class:`Cache` (bytes, ``bytearray``, hex ``str``, ``False`` to +# opt out of HMAC, or ``None`` to fall through to the next step); +# 2. the ``DISKCACHE_PICKLE_KEY`` environment variable (hex-encoded); +# 3. an auto-generated ``.diskcache_pickle_key`` file inside the +# cache directory (mode ``0o600``). The file fallback emits an +# :class:`UnsafePickleWarning` because an attacker who can read +# the cache directory can also read the key and forge the HMAC -- +# it provides corruption detection and defense against +# write-only attackers, not against the full advisory threat +# model. +# +# ``pickle_key=False`` disables HMAC verification entirely and emits an +# :class:`UnsafePickleWarning`. This is the explicit escape hatch for +# reading caches that pre-date this fix; it leaves the process exposed +# to the original CVE. +PICKLE_MAGIC = b'DCv1' +PICKLE_HMAC_SIZE = hashlib.sha256().digest_size # 32 +PICKLE_HEADER_SIZE = len(PICKLE_MAGIC) + PICKLE_HMAC_SIZE # 36 +PICKLE_KEY_FILENAME = '.diskcache_pickle_key' +PICKLE_KEY_ENV = 'DISKCACHE_PICKLE_KEY' +PICKLE_KEY_MIN_LEN = 16 +_PICKLE_KEY_UNSET = Constant('PICKLE_KEY_UNSET') + + +class UnsafePickleWarning(UserWarning): + """Warning emitted when DiskCache cannot enforce pickle HMAC verification. + + Triggered when the signing key is auto-generated inside the cache + directory (an attacker with read access can forge envelopes) or when + HMAC verification is explicitly disabled via ``pickle_key=False``. + See CVE-2025-69872 / GHSA-w8v5-vhqr-4h9v. + """ + + +# Sources reported by :func:`_resolve_pickle_key_for_directory`. +_PKEY_SRC_DISABLED = 'disabled' +_PKEY_SRC_EXPLICIT = 'explicit' +_PKEY_SRC_ENV = 'env' +_PKEY_SRC_FILE_CREATED = 'file_created' +_PKEY_SRC_FILE_EXISTING = 'file_existing' + + +def _coerce_pickle_key(value, source): + """Validate and coerce a user-supplied HMAC key value to ``bytes``. + + ``value`` must be ``bytes``/``bytearray`` or a hex-encoded ``str``. + ``False`` and ``None`` are filtered by callers and never reach this + function. + """ + if isinstance(value, str): + try: + key = bytes.fromhex(value) + except ValueError: + raise ValueError( + '%s must be hex-encoded bytes' % source + ) from None + elif isinstance(value, (bytes, bytearray)): + key = bytes(value) + else: + raise TypeError( + '%s must be bytes, bytearray, or hex str; got %s' + % (source, type(value).__name__) + ) + if len(key) < PICKLE_KEY_MIN_LEN: + raise ValueError( + '%s must be at least %d bytes (got %d)' + % (source, PICKLE_KEY_MIN_LEN, len(key)) + ) + return key + + +def _read_or_create_pickle_key_file(path): + """Atomically create or read the on-disk HMAC key file at ``path``. + + Returns ``(key_bytes, created)``. ``created`` is ``True`` when the + current process emerged as the writer in a multi-process race. + + Uses ``O_CREAT | O_EXCL`` so the first writer wins; concurrent + losers retry the read with a short backoff to ride out the race + window between the winner's ``os.open`` and ``os.write`` -- the + pre-fix code raised a false-positive "too short" error on every + loser and required manual recovery. + """ + directory = op.dirname(path) or '.' + os.makedirs(directory, 0o755, exist_ok=True) + + flags = os.O_CREAT | os.O_EXCL | os.O_WRONLY + if hasattr(os, 'O_BINARY'): + flags |= os.O_BINARY + + delay = 0.001 + last_short_len = -1 + for attempt in range(6): + try: + fd = os.open(path, flags, 0o600) + except FileExistsError: + try: + with open(path, 'rb') as reader: + data = reader.read() + except (FileNotFoundError, PermissionError): + # File vanished or was momentarily inaccessible. Try + # to (re)create on the next iteration. + time.sleep(delay) + delay *= 2 + continue + + if len(data) >= PICKLE_KEY_MIN_LEN: + return data, False + + # Race window: the winning writer hasn't completed its + # ``os.write`` yet (or it crashed mid-write). Retry. + last_short_len = len(data) + if attempt < 5: + time.sleep(delay) + delay *= 2 + continue + + # ``from None`` suppresses the implicit ``FileExistsError`` + # context (pylint W0707): the chained exception is just the + # race-loop bookkeeping, not a real cause that operators need + # to debug. + raise RuntimeError( + 'diskcache: pickle key file %r is too short (%d < %d) ' + 'after %d retries; it may be corrupted -- remove it to ' + 'regenerate, or pass disk_pickle_key= explicitly.' + % (path, last_short_len, PICKLE_KEY_MIN_LEN, attempt + 1) + ) from None + + # We won the race. Generate and write the key. Use try/finally + # so we clean up the half-baked file even on KeyboardInterrupt / + # SystemExit (otherwise an empty file blocks every subsequent + # process with a "too short" RuntimeError until manually removed). + # + # CVE-2025-69872 V14: ``os.write`` may legally return fewer bytes + # than requested (signal interruption, disk quota, ENOSPC mid- + # write). Loop until the full key is on disk, otherwise the + # writer holds a 32-byte key in memory while readers see only a + # truncated prefix. + # + # CVE-2025-69872 V17: set ``write_succeeded`` IMMEDIATELY after + # the bytes hit the kernel (before fsync), not after fsync. + # Once ``os.write`` has returned, concurrent readers can already + # see the full key; if a KeyboardInterrupt then cancels fsync we + # must NOT unlink the file -- doing so leaves those readers + # using a key that is no longer recoverable from disk. + new_key = secrets.token_bytes(32) + write_succeeded = False + try: + written = 0 + while written < len(new_key): + n = os.write(fd, new_key[written:]) + if n <= 0: + raise OSError( + 'diskcache: os.write returned %d while writing ' + 'pickle key file %r' % (n, path) + ) + written += n + write_succeeded = True + with cl.suppress(OSError): + os.fsync(fd) + finally: + with cl.suppress(OSError): + os.close(fd) + if not write_succeeded: + # CVE-2025-69872 (Y1 + L2): prefer false-positive + # preserve over false-negative unlink. Stranding + # concurrent readers (who may already hold a valid + # key) is worse than leaving an incomplete file + # (which subsequent processes will retry past via + # the empty-read backoff above). Use file size as + # the primary signal: only definitively-partial + # writes (size < len(new_key)) are unlinked; same- + # size files are content-verified before being + # claimed; larger / unknown sizes are left alone. + try: + file_size = os.path.getsize(path) + except OSError: + file_size = None + if ( + file_size is not None + and file_size < len(new_key) + ): + # Definitely incomplete -- safe to remove. + with cl.suppress(OSError): + os.unlink(path) + elif file_size == len(new_key): + # Same size as ours: verify content matches the + # key we generated so we don't take credit for + # another process's file. + try: + with open(path, 'rb') as fh: + if fh.read() == new_key: + write_succeeded = True + except OSError: + pass + # file_size > len(new_key) or None: leave alone -- + # someone else owns this file now, or we cannot + # determine its state. + + with cl.suppress(OSError): + os.chmod(path, 0o600) + return new_key, True + + # Defensive: the loop should always return or raise. + raise RuntimeError( + 'diskcache: failed to create pickle key file %r' % path + ) + + +def _write_inherited_pickle_key_file(path, key_bytes): + """Atomically write ``key_bytes`` to ``path``. No-op if file already + exists (assumes existing content is the correct key -- caller can + verify if needed). Used for CVE-2025-69872 L1 recoverability: + child Cache instances inherit a key from a parent context (e.g. + FanoutCache or copy.copy) and need a recoverable key file in + their own directory so future processes opening the directory + directly can re-resolve the key via default resolution. + """ + directory = op.dirname(path) or '.' + if not op.isdir(directory): + os.makedirs(directory, 0o755, exist_ok=True) + flags = os.O_CREAT | os.O_EXCL | os.O_WRONLY + if hasattr(os, 'O_BINARY'): + flags |= os.O_BINARY + try: + fd = os.open(path, flags, 0o600) + except FileExistsError: + return # already present (us or another process) + write_succeeded = False + try: + written = 0 + while written < len(key_bytes): + n = os.write(fd, key_bytes[written:]) + if n <= 0: + raise OSError( + 'diskcache: short write to inherited pickle key ' + 'file %r' % path + ) + written += n + write_succeeded = True + with cl.suppress(OSError): + os.fsync(fd) + finally: + with cl.suppress(OSError): + os.close(fd) + if not write_succeeded: + with cl.suppress(OSError): + os.unlink(path) + with cl.suppress(OSError): + os.chmod(path, 0o600) + + +def _resolve_pickle_key_for_directory(directory, arg): + """Resolve the pickle HMAC key for ``directory``. + + Returns ``(key, source)`` where ``key`` is ``bytes`` or ``False`` + and ``source`` is one of the ``_PKEY_SRC_*`` constants. This is + the single source of truth shared by :class:`Disk` and + :class:`~diskcache.FanoutCache` so a fanout cache's shards all use + the same key (otherwise sharding becomes non-deterministic). + """ + if arg is False: + return False, _PKEY_SRC_DISABLED + + if arg is not _PICKLE_KEY_UNSET and arg is not None: + return ( + _coerce_pickle_key(arg, source='pickle_key argument'), + _PKEY_SRC_EXPLICIT, + ) + + env_value = os.environ.get(PICKLE_KEY_ENV) + if env_value: + return ( + _coerce_pickle_key( + env_value, + source='%s environment variable' % PICKLE_KEY_ENV, + ), + _PKEY_SRC_ENV, + ) + + key_path = op.join(directory, PICKLE_KEY_FILENAME) + key, created = _read_or_create_pickle_key_file(key_path) + return key, _PKEY_SRC_FILE_CREATED if created else _PKEY_SRC_FILE_EXISTING + + +def _emit_pickle_key_warning(source, directory, stacklevel): + """Emit the appropriate :class:`UnsafePickleWarning` for ``source``. + + Returns ``True`` if a warning was emitted, ``False`` otherwise (so + the caller can flip its "already warned" flag accurately). + """ + if source == _PKEY_SRC_DISABLED: + warnings.warn( + 'diskcache: pickle HMAC verification is disabled ' + '(disk_pickle_key=False). The cache directory must be fully ' + 'trusted; an attacker with write access can achieve ' + 'arbitrary code execution via CVE-2025-69872.', + UnsafePickleWarning, + stacklevel=stacklevel, + ) + return True + if source in (_PKEY_SRC_FILE_CREATED, _PKEY_SRC_FILE_EXISTING): + key_path = op.join(directory, PICKLE_KEY_FILENAME) + warnings.warn( + 'diskcache: using pickle HMAC key from %r (CVE-2025-69872 ' + 'default fallback). This detects accidental cache ' + 'corruption but does NOT protect against attackers with ' + 'read access to the cache directory, nor against a ' + '"bootstrap race" where an attacker writes the cache ' + 'directory before the legitimate process initializes (and ' + 'thereby controls the key). For production use set the %s ' + 'environment variable to a hex-encoded random key ' + '(>= %d bytes), or pass disk_pickle_key=... explicitly.' + % (key_path, PICKLE_KEY_ENV, PICKLE_KEY_MIN_LEN), + UnsafePickleWarning, + stacklevel=stacklevel, + ) + return True + return False + + MODE_NONE = 0 MODE_RAW = 1 MODE_BINARY = 2 @@ -103,17 +443,147 @@ def __repr__(self): class Disk: """Cache key and value serialization for SQLite database and files.""" - def __init__(self, directory, min_file_size=0, pickle_protocol=0): + #: Whether this Disk class uses pickle for serialization. When + #: ``False``, callers like :class:`diskcache.FanoutCache` can skip + #: eager HMAC key resolution (CVE-2025-69872) and avoid creating an + #: unused ``.diskcache_pickle_key`` file at the cache root. Custom + #: subclasses that bypass the pickle paths (like :class:`JSONDisk`) + #: should set this to ``False``. + _uses_pickle = True + + def __init__(self, directory, min_file_size=0, pickle_protocol=0, + pickle_key=_PICKLE_KEY_UNSET): """Initialize disk instance. :param str directory: directory path :param int min_file_size: minimum size for file use :param int pickle_protocol: pickle protocol for serialization + :param pickle_key: HMAC key for pickle envelope verification + (CVE-2025-69872 mitigation). Accepts: + + * ``bytes`` / ``bytearray`` of >= 16 bytes -- used directly; + * a hex-encoded ``str`` of >= 32 hex chars -- decoded to bytes; + * ``False`` -- disable HMAC; emits :class:`UnsafePickleWarning` + and reads pre-fix caches as raw pickle (insecure); + * ``None`` (or omitted) -- resolve lazily from the + ``DISKCACHE_PICKLE_KEY`` environment variable, then from + ``/.diskcache_pickle_key`` (auto-generated with + an :class:`UnsafePickleWarning`). """ self._directory = directory self.min_file_size = min_file_size self.pickle_protocol = pickle_protocol + # CVE-2025-69872: pre-compute realpath of the cache directory so + # path-traversal validation in :meth:`_safe_filename_path` only + # makes one ``realpath`` syscall per fetch instead of two. The + # value is computed lazily on first use to support construction + # with a not-yet-existing directory. + self._directory_realpath = None + # Eagerly validate explicit (non-False, non-None) keys so bad + # input fails at construction time instead of on first use. + # ``False`` and ``None`` stay lazy so we don't emit warnings or + # touch the filesystem for caches that never exercise pickle. + if ( + pickle_key is not _PICKLE_KEY_UNSET + and pickle_key is not None + and pickle_key is not False + ): + pickle_key = _coerce_pickle_key( + pickle_key, source='pickle_key argument' + ) + self._pickle_key_arg = pickle_key + self._pickle_key_resolved = None + self._pickle_key_warned = False + + # -- CVE-2025-69872 / GHSA-w8v5-vhqr-4h9v helpers ---------------------- + + def _resolve_pickle_key(self): + """Resolve and cache the pickle HMAC key. Lazy. + + Returns either a ``bytes`` key, or ``False`` when the user has + opted out of HMAC verification. Never returns ``None``. + """ + if self._pickle_key_resolved is not None: + return self._pickle_key_resolved + + key, source = _resolve_pickle_key_for_directory( + self._directory, self._pickle_key_arg + ) + if not self._pickle_key_warned and _emit_pickle_key_warning( + source, self._directory, stacklevel=4 + ): + self._pickle_key_warned = True + self._pickle_key_resolved = key + return key + + def _pickle_dump(self, obj, optimize=False): + """Pickle ``obj`` and wrap with HMAC envelope (or raw bytes if + HMAC has been disabled via ``pickle_key=False``). + """ + payload = pickle.dumps(obj, protocol=self.pickle_protocol) + if optimize: + payload = pickletools.optimize(payload) + key = self._resolve_pickle_key() + if key is False: + return payload + mac = hmac.new(key, payload, hashlib.sha256).digest() + return PICKLE_MAGIC + mac + payload + + def _pickle_load_bytes(self, data): + """Verify the HMAC envelope and unpickle ``data``.""" + if isinstance(data, sqlite3.Binary): + data = bytes(data) + elif isinstance(data, memoryview): + data = data.tobytes() + elif not isinstance(data, (bytes, bytearray)): + data = bytes(data) + + magic_len = len(PICKLE_MAGIC) + has_envelope = ( + len(data) >= PICKLE_HEADER_SIZE + and data[:magic_len] == PICKLE_MAGIC + ) + key = self._resolve_pickle_key() + + if has_envelope: + mac = data[magic_len:PICKLE_HEADER_SIZE] + payload = data[PICKLE_HEADER_SIZE:] + if key is False: + # Legacy mode: skip verification, but still strip the + # envelope so we unpickle the same bytes the writer + # intended. + return pickle.load(io.BytesIO(payload)) + expected = hmac.new(key, payload, hashlib.sha256).digest() + if not hmac.compare_digest(mac, expected): + raise pickle.UnpicklingError( + 'diskcache: pickle HMAC verification failed; cache ' + 'entry may be corrupt or tampered with ' + '(CVE-2025-69872).' + ) + return pickle.load(io.BytesIO(payload)) + + # No envelope present. + if key is False: + return pickle.load(io.BytesIO(data)) + raise pickle.UnpicklingError( + 'diskcache: pickle envelope missing; this cache entry ' + 'predates the CVE-2025-69872 mitigation. Pass ' + 'disk_pickle_key=False to Cache / FanoutCache (or Django ' + 'CACHES OPTIONS) to read legacy data (insecure), or ' + 'recreate the cache.' + ) + + def _pickle_load_file(self, fileobj): + """Read entire file and verify before unpickling. + + Note: large MODE_PICKLE values stored as separate ``.val`` files + are buffered in memory so the bytes verified by HMAC are exactly + the bytes fed to ``pickle.load`` (no TOCTOU window). + """ + return self._pickle_load_bytes(fileobj.read()) + + # -- end CVE mitigation helpers --------------------------------------- def hash(self, key): """Compute portable hash for `key`. @@ -158,9 +628,8 @@ def put(self, key): ): return key, True else: - data = pickle.dumps(key, protocol=self.pickle_protocol) - result = pickletools.optimize(data) - return sqlite3.Binary(result), False + data = self._pickle_dump(key, optimize=True) + return sqlite3.Binary(data), False def get(self, key, raw): """Convert fields `key` and `raw` from Cache table to key. @@ -174,7 +643,7 @@ def get(self, key, raw): if raw: return bytes(key) if type(key) is sqlite3.Binary else key else: - return pickle.load(io.BytesIO(key)) + return self._pickle_load_bytes(key) def store(self, value, read, key=UNKNOWN): """Convert `value` to fields size, mode, filename, and value for Cache @@ -218,7 +687,7 @@ def store(self, value, read, key=UNKNOWN): size = self._write(full_path, iterator, 'xb') return size, MODE_BINARY, filename, None else: - result = pickle.dumps(value, protocol=self.pickle_protocol) + result = self._pickle_dump(value) if len(result) < min_file_size: return 0, MODE_PICKLE, None, sqlite3.Binary(result) @@ -251,6 +720,81 @@ def _write(self, full_path, iterator, mode, encoding=None): writer.write(chunk) return size + def _safe_filename_path(self, filename): + """Return a safe full path for a database-stored ``filename``. + + Cache filenames are generated by :meth:`filename` and consist + of a randomly generated hex subdirectory and basename. An + attacker with write access to ``cache.db`` could replace a + row's ``filename`` column with a path-traversal string (e.g. + ``../../../etc/passwd``) to coerce DiskCache into reading + arbitrary files outside the cache directory. This guard + refuses any ``filename`` whose resolved location is not + strictly inside ``self._directory``. + + :raises ValueError: if ``filename`` is not a string, escapes + the cache directory, or resolves to a path that cannot be + interpreted (e.g. an unreachable Windows UNC path). + """ + if not isinstance(filename, str): + raise ValueError( + 'diskcache: cache filename must be str, got %s' + % type(filename).__name__ + ) + # CVE-2025-69872: only translate OSError subclasses that + # plausibly indicate a malformed / tampered filename. Genuine + # operational errors (PermissionError, TimeoutError) propagate + # so callers can distinguish them from path traversal attempts. + # Cache realpath of the directory once (V6 perf). + base = self._directory_realpath + if base is None: + try: + base = op.realpath(self._directory) + except (FileNotFoundError, NotADirectoryError) as exc: + raise ValueError( + 'diskcache: cannot resolve cache directory %r (%s)' + % (self._directory, exc) + ) from exc + self._directory_realpath = base + try: + full = op.realpath(op.join(self._directory, filename)) + except (FileNotFoundError, NotADirectoryError) as exc: + raise ValueError( + 'diskcache: cannot resolve cache filename %r (%s; ' + 'cache.db may be tampered)' % (filename, exc) + ) from exc + except OSError as exc: + # CVE-2025-69872 (Y2): a symlink loop (errno ELOOP) inside + # the cache directory surfaces as a generic ``OSError`` from + # ``os.path.realpath``. Without translation the caller + # would see an opaque OSError instead of the path-traversal + # signal it expects, masking a tampered cache.db that + # points at a self-referential symlink chain. + if getattr(exc, 'errno', None) == errno.ELOOP: + raise ValueError( + 'diskcache: cannot resolve cache filename %r (%s; ' + 'cache.db may be tampered)' % (filename, exc) + ) from exc + # Windows-only: UNC / drive-not-found surface as bare + # OSError or WinError 53 / 67 / 87 / 123 / 161 (path/name + # invalid for filesystem use). Treat as malformed + # filename; let other OSError subclasses propagate. + if os.name == 'nt' and getattr(exc, 'winerror', None) in ( + 53, 67, 87, 123, 161, 1001 + ): + raise ValueError( + 'diskcache: cannot resolve cache filename %r (%s; ' + 'cache.db may be tampered)' % (filename, exc) + ) from exc + raise + if full != base and not full.startswith(base + os.sep): + raise ValueError( + 'diskcache: cache filename %r escapes cache directory ' + '(refusing path traversal; cache.db may have been ' + 'tampered with).' % (filename,) + ) + return full + def fetch(self, mode, filename, value, read): """Convert fields `mode`, `filename`, and `value` from Cache table to value. @@ -266,22 +810,38 @@ def fetch(self, mode, filename, value, read): # pylint: disable=unidiomatic-typecheck,consider-using-with if mode == MODE_RAW: return bytes(value) if type(value) is sqlite3.Binary else value - elif mode == MODE_BINARY: + + # CVE-2025-69872 (V13): file-backed modes require a filename. + # If the row was tampered (mode=BINARY/TEXT/PICKLE but + # filename=NULL), raise a clear error rather than the previous + # UnboundLocalError. + if filename is None: + if mode in (MODE_BINARY, MODE_TEXT) or ( + mode == MODE_PICKLE and value is None + ): + raise ValueError( + 'diskcache: cache row has mode=%d but filename is None ' + '(cache.db may be tampered).' % mode + ) + full_path = None + else: + full_path = self._safe_filename_path(filename) + + if mode == MODE_BINARY: if read: - return open(op.join(self._directory, filename), 'rb') + return open(full_path, 'rb') else: - with open(op.join(self._directory, filename), 'rb') as reader: + with open(full_path, 'rb') as reader: return reader.read() elif mode == MODE_TEXT: - full_path = op.join(self._directory, filename) with open(full_path, 'r', encoding='UTF-8') as reader: return reader.read() elif mode == MODE_PICKLE: if value is None: - with open(op.join(self._directory, filename), 'rb') as reader: - return pickle.load(reader) + with open(full_path, 'rb') as reader: + return self._pickle_load_file(reader) else: - return pickle.load(io.BytesIO(value)) + return self._pickle_load_bytes(value) def filename(self, key=UNKNOWN, value=UNKNOWN): """Return filename and full-path tuple for file storage. @@ -318,7 +878,20 @@ def remove(self, file_path): :param str file_path: relative path to file """ - full_path = op.join(self._directory, file_path) + try: + full_path = self._safe_filename_path(file_path) + except ValueError: + # CVE-2025-69872: refuse to act on tampered filenames that + # escape the cache directory. Do NOT raise -- this is + # called from eviction/cull paths that should be tolerant. + warnings.warn( + 'diskcache: refusing to remove file %r (escapes cache ' + 'directory; cache.db may be tampered).' % (file_path,), + UnsafePickleWarning, + stacklevel=2, + ) + return + full_dir, _ = op.split(full_path) # Suppress OSError that may occur if two caches attempt to delete the @@ -334,6 +907,14 @@ def remove(self, file_path): class JSONDisk(Disk): """Cache key and value using JSON serialization with zlib compression.""" + # CVE-2025-69872: JSONDisk routes every key/value through JSON+zlib + # bytes, which hit the early-return MODE_RAW / MODE_BINARY branches + # of the base Disk and never reach the pickle paths. Declaring + # ``_uses_pickle = False`` prevents :class:`FanoutCache` from + # eagerly resolving (and possibly auto-generating) an HMAC key file + # that JSONDisk would never use. + _uses_pickle = False + def __init__(self, directory, compress_level=1, **kwargs): """Initialize JSON disk instance. @@ -423,7 +1004,10 @@ def __init__(self, directory=None, timeout=60, disk=Disk, **settings): :param str directory: cache directory :param float timeout: SQLite connection timeout :param disk: Disk type or subclass for serialization - :param settings: any of DEFAULT_SETTINGS + :param settings: any of DEFAULT_SETTINGS, plus the optional + non-persistent ``disk_pickle_key`` argument used to verify + pickle envelopes (CVE-2025-69872). See :class:`Disk` for the + accepted values. """ try: @@ -431,6 +1015,37 @@ def __init__(self, directory=None, timeout=60, disk=Disk, **settings): except (TypeError, AssertionError): raise ValueError('disk must subclass diskcache.Disk') from None + # CVE-2025-69872: ``disk_pickle_key`` is intentionally treated as + # constructor-only configuration. Persisting it in the Settings + # table would either leak the secret into the same compromised + # cache directory or let an attacker make ``disk_pickle_key=False`` + # sticky across restarts via ``cache.reset``. Pop it before any + # of the regular settings plumbing runs. + pickle_key = settings.pop('disk_pickle_key', _PICKLE_KEY_UNSET) + # CVE-2025-69872 (O1+O4 link): ``_disk_pickle_key_inherited`` is + # a private kwarg used by :class:`FanoutCache` and the + # ``_copy_for_same_process`` helpers to forward an + # already-resolved key (env var, on-disk file, parent shard) + # WITHOUT marking the child as user-explicit. When set, the + # child Cache stays pickleable in default mode (its + # ``_pickle_key_arg`` remains :data:`_PICKLE_KEY_UNSET`) but + # uses the parent's resolved key so sharding/copying remain + # deterministic across env/file changes. + inherited_key = settings.pop('_disk_pickle_key_inherited', None) + # CVE-2025-69872 (Y3): pre-validate the user-explicit key with + # the public kwarg name in the error message. Without this + # the failure surfaces from Disk's own validation as + # "pickle_key argument" -- confusing for users who only + # passed ``disk_pickle_key=``. + if ( + pickle_key is not _PICKLE_KEY_UNSET + and pickle_key is not None + and pickle_key is not False + ): + pickle_key = _coerce_pickle_key( + pickle_key, source='disk_pickle_key argument' + ) + if directory is None: directory = tempfile.mkdtemp(prefix='diskcache-') directory = str(directory) @@ -464,6 +1079,12 @@ def __init__(self, directory=None, timeout=60, disk=Disk, **settings): except sqlite3.OperationalError: current_settings = {} + # CVE-2025-69872: ``disk_pickle_key`` MUST be constructor-only. + # If a stale row was injected into Settings (by an older + # patched build, manual edit, or attacker), drop it so it can + # neither be re-persisted nor flow into the Disk constructor. + current_settings.pop('disk_pickle_key', None) + sets = DEFAULT_SETTINGS.copy() sets.update(current_settings) sets.update(settings) @@ -490,8 +1111,47 @@ def __init__(self, directory=None, timeout=60, disk=Disk, **settings): for key, value in sets.items() if key.startswith('disk_') } + # Defensive: never source pickle_key from persisted Settings even + # if a stale row exists (it should not, because we never store + # it -- but Settings tables created by older patched builds or + # by hand could in principle contain one). + kwargs.pop('pickle_key', None) + kwargs.pop('_disk_pickle_key_inherited', None) + if pickle_key is not _PICKLE_KEY_UNSET: + kwargs['pickle_key'] = pickle_key self._disk = disk(directory, **kwargs) + # CVE-2025-69872 (O4): apply an inherited key (from a + # FanoutCache parent or ``_copy_for_same_process``) AFTER disk + # construction so it shows up only as ``_pickle_key_resolved`` + # -- never as ``_pickle_key_arg``. This keeps default-mode + # child Caches and copies pickleable while still forcing them + # to use the same HMAC key as the parent (otherwise sharded + # and copied caches would diverge from the original key). + if ( + inherited_key is not None + and pickle_key is _PICKLE_KEY_UNSET + ): + self._disk._pickle_key_resolved = inherited_key + self._disk._pickle_key_warned = True + # CVE-2025-69872 (L1): persist the inherited key into the + # child directory so a process that opens this directory + # directly later (e.g. after pickling a child Cache from + # FanoutCache.cache(name)) can recover the same HMAC key + # via default resolution -- otherwise the receiver would + # auto-generate a fresh random key and fail HMAC + # verification on the first read. Only persist real key + # bytes -- not the legacy ``False`` sentinel and not + # short/invalid values (defensive). + if ( + isinstance(inherited_key, (bytes, bytearray)) + and len(inherited_key) >= PICKLE_KEY_MIN_LEN + ): + _write_inherited_pickle_key_file( + op.join(directory, PICKLE_KEY_FILENAME), + bytes(inherited_key), + ) + # Set cached attributes: updates settings and sets pragmas. for key, value in sets.items(): @@ -1970,6 +2630,17 @@ def check(self, fix=False, retry=False): if DBNAME in full_path: continue + if full_path == op.join( + self._directory, PICKLE_KEY_FILENAME + ): + # CVE-2025-69872: auto-generated HMAC key + # file lives at the root of the cache dir + # and is not tracked in the Cache table. + # Anchor the comparison to the root so a + # tampered file with the same basename in + # a subdirectory is NOT silently preserved. + continue + message = 'unknown file: %s' % full_path warnings.warn(message, UnknownFileWarning) @@ -2365,11 +3036,70 @@ def __len__(self): return self.reset('count') def __getstate__(self): + # CVE-2025-69872: an explicit pickle_key cannot survive + # Cache pickling because __getstate__ deliberately serializes + # only directory/timeout/disk-type (the secret is intentionally + # NOT placed in pickled state). Refuse rather than silently + # regenerate a different key in the receiving process and + # corrupt subsequent reads. ``pickle_key=False`` (legacy mode) + # is also refused because the receiver would default to secure + # mode and reject the same data. + disk = self._disk + arg = getattr(disk, '_pickle_key_arg', _PICKLE_KEY_UNSET) + if arg is not _PICKLE_KEY_UNSET and arg is not None: + raise TypeError( + 'diskcache: Cache instances configured with an explicit ' + 'disk_pickle_key (or disk_pickle_key=False) cannot be ' + 'pickled because the secret is not placed in pickle ' + 'state. Pickle the cache directory path instead and ' + 'reconstruct Cache(directory, disk_pickle_key=...) in ' + 'the receiving process. (CVE-2025-69872)' + ) return (self.directory, self.timeout, type(self.disk)) def __setstate__(self, state): self.__init__(*state) + def __copy__(self): + # CVE-2025-69872 (V11): in-process ``copy.copy`` does not cross a + # trust boundary -- preserve the explicit pickle_key (if any) so + # users who legitimately copy a Cache aren't tripped up by the + # __getstate__ guard above. + return self._copy_for_same_process() + + def __deepcopy__(self, memo): + return self._copy_for_same_process() + + def _copy_for_same_process(self): + disk = self._disk + arg = getattr(disk, '_pickle_key_arg', _PICKLE_KEY_UNSET) + kwargs = {} + if arg is not _PICKLE_KEY_UNSET: + # Forward bytes / False explicitly so the new instance has + # the same security posture as the original. + kwargs['disk_pickle_key'] = arg + # CVE-2025-69872 V15 / O1: if the original lazily resolved its + # HMAC key from env / file, propagate the resolved bytes via + # the private ``_disk_pickle_key_inherited`` channel so an + # intervening change to ``DISKCACHE_PICKLE_KEY`` or + # ``.diskcache_pickle_key`` doesn't produce a copy with a + # different key (which would silently fail HMAC verification). + # Inherited keys leave ``_pickle_key_arg`` UNSET so the copy + # remains pickleable in default mode. + if ( + arg is _PICKLE_KEY_UNSET + and getattr(disk, '_pickle_key_resolved', None) is not None + and disk._pickle_key_resolved is not False + ): + kwargs['_disk_pickle_key_inherited'] = disk._pickle_key_resolved + new = self.__class__( + self.directory, + timeout=self.timeout, + disk=type(self.disk), + **kwargs, + ) + return new + def reset(self, key, value=ENOVAL, update=True): """Reset `key` and `value` item from Settings table. diff --git a/diskcache/fanout.py b/diskcache/fanout.py index 9822ee4..2353c0f 100644 --- a/diskcache/fanout.py +++ b/diskcache/fanout.py @@ -4,12 +4,23 @@ import functools import itertools as it import operator +import os import os.path as op import sqlite3 import tempfile +import threading import time -from .core import DEFAULT_SETTINGS, ENOVAL, Cache, Disk, Timeout +from .core import ( + DEFAULT_SETTINGS, + ENOVAL, + Cache, + Disk, + Timeout, + _emit_pickle_key_warning, + _PICKLE_KEY_UNSET, + _resolve_pickle_key_for_directory, +) from .persistent import Deque, Index @@ -25,7 +36,13 @@ def __init__( :param int shards: number of shards to distribute writes :param float timeout: SQLite connection timeout :param disk: `Disk` instance for serialization - :param settings: any of `DEFAULT_SETTINGS` + :param settings: any of `DEFAULT_SETTINGS`, plus the optional + non-persistent ``disk_pickle_key`` argument used to verify + pickle envelopes (CVE-2025-69872). See + :class:`diskcache.Disk` for accepted values. The key is + resolved once at the FanoutCache root and forwarded to all + shards so sharding stays deterministic across processes + and a single warning is emitted in default mode. """ if directory is None: @@ -37,23 +54,113 @@ def __init__( default_size_limit = DEFAULT_SETTINGS['size_limit'] size_limit = settings.pop('size_limit', default_size_limit) / shards + # CVE-2025-69872: resolve the pickle HMAC key once at the + # FanoutCache root. Without this, each shard would + # independently auto-generate its own ``.diskcache_pickle_key`` + # file -- making sharding non-deterministic across processes + # (FanoutCache._hash uses shard 0's key but storage may land in + # shard N which holds a different key) and emitting one + # warning per shard. + # CVE-2025-69872 (V5): skip eager key resolution for Disk + # subclasses that never use pickle (e.g. JSONDisk). Without + # this we would auto-generate ``.diskcache_pickle_key`` and + # emit an UnsafePickleWarning even though the cache will never + # exercise the pickle path. + pickle_key_arg = settings.pop('disk_pickle_key', _PICKLE_KEY_UNSET) + # CVE-2025-69872 (O1+O4 link): private inherited-key channel + # used by ``_copy_for_same_process`` (and by Cache children + # constructed under another FanoutCache). When set we skip + # env/file resolution, skip the warning (the upstream context + # already warned), and leave ``_pickle_key_user_arg`` UNSET so + # the FanoutCache stays pickleable in default mode. + inherited_key = settings.pop('_disk_pickle_key_inherited', None) + # V1: remember the user's original argument so __getstate__ + # only refuses pickling when the user explicitly provided a + # secret. Auto-generated / env-var keys can be re-resolved in + # the receiving process, so default-mode FanoutCache pickling + # remains supported. + self._pickle_key_user_arg = pickle_key_arg + # CVE-2025-69872 (Y3): pre-validate the user-explicit key with + # the public kwarg name in the error message. + if ( + pickle_key_arg is not _PICKLE_KEY_UNSET + and pickle_key_arg is not None + and pickle_key_arg is not False + ): + from .core import _coerce_pickle_key + + pickle_key_arg = _coerce_pickle_key( + pickle_key_arg, source='disk_pickle_key argument' + ) + if inherited_key is not None and pickle_key_arg is _PICKLE_KEY_UNSET: + # Use the inherited key directly; do NOT resolve from env + # or file and do NOT warn -- the parent context already + # handled both. + resolved_key = inherited_key + elif getattr(disk, '_uses_pickle', True): + if not op.isdir(directory): + os.makedirs(directory, 0o755, exist_ok=True) + resolved_key, source = _resolve_pickle_key_for_directory( + directory, pickle_key_arg + ) + _emit_pickle_key_warning(source, directory, stacklevel=3) + else: + # Forward the user's explicit choice unchanged so that any + # custom Disk subclass that mixes pickle + non-pickle paths + # still honours an explicit key. No file/env resolution. + resolved_key = pickle_key_arg + self._count = shards self._directory = directory self._disk = disk + shard_kwargs = dict(settings) + if pickle_key_arg is not _PICKLE_KEY_UNSET and pickle_key_arg is not None: + # User-explicit (bytes / False): forward as the public + # kwarg so the shard's ``_pickle_key_arg`` reflects the + # explicit choice (and __getstate__ refuses pickling). + shard_kwargs['disk_pickle_key'] = ( + resolved_key if resolved_key is not _PICKLE_KEY_UNSET + else pickle_key_arg + ) + elif ( + resolved_key is not _PICKLE_KEY_UNSET + and resolved_key is not False + and resolved_key is not None + ): + # Default / env / file: forward via the inherited channel + # so each shard uses the same bytes without claiming to be + # user-explicit (default-mode pickling remains allowed). + shard_kwargs['_disk_pickle_key_inherited'] = resolved_key self._shards = tuple( Cache( directory=op.join(directory, '%03d' % num), timeout=timeout, disk=disk, size_limit=size_limit, - **settings, + **shard_kwargs, ) for num in range(shards) ) + # Suppress per-shard warnings: the FanoutCache already emitted + # one above for the entire ensemble (when applicable). + for shard in self._shards: + shard._disk._pickle_key_warned = True self._hash = self._shards[0].disk.hash self._caches = {} self._deques = {} self._indexes = {} + # CVE-2025-69872 (O2): serialize child get-or-create so two + # concurrent ``cache('foo')`` / ``deque('foo')`` / ``index('foo')`` + # calls cannot construct two Cache instances on the same + # SQLite database (which would race on initial schema setup + # and hold separate connection pools). + self._children_lock = threading.Lock() + # CVE-2025-69872 (C1): track close state so cache()/deque()/ + # index() refuse to construct NEW children after close(). + # Existing child references obtained before close() remain + # usable per :meth:`Cache.close` semantics (per-thread + # connection that lazily reopens). + self._closed = False @property def directory(self): @@ -514,11 +621,32 @@ def volume(self): def close(self): """Close database connection.""" + # CVE-2025-69872 (O3): close cached child Cache/Deque/Index + # instances BEFORE the shards so their SQLite connections + # release file handles cleanly (otherwise long-lived + # references via ``fc.cache('foo')`` keep their connections + # open after ``fc.close()``, leaking handles and quietly + # accepting writes against a "closed" FanoutCache). + with self._children_lock: + # CVE-2025-69872 (C1): mark closed under the lock so any + # concurrent cache()/deque()/index() either completes + # before us (returning a still-valid child we then close) + # or sees ``_closed`` True and refuses cleanly. + self._closed = True + for child in list(self._caches.values()): + with cl.suppress(Exception): + child.close() + for child in list(self._deques.values()): + with cl.suppress(Exception): + child._cache.close() + for child in list(self._indexes.values()): + with cl.suppress(Exception): + child._cache.close() + self._caches.clear() + self._deques.clear() + self._indexes.clear() for shard in self._shards: shard.close() - self._caches.clear() - self._deques.clear() - self._indexes.clear() def __enter__(self): return self @@ -527,6 +655,22 @@ def __exit__(self, *exception): self.close() def __getstate__(self): + # CVE-2025-69872 (V1): mirror :meth:`Cache.__getstate__`'s + # protection. Only refuse when the *user* supplied an + # explicit secret (or False) -- auto-generated / env-var keys + # are re-resolved by the receiving process, so default-mode + # pickling still works. + arg = self._pickle_key_user_arg + if arg is not _PICKLE_KEY_UNSET and arg is not None: + raise TypeError( + 'diskcache: FanoutCache instances configured with an ' + 'explicit disk_pickle_key (or disk_pickle_key=False) ' + 'cannot be pickled because the secret is not placed ' + 'in pickle state. Pickle the cache directory path ' + 'instead and reconstruct FanoutCache(directory, ' + 'disk_pickle_key=...) in the receiving process. ' + '(CVE-2025-69872)' + ) return (self._directory, self._count, self.timeout, type(self.disk)) def __setstate__(self, state): @@ -597,20 +741,33 @@ def cache(self, name, timeout=60, disk=None, **settings): :return: Cache with given name """ - _caches = self._caches - - try: - return _caches[name] - except KeyError: + # CVE-2025-69872 (O2): double-checked-locking get-or-create. + with self._children_lock: + if self._closed: + raise RuntimeError( + 'diskcache: FanoutCache is closed; cannot create ' + 'new children. (Existing child references obtained ' + 'before close() remain usable per Cache.close() ' + 'semantics.)' + ) + existing = self._caches.get(name) + if existing is not None: + return existing parts = name.split('/') directory = op.join(self._directory, 'cache', *parts) + # CVE-2025-69872 V16/O4: forward the FanoutCache's resolved + # pickle key to the child cache so it doesn't auto- + # generate its own (different) ``.diskcache_pickle_key`` + # under the child directory. Caller-supplied settings + # win on conflict. + self._inject_disk_pickle_key(settings) temp = Cache( directory=directory, timeout=timeout, disk=self._disk if disk is None else Disk, **settings, ) - _caches[name] = temp + self._caches[name] = temp return temp def deque(self, name, maxlen=None): @@ -631,20 +788,27 @@ def deque(self, name, maxlen=None): :return: Deque with given name """ - _deques = self._deques - - try: - return _deques[name] - except KeyError: + with self._children_lock: + if self._closed: + raise RuntimeError( + 'diskcache: FanoutCache is closed; cannot create ' + 'new children.' + ) + existing = self._deques.get(name) + if existing is not None: + return existing parts = name.split('/') directory = op.join(self._directory, 'deque', *parts) + child_kwargs = {} + self._inject_disk_pickle_key(child_kwargs) cache = Cache( directory=directory, disk=self._disk, eviction_policy='none', + **child_kwargs, ) deque = Deque.fromcache(cache, maxlen=maxlen) - _deques[name] = deque + self._deques[name] = deque return deque def index(self, name): @@ -667,21 +831,101 @@ def index(self, name): :return: Index with given name """ - _indexes = self._indexes - - try: - return _indexes[name] - except KeyError: + with self._children_lock: + if self._closed: + raise RuntimeError( + 'diskcache: FanoutCache is closed; cannot create ' + 'new children.' + ) + existing = self._indexes.get(name) + if existing is not None: + return existing parts = name.split('/') directory = op.join(self._directory, 'index', *parts) + child_kwargs = {} + self._inject_disk_pickle_key(child_kwargs) cache = Cache( directory=directory, disk=self._disk, eviction_policy='none', + **child_kwargs, ) index = Index.fromcache(cache) - _indexes[name] = index + self._indexes[name] = index return index + def _inject_disk_pickle_key(self, kwargs): + """CVE-2025-69872 V16/O4 helper: forward the FanoutCache's + resolved pickle key to a child Cache constructor's kwargs so + the child does not run an independent default-fallback + resolution (which would create a separate + ``.diskcache_pickle_key`` file under the child directory and, + critically, would use a *different* HMAC key than the parent + and its sibling shards). + + Default-mode keys are forwarded via ``_disk_pickle_key_inherited`` + so the child's ``_pickle_key_arg`` stays UNSET -- otherwise + ``Cache.__getstate__`` would refuse to pickle a default-mode + child (the resolved key isn't a user-supplied secret, just an + env/file value the child can't decide for itself). + """ + if ( + 'disk_pickle_key' in kwargs + or '_disk_pickle_key_inherited' in kwargs + ): + return # caller-supplied wins + # User-explicit (bytes / False): forward as the public kwarg + # so the child's __getstate__ refuses pickling, matching the + # parent's posture. + arg = self._pickle_key_user_arg + if arg is not _PICKLE_KEY_UNSET and arg is not None: + kwargs['disk_pickle_key'] = arg + return + # Default mode: forward the already-resolved bytes via the + # inherited channel so the child remains pickleable but uses + # the same key as the parent and its sibling shards. + if self._shards: + shard_disk = self._shards[0]._disk + shard_arg = shard_disk._pickle_key_arg + if shard_arg is not _PICKLE_KEY_UNSET and shard_arg is not None: + kwargs['disk_pickle_key'] = shard_arg + return + resolved = getattr(shard_disk, '_pickle_key_resolved', None) + if resolved is not None and resolved is not False: + kwargs['_disk_pickle_key_inherited'] = resolved + + def __copy__(self): + return self._copy_for_same_process() + + def __deepcopy__(self, memo): + return self._copy_for_same_process() + + def _copy_for_same_process(self): + # CVE-2025-69872 V19/O1: same-process copy preserves the + # user's explicit pickle_key (or False) so a copied + # FanoutCache stays in the same security mode as the + # original. Default-mode FanoutCaches forward the already- + # resolved key via the inherited channel so an intervening + # change to ``DISKCACHE_PICKLE_KEY`` or + # ``.diskcache_pickle_key`` does NOT produce a copy that + # uses a different key (which would silently fail HMAC + # verification on shared data). + kwargs = {} + arg = self._pickle_key_user_arg + if arg is not _PICKLE_KEY_UNSET: + kwargs['disk_pickle_key'] = arg + elif self._shards: + shard_disk = self._shards[0]._disk + resolved = getattr(shard_disk, '_pickle_key_resolved', None) + if resolved is not None and resolved is not False: + kwargs['_disk_pickle_key_inherited'] = resolved + return self.__class__( + self._directory, + shards=self._count, + timeout=self.timeout, + disk=self._disk, + **kwargs, + ) + FanoutCache.memoize = Cache.memoize # type: ignore diff --git a/diskcache/persistent.py b/diskcache/persistent.py index 522bb74..03b6696 100644 --- a/diskcache/persistent.py +++ b/diskcache/persistent.py @@ -13,7 +13,31 @@ from contextlib import contextmanager from shutil import rmtree -from .core import ENOVAL, Cache +from .core import ENOVAL, Cache, _PICKLE_KEY_UNSET + + +def _check_wrapped_cache_pickleable(cache, container_name): + """CVE-2025-69872 (V2): refuse to pickle persistent containers + backed by a Cache that has an explicit ``disk_pickle_key``. + + Without this, ``pickle.dumps(deque)`` / ``pickle.dumps(index)`` + silently succeeds even though ``pickle.dumps(deque._cache)`` would + raise -- because :meth:`Deque.__getstate__` and + :meth:`Index.__getstate__` only serialize the directory, never + delegating to the wrapped Cache's protection. + """ + disk = cache._disk + arg = getattr(disk, '_pickle_key_arg', _PICKLE_KEY_UNSET) + if arg is not _PICKLE_KEY_UNSET and arg is not None: + raise TypeError( + 'diskcache: %s instances backed by a Cache configured with ' + 'an explicit disk_pickle_key (or disk_pickle_key=False) ' + 'cannot be pickled because the secret is not placed in ' + 'pickle state. Pickle the directory path instead and ' + 'reconstruct %s(directory=..., ...) with the same ' + 'disk_pickle_key in the receiving process. (CVE-2025-69872)' + % (container_name, container_name) + ) def _make_compare(seq_op, doc): @@ -319,12 +343,39 @@ def __reversed__(self): pass def __getstate__(self): + _check_wrapped_cache_pickleable(self._cache, 'Deque') return self.directory, self.maxlen def __setstate__(self, state): directory, maxlen = state self.__init__(directory=directory, maxlen=maxlen) + def __copy__(self): + return self._copy_for_same_process() + + def __deepcopy__(self, memo): + return self._copy_for_same_process() + + def _copy_for_same_process(self): + # CVE-2025-69872 V19/O1: same-process Deque copy preserves the + # underlying Cache's explicit pickle_key so the copy stays + # readable. Default-mode caches forward the already-resolved + # key via the private inherited channel so env / file changes + # between calls don't produce a copy that disagrees with the + # original on the HMAC key. + cache = self._cache + arg = getattr(cache._disk, '_pickle_key_arg', _PICKLE_KEY_UNSET) + if arg is not _PICKLE_KEY_UNSET and arg is not None: + new_cache = Cache(self.directory, disk_pickle_key=arg) + return type(self).fromcache(new_cache, maxlen=self.maxlen) + resolved = getattr(cache._disk, '_pickle_key_resolved', None) + if resolved is not None and resolved is not False: + new_cache = Cache( + self.directory, _disk_pickle_key_inherited=resolved + ) + return type(self).fromcache(new_cache, maxlen=self.maxlen) + return type(self)(directory=self.directory, maxlen=self.maxlen) + def append(self, value): """Add `value` to back of deque. @@ -1090,11 +1141,37 @@ def items(self): __hash__ = None # type: ignore def __getstate__(self): + _check_wrapped_cache_pickleable(self._cache, 'Index') return self.directory def __setstate__(self, state): self.__init__(state) + def __copy__(self): + return self._copy_for_same_process() + + def __deepcopy__(self, memo): + return self._copy_for_same_process() + + def _copy_for_same_process(self): + # CVE-2025-69872 V19/O1: same-process Index copy preserves the + # underlying Cache's explicit pickle_key. Default-mode caches + # forward the already-resolved key via the private inherited + # channel so env / file changes between calls don't produce a + # copy that disagrees with the original on the HMAC key. + cache = self._cache + arg = getattr(cache._disk, '_pickle_key_arg', _PICKLE_KEY_UNSET) + if arg is not _PICKLE_KEY_UNSET and arg is not None: + new_cache = Cache(self.directory, disk_pickle_key=arg) + return type(self).fromcache(new_cache) + resolved = getattr(cache._disk, '_pickle_key_resolved', None) + if resolved is not None and resolved is not False: + new_cache = Cache( + self.directory, _disk_pickle_key_inherited=resolved + ) + return type(self).fromcache(new_cache) + return type(self)(self.directory) + def __eq__(self, other): """index.__eq__(other) <==> index == other diff --git a/tests/test_security.py b/tests/test_security.py new file mode 100644 index 0000000..885ced8 --- /dev/null +++ b/tests/test_security.py @@ -0,0 +1,2098 @@ +"""Tests for the CVE-2025-69872 / GHSA-w8v5-vhqr-4h9v mitigation. + +These tests exercise the HMAC envelope wrapping every pickle blob written +by :class:`diskcache.Disk`. They live in their own module so the rest of +the suite continues to run with the default +:class:`UnsafePickleWarning` filter, while these tests can opt to either +suppress or assert that the warning is emitted. +""" + +import hashlib +import hmac +import io +import multiprocessing +import os +import os.path as op +import pickle +import secrets +import shutil +import sqlite3 +import sys +import tempfile +import warnings +import errno + +import pytest + +import diskcache as dc +from diskcache.core import ( + PICKLE_HEADER_SIZE, + PICKLE_KEY_ENV, + PICKLE_KEY_FILENAME, + PICKLE_MAGIC, +) + + +pytestmark = pytest.mark.filterwarnings( + 'ignore', category=dc.EmptyDirWarning +) + + +@pytest.fixture +def tmp_cache_dir(): + path = tempfile.mkdtemp(prefix='diskcache-sec-') + try: + yield path + finally: + shutil.rmtree(path, ignore_errors=True) + + +@pytest.fixture +def clear_env(monkeypatch): + monkeypatch.delenv(PICKLE_KEY_ENV, raising=False) + + +# -- helpers ------------------------------------------------------------ + + +class _Pwn: + """Pickle payload that would execute code if naively unpickled.""" + + def __reduce__(self): + # Using ``print`` rather than ``os.system`` keeps the test + # self-contained and side-effect free if the protection ever + # regresses. + return (print, ('PWNED!',)) + + +def _force_evil_inline_pickle(directory, key): + """Replace the inline pickle blob for ``key`` with a malicious payload.""" + con = sqlite3.connect(op.join(directory, 'cache.db')) + try: + con.execute( + 'UPDATE Cache SET value = ?, mode = 4 WHERE key = ?', + (pickle.dumps(_Pwn()), key), + ) + con.commit() + finally: + con.close() + + +def _val_files(directory): + out = [] + for root, _, files in os.walk(directory): + for name in files: + if name.endswith('.val'): + out.append(op.join(root, name)) + return out + + +# -- round-trip & default behaviour ------------------------------------ + + +def test_default_roundtrip_emits_warning_and_creates_key_file( + tmp_cache_dir, clear_env +): + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter('always') + with dc.Cache(tmp_cache_dir) as cache: + cache[('tup', 'le')] = {'nested': [1, 2, 3]} + assert cache[('tup', 'le')] == {'nested': [1, 2, 3]} + cache.check() + + msgs = [w for w in caught if isinstance(w.message, dc.UnsafePickleWarning)] + assert msgs, 'expected UnsafePickleWarning for in-dir auto key' + assert PICKLE_KEY_FILENAME in str(msgs[0].message) + assert op.isfile(op.join(tmp_cache_dir, PICKLE_KEY_FILENAME)) + + +def test_envelope_header_is_present_in_db(tmp_cache_dir, clear_env): + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir) as cache: + cache['k'] = {'value': 'object'} + + con = sqlite3.connect(op.join(tmp_cache_dir, 'cache.db')) + try: + ((blob,),) = con.execute( + 'SELECT value FROM Cache WHERE key = ?', ('k',) + ).fetchall() + finally: + con.close() + + raw = bytes(blob) + assert raw.startswith(PICKLE_MAGIC) + assert len(raw) > PICKLE_HEADER_SIZE + + +# -- explicit key ------------------------------------------------------ + + +def test_explicit_key_no_keyfile_no_warning(tmp_cache_dir, clear_env): + key = secrets.token_bytes(32) + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter('always') + with dc.Cache(tmp_cache_dir, disk_pickle_key=key) as cache: + cache['x'] = [1, 2, 3] + assert cache['x'] == [1, 2, 3] + + assert not any( + isinstance(w.message, dc.UnsafePickleWarning) for w in caught + ) + assert not op.exists(op.join(tmp_cache_dir, PICKLE_KEY_FILENAME)) + + +def test_explicit_key_hex_string(tmp_cache_dir, clear_env): + key = secrets.token_bytes(32).hex() + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir, disk_pickle_key=key) as cache: + cache['x'] = ['hex-keyed'] + assert cache['x'] == ['hex-keyed'] + + +def test_explicit_key_too_short_raises(tmp_cache_dir, clear_env): + with pytest.raises(ValueError, match='at least'): + dc.Cache(tmp_cache_dir, disk_pickle_key=b'shortkey') + + +def test_explicit_key_wrong_type_raises(tmp_cache_dir, clear_env): + with pytest.raises(TypeError): + dc.Cache(tmp_cache_dir, disk_pickle_key=12345) + + +def test_explicit_key_not_persisted_in_settings(tmp_cache_dir, clear_env): + key = secrets.token_bytes(32) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir, disk_pickle_key=key) as cache: + cache['x'] = {'persist?': False} + + con = sqlite3.connect(op.join(tmp_cache_dir, 'cache.db')) + try: + rows = con.execute( + 'SELECT key, value FROM Settings WHERE key LIKE ?', + ('%pickle_key%',), + ).fetchall() + finally: + con.close() + + assert rows == [], ( + 'disk_pickle_key must NOT be persisted in the Settings table' + ) + + +def test_mismatched_key_fails_to_decrypt(tmp_cache_dir, clear_env): + key1 = secrets.token_bytes(32) + key2 = secrets.token_bytes(32) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir, disk_pickle_key=key1) as cache: + cache['k'] = {'secret': True} + with dc.Cache(tmp_cache_dir, disk_pickle_key=key2) as cache: + with pytest.raises(pickle.UnpicklingError): + cache['k'] + + +# -- env var ----------------------------------------------------------- + + +def test_env_var_used_when_no_explicit_key(tmp_cache_dir, monkeypatch): + monkeypatch.setenv(PICKLE_KEY_ENV, secrets.token_bytes(32).hex()) + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter('always') + with dc.Cache(tmp_cache_dir) as cache: + cache['k'] = {'env': True} + assert cache['k'] == {'env': True} + + assert not any( + isinstance(w.message, dc.UnsafePickleWarning) for w in caught + ) + assert not op.exists(op.join(tmp_cache_dir, PICKLE_KEY_FILENAME)) + + +def test_env_var_invalid_hex_raises(tmp_cache_dir, monkeypatch): + monkeypatch.setenv(PICKLE_KEY_ENV, 'not-hex-at-all') + with dc.Cache(tmp_cache_dir) as cache: + with pytest.raises(ValueError, match='hex'): + cache['k'] = {'x': 1} + + +# -- tamper detection -------------------------------------------------- + + +def test_tampered_inline_value_raises(tmp_cache_dir, clear_env): + key = secrets.token_bytes(32) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir, disk_pickle_key=key) as cache: + cache['k'] = {'foo': 'bar'} + + _force_evil_inline_pickle(tmp_cache_dir, 'k') + + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir, disk_pickle_key=key) as cache: + with pytest.raises(pickle.UnpicklingError): + cache['k'] + + +def test_bitflip_inline_value_raises(tmp_cache_dir, clear_env): + """Tampering that keeps the envelope header but corrupts the payload + must fail HMAC verification (not just the missing-envelope check).""" + key = secrets.token_bytes(32) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir, disk_pickle_key=key) as cache: + cache['k'] = {'foo': 'bar'} + + con = sqlite3.connect(op.join(tmp_cache_dir, 'cache.db')) + try: + ((blob,),) = con.execute( + 'SELECT value FROM Cache WHERE key = ?', ('k',) + ).fetchall() + raw = bytearray(bytes(blob)) + # Flip a bit deep inside the payload (past the 36-byte header). + raw[-1] ^= 0x01 + con.execute( + 'UPDATE Cache SET value = ? WHERE key = ?', (bytes(raw), 'k') + ) + con.commit() + finally: + con.close() + + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir, disk_pickle_key=key) as cache: + with pytest.raises(pickle.UnpicklingError, match='HMAC'): + cache['k'] + + +def test_tampered_val_file_raises(tmp_cache_dir, clear_env): + key = secrets.token_bytes(32) + big_value = {'data': 'x' * (40 * 1024)} + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache( + tmp_cache_dir, disk_pickle_key=key, disk_min_file_size=1024 + ) as cache: + cache['big'] = big_value + + files = _val_files(tmp_cache_dir) + assert len(files) == 1, files + with open(files[0], 'wb') as writer: + writer.write(pickle.dumps(_Pwn())) + + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir, disk_pickle_key=key) as cache: + with pytest.raises(pickle.UnpicklingError): + cache['big'] + + +def test_tampered_pickled_key_via_iteration_raises(tmp_cache_dir, clear_env): + """Complex (pickled) keys are loaded during iteration / peekitem, so + tampering them must surface as UnpicklingError on those paths.""" + key = secrets.token_bytes(32) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir, disk_pickle_key=key) as cache: + cache[('complex', 'key')] = 'value' + + con = sqlite3.connect(op.join(tmp_cache_dir, 'cache.db')) + try: + rows = con.execute( + 'SELECT rowid, key FROM Cache WHERE raw = 0' + ).fetchall() + assert rows, 'expected at least one pickled key row' + rowid, _ = rows[0] + evil = pickle.dumps(_Pwn()) + con.execute( + 'UPDATE Cache SET key = ? WHERE rowid = ?', + (sqlite3.Binary(evil), rowid), + ) + con.commit() + finally: + con.close() + + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir, disk_pickle_key=key) as cache: + with pytest.raises(pickle.UnpicklingError): + list(cache) + + +# -- legacy mode ------------------------------------------------------- + + +def test_legacy_mode_emits_warning_and_works(tmp_cache_dir, clear_env): + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter('always') + with dc.Cache(tmp_cache_dir, disk_pickle_key=False) as cache: + cache['z'] = {'legacy': True} + assert cache['z'] == {'legacy': True} + + msgs = [w for w in caught if isinstance(w.message, dc.UnsafePickleWarning)] + assert msgs, 'pickle_key=False must emit UnsafePickleWarning' + assert 'disabled' in str(msgs[0].message).lower() + + +def test_secure_mode_rejects_legacy_data(tmp_cache_dir, clear_env): + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir, disk_pickle_key=False) as cache: + cache['k'] = {'legacy': True} + with dc.Cache( + tmp_cache_dir, disk_pickle_key=secrets.token_bytes(32) + ) as cache: + with pytest.raises(pickle.UnpicklingError, match='envelope'): + cache['k'] + + +def test_legacy_mode_can_read_secure_data(tmp_cache_dir, clear_env): + """Legacy mode strips the envelope so it can read forward as well.""" + key = secrets.token_bytes(32) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir, disk_pickle_key=key) as cache: + cache['k'] = {'enveloped': True} + with dc.Cache(tmp_cache_dir, disk_pickle_key=False) as cache: + assert cache['k'] == {'enveloped': True} + + +# -- JSONDisk unaffected ------------------------------------------------ + + +def test_jsondisk_does_not_create_keyfile(tmp_cache_dir, clear_env): + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter('always') + with dc.Cache(tmp_cache_dir, disk=dc.JSONDisk) as cache: + cache['a'] = [1, 2, 3] + assert cache['a'] == [1, 2, 3] + + assert not any( + isinstance(w.message, dc.UnsafePickleWarning) for w in caught + ) + assert not op.exists(op.join(tmp_cache_dir, PICKLE_KEY_FILENAME)) + + +# -- Cache.check whitelist --------------------------------------------- + + +def test_check_does_not_warn_about_keyfile(tmp_cache_dir, clear_env): + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter('always') + with dc.Cache(tmp_cache_dir) as cache: + cache['k'] = {'foo': 'bar'} + warns = cache.check() + assert op.isfile(op.join(tmp_cache_dir, PICKLE_KEY_FILENAME)) + + unknown = [w for w in warns if isinstance(w.message, dc.UnknownFileWarning)] + assert unknown == [], ( + 'cache.check() must whitelist .diskcache_pickle_key, got %r' % unknown + ) + + unknown_top = [ + w for w in caught if isinstance(w.message, dc.UnknownFileWarning) + ] + assert unknown_top == [] + + +# -- primitive keys/values bypass pickle entirely ---------------------- + + +def test_primitive_only_does_not_create_keyfile(tmp_cache_dir, clear_env): + """Lazy resolution means a cache used only with primitive keys and + values never triggers HMAC key creation.""" + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter('always') + with dc.Cache(tmp_cache_dir) as cache: + cache['plain-str-key'] = 'plain-str-value' + cache[42] = 3.14 + cache[b'bytes-key'] = b'bytes-value' + assert cache['plain-str-key'] == 'plain-str-value' + + assert not any( + isinstance(w.message, dc.UnsafePickleWarning) for w in caught + ) + assert not op.exists(op.join(tmp_cache_dir, PICKLE_KEY_FILENAME)) + + +# -- Review-pass-2: path traversal (C2) -------------------------------- + + +def _put_evil_filename(directory, key, evil_filename, mode=2): + """Replace the row's filename to point outside the cache dir.""" + con = sqlite3.connect(op.join(directory, 'cache.db')) + try: + con.execute( + 'UPDATE Cache SET filename = ?, mode = ?, value = NULL ' + 'WHERE key = ?', + (evil_filename, mode, key), + ) + con.commit() + finally: + con.close() + + +def test_path_traversal_in_filename_raises(tmp_cache_dir, clear_env): + """An attacker who tampers cache.db's ``filename`` column to point + outside the cache directory must not be able to coerce DiskCache + into reading arbitrary files (CVE-2025-69872 adjacent).""" + secret_dir = tempfile.mkdtemp(prefix='diskcache-secret-') + try: + secret_path = op.join(secret_dir, 'secret.txt') + with open(secret_path, 'wb') as fh: + fh.write(b'TOP SECRET') + + key = secrets.token_bytes(32) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache( + tmp_cache_dir, + disk_pickle_key=key, + disk_min_file_size=1, + ) as cache: + cache['k'] = b'x' * 64 + + rel = op.relpath(secret_path, tmp_cache_dir) + _put_evil_filename(tmp_cache_dir, 'k', rel, mode=2) + + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir, disk_pickle_key=key) as cache: + with pytest.raises(ValueError, match='escapes cache'): + cache['k'] + finally: + shutil.rmtree(secret_dir, ignore_errors=True) + + +def test_path_traversal_blocked_for_text_mode(tmp_cache_dir, clear_env): + secret_dir = tempfile.mkdtemp(prefix='diskcache-secret-') + try: + secret_path = op.join(secret_dir, 'secret.txt') + with open(secret_path, 'w', encoding='utf-8') as fh: + fh.write('TOP SECRET') + + key = secrets.token_bytes(32) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache( + tmp_cache_dir, + disk_pickle_key=key, + disk_min_file_size=1, + ) as cache: + cache['k'] = 'x' * 64 + + rel = op.relpath(secret_path, tmp_cache_dir) + _put_evil_filename(tmp_cache_dir, 'k', rel, mode=3) + + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir, disk_pickle_key=key) as cache: + with pytest.raises(ValueError, match='escapes cache'): + cache['k'] + finally: + shutil.rmtree(secret_dir, ignore_errors=True) + + +def test_remove_refuses_traversal(tmp_cache_dir, clear_env): + """Disk.remove must refuse path-traversal filenames silently + (eviction must be tolerant) but still emit an UnsafePickleWarning + so operators see the tampering.""" + sentinel_dir = tempfile.mkdtemp(prefix='diskcache-sentinel-') + try: + sentinel = op.join(sentinel_dir, 'do-not-delete.txt') + with open(sentinel, 'wb') as fh: + fh.write(b'KEEP ME') + + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir) as cache: + disk = cache._disk + + rel = op.relpath(sentinel, tmp_cache_dir) + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter('always') + disk.remove(rel) + + assert op.exists(sentinel), 'sentinel file must not be deleted' + assert any( + isinstance(w.message, dc.UnsafePickleWarning) + and 'refusing to remove' in str(w.message) + for w in caught + ) + finally: + shutil.rmtree(sentinel_dir, ignore_errors=True) + + +# -- Review-pass-2: FanoutCache (C3) ----------------------------------- + + +def test_fanoutcache_propagates_explicit_key(tmp_cache_dir, clear_env): + key = secrets.token_bytes(32) + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter('always') + with dc.FanoutCache( + tmp_cache_dir, shards=4, disk_pickle_key=key + ) as cache: + cache[('complex', 'key')] = {'value': [1, 2, 3]} + assert cache[('complex', 'key')] == {'value': [1, 2, 3]} + + assert not any( + isinstance(w.message, dc.UnsafePickleWarning) for w in caught + ), 'explicit key must not emit UnsafePickleWarning' + + # No per-shard key file: the explicit key was forwarded. + for num in range(4): + shard_dir = op.join(tmp_cache_dir, '%03d' % num) + assert not op.exists(op.join(shard_dir, PICKLE_KEY_FILENAME)) + + +def test_fanoutcache_default_emits_one_warning(tmp_cache_dir, clear_env): + """With the default fallback, FanoutCache must emit ONE warning, + not one-per-shard, and create a single key file at the FanoutCache + root (not per-shard).""" + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter('always') + with dc.FanoutCache(tmp_cache_dir, shards=4) as cache: + cache[('a', 'b')] = {'val': 1} + cache[('c', 'd')] = {'val': 2} + cache[('e', 'f')] = {'val': 3} + assert cache[('a', 'b')] == {'val': 1} + assert cache[('c', 'd')] == {'val': 2} + + pkey_warnings = [ + w for w in caught if isinstance(w.message, dc.UnsafePickleWarning) + ] + assert len(pkey_warnings) == 1, ( + 'expected exactly one UnsafePickleWarning per FanoutCache, got %d' + % len(pkey_warnings) + ) + + # Root-level key file exists; CVE-2025-69872 (L1) shards now ALSO + # mirror the same key file into their own directory so a process + # that opens a shard directly as ``Cache(shard_dir)`` can recover + # the same HMAC key via default resolution. All copies must + # equal the root key. + root_key_path = op.join(tmp_cache_dir, PICKLE_KEY_FILENAME) + assert op.exists(root_key_path) + with open(root_key_path, 'rb') as fh: + root_key = fh.read() + for num in range(4): + shard_dir = op.join(tmp_cache_dir, '%03d' % num) + shard_key_path = op.join(shard_dir, PICKLE_KEY_FILENAME) + assert op.exists(shard_key_path), ( + 'L1: shard %d must mirror the root key file' % num + ) + with open(shard_key_path, 'rb') as fh: + assert fh.read() == root_key, ( + 'L1: shard %d key file must equal root key' % num + ) + + +def test_fanoutcache_two_instances_share_default_key( + tmp_cache_dir, clear_env +): + """Two FanoutCache instances on the same directory must produce + consistent sharding and read each other's writes.""" + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.FanoutCache(tmp_cache_dir, shards=4) as a: + a[('shared', 'tuple', 'key')] = {'from': 'a'} + with dc.FanoutCache(tmp_cache_dir, shards=4) as b: + assert b[('shared', 'tuple', 'key')] == {'from': 'a'} + + +# -- Review-pass-2: Cache.check root-only whitelist (H1) --------------- + + +def test_check_does_not_whitelist_subdirectory_keyfile( + tmp_cache_dir, clear_env +): + """Attacker plants ``.diskcache_pickle_key`` in a subdirectory; the + whitelist must not preserve it (basename-only would have). + + We assert the observable side effect: ``check(fix=True)`` deletes + the subdirectory decoy. (Pytest's warning capture interacts oddly + with ``check()``'s own ``warnings.catch_warnings(record=True)`` + block, so we don't rely on the returned warning list.) + """ + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir, disk_min_file_size=0) as cache: + cache['k'] = 'x' * 200 # forces .val file in subdirectory + + subdir = None + for entry in os.listdir(tmp_cache_dir): + full = op.join(tmp_cache_dir, entry) + if op.isdir(full): + subdir = full + break + assert subdir is not None, ( + 'expected subdir; got %r' % os.listdir(tmp_cache_dir) + ) + decoy = op.join(subdir, PICKLE_KEY_FILENAME) + with open(decoy, 'wb') as fh: + fh.write(b'attacker-controlled-bytes') + + assert op.exists(decoy) + cache.check(fix=True) + assert not op.exists(decoy), ( + 'check(fix=True) must delete the subdirectory decoy ' + '(basename-only whitelist would have preserved it)' + ) + + # Sanity: legitimate root-level key file (not present here + # since we used disk_min_file_size=0 and didn't pickle) is + # not accidentally flagged either; create one and re-check. + root_key = op.join(tmp_cache_dir, PICKLE_KEY_FILENAME) + with open(root_key, 'wb') as fh: + fh.write(b'X' * 32) + cache.check(fix=True) + assert op.exists(root_key), ( + 'root-level key file must remain whitelisted' + ) + + +# -- Review-pass-2: Cache pickling (H2) -------------------------------- + + +def test_cache_pickling_with_explicit_key_raises(tmp_cache_dir, clear_env): + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + cache = dc.Cache(tmp_cache_dir, disk_pickle_key=secrets.token_bytes(32)) + try: + with pytest.raises(TypeError, match='disk_pickle_key'): + pickle.dumps(cache) + finally: + cache.close() + + +def test_cache_pickling_with_legacy_mode_raises(tmp_cache_dir, clear_env): + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + cache = dc.Cache(tmp_cache_dir, disk_pickle_key=False) + try: + with pytest.raises(TypeError, match='disk_pickle_key'): + pickle.dumps(cache) + finally: + cache.close() + + +def test_cache_pickling_with_default_succeeds(tmp_cache_dir, clear_env): + """No explicit key + default fallback: pickling must work because + the receiving process can re-resolve from env or file.""" + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir) as cache: + cache['k'] = {'value': 1} + blob = pickle.dumps(cache) + other = pickle.loads(blob) + try: + assert other['k'] == {'value': 1} + finally: + other.close() + + +# -- Review-pass-2: defensive Settings pop (M3) ------------------------ + + +def test_disk_pickle_key_in_settings_does_not_leak(tmp_cache_dir, clear_env): + """A stale ``disk_pickle_key`` row in Settings (e.g. from an older + patched build, manual edit, or attacker) must NOT influence the + actual HMAC key used by the Disk on subsequent opens.""" + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir) as cache: + cache['warm'] = 1 + + # Manually inject a hostile row that, if naively trusted, would + # become the HMAC key string. + poisoned = 'attacker-supplied-key-' + 'A' * 16 + con = sqlite3.connect(op.join(tmp_cache_dir, 'cache.db')) + try: + con.execute( + 'INSERT OR REPLACE INTO Settings VALUES (?, ?)', + ('disk_pickle_key', poisoned), + ) + con.commit() + finally: + con.close() + + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir) as cache: + cache['after'] = {'value': 1} + assert cache['after'] == {'value': 1} + disk = cache._disk + + # The Disk's pickle_key_arg must remain UNSET (defensive pop did + # its job); the resolved key must not equal the poisoned bytes. + from diskcache.core import _PICKLE_KEY_UNSET as UNSET + assert disk._pickle_key_arg is UNSET, ( + 'pickle_key_arg leaked from Settings: %r' % disk._pickle_key_arg + ) + if disk._pickle_key_resolved not in (None, False): + assert disk._pickle_key_resolved != poisoned.encode('utf-8'), ( + 'resolved key matches poisoned Settings row' + ) + + +# -- Review-pass-2: edge-case envelope parsing (M9) -------------------- + + +def test_short_envelope_header_raises(tmp_cache_dir, clear_env): + """Bytes that start with ``DCv1`` but are shorter than the full + 36-byte header must NOT be treated as enveloped.""" + key = secrets.token_bytes(32) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir, disk_pickle_key=key) as cache: + cache['k'] = 1 + + con = sqlite3.connect(op.join(tmp_cache_dir, 'cache.db')) + try: + # Magic + only 20 bytes (truncated MAC). + con.execute( + 'UPDATE Cache SET value = ?, mode = 4 WHERE key = ?', + (PICKLE_MAGIC + b'\x00' * 20, 'k'), + ) + con.commit() + finally: + con.close() + + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir, disk_pickle_key=key) as cache: + with pytest.raises(pickle.UnpicklingError): + cache['k'] + + +def test_zero_byte_value_raises(tmp_cache_dir, clear_env): + key = secrets.token_bytes(32) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir, disk_pickle_key=key) as cache: + cache['k'] = 1 + + con = sqlite3.connect(op.join(tmp_cache_dir, 'cache.db')) + try: + con.execute( + 'UPDATE Cache SET value = ?, mode = 4 WHERE key = ?', + (b'', 'k'), + ) + con.commit() + finally: + con.close() + + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir, disk_pickle_key=key) as cache: + with pytest.raises(pickle.UnpicklingError, match='envelope'): + cache['k'] + + +def test_valid_hmac_invalid_pickle_raises(tmp_cache_dir, clear_env): + """An attacker who somehow obtains the key (or a corrupted-but- + HMAC-valid blob) and writes a non-pickle payload should still get a + pickle error -- not silent garbage.""" + key = secrets.token_bytes(32) + payload = b'\xff' * 50 # not valid pickle + mac = hmac.new(key, payload, hashlib.sha256).digest() + envelope = PICKLE_MAGIC + mac + payload + + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir, disk_pickle_key=key) as cache: + cache['k'] = 1 + + con = sqlite3.connect(op.join(tmp_cache_dir, 'cache.db')) + try: + con.execute( + 'UPDATE Cache SET value = ?, mode = 4 WHERE key = ?', + (envelope, 'k'), + ) + con.commit() + finally: + con.close() + + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir, disk_pickle_key=key) as cache: + # CVE-2025-69872 V22: tighten -- previous version accepted + # KeyError, masking a regression where a corrupt HMAC-valid + # entry could be treated as a cache miss. pickle/EOFError + # are the only legitimate outcomes here. + with pytest.raises((pickle.UnpicklingError, EOFError)): + cache['k'] + + +def test_env_var_short_hex_raises(tmp_cache_dir, monkeypatch): + monkeypatch.setenv(PICKLE_KEY_ENV, 'deadbeef') # 4 bytes + with dc.Cache(tmp_cache_dir) as cache: + with pytest.raises(ValueError, match='at least'): + cache['x'] = {'complex': 'value'} + + +def test_env_var_empty_raises(tmp_cache_dir, monkeypatch, clear_env): + """Empty string env var must NOT be silently treated as 'unset'. + + monkeypatch the env var to '' explicitly; the resolution code uses + ``if env_value:`` so empty falls through to the file fallback, + which is the intended behavior. Verify that path works (no crash). + """ + monkeypatch.setenv(PICKLE_KEY_ENV, '') + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir) as cache: + cache['k'] = {'val': 1} + assert cache['k'] == {'val': 1} + # File fallback created the key file because env was effectively unset. + assert op.exists(op.join(tmp_cache_dir, PICKLE_KEY_FILENAME)) + + +# -- Review-pass-2: multi-process race on key file (H3) ---------------- + + +def _race_worker(directory, idx, queue): # pragma: no cover - subprocess + import warnings as _w + import diskcache as _dc + + _w.simplefilter('ignore', _dc.UnsafePickleWarning) + try: + with _dc.Cache(directory) as cache: + cache[('worker', idx)] = {'pid': os.getpid(), 'idx': idx} + queue.put(('ok', idx, cache[('worker', idx)])) + except Exception as exc: # pylint: disable=broad-except + queue.put(('err', idx, repr(exc))) + + +@pytest.mark.skipif( + sys.platform == 'win32' and sys.version_info < (3, 8), + reason='multiprocessing fork start method is unreliable on this combo', +) +def test_concurrent_key_file_creation_no_crash(tmp_cache_dir, clear_env): + """Multiple processes creating the same fresh cache must all + succeed -- the original code crashed losers with a false-positive + 'too short' error before the writer's os.write completed.""" + ctx = multiprocessing.get_context('spawn') + queue = ctx.Queue() + procs = [ + ctx.Process(target=_race_worker, args=(tmp_cache_dir, i, queue)) + for i in range(4) + ] + for p in procs: + p.start() + for p in procs: + p.join(timeout=30) + assert p.exitcode == 0, ( + 'worker exited %s; stdout/stderr should not contain ' + 'RuntimeError("too short")' % p.exitcode + ) + + results = [] + while not queue.empty(): + results.append(queue.get_nowait()) + assert len(results) == 4 + for status, idx, payload in results: + assert status == 'ok', 'worker %d failed: %r' % (idx, payload) + + +# -- Review-pass-2: sanity for trimmed TypeError (M4) ------------------ + + +def test_typeerror_message_lists_only_real_types(tmp_cache_dir, clear_env): + with pytest.raises(TypeError) as exc_info: + dc.Cache(tmp_cache_dir, disk_pickle_key=12345) + msg = str(exc_info.value) + assert 'bytes' in msg and 'hex str' in msg + assert 'False' not in msg and 'None' not in msg, ( + 'TypeError must not advertise False/None: caller filters them. ' + 'Got: %r' % msg + ) + + +# -- Pass-3 round of fixes --------------------------------------------- + + +def test_v3_keyboard_interrupt_during_write_cleans_up( + tmp_cache_dir, clear_env, monkeypatch +): + """V3: BaseException (KeyboardInterrupt / SystemExit) during the + key-file write must not leave an empty file behind that + permanently blocks every subsequent process.""" + from diskcache import core as dc_core + + keyfile = op.join(tmp_cache_dir, PICKLE_KEY_FILENAME) + orig_write = os.write + + def kbd_int_write(fd, data): # pragma: no cover - exercised below + raise KeyboardInterrupt('simulated Ctrl+C') + + monkeypatch.setattr(os, 'write', kbd_int_write) + with pytest.raises(KeyboardInterrupt): + dc_core._read_or_create_pickle_key_file(keyfile) + monkeypatch.setattr(os, 'write', orig_write) + + assert not op.exists(keyfile), ( + 'Empty key file must be unlinked after KeyboardInterrupt; ' + 'leaving it behind permanently blocks other processes with ' + 'a "too short" RuntimeError.' + ) + + # And a fresh attempt now succeeds without manual cleanup: + key, created = dc_core._read_or_create_pickle_key_file(keyfile) + assert created and len(key) >= 16 + + +def test_v3_oserror_during_write_cleans_up( + tmp_cache_dir, clear_env, monkeypatch +): + """V3: A regular OSError (disk full) during write must also remove + the half-baked file.""" + from diskcache import core as dc_core + + keyfile = op.join(tmp_cache_dir, PICKLE_KEY_FILENAME) + + def disk_full_write(fd, data): + raise OSError(28, 'No space left on device') + + monkeypatch.setattr(os, 'write', disk_full_write) + with pytest.raises(OSError): + dc_core._read_or_create_pickle_key_file(keyfile) + assert not op.exists(keyfile) + + +def test_v9_unc_or_invalid_filename_raises_valueerror( + tmp_cache_dir, clear_env +): + """V9: ``_safe_filename_path`` must translate platform OSError + (e.g. Windows UNC path resolution failure) into the documented + ValueError so callers don't have to also catch OSError.""" + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir) as cache: + disk = cache._disk + if sys.platform == 'win32': + with pytest.raises(ValueError): + disk._safe_filename_path( + '//nonexistent-server-xyz9876/share/file.txt' + ) + else: + # On POSIX, realpath of an absolute path that escapes the dir + # also surfaces as ValueError (containment check rather than + # OSError, but same exception class as documented). + with pytest.raises(ValueError): + disk._safe_filename_path('/etc/passwd') + + +def test_v6_realpath_cached_after_first_call(tmp_cache_dir, clear_env): + """V6: After the first call, ``_directory_realpath`` is populated + so subsequent fetches do not re-resolve the cache directory.""" + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache( + tmp_cache_dir, + disk_pickle_key=secrets.token_bytes(32), + disk_min_file_size=1, + ) as cache: + assert cache._disk._directory_realpath is None + cache['k'] = b'x' * 64 # forces a .val file + _ = cache['k'] # triggers _safe_filename_path + cached = cache._disk._directory_realpath + assert cached is not None + assert cached == op.realpath(tmp_cache_dir) + + +def test_v13_filename_none_with_file_mode_raises_valueerror( + tmp_cache_dir, clear_env +): + """V13: a tampered row with mode=BINARY/TEXT/PICKLE and + filename=NULL must raise a clear ValueError -- not the previous + ``UnboundLocalError: local variable 'full_path' referenced before + assignment``.""" + key = secrets.token_bytes(32) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir, disk_pickle_key=key) as cache: + cache['k'] = b'x' * 16 + + con = sqlite3.connect(op.join(tmp_cache_dir, 'cache.db')) + try: + con.execute( + 'UPDATE Cache SET filename = NULL, mode = 2, value = NULL ' + 'WHERE key = ?', ('k',) + ) + con.commit() + finally: + con.close() + + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir, disk_pickle_key=key) as cache: + with pytest.raises(ValueError, match='mode'): + cache['k'] + + +def test_v5_jsondisk_fanout_no_keyfile_no_warning( + tmp_cache_dir, clear_env +): + """V5: ``FanoutCache(disk=JSONDisk)`` must NOT auto-generate the + pickle HMAC key file or emit ``UnsafePickleWarning``, since + JSONDisk never exercises the pickle path.""" + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter('always') + with dc.FanoutCache( + tmp_cache_dir, shards=4, disk=dc.JSONDisk + ) as cache: + cache[('a', 'b')] = {'v': 1} + assert cache[('a', 'b')] == {'v': 1} + + pkey_warnings = [ + w for w in caught if isinstance(w.message, dc.UnsafePickleWarning) + ] + assert pkey_warnings == [], ( + 'JSONDisk under FanoutCache must not emit UnsafePickleWarning, ' + 'got: %r' % [str(w.message) for w in pkey_warnings] + ) + assert not op.exists(op.join(tmp_cache_dir, PICKLE_KEY_FILENAME)), ( + 'JSONDisk under FanoutCache must not auto-generate the pickle ' + 'HMAC key file at the root.' + ) + for num in range(4): + shard_keyfile = op.join( + tmp_cache_dir, '%03d' % num, PICKLE_KEY_FILENAME + ) + assert not op.exists(shard_keyfile) + + +def test_v5_disk_uses_pickle_class_attr(): + """V5: the ``_uses_pickle`` capability flag is True for the base + ``Disk`` and False for ``JSONDisk``.""" + assert dc.Disk._uses_pickle is True + assert dc.JSONDisk._uses_pickle is False + + +def test_v1_fanoutcache_pickling_with_explicit_key_raises( + tmp_cache_dir, clear_env +): + """V1: FanoutCache.__getstate__ must mirror Cache.__getstate__'s + refusal when an explicit pickle key was provided (else pickling + silently strips the secret and corrupts reads in the receiving + process).""" + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + fc = dc.FanoutCache( + tmp_cache_dir, + shards=2, + disk_pickle_key=secrets.token_bytes(32), + ) + try: + with pytest.raises(TypeError, match='disk_pickle_key'): + pickle.dumps(fc) + finally: + fc.close() + + +def test_v1_fanoutcache_pickling_with_legacy_mode_raises( + tmp_cache_dir, clear_env +): + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + fc = dc.FanoutCache( + tmp_cache_dir, shards=2, disk_pickle_key=False + ) + try: + with pytest.raises(TypeError, match='disk_pickle_key'): + pickle.dumps(fc) + finally: + fc.close() + + +def test_v1_fanoutcache_pickling_with_default_succeeds( + tmp_cache_dir, clear_env +): + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.FanoutCache(tmp_cache_dir, shards=2) as fc: + fc['k'] = {'v': 1} + blob = pickle.dumps(fc) + other = pickle.loads(blob) + try: + assert other['k'] == {'v': 1} + finally: + other.close() + + +def test_v2_deque_pickling_with_explicit_key_raises( + tmp_cache_dir, clear_env +): + """V2: Deque (built on Cache) must inherit the H2 protection.""" + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + cache = dc.Cache(tmp_cache_dir, disk_pickle_key=secrets.token_bytes(32)) + try: + deque = dc.Deque.fromcache(cache, [{'item': 1}]) + with pytest.raises(TypeError, match='disk_pickle_key'): + pickle.dumps(deque) + finally: + cache.close() + + +def test_v2_index_pickling_with_explicit_key_raises( + tmp_cache_dir, clear_env +): + """V2: Index (built on Cache) must inherit the H2 protection.""" + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + cache = dc.Cache(tmp_cache_dir, disk_pickle_key=secrets.token_bytes(32)) + try: + idx = dc.Index.fromcache(cache, {'k': 'v'}) + with pytest.raises(TypeError, match='disk_pickle_key'): + pickle.dumps(idx) + finally: + cache.close() + + +def test_v2_deque_pickling_with_default_succeeds(tmp_cache_dir, clear_env): + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + cache = dc.Cache(tmp_cache_dir) + try: + deque = dc.Deque.fromcache(cache, [{'item': 1}]) + blob = pickle.dumps(deque) + other = pickle.loads(blob) + assert list(other) == [{'item': 1}] + finally: + cache.close() + + +def test_v11_copy_copy_preserves_explicit_key(tmp_cache_dir, clear_env): + """V11: ``copy.copy(cache)`` does not cross a process boundary, so + the explicit pickle_key should be preserved (not rejected by + __getstate__) and a working independent Cache returned.""" + import copy + + secret = secrets.token_bytes(32) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir, disk_pickle_key=secret) as cache: + cache['k'] = {'v': 1} + shallow = copy.copy(cache) + try: + assert shallow.directory == cache.directory + assert shallow._disk._pickle_key_arg == secret + assert shallow['k'] == {'v': 1} + finally: + shallow.close() + + +def test_v11_copy_deepcopy_preserves_explicit_key(tmp_cache_dir, clear_env): + import copy + + secret = secrets.token_bytes(32) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir, disk_pickle_key=secret) as cache: + cache['k'] = {'v': 1} + deep = copy.deepcopy(cache) + try: + assert deep['k'] == {'v': 1} + finally: + deep.close() + + +def test_v11_copy_copy_with_default_still_works(tmp_cache_dir, clear_env): + import copy + + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir) as cache: + cache['k'] = {'v': 1} + shallow = copy.copy(cache) + try: + assert shallow['k'] == {'v': 1} + finally: + shallow.close() + + +def test_v10_warning_mentions_bootstrap_race(tmp_cache_dir, clear_env): + """V10: the auto-generated-key warning must explicitly mention the + bootstrap-race risk so operators understand the threat model + (otherwise they may assume the file-mode 0o600 key is safe in any + multi-tenant directory).""" + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter('always') + with dc.Cache(tmp_cache_dir) as cache: + cache[('complex',)] = {'value': 1} + + msgs = [ + w for w in caught if isinstance(w.message, dc.UnsafePickleWarning) + ] + assert msgs + assert 'bootstrap race' in str(msgs[0].message).lower(), ( + 'Warning should mention bootstrap race; got: %r' + % str(msgs[0].message) + ) + + +def test_v12_short_key_file_raises_clear_runtimeerror( + tmp_cache_dir, clear_env +): + """V12: coverage for the "after N retries" RuntimeError path in + ``_read_or_create_pickle_key_file``.""" + from diskcache import core as dc_core + + keyfile = op.join(tmp_cache_dir, PICKLE_KEY_FILENAME) + with open(keyfile, 'wb') as fh: + fh.write(b'short') + with pytest.raises(RuntimeError, match='too short.*after'): + dc_core._read_or_create_pickle_key_file(keyfile) + + +def test_v12_safe_filename_path_rejects_non_string( + tmp_cache_dir, clear_env +): + """V12: coverage for the type guard in ``_safe_filename_path``.""" + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir) as cache: + with pytest.raises(ValueError, match='must be str'): + cache._disk._safe_filename_path(123) + + +def test_v12_bytearray_pickle_key_accepted(tmp_cache_dir, clear_env): + """V12: bytearray is accepted by _coerce_pickle_key but was never + exercised before.""" + key = bytearray(secrets.token_bytes(32)) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir, disk_pickle_key=key) as cache: + cache['k'] = {'v': 1} + assert cache['k'] == {'v': 1} + + +# -- Pass-4 round of fixes --------------------------------------------- + + +def test_v14_short_os_write_does_not_silently_truncate( + tmp_cache_dir, clear_env, monkeypatch +): + """V14: ``os.write`` may legally return a short write (signal, + quota, ENOSPC mid-write). Ignoring the return value lets the + writer hold a 32-byte key in memory while the on-disk file holds + only a prefix; readers then see "too short" forever. + + With the loop-until-full fix, a short write should either complete + (multiple iterations) or raise OSError that triggers cleanup. + """ + from diskcache import core as dc_core + + keyfile = op.join(tmp_cache_dir, PICKLE_KEY_FILENAME) + orig_write = os.write + write_count = [0] + + def short_then_full_write(fd, data): + write_count[0] += 1 + # First call writes 8 bytes; subsequent calls write the rest. + return orig_write(fd, data[:8]) + + monkeypatch.setattr(os, 'write', short_then_full_write) + key, _ = dc_core._read_or_create_pickle_key_file(keyfile) + monkeypatch.setattr(os, 'write', orig_write) + + # The writer loop must have called os.write multiple times until + # all 32 bytes were durable. + assert write_count[0] >= 4, ( + 'expected the write loop to retry; got %d calls' % write_count[0] + ) + on_disk_len = op.getsize(keyfile) + assert on_disk_len == len(key) == 32, ( + 'on-disk key (%d bytes) must match returned key (%d bytes)' + % (on_disk_len, len(key)) + ) + + +def test_v14_zero_return_from_os_write_triggers_cleanup( + tmp_cache_dir, clear_env, monkeypatch +): + """V14: a pathological 0-byte write loops forever in the naive + implementation; the fix raises OSError after the first such call + and the finally cleans up the file.""" + from diskcache import core as dc_core + + keyfile = op.join(tmp_cache_dir, PICKLE_KEY_FILENAME) + + def zero_write(fd, data): + return 0 + + monkeypatch.setattr(os, 'write', zero_write) + with pytest.raises(OSError): + dc_core._read_or_create_pickle_key_file(keyfile) + assert not op.exists(keyfile) + + +def test_v17_keyboard_interrupt_during_fsync_keeps_durable_key( + tmp_cache_dir, clear_env, monkeypatch +): + """V17: once ``os.write`` has fully landed the key on disk, + concurrent readers can already see it. A KeyboardInterrupt + during the subsequent fsync must NOT unlink the file -- doing so + would strand readers using a key that no longer exists on disk.""" + from diskcache import core as dc_core + + keyfile = op.join(tmp_cache_dir, PICKLE_KEY_FILENAME) + + def kbd_during_fsync(fd): + raise KeyboardInterrupt('simulated Ctrl+C during fsync') + + monkeypatch.setattr(os, 'fsync', kbd_during_fsync) + with pytest.raises(KeyboardInterrupt): + dc_core._read_or_create_pickle_key_file(keyfile) + monkeypatch.undo() + + assert op.exists(keyfile), ( + 'fsync interrupt after successful os.write must NOT unlink ' + 'the key (the bytes are already visible to readers).' + ) + assert op.getsize(keyfile) == 32 + + +def test_v18_oserror_on_realpath_propagates_when_not_path_failure( + tmp_cache_dir, clear_env, monkeypatch +): + """V18: PermissionError / TimeoutError from realpath are real OS + errors, not tampering signals. They must propagate unchanged.""" + from diskcache import core as dc_core + + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir) as cache: + disk = cache._disk + + def perm_error(*args, **kwargs): + raise PermissionError(13, 'Permission denied (simulated)') + + monkeypatch.setattr(dc_core.op, 'realpath', perm_error) + with pytest.raises(PermissionError): + disk._safe_filename_path('ab/cd.val') + + +def test_v15_copy_preserves_resolved_key_across_env_change( + tmp_cache_dir, clear_env, monkeypatch +): + """V15: if the original Cache resolved its key from the env var + or in-dir file, ``copy.copy`` must propagate the resolved bytes + so an intervening env change does not produce a copy that uses a + different key.""" + import copy + + monkeypatch.setenv(PICKLE_KEY_ENV, secrets.token_bytes(32).hex()) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir) as cache: + cache[('complex', 'key')] = {'v': 1} + # Change the env var BEFORE copying + monkeypatch.setenv(PICKLE_KEY_ENV, secrets.token_bytes(32).hex()) + shallow = copy.copy(cache) + try: + # The copy must read the same value that the original + # wrote (resolved key was preserved). + assert shallow[('complex', 'key')] == {'v': 1} + finally: + shallow.close() + + +def test_v16_fanout_child_cache_inherits_pickle_key( + tmp_cache_dir, clear_env +): + """V16: ``FanoutCache.cache(name)`` must forward + ``disk_pickle_key`` to the child Cache. Otherwise the child + auto-generates a different key and its data is unreadable when + the user re-opens with the parent's key.""" + key = secrets.token_bytes(32) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.FanoutCache( + tmp_cache_dir, shards=2, disk_pickle_key=key + ) as fc: + child = fc.cache('child') + assert child._disk._pickle_key_arg == key + child[('complex',)] = {'v': 1} + # No per-child key file should exist since the key was + # inherited. + child_dir = op.join(tmp_cache_dir, 'cache', 'child') + assert not op.exists(op.join(child_dir, PICKLE_KEY_FILENAME)) + + +def test_v16_fanout_child_deque_inherits_pickle_key( + tmp_cache_dir, clear_env +): + key = secrets.token_bytes(32) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.FanoutCache( + tmp_cache_dir, shards=2, disk_pickle_key=key + ) as fc: + d = fc.deque('mydeque') + assert d._cache._disk._pickle_key_arg == key + d.append({'v': 1}) + assert d.pop() == {'v': 1} + + +def test_v16_fanout_child_index_inherits_pickle_key( + tmp_cache_dir, clear_env +): + key = secrets.token_bytes(32) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.FanoutCache( + tmp_cache_dir, shards=2, disk_pickle_key=key + ) as fc: + idx = fc.index('myindex') + assert idx._cache._disk._pickle_key_arg == key + idx['k'] = {'v': 1} + assert idx['k'] == {'v': 1} + + +def test_v19_fanoutcache_copy_preserves_explicit_key( + tmp_cache_dir, clear_env +): + """V19: same-process ``copy.copy(fanout)`` must preserve an + explicit key just like Cache does.""" + import copy + + key = secrets.token_bytes(32) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.FanoutCache( + tmp_cache_dir, shards=2, disk_pickle_key=key + ) as fc: + fc[('complex',)] = {'v': 1} + shallow = copy.copy(fc) + try: + assert shallow[('complex',)] == {'v': 1} + finally: + shallow.close() + + +def test_v19_deque_copy_preserves_explicit_key(tmp_cache_dir, clear_env): + import copy + + key = secrets.token_bytes(32) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + cache = dc.Cache(tmp_cache_dir, disk_pickle_key=key) + try: + deque = dc.Deque.fromcache(cache, [{'item': 1}, {'item': 2}]) + shallow = copy.copy(deque) + try: + assert list(shallow) == [{'item': 1}, {'item': 2}] + finally: + shallow._cache.close() + finally: + cache.close() + + +def test_v19_index_copy_preserves_explicit_key(tmp_cache_dir, clear_env): + import copy + + key = secrets.token_bytes(32) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + cache = dc.Cache(tmp_cache_dir, disk_pickle_key=key) + try: + idx = dc.Index.fromcache(cache, {'k': {'v': 1}}) + shallow = copy.copy(idx) + try: + assert shallow['k'] == {'v': 1} + finally: + shallow._cache.close() + finally: + cache.close() + + +def test_v21_legacy_error_mentions_disk_pickle_key( + tmp_cache_dir, clear_env +): + """V21: the error must point users to ``disk_pickle_key=False`` + (the kwarg on Cache / FanoutCache / Django OPTIONS), not + ``pickle_key=False`` (only valid on the Disk constructor).""" + # Write raw legacy pickle directly + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir, disk_pickle_key=False) as cache: + cache['k'] = {'legacy': True} + + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache( + tmp_cache_dir, disk_pickle_key=secrets.token_bytes(32) + ) as cache: + with pytest.raises(pickle.UnpicklingError) as exc_info: + cache['k'] + msg = str(exc_info.value) + assert 'disk_pickle_key' in msg, ( + 'Error must reference the public kwarg name disk_pickle_key ' + '(not the Disk-only pickle_key); got: %r' % msg + ) + + +# -- Pass-5: O1 default-mode copies survive env changes ---------------- + + +def test_o1_default_mode_cache_copy_survives_env_change( + tmp_cache_dir, monkeypatch +): + """O1 (Cache): copy.copy(cache) in default mode must keep using the + original key even if DISKCACHE_PICKLE_KEY is replaced before the + copy is constructed.""" + import copy + + monkeypatch.setenv(PICKLE_KEY_ENV, secrets.token_bytes(32).hex()) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir) as cache: + cache['k'] = {'v': 1} + assert cache['k'] == {'v': 1} # force lazy resolution + monkeypatch.setenv(PICKLE_KEY_ENV, secrets.token_bytes(32).hex()) + shallow = copy.copy(cache) + try: + assert shallow['k'] == {'v': 1} + finally: + shallow.close() + + +def test_o1_default_mode_fanout_copy_survives_env_change( + tmp_cache_dir, monkeypatch +): + """O1 (FanoutCache): copy.copy(fc) must keep using the original key + even if DISKCACHE_PICKLE_KEY is replaced before the copy is made. + Without the inherited-key propagation the copy would re-resolve + from the (now different) env var and silently fail HMAC.""" + import copy + + monkeypatch.setenv(PICKLE_KEY_ENV, secrets.token_bytes(32).hex()) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.FanoutCache(tmp_cache_dir, shards=4) as fc: + fc['k'] = {'v': 1} + assert fc['k'] == {'v': 1} + monkeypatch.setenv(PICKLE_KEY_ENV, secrets.token_bytes(32).hex()) + shallow = copy.copy(fc) + try: + assert shallow['k'] == {'v': 1} + finally: + shallow.close() + + +def test_o1_default_mode_deque_copy_survives_env_change( + tmp_cache_dir, monkeypatch +): + """O1 (Deque): copy.copy(deque) must keep the original key.""" + import copy + + monkeypatch.setenv(PICKLE_KEY_ENV, secrets.token_bytes(32).hex()) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + deque = dc.Deque(directory=tmp_cache_dir) + try: + deque.append({'v': 1}) + assert list(deque) == [{'v': 1}] + monkeypatch.setenv(PICKLE_KEY_ENV, secrets.token_bytes(32).hex()) + shallow = copy.copy(deque) + try: + assert list(shallow) == [{'v': 1}] + finally: + shallow._cache.close() + finally: + deque._cache.close() + + +def test_o1_default_mode_index_copy_survives_env_change( + tmp_cache_dir, monkeypatch +): + """O1 (Index): copy.copy(index) must keep the original key.""" + import copy + + monkeypatch.setenv(PICKLE_KEY_ENV, secrets.token_bytes(32).hex()) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + idx = dc.Index(tmp_cache_dir) + try: + idx['k'] = {'v': 1} + assert idx['k'] == {'v': 1} + monkeypatch.setenv(PICKLE_KEY_ENV, secrets.token_bytes(32).hex()) + shallow = copy.copy(idx) + try: + assert shallow['k'] == {'v': 1} + finally: + shallow._cache.close() + finally: + idx._cache.close() + + +# -- Pass-5: O2 FanoutCache children get-or-create lock ---------------- + + +def test_o2_fanout_concurrent_cache_no_duplicate( + tmp_cache_dir, clear_env, monkeypatch +): + """O2: two threads calling fc.cache('foo') simultaneously must get + the SAME instance. Without the lock, both can miss the dict, both + construct, the second store overwrites the first, and the first + caller is left with an orphan Cache (open SQLite handle, no + references in fc._caches). + + This version is deterministic: we monkey-patch ``Cache.__init__`` + with a wrapper that sleeps before returning, so both threads enter + the get-or-create critical section while the first construction + is still in flight. Without the lock the second thread observes + an empty ``self._caches`` and constructs a duplicate. + """ + import threading + import time + + real_init = dc.Cache.__init__ + construct_count = {'n': 0} + construct_lock = threading.Lock() + + def slow_init(self, *args, **kwargs): + with construct_lock: + construct_count['n'] += 1 + n = construct_count['n'] + real_init(self, *args, **kwargs) + # Only slow down child cache construction (under the + # FanoutCache 'cache' subdirectory), not the FanoutCache's + # own shards. Sleep races the second caller into the + # critical section. + if n > 0 and 'cache' in str(self.directory).split(os.sep): + time.sleep(0.2) + + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.FanoutCache(tmp_cache_dir, shards=2) as fc: + # Patch only after FanoutCache is fully constructed so + # shard inits aren't slowed. + monkeypatch.setattr(dc.Cache, '__init__', slow_init) + + results = [] + results_lock = threading.Lock() + + def grab(): + child = fc.cache('shared') + with results_lock: + results.append(child) + + threads = [threading.Thread(target=grab) for _ in range(8)] + t0 = time.time() + for t in threads: + t.start() + for t in threads: + t.join() + elapsed = time.time() - t0 + + first = results[0] + assert all(r is first for r in results), ( + 'All threads must see the same cached child instance' + ) + assert len(fc._caches) == 1 + # If the lock works, all threads serialize on the first + # construction (~0.2s); a duplicate construction would + # double the time. Allow generous slack. + assert elapsed < 1.0, ( + 'O2: child construction appears to have run more ' + 'than once concurrently (elapsed=%.2fs)' % elapsed + ) + + +# -- Pass-5: O3 close() closes cached children ------------------------- + + +def test_o3_fanout_close_closes_children(tmp_cache_dir, clear_env): + """O3: ``fc.close()`` must close cached child Cache/Deque/Index + instances. Otherwise long-lived references leak SQLite handles. + + Behavioral check: after ``fc.close()`` we must be able to open a + fresh sqlite3 connection to each child's database file in + exclusive locking mode. If close() failed to release the + underlying handle (e.g. on Windows where the file is held by the + OS while the connection is open), this will fail. We also assert + the parent's child dicts are cleared so the FanoutCache cannot + silently keep handing out post-close children. + """ + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + fc = dc.FanoutCache(tmp_cache_dir, shards=2) + child_cache = fc.cache('child_cache') + child_deque = fc.deque('child_deque') + child_index = fc.index('child_index') + # Force connection creation in this thread. + child_cache['k'] = 1 + child_deque.append('x') + child_index['a'] = 1 + + child_cache_db = op.join(child_cache.directory, 'cache.db') + child_deque_db = op.join(child_deque._cache.directory, 'cache.db') + child_index_db = op.join(child_index._cache.directory, 'cache.db') + assert op.isfile(child_cache_db) + assert op.isfile(child_deque_db) + assert op.isfile(child_index_db) + + fc.close() + + # Behavioral verification: each child's db must be openable + # by a fresh exclusive-locking sqlite connection. If the + # original connection is still held, the BEGIN EXCLUSIVE will + # raise OperationalError (database is locked). + for db_path in (child_cache_db, child_deque_db, child_index_db): + con = sqlite3.connect(db_path, timeout=0.5) + try: + con.execute('BEGIN EXCLUSIVE') + con.execute('COMMIT') + finally: + con.close() + + # The dicts must be cleared so the parent can be safely + # reopened later without leaking the old children. + assert fc._caches == {} + assert fc._deques == {} + assert fc._indexes == {} + + +# -- Pass-5: O4 child Cache pickling depends on key source ------------- + + +def test_o4_default_mode_child_cache_pickleable(tmp_cache_dir, clear_env): + """O4: in default mode the FanoutCache and its child Cache must + be pickleable -- the inherited key isn't a user-supplied secret.""" + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.FanoutCache(tmp_cache_dir, shards=2) as fc: + child = fc.cache('foo') + # The child must NOT have an explicit _pickle_key_arg. + from diskcache.core import _PICKLE_KEY_UNSET + assert child._disk._pickle_key_arg is _PICKLE_KEY_UNSET + # And it must be pickleable. + blob = pickle.dumps(child) + restored = pickle.loads(blob) + try: + assert restored.directory == child.directory + finally: + restored.close() + + +def test_o4_explicit_mode_child_cache_unpickleable( + tmp_cache_dir, clear_env +): + """O4: when the user passes an explicit ``disk_pickle_key`` to + FanoutCache, the child Cache inherits that explicit key and + pickling must be refused (mirrors the parent's posture).""" + secret = secrets.token_bytes(32) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.FanoutCache( + tmp_cache_dir, shards=2, disk_pickle_key=secret + ) as fc: + child = fc.cache('foo') + with pytest.raises(TypeError, match='disk_pickle_key'): + pickle.dumps(child) + + +# -- Pass-5: Y1 KeyboardInterrupt after full write preserves key file -- + + +def test_y1_keyboard_interrupt_after_full_write_preserves_file( + tmp_cache_dir, clear_env, monkeypatch +): + """Y1: if KeyboardInterrupt fires AFTER os.write has deposited the + full key in the kernel but BEFORE ``write_succeeded = True`` runs, + the finally block must NOT unlink the file -- concurrent readers + can already see that key bytes.""" + from diskcache import core as dc_core + + real_write = os.write + + def evil_write(fd, data): + n = real_write(fd, data) + # Simulate an async exception firing right after os.write + # returned the full count -- before the assignment below. + raise KeyboardInterrupt('simulated SIGINT') + + monkeypatch.setattr(dc_core.os, 'write', evil_write) + keyfile = op.join(tmp_cache_dir, PICKLE_KEY_FILENAME) + with pytest.raises(KeyboardInterrupt): + dc_core._read_or_create_pickle_key_file(keyfile) + # Restore real os.write for the assertion phase. + monkeypatch.setattr(dc_core.os, 'write', real_write) + + # File must still exist with a valid key (>= MIN_LEN bytes). + assert op.exists(keyfile), ( + 'Y1: file was unlinked even though os.write completed -- ' + 'concurrent readers may already hold this key.' + ) + with open(keyfile, 'rb') as fh: + on_disk = fh.read() + assert len(on_disk) >= dc_core.PICKLE_KEY_MIN_LEN + + +# -- Pass-5: Y2 ELOOP translates to ValueError ------------------------- + + +@pytest.mark.skipif( + os.name == 'nt', reason='symlink loops require admin on Windows' +) +def test_y2_eloop_translates_to_value_error(tmp_cache_dir, clear_env): + """Y2: a symlink loop inside the cache directory (errno ELOOP) + must surface as ValueError from _safe_filename_path, signalling + a tampered cache.db row -- not as an opaque OSError.""" + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir) as cache: + disk = cache._disk + # Create a symlink loop a -> b -> a inside the cache dir. + a = op.join(tmp_cache_dir, 'loop_a') + b = op.join(tmp_cache_dir, 'loop_b') + try: + os.symlink(b, a) + os.symlink(a, b) + except (OSError, NotImplementedError) as exc: + pytest.skip('cannot create symlink loop: %s' % exc) + with pytest.raises(ValueError, match='cache.db may be tampered'): + disk._safe_filename_path('loop_a') + + +# -- Pass-5: Y3 error wording uses the public kwarg name -------------- + + +def test_y3_error_message_uses_disk_pickle_key_kwarg_name( + tmp_cache_dir, clear_env +): + """Y3: ``Cache(td, disk_pickle_key='not-hex')`` must raise with + the public kwarg name (``disk_pickle_key``) in the error so users + aren't confused by the underlying ``Disk(pickle_key=...)`` kwarg + they never typed.""" + with pytest.raises(ValueError) as exc_info: + dc.Cache(tmp_cache_dir, disk_pickle_key='not-hex') + msg = str(exc_info.value) + assert 'disk_pickle_key' in msg, ( + 'error must mention the public kwarg disk_pickle_key, got: %r' + % msg + ) + + # FanoutCache should also use the public name. + with pytest.raises(ValueError) as exc_info2: + dc.FanoutCache(tmp_cache_dir, disk_pickle_key='not-hex') + msg2 = str(exc_info2.value) + assert 'disk_pickle_key' in msg2, ( + 'FanoutCache error must mention disk_pickle_key, got: %r' % msg2 + ) + + + +# -- Pass-6: L1 child cache key recoverability after pickle ------------ + + +def test_l1_pickled_child_cache_recovers_data_in_receiver( + tmp_cache_dir, clear_env +): + """L1: pickling a child Cache obtained from FanoutCache.cache(name) + must round-trip -- the receiver must be able to recover the HMAC + key from the child's directory and read the data.""" + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.FanoutCache(tmp_cache_dir, shards=2) as fc: + child = fc.cache('mychild') + child[('complex', 'key')] = {'value': 42} + blob = pickle.dumps(child) + child_dir = child.directory + # Simulate "receiving process": brand-new Cache instance from + # the same directory, no env var. + with dc.Cache(child_dir) as rcv: + assert rcv[('complex', 'key')] == {'value': 42} + # Also test pickle round-trip. + rcv2 = pickle.loads(blob) + try: + assert rcv2[('complex', 'key')] == {'value': 42} + finally: + rcv2.close() + + +def test_l1_inherited_key_file_written_atomically_in_child_dir( + tmp_cache_dir, clear_env +): + """L1: the inherited key must be persisted into the child + directory's ``.diskcache_pickle_key`` so default resolution in a + receiving process recovers the same key.""" + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.FanoutCache(tmp_cache_dir, shards=2) as fc: + child = fc.cache('mychild') + child_key_path = op.join(child.directory, PICKLE_KEY_FILENAME) + assert op.isfile(child_key_path) + with open(child_key_path, 'rb') as fh: + child_key = fh.read() + parent_key_path = op.join(tmp_cache_dir, PICKLE_KEY_FILENAME) + with open(parent_key_path, 'rb') as fh: + parent_key = fh.read() + assert child_key == parent_key + + +def test_l1_explicit_key_does_NOT_write_keyfile_into_child_dir( + tmp_cache_dir, clear_env +): + """L1 negative: when the user passes an explicit + ``disk_pickle_key`` the secret must NOT be persisted to disk in + any child directory. Only the implicit (inherited from default + resolution) path triggers the L1 keyfile-write.""" + secret = secrets.token_bytes(32) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.FanoutCache( + tmp_cache_dir, shards=2, disk_pickle_key=secret + ) as fc: + child = fc.cache('mychild') + assert not op.isfile( + op.join(child.directory, PICKLE_KEY_FILENAME) + ) + # Parent dir also should not have one. + assert not op.isfile( + op.join(tmp_cache_dir, PICKLE_KEY_FILENAME) + ) + + +def test_l1_legacy_mode_does_NOT_write_keyfile_into_child_dir( + tmp_cache_dir, clear_env +): + """L1 negative: legacy mode (disk_pickle_key=False) must not + create a keyfile anywhere -- there is no key to persist.""" + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.FanoutCache( + tmp_cache_dir, shards=2, disk_pickle_key=False + ) as fc: + child = fc.cache('mychild') + assert not op.isfile( + op.join(child.directory, PICKLE_KEY_FILENAME) + ) + + +# -- Pass-6: C1 FanoutCache.close() fences subsequent use -------------- + + +def test_c1_fanoutcache_use_after_close_raises(tmp_cache_dir, clear_env): + """C1: after fc.close(), creating new children via cache()/ + deque()/index() must raise RuntimeError.""" + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + fc = dc.FanoutCache(tmp_cache_dir, shards=2) + fc.cache('existing') # populate to exercise close() pathways + fc.close() + with pytest.raises(RuntimeError, match='closed'): + fc.cache('new') + with pytest.raises(RuntimeError, match='closed'): + fc.deque('new') + with pytest.raises(RuntimeError, match='closed'): + fc.index('new') + + +def test_c1_fanoutcache_close_idempotent(tmp_cache_dir, clear_env): + """C1: calling close() twice must not raise.""" + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + fc = dc.FanoutCache(tmp_cache_dir, shards=2) + fc.close() + fc.close() # second close should not raise + + +# -- Pass-6: L2 size-based finally cleanup ----------------------------- + + +def test_y1_finally_size_based_cleanup_partial( + tmp_cache_dir, clear_env, monkeypatch +): + """L2: if os.write only delivers a short prefix of the key and + then raises OSError, the finally block must unlink the partial + file (file_size < len(new_key)).""" + from diskcache import core as dc_core + + real_write = os.write + + def short_then_fail(fd, data): + # Write only 8 bytes and report failure thereafter. + real_write(fd, data[:8]) + raise OSError(28, 'simulated short write + failure') + + monkeypatch.setattr(dc_core.os, 'write', short_then_fail) + keyfile = op.join(tmp_cache_dir, PICKLE_KEY_FILENAME) + with pytest.raises(OSError): + dc_core._read_or_create_pickle_key_file(keyfile) + # Restore for cleanup safety. + monkeypatch.setattr(dc_core.os, 'write', real_write) + assert not op.exists(keyfile), ( + 'L2: partial file (8 bytes < 32) must be unlinked' + ) + + +def test_y1_finally_size_based_cleanup_complete( + tmp_cache_dir, clear_env, monkeypatch +): + """L2: if os.write delivers the full key but an exception fires + before write_succeeded=True, the finally block must preserve the + file (file_size == len(new_key) and content matches).""" + from diskcache import core as dc_core + + real_write = os.write + + def full_then_fail(fd, data): + n = real_write(fd, data) + # Simulate KeyboardInterrupt firing AFTER full bytes deposited + # but BEFORE the write_succeeded=True assignment runs. + raise KeyboardInterrupt('simulated SIGINT post-write') + + monkeypatch.setattr(dc_core.os, 'write', full_then_fail) + keyfile = op.join(tmp_cache_dir, PICKLE_KEY_FILENAME) + with pytest.raises(KeyboardInterrupt): + dc_core._read_or_create_pickle_key_file(keyfile) + monkeypatch.setattr(dc_core.os, 'write', real_write) + assert op.exists(keyfile), ( + 'L2: complete file (32 bytes, content matches) must be preserved' + ) + with open(keyfile, 'rb') as fh: + assert len(fh.read()) >= dc_core.PICKLE_KEY_MIN_LEN + + +# -- Pass-6: Y2 cross-platform ELOOP via monkeypatch ------------------- + + +def test_y2_eloop_translates_to_value_error_via_monkeypatch( + tmp_cache_dir, clear_env, monkeypatch +): + """Y2 (cross-platform): monkey-patch op.realpath to raise + OSError(ELOOP) so the translation to ValueError is verified + without needing real symlinks (which require admin on Windows).""" + with warnings.catch_warnings(): + warnings.simplefilter('ignore', dc.UnsafePickleWarning) + with dc.Cache(tmp_cache_dir) as cache: + disk = cache._disk + from diskcache import core as dc_core + + real_realpath = dc_core.op.realpath + + def loop_realpath(path, *args, **kwargs): + # Only fake the loop for our specific filename so the + # cache directory's own realpath caching still works. + if 'loop_target' in str(path): + raise OSError( + errno.ELOOP, 'Too many symbolic links' + ) + return real_realpath(path, *args, **kwargs) + + monkeypatch.setattr( + dc_core.op, 'realpath', loop_realpath + ) + with pytest.raises( + ValueError, match='cache.db may be tampered' + ): + disk._safe_filename_path('loop_target')