From 39c32bb26900a8f8cca195059ccefa8d5a2c767b Mon Sep 17 00:00:00 2001
From: J Berg <j.berg2349@gmail.com>
Date: Tue, 31 Mar 2026 22:19:37 +0100
Subject: [PATCH 1/2] Expose _cache and test cache state

---
 spacy/tests/tokenizer/test_tokenizer.py | 12 ++++++++++++
 spacy/tokenizer.pxd                     |  2 +-
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/spacy/tests/tokenizer/test_tokenizer.py b/spacy/tests/tokenizer/test_tokenizer.py
index 1ea5f78c9a8..610531ab5a0 100644
--- a/spacy/tests/tokenizer/test_tokenizer.py
+++ b/spacy/tests/tokenizer/test_tokenizer.py
@@ -5,6 +5,7 @@
 
 from spacy.lang.de import German
 from spacy.lang.en import English
+from spacy.strings import hash_string
 from spacy.symbols import ORTH
 from spacy.tokenizer import Tokenizer
 from spacy.tokens import Doc
@@ -555,3 +556,14 @@ def test_tokenizer_initial_special_case_explain(en_vocab):
     tokens = [t.text for t in tokenizer("id")]
     explain_tokens = [t[1] for t in tokenizer.explain("id")]
     assert tokens == explain_tokens
+
+
+@pytest.mark.issue(13950)
+def test_issue13950(en_tokenizer):
+    # The special-case contraction "can't" comes before the regular words.
+    en_tokenizer("I can't believe you have done this")
+
+    # "believe" and "this" appear after the special case "can't".
+    # They should still be cached.
+    assert hash_string("believe") in en_tokenizer._cache
+    assert hash_string("this") in en_tokenizer._cache
diff --git a/spacy/tokenizer.pxd b/spacy/tokenizer.pxd
index 88e4b06b024..5b17ea42389 100644
--- a/spacy/tokenizer.pxd
+++ b/spacy/tokenizer.pxd
@@ -12,7 +12,7 @@ from .vocab cimport LexemesOrTokens, Vocab, _Cached
 
 cdef class Tokenizer:
     cdef Pool mem
-    cdef PreshMap _cache
+    cdef readonly PreshMap _cache  # readonly so tests can check state
     cdef PreshMap _specials
     cdef readonly Vocab vocab
 

From b0834be78c233c65af660a833624cff57777492f Mon Sep 17 00:00:00 2001
From: J Berg <j.berg2349@gmail.com>
Date: Tue, 31 Mar 2026 22:32:52 +0100
Subject: [PATCH 2/2] Reset has_special before tokenizing each span

---
 spacy/tokenizer.pyx | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/spacy/tokenizer.pyx b/spacy/tokenizer.pyx
index 77718a75b0c..0c16fde509a 100644
--- a/spacy/tokenizer.pyx
+++ b/spacy/tokenizer.pyx
@@ -192,6 +192,7 @@ cdef class Tokenizer:
                     # we don't have to create the slice when we hit the cache.
                     span = string[start:i]
                     key = hash_string(span)
+                    has_special = 0
                     if not self._try_specials_and_cache(key, doc, &has_special, with_special_cases):
                         self._tokenize(doc, span, key, &has_special, with_special_cases)
                 if uc == ' ':
@@ -204,6 +205,7 @@ cdef class Tokenizer:
         if start < i:
             span = string[start:]
             key = hash_string(span)
+            has_special = 0
             if not self._try_specials_and_cache(key, doc, &has_special, with_special_cases):
                 self._tokenize(doc, span, key, &has_special, with_special_cases)
             doc.c[doc.length - 1].spacy = string[-1] == " " and not in_ws