From 2eeaef458bbb1007d657c1f7a6c578cb53d59893 Mon Sep 17 00:00:00 2001 From: CL-ModelCloud Date: Wed, 22 Jan 2025 17:48:54 +0800 Subject: [PATCH 01/13] Fix the bug in tokenizer.save_pretrained when saving tokenizer_class to tokenizer_config.json --- src/transformers/tokenization_utils_base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py index b9b24d681b24..99fa92f1ed7a 100644 --- a/src/transformers/tokenization_utils_base.py +++ b/src/transformers/tokenization_utils_base.py @@ -2457,7 +2457,8 @@ def save_pretrained( # Add tokenizer class to the tokenizer config to be able to reload it with from_pretrained tokenizer_class = self.__class__.__name__ # Remove the Fast at the end unless we have a special `PreTrainedTokenizerFast` - if tokenizer_class.endswith("Fast") and tokenizer_class != "PreTrainedTokenizerFast": + from .tokenization_utils_fast import PreTrainedTokenizerFast + if tokenizer_class.endswith("Fast") and not isinstance(self, PreTrainedTokenizerFast): tokenizer_class = tokenizer_class[:-4] tokenizer_config["tokenizer_class"] = tokenizer_class if getattr(self, "_auto_map", None) is not None: From 495729faee154b0b20d2534c699eb61d85f23d37 Mon Sep 17 00:00:00 2001 From: Qubitium-ModelCloud Date: Wed, 22 Jan 2025 18:28:03 +0800 Subject: [PATCH 02/13] Update tokenization_utils_base.py --- src/transformers/tokenization_utils_base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py index 99fa92f1ed7a..8ff3d5bfbacf 100644 --- a/src/transformers/tokenization_utils_base.py +++ b/src/transformers/tokenization_utils_base.py @@ -2456,8 +2456,9 @@ def save_pretrained( # Add tokenizer class to the tokenizer config to be able to reload it with from_pretrained tokenizer_class = self.__class__.__name__ - # Remove the Fast at the end unless we have a special `PreTrainedTokenizerFast` + # import here to prevent circular import error from .tokenization_utils_fast import PreTrainedTokenizerFast + # Remove the Fast at the end unless we have a special `PreTrainedTokenizerFast` if tokenizer_class.endswith("Fast") and not isinstance(self, PreTrainedTokenizerFast): tokenizer_class = tokenizer_class[:-4] tokenizer_config["tokenizer_class"] = tokenizer_class From 802846332b48805905f162ef1d4af006a6b23888 Mon Sep 17 00:00:00 2001 From: LRL-ModelCloud <165116337+LRL-ModelCloud@users.noreply.github.com> Date: Thu, 23 Jan 2025 09:36:52 +0800 Subject: [PATCH 03/13] Update tokenization_utils_base.py --- src/transformers/tokenization_utils_base.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py index 8ff3d5bfbacf..d0580136fa0b 100644 --- a/src/transformers/tokenization_utils_base.py +++ b/src/transformers/tokenization_utils_base.py @@ -2456,10 +2456,10 @@ def save_pretrained( # Add tokenizer class to the tokenizer config to be able to reload it with from_pretrained tokenizer_class = self.__class__.__name__ - # import here to prevent circular import error - from .tokenization_utils_fast import PreTrainedTokenizerFast - # Remove the Fast at the end unless we have a special `PreTrainedTokenizerFast` - if tokenizer_class.endswith("Fast") and not isinstance(self, PreTrainedTokenizerFast): + # Remove the Fast at the end if we can save the slow tokenizer + if tokenizer_class.endswith("Fast") and ( + hasattr(self, "can_save_slow_tokenizer") and self.can_save_slow_tokenizer + ): tokenizer_class = tokenizer_class[:-4] tokenizer_config["tokenizer_class"] = tokenizer_class if getattr(self, "_auto_map", None) is not None: From cb16afb91f3daf8c94f1012de4e0ac5250e79c0d Mon Sep 17 00:00:00 2001 From: LRL-ModelCloud <165116337+LRL-ModelCloud@users.noreply.github.com> Date: Fri, 24 Jan 2025 09:25:37 +0800 Subject: [PATCH 04/13] Update tokenization_utils_base.py --- src/transformers/tokenization_utils_base.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py index ca8fc91184d1..6304e69e929f 100644 --- a/src/transformers/tokenization_utils_base.py +++ b/src/transformers/tokenization_utils_base.py @@ -2465,9 +2465,7 @@ def save_pretrained( # Add tokenizer class to the tokenizer config to be able to reload it with from_pretrained tokenizer_class = self.__class__.__name__ # Remove the Fast at the end if we can save the slow tokenizer - if tokenizer_class.endswith("Fast") and ( - hasattr(self, "can_save_slow_tokenizer") and self.can_save_slow_tokenizer - ): + if tokenizer_class.endswith("Fast") and getattr(self, "can_save_slow_tokenizer", False): tokenizer_class = tokenizer_class[:-4] tokenizer_config["tokenizer_class"] = tokenizer_class if getattr(self, "_auto_map", None) is not None: From fd264b560b7301a59cf5ff478d46c1eadb042635 Mon Sep 17 00:00:00 2001 From: CL-ModelCloud Date: Thu, 13 Feb 2025 02:49:08 +0000 Subject: [PATCH 05/13] add tokenizer class type test --- tests/tokenization/test_tokenization_fast.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/tokenization/test_tokenization_fast.py b/tests/tokenization/test_tokenization_fast.py index d5c6444de4ec..bc22a09338ab 100644 --- a/tests/tokenization/test_tokenization_fast.py +++ b/tests/tokenization/test_tokenization_fast.py @@ -219,6 +219,23 @@ def test_repo_versioning(self): json_tokenizer = json.loads(old_tokenizer._tokenizer.to_str()) self.assertNotIn("huggingface", json_tokenizer["model"]["vocab"]) + def test_tokenizer_class(self): + from transformers import LlamaTokenizerFast + model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B" + + with tempfile.TemporaryDirectory() as temp_dir: + tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True) + isFast = isinstance(tokenizer, LlamaTokenizerFast) + self.assertTrue(isFast, f"Expected tokenizer(use_fast=True) type: LlamaTokenizerFast, , actual={type(tokenizer)}") + + tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=False) + isFast = isinstance(tokenizer, LlamaTokenizerFast) + self.assertTrue(isFast, f"Expected tokenizer type(use_fast=False): LlamaTokenizerFast, , actual={type(tokenizer)}") + + tokenizer.save_pretrained(temp_dir) + tokenizer = AutoTokenizer.from_pretrained(temp_dir, use_fast=False) + self.assertTrue(isFast, f"Expected tokenizer type: LlamaTokenizerFast, , actual={type(tokenizer)}") + @require_tokenizers class ReduceMutableBorrowTests(unittest.TestCase): From 384bb33639aca44b482173d503791b1aa2ce66d8 Mon Sep 17 00:00:00 2001 From: CL-ModelCloud Date: Thu, 13 Feb 2025 02:52:22 +0000 Subject: [PATCH 06/13] code review --- tests/tokenization/test_tokenization_fast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/tokenization/test_tokenization_fast.py b/tests/tokenization/test_tokenization_fast.py index bc22a09338ab..5961d65ac3e0 100644 --- a/tests/tokenization/test_tokenization_fast.py +++ b/tests/tokenization/test_tokenization_fast.py @@ -24,6 +24,7 @@ from transformers.testing_utils import require_tokenizers from ..test_tokenization_common import TokenizerTesterMixin +from transformers import LlamaTokenizerFast @require_tokenizers @@ -220,7 +221,6 @@ def test_repo_versioning(self): self.assertNotIn("huggingface", json_tokenizer["model"]["vocab"]) def test_tokenizer_class(self): - from transformers import LlamaTokenizerFast model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B" with tempfile.TemporaryDirectory() as temp_dir: From e4dd54c566e7498b31281c5ae48a6a81546fe476 Mon Sep 17 00:00:00 2001 From: CL-ModelCloud Date: Thu, 13 Feb 2025 02:57:15 +0000 Subject: [PATCH 07/13] code opt --- tests/tokenization/test_tokenization_fast.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tests/tokenization/test_tokenization_fast.py b/tests/tokenization/test_tokenization_fast.py index 5961d65ac3e0..41ec580e7e54 100644 --- a/tests/tokenization/test_tokenization_fast.py +++ b/tests/tokenization/test_tokenization_fast.py @@ -225,16 +225,14 @@ def test_tokenizer_class(self): with tempfile.TemporaryDirectory() as temp_dir: tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True) - isFast = isinstance(tokenizer, LlamaTokenizerFast) - self.assertTrue(isFast, f"Expected tokenizer(use_fast=True) type: LlamaTokenizerFast, , actual={type(tokenizer)}") + self.assertTrue(isinstance(tokenizer, LlamaTokenizerFast), f"Expected tokenizer(use_fast=True) type: LlamaTokenizerFast, , actual={type(tokenizer)}") tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=False) - isFast = isinstance(tokenizer, LlamaTokenizerFast) - self.assertTrue(isFast, f"Expected tokenizer type(use_fast=False): LlamaTokenizerFast, , actual={type(tokenizer)}") + self.assertTrue(isinstance(tokenizer, LlamaTokenizerFast), f"Expected tokenizer type(use_fast=False): LlamaTokenizerFast, , actual={type(tokenizer)}") tokenizer.save_pretrained(temp_dir) tokenizer = AutoTokenizer.from_pretrained(temp_dir, use_fast=False) - self.assertTrue(isFast, f"Expected tokenizer type: LlamaTokenizerFast, , actual={type(tokenizer)}") + self.assertTrue(isinstance(tokenizer, LlamaTokenizerFast), f"Expected tokenizer type: LlamaTokenizerFast, , actual={type(tokenizer)}") @require_tokenizers From 20a1f17d8b2572d76dd94b1513e798349fda14e7 Mon Sep 17 00:00:00 2001 From: CL-ModelCloud Date: Thu, 13 Feb 2025 03:00:44 +0000 Subject: [PATCH 08/13] fix bug --- tests/tokenization/test_tokenization_fast.py | 32 +++++++++++--------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/tests/tokenization/test_tokenization_fast.py b/tests/tokenization/test_tokenization_fast.py index 41ec580e7e54..80321cd900b6 100644 --- a/tests/tokenization/test_tokenization_fast.py +++ b/tests/tokenization/test_tokenization_fast.py @@ -171,6 +171,23 @@ def test_init_from_tokenizers_model(self): # thus tok(sentences, truncation = True) does nothing and does not warn either self.assertEqual(tok(sentences, truncation = True, max_length = 8), {'input_ids': [[8774, 6, 3, 63, 31, 1748, 55, 1],[ 571, 33, 25, 3, 2, 3, 58, 1]], 'token_type_ids': [[0, 0, 0, 0, 0, 0, 0, 0],[0, 0, 0, 0, 0, 0, 0, 0]], 'attention_mask': [[1, 1, 1, 1, 1, 1, 1, 1],[1, 1, 1, 1, 1, 1, 1, 1]]}) # fmt: skip + def test_tokenizer_class(self): + model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B" + + with tempfile.TemporaryDirectory() as temp_dir: + tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True) + self.assertTrue(isinstance(tokenizer, LlamaTokenizerFast), + f"Expected tokenizer(use_fast=True) type: LlamaTokenizerFast, , actual={type(tokenizer)}") + + tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=False) + self.assertTrue(isinstance(tokenizer, LlamaTokenizerFast), + f"Expected tokenizer type(use_fast=False): LlamaTokenizerFast, , actual={type(tokenizer)}") + + tokenizer.save_pretrained(temp_dir) + tokenizer = AutoTokenizer.from_pretrained(temp_dir, use_fast=False) + self.assertTrue(isinstance(tokenizer, LlamaTokenizerFast), + f"Expected tokenizer type: LlamaTokenizerFast, , actual={type(tokenizer)}") + @require_tokenizers class TokenizerVersioningTest(unittest.TestCase): @@ -220,21 +237,6 @@ def test_repo_versioning(self): json_tokenizer = json.loads(old_tokenizer._tokenizer.to_str()) self.assertNotIn("huggingface", json_tokenizer["model"]["vocab"]) - def test_tokenizer_class(self): - model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B" - - with tempfile.TemporaryDirectory() as temp_dir: - tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True) - self.assertTrue(isinstance(tokenizer, LlamaTokenizerFast), f"Expected tokenizer(use_fast=True) type: LlamaTokenizerFast, , actual={type(tokenizer)}") - - tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=False) - self.assertTrue(isinstance(tokenizer, LlamaTokenizerFast), f"Expected tokenizer type(use_fast=False): LlamaTokenizerFast, , actual={type(tokenizer)}") - - tokenizer.save_pretrained(temp_dir) - tokenizer = AutoTokenizer.from_pretrained(temp_dir, use_fast=False) - self.assertTrue(isinstance(tokenizer, LlamaTokenizerFast), f"Expected tokenizer type: LlamaTokenizerFast, , actual={type(tokenizer)}") - - @require_tokenizers class ReduceMutableBorrowTests(unittest.TestCase): def test_async_share_tokenizer(self): From aa7cf0530d351e3a9c6015e07e966edd07a3e7f5 Mon Sep 17 00:00:00 2001 From: Qubitium-ModelCloud Date: Thu, 13 Feb 2025 11:06:53 +0800 Subject: [PATCH 09/13] Update test_tokenization_fast.py --- tests/tokenization/test_tokenization_fast.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/tests/tokenization/test_tokenization_fast.py b/tests/tokenization/test_tokenization_fast.py index 80321cd900b6..9ea468992c03 100644 --- a/tests/tokenization/test_tokenization_fast.py +++ b/tests/tokenization/test_tokenization_fast.py @@ -171,22 +171,27 @@ def test_init_from_tokenizers_model(self): # thus tok(sentences, truncation = True) does nothing and does not warn either self.assertEqual(tok(sentences, truncation = True, max_length = 8), {'input_ids': [[8774, 6, 3, 63, 31, 1748, 55, 1],[ 571, 33, 25, 3, 2, 3, 58, 1]], 'token_type_ids': [[0, 0, 0, 0, 0, 0, 0, 0],[0, 0, 0, 0, 0, 0, 0, 0]], 'attention_mask': [[1, 1, 1, 1, 1, 1, 1, 1],[1, 1, 1, 1, 1, 1, 1, 1]]}) # fmt: skip - def test_tokenizer_class(self): + def test_class_after_save_and_reload(self): + # Model contains a `LlamaTokenizerFast` tokenizer with no slow fallback model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B" with tempfile.TemporaryDirectory() as temp_dir: tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True) self.assertTrue(isinstance(tokenizer, LlamaTokenizerFast), - f"Expected tokenizer(use_fast=True) type: LlamaTokenizerFast, , actual={type(tokenizer)}") + f"Expected tokenizer(use_fast=True) type: `LlamaTokenizerFast`, , actual=`{type(tokenizer)}`") + # Fast tokenizer will ignore `use_fast=False` tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=False) self.assertTrue(isinstance(tokenizer, LlamaTokenizerFast), - f"Expected tokenizer type(use_fast=False): LlamaTokenizerFast, , actual={type(tokenizer)}") + f"Expected tokenizer type(use_fast=False): `LlamaTokenizerFast`, , actual=`{type(tokenizer)}`") + # Save tokenizer tokenizer.save_pretrained(temp_dir) + tokenizer = AutoTokenizer.from_pretrained(temp_dir, use_fast=False) + # Verify post save and reload the fast tokenizer class did not change self.assertTrue(isinstance(tokenizer, LlamaTokenizerFast), - f"Expected tokenizer type: LlamaTokenizerFast, , actual={type(tokenizer)}") + f"Expected tokenizer type: `LlamaTokenizerFast`, , actual=`{type(tokenizer)}`") @require_tokenizers From d09a2231654424921364309ed1d29cf990a3a8a3 Mon Sep 17 00:00:00 2001 From: CL-ModelCloud Date: Thu, 13 Feb 2025 03:17:05 +0000 Subject: [PATCH 10/13] ruff check --- tests/tokenization/test_tokenization_fast.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/tokenization/test_tokenization_fast.py b/tests/tokenization/test_tokenization_fast.py index 9ea468992c03..5749b5646ce4 100644 --- a/tests/tokenization/test_tokenization_fast.py +++ b/tests/tokenization/test_tokenization_fast.py @@ -20,11 +20,10 @@ import tempfile import unittest -from transformers import AutoTokenizer, PreTrainedTokenizerFast +from transformers import AutoTokenizer, LlamaTokenizerFast, PreTrainedTokenizerFast from transformers.testing_utils import require_tokenizers from ..test_tokenization_common import TokenizerTesterMixin -from transformers import LlamaTokenizerFast @require_tokenizers @@ -187,7 +186,7 @@ def test_class_after_save_and_reload(self): # Save tokenizer tokenizer.save_pretrained(temp_dir) - + tokenizer = AutoTokenizer.from_pretrained(temp_dir, use_fast=False) # Verify post save and reload the fast tokenizer class did not change self.assertTrue(isinstance(tokenizer, LlamaTokenizerFast), From 6ec6feb28f16944e3c8f0ba77bcbce14e3ef6145 Mon Sep 17 00:00:00 2001 From: CL-ModelCloud Date: Thu, 13 Feb 2025 03:24:47 +0000 Subject: [PATCH 11/13] make style --- tests/tokenization/test_tokenization_fast.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/tests/tokenization/test_tokenization_fast.py b/tests/tokenization/test_tokenization_fast.py index 5749b5646ce4..cc5ac1075078 100644 --- a/tests/tokenization/test_tokenization_fast.py +++ b/tests/tokenization/test_tokenization_fast.py @@ -176,21 +176,27 @@ def test_class_after_save_and_reload(self): with tempfile.TemporaryDirectory() as temp_dir: tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True) - self.assertTrue(isinstance(tokenizer, LlamaTokenizerFast), - f"Expected tokenizer(use_fast=True) type: `LlamaTokenizerFast`, , actual=`{type(tokenizer)}`") + self.assertTrue( + isinstance(tokenizer, LlamaTokenizerFast), + f"Expected tokenizer(use_fast=True) type: `LlamaTokenizerFast`, , actual=`{type(tokenizer)}`", + ) # Fast tokenizer will ignore `use_fast=False` tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=False) - self.assertTrue(isinstance(tokenizer, LlamaTokenizerFast), - f"Expected tokenizer type(use_fast=False): `LlamaTokenizerFast`, , actual=`{type(tokenizer)}`") + self.assertTrue( + isinstance(tokenizer, LlamaTokenizerFast), + f"Expected tokenizer type(use_fast=False): `LlamaTokenizerFast`, , actual=`{type(tokenizer)}`", + ) # Save tokenizer tokenizer.save_pretrained(temp_dir) tokenizer = AutoTokenizer.from_pretrained(temp_dir, use_fast=False) # Verify post save and reload the fast tokenizer class did not change - self.assertTrue(isinstance(tokenizer, LlamaTokenizerFast), - f"Expected tokenizer type: `LlamaTokenizerFast`, , actual=`{type(tokenizer)}`") + self.assertTrue( + isinstance(tokenizer, LlamaTokenizerFast), + f"Expected tokenizer type: `LlamaTokenizerFast`, , actual=`{type(tokenizer)}`", + ) @require_tokenizers @@ -241,6 +247,7 @@ def test_repo_versioning(self): json_tokenizer = json.loads(old_tokenizer._tokenizer.to_str()) self.assertNotIn("huggingface", json_tokenizer["model"]["vocab"]) + @require_tokenizers class ReduceMutableBorrowTests(unittest.TestCase): def test_async_share_tokenizer(self): From c9c2225be2b0c2142036ef16384d3eda58133268 Mon Sep 17 00:00:00 2001 From: CL-ModelCloud Date: Thu, 13 Feb 2025 04:13:20 +0000 Subject: [PATCH 12/13] code opt --- tests/tokenization/test_tokenization_fast.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/tokenization/test_tokenization_fast.py b/tests/tokenization/test_tokenization_fast.py index cc5ac1075078..c83e8b411fd6 100644 --- a/tests/tokenization/test_tokenization_fast.py +++ b/tests/tokenization/test_tokenization_fast.py @@ -198,6 +198,12 @@ def test_class_after_save_and_reload(self): f"Expected tokenizer type: `LlamaTokenizerFast`, , actual=`{type(tokenizer)}`", ) + tokenizer = AutoTokenizer.from_pretrained(temp_dir, use_fast=True) + self.assertTrue( + isinstance(tokenizer, LlamaTokenizerFast), + f"Expected tokenizer type: `LlamaTokenizerFast`, , actual=`{type(tokenizer)}`", + ) + @require_tokenizers class TokenizerVersioningTest(unittest.TestCase): From e12614187e2a11e91e3a8c2c13b40e72698af4b5 Mon Sep 17 00:00:00 2001 From: Qubitium-ModelCloud Date: Thu, 13 Feb 2025 12:19:00 +0800 Subject: [PATCH 13/13] Update test_tokenization_fast.py --- tests/tokenization/test_tokenization_fast.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/tokenization/test_tokenization_fast.py b/tests/tokenization/test_tokenization_fast.py index c83e8b411fd6..4bd9b046d406 100644 --- a/tests/tokenization/test_tokenization_fast.py +++ b/tests/tokenization/test_tokenization_fast.py @@ -178,14 +178,14 @@ def test_class_after_save_and_reload(self): tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True) self.assertTrue( isinstance(tokenizer, LlamaTokenizerFast), - f"Expected tokenizer(use_fast=True) type: `LlamaTokenizerFast`, , actual=`{type(tokenizer)}`", + f"Expected tokenizer(use_fast=True) type: `LlamaTokenizerFast`, actual=`{type(tokenizer)}`", ) # Fast tokenizer will ignore `use_fast=False` tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=False) self.assertTrue( isinstance(tokenizer, LlamaTokenizerFast), - f"Expected tokenizer type(use_fast=False): `LlamaTokenizerFast`, , actual=`{type(tokenizer)}`", + f"Expected tokenizer type(use_fast=False): `LlamaTokenizerFast`, actual=`{type(tokenizer)}`", ) # Save tokenizer @@ -195,13 +195,14 @@ def test_class_after_save_and_reload(self): # Verify post save and reload the fast tokenizer class did not change self.assertTrue( isinstance(tokenizer, LlamaTokenizerFast), - f"Expected tokenizer type: `LlamaTokenizerFast`, , actual=`{type(tokenizer)}`", + f"Expected tokenizer type: `LlamaTokenizerFast`, actual=`{type(tokenizer)}`", ) tokenizer = AutoTokenizer.from_pretrained(temp_dir, use_fast=True) + # Verify post save and reload the fast tokenizer class did not change self.assertTrue( isinstance(tokenizer, LlamaTokenizerFast), - f"Expected tokenizer type: `LlamaTokenizerFast`, , actual=`{type(tokenizer)}`", + f"Expected tokenizer type: `LlamaTokenizerFast`, actual=`{type(tokenizer)}`", )