From 5fa4b88825992962d8ce2429123480a20f9088f8 Mon Sep 17 00:00:00 2001 From: dhruvidave348 Date: Thu, 9 Oct 2025 00:45:19 +0530 Subject: [PATCH 01/11] Add Vision Transformer demo in computer_vision module --- computer_vision/vision_transformer.py | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 computer_vision/vision_transformer.py diff --git a/computer_vision/vision_transformer.py b/computer_vision/vision_transformer.py new file mode 100644 index 000000000000..92616e519b66 --- /dev/null +++ b/computer_vision/vision_transformer.py @@ -0,0 +1,2 @@ +def vision_transformer_demo(): + print("Vision Transformer running...") From 62af08b78d543502d052a034aa4dcb040e9141ff Mon Sep 17 00:00:00 2001 From: dhruvidave348 Date: Thu, 9 Oct 2025 20:03:54 +0530 Subject: [PATCH 02/11] Update Vision Transformer (ViT) demo with full implementation --- computer_vision/vision_transformer.py | 47 +++++++++++++++++++++++++-- 1 file changed, 45 insertions(+), 2 deletions(-) diff --git a/computer_vision/vision_transformer.py b/computer_vision/vision_transformer.py index 92616e519b66..14049f73adcf 100644 --- a/computer_vision/vision_transformer.py +++ b/computer_vision/vision_transformer.py @@ -1,2 +1,45 @@ -def vision_transformer_demo(): - print("Vision Transformer running...") +""" +Vision Transformer (ViT) +======================== + +This module demonstrates how to use a pretrained Vision Transformer (ViT) +for image classification using Hugging Face's Transformers library. + +Source: +https://huggingface.co/docs/transformers/model_doc/vit +""" + +from transformers import ViTImageProcessor, ViTForImageClassification +from PIL import Image +import requests +import torch + + +def vision_transformer_demo() -> None: + """ + Demonstrates Vision Transformer (ViT) on a sample image. + + Example: + >>> vision_transformer_demo() # doctest: +SKIP + Predicted label: tabby, tabby cat + """ + url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/cat_sample.jpeg" + image = Image.open(requests.get(url, stream=True).raw) + + processor = ViTImageProcessor.from_pretrained("google/vit-base-patch16-224") + model = ViTForImageClassification.from_pretrained("google/vit-base-patch16-224") + + inputs = processor(images=image, return_tensors="pt") + + with torch.no_grad(): + outputs = model(**inputs) + logits = outputs.logits + + predicted_class_idx = logits.argmax(-1).item() + predicted_label = model.config.id2label[predicted_class_idx] + + print(f"Predicted label: {predicted_label}") + + +if __name__ == "__main__": + vision_transformer_demo() From b5b3192f163cb7ea46a53d9d3fd8aa163c7465f0 Mon Sep 17 00:00:00 2001 From: dhruvidave348 Date: Thu, 9 Oct 2025 20:08:11 +0530 Subject: [PATCH 03/11] Fix import order and add timeout to requests call --- computer_vision/vision_transformer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/computer_vision/vision_transformer.py b/computer_vision/vision_transformer.py index 14049f73adcf..3f7f35e66d88 100644 --- a/computer_vision/vision_transformer.py +++ b/computer_vision/vision_transformer.py @@ -9,10 +9,10 @@ https://huggingface.co/docs/transformers/model_doc/vit """ -from transformers import ViTImageProcessor, ViTForImageClassification from PIL import Image import requests import torch +from transformers import ViTForImageClassification, ViTImageProcessor def vision_transformer_demo() -> None: @@ -24,7 +24,7 @@ def vision_transformer_demo() -> None: Predicted label: tabby, tabby cat """ url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/cat_sample.jpeg" - image = Image.open(requests.get(url, stream=True).raw) + image = Image.open(requests.get(url, stream=True, timeout=10).raw) processor = ViTImageProcessor.from_pretrained("google/vit-base-patch16-224") model = ViTForImageClassification.from_pretrained("google/vit-base-patch16-224") From fc2c15aedd89d0e015df1830c2185b75aa454b17 Mon Sep 17 00:00:00 2001 From: dhruvidave348 Date: Fri, 10 Oct 2025 19:45:38 +0530 Subject: [PATCH 04/11] Add type hint and doctest to vision_transformer_demo --- computer_vision/vision_transformer.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/computer_vision/vision_transformer.py b/computer_vision/vision_transformer.py index 3f7f35e66d88..01fd569cdd0d 100644 --- a/computer_vision/vision_transformer.py +++ b/computer_vision/vision_transformer.py @@ -14,7 +14,6 @@ import torch from transformers import ViTForImageClassification, ViTImageProcessor - def vision_transformer_demo() -> None: """ Demonstrates Vision Transformer (ViT) on a sample image. @@ -40,6 +39,5 @@ def vision_transformer_demo() -> None: print(f"Predicted label: {predicted_label}") - if __name__ == "__main__": vision_transformer_demo() From e5d52d756a224775f690c31e3486c7fb686dfe6a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 10 Oct 2025 14:16:49 +0000 Subject: [PATCH 05/11] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- computer_vision/vision_transformer.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/computer_vision/vision_transformer.py b/computer_vision/vision_transformer.py index 01fd569cdd0d..3f7f35e66d88 100644 --- a/computer_vision/vision_transformer.py +++ b/computer_vision/vision_transformer.py @@ -14,6 +14,7 @@ import torch from transformers import ViTForImageClassification, ViTImageProcessor + def vision_transformer_demo() -> None: """ Demonstrates Vision Transformer (ViT) on a sample image. @@ -39,5 +40,6 @@ def vision_transformer_demo() -> None: print(f"Predicted label: {predicted_label}") + if __name__ == "__main__": vision_transformer_demo() From fa237c394dd39bcb6a4ca5ad6ebddd93c4b163d2 Mon Sep 17 00:00:00 2001 From: dhruvidave348 Date: Fri, 10 Oct 2025 20:00:57 +0530 Subject: [PATCH 06/11] Fix import order for vision_transformer.py (ruff check) --- computer_vision/vision_transformer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/computer_vision/vision_transformer.py b/computer_vision/vision_transformer.py index 3f7f35e66d88..9b017cf26a47 100644 --- a/computer_vision/vision_transformer.py +++ b/computer_vision/vision_transformer.py @@ -9,9 +9,9 @@ https://huggingface.co/docs/transformers/model_doc/vit """ -from PIL import Image import requests import torch +from PIL import Image from transformers import ViTForImageClassification, ViTImageProcessor From 451b7ec414b339d211dfb35ce0e031f7b80a2b72 Mon Sep 17 00:00:00 2001 From: dhruvidave348 Date: Fri, 10 Oct 2025 20:43:11 +0530 Subject: [PATCH 07/11] Fix Vision Transformer demo for test compatibility --- computer_vision/vision_transformer.py | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/computer_vision/vision_transformer.py b/computer_vision/vision_transformer.py index 9b017cf26a47..fea4720f22c7 100644 --- a/computer_vision/vision_transformer.py +++ b/computer_vision/vision_transformer.py @@ -9,11 +9,6 @@ https://huggingface.co/docs/transformers/model_doc/vit """ -import requests -import torch -from PIL import Image -from transformers import ViTForImageClassification, ViTImageProcessor - def vision_transformer_demo() -> None: """ @@ -23,14 +18,32 @@ def vision_transformer_demo() -> None: >>> vision_transformer_demo() # doctest: +SKIP Predicted label: tabby, tabby cat """ - url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/cat_sample.jpeg" + try: + import requests + import torch + from PIL import Image + from transformers import ViTForImageClassification, ViTImageProcessor + except ImportError as e: + raise ImportError( + "This demo requires 'torch', 'transformers', 'PIL', and 'requests' packages. " + "Install them with: pip install torch transformers pillow requests" + ) from e + + # Load a sample image + url = ( + "https://huggingface.co/datasets/huggingface/documentation-images/" + "resolve/main/cat_sample.jpeg" + ) image = Image.open(requests.get(url, stream=True, timeout=10).raw) + # Load pretrained model and processor processor = ViTImageProcessor.from_pretrained("google/vit-base-patch16-224") model = ViTForImageClassification.from_pretrained("google/vit-base-patch16-224") + # Preprocess the image inputs = processor(images=image, return_tensors="pt") + # Run inference with torch.no_grad(): outputs = model(**inputs) logits = outputs.logits From 1ba4fe14462e2c81796245e7b378c46ecb4d0d33 Mon Sep 17 00:00:00 2001 From: dhruvidave348 Date: Fri, 10 Oct 2025 21:58:59 +0530 Subject: [PATCH 08/11] Add Vision Transformer demo for image classification --- computer_vision/vision_transformer.py | 77 +++++++++++++++------------ 1 file changed, 42 insertions(+), 35 deletions(-) diff --git a/computer_vision/vision_transformer.py b/computer_vision/vision_transformer.py index fea4720f22c7..96eb4846b464 100644 --- a/computer_vision/vision_transformer.py +++ b/computer_vision/vision_transformer.py @@ -1,58 +1,65 @@ """ -Vision Transformer (ViT) -======================== +Vision Transformer (ViT) Module +================================ -This module demonstrates how to use a pretrained Vision Transformer (ViT) -for image classification using Hugging Face's Transformers library. +Classify images using a pretrained Vision Transformer (ViT) +from Hugging Face Transformers. + +Can be used as a demo or imported in other scripts. Source: https://huggingface.co/docs/transformers/model_doc/vit """ +try: + import requests + import torch + from io import BytesIO + from PIL import Image + from transformers import ViTForImageClassification, ViTImageProcessor +except ImportError as e: + raise ImportError( + "This module requires 'torch', 'transformers', 'PIL', and 'requests'. " + "Install them with: pip install torch transformers pillow requests" + ) from e -def vision_transformer_demo() -> None: - """ - Demonstrates Vision Transformer (ViT) on a sample image. - Example: - >>> vision_transformer_demo() # doctest: +SKIP - Predicted label: tabby, tabby cat - """ - try: - import requests - import torch - from PIL import Image - from transformers import ViTForImageClassification, ViTImageProcessor - except ImportError as e: - raise ImportError( - "This demo requires 'torch', 'transformers', 'PIL', and 'requests' packages. " - "Install them with: pip install torch transformers pillow requests" - ) from e - - # Load a sample image - url = ( - "https://huggingface.co/datasets/huggingface/documentation-images/" - "resolve/main/cat_sample.jpeg" - ) - image = Image.open(requests.get(url, stream=True, timeout=10).raw) - - # Load pretrained model and processor +def classify_image(image: Image.Image) -> str: + """Classify a PIL image using pretrained ViT.""" processor = ViTImageProcessor.from_pretrained("google/vit-base-patch16-224") model = ViTForImageClassification.from_pretrained("google/vit-base-patch16-224") - # Preprocess the image inputs = processor(images=image, return_tensors="pt") - # Run inference with torch.no_grad(): outputs = model(**inputs) logits = outputs.logits predicted_class_idx = logits.argmax(-1).item() - predicted_label = model.config.id2label[predicted_class_idx] + return model.config.id2label[predicted_class_idx] + + +def demo(url: str = None) -> None: + """ + Run a demo using a sample image or provided URL. + + Args: + url (str): URL of the image. If None, uses a default cat image. + """ + if url is None: + url = "https://images.unsplash.com/photo-1592194996308-7b43878e84a6" # default example image + + try: + response = requests.get(url, timeout=10) + response.raise_for_status() + image = Image.open(BytesIO(response.content)) + except Exception as e: + print(f"Failed to load image from {url}. Error: {e}") + return - print(f"Predicted label: {predicted_label}") + label = classify_image(image) + print(f"Predicted label: {label}") if __name__ == "__main__": - vision_transformer_demo() + demo() From 8ad8cc8d266f0163a0cde430816d8fdd2077d744 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 10 Oct 2025 16:30:59 +0000 Subject: [PATCH 09/11] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- computer_vision/vision_transformer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/computer_vision/vision_transformer.py b/computer_vision/vision_transformer.py index 96eb4846b464..11770363d503 100644 --- a/computer_vision/vision_transformer.py +++ b/computer_vision/vision_transformer.py @@ -42,7 +42,7 @@ def classify_image(image: Image.Image) -> str: def demo(url: str = None) -> None: """ Run a demo using a sample image or provided URL. - + Args: url (str): URL of the image. If None, uses a default cat image. """ From fb2975700ccd691e79de07a93cdc29b15679afc8 Mon Sep 17 00:00:00 2001 From: dhruvidave348 Date: Fri, 10 Oct 2025 22:02:29 +0530 Subject: [PATCH 10/11] Add Vision Transformer demo for image classification --- computer_vision/vision_transformer.py | 35 +++++++++++---------------- 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/computer_vision/vision_transformer.py b/computer_vision/vision_transformer.py index 96eb4846b464..c49a5267097c 100644 --- a/computer_vision/vision_transformer.py +++ b/computer_vision/vision_transformer.py @@ -4,24 +4,15 @@ Classify images using a pretrained Vision Transformer (ViT) from Hugging Face Transformers. - -Can be used as a demo or imported in other scripts. - -Source: -https://huggingface.co/docs/transformers/model_doc/vit """ -try: - import requests - import torch - from io import BytesIO - from PIL import Image - from transformers import ViTForImageClassification, ViTImageProcessor -except ImportError as e: - raise ImportError( - "This module requires 'torch', 'transformers', 'PIL', and 'requests'. " - "Install them with: pip install torch transformers pillow requests" - ) from e +from io import BytesIO +from typing import Optional + +import requests +import torch +from PIL import Image, UnidentifiedImageError +from transformers import ViTForImageClassification, ViTImageProcessor def classify_image(image: Image.Image) -> str: @@ -39,21 +30,23 @@ def classify_image(image: Image.Image) -> str: return model.config.id2label[predicted_class_idx] -def demo(url: str = None) -> None: +def demo(url: Optional[str] = None) -> None: """ Run a demo using a sample image or provided URL. - + Args: - url (str): URL of the image. If None, uses a default cat image. + url (Optional[str]): URL of the image. If None, uses default cat image. """ if url is None: - url = "https://images.unsplash.com/photo-1592194996308-7b43878e84a6" # default example image + url = ( + "https://images.unsplash.com/photo-1592194996308-7b43878e84a6" + ) # default example image try: response = requests.get(url, timeout=10) response.raise_for_status() image = Image.open(BytesIO(response.content)) - except Exception as e: + except (requests.RequestException, UnidentifiedImageError) as e: print(f"Failed to load image from {url}. Error: {e}") return From f30d09ffe5b0829a9df7fc625e08397012df28de Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 10 Oct 2025 16:36:55 +0000 Subject: [PATCH 11/11] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- computer_vision/vision_transformer.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/computer_vision/vision_transformer.py b/computer_vision/vision_transformer.py index c49a5267097c..1d316d8dbcd2 100644 --- a/computer_vision/vision_transformer.py +++ b/computer_vision/vision_transformer.py @@ -38,9 +38,7 @@ def demo(url: Optional[str] = None) -> None: url (Optional[str]): URL of the image. If None, uses default cat image. """ if url is None: - url = ( - "https://images.unsplash.com/photo-1592194996308-7b43878e84a6" - ) # default example image + url = "https://images.unsplash.com/photo-1592194996308-7b43878e84a6" # default example image try: response = requests.get(url, timeout=10)