From 533b1e652a0079eaf999c37edaf18c156146b182 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 24 Feb 2026 19:21:39 +0000
Subject: [PATCH 1/5] Initial plan


From d12e0b7e3be4c9b506096473ca8456c1b9c99952 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 24 Feb 2026 19:30:47 +0000
Subject: [PATCH 2/5] Add auto_retry.py sample for rate limiting handling

Co-authored-by: eavanvalkenburg <13749212+eavanvalkenburg@users.noreply.github.com>
---
 python/samples/02-agents/auto_retry.py | 245 +++++++++++++++++++++++++
 1 file changed, 245 insertions(+)
 create mode 100644 python/samples/02-agents/auto_retry.py

diff --git a/python/samples/02-agents/auto_retry.py b/python/samples/02-agents/auto_retry.py
new file mode 100644
index 0000000000..4b7cbb28f6
--- /dev/null
+++ b/python/samples/02-agents/auto_retry.py
@@ -0,0 +1,245 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+import asyncio
+import logging
+import sys
+from collections.abc import Awaitable, Callable
+
+from agent_framework import ChatContext, ChatMiddleware, chat_middleware
+from agent_framework.azure import AzureOpenAIChatClient
+from azure.identity import AzureCliCredential
+from dotenv import load_dotenv
+from openai import RateLimitError
+from tenacity import (
+    AsyncRetrying,
+    before_sleep_log,
+    retry,
+    retry_if_exception_type,
+    stop_after_attempt,
+    wait_exponential,
+)
+
+if sys.version_info >= (3, 12):
+    from typing import override  # type: ignore # pragma: no cover
+else:
+    from typing_extensions import override  # type: ignore[import] # pragma: no cover
+
+# Load environment variables from .env file
+load_dotenv()
+
+"""
+Auto-Retry Rate Limiting Sample
+
+Every model inference API enforces rate limits, so production agents need retry logic
+to handle 429 responses gracefully. This sample shows two ways to add automatic retry
+using the `tenacity` library, keeping your application code free of boilerplate.
+
+Approach 1 – Class-based wrapper
+    Subclass AzureOpenAIChatClient and override get_response() to wrap the underlying
+    call in a tenacity retry loop. Non-streaming responses are wrapped in an async
+    retry coroutine; streaming is returned as-is (streaming retry requires more
+    delicate handling).
+
+Approach 2 – Chat middleware
+    Register middleware on the agent that catches RateLimitError raised inside
+    call_next() and retries the entire request pipeline. Two styles are shown:
+    a) Class-based middleware (ChatMiddleware subclass)
+    b) Function-based middleware (@chat_middleware decorator)
+
+Both approaches use the same tenacity primitives:
+    - stop_after_attempt  – cap the total number of tries
+    - wait_exponential    – exponential back-off between retries
+    - retry_if_exception_type(RateLimitError) – only retry on 429 errors
+    - before_sleep_log    – log each retry attempt at WARNING level
+"""
+
+logger = logging.getLogger(__name__)
+
+RETRY_ATTEMPTS = 3
+
+# =============================================================================
+# Approach 1: Class-based wrapper
+# =============================================================================
+
+
+class AzureOpenAIChatClientWithRetry(AzureOpenAIChatClient):
+    """Azure OpenAI Chat Client with built-in retry logic for handling rate limits.
+
+    Subclass any chat client and override get_response() to transparently retry
+    on RateLimitError (HTTP 429) without changing any call-site code.
+
+    Note: Streaming responses are returned without retry. Adding retry to a
+    streaming response requires more delicate handling (e.g. checking whether
+    the stream has already started before attempting a retry).
+    """
+
+    retry_attempts: int = RETRY_ATTEMPTS
+
+    @override
+    def get_response(self, *args, **kwargs):  # type: ignore[override]
+        """Return a response, retrying up to retry_attempts times on rate limit errors."""
+        stream = kwargs.get("stream", False)
+
+        if stream:
+            # Streaming retry is more complex; fall back to the parent behaviour.
+            return super().get_response(*args, **kwargs)
+
+        # For non-streaming, wrap the awaitable in a retry loop so that each
+        # retry re-issues the full HTTP request rather than just re-awaiting a
+        # stale coroutine.
+        async def _with_retry():  # noqa: RET503 - AsyncRetrying with reraise=True raises on exhaustion
+            async for attempt in AsyncRetrying(
+                stop=stop_after_attempt(self.retry_attempts),
+                wait=wait_exponential(multiplier=1, min=4, max=10),
+                retry=retry_if_exception_type(RateLimitError),
+                reraise=True,
+                before_sleep=before_sleep_log(logger, logging.WARNING),
+            ):
+                with attempt:
+                    return await super(  # type: ignore[misc]
+                        AzureOpenAIChatClientWithRetry, self
+                    ).get_response(*args, **kwargs)
+
+        return _with_retry()
+
+
+# =============================================================================
+# Approach 2a: Class-based chat middleware
+# =============================================================================
+
+
+class RateLimitRetryMiddleware(ChatMiddleware):
+    """Chat middleware that retries the full request pipeline on rate limit errors.
+
+    Register this middleware on an agent (or at the run level) to automatically
+    retry any call_next() invocation that raises RateLimitError.
+    """
+
+    def __init__(self, *, max_attempts: int = RETRY_ATTEMPTS) -> None:
+        """Initialize with the maximum number of attempts (the first call plus any retries)."""
+        self.max_attempts = max_attempts
+
+    async def process(
+        self,
+        context: ChatContext,
+        call_next: Callable[[], Awaitable[None]],
+    ) -> None:
+        """Retry call_next() on rate limit errors with exponential back-off."""
+        async for attempt in AsyncRetrying(
+            stop=stop_after_attempt(self.max_attempts),
+            wait=wait_exponential(multiplier=1, min=4, max=10),
+            retry=retry_if_exception_type(RateLimitError),
+            reraise=True,
+            before_sleep=before_sleep_log(logger, logging.WARNING),
+        ):
+            with attempt:
+                await call_next()
+
+
+# =============================================================================
+# Approach 2b: Function-based chat middleware
+# =============================================================================
+
+
+@chat_middleware
+async def rate_limit_retry_middleware(
+    context: ChatContext,
+    call_next: Callable[[], Awaitable[None]],
+) -> None:
+    """Function-based chat middleware that retries on rate limit errors.
+
+    Wrap call_next() with a tenacity @retry decorator so any RateLimitError
+    raised during model inference triggers an automatic retry with exponential
+    back-off.
+    """
+
+    @retry(
+        stop=stop_after_attempt(RETRY_ATTEMPTS),
+        wait=wait_exponential(multiplier=1, min=4, max=10),
+        retry=retry_if_exception_type(RateLimitError),
+        reraise=True,
+        before_sleep=before_sleep_log(logger, logging.WARNING),
+    )
+    async def _call_next_with_retry() -> None:
+        await call_next()
+
+    await _call_next_with_retry()
+
+
+# =============================================================================
+# Demo
+# =============================================================================
+
+
+async def class_based_wrapper_example() -> None:
+    """Demonstrate Approach 1: subclassing the chat client."""
+    print("\n" + "=" * 60)
+    print("Approach 1: Class-based wrapper (custom client subclass)")
+    print("=" * 60)
+
+    # For authentication, run `az login` command in terminal or replace
+    # AzureCliCredential with your preferred authentication option.
+    agent = AzureOpenAIChatClientWithRetry(credential=AzureCliCredential()).as_agent(
+        instructions="You are a helpful assistant.",
+    )
+
+    query = "Say hello!"
+    print(f"User: {query}")
+    result = await agent.run(query)
+    print(f"Agent: {result.text}")
+
+
+async def class_based_middleware_example() -> None:
+    """Demonstrate Approach 2a: class-based chat middleware."""
+    print("\n" + "=" * 60)
+    print("Approach 2a: Class-based chat middleware")
+    print("=" * 60)
+
+    # For authentication, run `az login` command in terminal or replace
+    # AzureCliCredential with your preferred authentication option.
+    agent = AzureOpenAIChatClient(credential=AzureCliCredential()).as_agent(
+        instructions="You are a helpful assistant.",
+        middleware=[RateLimitRetryMiddleware(max_attempts=3)],
+    )
+
+    query = "Say hello!"
+    print(f"User: {query}")
+    result = await agent.run(query)
+    print(f"Agent: {result.text}")
+
+
+async def function_based_middleware_example() -> None:
+    """Demonstrate Approach 2b: function-based chat middleware."""
+    print("\n" + "=" * 60)
+    print("Approach 2b: Function-based chat middleware")
+    print("=" * 60)
+
+    # For authentication, run `az login` command in terminal or replace
+    # AzureCliCredential with your preferred authentication option.
+    agent = AzureOpenAIChatClient(credential=AzureCliCredential()).as_agent(
+        instructions="You are a helpful assistant.",
+        middleware=[rate_limit_retry_middleware],
+    )
+
+    query = "Say hello!"
+    print(f"User: {query}")
+    result = await agent.run(query)
+    print(f"Agent: {result.text}")
+
+
+async def main() -> None:
+    """Run all auto-retry examples."""
+    print("=== Auto-Retry Rate Limiting Sample ===")
+    print(
+        "Demonstrates two approaches for automatic retry on rate limit (429) errors.\n"
+        "Set AZURE_OPENAI_ENDPOINT and AZURE_OPENAI_CHAT_DEPLOYMENT_NAME (and optionally\n"
+        "AZURE_OPENAI_API_KEY) before running, or populate a .env file."
+    )
+
+    await class_based_wrapper_example()
+    await class_based_middleware_example()
+    await function_based_middleware_example()
+
+
+if __name__ == "__main__":
+    asyncio.run(main())

From da740f3757645e4ec628a5fe7453979ea070774c Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 25 Feb 2026 09:21:22 +0000
Subject: [PATCH 3/5] Update auto_retry sample to use class decorator for
 get_response retries

Co-authored-by: eavanvalkenburg <13749212+eavanvalkenburg@users.noreply.github.com>
---
 python/samples/02-agents/auto_retry.py | 81 ++++++++++++--------------
 1 file changed, 37 insertions(+), 44 deletions(-)

diff --git a/python/samples/02-agents/auto_retry.py b/python/samples/02-agents/auto_retry.py
index 4b7cbb28f6..39ef115ccc 100644
--- a/python/samples/02-agents/auto_retry.py
+++ b/python/samples/02-agents/auto_retry.py
@@ -2,10 +2,10 @@
 
 import asyncio
 import logging
-import sys
 from collections.abc import Awaitable, Callable
+from typing import Any, TypeVar, cast
 
-from agent_framework import ChatContext, ChatMiddleware, chat_middleware
+from agent_framework import ChatContext, ChatMiddleware, SupportsChatGetResponse, chat_middleware
 from agent_framework.azure import AzureOpenAIChatClient
 from azure.identity import AzureCliCredential
 from dotenv import load_dotenv
@@ -19,11 +19,6 @@
     wait_exponential,
 )
 
-if sys.version_info >= (3, 12):
-    from typing import override  # type: ignore # pragma: no cover
-else:
-    from typing_extensions import override  # type: ignore[import] # pragma: no cover
-
 # Load environment variables from .env file
 load_dotenv()
 
@@ -34,11 +29,11 @@
 to handle 429 responses gracefully. This sample shows two ways to add automatic retry
 using the `tenacity` library, keeping your application code free of boilerplate.
 
-Approach 1 – Class-based wrapper
-    Subclass AzureOpenAIChatClient and override get_response() to wrap the underlying
-    call in a tenacity retry loop. Non-streaming responses are wrapped in an async
-    retry coroutine; streaming is returned as-is (streaming retry requires more
-    delicate handling).
+Approach 1 – Class decorator
+    Apply a class decorator to any client type implementing SupportsChatGetResponse.
+    The decorator patches get_response() with tenacity retry logic. Non-streaming
+    responses are wrapped in an async retry coroutine; streaming is returned as-is
+    (streaming retry requires more delicate handling).
 
 Approach 2 – Chat middleware
     Register middleware on the agent that catches RateLimitError raised inside
@@ -58,49 +53,47 @@
 RETRY_ATTEMPTS = 3
 
 # =============================================================================
-# Approach 1: Class-based wrapper
+# Approach 1: Class decorator
 # =============================================================================
 
 
-class AzureOpenAIChatClientWithRetry(AzureOpenAIChatClient):
-    """Azure OpenAI Chat Client with built-in retry logic for handling rate limits.
+ChatClientT = TypeVar("ChatClientT", bound=SupportsChatGetResponse[Any])
 
-    Subclass any chat client and override get_response() to transparently retry
-    on RateLimitError (HTTP 429) without changing any call-site code.
 
-    Note: Streaming responses are returned without retry. Adding retry to a
-    streaming response requires more delicate handling (e.g. checking whether
-    the stream has already started before attempting a retry).
-    """
+def with_rate_limit_retry(*, retry_attempts: int = RETRY_ATTEMPTS) -> Callable[[type[ChatClientT]], type[ChatClientT]]:
+    """Class decorator that adds non-streaming retry behavior to get_response()."""
 
-    retry_attempts: int = RETRY_ATTEMPTS
+    def decorator(client_cls: type[ChatClientT]) -> type[ChatClientT]:
+        original_get_response = client_cls.get_response
 
-    @override
-    def get_response(self, *args, **kwargs):  # type: ignore[override]
-        """Return a response, retrying up to retry_attempts times on rate limit errors."""
-        stream = kwargs.get("stream", False)
+        def get_response_with_retry(self, *args, **kwargs):  # type: ignore[no-untyped-def]
+            stream = kwargs.get("stream", False)
 
-        if stream:
-            # Streaming retry is more complex; fall back to the parent behaviour.
-            return super().get_response(*args, **kwargs)
+            if stream:
+                # Streaming retry is more complex; fall back to the original behaviour.
+                return original_get_response(self, *args, **kwargs)
 
-        # For non-streaming, wrap the awaitable in a retry loop so that each
-        # retry re-issues the full HTTP request rather than just re-awaiting a
-        # stale coroutine.
-        async def _with_retry():  # noqa: RET503 - AsyncRetrying with reraise=True raises on exhaustion
-            async for attempt in AsyncRetrying(
-                stop=stop_after_attempt(self.retry_attempts),
+            @retry(
+                stop=stop_after_attempt(retry_attempts),
                 wait=wait_exponential(multiplier=1, min=4, max=10),
                 retry=retry_if_exception_type(RateLimitError),
                 reraise=True,
                 before_sleep=before_sleep_log(logger, logging.WARNING),
-            ):
-                with attempt:
-                    return await super(  # type: ignore[misc]
-                        AzureOpenAIChatClientWithRetry, self
-                    ).get_response(*args, **kwargs)
+            )
+            async def _with_retry():
+                return await original_get_response(self, *args, **kwargs)
+
+            return _with_retry()
+
+        client_cls.get_response = cast(Any, get_response_with_retry)
+        return client_cls
+
+    return decorator
+
 
-        return _with_retry()
+@with_rate_limit_retry()
+class RetryingAzureOpenAIChatClient(AzureOpenAIChatClient):
+    """Azure OpenAI Chat client with class-decorator-based retry behavior."""
 
 
 # =============================================================================
@@ -172,14 +165,14 @@ async def _call_next_with_retry() -> None:
 
 
 async def class_based_wrapper_example() -> None:
-    """Demonstrate Approach 1: subclassing the chat client."""
+    """Demonstrate Approach 1: class decorator on a chat client type."""
     print("\n" + "=" * 60)
-    print("Approach 1: Class-based wrapper (custom client subclass)")
+    print("Approach 1: Class decorator (applied to client type)")
     print("=" * 60)
 
     # For authentication, run `az login` command in terminal or replace
     # AzureCliCredential with your preferred authentication option.
-    agent = AzureOpenAIChatClientWithRetry(credential=AzureCliCredential()).as_agent(
+    agent = RetryingAzureOpenAIChatClient(credential=AzureCliCredential()).as_agent(
         instructions="You are a helpful assistant.",
     )
 

From 23f98b73978d55e6b314a07743d4ad450dec6f3f Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 25 Feb 2026 10:37:05 +0000
Subject: [PATCH 4/5] Address review feedback on auto_retry sample header and
 wrapper usage

Co-authored-by: eavanvalkenburg <13749212+eavanvalkenburg@users.noreply.github.com>
---
 python/samples/02-agents/auto_retry.py | 78 +++++++++++++-------------
 1 file changed, 40 insertions(+), 38 deletions(-)

diff --git a/python/samples/02-agents/auto_retry.py b/python/samples/02-agents/auto_retry.py
index 39ef115ccc..3933c8eb7b 100644
--- a/python/samples/02-agents/auto_retry.py
+++ b/python/samples/02-agents/auto_retry.py
@@ -1,3 +1,13 @@
+# /// script
+# requires-python = ">=3.10"
+# dependencies = [
+#     "agent-framework[azure]",
+#     "tenacity",
+# ]
+# ///
+# Run with any PEP 723 compatible runner, e.g.:
+#   uv run samples/02-agents/auto_retry.py
+
 # Copyright (c) Microsoft. All rights reserved.
 
 import asyncio
@@ -29,11 +39,11 @@
 to handle 429 responses gracefully. This sample shows two ways to add automatic retry
 using the `tenacity` library, keeping your application code free of boilerplate.
 
-Approach 1 – Class decorator
-    Apply a class decorator to any client type implementing SupportsChatGetResponse.
-    The decorator patches get_response() with tenacity retry logic. Non-streaming
-    responses are wrapped in an async retry coroutine; streaming is returned as-is
-    (streaming retry requires more delicate handling).
+Approach 1 – Client wrapper
+    Apply a retry wrapper to any client instance implementing
+    SupportsChatGetResponse. The wrapper patches get_response() with tenacity
+    retry logic. Non-streaming responses are wrapped in an async retry coroutine;
+    streaming is returned as-is (streaming retry requires more delicate handling).
 
 Approach 2 – Chat middleware
     Register middleware on the agent that catches RateLimitError raised inside
@@ -53,47 +63,38 @@
 RETRY_ATTEMPTS = 3
 
 # =============================================================================
-# Approach 1: Class decorator
+# Approach 1: Client wrapper
 # =============================================================================
 
 
 ChatClientT = TypeVar("ChatClientT", bound=SupportsChatGetResponse[Any])
 
 
-def with_rate_limit_retry(*, retry_attempts: int = RETRY_ATTEMPTS) -> Callable[[type[ChatClientT]], type[ChatClientT]]:
-    """Class decorator that adds non-streaming retry behavior to get_response()."""
-
-    def decorator(client_cls: type[ChatClientT]) -> type[ChatClientT]:
-        original_get_response = client_cls.get_response
-
-        def get_response_with_retry(self, *args, **kwargs):  # type: ignore[no-untyped-def]
-            stream = kwargs.get("stream", False)
-
-            if stream:
-                # Streaming retry is more complex; fall back to the original behaviour.
-                return original_get_response(self, *args, **kwargs)
-
-            @retry(
-                stop=stop_after_attempt(retry_attempts),
-                wait=wait_exponential(multiplier=1, min=4, max=10),
-                retry=retry_if_exception_type(RateLimitError),
-                reraise=True,
-                before_sleep=before_sleep_log(logger, logging.WARNING),
-            )
-            async def _with_retry():
-                return await original_get_response(self, *args, **kwargs)
+def with_rate_limit_retry(client: ChatClientT, *, retry_attempts: int = RETRY_ATTEMPTS) -> ChatClientT:
+    """Wrap a client instance with non-streaming retry behavior on get_response()."""
+    original_get_response = client.get_response
 
-            return _with_retry()
+    def get_response_with_retry(*args, **kwargs):  # type: ignore[no-untyped-def]
+        stream = kwargs.get("stream", False)
 
-        client_cls.get_response = cast(Any, get_response_with_retry)
-        return client_cls
+        if stream:
+            # Streaming retry is more complex; fall back to the original behaviour.
+            return original_get_response(*args, **kwargs)
 
-    return decorator
+        @retry(
+            stop=stop_after_attempt(retry_attempts),
+            wait=wait_exponential(multiplier=1, min=4, max=10),
+            retry=retry_if_exception_type(RateLimitError),
+            reraise=True,
+            before_sleep=before_sleep_log(logger, logging.WARNING),
+        )
+        async def _with_retry():
+            return await original_get_response(*args, **kwargs)
 
+        return _with_retry()
 
-@with_rate_limit_retry()
-class RetryingAzureOpenAIChatClient(AzureOpenAIChatClient):
-    """Azure OpenAI Chat client with class-decorator-based retry behavior."""
+    client.get_response = cast(Any, get_response_with_retry)
+    return client
 
 
 # =============================================================================
@@ -165,14 +166,15 @@ async def _call_next_with_retry() -> None:
 
 
 async def class_based_wrapper_example() -> None:
-    """Demonstrate Approach 1: class decorator on a chat client type."""
+    """Demonstrate Approach 1: retry wrapper on a chat client instance."""
     print("\n" + "=" * 60)
-    print("Approach 1: Class decorator (applied to client type)")
+    print("Approach 1: Client wrapper (applied to client instance)")
     print("=" * 60)
 
     # For authentication, run `az login` command in terminal or replace
     # AzureCliCredential with your preferred authentication option.
-    agent = RetryingAzureOpenAIChatClient(credential=AzureCliCredential()).as_agent(
+    client = with_rate_limit_retry(AzureOpenAIChatClient(credential=AzureCliCredential()))
+    agent = client.as_agent(
         instructions="You are a helpful assistant.",
     )
 

From 01374e65360e8315ec45321dd2265fd3ebedca99 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 25 Feb 2026 12:33:58 +0000
Subject: [PATCH 5/5] Restore class-decorator retry sample and address reviewer
 feedback

Co-authored-by: eavanvalkenburg <13749212+eavanvalkenburg@users.noreply.github.com>
---
 python/samples/02-agents/auto_retry.py | 76 +++++++++++++++-----------
 1 file changed, 43 insertions(+), 33 deletions(-)

diff --git a/python/samples/02-agents/auto_retry.py b/python/samples/02-agents/auto_retry.py
index 3933c8eb7b..7c985bd0c1 100644
--- a/python/samples/02-agents/auto_retry.py
+++ b/python/samples/02-agents/auto_retry.py
@@ -1,7 +1,7 @@
 # /// script
 # requires-python = ">=3.10"
 # dependencies = [
-#     "agent-framework[azure]",
+#     "agent-framework",
 #     "tenacity",
 # ]
 # ///
@@ -39,11 +39,11 @@
 to handle 429 responses gracefully. This sample shows two ways to add automatic retry
 using the `tenacity` library, keeping your application code free of boilerplate.
 
-Approach 1 – Client wrapper
-    Apply a retry wrapper to any client instance implementing
-    SupportsChatGetResponse. The wrapper patches get_response() with tenacity
-    retry logic. Non-streaming responses are wrapped in an async retry coroutine;
-    streaming is returned as-is (streaming retry requires more delicate handling).
+Approach 1 – Class decorator
+    Apply a class decorator to any client type implementing
+    SupportsChatGetResponse. The decorator patches get_response() with retry
+    behavior. Non-streaming responses are retried; streaming is returned as-is
+    (streaming retry requires more delicate handling).
 
 Approach 2 – Chat middleware
     Register middleware on the agent that catches RateLimitError raised inside
@@ -63,38 +63,49 @@
 RETRY_ATTEMPTS = 3
 
 # =============================================================================
-# Approach 1: Client wrapper
+# Approach 1: Class decorator
 # =============================================================================
 
 
 ChatClientT = TypeVar("ChatClientT", bound=SupportsChatGetResponse[Any])
 
 
-def with_rate_limit_retry(client: ChatClientT, *, retry_attempts: int = RETRY_ATTEMPTS) -> ChatClientT:
-    """Wrap a client instance with non-streaming retry behavior on get_response()."""
-    original_get_response = client.get_response
+def with_rate_limit_retry(*, retry_attempts: int = RETRY_ATTEMPTS) -> Callable[[type[ChatClientT]], type[ChatClientT]]:
+    """Class decorator that adds non-streaming retry behavior to get_response()."""
 
-    def get_response_with_retry(*args, **kwargs):  # type: ignore[no-untyped-def]
-        stream = kwargs.get("stream", False)
+    def decorator(client_cls: type[ChatClientT]) -> type[ChatClientT]:
+        original_get_response = client_cls.get_response
 
-        if stream:
-            # Streaming retry is more complex; fall back to the original behaviour.
-            return original_get_response(*args, **kwargs)
+        def get_response_with_retry(self, *args, **kwargs):  # type: ignore[no-untyped-def]
+            stream = kwargs.get("stream", False)
 
-        @retry(
-            stop=stop_after_attempt(retry_attempts),
-            wait=wait_exponential(multiplier=1, min=4, max=10),
-            retry=retry_if_exception_type(RateLimitError),
-            reraise=True,
-            before_sleep=before_sleep_log(logger, logging.WARNING),
-        )
-        async def _with_retry():
-            return await original_get_response(*args, **kwargs)
+            if stream:
+                # Streaming retry is more complex; fall back to the original behaviour.
+                return original_get_response(self, *args, **kwargs)
+
+            async def _with_retry():
+                async for attempt in AsyncRetrying(
+                    stop=stop_after_attempt(retry_attempts),
+                    wait=wait_exponential(multiplier=1, min=4, max=10),
+                    retry=retry_if_exception_type(RateLimitError),
+                    reraise=True,
+                    before_sleep=before_sleep_log(logger, logging.WARNING),
+                ):
+                    with attempt:
+                        return await original_get_response(self, *args, **kwargs)
+                return None
+
+            return _with_retry()
+
+        client_cls.get_response = cast(Any, get_response_with_retry)
+        return client_cls
+
+    return decorator
 
-        return _with_retry()
 
-    client.get_response = cast(Any, get_response_with_retry)
-    return client
+@with_rate_limit_retry()
+class RetryingAzureOpenAIChatClient(AzureOpenAIChatClient):
+    """Azure OpenAI Chat client with class-decorator-based retry behavior."""
 
 
 # =============================================================================
@@ -165,16 +176,15 @@ async def _call_next_with_retry() -> None:
 # =============================================================================
 
 
-async def class_based_wrapper_example() -> None:
-    """Demonstrate Approach 1: retry wrapper on a chat client instance."""
+async def class_decorator_example() -> None:
+    """Demonstrate Approach 1: class decorator on a chat client type."""
     print("\n" + "=" * 60)
-    print("Approach 1: Client wrapper (applied to client instance)")
+    print("Approach 1: Class decorator (applied to client type)")
     print("=" * 60)
 
     # For authentication, run `az login` command in terminal or replace
     # AzureCliCredential with your preferred authentication option.
-    client = with_rate_limit_retry(AzureOpenAIChatClient(credential=AzureCliCredential()))
-    agent = client.as_agent(
+    agent = RetryingAzureOpenAIChatClient(credential=AzureCliCredential()).as_agent(
         instructions="You are a helpful assistant.",
     )
 
@@ -231,7 +241,7 @@ async def main() -> None:
         "AZURE_OPENAI_API_KEY) before running, or populate a .env file."
     )
 
-    await class_based_wrapper_example()
+    await class_decorator_example()
     await class_based_middleware_example()
     await function_based_middleware_example()