From 533b1e652a0079eaf999c37edaf18c156146b182 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 24 Feb 2026 19:21:39 +0000 Subject: [PATCH 1/5] Initial plan From d12e0b7e3be4c9b506096473ca8456c1b9c99952 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 24 Feb 2026 19:30:47 +0000 Subject: [PATCH 2/5] Add auto_retry.py sample for rate limiting handling Co-authored-by: eavanvalkenburg <13749212+eavanvalkenburg@users.noreply.github.com> --- python/samples/02-agents/auto_retry.py | 245 +++++++++++++++++++++++++ 1 file changed, 245 insertions(+) create mode 100644 python/samples/02-agents/auto_retry.py diff --git a/python/samples/02-agents/auto_retry.py b/python/samples/02-agents/auto_retry.py new file mode 100644 index 0000000000..4b7cbb28f6 --- /dev/null +++ b/python/samples/02-agents/auto_retry.py @@ -0,0 +1,245 @@ +# Copyright (c) Microsoft. All rights reserved. + +import asyncio +import logging +import sys +from collections.abc import Awaitable, Callable + +from agent_framework import ChatContext, ChatMiddleware, chat_middleware +from agent_framework.azure import AzureOpenAIChatClient +from azure.identity import AzureCliCredential +from dotenv import load_dotenv +from openai import RateLimitError +from tenacity import ( + AsyncRetrying, + before_sleep_log, + retry, + retry_if_exception_type, + stop_after_attempt, + wait_exponential, +) + +if sys.version_info >= (3, 12): + from typing import override # type: ignore # pragma: no cover +else: + from typing_extensions import override # type: ignore[import] # pragma: no cover + +# Load environment variables from .env file +load_dotenv() + +""" +Auto-Retry Rate Limiting Sample + +Every model inference API enforces rate limits, so production agents need retry logic +to handle 429 responses gracefully. This sample shows two ways to add automatic retry +using the `tenacity` library, keeping your application code free of boilerplate. + +Approach 1 – Class-based wrapper + Subclass AzureOpenAIChatClient and override get_response() to wrap the underlying + call in a tenacity retry loop. Non-streaming responses are wrapped in an async + retry coroutine; streaming is returned as-is (streaming retry requires more + delicate handling). + +Approach 2 – Chat middleware + Register middleware on the agent that catches RateLimitError raised inside + call_next() and retries the entire request pipeline. Two styles are shown: + a) Class-based middleware (ChatMiddleware subclass) + b) Function-based middleware (@chat_middleware decorator) + +Both approaches use the same tenacity primitives: + - stop_after_attempt – cap the total number of tries + - wait_exponential – exponential back-off between retries + - retry_if_exception_type(RateLimitError) – only retry on 429 errors + - before_sleep_log – log each retry attempt at WARNING level +""" + +logger = logging.getLogger(__name__) + +RETRY_ATTEMPTS = 3 + +# ============================================================================= +# Approach 1: Class-based wrapper +# ============================================================================= + + +class AzureOpenAIChatClientWithRetry(AzureOpenAIChatClient): + """Azure OpenAI Chat Client with built-in retry logic for handling rate limits. + + Subclass any chat client and override get_response() to transparently retry + on RateLimitError (HTTP 429) without changing any call-site code. + + Note: Streaming responses are returned without retry. Adding retry to a + streaming response requires more delicate handling (e.g. checking whether + the stream has already started before attempting a retry). + """ + + retry_attempts: int = RETRY_ATTEMPTS + + @override + def get_response(self, *args, **kwargs): # type: ignore[override] + """Return a response, retrying up to retry_attempts times on rate limit errors.""" + stream = kwargs.get("stream", False) + + if stream: + # Streaming retry is more complex; fall back to the parent behaviour. + return super().get_response(*args, **kwargs) + + # For non-streaming, wrap the awaitable in a retry loop so that each + # retry re-issues the full HTTP request rather than just re-awaiting a + # stale coroutine. + async def _with_retry(): # noqa: RET503 - AsyncRetrying with reraise=True raises on exhaustion + async for attempt in AsyncRetrying( + stop=stop_after_attempt(self.retry_attempts), + wait=wait_exponential(multiplier=1, min=4, max=10), + retry=retry_if_exception_type(RateLimitError), + reraise=True, + before_sleep=before_sleep_log(logger, logging.WARNING), + ): + with attempt: + return await super( # type: ignore[misc] + AzureOpenAIChatClientWithRetry, self + ).get_response(*args, **kwargs) + + return _with_retry() + + +# ============================================================================= +# Approach 2a: Class-based chat middleware +# ============================================================================= + + +class RateLimitRetryMiddleware(ChatMiddleware): + """Chat middleware that retries the full request pipeline on rate limit errors. + + Register this middleware on an agent (or at the run level) to automatically + retry any call_next() invocation that raises RateLimitError. + """ + + def __init__(self, *, max_attempts: int = RETRY_ATTEMPTS) -> None: + """Initialize with the maximum number of retry attempts.""" + self.max_attempts = max_attempts + + async def process( + self, + context: ChatContext, + call_next: Callable[[], Awaitable[None]], + ) -> None: + """Retry call_next() on rate limit errors with exponential back-off.""" + async for attempt in AsyncRetrying( + stop=stop_after_attempt(self.max_attempts), + wait=wait_exponential(multiplier=1, min=4, max=10), + retry=retry_if_exception_type(RateLimitError), + reraise=True, + before_sleep=before_sleep_log(logger, logging.WARNING), + ): + with attempt: + await call_next() + + +# ============================================================================= +# Approach 2b: Function-based chat middleware +# ============================================================================= + + +@chat_middleware +async def rate_limit_retry_middleware( + context: ChatContext, + call_next: Callable[[], Awaitable[None]], +) -> None: + """Function-based chat middleware that retries on rate limit errors. + + Wrap call_next() with a tenacity @retry decorator so any RateLimitError + raised during model inference triggers an automatic retry with exponential + back-off. + """ + + @retry( + stop=stop_after_attempt(RETRY_ATTEMPTS), + wait=wait_exponential(multiplier=1, min=4, max=10), + retry=retry_if_exception_type(RateLimitError), + reraise=True, + before_sleep=before_sleep_log(logger, logging.WARNING), + ) + async def _call_next_with_retry() -> None: + await call_next() + + await _call_next_with_retry() + + +# ============================================================================= +# Demo +# ============================================================================= + + +async def class_based_wrapper_example() -> None: + """Demonstrate Approach 1: subclassing the chat client.""" + print("\n" + "=" * 60) + print("Approach 1: Class-based wrapper (custom client subclass)") + print("=" * 60) + + # For authentication, run `az login` command in terminal or replace + # AzureCliCredential with your preferred authentication option. + agent = AzureOpenAIChatClientWithRetry(credential=AzureCliCredential()).as_agent( + instructions="You are a helpful assistant.", + ) + + query = "Say hello!" + print(f"User: {query}") + result = await agent.run(query) + print(f"Agent: {result.text}") + + +async def class_based_middleware_example() -> None: + """Demonstrate Approach 2a: class-based chat middleware.""" + print("\n" + "=" * 60) + print("Approach 2a: Class-based chat middleware") + print("=" * 60) + + # For authentication, run `az login` command in terminal or replace + # AzureCliCredential with your preferred authentication option. + agent = AzureOpenAIChatClient(credential=AzureCliCredential()).as_agent( + instructions="You are a helpful assistant.", + middleware=[RateLimitRetryMiddleware(max_attempts=3)], + ) + + query = "Say hello!" + print(f"User: {query}") + result = await agent.run(query) + print(f"Agent: {result.text}") + + +async def function_based_middleware_example() -> None: + """Demonstrate Approach 2b: function-based chat middleware.""" + print("\n" + "=" * 60) + print("Approach 2b: Function-based chat middleware") + print("=" * 60) + + # For authentication, run `az login` command in terminal or replace + # AzureCliCredential with your preferred authentication option. + agent = AzureOpenAIChatClient(credential=AzureCliCredential()).as_agent( + instructions="You are a helpful assistant.", + middleware=[rate_limit_retry_middleware], + ) + + query = "Say hello!" + print(f"User: {query}") + result = await agent.run(query) + print(f"Agent: {result.text}") + + +async def main() -> None: + """Run all auto-retry examples.""" + print("=== Auto-Retry Rate Limiting Sample ===") + print( + "Demonstrates two approaches for automatic retry on rate limit (429) errors.\n" + "Set AZURE_OPENAI_ENDPOINT and AZURE_OPENAI_CHAT_DEPLOYMENT_NAME (and optionally\n" + "AZURE_OPENAI_API_KEY) before running, or populate a .env file." + ) + + await class_based_wrapper_example() + await class_based_middleware_example() + await function_based_middleware_example() + + +if __name__ == "__main__": + asyncio.run(main()) From da740f3757645e4ec628a5fe7453979ea070774c Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 25 Feb 2026 09:21:22 +0000 Subject: [PATCH 3/5] Update auto_retry sample to use class decorator for get_response retries Co-authored-by: eavanvalkenburg <13749212+eavanvalkenburg@users.noreply.github.com> --- python/samples/02-agents/auto_retry.py | 81 ++++++++++++-------------- 1 file changed, 37 insertions(+), 44 deletions(-) diff --git a/python/samples/02-agents/auto_retry.py b/python/samples/02-agents/auto_retry.py index 4b7cbb28f6..39ef115ccc 100644 --- a/python/samples/02-agents/auto_retry.py +++ b/python/samples/02-agents/auto_retry.py @@ -2,10 +2,10 @@ import asyncio import logging -import sys from collections.abc import Awaitable, Callable +from typing import Any, TypeVar, cast -from agent_framework import ChatContext, ChatMiddleware, chat_middleware +from agent_framework import ChatContext, ChatMiddleware, SupportsChatGetResponse, chat_middleware from agent_framework.azure import AzureOpenAIChatClient from azure.identity import AzureCliCredential from dotenv import load_dotenv @@ -19,11 +19,6 @@ wait_exponential, ) -if sys.version_info >= (3, 12): - from typing import override # type: ignore # pragma: no cover -else: - from typing_extensions import override # type: ignore[import] # pragma: no cover - # Load environment variables from .env file load_dotenv() @@ -34,11 +29,11 @@ to handle 429 responses gracefully. This sample shows two ways to add automatic retry using the `tenacity` library, keeping your application code free of boilerplate. -Approach 1 – Class-based wrapper - Subclass AzureOpenAIChatClient and override get_response() to wrap the underlying - call in a tenacity retry loop. Non-streaming responses are wrapped in an async - retry coroutine; streaming is returned as-is (streaming retry requires more - delicate handling). +Approach 1 – Class decorator + Apply a class decorator to any client type implementing SupportsChatGetResponse. + The decorator patches get_response() with tenacity retry logic. Non-streaming + responses are wrapped in an async retry coroutine; streaming is returned as-is + (streaming retry requires more delicate handling). Approach 2 – Chat middleware Register middleware on the agent that catches RateLimitError raised inside @@ -58,49 +53,47 @@ RETRY_ATTEMPTS = 3 # ============================================================================= -# Approach 1: Class-based wrapper +# Approach 1: Class decorator # ============================================================================= -class AzureOpenAIChatClientWithRetry(AzureOpenAIChatClient): - """Azure OpenAI Chat Client with built-in retry logic for handling rate limits. +ChatClientT = TypeVar("ChatClientT", bound=SupportsChatGetResponse[Any]) - Subclass any chat client and override get_response() to transparently retry - on RateLimitError (HTTP 429) without changing any call-site code. - Note: Streaming responses are returned without retry. Adding retry to a - streaming response requires more delicate handling (e.g. checking whether - the stream has already started before attempting a retry). - """ +def with_rate_limit_retry(*, retry_attempts: int = RETRY_ATTEMPTS) -> Callable[[type[ChatClientT]], type[ChatClientT]]: + """Class decorator that adds non-streaming retry behavior to get_response().""" - retry_attempts: int = RETRY_ATTEMPTS + def decorator(client_cls: type[ChatClientT]) -> type[ChatClientT]: + original_get_response = client_cls.get_response - @override - def get_response(self, *args, **kwargs): # type: ignore[override] - """Return a response, retrying up to retry_attempts times on rate limit errors.""" - stream = kwargs.get("stream", False) + def get_response_with_retry(self, *args, **kwargs): # type: ignore[no-untyped-def] + stream = kwargs.get("stream", False) - if stream: - # Streaming retry is more complex; fall back to the parent behaviour. - return super().get_response(*args, **kwargs) + if stream: + # Streaming retry is more complex; fall back to the original behaviour. + return original_get_response(self, *args, **kwargs) - # For non-streaming, wrap the awaitable in a retry loop so that each - # retry re-issues the full HTTP request rather than just re-awaiting a - # stale coroutine. - async def _with_retry(): # noqa: RET503 - AsyncRetrying with reraise=True raises on exhaustion - async for attempt in AsyncRetrying( - stop=stop_after_attempt(self.retry_attempts), + @retry( + stop=stop_after_attempt(retry_attempts), wait=wait_exponential(multiplier=1, min=4, max=10), retry=retry_if_exception_type(RateLimitError), reraise=True, before_sleep=before_sleep_log(logger, logging.WARNING), - ): - with attempt: - return await super( # type: ignore[misc] - AzureOpenAIChatClientWithRetry, self - ).get_response(*args, **kwargs) + ) + async def _with_retry(): + return await original_get_response(self, *args, **kwargs) + + return _with_retry() + + client_cls.get_response = cast(Any, get_response_with_retry) + return client_cls + + return decorator + - return _with_retry() +@with_rate_limit_retry() +class RetryingAzureOpenAIChatClient(AzureOpenAIChatClient): + """Azure OpenAI Chat client with class-decorator-based retry behavior.""" # ============================================================================= @@ -172,14 +165,14 @@ async def _call_next_with_retry() -> None: async def class_based_wrapper_example() -> None: - """Demonstrate Approach 1: subclassing the chat client.""" + """Demonstrate Approach 1: class decorator on a chat client type.""" print("\n" + "=" * 60) - print("Approach 1: Class-based wrapper (custom client subclass)") + print("Approach 1: Class decorator (applied to client type)") print("=" * 60) # For authentication, run `az login` command in terminal or replace # AzureCliCredential with your preferred authentication option. - agent = AzureOpenAIChatClientWithRetry(credential=AzureCliCredential()).as_agent( + agent = RetryingAzureOpenAIChatClient(credential=AzureCliCredential()).as_agent( instructions="You are a helpful assistant.", ) From 23f98b73978d55e6b314a07743d4ad450dec6f3f Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 25 Feb 2026 10:37:05 +0000 Subject: [PATCH 4/5] Address review feedback on auto_retry sample header and wrapper usage Co-authored-by: eavanvalkenburg <13749212+eavanvalkenburg@users.noreply.github.com> --- python/samples/02-agents/auto_retry.py | 78 +++++++++++++------------- 1 file changed, 40 insertions(+), 38 deletions(-) diff --git a/python/samples/02-agents/auto_retry.py b/python/samples/02-agents/auto_retry.py index 39ef115ccc..3933c8eb7b 100644 --- a/python/samples/02-agents/auto_retry.py +++ b/python/samples/02-agents/auto_retry.py @@ -1,3 +1,13 @@ +# /// script +# requires-python = ">=3.10" +# dependencies = [ +# "agent-framework[azure]", +# "tenacity", +# ] +# /// +# Run with any PEP 723 compatible runner, e.g.: +# uv run samples/02-agents/auto_retry.py + # Copyright (c) Microsoft. All rights reserved. import asyncio @@ -29,11 +39,11 @@ to handle 429 responses gracefully. This sample shows two ways to add automatic retry using the `tenacity` library, keeping your application code free of boilerplate. -Approach 1 – Class decorator - Apply a class decorator to any client type implementing SupportsChatGetResponse. - The decorator patches get_response() with tenacity retry logic. Non-streaming - responses are wrapped in an async retry coroutine; streaming is returned as-is - (streaming retry requires more delicate handling). +Approach 1 – Client wrapper + Apply a retry wrapper to any client instance implementing + SupportsChatGetResponse. The wrapper patches get_response() with tenacity + retry logic. Non-streaming responses are wrapped in an async retry coroutine; + streaming is returned as-is (streaming retry requires more delicate handling). Approach 2 – Chat middleware Register middleware on the agent that catches RateLimitError raised inside @@ -53,47 +63,38 @@ RETRY_ATTEMPTS = 3 # ============================================================================= -# Approach 1: Class decorator +# Approach 1: Client wrapper # ============================================================================= ChatClientT = TypeVar("ChatClientT", bound=SupportsChatGetResponse[Any]) -def with_rate_limit_retry(*, retry_attempts: int = RETRY_ATTEMPTS) -> Callable[[type[ChatClientT]], type[ChatClientT]]: - """Class decorator that adds non-streaming retry behavior to get_response().""" - - def decorator(client_cls: type[ChatClientT]) -> type[ChatClientT]: - original_get_response = client_cls.get_response - - def get_response_with_retry(self, *args, **kwargs): # type: ignore[no-untyped-def] - stream = kwargs.get("stream", False) - - if stream: - # Streaming retry is more complex; fall back to the original behaviour. - return original_get_response(self, *args, **kwargs) - - @retry( - stop=stop_after_attempt(retry_attempts), - wait=wait_exponential(multiplier=1, min=4, max=10), - retry=retry_if_exception_type(RateLimitError), - reraise=True, - before_sleep=before_sleep_log(logger, logging.WARNING), - ) - async def _with_retry(): - return await original_get_response(self, *args, **kwargs) +def with_rate_limit_retry(client: ChatClientT, *, retry_attempts: int = RETRY_ATTEMPTS) -> ChatClientT: + """Wrap a client instance with non-streaming retry behavior on get_response().""" + original_get_response = client.get_response - return _with_retry() + def get_response_with_retry(*args, **kwargs): # type: ignore[no-untyped-def] + stream = kwargs.get("stream", False) - client_cls.get_response = cast(Any, get_response_with_retry) - return client_cls + if stream: + # Streaming retry is more complex; fall back to the original behaviour. + return original_get_response(*args, **kwargs) - return decorator + @retry( + stop=stop_after_attempt(retry_attempts), + wait=wait_exponential(multiplier=1, min=4, max=10), + retry=retry_if_exception_type(RateLimitError), + reraise=True, + before_sleep=before_sleep_log(logger, logging.WARNING), + ) + async def _with_retry(): + return await original_get_response(*args, **kwargs) + return _with_retry() -@with_rate_limit_retry() -class RetryingAzureOpenAIChatClient(AzureOpenAIChatClient): - """Azure OpenAI Chat client with class-decorator-based retry behavior.""" + client.get_response = cast(Any, get_response_with_retry) + return client # ============================================================================= @@ -165,14 +166,15 @@ async def _call_next_with_retry() -> None: async def class_based_wrapper_example() -> None: - """Demonstrate Approach 1: class decorator on a chat client type.""" + """Demonstrate Approach 1: retry wrapper on a chat client instance.""" print("\n" + "=" * 60) - print("Approach 1: Class decorator (applied to client type)") + print("Approach 1: Client wrapper (applied to client instance)") print("=" * 60) # For authentication, run `az login` command in terminal or replace # AzureCliCredential with your preferred authentication option. - agent = RetryingAzureOpenAIChatClient(credential=AzureCliCredential()).as_agent( + client = with_rate_limit_retry(AzureOpenAIChatClient(credential=AzureCliCredential())) + agent = client.as_agent( instructions="You are a helpful assistant.", ) From 01374e65360e8315ec45321dd2265fd3ebedca99 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 25 Feb 2026 12:33:58 +0000 Subject: [PATCH 5/5] Restore class-decorator retry sample and address reviewer feedback Co-authored-by: eavanvalkenburg <13749212+eavanvalkenburg@users.noreply.github.com> --- python/samples/02-agents/auto_retry.py | 76 +++++++++++++++----------- 1 file changed, 43 insertions(+), 33 deletions(-) diff --git a/python/samples/02-agents/auto_retry.py b/python/samples/02-agents/auto_retry.py index 3933c8eb7b..7c985bd0c1 100644 --- a/python/samples/02-agents/auto_retry.py +++ b/python/samples/02-agents/auto_retry.py @@ -1,7 +1,7 @@ # /// script # requires-python = ">=3.10" # dependencies = [ -# "agent-framework[azure]", +# "agent-framework", # "tenacity", # ] # /// @@ -39,11 +39,11 @@ to handle 429 responses gracefully. This sample shows two ways to add automatic retry using the `tenacity` library, keeping your application code free of boilerplate. -Approach 1 – Client wrapper - Apply a retry wrapper to any client instance implementing - SupportsChatGetResponse. The wrapper patches get_response() with tenacity - retry logic. Non-streaming responses are wrapped in an async retry coroutine; - streaming is returned as-is (streaming retry requires more delicate handling). +Approach 1 – Class decorator + Apply a class decorator to any client type implementing + SupportsChatGetResponse. The decorator patches get_response() with retry + behavior. Non-streaming responses are retried; streaming is returned as-is + (streaming retry requires more delicate handling). Approach 2 – Chat middleware Register middleware on the agent that catches RateLimitError raised inside @@ -63,38 +63,49 @@ RETRY_ATTEMPTS = 3 # ============================================================================= -# Approach 1: Client wrapper +# Approach 1: Class decorator # ============================================================================= ChatClientT = TypeVar("ChatClientT", bound=SupportsChatGetResponse[Any]) -def with_rate_limit_retry(client: ChatClientT, *, retry_attempts: int = RETRY_ATTEMPTS) -> ChatClientT: - """Wrap a client instance with non-streaming retry behavior on get_response().""" - original_get_response = client.get_response +def with_rate_limit_retry(*, retry_attempts: int = RETRY_ATTEMPTS) -> Callable[[type[ChatClientT]], type[ChatClientT]]: + """Class decorator that adds non-streaming retry behavior to get_response().""" - def get_response_with_retry(*args, **kwargs): # type: ignore[no-untyped-def] - stream = kwargs.get("stream", False) + def decorator(client_cls: type[ChatClientT]) -> type[ChatClientT]: + original_get_response = client_cls.get_response - if stream: - # Streaming retry is more complex; fall back to the original behaviour. - return original_get_response(*args, **kwargs) + def get_response_with_retry(self, *args, **kwargs): # type: ignore[no-untyped-def] + stream = kwargs.get("stream", False) - @retry( - stop=stop_after_attempt(retry_attempts), - wait=wait_exponential(multiplier=1, min=4, max=10), - retry=retry_if_exception_type(RateLimitError), - reraise=True, - before_sleep=before_sleep_log(logger, logging.WARNING), - ) - async def _with_retry(): - return await original_get_response(*args, **kwargs) + if stream: + # Streaming retry is more complex; fall back to the original behaviour. + return original_get_response(self, *args, **kwargs) + + async def _with_retry(): + async for attempt in AsyncRetrying( + stop=stop_after_attempt(retry_attempts), + wait=wait_exponential(multiplier=1, min=4, max=10), + retry=retry_if_exception_type(RateLimitError), + reraise=True, + before_sleep=before_sleep_log(logger, logging.WARNING), + ): + with attempt: + return await original_get_response(self, *args, **kwargs) + return None + + return _with_retry() + + client_cls.get_response = cast(Any, get_response_with_retry) + return client_cls + + return decorator - return _with_retry() - client.get_response = cast(Any, get_response_with_retry) - return client +@with_rate_limit_retry() +class RetryingAzureOpenAIChatClient(AzureOpenAIChatClient): + """Azure OpenAI Chat client with class-decorator-based retry behavior.""" # ============================================================================= @@ -165,16 +176,15 @@ async def _call_next_with_retry() -> None: # ============================================================================= -async def class_based_wrapper_example() -> None: - """Demonstrate Approach 1: retry wrapper on a chat client instance.""" +async def class_decorator_example() -> None: + """Demonstrate Approach 1: class decorator on a chat client type.""" print("\n" + "=" * 60) - print("Approach 1: Client wrapper (applied to client instance)") + print("Approach 1: Class decorator (applied to client type)") print("=" * 60) # For authentication, run `az login` command in terminal or replace # AzureCliCredential with your preferred authentication option. - client = with_rate_limit_retry(AzureOpenAIChatClient(credential=AzureCliCredential())) - agent = client.as_agent( + agent = RetryingAzureOpenAIChatClient(credential=AzureCliCredential()).as_agent( instructions="You are a helpful assistant.", ) @@ -231,7 +241,7 @@ async def main() -> None: "AZURE_OPENAI_API_KEY) before running, or populate a .env file." ) - await class_based_wrapper_example() + await class_decorator_example() await class_based_middleware_example() await function_based_middleware_example()