diff --git a/packages/uipath_langchain_client/demo.py b/packages/uipath_langchain_client/demo.py
index be792d9..e3c887e 100644
--- a/packages/uipath_langchain_client/demo.py
+++ b/packages/uipath_langchain_client/demo.py
@@ -17,9 +17,7 @@
 from langchain_core.messages import HumanMessage, SystemMessage
 from langchain_core.tools import tool
 from uipath_langchain_client import get_chat_model, get_embedding_model
-from uipath_langchain_client.settings import get_default_client_settings
-
-from uipath.llm_client.settings.constants import RoutingMode
+from uipath_langchain_client.settings import RoutingMode, get_default_client_settings
 
 
 def demo_basic_chat():
@@ -136,8 +134,31 @@ def calculate(expression: str) -> str:
         Args:
             expression: A mathematical expression to evaluate (e.g., "2 + 2").
         """
+        import ast
+
         try:
-            result = eval(expression)
+            # Allow only constants and arithmetic operators, so names, calls and attribute
+            # access are rejected before eval(); this does not bound cost (e.g. 9**9**9).
+            tree = ast.parse(expression, mode="eval")
+            allowed_node_types = (
+                ast.Expression,
+                ast.BinOp,
+                ast.UnaryOp,
+                ast.Constant,
+                ast.Add,
+                ast.Sub,
+                ast.Mult,
+                ast.Div,
+                ast.FloorDiv,
+                ast.Mod,
+                ast.Pow,
+                ast.USub,
+                ast.UAdd,
+            )
+            for node in ast.walk(tree):
+                if not isinstance(node, allowed_node_types):
+                    return "Error: unsupported operation in expression"
+            result = eval(compile(tree, "<string>", "eval"), {"__builtins__": {}})
             return str(result)
         except Exception as e:
             return f"Error: {e}"
diff --git a/packages/uipath_langchain_client/src/uipath_langchain_client/__version__.py b/packages/uipath_langchain_client/src/uipath_langchain_client/__version__.py
index 292aefb..5149f6e 100644
--- a/packages/uipath_langchain_client/src/uipath_langchain_client/__version__.py
+++ b/packages/uipath_langchain_client/src/uipath_langchain_client/__version__.py
@@ -1,3 +1,3 @@
 __title__ = "UiPath LangChain Client"
 __description__ = "A Python client for interacting with UiPath's LLM services via LangChain."
-__version__ = "1.5.10"
+__version__ = "1.6.0"
diff --git a/packages/uipath_langchain_client/src/uipath_langchain_client/base_client.py b/packages/uipath_langchain_client/src/uipath_langchain_client/base_client.py
index 2999441..a79025e 100644
--- a/packages/uipath_langchain_client/src/uipath_langchain_client/base_client.py
+++ b/packages/uipath_langchain_client/src/uipath_langchain_client/base_client.py
@@ -25,7 +25,7 @@
 
 import logging
 from abc import ABC
-from collections.abc import AsyncIterator, Iterator, Mapping, Sequence
+from collections.abc import AsyncGenerator, Generator, Mapping, Sequence
 from functools import cached_property
 from typing import Any, Literal
 
@@ -189,7 +189,7 @@ def uipath_async_client(self) -> UiPathHttpxAsyncClient:
 
     def uipath_request(
         self,
-        method: str = "POST",
+        method: Literal["POST", "GET"] = "POST",
         url: URL | str = "/",
         *,
         request_body: dict[str, Any] | None = None,
@@ -199,16 +199,18 @@ def uipath_request(
         """Make a synchronous HTTP request to the UiPath API.
 
         Args:
-            method: HTTP method (GET, POST, etc.). Defaults to "POST".
+            method: HTTP method (POST or GET). Defaults to "POST".
             url: Request URL path. Defaults to "/".
             request_body: JSON request body to send.
+            raise_status_error: If True, raises UiPathAPIError on non-2xx responses.
             **kwargs: Additional arguments passed to httpx.Client.request().
 
         Returns:
             httpx.Response: The HTTP response from the API.
 
         Raises:
-            UiPathAPIError: On HTTP 4xx/5xx responses (raised by transport layer).
+            UiPathAPIError: On HTTP 4xx/5xx responses when raise_status_error is True,
+                or raised by the transport layer.
         """
         response = self.uipath_sync_client.request(method, url, json=request_body, **kwargs)
         if raise_status_error:
@@ -224,7 +226,22 @@ async def uipath_arequest(
         raise_status_error: bool = False,
         **kwargs: Any,
     ) -> Response:
-        """Make an asynchronous HTTP request to the UiPath API."""
+        """Make an asynchronous HTTP request to the UiPath API.
+
+        Args:
+            method: HTTP method (POST or GET). Defaults to "POST".
+            url: Request URL path. Defaults to "/".
+            request_body: JSON request body to send.
+            raise_status_error: If True, raises UiPathAPIError on non-2xx responses.
+            **kwargs: Additional arguments passed to httpx.AsyncClient.request().
+
+        Returns:
+            httpx.Response: The HTTP response from the API.
+
+        Raises:
+            UiPathAPIError: On HTTP 4xx/5xx responses when raise_status_error is True,
+                or raised by the transport layer.
+        """
         response = await self.uipath_async_client.request(method, url, json=request_body, **kwargs)
         if raise_status_error:
             response.raise_for_status()
@@ -239,7 +256,7 @@ def uipath_stream(
         stream_type: Literal["text", "bytes", "lines", "raw"] = "lines",
         raise_status_error: bool = False,
         **kwargs: Any,
-    ) -> Iterator[str | bytes]:
+    ) -> Generator[str | bytes, None, None]:
         """Make a synchronous streaming HTTP request to the UiPath API.
 
         Args:
@@ -251,6 +268,7 @@ def uipath_stream(
                 - "bytes": Yield raw byte chunks
                 - "lines": Yield complete lines (default, best for SSE)
                 - "raw": Yield raw response data
+            raise_status_error: If True, raises UiPathAPIError on non-2xx responses.
             **kwargs: Additional arguments passed to httpx.Client.stream().
 
         Yields:
@@ -282,7 +300,7 @@ async def uipath_astream(
         stream_type: Literal["text", "bytes", "lines", "raw"] = "lines",
         raise_status_error: bool = False,
         **kwargs: Any,
-    ) -> AsyncIterator[str | bytes]:
+    ) -> AsyncGenerator[str | bytes, None]:
         """Make an asynchronous streaming HTTP request to the UiPath API.
 
         Args:
@@ -294,6 +312,7 @@ async def uipath_astream(
                 - "bytes": Yield raw byte chunks
                 - "lines": Yield complete lines (default, best for SSE)
                 - "raw": Yield raw response data
+            raise_status_error: If True, raises UiPathAPIError on non-2xx responses.
             **kwargs: Additional arguments passed to httpx.AsyncClient.stream().
 
         Yields:
@@ -393,7 +412,7 @@ def _stream(
         stop: list[str] | None = None,
         run_manager: CallbackManagerForLLMRun | None = None,
         **kwargs: Any,
-    ) -> Iterator[ChatGenerationChunk]:
+    ) -> Generator[ChatGenerationChunk, None, None]:
         set_captured_response_headers({})
         try:
             first = True
@@ -413,7 +432,7 @@ def _uipath_stream(
         stop: list[str] | None = None,
         run_manager: CallbackManagerForLLMRun | None = None,
         **kwargs: Any,
-    ) -> Iterator[ChatGenerationChunk]:
+    ) -> Generator[ChatGenerationChunk, None, None]:
         """Override in subclasses to provide the core (non-wrapped) stream logic."""
         yield from super()._stream(messages, stop=stop, run_manager=run_manager, **kwargs)
 
@@ -423,7 +442,7 @@ async def _astream(
         stop: list[str] | None = None,
         run_manager: AsyncCallbackManagerForLLMRun | None = None,
         **kwargs: Any,
-    ) -> AsyncIterator[ChatGenerationChunk]:
+    ) -> AsyncGenerator[ChatGenerationChunk, None]:
         set_captured_response_headers({})
         try:
             first = True
@@ -443,7 +462,7 @@ async def _uipath_astream(
         stop: list[str] | None = None,
         run_manager: AsyncCallbackManagerForLLMRun | None = None,
         **kwargs: Any,
-    ) -> AsyncIterator[ChatGenerationChunk]:
+    ) -> AsyncGenerator[ChatGenerationChunk, None]:
         """Override in subclasses to provide the core (non-wrapped) async stream logic."""
         async for chunk in super()._astream(messages, stop=stop, run_manager=run_manager, **kwargs):
             yield chunk
diff --git a/packages/uipath_langchain_client/src/uipath_langchain_client/callbacks.py b/packages/uipath_langchain_client/src/uipath_langchain_client/callbacks.py
index af99ac1..431b49c 100644
--- a/packages/uipath_langchain_client/src/uipath_langchain_client/callbacks.py
+++ b/packages/uipath_langchain_client/src/uipath_langchain_client/callbacks.py
@@ -1,6 +1,6 @@
 """LangChain callbacks for dynamic per-request header injection."""
 
-from abc import abstractmethod
+from abc import ABC, abstractmethod
 from typing import Any
 
 from langchain_core.callbacks import BaseCallbackHandler
@@ -8,7 +8,7 @@
 from uipath.llm_client.utils.headers import set_dynamic_request_headers
 
 
-class UiPathDynamicHeadersCallback(BaseCallbackHandler):
+class UiPathDynamicHeadersCallback(BaseCallbackHandler, ABC):
     """Base callback for injecting dynamic headers into each LLM gateway request.
 
     Extend this class and implement ``get_headers()`` to return the headers to
diff --git a/packages/uipath_langchain_client/src/uipath_langchain_client/clients/anthropic/chat_models.py b/packages/uipath_langchain_client/src/uipath_langchain_client/clients/anthropic/chat_models.py
index 52633df..26b87a2 100644
--- a/packages/uipath_langchain_client/src/uipath_langchain_client/clients/anthropic/chat_models.py
+++ b/packages/uipath_langchain_client/src/uipath_langchain_client/clients/anthropic/chat_models.py
@@ -45,15 +45,17 @@ class UiPathChatAnthropic(UiPathBaseChatModel, ChatAnthropic):
     def setup_api_flavor_and_version(self) -> Self:
         self.api_config.vendor_type = self.vendor_type
         match self.vendor_type:
+            case VendorType.ANTHROPIC:
+                self.api_config.api_flavor = None
+            case VendorType.AZURE:
+                self.api_config.api_flavor = None
             case VendorType.VERTEXAI:
                 self.api_config.api_flavor = ApiFlavor.ANTHROPIC_CLAUDE
                 self.api_config.api_version = "v1beta1"
             case VendorType.AWSBEDROCK:
                 self.api_config.api_flavor = ApiFlavor.INVOKE
             case _:
-                raise ValueError(
-                    "anthropic and azure vendors are currently not supported by UiPath"
-                )
+                raise ValueError(f"Unsupported vendor_type: {self.vendor_type}")
         return self
 
     # Override fields to avoid typing issues and fix stuff
@@ -150,13 +152,13 @@ def _async_anthropic_client(
                 raise ValueError("Anthropic models are currently not hosted on any other provider")
 
     @override
-    def _create(self, payload: dict) -> Any:
+    def _create(self, payload: dict[str, Any]) -> Any:
         if "betas" in payload:
             return self._anthropic_client.beta.messages.create(**payload)
         return self._anthropic_client.messages.create(**payload)
 
     @override
-    async def _acreate(self, payload: dict) -> Any:
+    async def _acreate(self, payload: dict[str, Any]) -> Any:
         if "betas" in payload:
             return await self._async_anthropic_client.beta.messages.create(**payload)
         return await self._async_anthropic_client.messages.create(**payload)
diff --git a/packages/uipath_langchain_client/src/uipath_langchain_client/clients/azure/chat_models.py b/packages/uipath_langchain_client/src/uipath_langchain_client/clients/azure/chat_models.py
index 3612a09..7af831f 100644
--- a/packages/uipath_langchain_client/src/uipath_langchain_client/clients/azure/chat_models.py
+++ b/packages/uipath_langchain_client/src/uipath_langchain_client/clients/azure/chat_models.py
@@ -1,11 +1,11 @@
 from typing import Self
 
-from httpx import URL, Request
+from httpx import Request
 from pydantic import Field, model_validator
 
 from uipath_langchain_client.base_client import UiPathBaseChatModel
+from uipath_langchain_client.clients.openai.utils import fix_url_and_api_flavor_header
 from uipath_langchain_client.settings import (
-    ApiFlavor,
     ApiType,
     RoutingMode,
     UiPathAPIConfig,
@@ -42,24 +42,14 @@ class UiPathAzureAIChatCompletionsModel(UiPathBaseChatModel, AzureAIOpenAIApiCha
     def setup_uipath_client(self) -> Self:
         base_url = str(self.uipath_sync_client.base_url).rstrip("/")
 
-        def fix_url_and_api_flavor_header(request: Request):
-            url_suffix = str(request.url).split(base_url)[-1]
-            if "responses" in url_suffix:
-                request.headers["X-UiPath-LlmGateway-ApiFlavor"] = ApiFlavor.RESPONSES.value
-            else:
-                request.headers["X-UiPath-LlmGateway-ApiFlavor"] = ApiFlavor.CHAT_COMPLETIONS.value
-            request.url = URL(base_url)
+        def on_request(request: Request) -> None:
+            fix_url_and_api_flavor_header(base_url, request)
 
-        async def fix_url_and_api_flavor_header_async(request: Request):
-            url_suffix = str(request.url).split(base_url)[-1]
-            if "responses" in url_suffix:
-                request.headers["X-UiPath-LlmGateway-ApiFlavor"] = ApiFlavor.RESPONSES.value
-            else:
-                request.headers["X-UiPath-LlmGateway-ApiFlavor"] = ApiFlavor.CHAT_COMPLETIONS.value
-            request.url = URL(base_url)
+        async def on_request_async(request: Request) -> None:
+            fix_url_and_api_flavor_header(base_url, request)
 
-        self.uipath_sync_client.event_hooks["request"].append(fix_url_and_api_flavor_header)
-        self.uipath_async_client.event_hooks["request"].append(fix_url_and_api_flavor_header_async)
+        self.uipath_sync_client.event_hooks["request"].append(on_request)
+        self.uipath_async_client.event_hooks["request"].append(on_request_async)
 
         self.root_client = OpenAI(
             api_key="PLACEHOLDER",
diff --git a/packages/uipath_langchain_client/src/uipath_langchain_client/clients/bedrock/chat_models.py b/packages/uipath_langchain_client/src/uipath_langchain_client/clients/bedrock/chat_models.py
index f2349b0..8993522 100644
--- a/packages/uipath_langchain_client/src/uipath_langchain_client/clients/bedrock/chat_models.py
+++ b/packages/uipath_langchain_client/src/uipath_langchain_client/clients/bedrock/chat_models.py
@@ -48,7 +48,7 @@ def _patched_format_data_content_block(block: dict) -> dict:
     ) from e
 
 
-class UiPathChatBedrockConverse(UiPathBaseChatModel, ChatBedrockConverse):
+class UiPathChatBedrockConverse(UiPathBaseChatModel, ChatBedrockConverse):  # type: ignore[override]
     api_config: UiPathAPIConfig = UiPathAPIConfig(
         api_type=ApiType.COMPLETIONS,
         routing_mode=RoutingMode.PASSTHROUGH,
@@ -77,7 +77,7 @@ def setup_uipath_client(self) -> Self:
         return self
 
 
-class UiPathChatBedrock(UiPathBaseChatModel, ChatBedrock):
+class UiPathChatBedrock(UiPathBaseChatModel, ChatBedrock):  # type: ignore[override]
     api_config: UiPathAPIConfig = UiPathAPIConfig(
         api_type=ApiType.COMPLETIONS,
         routing_mode=RoutingMode.PASSTHROUGH,
diff --git a/packages/uipath_langchain_client/src/uipath_langchain_client/clients/bedrock/utils.py b/packages/uipath_langchain_client/src/uipath_langchain_client/clients/bedrock/utils.py
index a56f371..232c085 100644
--- a/packages/uipath_langchain_client/src/uipath_langchain_client/clients/bedrock/utils.py
+++ b/packages/uipath_langchain_client/src/uipath_langchain_client/clients/bedrock/utils.py
@@ -1,6 +1,7 @@
 import base64
 import json
-from typing import Any, Iterator
+from collections.abc import Generator
+from typing import Any
 
 from httpx import Client
 
@@ -50,7 +51,9 @@ def __init__(self, httpx_client: Client | None = None, region_name: str = "PLACE
         self.httpx_client = httpx_client
         self.meta = _MockClientMeta(region_name=region_name)
 
-    def _stream_generator(self, request_body: dict[str, Any]) -> Iterator[dict[str, Any]]:
+    def _stream_generator(
+        self, request_body: dict[str, Any]
+    ) -> Generator[dict[str, Any], None, None]:
         if self.httpx_client is None:
             raise ValueError("httpx_client is not set")
         with self.httpx_client.stream("POST", "/", json=_serialize_bytes(request_body)) as response:
@@ -71,15 +74,19 @@ def invoke_model(self, **kwargs: Any) -> Any:
         return {
             "body": self.httpx_client.post(
                 "/",
-                json=json.loads(kwargs.get("body", {})),
+                json=json.loads(kwargs.get("body", "{}")),
             )
         }
 
     def invoke_model_with_response_stream(self, **kwargs: Any) -> Any:
-        return {"body": self._stream_generator(json.loads(kwargs.get("body", {})))}
+        return {"body": self._stream_generator(json.loads(kwargs.get("body", "{}")))}
 
     def converse(
-        self, *, messages: list[dict[str, Any]], system: str | None = None, **params: Any
+        self,
+        *,
+        messages: list[dict[str, Any]],
+        system: list[dict[str, Any]] | None = None,
+        **params: Any,
     ) -> Any:
         if self.httpx_client is None:
             raise ValueError("httpx_client is not set")
@@ -95,7 +102,11 @@ def converse(
         ).json()
 
     def converse_stream(
-        self, *, messages: list[dict[str, Any]], system: str | None = None, **params: Any
+        self,
+        *,
+        messages: list[dict[str, Any]],
+        system: list[dict[str, Any]] | None = None,
+        **params: Any,
     ) -> Any:
         return {
             "stream": self._stream_generator(
diff --git a/packages/uipath_langchain_client/src/uipath_langchain_client/clients/fireworks/embeddings.py b/packages/uipath_langchain_client/src/uipath_langchain_client/clients/fireworks/embeddings.py
index b4af2bc..161b71d 100644
--- a/packages/uipath_langchain_client/src/uipath_langchain_client/clients/fireworks/embeddings.py
+++ b/packages/uipath_langchain_client/src/uipath_langchain_client/clients/fireworks/embeddings.py
@@ -1,10 +1,9 @@
 from typing import Self
 
-from pydantic import model_validator
+from pydantic import Field, model_validator
 
 from uipath_langchain_client.base_client import UiPathBaseEmbeddings
 from uipath_langchain_client.settings import (
-    ApiFlavor,
     ApiType,
     RoutingMode,
     UiPathAPIConfig,
@@ -26,11 +25,12 @@ class UiPathFireworksEmbeddings(UiPathBaseEmbeddings, FireworksEmbeddings):
         api_type=ApiType.EMBEDDINGS,
         routing_mode=RoutingMode.PASSTHROUGH,
         vendor_type=VendorType.OPENAI,
-        api_flavor=ApiFlavor.CHAT_COMPLETIONS,
         api_version="2025-03-01-preview",
         freeze_base_url=True,
     )
 
+    model: str = Field(default="", alias="model_name")
+
     @model_validator(mode="after")
     def setup_uipath_client(self) -> Self:
         self.client = OpenAI(
@@ -48,7 +48,8 @@ def setup_uipath_client(self) -> Self:
     def embed_documents(self, texts: list[str]) -> list[list[float]]:
         """Embed search docs."""
         return [
-            i.embedding for i in self.client.embeddings.create(input=texts, model=self.model).data
+            i.embedding
+            for i in self.client.embeddings.create(input=texts, model=self.model_name).data
         ]
 
     def embed_query(self, text: str) -> list[float]:
@@ -59,7 +60,9 @@ async def aembed_documents(self, texts: list[str]) -> list[list[float]]:
         """Embed search docs asynchronously."""
         return [
             i.embedding
-            for i in (await self.async_client.embeddings.create(input=texts, model=self.model)).data
+            for i in (
+                await self.async_client.embeddings.create(input=texts, model=self.model_name)
+            ).data
         ]
 
     async def aembed_query(self, text: str) -> list[float]:
diff --git a/packages/uipath_langchain_client/src/uipath_langchain_client/clients/normalized/chat_models.py b/packages/uipath_langchain_client/src/uipath_langchain_client/clients/normalized/chat_models.py
index 582181c..47e325b 100644
--- a/packages/uipath_langchain_client/src/uipath_langchain_client/clients/normalized/chat_models.py
+++ b/packages/uipath_langchain_client/src/uipath_langchain_client/clients/normalized/chat_models.py
@@ -24,7 +24,7 @@
 """
 
 import json
-from collections.abc import AsyncIterator, Callable, Iterator, Sequence
+from collections.abc import AsyncGenerator, Callable, Generator, Sequence
 from typing import Any
 
 from langchain_core.callbacks import (
@@ -155,7 +155,6 @@ def _default_params(self) -> dict[str, Any]:
         }
 
         return {
-            "model": self.model_name,
             **{k: v for k, v in exclude_if_none.items() if v is not None},
             **self.model_kwargs,
         }
@@ -329,9 +328,7 @@ async def _uipath_agenerate(
         response = await self.uipath_arequest(request_body=request_body, raise_status_error=True)
         return self._postprocess_response(response.json())
 
-    def _generate_chunk(
-        self, original_message: str, json_data: dict[str, Any]
-    ) -> ChatGenerationChunk:
+    def _generate_chunk(self, json_data: dict[str, Any]) -> ChatGenerationChunk:
         generation_info = {
             "id": json_data.get("id"),
             "created": json_data.get("created", ""),
@@ -377,10 +374,10 @@ def _generate_chunk(
                 )
 
         return ChatGenerationChunk(
-            text=original_message,
+            text=content or "",
             generation_info=generation_info,
             message=AIMessageChunk(
-                content=content,
+                content=content or "",
                 usage_metadata=usage_metadata,
                 tool_call_chunks=tool_call_chunks,
             ),
@@ -392,21 +389,22 @@ def _uipath_stream(
         stop: list[str] | None = None,
         run_manager: CallbackManagerForLLMRun | None = None,
         **kwargs: Any,
-    ) -> Iterator[ChatGenerationChunk]:
+    ) -> Generator[ChatGenerationChunk, None, None]:
         request_body = self._preprocess_request(messages, stop=stop, **kwargs)
+        request_body["stream"] = True
         for chunk in self.uipath_stream(
             request_body=request_body, stream_type="lines", raise_status_error=True
         ):
             chunk = str(chunk)
             if chunk.startswith("data:"):
-                chunk = chunk.split("data:")[1].strip()
+                chunk = chunk[len("data:") :].strip()
             try:
                 json_data = json.loads(chunk)
             except json.JSONDecodeError:
                 continue
             if "id" in json_data and not json_data["id"]:
                 continue
-            yield self._generate_chunk(chunk, json_data)
+            yield self._generate_chunk(json_data)
 
     async def _uipath_astream(
         self,
@@ -414,18 +412,19 @@ async def _uipath_astream(
         stop: list[str] | None = None,
         run_manager: AsyncCallbackManagerForLLMRun | None = None,
         **kwargs: Any,
-    ) -> AsyncIterator[ChatGenerationChunk]:
+    ) -> AsyncGenerator[ChatGenerationChunk, None]:
         request_body = self._preprocess_request(messages, stop=stop, **kwargs)
+        request_body["stream"] = True
         async for chunk in self.uipath_astream(
             request_body=request_body, stream_type="lines", raise_status_error=True
         ):
             chunk = str(chunk)
             if chunk.startswith("data:"):
-                chunk = chunk.split("data:")[1].strip()
+                chunk = chunk[len("data:") :].strip()
             try:
                 json_data = json.loads(chunk)
             except json.JSONDecodeError:
                 continue
             if "id" in json_data and not json_data["id"]:
                 continue
-            yield self._generate_chunk(chunk, json_data)
+            yield self._generate_chunk(json_data)
diff --git a/packages/uipath_langchain_client/src/uipath_langchain_client/clients/normalized/embeddings.py b/packages/uipath_langchain_client/src/uipath_langchain_client/clients/normalized/embeddings.py
index 86498a4..e77f1d6 100644
--- a/packages/uipath_langchain_client/src/uipath_langchain_client/clients/normalized/embeddings.py
+++ b/packages/uipath_langchain_client/src/uipath_langchain_client/clients/normalized/embeddings.py
@@ -1,10 +1,8 @@
-from langchain_core.embeddings import Embeddings
-
 from uipath_langchain_client.base_client import UiPathBaseEmbeddings
 from uipath_langchain_client.settings import ApiType, RoutingMode, UiPathAPIConfig
 
 
-class UiPathEmbeddings(UiPathBaseEmbeddings, Embeddings):
+class UiPathEmbeddings(UiPathBaseEmbeddings):
     """LangChain embeddings using the UiPath's normalized embeddings API.
 
     Provides a consistent interface for generating text embeddings across all
@@ -18,14 +16,20 @@ class UiPathEmbeddings(UiPathBaseEmbeddings, Embeddings):
     )
 
     def embed_documents(self, texts: list[str]) -> list[list[float]]:
-        response = self.uipath_request(request_body={"input": texts})
+        response = self.uipath_request(
+            request_body={"input": texts},
+            raise_status_error=True,
+        )
         return [r["embedding"] for r in response.json()["data"]]
 
     def embed_query(self, text: str) -> list[float]:
         return self.embed_documents([text])[0]
 
     async def aembed_documents(self, texts: list[str]) -> list[list[float]]:
-        response = await self.uipath_arequest(request_body={"input": texts})
+        response = await self.uipath_arequest(
+            request_body={"input": texts},
+            raise_status_error=True,
+        )
         return [r["embedding"] for r in response.json()["data"]]
 
     async def aembed_query(self, text: str) -> list[float]:
diff --git a/packages/uipath_langchain_client/src/uipath_langchain_client/clients/openai/chat_models.py b/packages/uipath_langchain_client/src/uipath_langchain_client/clients/openai/chat_models.py
index b448867..4278c1f 100644
--- a/packages/uipath_langchain_client/src/uipath_langchain_client/clients/openai/chat_models.py
+++ b/packages/uipath_langchain_client/src/uipath_langchain_client/clients/openai/chat_models.py
@@ -1,12 +1,12 @@
 from collections.abc import Awaitable, Callable
 from typing import Self
 
-from httpx import URL, Request
+from httpx import Request
 from pydantic import Field, SecretStr, model_validator
 
 from uipath_langchain_client.base_client import UiPathBaseChatModel
+from uipath_langchain_client.clients.openai.utils import fix_url_and_api_flavor_header
 from uipath_langchain_client.settings import (
-    ApiFlavor,
     ApiType,
     RoutingMode,
     UiPathAPIConfig,
@@ -41,24 +41,14 @@ class UiPathChatOpenAI(UiPathBaseChatModel, ChatOpenAI):  # type: ignore[overrid
     def setup_uipath_client(self) -> Self:
         base_url = str(self.uipath_sync_client.base_url).rstrip("/")
 
-        def fix_url_and_api_flavor_header(request: Request):
-            url_suffix = str(request.url).split(base_url)[-1]
-            if "responses" in url_suffix:
-                request.headers["X-UiPath-LlmGateway-ApiFlavor"] = ApiFlavor.RESPONSES.value
-            else:
-                request.headers["X-UiPath-LlmGateway-ApiFlavor"] = ApiFlavor.CHAT_COMPLETIONS.value
-            request.url = URL(base_url)
-
-        async def fix_url_and_api_flavor_header_async(request: Request):
-            url_suffix = str(request.url).split(base_url)[-1]
-            if "responses" in url_suffix:
-                request.headers["X-UiPath-LlmGateway-ApiFlavor"] = ApiFlavor.RESPONSES.value
-            else:
-                request.headers["X-UiPath-LlmGateway-ApiFlavor"] = ApiFlavor.CHAT_COMPLETIONS.value
-            request.url = URL(base_url)
-
-        self.uipath_sync_client.event_hooks["request"].append(fix_url_and_api_flavor_header)
-        self.uipath_async_client.event_hooks["request"].append(fix_url_and_api_flavor_header_async)
+        def on_request(request: Request) -> None:
+            fix_url_and_api_flavor_header(base_url, request)
+
+        async def on_request_async(request: Request) -> None:
+            fix_url_and_api_flavor_header(base_url, request)
+
+        self.uipath_sync_client.event_hooks["request"].append(on_request)
+        self.uipath_async_client.event_hooks["request"].append(on_request_async)
 
         self.root_client = OpenAI(
             api_key="PLACEHOLDER",
@@ -95,24 +85,14 @@ class UiPathAzureChatOpenAI(UiPathBaseChatModel, AzureChatOpenAI):  # type: igno
     def setup_uipath_client(self) -> Self:
         base_url = str(self.uipath_sync_client.base_url).rstrip("/")
 
-        def fix_url_and_api_flavor_header(request: Request):
-            url_suffix = str(request.url).split(base_url)[-1]
-            if "responses" in url_suffix:
-                request.headers["X-UiPath-LlmGateway-ApiFlavor"] = ApiFlavor.RESPONSES.value
-            else:
-                request.headers["X-UiPath-LlmGateway-ApiFlavor"] = ApiFlavor.CHAT_COMPLETIONS.value
-            request.url = URL(base_url)
-
-        async def fix_url_and_api_flavor_header_async(request: Request):
-            url_suffix = str(request.url).split(base_url)[-1]
-            if "responses" in url_suffix:
-                request.headers["X-UiPath-LlmGateway-ApiFlavor"] = ApiFlavor.RESPONSES.value
-            else:
-                request.headers["X-UiPath-LlmGateway-ApiFlavor"] = ApiFlavor.CHAT_COMPLETIONS.value
-            request.url = URL(base_url)
-
-        self.uipath_sync_client.event_hooks["request"].append(fix_url_and_api_flavor_header)
-        self.uipath_async_client.event_hooks["request"].append(fix_url_and_api_flavor_header_async)
+        def on_request(request: Request) -> None:
+            fix_url_and_api_flavor_header(base_url, request)
+
+        async def on_request_async(request: Request) -> None:
+            fix_url_and_api_flavor_header(base_url, request)
+
+        self.uipath_sync_client.event_hooks["request"].append(on_request)
+        self.uipath_async_client.event_hooks["request"].append(on_request_async)
 
         self.root_client = AzureOpenAI(
             azure_endpoint="PLACEHOLDER",
diff --git a/packages/uipath_langchain_client/src/uipath_langchain_client/clients/openai/utils.py b/packages/uipath_langchain_client/src/uipath_langchain_client/clients/openai/utils.py
new file mode 100644
index 0000000..03ecbbb
--- /dev/null
+++ b/packages/uipath_langchain_client/src/uipath_langchain_client/clients/openai/utils.py
@@ -0,0 +1,26 @@
+"""Request-hook helper shared by the UiPath OpenAI and Azure chat model clients."""
+
+from httpx import URL, Request
+
+from uipath_langchain_client.settings import ApiFlavor
+
+
+def fix_url_and_api_flavor_header(base_url: str, request: Request) -> None:
+    """Tag *request* with its gateway API flavor and point it at *base_url*.
+
+    The part of the request URL after the last occurrence of *base_url* is
+    checked for the substring ``"responses"``: when found, the
+    ``X-UiPath-LlmGateway-ApiFlavor`` header is set to
+    ``ApiFlavor.RESPONSES``; otherwise to ``ApiFlavor.CHAT_COMPLETIONS``.
+    The request URL is then replaced with ``URL(base_url)``.
+
+    Args:
+        base_url: Gateway base URL; used both as the split marker and as the new URL.
+        request: The outgoing httpx request (mutated in place; nothing is returned).
+    """
+    # Everything after the last occurrence of base_url (the whole URL if absent).
+    path_tail = str(request.url).rpartition(base_url)[2]
+    flavor = ApiFlavor.RESPONSES if "responses" in path_tail else ApiFlavor.CHAT_COMPLETIONS
+    request.headers["X-UiPath-LlmGateway-ApiFlavor"] = flavor.value
+    # Replace the full request URL with the bare base URL.
+    request.url = URL(base_url)
diff --git a/packages/uipath_langchain_client/src/uipath_langchain_client/clients/vertexai/chat_models.py b/packages/uipath_langchain_client/src/uipath_langchain_client/clients/vertexai/chat_models.py
index 1d3d242..d6f1db6 100644
--- a/packages/uipath_langchain_client/src/uipath_langchain_client/clients/vertexai/chat_models.py
+++ b/packages/uipath_langchain_client/src/uipath_langchain_client/clients/vertexai/chat_models.py
@@ -41,7 +41,7 @@ def setup_uipath_client(self) -> Self:
             project_id="PLACEHOLDER",
             access_token="PLACEHOLDER",
             base_url=str(self.uipath_sync_client.base_url),
-            default_headers=self.uipath_sync_client.headers,
+            default_headers=dict(self.uipath_sync_client.headers),
             max_retries=0,  # handled by the UiPath client
             http_client=self.uipath_sync_client,
         )
@@ -50,7 +50,7 @@ def setup_uipath_client(self) -> Self:
             project_id="PLACEHOLDER",
             access_token="PLACEHOLDER",
             base_url=str(self.uipath_async_client.base_url),
-            default_headers=self.uipath_async_client.headers,
+            default_headers=dict(self.uipath_async_client.headers),
             max_retries=0,  # handled by the UiPath client
             http_client=self.uipath_async_client,
         )
diff --git a/packages/uipath_langchain_client/src/uipath_langchain_client/factory.py b/packages/uipath_langchain_client/src/uipath_langchain_client/factory.py
index 9a5804b..4e98d63 100644
--- a/packages/uipath_langchain_client/src/uipath_langchain_client/factory.py
+++ b/packages/uipath_langchain_client/src/uipath_langchain_client/factory.py
@@ -27,7 +27,7 @@
     UiPathBaseEmbeddings,
 )
 from uipath_langchain_client.settings import (
-    _API_FLAVOR_TO_VENDOR_TYPE,
+    API_FLAVOR_TO_VENDOR_TYPE,
     ApiFlavor,
     RoutingMode,
     UiPathBaseSettings,
@@ -73,7 +73,7 @@ def _get_model_info(
 
     if not matching_models:
         raise ValueError(
-            f"Model {model_name} not found in available models the available models are: {[m['modelName'] for m in available_models]}"
+            f"Model {model_name} not found. Available models are: {[m['modelName'] for m in available_models]}"
         )
 
     return matching_models[0]
@@ -143,7 +143,7 @@ def get_chat_model(
     discovered_vendor_type = model_info.get("vendor", None)
     discovered_api_flavor = model_info.get("apiFlavor", None)
     if discovered_vendor_type is None and discovered_api_flavor is not None:
-        discovered_vendor_type = _API_FLAVOR_TO_VENDOR_TYPE.get(discovered_api_flavor, None)
+        discovered_vendor_type = API_FLAVOR_TO_VENDOR_TYPE.get(discovered_api_flavor, None)
     if discovered_vendor_type is None:
         raise ValueError("No vendor type or api flavor found in model info")
     discovered_vendor_type = discovered_vendor_type.lower()
@@ -298,7 +298,17 @@ def get_embedding_model(
             **model_kwargs,
         )
 
-    discovered_vendor_type = model_info["vendor"].lower()
+    discovered_vendor_type = model_info.get("vendor")
+    if discovered_vendor_type is None:
+        discovered_api_flavor = model_info.get("apiFlavor")
+        if discovered_api_flavor is not None:
+            discovered_vendor_type = API_FLAVOR_TO_VENDOR_TYPE.get(discovered_api_flavor)
+    if discovered_vendor_type is None:
+        raise ValueError(
+            f"No vendor type found in model info for embedding model '{model_name}'. "
+            f"Model info returned: {model_info}"
+        )
+    discovered_vendor_type = discovered_vendor_type.lower()
     match discovered_vendor_type:
         case VendorType.OPENAI:
             if is_uipath_owned:
diff --git a/packages/uipath_langchain_client/src/uipath_langchain_client/settings.py b/packages/uipath_langchain_client/src/uipath_langchain_client/settings.py
index e8fc87a..32c9a4b 100644
--- a/packages/uipath_langchain_client/src/uipath_langchain_client/settings.py
+++ b/packages/uipath_langchain_client/src/uipath_langchain_client/settings.py
@@ -23,7 +23,7 @@
     get_default_client_settings,
 )
 from uipath.llm_client.settings.constants import (
-    _API_FLAVOR_TO_VENDOR_TYPE,
+    API_FLAVOR_TO_VENDOR_TYPE,
     ApiFlavor,
     ApiType,
     RoutingMode,
@@ -40,5 +40,5 @@
     "RoutingMode",
     "ApiFlavor",
     "VendorType",
-    "_API_FLAVOR_TO_VENDOR_TYPE",
+    "API_FLAVOR_TO_VENDOR_TYPE",
 ]
diff --git a/src/uipath/llm_client/__init__.py b/src/uipath/llm_client/__init__.py
index 342ef49..b437cef 100644
--- a/src/uipath/llm_client/__init__.py
+++ b/src/uipath/llm_client/__init__.py
@@ -9,20 +9,22 @@
     - uipath_llamaindex_client: LlamaIndex-compatible models
 
 Quick Start:
-    >>> from uipath.llm_client import UiPathBaseLLMClient, UiPathAPIConfig
-    >>> from uipath.llm_client.settings import get_default_client_settings
+    >>> from uipath.llm_client import UiPathHttpxClient
+    >>> from uipath.llm_client.settings import get_default_client_settings, UiPathAPIConfig
+    >>> from uipath.llm_client.settings.constants import ApiType, RoutingMode
     >>>
     >>> settings = get_default_client_settings()
-    >>> client = UiPathBaseLLMClient(
-    ...     model="gpt-4o-2024-11-20",
-    ...     api_config=UiPathAPIConfig(
-    ...         api_type=ApiType.COMPLETIONS,
-    ...         routing_mode=RoutingMode.PASSTHROUGH,
-    ...         vendor_type="openai",
-    ...     ),
-    ...     settings=settings,
+    >>> api_config = UiPathAPIConfig(
+    ...     api_type=ApiType.COMPLETIONS,
+    ...     routing_mode=RoutingMode.PASSTHROUGH,
+    ...     vendor_type="openai",
+    ... )
+    >>> client = UiPathHttpxClient(
+    ...     model_name="gpt-4o-2024-11-20",
+    ...     api_config=api_config,
+    ...     base_url=settings.build_base_url(model_name="gpt-4o-2024-11-20", api_config=api_config),
+    ...     auth=settings.build_auth_pipeline(),
     ... )
-    >>> response = client.uipath_request(request_body={...})
 """
 
 from uipath.llm_client.__version__ import __version__
diff --git a/src/uipath/llm_client/__version__.py b/src/uipath/llm_client/__version__.py
index 7da9182..c5bf7a8 100644
--- a/src/uipath/llm_client/__version__.py
+++ b/src/uipath/llm_client/__version__.py
@@ -1,3 +1,3 @@
 __title__ = "UiPath LLM Client"
 __description__ = "A Python client for interacting with UiPath's LLM services."
-__version__ = "1.5.10"
+__version__ = "1.6.0"
diff --git a/src/uipath/llm_client/clients/anthropic/client.py b/src/uipath/llm_client/clients/anthropic/client.py
index 1c5cdfc..32d900c 100644
--- a/src/uipath/llm_client/clients/anthropic/client.py
+++ b/src/uipath/llm_client/clients/anthropic/client.py
@@ -15,9 +15,9 @@
 """
 
 import logging
-from typing import Any
+from collections.abc import Mapping, Sequence
 
-from uipath.llm_client.httpx_client import UiPathHttpxAsyncClient, UiPathHttpxClient
+from uipath.llm_client.clients.utils import build_httpx_async_client, build_httpx_client
 from uipath.llm_client.settings import (
     UiPathAPIConfig,
     UiPathBaseSettings,
@@ -57,16 +57,16 @@ def _build_api_config(vendor_type: str | VendorType = VendorType.ANTHROPIC) -> U
 class UiPathAnthropic(Anthropic):
     """Anthropic client routed through UiPath LLM Gateway.
 
-    Wraps the standard Anthropic client to route requests through UiPath's
-    LLM Gateway while preserving the full Anthropic SDK interface.
-
     Args:
         model_name: The Anthropic model name (e.g., "claude-3-5-sonnet-20241022").
         byo_connection_id: Bring Your Own connection ID for custom deployments.
         client_settings: UiPath client settings. Defaults to environment-based settings.
+        timeout: Client-side request timeout in seconds.
+        max_retries: Maximum retry attempts for failed requests.
+        default_headers: Additional headers to include in requests.
+        captured_headers: Response header prefixes to capture (case-insensitive).
         retry_config: Custom retry configuration.
         logger: Logger instance for request/response logging.
-        **kwargs: Additional arguments passed to Anthropic client.
     """
 
     def __init__(
@@ -75,48 +75,45 @@ def __init__(
         model_name: str,
         byo_connection_id: str | None = None,
         client_settings: UiPathBaseSettings | None = None,
+        timeout: float | None = None,
+        max_retries: int | None = None,
+        default_headers: Mapping[str, str] | None = None,
+        captured_headers: Sequence[str] = ("x-uipath-",),
         retry_config: RetryConfig | None = None,
         logger: logging.Logger | None = None,
-        **kwargs: Any,
     ):
         client_settings = client_settings or get_default_client_settings()
-        api_config = _build_api_config()
-        httpx_client = UiPathHttpxClient(
-            model_name=model_name,
-            byo_connection_id=byo_connection_id,
-            api_config=api_config,
-            timeout=kwargs.pop("timeout", None),
-            max_retries=kwargs.pop("max_retries", None),
-            retry_config=retry_config,
-            base_url=client_settings.build_base_url(model_name=model_name, api_config=api_config),
-            headers={
-                **kwargs.pop("default_headers", {}),
-                **client_settings.build_auth_headers(model_name=model_name, api_config=api_config),
-            },
-            logger=logger,
-            auth=client_settings.build_auth_pipeline(),
-        )
         super().__init__(
             api_key="PLACEHOLDER",
             max_retries=0,
-            http_client=httpx_client,
-            **kwargs,
+            http_client=build_httpx_client(
+                model_name=model_name,
+                byo_connection_id=byo_connection_id,
+                api_config=_build_api_config(),
+                client_settings=client_settings,
+                timeout=timeout,
+                max_retries=max_retries,
+                default_headers=default_headers,
+                captured_headers=captured_headers,
+                retry_config=retry_config,
+                logger=logger,
+            ),
         )
 
 
 class UiPathAsyncAnthropic(AsyncAnthropic):
     """Async Anthropic client routed through UiPath LLM Gateway.
 
-    Wraps the standard AsyncAnthropic client to route requests through UiPath's
-    LLM Gateway while preserving the full Anthropic SDK interface.
-
     Args:
         model_name: The Anthropic model name (e.g., "claude-3-5-sonnet-20241022").
         byo_connection_id: Bring Your Own connection ID for custom deployments.
         client_settings: UiPath client settings. Defaults to environment-based settings.
+        timeout: Client-side request timeout in seconds.
+        max_retries: Maximum retry attempts for failed requests.
+        default_headers: Additional headers to include in requests.
+        captured_headers: Response header prefixes to capture (case-insensitive).
         retry_config: Custom retry configuration.
         logger: Logger instance for request/response logging.
-        **kwargs: Additional arguments passed to AsyncAnthropic client.
     """
 
     def __init__(
@@ -125,48 +122,45 @@ def __init__(
         model_name: str,
         byo_connection_id: str | None = None,
         client_settings: UiPathBaseSettings | None = None,
+        timeout: float | None = None,
+        max_retries: int | None = None,
+        default_headers: Mapping[str, str] | None = None,
+        captured_headers: Sequence[str] = ("x-uipath-",),
         retry_config: RetryConfig | None = None,
         logger: logging.Logger | None = None,
-        **kwargs: Any,
     ):
         client_settings = client_settings or get_default_client_settings()
-        api_config = _build_api_config()
-        httpx_client = UiPathHttpxAsyncClient(
-            model_name=model_name,
-            byo_connection_id=byo_connection_id,
-            api_config=api_config,
-            timeout=kwargs.pop("timeout", None),
-            max_retries=kwargs.pop("max_retries", None),
-            retry_config=retry_config,
-            base_url=client_settings.build_base_url(model_name=model_name, api_config=api_config),
-            headers={
-                **kwargs.pop("default_headers", {}),
-                **client_settings.build_auth_headers(model_name=model_name, api_config=api_config),
-            },
-            logger=logger,
-            auth=client_settings.build_auth_pipeline(),
-        )
         super().__init__(
             api_key="PLACEHOLDER",
             max_retries=0,
-            http_client=httpx_client,
-            **kwargs,
+            http_client=build_httpx_async_client(
+                model_name=model_name,
+                byo_connection_id=byo_connection_id,
+                api_config=_build_api_config(),
+                client_settings=client_settings,
+                timeout=timeout,
+                max_retries=max_retries,
+                default_headers=default_headers,
+                captured_headers=captured_headers,
+                retry_config=retry_config,
+                logger=logger,
+            ),
         )
 
 
 class UiPathAnthropicBedrock(AnthropicBedrock):
     """Anthropic Bedrock client routed through UiPath LLM Gateway.
 
-    Wraps the AnthropicBedrock client to route requests through UiPath's
-    LLM Gateway while preserving the full Anthropic SDK interface.
-
     Args:
         model_name: The Anthropic model name.
         byo_connection_id: Bring Your Own connection ID for custom deployments.
         client_settings: UiPath client settings. Defaults to environment-based settings.
+        timeout: Client-side request timeout in seconds.
+        max_retries: Maximum retry attempts for failed requests.
+        default_headers: Additional headers to include in requests.
+        captured_headers: Response header prefixes to capture (case-insensitive).
         retry_config: Custom retry configuration.
         logger: Logger instance for request/response logging.
-        **kwargs: Additional arguments passed to AnthropicBedrock client.
     """
 
     def __init__(
@@ -175,50 +169,47 @@ def __init__(
         model_name: str,
         byo_connection_id: str | None = None,
         client_settings: UiPathBaseSettings | None = None,
+        timeout: float | None = None,
+        max_retries: int | None = None,
+        default_headers: Mapping[str, str] | None = None,
+        captured_headers: Sequence[str] = ("x-uipath-",),
         retry_config: RetryConfig | None = None,
         logger: logging.Logger | None = None,
-        **kwargs: Any,
     ):
         client_settings = client_settings or get_default_client_settings()
-        api_config = _build_api_config(vendor_type=VendorType.AWSBEDROCK)
-        httpx_client = UiPathHttpxClient(
-            model_name=model_name,
-            byo_connection_id=byo_connection_id,
-            api_config=api_config,
-            timeout=kwargs.pop("timeout", None),
-            max_retries=kwargs.pop("max_retries", None),
-            retry_config=retry_config,
-            base_url=client_settings.build_base_url(model_name=model_name, api_config=api_config),
-            headers={
-                **kwargs.pop("default_headers", {}),
-                **client_settings.build_auth_headers(model_name=model_name, api_config=api_config),
-            },
-            logger=logger,
-            auth=client_settings.build_auth_pipeline(),
-        )
         super().__init__(
             aws_access_key="PLACEHOLDER",
             aws_secret_key="PLACEHOLDER",
             aws_region="PLACEHOLDER",
             max_retries=0,
-            http_client=httpx_client,
-            **kwargs,
+            http_client=build_httpx_client(
+                model_name=model_name,
+                byo_connection_id=byo_connection_id,
+                api_config=_build_api_config(vendor_type=VendorType.AWSBEDROCK),
+                client_settings=client_settings,
+                timeout=timeout,
+                max_retries=max_retries,
+                default_headers=default_headers,
+                captured_headers=captured_headers,
+                retry_config=retry_config,
+                logger=logger,
+            ),
         )
 
 
 class UiPathAsyncAnthropicBedrock(AsyncAnthropicBedrock):
     """Async Anthropic Bedrock client routed through UiPath LLM Gateway.
 
-    Wraps the AsyncAnthropicBedrock client to route requests through UiPath's
-    LLM Gateway while preserving the full Anthropic SDK interface.
-
     Args:
         model_name: The Anthropic model name.
         byo_connection_id: Bring Your Own connection ID for custom deployments.
         client_settings: UiPath client settings. Defaults to environment-based settings.
+        timeout: Client-side request timeout in seconds.
+        max_retries: Maximum retry attempts for failed requests.
+        default_headers: Additional headers to include in requests.
+        captured_headers: Response header prefixes to capture (case-insensitive).
         retry_config: Custom retry configuration.
         logger: Logger instance for request/response logging.
-        **kwargs: Additional arguments passed to AsyncAnthropicBedrock client.
     """
 
     def __init__(
@@ -227,50 +218,47 @@ def __init__(
         model_name: str,
         byo_connection_id: str | None = None,
         client_settings: UiPathBaseSettings | None = None,
+        timeout: float | None = None,
+        max_retries: int | None = None,
+        default_headers: Mapping[str, str] | None = None,
+        captured_headers: Sequence[str] = ("x-uipath-",),
         retry_config: RetryConfig | None = None,
         logger: logging.Logger | None = None,
-        **kwargs: Any,
     ):
         client_settings = client_settings or get_default_client_settings()
-        api_config = _build_api_config(vendor_type=VendorType.AWSBEDROCK)
-        httpx_client = UiPathHttpxAsyncClient(
-            model_name=model_name,
-            byo_connection_id=byo_connection_id,
-            api_config=api_config,
-            timeout=kwargs.pop("timeout", None),
-            max_retries=kwargs.pop("max_retries", None),
-            retry_config=retry_config,
-            base_url=client_settings.build_base_url(model_name=model_name, api_config=api_config),
-            headers={
-                **kwargs.pop("default_headers", {}),
-                **client_settings.build_auth_headers(model_name=model_name, api_config=api_config),
-            },
-            logger=logger,
-            auth=client_settings.build_auth_pipeline(),
-        )
         super().__init__(
             aws_access_key="PLACEHOLDER",
             aws_secret_key="PLACEHOLDER",
             aws_region="PLACEHOLDER",
             max_retries=0,
-            http_client=httpx_client,
-            **kwargs,
+            http_client=build_httpx_async_client(
+                model_name=model_name,
+                byo_connection_id=byo_connection_id,
+                api_config=_build_api_config(vendor_type=VendorType.AWSBEDROCK),
+                client_settings=client_settings,
+                timeout=timeout,
+                max_retries=max_retries,
+                default_headers=default_headers,
+                captured_headers=captured_headers,
+                retry_config=retry_config,
+                logger=logger,
+            ),
         )
 
 
 class UiPathAnthropicVertex(AnthropicVertex):
     """Anthropic Vertex client routed through UiPath LLM Gateway.
 
-    Wraps the AnthropicVertex client to route requests through UiPath's
-    LLM Gateway while preserving the full Anthropic SDK interface.
-
     Args:
         model_name: The Anthropic model name.
         byo_connection_id: Bring Your Own connection ID for custom deployments.
         client_settings: UiPath client settings. Defaults to environment-based settings.
+        timeout: Client-side request timeout in seconds.
+        max_retries: Maximum retry attempts for failed requests.
+        default_headers: Additional headers to include in requests.
+        captured_headers: Response header prefixes to capture (case-insensitive).
         retry_config: Custom retry configuration.
         logger: Logger instance for request/response logging.
-        **kwargs: Additional arguments passed to AnthropicVertex client.
     """
 
     def __init__(
@@ -279,50 +267,47 @@ def __init__(
         model_name: str,
         byo_connection_id: str | None = None,
         client_settings: UiPathBaseSettings | None = None,
+        timeout: float | None = None,
+        max_retries: int | None = None,
+        default_headers: Mapping[str, str] | None = None,
+        captured_headers: Sequence[str] = ("x-uipath-",),
         retry_config: RetryConfig | None = None,
         logger: logging.Logger | None = None,
-        **kwargs: Any,
     ):
         client_settings = client_settings or get_default_client_settings()
-        api_config = _build_api_config(vendor_type=VendorType.VERTEXAI)
-        httpx_client = UiPathHttpxClient(
-            model_name=model_name,
-            byo_connection_id=byo_connection_id,
-            api_config=api_config,
-            timeout=kwargs.pop("timeout", None),
-            max_retries=kwargs.pop("max_retries", None),
-            retry_config=retry_config,
-            base_url=client_settings.build_base_url(model_name=model_name, api_config=api_config),
-            headers={
-                **kwargs.pop("default_headers", {}),
-                **client_settings.build_auth_headers(model_name=model_name, api_config=api_config),
-            },
-            logger=logger,
-            auth=client_settings.build_auth_pipeline(),
-        )
         super().__init__(
             region="PLACEHOLDER",
             project_id="PLACEHOLDER",
             access_token="PLACEHOLDER",
             max_retries=0,
-            http_client=httpx_client,
-            **kwargs,
+            http_client=build_httpx_client(
+                model_name=model_name,
+                byo_connection_id=byo_connection_id,
+                api_config=_build_api_config(vendor_type=VendorType.VERTEXAI),
+                client_settings=client_settings,
+                timeout=timeout,
+                max_retries=max_retries,
+                default_headers=default_headers,
+                captured_headers=captured_headers,
+                retry_config=retry_config,
+                logger=logger,
+            ),
         )
 
 
 class UiPathAsyncAnthropicVertex(AsyncAnthropicVertex):
     """Async Anthropic Vertex client routed through UiPath LLM Gateway.
 
-    Wraps the AsyncAnthropicVertex client to route requests through UiPath's
-    LLM Gateway while preserving the full Anthropic SDK interface.
-
     Args:
         model_name: The Anthropic model name.
         byo_connection_id: Bring Your Own connection ID for custom deployments.
         client_settings: UiPath client settings. Defaults to environment-based settings.
+        timeout: Client-side request timeout in seconds.
+        max_retries: Maximum retry attempts for failed requests.
+        default_headers: Additional headers to include in requests.
+        captured_headers: Response header prefixes to capture (case-insensitive).
         retry_config: Custom retry configuration.
         logger: Logger instance for request/response logging.
-        **kwargs: Additional arguments passed to AsyncAnthropicVertex client.
     """
 
     def __init__(
@@ -331,50 +316,47 @@ def __init__(
         model_name: str,
         byo_connection_id: str | None = None,
         client_settings: UiPathBaseSettings | None = None,
+        timeout: float | None = None,
+        max_retries: int | None = None,
+        default_headers: Mapping[str, str] | None = None,
+        captured_headers: Sequence[str] = ("x-uipath-",),
         retry_config: RetryConfig | None = None,
         logger: logging.Logger | None = None,
-        **kwargs: Any,
     ):
         client_settings = client_settings or get_default_client_settings()
-        api_config = _build_api_config(vendor_type=VendorType.VERTEXAI)
-        httpx_client = UiPathHttpxAsyncClient(
-            model_name=model_name,
-            byo_connection_id=byo_connection_id,
-            api_config=api_config,
-            timeout=kwargs.pop("timeout", None),
-            max_retries=kwargs.pop("max_retries", None),
-            retry_config=retry_config,
-            base_url=client_settings.build_base_url(model_name=model_name, api_config=api_config),
-            headers={
-                **kwargs.pop("default_headers", {}),
-                **client_settings.build_auth_headers(model_name=model_name, api_config=api_config),
-            },
-            logger=logger,
-            auth=client_settings.build_auth_pipeline(),
-        )
         super().__init__(
             region="PLACEHOLDER",
             project_id="PLACEHOLDER",
             access_token="PLACEHOLDER",
             max_retries=0,
-            http_client=httpx_client,
-            **kwargs,
+            http_client=build_httpx_async_client(
+                model_name=model_name,
+                byo_connection_id=byo_connection_id,
+                api_config=_build_api_config(vendor_type=VendorType.VERTEXAI),
+                client_settings=client_settings,
+                timeout=timeout,
+                max_retries=max_retries,
+                default_headers=default_headers,
+                captured_headers=captured_headers,
+                retry_config=retry_config,
+                logger=logger,
+            ),
         )
 
 
 class UiPathAnthropicFoundry(AnthropicFoundry):
     """Anthropic Foundry (Azure) client routed through UiPath LLM Gateway.
 
-    Wraps the AnthropicFoundry client to route requests through UiPath's
-    LLM Gateway while preserving the full Anthropic SDK interface.
-
     Args:
         model_name: The Anthropic model name.
         byo_connection_id: Bring Your Own connection ID for custom deployments.
         client_settings: UiPath client settings. Defaults to environment-based settings.
+        timeout: Client-side request timeout in seconds.
+        max_retries: Maximum retry attempts for failed requests.
+        default_headers: Additional headers to include in requests.
+        captured_headers: Response header prefixes to capture (case-insensitive).
         retry_config: Custom retry configuration.
         logger: Logger instance for request/response logging.
-        **kwargs: Additional arguments passed to AnthropicFoundry client.
     """
 
     def __init__(
@@ -383,48 +365,45 @@ def __init__(
         model_name: str,
         byo_connection_id: str | None = None,
         client_settings: UiPathBaseSettings | None = None,
+        timeout: float | None = None,
+        max_retries: int | None = None,
+        default_headers: Mapping[str, str] | None = None,
+        captured_headers: Sequence[str] = ("x-uipath-",),
         retry_config: RetryConfig | None = None,
         logger: logging.Logger | None = None,
-        **kwargs: Any,
     ):
         client_settings = client_settings or get_default_client_settings()
-        api_config = _build_api_config(vendor_type=VendorType.AZURE)
-        httpx_client = UiPathHttpxClient(
-            model_name=model_name,
-            byo_connection_id=byo_connection_id,
-            api_config=api_config,
-            timeout=kwargs.pop("timeout", None),
-            max_retries=kwargs.pop("max_retries", None),
-            retry_config=retry_config,
-            base_url=client_settings.build_base_url(model_name=model_name, api_config=api_config),
-            headers={
-                **kwargs.pop("default_headers", {}),
-                **client_settings.build_auth_headers(model_name=model_name, api_config=api_config),
-            },
-            logger=logger,
-            auth=client_settings.build_auth_pipeline(),
-        )
         super().__init__(
             api_key="PLACEHOLDER",
             max_retries=0,
-            http_client=httpx_client,
-            **kwargs,
+            http_client=build_httpx_client(
+                model_name=model_name,
+                byo_connection_id=byo_connection_id,
+                api_config=_build_api_config(vendor_type=VendorType.AZURE),
+                client_settings=client_settings,
+                timeout=timeout,
+                max_retries=max_retries,
+                default_headers=default_headers,
+                captured_headers=captured_headers,
+                retry_config=retry_config,
+                logger=logger,
+            ),
         )
 
 
 class UiPathAsyncAnthropicFoundry(AsyncAnthropicFoundry):
     """Async Anthropic Foundry (Azure) client routed through UiPath LLM Gateway.
 
-    Wraps the AsyncAnthropicFoundry client to route requests through UiPath's
-    LLM Gateway while preserving the full Anthropic SDK interface.
-
     Args:
         model_name: The Anthropic model name.
         byo_connection_id: Bring Your Own connection ID for custom deployments.
         client_settings: UiPath client settings. Defaults to environment-based settings.
+        timeout: Client-side request timeout in seconds.
+        max_retries: Maximum retry attempts for failed requests.
+        default_headers: Additional headers to include in requests.
+        captured_headers: Response header prefixes to capture (case-insensitive).
         retry_config: Custom retry configuration.
         logger: Logger instance for request/response logging.
-        **kwargs: Additional arguments passed to AsyncAnthropicFoundry client.
     """
 
     def __init__(
@@ -433,30 +412,27 @@ def __init__(
         model_name: str,
         byo_connection_id: str | None = None,
         client_settings: UiPathBaseSettings | None = None,
+        timeout: float | None = None,
+        max_retries: int | None = None,
+        default_headers: Mapping[str, str] | None = None,
+        captured_headers: Sequence[str] = ("x-uipath-",),
         retry_config: RetryConfig | None = None,
         logger: logging.Logger | None = None,
-        **kwargs: Any,
     ):
         client_settings = client_settings or get_default_client_settings()
-        api_config = _build_api_config(vendor_type=VendorType.AZURE)
-        httpx_client = UiPathHttpxAsyncClient(
-            model_name=model_name,
-            byo_connection_id=byo_connection_id,
-            api_config=api_config,
-            timeout=kwargs.pop("timeout", None),
-            max_retries=kwargs.pop("max_retries", None),
-            retry_config=retry_config,
-            base_url=client_settings.build_base_url(model_name=model_name, api_config=api_config),
-            headers={
-                **kwargs.pop("default_headers", {}),
-                **client_settings.build_auth_headers(model_name=model_name, api_config=api_config),
-            },
-            logger=logger,
-            auth=client_settings.build_auth_pipeline(),
-        )
         super().__init__(
             api_key="PLACEHOLDER",
             max_retries=0,
-            http_client=httpx_client,
-            **kwargs,
+            http_client=build_httpx_async_client(
+                model_name=model_name,
+                byo_connection_id=byo_connection_id,
+                api_config=_build_api_config(vendor_type=VendorType.AZURE),
+                client_settings=client_settings,
+                timeout=timeout,
+                max_retries=max_retries,
+                default_headers=default_headers,
+                captured_headers=captured_headers,
+                retry_config=retry_config,
+                logger=logger,
+            ),
         )
diff --git a/src/uipath/llm_client/clients/google/client.py b/src/uipath/llm_client/clients/google/client.py
index 6e1ae88..d78d371 100644
--- a/src/uipath/llm_client/clients/google/client.py
+++ b/src/uipath/llm_client/clients/google/client.py
@@ -1,7 +1,7 @@
 import logging
-from typing import Any
+from collections.abc import Mapping, Sequence
 
-from uipath.llm_client.httpx_client import UiPathHttpxAsyncClient, UiPathHttpxClient
+from uipath.llm_client.clients.utils import build_httpx_async_client, build_httpx_client
 from uipath.llm_client.settings import (
     UiPathAPIConfig,
     UiPathBaseSettings,
@@ -21,15 +21,32 @@
 
 
 class UiPathGoogle(Client):
+    """Google GenAI client routed through UiPath LLM Gateway.
+
+    Args:
+        model_name: The Google model name (e.g., "gemini-2.5-flash").
+        byo_connection_id: Bring Your Own connection ID for custom deployments.
+        client_settings: UiPath client settings. Defaults to environment-based settings.
+        timeout: Client-side request timeout in seconds.
+        max_retries: Maximum retry attempts for failed requests.
+        default_headers: Additional headers to include in requests.
+        captured_headers: Response header prefixes to capture (case-insensitive).
+        retry_config: Custom retry configuration.
+        logger: Logger instance for request/response logging.
+    """
+
     def __init__(
         self,
         *,
         model_name: str,
         byo_connection_id: str | None = None,
         client_settings: UiPathBaseSettings | None = None,
+        timeout: float | None = None,
+        max_retries: int | None = None,
+        default_headers: Mapping[str, str] | None = None,
+        captured_headers: Sequence[str] = ("x-uipath-",),
         retry_config: RetryConfig | None = None,
         logger: logging.Logger | None = None,
-        **kwargs: Any,
     ):
         client_settings = client_settings or get_default_client_settings()
         api_config = UiPathAPIConfig(
@@ -40,35 +57,29 @@ def __init__(
             api_version="v1beta1",
             freeze_base_url=True,
         )
-        httpx_client = UiPathHttpxClient(
+        httpx_client = build_httpx_client(
             model_name=model_name,
             byo_connection_id=byo_connection_id,
             api_config=api_config,
-            timeout=kwargs.pop("timeout", None),
-            max_retries=kwargs.pop("max_retries", None),
+            client_settings=client_settings,
+            timeout=timeout,
+            max_retries=max_retries,
+            default_headers=default_headers,
+            captured_headers=captured_headers,
             retry_config=retry_config,
-            base_url=client_settings.build_base_url(model_name=model_name, api_config=api_config),
-            headers={
-                **kwargs.pop("default_headers", {}),
-                **client_settings.build_auth_headers(model_name=model_name, api_config=api_config),
-            },
             logger=logger,
-            auth=client_settings.build_auth_pipeline(),
         )
-        httpx_async_client = UiPathHttpxAsyncClient(
+        httpx_async_client = build_httpx_async_client(
             model_name=model_name,
             byo_connection_id=byo_connection_id,
             api_config=api_config,
-            timeout=kwargs.pop("timeout", None),
-            max_retries=kwargs.pop("max_retries", None),
+            client_settings=client_settings,
+            timeout=timeout,
+            max_retries=max_retries,
+            default_headers=default_headers,
+            captured_headers=captured_headers,
             retry_config=retry_config,
-            base_url=client_settings.build_base_url(model_name=model_name, api_config=api_config),
-            headers={
-                **kwargs.pop("default_headers", {}),
-                **client_settings.build_auth_headers(model_name=model_name, api_config=api_config),
-            },
             logger=logger,
-            auth=client_settings.build_auth_pipeline(),
         )
         super().__init__(
             api_key="PLACEHOLDER",
diff --git a/src/uipath/llm_client/clients/openai/client.py b/src/uipath/llm_client/clients/openai/client.py
index cb9b82d..f98247b 100644
--- a/src/uipath/llm_client/clients/openai/client.py
+++ b/src/uipath/llm_client/clients/openai/client.py
@@ -1,8 +1,8 @@
 import logging
-from typing import Any
+from collections.abc import Mapping, Sequence
 
 from uipath.llm_client.clients.openai.utils import OpenAIRequestHandler
-from uipath.llm_client.httpx_client import UiPathHttpxAsyncClient, UiPathHttpxClient
+from uipath.llm_client.clients.utils import build_httpx_async_client, build_httpx_client
 from uipath.llm_client.settings import get_default_client_settings
 from uipath.llm_client.settings.base import UiPathBaseSettings
 from uipath.llm_client.utils.retry import RetryConfig
@@ -17,29 +17,47 @@
 
 
 class UiPathOpenAI(OpenAI):
+    """OpenAI client routed through UiPath LLM Gateway.
+
+    Wraps the standard OpenAI client to route requests through UiPath's
+    LLM Gateway while preserving the full OpenAI SDK interface.
+
+    Args:
+        model_name: The OpenAI model name (e.g., "gpt-4o-2024-11-20").
+        byo_connection_id: Bring Your Own connection ID for custom deployments.
+        client_settings: UiPath client settings. Defaults to environment-based settings.
+        timeout: Client-side request timeout in seconds.
+        max_retries: Maximum retry attempts for failed requests.
+        default_headers: Additional headers to include in requests.
+        captured_headers: Response header prefixes to capture (case-insensitive).
+        retry_config: Custom retry configuration.
+        logger: Logger instance for request/response logging.
+    """
+
     def __init__(
         self,
         *,
         model_name: str,
         byo_connection_id: str | None = None,
         client_settings: UiPathBaseSettings | None = None,
+        timeout: float | None = None,
+        max_retries: int | None = None,
+        default_headers: Mapping[str, str] | None = None,
+        captured_headers: Sequence[str] = ("x-uipath-",),
         retry_config: RetryConfig | None = None,
         logger: logging.Logger | None = None,
-        **kwargs: Any,
     ):
         client_settings = client_settings or get_default_client_settings()
-        timeout = kwargs.pop("timeout", None)
-        max_retries = kwargs.pop("max_retries", None)
-        default_headers = kwargs.pop("default_headers", None)
-        httpx_client = UiPathHttpxClient(
+        httpx_client = build_httpx_client(
             model_name=model_name,
             byo_connection_id=byo_connection_id,
+            client_settings=client_settings,
             timeout=timeout,
             max_retries=max_retries,
-            headers=default_headers,
+            default_headers=default_headers,
+            captured_headers=captured_headers,
             retry_config=retry_config,
             logger=logger,
-            auth=client_settings.build_auth_pipeline(),
             event_hooks={
                 "request": [
                     OpenAIRequestHandler(
@@ -57,29 +75,47 @@ def __init__(
 
 
 class UiPathAsyncOpenAI(AsyncOpenAI):
+    """Async OpenAI client routed through UiPath LLM Gateway.
+
+    Wraps the standard AsyncOpenAI client to route requests through UiPath's
+    LLM Gateway while preserving the full OpenAI SDK interface.
+
+    Args:
+        model_name: The OpenAI model name (e.g., "gpt-4o-2024-11-20").
+        byo_connection_id: Bring Your Own connection ID for custom deployments.
+        client_settings: UiPath client settings. Defaults to environment-based settings.
+        timeout: Client-side request timeout in seconds.
+        max_retries: Maximum retry attempts for failed requests.
+        default_headers: Additional headers to include in requests.
+        captured_headers: Response header prefixes to capture (case-insensitive).
+        retry_config: Custom retry configuration.
+        logger: Logger instance for request/response logging.
+    """
+
     def __init__(
         self,
         *,
         model_name: str,
         byo_connection_id: str | None = None,
         client_settings: UiPathBaseSettings | None = None,
+        timeout: float | None = None,
+        max_retries: int | None = None,
+        default_headers: Mapping[str, str] | None = None,
+        captured_headers: Sequence[str] = ("x-uipath-",),
         retry_config: RetryConfig | None = None,
         logger: logging.Logger | None = None,
-        **kwargs: Any,
     ):
         client_settings = client_settings or get_default_client_settings()
-        timeout = kwargs.pop("timeout", None)
-        max_retries = kwargs.pop("max_retries", None)
-        default_headers = kwargs.pop("default_headers", None)
-        httpx_client = UiPathHttpxAsyncClient(
+        httpx_client = build_httpx_async_client(
             model_name=model_name,
             byo_connection_id=byo_connection_id,
+            client_settings=client_settings,
             timeout=timeout,
             max_retries=max_retries,
-            headers=default_headers,
+            default_headers=default_headers,
+            captured_headers=captured_headers,
             retry_config=retry_config,
             logger=logger,
-            auth=client_settings.build_auth_pipeline(),
             event_hooks={
                 "request": [
                     OpenAIRequestHandler(
@@ -97,29 +133,47 @@ def __init__(
 
 
 class UiPathAzureOpenAI(AzureOpenAI):
+    """Azure OpenAI client routed through UiPath LLM Gateway.
+
+    Wraps the AzureOpenAI client to route requests through UiPath's
+    LLM Gateway while preserving the full Azure OpenAI SDK interface.
+
+    Args:
+        model_name: The model name (e.g., "gpt-4o-2024-11-20").
+        byo_connection_id: Bring Your Own connection ID for custom deployments.
+        client_settings: UiPath client settings. Defaults to environment-based settings.
+        timeout: Client-side request timeout in seconds.
+        max_retries: Maximum retry attempts for failed requests.
+        default_headers: Additional headers to include in requests.
+        captured_headers: Response header prefixes to capture (case-insensitive).
+        retry_config: Custom retry configuration.
+        logger: Logger instance for request/response logging.
+    """
+
     def __init__(
         self,
         *,
         model_name: str,
         byo_connection_id: str | None = None,
         client_settings: UiPathBaseSettings | None = None,
+        timeout: float | None = None,
+        max_retries: int | None = None,
+        default_headers: Mapping[str, str] | None = None,
+        captured_headers: Sequence[str] = ("x-uipath-",),
         retry_config: RetryConfig | None = None,
         logger: logging.Logger | None = None,
-        **kwargs: Any,
     ):
         client_settings = client_settings or get_default_client_settings()
-        timeout = kwargs.pop("timeout", None)
-        max_retries = kwargs.pop("max_retries", None)
-        default_headers = kwargs.pop("default_headers", None)
-        httpx_client = UiPathHttpxClient(
+        httpx_client = build_httpx_client(
             model_name=model_name,
             byo_connection_id=byo_connection_id,
+            client_settings=client_settings,
             timeout=timeout,
             max_retries=max_retries,
-            headers=default_headers,
+            default_headers=default_headers,
+            captured_headers=captured_headers,
             retry_config=retry_config,
             logger=logger,
-            auth=client_settings.build_auth_pipeline(),
             event_hooks={
                 "request": [
                     OpenAIRequestHandler(
@@ -138,29 +192,47 @@ def __init__(
 
 
 class UiPathAsyncAzureOpenAI(AsyncAzureOpenAI):
+    """Async Azure OpenAI client routed through UiPath LLM Gateway.
+
+    Wraps the AsyncAzureOpenAI client to route requests through UiPath's
+    LLM Gateway while preserving the full Azure OpenAI SDK interface.
+
+    Args:
+        model_name: The model name (e.g., "gpt-4o-2024-11-20").
+        byo_connection_id: Bring Your Own connection ID for custom deployments.
+        client_settings: UiPath client settings. Defaults to environment-based settings.
+        timeout: Client-side request timeout in seconds.
+        max_retries: Maximum retry attempts for failed requests.
+        default_headers: Additional headers to include in requests.
+        captured_headers: Response header prefixes to capture (case-insensitive).
+        retry_config: Custom retry configuration.
+        logger: Logger instance for request/response logging.
+    """
+
     def __init__(
         self,
         *,
         model_name: str,
         byo_connection_id: str | None = None,
         client_settings: UiPathBaseSettings | None = None,
+        timeout: float | None = None,
+        max_retries: int | None = None,
+        default_headers: Mapping[str, str] | None = None,
+        captured_headers: Sequence[str] = ("x-uipath-",),
         retry_config: RetryConfig | None = None,
         logger: logging.Logger | None = None,
-        **kwargs: Any,
     ):
         client_settings = client_settings or get_default_client_settings()
-        timeout = kwargs.pop("timeout", None)
-        max_retries = kwargs.pop("max_retries", None)
-        default_headers = kwargs.pop("default_headers", None)
-        httpx_client = UiPathHttpxAsyncClient(
+        httpx_client = build_httpx_async_client(
             model_name=model_name,
             byo_connection_id=byo_connection_id,
+            client_settings=client_settings,
             timeout=timeout,
             max_retries=max_retries,
-            headers=default_headers,
+            default_headers=default_headers,
+            captured_headers=captured_headers,
             retry_config=retry_config,
             logger=logger,
-            auth=client_settings.build_auth_pipeline(),
             event_hooks={
                 "request": [
                     OpenAIRequestHandler(
diff --git a/src/uipath/llm_client/clients/openai/utils.py b/src/uipath/llm_client/clients/openai/utils.py
index 40c0473..9841efe 100644
--- a/src/uipath/llm_client/clients/openai/utils.py
+++ b/src/uipath/llm_client/clients/openai/utils.py
@@ -1,9 +1,13 @@
+import logging
+
 from httpx import URL, Request
 
 from uipath.llm_client.httpx_client import build_routing_headers
 from uipath.llm_client.settings.base import UiPathAPIConfig, UiPathBaseSettings
 from uipath.llm_client.settings.constants import ApiFlavor, ApiType, RoutingMode, VendorType
 
+_logger = logging.getLogger(__name__)
+
 
 class OpenAIRequestHandler:
     def __init__(
@@ -22,55 +26,40 @@ def __init__(
             freeze_base_url=False,
         )
 
+    def _apply_routing(self, request: Request, api_config: UiPathAPIConfig) -> None:
+        """Apply UiPath routing headers and URL rewriting to the request."""
+        request.headers.update(
+            build_routing_headers(
+                model_name=self.model_name,
+                byo_connection_id=self.byo_connection_id,
+                api_config=api_config,
+            )
+        )
+        request.url = URL(
+            self.client_settings.build_base_url(
+                model_name=self.model_name, api_config=api_config
+            )
+        )
+
     def fix_url_and_headers(self, request: Request):
         if request.url.path.endswith("/completions"):
             api_config = self.base_api_config.model_copy(
                 update={"api_flavor": ApiFlavor.CHAT_COMPLETIONS, "api_type": ApiType.COMPLETIONS}
             )
-            request.headers.update(
-                build_routing_headers(
-                    model_name=self.model_name,
-                    byo_connection_id=self.byo_connection_id,
-                    api_config=api_config,
-                )
-            )
-            request.url = URL(
-                self.client_settings.build_base_url(
-                    model_name=self.model_name, api_config=api_config
-                )
-            )
+            self._apply_routing(request, api_config)
         elif request.url.path.endswith("/responses"):
             api_config = self.base_api_config.model_copy(
                 update={"api_flavor": ApiFlavor.RESPONSES, "api_type": ApiType.COMPLETIONS}
             )
-            request.headers.update(
-                build_routing_headers(
-                    model_name=self.model_name,
-                    byo_connection_id=self.byo_connection_id,
-                    api_config=api_config,
-                )
-            )
-            request.url = URL(
-                self.client_settings.build_base_url(
-                    model_name=self.model_name, api_config=api_config
-                )
-            )
+            self._apply_routing(request, api_config)
         elif request.url.path.endswith("/embeddings"):
             api_config = self.base_api_config.model_copy(update={"api_type": ApiType.EMBEDDINGS})
-            request.headers.update(
-                build_routing_headers(
-                    model_name=self.model_name,
-                    byo_connection_id=self.byo_connection_id,
-                    api_config=api_config,
-                )
-            )
-            request.url = URL(
-                self.client_settings.build_base_url(
-                    model_name=self.model_name, api_config=api_config
-                )
-            )
+            self._apply_routing(request, api_config)
         else:
-            raise ValueError(f"Unsupported API endpoint: {request.url.path}")
+            _logger.debug(
+                "Unrecognized API endpoint '%s' — passing through without UiPath routing.",
+                request.url.path,
+            )
 
     async def fix_url_and_headers_async(self, request: Request):
         self.fix_url_and_headers(request)
diff --git a/src/uipath/llm_client/clients/utils.py b/src/uipath/llm_client/clients/utils.py
new file mode 100644
index 0000000..639668d
--- /dev/null
+++ b/src/uipath/llm_client/clients/utils.py
@@ -0,0 +1,107 @@
+"""Shared utilities for building UiPath-configured httpx clients."""
+
+import logging
+from collections.abc import Callable, Mapping, Sequence
+from typing import Any
+
+from uipath.llm_client.httpx_client import UiPathHttpxAsyncClient, UiPathHttpxClient
+from uipath.llm_client.settings.base import UiPathAPIConfig, UiPathBaseSettings
+from uipath.llm_client.utils.retry import RetryConfig
+
+
+def build_httpx_client(
+    *,
+    model_name: str,
+    byo_connection_id: str | None,
+    client_settings: UiPathBaseSettings,
+    timeout: float | None,
+    max_retries: int | None,
+    default_headers: Mapping[str, str] | None,
+    captured_headers: Sequence[str],
+    retry_config: RetryConfig | None,
+    logger: logging.Logger | None,
+    api_config: UiPathAPIConfig | None = None,
+    event_hooks: dict[str, list[Callable[..., Any]]] | None = None,
+) -> UiPathHttpxClient:
+    """Build a sync UiPath httpx client with auth, routing headers, and retry.
+
+    When *api_config* is provided the base URL and routing/auth headers are
+    derived from *client_settings*.  When it is ``None`` (e.g. for OpenAI
+    clients that resolve routing per-request via event hooks) those are
+    omitted and only the auth pipeline, default headers and retry transport
+    are configured.
+    """
+    headers: dict[str, str] = {**(default_headers or {})}
+    kwargs: dict[str, Any] = {}
+
+    if api_config is not None:
+        headers.update(
+            client_settings.build_auth_headers(model_name=model_name, api_config=api_config)
+        )
+        kwargs["base_url"] = client_settings.build_base_url(
+            model_name=model_name, api_config=api_config
+        )
+
+    if event_hooks is not None:
+        kwargs["event_hooks"] = event_hooks
+
+    return UiPathHttpxClient(
+        model_name=model_name,
+        byo_connection_id=byo_connection_id,
+        api_config=api_config,
+        timeout=timeout,
+        max_retries=max_retries,
+        retry_config=retry_config,
+        headers=headers,
+        captured_headers=captured_headers,
+        logger=logger,
+        auth=client_settings.build_auth_pipeline(),
+        **kwargs,
+    )
+
+
+def build_httpx_async_client(
+    *,
+    model_name: str,
+    byo_connection_id: str | None,
+    client_settings: UiPathBaseSettings,
+    timeout: float | None,
+    max_retries: int | None,
+    default_headers: Mapping[str, str] | None,
+    captured_headers: Sequence[str],
+    retry_config: RetryConfig | None,
+    logger: logging.Logger | None,
+    api_config: UiPathAPIConfig | None = None,
+    event_hooks: dict[str, list[Callable[..., Any]]] | None = None,
+) -> UiPathHttpxAsyncClient:
+    """Build an async UiPath httpx client with auth, routing headers, and retry.
+
+    See :func:`build_httpx_client` for parameter details.
+    """
+    headers: dict[str, str] = {**(default_headers or {})}
+    kwargs: dict[str, Any] = {}
+
+    if api_config is not None:
+        headers.update(
+            client_settings.build_auth_headers(model_name=model_name, api_config=api_config)
+        )
+        kwargs["base_url"] = client_settings.build_base_url(
+            model_name=model_name, api_config=api_config
+        )
+
+    if event_hooks is not None:
+        kwargs["event_hooks"] = event_hooks
+
+    return UiPathHttpxAsyncClient(
+        model_name=model_name,
+        byo_connection_id=byo_connection_id,
+        api_config=api_config,
+        timeout=timeout,
+        max_retries=max_retries,
+        retry_config=retry_config,
+        headers=headers,
+        captured_headers=captured_headers,
+        logger=logger,
+        auth=client_settings.build_auth_pipeline(),
+        **kwargs,
+    )
diff --git a/src/uipath/llm_client/httpx_client.py b/src/uipath/llm_client/httpx_client.py
index 1250465..83d2360 100644
--- a/src/uipath/llm_client/httpx_client.py
+++ b/src/uipath/llm_client/httpx_client.py
@@ -21,7 +21,7 @@
 """
 
 import logging
-from collections.abc import Callable, Mapping, Sequence
+from collections.abc import Callable, Sequence
 from typing import Any
 
 from httpx import (
@@ -72,7 +72,7 @@ class UiPathHttpxClient(Client):
     """
 
     _streaming_header: str = "X-UiPath-Streaming-Enabled"
-    _default_headers: Mapping[str, str] = {
+    _default_headers: dict[str, str] = {
         "X-UiPath-LLMGateway-TimeoutSeconds": "295",  # server side timeout, default is 10, maximum is 300
         # "X-UiPath-LLMGateway-AllowFull4xxResponse": "true",  # allow full 4xx responses (default is false) — removed from default to avoid PII leakage in logs
     }
@@ -100,7 +100,8 @@ def __init__(
             captured_headers: Case-insensitive header name prefixes to capture from
                 responses. Captured headers are stored in a ContextVar and can be
                 retrieved with get_captured_response_headers(). Defaults to ("x-uipath-",).
-            max_retries: Maximum retry attempts for failed requests. Defaults to 1.
+            max_retries: Maximum retry attempts for failed requests. Defaults to 0
+                (retries disabled). Set to a positive integer to enable retries.
             retry_config: Custom retry configuration (backoff, retryable status codes).
             logger: Logger instance for request/response logging.
             **kwargs: Additional arguments passed to httpx.Client (e.g., base_url,
@@ -114,7 +115,7 @@ def __init__(
         # Extract httpx.Client params that we need to modify
         headers: HeaderTypes | None = kwargs.pop("headers", None)
         transport: BaseTransport | None = kwargs.pop("transport", None)
-        event_hooks: Mapping[str, list[Callable[..., Any]]] | None = kwargs.pop("event_hooks", None)
+        event_hooks: dict[str, list[Callable[..., Any]]] | None = kwargs.pop("event_hooks", None)
 
         # Merge headers: default -> api_config -> user provided
         merged_headers = Headers(self._default_headers)
@@ -131,7 +132,7 @@ def __init__(
         # Setup retry transport if not provided
         if transport is None:
             transport = RetryableHTTPTransport(
-                max_retries=max_retries or 0,
+                max_retries=max_retries if max_retries is not None else 0,
                 retry_config=retry_config,
                 logger=logger,
             )
@@ -147,8 +148,8 @@ def __init__(
                 "request": [],
                 "response": [],
             }
-        event_hooks["request"].append(logging_config.log_request_duration)
-        event_hooks["response"].append(logging_config.log_response_duration)
+        event_hooks.setdefault("request", []).append(logging_config.log_request_duration)
+        event_hooks.setdefault("response", []).append(logging_config.log_response_duration)
         event_hooks["response"].append(logging_config.log_error)
 
         # setup ssl context
@@ -205,7 +206,7 @@ class UiPathHttpxAsyncClient(AsyncClient):
     """
 
     _streaming_header: str = "X-UiPath-Streaming-Enabled"
-    _default_headers: Mapping[str, str] = {
+    _default_headers: dict[str, str] = {
         "X-UiPath-LLMGateway-TimeoutSeconds": "295",  # server side timeout, default is 10, maximum is 300
         # "X-UiPath-LLMGateway-AllowFull4xxResponse": "true",  # allow full 4xx responses (default is false) — removed from default to avoid PII leakage in logs
     }
@@ -233,7 +234,8 @@ def __init__(
             captured_headers: Case-insensitive header name prefixes to capture from
                 responses. Captured headers are stored in a ContextVar and can be
                 retrieved with get_captured_response_headers(). Defaults to ("x-uipath-",).
-            max_retries: Maximum retry attempts for failed requests. Defaults to 1.
+            max_retries: Maximum retry attempts for failed requests. Defaults to 0
+                (retries disabled). Set to a positive integer to enable retries.
             retry_config: Custom retry configuration (backoff, retryable status codes).
             logger: Logger instance for request/response logging.
             **kwargs: Additional arguments passed to httpx.AsyncClient (e.g., base_url,
@@ -247,7 +249,7 @@ def __init__(
         # Extract httpx.AsyncClient params that we need to modify
         headers: HeaderTypes | None = kwargs.pop("headers", None)
         transport: AsyncBaseTransport | None = kwargs.pop("transport", None)
-        event_hooks: Mapping[str, list[Callable[..., Any]]] | None = kwargs.pop("event_hooks", None)
+        event_hooks: dict[str, list[Callable[..., Any]]] | None = kwargs.pop("event_hooks", None)
 
         # Merge headers: default -> api_config -> user provided
         merged_headers = Headers(self._default_headers)
@@ -264,7 +266,7 @@ def __init__(
         # Setup retry transport if not provided
         if transport is None:
             transport = RetryableAsyncHTTPTransport(
-                max_retries=max_retries or 0,
+                max_retries=max_retries if max_retries is not None else 0,
                 retry_config=retry_config,
                 logger=logger,
             )
@@ -280,8 +282,8 @@ def __init__(
                 "request": [],
                 "response": [],
             }
-        event_hooks["request"].append(logging_config.alog_request_duration)
-        event_hooks["response"].append(logging_config.alog_response_duration)
+        event_hooks.setdefault("request", []).append(logging_config.alog_request_duration)
+        event_hooks.setdefault("response", []).append(logging_config.alog_response_duration)
         event_hooks["response"].append(logging_config.alog_error)
 
         # setup ssl context
diff --git a/src/uipath/llm_client/settings/base.py b/src/uipath/llm_client/settings/base.py
index f6c2326..c2c974c 100644
--- a/src/uipath/llm_client/settings/base.py
+++ b/src/uipath/llm_client/settings/base.py
@@ -2,7 +2,7 @@
 Base Settings Module for UiPath LLM Client
 
 This module defines the abstract base classes and data models for UiPath API settings.
-Concrete implementations are provided in the `agenthub` and `llmgateway` submodules.
+Concrete implementations are provided in the `platform` and `llmgateway` submodules.
 """
 
 from abc import ABC, abstractmethod
diff --git a/src/uipath/llm_client/settings/constants.py b/src/uipath/llm_client/settings/constants.py
index 27232ca..f8acf52 100644
--- a/src/uipath/llm_client/settings/constants.py
+++ b/src/uipath/llm_client/settings/constants.py
@@ -28,7 +28,7 @@ class ApiFlavor(StrEnum):
     ANTHROPIC_CLAUDE = "anthropic-claude"
 
 
-_API_FLAVOR_TO_VENDOR_TYPE: dict[ApiFlavor, VendorType] = {
+API_FLAVOR_TO_VENDOR_TYPE: dict[ApiFlavor, VendorType] = {
     ApiFlavor.CHAT_COMPLETIONS: VendorType.OPENAI,
     ApiFlavor.RESPONSES: VendorType.OPENAI,
     ApiFlavor.GENERATE_CONTENT: VendorType.VERTEXAI,
diff --git a/src/uipath/llm_client/settings/llmgateway/auth.py b/src/uipath/llm_client/settings/llmgateway/auth.py
index aa8dc37..9662ca0 100644
--- a/src/uipath/llm_client/settings/llmgateway/auth.py
+++ b/src/uipath/llm_client/settings/llmgateway/auth.py
@@ -5,18 +5,28 @@
 from uipath.llm_client.settings.llmgateway.settings import LLMGatewayBaseSettings
 from uipath.llm_client.settings.llmgateway.utils import LLMGatewayEndpoints
 from uipath.llm_client.settings.utils import SingletonMeta
+from uipath.llm_client.utils.ssl_config import get_httpx_ssl_client_kwargs
 
 
 class LLMGatewayS2SAuth(Auth, metaclass=SingletonMeta):
     """Bearer authentication handler with automatic token refresh.
 
-    Singleton class that reuses the same token across all requests to minimize
-    token generation overhead. Automatically refreshes the token on 401 responses.
+    Singleton keyed by (client_id, client_secret) so that clients sharing the
+    same credentials reuse one token while different credentials get separate
+    instances.  Automatically refreshes the token on 401 responses.
 
     Does not raise errors on token retrieval failures — the request is sent
     without a valid token and the downstream client handles the error response.
     """
 
+    @classmethod
+    def _singleton_cache_key(cls, settings: LLMGatewayBaseSettings) -> tuple:
+        """Derive a cache key from the credentials so different settings get different instances."""
+        return (
+            settings.client_id.get_secret_value() if settings.client_id else None,
+            settings.client_secret.get_secret_value() if settings.client_secret else None,
+        )
+
     def __init__(
         self,
         settings: LLMGatewayBaseSettings,
@@ -51,7 +61,7 @@ def get_llmgw_token(
             grant_type="client_credentials",
         )
         try:
-            with Client() as http_client:
+            with Client(**get_httpx_ssl_client_kwargs()) as http_client:
                 response = http_client.post(url_get_token, data=token_credentials)
                 if response.is_error:
                     return None
diff --git a/src/uipath/llm_client/settings/llmgateway/settings.py b/src/uipath/llm_client/settings/llmgateway/settings.py
index 1076668..a55a01a 100644
--- a/src/uipath/llm_client/settings/llmgateway/settings.py
+++ b/src/uipath/llm_client/settings/llmgateway/settings.py
@@ -7,8 +7,10 @@
 from typing_extensions import override
 
 from uipath.llm_client.settings.base import UiPathAPIConfig, UiPathBaseSettings
+from uipath.llm_client.settings.constants import ApiType, RoutingMode
 from uipath.llm_client.settings.llmgateway.utils import LLMGatewayEndpoints
 from uipath.llm_client.utils.exceptions import UiPathAPIError
+from uipath.llm_client.utils.ssl_config import get_httpx_ssl_client_kwargs
 
 
 class LLMGatewayBaseSettings(UiPathBaseSettings):
@@ -73,13 +75,17 @@ def build_base_url(
         model_name: str | None = None,
         api_config: UiPathAPIConfig | None = None,
     ) -> str:
+        if api_config is None:
+            raise ValueError("api_config is required for LLMGatewaySettings.build_base_url")
+        if api_config.routing_mode is None:
+            raise ValueError("api_config.routing_mode is required for LLMGatewaySettings.build_base_url")
         base_url = f"{self.base_url}/{self.org_id}/{self.tenant_id}"
-        if api_config is not None and api_config.routing_mode == "normalized":
-            url = f"{base_url}/{LLMGatewayEndpoints.NORMALIZED_ENDPOINT.value.format(api_type='chat/completions' if api_config.api_type == 'completions' else 'embeddings')}"
-        else:
-            if api_config is None:
-                raise ValueError("api_config is required for passthrough routing_mode")
+        if api_config.routing_mode == RoutingMode.NORMALIZED:
+            url = f"{base_url}/{LLMGatewayEndpoints.NORMALIZED_ENDPOINT.value.format(api_type='chat/completions' if api_config.api_type == ApiType.COMPLETIONS else 'embeddings')}"
+        elif api_config.routing_mode == RoutingMode.PASSTHROUGH:
             url = f"{base_url}/{LLMGatewayEndpoints.PASSTHROUGH_ENDPOINT.value.format(vendor=api_config.vendor_type, model=model_name, api_type=api_config.api_type)}"
+        else:
+            raise ValueError(f"Unsupported routing_mode: {api_config.routing_mode}")
         return url
 
     @override
@@ -108,7 +114,7 @@ def build_auth_headers(
     @override
     def get_available_models(self) -> list[dict[str, Any]]:
         discovery_url = f"{self.base_url}/{self.org_id}/{self.tenant_id}/{LLMGatewayEndpoints.DISCOVERY_ENDPOINT.value}"
-        with Client(auth=self.build_auth_pipeline(), headers=self.build_auth_headers()) as client:
+        with Client(auth=self.build_auth_pipeline(), headers=self.build_auth_headers(), **get_httpx_ssl_client_kwargs()) as client:
             response = client.get(discovery_url)
             if response.is_error:
                 raise UiPathAPIError.from_response(response)
@@ -116,17 +122,23 @@ def get_available_models(self) -> list[dict[str, Any]]:
 
     @override
     def validate_byo_model(self, model_info: dict[str, Any]) -> None:
+        """Validate that the model is a BYOM model.
+
+        Note: This method may mutate ``self.operation_code`` as a side effect
+        when no operation code was explicitly configured but the model provides
+        available codes.
+        """
         byom_details = model_info.get("byomDetails", {})
         operation_codes = byom_details.get("availableOperationCodes", [])
         if self.operation_code and self.operation_code not in operation_codes:
             raise ValueError(
-                f"The operation code {self.operation_code} is not allowed for the model {model_info['modelName']}"
+                f"The operation code {self.operation_code} is not allowed for the model {model_info.get('modelName', 'unknown')}"
             )
         if not self.operation_code and len(operation_codes) > 0:
             if len(operation_codes) > 1:
                 logging.warning(
                     "Multiple operation codes are allowed for the model %s, but no operation code was provided, picking the first one available: %s",
-                    model_info["modelName"],
+                    model_info.get("modelName", "unknown"),
                     operation_codes[0],
                 )
             self.operation_code = operation_codes[0]
diff --git a/src/uipath/llm_client/settings/platform/auth.py b/src/uipath/llm_client/settings/platform/auth.py
index fe1482f..b6d35fb 100644
--- a/src/uipath/llm_client/settings/platform/auth.py
+++ b/src/uipath/llm_client/settings/platform/auth.py
@@ -12,10 +12,22 @@
 class PlatformAuth(Auth, metaclass=SingletonMeta):
     """Bearer authentication handler with automatic token refresh.
 
-    Singleton class that stores access_token and refresh_token directly,
-    reusing them across all requests. Automatically refreshes on 401 responses.
+    Singleton keyed by (base_url, organization_id, tenant_id, access_token)
+    so that clients sharing the same credentials reuse one token while
+    different credentials get separate instances.  Automatically refreshes
+    on 401 responses.
     """
 
+    @classmethod
+    def _singleton_cache_key(cls, settings: PlatformBaseSettings) -> tuple:
+        """Derive a cache key from the credentials so different settings get different instances."""
+        return (
+            settings.base_url,
+            settings.organization_id,
+            settings.tenant_id,
+            settings.access_token.get_secret_value() if settings.access_token else None,
+        )
+
     def __init__(
         self,
         settings: PlatformBaseSettings,
diff --git a/src/uipath/llm_client/settings/platform/settings.py b/src/uipath/llm_client/settings/platform/settings.py
index bbcc4d8..0b1aba0 100644
--- a/src/uipath/llm_client/settings/platform/settings.py
+++ b/src/uipath/llm_client/settings/platform/settings.py
@@ -8,6 +8,7 @@
 from uipath.platform.common import EndpointManager
 
 from uipath.llm_client.settings.base import UiPathAPIConfig, UiPathBaseSettings
+from uipath.llm_client.settings.constants import ApiType, RoutingMode
 from uipath.llm_client.settings.platform.utils import is_token_expired, parse_access_token
 
 
@@ -72,7 +73,7 @@ def validate_environment(self) -> Self:
             )
 
         parsed_token_data = parse_access_token(access_token)
-        self.client_id = parsed_token_data.get("client_id", None)
+        self.client_id = parsed_token_data.get("client_id")
         return self
 
     @staticmethod
@@ -97,19 +98,21 @@ def build_base_url(
         api_config: UiPathAPIConfig | None = None,
     ) -> str:
         """Build the base URL for API requests."""
-        assert model_name is not None
-        assert api_config is not None
-        if api_config.routing_mode == "normalized" and api_config.api_type == "completions":
+        if model_name is None:
+            raise ValueError("model_name is required for PlatformBaseSettings.build_base_url")
+        if api_config is None:
+            raise ValueError("api_config is required for PlatformBaseSettings.build_base_url")
+        if api_config.routing_mode == RoutingMode.NORMALIZED and api_config.api_type == ApiType.COMPLETIONS:
             url = f"{self.base_url}/{EndpointManager.get_normalized_endpoint()}"
-        elif api_config.routing_mode == "normalized" and api_config.api_type == "embeddings":
+        elif api_config.routing_mode == RoutingMode.NORMALIZED and api_config.api_type == ApiType.EMBEDDINGS:
             raise ValueError(
                 "Normalized embeddings are not supported on UiPath Platform (AgentHub/Orchestrator). "
                 "Use passthrough routing mode for embeddings instead."
             )
-        elif api_config.routing_mode == "passthrough" and api_config.api_type == "completions":
+        elif api_config.routing_mode == RoutingMode.PASSTHROUGH and api_config.api_type == ApiType.COMPLETIONS:
             endpoint = EndpointManager.get_vendor_endpoint()
             url = f"{self.base_url}/{self._format_endpoint(endpoint, model=model_name, vendor=api_config.vendor_type, api_version=api_config.api_version)}"
-        elif api_config.routing_mode == "passthrough" and api_config.api_type == "embeddings":
+        elif api_config.routing_mode == RoutingMode.PASSTHROUGH and api_config.api_type == ApiType.EMBEDDINGS:
             if api_config.vendor_type is not None and api_config.vendor_type != "openai":
                 raise ValueError(
                     f"Platform embeddings endpoint only supports OpenAI-compatible models, "
diff --git a/src/uipath/llm_client/settings/platform/utils.py b/src/uipath/llm_client/settings/platform/utils.py
index 559250b..1daaa0e 100644
--- a/src/uipath/llm_client/settings/platform/utils.py
+++ b/src/uipath/llm_client/settings/platform/utils.py
@@ -1,16 +1,42 @@
 import base64
 import json
 import time
+from typing import Any
 
 
-def parse_access_token(access_token: str):
+def parse_access_token(access_token: str) -> dict[str, Any]:
+    """Parse a JWT access token and return the payload as a dict.
+
+    Args:
+        access_token: A JWT token string (header.payload.signature).
+
+    Returns:
+        The decoded payload as a dictionary.
+
+    Raises:
+        ValueError: If the token is malformed or cannot be decoded.
+    """
     token_parts = access_token.split(".")
     if len(token_parts) < 2:
-        raise Exception("Invalid access token")
-    payload = base64.urlsafe_b64decode(token_parts[1] + "=" * (-len(token_parts[1]) % 4))
-    return json.loads(payload)
+        raise ValueError("Invalid access token: expected JWT with at least 2 dot-separated parts")
+    try:
+        payload = base64.urlsafe_b64decode(token_parts[1] + "=" * (-len(token_parts[1]) % 4))
+        return json.loads(payload)
+    except ValueError as e:  # binascii.Error, JSONDecodeError and UnicodeDecodeError are all ValueError subclasses
+        raise ValueError(f"Invalid access token: failed to decode payload: {e}") from e
 
 
 def is_token_expired(token: str) -> bool:
+    """Check whether a JWT access token has expired.
+
+    Args:
+        token: A JWT token string.
+
+    Returns:
+        True if the token is expired, False if it is still valid or has no ``exp`` claim.
+    """
     token_data = parse_access_token(token)
-    return token_data["exp"] < time.time()
+    exp = token_data.get("exp")
+    if exp is None:
+        return False
+    return exp < time.time()
diff --git a/src/uipath/llm_client/settings/utils.py b/src/uipath/llm_client/settings/utils.py
index 3892d4c..4596e5d 100644
--- a/src/uipath/llm_client/settings/utils.py
+++ b/src/uipath/llm_client/settings/utils.py
@@ -3,15 +3,33 @@
 
 
 class SingletonMeta(type):
-    """Metaclass for creating singleton classes. Used to keep global configs shared between instances."""
+    """Metaclass for creating singleton classes keyed by (class, cache_key).
 
-    _instances: dict[type, Any] = {}
+    Classes using this metaclass can define a ``_singleton_cache_key`` classmethod
+    that derives a hashable key from the constructor arguments.  When the same
+    key is seen again the cached instance is returned and ``__init__`` is
+    **not** re-invoked.
+
+    If the class does not define ``_singleton_cache_key``, the class itself is
+    used as the sole key (original singleton-per-class behaviour).
+
+    Used to share access-tokens / auth state between multiple HTTP clients that
+    are configured with the same credentials.
+    """
+
+    _instances: dict[tuple[type, Any], Any] = {}
     _lock: threading.Lock = threading.Lock()
 
     def __call__(cls, *args: Any, **kwargs: Any) -> Any:
-        if cls not in SingletonMeta._instances:
+        key_fn = getattr(cls, "_singleton_cache_key", None)
+        if key_fn is not None:
+            cache_key = (cls, key_fn(*args, **kwargs))
+        else:
+            cache_key = (cls, None)
+
+        if cache_key not in SingletonMeta._instances:
             with SingletonMeta._lock:
-                if cls not in SingletonMeta._instances:
+                if cache_key not in SingletonMeta._instances:
                     instance = super().__call__(*args, **kwargs)
-                    SingletonMeta._instances[cls] = instance
-        return SingletonMeta._instances[cls]
+                    SingletonMeta._instances[cache_key] = instance
+        return SingletonMeta._instances[cache_key]
diff --git a/src/uipath/llm_client/utils/exceptions.py b/src/uipath/llm_client/utils/exceptions.py
index 4c7d311..3e13057 100644
--- a/src/uipath/llm_client/utils/exceptions.py
+++ b/src/uipath/llm_client/utils/exceptions.py
@@ -26,6 +26,7 @@
 """
 
 from json import JSONDecodeError
+from typing import Literal
 
 from httpx import HTTPStatusError, Request, Response
 
@@ -96,43 +97,43 @@ def from_response(cls, response: Response, request: Request | None = None) -> "U
 class UiPathBadRequestError(UiPathAPIError):
     """HTTP 400 Bad Request error."""
 
-    status_code: int = 400
+    status_code: Literal[400] = 400  # pyright: ignore[reportIncompatibleVariableOverride]
 
 
 class UiPathAuthenticationError(UiPathAPIError):
     """HTTP 401 Unauthorized error."""
 
-    status_code: int = 401
+    status_code: Literal[401] = 401  # pyright: ignore[reportIncompatibleVariableOverride]
 
 
 class UiPathPermissionDeniedError(UiPathAPIError):
     """HTTP 403 Forbidden error."""
 
-    status_code: int = 403
+    status_code: Literal[403] = 403  # pyright: ignore[reportIncompatibleVariableOverride]
 
 
 class UiPathNotFoundError(UiPathAPIError):
     """HTTP 404 Not Found error."""
 
-    status_code: int = 404
+    status_code: Literal[404] = 404  # pyright: ignore[reportIncompatibleVariableOverride]
 
 
 class UiPathConflictError(UiPathAPIError):
     """HTTP 409 Conflict error."""
 
-    status_code: int = 409
+    status_code: Literal[409] = 409  # pyright: ignore[reportIncompatibleVariableOverride]
 
 
 class UiPathRequestTooLargeError(UiPathAPIError):
     """HTTP 413 Payload Too Large error."""
 
-    status_code: int = 413
+    status_code: Literal[413] = 413  # pyright: ignore[reportIncompatibleVariableOverride]
 
 
 class UiPathUnprocessableEntityError(UiPathAPIError):
     """HTTP 422 Unprocessable Entity error."""
 
-    status_code: int = 422
+    status_code: Literal[422] = 422  # pyright: ignore[reportIncompatibleVariableOverride]
 
 
 class UiPathRateLimitError(UiPathAPIError):
@@ -142,7 +143,7 @@ class UiPathRateLimitError(UiPathAPIError):
         retry_after: Seconds to wait before retrying (from Retry-After header), or None.
     """
 
-    status_code: int = 429
+    status_code: Literal[429] = 429  # pyright: ignore[reportIncompatibleVariableOverride]
 
     def __init__(
         self,
@@ -178,9 +179,9 @@ def _parse_retry_after(response: Response) -> float | None:
         from datetime import datetime, timezone
 
         # Check both header variants (case-insensitive in httpx)
-        retry_after_value = response.headers.get("retry-after") or response.headers.get(
-            "x-retry-after"
-        )
+        retry_after_value = response.headers.get("retry-after")
+        if retry_after_value is None:
+            retry_after_value = response.headers.get("x-retry-after")
 
         if retry_after_value is None:
             return None
@@ -207,25 +208,25 @@ def _parse_retry_after(response: Response) -> float | None:
 class UiPathInternalServerError(UiPathAPIError):
     """HTTP 500 Internal Server Error."""
 
-    status_code: int = 500
+    status_code: Literal[500] = 500  # pyright: ignore[reportIncompatibleVariableOverride]
 
 
 class UiPathServiceUnavailableError(UiPathAPIError):
     """HTTP 503 Service Unavailable error."""
 
-    status_code: int = 503
+    status_code: Literal[503] = 503  # pyright: ignore[reportIncompatibleVariableOverride]
 
 
 class UiPathGatewayTimeoutError(UiPathAPIError):
     """HTTP 504 Gateway Timeout error."""
 
-    status_code: int = 504
+    status_code: Literal[504] = 504  # pyright: ignore[reportIncompatibleVariableOverride]
 
 
 class UiPathTooManyRequestsError(UiPathAPIError):
     """HTTP 529 Too Many Requests (Anthropic overload) error."""
 
-    status_code: int = 529
+    status_code: Literal[529] = 529  # pyright: ignore[reportIncompatibleVariableOverride]
 
 
 _STATUS_CODE_TO_EXCEPTION: dict[int, type[UiPathAPIError]] = {
diff --git a/src/uipath/llm_client/utils/headers.py b/src/uipath/llm_client/utils/headers.py
index 6a5bea2..58c4c9e 100644
--- a/src/uipath/llm_client/utils/headers.py
+++ b/src/uipath/llm_client/utils/headers.py
@@ -4,13 +4,14 @@
 from httpx import Headers
 
 from uipath.llm_client.settings.base import UiPathAPIConfig
+from uipath.llm_client.settings.constants import ApiType, RoutingMode
 
-_CAPTURED_RESPONSE_HEADERS: contextvars.ContextVar[dict[str, str]] = contextvars.ContextVar(
-    "_captured_response_headers", default={}
+_CAPTURED_RESPONSE_HEADERS: contextvars.ContextVar[dict[str, str] | None] = contextvars.ContextVar(
+    "_captured_response_headers", default=None
 )
 
-_DYNAMIC_REQUEST_HEADERS: contextvars.ContextVar[dict[str, str]] = contextvars.ContextVar(
-    "_dynamic_request_headers", default={}
+_DYNAMIC_REQUEST_HEADERS: contextvars.ContextVar[dict[str, str] | None] = contextvars.ContextVar(
+    "_dynamic_request_headers", default=None
 )
 
 
@@ -20,10 +21,10 @@ def get_captured_response_headers() -> dict[str, str]:
     Returns an empty dict if no headers have been captured or if called
     outside a capture scope.
     """
-    return dict(_CAPTURED_RESPONSE_HEADERS.get())
+    return dict(_CAPTURED_RESPONSE_HEADERS.get() or {})
 
 
-def set_captured_response_headers(headers: dict[str, str]) -> contextvars.Token[dict[str, str]]:
+def set_captured_response_headers(headers: dict[str, str]) -> contextvars.Token[dict[str, str] | None]:
     """Set captured response headers for the current context."""
     return _CAPTURED_RESPONSE_HEADERS.set(headers)
 
@@ -33,10 +34,10 @@ def get_dynamic_request_headers() -> dict[str, str]:
 
     Returns an empty dict if no dynamic headers have been set in this context.
     """
-    return dict(_DYNAMIC_REQUEST_HEADERS.get())
+    return dict(_DYNAMIC_REQUEST_HEADERS.get() or {})
 
 
-def set_dynamic_request_headers(headers: dict[str, str]) -> contextvars.Token[dict[str, str]]:
+def set_dynamic_request_headers(headers: dict[str, str]) -> contextvars.Token[dict[str, str] | None]:
     """Set headers to be injected into the next outgoing request."""
     return _DYNAMIC_REQUEST_HEADERS.set(headers)
 
@@ -74,9 +75,9 @@ def build_routing_headers(
     """
     headers: dict[str, str] = {}
     if api_config is not None:
-        if api_config.routing_mode == "normalized" and model_name is not None:
+        if api_config.routing_mode == RoutingMode.NORMALIZED and model_name is not None:
             headers["X-UiPath-LlmGateway-NormalizedApi-ModelName"] = model_name
-        elif api_config.routing_mode == "passthrough" and api_config.api_type == "completions":
+        elif api_config.routing_mode == RoutingMode.PASSTHROUGH and api_config.api_type == ApiType.COMPLETIONS:
             if api_config.api_flavor is not None:
                 headers["X-UiPath-LlmGateway-ApiFlavor"] = api_config.api_flavor
             if api_config.api_version is not None:
diff --git a/src/uipath/llm_client/utils/retry.py b/src/uipath/llm_client/utils/retry.py
index 941dfbd..32c89a3 100644
--- a/src/uipath/llm_client/utils/retry.py
+++ b/src/uipath/llm_client/utils/retry.py
@@ -248,6 +248,9 @@ def handle_request(self, request: Request) -> Response:
             The httpx Response. Returns error responses after retries are exhausted
             instead of raising exceptions.
         """
+        if self.retryer is None:
+            return super().handle_request(request)
+
         parent_handle = super().handle_request
 
         def _send() -> Response:
@@ -257,10 +260,7 @@ def _send() -> Response:
             return response
 
         try:
-            if self.retryer is not None:
-                return self.retryer(_send)
-            else:
-                return _send()
+            return self.retryer(_send)
         except UiPathAPIError as e:
             return e.response
 
@@ -312,6 +312,9 @@ async def handle_async_request(self, request: Request) -> Response:
             The httpx Response. Returns error responses after retries are exhausted
             instead of raising exceptions.
         """
+        if self.retryer is None:
+            return await super().handle_async_request(request)
+
         parent_handle = super().handle_async_request
 
         async def _send() -> Response:
@@ -321,10 +324,7 @@ async def _send() -> Response:
             return response
 
         try:
-            if self.retryer is not None:
-                return await self.retryer(_send)
-            else:
-                return await _send()
+            return await self.retryer(_send)
         except UiPathAPIError as e:
             return e.response
 
diff --git a/src/uipath/llm_client/utils/ssl_config.py b/src/uipath/llm_client/utils/ssl_config.py
index 31d3c61..4dd1c35 100644
--- a/src/uipath/llm_client/utils/ssl_config.py
+++ b/src/uipath/llm_client/utils/ssl_config.py
@@ -3,7 +3,7 @@
 from typing import Any
 
 
-def expand_path(path):
+def expand_path(path: str | None) -> str | None:
     """Expand environment variables and user home directory in path."""
     if not path:
         return path
@@ -14,24 +14,40 @@ def expand_path(path):
     return path
 
 
-def create_ssl_context():
+def create_ssl_context() -> ssl.SSLContext:
+    """Create an SSL context using system certificates.
+
+    Tries ``truststore`` first for native system certificate support.
+    Falls back to ``certifi`` for bundled Mozilla CA certificates.
+
+    Raises:
+        ImportError: If neither ``truststore`` nor ``certifi`` is installed.
+    """
     # Try truststore first (system certificates)
     try:
         import truststore
 
         return truststore.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
     except ImportError:
-        # Fallback to manual certificate configuration
+        pass
+
+    # Fallback to manual certificate configuration
+    try:
         import certifi
+    except ImportError as e:
+        raise ImportError(
+            "SSL certificate support requires either 'truststore' or 'certifi'. "
+            "Install one with: pip install truststore  or  pip install certifi"
+        ) from e
 
-        ssl_cert_file = expand_path(os.environ.get("SSL_CERT_FILE"))
-        requests_ca_bundle = expand_path(os.environ.get("REQUESTS_CA_BUNDLE"))
-        ssl_cert_dir = expand_path(os.environ.get("SSL_CERT_DIR"))
+    ssl_cert_file = expand_path(os.environ.get("SSL_CERT_FILE"))
+    requests_ca_bundle = expand_path(os.environ.get("REQUESTS_CA_BUNDLE"))
+    ssl_cert_dir = expand_path(os.environ.get("SSL_CERT_DIR"))
 
-        return ssl.create_default_context(
-            cafile=ssl_cert_file or requests_ca_bundle or certifi.where(),
-            capath=ssl_cert_dir,
-        )
+    return ssl.create_default_context(
+        cafile=ssl_cert_file or requests_ca_bundle or certifi.where(),
+        capath=ssl_cert_dir,
+    )
 
 
 def get_httpx_ssl_client_kwargs() -> dict[str, Any]:
diff --git a/tests/core/test_base_client.py b/tests/core/test_base_client.py
index d1c830f..bfb00c1 100644
--- a/tests/core/test_base_client.py
+++ b/tests/core/test_base_client.py
@@ -458,8 +458,8 @@ def test_auth_flow_refreshes_on_401(self, llmgw_s2s_env_vars):
                 except StopIteration:
                     pass
 
-    def test_auth_singleton_reuses_instance(self, llmgw_env_vars):
-        """Test that LLMGatewayS2SAuth is a singleton."""
+    def test_auth_singleton_reuses_instance_for_same_settings(self, llmgw_env_vars):
+        """Test that LLMGatewayS2SAuth reuses the same instance for identical settings."""
         from uipath.llm_client.settings.llmgateway.auth import LLMGatewayS2SAuth
 
         with patch.dict(os.environ, llmgw_env_vars, clear=True):
@@ -468,6 +468,20 @@ def test_auth_singleton_reuses_instance(self, llmgw_env_vars):
             auth2 = LLMGatewayS2SAuth(settings=settings)
             assert auth1 is auth2
 
+    def test_auth_creates_separate_instances_for_different_settings(self, llmgw_env_vars):
+        """Test that LLMGatewayS2SAuth creates separate instances for different credentials."""
+        from uipath.llm_client.settings.llmgateway.auth import LLMGatewayS2SAuth
+        # Singleton key is (client_id, client_secret), so vary those. NOTE(review): LLMGW_CLIENT_* env names assumed - confirm.
+        env1 = {**llmgw_env_vars, "LLMGW_CLIENT_ID": "client-a", "LLMGW_CLIENT_SECRET": "secret-a"}
+        env2 = {**llmgw_env_vars, "LLMGW_CLIENT_ID": "client-b", "LLMGW_CLIENT_SECRET": "secret-b"}
+        with patch.dict(os.environ, env1, clear=True):
+            settings1 = LLMGatewaySettings()
+        with patch.dict(os.environ, env2, clear=True):
+            settings2 = LLMGatewaySettings()
+        auth1 = LLMGatewayS2SAuth(settings=settings1)
+        auth2 = LLMGatewayS2SAuth(settings=settings2)
+        assert auth1 is not auth2
+
 
 # ============================================================================
 # Test PlatformSettings
@@ -637,17 +651,17 @@ def test_build_base_url_normalized_embeddings_raises(
     def test_build_base_url_requires_model_name(
         self, platform_env_vars, mock_platform_auth, normalized_api_config
     ):
-        """Test build_base_url asserts model_name is not None."""
+        """Test build_base_url raises ValueError when model_name is None."""
         with patch.dict(os.environ, platform_env_vars, clear=True):
             settings = PlatformSettings()
-            with pytest.raises(AssertionError):
+            with pytest.raises(ValueError, match="model_name is required"):
                 settings.build_base_url(model_name=None, api_config=normalized_api_config)
 
     def test_build_base_url_requires_api_config(self, platform_env_vars, mock_platform_auth):
-        """Test build_base_url asserts api_config is not None."""
+        """Test build_base_url raises ValueError when api_config is None."""
         with patch.dict(os.environ, platform_env_vars, clear=True):
             settings = PlatformSettings()
-            with pytest.raises(AssertionError):
+            with pytest.raises(ValueError, match="api_config is required"):
                 settings.build_base_url(model_name="gpt-4o", api_config=None)
 
     def test_build_auth_headers_empty_when_no_optional(self, platform_env_vars, mock_platform_auth):
@@ -715,12 +729,11 @@ class TestPlatformAuthRefresh:
     @pytest.fixture(autouse=True)
     def clear_auth_singleton(self):
         """Clear PlatformAuth singleton before each test."""
-        from uipath.llm_client.settings.platform.auth import PlatformAuth
         from uipath.llm_client.settings.utils import SingletonMeta
 
-        SingletonMeta._instances.pop(PlatformAuth, None)
+        SingletonMeta._instances.clear()
         yield
-        SingletonMeta._instances.pop(PlatformAuth, None)
+        SingletonMeta._instances.clear()
 
     def test_auth_flow_adds_bearer_token(self, platform_env_vars, mock_platform_auth):
         """Test auth_flow adds Authorization header."""
@@ -765,8 +778,8 @@ def test_auth_flow_refreshes_on_401(self, platform_env_vars, mock_platform_auth)
                 except StopIteration:
                     pass
 
-    def test_auth_singleton_reuses_instance(self, platform_env_vars, mock_platform_auth):
-        """Test that PlatformAuth is a singleton."""
+    def test_auth_singleton_reuses_instance_for_same_settings(self, platform_env_vars, mock_platform_auth):
+        """Test that PlatformAuth reuses the same instance for identical settings."""
         from uipath.llm_client.settings.platform.auth import PlatformAuth
 
         with patch.dict(os.environ, platform_env_vars, clear=True):
@@ -775,6 +788,20 @@ def test_auth_singleton_reuses_instance(self, platform_env_vars, mock_platform_a
             auth2 = PlatformAuth(settings=settings)
             assert auth1 is auth2
 
+    def test_auth_creates_separate_instances_for_different_settings(self, platform_env_vars, mock_platform_auth):
+        """Test that PlatformAuth creates separate instances for different credentials."""
+        from uipath.llm_client.settings.platform.auth import PlatformAuth
+
+        env1 = {**platform_env_vars, "UIPATH_ACCESS_TOKEN": "token-x"}
+        env2 = {**platform_env_vars, "UIPATH_ACCESS_TOKEN": "token-y"}
+        with patch.dict(os.environ, env1, clear=True):
+            settings1 = PlatformSettings()
+        with patch.dict(os.environ, env2, clear=True):
+            settings2 = PlatformSettings()
+        auth1 = PlatformAuth(settings=settings1)
+        auth2 = PlatformAuth(settings=settings2)
+        assert auth1 is not auth2
+
 
 # ============================================================================
 # Test Retry Logic
@@ -1131,7 +1158,7 @@ class TestSingletonMeta:
     """Tests for SingletonMeta metaclass."""
 
     def test_singleton_creates_single_instance(self):
-        """Test singleton creates only one instance."""
+        """Test singleton creates only one instance when no cache key is defined."""
 
         class TestSingleton(metaclass=SingletonMeta):
             def __init__(self, value: int):
@@ -1157,6 +1184,41 @@ class SingletonB(metaclass=SingletonMeta):
 
         assert a is not b
 
+    def test_keyed_singleton_same_key_reuses_instance(self):
+        """Test that same cache key returns the same instance."""
+
+        class KeyedSingleton(metaclass=SingletonMeta):
+            def __init__(self, key: str, value: int):
+                self.key = key
+                self.value = value
+
+            @classmethod
+            def _singleton_cache_key(cls, key: str, value: int) -> tuple:
+                return (key,)
+
+        a = KeyedSingleton("k1", 10)
+        b = KeyedSingleton("k1", 20)
+        assert a is b
+        assert a.value == 10  # First value retained
+
+    def test_keyed_singleton_different_key_creates_new_instance(self):
+        """Test that different cache keys create separate instances."""
+
+        class KeyedSingleton2(metaclass=SingletonMeta):
+            def __init__(self, key: str, value: int):
+                self.key = key
+                self.value = value
+
+            @classmethod
+            def _singleton_cache_key(cls, key: str, value: int) -> tuple:
+                return (key,)
+
+        a = KeyedSingleton2("k1", 10)
+        b = KeyedSingleton2("k2", 20)
+        assert a is not b
+        assert a.value == 10
+        assert b.value == 20
+
 
 # ============================================================================
 # Test Exception String Representations and Body Parsing