diff --git a/CHANGELOG.md b/CHANGELOG.md
index a041eec..520ff4c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,11 @@
 
 All notable changes to `uipath_llm_client` (core package) will be documented in this file.
 
+## [1.6.0] - 2026-03-31
+
+### Added
+- `UiPathNormalizedClient`: a provider-agnostic HTTP client that speaks directly to UiPath's normalized API endpoint without requiring any vendor SDK. Supports sync/async chat completions, streaming, structured output (JSON schema), and tool calling across all supported providers (OpenAI, Google Gemini, Anthropic on Bedrock/Vertex, etc.), plus embeddings.
+
 ## [1.5.10] - 2026-03-26
 
 ### Changed
diff --git a/packages/uipath_langchain_client/CHANGELOG.md b/packages/uipath_langchain_client/CHANGELOG.md
index 2b7bd3c..870597a 100644
--- a/packages/uipath_langchain_client/CHANGELOG.md
+++ b/packages/uipath_langchain_client/CHANGELOG.md
@@ -2,6 +2,11 @@
 
 All notable changes to `uipath_langchain_client` will be documented in this file.
 
+## [1.6.0] - 2026-03-31
+
+### Changed
+- Bumped `uipath-llm-client` dependency to `>=1.6.0` to pick up `UiPathNormalizedClient`
+
 ## [1.5.10] - 2026-03-26
 
 ### Changed
diff --git a/packages/uipath_langchain_client/pyproject.toml b/packages/uipath_langchain_client/pyproject.toml
index 93ff0be..e97cffe 100644
--- a/packages/uipath_langchain_client/pyproject.toml
+++ b/packages/uipath_langchain_client/pyproject.toml
@@ -6,7 +6,7 @@ readme = "README.md"
 requires-python = ">=3.11"
 dependencies = [
     "langchain>=1.2.13",
-    "uipath-llm-client>=1.5.10",
+    "uipath-llm-client>=1.6.0",
 ]
 
 [project.optional-dependencies]
diff --git a/packages/uipath_langchain_client/src/uipath_langchain_client/__version__.py b/packages/uipath_langchain_client/src/uipath_langchain_client/__version__.py
index 292aefb..5149f6e 100644
--- a/packages/uipath_langchain_client/src/uipath_langchain_client/__version__.py
+++ b/packages/uipath_langchain_client/src/uipath_langchain_client/__version__.py
@@ -1,3 +1,3 @@
 __title__ = "UiPath LangChain Client"
 __description__ = "A Python client for interacting with UiPath's LLM services via LangChain."
-__version__ = "1.5.10"
+__version__ = "1.6.0"
diff --git a/src/uipath/llm_client/__init__.py b/src/uipath/llm_client/__init__.py
index 342ef49..e15ad1f 100644
--- a/src/uipath/llm_client/__init__.py
+++ b/src/uipath/llm_client/__init__.py
@@ -26,6 +26,7 @@
 """
 
 from uipath.llm_client.__version__ import __version__
+from uipath.llm_client.clients.normalized import UiPathNormalizedClient
 from uipath.llm_client.httpx_client import (
     UiPathHttpxAsyncClient,
     UiPathHttpxClient,
@@ -54,6 +55,8 @@
 
 __all__ = [
     "__version__",
+    # Normalized client
+    "UiPathNormalizedClient",
     # Settings
     "get_default_client_settings",
     "PlatformSettings",
diff --git a/src/uipath/llm_client/__version__.py b/src/uipath/llm_client/__version__.py
index 7da9182..c5bf7a8 100644
--- a/src/uipath/llm_client/__version__.py
+++ b/src/uipath/llm_client/__version__.py
@@ -1,3 +1,3 @@
 __title__ = "UiPath LLM Client"
 __description__ = "A Python client for interacting with UiPath's LLM services."
-__version__ = "1.5.10"
+__version__ = "1.6.0"
diff --git a/src/uipath/llm_client/clients/anthropic/client.py b/src/uipath/llm_client/clients/anthropic/client.py
index 1c5cdfc..cb760b9 100644
--- a/src/uipath/llm_client/clients/anthropic/client.py
+++ b/src/uipath/llm_client/clients/anthropic/client.py
@@ -15,6 +15,7 @@
 """
 
 import logging
+from collections.abc import Mapping, Sequence
 from typing import Any
 
 from uipath.llm_client.httpx_client import UiPathHttpxAsyncClient, UiPathHttpxClient
@@ -64,6 +65,10 @@ class UiPathAnthropic(Anthropic):
         model_name: The Anthropic model name (e.g., "claude-3-5-sonnet-20241022").
         byo_connection_id: Bring Your Own connection ID for custom deployments.
         client_settings: UiPath client settings. Defaults to environment-based settings.
+        default_headers: Additional headers included in every request.
+        captured_headers: Case-insensitive response header name prefixes to capture.
+        timeout: Client-side request timeout in seconds.
+        max_retries: Maximum number of retries for failed requests. Defaults to 0.
         retry_config: Custom retry configuration.
         logger: Logger instance for request/response logging.
         **kwargs: Additional arguments passed to Anthropic client.
@@ -75,6 +80,10 @@ def __init__(
         model_name: str,
         byo_connection_id: str | None = None,
         client_settings: UiPathBaseSettings | None = None,
+        default_headers: Mapping[str, str] | None = None,
+        captured_headers: Sequence[str] = ("x-uipath-",),
+        timeout: float | None = None,
+        max_retries: int = 0,
         retry_config: RetryConfig | None = None,
         logger: logging.Logger | None = None,
         **kwargs: Any,
@@ -85,12 +94,13 @@ def __init__(
             model_name=model_name,
             byo_connection_id=byo_connection_id,
             api_config=api_config,
-            timeout=kwargs.pop("timeout", None),
-            max_retries=kwargs.pop("max_retries", None),
+            timeout=timeout,
+            max_retries=max_retries,
+            captured_headers=captured_headers,
             retry_config=retry_config,
             base_url=client_settings.build_base_url(model_name=model_name, api_config=api_config),
             headers={
-                **kwargs.pop("default_headers", {}),
+                **(default_headers or {}),
                 **client_settings.build_auth_headers(model_name=model_name, api_config=api_config),
             },
             logger=logger,
@@ -114,6 +124,10 @@ class UiPathAsyncAnthropic(AsyncAnthropic):
         model_name: The Anthropic model name (e.g., "claude-3-5-sonnet-20241022").
         byo_connection_id: Bring Your Own connection ID for custom deployments.
         client_settings: UiPath client settings. Defaults to environment-based settings.
+        default_headers: Additional headers included in every request.
+        captured_headers: Case-insensitive response header name prefixes to capture.
+        timeout: Client-side request timeout in seconds.
+        max_retries: Maximum number of retries for failed requests. Defaults to 0.
         retry_config: Custom retry configuration.
         logger: Logger instance for request/response logging.
         **kwargs: Additional arguments passed to AsyncAnthropic client.
@@ -125,6 +139,10 @@ def __init__(
         model_name: str,
         byo_connection_id: str | None = None,
         client_settings: UiPathBaseSettings | None = None,
+        default_headers: Mapping[str, str] | None = None,
+        captured_headers: Sequence[str] = ("x-uipath-",),
+        timeout: float | None = None,
+        max_retries: int = 0,
         retry_config: RetryConfig | None = None,
         logger: logging.Logger | None = None,
         **kwargs: Any,
@@ -135,12 +153,13 @@ def __init__(
             model_name=model_name,
             byo_connection_id=byo_connection_id,
             api_config=api_config,
-            timeout=kwargs.pop("timeout", None),
-            max_retries=kwargs.pop("max_retries", None),
+            timeout=timeout,
+            max_retries=max_retries,
+            captured_headers=captured_headers,
             retry_config=retry_config,
             base_url=client_settings.build_base_url(model_name=model_name, api_config=api_config),
             headers={
-                **kwargs.pop("default_headers", {}),
+                **(default_headers or {}),
                 **client_settings.build_auth_headers(model_name=model_name, api_config=api_config),
             },
             logger=logger,
@@ -164,6 +183,10 @@ class UiPathAnthropicBedrock(AnthropicBedrock):
         model_name: The Anthropic model name.
         byo_connection_id: Bring Your Own connection ID for custom deployments.
         client_settings: UiPath client settings. Defaults to environment-based settings.
+        default_headers: Additional headers included in every request.
+        captured_headers: Case-insensitive response header name prefixes to capture.
+        timeout: Client-side request timeout in seconds.
+        max_retries: Maximum number of retries for failed requests. Defaults to 0.
         retry_config: Custom retry configuration.
         logger: Logger instance for request/response logging.
         **kwargs: Additional arguments passed to AnthropicBedrock client.
@@ -175,6 +198,10 @@ def __init__(
         model_name: str,
         byo_connection_id: str | None = None,
         client_settings: UiPathBaseSettings | None = None,
+        default_headers: Mapping[str, str] | None = None,
+        captured_headers: Sequence[str] = ("x-uipath-",),
+        timeout: float | None = None,
+        max_retries: int = 0,
         retry_config: RetryConfig | None = None,
         logger: logging.Logger | None = None,
         **kwargs: Any,
@@ -185,12 +212,13 @@ def __init__(
             model_name=model_name,
             byo_connection_id=byo_connection_id,
             api_config=api_config,
-            timeout=kwargs.pop("timeout", None),
-            max_retries=kwargs.pop("max_retries", None),
+            timeout=timeout,
+            max_retries=max_retries,
+            captured_headers=captured_headers,
             retry_config=retry_config,
             base_url=client_settings.build_base_url(model_name=model_name, api_config=api_config),
             headers={
-                **kwargs.pop("default_headers", {}),
+                **(default_headers or {}),
                 **client_settings.build_auth_headers(model_name=model_name, api_config=api_config),
             },
             logger=logger,
@@ -216,6 +244,10 @@ class UiPathAsyncAnthropicBedrock(AsyncAnthropicBedrock):
         model_name: The Anthropic model name.
         byo_connection_id: Bring Your Own connection ID for custom deployments.
         client_settings: UiPath client settings. Defaults to environment-based settings.
+        default_headers: Additional headers included in every request.
+        captured_headers: Case-insensitive response header name prefixes to capture.
+        timeout: Client-side request timeout in seconds.
+        max_retries: Maximum number of retries for failed requests. Defaults to 0.
         retry_config: Custom retry configuration.
         logger: Logger instance for request/response logging.
         **kwargs: Additional arguments passed to AsyncAnthropicBedrock client.
@@ -227,6 +259,10 @@ def __init__(
         model_name: str,
         byo_connection_id: str | None = None,
         client_settings: UiPathBaseSettings | None = None,
+        default_headers: Mapping[str, str] | None = None,
+        captured_headers: Sequence[str] = ("x-uipath-",),
+        timeout: float | None = None,
+        max_retries: int = 0,
         retry_config: RetryConfig | None = None,
         logger: logging.Logger | None = None,
         **kwargs: Any,
@@ -237,12 +273,13 @@ def __init__(
             model_name=model_name,
             byo_connection_id=byo_connection_id,
             api_config=api_config,
-            timeout=kwargs.pop("timeout", None),
-            max_retries=kwargs.pop("max_retries", None),
+            timeout=timeout,
+            max_retries=max_retries,
+            captured_headers=captured_headers,
             retry_config=retry_config,
             base_url=client_settings.build_base_url(model_name=model_name, api_config=api_config),
             headers={
-                **kwargs.pop("default_headers", {}),
+                **(default_headers or {}),
                 **client_settings.build_auth_headers(model_name=model_name, api_config=api_config),
             },
             logger=logger,
@@ -268,6 +305,10 @@ class UiPathAnthropicVertex(AnthropicVertex):
         model_name: The Anthropic model name.
         byo_connection_id: Bring Your Own connection ID for custom deployments.
         client_settings: UiPath client settings. Defaults to environment-based settings.
+        default_headers: Additional headers included in every request.
+        captured_headers: Case-insensitive response header name prefixes to capture.
+        timeout: Client-side request timeout in seconds.
+        max_retries: Maximum number of retries for failed requests. Defaults to 0.
         retry_config: Custom retry configuration.
         logger: Logger instance for request/response logging.
         **kwargs: Additional arguments passed to AnthropicVertex client.
@@ -279,6 +320,10 @@ def __init__(
         model_name: str,
         byo_connection_id: str | None = None,
         client_settings: UiPathBaseSettings | None = None,
+        default_headers: Mapping[str, str] | None = None,
+        captured_headers: Sequence[str] = ("x-uipath-",),
+        timeout: float | None = None,
+        max_retries: int = 0,
         retry_config: RetryConfig | None = None,
         logger: logging.Logger | None = None,
         **kwargs: Any,
@@ -289,12 +334,13 @@ def __init__(
             model_name=model_name,
             byo_connection_id=byo_connection_id,
             api_config=api_config,
-            timeout=kwargs.pop("timeout", None),
-            max_retries=kwargs.pop("max_retries", None),
+            timeout=timeout,
+            max_retries=max_retries,
+            captured_headers=captured_headers,
             retry_config=retry_config,
             base_url=client_settings.build_base_url(model_name=model_name, api_config=api_config),
             headers={
-                **kwargs.pop("default_headers", {}),
+                **(default_headers or {}),
                 **client_settings.build_auth_headers(model_name=model_name, api_config=api_config),
             },
             logger=logger,
@@ -320,6 +366,10 @@ class UiPathAsyncAnthropicVertex(AsyncAnthropicVertex):
         model_name: The Anthropic model name.
         byo_connection_id: Bring Your Own connection ID for custom deployments.
         client_settings: UiPath client settings. Defaults to environment-based settings.
+        default_headers: Additional headers included in every request.
+        captured_headers: Case-insensitive response header name prefixes to capture.
+        timeout: Client-side request timeout in seconds.
+        max_retries: Maximum number of retries for failed requests. Defaults to 0.
         retry_config: Custom retry configuration.
         logger: Logger instance for request/response logging.
         **kwargs: Additional arguments passed to AsyncAnthropicVertex client.
@@ -331,6 +381,10 @@ def __init__(
         model_name: str,
         byo_connection_id: str | None = None,
         client_settings: UiPathBaseSettings | None = None,
+        default_headers: Mapping[str, str] | None = None,
+        captured_headers: Sequence[str] = ("x-uipath-",),
+        timeout: float | None = None,
+        max_retries: int = 0,
         retry_config: RetryConfig | None = None,
         logger: logging.Logger | None = None,
         **kwargs: Any,
@@ -341,12 +395,13 @@ def __init__(
             model_name=model_name,
             byo_connection_id=byo_connection_id,
             api_config=api_config,
-            timeout=kwargs.pop("timeout", None),
-            max_retries=kwargs.pop("max_retries", None),
+            timeout=timeout,
+            max_retries=max_retries,
+            captured_headers=captured_headers,
             retry_config=retry_config,
             base_url=client_settings.build_base_url(model_name=model_name, api_config=api_config),
             headers={
-                **kwargs.pop("default_headers", {}),
+                **(default_headers or {}),
                 **client_settings.build_auth_headers(model_name=model_name, api_config=api_config),
             },
             logger=logger,
@@ -372,6 +427,10 @@ class UiPathAnthropicFoundry(AnthropicFoundry):
         model_name: The Anthropic model name.
         byo_connection_id: Bring Your Own connection ID for custom deployments.
         client_settings: UiPath client settings. Defaults to environment-based settings.
+        default_headers: Additional headers included in every request.
+        captured_headers: Case-insensitive response header name prefixes to capture.
+        timeout: Client-side request timeout in seconds.
+        max_retries: Maximum number of retries for failed requests. Defaults to 0.
         retry_config: Custom retry configuration.
         logger: Logger instance for request/response logging.
         **kwargs: Additional arguments passed to AnthropicFoundry client.
@@ -383,6 +442,10 @@ def __init__(
         model_name: str,
         byo_connection_id: str | None = None,
         client_settings: UiPathBaseSettings | None = None,
+        default_headers: Mapping[str, str] | None = None,
+        captured_headers: Sequence[str] = ("x-uipath-",),
+        timeout: float | None = None,
+        max_retries: int = 0,
         retry_config: RetryConfig | None = None,
         logger: logging.Logger | None = None,
         **kwargs: Any,
@@ -393,12 +456,13 @@ def __init__(
             model_name=model_name,
             byo_connection_id=byo_connection_id,
             api_config=api_config,
-            timeout=kwargs.pop("timeout", None),
-            max_retries=kwargs.pop("max_retries", None),
+            timeout=timeout,
+            max_retries=max_retries,
+            captured_headers=captured_headers,
             retry_config=retry_config,
             base_url=client_settings.build_base_url(model_name=model_name, api_config=api_config),
             headers={
-                **kwargs.pop("default_headers", {}),
+                **(default_headers or {}),
                 **client_settings.build_auth_headers(model_name=model_name, api_config=api_config),
             },
             logger=logger,
@@ -422,6 +486,10 @@ class UiPathAsyncAnthropicFoundry(AsyncAnthropicFoundry):
         model_name: The Anthropic model name.
         byo_connection_id: Bring Your Own connection ID for custom deployments.
         client_settings: UiPath client settings. Defaults to environment-based settings.
+        default_headers: Additional headers included in every request.
+        captured_headers: Case-insensitive response header name prefixes to capture.
+        timeout: Client-side request timeout in seconds.
+        max_retries: Maximum number of retries for failed requests. Defaults to 0.
         retry_config: Custom retry configuration.
         logger: Logger instance for request/response logging.
         **kwargs: Additional arguments passed to AsyncAnthropicFoundry client.
@@ -433,6 +501,10 @@ def __init__(
         model_name: str,
         byo_connection_id: str | None = None,
         client_settings: UiPathBaseSettings | None = None,
+        default_headers: Mapping[str, str] | None = None,
+        captured_headers: Sequence[str] = ("x-uipath-",),
+        timeout: float | None = None,
+        max_retries: int = 0,
         retry_config: RetryConfig | None = None,
         logger: logging.Logger | None = None,
         **kwargs: Any,
@@ -443,12 +515,13 @@ def __init__(
             model_name=model_name,
             byo_connection_id=byo_connection_id,
             api_config=api_config,
-            timeout=kwargs.pop("timeout", None),
-            max_retries=kwargs.pop("max_retries", None),
+            timeout=timeout,
+            max_retries=max_retries,
+            captured_headers=captured_headers,
             retry_config=retry_config,
             base_url=client_settings.build_base_url(model_name=model_name, api_config=api_config),
             headers={
-                **kwargs.pop("default_headers", {}),
+                **(default_headers or {}),
                 **client_settings.build_auth_headers(model_name=model_name, api_config=api_config),
             },
             logger=logger,
diff --git a/src/uipath/llm_client/clients/google/client.py b/src/uipath/llm_client/clients/google/client.py
index 6e1ae88..f8dbdbb 100644
--- a/src/uipath/llm_client/clients/google/client.py
+++ b/src/uipath/llm_client/clients/google/client.py
@@ -1,5 +1,5 @@
 import logging
-from typing import Any
+from collections.abc import Mapping, Sequence
 
 from uipath.llm_client.httpx_client import UiPathHttpxAsyncClient, UiPathHttpxClient
 from uipath.llm_client.settings import (
@@ -27,9 +27,12 @@ def __init__(
         model_name: str,
         byo_connection_id: str | None = None,
         client_settings: UiPathBaseSettings | None = None,
+        default_headers: Mapping[str, str] | None = None,
+        captured_headers: Sequence[str] = ("x-uipath-",),
+        timeout: float | None = None,
+        max_retries: int = 0,
         retry_config: RetryConfig | None = None,
         logger: logging.Logger | None = None,
-        **kwargs: Any,
     ):
         client_settings = client_settings or get_default_client_settings()
         api_config = UiPathAPIConfig(
@@ -40,35 +43,38 @@ def __init__(
             api_version="v1beta1",
             freeze_base_url=True,
         )
+        merged_headers = {
+            **(default_headers or {}),
+            **client_settings.build_auth_headers(model_name=model_name, api_config=api_config),
+        }
+        base_url = client_settings.build_base_url(model_name=model_name, api_config=api_config)
+        auth = client_settings.build_auth_pipeline()
+
         httpx_client = UiPathHttpxClient(
             model_name=model_name,
             byo_connection_id=byo_connection_id,
             api_config=api_config,
-            timeout=kwargs.pop("timeout", None),
-            max_retries=kwargs.pop("max_retries", None),
+            timeout=timeout,
+            max_retries=max_retries,
+            captured_headers=captured_headers,
             retry_config=retry_config,
-            base_url=client_settings.build_base_url(model_name=model_name, api_config=api_config),
-            headers={
-                **kwargs.pop("default_headers", {}),
-                **client_settings.build_auth_headers(model_name=model_name, api_config=api_config),
-            },
+            base_url=base_url,
+            headers=merged_headers,
             logger=logger,
-            auth=client_settings.build_auth_pipeline(),
+            auth=auth,
         )
         httpx_async_client = UiPathHttpxAsyncClient(
             model_name=model_name,
             byo_connection_id=byo_connection_id,
             api_config=api_config,
-            timeout=kwargs.pop("timeout", None),
-            max_retries=kwargs.pop("max_retries", None),
+            timeout=timeout,
+            max_retries=max_retries,
+            captured_headers=captured_headers,
             retry_config=retry_config,
-            base_url=client_settings.build_base_url(model_name=model_name, api_config=api_config),
-            headers={
-                **kwargs.pop("default_headers", {}),
-                **client_settings.build_auth_headers(model_name=model_name, api_config=api_config),
-            },
+            base_url=base_url,
+            headers=merged_headers,
             logger=logger,
-            auth=client_settings.build_auth_pipeline(),
+            auth=auth,
         )
         super().__init__(
             api_key="PLACEHOLDER",
diff --git a/src/uipath/llm_client/clients/normalized/__init__.py b/src/uipath/llm_client/clients/normalized/__init__.py
new file mode 100644
index 0000000..43d1031
--- /dev/null
+++ b/src/uipath/llm_client/clients/normalized/__init__.py
@@ -0,0 +1,15 @@
+from uipath.llm_client.clients.normalized.client import (
+    NormalizedCompletions,
+    NormalizedEmbeddings,
+    OutputFormatType,
+    ToolType,
+    UiPathNormalizedClient,
+)
+
+__all__ = [
+    "NormalizedCompletions",
+    "NormalizedEmbeddings",
+    "OutputFormatType",
+    "ToolType",
+    "UiPathNormalizedClient",
+]
diff --git a/src/uipath/llm_client/clients/normalized/client.py b/src/uipath/llm_client/clients/normalized/client.py
new file mode 100644
index 0000000..ede850a
--- /dev/null
+++ b/src/uipath/llm_client/clients/normalized/client.py
@@ -0,0 +1,681 @@
+"""Normalized (provider-agnostic) LLM client for UiPath LLM services.
+
+This module provides a lightweight HTTP client that speaks directly to UiPath's
+normalized API endpoint, offering a consistent OpenAI-compatible interface across
+all supported LLM providers (OpenAI, Google Gemini, Anthropic, etc.).
+
+Unlike the vendor-specific clients (UiPathOpenAI, UiPathAnthropic, UiPathGoogle),
+this client requires no vendor SDK — it makes HTTP requests directly using the
+UiPath httpx transport layer.
+
+Example:
+    >>> from uipath.llm_client.clients.normalized import UiPathNormalizedClient
+    >>>
+    >>> client = UiPathNormalizedClient(model_name="gpt-4o-2024-11-20")
+    >>>
+    >>> # Chat completions
+    >>> response = client.completions.create(
+    ...     messages=[{"role": "user", "content": "Hello!"}]
+    ... )
+    >>> print(response["choices"][0]["message"]["content"])
+    >>>
+    >>> # Structured output with Pydantic
+    >>> from pydantic import BaseModel
+    >>> class Capital(BaseModel):
+    ...     capital: str
+    ...     country: str
+    >>> response = client.completions.create(
+    ...     messages=[{"role": "user", "content": "Capital of France?"}],
+    ...     output_format=Capital,
+    ... )
+    >>>
+    >>> # Embeddings
+    >>> embed_client = UiPathNormalizedClient(model_name="text-embedding-3-large")
+    >>> result = embed_client.embeddings.create(input=["Hello world"])
+    >>> print(result["data"][0]["embedding"])
+"""
+
+import json
+import logging
+from collections.abc import AsyncIterator, Callable, Iterator, Mapping, Sequence
+from typing import Any
+
+from pydantic import BaseModel
+
+from uipath.llm_client.httpx_client import UiPathHttpxAsyncClient, UiPathHttpxClient
+from uipath.llm_client.settings import (
+    UiPathAPIConfig,
+    UiPathBaseSettings,
+    get_default_client_settings,
+)
+from uipath.llm_client.settings.constants import ApiType, RoutingMode
+from uipath.llm_client.utils.retry import RetryConfig
+
+# Type alias for tool definitions accepted by the normalized API.
+# Each tool can be:
+#   - A dict in the flat OpenAI function format: {"name": ..., "description": ..., "parameters": ...}
+#   - A Pydantic BaseModel subclass (auto-converted to the flat format)
+#   - A callable with type annotations (auto-converted via docstring + signature)
+ToolType = dict[str, Any] | type[BaseModel] | Callable[..., Any]
+
+# Type alias for structured output format.
+# Can be:
+#   - A Pydantic BaseModel subclass (auto-converted to json_schema response_format)
+#   - A dict (passed through as-is to the API's response_format field)
+OutputFormatType = type[BaseModel] | dict[str, Any]
+
+
+def _pydantic_to_tool(model: type[BaseModel]) -> dict[str, Any]:
+    """Build the flat tool dict (name, description, parameters) from a Pydantic model class."""
+    parameters = model.model_json_schema()
+    # The top-level "title" is removed from the schema; the model's class name is sent
+    # separately under the "name" key.
+    parameters.pop("title", None)
+    tool_name = model.__name__
+    description = model.__doc__ or tool_name
+
+    return {"name": tool_name, "description": description, "parameters": parameters}
+
+
+def _callable_to_tool(func: Callable[..., Any]) -> dict[str, Any]:
+    """Convert a callable to the flat normalized API tool format using its signature.
+
+    ``str``/``int``/``float``/``bool`` annotations map to their JSON Schema type names; any
+    other or missing annotation maps to ``"string"``. Parameters without a default are listed
+    as required. ``*args``/``**kwargs`` are skipped: they are not single named arguments.
+    """
+    import inspect
+
+    # Built once per call instead of once per parameter.
+    type_map = {str: "string", int: "integer", float: "number", bool: "boolean"}
+    variadic = (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD)
+    properties: dict[str, Any] = {}
+    required: list[str] = []
+    for name, param in inspect.signature(func).parameters.items():
+        if param.kind in variadic:
+            continue
+        # Parameter.empty is not a key of type_map, so unannotated params also get "string".
+        properties[name] = {"type": type_map.get(param.annotation, "string")}
+        if param.default is inspect.Parameter.empty:
+            required.append(name)
+
+    return {
+        "name": func.__name__,
+        "description": func.__doc__ or func.__name__,
+        "parameters": {"type": "object", "properties": properties, "required": required},
+    }
+
+
+def _resolve_tool(tool: ToolType) -> dict[str, Any]:
+    """Return the flat tool dict for a dict, Pydantic model class, or callable tool spec."""
+    if isinstance(tool, dict):
+        # Dicts are taken to be in wire format already; the same object is returned.
+        return tool
+    is_model_class = isinstance(tool, type) and issubclass(tool, BaseModel)
+    if not is_model_class and not callable(tool):
+        raise TypeError(f"Unsupported tool type: {type(tool)}")
+    return _pydantic_to_tool(tool) if is_model_class else _callable_to_tool(tool)
+
+
+def _make_strict_schema(schema: dict[str, Any]) -> dict[str, Any]:
+    """Set ``additionalProperties: false`` on every object schema, recursively and in place.
+
+    An existing ``additionalProperties`` value is left untouched. The ``json_schema``
+    response format of the normalized API needs this when ``strict`` is true.
+    """
+    if schema.get("type") == "object":
+        schema.setdefault("additionalProperties", False)
+    for child in schema.values():
+        # A bare dict is wrapped in a one-element list so both shapes share one loop.
+        candidates = [child] if isinstance(child, dict) else child
+        if isinstance(candidates, list):
+            for nested in candidates:
+                if isinstance(nested, dict):
+                    _make_strict_schema(nested)
+    return schema
+
+
+def _resolve_output_format(output_format: OutputFormatType) -> dict[str, Any]:
+    """Translate an output format spec into the payload for the API's response_format field."""
+    if isinstance(output_format, dict):
+        # Dicts are forwarded untouched (the same object is returned).
+        return output_format
+    if not (isinstance(output_format, type) and issubclass(output_format, BaseModel)):
+        raise TypeError(f"Unsupported output_format type: {type(output_format)}")
+
+    # Pydantic model class: emit a strict json_schema entry named after the class.
+    strict_schema = output_format.model_json_schema()
+    strict_schema.pop("title", None)
+    _make_strict_schema(strict_schema)
+    json_schema = {"name": output_format.__name__, "strict": True, "schema": strict_schema}
+    return {
+        "type": "json_schema",
+        "json_schema": json_schema,
+    }
+
+
+class NormalizedCompletions:
+    """Chat completions sub-resource for :class:`UiPathNormalizedClient`.
+
+    Provides sync/async chat completion and streaming methods backed by
+    UiPath's normalized (provider-agnostic) completions endpoint.
+
+    Accessed via ``client.completions``.
+    """
+
+    def __init__(
+        self,
+        model_name: str,
+        sync_client: UiPathHttpxClient,
+        async_client: UiPathHttpxAsyncClient,
+    ) -> None:
+        """Keep references to the model name and the sync and async HTTP clients."""
+        self._sync_client, self._async_client = sync_client, async_client
+        self._model_name = model_name
+
+    def _build_body(
+        self,
+        messages: list[dict[str, Any]],
+        *,
+        tools: list[ToolType] | None = None,
+        output_format: OutputFormatType | None = None,
+        tool_choice: str | dict[str, Any] | None = None,
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+        top_p: float | None = None,
+        stop: list[str] | str | None = None,
+        n: int | None = None,
+        presence_penalty: float | None = None,
+        frequency_penalty: float | None = None,
+        **kwargs: Any,
+    ) -> dict[str, Any]:
+        """Assemble the JSON request body for a chat completion call.
+
+        ``model`` and ``messages`` are always set. Every other named field is included only
+        when it is not ``None``; ``tools`` entries and ``output_format`` are converted first,
+        and ``output_format`` is sent under the ``response_format`` key. Extra ``kwargs`` are
+        merged last, so they override any same-named field.
+        """
+        # Insertion order here is the order in which the fields appear in the body.
+        optional_fields: dict[str, Any] = {
+            "tools": None if tools is None else [_resolve_tool(t) for t in tools],
+            "response_format": (
+                None if output_format is None else _resolve_output_format(output_format)
+            ),
+            "tool_choice": tool_choice,
+            "temperature": temperature,
+            "max_tokens": max_tokens,
+            "top_p": top_p,
+            "stop": stop,
+            "n": n,
+            "presence_penalty": presence_penalty,
+            "frequency_penalty": frequency_penalty,
+        }
+        present = {key: value for key, value in optional_fields.items() if value is not None}
+
+        return {"model": self._model_name, "messages": messages, **present, **kwargs}
+
+    def create(
+        self,
+        messages: list[dict[str, Any]],
+        *,
+        tools: list[ToolType] | None = None,
+        output_format: OutputFormatType | None = None,
+        tool_choice: str | dict[str, Any] | None = None,
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+        top_p: float | None = None,
+        stop: list[str] | str | None = None,
+        n: int | None = None,
+        presence_penalty: float | None = None,
+        frequency_penalty: float | None = None,
+        **kwargs: Any,
+    ) -> dict[str, Any]:
+        """Send a synchronous chat completion request.
+
+        Args:
+            messages: List of message dicts in OpenAI format, e.g.
+                ``[{"role": "user", "content": "Hello!"}]``.
+            tools: Tool definitions for function calling. Each element can be:
+                - A dict in flat format: ``{"name": ..., "description": ..., "parameters": ...}``
+                - A Pydantic ``BaseModel`` subclass (auto-converted)
+                - A callable with type annotations (auto-converted)
+            output_format: Structured output format. Can be:
+                - A Pydantic ``BaseModel`` subclass (auto-converted to ``json_schema``)
+                - A dict passed as-is to ``response_format``
+            tool_choice: Tool selection strategy (e.g., ``"auto"``, ``"required"``,
+                or ``{"type": "tool", "name": "..."}``).
+            temperature: Sampling temperature (0.0–2.0).
+            max_tokens: Maximum tokens in the response.
+            top_p: Nucleus sampling probability mass.
+            stop: Stop sequence(s) to end generation.
+            n: Number of completions to generate.
+            presence_penalty: Penalty for repeated tokens (-2.0 to 2.0).
+            frequency_penalty: Penalty based on token frequency (-2.0 to 2.0).
+            **kwargs: Additional parameters forwarded to the API.
+
+        Returns:
+            The API response as a dict with OpenAI-compatible structure.
+        """
+        body = self._build_body(
+            messages,
+            tools=tools,
+            output_format=output_format,
+            tool_choice=tool_choice,
+            temperature=temperature,
+            max_tokens=max_tokens,
+            top_p=top_p,
+            stop=stop,
+            n=n,
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            **kwargs,
+        )
+        response = self._sync_client.post("", json=body)
+        response.raise_for_status()
+        return response.json()  # type: ignore[no-any-return]
+
+    async def acreate(
+        self,
+        messages: list[dict[str, Any]],
+        *,
+        tools: list[ToolType] | None = None,
+        output_format: OutputFormatType | None = None,
+        tool_choice: str | dict[str, Any] | None = None,
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+        top_p: float | None = None,
+        stop: list[str] | str | None = None,
+        n: int | None = None,
+        presence_penalty: float | None = None,
+        frequency_penalty: float | None = None,
+        **kwargs: Any,
+    ) -> dict[str, Any]:
+        """Send an asynchronous chat completion request.
+
+        Args:
+            messages: List of message dicts in OpenAI format.
+            tools: Tool definitions (see ``create()``).
+            output_format: Structured output format (see ``create()``).
+            tool_choice: Tool selection strategy.
+            temperature: Sampling temperature (0.0–2.0).
+            max_tokens: Maximum tokens in the response.
+            top_p: Nucleus sampling probability mass.
+            stop: Stop sequence(s).
+            n: Number of completions.
+            presence_penalty: Penalty for repeated tokens.
+            frequency_penalty: Penalty based on token frequency.
+            **kwargs: Additional parameters forwarded to the API.
+
+        Returns:
+            The API response as a dict with OpenAI-compatible structure.
+        """
+        body = self._build_body(
+            messages,
+            tools=tools,
+            output_format=output_format,
+            tool_choice=tool_choice,
+            temperature=temperature,
+            max_tokens=max_tokens,
+            top_p=top_p,
+            stop=stop,
+            n=n,
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            **kwargs,
+        )
+        response = await self._async_client.post("", json=body)
+        response.raise_for_status()
+        return response.json()  # type: ignore[no-any-return]
+
+    def stream(
+        self,
+        messages: list[dict[str, Any]],
+        *,
+        tools: list[ToolType] | None = None,
+        output_format: OutputFormatType | None = None,
+        tool_choice: str | dict[str, Any] | None = None,
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+        top_p: float | None = None,
+        stop: list[str] | str | None = None,
+        n: int | None = None,
+        presence_penalty: float | None = None,
+        frequency_penalty: float | None = None,
+        **kwargs: Any,
+    ) -> Iterator[dict[str, Any]]:
+        """Stream a synchronous chat completion, yielding parsed SSE chunks.
+
+        Args:
+            messages: List of message dicts in OpenAI format.
+            tools: Tool definitions (see ``create()``).
+            output_format: Structured output format (see ``create()``).
+            tool_choice: Tool selection strategy.
+            temperature: Sampling temperature (0.0–2.0).
+            max_tokens: Maximum tokens in the response.
+            top_p: Nucleus sampling probability mass.
+            stop: Stop sequence(s).
+            n: Number of completions.
+            presence_penalty: Penalty for repeated tokens.
+            frequency_penalty: Penalty based on token frequency.
+            **kwargs: Additional parameters forwarded to the API.
+
+        Yields:
+            Parsed JSON dicts for each SSE chunk from the response stream.
+        """
+        body = self._build_body(
+            messages,
+            tools=tools,
+            output_format=output_format,
+            tool_choice=tool_choice,
+            temperature=temperature,
+            max_tokens=max_tokens,
+            top_p=top_p,
+            stop=stop,
+            n=n,
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            **kwargs,
+        )
+        with self._sync_client.stream("POST", "", json=body) as response:
+            response.raise_for_status()
+            for line in response.iter_lines():
+                if line.startswith("data:"):
+                    line = line[5:].strip()
+                if not line or line == "[DONE]":
+                    continue
+                try:
+                    yield json.loads(line)
+                except json.JSONDecodeError:
+                    continue
+
+    async def astream(
+        self,
+        messages: list[dict[str, Any]],
+        *,
+        tools: list[ToolType] | None = None,
+        output_format: OutputFormatType | None = None,
+        tool_choice: str | dict[str, Any] | None = None,
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+        top_p: float | None = None,
+        stop: list[str] | str | None = None,
+        n: int | None = None,
+        presence_penalty: float | None = None,
+        frequency_penalty: float | None = None,
+        **kwargs: Any,
+    ) -> AsyncIterator[dict[str, Any]]:
+        """Stream an asynchronous chat completion, yielding parsed SSE chunks.
+
+        Args:
+            messages: List of message dicts in OpenAI format.
+            tools: Tool definitions (see ``create()``).
+            output_format: Structured output format (see ``create()``).
+            tool_choice: Tool selection strategy.
+            temperature: Sampling temperature (0.0–2.0).
+            max_tokens: Maximum tokens in the response.
+            top_p: Nucleus sampling probability mass.
+            stop: Stop sequence(s).
+            n: Number of completions.
+            presence_penalty: Penalty for repeated tokens.
+            frequency_penalty: Penalty based on token frequency.
+            **kwargs: Additional parameters forwarded to the API.
+
+        Yields:
+            Parsed JSON dicts for each SSE chunk from the response stream.
+        """
+        body = self._build_body(
+            messages,
+            tools=tools,
+            output_format=output_format,
+            tool_choice=tool_choice,
+            temperature=temperature,
+            max_tokens=max_tokens,
+            top_p=top_p,
+            stop=stop,
+            n=n,
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            **kwargs,
+        )
+        async with self._async_client.stream("POST", "", json=body) as response:
+            response.raise_for_status()
+            async for line in response.aiter_lines():
+                if line.startswith("data:"):
+                    line = line[5:].strip()
+                if not line or line == "[DONE]":
+                    continue
+                try:
+                    yield json.loads(line)
+                except json.JSONDecodeError:
+                    continue
+
+
+class NormalizedEmbeddings:
+    """Embeddings sub-resource for :class:`UiPathNormalizedClient`.
+
+    Offers a blocking ``create`` and an awaitable ``acreate`` on top of UiPath's
+    normalized (provider-agnostic) embeddings endpoint.
+
+    Accessed via ``client.embeddings``.
+
+    Response structure::
+
+        {
+            "data": [{"embedding": [0.1, 0.2, ...], "index": 0}],
+            "usage": {"prompt_tokens": N, "total_tokens": N}
+        }
+    """
+
+    def __init__(
+        self,
+        model_name: str,
+        sync_client: UiPathHttpxClient,
+        async_client: UiPathHttpxAsyncClient,
+    ) -> None:
+        self._sync_client = sync_client  # transport behind create()
+        self._async_client = async_client  # transport behind acreate()
+        self._model_name = model_name  # kept for reference; never written into the body
+
+    def _build_body(
+        self,
+        input: str | list[str],
+        *,
+        encoding_format: str | None = None,
+        dimensions: int | None = None,
+        **kwargs: Any,
+    ) -> dict[str, Any]:
+        """Assemble the JSON request body for an embeddings call."""
+        # No "model" key is sent: the normalized embeddings endpoint resolves the model
+        # from routing headers (matching UiPathEmbeddings in the langchain client).
+        # The API requires "input" to be a list, so a lone string is wrapped.
+        texts = [input] if isinstance(input, str) else input
+        payload: dict[str, Any] = {"input": texts}
+        optional = {"encoding_format": encoding_format, "dimensions": dimensions}
+        payload.update({name: value for name, value in optional.items() if value is not None})
+        payload.update(kwargs)
+        return payload
+
+    def create(
+        self,
+        input: str | list[str],
+        *,
+        encoding_format: str | None = None,
+        dimensions: int | None = None,
+        **kwargs: Any,
+    ) -> dict[str, Any]:
+        """Request embeddings and block until the response arrives.
+
+        Args:
+            input: One string, or a list of strings, to embed.
+            encoding_format: Encoding of the returned vectors
+                (e.g., ``"float"``, ``"base64"``).
+            dimensions: Desired size of the output vectors
+                (only supported by some models).
+            **kwargs: Extra body parameters, forwarded to the API unchanged.
+
+        Returns:
+            The decoded JSON response. Vectors are found at
+            ``response["data"][i]["embedding"]``.
+        """
+        payload = self._build_body(
+            input, encoding_format=encoding_format, dimensions=dimensions, **kwargs
+        )
+        http_response = self._sync_client.post("", json=payload)
+        http_response.raise_for_status()
+        return http_response.json()  # type: ignore[no-any-return]
+
+    async def acreate(
+        self,
+        input: str | list[str],
+        *,
+        encoding_format: str | None = None,
+        dimensions: int | None = None,
+        **kwargs: Any,
+    ) -> dict[str, Any]:
+        """Request embeddings asynchronously (awaitable variant of ``create``).
+
+        Args:
+            input: One string, or a list of strings, to embed.
+            encoding_format: Encoding of the returned vectors.
+            dimensions: Desired size of the output vectors.
+            **kwargs: Extra body parameters, forwarded to the API unchanged.
+
+        Returns:
+            The decoded JSON response. Vectors are found at
+            ``response["data"][i]["embedding"]``.
+        """
+        payload = self._build_body(
+            input, encoding_format=encoding_format, dimensions=dimensions, **kwargs
+        )
+        http_response = await self._async_client.post("", json=payload)
+        http_response.raise_for_status()
+        return http_response.json()  # type: ignore[no-any-return]
+
+
+class UiPathNormalizedClient:
+    """Provider-agnostic LLM client using UiPath's normalized API.
+
+    Every request goes through UiPath's normalized endpoint, which exposes one
+    OpenAI-compatible interface for all supported LLM providers (OpenAI, Google
+    Gemini, Anthropic on Bedrock/Vertex, etc.).
+
+    In contrast to the vendor-specific clients (UiPathOpenAI, UiPathAnthropic,
+    UiPathGoogle), no vendor SDK is required or wrapped: the client talks to the
+    UiPath normalized HTTP endpoint directly over the shared httpx transport layer.
+
+    Sub-resources:
+        - ``completions``: Chat completion methods (``create``, ``acreate``, ``stream``,
+          ``astream``).
+        - ``embeddings``: Embedding methods (``create``, ``acreate``).
+
+    Args:
+        model_name: The model identifier (e.g., "gpt-4o-2024-11-20", "gemini-2.5-flash",
+            "anthropic.claude-haiku-4-5-20251001-v1:0").
+        byo_connection_id: Bring Your Own connection ID for custom model deployments.
+        client_settings: UiPath client settings. Defaults to environment-based settings.
+        default_headers: Additional headers included in every request.
+        captured_headers: Case-insensitive response header name prefixes to capture.
+            Captured headers are stored in a ContextVar and can be retrieved with
+            ``get_captured_response_headers()``. Defaults to ``("x-uipath-",)``.
+        timeout: Client-side request timeout in seconds.
+        max_retries: Maximum number of retries for failed requests. Defaults to 0.
+        retry_config: Custom retry configuration (backoff strategy, retryable errors).
+        logger: Logger instance for request/response logging.
+
+    Example:
+        >>> client = UiPathNormalizedClient(model_name="gpt-4o-2024-11-20")
+        >>> response = client.completions.create(
+        ...     messages=[{"role": "user", "content": "What is 2+2?"}],
+        ...     temperature=0.0,
+        ... )
+        >>> print(response["choices"][0]["message"]["content"])
+        >>>
+        >>> # Structured output with Pydantic
+        >>> from pydantic import BaseModel
+        >>> class Answer(BaseModel):
+        ...     result: int
+        >>> response = client.completions.create(
+        ...     messages=[{"role": "user", "content": "What is 2+2?"}],
+        ...     output_format=Answer,
+        ... )
+        >>>
+        >>> embed_client = UiPathNormalizedClient(model_name="text-embedding-3-large")
+        >>> result = embed_client.embeddings.create(input="Hello world")
+        >>> print(result["data"][0]["embedding"])
+    """
+
+    completions: NormalizedCompletions
+    embeddings: NormalizedEmbeddings
+
+    def __init__(
+        self,
+        *,
+        model_name: str,
+        byo_connection_id: str | None = None,
+        client_settings: UiPathBaseSettings | None = None,
+        default_headers: Mapping[str, str] | None = None,
+        captured_headers: Sequence[str] = ("x-uipath-",),
+        timeout: float | None = None,
+        max_retries: int = 0,
+        retry_config: RetryConfig | None = None,
+        logger: logging.Logger | None = None,
+    ):
+        """Build the HTTP transports and expose them as sub-resources.
+
+        The arguments are described in the class docstring. Each endpoint
+        (completions, embeddings) gets its own sync/async transport pair, and
+        all four transports share a single auth pipeline.
+        """
+        self.model_name = model_name
+        self.byo_connection_id = byo_connection_id
+
+        settings = client_settings or get_default_client_settings()
+        auth_pipeline = settings.build_auth_pipeline()
+
+        def _client_pair(
+            api_type: ApiType,
+        ) -> tuple[UiPathHttpxClient, UiPathHttpxAsyncClient]:
+            """Create the sync and async transports for one normalized endpoint.
+
+            Args:
+                api_type: Which normalized endpoint the transports talk to.
+
+            Returns:
+                The ``(sync, async)`` transport pair, configured identically.
+            """
+            api_config = UiPathAPIConfig(
+                api_type=api_type,
+                routing_mode=RoutingMode.NORMALIZED,
+                freeze_base_url=True,
+            )
+            # Auth headers are spread last, so they win over caller-supplied defaults.
+            request_headers = {
+                **(default_headers or {}),
+                **settings.build_auth_headers(model_name=model_name, api_config=api_config),
+            }
+            endpoint_url = settings.build_base_url(model_name=model_name, api_config=api_config)
+            # Both transports are built from exactly the same keyword arguments.
+            transport_options: dict[str, Any] = {
+                "model_name": model_name,
+                "byo_connection_id": byo_connection_id,
+                "api_config": api_config,
+                "timeout": timeout,
+                "max_retries": max_retries,
+                "retry_config": retry_config,
+                "captured_headers": captured_headers,
+                "base_url": endpoint_url,
+                "headers": request_headers,
+                "logger": logger,
+                "auth": auth_pipeline,
+            }
+            blocking = UiPathHttpxClient(**transport_options)
+            non_blocking = UiPathHttpxAsyncClient(**transport_options)
+            return blocking, non_blocking
+
+        # One transport pair per API type.
+        chat_sync, chat_async = _client_pair(ApiType.COMPLETIONS)
+        self.completions = NormalizedCompletions(model_name, chat_sync, chat_async)
+
+        embed_sync, embed_async = _client_pair(ApiType.EMBEDDINGS)
+        self.embeddings = NormalizedEmbeddings(model_name, embed_sync, embed_async)
diff --git a/src/uipath/llm_client/clients/openai/client.py b/src/uipath/llm_client/clients/openai/client.py
index cb9b82d..90c6f91 100644
--- a/src/uipath/llm_client/clients/openai/client.py
+++ b/src/uipath/llm_client/clients/openai/client.py
@@ -1,5 +1,5 @@
 import logging
-from typing import Any
+from collections.abc import Mapping, Sequence
 
 from uipath.llm_client.clients.openai.utils import OpenAIRequestHandler
 from uipath.llm_client.httpx_client import UiPathHttpxAsyncClient, UiPathHttpxClient
@@ -23,20 +23,21 @@ def __init__(
         model_name: str,
         byo_connection_id: str | None = None,
         client_settings: UiPathBaseSettings | None = None,
+        default_headers: Mapping[str, str] | None = None,
+        captured_headers: Sequence[str] = ("x-uipath-",),
+        timeout: float | None = None,
+        max_retries: int = 0,
         retry_config: RetryConfig | None = None,
         logger: logging.Logger | None = None,
-        **kwargs: Any,
     ):
         client_settings = client_settings or get_default_client_settings()
-        timeout = kwargs.pop("timeout", None)
-        max_retries = kwargs.pop("max_retries", None)
-        default_headers = kwargs.pop("default_headers", None)
         httpx_client = UiPathHttpxClient(
             model_name=model_name,
             byo_connection_id=byo_connection_id,
             timeout=timeout,
             max_retries=max_retries,
             headers=default_headers,
+            captured_headers=captured_headers,
             retry_config=retry_config,
             logger=logger,
             auth=client_settings.build_auth_pipeline(),
@@ -63,20 +64,21 @@ def __init__(
         model_name: str,
         byo_connection_id: str | None = None,
         client_settings: UiPathBaseSettings | None = None,
+        default_headers: Mapping[str, str] | None = None,
+        captured_headers: Sequence[str] = ("x-uipath-",),
+        timeout: float | None = None,
+        max_retries: int = 0,
         retry_config: RetryConfig | None = None,
         logger: logging.Logger | None = None,
-        **kwargs: Any,
     ):
         client_settings = client_settings or get_default_client_settings()
-        timeout = kwargs.pop("timeout", None)
-        max_retries = kwargs.pop("max_retries", None)
-        default_headers = kwargs.pop("default_headers", None)
         httpx_client = UiPathHttpxAsyncClient(
             model_name=model_name,
             byo_connection_id=byo_connection_id,
             timeout=timeout,
             max_retries=max_retries,
             headers=default_headers,
+            captured_headers=captured_headers,
             retry_config=retry_config,
             logger=logger,
             auth=client_settings.build_auth_pipeline(),
@@ -103,20 +105,21 @@ def __init__(
         model_name: str,
         byo_connection_id: str | None = None,
         client_settings: UiPathBaseSettings | None = None,
+        default_headers: Mapping[str, str] | None = None,
+        captured_headers: Sequence[str] = ("x-uipath-",),
+        timeout: float | None = None,
+        max_retries: int = 0,
         retry_config: RetryConfig | None = None,
         logger: logging.Logger | None = None,
-        **kwargs: Any,
     ):
         client_settings = client_settings or get_default_client_settings()
-        timeout = kwargs.pop("timeout", None)
-        max_retries = kwargs.pop("max_retries", None)
-        default_headers = kwargs.pop("default_headers", None)
         httpx_client = UiPathHttpxClient(
             model_name=model_name,
             byo_connection_id=byo_connection_id,
             timeout=timeout,
             max_retries=max_retries,
             headers=default_headers,
+            captured_headers=captured_headers,
             retry_config=retry_config,
             logger=logger,
             auth=client_settings.build_auth_pipeline(),
@@ -144,20 +147,21 @@ def __init__(
         model_name: str,
         byo_connection_id: str | None = None,
         client_settings: UiPathBaseSettings | None = None,
+        default_headers: Mapping[str, str] | None = None,
+        captured_headers: Sequence[str] = ("x-uipath-",),
+        timeout: float | None = None,
+        max_retries: int = 0,
         retry_config: RetryConfig | None = None,
         logger: logging.Logger | None = None,
-        **kwargs: Any,
     ):
         client_settings = client_settings or get_default_client_settings()
-        timeout = kwargs.pop("timeout", None)
-        max_retries = kwargs.pop("max_retries", None)
-        default_headers = kwargs.pop("default_headers", None)
         httpx_client = UiPathHttpxAsyncClient(
             model_name=model_name,
             byo_connection_id=byo_connection_id,
             timeout=timeout,
             max_retries=max_retries,
             headers=default_headers,
+            captured_headers=captured_headers,
             retry_config=retry_config,
             logger=logger,
             auth=client_settings.build_auth_pipeline(),
diff --git a/tests/cassettes.db b/tests/cassettes.db
index f98a4a6..f6d7f0e 100644
Binary files a/tests/cassettes.db and b/tests/cassettes.db differ
diff --git a/tests/core/normalized/__init__.py b/tests/core/normalized/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/core/normalized/test_normalized_client.py b/tests/core/normalized/test_normalized_client.py
new file mode 100644
index 0000000..e484231
--- /dev/null
+++ b/tests/core/normalized/test_normalized_client.py
@@ -0,0 +1,292 @@
+"""Integration tests for UiPathNormalizedClient.
+
+Tests the provider-agnostic normalized client against three providers:
+- OpenAI (gpt-5.2-2025-12-11)
+- Google Gemini (gemini-3-flash-preview)
+- Anthropic on AWS Bedrock (anthropic.claude-haiku-4-5-20251001-v1:0)
+
+Each provider is tested for:
+- Normal chat completion (sync and async)
+- Structured output via JSON schema
+- Tool calling
+- Streaming (sync and async)
+
+Embeddings are tested with text-embedding-3-large and gemini-embedding-001.
+"""
+
+import json
+from typing import Any
+
+import pytest
+from pydantic import BaseModel
+
+from uipath.llm_client.clients.normalized import UiPathNormalizedClient
+from uipath.llm_client.settings import UiPathBaseSettings
+
+COMPLETION_MODELS = [  # every test that uses the `client` fixture runs once per entry
+    "gpt-5.2-2025-12-11",  # OpenAI
+    "gemini-3-flash-preview",  # Google Gemini
+    "anthropic.claude-haiku-4-5-20251001-v1:0",  # Anthropic on AWS Bedrock
+]
+
+EMBEDDING_MODELS = [  # every test that uses the `embed_client` fixture runs once per entry
+    "text-embedding-3-large",
+    "gemini-embedding-001",
+]
+
+
+# --- Tool definitions: dict, Pydantic, and callable forms ---
+
+WEATHER_TOOL_DICT: dict[str, Any] = {  # flat tool form: name, description, parameters
+    "name": "get_weather",
+    "description": "Get the current weather for a given location.",
+    "parameters": {
+        "type": "object",
+        "properties": {
+            "location": {
+                "type": "string",
+                "description": "The city and country, e.g. 'Paris, France'.",
+            },
+        },
+        "required": ["location"],
+    },
+}
+
+
+class GetWeather(BaseModel):  # weather tool declared as a Pydantic model
+    """Get the current weather for a given location."""
+
+    location: str  # same single string argument as in WEATHER_TOOL_DICT
+
+
+def get_weather(location: str) -> str:  # weather tool declared as a plain callable
+    """Get the current weather for a given location."""
+    return f"Sunny in {location}"  # canned text; not derived from any real lookup
+
+
+# --- Structured output: dict and Pydantic forms ---
+
+CAPITAL_SCHEMA_DICT: dict[str, Any] = {  # hand-written response_format payload
+    "type": "json_schema",
+    "json_schema": {
+        "name": "CapitalAnswer",
+        "strict": True,
+        "schema": {
+            "type": "object",
+            "properties": {
+                "capital": {"type": "string"},
+                "country": {"type": "string"},
+            },
+            "required": ["capital", "country"],
+            "additionalProperties": False,
+        },
+    },
+}
+
+
+class CapitalAnswer(BaseModel):  # Pydantic counterpart of CAPITAL_SCHEMA_DICT
+    capital: str
+    country: str
+
+
+@pytest.fixture(params=COMPLETION_MODELS)
+def model_name(request: pytest.FixtureRequest) -> str:  # one value per COMPLETION_MODELS entry
+    return request.param  # type: ignore[no-any-return]
+
+
+@pytest.fixture(params=EMBEDDING_MODELS)
+def embedding_model_name(request: pytest.FixtureRequest) -> str:  # one per EMBEDDING_MODELS entry
+    return request.param  # type: ignore[no-any-return]
+
+
+_DEBUG_HEADERS = {"X-UiPath-LLMGateway-AllowFull4xxResponse": "true"}  # added to every request
+
+
+@pytest.fixture
+def client(model_name: str, client_settings: UiPathBaseSettings) -> UiPathNormalizedClient:
+    """Build a normalized client for the completion model under test."""
+    normalized_client = UiPathNormalizedClient(
+        model_name=model_name, client_settings=client_settings, default_headers=_DEBUG_HEADERS
+    )
+    return normalized_client
+
+
+@pytest.fixture
+def embed_client(
+    embedding_model_name: str, client_settings: UiPathBaseSettings
+) -> UiPathNormalizedClient:
+    """Build a normalized client for the embedding model under test."""
+    model = embedding_model_name
+    return UiPathNormalizedClient(
+        model_name=model, client_settings=client_settings, default_headers=_DEBUG_HEADERS
+    )
+
+
+@pytest.mark.vcr
+class TestNormalizedClientCompletions:
+    def test_create(self, client: UiPathNormalizedClient) -> None:
+        response = client.completions.create(
+            messages=[{"role": "user", "content": "Say 'hello' and nothing else."}],
+            max_tokens=500,
+        )
+
+        assert "choices" in response
+        assert len(response["choices"]) > 0
+        message = response["choices"][0]["message"]
+        assert message["role"] == "assistant"
+        # Gemini 3 thinking models may omit content when only thinking blocks are present
+        assert message.get("content") or message.get("tool_calls")
+
+    @pytest.mark.asyncio
+    async def test_acreate(self, client: UiPathNormalizedClient) -> None:
+        response = await client.completions.acreate(
+            messages=[{"role": "user", "content": "Say 'hello' and nothing else."}],
+            max_tokens=500,
+        )
+
+        assert "choices" in response
+        assert len(response["choices"]) > 0
+        message = response["choices"][0]["message"]
+        assert message["role"] == "assistant"
+        assert message.get("content") or message.get("tool_calls")
+
+    def test_structured_output_pydantic(self, client: UiPathNormalizedClient) -> None:
+        response = client.completions.create(
+            messages=[{"role": "user", "content": "What is the capital of France?"}],
+            output_format=CapitalAnswer,
+            max_tokens=100,
+        )
+
+        assert "choices" in response
+        content = response["choices"][0]["message"]["content"]
+        assert content
+        data = json.loads(content)
+        assert "capital" in data
+        assert "Paris" in data["capital"]
+
+    @pytest.mark.asyncio
+    async def test_astructured_output_dict(self, client: UiPathNormalizedClient) -> None:
+        response = await client.completions.acreate(
+            messages=[{"role": "user", "content": "What is the capital of France?"}],
+            output_format=CAPITAL_SCHEMA_DICT,
+            max_tokens=100,
+        )
+
+        assert "choices" in response
+        content = response["choices"][0]["message"]["content"]
+        assert content
+        data = json.loads(content)
+        assert "capital" in data
+        assert "Paris" in data["capital"]
+
+    def test_tool_calling_dict(self, client: UiPathNormalizedClient) -> None:
+        response = client.completions.create(
+            messages=[{"role": "user", "content": "What's the weather in Paris?"}],
+            tools=[WEATHER_TOOL_DICT],
+            tool_choice={"type": "auto"},
+            max_tokens=200,
+        )
+
+        assert "choices" in response
+        choice = response["choices"][0]
+        tool_calls = choice["message"].get("tool_calls", [])
+        assert tool_calls, f"Expected tool_calls in response, got: {choice!r}"
+        assert tool_calls[0].get("name") == "get_weather"
+
+    @pytest.mark.asyncio
+    async def test_atool_calling_pydantic(self, client: UiPathNormalizedClient) -> None:
+        response = await client.completions.acreate(
+            messages=[{"role": "user", "content": "What's the weather in Paris?"}],
+            tools=[GetWeather],
+            tool_choice={"type": "auto"},
+            max_tokens=200,
+        )
+
+        assert "choices" in response
+        choice = response["choices"][0]
+        tool_calls = choice["message"].get("tool_calls", [])
+        assert tool_calls, f"Expected tool_calls in response, got: {choice!r}"
+        assert tool_calls[0].get("name") == "GetWeather"
+
+    def test_tool_calling_callable(self, client: UiPathNormalizedClient) -> None:
+        """Offer the ``get_weather`` callable itself as a tool.
+
+        Sends a weather question and checks that the first returned tool call
+        is named ``get_weather``.
+        """
+        response = client.completions.create(
+            messages=[{"role": "user", "content": "What's the weather in Paris?"}],
+            tools=[get_weather],
+            tool_choice={"type": "auto"},
+            max_tokens=200,
+        )
+
+        assert "choices" in response
+        choice = response["choices"][0]
+        assistant_message = choice["message"]
+        tool_calls = assistant_message.get("tool_calls", [])
+        # The whole choice is echoed on failure so the model's reply is visible.
+        assert tool_calls, f"Expected tool_calls in response, got: {choice!r}"
+        requested = tool_calls[0]
+        assert requested.get("name") == "get_weather"
+
+    def test_stream(self, client: UiPathNormalizedClient) -> None:
+        """Stream a short completion and check that some text was produced.
+
+        Each chunk's first choice is inspected; its text is read from
+        ``delta`` first and from ``message`` as a fallback. Chunks without
+        any choices are ignored.
+        """
+        chunks = list(
+            client.completions.stream(
+                messages=[{"role": "user", "content": "Say 'hello' and nothing else."}],
+                max_tokens=500,
+            )
+        )
+
+        assert len(chunks) > 0
+        pieces = []
+        for chunk in chunks:
+            choices = chunk.get("choices")
+            if not choices:
+                continue
+            first = choices[0]
+            # "delta", "message" or their "content" can be present but set to
+            # None; normalise every such case to "" so join() only sees strings.
+            text = (first.get("delta") or {}).get("content") or (
+                first.get("message") or {}
+            ).get("content")
+            pieces.append(text or "")
+        all_content = "".join(pieces)
+        assert all_content
+
+    @pytest.mark.asyncio
+    async def test_astream(self, client: UiPathNormalizedClient) -> None:
+        """Stream a short completion asynchronously and check text was produced.
+
+        Each chunk's first choice is inspected; its text is read from
+        ``delta`` first and from ``message`` as a fallback. Chunks without
+        any choices are ignored.
+        """
+        chunks = [
+            chunk
+            async for chunk in client.completions.astream(
+                messages=[{"role": "user", "content": "Say 'hello' and nothing else."}],
+                max_tokens=500,
+            )
+        ]
+
+        assert len(chunks) > 0
+        pieces = []
+        for chunk in chunks:
+            choices = chunk.get("choices")
+            if not choices:
+                continue
+            first = choices[0]
+            # "delta", "message" or their "content" can be present but set to
+            # None; normalise every such case to "" so join() only sees strings.
+            text = (first.get("delta") or {}).get("content") or (
+                first.get("message") or {}
+            ).get("content")
+            pieces.append(text or "")
+        all_content = "".join(pieces)
+        assert all_content
+
+
+@pytest.mark.vcr
+class TestNormalizedClientEmbeddings:
+    def test_create(self, embed_client: UiPathNormalizedClient) -> None:
+        """Embed a single string and validate the returned vector.
+
+        The response must contain a non-empty ``data`` list whose first entry
+        holds a non-empty ``embedding`` list made only of floats.
+        """
+        response = embed_client.embeddings.create(input="Hello world")
+
+        assert "data" in response
+        entries = response["data"]
+        assert len(entries) > 0
+        vector = entries[0]["embedding"]
+        assert isinstance(vector, list)
+        assert len(vector) > 0
+        assert all(isinstance(value, float) for value in vector)
+
+    @pytest.mark.asyncio
+    async def test_acreate(self, embed_client: UiPathNormalizedClient) -> None:
+        """Embed a single string asynchronously and validate the vector.
+
+        Makes the same checks as the synchronous ``test_create``: a non-empty
+        ``data`` list whose first entry holds a non-empty ``embedding`` list
+        made only of floats.
+        """
+        response = await embed_client.embeddings.acreate(input="Hello world")
+
+        assert "data" in response
+        assert len(response["data"]) > 0
+        embedding = response["data"][0]["embedding"]
+        assert isinstance(embedding, list)
+        assert len(embedding) > 0
+        # Element-type check kept in step with the synchronous test.
+        assert all(isinstance(v, float) for v in embedding)
+
+    def test_create_batch(self, embed_client: UiPathNormalizedClient) -> None:
+        """Embed several strings in one call and expect one vector per input.
+
+        Every returned entry must expose a non-empty ``embedding`` list.
+        """
+        texts = ["Hello world", "Goodbye world", "How are you?"]
+        response = embed_client.embeddings.create(input=texts)
+
+        assert "data" in response
+        entries = response["data"]
+        assert len(entries) == len(texts)
+        for entry in entries:
+            assert "embedding" in entry
+            vector = entry["embedding"]
+            assert isinstance(vector, list)
+            assert len(vector) > 0