diff --git a/CHANGELOG.md b/CHANGELOG.md index 34b8f44..12a0427 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,19 @@ All notable changes to `uipath_llm_client` (core package) will be documented in this file. +## [1.7.0] - 2026-04-03 + +### Added +- `UiPathNormalizedClient` — provider-agnostic LLM client with no optional dependencies + - `client.completions.create/acreate/stream/astream` for chat completions + - `client.embeddings.create/acreate` for embeddings + - Structured output via `response_format` (Pydantic, TypedDict, dict, json_object) + - Tool calling with dicts, Pydantic models, or callables + - Streaming with SSE parsing + - Full vendor parameter coverage: OpenAI (reasoning, logprobs, logit_bias), Anthropic (thinking, top_k), Google (thinking_level/budget, safety_settings, cached_content) + - Typed response models: `ChatCompletion`, `ChatCompletionChunk`, `EmbeddingResponse` + - Accepts both dict and Pydantic model messages + ## [1.6.0] - 2026-04-03 ### Fixed diff --git a/packages/uipath_langchain_client/CHANGELOG.md b/packages/uipath_langchain_client/CHANGELOG.md index 6ac4201..3674583 100644 --- a/packages/uipath_langchain_client/CHANGELOG.md +++ b/packages/uipath_langchain_client/CHANGELOG.md @@ -2,6 +2,13 @@ All notable changes to `uipath_langchain_client` will be documented in this file. 
+## [1.7.0] - 2026-04-03 + +### Added +- `UiPathChat.with_structured_output()` — supports `function_calling`, `json_schema`, and `json_mode` methods +- `UiPathChat.bind_tools()` — added `parallel_tool_calls` parameter +- Added vendor-specific parameters to `UiPathChat`: `logit_bias`, `logprobs`, `top_logprobs`, `parallel_tool_calls`, `top_k`, `safety_settings`, `cached_content`, `labels`, `seed` + ## [1.6.0] - 2026-04-03 ### Fixed diff --git a/packages/uipath_langchain_client/pyproject.toml b/packages/uipath_langchain_client/pyproject.toml index 93ff0be..ba69f55 100644 --- a/packages/uipath_langchain_client/pyproject.toml +++ b/packages/uipath_langchain_client/pyproject.toml @@ -6,7 +6,7 @@ readme = "README.md" requires-python = ">=3.11" dependencies = [ "langchain>=1.2.13", - "uipath-llm-client>=1.5.10", + "uipath-llm-client>=1.7.0", ] [project.optional-dependencies] diff --git a/packages/uipath_langchain_client/src/uipath_langchain_client/__version__.py b/packages/uipath_langchain_client/src/uipath_langchain_client/__version__.py index 5149f6e..66e749d 100644 --- a/packages/uipath_langchain_client/src/uipath_langchain_client/__version__.py +++ b/packages/uipath_langchain_client/src/uipath_langchain_client/__version__.py @@ -1,3 +1,3 @@ __title__ = "UiPath LangChain Client" __description__ = "A Python client for interacting with UiPath's LLM services via LangChain." 
-__version__ = "1.6.0" +__version__ = "1.7.0" diff --git a/packages/uipath_langchain_client/src/uipath_langchain_client/clients/normalized/chat_models.py b/packages/uipath_langchain_client/src/uipath_langchain_client/clients/normalized/chat_models.py index 47e325b..a1d545e 100644 --- a/packages/uipath_langchain_client/src/uipath_langchain_client/clients/normalized/chat_models.py +++ b/packages/uipath_langchain_client/src/uipath_langchain_client/clients/normalized/chat_models.py @@ -25,7 +25,8 @@ import json from collections.abc import AsyncGenerator, Callable, Generator, Sequence -from typing import Any +from functools import partial +from typing import Any, Literal, Union, cast from langchain_core.callbacks import ( AsyncCallbackManagerForLLMRun, @@ -44,21 +45,76 @@ UsageMetadata, ) from langchain_core.messages.utils import convert_to_openai_messages +from langchain_core.output_parsers import JsonOutputParser +from langchain_core.output_parsers.openai_tools import ( + JsonOutputKeyToolsParser, + PydanticToolsParser, +) from langchain_core.outputs import ( ChatGeneration, ChatGenerationChunk, ChatResult, ) -from langchain_core.runnables import Runnable +from langchain_core.runnables import Runnable, RunnableLambda, RunnablePassthrough from langchain_core.tools import BaseTool from langchain_core.utils.function_calling import ( convert_to_openai_function, + convert_to_openai_tool, ) -from pydantic import Field +from langchain_core.utils.pydantic import is_basemodel_subclass +from pydantic import AliasChoices, BaseModel, Field from uipath_langchain_client.base_client import UiPathBaseChatModel from uipath_langchain_client.settings import ApiType, RoutingMode, UiPathAPIConfig +_DictOrPydanticClass = Union[dict[str, Any], type[BaseModel], type] +_DictOrPydantic = Union[dict[str, Any], BaseModel] + + +def _oai_structured_outputs_parser(ai_msg: AIMessage, schema: type[BaseModel]) -> BaseModel: + if not ai_msg.content: + raise ValueError("Expected non-empty content from 
model.") + content = ai_msg.content + if isinstance(content, list): + # Extract the first text block from content parts + content = next((c for c in content if isinstance(c, str)), str(content[0])) + parsed = json.loads(content) + return schema.model_validate(parsed) + + +def _build_normalized_response_format( + schema: _DictOrPydanticClass, strict: bool | None = None +) -> dict[str, Any]: + """Build response_format for the normalized API from a schema.""" + if isinstance(schema, dict): + return {"type": "json_schema", "json_schema": schema} + + if isinstance(schema, type) and issubclass(schema, BaseModel): + json_schema = schema.model_json_schema() + rf: dict[str, Any] = { + "type": "json_schema", + "json_schema": { + "name": schema.__name__, + "schema": json_schema, + }, + } + if strict is not None: + rf["json_schema"]["strict"] = strict + return rf + + # TypedDict or other type — convert via openai tool schema + tool_schema = convert_to_openai_tool(schema) + rf = { + "type": "json_schema", + "json_schema": { + "name": tool_schema["function"]["name"], + "schema": tool_schema["function"]["parameters"], + }, + } + if strict is not None: + rf["json_schema"]["strict"] = strict + return rf + class UiPathChat(UiPathBaseChatModel): """LangChain chat model using UiPath's normalized (provider-agnostic) API. 
@@ -101,33 +157,48 @@ class UiPathChat(UiPathBaseChatModel): freeze_base_url=True, ) - # Standard LLM parameters - max_tokens: int | None = None + # Common + max_tokens: int | None = Field( + default=None, + validation_alias=AliasChoices("max_tokens", "max_output_tokens", "max_completion_tokens"), + ) temperature: float | None = None - stop: list[str] | str | None = Field(default=None, alias="stop_sequences") + top_p: float | None = None + top_k: int | None = None + stop: list[str] | str | None = Field( + default=None, + validation_alias=AliasChoices("stop", "stop_sequences"), + ) + n: int | None = Field( + default=None, + validation_alias=AliasChoices("n", "candidate_count"), + ) + frequency_penalty: float | None = None + presence_penalty: float | None = None + seed: int | None = None - n: int | None = None # Number of completions to generate - top_p: float | None = None # Nucleus sampling probability mass - presence_penalty: float | None = None # Penalty for repeated tokens - frequency_penalty: float | None = None # Frequency-based repetition penalty - verbosity: str | None = None # Response verbosity: "low", "medium", or "high" + model_kwargs: dict[str, Any] = Field(default_factory=dict) + disabled_params: dict[str, Any] | None = None - model_kwargs: dict[str, Any] = Field( - default_factory=dict - ) # Additional model-specific parameters - disabled_params: dict[str, Any] | None = None # Parameters to exclude from requests + # OpenAI + logit_bias: dict[str, int] | None = None + logprobs: bool | None = None + top_logprobs: int | None = None + parallel_tool_calls: bool | None = None + reasoning_effort: str | None = None + reasoning: dict[str, Any] | None = None - # OpenAI o1/o3 reasoning parameters - reasoning: dict[str, Any] | None = None # {"effort": "low"|"medium"|"high", "summary": ...} - reasoning_effort: str | None = None # "minimal", "low", "medium", or "high" + # Anthropic + thinking: dict[str, Any] | None = None - # Anthropic Claude extended thinking 
parameters - thinking: dict[str, Any] | None = None # {"type": "enabled"|"disabled", "budget_tokens": N} + # Google + thinking_level: str | None = None + thinking_budget: int | None = None + include_thoughts: bool | None = None + safety_settings: list[dict[str, Any]] | None = None - # Google Gemini thinking parameters - thinking_level: str | None = None # Thinking depth level - thinking_budget: int | None = None # Token budget for thinking - include_thoughts: bool | None = None # Include thinking in response + # Shared + verbosity: str | None = None @property def _llm_type(self) -> str: @@ -138,20 +209,31 @@ def _llm_type(self) -> str: def _default_params(self) -> dict[str, Any]: """Get the default parameters for the normalized API request.""" exclude_if_none = { - "frequency_penalty": self.frequency_penalty, - "presence_penalty": self.presence_penalty, - "top_p": self.top_p, - "stop": self.stop or None, # Also exclude empty list for this - "n": self.n, "max_tokens": self.max_tokens, "temperature": self.temperature, - "verbosity": self.verbosity, - "reasoning": self.reasoning, + "top_p": self.top_p, + "top_k": self.top_k, + "stop": self.stop or None, + "n": self.n, + "frequency_penalty": self.frequency_penalty, + "presence_penalty": self.presence_penalty, + "seed": self.seed, + # OpenAI + "logit_bias": self.logit_bias, + "logprobs": self.logprobs, + "top_logprobs": self.top_logprobs, + "parallel_tool_calls": self.parallel_tool_calls, "reasoning_effort": self.reasoning_effort, + "reasoning": self.reasoning, + # Anthropic "thinking": self.thinking, + # Google "thinking_level": self.thinking_level, "thinking_budget": self.thinking_budget, "include_thoughts": self.include_thoughts, + "safety_settings": self.safety_settings, + # Shared + "verbosity": self.verbosity, } return { @@ -181,6 +263,7 @@ def bind_tools( *, tool_choice: str | None = None, strict: bool | None = None, + parallel_tool_calls: bool | None = None, **kwargs: Any, ) -> Runnable[LanguageModelInput, 
AIMessage]: """Bind tools to the model with automatic tool choice detection.""" @@ -197,7 +280,7 @@ def bind_tools( tool_choice = "auto" if tool_choice in ["required", "auto"]: - tool_choice_object = { + tool_choice_object: dict[str, Any] = { "type": tool_choice, } else: @@ -206,11 +289,113 @@ def bind_tools( "name": tool_choice, } - return super().bind( - tools=formatted_tools, - tool_choice=tool_choice_object, + bind_kwargs: dict[str, Any] = { + "tools": formatted_tools, + "tool_choice": tool_choice_object, **kwargs, - ) + } + if parallel_tool_calls is not None: + bind_kwargs["parallel_tool_calls"] = parallel_tool_calls + + return super().bind(**bind_kwargs) + + def with_structured_output( + self, + schema: _DictOrPydanticClass | None = None, + *, + method: Literal["function_calling", "json_mode", "json_schema"] = "function_calling", + include_raw: bool = False, + strict: bool | None = None, + **kwargs: Any, + ) -> Runnable[LanguageModelInput, _DictOrPydantic]: + """Model wrapper that returns outputs formatted to match the given schema. + + Args: + schema: The output schema as a Pydantic class, TypedDict, JSON Schema dict, + or OpenAI function schema. + method: Either "json_schema" (uses response_format) or "function_calling" + (uses tool calling to force the schema). + include_raw: If True, returns dict with 'raw', 'parsed', and 'parsing_error'. + strict: If True, model output is guaranteed to match the schema exactly. + **kwargs: Additional arguments passed to bind(). + + Returns: + A Runnable that parses the model output into the given schema. 
+ """ + if schema is None: + raise ValueError("schema must be specified.") + + is_pydantic = isinstance(schema, type) and is_basemodel_subclass(schema) + + if method == "function_calling": + tool_name = convert_to_openai_tool(schema)["function"]["name"] + llm = self.bind_tools( + [schema], + tool_choice="any", + strict=strict, + ls_structured_output_format={ + "kwargs": {"method": "function_calling", "strict": strict}, + "schema": schema, + }, + **kwargs, + ) + if is_pydantic: + output_parser: Runnable = PydanticToolsParser( + tools=[schema], # type: ignore[list-item] + first_tool_only=True, + ) + else: + output_parser = JsonOutputKeyToolsParser(key_name=tool_name, first_tool_only=True) + elif method == "json_mode": + llm = self.bind( + response_format={"type": "json_object"}, + ls_structured_output_format={ + "kwargs": {"method": method}, + "schema": schema, + }, + **kwargs, + ) + if is_pydantic: + from langchain_core.output_parsers import PydanticOutputParser + + output_parser = PydanticOutputParser(pydantic_object=schema) # type: ignore[arg-type] + else: + output_parser = JsonOutputParser() + elif method == "json_schema": + response_format = _build_normalized_response_format(schema, strict=strict) + llm = self.bind( + response_format=response_format, + ls_structured_output_format={ + "kwargs": {"method": method, "strict": strict}, + "schema": convert_to_openai_tool(schema), + }, + **kwargs, + ) + if is_pydantic: + output_parser = RunnableLambda( + partial(_oai_structured_outputs_parser, schema=cast(type, schema)) + ).with_types(output_type=cast(type, schema)) + else: + output_parser = JsonOutputParser() + else: + raise ValueError( + f"Unrecognized method: '{method}'. " + "Expected 'function_calling', 'json_mode', or 'json_schema'." 
+ ) + + if include_raw: + parser_assign = RunnablePassthrough.assign( + parsed=lambda x: output_parser.invoke(x["raw"]), + parsing_error=lambda _: None, + ) + parser_none = RunnablePassthrough.assign( + parsed=lambda _: None, + ) + parser_with_fallback = parser_assign.with_fallbacks( + [parser_none], exception_key="parsing_error" + ) + return RunnablePassthrough.assign(raw=llm) | parser_with_fallback # type: ignore[return-value] + return llm | output_parser # type: ignore[return-value] def _preprocess_request( self, messages: list[BaseMessage], stop: list[str] | None = None, **kwargs: Any diff --git a/src/uipath/llm_client/__init__.py b/src/uipath/llm_client/__init__.py index b437cef..a42a286 100644 --- a/src/uipath/llm_client/__init__.py +++ b/src/uipath/llm_client/__init__.py @@ -28,6 +28,7 @@ """ from uipath.llm_client.__version__ import __version__ +from uipath.llm_client.clients.normalized import UiPathNormalizedClient from uipath.llm_client.httpx_client import ( UiPathHttpxAsyncClient, UiPathHttpxClient, @@ -60,6 +61,8 @@ "get_default_client_settings", "PlatformSettings", "LLMGatewaySettings", + # Normalized client + "UiPathNormalizedClient", # HTTPX clients "UiPathHttpxClient", "UiPathHttpxAsyncClient", diff --git a/src/uipath/llm_client/__version__.py b/src/uipath/llm_client/__version__.py index c5bf7a8..cbd0256 100644 --- a/src/uipath/llm_client/__version__.py +++ b/src/uipath/llm_client/__version__.py @@ -1,3 +1,3 @@ __title__ = "UiPath LLM Client" __description__ = "A Python client for interacting with UiPath's LLM services." -__version__ = "1.6.0" +__version__ = "1.7.0" diff --git a/src/uipath/llm_client/clients/normalized/__init__.py b/src/uipath/llm_client/clients/normalized/__init__.py new file mode 100644 index 0000000..11bf16d --- /dev/null +++ b/src/uipath/llm_client/clients/normalized/__init__.py @@ -0,0 +1,46 @@ +"""UiPath Normalized Client - Provider-agnostic LLM client. + +No optional dependencies required. 
Works with the base uipath-llm-client package. +""" + +from uipath.llm_client.clients.normalized.client import UiPathNormalizedClient +from uipath.llm_client.clients.normalized.completions import ( + MessageType, + ResponseFormatType, + ToolChoiceType, + ToolType, +) +from uipath.llm_client.clients.normalized.types import ( + ChatCompletion, + ChatCompletionChunk, + Choice, + Delta, + EmbeddingData, + EmbeddingResponse, + Message, + StreamChoice, + ToolCall, + ToolCallChunk, + Usage, +) + +__all__ = [ + "UiPathNormalizedClient", + # Input types + "MessageType", + "ToolType", + "ToolChoiceType", + "ResponseFormatType", + # Response types + "ChatCompletion", + "ChatCompletionChunk", + "Choice", + "Delta", + "EmbeddingData", + "EmbeddingResponse", + "Message", + "StreamChoice", + "ToolCall", + "ToolCallChunk", + "Usage", +] diff --git a/src/uipath/llm_client/clients/normalized/client.py b/src/uipath/llm_client/clients/normalized/client.py new file mode 100644 index 0000000..43ddf5a --- /dev/null +++ b/src/uipath/llm_client/clients/normalized/client.py @@ -0,0 +1,184 @@ +"""UiPath Normalized Client. + +A provider-agnostic LLM client that uses UiPath's normalized API to provide +a consistent interface across all supported providers (OpenAI, Google, Anthropic, etc.). + +No optional dependencies required - works with the base uipath-llm-client package. + +Example: + >>> from uipath.llm_client.clients.normalized import UiPathNormalizedClient + >>> + >>> client = UiPathNormalizedClient(model_name="gpt-4o-2024-11-20") + >>> + >>> # Chat completion + >>> response = client.completions.create( + ... messages=[{"role": "user", "content": "Hello!"}], + ... ) + >>> print(response.choices[0].message.content) + >>> + >>> # Streaming + >>> for chunk in client.completions.stream( + ... messages=[{"role": "user", "content": "Hello!"}], + ... ): + ... print(chunk.choices[0].delta.content, end="") + >>> + >>> # Async + >>> response = await client.completions.acreate( + ... 
messages=[{"role": "user", "content": "Hello!"}], + ... ) + >>> + >>> # Structured output + >>> from pydantic import BaseModel + >>> class Answer(BaseModel): + ... text: str + ... confidence: float + >>> + >>> response = client.completions.create( + ... messages=[{"role": "user", "content": "What is 2+2?"}], + ... output_format=Answer, + ... ) + >>> print(response.choices[0].message.parsed) # Answer(text='4', confidence=1.0) + >>> + >>> # Embeddings + >>> response = client.embeddings.create(input=["Hello world"]) + >>> print(len(response.data[0].embedding)) +""" + +import logging +from collections.abc import Mapping, Sequence +from functools import cached_property + +from uipath.llm_client.clients.normalized.completions import Completions +from uipath.llm_client.clients.normalized.embeddings import Embeddings +from uipath.llm_client.clients.utils import build_httpx_async_client, build_httpx_client +from uipath.llm_client.httpx_client import UiPathHttpxAsyncClient, UiPathHttpxClient +from uipath.llm_client.settings import UiPathBaseSettings, get_default_client_settings +from uipath.llm_client.settings.base import UiPathAPIConfig +from uipath.llm_client.settings.constants import ApiType, RoutingMode +from uipath.llm_client.utils.retry import RetryConfig + + +class UiPathNormalizedClient: + """Provider-agnostic LLM client using UiPath's normalized API. + + Routes requests through UiPath's LLM Gateway using the normalized API, + which provides a consistent interface across all supported LLM providers. + No vendor-specific SDK dependencies are required. + + Namespaces: + - ``completions``: ``create``, ``acreate``, ``stream``, ``astream`` + - ``embeddings``: ``create``, ``acreate`` + + Args: + model_name: The model name (e.g., "gpt-4o-2024-11-20", "gemini-2.5-flash"). + byo_connection_id: Bring Your Own connection ID for custom deployments. + client_settings: UiPath client settings. Defaults to environment-based settings. 
+ timeout: Client-side request timeout in seconds. + max_retries: Maximum retry attempts for failed requests. + default_headers: Additional headers to include in requests. + captured_headers: Response header prefixes to capture (case-insensitive). + retry_config: Custom retry configuration. + logger: Logger instance for request/response logging. + + Example: + >>> client = UiPathNormalizedClient(model_name="gpt-4o-2024-11-20") + >>> response = client.completions.create( + ... messages=[{"role": "user", "content": "Hello!"}], + ... ) + """ + + def __init__( + self, + *, + model_name: str, + byo_connection_id: str | None = None, + client_settings: UiPathBaseSettings | None = None, + timeout: float | None = None, + max_retries: int | None = None, + default_headers: Mapping[str, str] | None = None, + captured_headers: Sequence[str] = ("x-uipath-",), + retry_config: RetryConfig | None = None, + logger: logging.Logger | None = None, + ): + self._model_name = model_name + self._byo_connection_id = byo_connection_id + self._client_settings = client_settings or get_default_client_settings() + self._timeout = timeout + self._max_retries = max_retries + self._default_headers = default_headers + self._captured_headers = captured_headers + self._retry_config = retry_config + self._logger = logger + + self._completions_api_config = UiPathAPIConfig( + api_type=ApiType.COMPLETIONS, + routing_mode=RoutingMode.NORMALIZED, + freeze_base_url=True, + ) + self._embeddings_api_config = UiPathAPIConfig( + api_type=ApiType.EMBEDDINGS, + routing_mode=RoutingMode.NORMALIZED, + freeze_base_url=True, + ) + + # ------------------------------------------------------------------ + # HTTP clients (lazily created) + # ------------------------------------------------------------------ + + def _build_sync(self, api_config: UiPathAPIConfig) -> UiPathHttpxClient: + return build_httpx_client( + model_name=self._model_name, + byo_connection_id=self._byo_connection_id, + 
client_settings=self._client_settings, + timeout=self._timeout, + max_retries=self._max_retries, + default_headers=self._default_headers, + captured_headers=self._captured_headers, + retry_config=self._retry_config, + logger=self._logger, + api_config=api_config, + ) + + def _build_async(self, api_config: UiPathAPIConfig) -> UiPathHttpxAsyncClient: + return build_httpx_async_client( + model_name=self._model_name, + byo_connection_id=self._byo_connection_id, + client_settings=self._client_settings, + timeout=self._timeout, + max_retries=self._max_retries, + default_headers=self._default_headers, + captured_headers=self._captured_headers, + retry_config=self._retry_config, + logger=self._logger, + api_config=api_config, + ) + + @cached_property + def _sync_client(self) -> UiPathHttpxClient: + return self._build_sync(self._completions_api_config) + + @cached_property + def _async_client(self) -> UiPathHttpxAsyncClient: + return self._build_async(self._completions_api_config) + + @cached_property + def _embedding_sync_client(self) -> UiPathHttpxClient: + return self._build_sync(self._embeddings_api_config) + + @cached_property + def _embedding_async_client(self) -> UiPathHttpxAsyncClient: + return self._build_async(self._embeddings_api_config) + + # ------------------------------------------------------------------ + # Public namespaces + # ------------------------------------------------------------------ + + @cached_property + def completions(self) -> Completions: + """Chat completions namespace (``create``, ``acreate``, ``stream``, ``astream``).""" + return Completions(self) + + @cached_property + def embeddings(self) -> Embeddings: + """Embeddings namespace (``create``, ``acreate``).""" + return Embeddings(self) diff --git a/src/uipath/llm_client/clients/normalized/completions.py b/src/uipath/llm_client/clients/normalized/completions.py new file mode 100644 index 0000000..35b91a3 --- /dev/null +++ b/src/uipath/llm_client/clients/normalized/completions.py @@ -0,0 
+1,727 @@ +"""Completions endpoint for the UiPath Normalized API.""" + +from __future__ import annotations + +import json +from collections.abc import AsyncGenerator, Callable, Generator, Sequence +from typing import Any, Union, get_args, get_origin, get_type_hints + +from pydantic import BaseModel + +from uipath.llm_client.clients.normalized.types import ( + ChatCompletion, + ChatCompletionChunk, + Choice, + Delta, + Message, + StreamChoice, + ToolCall, + ToolCallChunk, + Usage, +) + +try: + from typing import is_typeddict +except ImportError: + from typing_extensions import is_typeddict + +# --------------------------------------------------------------------------- +# Public input types +# --------------------------------------------------------------------------- + +ResponseFormatType = Union[type[BaseModel], type, dict[str, Any]] +"""Response format: Pydantic model, TypedDict, or raw dict (e.g. {"type": "json_object"}).""" + +ToolType = Union[dict[str, Any], type[BaseModel], Callable[..., Any]] +"""Tool definition: dict (raw schema), Pydantic model, or callable.""" + +ToolChoiceType = Union[str, dict[str, Any]] +"""Tool choice: "auto", "required", "none", a tool name, or a dict.""" + +MessageType = Union[dict[str, Any], BaseModel] +"""A single message: dict with role/content or a Pydantic model with those fields.""" + + +def _normalize_messages(messages: Sequence[MessageType]) -> list[dict[str, Any]]: + """Convert a sequence of messages (dicts or pydantic models) to dicts.""" + result: list[dict[str, Any]] = [] + for msg in messages: + if isinstance(msg, dict): + result.append(msg) + elif isinstance(msg, BaseModel): + result.append(msg.model_dump(exclude_none=True)) + else: + result.append(dict(msg)) # type: ignore[arg-type] + return result + + +# --------------------------------------------------------------------------- +# Schema helpers +# --------------------------------------------------------------------------- + + +def _json_schema_from_type(tp: type) 
-> dict[str, Any]: + origin = get_origin(tp) + if origin is list: + args = get_args(tp) + return {"type": "array", "items": _json_schema_from_type(args[0]) if args else {}} + if origin is dict: + return {"type": "object"} + simple = {str: "string", int: "integer", float: "number", bool: "boolean"} + return {"type": simple.get(tp, "object")} + + +def _build_response_format( + response_format: ResponseFormatType, strict: bool | None = None +) -> dict[str, Any]: + if isinstance(response_format, dict): + if "type" in response_format: + return response_format + return {"type": "json_schema", "json_schema": response_format} + + if isinstance(response_format, type) and issubclass(response_format, BaseModel): + js: dict[str, Any] = { + "name": response_format.__name__, + "schema": response_format.model_json_schema(), + } + if strict is not False: + js["strict"] = True + return {"type": "json_schema", "json_schema": js} + + if isinstance(response_format, type) and is_typeddict(response_format): + hints = get_type_hints(response_format) + properties = {name: _json_schema_from_type(tp) for name, tp in hints.items()} + js = { + "name": response_format.__name__, + "schema": { + "type": "object", + "properties": properties, + "required": list(properties.keys()), + "additionalProperties": False, + }, + } + if strict is not False: + js["strict"] = True + return {"type": "json_schema", "json_schema": js} + + if isinstance(response_format, type): + js = { + "name": response_format.__name__, + "schema": _json_schema_from_type(response_format), + } + if strict is True: + js["strict"] = True + return {"type": "json_schema", "json_schema": js} + + raise TypeError(f"Unsupported response_format type: {type(response_format)}") + + +# --------------------------------------------------------------------------- +# Tool helpers +# --------------------------------------------------------------------------- + + +def _build_tool_definition(tool: ToolType) -> dict[str, Any]: + if isinstance(tool, 
dict): + return tool + + if isinstance(tool, type) and issubclass(tool, BaseModel): + schema = tool.model_json_schema() + schema.pop("title", None) + return {"name": tool.__name__, "description": tool.__doc__ or "", "parameters": schema} + + if callable(tool): + import inspect + + sig = inspect.signature(tool) + hints = get_type_hints(tool) + properties = {name: _json_schema_from_type(hints.get(name, str)) for name in sig.parameters} + required = [ + name for name, p in sig.parameters.items() if p.default is inspect.Parameter.empty + ] + return { + "name": tool.__name__, + "description": tool.__doc__ or "", + "parameters": {"type": "object", "properties": properties, "required": required}, + } + + raise TypeError(f"Unsupported tool type: {type(tool)}") + + +def _resolve_tool_choice( + tool_choice: ToolChoiceType, tools: list[dict[str, Any]] +) -> dict[str, Any] | str: + if isinstance(tool_choice, dict): + return tool_choice + if tool_choice in ("auto", "required", "none"): + return tool_choice + tool_names = [t.get("name", "") for t in tools] + if tool_choice in tool_names: + return {"type": "tool", "name": tool_choice} + return "auto" + + +# --------------------------------------------------------------------------- +# Response parsing +# --------------------------------------------------------------------------- + + +def _parse_tool_call(raw: dict[str, Any]) -> ToolCall: + arguments = raw.get("arguments", {}) + if isinstance(arguments, str): + try: + arguments = json.loads(arguments) + except json.JSONDecodeError: + arguments = {} + return ToolCall(id=raw.get("id", ""), name=raw.get("name", ""), arguments=arguments) + + +def _parse_tool_call_chunk(raw: dict[str, Any]) -> ToolCallChunk: + if "function" in raw: + name = raw["function"].get("name", "") + args = raw["function"].get("arguments", "") + else: + name = raw.get("name", "") + args = raw.get("arguments", "") + if isinstance(args, dict): + args = json.dumps(args) if args else "" + return 
ToolCallChunk(id=raw.get("id", ""), name=name, arguments=args, index=raw.get("index", 0)) + + +def _parse_structured_output(content: str, response_format: ResponseFormatType) -> Any: + try: + parsed_json = json.loads(content) + except json.JSONDecodeError: + return None + if isinstance(response_format, type) and issubclass(response_format, BaseModel): + return response_format.model_validate(parsed_json) + return parsed_json + + +def _parse_response( + data: dict[str, Any], response_format: ResponseFormatType | None = None +) -> ChatCompletion: + usage = Usage(**data.get("usage", {})) + choices: list[Choice] = [] + for choice_data in data.get("choices", []): + msg_data = choice_data.get("message", {}) + tool_calls = [_parse_tool_call(tc) for tc in msg_data.get("tool_calls", [])] + content = msg_data.get("content", "") + parsed = ( + _parse_structured_output(content, response_format) + if response_format and content + else None + ) + message = Message( + role=msg_data.get("role", "assistant"), + content=content, + tool_calls=tool_calls, + signature=msg_data.get("signature"), + thinking=msg_data.get("thinking"), + parsed=parsed, + ) + choices.append( + Choice( + index=choice_data.get("index", 0), + message=message, + finish_reason=choice_data.get("finish_reason"), + avg_logprobs=choice_data.get("avg_logprobs"), + ) + ) + return ChatCompletion( + id=data.get("id", ""), + object=data.get("object", ""), + created=data.get("created", 0), + model=data.get("model", ""), + choices=choices, + usage=usage, + ) + + +def _parse_stream_chunk(data: dict[str, Any]) -> ChatCompletionChunk: + usage = Usage(**data["usage"]) if data.get("usage") else None + choices: list[StreamChoice] = [] + for choice_data in data.get("choices", []): + delta_data = choice_data.get("delta", choice_data.get("message", {})) + tool_calls = [_parse_tool_call_chunk(tc) for tc in delta_data.get("tool_calls", [])] + delta = Delta( + role=delta_data.get("role"), + content=delta_data.get("content", ""), + 
tool_calls=tool_calls, + ) + choices.append( + StreamChoice( + index=choice_data.get("index", 0), + delta=delta, + finish_reason=choice_data.get("finish_reason"), + avg_logprobs=choice_data.get("avg_logprobs"), + ) + ) + return ChatCompletionChunk( + id=data.get("id", ""), + object=data.get("object", ""), + created=data.get("created", 0), + model=data.get("model", ""), + choices=choices, + usage=usage, + ) + + +# --------------------------------------------------------------------------- +# Request building +# --------------------------------------------------------------------------- + + +def _build_request( + *, + messages: Sequence[MessageType], + stream: bool = False, + tools: Sequence[ToolType] | None = None, + tool_choice: ToolChoiceType | None = None, + response_format: ResponseFormatType | None = None, + strict: bool | None = None, + # Common + max_tokens: int | None = None, + temperature: float | None = None, + top_p: float | None = None, + top_k: int | None = None, + stop: list[str] | str | None = None, + n: int | None = None, + frequency_penalty: float | None = None, + presence_penalty: float | None = None, + seed: int | None = None, + # OpenAI + logit_bias: dict[str, int] | None = None, + logprobs: bool | None = None, + top_logprobs: int | None = None, + parallel_tool_calls: bool | None = None, + # OpenAI reasoning (o1/o3/gpt-5) + reasoning_effort: str | None = None, + reasoning: dict[str, Any] | None = None, + # Anthropic + thinking: dict[str, Any] | None = None, + # Google + thinking_level: str | None = None, + thinking_budget: int | None = None, + include_thoughts: bool | None = None, + safety_settings: list[dict[str, Any]] | None = None, + # Shared + verbosity: str | None = None, + # Aliases (resolve to canonical names above) + stop_sequences: list[str] | None = None, + max_output_tokens: int | None = None, + max_completion_tokens: int | None = None, + candidate_count: int | None = None, + **kwargs: Any, +) -> dict[str, Any]: + """Build the request 
body for a chat completion.""" + # Resolve aliases + max_tokens = max_tokens or max_output_tokens or max_completion_tokens + stop = stop or stop_sequences + n = n or candidate_count + + body: dict[str, Any] = {"messages": _normalize_messages(messages)} + + if stream: + body["stream"] = True + + optional: dict[str, Any] = { + "max_tokens": max_tokens, + "temperature": temperature, + "top_p": top_p, + "top_k": top_k, + "stop": stop, + "n": n, + "frequency_penalty": frequency_penalty, + "presence_penalty": presence_penalty, + "seed": seed, + "logit_bias": logit_bias, + "logprobs": logprobs, + "top_logprobs": top_logprobs, + "parallel_tool_calls": parallel_tool_calls, + "reasoning_effort": reasoning_effort, + "reasoning": reasoning, + "thinking": thinking, + "thinking_level": thinking_level, + "thinking_budget": thinking_budget, + "include_thoughts": include_thoughts, + "safety_settings": safety_settings, + "verbosity": verbosity, + } + body.update({k: v for k, v in optional.items() if v is not None}) + + if tools is not None: + body["tools"] = [_build_tool_definition(t) for t in tools] + if tool_choice is not None: + body["tool_choice"] = _resolve_tool_choice(tool_choice, body["tools"]) + + if response_format is not None: + body["response_format"] = _build_response_format(response_format, strict=strict) + + body.update(kwargs) + return body + + +# --------------------------------------------------------------------------- +# SSE helpers +# --------------------------------------------------------------------------- + + +def _iter_sse(lines: Generator[str, None, None]) -> Generator[dict[str, Any], None, None]: + for line in lines: + line = line.strip() + if line.startswith("data:"): + line = line[len("data:") :].strip() + try: + data = json.loads(line) + except json.JSONDecodeError: + continue + if "id" in data and not data["id"]: + continue + yield data + + +async def _aiter_sse(lines: AsyncGenerator[str, None]) -> AsyncGenerator[dict[str, Any], None]: + async for line 
in lines: + line = line.strip() + if line.startswith("data:"): + line = line[len("data:") :].strip() + try: + data = json.loads(line) + except json.JSONDecodeError: + continue + if "id" in data and not data["id"]: + continue + yield data + + +# --------------------------------------------------------------------------- +# Completions namespace +# --------------------------------------------------------------------------- + + +class Completions: + """``client.completions`` — create, acreate, stream, astream.""" + + def __init__(self, client: Any) -> None: + self._client = client + + def create( + self, + *, + messages: Sequence[MessageType], + tools: Sequence[ToolType] | None = None, + tool_choice: ToolChoiceType | None = None, + response_format: ResponseFormatType | None = None, + strict: bool | None = None, + # Common + max_tokens: int | None = None, + temperature: float | None = None, + top_p: float | None = None, + top_k: int | None = None, + stop: list[str] | str | None = None, + n: int | None = None, + frequency_penalty: float | None = None, + presence_penalty: float | None = None, + seed: int | None = None, + # OpenAI + logit_bias: dict[str, int] | None = None, + logprobs: bool | None = None, + top_logprobs: int | None = None, + parallel_tool_calls: bool | None = None, + reasoning_effort: str | None = None, + reasoning: dict[str, Any] | None = None, + # Anthropic + thinking: dict[str, Any] | None = None, + # Google + thinking_level: str | None = None, + thinking_budget: int | None = None, + include_thoughts: bool | None = None, + safety_settings: list[dict[str, Any]] | None = None, + # Shared + verbosity: str | None = None, + # Aliases + stop_sequences: list[str] | None = None, + max_output_tokens: int | None = None, + max_completion_tokens: int | None = None, + candidate_count: int | None = None, + **kwargs: Any, + ) -> ChatCompletion: + """Create a chat completion (sync).""" + body = _build_request( + messages=messages, + tools=tools, + 
tool_choice=tool_choice, + response_format=response_format, + strict=strict, + max_tokens=max_tokens, + temperature=temperature, + top_p=top_p, + top_k=top_k, + stop=stop, + n=n, + frequency_penalty=frequency_penalty, + presence_penalty=presence_penalty, + seed=seed, + logit_bias=logit_bias, + logprobs=logprobs, + top_logprobs=top_logprobs, + parallel_tool_calls=parallel_tool_calls, + reasoning_effort=reasoning_effort, + reasoning=reasoning, + thinking=thinking, + thinking_level=thinking_level, + thinking_budget=thinking_budget, + include_thoughts=include_thoughts, + safety_settings=safety_settings, + verbosity=verbosity, + stop_sequences=stop_sequences, + max_output_tokens=max_output_tokens, + max_completion_tokens=max_completion_tokens, + candidate_count=candidate_count, + **kwargs, + ) + response = self._client._sync_client.request("POST", "/", json=body) + response.raise_for_status() + return _parse_response(response.json(), response_format=response_format) + + async def acreate( + self, + *, + messages: Sequence[MessageType], + tools: Sequence[ToolType] | None = None, + tool_choice: ToolChoiceType | None = None, + response_format: ResponseFormatType | None = None, + strict: bool | None = None, + # Common + max_tokens: int | None = None, + temperature: float | None = None, + top_p: float | None = None, + top_k: int | None = None, + stop: list[str] | str | None = None, + n: int | None = None, + frequency_penalty: float | None = None, + presence_penalty: float | None = None, + seed: int | None = None, + # OpenAI + logit_bias: dict[str, int] | None = None, + logprobs: bool | None = None, + top_logprobs: int | None = None, + parallel_tool_calls: bool | None = None, + reasoning_effort: str | None = None, + reasoning: dict[str, Any] | None = None, + # Anthropic + thinking: dict[str, Any] | None = None, + # Google + thinking_level: str | None = None, + thinking_budget: int | None = None, + include_thoughts: bool | None = None, + safety_settings: list[dict[str, Any]] | 
None = None, + # Shared + verbosity: str | None = None, + # Aliases + stop_sequences: list[str] | None = None, + max_output_tokens: int | None = None, + max_completion_tokens: int | None = None, + candidate_count: int | None = None, + **kwargs: Any, + ) -> ChatCompletion: + """Create a chat completion (async).""" + body = _build_request( + messages=messages, + tools=tools, + tool_choice=tool_choice, + response_format=response_format, + strict=strict, + max_tokens=max_tokens, + temperature=temperature, + top_p=top_p, + top_k=top_k, + stop=stop, + n=n, + frequency_penalty=frequency_penalty, + presence_penalty=presence_penalty, + seed=seed, + logit_bias=logit_bias, + logprobs=logprobs, + top_logprobs=top_logprobs, + parallel_tool_calls=parallel_tool_calls, + reasoning_effort=reasoning_effort, + reasoning=reasoning, + thinking=thinking, + thinking_level=thinking_level, + thinking_budget=thinking_budget, + include_thoughts=include_thoughts, + safety_settings=safety_settings, + verbosity=verbosity, + stop_sequences=stop_sequences, + max_output_tokens=max_output_tokens, + max_completion_tokens=max_completion_tokens, + candidate_count=candidate_count, + **kwargs, + ) + response = await self._client._async_client.request("POST", "/", json=body) + response.raise_for_status() + return _parse_response(response.json(), response_format=response_format) + + def stream( + self, + *, + messages: Sequence[MessageType], + tools: Sequence[ToolType] | None = None, + tool_choice: ToolChoiceType | None = None, + response_format: ResponseFormatType | None = None, + strict: bool | None = None, + # Common + max_tokens: int | None = None, + temperature: float | None = None, + top_p: float | None = None, + top_k: int | None = None, + stop: list[str] | str | None = None, + n: int | None = None, + frequency_penalty: float | None = None, + presence_penalty: float | None = None, + seed: int | None = None, + # OpenAI + logit_bias: dict[str, int] | None = None, + logprobs: bool | None = None, + 
top_logprobs: int | None = None, + parallel_tool_calls: bool | None = None, + reasoning_effort: str | None = None, + reasoning: dict[str, Any] | None = None, + # Anthropic + thinking: dict[str, Any] | None = None, + # Google + thinking_level: str | None = None, + thinking_budget: int | None = None, + include_thoughts: bool | None = None, + safety_settings: list[dict[str, Any]] | None = None, + # Shared + verbosity: str | None = None, + # Aliases + stop_sequences: list[str] | None = None, + max_output_tokens: int | None = None, + max_completion_tokens: int | None = None, + candidate_count: int | None = None, + **kwargs: Any, + ) -> Generator[ChatCompletionChunk, None, None]: + """Stream chat completion chunks (sync).""" + body = _build_request( + messages=messages, + stream=True, + tools=tools, + tool_choice=tool_choice, + response_format=response_format, + strict=strict, + max_tokens=max_tokens, + temperature=temperature, + top_p=top_p, + top_k=top_k, + stop=stop, + n=n, + frequency_penalty=frequency_penalty, + presence_penalty=presence_penalty, + seed=seed, + logit_bias=logit_bias, + logprobs=logprobs, + top_logprobs=top_logprobs, + parallel_tool_calls=parallel_tool_calls, + reasoning_effort=reasoning_effort, + reasoning=reasoning, + thinking=thinking, + thinking_level=thinking_level, + thinking_budget=thinking_budget, + include_thoughts=include_thoughts, + safety_settings=safety_settings, + verbosity=verbosity, + stop_sequences=stop_sequences, + max_output_tokens=max_output_tokens, + max_completion_tokens=max_completion_tokens, + candidate_count=candidate_count, + **kwargs, + ) + with self._client._sync_client.stream("POST", "/", json=body) as response: + response.raise_for_status() + for data in _iter_sse(response.iter_lines()): + yield _parse_stream_chunk(data) + + async def astream( + self, + *, + messages: Sequence[MessageType], + tools: Sequence[ToolType] | None = None, + tool_choice: ToolChoiceType | None = None, + response_format: ResponseFormatType | None 
= None, + strict: bool | None = None, + # Common + max_tokens: int | None = None, + temperature: float | None = None, + top_p: float | None = None, + top_k: int | None = None, + stop: list[str] | str | None = None, + n: int | None = None, + frequency_penalty: float | None = None, + presence_penalty: float | None = None, + seed: int | None = None, + # OpenAI + logit_bias: dict[str, int] | None = None, + logprobs: bool | None = None, + top_logprobs: int | None = None, + parallel_tool_calls: bool | None = None, + reasoning_effort: str | None = None, + reasoning: dict[str, Any] | None = None, + # Anthropic + thinking: dict[str, Any] | None = None, + # Google + thinking_level: str | None = None, + thinking_budget: int | None = None, + include_thoughts: bool | None = None, + safety_settings: list[dict[str, Any]] | None = None, + # Shared + verbosity: str | None = None, + # Aliases + stop_sequences: list[str] | None = None, + max_output_tokens: int | None = None, + max_completion_tokens: int | None = None, + candidate_count: int | None = None, + **kwargs: Any, + ) -> AsyncGenerator[ChatCompletionChunk, None]: + """Stream chat completion chunks (async).""" + body = _build_request( + messages=messages, + stream=True, + tools=tools, + tool_choice=tool_choice, + response_format=response_format, + strict=strict, + max_tokens=max_tokens, + temperature=temperature, + top_p=top_p, + top_k=top_k, + stop=stop, + n=n, + frequency_penalty=frequency_penalty, + presence_penalty=presence_penalty, + seed=seed, + logit_bias=logit_bias, + logprobs=logprobs, + top_logprobs=top_logprobs, + parallel_tool_calls=parallel_tool_calls, + reasoning_effort=reasoning_effort, + reasoning=reasoning, + thinking=thinking, + thinking_level=thinking_level, + thinking_budget=thinking_budget, + include_thoughts=include_thoughts, + safety_settings=safety_settings, + verbosity=verbosity, + stop_sequences=stop_sequences, + max_output_tokens=max_output_tokens, + max_completion_tokens=max_completion_tokens, + 
candidate_count=candidate_count, + **kwargs, + ) + async with self._client._async_client.stream("POST", "/", json=body) as response: + response.raise_for_status() + async for data in _aiter_sse(response.aiter_lines()): + yield _parse_stream_chunk(data) diff --git a/src/uipath/llm_client/clients/normalized/embeddings.py b/src/uipath/llm_client/clients/normalized/embeddings.py new file mode 100644 index 0000000..9caf92c --- /dev/null +++ b/src/uipath/llm_client/clients/normalized/embeddings.py @@ -0,0 +1,94 @@ +"""Embeddings endpoint for the UiPath Normalized API. + +Provides synchronous and asynchronous methods for generating text embeddings. +""" + +from __future__ import annotations + +from typing import Any + +from uipath.llm_client.clients.normalized.types import ( + EmbeddingData, + EmbeddingResponse, + Usage, +) + + +def _parse_embedding_response(data: dict[str, Any]) -> EmbeddingResponse: + """Parse an embedding response from the API.""" + usage_data = data.get("usage", {}) + embeddings = [ + EmbeddingData( + embedding=item.get("embedding", []), + index=item.get("index", i), + ) + for i, item in enumerate(data.get("data", [])) + ] + return EmbeddingResponse( + data=embeddings, + model=data.get("model", ""), + usage=Usage(**usage_data), + ) + + +class Embeddings: + """Embeddings namespace with ``create`` and ``acreate``. + + Handles request building and response parsing for the UiPath normalized + embeddings API. + + Example: + >>> response = client.embeddings.create(input=["Hello world"]) + >>> print(response.data[0].embedding[:5]) + >>> + >>> response = await client.embeddings.acreate(input=["Hello world"]) + """ + + def __init__(self, client: Any) -> None: + self._client = client + + def create( + self, + *, + input: str | list[str], + **kwargs: Any, + ) -> EmbeddingResponse: + """Create embeddings (sync). + + Args: + input: A string or list of strings to embed. + **kwargs: Additional parameters for the API. 
+ + Returns: + EmbeddingResponse with embedding vectors. + """ + if isinstance(input, str): + input = [input] + + body: dict[str, Any] = {"input": input, **kwargs} + response = self._client._embedding_sync_client.request("POST", "/", json=body) + response.raise_for_status() + return _parse_embedding_response(response.json()) + + async def acreate( + self, + *, + input: str | list[str], + **kwargs: Any, + ) -> EmbeddingResponse: + """Create embeddings (async). + + Args: + input: A string or list of strings to embed. + **kwargs: Additional parameters for the API. + + Returns: + EmbeddingResponse with embedding vectors. + """ + if isinstance(input, str): + input = [input] + + body: dict[str, Any] = {"input": input, **kwargs} + response = await self._client._embedding_async_client.request("POST", "/", json=body) + response.raise_for_status() + return _parse_embedding_response(response.json()) diff --git a/src/uipath/llm_client/clients/normalized/types.py b/src/uipath/llm_client/clients/normalized/types.py new file mode 100644 index 0000000..aeaa372 --- /dev/null +++ b/src/uipath/llm_client/clients/normalized/types.py @@ -0,0 +1,87 @@ +"""Response types for the UiPath Normalized API.""" + +from typing import Any + +from pydantic import BaseModel, Field + + +class ToolCall(BaseModel): + id: str = "" + name: str = "" + arguments: dict[str, Any] = Field(default_factory=dict) + + +class ToolCallChunk(BaseModel): + id: str = "" + name: str = "" + arguments: str = "" + index: int = 0 + + +class Usage(BaseModel): + prompt_tokens: int = 0 + completion_tokens: int = 0 + total_tokens: int = 0 + cache_read_input_tokens: int = 0 + cache_creation_input_tokens: int = 0 + thoughts_tokens: int = 0 + request_processing_tier: str | None = None + + +class Message(BaseModel): + role: str = "assistant" + content: str | None = "" + tool_calls: list[ToolCall] = Field(default_factory=list) + signature: str | None = None + thinking: str | None = None + # Structured output (populated client-side 
when output_format is used) + parsed: Any = None + + +class Delta(BaseModel): + role: str | None = None + content: str | None = "" + tool_calls: list[ToolCallChunk] = Field(default_factory=list) + + +class Choice(BaseModel): + index: int = 0 + message: Message = Field(default_factory=Message) + finish_reason: str | None = None + avg_logprobs: float | None = None + + +class StreamChoice(BaseModel): + index: int = 0 + delta: Delta = Field(default_factory=Delta) + finish_reason: str | None = None + avg_logprobs: float | None = None + + +class ChatCompletion(BaseModel): + id: str = "" + object: str = "" + created: int = 0 + model: str = "" + choices: list[Choice] = Field(default_factory=list) + usage: Usage = Field(default_factory=Usage) + + +class ChatCompletionChunk(BaseModel): + id: str = "" + object: str = "" + created: int | str = 0 + model: str = "" + choices: list[StreamChoice] = Field(default_factory=list) + usage: Usage | None = None + + +class EmbeddingData(BaseModel): + embedding: list[float] = Field(default_factory=list) + index: int = 0 + + +class EmbeddingResponse(BaseModel): + data: list[EmbeddingData] = Field(default_factory=list) + model: str = "" + usage: Usage = Field(default_factory=Usage) diff --git a/tests/cassettes.db b/tests/cassettes.db index f98a4a6..7caa33b 100644 Binary files a/tests/cassettes.db and b/tests/cassettes.db differ diff --git a/tests/core/core_smoke_test.py b/tests/core/core_smoke_test.py index deb72dd..17b44db 100644 --- a/tests/core/core_smoke_test.py +++ b/tests/core/core_smoke_test.py @@ -20,6 +20,7 @@ def test_main_package_imports(): RetryConfig, UiPathHttpxAsyncClient, UiPathHttpxClient, + UiPathNormalizedClient, __version__, get_default_client_settings, ) @@ -47,6 +48,10 @@ def test_main_package_imports(): assert RetryConfig is not None, "RetryConfig should be importable" print(" RetryConfig is importable") + # Verify normalized client is a type + assert isinstance(UiPathNormalizedClient, type), "UiPathNormalizedClient 
should be a class" + print(" UiPathNormalizedClient is importable") + print(" Main package imports OK") @@ -414,6 +419,47 @@ def test_google_client_inheritance(): print(" Google client inheritance OK") +def test_normalized_client_imports(): + """Test that normalized client and its types can be imported.""" + print("Testing normalized client imports...") + + from uipath.llm_client.clients.normalized import ( + ChatCompletion, + ChatCompletionChunk, + Choice, + Delta, + EmbeddingData, + EmbeddingResponse, + Message, + StreamChoice, + ToolCall, + ToolCallChunk, + UiPathNormalizedClient, + Usage, + ) + + # Verify all are types + types = [ + UiPathNormalizedClient, + ChatCompletion, + ChatCompletionChunk, + Choice, + Delta, + EmbeddingData, + EmbeddingResponse, + Message, + StreamChoice, + ToolCall, + ToolCallChunk, + Usage, + ] + for t in types: + assert isinstance(t, type), f"{t.__name__} should be a class" + + print(f" All {len(types)} normalized client types are importable") + print(" Normalized client imports OK") + + def test_uipath_api_config(): """Test UiPathAPIConfig can be instantiated with valid configurations.""" print("Testing UiPathAPIConfig instantiation...") @@ -463,6 +509,7 @@ def main(): test_httpx_client_module_imports, test_exceptions_module_imports, test_retry_module_imports, + test_normalized_client_imports, test_openai_client_imports, test_anthropic_client_imports, test_google_client_imports, diff --git a/tests/core/test_normalized_client.py b/tests/core/test_normalized_client.py new file mode 100644 index 0000000..397db1d --- /dev/null +++ b/tests/core/test_normalized_client.py @@ -0,0 +1,1386 @@ +"""Tests for the normalized client module. + +This module tests: +1. UiPathNormalizedClient initialization and client creation +2. Completions.create (sync, non-streaming) +3. Completions.stream (sync, streaming) +4. Completions.acreate (async, non-streaming) +5. Tool calling (tool definition building, tool_choice resolution) +6. 
Structured output (Pydantic, TypedDict, dict schemas) +7. Embeddings.create and Embeddings.acreate +8. Response type parsing (ChatCompletion, ChatCompletionChunk, EmbeddingResponse) +""" + +import json +from typing import TypedDict +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest +from pydantic import BaseModel + +from uipath.llm_client.clients.normalized import ( + ChatCompletion, + ChatCompletionChunk, + Choice, + Delta, + EmbeddingData, + EmbeddingResponse, + Message, + ToolCall, + ToolCallChunk, + UiPathNormalizedClient, + Usage, +) +from uipath.llm_client.clients.normalized.completions import ( + Completions, + _build_request, + _build_response_format, + _build_tool_definition, + _parse_response, + _parse_stream_chunk, + _parse_structured_output, + _parse_tool_call, + _parse_tool_call_chunk, + _resolve_tool_choice, +) +from uipath.llm_client.clients.normalized.embeddings import _parse_embedding_response +from uipath.llm_client.settings.utils import SingletonMeta + +# ============================================================================ +# Fixtures +# ============================================================================ + +_CLIENT_MODULE = "uipath.llm_client.clients.normalized.client" + + +@pytest.fixture(autouse=True) +def clear_singleton_instances(): + """Clear singleton instances before each test to ensure isolation.""" + SingletonMeta._instances.clear() + yield + SingletonMeta._instances.clear() + + +@pytest.fixture +def mock_settings(): + settings = MagicMock() + settings.build_base_url.return_value = "https://gateway.uipath.com/llm/v1" + settings.build_auth_headers.return_value = {"Authorization": "Bearer test-token"} + settings.build_auth_pipeline.return_value = None + return settings + + +@pytest.fixture +def mock_sync_client(): + client = MagicMock() + return client + + +@pytest.fixture +def mock_async_client(): + client = AsyncMock() + return client + + +# 
============================================================================ +# Response parsing helpers +# ============================================================================ + +SAMPLE_COMPLETION_RESPONSE = { + "id": "chatcmpl-123", + "created": 1234567890, + "model": "gpt-4o", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "Hello! How can I help you?", + }, + "finish_reason": "stop", + } + ], + "usage": { + "prompt_tokens": 10, + "completion_tokens": 8, + "total_tokens": 18, + }, +} + +SAMPLE_TOOL_CALL_RESPONSE = { + "id": "chatcmpl-456", + "created": 1234567890, + "model": "gpt-4o", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_abc123", + "name": "get_weather", + "arguments": {"city": "London"}, + } + ], + }, + "finish_reason": "tool_calls", + } + ], + "usage": { + "prompt_tokens": 15, + "completion_tokens": 20, + "total_tokens": 35, + }, +} + +SAMPLE_STREAM_CHUNKS = [ + { + "id": "chatcmpl-789", + "created": 1234567890, + "model": "gpt-4o", + "choices": [ + { + "index": 0, + "delta": {"role": "assistant", "content": "Hello"}, + "finish_reason": None, + } + ], + }, + { + "id": "chatcmpl-789", + "created": 1234567890, + "model": "gpt-4o", + "choices": [ + { + "index": 0, + "delta": {"content": " world!"}, + "finish_reason": None, + } + ], + }, + { + "id": "chatcmpl-789", + "created": 1234567890, + "model": "gpt-4o", + "choices": [ + { + "index": 0, + "delta": {}, + "finish_reason": "stop", + } + ], + "usage": { + "prompt_tokens": 5, + "completion_tokens": 3, + "total_tokens": 8, + }, + }, +] + +SAMPLE_EMBEDDING_RESPONSE = { + "data": [ + {"embedding": [0.1, 0.2, 0.3], "index": 0}, + {"embedding": [0.4, 0.5, 0.6], "index": 1}, + ], + "model": "text-embedding-ada-002", + "usage": {"prompt_tokens": 5, "total_tokens": 5}, +} + + +# ============================================================================ +# Test: Response type parsing +# 
============================================================================ + + +class TestParseResponse: + def test_basic_completion(self): + result = _parse_response(SAMPLE_COMPLETION_RESPONSE) + assert isinstance(result, ChatCompletion) + assert result.id == "chatcmpl-123" + assert result.model == "gpt-4o" + assert len(result.choices) == 1 + assert result.choices[0].message.content == "Hello! How can I help you?" + assert result.choices[0].finish_reason == "stop" + assert result.usage.prompt_tokens == 10 + assert result.usage.completion_tokens == 8 + assert result.usage.total_tokens == 18 + + def test_tool_call_response(self): + result = _parse_response(SAMPLE_TOOL_CALL_RESPONSE) + assert len(result.choices[0].message.tool_calls) == 1 + tc = result.choices[0].message.tool_calls[0] + assert tc.id == "call_abc123" + assert tc.name == "get_weather" + assert tc.arguments == {"city": "London"} + + def test_empty_response(self): + result = _parse_response({"choices": [], "usage": {}}) + assert len(result.choices) == 0 + assert result.usage.prompt_tokens == 0 + + def test_tool_call_with_string_arguments(self): + data = { + "choices": [ + { + "message": { + "tool_calls": [ + { + "id": "call_1", + "name": "func", + "arguments": '{"key": "value"}', + } + ] + } + } + ], + "usage": {}, + } + result = _parse_response(data) + tc = result.choices[0].message.tool_calls[0] + assert tc.arguments == {"key": "value"} + + def test_tool_call_with_invalid_json_arguments(self): + data = { + "choices": [ + { + "message": { + "tool_calls": [ + { + "id": "call_1", + "name": "func", + "arguments": "not json", + } + ] + } + } + ], + "usage": {}, + } + result = _parse_response(data) + tc = result.choices[0].message.tool_calls[0] + assert tc.arguments == {} + + +class TestParseStreamChunk: + def test_content_chunk(self): + result = _parse_stream_chunk(SAMPLE_STREAM_CHUNKS[0]) + assert isinstance(result, ChatCompletionChunk) + assert result.id == "chatcmpl-789" + assert len(result.choices) == 
1 + assert result.choices[0].delta.content == "Hello" + assert result.choices[0].delta.role == "assistant" + + def test_chunk_with_usage(self): + result = _parse_stream_chunk(SAMPLE_STREAM_CHUNKS[2]) + assert result.usage is not None + assert result.usage.prompt_tokens == 5 + assert result.choices[0].finish_reason == "stop" + + def test_chunk_without_usage(self): + result = _parse_stream_chunk(SAMPLE_STREAM_CHUNKS[0]) + assert result.usage is None + + def test_stream_tool_call_chunk(self): + data = { + "id": "chatcmpl-tc", + "choices": [ + { + "delta": { + "tool_calls": [ + { + "id": "call_1", + "name": "get_weather", + "arguments": '{"city":', + "index": 0, + } + ] + } + } + ], + } + result = _parse_stream_chunk(data) + assert len(result.choices[0].delta.tool_calls) == 1 + tc = result.choices[0].delta.tool_calls[0] + assert tc.name == "get_weather" + assert tc.arguments == '{"city":' + + def test_stream_tool_call_with_function_format(self): + data = { + "id": "chatcmpl-tc", + "choices": [ + { + "delta": { + "tool_calls": [ + { + "id": "call_1", + "function": { + "name": "get_weather", + "arguments": '{"city": "Paris"}', + }, + "index": 0, + } + ] + } + } + ], + } + result = _parse_stream_chunk(data) + tc = result.choices[0].delta.tool_calls[0] + assert tc.name == "get_weather" + assert tc.arguments == '{"city": "Paris"}' + + +class TestParseEmbeddingResponse: + def test_basic_embedding(self): + result = _parse_embedding_response(SAMPLE_EMBEDDING_RESPONSE) + assert isinstance(result, EmbeddingResponse) + assert len(result.data) == 2 + assert result.data[0].embedding == [0.1, 0.2, 0.3] + assert result.data[1].embedding == [0.4, 0.5, 0.6] + assert result.model == "text-embedding-ada-002" + assert result.usage.prompt_tokens == 5 + + def test_empty_embedding(self): + result = _parse_embedding_response({"data": [], "usage": {}}) + assert len(result.data) == 0 + + +# ============================================================================ +# Test: Structured output 
+# ============================================================================ + + +class TestBuildResponseFormat: + def test_pydantic_model(self): + class MyModel(BaseModel): + name: str + age: int + + result = _build_response_format(MyModel) + assert result["type"] == "json_schema" + assert result["json_schema"]["name"] == "MyModel" + assert result["json_schema"]["strict"] is True + assert "properties" in result["json_schema"]["schema"] + + def test_typed_dict(self): + class MyDict(TypedDict): + name: str + score: float + + result = _build_response_format(MyDict) + assert result["type"] == "json_schema" + assert result["json_schema"]["name"] == "MyDict" + assert result["json_schema"]["strict"] is True + schema = result["json_schema"]["schema"] + assert schema["type"] == "object" + assert "name" in schema["properties"] + assert "score" in schema["properties"] + assert schema["properties"]["name"]["type"] == "string" + assert schema["properties"]["score"]["type"] == "number" + + def test_dict_schema(self): + schema = { + "name": "my_schema", + "schema": {"type": "object", "properties": {"x": {"type": "integer"}}}, + } + result = _build_response_format(schema) + assert result["type"] == "json_schema" + assert result["json_schema"] == schema + + def test_unsupported_type(self): + with pytest.raises(TypeError, match="Unsupported response_format"): + _build_response_format("not a type") # type: ignore[arg-type] + + +class TestParseStructuredOutput: + def test_parse_pydantic(self): + class Answer(BaseModel): + text: str + score: float + + content = '{"text": "hello", "score": 0.9}' + result = _parse_structured_output(content, Answer) + assert isinstance(result, Answer) + assert result.text == "hello" + assert result.score == 0.9 + + def test_parse_dict(self): + content = '{"key": "value"}' + result = _parse_structured_output(content, {"type": "object"}) + assert result == {"key": "value"} + + def test_parse_invalid_json(self): + result = _parse_structured_output("not 
json", str) + assert result is None + + def test_response_with_structured_output(self): + class Answer(BaseModel): + text: str + + data = { + "choices": [ + { + "message": { + "content": '{"text": "hello"}', + } + } + ], + "usage": {}, + } + result = _parse_response(data, response_format=Answer) + assert result.choices[0].message.parsed is not None + assert isinstance(result.choices[0].message.parsed, Answer) + assert result.choices[0].message.parsed.text == "hello" + + def test_response_without_structured_output(self): + data = { + "choices": [ + { + "message": { + "content": "plain text", + } + } + ], + "usage": {}, + } + result = _parse_response(data) + assert result.choices[0].message.parsed is None + + +# ============================================================================ +# Test: Tool definition building +# ============================================================================ + + +class TestBuildToolDefinition: + def test_dict_passthrough(self): + tool = {"name": "my_tool", "description": "does stuff", "parameters": {}} + result = _build_tool_definition(tool) + assert result is tool + + def test_pydantic_model(self): + class WeatherInput(BaseModel): + """Get weather for a city.""" + + city: str + units: str = "celsius" + + result = _build_tool_definition(WeatherInput) + assert result["name"] == "WeatherInput" + assert result["description"] == "Get weather for a city." + assert "properties" in result["parameters"] + assert "city" in result["parameters"]["properties"] + + def test_callable(self): + def get_weather(city: str, units: str = "celsius") -> str: + """Get weather for a city.""" + return f"Weather in {city}" + + result = _build_tool_definition(get_weather) + assert result["name"] == "get_weather" + assert result["description"] == "Get weather for a city." 
+ assert "city" in result["parameters"]["properties"] + assert "city" in result["parameters"]["required"] + assert "units" not in result["parameters"]["required"] + + def test_unsupported_type(self): + with pytest.raises(TypeError, match="Unsupported tool type"): + _build_tool_definition(42) # type: ignore[arg-type] + + +class TestToolChoiceResolution: + def test_auto(self): + result = _resolve_tool_choice("auto", []) + assert result == "auto" + + def test_required(self): + result = _resolve_tool_choice("required", []) + assert result == "required" + + def test_none(self): + result = _resolve_tool_choice("none", []) + assert result == "none" + + def test_specific_tool(self): + tools = [{"name": "get_weather"}, {"name": "search"}] + result = _resolve_tool_choice("get_weather", tools) + assert result == {"type": "tool", "name": "get_weather"} + + def test_unknown_becomes_auto(self): + result = _resolve_tool_choice("unknown_tool", [{"name": "other"}]) + assert result == "auto" + + def test_dict_passthrough(self): + choice = {"type": "required"} + result = _resolve_tool_choice(choice, []) + assert result is choice + + +# ============================================================================ +# Test: Tool call parsing +# ============================================================================ + + +class TestParseToolCall: + def test_basic(self): + tc = _parse_tool_call({"id": "call_1", "name": "func", "arguments": {"x": 1}}) + assert tc.id == "call_1" + assert tc.name == "func" + assert tc.arguments == {"x": 1} + + def test_string_arguments(self): + tc = _parse_tool_call({"id": "call_1", "name": "func", "arguments": '{"x": 1}'}) + assert tc.arguments == {"x": 1} + + def test_invalid_string_arguments(self): + tc = _parse_tool_call({"id": "call_1", "name": "func", "arguments": "not json"}) + assert tc.arguments == {} + + +class TestParseToolCallChunk: + def test_flat_format(self): + tc = _parse_tool_call_chunk( + {"id": "call_1", "name": "func", "arguments": 
'{"x":', "index": 0} + ) + assert tc.name == "func" + assert tc.arguments == '{"x":' + + def test_function_format(self): + tc = _parse_tool_call_chunk( + { + "id": "call_1", + "function": {"name": "func", "arguments": '{"x": 1}'}, + "index": 0, + } + ) + assert tc.name == "func" + assert tc.arguments == '{"x": 1}' + + def test_dict_arguments_converted(self): + tc = _parse_tool_call_chunk( + {"id": "call_1", "name": "func", "arguments": {"x": 1}, "index": 0} + ) + assert tc.arguments == '{"x": 1}' + + +# ============================================================================ +# Test: Client initialization +# ============================================================================ + + +class TestUiPathNormalizedClientInit: + @patch(f"{_CLIENT_MODULE}.build_httpx_client") + @patch(f"{_CLIENT_MODULE}.get_default_client_settings") + def test_default_settings(self, mock_get_settings, mock_build): + mock_settings = MagicMock() + mock_settings.build_auth_pipeline.return_value = None + mock_get_settings.return_value = mock_settings + + client = UiPathNormalizedClient(model_name="gpt-4o") + assert client._model_name == "gpt-4o" + mock_get_settings.assert_called_once() + + @patch(f"{_CLIENT_MODULE}.build_httpx_client") + def test_custom_settings(self, mock_build): + settings = MagicMock() + settings.build_auth_pipeline.return_value = None + + client = UiPathNormalizedClient(model_name="gpt-4o", client_settings=settings) + assert client._client_settings is settings + + @patch(f"{_CLIENT_MODULE}.build_httpx_client") + @patch(f"{_CLIENT_MODULE}.get_default_client_settings") + def test_has_completions_namespace(self, mock_get_settings, mock_build): + mock_settings = MagicMock() + mock_settings.build_auth_pipeline.return_value = None + mock_get_settings.return_value = mock_settings + mock_build.return_value = MagicMock() + + client = UiPathNormalizedClient(model_name="gpt-4o") + assert hasattr(client, "completions") + assert isinstance(client.completions, Completions) + + 
@patch(f"{_CLIENT_MODULE}.build_httpx_client") + @patch(f"{_CLIENT_MODULE}.get_default_client_settings") + def test_has_embeddings_namespace(self, mock_get_settings, mock_build): + mock_settings = MagicMock() + mock_settings.build_auth_pipeline.return_value = None + mock_get_settings.return_value = mock_settings + mock_build.return_value = MagicMock() + + client = UiPathNormalizedClient(model_name="gpt-4o") + from uipath.llm_client.clients.normalized.embeddings import Embeddings + + assert hasattr(client, "embeddings") + assert isinstance(client.embeddings, Embeddings) + + @patch(f"{_CLIENT_MODULE}.build_httpx_client") + @patch(f"{_CLIENT_MODULE}.get_default_client_settings") + def test_completions_api_config(self, mock_get_settings, mock_build): + mock_settings = MagicMock() + mock_settings.build_auth_pipeline.return_value = None + mock_get_settings.return_value = mock_settings + + client = UiPathNormalizedClient(model_name="gpt-4o") + assert client._completions_api_config.api_type == "completions" + assert client._completions_api_config.routing_mode == "normalized" + assert client._completions_api_config.freeze_base_url is True + + @patch(f"{_CLIENT_MODULE}.build_httpx_client") + @patch(f"{_CLIENT_MODULE}.get_default_client_settings") + def test_embeddings_api_config(self, mock_get_settings, mock_build): + mock_settings = MagicMock() + mock_settings.build_auth_pipeline.return_value = None + mock_get_settings.return_value = mock_settings + + client = UiPathNormalizedClient(model_name="gpt-4o") + assert client._embeddings_api_config.api_type == "embeddings" + assert client._embeddings_api_config.routing_mode == "normalized" + assert client._embeddings_api_config.freeze_base_url is True + + +# ============================================================================ +# Test: Completions.create (sync, non-streaming) +# ============================================================================ + + +class TestCompletionsCreate: + def test_basic_create(self, 
mock_sync_client): + mock_response = MagicMock() + mock_response.json.return_value = SAMPLE_COMPLETION_RESPONSE + mock_sync_client.request.return_value = mock_response + + client_obj = MagicMock() + client_obj._sync_client = mock_sync_client + + completions = Completions(client_obj) + result = completions.create( + messages=[{"role": "user", "content": "Hello"}], + ) + + assert isinstance(result, ChatCompletion) + assert result.choices[0].message.content == "Hello! How can I help you?" + mock_sync_client.request.assert_called_once() + call_kwargs = mock_sync_client.request.call_args + body = call_kwargs.kwargs["json"] + assert body["messages"] == [{"role": "user", "content": "Hello"}] + + def test_create_with_params(self, mock_sync_client): + mock_response = MagicMock() + mock_response.json.return_value = SAMPLE_COMPLETION_RESPONSE + mock_sync_client.request.return_value = mock_response + + client_obj = MagicMock() + client_obj._sync_client = mock_sync_client + + completions = Completions(client_obj) + completions.create( + messages=[{"role": "user", "content": "Hello"}], + max_tokens=100, + temperature=0.5, + top_p=0.9, + stop=["END"], + n=2, + presence_penalty=0.1, + frequency_penalty=0.2, + ) + + body = mock_sync_client.request.call_args.kwargs["json"] + assert body["max_tokens"] == 100 + assert body["temperature"] == 0.5 + assert body["top_p"] == 0.9 + assert body["stop"] == ["END"] + assert body["n"] == 2 + assert body["presence_penalty"] == 0.1 + assert body["frequency_penalty"] == 0.2 + + def test_create_omits_none_params(self, mock_sync_client): + mock_response = MagicMock() + mock_response.json.return_value = SAMPLE_COMPLETION_RESPONSE + mock_sync_client.request.return_value = mock_response + + client_obj = MagicMock() + client_obj._sync_client = mock_sync_client + + completions = Completions(client_obj) + completions.create( + messages=[{"role": "user", "content": "Hello"}], + ) + + body = mock_sync_client.request.call_args.kwargs["json"] + assert 
"max_tokens" not in body + assert "temperature" not in body + assert "stop" not in body + + def test_create_with_tools(self, mock_sync_client): + mock_response = MagicMock() + mock_response.json.return_value = SAMPLE_TOOL_CALL_RESPONSE + mock_sync_client.request.return_value = mock_response + + client_obj = MagicMock() + client_obj._sync_client = mock_sync_client + + completions = Completions(client_obj) + result = completions.create( + messages=[{"role": "user", "content": "What's the weather?"}], + tools=[ + { + "name": "get_weather", + "description": "Get weather", + "parameters": { + "type": "object", + "properties": {"city": {"type": "string"}}, + }, + } + ], + tool_choice="auto", + ) + + body = mock_sync_client.request.call_args.kwargs["json"] + assert "tools" in body + assert body["tool_choice"] == "auto" + assert len(result.choices[0].message.tool_calls) == 1 + + def test_create_with_response_format(self, mock_sync_client): + class MyOutput(BaseModel): + answer: str + + mock_response = MagicMock() + mock_response.json.return_value = { + "choices": [{"message": {"content": '{"answer": "42"}'}}], + "usage": {}, + } + mock_sync_client.request.return_value = mock_response + + client_obj = MagicMock() + client_obj._sync_client = mock_sync_client + + completions = Completions(client_obj) + result = completions.create( + messages=[{"role": "user", "content": "What is 6*7?"}], + response_format=MyOutput, + ) + + body = mock_sync_client.request.call_args.kwargs["json"] + assert "response_format" in body + assert body["response_format"]["type"] == "json_schema" + assert result.choices[0].message.parsed is not None + assert result.choices[0].message.parsed.answer == "42" + + def test_create_with_kwargs(self, mock_sync_client): + mock_response = MagicMock() + mock_response.json.return_value = SAMPLE_COMPLETION_RESPONSE + mock_sync_client.request.return_value = mock_response + + client_obj = MagicMock() + client_obj._sync_client = mock_sync_client + + completions = 
Completions(client_obj) + completions.create( + messages=[{"role": "user", "content": "Hello"}], + reasoning={"effort": "high"}, + ) + + body = mock_sync_client.request.call_args.kwargs["json"] + assert body["reasoning"] == {"effort": "high"} + + +# ============================================================================ +# Test: Completions.stream (sync, streaming) +# ============================================================================ + + +class TestCompletionsStream: + def test_stream_yields_chunks(self, mock_sync_client): + sse_lines = [f"data: {json.dumps(chunk)}" for chunk in SAMPLE_STREAM_CHUNKS] + + mock_response = MagicMock() + mock_response.iter_lines.return_value = iter(sse_lines) + mock_sync_client.stream.return_value.__enter__ = MagicMock(return_value=mock_response) + mock_sync_client.stream.return_value.__exit__ = MagicMock(return_value=False) + + client_obj = MagicMock() + client_obj._sync_client = mock_sync_client + + completions = Completions(client_obj) + chunks = list( + completions.stream( + messages=[{"role": "user", "content": "Hello"}], + ) + ) + + assert len(chunks) == 3 + assert chunks[0].choices[0].delta.content == "Hello" + assert chunks[1].choices[0].delta.content == " world!" 
+ assert chunks[2].choices[0].finish_reason == "stop" + + def test_stream_skips_invalid_json(self, mock_sync_client): + lines = [ + "data: {invalid json", + f"data: {json.dumps(SAMPLE_STREAM_CHUNKS[0])}", + "", # empty line + ] + + mock_response = MagicMock() + mock_response.iter_lines.return_value = iter(lines) + mock_sync_client.stream.return_value.__enter__ = MagicMock(return_value=mock_response) + mock_sync_client.stream.return_value.__exit__ = MagicMock(return_value=False) + + client_obj = MagicMock() + client_obj._sync_client = mock_sync_client + + completions = Completions(client_obj) + chunks = list( + completions.stream( + messages=[{"role": "user", "content": "Hello"}], + ) + ) + + assert len(chunks) == 1 + + def test_stream_skips_empty_id(self, mock_sync_client): + lines = [ + f"data: {json.dumps({'id': '', 'choices': []})}", + f"data: {json.dumps(SAMPLE_STREAM_CHUNKS[0])}", + ] + + mock_response = MagicMock() + mock_response.iter_lines.return_value = iter(lines) + mock_sync_client.stream.return_value.__enter__ = MagicMock(return_value=mock_response) + mock_sync_client.stream.return_value.__exit__ = MagicMock(return_value=False) + + client_obj = MagicMock() + client_obj._sync_client = mock_sync_client + + completions = Completions(client_obj) + chunks = list( + completions.stream( + messages=[{"role": "user", "content": "Hello"}], + ) + ) + + assert len(chunks) == 1 + + def test_stream_sets_stream_flag(self, mock_sync_client): + mock_response = MagicMock() + mock_response.iter_lines.return_value = iter([]) + mock_sync_client.stream.return_value.__enter__ = MagicMock(return_value=mock_response) + mock_sync_client.stream.return_value.__exit__ = MagicMock(return_value=False) + + client_obj = MagicMock() + client_obj._sync_client = mock_sync_client + + completions = Completions(client_obj) + list( + completions.stream( + messages=[{"role": "user", "content": "Hello"}], + ) + ) + + call_kwargs = mock_sync_client.stream.call_args + body = 
call_kwargs.kwargs["json"] + assert body["stream"] is True + + +# ============================================================================ +# Test: Completions.acreate (async, non-streaming) +# ============================================================================ + + +class TestAsyncCompletionsCreate: + @pytest.mark.asyncio + async def test_basic_acreate(self, mock_async_client): + mock_response = MagicMock() + mock_response.json.return_value = SAMPLE_COMPLETION_RESPONSE + mock_async_client.request.return_value = mock_response + + client_obj = MagicMock() + client_obj._async_client = mock_async_client + + completions = Completions(client_obj) + result = await completions.acreate( + messages=[{"role": "user", "content": "Hello"}], + ) + + assert isinstance(result, ChatCompletion) + assert result.choices[0].message.content == "Hello! How can I help you?" + + +# ============================================================================ +# Test: Embeddings +# ============================================================================ + + +class TestEmbeddingsCreate: + def test_basic_create(self): + mock_response = MagicMock() + mock_response.json.return_value = SAMPLE_EMBEDDING_RESPONSE + + mock_client = MagicMock() + mock_client.request.return_value = mock_response + + client_obj = MagicMock() + client_obj._embedding_sync_client = mock_client + + from uipath.llm_client.clients.normalized.embeddings import Embeddings + + embeddings = Embeddings(client_obj) + result = embeddings.create(input=["Hello world", "Goodbye"]) + + assert isinstance(result, EmbeddingResponse) + assert len(result.data) == 2 + assert result.data[0].embedding == [0.1, 0.2, 0.3] + + body = mock_client.request.call_args.kwargs["json"] + assert body["input"] == ["Hello world", "Goodbye"] + + def test_string_input_wrapped(self): + mock_response = MagicMock() + mock_response.json.return_value = SAMPLE_EMBEDDING_RESPONSE + + mock_client = MagicMock() + mock_client.request.return_value = 
mock_response + + client_obj = MagicMock() + client_obj._embedding_sync_client = mock_client + + from uipath.llm_client.clients.normalized.embeddings import Embeddings + + embeddings = Embeddings(client_obj) + embeddings.create(input="Hello world") + + body = mock_client.request.call_args.kwargs["json"] + assert body["input"] == ["Hello world"] + + +class TestAsyncEmbeddingsCreate: + @pytest.mark.asyncio + async def test_basic_acreate(self): + mock_response = MagicMock() + mock_response.json.return_value = SAMPLE_EMBEDDING_RESPONSE + + mock_client = AsyncMock() + mock_client.request.return_value = mock_response + + client_obj = MagicMock() + client_obj._embedding_async_client = mock_client + + from uipath.llm_client.clients.normalized.embeddings import Embeddings + + embeddings = Embeddings(client_obj) + result = await embeddings.acreate(input=["Hello world"]) + + assert isinstance(result, EmbeddingResponse) + assert len(result.data) == 2 + + +# ============================================================================ +# Test: Type models +# ============================================================================ + + +class TestTypeModels: + def test_usage_defaults(self): + usage = Usage() + assert usage.prompt_tokens == 0 + assert usage.completion_tokens == 0 + assert usage.total_tokens == 0 + assert usage.cache_read_input_tokens == 0 + + def test_tool_call(self): + tc = ToolCall(id="call_1", name="func", arguments={"x": 1}) + assert tc.id == "call_1" + assert tc.name == "func" + assert tc.arguments == {"x": 1} + + def test_tool_call_chunk(self): + tc = ToolCallChunk(id="call_1", name="func", arguments='{"x":', index=0) + assert tc.arguments == '{"x":' + + def test_message_defaults(self): + msg = Message() + assert msg.role == "assistant" + assert msg.content == "" + assert msg.tool_calls == [] + assert msg.parsed is None + + def test_delta_defaults(self): + delta = Delta() + assert delta.role is None + assert delta.content == "" + assert delta.tool_calls 
== [] + + def test_choice_defaults(self): + choice = Choice() + assert choice.index == 0 + assert choice.finish_reason is None + + def test_chat_completion_defaults(self): + cc = ChatCompletion() + assert cc.id == "" + assert cc.choices == [] + assert cc.usage.prompt_tokens == 0 + + def test_embedding_data(self): + ed = EmbeddingData(embedding=[0.1, 0.2], index=0) + assert ed.embedding == [0.1, 0.2] + + def test_embedding_response(self): + er = EmbeddingResponse( + data=[EmbeddingData(embedding=[0.1], index=0)], + model="test-model", + ) + assert len(er.data) == 1 + assert er.model == "test-model" + + +# ============================================================================ +# Test: Request body building +# ============================================================================ + + +class TestBuildRequest: + def test_minimal_request(self): + body = _build_request( + messages=[{"role": "user", "content": "Hi"}], + ) + assert body == {"messages": [{"role": "user", "content": "Hi"}]} + + def test_stream_flag(self): + body = _build_request( + messages=[{"role": "user", "content": "Hi"}], + stream=True, + ) + assert body["stream"] is True + + def test_all_optional_params(self): + body = _build_request( + messages=[{"role": "user", "content": "Hi"}], + max_tokens=100, + temperature=0.7, + stop=["END"], + n=3, + top_p=0.9, + presence_penalty=0.5, + frequency_penalty=0.3, + ) + assert body["max_tokens"] == 100 + assert body["temperature"] == 0.7 + assert body["stop"] == ["END"] + assert body["n"] == 3 + assert body["top_p"] == 0.9 + assert body["presence_penalty"] == 0.5 + assert body["frequency_penalty"] == 0.3 + + def test_with_tools(self): + body = _build_request( + messages=[{"role": "user", "content": "Hi"}], + tools=[{"name": "func", "description": "d", "parameters": {}}], + tool_choice="auto", + ) + assert len(body["tools"]) == 1 + assert body["tool_choice"] == "auto" + + def test_with_response_format(self): + class MyModel(BaseModel): + x: int + + body = 
_build_request( + messages=[{"role": "user", "content": "Hi"}], + response_format=MyModel, + ) + assert body["response_format"]["type"] == "json_schema" + assert body["response_format"]["json_schema"]["name"] == "MyModel" + + def test_kwargs_merged(self): + body = _build_request( + messages=[{"role": "user", "content": "Hi"}], + custom_param="value", + ) + assert body["custom_param"] == "value" + + def test_openai_specific_params(self): + body = _build_request( + messages=[{"role": "user", "content": "Hi"}], + seed=42, + logit_bias={"123": -100}, + logprobs=True, + top_logprobs=5, + parallel_tool_calls=False, + reasoning_effort="high", + reasoning={"effort": "high"}, + ) + assert body["seed"] == 42 + assert body["logit_bias"] == {"123": -100} + assert body["logprobs"] is True + assert body["top_logprobs"] == 5 + assert body["parallel_tool_calls"] is False + assert body["reasoning_effort"] == "high" + assert body["reasoning"] == {"effort": "high"} + + def test_anthropic_specific_params(self): + body = _build_request( + messages=[{"role": "user", "content": "Hi"}], + top_k=40, + thinking={"type": "enabled", "budget_tokens": 1000}, + ) + assert body["top_k"] == 40 + assert body["thinking"] == {"type": "enabled", "budget_tokens": 1000} + + def test_google_specific_params(self): + body = _build_request( + messages=[{"role": "user", "content": "Hi"}], + thinking_level="high", + thinking_budget=2000, + include_thoughts=True, + safety_settings=[{"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"}], + ) + assert body["thinking_level"] == "high" + assert body["thinking_budget"] == 2000 + assert body["include_thoughts"] is True + assert len(body["safety_settings"]) == 1 + + def test_shared_params(self): + body = _build_request( + messages=[{"role": "user", "content": "Hi"}], + verbosity="low", + ) + assert body["verbosity"] == "low" + + def test_removed_infra_params_go_through_kwargs(self): + body = _build_request( + messages=[{"role": "user", "content": 
"Hi"}], + user="user-123", + service_tier="auto", + metadata={"request_id": "abc"}, + ) + assert body["user"] == "user-123" + assert body["service_tier"] == "auto" + assert body["metadata"] == {"request_id": "abc"} + + def test_pydantic_messages(self): + class ChatMessage(BaseModel): + role: str + content: str + + body = _build_request( + messages=[ChatMessage(role="user", content="Hi")], + ) + assert body["messages"] == [{"role": "user", "content": "Hi"}] + + def test_mixed_dict_and_pydantic_messages(self): + class ChatMessage(BaseModel): + role: str + content: str + + body = _build_request( + messages=[ + {"role": "system", "content": "Be brief."}, + ChatMessage(role="user", content="Hi"), + ], + ) + assert body["messages"] == [ + {"role": "system", "content": "Be brief."}, + {"role": "user", "content": "Hi"}, + ] + + def test_pydantic_message_with_none_fields_excluded(self): + class ChatMessage(BaseModel): + role: str + content: str + name: str | None = None + + body = _build_request( + messages=[ChatMessage(role="user", content="Hi")], + ) + assert body["messages"] == [{"role": "user", "content": "Hi"}] + + def test_pydantic_tool_in_tools_list(self): + class GetWeather(BaseModel): + """Get weather for a city.""" + + city: str + + body = _build_request( + messages=[{"role": "user", "content": "Hi"}], + tools=[GetWeather], + tool_choice="auto", + ) + assert body["tools"][0]["name"] == "GetWeather" + assert "city" in body["tools"][0]["parameters"]["properties"] + + +# ============================================================================ +# Test: Real-world response shapes (from captured API payloads) +# ============================================================================ + + +class TestRealWorldResponses: + """Tests using actual response shapes observed from the normalized API.""" + + def test_gpt4o_basic(self): + data = { + "id": "chatcmpl-DQdh09fdBuc8LPCkDqhJKgrQy3IN8", + "model": "gpt-4o-2024-11-20", + "object": "chat.completion", + "choices": [ 
+ { + "finish_reason": "stop", + "index": 0, + "message": {"content": "Hello.", "role": "assistant"}, + } + ], + "created": 1775241562, + "usage": { + "completion_tokens": 3, + "prompt_tokens": 14, + "total_tokens": 17, + "cache_read_input_tokens": 0, + "thoughts_tokens": 0, + }, + } + result = _parse_response(data) + assert result.object == "chat.completion" + assert result.model == "gpt-4o-2024-11-20" + assert result.choices[0].message.content == "Hello." + assert result.usage.thoughts_tokens == 0 + + def test_gemini_with_avg_logprobs_and_signature(self): + data = { + "id": "gemini-123", + "model": "gemini-2.5-flash", + "object": "chat.completion", + "choices": [ + { + "finish_reason": "stop", + "avg_logprobs": -0.123, + "index": 0, + "message": { + "role": "assistant", + "signature": "abc123signature", + "tool_calls": [ + {"id": "call_1", "name": "get_weather", "arguments": {"city": "London"}} + ], + }, + } + ], + "created": 1775241600, + "usage": { + "completion_tokens": 5, + "prompt_tokens": 31, + "total_tokens": 130, + "cache_read_input_tokens": 0, + "thoughts_tokens": 94, + "request_processing_tier": "ON_DEMAND", + }, + } + result = _parse_response(data) + assert result.choices[0].avg_logprobs == -0.123 + assert result.choices[0].message.signature == "abc123signature" + assert result.choices[0].message.tool_calls[0].name == "get_weather" + assert result.usage.thoughts_tokens == 94 + assert result.usage.request_processing_tier == "ON_DEMAND" + + def test_anthropic_with_thinking(self): + data = { + "id": "anthropic-456", + "model": "claude-haiku-4-5", + "object": "chat.completion", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "message": { + "content": "15 + 27 = 42", + "role": "assistant", + "signature": "ErACsignature", + "thinking": "This is a straightforward arithmetic problem.\n15 + 27 = 42", + }, + } + ], + "created": 1775241700, + "usage": { + "cache_creation_input_tokens": 0, + "cache_read_input_tokens": 0, + "completion_tokens": 10, + 
"prompt_tokens": 14, + "total_tokens": 24, + }, + } + result = _parse_response(data) + assert ( + result.choices[0].message.thinking + == "This is a straightforward arithmetic problem.\n15 + 27 = 42" + ) + assert result.choices[0].message.signature == "ErACsignature" + assert result.choices[0].message.content == "15 + 27 = 42" + assert result.usage.cache_creation_input_tokens == 0 + + def test_gpt5_with_reasoning_usage(self): + data = { + "id": "chatcmpl-gpt5", + "model": "gpt-5.2-2025-12-11", + "object": "chat.completion", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "message": {"content": "100", "role": "assistant"}, + } + ], + "created": 1775241800, + "usage": { + "completion_tokens": 2, + "prompt_tokens": 20, + "total_tokens": 22, + "cache_read_input_tokens": 0, + "thoughts_tokens": 50, + "request_processing_tier": "ON_DEMAND", + }, + } + result = _parse_response(data) + assert result.usage.thoughts_tokens == 50 + assert result.usage.request_processing_tier == "ON_DEMAND" + + def test_embedding_response_real_shape(self): + """Embeddings only return prompt_tokens and total_tokens.""" + data = { + "data": [{"embedding": [0.1, 0.2, 0.3]}], + "usage": {"prompt_tokens": 2, "total_tokens": 2}, + } + from uipath.llm_client.clients.normalized.embeddings import _parse_embedding_response + + result = _parse_embedding_response(data) + assert result.data[0].embedding == [0.1, 0.2, 0.3] + assert result.data[0].index == 0 # auto-assigned + assert result.usage.prompt_tokens == 2 + assert result.usage.completion_tokens == 0 # default + + def test_tool_call_arguments_always_dict(self): + """Normalized API always returns arguments as dict, not string.""" + data = { + "id": "tc-test", + "object": "chat.completion", + "choices": [ + { + "message": { + "role": "assistant", + "tool_calls": [ + { + "id": "call_1", + "name": "get_weather", + "arguments": {"city": "London"}, + }, + ], + }, + "finish_reason": "tool_calls", + } + ], + "usage": {}, + } + result = 
_parse_response(data) + tc = result.choices[0].message.tool_calls[0] + assert isinstance(tc.arguments, dict) + assert tc.arguments == {"city": "London"} diff --git a/tests/core/test_normalized_integration.py b/tests/core/test_normalized_integration.py new file mode 100644 index 0000000..10ff5da --- /dev/null +++ b/tests/core/test_normalized_integration.py @@ -0,0 +1,245 @@ +"""Integration tests for the normalized client. + +These tests verify the normalized client works end-to-end with VCR cassettes. +They test: +1. Basic chat completions (sync) +2. Chat completions with parameters (temperature, max_tokens) +3. Streaming completions (sync via .stream()) +4. Tool calling (dict and Pydantic tools) +5. Structured output via json_object response_format +6. Embeddings (sync) +7. Async completions (via .acreate()) +8. Async embeddings (via .acreate()) +""" + +import json + +import pytest +from pydantic import BaseModel + +from uipath.llm_client.clients.normalized import ( + ChatCompletion, + ChatCompletionChunk, + EmbeddingResponse, + UiPathNormalizedClient, +) +from uipath.llm_client.settings import UiPathBaseSettings + +# ============================================================================ +# Fixtures +# ============================================================================ + + +@pytest.fixture +def normalized_client(client_settings: UiPathBaseSettings) -> UiPathNormalizedClient: + return UiPathNormalizedClient( + model_name="gpt-4o-2024-11-20", + client_settings=client_settings, + ) + + +@pytest.fixture +def embedding_client(client_settings: UiPathBaseSettings) -> UiPathNormalizedClient: + return UiPathNormalizedClient( + model_name="text-embedding-ada-002", + client_settings=client_settings, + ) + + +# ============================================================================ +# Structured output models +# ============================================================================ + + +class MathAnswer(BaseModel): + answer: int + explanation: str + + 
+# ============================================================================ +# Sync completions tests +# ============================================================================ + + +class TestNormalizedCompletions: + @pytest.mark.vcr() + def test_basic_completion(self, normalized_client: UiPathNormalizedClient): + response = normalized_client.completions.create( + messages=[{"role": "user", "content": "Say hello in one word."}], + ) + assert isinstance(response, ChatCompletion) + assert len(response.choices) >= 1 + assert response.choices[0].message.content + assert response.choices[0].finish_reason == "stop" + assert response.usage.total_tokens > 0 + + @pytest.mark.vcr() + def test_completion_with_params(self, normalized_client: UiPathNormalizedClient): + response = normalized_client.completions.create( + messages=[{"role": "user", "content": "Say hi."}], + max_tokens=10, + temperature=0.0, + ) + assert isinstance(response, ChatCompletion) + assert response.choices[0].message.content + + @pytest.mark.vcr() + def test_completion_with_system_message(self, normalized_client: UiPathNormalizedClient): + response = normalized_client.completions.create( + messages=[ + {"role": "system", "content": "You are a helpful assistant. 
Be very brief."}, + {"role": "user", "content": "What is 2+2?"}, + ], + ) + assert isinstance(response, ChatCompletion) + assert response.choices[0].message.content + + +class TestNormalizedStreaming: + @pytest.mark.vcr() + def test_streaming(self, normalized_client: UiPathNormalizedClient): + chunks = list( + normalized_client.completions.stream( + messages=[{"role": "user", "content": "Count from 1 to 3."}], + ) + ) + assert len(chunks) > 0 + assert all(isinstance(c, ChatCompletionChunk) for c in chunks) + + content_chunks = [c for c in chunks if c.choices and c.choices[0].delta.content] + assert len(content_chunks) > 0 + + +class TestNormalizedToolCalling: + @pytest.mark.vcr() + def test_tool_calling(self, normalized_client: UiPathNormalizedClient): + response = normalized_client.completions.create( + messages=[ + {"role": "user", "content": "What is the weather in London?"}, + ], + tools=[ + { + "name": "get_weather", + "description": "Get the current weather in a city", + "parameters": { + "type": "object", + "properties": { + "city": {"type": "string", "description": "The city name"}, + }, + "required": ["city"], + }, + } + ], + tool_choice={"type": "required"}, + ) + assert isinstance(response, ChatCompletion) + assert len(response.choices[0].message.tool_calls) >= 1 + tc = response.choices[0].message.tool_calls[0] + assert tc.name == "get_weather" + assert "city" in tc.arguments + + @pytest.mark.vcr() + def test_tool_calling_with_pydantic(self, normalized_client: UiPathNormalizedClient): + class GetWeatherInput(BaseModel): + """Get the current weather in a city.""" + + city: str + + response = normalized_client.completions.create( + messages=[ + {"role": "user", "content": "What is the weather in Paris?"}, + ], + tools=[GetWeatherInput], + tool_choice={"type": "required"}, + ) + assert isinstance(response, ChatCompletion) + assert len(response.choices[0].message.tool_calls) >= 1 + + +class TestNormalizedStructuredOutput: + @pytest.mark.vcr() + def 
test_structured_output_json_object(self, normalized_client: UiPathNormalizedClient):
+        """Test structured output using json_object response_format."""
+        response = normalized_client.completions.create(
+            messages=[
+                {
+                    "role": "user",
+                    "content": (
+                        'What is 15 + 27? Respond with JSON: {"answer": , "explanation": ""}'
+                    ),
+                },
+            ],
+            response_format={"type": "json_object"},
+        )
+        assert isinstance(response, ChatCompletion)
+        content = response.choices[0].message.content
+        assert content
+        parsed = json.loads(content)
+        assert parsed["answer"] == 42
+
+    @pytest.mark.vcr()
+    def test_structured_output_pydantic_parsed(self, normalized_client: UiPathNormalizedClient):
+        """Validate json_object output against a Pydantic model client-side (does not use message.parsed)."""
+        response = normalized_client.completions.create(
+            messages=[
+                {
+                    "role": "user",
+                    "content": (
+                        'What is 15 + 27? Respond with JSON: {"answer": , "explanation": ""}'
+                    ),
+                },
+            ],
+            response_format={"type": "json_object"},
+        )
+        assert isinstance(response, ChatCompletion)
+        content = response.choices[0].message.content
+        assert content
+        parsed = MathAnswer.model_validate_json(content)
+        assert parsed.answer == 42
+
+
+# ============================================================================
+# Embeddings tests
+# ============================================================================
+
+
+class TestNormalizedEmbeddings:
+    @pytest.mark.vcr()
+    def test_single_embedding(self, embedding_client: UiPathNormalizedClient):
+        response = embedding_client.embeddings.create(input="Hello world")
+        assert isinstance(response, EmbeddingResponse)
+        assert len(response.data) == 1
+        assert len(response.data[0].embedding) > 0
+
+    @pytest.mark.vcr()
+    def test_batch_embeddings(self, embedding_client: UiPathNormalizedClient):
+        response = embedding_client.embeddings.create(input=["Hello world", "Goodbye world"])
+        assert isinstance(response, EmbeddingResponse)
+        assert len(response.data) == 2
+
+
+# 
============================================================================ +# Async tests +# ============================================================================ + + +class TestAsyncNormalizedCompletions: + @pytest.mark.asyncio + @pytest.mark.vcr() + async def test_async_completion(self, normalized_client: UiPathNormalizedClient): + response = await normalized_client.completions.acreate( + messages=[{"role": "user", "content": "Say hello in one word."}], + ) + assert isinstance(response, ChatCompletion) + assert response.choices[0].message.content + + +class TestAsyncNormalizedEmbeddings: + @pytest.mark.asyncio + @pytest.mark.vcr() + async def test_async_embedding(self, embedding_client: UiPathNormalizedClient): + response = await embedding_client.embeddings.acreate( + input="Hello world", + ) + assert isinstance(response, EmbeddingResponse) + assert len(response.data) == 1