diff --git a/CHANGELOG.md b/CHANGELOG.md index 34b8f44..12a0427 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,19 @@ All notable changes to `uipath_llm_client` (core package) will be documented in this file. +## [1.7.0] - 2026-04-03 + +### Added +- `UiPathNormalizedClient` — provider-agnostic LLM client with no optional dependencies + - `client.completions.create/acreate/stream/astream` for chat completions + - `client.embeddings.create/acreate` for embeddings + - Structured output via `response_format` (Pydantic, TypedDict, dict, json_object) + - Tool calling with dicts, Pydantic models, or callables + - Streaming with SSE parsing + - Full vendor parameter coverage: OpenAI (reasoning, logprobs, logit_bias), Anthropic (thinking, top_k), Google (thinking_level/budget, safety_settings, cached_content) + - Typed response models: `ChatCompletion`, `ChatCompletionChunk`, `EmbeddingResponse` + - Accepts both dict and Pydantic model messages + ## [1.6.0] - 2026-04-03 ### Fixed diff --git a/packages/uipath_langchain_client/CHANGELOG.md b/packages/uipath_langchain_client/CHANGELOG.md index 6ac4201..3674583 100644 --- a/packages/uipath_langchain_client/CHANGELOG.md +++ b/packages/uipath_langchain_client/CHANGELOG.md @@ -2,6 +2,13 @@ All notable changes to `uipath_langchain_client` will be documented in this file. 
+## [1.7.0] - 2026-04-03 + +### Added +- `UiPathChat.with_structured_output()` — supports `function_calling`, `json_schema`, and `json_mode` methods +- `UiPathChat.bind_tools()` — added `parallel_tool_calls` parameter +- Added vendor-specific parameters to `UiPathChat`: `logit_bias`, `logprobs`, `top_logprobs`, `parallel_tool_calls`, `top_k`, `safety_settings`, `cached_content`, `labels`, `seed` + ## [1.6.0] - 2026-04-03 ### Fixed diff --git a/packages/uipath_langchain_client/pyproject.toml b/packages/uipath_langchain_client/pyproject.toml index 93ff0be..ba69f55 100644 --- a/packages/uipath_langchain_client/pyproject.toml +++ b/packages/uipath_langchain_client/pyproject.toml @@ -6,7 +6,7 @@ readme = "README.md" requires-python = ">=3.11" dependencies = [ "langchain>=1.2.13", - "uipath-llm-client>=1.5.10", + "uipath-llm-client>=1.7.0", ] [project.optional-dependencies] diff --git a/packages/uipath_langchain_client/src/uipath_langchain_client/__version__.py b/packages/uipath_langchain_client/src/uipath_langchain_client/__version__.py index 5149f6e..66e749d 100644 --- a/packages/uipath_langchain_client/src/uipath_langchain_client/__version__.py +++ b/packages/uipath_langchain_client/src/uipath_langchain_client/__version__.py @@ -1,3 +1,3 @@ __title__ = "UiPath LangChain Client" __description__ = "A Python client for interacting with UiPath's LLM services via LangChain." 
-__version__ = "1.6.0" +__version__ = "1.7.0" diff --git a/packages/uipath_langchain_client/src/uipath_langchain_client/clients/normalized/chat_models.py b/packages/uipath_langchain_client/src/uipath_langchain_client/clients/normalized/chat_models.py index 47e325b..a1d545e 100644 --- a/packages/uipath_langchain_client/src/uipath_langchain_client/clients/normalized/chat_models.py +++ b/packages/uipath_langchain_client/src/uipath_langchain_client/clients/normalized/chat_models.py @@ -25,7 +25,8 @@ import json from collections.abc import AsyncGenerator, Callable, Generator, Sequence -from typing import Any +from functools import partial +from typing import Any, Literal, Union, cast from langchain_core.callbacks import ( AsyncCallbackManagerForLLMRun, @@ -44,21 +45,76 @@ UsageMetadata, ) from langchain_core.messages.utils import convert_to_openai_messages +from langchain_core.output_parsers import JsonOutputParser +from langchain_core.output_parsers.openai_tools import ( + JsonOutputKeyToolsParser, + PydanticToolsParser, +) from langchain_core.outputs import ( ChatGeneration, ChatGenerationChunk, ChatResult, ) -from langchain_core.runnables import Runnable +from langchain_core.runnables import Runnable, RunnableLambda, RunnablePassthrough from langchain_core.tools import BaseTool from langchain_core.utils.function_calling import ( convert_to_openai_function, + convert_to_openai_tool, ) -from pydantic import Field +from langchain_core.utils.pydantic import is_basemodel_subclass +from pydantic import AliasChoices, BaseModel, Field from uipath_langchain_client.base_client import UiPathBaseChatModel from uipath_langchain_client.settings import ApiType, RoutingMode, UiPathAPIConfig +_DictOrPydanticClass = Union[dict[str, Any], type[BaseModel], type] +_DictOrPydantic = Union[dict[str, Any], BaseModel] + + +def _oai_structured_outputs_parser(ai_msg: AIMessage, schema: type[BaseModel]) -> BaseModel: + if not ai_msg.content: + raise ValueError("Expected non-empty content from 
model.") + content = ai_msg.content + if isinstance(content, list): + # Extract the first text block from content parts + content = next((c for c in content if isinstance(c, str)), str(content[0])) + parsed = json.loads(content) + return schema.model_validate(parsed) + + +def _build_normalized_response_format( + schema: _DictOrPydanticClass, strict: bool | None = None +) -> dict[str, Any]: + """Build response_format for the normalized API from a schema.""" + if isinstance(schema, dict): + return {"type": "json_schema", "json_schema": schema} + + if isinstance(schema, type) and issubclass(schema, BaseModel): + json_schema = schema.model_json_schema() + rf: dict[str, Any] = { + "type": "json_schema", + "json_schema": { + "name": schema.__name__, + "schema": json_schema, + }, + } + if strict is not None: + rf["json_schema"]["strict"] = strict + return rf + + # TypedDict or other type — convert via openai tool schema + tool_schema = convert_to_openai_tool(schema) + rf = { + "type": "json_schema", + "json_schema": { + "name": tool_schema["function"]["name"], + "schema": tool_schema["function"]["parameters"], + }, + } + if strict is not None: + rf["json_schema"]["strict"] = strict + return rf + class UiPathChat(UiPathBaseChatModel): """LangChain chat model using UiPath's normalized (provider-agnostic) API. 
@@ -101,33 +157,48 @@ class UiPathChat(UiPathBaseChatModel): freeze_base_url=True, ) - # Standard LLM parameters - max_tokens: int | None = None + # Common + max_tokens: int | None = Field( + default=None, + validation_alias=AliasChoices("max_tokens", "max_output_tokens", "max_completion_tokens"), + ) temperature: float | None = None - stop: list[str] | str | None = Field(default=None, alias="stop_sequences") + top_p: float | None = None + top_k: int | None = None + stop: list[str] | str | None = Field( + default=None, + validation_alias=AliasChoices("stop", "stop_sequences"), + ) + n: int | None = Field( + default=None, + validation_alias=AliasChoices("n", "candidate_count"), + ) + frequency_penalty: float | None = None + presence_penalty: float | None = None + seed: int | None = None - n: int | None = None # Number of completions to generate - top_p: float | None = None # Nucleus sampling probability mass - presence_penalty: float | None = None # Penalty for repeated tokens - frequency_penalty: float | None = None # Frequency-based repetition penalty - verbosity: str | None = None # Response verbosity: "low", "medium", or "high" + model_kwargs: dict[str, Any] = Field(default_factory=dict) + disabled_params: dict[str, Any] | None = None - model_kwargs: dict[str, Any] = Field( - default_factory=dict - ) # Additional model-specific parameters - disabled_params: dict[str, Any] | None = None # Parameters to exclude from requests + # OpenAI + logit_bias: dict[str, int] | None = None + logprobs: bool | None = None + top_logprobs: int | None = None + parallel_tool_calls: bool | None = None + reasoning_effort: str | None = None + reasoning: dict[str, Any] | None = None - # OpenAI o1/o3 reasoning parameters - reasoning: dict[str, Any] | None = None # {"effort": "low"|"medium"|"high", "summary": ...} - reasoning_effort: str | None = None # "minimal", "low", "medium", or "high" + # Anthropic + thinking: dict[str, Any] | None = None - # Anthropic Claude extended thinking 
parameters - thinking: dict[str, Any] | None = None # {"type": "enabled"|"disabled", "budget_tokens": N} + # Google + thinking_level: str | None = None + thinking_budget: int | None = None + include_thoughts: bool | None = None + safety_settings: list[dict[str, Any]] | None = None - # Google Gemini thinking parameters - thinking_level: str | None = None # Thinking depth level - thinking_budget: int | None = None # Token budget for thinking - include_thoughts: bool | None = None # Include thinking in response + # Shared + verbosity: str | None = None @property def _llm_type(self) -> str: @@ -138,20 +209,31 @@ def _llm_type(self) -> str: def _default_params(self) -> dict[str, Any]: """Get the default parameters for the normalized API request.""" exclude_if_none = { - "frequency_penalty": self.frequency_penalty, - "presence_penalty": self.presence_penalty, - "top_p": self.top_p, - "stop": self.stop or None, # Also exclude empty list for this - "n": self.n, "max_tokens": self.max_tokens, "temperature": self.temperature, - "verbosity": self.verbosity, - "reasoning": self.reasoning, + "top_p": self.top_p, + "top_k": self.top_k, + "stop": self.stop or None, + "n": self.n, + "frequency_penalty": self.frequency_penalty, + "presence_penalty": self.presence_penalty, + "seed": self.seed, + # OpenAI + "logit_bias": self.logit_bias, + "logprobs": self.logprobs, + "top_logprobs": self.top_logprobs, + "parallel_tool_calls": self.parallel_tool_calls, "reasoning_effort": self.reasoning_effort, + "reasoning": self.reasoning, + # Anthropic "thinking": self.thinking, + # Google "thinking_level": self.thinking_level, "thinking_budget": self.thinking_budget, "include_thoughts": self.include_thoughts, + "safety_settings": self.safety_settings, + # Shared + "verbosity": self.verbosity, } return { @@ -181,6 +263,7 @@ def bind_tools( *, tool_choice: str | None = None, strict: bool | None = None, + parallel_tool_calls: bool | None = None, **kwargs: Any, ) -> Runnable[LanguageModelInput, 
AIMessage]: """Bind tools to the model with automatic tool choice detection.""" @@ -197,7 +280,7 @@ def bind_tools( tool_choice = "auto" if tool_choice in ["required", "auto"]: - tool_choice_object = { + tool_choice_object: dict[str, Any] = { "type": tool_choice, } else: @@ -206,11 +289,113 @@ def bind_tools( "name": tool_choice, } - return super().bind( - tools=formatted_tools, - tool_choice=tool_choice_object, + bind_kwargs: dict[str, Any] = { + "tools": formatted_tools, + "tool_choice": tool_choice_object, **kwargs, - ) + } + if parallel_tool_calls is not None: + bind_kwargs["parallel_tool_calls"] = parallel_tool_calls + + return super().bind(**bind_kwargs) + + def with_structured_output( + self, + schema: _DictOrPydanticClass | None = None, + *, + method: Literal["function_calling", "json_mode", "json_schema"] = "function_calling", + include_raw: bool = False, + strict: bool | None = None, + **kwargs: Any, + ) -> Runnable[LanguageModelInput, _DictOrPydantic]: + """Model wrapper that returns outputs formatted to match the given schema. + + Args: + schema: The output schema as a Pydantic class, TypedDict, JSON Schema dict, + or OpenAI function schema. + method: Either "json_schema" (uses response_format) or "function_calling" + (uses tool calling to force the schema). + include_raw: If True, returns dict with 'raw', 'parsed', and 'parsing_error'. + strict: If True, model output is guaranteed to match the schema exactly. + **kwargs: Additional arguments passed to bind(). + + Returns: + A Runnable that parses the model output into the given schema. 
+ """ + if schema is None: + raise ValueError("schema must be specified.") + + is_pydantic = isinstance(schema, type) and is_basemodel_subclass(schema) + + if method == "function_calling": + tool_name = convert_to_openai_tool(schema)["function"]["name"] + llm = self.bind_tools( + [schema], + tool_choice="any", + strict=strict, + ls_structured_output_format={ + "kwargs": {"method": "function_calling", "strict": strict}, + "schema": schema, + }, + **kwargs, + ) + if is_pydantic: + output_parser: Runnable = PydanticToolsParser( + tools=[schema], # type: ignore[list-item] + first_tool_only=True, + ) + else: + output_parser = JsonOutputKeyToolsParser(key_name=tool_name, first_tool_only=True) + elif method == "json_mode": + llm = self.bind( + response_format={"type": "json_object"}, + ls_structured_output_format={ + "kwargs": {"method": method}, + "schema": schema, + }, + **kwargs, + ) + if is_pydantic: + from langchain_core.output_parsers import PydanticOutputParser + + output_parser = PydanticOutputParser(pydantic_object=schema) # type: ignore[arg-type] + else: + output_parser = JsonOutputParser() + elif method == "json_schema": + response_format = _build_normalized_response_format(schema, strict=strict) + llm = self.bind( + response_format=response_format, + ls_structured_output_format={ + "kwargs": {"method": method, "strict": strict}, + "schema": convert_to_openai_tool(schema), + }, + **kwargs, + ) + if is_pydantic: + output_parser = RunnableLambda( + partial(_oai_structured_outputs_parser, schema=cast(type, schema)) + ).with_types(output_type=cast(type, schema)) + else: + output_parser = JsonOutputParser() + else: + raise ValueError( + f"Unrecognized method: '{method}'. " + "Expected 'function_calling', 'json_mode', or 'json_schema'." 
+ ) + + if include_raw: + parser_assign = RunnablePassthrough.assign( + parsed=lambda x: output_parser.invoke(x["raw"]), + parsing_error=lambda _: None, + ) + parser_none = RunnablePassthrough.assign( + parsed=lambda _: None, + ) + parser_with_fallback = parser_assign.with_fallbacks( + [parser_none], exception_key="parsing_error" + ) + return RunnablePassthrough.assign(raw=llm) | parser_with_fallback # type: ignore[return-value] + return llm | output_parser # type: ignore[return-value] def _preprocess_request( self, messages: list[BaseMessage], stop: list[str] | None = None, **kwargs: Any diff --git a/src/uipath/llm_client/__init__.py b/src/uipath/llm_client/__init__.py index b437cef..a42a286 100644 --- a/src/uipath/llm_client/__init__.py +++ b/src/uipath/llm_client/__init__.py @@ -28,6 +28,7 @@ """ from uipath.llm_client.__version__ import __version__ +from uipath.llm_client.clients.normalized import UiPathNormalizedClient from uipath.llm_client.httpx_client import ( UiPathHttpxAsyncClient, UiPathHttpxClient, @@ -60,6 +61,8 @@ "get_default_client_settings", "PlatformSettings", "LLMGatewaySettings", + # Normalized client + "UiPathNormalizedClient", # HTTPX clients "UiPathHttpxClient", "UiPathHttpxAsyncClient", diff --git a/src/uipath/llm_client/__version__.py b/src/uipath/llm_client/__version__.py index c5bf7a8..cbd0256 100644 --- a/src/uipath/llm_client/__version__.py +++ b/src/uipath/llm_client/__version__.py @@ -1,3 +1,3 @@ __title__ = "UiPath LLM Client" __description__ = "A Python client for interacting with UiPath's LLM services." -__version__ = "1.6.0" +__version__ = "1.7.0" diff --git a/src/uipath/llm_client/clients/normalized/__init__.py b/src/uipath/llm_client/clients/normalized/__init__.py new file mode 100644 index 0000000..11bf16d --- /dev/null +++ b/src/uipath/llm_client/clients/normalized/__init__.py @@ -0,0 +1,46 @@ +"""UiPath Normalized Client - Provider-agnostic LLM client. + +No optional dependencies required. 
Works with the base uipath-llm-client package. +""" + +from uipath.llm_client.clients.normalized.client import UiPathNormalizedClient +from uipath.llm_client.clients.normalized.completions import ( + MessageType, + ResponseFormatType, + ToolChoiceType, + ToolType, +) +from uipath.llm_client.clients.normalized.types import ( + ChatCompletion, + ChatCompletionChunk, + Choice, + Delta, + EmbeddingData, + EmbeddingResponse, + Message, + StreamChoice, + ToolCall, + ToolCallChunk, + Usage, +) + +__all__ = [ + "UiPathNormalizedClient", + # Input types + "MessageType", + "ToolType", + "ToolChoiceType", + "ResponseFormatType", + # Response types + "ChatCompletion", + "ChatCompletionChunk", + "Choice", + "Delta", + "EmbeddingData", + "EmbeddingResponse", + "Message", + "StreamChoice", + "ToolCall", + "ToolCallChunk", + "Usage", +] diff --git a/src/uipath/llm_client/clients/normalized/client.py b/src/uipath/llm_client/clients/normalized/client.py new file mode 100644 index 0000000..43ddf5a --- /dev/null +++ b/src/uipath/llm_client/clients/normalized/client.py @@ -0,0 +1,184 @@ +"""UiPath Normalized Client. + +A provider-agnostic LLM client that uses UiPath's normalized API to provide +a consistent interface across all supported providers (OpenAI, Google, Anthropic, etc.). + +No optional dependencies required - works with the base uipath-llm-client package. + +Example: + >>> from uipath.llm_client.clients.normalized import UiPathNormalizedClient + >>> + >>> client = UiPathNormalizedClient(model_name="gpt-4o-2024-11-20") + >>> + >>> # Chat completion + >>> response = client.completions.create( + ... messages=[{"role": "user", "content": "Hello!"}], + ... ) + >>> print(response.choices[0].message.content) + >>> + >>> # Streaming + >>> for chunk in client.completions.stream( + ... messages=[{"role": "user", "content": "Hello!"}], + ... ): + ... print(chunk.choices[0].delta.content, end="") + >>> + >>> # Async + >>> response = await client.completions.acreate( + ... 
messages=[{"role": "user", "content": "Hello!"}], + ... ) + >>> + >>> # Structured output + >>> from pydantic import BaseModel + >>> class Answer(BaseModel): + ... text: str + ... confidence: float + >>> + >>> response = client.completions.create( + ... messages=[{"role": "user", "content": "What is 2+2?"}], + ... output_format=Answer, + ... ) + >>> print(response.choices[0].message.parsed) # Answer(text='4', confidence=1.0) + >>> + >>> # Embeddings + >>> response = client.embeddings.create(input=["Hello world"]) + >>> print(len(response.data[0].embedding)) +""" + +import logging +from collections.abc import Mapping, Sequence +from functools import cached_property + +from uipath.llm_client.clients.normalized.completions import Completions +from uipath.llm_client.clients.normalized.embeddings import Embeddings +from uipath.llm_client.clients.utils import build_httpx_async_client, build_httpx_client +from uipath.llm_client.httpx_client import UiPathHttpxAsyncClient, UiPathHttpxClient +from uipath.llm_client.settings import UiPathBaseSettings, get_default_client_settings +from uipath.llm_client.settings.base import UiPathAPIConfig +from uipath.llm_client.settings.constants import ApiType, RoutingMode +from uipath.llm_client.utils.retry import RetryConfig + + +class UiPathNormalizedClient: + """Provider-agnostic LLM client using UiPath's normalized API. + + Routes requests through UiPath's LLM Gateway using the normalized API, + which provides a consistent interface across all supported LLM providers. + No vendor-specific SDK dependencies are required. + + Namespaces: + - ``completions``: ``create``, ``acreate``, ``stream``, ``astream`` + - ``embeddings``: ``create``, ``acreate`` + + Args: + model_name: The model name (e.g., "gpt-4o-2024-11-20", "gemini-2.5-flash"). + byo_connection_id: Bring Your Own connection ID for custom deployments. + client_settings: UiPath client settings. Defaults to environment-based settings. 
+ timeout: Client-side request timeout in seconds. + max_retries: Maximum retry attempts for failed requests. + default_headers: Additional headers to include in requests. + captured_headers: Response header prefixes to capture (case-insensitive). + retry_config: Custom retry configuration. + logger: Logger instance for request/response logging. + + Example: + >>> client = UiPathNormalizedClient(model_name="gpt-4o-2024-11-20") + >>> response = client.completions.create( + ... messages=[{"role": "user", "content": "Hello!"}], + ... ) + """ + + def __init__( + self, + *, + model_name: str, + byo_connection_id: str | None = None, + client_settings: UiPathBaseSettings | None = None, + timeout: float | None = None, + max_retries: int | None = None, + default_headers: Mapping[str, str] | None = None, + captured_headers: Sequence[str] = ("x-uipath-",), + retry_config: RetryConfig | None = None, + logger: logging.Logger | None = None, + ): + self._model_name = model_name + self._byo_connection_id = byo_connection_id + self._client_settings = client_settings or get_default_client_settings() + self._timeout = timeout + self._max_retries = max_retries + self._default_headers = default_headers + self._captured_headers = captured_headers + self._retry_config = retry_config + self._logger = logger + + self._completions_api_config = UiPathAPIConfig( + api_type=ApiType.COMPLETIONS, + routing_mode=RoutingMode.NORMALIZED, + freeze_base_url=True, + ) + self._embeddings_api_config = UiPathAPIConfig( + api_type=ApiType.EMBEDDINGS, + routing_mode=RoutingMode.NORMALIZED, + freeze_base_url=True, + ) + + # ------------------------------------------------------------------ + # HTTP clients (lazily created) + # ------------------------------------------------------------------ + + def _build_sync(self, api_config: UiPathAPIConfig) -> UiPathHttpxClient: + return build_httpx_client( + model_name=self._model_name, + byo_connection_id=self._byo_connection_id, + 
client_settings=self._client_settings, + timeout=self._timeout, + max_retries=self._max_retries, + default_headers=self._default_headers, + captured_headers=self._captured_headers, + retry_config=self._retry_config, + logger=self._logger, + api_config=api_config, + ) + + def _build_async(self, api_config: UiPathAPIConfig) -> UiPathHttpxAsyncClient: + return build_httpx_async_client( + model_name=self._model_name, + byo_connection_id=self._byo_connection_id, + client_settings=self._client_settings, + timeout=self._timeout, + max_retries=self._max_retries, + default_headers=self._default_headers, + captured_headers=self._captured_headers, + retry_config=self._retry_config, + logger=self._logger, + api_config=api_config, + ) + + @cached_property + def _sync_client(self) -> UiPathHttpxClient: + return self._build_sync(self._completions_api_config) + + @cached_property + def _async_client(self) -> UiPathHttpxAsyncClient: + return self._build_async(self._completions_api_config) + + @cached_property + def _embedding_sync_client(self) -> UiPathHttpxClient: + return self._build_sync(self._embeddings_api_config) + + @cached_property + def _embedding_async_client(self) -> UiPathHttpxAsyncClient: + return self._build_async(self._embeddings_api_config) + + # ------------------------------------------------------------------ + # Public namespaces + # ------------------------------------------------------------------ + + @cached_property + def completions(self) -> Completions: + """Chat completions namespace (``create``, ``acreate``, ``stream``, ``astream``).""" + return Completions(self) + + @cached_property + def embeddings(self) -> Embeddings: + """Embeddings namespace (``create``, ``acreate``).""" + return Embeddings(self) diff --git a/src/uipath/llm_client/clients/normalized/completions.py b/src/uipath/llm_client/clients/normalized/completions.py new file mode 100644 index 0000000..35b91a3 --- /dev/null +++ b/src/uipath/llm_client/clients/normalized/completions.py @@ -0,0 
+1,727 @@ +"""Completions endpoint for the UiPath Normalized API.""" + +from __future__ import annotations + +import json +from collections.abc import AsyncGenerator, Callable, Generator, Sequence +from typing import Any, Union, get_args, get_origin, get_type_hints + +from pydantic import BaseModel + +from uipath.llm_client.clients.normalized.types import ( + ChatCompletion, + ChatCompletionChunk, + Choice, + Delta, + Message, + StreamChoice, + ToolCall, + ToolCallChunk, + Usage, +) + +try: + from typing import is_typeddict +except ImportError: + from typing_extensions import is_typeddict + +# --------------------------------------------------------------------------- +# Public input types +# --------------------------------------------------------------------------- + +ResponseFormatType = Union[type[BaseModel], type, dict[str, Any]] +"""Response format: Pydantic model, TypedDict, or raw dict (e.g. {"type": "json_object"}).""" + +ToolType = Union[dict[str, Any], type[BaseModel], Callable[..., Any]] +"""Tool definition: dict (raw schema), Pydantic model, or callable.""" + +ToolChoiceType = Union[str, dict[str, Any]] +"""Tool choice: "auto", "required", "none", a tool name, or a dict.""" + +MessageType = Union[dict[str, Any], BaseModel] +"""A single message: dict with role/content or a Pydantic model with those fields.""" + + +def _normalize_messages(messages: Sequence[MessageType]) -> list[dict[str, Any]]: + """Convert a sequence of messages (dicts or pydantic models) to dicts.""" + result: list[dict[str, Any]] = [] + for msg in messages: + if isinstance(msg, dict): + result.append(msg) + elif isinstance(msg, BaseModel): + result.append(msg.model_dump(exclude_none=True)) + else: + result.append(dict(msg)) # type: ignore[arg-type] + return result + + +# --------------------------------------------------------------------------- +# Schema helpers +# --------------------------------------------------------------------------- + + +def _json_schema_from_type(tp: type) 
-> dict[str, Any]: + origin = get_origin(tp) + if origin is list: + args = get_args(tp) + return {"type": "array", "items": _json_schema_from_type(args[0]) if args else {}} + if origin is dict: + return {"type": "object"} + simple = {str: "string", int: "integer", float: "number", bool: "boolean"} + return {"type": simple.get(tp, "object")} + + +def _build_response_format( + response_format: ResponseFormatType, strict: bool | None = None +) -> dict[str, Any]: + if isinstance(response_format, dict): + if "type" in response_format: + return response_format + return {"type": "json_schema", "json_schema": response_format} + + if isinstance(response_format, type) and issubclass(response_format, BaseModel): + js: dict[str, Any] = { + "name": response_format.__name__, + "schema": response_format.model_json_schema(), + } + if strict is not False: + js["strict"] = True + return {"type": "json_schema", "json_schema": js} + + if isinstance(response_format, type) and is_typeddict(response_format): + hints = get_type_hints(response_format) + properties = {name: _json_schema_from_type(tp) for name, tp in hints.items()} + js = { + "name": response_format.__name__, + "schema": { + "type": "object", + "properties": properties, + "required": list(properties.keys()), + "additionalProperties": False, + }, + } + if strict is not False: + js["strict"] = True + return {"type": "json_schema", "json_schema": js} + + if isinstance(response_format, type): + js = { + "name": response_format.__name__, + "schema": _json_schema_from_type(response_format), + } + if strict is True: + js["strict"] = True + return {"type": "json_schema", "json_schema": js} + + raise TypeError(f"Unsupported response_format type: {type(response_format)}") + + +# --------------------------------------------------------------------------- +# Tool helpers +# --------------------------------------------------------------------------- + + +def _build_tool_definition(tool: ToolType) -> dict[str, Any]: + if isinstance(tool, 
dict): + return tool + + if isinstance(tool, type) and issubclass(tool, BaseModel): + schema = tool.model_json_schema() + schema.pop("title", None) + return {"name": tool.__name__, "description": tool.__doc__ or "", "parameters": schema} + + if callable(tool): + import inspect + + sig = inspect.signature(tool) + hints = get_type_hints(tool) + properties = {name: _json_schema_from_type(hints.get(name, str)) for name in sig.parameters} + required = [ + name for name, p in sig.parameters.items() if p.default is inspect.Parameter.empty + ] + return { + "name": tool.__name__, + "description": tool.__doc__ or "", + "parameters": {"type": "object", "properties": properties, "required": required}, + } + + raise TypeError(f"Unsupported tool type: {type(tool)}") + + +def _resolve_tool_choice( + tool_choice: ToolChoiceType, tools: list[dict[str, Any]] +) -> dict[str, Any] | str: + if isinstance(tool_choice, dict): + return tool_choice + if tool_choice in ("auto", "required", "none"): + return tool_choice + tool_names = [t.get("name", "") for t in tools] + if tool_choice in tool_names: + return {"type": "tool", "name": tool_choice} + return "auto" + + +# --------------------------------------------------------------------------- +# Response parsing +# --------------------------------------------------------------------------- + + +def _parse_tool_call(raw: dict[str, Any]) -> ToolCall: + arguments = raw.get("arguments", {}) + if isinstance(arguments, str): + try: + arguments = json.loads(arguments) + except json.JSONDecodeError: + arguments = {} + return ToolCall(id=raw.get("id", ""), name=raw.get("name", ""), arguments=arguments) + + +def _parse_tool_call_chunk(raw: dict[str, Any]) -> ToolCallChunk: + if "function" in raw: + name = raw["function"].get("name", "") + args = raw["function"].get("arguments", "") + else: + name = raw.get("name", "") + args = raw.get("arguments", "") + if isinstance(args, dict): + args = json.dumps(args) if args else "" + return 
ToolCallChunk(id=raw.get("id", ""), name=name, arguments=args, index=raw.get("index", 0)) + + +def _parse_structured_output(content: str, response_format: ResponseFormatType) -> Any: + try: + parsed_json = json.loads(content) + except json.JSONDecodeError: + return None + if isinstance(response_format, type) and issubclass(response_format, BaseModel): + return response_format.model_validate(parsed_json) + return parsed_json + + +def _parse_response( + data: dict[str, Any], response_format: ResponseFormatType | None = None +) -> ChatCompletion: + usage = Usage(**data.get("usage", {})) + choices: list[Choice] = [] + for choice_data in data.get("choices", []): + msg_data = choice_data.get("message", {}) + tool_calls = [_parse_tool_call(tc) for tc in msg_data.get("tool_calls", [])] + content = msg_data.get("content", "") + parsed = ( + _parse_structured_output(content, response_format) + if response_format and content + else None + ) + message = Message( + role=msg_data.get("role", "assistant"), + content=content, + tool_calls=tool_calls, + signature=msg_data.get("signature"), + thinking=msg_data.get("thinking"), + parsed=parsed, + ) + choices.append( + Choice( + index=choice_data.get("index", 0), + message=message, + finish_reason=choice_data.get("finish_reason"), + avg_logprobs=choice_data.get("avg_logprobs"), + ) + ) + return ChatCompletion( + id=data.get("id", ""), + object=data.get("object", ""), + created=data.get("created", 0), + model=data.get("model", ""), + choices=choices, + usage=usage, + ) + + +def _parse_stream_chunk(data: dict[str, Any]) -> ChatCompletionChunk: + usage = Usage(**data["usage"]) if data.get("usage") else None + choices: list[StreamChoice] = [] + for choice_data in data.get("choices", []): + delta_data = choice_data.get("delta", choice_data.get("message", {})) + tool_calls = [_parse_tool_call_chunk(tc) for tc in delta_data.get("tool_calls", [])] + delta = Delta( + role=delta_data.get("role"), + content=delta_data.get("content", ""), + 
tool_calls=tool_calls, + ) + choices.append( + StreamChoice( + index=choice_data.get("index", 0), + delta=delta, + finish_reason=choice_data.get("finish_reason"), + avg_logprobs=choice_data.get("avg_logprobs"), + ) + ) + return ChatCompletionChunk( + id=data.get("id", ""), + object=data.get("object", ""), + created=data.get("created", 0), + model=data.get("model", ""), + choices=choices, + usage=usage, + ) + + +# --------------------------------------------------------------------------- +# Request building +# --------------------------------------------------------------------------- + + +def _build_request( + *, + messages: Sequence[MessageType], + stream: bool = False, + tools: Sequence[ToolType] | None = None, + tool_choice: ToolChoiceType | None = None, + response_format: ResponseFormatType | None = None, + strict: bool | None = None, + # Common + max_tokens: int | None = None, + temperature: float | None = None, + top_p: float | None = None, + top_k: int | None = None, + stop: list[str] | str | None = None, + n: int | None = None, + frequency_penalty: float | None = None, + presence_penalty: float | None = None, + seed: int | None = None, + # OpenAI + logit_bias: dict[str, int] | None = None, + logprobs: bool | None = None, + top_logprobs: int | None = None, + parallel_tool_calls: bool | None = None, + # OpenAI reasoning (o1/o3/gpt-5) + reasoning_effort: str | None = None, + reasoning: dict[str, Any] | None = None, + # Anthropic + thinking: dict[str, Any] | None = None, + # Google + thinking_level: str | None = None, + thinking_budget: int | None = None, + include_thoughts: bool | None = None, + safety_settings: list[dict[str, Any]] | None = None, + # Shared + verbosity: str | None = None, + # Aliases (resolve to canonical names above) + stop_sequences: list[str] | None = None, + max_output_tokens: int | None = None, + max_completion_tokens: int | None = None, + candidate_count: int | None = None, + **kwargs: Any, +) -> dict[str, Any]: + """Build the request 
body for a chat completion.""" + # Resolve aliases + max_tokens = max_tokens or max_output_tokens or max_completion_tokens + stop = stop or stop_sequences + n = n or candidate_count + + body: dict[str, Any] = {"messages": _normalize_messages(messages)} + + if stream: + body["stream"] = True + + optional: dict[str, Any] = { + "max_tokens": max_tokens, + "temperature": temperature, + "top_p": top_p, + "top_k": top_k, + "stop": stop, + "n": n, + "frequency_penalty": frequency_penalty, + "presence_penalty": presence_penalty, + "seed": seed, + "logit_bias": logit_bias, + "logprobs": logprobs, + "top_logprobs": top_logprobs, + "parallel_tool_calls": parallel_tool_calls, + "reasoning_effort": reasoning_effort, + "reasoning": reasoning, + "thinking": thinking, + "thinking_level": thinking_level, + "thinking_budget": thinking_budget, + "include_thoughts": include_thoughts, + "safety_settings": safety_settings, + "verbosity": verbosity, + } + body.update({k: v for k, v in optional.items() if v is not None}) + + if tools is not None: + body["tools"] = [_build_tool_definition(t) for t in tools] + if tool_choice is not None: + body["tool_choice"] = _resolve_tool_choice(tool_choice, body["tools"]) + + if response_format is not None: + body["response_format"] = _build_response_format(response_format, strict=strict) + + body.update(kwargs) + return body + + +# --------------------------------------------------------------------------- +# SSE helpers +# --------------------------------------------------------------------------- + + +def _iter_sse(lines: Generator[str, None, None]) -> Generator[dict[str, Any], None, None]: + for line in lines: + line = line.strip() + if line.startswith("data:"): + line = line[len("data:") :].strip() + try: + data = json.loads(line) + except json.JSONDecodeError: + continue + if "id" in data and not data["id"]: + continue + yield data + + +async def _aiter_sse(lines: AsyncGenerator[str, None]) -> AsyncGenerator[dict[str, Any], None]: + async for line 
in lines: + line = line.strip() + if line.startswith("data:"): + line = line[len("data:") :].strip() + try: + data = json.loads(line) + except json.JSONDecodeError: + continue + if "id" in data and not data["id"]: + continue + yield data + + +# --------------------------------------------------------------------------- +# Completions namespace +# --------------------------------------------------------------------------- + + +class Completions: + """``client.completions`` — create, acreate, stream, astream.""" + + def __init__(self, client: Any) -> None: + self._client = client + + def create( + self, + *, + messages: Sequence[MessageType], + tools: Sequence[ToolType] | None = None, + tool_choice: ToolChoiceType | None = None, + response_format: ResponseFormatType | None = None, + strict: bool | None = None, + # Common + max_tokens: int | None = None, + temperature: float | None = None, + top_p: float | None = None, + top_k: int | None = None, + stop: list[str] | str | None = None, + n: int | None = None, + frequency_penalty: float | None = None, + presence_penalty: float | None = None, + seed: int | None = None, + # OpenAI + logit_bias: dict[str, int] | None = None, + logprobs: bool | None = None, + top_logprobs: int | None = None, + parallel_tool_calls: bool | None = None, + reasoning_effort: str | None = None, + reasoning: dict[str, Any] | None = None, + # Anthropic + thinking: dict[str, Any] | None = None, + # Google + thinking_level: str | None = None, + thinking_budget: int | None = None, + include_thoughts: bool | None = None, + safety_settings: list[dict[str, Any]] | None = None, + # Shared + verbosity: str | None = None, + # Aliases + stop_sequences: list[str] | None = None, + max_output_tokens: int | None = None, + max_completion_tokens: int | None = None, + candidate_count: int | None = None, + **kwargs: Any, + ) -> ChatCompletion: + """Create a chat completion (sync).""" + body = _build_request( + messages=messages, + tools=tools, + 
tool_choice=tool_choice, + response_format=response_format, + strict=strict, + max_tokens=max_tokens, + temperature=temperature, + top_p=top_p, + top_k=top_k, + stop=stop, + n=n, + frequency_penalty=frequency_penalty, + presence_penalty=presence_penalty, + seed=seed, + logit_bias=logit_bias, + logprobs=logprobs, + top_logprobs=top_logprobs, + parallel_tool_calls=parallel_tool_calls, + reasoning_effort=reasoning_effort, + reasoning=reasoning, + thinking=thinking, + thinking_level=thinking_level, + thinking_budget=thinking_budget, + include_thoughts=include_thoughts, + safety_settings=safety_settings, + verbosity=verbosity, + stop_sequences=stop_sequences, + max_output_tokens=max_output_tokens, + max_completion_tokens=max_completion_tokens, + candidate_count=candidate_count, + **kwargs, + ) + response = self._client._sync_client.request("POST", "/", json=body) + response.raise_for_status() + return _parse_response(response.json(), response_format=response_format) + + async def acreate( + self, + *, + messages: Sequence[MessageType], + tools: Sequence[ToolType] | None = None, + tool_choice: ToolChoiceType | None = None, + response_format: ResponseFormatType | None = None, + strict: bool | None = None, + # Common + max_tokens: int | None = None, + temperature: float | None = None, + top_p: float | None = None, + top_k: int | None = None, + stop: list[str] | str | None = None, + n: int | None = None, + frequency_penalty: float | None = None, + presence_penalty: float | None = None, + seed: int | None = None, + # OpenAI + logit_bias: dict[str, int] | None = None, + logprobs: bool | None = None, + top_logprobs: int | None = None, + parallel_tool_calls: bool | None = None, + reasoning_effort: str | None = None, + reasoning: dict[str, Any] | None = None, + # Anthropic + thinking: dict[str, Any] | None = None, + # Google + thinking_level: str | None = None, + thinking_budget: int | None = None, + include_thoughts: bool | None = None, + safety_settings: list[dict[str, Any]] | 
None = None, + # Shared + verbosity: str | None = None, + # Aliases + stop_sequences: list[str] | None = None, + max_output_tokens: int | None = None, + max_completion_tokens: int | None = None, + candidate_count: int | None = None, + **kwargs: Any, + ) -> ChatCompletion: + """Create a chat completion (async).""" + body = _build_request( + messages=messages, + tools=tools, + tool_choice=tool_choice, + response_format=response_format, + strict=strict, + max_tokens=max_tokens, + temperature=temperature, + top_p=top_p, + top_k=top_k, + stop=stop, + n=n, + frequency_penalty=frequency_penalty, + presence_penalty=presence_penalty, + seed=seed, + logit_bias=logit_bias, + logprobs=logprobs, + top_logprobs=top_logprobs, + parallel_tool_calls=parallel_tool_calls, + reasoning_effort=reasoning_effort, + reasoning=reasoning, + thinking=thinking, + thinking_level=thinking_level, + thinking_budget=thinking_budget, + include_thoughts=include_thoughts, + safety_settings=safety_settings, + verbosity=verbosity, + stop_sequences=stop_sequences, + max_output_tokens=max_output_tokens, + max_completion_tokens=max_completion_tokens, + candidate_count=candidate_count, + **kwargs, + ) + response = await self._client._async_client.request("POST", "/", json=body) + response.raise_for_status() + return _parse_response(response.json(), response_format=response_format) + + def stream( + self, + *, + messages: Sequence[MessageType], + tools: Sequence[ToolType] | None = None, + tool_choice: ToolChoiceType | None = None, + response_format: ResponseFormatType | None = None, + strict: bool | None = None, + # Common + max_tokens: int | None = None, + temperature: float | None = None, + top_p: float | None = None, + top_k: int | None = None, + stop: list[str] | str | None = None, + n: int | None = None, + frequency_penalty: float | None = None, + presence_penalty: float | None = None, + seed: int | None = None, + # OpenAI + logit_bias: dict[str, int] | None = None, + logprobs: bool | None = None, + 
top_logprobs: int | None = None, + parallel_tool_calls: bool | None = None, + reasoning_effort: str | None = None, + reasoning: dict[str, Any] | None = None, + # Anthropic + thinking: dict[str, Any] | None = None, + # Google + thinking_level: str | None = None, + thinking_budget: int | None = None, + include_thoughts: bool | None = None, + safety_settings: list[dict[str, Any]] | None = None, + # Shared + verbosity: str | None = None, + # Aliases + stop_sequences: list[str] | None = None, + max_output_tokens: int | None = None, + max_completion_tokens: int | None = None, + candidate_count: int | None = None, + **kwargs: Any, + ) -> Generator[ChatCompletionChunk, None, None]: + """Stream chat completion chunks (sync).""" + body = _build_request( + messages=messages, + stream=True, + tools=tools, + tool_choice=tool_choice, + response_format=response_format, + strict=strict, + max_tokens=max_tokens, + temperature=temperature, + top_p=top_p, + top_k=top_k, + stop=stop, + n=n, + frequency_penalty=frequency_penalty, + presence_penalty=presence_penalty, + seed=seed, + logit_bias=logit_bias, + logprobs=logprobs, + top_logprobs=top_logprobs, + parallel_tool_calls=parallel_tool_calls, + reasoning_effort=reasoning_effort, + reasoning=reasoning, + thinking=thinking, + thinking_level=thinking_level, + thinking_budget=thinking_budget, + include_thoughts=include_thoughts, + safety_settings=safety_settings, + verbosity=verbosity, + stop_sequences=stop_sequences, + max_output_tokens=max_output_tokens, + max_completion_tokens=max_completion_tokens, + candidate_count=candidate_count, + **kwargs, + ) + with self._client._sync_client.stream("POST", "/", json=body) as response: + response.raise_for_status() + for data in _iter_sse(response.iter_lines()): + yield _parse_stream_chunk(data) + + async def astream( + self, + *, + messages: Sequence[MessageType], + tools: Sequence[ToolType] | None = None, + tool_choice: ToolChoiceType | None = None, + response_format: ResponseFormatType | None 
= None, + strict: bool | None = None, + # Common + max_tokens: int | None = None, + temperature: float | None = None, + top_p: float | None = None, + top_k: int | None = None, + stop: list[str] | str | None = None, + n: int | None = None, + frequency_penalty: float | None = None, + presence_penalty: float | None = None, + seed: int | None = None, + # OpenAI + logit_bias: dict[str, int] | None = None, + logprobs: bool | None = None, + top_logprobs: int | None = None, + parallel_tool_calls: bool | None = None, + reasoning_effort: str | None = None, + reasoning: dict[str, Any] | None = None, + # Anthropic + thinking: dict[str, Any] | None = None, + # Google + thinking_level: str | None = None, + thinking_budget: int | None = None, + include_thoughts: bool | None = None, + safety_settings: list[dict[str, Any]] | None = None, + # Shared + verbosity: str | None = None, + # Aliases + stop_sequences: list[str] | None = None, + max_output_tokens: int | None = None, + max_completion_tokens: int | None = None, + candidate_count: int | None = None, + **kwargs: Any, + ) -> AsyncGenerator[ChatCompletionChunk, None]: + """Stream chat completion chunks (async).""" + body = _build_request( + messages=messages, + stream=True, + tools=tools, + tool_choice=tool_choice, + response_format=response_format, + strict=strict, + max_tokens=max_tokens, + temperature=temperature, + top_p=top_p, + top_k=top_k, + stop=stop, + n=n, + frequency_penalty=frequency_penalty, + presence_penalty=presence_penalty, + seed=seed, + logit_bias=logit_bias, + logprobs=logprobs, + top_logprobs=top_logprobs, + parallel_tool_calls=parallel_tool_calls, + reasoning_effort=reasoning_effort, + reasoning=reasoning, + thinking=thinking, + thinking_level=thinking_level, + thinking_budget=thinking_budget, + include_thoughts=include_thoughts, + safety_settings=safety_settings, + verbosity=verbosity, + stop_sequences=stop_sequences, + max_output_tokens=max_output_tokens, + max_completion_tokens=max_completion_tokens, + 
candidate_count=candidate_count, + **kwargs, + ) + async with self._client._async_client.stream("POST", "/", json=body) as response: + response.raise_for_status() + async for data in _aiter_sse(response.aiter_lines()): + yield _parse_stream_chunk(data) diff --git a/src/uipath/llm_client/clients/normalized/embeddings.py b/src/uipath/llm_client/clients/normalized/embeddings.py new file mode 100644 index 0000000..9caf92c --- /dev/null +++ b/src/uipath/llm_client/clients/normalized/embeddings.py @@ -0,0 +1,94 @@ +"""Embeddings endpoint for the UiPath Normalized API. + +Provides synchronous and asynchronous methods for generating text embeddings. +""" + +from __future__ import annotations + +from typing import Any + +from uipath.llm_client.clients.normalized.types import ( + EmbeddingData, + EmbeddingResponse, + Usage, +) + + +def _parse_embedding_response(data: dict[str, Any]) -> EmbeddingResponse: + """Parse an embedding response from the API.""" + usage_data = data.get("usage", {}) + embeddings = [ + EmbeddingData( + embedding=item.get("embedding", []), + index=item.get("index", i), + ) + for i, item in enumerate(data.get("data", [])) + ] + return EmbeddingResponse( + data=embeddings, + model=data.get("model", ""), + usage=Usage(**usage_data), + ) + + +class Embeddings: + """Embeddings namespace with ``create`` and ``acreate``. + + Handles request building and response parsing for the UiPath normalized + embeddings API. + + Example: + >>> response = client.embeddings.create(input=["Hello world"]) + >>> print(response.data[0].embedding[:5]) + >>> + >>> response = await client.embeddings.acreate(input=["Hello world"]) + """ + + def __init__(self, client: Any) -> None: + self._client = client + + def create( + self, + *, + input: str | list[str], + **kwargs: Any, + ) -> EmbeddingResponse: + """Create embeddings (sync). + + Args: + input: A string or list of strings to embed. + **kwargs: Additional parameters for the API. 
+ + Returns: + EmbeddingResponse with embedding vectors. + """ + if isinstance(input, str): + input = [input] + + body: dict[str, Any] = {"input": input, **kwargs} + response = self._client._embedding_sync_client.request("POST", "/", json=body) + response.raise_for_status() + return _parse_embedding_response(response.json()) + + async def acreate( + self, + *, + input: str | list[str], + **kwargs: Any, + ) -> EmbeddingResponse: + """Create embeddings (async). + + Args: + input: A string or list of strings to embed. + **kwargs: Additional parameters for the API. + + Returns: + EmbeddingResponse with embedding vectors. + """ + if isinstance(input, str): + input = [input] + + body: dict[str, Any] = {"input": input, **kwargs} + response = await self._client._embedding_async_client.request("POST", "/", json=body) + response.raise_for_status() + return _parse_embedding_response(response.json()) diff --git a/src/uipath/llm_client/clients/normalized/types.py b/src/uipath/llm_client/clients/normalized/types.py new file mode 100644 index 0000000..aeaa372 --- /dev/null +++ b/src/uipath/llm_client/clients/normalized/types.py @@ -0,0 +1,87 @@ +"""Response types for the UiPath Normalized API.""" + +from typing import Any + +from pydantic import BaseModel, Field + + +class ToolCall(BaseModel): + id: str = "" + name: str = "" + arguments: dict[str, Any] = Field(default_factory=dict) + + +class ToolCallChunk(BaseModel): + id: str = "" + name: str = "" + arguments: str = "" + index: int = 0 + + +class Usage(BaseModel): + prompt_tokens: int = 0 + completion_tokens: int = 0 + total_tokens: int = 0 + cache_read_input_tokens: int = 0 + cache_creation_input_tokens: int = 0 + thoughts_tokens: int = 0 + request_processing_tier: str | None = None + + +class Message(BaseModel): + role: str = "assistant" + content: str | None = "" + tool_calls: list[ToolCall] = Field(default_factory=list) + signature: str | None = None + thinking: str | None = None + # Structured output (populated client-side 
when output_format is used) + parsed: Any = None + + +class Delta(BaseModel): + role: str | None = None + content: str | None = "" + tool_calls: list[ToolCallChunk] = Field(default_factory=list) + + +class Choice(BaseModel): + index: int = 0 + message: Message = Field(default_factory=Message) + finish_reason: str | None = None + avg_logprobs: float | None = None + + +class StreamChoice(BaseModel): + index: int = 0 + delta: Delta = Field(default_factory=Delta) + finish_reason: str | None = None + avg_logprobs: float | None = None + + +class ChatCompletion(BaseModel): + id: str = "" + object: str = "" + created: int = 0 + model: str = "" + choices: list[Choice] = Field(default_factory=list) + usage: Usage = Field(default_factory=Usage) + + +class ChatCompletionChunk(BaseModel): + id: str = "" + object: str = "" + created: int | str = 0 + model: str = "" + choices: list[StreamChoice] = Field(default_factory=list) + usage: Usage | None = None + + +class EmbeddingData(BaseModel): + embedding: list[float] = Field(default_factory=list) + index: int = 0 + + +class EmbeddingResponse(BaseModel): + data: list[EmbeddingData] = Field(default_factory=list) + model: str = "" + usage: Usage = Field(default_factory=Usage) diff --git a/tests/cassettes.db b/tests/cassettes.db index f98a4a6..7caa33b 100644 Binary files a/tests/cassettes.db and b/tests/cassettes.db differ diff --git a/tests/core/core_smoke_test.py b/tests/core/core_smoke_test.py index deb72dd..17b44db 100644 --- a/tests/core/core_smoke_test.py +++ b/tests/core/core_smoke_test.py @@ -20,6 +20,7 @@ def test_main_package_imports(): RetryConfig, UiPathHttpxAsyncClient, UiPathHttpxClient, + UiPathNormalizedClient, __version__, get_default_client_settings, ) @@ -47,6 +48,10 @@ def test_main_package_imports(): assert RetryConfig is not None, "RetryConfig should be importable" print(" RetryConfig is importable") + # Verify normalized client is a type + assert isinstance(UiPathNormalizedClient, type), "UiPathNormalizedClient 
should be a class" + print(" UiPathNormalizedClient is importable") + print(" Main package imports OK") @@ -414,6 +419,47 @@ def test_google_client_inheritance(): print(" Google client inheritance OK") +def test_normalized_client_imports(): + """Test that normalized client and its types can be imported.""" + print("Testing normalized client imports...") + + from uipath.llm_client.clients.normalized import ( + ChatCompletion, + ChatCompletionChunk, + Choice, + Delta, + EmbeddingData, + EmbeddingResponse, + Message, + StreamChoice, + ToolCall, + ToolCallChunk, + UiPathNormalizedClient, + Usage, + ) + + # Verify all are types + types = [ + UiPathNormalizedClient, + ChatCompletion, + ChatCompletionChunk, + Choice, + Delta, + EmbeddingData, + EmbeddingResponse, + Message, + StreamChoice, + ToolCall, + ToolCallChunk, + Usage, + ] + for t in types: + assert isinstance(t, type), f"{t.__name__} should be a class" + + print(f" All {len(types)} normalized client types are importable") + print(" Normalized client imports OK") + + def test_uipath_api_config(): """Test UiPathAPIConfig can be instantiated with valid configurations.""" print("Testing UiPathAPIConfig instantiation...") @@ -463,6 +509,7 @@ def main(): test_httpx_client_module_imports, test_exceptions_module_imports, test_retry_module_imports, + test_normalized_client_imports, test_openai_client_imports, test_anthropic_client_imports, test_google_client_imports, diff --git a/tests/core/test_normalized_client.py b/tests/core/test_normalized_client.py new file mode 100644 index 0000000..397db1d --- /dev/null +++ b/tests/core/test_normalized_client.py @@ -0,0 +1,1386 @@ +"""Tests for the normalized client module. + +This module tests: +1. UiPathNormalizedClient initialization and client creation +2. Completions.create (sync, non-streaming) +3. Completions.stream (sync, streaming) +4. Completions.acreate (async, non-streaming) +5. Tool calling (tool definition building, tool_choice resolution) +6. 
Structured output (Pydantic, TypedDict, dict schemas) +7. Embeddings.create and Embeddings.acreate +8. Response type parsing (ChatCompletion, ChatCompletionChunk, EmbeddingResponse) +""" + +import json +from typing import TypedDict +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest +from pydantic import BaseModel + +from uipath.llm_client.clients.normalized import ( + ChatCompletion, + ChatCompletionChunk, + Choice, + Delta, + EmbeddingData, + EmbeddingResponse, + Message, + ToolCall, + ToolCallChunk, + UiPathNormalizedClient, + Usage, +) +from uipath.llm_client.clients.normalized.completions import ( + Completions, + _build_request, + _build_response_format, + _build_tool_definition, + _parse_response, + _parse_stream_chunk, + _parse_structured_output, + _parse_tool_call, + _parse_tool_call_chunk, + _resolve_tool_choice, +) +from uipath.llm_client.clients.normalized.embeddings import _parse_embedding_response +from uipath.llm_client.settings.utils import SingletonMeta + +# ============================================================================ +# Fixtures +# ============================================================================ + +_CLIENT_MODULE = "uipath.llm_client.clients.normalized.client" + + +@pytest.fixture(autouse=True) +def clear_singleton_instances(): + """Clear singleton instances before each test to ensure isolation.""" + SingletonMeta._instances.clear() + yield + SingletonMeta._instances.clear() + + +@pytest.fixture +def mock_settings(): + settings = MagicMock() + settings.build_base_url.return_value = "https://gateway.uipath.com/llm/v1" + settings.build_auth_headers.return_value = {"Authorization": "Bearer test-token"} + settings.build_auth_pipeline.return_value = None + return settings + + +@pytest.fixture +def mock_sync_client(): + client = MagicMock() + return client + + +@pytest.fixture +def mock_async_client(): + client = AsyncMock() + return client + + +# 
============================================================================ +# Response parsing helpers +# ============================================================================ + +SAMPLE_COMPLETION_RESPONSE = { + "id": "chatcmpl-123", + "created": 1234567890, + "model": "gpt-4o", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "Hello! How can I help you?", + }, + "finish_reason": "stop", + } + ], + "usage": { + "prompt_tokens": 10, + "completion_tokens": 8, + "total_tokens": 18, + }, +} + +SAMPLE_TOOL_CALL_RESPONSE = { + "id": "chatcmpl-456", + "created": 1234567890, + "model": "gpt-4o", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_abc123", + "name": "get_weather", + "arguments": {"city": "London"}, + } + ], + }, + "finish_reason": "tool_calls", + } + ], + "usage": { + "prompt_tokens": 15, + "completion_tokens": 20, + "total_tokens": 35, + }, +} + +SAMPLE_STREAM_CHUNKS = [ + { + "id": "chatcmpl-789", + "created": 1234567890, + "model": "gpt-4o", + "choices": [ + { + "index": 0, + "delta": {"role": "assistant", "content": "Hello"}, + "finish_reason": None, + } + ], + }, + { + "id": "chatcmpl-789", + "created": 1234567890, + "model": "gpt-4o", + "choices": [ + { + "index": 0, + "delta": {"content": " world!"}, + "finish_reason": None, + } + ], + }, + { + "id": "chatcmpl-789", + "created": 1234567890, + "model": "gpt-4o", + "choices": [ + { + "index": 0, + "delta": {}, + "finish_reason": "stop", + } + ], + "usage": { + "prompt_tokens": 5, + "completion_tokens": 3, + "total_tokens": 8, + }, + }, +] + +SAMPLE_EMBEDDING_RESPONSE = { + "data": [ + {"embedding": [0.1, 0.2, 0.3], "index": 0}, + {"embedding": [0.4, 0.5, 0.6], "index": 1}, + ], + "model": "text-embedding-ada-002", + "usage": {"prompt_tokens": 5, "total_tokens": 5}, +} + + +# ============================================================================ +# Test: Response type parsing +# 
============================================================================ + + +class TestParseResponse: + def test_basic_completion(self): + result = _parse_response(SAMPLE_COMPLETION_RESPONSE) + assert isinstance(result, ChatCompletion) + assert result.id == "chatcmpl-123" + assert result.model == "gpt-4o" + assert len(result.choices) == 1 + assert result.choices[0].message.content == "Hello! How can I help you?" + assert result.choices[0].finish_reason == "stop" + assert result.usage.prompt_tokens == 10 + assert result.usage.completion_tokens == 8 + assert result.usage.total_tokens == 18 + + def test_tool_call_response(self): + result = _parse_response(SAMPLE_TOOL_CALL_RESPONSE) + assert len(result.choices[0].message.tool_calls) == 1 + tc = result.choices[0].message.tool_calls[0] + assert tc.id == "call_abc123" + assert tc.name == "get_weather" + assert tc.arguments == {"city": "London"} + + def test_empty_response(self): + result = _parse_response({"choices": [], "usage": {}}) + assert len(result.choices) == 0 + assert result.usage.prompt_tokens == 0 + + def test_tool_call_with_string_arguments(self): + data = { + "choices": [ + { + "message": { + "tool_calls": [ + { + "id": "call_1", + "name": "func", + "arguments": '{"key": "value"}', + } + ] + } + } + ], + "usage": {}, + } + result = _parse_response(data) + tc = result.choices[0].message.tool_calls[0] + assert tc.arguments == {"key": "value"} + + def test_tool_call_with_invalid_json_arguments(self): + data = { + "choices": [ + { + "message": { + "tool_calls": [ + { + "id": "call_1", + "name": "func", + "arguments": "not json", + } + ] + } + } + ], + "usage": {}, + } + result = _parse_response(data) + tc = result.choices[0].message.tool_calls[0] + assert tc.arguments == {} + + +class TestParseStreamChunk: + def test_content_chunk(self): + result = _parse_stream_chunk(SAMPLE_STREAM_CHUNKS[0]) + assert isinstance(result, ChatCompletionChunk) + assert result.id == "chatcmpl-789" + assert len(result.choices) == 
1 + assert result.choices[0].delta.content == "Hello" + assert result.choices[0].delta.role == "assistant" + + def test_chunk_with_usage(self): + result = _parse_stream_chunk(SAMPLE_STREAM_CHUNKS[2]) + assert result.usage is not None + assert result.usage.prompt_tokens == 5 + assert result.choices[0].finish_reason == "stop" + + def test_chunk_without_usage(self): + result = _parse_stream_chunk(SAMPLE_STREAM_CHUNKS[0]) + assert result.usage is None + + def test_stream_tool_call_chunk(self): + data = { + "id": "chatcmpl-tc", + "choices": [ + { + "delta": { + "tool_calls": [ + { + "id": "call_1", + "name": "get_weather", + "arguments": '{"city":', + "index": 0, + } + ] + } + } + ], + } + result = _parse_stream_chunk(data) + assert len(result.choices[0].delta.tool_calls) == 1 + tc = result.choices[0].delta.tool_calls[0] + assert tc.name == "get_weather" + assert tc.arguments == '{"city":' + + def test_stream_tool_call_with_function_format(self): + data = { + "id": "chatcmpl-tc", + "choices": [ + { + "delta": { + "tool_calls": [ + { + "id": "call_1", + "function": { + "name": "get_weather", + "arguments": '{"city": "Paris"}', + }, + "index": 0, + } + ] + } + } + ], + } + result = _parse_stream_chunk(data) + tc = result.choices[0].delta.tool_calls[0] + assert tc.name == "get_weather" + assert tc.arguments == '{"city": "Paris"}' + + +class TestParseEmbeddingResponse: + def test_basic_embedding(self): + result = _parse_embedding_response(SAMPLE_EMBEDDING_RESPONSE) + assert isinstance(result, EmbeddingResponse) + assert len(result.data) == 2 + assert result.data[0].embedding == [0.1, 0.2, 0.3] + assert result.data[1].embedding == [0.4, 0.5, 0.6] + assert result.model == "text-embedding-ada-002" + assert result.usage.prompt_tokens == 5 + + def test_empty_embedding(self): + result = _parse_embedding_response({"data": [], "usage": {}}) + assert len(result.data) == 0 + + +# ============================================================================ +# Test: Structured output 
+# ============================================================================ + + +class TestBuildResponseFormat: + def test_pydantic_model(self): + class MyModel(BaseModel): + name: str + age: int + + result = _build_response_format(MyModel) + assert result["type"] == "json_schema" + assert result["json_schema"]["name"] == "MyModel" + assert result["json_schema"]["strict"] is True + assert "properties" in result["json_schema"]["schema"] + + def test_typed_dict(self): + class MyDict(TypedDict): + name: str + score: float + + result = _build_response_format(MyDict) + assert result["type"] == "json_schema" + assert result["json_schema"]["name"] == "MyDict" + assert result["json_schema"]["strict"] is True + schema = result["json_schema"]["schema"] + assert schema["type"] == "object" + assert "name" in schema["properties"] + assert "score" in schema["properties"] + assert schema["properties"]["name"]["type"] == "string" + assert schema["properties"]["score"]["type"] == "number" + + def test_dict_schema(self): + schema = { + "name": "my_schema", + "schema": {"type": "object", "properties": {"x": {"type": "integer"}}}, + } + result = _build_response_format(schema) + assert result["type"] == "json_schema" + assert result["json_schema"] == schema + + def test_unsupported_type(self): + with pytest.raises(TypeError, match="Unsupported response_format"): + _build_response_format("not a type") # type: ignore[arg-type] + + +class TestParseStructuredOutput: + def test_parse_pydantic(self): + class Answer(BaseModel): + text: str + score: float + + content = '{"text": "hello", "score": 0.9}' + result = _parse_structured_output(content, Answer) + assert isinstance(result, Answer) + assert result.text == "hello" + assert result.score == 0.9 + + def test_parse_dict(self): + content = '{"key": "value"}' + result = _parse_structured_output(content, {"type": "object"}) + assert result == {"key": "value"} + + def test_parse_invalid_json(self): + result = _parse_structured_output("not 
json", str) + assert result is None + + def test_response_with_structured_output(self): + class Answer(BaseModel): + text: str + + data = { + "choices": [ + { + "message": { + "content": '{"text": "hello"}', + } + } + ], + "usage": {}, + } + result = _parse_response(data, response_format=Answer) + assert result.choices[0].message.parsed is not None + assert isinstance(result.choices[0].message.parsed, Answer) + assert result.choices[0].message.parsed.text == "hello" + + def test_response_without_structured_output(self): + data = { + "choices": [ + { + "message": { + "content": "plain text", + } + } + ], + "usage": {}, + } + result = _parse_response(data) + assert result.choices[0].message.parsed is None + + +# ============================================================================ +# Test: Tool definition building +# ============================================================================ + + +class TestBuildToolDefinition: + def test_dict_passthrough(self): + tool = {"name": "my_tool", "description": "does stuff", "parameters": {}} + result = _build_tool_definition(tool) + assert result is tool + + def test_pydantic_model(self): + class WeatherInput(BaseModel): + """Get weather for a city.""" + + city: str + units: str = "celsius" + + result = _build_tool_definition(WeatherInput) + assert result["name"] == "WeatherInput" + assert result["description"] == "Get weather for a city." + assert "properties" in result["parameters"] + assert "city" in result["parameters"]["properties"] + + def test_callable(self): + def get_weather(city: str, units: str = "celsius") -> str: + """Get weather for a city.""" + return f"Weather in {city}" + + result = _build_tool_definition(get_weather) + assert result["name"] == "get_weather" + assert result["description"] == "Get weather for a city." 
+ assert "city" in result["parameters"]["properties"] + assert "city" in result["parameters"]["required"] + assert "units" not in result["parameters"]["required"] + + def test_unsupported_type(self): + with pytest.raises(TypeError, match="Unsupported tool type"): + _build_tool_definition(42) # type: ignore[arg-type] + + +class TestToolChoiceResolution: + def test_auto(self): + result = _resolve_tool_choice("auto", []) + assert result == "auto" + + def test_required(self): + result = _resolve_tool_choice("required", []) + assert result == "required" + + def test_none(self): + result = _resolve_tool_choice("none", []) + assert result == "none" + + def test_specific_tool(self): + tools = [{"name": "get_weather"}, {"name": "search"}] + result = _resolve_tool_choice("get_weather", tools) + assert result == {"type": "tool", "name": "get_weather"} + + def test_unknown_becomes_auto(self): + result = _resolve_tool_choice("unknown_tool", [{"name": "other"}]) + assert result == "auto" + + def test_dict_passthrough(self): + choice = {"type": "required"} + result = _resolve_tool_choice(choice, []) + assert result is choice + + +# ============================================================================ +# Test: Tool call parsing +# ============================================================================ + + +class TestParseToolCall: + def test_basic(self): + tc = _parse_tool_call({"id": "call_1", "name": "func", "arguments": {"x": 1}}) + assert tc.id == "call_1" + assert tc.name == "func" + assert tc.arguments == {"x": 1} + + def test_string_arguments(self): + tc = _parse_tool_call({"id": "call_1", "name": "func", "arguments": '{"x": 1}'}) + assert tc.arguments == {"x": 1} + + def test_invalid_string_arguments(self): + tc = _parse_tool_call({"id": "call_1", "name": "func", "arguments": "not json"}) + assert tc.arguments == {} + + +class TestParseToolCallChunk: + def test_flat_format(self): + tc = _parse_tool_call_chunk( + {"id": "call_1", "name": "func", "arguments": 
'{"x":', "index": 0} + ) + assert tc.name == "func" + assert tc.arguments == '{"x":' + + def test_function_format(self): + tc = _parse_tool_call_chunk( + { + "id": "call_1", + "function": {"name": "func", "arguments": '{"x": 1}'}, + "index": 0, + } + ) + assert tc.name == "func" + assert tc.arguments == '{"x": 1}' + + def test_dict_arguments_converted(self): + tc = _parse_tool_call_chunk( + {"id": "call_1", "name": "func", "arguments": {"x": 1}, "index": 0} + ) + assert tc.arguments == '{"x": 1}' + + +# ============================================================================ +# Test: Client initialization +# ============================================================================ + + +class TestUiPathNormalizedClientInit: + @patch(f"{_CLIENT_MODULE}.build_httpx_client") + @patch(f"{_CLIENT_MODULE}.get_default_client_settings") + def test_default_settings(self, mock_get_settings, mock_build): + mock_settings = MagicMock() + mock_settings.build_auth_pipeline.return_value = None + mock_get_settings.return_value = mock_settings + + client = UiPathNormalizedClient(model_name="gpt-4o") + assert client._model_name == "gpt-4o" + mock_get_settings.assert_called_once() + + @patch(f"{_CLIENT_MODULE}.build_httpx_client") + def test_custom_settings(self, mock_build): + settings = MagicMock() + settings.build_auth_pipeline.return_value = None + + client = UiPathNormalizedClient(model_name="gpt-4o", client_settings=settings) + assert client._client_settings is settings + + @patch(f"{_CLIENT_MODULE}.build_httpx_client") + @patch(f"{_CLIENT_MODULE}.get_default_client_settings") + def test_has_completions_namespace(self, mock_get_settings, mock_build): + mock_settings = MagicMock() + mock_settings.build_auth_pipeline.return_value = None + mock_get_settings.return_value = mock_settings + mock_build.return_value = MagicMock() + + client = UiPathNormalizedClient(model_name="gpt-4o") + assert hasattr(client, "completions") + assert isinstance(client.completions, Completions) + + 
@patch(f"{_CLIENT_MODULE}.build_httpx_client") + @patch(f"{_CLIENT_MODULE}.get_default_client_settings") + def test_has_embeddings_namespace(self, mock_get_settings, mock_build): + mock_settings = MagicMock() + mock_settings.build_auth_pipeline.return_value = None + mock_get_settings.return_value = mock_settings + mock_build.return_value = MagicMock() + + client = UiPathNormalizedClient(model_name="gpt-4o") + from uipath.llm_client.clients.normalized.embeddings import Embeddings + + assert hasattr(client, "embeddings") + assert isinstance(client.embeddings, Embeddings) + + @patch(f"{_CLIENT_MODULE}.build_httpx_client") + @patch(f"{_CLIENT_MODULE}.get_default_client_settings") + def test_completions_api_config(self, mock_get_settings, mock_build): + mock_settings = MagicMock() + mock_settings.build_auth_pipeline.return_value = None + mock_get_settings.return_value = mock_settings + + client = UiPathNormalizedClient(model_name="gpt-4o") + assert client._completions_api_config.api_type == "completions" + assert client._completions_api_config.routing_mode == "normalized" + assert client._completions_api_config.freeze_base_url is True + + @patch(f"{_CLIENT_MODULE}.build_httpx_client") + @patch(f"{_CLIENT_MODULE}.get_default_client_settings") + def test_embeddings_api_config(self, mock_get_settings, mock_build): + mock_settings = MagicMock() + mock_settings.build_auth_pipeline.return_value = None + mock_get_settings.return_value = mock_settings + + client = UiPathNormalizedClient(model_name="gpt-4o") + assert client._embeddings_api_config.api_type == "embeddings" + assert client._embeddings_api_config.routing_mode == "normalized" + assert client._embeddings_api_config.freeze_base_url is True + + +# ============================================================================ +# Test: Completions.create (sync, non-streaming) +# ============================================================================ + + +class TestCompletionsCreate: + def test_basic_create(self, 
mock_sync_client): + mock_response = MagicMock() + mock_response.json.return_value = SAMPLE_COMPLETION_RESPONSE + mock_sync_client.request.return_value = mock_response + + client_obj = MagicMock() + client_obj._sync_client = mock_sync_client + + completions = Completions(client_obj) + result = completions.create( + messages=[{"role": "user", "content": "Hello"}], + ) + + assert isinstance(result, ChatCompletion) + assert result.choices[0].message.content == "Hello! How can I help you?" + mock_sync_client.request.assert_called_once() + call_kwargs = mock_sync_client.request.call_args + body = call_kwargs.kwargs["json"] + assert body["messages"] == [{"role": "user", "content": "Hello"}] + + def test_create_with_params(self, mock_sync_client): + mock_response = MagicMock() + mock_response.json.return_value = SAMPLE_COMPLETION_RESPONSE + mock_sync_client.request.return_value = mock_response + + client_obj = MagicMock() + client_obj._sync_client = mock_sync_client + + completions = Completions(client_obj) + completions.create( + messages=[{"role": "user", "content": "Hello"}], + max_tokens=100, + temperature=0.5, + top_p=0.9, + stop=["END"], + n=2, + presence_penalty=0.1, + frequency_penalty=0.2, + ) + + body = mock_sync_client.request.call_args.kwargs["json"] + assert body["max_tokens"] == 100 + assert body["temperature"] == 0.5 + assert body["top_p"] == 0.9 + assert body["stop"] == ["END"] + assert body["n"] == 2 + assert body["presence_penalty"] == 0.1 + assert body["frequency_penalty"] == 0.2 + + def test_create_omits_none_params(self, mock_sync_client): + mock_response = MagicMock() + mock_response.json.return_value = SAMPLE_COMPLETION_RESPONSE + mock_sync_client.request.return_value = mock_response + + client_obj = MagicMock() + client_obj._sync_client = mock_sync_client + + completions = Completions(client_obj) + completions.create( + messages=[{"role": "user", "content": "Hello"}], + ) + + body = mock_sync_client.request.call_args.kwargs["json"] + assert 
"max_tokens" not in body + assert "temperature" not in body + assert "stop" not in body + + def test_create_with_tools(self, mock_sync_client): + mock_response = MagicMock() + mock_response.json.return_value = SAMPLE_TOOL_CALL_RESPONSE + mock_sync_client.request.return_value = mock_response + + client_obj = MagicMock() + client_obj._sync_client = mock_sync_client + + completions = Completions(client_obj) + result = completions.create( + messages=[{"role": "user", "content": "What's the weather?"}], + tools=[ + { + "name": "get_weather", + "description": "Get weather", + "parameters": { + "type": "object", + "properties": {"city": {"type": "string"}}, + }, + } + ], + tool_choice="auto", + ) + + body = mock_sync_client.request.call_args.kwargs["json"] + assert "tools" in body + assert body["tool_choice"] == "auto" + assert len(result.choices[0].message.tool_calls) == 1 + + def test_create_with_response_format(self, mock_sync_client): + class MyOutput(BaseModel): + answer: str + + mock_response = MagicMock() + mock_response.json.return_value = { + "choices": [{"message": {"content": '{"answer": "42"}'}}], + "usage": {}, + } + mock_sync_client.request.return_value = mock_response + + client_obj = MagicMock() + client_obj._sync_client = mock_sync_client + + completions = Completions(client_obj) + result = completions.create( + messages=[{"role": "user", "content": "What is 6*7?"}], + response_format=MyOutput, + ) + + body = mock_sync_client.request.call_args.kwargs["json"] + assert "response_format" in body + assert body["response_format"]["type"] == "json_schema" + assert result.choices[0].message.parsed is not None + assert result.choices[0].message.parsed.answer == "42" + + def test_create_with_kwargs(self, mock_sync_client): + mock_response = MagicMock() + mock_response.json.return_value = SAMPLE_COMPLETION_RESPONSE + mock_sync_client.request.return_value = mock_response + + client_obj = MagicMock() + client_obj._sync_client = mock_sync_client + + completions = 
Completions(client_obj) + completions.create( + messages=[{"role": "user", "content": "Hello"}], + reasoning={"effort": "high"}, + ) + + body = mock_sync_client.request.call_args.kwargs["json"] + assert body["reasoning"] == {"effort": "high"} + + +# ============================================================================ +# Test: Completions.stream (sync, streaming) +# ============================================================================ + + +class TestCompletionsStream: + def test_stream_yields_chunks(self, mock_sync_client): + sse_lines = [f"data: {json.dumps(chunk)}" for chunk in SAMPLE_STREAM_CHUNKS] + + mock_response = MagicMock() + mock_response.iter_lines.return_value = iter(sse_lines) + mock_sync_client.stream.return_value.__enter__ = MagicMock(return_value=mock_response) + mock_sync_client.stream.return_value.__exit__ = MagicMock(return_value=False) + + client_obj = MagicMock() + client_obj._sync_client = mock_sync_client + + completions = Completions(client_obj) + chunks = list( + completions.stream( + messages=[{"role": "user", "content": "Hello"}], + ) + ) + + assert len(chunks) == 3 + assert chunks[0].choices[0].delta.content == "Hello" + assert chunks[1].choices[0].delta.content == " world!" 
+ assert chunks[2].choices[0].finish_reason == "stop" + + def test_stream_skips_invalid_json(self, mock_sync_client): + lines = [ + "data: {invalid json", + f"data: {json.dumps(SAMPLE_STREAM_CHUNKS[0])}", + "", # empty line + ] + + mock_response = MagicMock() + mock_response.iter_lines.return_value = iter(lines) + mock_sync_client.stream.return_value.__enter__ = MagicMock(return_value=mock_response) + mock_sync_client.stream.return_value.__exit__ = MagicMock(return_value=False) + + client_obj = MagicMock() + client_obj._sync_client = mock_sync_client + + completions = Completions(client_obj) + chunks = list( + completions.stream( + messages=[{"role": "user", "content": "Hello"}], + ) + ) + + assert len(chunks) == 1 + + def test_stream_skips_empty_id(self, mock_sync_client): + lines = [ + f"data: {json.dumps({'id': '', 'choices': []})}", + f"data: {json.dumps(SAMPLE_STREAM_CHUNKS[0])}", + ] + + mock_response = MagicMock() + mock_response.iter_lines.return_value = iter(lines) + mock_sync_client.stream.return_value.__enter__ = MagicMock(return_value=mock_response) + mock_sync_client.stream.return_value.__exit__ = MagicMock(return_value=False) + + client_obj = MagicMock() + client_obj._sync_client = mock_sync_client + + completions = Completions(client_obj) + chunks = list( + completions.stream( + messages=[{"role": "user", "content": "Hello"}], + ) + ) + + assert len(chunks) == 1 + + def test_stream_sets_stream_flag(self, mock_sync_client): + mock_response = MagicMock() + mock_response.iter_lines.return_value = iter([]) + mock_sync_client.stream.return_value.__enter__ = MagicMock(return_value=mock_response) + mock_sync_client.stream.return_value.__exit__ = MagicMock(return_value=False) + + client_obj = MagicMock() + client_obj._sync_client = mock_sync_client + + completions = Completions(client_obj) + list( + completions.stream( + messages=[{"role": "user", "content": "Hello"}], + ) + ) + + call_kwargs = mock_sync_client.stream.call_args + body = 
call_kwargs.kwargs["json"] + assert body["stream"] is True + + +# ============================================================================ +# Test: Completions.acreate (async, non-streaming) +# ============================================================================ + + +class TestAsyncCompletionsCreate: + @pytest.mark.asyncio + async def test_basic_acreate(self, mock_async_client): + mock_response = MagicMock() + mock_response.json.return_value = SAMPLE_COMPLETION_RESPONSE + mock_async_client.request.return_value = mock_response + + client_obj = MagicMock() + client_obj._async_client = mock_async_client + + completions = Completions(client_obj) + result = await completions.acreate( + messages=[{"role": "user", "content": "Hello"}], + ) + + assert isinstance(result, ChatCompletion) + assert result.choices[0].message.content == "Hello! How can I help you?" + + +# ============================================================================ +# Test: Embeddings +# ============================================================================ + + +class TestEmbeddingsCreate: + def test_basic_create(self): + mock_response = MagicMock() + mock_response.json.return_value = SAMPLE_EMBEDDING_RESPONSE + + mock_client = MagicMock() + mock_client.request.return_value = mock_response + + client_obj = MagicMock() + client_obj._embedding_sync_client = mock_client + + from uipath.llm_client.clients.normalized.embeddings import Embeddings + + embeddings = Embeddings(client_obj) + result = embeddings.create(input=["Hello world", "Goodbye"]) + + assert isinstance(result, EmbeddingResponse) + assert len(result.data) == 2 + assert result.data[0].embedding == [0.1, 0.2, 0.3] + + body = mock_client.request.call_args.kwargs["json"] + assert body["input"] == ["Hello world", "Goodbye"] + + def test_string_input_wrapped(self): + mock_response = MagicMock() + mock_response.json.return_value = SAMPLE_EMBEDDING_RESPONSE + + mock_client = MagicMock() + mock_client.request.return_value = 
mock_response + + client_obj = MagicMock() + client_obj._embedding_sync_client = mock_client + + from uipath.llm_client.clients.normalized.embeddings import Embeddings + + embeddings = Embeddings(client_obj) + embeddings.create(input="Hello world") + + body = mock_client.request.call_args.kwargs["json"] + assert body["input"] == ["Hello world"] + + +class TestAsyncEmbeddingsCreate: + @pytest.mark.asyncio + async def test_basic_acreate(self): + mock_response = MagicMock() + mock_response.json.return_value = SAMPLE_EMBEDDING_RESPONSE + + mock_client = AsyncMock() + mock_client.request.return_value = mock_response + + client_obj = MagicMock() + client_obj._embedding_async_client = mock_client + + from uipath.llm_client.clients.normalized.embeddings import Embeddings + + embeddings = Embeddings(client_obj) + result = await embeddings.acreate(input=["Hello world"]) + + assert isinstance(result, EmbeddingResponse) + assert len(result.data) == 2 + + +# ============================================================================ +# Test: Type models +# ============================================================================ + + +class TestTypeModels: + def test_usage_defaults(self): + usage = Usage() + assert usage.prompt_tokens == 0 + assert usage.completion_tokens == 0 + assert usage.total_tokens == 0 + assert usage.cache_read_input_tokens == 0 + + def test_tool_call(self): + tc = ToolCall(id="call_1", name="func", arguments={"x": 1}) + assert tc.id == "call_1" + assert tc.name == "func" + assert tc.arguments == {"x": 1} + + def test_tool_call_chunk(self): + tc = ToolCallChunk(id="call_1", name="func", arguments='{"x":', index=0) + assert tc.arguments == '{"x":' + + def test_message_defaults(self): + msg = Message() + assert msg.role == "assistant" + assert msg.content == "" + assert msg.tool_calls == [] + assert msg.parsed is None + + def test_delta_defaults(self): + delta = Delta() + assert delta.role is None + assert delta.content == "" + assert delta.tool_calls 
== [] + + def test_choice_defaults(self): + choice = Choice() + assert choice.index == 0 + assert choice.finish_reason is None + + def test_chat_completion_defaults(self): + cc = ChatCompletion() + assert cc.id == "" + assert cc.choices == [] + assert cc.usage.prompt_tokens == 0 + + def test_embedding_data(self): + ed = EmbeddingData(embedding=[0.1, 0.2], index=0) + assert ed.embedding == [0.1, 0.2] + + def test_embedding_response(self): + er = EmbeddingResponse( + data=[EmbeddingData(embedding=[0.1], index=0)], + model="test-model", + ) + assert len(er.data) == 1 + assert er.model == "test-model" + + +# ============================================================================ +# Test: Request body building +# ============================================================================ + + +class TestBuildRequest: + def test_minimal_request(self): + body = _build_request( + messages=[{"role": "user", "content": "Hi"}], + ) + assert body == {"messages": [{"role": "user", "content": "Hi"}]} + + def test_stream_flag(self): + body = _build_request( + messages=[{"role": "user", "content": "Hi"}], + stream=True, + ) + assert body["stream"] is True + + def test_all_optional_params(self): + body = _build_request( + messages=[{"role": "user", "content": "Hi"}], + max_tokens=100, + temperature=0.7, + stop=["END"], + n=3, + top_p=0.9, + presence_penalty=0.5, + frequency_penalty=0.3, + ) + assert body["max_tokens"] == 100 + assert body["temperature"] == 0.7 + assert body["stop"] == ["END"] + assert body["n"] == 3 + assert body["top_p"] == 0.9 + assert body["presence_penalty"] == 0.5 + assert body["frequency_penalty"] == 0.3 + + def test_with_tools(self): + body = _build_request( + messages=[{"role": "user", "content": "Hi"}], + tools=[{"name": "func", "description": "d", "parameters": {}}], + tool_choice="auto", + ) + assert len(body["tools"]) == 1 + assert body["tool_choice"] == "auto" + + def test_with_response_format(self): + class MyModel(BaseModel): + x: int + + body = 
_build_request( + messages=[{"role": "user", "content": "Hi"}], + response_format=MyModel, + ) + assert body["response_format"]["type"] == "json_schema" + assert body["response_format"]["json_schema"]["name"] == "MyModel" + + def test_kwargs_merged(self): + body = _build_request( + messages=[{"role": "user", "content": "Hi"}], + custom_param="value", + ) + assert body["custom_param"] == "value" + + def test_openai_specific_params(self): + body = _build_request( + messages=[{"role": "user", "content": "Hi"}], + seed=42, + logit_bias={"123": -100}, + logprobs=True, + top_logprobs=5, + parallel_tool_calls=False, + reasoning_effort="high", + reasoning={"effort": "high"}, + ) + assert body["seed"] == 42 + assert body["logit_bias"] == {"123": -100} + assert body["logprobs"] is True + assert body["top_logprobs"] == 5 + assert body["parallel_tool_calls"] is False + assert body["reasoning_effort"] == "high" + assert body["reasoning"] == {"effort": "high"} + + def test_anthropic_specific_params(self): + body = _build_request( + messages=[{"role": "user", "content": "Hi"}], + top_k=40, + thinking={"type": "enabled", "budget_tokens": 1000}, + ) + assert body["top_k"] == 40 + assert body["thinking"] == {"type": "enabled", "budget_tokens": 1000} + + def test_google_specific_params(self): + body = _build_request( + messages=[{"role": "user", "content": "Hi"}], + thinking_level="high", + thinking_budget=2000, + include_thoughts=True, + safety_settings=[{"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"}], + ) + assert body["thinking_level"] == "high" + assert body["thinking_budget"] == 2000 + assert body["include_thoughts"] is True + assert len(body["safety_settings"]) == 1 + + def test_shared_params(self): + body = _build_request( + messages=[{"role": "user", "content": "Hi"}], + verbosity="low", + ) + assert body["verbosity"] == "low" + + def test_removed_infra_params_go_through_kwargs(self): + body = _build_request( + messages=[{"role": "user", "content": 
"Hi"}], + user="user-123", + service_tier="auto", + metadata={"request_id": "abc"}, + ) + assert body["user"] == "user-123" + assert body["service_tier"] == "auto" + assert body["metadata"] == {"request_id": "abc"} + + def test_pydantic_messages(self): + class ChatMessage(BaseModel): + role: str + content: str + + body = _build_request( + messages=[ChatMessage(role="user", content="Hi")], + ) + assert body["messages"] == [{"role": "user", "content": "Hi"}] + + def test_mixed_dict_and_pydantic_messages(self): + class ChatMessage(BaseModel): + role: str + content: str + + body = _build_request( + messages=[ + {"role": "system", "content": "Be brief."}, + ChatMessage(role="user", content="Hi"), + ], + ) + assert body["messages"] == [ + {"role": "system", "content": "Be brief."}, + {"role": "user", "content": "Hi"}, + ] + + def test_pydantic_message_with_none_fields_excluded(self): + class ChatMessage(BaseModel): + role: str + content: str + name: str | None = None + + body = _build_request( + messages=[ChatMessage(role="user", content="Hi")], + ) + assert body["messages"] == [{"role": "user", "content": "Hi"}] + + def test_pydantic_tool_in_tools_list(self): + class GetWeather(BaseModel): + """Get weather for a city.""" + + city: str + + body = _build_request( + messages=[{"role": "user", "content": "Hi"}], + tools=[GetWeather], + tool_choice="auto", + ) + assert body["tools"][0]["name"] == "GetWeather" + assert "city" in body["tools"][0]["parameters"]["properties"] + + +# ============================================================================ +# Test: Real-world response shapes (from captured API payloads) +# ============================================================================ + + +class TestRealWorldResponses: + """Tests using actual response shapes observed from the normalized API.""" + + def test_gpt4o_basic(self): + data = { + "id": "chatcmpl-DQdh09fdBuc8LPCkDqhJKgrQy3IN8", + "model": "gpt-4o-2024-11-20", + "object": "chat.completion", + "choices": [ 
+ { + "finish_reason": "stop", + "index": 0, + "message": {"content": "Hello.", "role": "assistant"}, + } + ], + "created": 1775241562, + "usage": { + "completion_tokens": 3, + "prompt_tokens": 14, + "total_tokens": 17, + "cache_read_input_tokens": 0, + "thoughts_tokens": 0, + }, + } + result = _parse_response(data) + assert result.object == "chat.completion" + assert result.model == "gpt-4o-2024-11-20" + assert result.choices[0].message.content == "Hello." + assert result.usage.thoughts_tokens == 0 + + def test_gemini_with_avg_logprobs_and_signature(self): + data = { + "id": "gemini-123", + "model": "gemini-2.5-flash", + "object": "chat.completion", + "choices": [ + { + "finish_reason": "stop", + "avg_logprobs": -0.123, + "index": 0, + "message": { + "role": "assistant", + "signature": "abc123signature", + "tool_calls": [ + {"id": "call_1", "name": "get_weather", "arguments": {"city": "London"}} + ], + }, + } + ], + "created": 1775241600, + "usage": { + "completion_tokens": 5, + "prompt_tokens": 31, + "total_tokens": 130, + "cache_read_input_tokens": 0, + "thoughts_tokens": 94, + "request_processing_tier": "ON_DEMAND", + }, + } + result = _parse_response(data) + assert result.choices[0].avg_logprobs == -0.123 + assert result.choices[0].message.signature == "abc123signature" + assert result.choices[0].message.tool_calls[0].name == "get_weather" + assert result.usage.thoughts_tokens == 94 + assert result.usage.request_processing_tier == "ON_DEMAND" + + def test_anthropic_with_thinking(self): + data = { + "id": "anthropic-456", + "model": "claude-haiku-4-5", + "object": "chat.completion", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "message": { + "content": "15 + 27 = 42", + "role": "assistant", + "signature": "ErACsignature", + "thinking": "This is a straightforward arithmetic problem.\n15 + 27 = 42", + }, + } + ], + "created": 1775241700, + "usage": { + "cache_creation_input_tokens": 0, + "cache_read_input_tokens": 0, + "completion_tokens": 10, + 
"prompt_tokens": 14, + "total_tokens": 24, + }, + } + result = _parse_response(data) + assert ( + result.choices[0].message.thinking + == "This is a straightforward arithmetic problem.\n15 + 27 = 42" + ) + assert result.choices[0].message.signature == "ErACsignature" + assert result.choices[0].message.content == "15 + 27 = 42" + assert result.usage.cache_creation_input_tokens == 0 + + def test_gpt5_with_reasoning_usage(self): + data = { + "id": "chatcmpl-gpt5", + "model": "gpt-5.2-2025-12-11", + "object": "chat.completion", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "message": {"content": "100", "role": "assistant"}, + } + ], + "created": 1775241800, + "usage": { + "completion_tokens": 2, + "prompt_tokens": 20, + "total_tokens": 22, + "cache_read_input_tokens": 0, + "thoughts_tokens": 50, + "request_processing_tier": "ON_DEMAND", + }, + } + result = _parse_response(data) + assert result.usage.thoughts_tokens == 50 + assert result.usage.request_processing_tier == "ON_DEMAND" + + def test_embedding_response_real_shape(self): + """Embeddings only return prompt_tokens and total_tokens.""" + data = { + "data": [{"embedding": [0.1, 0.2, 0.3]}], + "usage": {"prompt_tokens": 2, "total_tokens": 2}, + } + from uipath.llm_client.clients.normalized.embeddings import _parse_embedding_response + + result = _parse_embedding_response(data) + assert result.data[0].embedding == [0.1, 0.2, 0.3] + assert result.data[0].index == 0 # auto-assigned + assert result.usage.prompt_tokens == 2 + assert result.usage.completion_tokens == 0 # default + + def test_tool_call_arguments_always_dict(self): + """Normalized API always returns arguments as dict, not string.""" + data = { + "id": "tc-test", + "object": "chat.completion", + "choices": [ + { + "message": { + "role": "assistant", + "tool_calls": [ + { + "id": "call_1", + "name": "get_weather", + "arguments": {"city": "London"}, + }, + ], + }, + "finish_reason": "tool_calls", + } + ], + "usage": {}, + } + result = 
_parse_response(data) + tc = result.choices[0].message.tool_calls[0] + assert isinstance(tc.arguments, dict) + assert tc.arguments == {"city": "London"} diff --git a/tests/core/test_normalized_integration.py b/tests/core/test_normalized_integration.py new file mode 100644 index 0000000..10ff5da --- /dev/null +++ b/tests/core/test_normalized_integration.py @@ -0,0 +1,245 @@ +"""Integration tests for the normalized client. + +These tests verify the normalized client works end-to-end with VCR cassettes. +They test: +1. Basic chat completions (sync) +2. Chat completions with parameters (temperature, max_tokens) +3. Streaming completions (sync via .stream()) +4. Tool calling (dict and Pydantic tools) +5. Structured output via json_object response_format +6. Embeddings (sync) +7. Async completions (via .acreate()) +8. Async embeddings (via .acreate()) +""" + +import json + +import pytest +from pydantic import BaseModel + +from uipath.llm_client.clients.normalized import ( + ChatCompletion, + ChatCompletionChunk, + EmbeddingResponse, + UiPathNormalizedClient, +) +from uipath.llm_client.settings import UiPathBaseSettings + +# ============================================================================ +# Fixtures +# ============================================================================ + + +@pytest.fixture +def normalized_client(client_settings: UiPathBaseSettings) -> UiPathNormalizedClient: + return UiPathNormalizedClient( + model_name="gpt-4o-2024-11-20", + client_settings=client_settings, + ) + + +@pytest.fixture +def embedding_client(client_settings: UiPathBaseSettings) -> UiPathNormalizedClient: + return UiPathNormalizedClient( + model_name="text-embedding-ada-002", + client_settings=client_settings, + ) + + +# ============================================================================ +# Structured output models +# ============================================================================ + + +class MathAnswer(BaseModel): + answer: int + explanation: str + + 
+# ============================================================================ +# Sync completions tests +# ============================================================================ + + +class TestNormalizedCompletions: + @pytest.mark.vcr() + def test_basic_completion(self, normalized_client: UiPathNormalizedClient): + response = normalized_client.completions.create( + messages=[{"role": "user", "content": "Say hello in one word."}], + ) + assert isinstance(response, ChatCompletion) + assert len(response.choices) >= 1 + assert response.choices[0].message.content + assert response.choices[0].finish_reason == "stop" + assert response.usage.total_tokens > 0 + + @pytest.mark.vcr() + def test_completion_with_params(self, normalized_client: UiPathNormalizedClient): + response = normalized_client.completions.create( + messages=[{"role": "user", "content": "Say hi."}], + max_tokens=10, + temperature=0.0, + ) + assert isinstance(response, ChatCompletion) + assert response.choices[0].message.content + + @pytest.mark.vcr() + def test_completion_with_system_message(self, normalized_client: UiPathNormalizedClient): + response = normalized_client.completions.create( + messages=[ + {"role": "system", "content": "You are a helpful assistant. 
Be very brief."}, + {"role": "user", "content": "What is 2+2?"}, + ], + ) + assert isinstance(response, ChatCompletion) + assert response.choices[0].message.content + + +class TestNormalizedStreaming: + @pytest.mark.vcr() + def test_streaming(self, normalized_client: UiPathNormalizedClient): + chunks = list( + normalized_client.completions.stream( + messages=[{"role": "user", "content": "Count from 1 to 3."}], + ) + ) + assert len(chunks) > 0 + assert all(isinstance(c, ChatCompletionChunk) for c in chunks) + + content_chunks = [c for c in chunks if c.choices and c.choices[0].delta.content] + assert len(content_chunks) > 0 + + +class TestNormalizedToolCalling: + @pytest.mark.vcr() + def test_tool_calling(self, normalized_client: UiPathNormalizedClient): + response = normalized_client.completions.create( + messages=[ + {"role": "user", "content": "What is the weather in London?"}, + ], + tools=[ + { + "name": "get_weather", + "description": "Get the current weather in a city", + "parameters": { + "type": "object", + "properties": { + "city": {"type": "string", "description": "The city name"}, + }, + "required": ["city"], + }, + } + ], + tool_choice={"type": "required"}, + ) + assert isinstance(response, ChatCompletion) + assert len(response.choices[0].message.tool_calls) >= 1 + tc = response.choices[0].message.tool_calls[0] + assert tc.name == "get_weather" + assert "city" in tc.arguments + + @pytest.mark.vcr() + def test_tool_calling_with_pydantic(self, normalized_client: UiPathNormalizedClient): + class GetWeatherInput(BaseModel): + """Get the current weather in a city.""" + + city: str + + response = normalized_client.completions.create( + messages=[ + {"role": "user", "content": "What is the weather in Paris?"}, + ], + tools=[GetWeatherInput], + tool_choice={"type": "required"}, + ) + assert isinstance(response, ChatCompletion) + assert len(response.choices[0].message.tool_calls) >= 1 + + +class TestNormalizedStructuredOutput: + @pytest.mark.vcr() + def 
test_structured_output_json_object(self, normalized_client: UiPathNormalizedClient):
+        """Test structured output using json_object response_format."""
+        response = normalized_client.completions.create(
+            messages=[
+                {
+                    "role": "user",
+                    "content": (
+                        'What is 15 + 27? Respond with JSON: {"answer": , "explanation": ""}'
+                    ),
+                },
+            ],
+            response_format={"type": "json_object"},
+        )
+        assert isinstance(response, ChatCompletion)
+        content = response.choices[0].message.content
+        assert content
+        parsed = json.loads(content)
+        assert parsed["answer"] == 42
+
+    @pytest.mark.vcr()
+    def test_structured_output_pydantic_parsed(self, normalized_client: UiPathNormalizedClient):
+        """Validate json_object output against a Pydantic model client-side (does not use message.parsed)."""
+        response = normalized_client.completions.create(
+            messages=[
+                {
+                    "role": "user",
+                    "content": (
+                        'What is 15 + 27? Respond with JSON: {"answer": , "explanation": ""}'
+                    ),
+                },
+            ],
+            response_format={"type": "json_object"},
+        )
+        assert isinstance(response, ChatCompletion)
+        content = response.choices[0].message.content
+        assert content
+        parsed = MathAnswer.model_validate_json(content)
+        assert parsed.answer == 42
+
+
+# ============================================================================
+# Embeddings tests
+# ============================================================================
+
+
+class TestNormalizedEmbeddings:
+    @pytest.mark.vcr()
+    def test_single_embedding(self, embedding_client: UiPathNormalizedClient):
+        response = embedding_client.embeddings.create(input="Hello world")
+        assert isinstance(response, EmbeddingResponse)
+        assert len(response.data) == 1
+        assert len(response.data[0].embedding) > 0
+
+    @pytest.mark.vcr()
+    def test_batch_embeddings(self, embedding_client: UiPathNormalizedClient):
+        response = embedding_client.embeddings.create(input=["Hello world", "Goodbye world"])
+        assert isinstance(response, EmbeddingResponse)
+        assert len(response.data) == 2
+
+
+# 
============================================================================ +# Async tests +# ============================================================================ + + +class TestAsyncNormalizedCompletions: + @pytest.mark.asyncio + @pytest.mark.vcr() + async def test_async_completion(self, normalized_client: UiPathNormalizedClient): + response = await normalized_client.completions.acreate( + messages=[{"role": "user", "content": "Say hello in one word."}], + ) + assert isinstance(response, ChatCompletion) + assert response.choices[0].message.content + + +class TestAsyncNormalizedEmbeddings: + @pytest.mark.asyncio + @pytest.mark.vcr() + async def test_async_embedding(self, embedding_client: UiPathNormalizedClient): + response = await embedding_client.embeddings.acreate( + input="Hello world", + ) + assert isinstance(response, EmbeddingResponse) + assert len(response.data) == 1