Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
77 commits
Select commit Hold shift + click to select a range
ab30a2d
plans for model facade overhaul
nabinchha Feb 19, 2026
43824ea
update plan
nabinchha Feb 20, 2026
2a5f1e4
add review
johnnygreco Feb 20, 2026
f945d5b
address feedback + add more details after several self reviews
nabinchha Feb 20, 2026
dfa3817
update plan doc
nabinchha Feb 25, 2026
5b18f74
Merge branch 'main' into nm/overhaul-model-facade-guts
nabinchha Feb 25, 2026
0f449a7
address nits
nabinchha Feb 25, 2026
37f092a
Merge branch 'nm/overhaul-model-facade-guts' into nm/overhaul-model-f…
nabinchha Feb 25, 2026
08e57f8
Add canonical objects
nabinchha Feb 26, 2026
3ab18ee
Merge branch 'main' into nm/overhaul-model-facade-guts-pr1
nabinchha Feb 27, 2026
34349c7
self-review feedback + address
nabinchha Feb 28, 2026
6aae4b6
add LiteLLMRouter protocol to strongly type bridge router param
nabinchha Feb 28, 2026
2a53d37
simplify some things
nabinchha Feb 28, 2026
4e2f3af
add a protocol for http-response-like object
nabinchha Feb 28, 2026
b1c85f2
move HttpResponse
nabinchha Feb 28, 2026
f6dc769
update PR-1 architecture notes for lifecycle and router protocol
nabinchha Feb 28, 2026
ec5ed9b
Address PR #359 feedback: exception wrapping, shared parsing, test im…
nabinchha Mar 4, 2026
b6b4028
Merge branch 'main' into nm/overhaul-model-facade-guts-pr1
nabinchha Mar 4, 2026
ba22397
Use contextlib to dry out some code
nabinchha Mar 4, 2026
aeac3b9
Address Greptile feedback: HTTP-date retry-after parsing, docstring c…
nabinchha Mar 4, 2026
55f3c96
Address Greptile feedback: FastAPI detail parsing, comment fixes
nabinchha Mar 4, 2026
c390912
Merge branch 'main' into nm/overhaul-model-facade-guts-pr1
nabinchha Mar 4, 2026
828cc49
add PR-2 architecture notes for model facade overhaul
nabinchha Mar 4, 2026
89a6d4e
save progress on pr2
nabinchha Mar 5, 2026
e527503
Merge branch 'main' into nm/overhaul-model-facade-guts-pr1
nabinchha Mar 5, 2026
f6fa447
Merge branch 'nm/overhaul-model-facade-guts-pr1' into nm/overhaul-mod…
nabinchha Mar 5, 2026
b8579c2
small refactor
nabinchha Mar 5, 2026
61024c0
address feedback
nabinchha Mar 5, 2026
d47d508
Merge branch 'nm/overhaul-model-facade-guts-pr1' into nm/overhaul-mod…
nabinchha Mar 5, 2026
49a45ba
Address greptile comment in pr1
nabinchha Mar 5, 2026
e8445cc
refactor ProviderError from dataclass to regular Exception
nabinchha Mar 5, 2026
8a385ff
Merge branch 'main' into nm/overhaul-model-facade-guts-pr2
nabinchha Mar 6, 2026
521c1e4
Address greptile feedback
nabinchha Mar 6, 2026
a831d24
Merge branch 'main' into nm/overhaul-model-facade-guts-pr2
nabinchha Mar 6, 2026
4836e03
PR feedback
nabinchha Mar 6, 2026
ae1bf98
track usage tracking in finally block for images
nabinchha Mar 6, 2026
18b9966
pr feedback
nabinchha Mar 6, 2026
ad45ee2
add native OpenAI adapter with retry and throttle infrastructure
nabinchha Mar 6, 2026
724e734
Self CR
nabinchha Mar 6, 2026
25650b0
Merge branch 'main' into nm/overhaul-model-facade-guts-pr2
nabinchha Mar 6, 2026
651813b
Merge branch 'main' into nm/overhaul-model-facade-guts-pr2
nabinchha Mar 6, 2026
bfed5af
Merge branch 'main' into nm/overhaul-model-facade-guts-pr2
nabinchha Mar 6, 2026
3636c56
Merge branch 'nm/overhaul-model-facade-guts-pr2' into nm/overhaul-mod…
nabinchha Mar 6, 2026
504d040
fix claude slop
nabinchha Mar 6, 2026
afbe197
Updates after self-review. Simplify use of ThrottleManager in light o…
nabinchha Mar 6, 2026
c9d6f4c
Merge branch 'main' into nm/overhaul-model-facade-guts-pr2
nabinchha Mar 9, 2026
a084038
Merge branch 'main' into nm/overhaul-model-facade-guts-pr2
nabinchha Mar 9, 2026
632c7c6
wrap facade close in try/catch
nabinchha Mar 10, 2026
40c05ae
Merge branch 'main' into nm/overhaul-model-facade-guts-pr2
nabinchha Mar 10, 2026
c1d807c
clean up stray params
nabinchha Mar 10, 2026
56caed5
Merge branch 'nm/overhaul-model-facade-guts-pr2' into nm/overhaul-mod…
nabinchha Mar 10, 2026
e34e566
fix: address review findings from model facade overhaul PR3
nabinchha Mar 10, 2026
879b941
fix stray inclusion of metadata
nabinchha Mar 10, 2026
dcbbcba
small regression fix
nabinchha Mar 10, 2026
462810c
address more feedback
nabinchha Mar 10, 2026
ac02f2c
Merge branch 'main' into nm/overhaul-model-facade-guts-pr2
nabinchha Mar 11, 2026
fb809bd
Merge branch 'nm/overhaul-model-facade-guts-pr2' into nm/overhaul-mod…
nabinchha Mar 12, 2026
c538367
Merge branch 'main' into nm/overhaul-model-facade-guts-pr3
nabinchha Mar 12, 2026
117baf4
self review
nabinchha Mar 12, 2026
843227b
Fixes
nabinchha Mar 12, 2026
c7e67d6
new test for aimd lifecycle
nabinchha Mar 12, 2026
707a22f
update plan docs
nabinchha Mar 12, 2026
0f55d4c
update plans with refs to prs
nabinchha Mar 12, 2026
e13f7b6
fix: cap acquire_sync/acquire_async sleep to remaining budget to prev…
nabinchha Mar 12, 2026
664e3cf
test lay init
nabinchha Mar 12, 2026
eb27418
Merge branch 'main' into nm/overhaul-model-facade-guts-pr3
nabinchha Mar 12, 2026
7d7fd41
fix timeout for openaicompatibleadapter
nabinchha Mar 12, 2026
9142494
remove unused attr
nabinchha Mar 12, 2026
bdd0202
fix: address review findings from PR #402
nabinchha Mar 12, 2026
e46efdf
Merge branch 'main' into nm/overhaul-model-facade-guts-pr3
nabinchha Mar 12, 2026
5c5bfab
Merge branch 'main' into nm/overhaul-model-facade-guts-pr3
nabinchha Mar 12, 2026
c23e360
Address pr feedback
nabinchha Mar 13, 2026
7bd4763
fix method order
nabinchha Mar 13, 2026
1658544
Merge branch 'main' into nm/overhaul-model-facade-guts-pr3
nabinchha Mar 13, 2026
ffa0e11
Merge branch 'main' into nm/overhaul-model-facade-guts-pr3
nabinchha Mar 13, 2026
c0c2418
Merge branch 'main' into nm/overhaul-model-facade-guts-pr3
nabinchha Mar 13, 2026
3b491c9
Fix failing test
nabinchha Mar 13, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from __future__ import annotations

from data_designer.engine.models.clients.adapters.openai_compatible import OpenAICompatibleClient
from data_designer.engine.models.clients.base import ModelClient
from data_designer.engine.models.clients.errors import (
ProviderError,
Expand All @@ -11,6 +12,8 @@
map_http_status_to_provider_error_kind,
)
from data_designer.engine.models.clients.factory import create_model_client
from data_designer.engine.models.clients.retry import RetryConfig
from data_designer.engine.models.clients.throttle import ThrottleDomain, ThrottleManager
from data_designer.engine.models.clients.types import (
AssistantMessage,
ChatCompletionRequest,
Expand All @@ -36,8 +39,12 @@
"ImageGenerationResponse",
"ImagePayload",
"ModelClient",
"OpenAICompatibleClient",
"ProviderError",
"ProviderErrorKind",
"RetryConfig",
"ThrottleDomain",
"ThrottleManager",
"ToolCall",
"Usage",
"create_model_client",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,6 @@
from __future__ import annotations

from data_designer.engine.models.clients.adapters.litellm_bridge import LiteLLMBridgeClient, LiteLLMRouter
from data_designer.engine.models.clients.adapters.openai_compatible import OpenAICompatibleClient

__all__ = ["LiteLLMBridgeClient", "LiteLLMRouter"]
__all__ = ["LiteLLMBridgeClient", "LiteLLMRouter", "OpenAICompatibleClient"]
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
from data_designer.engine.models.clients.base import ModelClient
from data_designer.engine.models.clients.errors import (
ProviderError,
ProviderErrorKind,
extract_message_from_exception_string,
infer_error_kind_from_exception,
map_http_status_to_provider_error_kind,
)
from data_designer.engine.models.clients.parsing import (
Expand Down Expand Up @@ -192,7 +192,7 @@ def _handle_non_provider_errors(provider_name: str) -> Iterator[None]:
if isinstance(status_code, int):
kind = map_http_status_to_provider_error_kind(status_code=status_code, body_text=str(exc))
else:
kind = _infer_error_kind(exc)
kind = infer_error_kind_from_exception(exc)

raise ProviderError(
kind=kind,
Expand All @@ -201,17 +201,3 @@ def _handle_non_provider_errors(provider_name: str) -> Iterator[None]:
provider_name=provider_name,
cause=exc,
) from exc


def _infer_error_kind(exc: Exception) -> ProviderErrorKind:
"""Infer error kind from exception type name when no status code is available."""
type_name = type(exc).__name__.lower()
if "timeout" in type_name:
return ProviderErrorKind.TIMEOUT
if "connection" in type_name or "connect" in type_name:
return ProviderErrorKind.API_CONNECTION
if "auth" in type_name:
return ProviderErrorKind.AUTHENTICATION
if "ratelimit" in type_name:
return ProviderErrorKind.RATE_LIMIT
return ProviderErrorKind.API_ERROR
Original file line number Diff line number Diff line change
@@ -0,0 +1,319 @@
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

from __future__ import annotations

import logging
import threading
from typing import TYPE_CHECKING, Any

import data_designer.lazy_heavy_imports as lazy
from data_designer.engine.models.clients.base import ModelClient
from data_designer.engine.models.clients.errors import (
ProviderError,
ProviderErrorKind,
infer_error_kind_from_exception,
map_http_error_to_provider_error,
)
from data_designer.engine.models.clients.parsing import (
aextract_images_from_chat_response,
aextract_images_from_image_response,
aparse_chat_completion_response,
extract_embedding_vector,
extract_images_from_chat_response,
extract_images_from_image_response,
extract_usage,
parse_chat_completion_response,
)
from data_designer.engine.models.clients.retry import RetryConfig, create_retry_transport
from data_designer.engine.models.clients.types import (
ChatCompletionRequest,
ChatCompletionResponse,
EmbeddingRequest,
EmbeddingResponse,
ImageGenerationRequest,
ImageGenerationResponse,
TransportKwargs,
)

if TYPE_CHECKING:
import httpx

logger = logging.getLogger(__name__)


class OpenAICompatibleClient(ModelClient):
    """Native HTTP adapter for OpenAI-compatible provider APIs.

    Uses ``httpx`` with ``httpx_retries.RetryTransport`` for resilient HTTP
    calls. Concurrency / throttle policy is an orchestration concern and
    is not managed here — see ``ThrottleManager`` and ``AsyncTaskScheduler``.
    """

    # Relative API routes appended to the (trailing-slash-stripped) endpoint.
    _ROUTE_CHAT = "/chat/completions"
    _ROUTE_EMBEDDING = "/embeddings"
    _ROUTE_IMAGE = "/images/generations"
    # Request fields excluded from TransportKwargs for image generation;
    # generate_image/agenerate_image re-add the route-appropriate one explicitly.
    _IMAGE_EXCLUDE = frozenset({"messages", "prompt"})

    def __init__(
        self,
        *,
        provider_name: str,
        model_id: str,
        endpoint: str,
        api_key: str | None = None,
        retry_config: RetryConfig | None = None,
        max_parallel_requests: int = 32,
        timeout_s: float = 60.0,
        sync_client: httpx.Client | None = None,
        async_client: httpx.AsyncClient | None = None,
    ) -> None:
        """Configure the adapter; httpx clients are created lazily on first use.

        Args:
            provider_name: Provider identifier attached to raised ``ProviderError``s.
            model_id: Model identifier stored on this client.
            endpoint: Base URL of the OpenAI-compatible API; a trailing "/" is stripped.
            api_key: Optional bearer token sent via the ``Authorization`` header.
            retry_config: Retry policy forwarded to ``create_retry_transport``.
            max_parallel_requests: Connection-pool sizing hint only — this class
                performs no concurrency limiting itself.
            timeout_s: Default per-request timeout in seconds (overridable per request).
            sync_client: Pre-built ``httpx.Client``; bypasses lazy construction.
            async_client: Pre-built ``httpx.AsyncClient``; bypasses lazy construction.
        """
        self.provider_name = provider_name
        self._model_id = model_id
        self._endpoint = endpoint.rstrip("/")
        self._api_key = api_key
        self._timeout_s = timeout_s
        self._retry_config = retry_config

        # 2x headroom for burst traffic across domains; floor of 32/16 for low-concurrency configs.
        pool_max = max(32, 2 * max_parallel_requests)
        pool_keepalive = max(16, max_parallel_requests)
        self._limits = lazy.httpx.Limits(
            max_connections=pool_max,
            max_keepalive_connections=pool_keepalive,
        )
        # NOTE(review): one transport instance is shared by the lazily-created
        # sync AND async clients below — confirm httpx_retries.RetryTransport
        # supports both interfaces and that closing one client does not close
        # the transport out from under the other.
        self._transport = create_retry_transport(self._retry_config)
        self._client: httpx.Client | None = sync_client
        self._aclient: httpx.AsyncClient | None = async_client
        # Guards lazy creation of both clients (double-checked locking).
        self._init_lock = threading.Lock()

    def _get_sync_client(self) -> httpx.Client:
        """Return the sync client, creating it on first call (thread-safe)."""
        # Double-checked locking: cheap unlocked check, re-checked under the lock.
        if self._client is None:
            with self._init_lock:
                if self._client is None:
                    self._client = lazy.httpx.Client(
                        transport=self._transport,
                        limits=self._limits,
                        timeout=lazy.httpx.Timeout(self._timeout_s),
                    )
        return self._client

    def _get_async_client(self) -> httpx.AsyncClient:
        """Return the async client, creating it on first call (thread-safe)."""
        if self._aclient is None:
            with self._init_lock:
                if self._aclient is None:
                    self._aclient = lazy.httpx.AsyncClient(
                        transport=self._transport,
                        limits=self._limits,
                        timeout=lazy.httpx.Timeout(self._timeout_s),
                    )
        return self._aclient

    # -------------------------------------------------------------------
    # Capability checks — adapter-level (see ModelClient docstring)
    # -------------------------------------------------------------------

    def supports_chat_completion(self) -> bool:
        return True

    def supports_embeddings(self) -> bool:
        return True

    def supports_image_generation(self) -> bool:
        return True

    # -------------------------------------------------------------------
    # Chat completion
    # -------------------------------------------------------------------

    def completion(self, request: ChatCompletionRequest) -> ChatCompletionResponse:
        """POST a chat-completion request synchronously and parse the response."""
        transport = TransportKwargs.from_request(request)
        payload = {"model": request.model, "messages": request.messages, **transport.body}
        response_json = self._post_sync(self._ROUTE_CHAT, payload, transport.headers, request.model, transport.timeout)
        return parse_chat_completion_response(response_json)

    async def acompletion(self, request: ChatCompletionRequest) -> ChatCompletionResponse:
        """POST a chat-completion request asynchronously and parse the response."""
        transport = TransportKwargs.from_request(request)
        payload = {"model": request.model, "messages": request.messages, **transport.body}
        response_json = await self._apost(
            self._ROUTE_CHAT, payload, transport.headers, request.model, transport.timeout
        )
        return await aparse_chat_completion_response(response_json)

    # -------------------------------------------------------------------
    # Embeddings
    # -------------------------------------------------------------------

    def embeddings(self, request: EmbeddingRequest) -> EmbeddingResponse:
        """POST an embedding request synchronously and parse the response."""
        transport = TransportKwargs.from_request(request)
        payload = {"model": request.model, "input": request.inputs, **transport.body}
        response_json = self._post_sync(
            self._ROUTE_EMBEDDING, payload, transport.headers, request.model, transport.timeout
        )
        return _parse_embedding_json(response_json)

    async def aembeddings(self, request: EmbeddingRequest) -> EmbeddingResponse:
        """POST an embedding request asynchronously and parse the response."""
        transport = TransportKwargs.from_request(request)
        payload = {"model": request.model, "input": request.inputs, **transport.body}
        response_json = await self._apost(
            self._ROUTE_EMBEDDING, payload, transport.headers, request.model, transport.timeout
        )
        return _parse_embedding_json(response_json)

    # -------------------------------------------------------------------
    # Image generation
    # -------------------------------------------------------------------

    def generate_image(self, request: ImageGenerationRequest) -> ImageGenerationResponse:
        """Generate images synchronously.

        Requests carrying ``messages`` go to the chat route (multimodal chat
        models); otherwise the ``prompt`` is sent to the images route.
        """
        transport = TransportKwargs.from_request(request, exclude=self._IMAGE_EXCLUDE)
        if request.messages is not None:
            route = self._ROUTE_CHAT
            payload = {"model": request.model, "messages": request.messages, **transport.body}
        else:
            route = self._ROUTE_IMAGE
            payload = {"model": request.model, "prompt": request.prompt, **transport.body}
        response_json = self._post_sync(route, payload, transport.headers, request.model, transport.timeout)
        return _parse_image_json(response_json, is_chat_route=request.messages is not None)

    async def agenerate_image(self, request: ImageGenerationRequest) -> ImageGenerationResponse:
        """Async variant of ``generate_image`` with the same route selection."""
        transport = TransportKwargs.from_request(request, exclude=self._IMAGE_EXCLUDE)
        if request.messages is not None:
            route = self._ROUTE_CHAT
            payload = {"model": request.model, "messages": request.messages, **transport.body}
        else:
            route = self._ROUTE_IMAGE
            payload = {"model": request.model, "prompt": request.prompt, **transport.body}
        response_json = await self._apost(route, payload, transport.headers, request.model, transport.timeout)
        return await _aparse_image_json(response_json, is_chat_route=request.messages is not None)

    # -------------------------------------------------------------------
    # Lifecycle
    # -------------------------------------------------------------------

    def close(self) -> None:
        """Close the sync client if it exists.

        The async client cannot be closed here — ``aclose`` requires an event
        loop; callers in async contexts must use :meth:`aclose`.
        """
        if self._client is not None:
            self._client.close()
            self._client = None

    async def aclose(self) -> None:
        """Close both async and sync clients (async close first)."""
        if self._aclient is not None:
            await self._aclient.aclose()
            self._aclient = None
        if self._client is not None:
            self._client.close()
            self._client = None

    # -------------------------------------------------------------------
    # HTTP helpers
    # -------------------------------------------------------------------

    def _build_headers(self, extra_headers: dict[str, str]) -> dict[str, str]:
        """Merge base JSON/auth headers with per-request extras (extras win)."""
        headers: dict[str, str] = {"Content-Type": "application/json"}
        if self._api_key:
            headers["Authorization"] = f"Bearer {self._api_key}"
        if extra_headers:
            headers.update(extra_headers)
        return headers

    def _resolve_timeout(self, per_request: float | None) -> httpx.Timeout:
        """Return a Timeout from the per-request value, defaulting to the client-wide one."""
        return lazy.httpx.Timeout(per_request if per_request is not None else self._timeout_s)

    def _post_sync(
        self,
        route: str,
        payload: dict[str, Any],
        extra_headers: dict[str, str],
        model_name: str,
        timeout: float | None = None,
    ) -> dict[str, Any]:
        """POST ``payload`` to ``route`` and return the decoded JSON body.

        Raises:
            ProviderError: on transport failure, HTTP status >= 400, or a
                non-JSON response body.
        """
        url = f"{self._endpoint}{route}"
        headers = self._build_headers(extra_headers)
        try:
            response = self._get_sync_client().post(
                url, json=payload, headers=headers, timeout=self._resolve_timeout(timeout)
            )
        except Exception as exc:
            raise _wrap_transport_error(exc, self.provider_name, model_name) from exc
        if response.status_code >= 400:
            raise map_http_error_to_provider_error(
                response=response, provider_name=self.provider_name, model_name=model_name
            )
        return _parse_json_body(response, self.provider_name, model_name)

    async def _apost(
        self,
        route: str,
        payload: dict[str, Any],
        extra_headers: dict[str, str],
        model_name: str,
        timeout: float | None = None,
    ) -> dict[str, Any]:
        """Async variant of ``_post_sync`` with identical error mapping."""
        url = f"{self._endpoint}{route}"
        headers = self._build_headers(extra_headers)
        try:
            response = await self._get_async_client().post(
                url, json=payload, headers=headers, timeout=self._resolve_timeout(timeout)
            )
        except Exception as exc:
            raise _wrap_transport_error(exc, self.provider_name, model_name) from exc
        if response.status_code >= 400:
            raise map_http_error_to_provider_error(
                response=response, provider_name=self.provider_name, model_name=model_name
            )
        return _parse_json_body(response, self.provider_name, model_name)


# ---------------------------------------------------------------------------
# Response parsing helpers
# ---------------------------------------------------------------------------


def _parse_embedding_json(response_json: dict[str, Any]) -> EmbeddingResponse:
    """Translate a raw ``/embeddings`` JSON body into an ``EmbeddingResponse``."""
    items = response_json.get("data") or []
    vectors = [extract_embedding_vector(entry) for entry in items]
    return EmbeddingResponse(
        vectors=vectors,
        usage=extract_usage(response_json.get("usage")),
        raw=response_json,
    )


def _parse_image_json(response_json: dict[str, Any], *, is_chat_route: bool) -> ImageGenerationResponse:
    """Translate a raw image-generation JSON body into an ``ImageGenerationResponse``.

    ``is_chat_route`` selects the extractor, since chat-completion responses
    embed images differently than the dedicated images endpoint.
    """
    extract = extract_images_from_chat_response if is_chat_route else extract_images_from_image_response
    images = extract(response_json)
    return ImageGenerationResponse(
        images=images,
        usage=extract_usage(response_json.get("usage"), generated_images=len(images)),
        raw=response_json,
    )


async def _aparse_image_json(response_json: dict[str, Any], *, is_chat_route: bool) -> ImageGenerationResponse:
    """Async counterpart of ``_parse_image_json`` using the async extractors."""
    extract = aextract_images_from_chat_response if is_chat_route else aextract_images_from_image_response
    images = await extract(response_json)
    return ImageGenerationResponse(
        images=images,
        usage=extract_usage(response_json.get("usage"), generated_images=len(images)),
        raw=response_json,
    )


def _parse_json_body(response: httpx.Response, provider_name: str, model_name: str) -> dict[str, Any]:
    """Parse JSON from a successful HTTP response, wrapping decode errors as ``ProviderError``."""
    try:
        body = response.json()
    except Exception as exc:
        raise ProviderError(
            kind=ProviderErrorKind.API_ERROR,
            message=f"Provider {provider_name!r} returned a non-JSON response (status {response.status_code}).",
            status_code=response.status_code,
            provider_name=provider_name,
            model_name=model_name,
            cause=exc,
        ) from exc
    return body


def _wrap_transport_error(exc: Exception, provider_name: str, model_name: str) -> ProviderError:
    """Convert httpx transport exceptions into canonical ``ProviderError``."""
    kind = infer_error_kind_from_exception(exc)
    # Some transport exceptions stringify to ""; fall back to a generic message.
    message = str(exc) or f"Transport error from provider {provider_name!r}"
    return ProviderError(
        kind=kind,
        message=message,
        provider_name=provider_name,
        model_name=model_name,
        cause=exc,
    )
Loading
Loading