def _normalize_custom_voice_for_server_event_validation(value: Any) -> Any:
    """Return a copy of ``value`` with custom voice objects collapsed to their ID.

    Dicts and lists are rebuilt recursively, so the input is never mutated.
    Whenever a dict holds a ``"voice"`` key whose value is a mapping with a
    string ``"id"``, that mapping is replaced by the ID string in the copy.
    Any other value (including a ``"voice"`` mapping without a string ID) is
    processed like ordinary nested data; values that are neither ``dict`` nor
    ``list`` are returned unchanged.

    Args:
        value: An arbitrary JSON-like payload (typically a server event dict).

    Returns:
        The normalized structure, suitable for server-event validation.
    """
    # TODO: Remove this once generated Realtime server event models accept custom voice objects.
    recurse = _normalize_custom_voice_for_server_event_validation

    if isinstance(value, dict):
        flattened: dict[str, Any] = {}
        for name, child in value.items():
            # Only a mapping stored under "voice" is a candidate for collapsing.
            is_voice_object = name == "voice" and isinstance(child, Mapping)
            custom_id = child.get("id") if is_voice_object else None
            if isinstance(custom_id, str):
                flattened[name] = custom_id
            else:
                flattened[name] = recurse(child)
        return flattened

    if isinstance(value, list):
        return [recurse(element) for element in value]

    # Scalars and any other container types pass through untouched.
    return value
@pytest.mark.asyncio
async def test_custom_voice_response_events_update_response_sequencer(self, model, monkeypatch):
    """Dict-shaped custom voices should not block response.create sequencing."""
    sent_types: list[str] = []

    async def record_sent_type(event):
        # Stand-in for the raw sender: remember only the type of each payload.
        sent_types.append(event.type)

    class CustomVoiceRejectingAdapter:
        """Adapter stub that validates a dict-shaped voice as a plain string."""

        _string_adapter = TypeAdapter(str)

        def validate_python(self, event):
            node = event
            for key in ("response", "audio", "output"):
                node = node.get(key, {})
            voice = node.get("voice")
            if isinstance(voice, dict):
                self._string_adapter.validate_python(voice)
            return SimpleNamespace(type=event["type"])

    def voiced_event(event_type):
        # Fresh nested dicts per call so events never share a voice object.
        return {
            "type": event_type,
            "response": {"audio": {"output": {"voice": {"id": "voice_test"}}}},
        }

    monkeypatch.setattr(model, "_send_raw_message", record_sent_type)
    model._server_event_type_adapter = CustomVoiceRejectingAdapter()
    listener = AsyncMock()
    model.add_listener(listener)

    await model._send_user_input(RealtimeModelSendUserInput(user_input="hi"))
    await asyncio.sleep(0)

    assert sent_types == ["conversation.item.create", "response.create"]
    assert model._response_control == "create_requested"

    response_with_custom_voice = voiced_event("response.created")
    await model._handle_ws_event(response_with_custom_voice)

    assert model._ongoing_response is True
    assert model._response_control == "free"

    await model._handle_ws_event(voiced_event("response.done"))

    assert model._ongoing_response is False
    assert model._response_control == "free"

    # The first listener call must carry the original, unmodified event dict.
    raw_event = listener.on_event.call_args_list[0].args[0]
    assert raw_event.data is response_with_custom_voice
    voice_after = response_with_custom_voice["response"]["audio"]["output"]["voice"]
    assert voice_after == {"id": "voice_test"}

    tool_call = SimpleNamespace(
        id="item_1",
        previous_item_id=None,
        call_id="call_1",
        arguments="{}",
        name="lookup",
    )
    await model._send_tool_output(
        RealtimeModelSendToolOutput(tool_call=tool_call, output="ok", start_response=True)
    )
    await asyncio.sleep(0)

    assert sent_types == ["conversation.item.create", "response.create"] * 2
@pytest.mark.asyncio
async def test_handoff_session_update_preserves_custom_voice(self, mock_model):
    """A handoff's session update should still carry the dict-shaped custom voice."""
    custom_voice = {"id": "voice_test"}
    second_agent = RealtimeAgent(
        name="second_agent",
        instructions="second_agent_instructions",
        tools=[],
        handoffs=[],
    )
    first_agent = RealtimeAgent(
        name="first_agent",
        instructions="first_agent_instructions",
        tools=[],
        handoffs=[],
    )
    first_agent.handoffs = [second_agent]

    initial_settings = {"voice": custom_voice}
    session = RealtimeSession(
        mock_model,
        first_agent,
        None,
        model_config={"initial_model_settings": initial_settings},
    )

    handoff_call = RealtimeModelToolCallEvent(
        name=Handoff.default_tool_name(second_agent),
        call_id="call_789",
        arguments="{}",
    )
    await session._handle_tool_call(handoff_call)

    # First sent event is the session update; the second is checked for start_response.
    session_update_event = mock_model.sent_events[0]
    assert isinstance(session_update_event, RealtimeModelSendSessionUpdate)
    assert session_update_event.session_settings["voice"] == custom_voice
    follow_up_event = mock_model.sent_events[1]
    assert follow_up_event.start_response is True
"2026-03-05T23:17:34.072Z" }, + { url = "https://files.pythonhosted.org/packages/9d/1c/5d43735b2553baae2a5e899dcbcd0670a86930d993184d72ca909bf11c9b/openai-2.36.0-py3-none-any.whl", hash = "sha256:143f6194b548dbc2c921af1f1b03b9f14c85fed8a75b5b516f5bcc11a2a50c63", size = 1302361, upload-time = "2026-05-07T17:33:15.063Z" }, ] [[package]] @@ -2568,7 +2568,7 @@ requires-dist = [ { name = "mcp", marker = "python_full_version >= '3.10'", specifier = ">=1.19.0,<2" }, { name = "modal", marker = "extra == 'modal'", specifier = "==1.3.5" }, { name = "numpy", marker = "python_full_version >= '3.10' and extra == 'voice'", specifier = ">=2.2.0,<3" }, - { name = "openai", specifier = ">=2.26.0,<3" }, + { name = "openai", specifier = ">=2.36.0,<3" }, { name = "pydantic", specifier = ">=2.12.2,<3" }, { name = "pymongo", marker = "extra == 'mongodb'", specifier = ">=4.14" }, { name = "redis", marker = "extra == 'redis'", specifier = ">=7" },