@@ -2219,17 +2219,23 @@ async def test_streaming_ttft_on_chat_span(sentry_init, test_agent):
22192219 Events WITHOUT delta (like ResponseCompletedEvent, ResponseCreatedEvent, etc.)
22202220 should NOT trigger TTFT.
22212221 """
2222- import time
2222+ from sentry_sdk .integrations .openai_agents .patches .models import (
2223+ _create_get_model_wrapper ,
2224+ )
22232225
22242226 sentry_init (
22252227 integrations = [OpenAIAgentsIntegration ()],
22262228 traces_sample_rate = 1.0 ,
22272229 )
22282230
2229- # Create a mock model that returns a stream_response generator
2231+ # Create a mock model with stream_response and get_response
22302232 class MockModel :
22312233 model = "gpt-4"
22322234
2235+ async def get_response (self , * args , ** kwargs ):
2236+ # Not used in this test, but required by the wrapper
2237+ pass
2238+
22332239 async def stream_response (self , * args , ** kwargs ):
22342240 # First event: ResponseCreatedEvent (no delta - should NOT trigger TTFT)
22352241 created_event = MagicMock (spec = ["type" , "sequence_number" ])
@@ -2240,12 +2246,10 @@ async def stream_response(self, *args, **kwargs):
22402246 await asyncio .sleep (0.05 ) # 50ms delay
22412247
22422248 # Second event: ResponseTextDeltaEvent (HAS delta - triggers TTFT)
2243- # This simulates the first actual content token
22442249 text_delta_event = MagicMock (spec = ["delta" , "type" , "content_index" ])
22452250 text_delta_event .delta = "Hello"
22462251 text_delta_event .type = "response.output_text.delta"
22472252 yield text_delta_event
2248- await asyncio .sleep (0.05 ) # 50ms delay
22492253
22502254 # Third event: more text content (also has delta, but TTFT already recorded)
22512255 text_delta_event2 = MagicMock (spec = ["delta" , "type" , "content_index" ])
@@ -2266,33 +2270,32 @@ async def stream_response(self, *args, **kwargs):
22662270 completed_event .response .output = []
22672271 yield completed_event
22682272
2269- mock_model = MockModel ()
2270-
2271- with sentry_sdk .start_transaction (name = "test_ttft" , sampled = True ) as transaction :
2272- # Simulate calling the wrapped stream_response logic
2273- from sentry_sdk .integrations .openai_agents .spans import ai_client_span
2273+ # Create a mock original _get_model that returns our mock model
2274+ def mock_get_model (agent , run_config ):
2275+ return MockModel ()
22742276
2275- with ai_client_span ( test_agent , {}) as span :
2276- span . set_data ( SPANDATA . GEN_AI_RESPONSE_STREAMING , True )
2277+ # Wrap it with our integration wrapper
2278+ wrapped_get_model = _create_get_model_wrapper ( mock_get_model )
22772279
2278- ttft_recorded = False
2279- # Capture start time locally (same as production code after race condition fix )
2280- start_time = time . perf_counter ( )
2280+ with sentry_sdk . start_transaction ( name = "test_ttft" , sampled = True ) as transaction :
2281+ # Get the wrapped model (this applies the stream_response wrapper )
2282+ wrapped_model = wrapped_get_model ( None , test_agent , MagicMock () )
22812283
2282- async for event in mock_model .stream_response ():
2283- # This is the same logic used in the actual integration
2284- if not ttft_recorded and hasattr (event , "delta" ):
2285- ttft = time .perf_counter () - start_time
2286- span .set_data (SPANDATA .GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN , ttft )
2287- ttft_recorded = True
2284+ # Call the wrapped stream_response and consume all events
2285+ async for _event in wrapped_model .stream_response ():
2286+ pass
22882287
2289- # Verify TTFT is recorded on the chat span (inside transaction context)
2288+ # Verify TTFT is recorded on the chat span (must be inside transaction context)
22902289 chat_spans = [
22912290 s for s in transaction ._span_recorder .spans if s .op == "gen_ai.chat"
22922291 ]
22932292 assert len (chat_spans ) >= 1
22942293 chat_span = chat_spans [0 ]
2294+
22952295 assert SPANDATA .GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in chat_span ._data
22962296 ttft_value = chat_span ._data [SPANDATA .GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN ]
22972297 # TTFT should exceed 40ms (the 50ms simulated delay minus some variance) and stay below 1s
22982298 assert 0.04 < ttft_value < 1.0 , f"TTFT { ttft_value } should be around 50ms"
2299+
2300+ # Verify streaming flag is set
2301+ assert chat_span ._data .get (SPANDATA .GEN_AI_RESPONSE_STREAMING ) is True
0 commit comments