Skip to content

Commit 092635c

Browse files
committed
test: fix test case to be actually useful
1 parent f3f3f24 commit 092635c

1 file changed

Lines changed: 24 additions & 21 deletions

File tree

tests/integrations/openai_agents/test_openai_agents.py

Lines changed: 24 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -2219,17 +2219,23 @@ async def test_streaming_ttft_on_chat_span(sentry_init, test_agent):
22192219
Events WITHOUT delta (like ResponseCompletedEvent, ResponseCreatedEvent, etc.)
22202220
should NOT trigger TTFT.
22212221
"""
2222-
import time
2222+
from sentry_sdk.integrations.openai_agents.patches.models import (
2223+
_create_get_model_wrapper,
2224+
)
22232225

22242226
sentry_init(
22252227
integrations=[OpenAIAgentsIntegration()],
22262228
traces_sample_rate=1.0,
22272229
)
22282230

2229-
# Create a mock model that returns a stream_response generator
2231+
# Create a mock model with stream_response and get_response
22302232
class MockModel:
22312233
model = "gpt-4"
22322234

2235+
async def get_response(self, *args, **kwargs):
2236+
# Not used in this test, but required by the wrapper
2237+
pass
2238+
22332239
async def stream_response(self, *args, **kwargs):
22342240
# First event: ResponseCreatedEvent (no delta - should NOT trigger TTFT)
22352241
created_event = MagicMock(spec=["type", "sequence_number"])
@@ -2240,12 +2246,10 @@ async def stream_response(self, *args, **kwargs):
22402246
await asyncio.sleep(0.05) # 50ms delay
22412247

22422248
# Second event: ResponseTextDeltaEvent (HAS delta - triggers TTFT)
2243-
# This simulates the first actual content token
22442249
text_delta_event = MagicMock(spec=["delta", "type", "content_index"])
22452250
text_delta_event.delta = "Hello"
22462251
text_delta_event.type = "response.output_text.delta"
22472252
yield text_delta_event
2248-
await asyncio.sleep(0.05) # 50ms delay
22492253

22502254
# Third event: more text content (also has delta, but TTFT already recorded)
22512255
text_delta_event2 = MagicMock(spec=["delta", "type", "content_index"])
@@ -2266,33 +2270,32 @@ async def stream_response(self, *args, **kwargs):
22662270
completed_event.response.output = []
22672271
yield completed_event
22682272

2269-
mock_model = MockModel()
2270-
2271-
with sentry_sdk.start_transaction(name="test_ttft", sampled=True) as transaction:
2272-
# Simulate calling the wrapped stream_response logic
2273-
from sentry_sdk.integrations.openai_agents.spans import ai_client_span
2273+
# Create a mock original _get_model that returns our mock model
2274+
def mock_get_model(agent, run_config):
2275+
return MockModel()
22742276

2275-
with ai_client_span(test_agent, {}) as span:
2276-
span.set_data(SPANDATA.GEN_AI_RESPONSE_STREAMING, True)
2277+
# Wrap it with our integration wrapper
2278+
wrapped_get_model = _create_get_model_wrapper(mock_get_model)
22772279

2278-
ttft_recorded = False
2279-
# Capture start time locally (same as production code after race condition fix)
2280-
start_time = time.perf_counter()
2280+
with sentry_sdk.start_transaction(name="test_ttft", sampled=True) as transaction:
2281+
# Get the wrapped model (this applies the stream_response wrapper)
2282+
wrapped_model = wrapped_get_model(None, test_agent, MagicMock())
22812283

2282-
async for event in mock_model.stream_response():
2283-
# This is the same logic used in the actual integration
2284-
if not ttft_recorded and hasattr(event, "delta"):
2285-
ttft = time.perf_counter() - start_time
2286-
span.set_data(SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN, ttft)
2287-
ttft_recorded = True
2284+
# Call the wrapped stream_response and consume all events
2285+
async for _event in wrapped_model.stream_response():
2286+
pass
22882287

2289-
# Verify TTFT is recorded on the chat span (inside transaction context)
2288+
# Verify TTFT is recorded on the chat span (must be inside transaction context)
22902289
chat_spans = [
22912290
s for s in transaction._span_recorder.spans if s.op == "gen_ai.chat"
22922291
]
22932292
assert len(chat_spans) >= 1
22942293
chat_span = chat_spans[0]
2294+
22952295
assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in chat_span._data
22962296
ttft_value = chat_span._data[SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN]
22972297
# TTFT should exceed 40ms (the 50ms simulated delay minus some variance) and stay well under 1s
22982298
assert 0.04 < ttft_value < 1.0, f"TTFT {ttft_value} should be around 50ms"
2299+
2300+
# Verify streaming flag is set
2301+
assert chat_span._data.get(SPANDATA.GEN_AI_RESPONSE_STREAMING) is True

0 commit comments

Comments
 (0)