From 7c994024c4d8b0d79e90f4956e148ad0f6e94218 Mon Sep 17 00:00:00 2001 From: Sohan Kshirsagar Date: Wed, 25 Feb 2026 11:36:59 -0800 Subject: [PATCH] fix: inbound replay span not sent to CLI in Django/FastAPI REPLAY mode --- .cursor/BUGBOT.md | 12 +++++ .../django/e2e-tests/.tusk/config.yaml | 1 - drift/instrumentation/django/middleware.py | 54 +++++++++++++++++-- .../fastapi/instrumentation.py | 5 +- 4 files changed, 65 insertions(+), 7 deletions(-) diff --git a/.cursor/BUGBOT.md b/.cursor/BUGBOT.md index 151a00b..8339447 100644 --- a/.cursor/BUGBOT.md +++ b/.cursor/BUGBOT.md @@ -3,3 +3,15 @@ ## Instrumentation Guidelines - When adding a new instrumentation, the README must be updated to document the new instrumentation. + +## REPLAY Mode: Context Ordering and OUTPUT_VALUE on Inbound Spans + +When implementing or modifying a REPLAY mode handler (e.g., `_handle_replay_request`) in any framework instrumentation (Django, FastAPI, Flask, WSGI, etc.): + +1. **`span.end()` MUST be called BEFORE `replay_trace_id_context.reset()`.** + `TdSpanProcessor.on_end()` is triggered synchronously by `span.end()` and reads `replay_trace_id_context.get()` to route the inbound replay span to the CLI. If the context is reset first, the processor silently drops the span and the CLI never receives it. + +2. **`OUTPUT_VALUE` MUST be set on the server span before `span.end()` in REPLAY mode.** + The CLI determines pass/fail using the raw HTTP response it receives directly, but the backend/UI uses the `OUTPUT_VALUE` from the inbound replay span to populate `span_result_recording` for the "Expected vs Actual" diff view. If OUTPUT_VALUE is missing, the UI shows expected data on the left and empty `{}` on the right — appearing as a deviation even though the test passed. + +3. **E2E tests will NOT catch these bugs** because they only check the CLI's `passed` boolean, which is based on the raw HTTP response comparison — not the inbound replay span data. diff --git a/drift/instrumentation/django/e2e-tests/.tusk/config.yaml b/drift/instrumentation/django/e2e-tests/.tusk/config.yaml index e31d4d9..c39e52e 100644 --- a/drift/instrumentation/django/e2e-tests/.tusk/config.yaml +++ b/drift/instrumentation/django/e2e-tests/.tusk/config.yaml @@ -25,4 +25,3 @@ recording: replay: enable_telemetry: false - diff --git a/drift/instrumentation/django/middleware.py b/drift/instrumentation/django/middleware.py index 464ce85..2e571c6 100644 --- a/drift/instrumentation/django/middleware.py +++ b/drift/instrumentation/django/middleware.py @@ -144,15 +144,19 @@ def _handle_replay_request(self, request: HttpRequest, sdk) -> HttpResponse: try: with SpanUtils.with_span(span_info): response = self.get_response(request) - # REPLAY mode: don't capture the span (it's already recorded) - # But do normalize the response so comparison succeeds response = self._normalize_html_response(response) + + # Capture response data on the span so the inbound replay span + # sent to the CLI includes the actual OUTPUT_VALUE for UI display + self._capture_replay_output(request, response, span_info) + return response finally: - # Reset context + # End span BEFORE resetting context so that TdSpanProcessor.on_end() + # can still read replay_trace_id_context to send the inbound span + span_info.span.end() span_kind_context.reset(span_kind_token) replay_trace_id_context.reset(replay_token) - span_info.span.end() def _record_request(self, request: HttpRequest, sdk, is_pre_app_start: bool) -> HttpResponse: """Handle request in RECORD mode. @@ -262,6 +266,48 @@ def process_view( if route: request._drift_route_template = route # type: ignore + def _capture_replay_output(self, request: HttpRequest, response: HttpResponse, span_info: SpanInfo) -> None: + """Capture response data on the span for REPLAY mode. + + Sets OUTPUT_VALUE so the inbound replay span sent to the CLI includes + the actual response for UI comparison. Skips RECORD-mode concerns like + transforms, trace blocking, and schema merges. + + Args: + request: Django HttpRequest object + response: Django HttpResponse object + span_info: SpanInfo containing trace/span IDs and span reference + """ + if not span_info.span.is_recording(): + return + + status_code = response.status_code + status_message = response.reason_phrase if hasattr(response, "reason_phrase") else "" + response_headers = dict(response.items()) if hasattr(response, "items") else {} + + response_body = None + if hasattr(response, "content"): + content = response.content + if isinstance(content, bytes) and len(content) > 0: + response_body = content + + if response_body: + from .html_utils import normalize_html_body + + content_type = response_headers.get("Content-Type", "") + content_encoding = response_headers.get("Content-Encoding", "") + response_body = normalize_html_body(response_body, content_type, content_encoding) + + output_value = build_output_value( + status_code, + status_message, + response_headers, + response_body, + None, + ) + + span_info.span.set_attribute(TdSpanAttributes.OUTPUT_VALUE, json.dumps(output_value)) + def _normalize_html_response(self, response: HttpResponse) -> HttpResponse: """Normalize HTML response body for REPLAY mode comparison. diff --git a/drift/instrumentation/fastapi/instrumentation.py b/drift/instrumentation/fastapi/instrumentation.py index b8dcef2..eaa125f 100644 --- a/drift/instrumentation/fastapi/instrumentation.py +++ b/drift/instrumentation/fastapi/instrumentation.py @@ -226,10 +226,11 @@ async def wrapped_send(message: dict[str, Any]) -> None: transform_engine, ) finally: - # Reset context + # End span BEFORE resetting context so that TdSpanProcessor.on_end() + # can still read replay_trace_id_context to send the inbound span + span_info.span.end() span_kind_context.reset(span_kind_token) replay_trace_id_context.reset(replay_token) - span_info.span.end() async def _record_request(