From c10ed5a8f4f3bdd2dff5c69af8f66ad5f07491f6 Mon Sep 17 00:00:00 2001 From: Philip Munksgaard Date: Wed, 15 Apr 2026 12:06:10 +0200 Subject: [PATCH] fix: render multimodal content parts in span attributes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit extract_text_content/1 only handled :text parts; file/image/audio parts collapsed to empty strings, making multimodal LLM inputs invisible in traces. Now renders them as '[file: application/pdf, 1024 bytes]' placeholders by default. Set 'config :agent_obs, include_multimodal_data: true' to append the raw base64 data after the placeholder — useful for replay/playground in dev. --- guides/configuration.md | 21 +++++++ lib/agent_obs/handlers/phoenix/translator.ex | 42 ++++++++++--- .../handlers/phoenix/translator_test.exs | 62 +++++++++++++++++++ 3 files changed, 118 insertions(+), 7 deletions(-) diff --git a/guides/configuration.md b/guides/configuration.md index 3723512..a8ea0de 100644 --- a/guides/configuration.md +++ b/guides/configuration.md @@ -194,6 +194,27 @@ config :opentelemetry_exporter, ] ``` +#### Multimodal Content + +When LLM input messages contain non-text content parts (`:file`, `:image`, +`:audio`), the Phoenix handler renders them as compact placeholders by default: + +``` +[file: application/pdf, 124583 bytes] +``` + +This keeps spans small. To append the raw base64 data after the placeholder — useful for +Phoenix's playground/replay feature in development — opt in: + +```elixir +config :agent_obs, include_multimodal_data: true +``` + +**Warning:** Inlined base64 data can make spans very large (a 1 MB PDF becomes +~1.4 MB of base64 in the span attribute, sent over OTLP on every call). Leave +this off in production; enable it only in dev/staging environments where you +need to replay LLM calls from the trace UI. 
+ ### Generic Handler The Generic handler translates AgentObs events to standard OpenTelemetry spans diff --git a/lib/agent_obs/handlers/phoenix/translator.ex b/lib/agent_obs/handlers/phoenix/translator.ex index 948429c..57dbcb6 100644 --- a/lib/agent_obs/handlers/phoenix/translator.ex +++ b/lib/agent_obs/handlers/phoenix/translator.ex @@ -371,24 +371,52 @@ defmodule AgentObs.Handlers.Phoenix.Translator do end end - # Extract text from ContentPart structs or list of plain strings + # Extract text from ContentPart structs or list of plain strings. + # + # Non-text parts (file/image/audio) render as a placeholder by default + # (`[file: application/pdf, 1024 bytes]`) so traces stay small. Set + # `config :agent_obs, include_multimodal_data: true` to append the raw + # base64 data after the placeholder — useful for replay/playground in dev, + # but produces very large spans, so leave off in production. # Note: Only called when value is a list (see guard on line 329) defp extract_text_content([]), do: nil defp extract_text_content(content) when is_list(content) do + include_data? 
= Application.get_env(:agent_obs, :include_multimodal_data, false) + content - |> Enum.map_join("\n", fn - %{type: :text, text: text} -> text - %{text: text} -> text - text when is_binary(text) -> text - _ -> "" - end) + |> Enum.map_join("\n", &content_part_to_text(&1, include_data?)) |> case do "" -> nil text -> text end end + defp content_part_to_text(%{type: :text, text: text}, _) when is_binary(text), do: text + defp content_part_to_text(%{text: text}, _) when is_binary(text), do: text + defp content_part_to_text(text, _) when is_binary(text), do: text + + defp content_part_to_text(%{type: type, data: data, media_type: media_type}, true) + when type in [:file, :image, :audio] and is_binary(data) do + "[#{type}: #{media_type}, #{byte_size(data)} bytes]\n#{data}" + end + + defp content_part_to_text(%{type: type, data: data, media_type: media_type}, false) + when type in [:file, :image, :audio] and is_binary(data) do + "[#{type}: #{media_type}, #{byte_size(data)} bytes]" + end + + defp content_part_to_text(%{type: :image_url, url: url}, _) when is_binary(url) do + "[image_url: #{url}]" + end + + defp content_part_to_text(%{type: type, media_type: media_type}, _) do + "[#{type}: #{media_type}]" + end + + defp content_part_to_text(%{type: type}, _), do: "[#{type}]" + defp content_part_to_text(_, _), do: "" + # Helper to safely access tool call fields (handles both maps and structs) # Supports both flat format (name/arguments) and nested format (function: %{name, arguments}) defp get_tool_call_field(tool_call, field) when is_map(tool_call) do diff --git a/test/agent_obs/handlers/phoenix/translator_test.exs b/test/agent_obs/handlers/phoenix/translator_test.exs index 94dd9c7..c365b20 100644 --- a/test/agent_obs/handlers/phoenix/translator_test.exs +++ b/test/agent_obs/handlers/phoenix/translator_test.exs @@ -189,6 +189,68 @@ defmodule AgentObs.Handlers.Phoenix.TranslatorTest do assert is_binary(content) assert String.contains?(content, "Entity") end + + test "renders 
multimodal content parts as placeholders by default" do + pdf_data = String.duplicate("x", 1024) + + metadata = %{ + model: "gpt-4o", + input_messages: [ + %{ + role: "user", + content: [ + %{type: :text, text: "Extract from this document."}, + %{type: :file, data: pdf_data, media_type: "application/pdf"} + ] + } + ] + } + + attributes = Translator.from_start_metadata(:llm, metadata) + content = attributes["llm.input_messages.0.message.content"] + + assert is_binary(content) + assert String.contains?(content, "Extract from this document.") + assert String.contains?(content, "[file: application/pdf, 1024 bytes]") + refute String.contains?(content, pdf_data) + end + + test "renders image content parts with media type and size by default" do + image_data = String.duplicate("x", 2048) + + metadata = %{ + model: "gpt-4o", + input_messages: [ + %{role: "user", content: [%{type: :image, data: image_data, media_type: "image/png"}]} + ] + } + + attributes = Translator.from_start_metadata(:llm, metadata) + assert attributes["llm.input_messages.0.message.content"] == "[image: image/png, 2048 bytes]" + end + + test "inlines raw multimodal data when include_multimodal_data is enabled" do + Application.put_env(:agent_obs, :include_multimodal_data, true) + on_exit(fn -> Application.delete_env(:agent_obs, :include_multimodal_data) end) + + pdf_data = String.duplicate("x", 1024) + + metadata = %{ + model: "gpt-4o", + input_messages: [ + %{ + role: "user", + content: [%{type: :file, data: pdf_data, media_type: "application/pdf"}] + } + ] + } + + attributes = Translator.from_start_metadata(:llm, metadata) + content = attributes["llm.input_messages.0.message.content"] + + assert String.contains?(content, "[file: application/pdf, 1024 bytes]") + assert String.contains?(content, pdf_data) + end end describe "from_stop_metadata/3 for agent events" do