Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions guides/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,27 @@ config :opentelemetry_exporter,
]
```

#### Multimodal Content

When LLM input messages contain non-text content parts (`:file`, `:image`,
`:audio`), the Phoenix handler renders them as compact placeholders by default:

```
[file: application/pdf, 124583 bytes]
```

This keeps spans small. To inline the raw base64 data instead — useful for
Phoenix's playground/replay feature in development — opt in:

```elixir
config :agent_obs, include_multimodal_data: true
```

**Warning:** Inlined base64 data can make spans very large: base64 encoding
expands data by a factor of 4/3, so a 1 MB PDF becomes roughly 1.33 MB of text
in the span attribute, and that payload is sent over OTLP on every call. Leave
this off in production; enable it only in dev/staging environments where you
need to replay LLM calls from the trace UI.

### Generic Handler

The Generic handler translates AgentObs events to standard OpenTelemetry spans
Expand Down
42 changes: 35 additions & 7 deletions lib/agent_obs/handlers/phoenix/translator.ex
Original file line number Diff line number Diff line change
Expand Up @@ -371,24 +371,52 @@ defmodule AgentObs.Handlers.Phoenix.Translator do
end
end

# Extract text from ContentPart structs or a list of plain strings.
#
# Each part is rendered by `content_part_to_text/2` and the results are joined
# with newlines. Returns `nil` for an empty list or when the joined result is
# the empty string.
#
# Non-text parts (file/image/audio) render as a placeholder by default
# (`[file: application/pdf, 1024 bytes]`) so traces stay small. Set
# `config :agent_obs, include_multimodal_data: true` to inline the raw
# base64 data instead — useful for replay/playground in dev, but produces
# very large spans, so leave off in production.
#
# Note: only called when the value is a list (see the guard at the call site).
defp extract_text_content([]), do: nil

defp extract_text_content(content) when is_list(content) do
  # Read at runtime (not compile time) so the flag can be toggled per
  # environment and in tests.
  include_data? = Application.get_env(:agent_obs, :include_multimodal_data, false)

  # A single rendering pass: every part goes through content_part_to_text/2
  # exactly once. Piping the joined string into a second Enum.map_join/3
  # would raise, because binaries are not enumerable.
  content
  |> Enum.map_join("\n", &content_part_to_text(&1, include_data?))
  |> case do
    "" -> nil
    text -> text
  end
end

# Renders a single content part as text for a span attribute.
#
# Arguments:
#   * part          - a map/struct content part or a plain binary
#   * include_data? - when exactly `true`, the raw `data` of file/image/audio
#                     parts is appended after the placeholder line
#
# Rendering rules, in match order:
#   * text parts and plain binaries    -> the text itself
#   * file/image/audio with binary data -> "[type: media_type, N bytes]",
#     followed by "\n" and the data when `include_data?` is `true`
#   * image_url parts                  -> "[image_url: url]"
#   * any part with type + media_type  -> "[type: media_type]"
#   * any part with only a type        -> "[type]"
#   * anything else                    -> ""
defp content_part_to_text(%{type: :text, text: text}, _) when is_binary(text), do: text
defp content_part_to_text(%{text: text}, _) when is_binary(text), do: text
defp content_part_to_text(text, _) when is_binary(text), do: text

defp content_part_to_text(%{type: type, data: data, media_type: media_type}, include_data?)
     when type in [:file, :image, :audio] and is_binary(data) do
  placeholder = "[#{type}: #{media_type}, #{byte_size(data)} bytes]"

  # Only the literal `true` opts in to inlining. Any other configured value
  # (including truthy non-booleans such as "true") keeps the compact
  # placeholder, so a misconfigured flag can never bloat spans and still
  # reports the payload size.
  if include_data? == true do
    placeholder <> "\n" <> data
  else
    placeholder
  end
end

defp content_part_to_text(%{type: :image_url, url: url}, _) when is_binary(url) do
  "[image_url: #{url}]"
end

# Typed part without usable binary data: the size is unknown, so omit it.
defp content_part_to_text(%{type: type, media_type: media_type}, _) do
  "[#{type}: #{media_type}]"
end

defp content_part_to_text(%{type: type}, _), do: "[#{type}]"
defp content_part_to_text(_, _), do: ""

# Helper to safely access tool call fields (handles both maps and structs)
# Supports both flat format (name/arguments) and nested format (function: %{name, arguments})
defp get_tool_call_field(tool_call, field) when is_map(tool_call) do
Expand Down
62 changes: 62 additions & 0 deletions test/agent_obs/handlers/phoenix/translator_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,68 @@ defmodule AgentObs.Handlers.Phoenix.TranslatorTest do
assert is_binary(content)
assert String.contains?(content, "Entity")
end

test "renders multimodal content parts as placeholders by default" do
  # 1 KiB stand-in for a file payload; it must never leak into the span.
  payload = String.duplicate("x", 1024)

  user_message = %{
    role: "user",
    content: [
      %{type: :text, text: "Extract from this document."},
      %{type: :file, data: payload, media_type: "application/pdf"}
    ]
  }

  attributes =
    Translator.from_start_metadata(:llm, %{model: "gpt-4o", input_messages: [user_message]})

  rendered = attributes["llm.input_messages.0.message.content"]

  assert is_binary(rendered)
  # The text part is kept verbatim and the file part shows up as a placeholder.
  assert rendered =~ "Extract from this document."
  assert rendered =~ "[file: application/pdf, 1024 bytes]"
  # The raw payload itself is not inlined by default.
  refute rendered =~ payload
end

test "renders image content parts with media type and size by default" do
  # A message whose only content part is a 2 KiB image.
  image_part = %{
    type: :image,
    data: String.duplicate("x", 2048),
    media_type: "image/png"
  }

  attributes =
    Translator.from_start_metadata(:llm, %{
      model: "gpt-4o",
      input_messages: [%{role: "user", content: [image_part]}]
    })

  rendered = attributes["llm.input_messages.0.message.content"]

  # With no text parts, the placeholder is the entire content.
  assert rendered == "[image: image/png, 2048 bytes]"
end

test "inlines raw multimodal data when include_multimodal_data is enabled" do
  # The application env is global state. Capture whatever was configured
  # before this test so on_exit can restore it exactly, instead of
  # unconditionally deleting a value another test or config file relies on.
  previous = Application.fetch_env(:agent_obs, :include_multimodal_data)

  on_exit(fn ->
    case previous do
      {:ok, value} -> Application.put_env(:agent_obs, :include_multimodal_data, value)
      :error -> Application.delete_env(:agent_obs, :include_multimodal_data)
    end
  end)

  Application.put_env(:agent_obs, :include_multimodal_data, true)

  pdf_data = String.duplicate("x", 1024)

  metadata = %{
    model: "gpt-4o",
    input_messages: [
      %{
        role: "user",
        content: [%{type: :file, data: pdf_data, media_type: "application/pdf"}]
      }
    ]
  }

  attributes = Translator.from_start_metadata(:llm, metadata)
  content = attributes["llm.input_messages.0.message.content"]

  # The placeholder header is still present, followed by the raw payload.
  assert String.contains?(content, "[file: application/pdf, 1024 bytes]")
  assert String.contains?(content, pdf_data)
end
end

describe "from_stop_metadata/3 for agent events" do
Expand Down