From 3133cc007937561da5bb12ca08a1a0ac21e56192 Mon Sep 17 00:00:00 2001 From: 1wos <1wosomm1@gmail.com> Date: Thu, 14 May 2026 00:26:15 +0900 Subject: [PATCH] fix(tools): convert image/svg+xml to text in LoadArtifactsTool Gemini rejects every SVG MIME variant with `400 INVALID_ARGUMENT - Unsupported MIME type`, but `_is_inline_mime_type_supported` returns True for `image/svg+xml` via the `image/` prefix match, so SVG artifacts are forwarded as inline image data and crash the request. Treat SVG the same way #4028 treats CSV / JSON / XML: text-decode it and deliver it to the model as a text Part. - Add `_GEMINI_UNSUPPORTED_INLINE_SUBTYPES = frozenset({'image/svg+xml'})` and short-circuit `_is_inline_mime_type_supported` so SVG falls through to `_as_safe_part_for_llm` instead of being forwarded inline. - Add `image/svg+xml` to `_TEXT_LIKE_MIME_TYPES` so the fallback path utf-8 decodes the SVG markup instead of returning the binary placeholder text. - Add `test_load_artifacts_converts_svg_to_text` mirroring the existing CSV test. Verified against gemini-2.5-flash via google-genai 1.69.0: image/png, image/jpeg, image/webp, image/avif, image/gif, audio/mpeg, audio/mp3, video/mp4, video/webm, application/pdf all still accepted unchanged; image/svg+xml now lands as a text Part rather than crashing. --- src/google/adk/tools/load_artifacts_tool.py | 15 +++++++ .../tools/test_load_artifacts_tool.py | 43 +++++++++++++++++++ 2 files changed, 58 insertions(+) diff --git a/src/google/adk/tools/load_artifacts_tool.py b/src/google/adk/tools/load_artifacts_tool.py index ec717bad4c..0620797d75 100644 --- a/src/google/adk/tools/load_artifacts_tool.py +++ b/src/google/adk/tools/load_artifacts_tool.py @@ -35,10 +35,23 @@ 'video/', ) _GEMINI_SUPPORTED_INLINE_MIME_TYPES = frozenset({'application/pdf'}) +# MIME subtypes that match a supported prefix above but that Gemini +# rejects with 400 INVALID_ARGUMENT when sent as inline data. These +# must fall through to the text-conversion path in +# `_as_safe_part_for_llm` instead of being forwarded as inline image +# data. Verified empirically against gemini-2.5-flash via +# google-genai 1.69.0 on 2026-05-13. +_GEMINI_UNSUPPORTED_INLINE_SUBTYPES = frozenset({ + 'image/svg+xml', +}) _TEXT_LIKE_MIME_TYPES = frozenset({ 'application/csv', 'application/json', 'application/xml', + # SVG is XML-based and Gemini rejects it as inline image data (see + # _GEMINI_UNSUPPORTED_INLINE_SUBTYPES above), so it falls through here + # and is delivered to the model as text. + 'image/svg+xml', }) if TYPE_CHECKING: @@ -60,6 +73,8 @@ def _is_inline_mime_type_supported(mime_type: str | None) -> bool: normalized = _normalize_mime_type(mime_type) if not normalized: return False + if normalized in _GEMINI_UNSUPPORTED_INLINE_SUBTYPES: + return False return normalized.startswith(_GEMINI_SUPPORTED_INLINE_MIME_PREFIXES) or ( normalized in _GEMINI_SUPPORTED_INLINE_MIME_TYPES ) diff --git a/tests/unittests/tools/test_load_artifacts_tool.py b/tests/unittests/tools/test_load_artifacts_tool.py index 6a420574f0..c4fa71e676 100644 --- a/tests/unittests/tools/test_load_artifacts_tool.py +++ b/tests/unittests/tools/test_load_artifacts_tool.py @@ -144,6 +144,49 @@ async def test_load_artifacts_keeps_supported_mime_types(): assert artifact_part.inline_data.mime_type == 'application/pdf' +@mark.asyncio +async def test_load_artifacts_converts_svg_to_text(): + """`image/svg+xml` matches the `image/` prefix but is rejected by Gemini + with 400 INVALID_ARGUMENT, so it must fall through to the text-conversion + path instead of being forwarded as inline image data. + """ + artifact_name = 'logo.svg' + svg_bytes = ( + b'' + b'' + ) + artifact = types.Part( + inline_data=types.Blob(data=svg_bytes, mime_type='image/svg+xml') + ) + + tool_context = _StubToolContext({artifact_name: artifact}) + llm_request = LlmRequest( + contents=[ + types.Content( + role='user', + parts=[ + types.Part( + function_response=types.FunctionResponse( + name='load_artifacts', + response={'artifact_names': [artifact_name]}, + ) + ) + ], + ) + ] + ) + + await load_artifacts_tool.process_llm_request( + tool_context=tool_context, llm_request=llm_request + ) + + artifact_part = llm_request.contents[-1].parts[1] + # The SVG must NOT be forwarded as inline image data — Gemini would 400. + assert artifact_part.inline_data is None + # And the original SVG markup is delivered as a text part instead. + assert artifact_part.text == svg_bytes.decode('utf-8') + + def test_maybe_base64_to_bytes_decodes_standard_base64(): """Standard base64 encoded strings are decoded correctly.""" original = b'hello world'