Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ dependencies = [
"jsonpath-ng>=1.7.0",
"mcp==1.26.0",
"langchain-mcp-adapters==0.2.1",
"pillow>=12.1.1",
]

classifiers = [
Expand Down
16 changes: 12 additions & 4 deletions src/uipath_langchain/agent/multimodal/__init__.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,27 @@
"""Multimodal LLM input handling (images, PDFs, etc.)."""

from .invoke import (
build_file_content_block,
build_file_content_blocks_for,
llm_call_with_files,
)
from .types import IMAGE_MIME_TYPES, FileInfo
from .utils import download_file_base64, is_image, is_pdf, sanitize_filename
from .types import IMAGE_MIME_TYPES, TIFF_MIME_TYPES, FileInfo
from .utils import (
download_file_base64,
is_image,
is_pdf,
is_tiff,
sanitize_filename,
)

__all__ = [
"FileInfo",
"IMAGE_MIME_TYPES",
"build_file_content_block",
"TIFF_MIME_TYPES",
"build_file_content_blocks_for",
"download_file_base64",
"is_image",
"is_pdf",
"is_tiff",
"llm_call_with_files",
"sanitize_filename",
]
50 changes: 32 additions & 18 deletions src/uipath_langchain/agent/multimodal/invoke.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,47 +13,61 @@
from langchain_core.messages.content import create_file_block, create_image_block

from .types import MAX_FILE_SIZE_BYTES, FileInfo
from .utils import download_file_base64, is_image, is_pdf, sanitize_filename
from .utils import (
download_file_base64,
is_image,
is_pdf,
is_tiff,
sanitize_filename,
stream_tiff_to_content_blocks,
)

logger = logging.getLogger("uipath")


async def build_file_content_block(
async def build_file_content_blocks_for(
file_info: FileInfo,
*,
max_size: int = MAX_FILE_SIZE_BYTES,
) -> DataContentBlock:
"""Build a LangChain content block for a file attachment.
) -> list[DataContentBlock]:
"""Build LangChain content blocks for a single file attachment.

Downloads the file with size enforcement and creates the content block.
Size validation happens during download (via Content-Length check and
streaming guard) to avoid loading oversized files into memory.
Handles all supported MIME types in one place: images, PDFs, and
TIFFs (multi-page, converted to individual PNG blocks).

Args:
file_info: File URL, name, and MIME type.
max_size: Maximum allowed raw file size in bytes. LLM providers
enforce payload limits; base64 encoding adds ~30% overhead.

Returns:
A DataContentBlock for the file (image or PDF).
A list of DataContentBlock instances for the file.

Raises:
ValueError: If the MIME type is not supported or the file exceeds
the size limit for LLM payloads.
"""
if is_tiff(file_info.mime_type):
try:
return await stream_tiff_to_content_blocks(file_info.url, max_size=max_size)
except ValueError as exc:
raise ValueError(f"File '{file_info.name}': {exc}") from exc

try:
base64_file = await download_file_base64(file_info.url, max_size=max_size)
except ValueError as exc:
raise ValueError(f"File '{file_info.name}': {exc}") from exc

if is_image(file_info.mime_type):
return create_image_block(base64=base64_file, mime_type=file_info.mime_type)
return [create_image_block(base64=base64_file, mime_type=file_info.mime_type)]
if is_pdf(file_info.mime_type):
return create_file_block(
base64=base64_file,
mime_type=file_info.mime_type,
filename=sanitize_filename(file_info.name),
)
return [
create_file_block(
base64=base64_file,
mime_type=file_info.mime_type,
filename=sanitize_filename(file_info.name),
)
]

raise ValueError(f"Unsupported mime_type={file_info.mime_type}")

Expand All @@ -75,8 +89,8 @@ async def build_file_content_blocks(files: list[FileInfo]) -> list[DataContentBl

file_content_blocks: list[DataContentBlock] = []
for file in files:
block = await build_file_content_block(file)
file_content_blocks.append(block)
blocks = await build_file_content_blocks_for(file)
file_content_blocks.extend(blocks)
return file_content_blocks


Expand Down Expand Up @@ -111,8 +125,8 @@ async def llm_call_with_files(

content_blocks: list[Any] = []
for file_info in files:
content_block = await build_file_content_block(file_info)
content_blocks.append(content_block)
blocks = await build_file_content_blocks_for(file_info)
content_blocks.extend(blocks)

file_message = HumanMessage(content_blocks=content_blocks)
all_messages = list(messages) + [file_message]
Expand Down
5 changes: 5 additions & 0 deletions src/uipath_langchain/agent/multimodal/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@
"image/webp",
}

# MIME types that is_tiff() accepts as TIFF. is_tiff() lower-cases the
# incoming MIME string before the membership test, so every entry listed
# here must itself be lowercase.
TIFF_MIME_TYPES: set[str] = {
"image/tiff",
"image/x-tiff",
}


@dataclass
class FileInfo:
Expand Down
91 changes: 83 additions & 8 deletions src/uipath_langchain/agent/multimodal/utils.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
"""Utility functions for multimodal file handling."""

import base64
import io
import re
from collections.abc import AsyncIterator
from contextlib import asynccontextmanager

import httpx
from langchain_core.messages import DataContentBlock
from uipath._utils._ssl_context import get_httpx_client_kwargs

from .types import IMAGE_MIME_TYPES
from .types import IMAGE_MIME_TYPES, TIFF_MIME_TYPES


def sanitize_filename(filename: str) -> str:
Expand Down Expand Up @@ -37,6 +40,11 @@ def is_image(mime_type: str) -> bool:
return mime_type.lower() in IMAGE_MIME_TYPES


def is_tiff(mime_type: str) -> bool:
    """Return True when ``mime_type`` is one of the known TIFF MIME types.

    The comparison is case-insensitive: the value is lower-cased before it
    is looked up in ``TIFF_MIME_TYPES``.
    """
    normalized = mime_type.lower()
    return normalized in TIFF_MIME_TYPES


def _format_mb(size_bytes: int, decimals: int = 1) -> str:
"""Format a byte count as MB.

Expand Down Expand Up @@ -97,22 +105,28 @@ async def encode_streamed_base64(
return result


async def download_file_base64(url: str, *, max_size: int = 0) -> str:
"""Download a file from a URL and return its content as a base64 string.
@asynccontextmanager
async def _stream_download(url: str, *, max_size: int = 0):
"""Stream an HTTP download with size enforcement.

Yields the validated response object. Checks Content-Length upfront
and raises ValueError if the file is known to exceed the limit.

Args:
url: The URL to download from.
max_size: Maximum allowed file size in bytes. 0 means unlimited.

Yields:
The httpx response object, ready for streaming via aiter_bytes().

Raises:
ValueError: If the file exceeds max_size.
ValueError: If the file exceeds max_size (Content-Length check).
httpx.HTTPStatusError: If the HTTP request fails.
"""
async with httpx.AsyncClient(**get_httpx_client_kwargs()) as client:
async with client.stream("GET", url) as response:
response.raise_for_status()

# Fast reject via Content-Length before reading the body
if max_size > 0:
content_length = response.headers.get("content-length")
if content_length:
Expand All @@ -130,6 +144,67 @@ async def download_file_base64(url: str, *, max_size: int = 0) -> str:
f" limit for Agent LLM payloads"
)

return await encode_streamed_base64(
response.aiter_bytes(), max_size=max_size
)
yield response


def _tiff_to_png_blocks(buf: io.BytesIO) -> list[DataContentBlock]:
    """Decode every page of the TIFF held in ``buf`` into PNG image blocks.

    Synchronous and CPU-bound; intended to be run off the event loop.
    """
    # Imported lazily so Pillow / langchain content helpers are only loaded
    # when a TIFF is actually processed.
    from langchain_core.messages.content import create_image_block
    from PIL import Image, ImageSequence

    blocks: list[DataContentBlock] = []
    with Image.open(buf) as img:
        for frame in ImageSequence.Iterator(img):
            # Scope the per-page PNG buffer so its raw bytes are released as
            # soon as the base64 string has been produced.
            with io.BytesIO() as png_buf:
                frame.convert("RGBA").save(png_buf, format="PNG")
                png_b64 = base64.b64encode(png_buf.getvalue()).decode("ascii")
            blocks.append(create_image_block(base64=png_b64, mime_type="image/png"))
    return blocks


async def stream_tiff_to_content_blocks(
    url: str, *, max_size: int = 0
) -> list[DataContentBlock]:
    """Download a TIFF via streaming and convert each page to a content block.

    The HTTP body is streamed into an in-memory buffer while the size limit
    is enforced chunk by chunk. Once the download has finished the HTTP
    stream is closed, and the pages are decoded with Pillow in a worker
    thread so the (CPU-bound) TIFF decode / PNG encode does not block the
    event loop. Each page is converted to RGBA, saved as PNG, base64-encoded
    and wrapped in an ``image/png`` content block.

    Args:
        url: The URL to download from.
        max_size: Maximum allowed file size in bytes. 0 means unlimited.

    Returns:
        A list of DataContentBlock instances, one per TIFF page.

    Raises:
        ValueError: If the file exceeds max_size.
        httpx.HTTPStatusError: If the HTTP request fails.
    """
    import asyncio

    buf = io.BytesIO()
    total = 0
    async with _stream_download(url, max_size=max_size) as response:
        async for chunk in response.aiter_bytes():
            total += len(chunk)
            # Content-Length may be absent or wrong, so the limit is also
            # enforced on the bytes actually received.
            if max_size > 0 and total > max_size:
                raise ValueError(
                    f"File exceeds the {_format_mb(max_size, decimals=0)}"
                    f" limit for LLM payloads"
                    f" (downloaded {_format_mb(total)} so far)"
                )
            buf.write(chunk)

    # The connection is no longer needed; decode outside the download context.
    buf.seek(0)
    return await asyncio.to_thread(_tiff_to_png_blocks, buf)


async def download_file_base64(url: str, *, max_size: int = 0) -> str:
    """Fetch ``url`` and return the response body encoded as base64 text.

    The download is opened through ``_stream_download`` and the streamed
    bytes are handed to ``encode_streamed_base64``; ``max_size`` is passed
    to both.

    Args:
        url: The URL to download from.
        max_size: Maximum allowed file size in bytes. 0 means unlimited.

    Raises:
        ValueError: If the file exceeds max_size.
        httpx.HTTPStatusError: If the HTTP request fails.
    """
    async with _stream_download(url, max_size=max_size) as response:
        byte_stream = response.aiter_bytes()
        encoded = await encode_streamed_base64(byte_stream, max_size=max_size)
    return encoded
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,10 @@
AgentRuntimeError,
AgentRuntimeErrorCode,
)
from uipath_langchain.agent.multimodal import FileInfo, build_file_content_block
from uipath_langchain.agent.multimodal import (
FileInfo,
build_file_content_blocks_for,
)
from uipath_langchain.agent.react.jsonschema_pydantic_converter import create_model
from uipath_langchain.agent.tools.structured_tool_with_argument_properties import (
StructuredToolWithArgumentProperties,
Expand Down Expand Up @@ -182,8 +185,8 @@ async def add_files_to_message(

file_content_blocks: list[DataContentBlock] = []
for file in files:
block = await build_file_content_block(file)
file_content_blocks.append(block)
blocks = await build_file_content_blocks_for(file)
file_content_blocks.extend(blocks)
return append_content_blocks_to_message(
message, cast(list[ContentBlock], file_content_blocks)
)
Loading
Loading