-
Notifications
You must be signed in to change notification settings - Fork 0
Description
Censic AI: Convert the single-modal input pipeline to multi-modal (Text, Image, Audio)
Example
Domain
`
// Variant A: a heterogeneous, ordered list of content parts.
// Each part declares its own kind, so new modalities can be added
// without changing the container shape.
public sealed class MultimodalInput
{
    // Ordered content parts making up one user input.
    // NOTE(review): the scraped snippet had the generic argument stripped
    // ("List Parts"); restored to List<AssetPart> per the part type below.
    public List<AssetPart> Parts { get; init; } = [];
}

public sealed class AssetPart
{
    // Discriminator for the payload: "text", "image", "audio".
    public string Kind { get; init; } = default!;

    // Populated when Kind == "text".
    public string? Text { get; init; }

    // Raw payload bytes for binary kinds ("image", "audio").
    public byte[]? Bytes { get; init; }

    // MIME type describing Bytes (e.g. "image/png", "audio/wav").
    public string? MimeType { get; init; }
}
OR
// Variant B: a flat shape with one optional slot per modality.
// Simpler to bind/serialize than the parts list, at the cost of
// supporting at most one payload of each kind per input.
public sealed class MultimodalInput
{
    public string? Text { get; init; }

    public byte[]? Image { get; init; }
    public string? ImageMimeType { get; init; }

    public byte[]? Audio { get; init; }
    public string? AudioMimeType { get; init; }

    // Optional metadata
    public string? UserId { get; init; }
    public string? ConversationId { get; init; }
}
`
Application
// Application-layer commands/queries, one per agent capability.
// Text -> speech:
public sealed record GenerateSpeechCommand(string Text);
// Audio -> text:
public sealed record TranscribeAudioQuery(byte[] Audio, string MimeType);
// Prompt -> image:
public sealed record GenerateImageCommand(string Prompt);
// Image -> description:
public sealed record DescribeImageQuery(byte[] Image, string MimeType);
// Arbitrary asset -> evaluation against a stated purpose:
public sealed record EvaluateAssetQuery(byte[] Asset, string MimeType, string Purpose);
Intention Router (Infrastructure)
`
/// <summary>
/// Routes a multimodal input to the agent message for its dominant modality.
/// Precedence when multiple payloads are present: audio, then image, then text.
/// </summary>
/// <param name="input">The flat multimodal input (variant B shape).</param>
/// <returns>An <see cref="IAgentMessage"/> wrapping the matching command/query.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="input"/> is null.</exception>
/// <exception cref="InvalidOperationException">Thrown when no payload is present.</exception>
public static IAgentMessage MapToAgentMessage(MultimodalInput input)
{
    ArgumentNullException.ThrowIfNull(input);

    if (input.Audio is not null)
        // Fixed: the scraped snippet had the generic argument stripped here
        // ("new AgentMessage(") — restored to match the image/text branches.
        // NOTE(review): `AudioMimeType!` assumes the caller always sets a MIME
        // type alongside the bytes — TODO confirm, or validate explicitly.
        return new AgentMessage<TranscribeAudioQuery>(
            new(input.Audio, input.AudioMimeType!)
        );
    if (input.Image is not null)
        return new AgentMessage<DescribeImageQuery>(
            new(input.Image, input.ImageMimeType!)
        );
    if (!string.IsNullOrWhiteSpace(input.Text))
        return new AgentMessage<GenerateSpeechCommand>(
            new(input.Text)
        );
    throw new InvalidOperationException("Unsupported multimodal input.");
}
`