diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/CompatibilitySuppressions.xml b/src/Libraries/Microsoft.Extensions.AI.Abstractions/CompatibilitySuppressions.xml new file mode 100644 index 00000000000..8b2e05c3a7c --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/CompatibilitySuppressions.xml @@ -0,0 +1,144 @@ + + + + + CP0002 + M:Microsoft.Extensions.AI.SpeechToTextOptions.get_ModelId + lib/net10.0/Microsoft.Extensions.AI.Abstractions.dll + lib/net10.0/Microsoft.Extensions.AI.Abstractions.dll + true + + + CP0002 + M:Microsoft.Extensions.AI.SpeechToTextOptions.get_SpeechLanguage + lib/net10.0/Microsoft.Extensions.AI.Abstractions.dll + lib/net10.0/Microsoft.Extensions.AI.Abstractions.dll + true + + + CP0002 + M:Microsoft.Extensions.AI.SpeechToTextOptions.set_ModelId(System.String) + lib/net10.0/Microsoft.Extensions.AI.Abstractions.dll + lib/net10.0/Microsoft.Extensions.AI.Abstractions.dll + true + + + CP0002 + M:Microsoft.Extensions.AI.SpeechToTextOptions.set_SpeechLanguage(System.String) + lib/net10.0/Microsoft.Extensions.AI.Abstractions.dll + lib/net10.0/Microsoft.Extensions.AI.Abstractions.dll + true + + + CP0002 + M:Microsoft.Extensions.AI.SpeechToTextOptions.get_ModelId + lib/net462/Microsoft.Extensions.AI.Abstractions.dll + lib/net462/Microsoft.Extensions.AI.Abstractions.dll + true + + + CP0002 + M:Microsoft.Extensions.AI.SpeechToTextOptions.get_SpeechLanguage + lib/net462/Microsoft.Extensions.AI.Abstractions.dll + lib/net462/Microsoft.Extensions.AI.Abstractions.dll + true + + + CP0002 + M:Microsoft.Extensions.AI.SpeechToTextOptions.set_ModelId(System.String) + lib/net462/Microsoft.Extensions.AI.Abstractions.dll + lib/net462/Microsoft.Extensions.AI.Abstractions.dll + true + + + CP0002 + M:Microsoft.Extensions.AI.SpeechToTextOptions.set_SpeechLanguage(System.String) + lib/net462/Microsoft.Extensions.AI.Abstractions.dll + lib/net462/Microsoft.Extensions.AI.Abstractions.dll + true + + + CP0002 + 
M:Microsoft.Extensions.AI.SpeechToTextOptions.get_ModelId + lib/net8.0/Microsoft.Extensions.AI.Abstractions.dll + lib/net8.0/Microsoft.Extensions.AI.Abstractions.dll + true + + + CP0002 + M:Microsoft.Extensions.AI.SpeechToTextOptions.get_SpeechLanguage + lib/net8.0/Microsoft.Extensions.AI.Abstractions.dll + lib/net8.0/Microsoft.Extensions.AI.Abstractions.dll + true + + + CP0002 + M:Microsoft.Extensions.AI.SpeechToTextOptions.set_ModelId(System.String) + lib/net8.0/Microsoft.Extensions.AI.Abstractions.dll + lib/net8.0/Microsoft.Extensions.AI.Abstractions.dll + true + + + CP0002 + M:Microsoft.Extensions.AI.SpeechToTextOptions.set_SpeechLanguage(System.String) + lib/net8.0/Microsoft.Extensions.AI.Abstractions.dll + lib/net8.0/Microsoft.Extensions.AI.Abstractions.dll + true + + + CP0002 + M:Microsoft.Extensions.AI.SpeechToTextOptions.get_ModelId + lib/net9.0/Microsoft.Extensions.AI.Abstractions.dll + lib/net9.0/Microsoft.Extensions.AI.Abstractions.dll + true + + + CP0002 + M:Microsoft.Extensions.AI.SpeechToTextOptions.get_SpeechLanguage + lib/net9.0/Microsoft.Extensions.AI.Abstractions.dll + lib/net9.0/Microsoft.Extensions.AI.Abstractions.dll + true + + + CP0002 + M:Microsoft.Extensions.AI.SpeechToTextOptions.set_ModelId(System.String) + lib/net9.0/Microsoft.Extensions.AI.Abstractions.dll + lib/net9.0/Microsoft.Extensions.AI.Abstractions.dll + true + + + CP0002 + M:Microsoft.Extensions.AI.SpeechToTextOptions.set_SpeechLanguage(System.String) + lib/net9.0/Microsoft.Extensions.AI.Abstractions.dll + lib/net9.0/Microsoft.Extensions.AI.Abstractions.dll + true + + + CP0002 + M:Microsoft.Extensions.AI.SpeechToTextOptions.get_ModelId + lib/netstandard2.0/Microsoft.Extensions.AI.Abstractions.dll + lib/netstandard2.0/Microsoft.Extensions.AI.Abstractions.dll + true + + + CP0002 + M:Microsoft.Extensions.AI.SpeechToTextOptions.get_SpeechLanguage + lib/netstandard2.0/Microsoft.Extensions.AI.Abstractions.dll + lib/netstandard2.0/Microsoft.Extensions.AI.Abstractions.dll + true + + + 
CP0002 + M:Microsoft.Extensions.AI.SpeechToTextOptions.set_ModelId(System.String) + lib/netstandard2.0/Microsoft.Extensions.AI.Abstractions.dll + lib/netstandard2.0/Microsoft.Extensions.AI.Abstractions.dll + true + + + CP0002 + M:Microsoft.Extensions.AI.SpeechToTextOptions.set_SpeechLanguage(System.String) + lib/netstandard2.0/Microsoft.Extensions.AI.Abstractions.dll + lib/netstandard2.0/Microsoft.Extensions.AI.Abstractions.dll + true + + \ No newline at end of file diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/DelegatingRealtimeSession.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/DelegatingRealtimeSession.cs new file mode 100644 index 00000000000..4bb1defb3b9 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/DelegatingRealtimeSession.cs @@ -0,0 +1,97 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; +using System.Threading; +using System.Threading.Tasks; +using Microsoft.Shared.DiagnosticIds; +using Microsoft.Shared.Diagnostics; + +namespace Microsoft.Extensions.AI; + +/// +/// Provides an optional base class for an IRealtimeSession that passes through calls to another instance. +/// +/// +/// This is recommended as a base type when building sessions that can be chained around an underlying IRealtimeSession. +/// The default implementation simply passes each call to the inner session instance. +/// +[Experimental(DiagnosticIds.Experiments.AIRealTime, UrlFormat = DiagnosticIds.UrlFormat)] +public class DelegatingRealtimeSession : IRealtimeSession +{ + /// + /// Initializes a new instance of the DelegatingRealtimeSession class. + /// + /// The wrapped session instance. + /// Thrown when innerSession is null. 
+ protected DelegatingRealtimeSession(IRealtimeSession innerSession) + { + InnerSession = Throw.IfNull(innerSession); + } + + /// + public void Dispose() + { + Dispose(disposing: true); + GC.SuppressFinalize(this); + } + + /// + public async ValueTask DisposeAsync() + { + await DisposeAsyncCore().ConfigureAwait(false); + GC.SuppressFinalize(this); + } + + /// Performs async cleanup of managed resources by asynchronously disposing the inner session. + /// A task representing the asynchronous dispose operation. +#pragma warning disable EA0014 // The async method doesn't support cancellation + protected virtual async ValueTask DisposeAsyncCore() +#pragma warning restore EA0014 + { + // IRealtimeSession itself derives from IAsyncDisposable, so the inner session always supports async disposal. + await InnerSession.DisposeAsync().ConfigureAwait(false); + } + + /// Gets the inner IRealtimeSession. + protected IRealtimeSession InnerSession { get; } + + /// + public virtual RealtimeSessionOptions? Options => InnerSession.Options; + + /// + public virtual Task SendClientMessageAsync(RealtimeClientMessage message, CancellationToken cancellationToken = default) => + InnerSession.SendClientMessageAsync(message, cancellationToken); + + /// + public virtual Task UpdateAsync(RealtimeSessionOptions options, CancellationToken cancellationToken = default) => + InnerSession.UpdateAsync(options, cancellationToken); + + /// + public virtual IAsyncEnumerable GetStreamingResponseAsync( + CancellationToken cancellationToken = default) => + InnerSession.GetStreamingResponseAsync(cancellationToken); + + /// + public virtual object? GetService(Type serviceType, object? serviceKey = null) + { + _ = Throw.IfNull(serviceType); + + // If the key is non-null, we don't know what it means so pass through to the inner service. + return + serviceKey is null && serviceType.IsInstanceOfType(this) ? this : + InnerSession.GetService(serviceType, serviceKey); + } + + /// Disposes the inner session when invoked from Dispose(). + /// true if being called from Dispose(); otherwise, false. 
+ protected virtual void Dispose(bool disposing) + { + if (disposing) + { + InnerSession.Dispose(); + } + } +} diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/IRealtimeClient.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/IRealtimeClient.cs new file mode 100644 index 00000000000..b8ccd29d36a --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/IRealtimeClient.cs @@ -0,0 +1,33 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Diagnostics.CodeAnalysis; +using System.Threading; +using System.Threading.Tasks; +using Microsoft.Shared.DiagnosticIds; + +namespace Microsoft.Extensions.AI; + +/// Represents a real-time client. +/// This interface provides methods to create and manage real-time sessions. +[Experimental(DiagnosticIds.Experiments.AIRealTime, UrlFormat = DiagnosticIds.UrlFormat)] +public interface IRealtimeClient : IDisposable +{ + /// Creates a new real-time session with the specified options. + /// The session options, or null to create the session without explicit options. + /// A token to cancel the operation. + /// The created real-time session. + Task CreateSessionAsync(RealtimeSessionOptions? options = null, CancellationToken cancellationToken = default); + + /// Asks the IRealtimeClient for an object of the specified type serviceType. + /// The type of object being requested. + /// An optional key that can be used to help identify the target service. + /// The found object, otherwise null. + /// Thrown when serviceType is null. + /// + /// The purpose of this method is to allow for the retrieval of strongly typed services that might be provided by the IRealtimeClient, + /// including itself or any services it might be wrapping. + /// + object? GetService(Type serviceType, object? 
serviceKey = null); +} diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/IRealtimeSession.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/IRealtimeSession.cs new file mode 100644 index 00000000000..ae27baf91b1 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/IRealtimeSession.cs @@ -0,0 +1,59 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; +using System.Threading; +using System.Threading.Tasks; +using Microsoft.Shared.DiagnosticIds; + +namespace Microsoft.Extensions.AI; + +/// Represents a real-time session. +/// This interface provides methods to manage a real-time session and to interact with the real-time model. +[Experimental(DiagnosticIds.Experiments.AIRealTime, UrlFormat = DiagnosticIds.UrlFormat)] +public interface IRealtimeSession : IDisposable, IAsyncDisposable +{ + /// Updates the session with new options. + /// The new session options. + /// A token to cancel the operation. + /// A task that represents the asynchronous update operation. + Task UpdateAsync(RealtimeSessionOptions options, CancellationToken cancellationToken = default); + + /// + /// Gets the current session options, or null if none are available. + /// + RealtimeSessionOptions? Options { get; } + + /// + /// Sends a client message to the session. + /// + /// The client message to send. + /// A token to cancel the operation. + /// A task that represents the asynchronous send operation. + /// + /// Client messages can be sent at any time during the session and can be used to influence the session's behavior or state. + /// + Task SendClientMessageAsync(RealtimeClientMessage message, CancellationToken cancellationToken = default); + + /// Streams the server messages produced by the real-time session. + /// A token to cancel the operation. 
+ /// The response messages generated by the session. + /// + /// This method cannot be called multiple times concurrently on the same session instance. + /// + IAsyncEnumerable GetStreamingResponseAsync( + CancellationToken cancellationToken = default); + + /// Asks the IRealtimeSession for an object of the specified type serviceType. + /// The type of object being requested. + /// An optional key that can be used to help identify the target service. + /// The found object, otherwise null. + /// Thrown when serviceType is null. + /// + /// The purpose of this method is to allow for the retrieval of strongly typed services that might be provided by the IRealtimeSession, + /// including itself or any services it might be wrapping. + /// + object? GetService(Type serviceType, object? serviceKey = null); +} diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/NoiseReductionOptions.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/NoiseReductionOptions.cs new file mode 100644 index 00000000000..d39d759f462 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/NoiseReductionOptions.cs @@ -0,0 +1,29 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Diagnostics.CodeAnalysis; +using Microsoft.Shared.DiagnosticIds; + +namespace Microsoft.Extensions.AI; + +/// +/// Specifies the kind of noise reduction to apply. +/// +[Experimental(DiagnosticIds.Experiments.AIRealTime, UrlFormat = DiagnosticIds.UrlFormat)] +public enum NoiseReductionOptions +{ + /// + /// No noise reduction is applied. + /// + None, + + /// + /// Noise reduction for close-talking (near-field) microphones. + /// + NearField, + + /// + /// Noise reduction for far-field microphones. 
+ /// + FarField +} diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeAudioFormat.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeAudioFormat.cs new file mode 100644 index 00000000000..3d8962c6780 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeAudioFormat.cs @@ -0,0 +1,37 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Diagnostics.CodeAnalysis; +using Microsoft.Shared.DiagnosticIds; + +namespace Microsoft.Extensions.AI; + +/// +/// Describes a real-time audio format: its media type and its sample rate. +/// +[Experimental(DiagnosticIds.Experiments.AIRealTime, UrlFormat = DiagnosticIds.UrlFormat)] +public class RealtimeAudioFormat +{ + /// + /// Initializes a new instance of the RealtimeAudioFormat class from the given mediaType and sampleRate (in Hertz). + /// + public RealtimeAudioFormat(string mediaType, int sampleRate) + { + MediaType = mediaType; + SampleRate = sampleRate; + } + + /// + /// Gets the media type of the audio (e.g., "audio/pcm", "audio/pcmu", "audio/pcma"). + /// + public string MediaType { get; init; } + + /// + /// Gets the sample rate of the audio in Hertz. + /// + /// + /// When constructed via the RealtimeAudioFormat(string, int) constructor, this property is always set. + /// The nullable type allows deserialized instances to omit the sample rate when the server does not provide one. + /// + public int? SampleRate { get; init; } +} diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeClientConversationItemCreateMessage.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeClientConversationItemCreateMessage.cs new file mode 100644 index 00000000000..dc10766e347 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeClientConversationItemCreateMessage.cs @@ -0,0 +1,37 @@ +// Licensed to the .NET Foundation under one or more agreements. 
+// The .NET Foundation licenses this file to you under the MIT license. + +using System.Diagnostics.CodeAnalysis; +using Microsoft.Shared.DiagnosticIds; +using Microsoft.Shared.Diagnostics; + +namespace Microsoft.Extensions.AI; + +/// +/// Represents a real-time client message that creates a conversation item. +/// +[Experimental(DiagnosticIds.Experiments.AIRealTime, UrlFormat = DiagnosticIds.UrlFormat)] +public class RealtimeClientConversationItemCreateMessage : RealtimeClientMessage +{ + /// + /// Initializes a new instance of the RealtimeClientConversationItemCreateMessage class. + /// + /// The conversation item to create. Must not be null. + /// The optional ID of the previous conversation item to insert the new one after. + public RealtimeClientConversationItemCreateMessage(RealtimeContentItem item, string? previousId = null) + { + PreviousId = previousId; + Item = Throw.IfNull(item); + } + + /// + /// Gets or sets the optional ID of the conversation item after which the new item is inserted. + /// If not set, the new item will be appended to the end of the conversation. + /// + public string? PreviousId { get; set; } + + /// + /// Gets or sets the conversation item to create. + /// + public RealtimeContentItem Item { get; set; } +} diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeClientInputAudioBufferAppendMessage.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeClientInputAudioBufferAppendMessage.cs new file mode 100644 index 00000000000..b1b9dd2f038 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeClientInputAudioBufferAppendMessage.cs @@ -0,0 +1,33 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Diagnostics.CodeAnalysis; +using Microsoft.Shared.DiagnosticIds; +using Microsoft.Shared.Diagnostics; + +namespace Microsoft.Extensions.AI; + +/// +/// Represents a real-time client message that appends audio to the input audio buffer. 
+/// +[Experimental(DiagnosticIds.Experiments.AIRealTime, UrlFormat = DiagnosticIds.UrlFormat)] + +public class RealtimeClientInputAudioBufferAppendMessage : RealtimeClientMessage +{ + /// + /// Initializes a new instance of the RealtimeClientInputAudioBufferAppendMessage class. + /// + /// The data content containing the audio buffer data to append. Must not be null. + public RealtimeClientInputAudioBufferAppendMessage(DataContent audioContent) + { + Content = Throw.IfNull(audioContent); + } + + /// + /// Gets or sets the audio content to append to the input audio buffer. + /// + /// + /// The content should include the audio buffer data that needs to be appended to the input audio buffer. + /// + public DataContent Content { get; set; } +} diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeClientInputAudioBufferCommitMessage.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeClientInputAudioBufferCommitMessage.cs new file mode 100644 index 00000000000..15be87316d3 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeClientInputAudioBufferCommitMessage.cs @@ -0,0 +1,23 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Diagnostics.CodeAnalysis; +using Microsoft.Shared.DiagnosticIds; + +namespace Microsoft.Extensions.AI; + +/// +/// Represents a real-time client message that commits the input audio buffer. +/// +[Experimental(DiagnosticIds.Experiments.AIRealTime, UrlFormat = DiagnosticIds.UrlFormat)] + +public class RealtimeClientInputAudioBufferCommitMessage : RealtimeClientMessage +{ + /// + /// Initializes a new instance of the RealtimeClientInputAudioBufferCommitMessage class. 
+ /// + public RealtimeClientInputAudioBufferCommitMessage() + { + } +} + diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeClientMessage.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeClientMessage.cs new file mode 100644 index 00000000000..0f035933462 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeClientMessage.cs @@ -0,0 +1,30 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Diagnostics.CodeAnalysis; +using Microsoft.Shared.DiagnosticIds; + +namespace Microsoft.Extensions.AI; + +/// +/// Represents a real-time message the client sends to the model. +/// +[Experimental(DiagnosticIds.Experiments.AIRealTime, UrlFormat = DiagnosticIds.UrlFormat)] +public class RealtimeClientMessage +{ + /// + /// Gets or sets the optional message ID associated with the message. + /// This can be used for tracking and correlation purposes. + /// + public string? MessageId { get; set; } + + /// + /// Gets or sets the raw representation of the message. + /// This can be used to send the raw data to the model. + /// + /// + /// The raw representation is typically used for custom or unsupported message types. + /// For example, the model may accept a JSON-serialized message. + /// + public object? RawRepresentation { get; set; } +} diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeClientResponseCreateMessage.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeClientResponseCreateMessage.cs new file mode 100644 index 00000000000..bcd4a016f97 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeClientResponseCreateMessage.cs @@ -0,0 +1,117 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; +using Microsoft.Shared.DiagnosticIds; + +namespace Microsoft.Extensions.AI; + +/// +/// Represents a client message that triggers model inference to generate a response. +/// +/// +/// Sending this message instructs the provider to generate a new response from the model. +/// The response may include one or more output items (text, audio, or tool calls). +/// Properties on this message optionally override the session-level configuration +/// for this response only. +/// +[Experimental(DiagnosticIds.Experiments.AIRealTime, UrlFormat = DiagnosticIds.UrlFormat)] +public class RealtimeClientResponseCreateMessage : RealtimeClientMessage +{ + /// + /// Initializes a new instance of the RealtimeClientResponseCreateMessage class. + /// + public RealtimeClientResponseCreateMessage() + { + } + + /// + /// Gets or sets the list of conversation items to create a response for. + /// + public IList? Items { get; set; } + + /// + /// Gets or sets the output audio options for the response. + /// + /// + /// If set, overrides the session-level audio output configuration for this response only. + /// If null, the session's default audio options are used. + /// + public RealtimeAudioFormat? OutputAudioOptions { get; set; } + + /// + /// Gets or sets the voice of the output audio. + /// + /// + /// If set, overrides the session-level voice for this response only. + /// If null, the session's default voice is used. + /// + public string? OutputVoice { get; set; } + + /// + /// Gets or sets a value indicating whether the response output should be excluded from the conversation context. + /// + /// + /// When true, the response is generated out-of-band: the model produces output + /// but the resulting items are not added to the conversation history, so they will not appear + /// as context for subsequent responses. Defaults to false, meaning response + /// output is added to the default conversation. 
+ /// + public bool ExcludeFromConversation { get; set; } + + /// + /// Gets or sets the instructions that guide the model on desired responses. + /// + /// + /// If set, overrides the session-level instructions for this response only. + /// If null, the session's default instructions are used. + /// + public string? Instructions { get; set; } + + /// + /// Gets or sets the maximum number of output tokens for the response, inclusive of all modalities and tool calls. + /// + /// + /// This limit applies to the total output tokens regardless of modality (text, audio, etc.). + /// If null, the provider's default limit is used. + /// + public int? MaxOutputTokens { get; set; } + + /// + /// Gets or sets any additional properties associated with the response request. + /// + /// + /// This can be used to attach arbitrary key-value metadata to a response request + /// for tracking or disambiguation purposes (e.g., correlating multiple simultaneous responses). + /// Providers may map this to their own metadata fields. + /// + public AdditionalPropertiesDictionary? AdditionalProperties { get; set; } + + /// + /// Gets or sets the output modalities for the response (e.g., "text", "audio"). + /// + /// + /// If set, overrides the session-level output modalities for this response only. + /// If null, the session's default modalities are used. + /// + public IList? OutputModalities { get; set; } + + /// + /// Gets or sets the tool choice mode for the response. + /// + /// + /// If set, overrides the session-level tool choice for this response only. + /// If null, the session's default tool choice is used. + /// + public ChatToolMode? ToolMode { get; set; } + + /// + /// Gets or sets the AI tools available for generating the response. + /// + /// + /// If set, overrides the session-level tools for this response only. + /// If null, the session's default tools are used. + /// + public IList? 
Tools { get; set; } +} diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeContentItem.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeContentItem.cs new file mode 100644 index 00000000000..f6d6d7e9c39 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeContentItem.cs @@ -0,0 +1,62 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; +using Microsoft.Shared.DiagnosticIds; +using Microsoft.Shared.Diagnostics; + +namespace Microsoft.Extensions.AI; + +/// +/// Represents a real-time conversation item. +/// +/// +/// This class is used to encapsulate the details of a real-time item that can be inserted into a conversation, +/// or sent as part of a real-time response creation process. +/// +[Experimental(DiagnosticIds.Experiments.AIRealTime, UrlFormat = DiagnosticIds.UrlFormat)] +public class RealtimeContentItem +{ + /// + /// Initializes a new instance of the RealtimeContentItem class. + /// + /// The contents of the conversation item. Must not be null. + /// The ID of the conversation item. + /// The role of the conversation item. + public RealtimeContentItem(IList contents, string? id = null, ChatRole? role = null) + { + Id = id; + Role = role; + Contents = Throw.IfNull(contents); + } + + /// + /// Gets or sets the ID of the conversation item. + /// + /// + /// The ID can be null when passing Function or MCP content, where an ID is not required. + /// The ID is only needed when the contents represent a user, system, or assistant message with contents like text, audio, image or similar. + /// + public string? Id { get; set; } + + /// + /// Gets or sets the role of the conversation item. + /// + /// + /// The role is not used in the case of Function or MCP content. 
+ /// The role is only needed when the contents represent a user, system, or assistant message with contents like text, audio, image or similar. + /// + public ChatRole? Role { get; set; } + + /// + /// Gets or sets the content of the conversation item. + /// + public IList Contents { get; set; } + + /// + /// Gets or sets the raw representation of the conversation item. + /// This can be used to hold the original data structure received from or sent to the provider. + /// + public object? RawRepresentation { get; set; } +} diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeServerErrorMessage.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeServerErrorMessage.cs new file mode 100644 index 00000000000..ea3131482fa --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeServerErrorMessage.cs @@ -0,0 +1,40 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Diagnostics.CodeAnalysis; +using Microsoft.Shared.DiagnosticIds; + +namespace Microsoft.Extensions.AI; + +/// +/// Represents a real-time server error message. +/// +/// +/// Used with the RealtimeServerMessageType.Error message type. +/// +[Experimental(DiagnosticIds.Experiments.AIRealTime, UrlFormat = DiagnosticIds.UrlFormat)] +public class RealtimeServerErrorMessage : RealtimeServerMessage +{ + /// + /// Initializes a new instance of the RealtimeServerErrorMessage class with its type set to RealtimeServerMessageType.Error. + /// + public RealtimeServerErrorMessage() + { + Type = RealtimeServerMessageType.Error; + } + + /// + /// Gets or sets the error content associated with the error message. + /// + public ErrorContent? Error { get; set; } + + /// + /// Gets or sets the message ID of the client message that caused the error. + /// + /// + /// This is specific to event-driven protocols where multiple client messages may be in-flight, + /// allowing correlation of the error to the originating client request. + /// + public string? 
ErrorMessageId { get; set; } + +} diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeServerInputAudioTranscriptionMessage.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeServerInputAudioTranscriptionMessage.cs new file mode 100644 index 00000000000..ec011d3faee --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeServerInputAudioTranscriptionMessage.cs @@ -0,0 +1,58 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Diagnostics.CodeAnalysis; +using Microsoft.Shared.DiagnosticIds; + +namespace Microsoft.Extensions.AI; + +/// +/// Represents a real-time server message for input audio transcription. +/// +/// +/// Used when the message type is InputAudioTranscriptionCompleted, InputAudioTranscriptionDelta, or InputAudioTranscriptionFailed. +/// +[Experimental(DiagnosticIds.Experiments.AIRealTime, UrlFormat = DiagnosticIds.UrlFormat)] +public class RealtimeServerInputAudioTranscriptionMessage : RealtimeServerMessage +{ + /// + /// Initializes a new instance of the RealtimeServerInputAudioTranscriptionMessage class. + /// + /// The type of the real-time server response. + /// + /// The type parameter should be InputAudioTranscriptionCompleted, InputAudioTranscriptionDelta, or InputAudioTranscriptionFailed. + /// + public RealtimeServerInputAudioTranscriptionMessage(RealtimeServerMessageType type) + { + Type = type; + } + + /// + /// Gets or sets the index of the content part containing the audio. + /// + public int? ContentIndex { get; set; } + + /// + /// Gets or sets the ID of the item containing the audio that is being transcribed. + /// + public string? ItemId { get; set; } + + /// + /// Gets or sets the transcription text of the audio. + /// + public string? Transcription { get; set; } + + /// + /// Gets or sets the transcription-specific usage, which is billed separately from the realtime model. 
+ /// + /// + /// This usage reflects the cost of the speech-to-text transcription and is billed according to the + /// ASR (Automatic Speech Recognition) model's pricing rather than the realtime model's pricing. + /// + public UsageDetails? Usage { get; set; } + + /// + /// Gets or sets the error content if an error occurred during transcription. + /// + public ErrorContent? Error { get; set; } +} diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeServerMessage.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeServerMessage.cs new file mode 100644 index 00000000000..0e023fde4f4 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeServerMessage.cs @@ -0,0 +1,35 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Diagnostics.CodeAnalysis; +using Microsoft.Shared.DiagnosticIds; + +namespace Microsoft.Extensions.AI; + +/// +/// Represents a real-time server response message. +/// +[Experimental(DiagnosticIds.Experiments.AIRealTime, UrlFormat = DiagnosticIds.UrlFormat)] +public class RealtimeServerMessage +{ + /// + /// Gets or sets the type of the real-time response. + /// + public RealtimeServerMessageType Type { get; set; } + + /// + /// Gets or sets the optional message ID associated with the response. + /// This can be used for tracking and correlation purposes. + /// + public string? MessageId { get; set; } + + /// + /// Gets or sets the raw representation of the response. + /// This can be used to hold the original data structure received from the model. + /// + /// + /// The raw representation is typically used for custom or unsupported message types. + /// For example, this may be the JSON-serialized server message as received. + /// + public object? 
RawRepresentation { get; set; } +} diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeServerMessageType.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeServerMessageType.cs new file mode 100644 index 00000000000..7c722cf06a0 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeServerMessageType.cs @@ -0,0 +1,160 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.ComponentModel; +using System.Diagnostics; +using System.Diagnostics.CodeAnalysis; +using System.Text.Json; +using System.Text.Json.Serialization; +using Microsoft.Shared.DiagnosticIds; +using Microsoft.Shared.Diagnostics; + +namespace Microsoft.Extensions.AI; + +/// +/// Represents the type of a real-time server message. +/// This is used to identify the message type being received from the model. +/// +/// +/// Well-known message types are provided as static properties. Providers may define additional +/// message types by constructing new instances with custom values. +/// +[Experimental(DiagnosticIds.Experiments.AIRealTime, UrlFormat = DiagnosticIds.UrlFormat)] +[JsonConverter(typeof(Converter))] +[DebuggerDisplay("{Value,nq}")] +public readonly struct RealtimeServerMessageType : IEquatable +{ + /// Gets a message type indicating that the response contains only raw content. + /// + /// This type supports extensibility for custom content types not natively supported by the SDK. + /// + public static RealtimeServerMessageType RawContentOnly { get; } = new("RawContentOnly"); + + /// Gets a message type indicating the output of audio transcription for user audio written to the user audio buffer. 
+ public static RealtimeServerMessageType InputAudioTranscriptionCompleted { get; } = new("InputAudioTranscriptionCompleted"); + + /// Gets a message type indicating that the text value of an input audio transcription content part has been updated with incremental transcription results. + public static RealtimeServerMessageType InputAudioTranscriptionDelta { get; } = new("InputAudioTranscriptionDelta"); + + /// Gets a message type indicating that the audio transcription for user audio written to the user audio buffer has failed. + public static RealtimeServerMessageType InputAudioTranscriptionFailed { get; } = new("InputAudioTranscriptionFailed"); + + /// Gets a message type indicating that the output text has been updated with incremental results. + public static RealtimeServerMessageType OutputTextDelta { get; } = new("OutputTextDelta"); + + /// Gets a message type indicating that the output text is complete. + public static RealtimeServerMessageType OutputTextDone { get; } = new("OutputTextDone"); + + /// Gets a message type indicating that the model-generated transcription of audio output has been updated. + public static RealtimeServerMessageType OutputAudioTranscriptionDelta { get; } = new("OutputAudioTranscriptionDelta"); + + /// Gets a message type indicating that the model-generated transcription of audio output is done streaming. + public static RealtimeServerMessageType OutputAudioTranscriptionDone { get; } = new("OutputAudioTranscriptionDone"); + + /// Gets a message type indicating that the audio output has been updated. + public static RealtimeServerMessageType OutputAudioDelta { get; } = new("OutputAudioDelta"); + + /// Gets a message type indicating that the audio output is done streaming. + public static RealtimeServerMessageType OutputAudioDone { get; } = new("OutputAudioDone"); + + /// Gets a message type indicating that the response has completed. + public static RealtimeServerMessageType ResponseDone { get; } = new("ResponseDone"); + + /// Gets a message type indicating that the response has been created.
+ public static RealtimeServerMessageType ResponseCreated { get; } = new("ResponseCreated"); + + /// Gets a message type indicating an individual output item in the response has completed. + public static RealtimeServerMessageType ResponseOutputItemDone { get; } = new("ResponseOutputItemDone"); + + /// Gets a message type indicating an individual output item has been added to the response. + public static RealtimeServerMessageType ResponseOutputItemAdded { get; } = new("ResponseOutputItemAdded"); + + /// Gets a message type indicating an error occurred while processing the request. + public static RealtimeServerMessageType Error { get; } = new("Error"); + + /// Gets a message type indicating that an MCP tool call is in progress. + public static RealtimeServerMessageType McpCallInProgress { get; } = new("McpCallInProgress"); + + /// Gets a message type indicating that an MCP tool call has completed. + public static RealtimeServerMessageType McpCallCompleted { get; } = new("McpCallCompleted"); + + /// Gets a message type indicating that an MCP tool call has failed. + public static RealtimeServerMessageType McpCallFailed { get; } = new("McpCallFailed"); + + /// Gets a message type indicating that listing MCP tools is in progress. + public static RealtimeServerMessageType McpListToolsInProgress { get; } = new("McpListToolsInProgress"); + + /// Gets a message type indicating that listing MCP tools has completed. + public static RealtimeServerMessageType McpListToolsCompleted { get; } = new("McpListToolsCompleted"); + + /// Gets a message type indicating that listing MCP tools has failed. + public static RealtimeServerMessageType McpListToolsFailed { get; } = new("McpListToolsFailed"); + + /// + /// Gets the value associated with this . + /// + public string Value { get; } + + /// + /// Initializes a new instance of the struct with the provided value. + /// + /// The value to associate with this . 
+ [JsonConstructor] + public RealtimeServerMessageType(string value) + { + Value = Throw.IfNullOrWhitespace(value); + } + + /// + /// Returns a value indicating whether two instances are equivalent, as determined by a + /// case-insensitive comparison of their values. + /// + /// The first instance to compare. + /// The second instance to compare. + /// if left and right have equivalent values; otherwise, . + public static bool operator ==(RealtimeServerMessageType left, RealtimeServerMessageType right) + { + return left.Equals(right); + } + + /// + /// Returns a value indicating whether two instances are not equivalent, as determined by a + /// case-insensitive comparison of their values. + /// + /// The first instance to compare. + /// The second instance to compare. + /// if left and right have different values; otherwise, . + public static bool operator !=(RealtimeServerMessageType left, RealtimeServerMessageType right) + { + return !(left == right); + } + + /// + public override bool Equals([NotNullWhen(true)] object? obj) + => obj is RealtimeServerMessageType other && Equals(other); + + /// + public bool Equals(RealtimeServerMessageType other) + => string.Equals(Value, other.Value, StringComparison.OrdinalIgnoreCase); + + /// + public override int GetHashCode() + => Value is null ? 0 : StringComparer.OrdinalIgnoreCase.GetHashCode(Value); + + /// + public override string ToString() => Value ?? string.Empty; + + /// Provides a for serializing instances. 
+ [EditorBrowsable(EditorBrowsableState.Never)] + public sealed class Converter : JsonConverter + { + /// + public override RealtimeServerMessageType Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options) => + new(reader.GetString()!); + + /// + public override void Write(Utf8JsonWriter writer, RealtimeServerMessageType value, JsonSerializerOptions options) => + Throw.IfNull(writer).WriteStringValue(value.Value); + } +} diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeServerOutputTextAudioMessage.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeServerOutputTextAudioMessage.cs new file mode 100644 index 00000000000..be6b9137bfc --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeServerOutputTextAudioMessage.cs @@ -0,0 +1,70 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Diagnostics.CodeAnalysis; +using Microsoft.Shared.DiagnosticIds; + +namespace Microsoft.Extensions.AI; + +/// +/// Represents a real-time server message for output text and audio. +/// +[Experimental(DiagnosticIds.Experiments.AIRealTime, UrlFormat = DiagnosticIds.UrlFormat)] +public class RealtimeServerOutputTextAudioMessage : RealtimeServerMessage +{ + /// + /// Initializes a new instance of the class for handling output text delta responses. + /// + /// The type of the real-time server response. + /// + /// The should be , , + /// , , + /// , or . + /// + public RealtimeServerOutputTextAudioMessage(RealtimeServerMessageType type) + { + Type = type; + } + + /// + /// Gets or sets the index of the content part whose text has been updated. + /// + public int? ContentIndex { get; set; } + + /// + /// Gets or sets the text delta or final text content. + /// + /// + /// Populated for , , + /// , and messages. + /// For audio messages ( and ), + /// use instead. 
+ /// + public string? Text { get; set; } + + /// + /// Gets or sets the Base64-encoded audio data delta or final audio content. + /// + /// + /// Populated for messages. + /// For , this is typically + /// as the final audio is not included; use the accumulated deltas instead. + /// For text content, use instead. + /// + public string? Audio { get; set; } + + /// + /// Gets or sets the ID of the item containing the content part whose text has been updated. + /// + public string? ItemId { get; set; } + + /// + /// Gets or sets the index of the output item in the response. + /// + public int? OutputIndex { get; set; } + + /// + /// Gets or sets the ID of the response. + /// + public string? ResponseId { get; set; } +} diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeServerResponseCreatedMessage.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeServerResponseCreatedMessage.cs new file mode 100644 index 00000000000..36823ae99e8 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeServerResponseCreatedMessage.cs @@ -0,0 +1,104 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; +using Microsoft.Shared.DiagnosticIds; + +namespace Microsoft.Extensions.AI; + +/// +/// Represents a real-time message for creating a response item. +/// +/// +/// Used with the and messages. +/// +[Experimental(DiagnosticIds.Experiments.AIRealTime, UrlFormat = DiagnosticIds.UrlFormat)] +public class RealtimeServerResponseCreatedMessage : RealtimeServerMessage +{ + /// + /// Initializes a new instance of the class. + /// + /// + /// The should be or . + /// + public RealtimeServerResponseCreatedMessage(RealtimeServerMessageType type) + { + Type = type; + } + + /// + /// Gets or sets the output audio options for the response. 
If null, the default conversation audio options will be used. + /// + public RealtimeAudioFormat? OutputAudioOptions { get; set; } + + /// + /// Gets or sets the voice of the output audio. + /// + public string? OutputVoice { get; set; } + + /// + /// Gets or sets the conversation ID associated with the response. + /// + /// + /// Identifies which conversation within the session this response belongs to. + /// A session may have a default conversation to which items are automatically added, + /// or responses may be generated out-of-band (not associated with any conversation). + /// + public string? ConversationId { get; set; } + + /// + /// Gets or sets the unique response ID. + /// + public string? ResponseId { get; set; } + + /// + /// Gets or sets the maximum number of output tokens for the response, inclusive of all modalities and tool calls. + /// + /// + /// This limit applies to the total output tokens regardless of modality (text, audio, etc.). + /// If null, the provider's default limit was used. + /// + public int? MaxOutputTokens { get; set; } + + /// + /// Gets or sets any additional properties associated with the response. + /// + /// + /// Contains arbitrary key-value metadata attached to the response. + /// This is the metadata that was provided when the response was created + /// (e.g., for tracking or disambiguating multiple simultaneous responses). + /// + public AdditionalPropertiesDictionary? AdditionalProperties { get; set; } + + /// + /// Gets or sets the list of the conversation items included in the response. + /// + public IList? Items { get; set; } + + /// + /// Gets or sets the output modalities for the response, such as "text" or "audio". + /// If null, the default conversation modalities will be used. + /// + public IList? OutputModalities { get; set; } + + /// + /// Gets or sets the status of the response. + /// + public string? Status { get; set; } + + /// + /// Gets or sets the error content of the response, if any.
+ /// + public ErrorContent? Error { get; set; } + + /// + /// Gets or sets the per-response token usage for billing purposes. + /// + /// + /// Populated when the response is complete (i.e., on ). + /// Input tokens include the entire conversation context, so they grow over successive turns + /// as previous output becomes input for later responses. + /// + public UsageDetails? Usage { get; set; } +} diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeServerResponseOutputItemMessage.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeServerResponseOutputItemMessage.cs new file mode 100644 index 00000000000..bc4e8ff20e6 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeServerResponseOutputItemMessage.cs @@ -0,0 +1,43 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Diagnostics.CodeAnalysis; +using Microsoft.Shared.DiagnosticIds; + +namespace Microsoft.Extensions.AI; + +/// +/// Represents a real-time message representing a new output item added or created during response generation. +/// +/// +/// Used with the and messages. +/// +[Experimental(DiagnosticIds.Experiments.AIRealTime, UrlFormat = DiagnosticIds.UrlFormat)] +public class RealtimeServerResponseOutputItemMessage : RealtimeServerMessage +{ + /// + /// Initializes a new instance of the class. + /// + /// + /// The should be or . + /// + public RealtimeServerResponseOutputItemMessage(RealtimeServerMessageType type) + { + Type = type; + } + + /// + /// Gets or sets the unique response ID. + /// + public string? ResponseId { get; set; } + + /// + /// Gets or sets the unique output index. + /// + public int? OutputIndex { get; set; } + + /// + /// Gets or sets the conversation item included in the response. + /// + public RealtimeContentItem? 
Item { get; set; } +} diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeSessionKind.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeSessionKind.cs new file mode 100644 index 00000000000..120c7787a81 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeSessionKind.cs @@ -0,0 +1,24 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Diagnostics.CodeAnalysis; +using Microsoft.Shared.DiagnosticIds; + +namespace Microsoft.Extensions.AI; + +/// +/// Represents the kind of a real-time session. +/// +[Experimental(DiagnosticIds.Experiments.AIRealTime, UrlFormat = DiagnosticIds.UrlFormat)] +public enum RealtimeSessionKind +{ + /// + /// Represents a real-time session that processes audio, text, or other media in real time. + /// + Realtime, + + /// + /// Represents a transcription-only session. + /// + Transcription +} diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeSessionOptions.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeSessionOptions.cs new file mode 100644 index 00000000000..03a02b73187 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeSessionOptions.cs @@ -0,0 +1,112 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; +using System.Text.Json.Serialization; +using Microsoft.Shared.DiagnosticIds; + +namespace Microsoft.Extensions.AI; + +/// Represents options for configuring a real-time session. +[Experimental(DiagnosticIds.Experiments.AIRealTime, UrlFormat = DiagnosticIds.UrlFormat)] +public class RealtimeSessionOptions +{ + /// + /// Gets the session kind.
+ /// + /// + /// If set to RealtimeSessionKind.Transcription, most of the session's properties will not apply to the session. Only InputAudioFormat, NoiseReductionOptions, TranscriptionOptions, and VoiceActivityDetection will be used. + /// + public RealtimeSessionKind SessionKind { get; init; } = RealtimeSessionKind.Realtime; + + /// + /// Gets the model name to use for the session. + /// + public string? Model { get; init; } + + /// + /// Gets the input audio format for the session. + /// + public RealtimeAudioFormat? InputAudioFormat { get; init; } + + /// + /// Gets the noise reduction options for the session. + /// + public NoiseReductionOptions? NoiseReductionOptions { get; init; } + + /// + /// Gets the transcription options for the session. + /// + public TranscriptionOptions? TranscriptionOptions { get; init; } + + /// + /// Gets the voice activity detection options for the session. + /// + public VoiceActivityDetection? VoiceActivityDetection { get; init; } + + /// + /// Gets the output audio format for the session. + /// + public RealtimeAudioFormat? OutputAudioFormat { get; init; } + + /// + /// Gets the output voice speed for the session. + /// + /// + /// The default value is 1.0, which represents normal speed. + /// + public double VoiceSpeed { get; init; } = 1.0; + + /// + /// Gets the output voice for the session. + /// + public string? Voice { get; init; } + + /// + /// Gets the default system instructions for the session. + /// + public string? Instructions { get; init; } + + /// + /// Gets the maximum number of response tokens for the session. + /// + public int? MaxOutputTokens { get; init; } + + /// + /// Gets the output modalities for the response, such as "text" or "audio". + /// If null, the default conversation modalities will be used. + /// + public IReadOnlyList? OutputModalities { get; init; } + + /// + /// Gets the tool choice mode for the session. + /// + public ChatToolMode? ToolMode { get; init; } + + /// + /// Gets the AI tools available for generating the response.
+ /// + public IReadOnlyList? Tools { get; init; } + + /// + /// Gets a callback responsible for creating the raw representation of the session options from an underlying implementation. + /// + /// + /// The underlying implementation might have its own representation of options. + /// When is invoked with a , + /// that implementation might convert the provided options into its own representation in order to use it while + /// performing the operation. For situations where a consumer knows which concrete + /// is being used and how it represents options, a new instance of that implementation-specific options type can be + /// returned by this callback for the implementation to use, instead of creating a + /// new instance. Such implementations might mutate the supplied options instance further based on other settings + /// supplied on this instance or from other inputs. + /// Therefore, it is strongly recommended to not return shared instances and instead make the callback return + /// a new instance on each call. + /// This is typically used to set an implementation-specific setting that isn't otherwise exposed from the strongly typed + /// properties on . + /// + [JsonIgnore] + public Func? RawRepresentationFactory { get; init; } +} diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/SemanticEagerness.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/SemanticEagerness.cs new file mode 100644 index 00000000000..3d6cabe4950 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/SemanticEagerness.cs @@ -0,0 +1,95 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using System; +using System.ComponentModel; +using System.Diagnostics; +using System.Diagnostics.CodeAnalysis; +using System.Text.Json; +using System.Text.Json.Serialization; +using Microsoft.Shared.DiagnosticIds; +using Microsoft.Shared.Diagnostics; + +namespace Microsoft.Extensions.AI; + +/// +/// Represents the eagerness level for semantic voice activity detection. +/// +[Experimental(DiagnosticIds.Experiments.AIRealTime, UrlFormat = DiagnosticIds.UrlFormat)] +[JsonConverter(typeof(Converter))] +[DebuggerDisplay("{Value,nq}")] +public readonly struct SemanticEagerness : IEquatable +{ + /// Gets a value representing low eagerness. + public static SemanticEagerness Low { get; } = new("low"); + + /// Gets a value representing medium eagerness. + public static SemanticEagerness Medium { get; } = new("medium"); + + /// Gets a value representing high eagerness. + public static SemanticEagerness High { get; } = new("high"); + + /// Gets a value representing automatic eagerness detection. + public static SemanticEagerness Auto { get; } = new("auto"); + + /// + /// Gets the value associated with this . + /// + public string Value { get; } + + /// + /// Initializes a new instance of the struct with the provided value. + /// + /// The value to associate with this . + [JsonConstructor] + public SemanticEagerness(string value) + { + Value = Throw.IfNullOrWhitespace(value); + } + + /// + /// Returns a value indicating whether two instances are equivalent, as determined by a + /// case-insensitive comparison of their values. + /// + public static bool operator ==(SemanticEagerness left, SemanticEagerness right) + { + return left.Equals(right); + } + + /// + /// Returns a value indicating whether two instances are not equivalent, as determined by a + /// case-insensitive comparison of their values. 
+ /// + public static bool operator !=(SemanticEagerness left, SemanticEagerness right) + { + return !(left == right); + } + + /// + public override bool Equals([NotNullWhen(true)] object? obj) + => obj is SemanticEagerness other && Equals(other); + + /// + public bool Equals(SemanticEagerness other) + => string.Equals(Value, other.Value, StringComparison.OrdinalIgnoreCase); + + /// + public override int GetHashCode() + => Value is null ? 0 : StringComparer.OrdinalIgnoreCase.GetHashCode(Value); // Value is null for default(SemanticEagerness) + + /// + public override string ToString() => Value ?? string.Empty; // never return null, even for default(SemanticEagerness) + + /// Provides a JsonConverter for serializing SemanticEagerness instances. + [EditorBrowsable(EditorBrowsableState.Never)] + public sealed class Converter : JsonConverter + { + /// + public override SemanticEagerness Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options) => + new(reader.GetString()!); + + /// + public override void Write(Utf8JsonWriter writer, SemanticEagerness value, JsonSerializerOptions options) => + Throw.IfNull(writer).WriteStringValue(value.Value); + } +} diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/SemanticVoiceActivityDetection.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/SemanticVoiceActivityDetection.cs new file mode 100644 index 00000000000..c4c94f3b7f5 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/SemanticVoiceActivityDetection.cs @@ -0,0 +1,19 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Diagnostics.CodeAnalysis; +using Microsoft.Shared.DiagnosticIds; + +namespace Microsoft.Extensions.AI; + +/// +/// Represents options for configuring semantic voice activity detection in a real-time session.
+/// +[Experimental(DiagnosticIds.Experiments.AIRealTime, UrlFormat = DiagnosticIds.UrlFormat)] +public class SemanticVoiceActivityDetection : VoiceActivityDetection +{ + /// + /// Gets the eagerness level for semantic voice activity detection. + /// + public SemanticEagerness Eagerness { get; init; } = SemanticEagerness.Auto; +} diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/ServerVoiceActivityDetection.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/ServerVoiceActivityDetection.cs new file mode 100644 index 00000000000..7b0946337ef --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/ServerVoiceActivityDetection.cs @@ -0,0 +1,37 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Diagnostics.CodeAnalysis; +using Microsoft.Shared.DiagnosticIds; + +namespace Microsoft.Extensions.AI; + +/// +/// Represents options for configuring server voice activity detection in a real-time session. +/// +[Experimental(DiagnosticIds.Experiments.AIRealTime, UrlFormat = DiagnosticIds.UrlFormat)] +public class ServerVoiceActivityDetection : VoiceActivityDetection +{ + /// + /// Gets the idle timeout in milliseconds to detect the end of speech. + /// + public int IdleTimeoutInMilliseconds { get; init; } + + /// + /// Gets the prefix padding in milliseconds to include before detected speech. + /// + public int PrefixPaddingInMilliseconds { get; init; } = 300; + + /// + /// Gets the silence duration in milliseconds to consider as a pause. + /// + public int SilenceDurationInMilliseconds { get; init; } = 500; + + /// + /// Gets the threshold for voice activity detection. + /// + /// + /// A value between 0.0 and 1.0, where higher values make the detection more sensitive. 
+ /// + public double Threshold { get; init; } = 0.5; +} diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/VoiceActivityDetection.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/VoiceActivityDetection.cs new file mode 100644 index 00000000000..aa6c58c00f7 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/VoiceActivityDetection.cs @@ -0,0 +1,24 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Diagnostics.CodeAnalysis; +using Microsoft.Shared.DiagnosticIds; + +namespace Microsoft.Extensions.AI; + +/// +/// Represents options for configuring voice activity detection in a real-time session. +/// +[Experimental(DiagnosticIds.Experiments.AIRealTime, UrlFormat = DiagnosticIds.UrlFormat)] +public class VoiceActivityDetection +{ + /// + /// Gets a value indicating whether to create a response when voice activity is detected. + /// + public bool CreateResponse { get; init; } + + /// + /// Gets a value indicating whether to interrupt the response when voice activity is detected. + /// + public bool InterruptResponse { get; init; } +} diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/SpeechToText/SpeechToTextClientMetadata.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/SpeechToText/SpeechToTextClientMetadata.cs index 24021577803..e8fc8517ab1 100644 --- a/src/Libraries/Microsoft.Extensions.AI.Abstractions/SpeechToText/SpeechToTextClientMetadata.cs +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/SpeechToText/SpeechToTextClientMetadata.cs @@ -38,7 +38,7 @@ public SpeechToTextClientMetadata(string? providerName = null, Uri? providerUri /// Gets the ID of the default model used by this speech to text client. /// /// This value can be null if either the name is unknown or there are multiple possible models associated with this instance. 
- /// An individual request may override this value via . + /// An individual request may override this value via . /// public string? DefaultModelId { get; } } diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/SpeechToText/SpeechToTextOptions.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/SpeechToText/SpeechToTextOptions.cs index 856442fbad3..aacd4259db4 100644 --- a/src/Libraries/Microsoft.Extensions.AI.Abstractions/SpeechToText/SpeechToTextOptions.cs +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/SpeechToText/SpeechToTextOptions.cs @@ -26,9 +26,8 @@ protected SpeechToTextOptions(SpeechToTextOptions? other) } AdditionalProperties = other.AdditionalProperties?.Clone(); - ModelId = other.ModelId; + Transcription = other.Transcription; RawRepresentationFactory = other.RawRepresentationFactory; - SpeechLanguage = other.SpeechLanguage; SpeechSampleRate = other.SpeechSampleRate; TextLanguage = other.TextLanguage; } @@ -36,11 +35,8 @@ protected SpeechToTextOptions(SpeechToTextOptions? other) /// Gets or sets any additional properties associated with the options. public AdditionalPropertiesDictionary? AdditionalProperties { get; set; } - /// Gets or sets the model ID for the speech to text. - public string? ModelId { get; set; } - - /// Gets or sets the language of source speech. - public string? SpeechLanguage { get; set; } + /// Gets or sets the transcription options for the speech to text request. + public TranscriptionOptions? Transcription { get; set; } /// Gets or sets the sample rate of the speech input audio. public int? 
SpeechSampleRate { get; set; } diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/SpeechToText/TranscriptionOptions.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/SpeechToText/TranscriptionOptions.cs new file mode 100644 index 00000000000..876c71c2aca --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/SpeechToText/TranscriptionOptions.cs @@ -0,0 +1,40 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Diagnostics.CodeAnalysis; +using Microsoft.Shared.DiagnosticIds; + +namespace Microsoft.Extensions.AI; + +/// +/// Represents options for configuring transcription. +/// +[Experimental(DiagnosticIds.Experiments.AISpeechToText, UrlFormat = DiagnosticIds.UrlFormat)] +public class TranscriptionOptions +{ + /// + /// Initializes a new instance of the class. + /// + public TranscriptionOptions() + { + } + + /// + /// Gets or sets the language of the input speech audio. + /// + /// + /// The language should be specified in ISO-639-1 format (e.g. "en"). + /// Supplying the input speech language improves transcription accuracy and latency. + /// + public string? SpeechLanguage { get; set; } + + /// + /// Gets or sets the model ID to use for transcription. + /// + public string? ModelId { get; set; } + + /// + /// Gets or sets an optional prompt to guide the transcription. + /// + public string? Prompt { get; set; } +} diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/UsageDetails.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/UsageDetails.cs index b3edbad5e99..4af1aa83b6a 100644 --- a/src/Libraries/Microsoft.Extensions.AI.Abstractions/UsageDetails.cs +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/UsageDetails.cs @@ -1,9 +1,12 @@ -// Licensed to the .NET Foundation under one or more agreements. +// Licensed to the .NET Foundation under one or more agreements. 
// The .NET Foundation licenses this file to you under the MIT license. using System; using System.Collections.Generic; using System.Diagnostics; +using System.Diagnostics.CodeAnalysis; +using System.Text.Json.Serialization; +using Microsoft.Shared.DiagnosticIds; using Microsoft.Shared.Diagnostics; namespace Microsoft.Extensions.AI; @@ -38,6 +41,38 @@ public class UsageDetails /// public long? ReasoningTokenCount { get; set; } + /// Gets or sets the number of audio input tokens used. + /// + /// Audio input tokens should be counted as part of . + /// + [Experimental(DiagnosticIds.Experiments.AIRealTime, UrlFormat = DiagnosticIds.UrlFormat)] + [JsonIgnore] + public long? InputAudioTokenCount { get; set; } + + /// Gets or sets the number of text input tokens used. + /// + /// Text input tokens should be counted as part of . + /// + [Experimental(DiagnosticIds.Experiments.AIRealTime, UrlFormat = DiagnosticIds.UrlFormat)] + [JsonIgnore] + public long? InputTextTokenCount { get; set; } + + /// Gets or sets the number of audio output tokens used. + /// + /// Audio output tokens should be counted as part of . + /// + [Experimental(DiagnosticIds.Experiments.AIRealTime, UrlFormat = DiagnosticIds.UrlFormat)] + [JsonIgnore] + public long? OutputAudioTokenCount { get; set; } + + /// Gets or sets the number of text output tokens used. + /// + /// Text output tokens should be counted as part of . + /// + [Experimental(DiagnosticIds.Experiments.AIRealTime, UrlFormat = DiagnosticIds.UrlFormat)] + [JsonIgnore] + public long? OutputTextTokenCount { get; set; } + /// Gets or sets a dictionary of additional usage counts. /// /// All values set here are assumed to be summable. 
For example, when middleware makes multiple calls to an underlying @@ -57,6 +92,10 @@ public void Add(UsageDetails usage) TotalTokenCount = NullableSum(TotalTokenCount, usage.TotalTokenCount); CachedInputTokenCount = NullableSum(CachedInputTokenCount, usage.CachedInputTokenCount); ReasoningTokenCount = NullableSum(ReasoningTokenCount, usage.ReasoningTokenCount); + InputAudioTokenCount = NullableSum(InputAudioTokenCount, usage.InputAudioTokenCount); + InputTextTokenCount = NullableSum(InputTextTokenCount, usage.InputTextTokenCount); + OutputAudioTokenCount = NullableSum(OutputAudioTokenCount, usage.OutputAudioTokenCount); + OutputTextTokenCount = NullableSum(OutputTextTokenCount, usage.OutputTextTokenCount); if (usage.AdditionalCounts is { } countsToAdd) { @@ -109,6 +148,25 @@ internal string DebuggerDisplay parts.Add($"{nameof(ReasoningTokenCount)} = {reasoning}"); } + if (InputAudioTokenCount is { } inputAudio) + { + parts.Add($"{nameof(InputAudioTokenCount)} = {inputAudio}"); + } + + if (InputTextTokenCount is { } inputText) + { + parts.Add($"{nameof(InputTextTokenCount)} = {inputText}"); + } + + if (OutputAudioTokenCount is { } outputAudio) + { + parts.Add($"{nameof(OutputAudioTokenCount)} = {outputAudio}"); + } + + if (OutputTextTokenCount is { } outputText) + { + parts.Add($"{nameof(OutputTextTokenCount)} = {outputText}"); + } if (AdditionalCounts is { } additionalCounts) { foreach (var entry in additionalCounts) diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/CSharp/Microsoft.Extensions.AI.Evaluation.Reporting.csproj b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/CSharp/Microsoft.Extensions.AI.Evaluation.Reporting.csproj index 8ee31bc2b1a..8a960fc4df1 100644 --- a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/CSharp/Microsoft.Extensions.AI.Evaluation.Reporting.csproj +++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/CSharp/Microsoft.Extensions.AI.Evaluation.Reporting.csproj @@ -1,6 +1,6 @@  -