diff --git a/src/libs/DeepInfra/Generated/DeepInfra.DeepInfraClient.OpenaiChatCompletions.g.cs b/src/libs/DeepInfra/Generated/DeepInfra.DeepInfraClient.OpenaiChatCompletions.g.cs
index 8254402..eeb0143 100644
--- a/src/libs/DeepInfra/Generated/DeepInfra.DeepInfraClient.OpenaiChatCompletions.g.cs
+++ b/src/libs/DeepInfra/Generated/DeepInfra.DeepInfraClient.OpenaiChatCompletions.g.cs
@@ -306,6 +306,9 @@ partial void ProcessOpenaiChatCompletionsResponseContent(
///
/// Constrains effort on reasoning for reasoning models. Currently supported values are none, low, medium, and high. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response. Setting to none disables reasoning entirely if the model supports.
///
+ /// <param name="promptCacheKey">
+ /// A key to identify prompt cache for reuse across requests. If provided, the prompt will be cached and can be reused in subsequent requests with the same key.
+ /// </param>
/// The token to cancel the operation with
///
public async global::System.Threading.Tasks.Task OpenaiChatCompletionsAsync(
@@ -333,6 +336,7 @@ partial void ProcessOpenaiChatCompletionsResponseContent(
bool? logprobs = default,
global::DeepInfra.StreamOptions? streamOptions = default,
global::DeepInfra.OpenAIChatCompletionsInReasoningEffort? reasoningEffort = default,
+ string? promptCacheKey = default,
global::System.Threading.CancellationToken cancellationToken = default)
{
var __request = new global::DeepInfra.OpenAIChatCompletionsIn
@@ -358,6 +362,7 @@ partial void ProcessOpenaiChatCompletionsResponseContent(
Logprobs = logprobs,
StreamOptions = streamOptions,
ReasoningEffort = reasoningEffort,
+ PromptCacheKey = promptCacheKey,
};
return await OpenaiChatCompletionsAsync(
diff --git a/src/libs/DeepInfra/Generated/DeepInfra.DeepInfraClient.OpenaiChatCompletions2.g.cs b/src/libs/DeepInfra/Generated/DeepInfra.DeepInfraClient.OpenaiChatCompletions2.g.cs
index dfe7a1b..3b645a2 100644
--- a/src/libs/DeepInfra/Generated/DeepInfra.DeepInfraClient.OpenaiChatCompletions2.g.cs
+++ b/src/libs/DeepInfra/Generated/DeepInfra.DeepInfraClient.OpenaiChatCompletions2.g.cs
@@ -306,6 +306,9 @@ partial void ProcessOpenaiChatCompletions2ResponseContent(
///
/// Constrains effort on reasoning for reasoning models. Currently supported values are none, low, medium, and high. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response. Setting to none disables reasoning entirely if the model supports.
///
+ /// <param name="promptCacheKey">
+ /// A key to identify prompt cache for reuse across requests. If provided, the prompt will be cached and can be reused in subsequent requests with the same key.
+ /// </param>
/// The token to cancel the operation with
///
public async global::System.Threading.Tasks.Task OpenaiChatCompletions2Async(
@@ -333,6 +336,7 @@ partial void ProcessOpenaiChatCompletions2ResponseContent(
bool? logprobs = default,
global::DeepInfra.StreamOptions? streamOptions = default,
global::DeepInfra.OpenAIChatCompletionsInReasoningEffort? reasoningEffort = default,
+ string? promptCacheKey = default,
global::System.Threading.CancellationToken cancellationToken = default)
{
var __request = new global::DeepInfra.OpenAIChatCompletionsIn
@@ -358,6 +362,7 @@ partial void ProcessOpenaiChatCompletions2ResponseContent(
Logprobs = logprobs,
StreamOptions = streamOptions,
ReasoningEffort = reasoningEffort,
+ PromptCacheKey = promptCacheKey,
};
return await OpenaiChatCompletions2Async(
diff --git a/src/libs/DeepInfra/Generated/DeepInfra.IDeepInfraClient.OpenaiChatCompletions.g.cs b/src/libs/DeepInfra/Generated/DeepInfra.IDeepInfraClient.OpenaiChatCompletions.g.cs
index 01b7fa5..9dcacaf 100644
--- a/src/libs/DeepInfra/Generated/DeepInfra.IDeepInfraClient.OpenaiChatCompletions.g.cs
+++ b/src/libs/DeepInfra/Generated/DeepInfra.IDeepInfraClient.OpenaiChatCompletions.g.cs
@@ -96,6 +96,9 @@ public partial interface IDeepInfraClient
///
/// Constrains effort on reasoning for reasoning models. Currently supported values are none, low, medium, and high. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response. Setting to none disables reasoning entirely if the model supports.
///
+ /// <param name="promptCacheKey">
+ /// A key to identify prompt cache for reuse across requests. If provided, the prompt will be cached and can be reused in subsequent requests with the same key.
+ /// </param>
/// The token to cancel the operation with
///
global::System.Threading.Tasks.Task OpenaiChatCompletionsAsync(
@@ -123,6 +126,7 @@ public partial interface IDeepInfraClient
bool? logprobs = default,
global::DeepInfra.StreamOptions? streamOptions = default,
global::DeepInfra.OpenAIChatCompletionsInReasoningEffort? reasoningEffort = default,
+ string? promptCacheKey = default,
global::System.Threading.CancellationToken cancellationToken = default);
}
}
\ No newline at end of file
diff --git a/src/libs/DeepInfra/Generated/DeepInfra.IDeepInfraClient.OpenaiChatCompletions2.g.cs b/src/libs/DeepInfra/Generated/DeepInfra.IDeepInfraClient.OpenaiChatCompletions2.g.cs
index 4367aab..1d8b9dd 100644
--- a/src/libs/DeepInfra/Generated/DeepInfra.IDeepInfraClient.OpenaiChatCompletions2.g.cs
+++ b/src/libs/DeepInfra/Generated/DeepInfra.IDeepInfraClient.OpenaiChatCompletions2.g.cs
@@ -96,6 +96,9 @@ public partial interface IDeepInfraClient
///
/// Constrains effort on reasoning for reasoning models. Currently supported values are none, low, medium, and high. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response. Setting to none disables reasoning entirely if the model supports.
///
+ /// <param name="promptCacheKey">
+ /// A key to identify prompt cache for reuse across requests. If provided, the prompt will be cached and can be reused in subsequent requests with the same key.
+ /// </param>
/// The token to cancel the operation with
///
global::System.Threading.Tasks.Task OpenaiChatCompletions2Async(
@@ -123,6 +126,7 @@ public partial interface IDeepInfraClient
bool? logprobs = default,
global::DeepInfra.StreamOptions? streamOptions = default,
global::DeepInfra.OpenAIChatCompletionsInReasoningEffort? reasoningEffort = default,
+ string? promptCacheKey = default,
global::System.Threading.CancellationToken cancellationToken = default);
}
}
\ No newline at end of file
diff --git a/src/libs/DeepInfra/Generated/DeepInfra.Models.OpenAIChatCompletionsIn.g.cs b/src/libs/DeepInfra/Generated/DeepInfra.Models.OpenAIChatCompletionsIn.g.cs
index cf9c7f3..35d850d 100644
--- a/src/libs/DeepInfra/Generated/DeepInfra.Models.OpenAIChatCompletionsIn.g.cs
+++ b/src/libs/DeepInfra/Generated/DeepInfra.Models.OpenAIChatCompletionsIn.g.cs
@@ -153,6 +153,12 @@ public sealed partial class OpenAIChatCompletionsIn
[global::System.Text.Json.Serialization.JsonConverter(typeof(global::DeepInfra.JsonConverters.OpenAIChatCompletionsInReasoningEffortJsonConverter))]
public global::DeepInfra.OpenAIChatCompletionsInReasoningEffort? ReasoningEffort { get; set; }
+ /// <summary>
+ /// A key to identify prompt cache for reuse across requests. If provided, the prompt will be cached and can be reused in subsequent requests with the same key.
+ /// </summary>
+ [global::System.Text.Json.Serialization.JsonPropertyName("prompt_cache_key")]
+ public string? PromptCacheKey { get; set; }
+
///
/// Additional properties that are not explicitly defined in the schema
///
@@ -232,6 +238,9 @@ public sealed partial class OpenAIChatCompletionsIn
///
/// Constrains effort on reasoning for reasoning models. Currently supported values are none, low, medium, and high. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response. Setting to none disables reasoning entirely if the model supports.
///
+ /// <param name="promptCacheKey">
+ /// A key to identify prompt cache for reuse across requests. If provided, the prompt will be cached and can be reused in subsequent requests with the same key.
+ /// </param>
#if NET7_0_OR_GREATER
[global::System.Diagnostics.CodeAnalysis.SetsRequiredMembers]
#endif
@@ -256,7 +265,8 @@ public OpenAIChatCompletionsIn(
int? seed,
bool? logprobs,
global::DeepInfra.StreamOptions? streamOptions,
- global::DeepInfra.OpenAIChatCompletionsInReasoningEffort? reasoningEffort)
+ global::DeepInfra.OpenAIChatCompletionsInReasoningEffort? reasoningEffort,
+ string? promptCacheKey)
{
this.Model = model ?? throw new global::System.ArgumentNullException(nameof(model));
this.Messages = messages ?? throw new global::System.ArgumentNullException(nameof(messages));
@@ -279,6 +289,7 @@ public OpenAIChatCompletionsIn(
this.Logprobs = logprobs;
this.StreamOptions = streamOptions;
this.ReasoningEffort = reasoningEffort;
+ this.PromptCacheKey = promptCacheKey;
}
///
diff --git a/src/libs/DeepInfra/openapi.yaml b/src/libs/DeepInfra/openapi.yaml
index b3a9daa..f3effee 100644
--- a/src/libs/DeepInfra/openapi.yaml
+++ b/src/libs/DeepInfra/openapi.yaml
@@ -7287,6 +7287,11 @@ components:
type: string
description: 'Constrains effort on reasoning for reasoning models. Currently supported values are none, low, medium, and high. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response. Setting to none disables reasoning entirely if the model supports.'
nullable: true
+ prompt_cache_key:
+ title: Prompt Cache Key
+ type: string
+ description: 'A key to identify prompt cache for reuse across requests. If provided, the prompt will be cached and can be reused in subsequent requests with the same key.'
+ nullable: true
OpenAICompletionsIn:
title: OpenAICompletionsIn
required: