Merged
@@ -306,6 +306,9 @@ partial void ProcessOpenaiChatCompletionsResponseContent(
/// <param name="reasoningEffort">
/// Constrains effort on reasoning for reasoning models. Currently supported values are none, low, medium, and high. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response. Setting it to none disables reasoning entirely, if the model supports it.
/// </param>
/// <param name="promptCacheKey">
/// A key that identifies the prompt cache for reuse across requests. If provided, the prompt will be cached and can be reused in subsequent requests that supply the same key.
/// </param>
/// <param name="cancellationToken">The token to cancel the operation with</param>
/// <exception cref="global::System.InvalidOperationException"></exception>
public async global::System.Threading.Tasks.Task<string> OpenaiChatCompletionsAsync(
@@ -333,6 +336,7 @@ partial void ProcessOpenaiChatCompletionsResponseContent(
bool? logprobs = default,
global::DeepInfra.StreamOptions? streamOptions = default,
global::DeepInfra.OpenAIChatCompletionsInReasoningEffort? reasoningEffort = default,
string? promptCacheKey = default,
global::System.Threading.CancellationToken cancellationToken = default)
{
var __request = new global::DeepInfra.OpenAIChatCompletionsIn
@@ -358,6 +362,7 @@ partial void ProcessOpenaiChatCompletionsResponseContent(
Logprobs = logprobs,
StreamOptions = streamOptions,
ReasoningEffort = reasoningEffort,
PromptCacheKey = promptCacheKey,
};

return await OpenaiChatCompletionsAsync(
@@ -306,6 +306,9 @@ partial void ProcessOpenaiChatCompletions2ResponseContent(
/// <param name="reasoningEffort">
/// Constrains effort on reasoning for reasoning models. Currently supported values are none, low, medium, and high. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response. Setting it to none disables reasoning entirely, if the model supports it.
/// </param>
/// <param name="promptCacheKey">
/// A key that identifies the prompt cache for reuse across requests. If provided, the prompt will be cached and can be reused in subsequent requests that supply the same key.
/// </param>
/// <param name="cancellationToken">The token to cancel the operation with</param>
/// <exception cref="global::System.InvalidOperationException"></exception>
public async global::System.Threading.Tasks.Task<string> OpenaiChatCompletions2Async(
@@ -333,6 +336,7 @@ partial void ProcessOpenaiChatCompletions2ResponseContent(
bool? logprobs = default,
global::DeepInfra.StreamOptions? streamOptions = default,
global::DeepInfra.OpenAIChatCompletionsInReasoningEffort? reasoningEffort = default,
string? promptCacheKey = default,
global::System.Threading.CancellationToken cancellationToken = default)
{
var __request = new global::DeepInfra.OpenAIChatCompletionsIn
@@ -358,6 +362,7 @@ partial void ProcessOpenaiChatCompletions2ResponseContent(
Logprobs = logprobs,
StreamOptions = streamOptions,
ReasoningEffort = reasoningEffort,
PromptCacheKey = promptCacheKey,
};

return await OpenaiChatCompletions2Async(
@@ -96,6 +96,9 @@ public partial interface IDeepInfraClient
/// <param name="reasoningEffort">
/// Constrains effort on reasoning for reasoning models. Currently supported values are none, low, medium, and high. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response. Setting it to none disables reasoning entirely, if the model supports it.
/// </param>
/// <param name="promptCacheKey">
/// A key that identifies the prompt cache for reuse across requests. If provided, the prompt will be cached and can be reused in subsequent requests that supply the same key.
/// </param>
/// <param name="cancellationToken">The token to cancel the operation with</param>
/// <exception cref="global::System.InvalidOperationException"></exception>
global::System.Threading.Tasks.Task<string> OpenaiChatCompletionsAsync(
@@ -123,6 +126,7 @@ public partial interface IDeepInfraClient
bool? logprobs = default,
global::DeepInfra.StreamOptions? streamOptions = default,
global::DeepInfra.OpenAIChatCompletionsInReasoningEffort? reasoningEffort = default,
string? promptCacheKey = default,
global::System.Threading.CancellationToken cancellationToken = default);
}
}
@@ -96,6 +96,9 @@ public partial interface IDeepInfraClient
/// <param name="reasoningEffort">
/// Constrains effort on reasoning for reasoning models. Currently supported values are none, low, medium, and high. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response. Setting it to none disables reasoning entirely, if the model supports it.
/// </param>
/// <param name="promptCacheKey">
/// A key that identifies the prompt cache for reuse across requests. If provided, the prompt will be cached and can be reused in subsequent requests that supply the same key.
/// </param>
/// <param name="cancellationToken">The token to cancel the operation with</param>
/// <exception cref="global::System.InvalidOperationException"></exception>
global::System.Threading.Tasks.Task<string> OpenaiChatCompletions2Async(
@@ -123,6 +126,7 @@ public partial interface IDeepInfraClient
bool? logprobs = default,
global::DeepInfra.StreamOptions? streamOptions = default,
global::DeepInfra.OpenAIChatCompletionsInReasoningEffort? reasoningEffort = default,
string? promptCacheKey = default,
global::System.Threading.CancellationToken cancellationToken = default);
}
}
@@ -153,6 +153,12 @@ public sealed partial class OpenAIChatCompletionsIn
[global::System.Text.Json.Serialization.JsonConverter(typeof(global::DeepInfra.JsonConverters.OpenAIChatCompletionsInReasoningEffortJsonConverter))]
public global::DeepInfra.OpenAIChatCompletionsInReasoningEffort? ReasoningEffort { get; set; }

/// <summary>
/// A key that identifies the prompt cache for reuse across requests. If provided, the prompt will be cached and can be reused in subsequent requests that supply the same key.
/// </summary>
[global::System.Text.Json.Serialization.JsonPropertyName("prompt_cache_key")]
public string? PromptCacheKey { get; set; }

/// <summary>
/// Additional properties that are not explicitly defined in the schema
/// </summary>
@@ -232,6 +238,9 @@ public sealed partial class OpenAIChatCompletionsIn
/// <param name="reasoningEffort">
/// Constrains effort on reasoning for reasoning models. Currently supported values are none, low, medium, and high. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response. Setting it to none disables reasoning entirely, if the model supports it.
/// </param>
/// <param name="promptCacheKey">
/// A key that identifies the prompt cache for reuse across requests. If provided, the prompt will be cached and can be reused in subsequent requests that supply the same key.
/// </param>
#if NET7_0_OR_GREATER
[global::System.Diagnostics.CodeAnalysis.SetsRequiredMembers]
#endif
@@ -256,7 +265,8 @@ public OpenAIChatCompletionsIn(
int? seed,
bool? logprobs,
global::DeepInfra.StreamOptions? streamOptions,
global::DeepInfra.OpenAIChatCompletionsInReasoningEffort? reasoningEffort)
global::DeepInfra.OpenAIChatCompletionsInReasoningEffort? reasoningEffort,
string? promptCacheKey)
{
this.Model = model ?? throw new global::System.ArgumentNullException(nameof(model));
this.Messages = messages ?? throw new global::System.ArgumentNullException(nameof(messages));
@@ -279,6 +289,7 @@ public OpenAIChatCompletionsIn(
this.Logprobs = logprobs;
this.StreamOptions = streamOptions;
this.ReasoningEffort = reasoningEffort;
this.PromptCacheKey = promptCacheKey;
}

/// <summary>
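From the caller's side, the new parameter slots in alongside the existing optional ones. A minimal sketch, not taken from this PR: the model id, the `messages` value, and the cache-key string below are all hypothetical, and the `model`/`messages` parameter names are assumed from the flattened request shape.

```csharp
// Sketch only: assumes an IDeepInfraClient `client` and a prepared
// `messages` collection built elsewhere with the generated SDK types.
var reply = await client.OpenaiChatCompletionsAsync(
    model: "meta-llama/Meta-Llama-3-8B-Instruct", // hypothetical model id
    messages: messages,
    promptCacheKey: "tenant-42:system-prompt.v3", // hypothetical cache key
    cancellationToken: cancellationToken);
```

Reusing the same `promptCacheKey` string across requests that share a long common prefix is what lets the server reuse the cached prompt.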
src/libs/DeepInfra/openapi.yaml (5 additions & 0 deletions)
@@ -7287,6 +7287,11 @@ components:
type: string
description: 'Constrains effort on reasoning for reasoning models. Currently supported values are none, low, medium, and high. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response. Setting it to none disables reasoning entirely, if the model supports it.'
nullable: true
prompt_cache_key:
title: Prompt Cache Key
type: string
description: 'A key that identifies the prompt cache for reuse across requests. If provided, the prompt will be cached and can be reused in subsequent requests that supply the same key.'
nullable: true
Comment on lines +7290 to +7294
🛠️ Refactor suggestion

Add length/charset constraints to prevent abuse and collisions

Constrain the key to a sane length and a restricted character set; this avoids accidental PII, log injection, and oversized headers/bodies.

         prompt_cache_key:
           title: Prompt Cache Key
           type: string
-          description: 'A key to identify prompt cache for reuse across requests. If provided, the prompt will be cached and can be reused in subsequent requests with the same key.'
+          description: 'A key to identify the prompt cache for reuse across requests. Scoped to the authenticated account/team. Case-sensitive.'
+          minLength: 1
+          maxLength: 256
+          pattern: '^[A-Za-z0-9._:-]+$'
           nullable: true
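If the constraints suggested above were adopted, clients could mirror them before sending a request. A self-contained sketch of that check; the limits (1–256) and the character class come from the review suggestion, not from the merged spec:

```csharp
using System;
using System.Text.RegularExpressions;

static class PromptCacheKeyValidator
{
    // Mirrors the suggested OpenAPI constraints: minLength 1, maxLength 256,
    // pattern ^[A-Za-z0-9._:-]+$ (no spaces or control characters).
    private static readonly Regex Allowed = new Regex("^[A-Za-z0-9._:-]+$");

    public static bool IsValid(string? key) =>
        key is not null && key.Length >= 1 && key.Length <= 256 && Allowed.IsMatch(key);

    public static void Main()
    {
        Console.WriteLine(IsValid("tenant-42:system-prompt.v3")); // True
        Console.WriteLine(IsValid("has spaces"));                 // False
        Console.WriteLine(IsValid(""));                           // False
    }
}
```

Putting `-` last in the character class keeps it literal; the anchors reject any key with leading or trailing junk rather than matching a substring.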

OpenAICompletionsIn:
title: OpenAICompletionsIn
required: