diff --git a/src/libs/DeepInfra/Generated/DeepInfra.DeepInfraClient.OpenaiChatCompletions.g.cs b/src/libs/DeepInfra/Generated/DeepInfra.DeepInfraClient.OpenaiChatCompletions.g.cs
index 8254402..eeb0143 100644
--- a/src/libs/DeepInfra/Generated/DeepInfra.DeepInfraClient.OpenaiChatCompletions.g.cs
+++ b/src/libs/DeepInfra/Generated/DeepInfra.DeepInfraClient.OpenaiChatCompletions.g.cs
@@ -306,6 +306,9 @@ partial void ProcessOpenaiChatCompletionsResponseContent(
///
/// Constrains effort on reasoning for reasoning models. Currently supported values are none, low, medium, and high. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response. Setting to none disables reasoning entirely if the model supports.
///
+ /// <param name="promptCacheKey">
+ /// A key to identify prompt cache for reuse across requests. If provided, the prompt will be cached and can be reused in subsequent requests with the same key.
+ /// </param>
/// The token to cancel the operation with
///
public async global::System.Threading.Tasks.Task OpenaiChatCompletionsAsync(
@@ -333,6 +336,7 @@ partial void ProcessOpenaiChatCompletionsResponseContent(
bool? logprobs = default,
global::DeepInfra.StreamOptions? streamOptions = default,
global::DeepInfra.OpenAIChatCompletionsInReasoningEffort? reasoningEffort = default,
+ string? promptCacheKey = default,
global::System.Threading.CancellationToken cancellationToken = default)
{
var __request = new global::DeepInfra.OpenAIChatCompletionsIn
@@ -358,6 +362,7 @@ partial void ProcessOpenaiChatCompletionsResponseContent(
Logprobs = logprobs,
StreamOptions = streamOptions,
ReasoningEffort = reasoningEffort,
+ PromptCacheKey = promptCacheKey,
};
return await OpenaiChatCompletionsAsync(
diff --git a/src/libs/DeepInfra/Generated/DeepInfra.DeepInfraClient.OpenaiChatCompletions2.g.cs b/src/libs/DeepInfra/Generated/DeepInfra.DeepInfraClient.OpenaiChatCompletions2.g.cs
index dfe7a1b..3b645a2 100644
--- a/src/libs/DeepInfra/Generated/DeepInfra.DeepInfraClient.OpenaiChatCompletions2.g.cs
+++ b/src/libs/DeepInfra/Generated/DeepInfra.DeepInfraClient.OpenaiChatCompletions2.g.cs
@@ -306,6 +306,9 @@ partial void ProcessOpenaiChatCompletions2ResponseContent(
///
/// Constrains effort on reasoning for reasoning models. Currently supported values are none, low, medium, and high. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response. Setting to none disables reasoning entirely if the model supports.
///
+ /// <param name="promptCacheKey">
+ /// A key to identify prompt cache for reuse across requests. If provided, the prompt will be cached and can be reused in subsequent requests with the same key.
+ /// </param>
/// The token to cancel the operation with
///
public async global::System.Threading.Tasks.Task OpenaiChatCompletions2Async(
@@ -333,6 +336,7 @@ partial void ProcessOpenaiChatCompletions2ResponseContent(
bool? logprobs = default,
global::DeepInfra.StreamOptions? streamOptions = default,
global::DeepInfra.OpenAIChatCompletionsInReasoningEffort? reasoningEffort = default,
+ string? promptCacheKey = default,
global::System.Threading.CancellationToken cancellationToken = default)
{
var __request = new global::DeepInfra.OpenAIChatCompletionsIn
@@ -358,6 +362,7 @@ partial void ProcessOpenaiChatCompletions2ResponseContent(
Logprobs = logprobs,
StreamOptions = streamOptions,
ReasoningEffort = reasoningEffort,
+ PromptCacheKey = promptCacheKey,
};
return await OpenaiChatCompletions2Async(
diff --git a/src/libs/DeepInfra/Generated/DeepInfra.IDeepInfraClient.OpenaiChatCompletions.g.cs b/src/libs/DeepInfra/Generated/DeepInfra.IDeepInfraClient.OpenaiChatCompletions.g.cs
index 01b7fa5..9dcacaf 100644
--- a/src/libs/DeepInfra/Generated/DeepInfra.IDeepInfraClient.OpenaiChatCompletions.g.cs
+++ b/src/libs/DeepInfra/Generated/DeepInfra.IDeepInfraClient.OpenaiChatCompletions.g.cs
@@ -96,6 +96,9 @@ public partial interface IDeepInfraClient
///
/// Constrains effort on reasoning for reasoning models. Currently supported values are none, low, medium, and high. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response. Setting to none disables reasoning entirely if the model supports.
///
+ /// <param name="promptCacheKey">
+ /// A key to identify prompt cache for reuse across requests. If provided, the prompt will be cached and can be reused in subsequent requests with the same key.
+ /// </param>
/// The token to cancel the operation with
///
global::System.Threading.Tasks.Task OpenaiChatCompletionsAsync(
@@ -123,6 +126,7 @@ public partial interface IDeepInfraClient
bool? logprobs = default,
global::DeepInfra.StreamOptions? streamOptions = default,
global::DeepInfra.OpenAIChatCompletionsInReasoningEffort? reasoningEffort = default,
+ string? promptCacheKey = default,
global::System.Threading.CancellationToken cancellationToken = default);
}
}
\ No newline at end of file
diff --git a/src/libs/DeepInfra/Generated/DeepInfra.IDeepInfraClient.OpenaiChatCompletions2.g.cs b/src/libs/DeepInfra/Generated/DeepInfra.IDeepInfraClient.OpenaiChatCompletions2.g.cs
index 4367aab..1d8b9dd 100644
--- a/src/libs/DeepInfra/Generated/DeepInfra.IDeepInfraClient.OpenaiChatCompletions2.g.cs
+++ b/src/libs/DeepInfra/Generated/DeepInfra.IDeepInfraClient.OpenaiChatCompletions2.g.cs
@@ -96,6 +96,9 @@ public partial interface IDeepInfraClient
///
/// Constrains effort on reasoning for reasoning models. Currently supported values are none, low, medium, and high. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response. Setting to none disables reasoning entirely if the model supports.
///
+ /// <param name="promptCacheKey">
+ /// A key to identify prompt cache for reuse across requests. If provided, the prompt will be cached and can be reused in subsequent requests with the same key.
+ /// </param>
/// The token to cancel the operation with
///
global::System.Threading.Tasks.Task OpenaiChatCompletions2Async(
@@ -123,6 +126,7 @@ public partial interface IDeepInfraClient
bool? logprobs = default,
global::DeepInfra.StreamOptions? streamOptions = default,
global::DeepInfra.OpenAIChatCompletionsInReasoningEffort? reasoningEffort = default,
+ string? promptCacheKey = default,
global::System.Threading.CancellationToken cancellationToken = default);
}
}
\ No newline at end of file
diff --git a/src/libs/DeepInfra/Generated/DeepInfra.Models.OpenAIChatCompletionsIn.g.cs b/src/libs/DeepInfra/Generated/DeepInfra.Models.OpenAIChatCompletionsIn.g.cs
index cf9c7f3..35d850d 100644
--- a/src/libs/DeepInfra/Generated/DeepInfra.Models.OpenAIChatCompletionsIn.g.cs
+++ b/src/libs/DeepInfra/Generated/DeepInfra.Models.OpenAIChatCompletionsIn.g.cs
@@ -153,6 +153,12 @@ public sealed partial class OpenAIChatCompletionsIn
[global::System.Text.Json.Serialization.JsonConverter(typeof(global::DeepInfra.JsonConverters.OpenAIChatCompletionsInReasoningEffortJsonConverter))]
public global::DeepInfra.OpenAIChatCompletionsInReasoningEffort? ReasoningEffort { get; set; }
+ /// <summary>
+ /// A key to identify prompt cache for reuse across requests. If provided, the prompt will be cached and can be reused in subsequent requests with the same key.
+ /// </summary>
+ [global::System.Text.Json.Serialization.JsonPropertyName("prompt_cache_key")]
+ public string? PromptCacheKey { get; set; }
+
///
/// Additional properties that are not explicitly defined in the schema
///
@@ -232,6 +238,9 @@ public sealed partial class OpenAIChatCompletionsIn
///
/// Constrains effort on reasoning for reasoning models. Currently supported values are none, low, medium, and high. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response. Setting to none disables reasoning entirely if the model supports.
///
+ /// <param name="promptCacheKey">
+ /// A key to identify prompt cache for reuse across requests. If provided, the prompt will be cached and can be reused in subsequent requests with the same key.
+ /// </param>
#if NET7_0_OR_GREATER
[global::System.Diagnostics.CodeAnalysis.SetsRequiredMembers]
#endif
@@ -256,7 +265,8 @@ public OpenAIChatCompletionsIn(
int? seed,
bool? logprobs,
global::DeepInfra.StreamOptions? streamOptions,
- global::DeepInfra.OpenAIChatCompletionsInReasoningEffort? reasoningEffort)
+ global::DeepInfra.OpenAIChatCompletionsInReasoningEffort? reasoningEffort,
+ string? promptCacheKey)
{
this.Model = model ?? throw new global::System.ArgumentNullException(nameof(model));
this.Messages = messages ?? throw new global::System.ArgumentNullException(nameof(messages));
@@ -279,6 +289,7 @@ public OpenAIChatCompletionsIn(
this.Logprobs = logprobs;
this.StreamOptions = streamOptions;
this.ReasoningEffort = reasoningEffort;
+ this.PromptCacheKey = promptCacheKey;
}
///
diff --git a/src/libs/DeepInfra/openapi.yaml b/src/libs/DeepInfra/openapi.yaml
index b3a9daa..f3effee 100644
--- a/src/libs/DeepInfra/openapi.yaml
+++ b/src/libs/DeepInfra/openapi.yaml
@@ -7287,6 +7287,11 @@ components:
type: string
description: 'Constrains effort on reasoning for reasoning models. Currently supported values are none, low, medium, and high. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response. Setting to none disables reasoning entirely if the model supports.'
nullable: true
+ prompt_cache_key:
+ title: Prompt Cache Key
+ type: string
+ description: 'A key to identify prompt cache for reuse across requests. If provided, the prompt will be cached and can be reused in subsequent requests with the same key.'
+ nullable: true
OpenAICompletionsIn:
title: OpenAICompletionsIn
required: