diff --git a/google/genai/types.py b/google/genai/types.py index 609134c58..6748f52b7 100644 --- a/google/genai/types.py +++ b/google/genai/types.py @@ -4901,32 +4901,85 @@ class ToolConfigDict(TypedDict, total=False): ToolConfigOrDict = Union[ToolConfig, ToolConfigDict] +class VoiceConsentSignature(_common.BaseModel): + """The signature of the voice consent check.""" + + signature: Optional[str] = Field( + default=None, + description="""The signature string. + """, + ) + + +class VoiceConsentSignatureDict(TypedDict, total=False): + """The signature of the voice consent check.""" + + signature: Optional[str] + """The signature string. + """ + + +VoiceConsentSignatureOrDict = Union[ + VoiceConsentSignature, VoiceConsentSignatureDict +] + + class ReplicatedVoiceConfig(_common.BaseModel): - """ReplicatedVoiceConfig is used to configure replicated voice.""" + """The configuration for the replicated voice to use.""" mime_type: Optional[str] = Field( default=None, - description="""The mime type of the replicated voice. + description="""The mimetype of the voice sample. The only currently supported + value is `audio/wav`. This represents 16-bit signed little-endian wav + data, with a 24kHz sampling rate. """, ) voice_sample_audio: Optional[bytes] = Field( default=None, - description="""The sample audio of the replicated voice. + description="""The sample of the custom voice. """, ) + consent_audio: Optional[bytes] = Field( + default=None, + description="""Recorded consent verifying ownership of the voice. This + represents 16-bit signed little-endian wav data, with a 24kHz sampling + rate.""", + ) + voice_consent_signature: Optional[VoiceConsentSignature] = Field( + default=None, + description="""Signature of a previously verified consent audio. This should be + populated with a signature generated by the server for a previous + request containing the consent_audio field. When provided, the + signature is verified instead of the consent_audio field to reduce + latency. Requests will fail if the signature is invalid or expired.""", + ) class ReplicatedVoiceConfigDict(TypedDict, total=False): - """ReplicatedVoiceConfig is used to configure replicated voice.""" + """The configuration for the replicated voice to use.""" mime_type: Optional[str] - """The mime type of the replicated voice. + """The mimetype of the voice sample. The only currently supported + value is `audio/wav`. This represents 16-bit signed little-endian wav + data, with a 24kHz sampling rate. """ voice_sample_audio: Optional[bytes] - """The sample audio of the replicated voice. + """The sample of the custom voice. """ + consent_audio: Optional[bytes] + """Recorded consent verifying ownership of the voice. This + represents 16-bit signed little-endian wav data, with a 24kHz sampling + rate.""" + + voice_consent_signature: Optional[VoiceConsentSignatureDict] + """Signature of a previously verified consent audio. This should be + populated with a signature generated by the server for a previous + request containing the consent_audio field. When provided, the + signature is verified instead of the consent_audio field to reduce + latency. Requests will fail if the signature is invalid or expired.""" + ReplicatedVoiceConfigOrDict = Union[ ReplicatedVoiceConfig, ReplicatedVoiceConfigDict @@ -4952,10 +5005,13 @@ class PrebuiltVoiceConfigDict(TypedDict, total=False): class VoiceConfig(_common.BaseModel): + """The configuration for the voice to use.""" replicated_voice_config: Optional[ReplicatedVoiceConfig] = Field( default=None, - description="""If true, the model will use a replicated voice for the response.""", + description="""The configuration for a replicated voice, which is a clone of a + user's voice that can be used for speech synthesis. If this is unset, a + default voice is used.""", ) prebuilt_voice_config: Optional[PrebuiltVoiceConfig] = Field( default=None, description="""The configuration for a prebuilt voice.""" @@ -4963,9 +5019,12 @@ class VoiceConfig(_common.BaseModel): class VoiceConfigDict(TypedDict, total=False): + """The configuration for the voice to use.""" replicated_voice_config: Optional[ReplicatedVoiceConfigDict] - """If true, the model will use a replicated voice for the response.""" + """The configuration for a replicated voice, which is a clone of a + user's voice that can be used for speech synthesis. If this is unset, a + default voice is used.""" prebuilt_voice_config: Optional[PrebuiltVoiceConfigDict] """The configuration for a prebuilt voice.""" @@ -5022,10 +5081,11 @@ class MultiSpeakerVoiceConfigDict(TypedDict, total=False): class SpeechConfig(_common.BaseModel): + """Config for speech generation and transcription.""" voice_config: Optional[VoiceConfig] = Field( default=None, - description="""Configuration for the voice of the response.""", + description="""The configuration in case of single-voice output.""", ) language_code: Optional[str] = Field( default=None, @@ -5038,9 +5098,10 @@ class SpeechConfig(_common.BaseModel): class SpeechConfigDict(TypedDict, total=False): + """Config for speech generation and transcription.""" voice_config: Optional[VoiceConfigDict] - """Configuration for the voice of the response.""" + """The configuration in case of single-voice output.""" language_code: Optional[str] """Optional. The language code (ISO 639-1) for the speech synthesis."""