From 41824f709c13955748ee08fdbb3dbef7b9c1feb2 Mon Sep 17 00:00:00 2001 From: marcus-daily <111281783+marcus-daily@users.noreply.github.com> Date: Thu, 8 May 2025 15:01:42 +0100 Subject: [PATCH 1/2] Add sendCustomMessage(), log in onGenericMessage, use gpt-4o-transcribe model --- gradle/libs.versions.toml | 2 +- .../VoiceClientManager.kt | 59 ++++++++++++++++--- 2 files changed, 52 insertions(+), 9 deletions(-) diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index e5bbb49..d34f425 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -2,7 +2,7 @@ accompanistPermissions = "0.34.0" agp = "8.5.2" constraintlayoutCompose = "1.0.1" -pipecatClient = "0.3.3" +pipecatClient = "0.3.5" kotlin = "2.0.20" coreKtx = "1.13.1" lifecycleRuntimeKtx = "2.8.6" diff --git a/openai-realtime-webrtc-demo/src/main/java/ai/pipecat/openai_realtime_webrtc_demo/VoiceClientManager.kt b/openai-realtime-webrtc-demo/src/main/java/ai/pipecat/openai_realtime_webrtc_demo/VoiceClientManager.kt index fb766f1..fe5a637 100644 --- a/openai-realtime-webrtc-demo/src/main/java/ai/pipecat/openai_realtime_webrtc_demo/VoiceClientManager.kt +++ b/openai-realtime-webrtc-demo/src/main/java/ai/pipecat/openai_realtime_webrtc_demo/VoiceClientManager.kt @@ -4,11 +4,14 @@ import ai.pipecat.client.RTVIClient import ai.pipecat.client.RTVIClientOptions import ai.pipecat.client.RTVIClientParams import ai.pipecat.client.RTVIEventCallbacks +import ai.pipecat.client.helper.LLMFunctionCall +import ai.pipecat.client.helper.LLMHelper import ai.pipecat.client.openai_realtime_webrtc.OpenAIRealtimeSessionConfig import ai.pipecat.client.openai_realtime_webrtc.OpenAIRealtimeWebRTCTransport import ai.pipecat.client.result.Future import ai.pipecat.client.result.RTVIError import ai.pipecat.client.result.Result +import ai.pipecat.client.transport.MsgClientToServer import ai.pipecat.client.transport.MsgServerToClient import ai.pipecat.client.types.ActionDescription import ai.pipecat.client.types.Participant @@ -26,6 +29,7 @@ import androidx.compose.runtime.Stable import androidx.compose.runtime.mutableFloatStateOf import androidx.compose.runtime.mutableStateListOf import androidx.compose.runtime.mutableStateOf +import kotlinx.serialization.json.JsonElement @Immutable data class Error(val message: String) @@ -65,6 +69,8 @@ class VoiceClientManager(private val context: Context) { fun start() { + infix fun String.toStr(rhs: String) = this to Value.Str(rhs) + if (client.value != null) { return } @@ -91,14 +97,26 @@ class VoiceClientManager(private val context: Context) { ) ),*/ initialConfig = OpenAIRealtimeSessionConfig( - turnDetection = Value.Object( - "type" to Value.Str("semantic_vad") - ), - inputAudioNoiseReduction = Value.Object( - "type" to Value.Str("near_field") - ), - inputAudioTranscription = Value.Object( - "model" to Value.Str("whisper-1") + voice = "ballad", + turnDetection = Value.Object("type" to Value.Str("semantic_vad")), + inputAudioNoiseReduction = Value.Object("type" to Value.Str("near_field")), + inputAudioTranscription = Value.Object("model" to Value.Str("gpt-4o-transcribe")), + tools = Value.Array( + Value.Object( + "type" toStr "function", + "name" toStr "get_current_weather", + "description" toStr "Get the current weather for a given location", + "parameters" to Value.Object( + "type" toStr "object", + "properties" to Value.Object( + "location" to Value.Object( + "type" toStr "string", + "description" toStr "The city and country, eg. San Francisco, USA", + ) + ), + "required" to Value.Array(Value.Str("location")) + ) + ) ) ) ) @@ -196,10 +214,26 @@ class VoiceClientManager(private val context: Context) { override fun onRemoteAudioLevel(level: Float, participant: Participant) { botAudioLevel.floatValue = level } + + override fun onGenericMessage(msg: MsgServerToClient) { + Log.i(TAG, "onGenericMessage: $msg") + } } val client = RTVIClient(OpenAIRealtimeWebRTCTransport.Factory(context), callbacks, options) + val llmHelper = LLMHelper(object : LLMHelper.Callbacks() { + override fun onLLMFunctionCall( + func: LLMFunctionCall, + onResult: (Value) -> Unit + ) { + Log.i(TAG, "Function call from bot: $func") + onResult(Value.Str("27 degrees celsius, rainy")) + } + }) + + client.registerHelper("llm", llmHelper) + client.connect().displayErrors().withErrorCallback { callbacks.onDisconnected() } @@ -216,4 +250,13 @@ class VoiceClientManager(private val context: Context) { fun stop() { client.value?.disconnect()?.displayErrors() } + + fun sendCustomMessage(msg: JsonElement) = + client.value?.sendMessage( + MsgClientToServer( + type = "custom-request", + data = msg + ) + ) + } \ No newline at end of file From 6da748e89236e90f9c9da2aecb9b0688691f8a4b Mon Sep 17 00:00:00 2001 From: marcus-daily <111281783+marcus-daily@users.noreply.github.com> Date: Thu, 8 May 2025 15:08:21 +0100 Subject: [PATCH 2/2] Tidying: use toStr --- .../openai_realtime_webrtc_demo/VoiceClientManager.kt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/openai-realtime-webrtc-demo/src/main/java/ai/pipecat/openai_realtime_webrtc_demo/VoiceClientManager.kt b/openai-realtime-webrtc-demo/src/main/java/ai/pipecat/openai_realtime_webrtc_demo/VoiceClientManager.kt index fe5a637..92eb98d 100644 --- a/openai-realtime-webrtc-demo/src/main/java/ai/pipecat/openai_realtime_webrtc_demo/VoiceClientManager.kt +++ b/openai-realtime-webrtc-demo/src/main/java/ai/pipecat/openai_realtime_webrtc_demo/VoiceClientManager.kt @@ -98,9 +98,9 @@ class VoiceClientManager(private val context: Context) { ),*/ initialConfig = OpenAIRealtimeSessionConfig( voice = "ballad", - turnDetection = Value.Object("type" to Value.Str("semantic_vad")), - inputAudioNoiseReduction = Value.Object("type" to Value.Str("near_field")), - inputAudioTranscription = Value.Object("model" to Value.Str("gpt-4o-transcribe")), + turnDetection = Value.Object("type" toStr "semantic_vad"), + inputAudioNoiseReduction = Value.Object("type" toStr "near_field"), + inputAudioTranscription = Value.Object("model" toStr "gpt-4o-transcribe"), tools = Value.Array( Value.Object( "type" toStr "function",