Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion examples/openai-dialog.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ struct Cli {
protocol: Option<CliProtocol>,
#[arg(long)]
endpoint: Option<String>,
#[arg(long)]
model: Option<String>,
#[arg(long, value_parser = realtime_voice_value_parser())]
voice: Option<types::RealtimeVoice>,
}
Expand Down Expand Up @@ -114,7 +116,11 @@ async fn main() -> Result<()> {
stream.play().expect("Failed to play stream");

let key = env::var("OPENAI_API_KEY").unwrap();
let model = env::var("OPENAI_REALTIME_API_MODEL").unwrap();
let model = cli
.model
.or_else(|| env::var("OPENAI_REALTIME_API_MODEL").ok())
.filter(|model| !model.trim().is_empty())
.context("Provide --model or set OPENAI_REALTIME_API_MODEL")?;

let openai = OpenAIDialog;
let mut params = openai_dialog::Params::new(key, model);
Expand Down
2 changes: 1 addition & 1 deletion external/openai-api-rs
3 changes: 3 additions & 0 deletions justfile
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,6 @@ transcribe-google-latest-long:
transcribe-google-diarization:
cargo run --example transcribe -- google --diarization --language de-DE --model chirp_3 --region eu

openai-dialog-realtime-2:
cargo run --example openai-dialog -- --protocol openai --model gpt-realtime-2

77 changes: 23 additions & 54 deletions services/openai-dialog/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -250,14 +250,13 @@ impl Host {
.await
.map_err(|e| anyhow!(e.to_string()))?;

Ok(Client::new(read, write, self.client.protocol))
Ok(Client::new(read, write))
}
}

pub struct Client {
read: SplitStream<WebSocketStream<MaybeTlsStream<TcpStream>>>,
write: SplitSink<WebSocketStream<MaybeTlsStream<TcpStream>>, Message>,
protocol: RealtimeProtocol,

response_state: ResponseState,
inflight_prompt: Option<(String, PromptRequest)>,
Expand All @@ -275,32 +274,16 @@ impl Client {
fn new(
read: SplitStream<WebSocketStream<MaybeTlsStream<TcpStream>>>,
write: SplitSink<WebSocketStream<MaybeTlsStream<TcpStream>>, Message>,
protocol: RealtimeProtocol,
) -> Self {
Self {
read,
write,
protocol,
response_state: ResponseState::Idle,
inflight_prompt: None,
pending_prompts: Default::default(),
}
}

fn session_update_payload(
&self,
session: types::RealtimeSession,
) -> client_event::SessionUpdatePayload {
match self.protocol {
RealtimeProtocol::OpenAI => client_event::SessionUpdatePayload::Untagged(
types::UntaggedSession::Realtime(session),
),
RealtimeProtocol::Azure => {
client_event::SessionUpdatePayload::Tagged(types::Session::Realtime(session))
}
}
}

/// Run an audio dialog.
pub async fn dialog(
&mut self,
Expand Down Expand Up @@ -345,21 +328,14 @@ impl Client {
};

if let Some(voice) = params.voice {
match self.protocol {
RealtimeProtocol::OpenAI => {
session.voice = Some(voice);
}
RealtimeProtocol::Azure => {
session.audio = Some(types::AudioConfig {
input: None,
output: Some(types::AudioOutput {
format: None,
speed: 1.0,
voice: Some(voice),
}),
});
}
}
session.audio = Some(types::AudioConfig {
input: None,
output: Some(types::AudioOutput {
format: None,
speed: 1.0,
voice: Some(voice),
}),
});
send_update = true;
}

Expand All @@ -374,11 +350,11 @@ impl Client {
}

if send_update {
let payload = self.session_update_payload(session);

self.send_client_event(ClientEvent::SessionUpdate(client_event::SessionUpdate {
event_id: None,
session: payload,
session: client_event::SessionUpdatePayload::Tagged(types::Session::Realtime(
session,
)),
}))
.await?;
debug!("Session updated");
Expand Down Expand Up @@ -551,34 +527,27 @@ impl Client {
tools,
tool_choice,
} => {
let (voice, audio) = match self.protocol {
RealtimeProtocol::OpenAI => (voice, None),
RealtimeProtocol::Azure => {
let audio = voice.map(|voice| types::AudioConfig {
input: None,
output: Some(types::AudioOutput {
format: None,
speed: 1.0,
voice: Some(voice),
}),
});
(None, audio)
}
};
let audio = voice.map(|voice| types::AudioConfig {
input: None,
output: Some(types::AudioOutput {
format: None,
speed: 1.0,
voice: Some(voice),
}),
});

let session = types::RealtimeSession {
instructions,
voice,
audio,
tools,
tool_choice,
..Default::default()
};

let payload = self.session_update_payload(session);

let event = ClientEvent::SessionUpdate(client_event::SessionUpdate {
session: payload,
session: client_event::SessionUpdatePayload::Tagged(
types::Session::Realtime(session),
),
..Default::default()
});
self.send_client_event(event).await?;
Expand Down
Loading