pragmatrix · pragmatrix · May 20, 2026 · May 19, 2026 · May 19, 2026 · May 19, 2026
diff --git a/.env.example b/.env.example
@@ -1,3 +1,7 @@
+# Gemini Configuration
+
+GEMINI_API_KEY=your_gemini_api_key
+
 # OpenAI Configuration
 OPENAI_API_KEY=your_openai_key
 OPENAI_REALTIME_API_MODEL=gpt-4o-mini-realtime-preview

diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md
@@ -3,9 +3,13 @@
 ## Rust Style
 - Prefer imports over deeply-qualified module paths. As a rule of thumb, avoid using more than one module prefix inline (for example, prefer importing a type and using `TypeName` instead of writing `foo::bar::TypeName` repeatedly).
 - Prefer high-level flow first: when practical, place local supporting definitions (for example helper structs, impls, functions, and type aliases) below their first use.
+- In module files, keep definitions ordered top-down by call flow (entry points first, helpers after first use).
 - Keep imports grouped and sorted to match existing file style.
 - Avoid `maybe_` prefixes for optional variables; use neutral names and rely on type/context for optionality.
 - Avoid `_ref` suffixes for local variable names; use descriptive neutral names instead.
+- Prefer explicit imports over repeated relative module qualification.
+- Prefer private-by-default visibility; only widen visibility when required by a module boundary.
+- For trait-based APIs, prefer focused request/context types over passing broad configuration structs.
 
 ## Change Communication
 - Include a short rationale for each non-trivial code change.

diff --git a/.gitmodules b/.gitmodules
@@ -6,3 +6,8 @@
 	path = external/openai-api-rs
 	url = ../openai-api-rs.git
 	branch = "context-switch-v9.0.0"
+[submodule "external/gemini-live-rs"]
+	path = external/gemini-live-rs
+	url = ../gemini-live-rs.git
+	branch = context-switch
+
diff --git a/.harper-dictionary.txt b/.harper-dictionary.txt
@@ -2,6 +2,7 @@ AirPods
 BCP
 ContextSwitch
 FreeSWITCH
-Inband
+alsa
+inband
 seekable
 subtag
diff --git a/.vscode/settings.json b/.vscode/settings.json
diff --git a/Cargo.toml b/Cargo.toml
@@ -13,6 +13,7 @@ members = [
     "services/aristech",
     "services/azure",
     "services/elevenlabs",
+    "services/google-dialog",
     "services/google-transcribe", 
     "services/openai-dialog", 
     "services/playback",
@@ -21,6 +22,8 @@ members = [
 [workspace.package]
 version = "2.3.0"
 edition = "2024"
+license = "MIT"
+repository = "https://github.com/pragmatrix/context-switch"
 
 [dependencies]
 
@@ -29,6 +32,7 @@ edition = "2024"
 context-switch-core = { workspace = true }
 
 openai-dialog = { path = "services/openai-dialog" }
+google-dialog = { workspace = true }
 azure = { workspace = true }
 azure-speech = { workspace = true }
 aristech = { workspace = true }
@@ -73,10 +77,13 @@ rodio = { workspace = true, features = ["playback"] }
 azure = { workspace = true }
 aristech = { workspace = true }
 google-transcribe = { workspace = true }
+google-dialog = { workspace = true }
+gemini-live = { workspace = true }
 
 tokio = { workspace = true, features = ["rt-multi-thread"] }
+reqwest = { workspace = true }
 
-# For advanced params in openai-dialog
+# For advanced parameters in `openai-dialog`
 openai-api-rs = { workspace = true }
 serde_json = { workspace = true }
 chrono-tz = { version = "0.10.3" }
@@ -96,12 +103,28 @@ playback = { path = "services/playback" }
 aristech = { path = "services/aristech" }
 elevenlabs = { path = "services/elevenlabs" }
 google-transcribe = { path = "services/google-transcribe" }
+google-dialog = { path = "services/google-dialog" }
+gemini-live = { path = "external/gemini-live-rs/crates/gemini-live" }
+
+# Dependencies required by `external/gemini-live-rs/crates/gemini-live`.
+# The submodule crate inherits these via `workspace = true`, so we keep them
+# centralized here and grouped to make future sync/review straightforward.
+tokio-tungstenite = { version = "0.29", features = ["rustls-tls-webpki-roots"] }
+futures-util = "0.3.32"
+bytes = "1.11"
+thiserror = "2.0.18"
+rustls = { version = "0.23", features = ["ring"], default-features = false }
+google-cloud-auth = { version = "1.10.0", default-features = false }
 
 anyhow = "1.0.102"
 derive_more = { version = "2.1.1", features = ["full"] }
 static_assertions = "1.1.0"
 async-stream = { version = "0.3.6" }
-tokio = { version = "1.50.0", features = ["sync"] }
+# Tokio features are intentionally explicit:
+# - `sync`: channels/mutexes used throughout services
+# - `rt` + `macros`: runtime and `tokio::select!`/task macros used by `gemini-live`
+# - `time`: timeout/sleep used by `gemini-live` session/transport logic
+tokio = { version = "1.52.3", features = ["sync", "rt", "macros", "time"] }
 futures = "0.3.31"
 serde = { version = "1.0.215", features = ["derive"] }
 serde_json = "1.0.149"
@@ -111,7 +134,7 @@ async-trait = "0.1.83"
 tracing = "0.1.41"
 dotenvy = { version = "0.15.7" }
 url = { version = "2.5.8" }
-reqwest = { version = "0.13.2" }
+reqwest = { version = "0.13.3" }
 mime_guess2 = { version = "2.3.1" }
 hound = { version = "3.5.1" }
 chrono = { version = "0.4.44" }
@@ -121,13 +144,12 @@ chrono = { version = "0.4.44" }
 #
 
 azure-speech = { path = "external/azure-speech-sdk-rs" }
-# openai-api-rs = "5.2.3"
 openai-api-rs = { path = "external/openai-api-rs" }
-# - `symphonia-wav` is mandatory: The default WAV decoder does not seem to support A-Law and also
-#    panics with a few of our testcases.
+# - `symphonia-wav` is mandatory: The default WAV decoder does not seem to support A-Law and
+#    panics with a few of our test cases.
 # - No default features because we don't want to pull alsa on Linux by default for local playback.
 # - We have to define at least _one_ decoder, otherwise `cargo clippy --all-targets` fails, so we select `symphonia-mp3`.
 rodio = { version = "0.22.2", default-features = false, features = ["symphonia-mp3"] }
 
 rstest = { version = "0.26.1" }
-uuid = { version = "1.17.0", features = ["v4"] }
+uuid = { version = "1.23.1", features = ["v4"] }
diff --git a/audio-knife/src/main.rs b/audio-knife/src/main.rs
@@ -37,8 +37,8 @@ use tokio::{
 use tracing::{Instrument, Span, debug, error, info, info_span};
 
 use context_switch::{
-    AudioFormat, AudioFrame, ClientEvent, ContextSwitch, ConversationId, InputModality,
-    ServerEvent, audio, billing_collector::BillingCollector, conversation::BillingId,
+    AudioFormat, AudioFrame, BillingId, ClientEvent, ContextSwitch, ConversationId, InputModality,
+    ServerEvent, audio, billing_collector::BillingCollector,
 };
 use tracing_subscriber::{EnvFilter, fmt::format::FmtSpan};
 use uuid::Uuid;

diff --git a/core/src/billing_context.rs b/core/src/billing_context.rs
@@ -2,7 +2,9 @@ use std::sync::{Arc, Mutex};
 
 use anyhow::Result;
 
-use crate::{BillingRecord, billing_collector::BillingCollector, conversation::BillingId};
+use crate::BillingRecord;
+use crate::billing_collector::BillingCollector;
+use crate::conversation::BillingId;
 
 #[derive(Debug, Clone)]
 pub struct BillingContext {

diff --git a/core/src/conversation.rs b/core/src/conversation.rs
@@ -168,7 +168,7 @@ impl ConversationInput {
         self.input.recv().await
     }
 
-    /// Run a nested service conversation with one single input request and wait until its
+    /// Run a nested service conversation with one single input request and wait until it's
     /// completed.
     ///
     /// All output is sent to the conversation output.

diff --git a/core/src/lib.rs b/core/src/lib.rs
@@ -1,7 +1,7 @@
 pub mod audio;
 pub mod billing_collector;
 mod billing_context;
-pub mod conversation;
+mod conversation;
 mod duration;
 pub mod language;
 mod protocol;
@@ -15,6 +15,7 @@ use anyhow::{Context, Result, bail};
 use tokio::sync::mpsc::{self, UnboundedReceiver, UnboundedSender, unbounded_channel};
 
 pub use billing_context::BillingContext;
+pub use conversation::*;
 pub use duration::Duration;
 pub use protocol::*;
 pub use registry::*;

diff --git a/examples/aristech-synthesize.rs b/examples/aristech-synthesize.rs
@@ -1,23 +1,18 @@
-use std::{
-    env,
-    num::{NonZeroU16, NonZeroU32},
-    thread,
-    time::Duration,
-};
+use std::env;
+use std::num::{NonZeroU16, NonZeroU32};
+use std::thread;
+use std::time::Duration;
 
 use anyhow::{Context as AnyhowContext, Result};
+use aristech::synthesize::{AristechSynthesize, Params as AristechParams};
 use rodio::{DeviceSinkBuilder, Player, Source};
-use tokio::{
-    select,
-    sync::mpsc::{channel, unbounded_channel},
-};
+use tokio::select;
+use tokio::sync::mpsc::{channel, unbounded_channel};
 
-use aristech::synthesize::{AristechSynthesize, Params as AristechParams};
 use context_switch::{InputModality, OutputModality};
+use context_switch_core::service::Service;
 use context_switch_core::{
-    AudioFormat, AudioFrame, AudioProducer, audio,
-    conversation::{Conversation, Input, Output},
-    service::Service,
+    AudioFormat, AudioFrame, AudioProducer, Conversation, Input, Output, audio,
 };
 
 const SAMPLE_TEXT: &str = "Hallo! Dies ist eine Demonstration des Aristech Text-zu-Sprache-Dienstes. \

diff --git a/examples/aristech-transcribe.rs b/examples/aristech-transcribe.rs
@@ -9,12 +9,9 @@ use tokio::{
 };
 
 use aristech::transcribe::{ApiKeyAuth, AuthConfig, CredentialsAuth, Params as AristechParams};
+
 use context_switch::{InputModality, OutputModality, services::AristechTranscribe};
-use context_switch_core::{
-    AudioFormat, AudioFrame, audio,
-    conversation::{Conversation, Input},
-    service::Service,
-};
+use context_switch_core::{AudioFormat, AudioFrame, Conversation, Input, audio, service::Service};
 
 #[tokio::main]
 async fn main() -> Result<()> {
@@ -28,6 +25,7 @@ async fn main() -> Result<()> {
     let device = host
         .default_input_device()
         .expect("Failed to get default input device");
+    // spellcheck: ignore
     // let config = device
     //     .default_input_config()
     //     .expect("Failed to get default input config");

diff --git a/examples/azure-translate.rs b/examples/azure-translate.rs
@@ -1,26 +1,19 @@
 //! A context switch demo. Runs locally, gets voice data from your current microphone.
 
-use std::{
-    env,
-    num::{NonZeroU16, NonZeroU32},
-    thread,
-    time::Duration,
-};
+use std::env;
+use std::num::{NonZeroU16, NonZeroU32};
+use std::thread;
+use std::time::Duration;
 
 use anyhow::Result;
 use azure::AzureTranslate;
 use cpal::traits::{DeviceTrait, HostTrait, StreamTrait};
 use rodio::{DeviceSinkBuilder, Player, Source};
+use tokio::select;
+use tokio::sync::mpsc::{UnboundedReceiver, channel, unbounded_channel};
 
 use context_switch::{InputModality, OutputModality};
-use context_switch_core::{
-    AudioFormat, AudioFrame, Service, audio,
-    conversation::{Conversation, Input, Output},
-};
-use tokio::{
-    select,
-    sync::mpsc::{UnboundedReceiver, channel, unbounded_channel},
-};
+use context_switch_core::{AudioFormat, AudioFrame, Conversation, Input, Output, Service, audio};
 
 #[tokio::main]
 async fn main() -> Result<()> {