Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# Gemini

GEMINI_API_KEY=your_gemini_api_key

# OpenAI Configuration
OPENAI_API_KEY=your_openai_key
OPENAI_REALTIME_API_MODEL=gpt-4o-mini-realtime-preview
Expand Down
4 changes: 4 additions & 0 deletions .github/copilot-instructions.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,13 @@
## Rust Style
- Prefer imports over deeply qualified module paths. As a rule of thumb, avoid using more than one module prefix inline (for example, prefer importing a type and using `TypeName` instead of writing `foo::bar::TypeName` repeatedly).
- Prefer high-level flow first: when practical, place local supporting definitions (for example helper structs, impls, functions, and type aliases) below their first use.
- In module files, keep definitions ordered top-down by call flow (entry points first, helpers after first use).
- Keep imports grouped and sorted to match existing file style.
- Avoid `maybe_` prefixes for optional variables; use neutral names and rely on type/context for optionality.
- Avoid `_ref` suffixes for local variable names; use descriptive neutral names instead.
- Prefer explicit imports over repeated relative module qualification.
- Prefer private-by-default visibility; only widen visibility when required by a module boundary.
- For trait-based APIs, prefer focused request/context types over passing broad configuration structs.

## Change Communication
- Include a short rationale for each non-trivial code change.
Expand Down
5 changes: 5 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,8 @@
path = external/openai-api-rs
url = ../openai-api-rs.git
branch = "context-switch-v9.0.0"
[submodule "external/gemini-live-rs"]
path = external/gemini-live-rs
url = ../gemini-live-rs.git
branch = context-switch

3 changes: 2 additions & 1 deletion .harper-dictionary.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ AirPods
BCP
ContextSwitch
FreeSWITCH
Inband
alsa
inband
seekable
subtag
8 changes: 0 additions & 8 deletions .vscode/settings.json

This file was deleted.

36 changes: 29 additions & 7 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ members = [
"services/aristech",
"services/azure",
"services/elevenlabs",
"services/google-dialog",
"services/google-transcribe",
"services/openai-dialog",
"services/playback",
Expand All @@ -21,6 +22,8 @@ members = [
[workspace.package]
version = "2.3.0"
edition = "2024"
license = "MIT"
repository = "https://github.com/pragmatrix/context-switch"

[dependencies]

Expand All @@ -29,6 +32,7 @@ edition = "2024"
context-switch-core = { workspace = true }

openai-dialog = { path = "services/openai-dialog" }
google-dialog = { workspace = true }
azure = { workspace = true }
azure-speech = { workspace = true }
aristech = { workspace = true }
Expand Down Expand Up @@ -73,10 +77,13 @@ rodio = { workspace = true, features = ["playback"] }
azure = { workspace = true }
aristech = { workspace = true }
google-transcribe = { workspace = true }
google-dialog = { workspace = true }
gemini-live = { workspace = true }

tokio = { workspace = true, features = ["rt-multi-thread"] }
reqwest = { workspace = true }

# For advanced params in openai-dialog
# For advanced parameters in `openai-dialog`
openai-api-rs = { workspace = true }
serde_json = { workspace = true }
chrono-tz = { version = "0.10.3" }
Expand All @@ -96,12 +103,28 @@ playback = { path = "services/playback" }
aristech = { path = "services/aristech" }
elevenlabs = { path = "services/elevenlabs" }
google-transcribe = { path = "services/google-transcribe" }
google-dialog = { path = "services/google-dialog" }
gemini-live = { path = "external/gemini-live-rs/crates/gemini-live" }

# Dependencies required by `external/gemini-live-rs/crates/gemini-live`.
# The submodule crate inherits these via `workspace = true`, so we keep them
# centralized here and grouped to make future sync/review straightforward.
tokio-tungstenite = { version = "0.29", features = ["rustls-tls-webpki-roots"] }
futures-util = "0.3.32"
bytes = "1.11"
thiserror = "2.0.18"
rustls = { version = "0.23", features = ["ring"], default-features = false }
google-cloud-auth = { version = "1.10.0", default-features = false }

anyhow = "1.0.102"
derive_more = { version = "2.1.1", features = ["full"] }
static_assertions = "1.1.0"
async-stream = { version = "0.3.6" }
tokio = { version = "1.50.0", features = ["sync"] }
# Tokio features are intentionally explicit:
# - `sync`: channels/mutexes used throughout services
# - `rt` + `macros`: runtime and `tokio::select!`/task macros used by `gemini-live`
# - `time`: timeout/sleep used by `gemini-live` session/transport logic
tokio = { version = "1.52.3", features = ["sync", "rt", "macros", "time"] }
Comment thread
pragmatrix marked this conversation as resolved.
futures = "0.3.31"
serde = { version = "1.0.215", features = ["derive"] }
serde_json = "1.0.149"
Expand All @@ -111,7 +134,7 @@ async-trait = "0.1.83"
tracing = "0.1.41"
dotenvy = { version = "0.15.7" }
url = { version = "2.5.8" }
reqwest = { version = "0.13.2" }
reqwest = { version = "0.13.3" }
mime_guess2 = { version = "2.3.1" }
hound = { version = "3.5.1" }
chrono = { version = "0.4.44" }
Expand All @@ -121,13 +144,12 @@ chrono = { version = "0.4.44" }
#

azure-speech = { path = "external/azure-speech-sdk-rs" }
# openai-api-rs = "5.2.3"
openai-api-rs = { path = "external/openai-api-rs" }
# - `symphonia-wav` is mandatory: The default WAV decoder does not seem to support A-Law and also
# panics with a few of our testcases.
# - `symphonia-wav` is mandatory: The default WAV decoder does not seem to support A-Law and
# panics with a few of our test cases.
# - No default features because we don't want to pull in alsa on Linux by default for local playback.
# - We have to define at least _one_ decoder; otherwise `cargo clippy --all-targets` fails, so we select `symphonia-mp3`.
rodio = { version = "0.22.2", default-features = false, features = ["symphonia-mp3"] }

rstest = { version = "0.26.1" }
uuid = { version = "1.17.0", features = ["v4"] }
uuid = { version = "1.23.1", features = ["v4"] }
4 changes: 2 additions & 2 deletions audio-knife/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ use tokio::{
use tracing::{Instrument, Span, debug, error, info, info_span};

use context_switch::{
AudioFormat, AudioFrame, ClientEvent, ContextSwitch, ConversationId, InputModality,
ServerEvent, audio, billing_collector::BillingCollector, conversation::BillingId,
AudioFormat, AudioFrame, BillingId, ClientEvent, ContextSwitch, ConversationId, InputModality,
ServerEvent, audio, billing_collector::BillingCollector,
};
use tracing_subscriber::{EnvFilter, fmt::format::FmtSpan};
use uuid::Uuid;
Expand Down
4 changes: 3 additions & 1 deletion core/src/billing_context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@ use std::sync::{Arc, Mutex};

use anyhow::Result;

use crate::{BillingRecord, billing_collector::BillingCollector, conversation::BillingId};
use crate::BillingRecord;
use crate::billing_collector::BillingCollector;
use crate::conversation::BillingId;

#[derive(Debug, Clone)]
pub struct BillingContext {
Expand Down
2 changes: 1 addition & 1 deletion core/src/conversation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ impl ConversationInput {
self.input.recv().await
}

/// Run a nested service conversation with one single input request and wait until its
/// Run a nested service conversation with one single input request and wait until it's
/// completed.
///
/// All output is sent to the conversation output.
Expand Down
3 changes: 2 additions & 1 deletion core/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
pub mod audio;
pub mod billing_collector;
mod billing_context;
pub mod conversation;
mod conversation;
Comment thread
pragmatrix marked this conversation as resolved.
mod duration;
pub mod language;
mod protocol;
Expand All @@ -15,6 +15,7 @@ use anyhow::{Context, Result, bail};
use tokio::sync::mpsc::{self, UnboundedReceiver, UnboundedSender, unbounded_channel};

pub use billing_context::BillingContext;
pub use conversation::*;
pub use duration::Duration;
pub use protocol::*;
pub use registry::*;
Expand Down
23 changes: 9 additions & 14 deletions examples/aristech-synthesize.rs
Original file line number Diff line number Diff line change
@@ -1,23 +1,18 @@
use std::{
env,
num::{NonZeroU16, NonZeroU32},
thread,
time::Duration,
};
use std::env;
use std::num::{NonZeroU16, NonZeroU32};
use std::thread;
use std::time::Duration;

use anyhow::{Context as AnyhowContext, Result};
use aristech::synthesize::{AristechSynthesize, Params as AristechParams};
use rodio::{DeviceSinkBuilder, Player, Source};
use tokio::{
select,
sync::mpsc::{channel, unbounded_channel},
};
use tokio::select;
use tokio::sync::mpsc::{channel, unbounded_channel};

use aristech::synthesize::{AristechSynthesize, Params as AristechParams};
use context_switch::{InputModality, OutputModality};
use context_switch_core::service::Service;
use context_switch_core::{
AudioFormat, AudioFrame, AudioProducer, audio,
conversation::{Conversation, Input, Output},
service::Service,
AudioFormat, AudioFrame, AudioProducer, Conversation, Input, Output, audio,
};

const SAMPLE_TEXT: &str = "Hallo! Dies ist eine Demonstration des Aristech Text-zu-Sprache-Dienstes. \
Expand Down
8 changes: 3 additions & 5 deletions examples/aristech-transcribe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,9 @@ use tokio::{
};

use aristech::transcribe::{ApiKeyAuth, AuthConfig, CredentialsAuth, Params as AristechParams};

use context_switch::{InputModality, OutputModality, services::AristechTranscribe};
use context_switch_core::{
AudioFormat, AudioFrame, audio,
conversation::{Conversation, Input},
service::Service,
};
use context_switch_core::{AudioFormat, AudioFrame, Conversation, Input, audio, service::Service};

#[tokio::main]
async fn main() -> Result<()> {
Expand All @@ -28,6 +25,7 @@ async fn main() -> Result<()> {
let device = host
.default_input_device()
.expect("Failed to get default input device");
// spellcheck: ignore
// let config = device
// .default_input_config()
// .expect("Failed to get default input config");
Expand Down
21 changes: 7 additions & 14 deletions examples/azure-translate.rs
Original file line number Diff line number Diff line change
@@ -1,26 +1,19 @@
//! A context switch demo. Runs locally, gets voice data from your current microphone.

use std::{
env,
num::{NonZeroU16, NonZeroU32},
thread,
time::Duration,
};
use std::env;
use std::num::{NonZeroU16, NonZeroU32};
use std::thread;
use std::time::Duration;

use anyhow::Result;
use azure::AzureTranslate;
use cpal::traits::{DeviceTrait, HostTrait, StreamTrait};
use rodio::{DeviceSinkBuilder, Player, Source};
use tokio::select;
use tokio::sync::mpsc::{UnboundedReceiver, channel, unbounded_channel};

use context_switch::{InputModality, OutputModality};
use context_switch_core::{
AudioFormat, AudioFrame, Service, audio,
conversation::{Conversation, Input, Output},
};
use tokio::{
select,
sync::mpsc::{UnboundedReceiver, channel, unbounded_channel},
};
use context_switch_core::{AudioFormat, AudioFrame, Conversation, Input, Output, Service, audio};

#[tokio::main]
async fn main() -> Result<()> {
Expand Down
Loading
Loading