diff --git a/apps/desktop/src/components/settings/ai/stt/shared.tsx b/apps/desktop/src/components/settings/ai/stt/shared.tsx
index c48d22dc71..80ff5b64b8 100644
--- a/apps/desktop/src/components/settings/ai/stt/shared.tsx
+++ b/apps/desktop/src/components/settings/ai/stt/shared.tsx
@@ -200,6 +200,16 @@ const _PROVIDERS = [
models: ["voxtral-mini-2602"],
requirements: [{ kind: "requires_config", fields: ["api_key"] }],
},
+ {
+ disabled: false,
+ id: "bedrock",
+ displayName: "Amazon Bedrock",
+ badge: "Beta",
+ icon: ,
+ baseUrl: "https://bedrock-mantle.us-east-1.api.aws/v1",
+ models: ["openai.gpt-4o-transcribe", "openai.whisper-1"],
+ requirements: [{ kind: "requires_config", fields: ["api_key"] }],
+ },
{
disabled: false,
id: "custom",
diff --git a/crates/listener-core/src/actors/listener/adapters.rs b/crates/listener-core/src/actors/listener/adapters.rs
index 041f876386..41fd439473 100644
--- a/crates/listener-core/src/actors/listener/adapters.rs
+++ b/crates/listener-core/src/actors/listener/adapters.rs
@@ -4,7 +4,7 @@ use bytes::Bytes;
use ractor::{ActorProcessingErr, ActorRef};
use owhisper_client::{
- AdapterKind, ArgmaxAdapter, AssemblyAIAdapter, CactusAdapter, DashScopeAdapter,
+ AdapterKind, ArgmaxAdapter, AssemblyAIAdapter, BedrockAdapter, CactusAdapter, DashScopeAdapter,
DeepgramAdapter, ElevenLabsAdapter, FireworksAdapter, GladiaAdapter, HyprnoteAdapter,
MistralAdapter, OpenAIAdapter, RealtimeSttAdapter, SonioxAdapter,
};
@@ -95,6 +95,12 @@ pub(super) async fn spawn_rx_task(
(AdapterKind::Mistral, true) => {
spawn_rx_task_dual_with_adapter::<MistralAdapter>(args, myself).await
}
+ (AdapterKind::Bedrock, false) => {
+ spawn_rx_task_single_with_adapter::<BedrockAdapter>(args, myself).await
+ }
+ (AdapterKind::Bedrock, true) => {
+ spawn_rx_task_dual_with_adapter::<BedrockAdapter>(args, myself).await
+ }
(AdapterKind::Hyprnote, false) => {
spawn_rx_task_single_with_adapter::<HyprnoteAdapter>(args, myself).await
}
diff --git a/crates/listener2-core/src/batch.rs b/crates/listener2-core/src/batch.rs
index 4f952286ed..783406b0a6 100644
--- a/crates/listener2-core/src/batch.rs
+++ b/crates/listener2-core/src/batch.rs
@@ -4,7 +4,7 @@ use std::time::Duration;
use futures_util::StreamExt;
use owhisper_client::{
- AdapterKind, ArgmaxAdapter, AssemblyAIAdapter, BatchSttAdapter, CactusAdapter,
+ AdapterKind, ArgmaxAdapter, AssemblyAIAdapter, BatchSttAdapter, BedrockAdapter, CactusAdapter,
DashScopeAdapter, DeepgramAdapter, ElevenLabsAdapter, FireworksAdapter, GladiaAdapter,
HyprnoteAdapter, MistralAdapter, OpenAIAdapter, RealtimeSttAdapter, SonioxAdapter,
};
@@ -390,6 +390,7 @@ async fn spawn_batch_task(
spawn_batch_task_with_adapter::(args, myself).await
}
AdapterKind::Mistral => spawn_batch_task_with_adapter::<MistralAdapter>(args, myself).await,
+ AdapterKind::Bedrock => spawn_batch_task_with_adapter::<BedrockAdapter>(args, myself).await,
AdapterKind::Hyprnote => {
spawn_batch_task_with_adapter::<HyprnoteAdapter>(args, myself).await
}
diff --git a/crates/listener2-core/src/lib.rs b/crates/listener2-core/src/lib.rs
index fdeb2ba3ab..f724470a0c 100644
--- a/crates/listener2-core/src/lib.rs
+++ b/crates/listener2-core/src/lib.rs
@@ -41,6 +41,7 @@ pub fn suggest_providers_for_languages_batch(languages: &[hypr_language::Languag
AdapterKind::ElevenLabs,
AdapterKind::DashScope,
AdapterKind::Mistral,
+ AdapterKind::Bedrock,
];
let mut with_support: Vec<_> = all_providers
diff --git a/crates/owhisper-client/src/adapter/bedrock/batch.rs b/crates/owhisper-client/src/adapter/bedrock/batch.rs
new file mode 100644
index 0000000000..31c840a76d
--- /dev/null
+++ b/crates/owhisper-client/src/adapter/bedrock/batch.rs
@@ -0,0 +1,164 @@
+use std::path::Path;
+
+use owhisper_interface::ListenParams;
+use owhisper_interface::batch::{
+ Alternatives as BatchAlternatives, Channel as BatchChannel, Response as BatchResponse,
+ Results as BatchResults, Word,
+};
+use serde::Deserialize;
+
+use super::BedrockAdapter;
+use crate::adapter::http::mime_type_from_extension;
+use crate::adapter::{BatchFuture, BatchSttAdapter, ClientWithMiddleware};
+use crate::error::Error;
+use crate::providers::{Provider, is_meta_model};
+
+// Amazon Bedrock supports OpenAI-compatible audio transcription via
+// the bedrock-mantle endpoint: POST /v1/audio/transcriptions
+// https://docs.aws.amazon.com/bedrock/latest/userguide/apis.html
+impl BatchSttAdapter for BedrockAdapter {
+ fn is_supported_languages(
+ &self,
+ languages: &[hypr_language::Language],
+ _model: Option<&str>,
+ ) -> bool {
+ BedrockAdapter::language_support_batch(languages).is_supported()
+ }
+
+ fn transcribe_file<'a, P: AsRef<Path> + Send + 'a>(
+ &'a self,
+ client: &'a ClientWithMiddleware,
+ api_base: &'a str,
+ api_key: &'a str,
+ params: &'a ListenParams,
+ file_path: P,
+ ) -> BatchFuture<'a> {
+ let path = file_path.as_ref().to_path_buf();
+ Box::pin(async move { do_transcribe_file(client, api_base, api_key, params, &path).await })
+ }
+}
+
+#[derive(Debug, Deserialize)]
+struct BedrockWord {
+ word: String,
+ start: f64,
+ end: f64,
+}
+
+#[derive(Debug, Deserialize)]
+struct BedrockTranscriptionResponse {
+ text: String,
+ #[serde(default)]
+ words: Option<Vec<BedrockWord>>,
+ #[serde(default)]
+ #[allow(dead_code)]
+ language: Option<String>,
+ #[serde(default)]
+ #[allow(dead_code)]
+ duration: Option<f64>,
+}
+
+async fn do_transcribe_file(
+ client: &ClientWithMiddleware,
+ api_base: &str,
+ api_key: &str,
+ params: &ListenParams,
+ file_path: &Path,
+) -> Result<BatchResponse, Error> {
+ let fallback_name = match file_path.extension().and_then(|e| e.to_str()) {
+ Some(ext) => format!("audio.{}", ext),
+ None => "audio".to_string(),
+ };
+
+ let file_name = file_path
+ .file_name()
+ .and_then(|n| n.to_str())
+ .map(ToOwned::to_owned)
+ .unwrap_or(fallback_name);
+
+ let file_bytes = tokio::fs::read(file_path)
+ .await
+ .map_err(|e| Error::AudioProcessing(e.to_string()))?;
+
+ let mime_type = mime_type_from_extension(file_path);
+
+ let file_part = reqwest::multipart::Part::bytes(file_bytes)
+ .file_name(file_name)
+ .mime_str(mime_type)
+ .map_err(|e| Error::AudioProcessing(e.to_string()))?;
+
+ let default = Provider::Bedrock.default_batch_model();
+ let model = match params.model.as_deref() {
+ Some(m) if is_meta_model(m) => default,
+ Some(m) => m,
+ None => default,
+ };
+
+ let mut form = reqwest::multipart::Form::new()
+ .part("file", file_part)
+ .text("model", model.to_string());
+
+ form = form.text("response_format", "verbose_json");
+ form = form.text("timestamp_granularities[]", "word");
+
+ if let Some(lang) = params.languages.first() {
+ form = form.text("language", lang.iso639().code().to_string());
+ }
+
+ let base = if api_base.is_empty() {
+ Provider::Bedrock.default_api_base()
+ } else {
+ api_base.trim_end_matches('/')
+ };
+ let url = format!("{}/audio/transcriptions", base);
+
+ let response = client
+ .post(&url)
+ .header("Authorization", format!("Bearer {}", api_key))
+ .multipart(form)
+ .send()
+ .await?;
+
+ let status = response.status();
+ if !status.is_success() {
+ let body = response.text().await.unwrap_or_default();
+ return Err(Error::UnexpectedStatus { status, body });
+ }
+
+ let bedrock_response: BedrockTranscriptionResponse = response.json().await?;
+
+ let words: Vec<Word> = bedrock_response
+ .words
+ .unwrap_or_default()
+ .into_iter()
+ .map(|w| Word {
+ word: w.word.clone(),
+ start: w.start,
+ end: w.end,
+ confidence: 1.0,
+ speaker: None,
+ punctuated_word: Some(w.word),
+ })
+ .collect();
+
+ let alternatives = BatchAlternatives {
+ transcript: bedrock_response.text.trim().to_string(),
+ confidence: 1.0,
+ words,
+ };
+
+ let channel = BatchChannel {
+ alternatives: vec![alternatives],
+ };
+
+ let metadata = serde_json::json!({
+ "language": bedrock_response.language,
+ });
+
+ Ok(BatchResponse {
+ metadata,
+ results: BatchResults {
+ channels: vec![channel],
+ },
+ })
+}
diff --git a/crates/owhisper-client/src/adapter/bedrock/live.rs b/crates/owhisper-client/src/adapter/bedrock/live.rs
new file mode 100644
index 0000000000..df582e60e3
--- /dev/null
+++ b/crates/owhisper-client/src/adapter/bedrock/live.rs
@@ -0,0 +1,372 @@
+use hypr_ws_client::client::Message;
+use owhisper_interface::ListenParams;
+use owhisper_interface::stream::{Alternatives, Channel, Metadata, StreamResponse};
+use serde::{Deserialize, Serialize};
+
+use super::BedrockAdapter;
+use crate::adapter::RealtimeSttAdapter;
+use crate::adapter::parsing::{WordBuilder, calculate_time_span};
+
+// Amazon Bedrock via bedrock-mantle exposes an OpenAI-compatible Realtime API.
+// This adapter follows the same protocol as the OpenAI adapter.
+impl RealtimeSttAdapter for BedrockAdapter {
+ fn provider_name(&self) -> &'static str {
+ "bedrock"
+ }
+
+ fn is_supported_languages(
+ &self,
+ languages: &[hypr_language::Language],
+ _model: Option<&str>,
+ ) -> bool {
+ BedrockAdapter::is_supported_languages_live(languages)
+ }
+
+ fn supports_native_multichannel(&self) -> bool {
+ false
+ }
+
+ fn build_ws_url(&self, api_base: &str, _params: &ListenParams, _channels: u8) -> url::Url {
+ let (mut url, existing_params) = Self::build_ws_url_from_base(api_base);
+
+ if !existing_params.is_empty() {
+ let mut query_pairs = url.query_pairs_mut();
+ for (key, value) in &existing_params {
+ query_pairs.append_pair(key, value);
+ }
+ }
+
+ url
+ }
+
+ fn build_auth_header(&self, api_key: Option<&str>) -> Option<(&'static str, String)> {
+ api_key.and_then(|k| crate::providers::Provider::Bedrock.build_auth_header(k))
+ }
+
+ fn keep_alive_message(&self) -> Option<Message> {
+ None
+ }
+
+ fn audio_to_message(&self, audio: bytes::Bytes) -> Message {
+ use base64::Engine;
+ let base64_audio = base64::engine::general_purpose::STANDARD.encode(&audio);
+ let event = InputAudioBufferAppend {
+ event_type: "input_audio_buffer.append".to_string(),
+ audio: base64_audio,
+ };
+ Message::Text(serde_json::to_string(&event).unwrap().into())
+ }
+
+ fn initial_message(
+ &self,
+ _api_key: Option<&str>,
+ params: &ListenParams,
+ _channels: u8,
+ ) -> Option<Message> {
+ let language = params
+ .languages
+ .first()
+ .map(|l| l.iso639().code().to_string());
+
+ let default = crate::providers::Provider::Bedrock.default_live_model();
+ let model = match params.model.as_deref() {
+ Some(m) if crate::providers::is_meta_model(m) => default,
+ Some(m) => m,
+ None => default,
+ };
+
+ let session_config = SessionUpdateEvent {
+ event_type: "session.update".to_string(),
+ session: SessionConfig {
+ session_type: "transcription".to_string(),
+ audio: Some(AudioConfig {
+ input: Some(AudioInputConfig {
+ format: Some(AudioFormat {
+ format_type: "audio/pcm".to_string(),
+ rate: params.sample_rate,
+ }),
+ transcription: Some(TranscriptionConfig {
+ model: model.to_string(),
+ language,
+ }),
+ turn_detection: Some(TurnDetection {
+ detection_type: "server_vad".to_string(),
+ threshold: Some(0.5),
+ prefix_padding_ms: Some(300),
+ silence_duration_ms: Some(500),
+ }),
+ }),
+ }),
+ include: Some(vec!["item.input_audio_transcription.logprobs".to_string()]),
+ },
+ };
+
+ let json = serde_json::to_string(&session_config).ok()?;
+ tracing::debug!(payload = %json, "bedrock_session_update_payload");
+ Some(Message::Text(json.into()))
+ }
+
+ fn finalize_message(&self) -> Message {
+ let commit = InputAudioBufferCommit {
+ event_type: "input_audio_buffer.commit".to_string(),
+ };
+ Message::Text(serde_json::to_string(&commit).unwrap().into())
+ }
+
+ fn parse_response(&self, raw: &str) -> Vec<StreamResponse> {
+ let event: BedrockEvent = match serde_json::from_str(raw) {
+ Ok(e) => e,
+ Err(e) => {
+ tracing::warn!(error = ?e, raw = raw, "bedrock_json_parse_failed");
+ return vec![];
+ }
+ };
+
+ match event {
+ BedrockEvent::SessionCreated { session } => {
+ tracing::debug!(session_id = %session.id, "bedrock_session_created");
+ vec![]
+ }
+ BedrockEvent::SessionUpdated { session } => {
+ tracing::debug!(session_id = %session.id, "bedrock_session_updated");
+ vec![]
+ }
+ BedrockEvent::InputAudioBufferCommitted { item_id } => {
+ tracing::debug!(item_id = %item_id, "bedrock_audio_buffer_committed");
+ vec![]
+ }
+ BedrockEvent::InputAudioBufferCleared => {
+ tracing::debug!("bedrock_audio_buffer_cleared");
+ vec![]
+ }
+ BedrockEvent::InputAudioBufferSpeechStarted { item_id } => {
+ tracing::debug!(item_id = %item_id, "bedrock_speech_started");
+ vec![]
+ }
+ BedrockEvent::InputAudioBufferSpeechStopped { item_id } => {
+ tracing::debug!(item_id = %item_id, "bedrock_speech_stopped");
+ vec![]
+ }
+ BedrockEvent::ConversationItemInputAudioTranscriptionCompleted {
+ item_id,
+ content_index,
+ transcript,
+ } => {
+ tracing::debug!(
+ item_id = %item_id,
+ content_index = content_index,
+ transcript = %transcript,
+ "bedrock_transcription_completed"
+ );
+ Self::build_transcript_response(&transcript, true, true)
+ }
+ BedrockEvent::ConversationItemInputAudioTranscriptionDelta {
+ item_id,
+ content_index,
+ delta,
+ } => {
+ tracing::debug!(
+ item_id = %item_id,
+ content_index = content_index,
+ delta = %delta,
+ "bedrock_transcription_delta"
+ );
+ Self::build_transcript_response(&delta, false, false)
+ }
+ BedrockEvent::ConversationItemInputAudioTranscriptionFailed {
+ item_id, error, ..
+ } => {
+ tracing::error!(
+ item_id = %item_id,
+ error_type = %error.error_type,
+ error_message = %error.message,
+ "bedrock_transcription_failed"
+ );
+ vec![StreamResponse::ErrorResponse {
+ error_code: None,
+ error_message: format!("{}: {}", error.error_type, error.message),
+ provider: "bedrock".to_string(),
+ }]
+ }
+ BedrockEvent::Error { error } => {
+ tracing::error!(
+ error_type = %error.error_type,
+ error_message = %error.message,
+ "bedrock_error"
+ );
+ vec![StreamResponse::ErrorResponse {
+ error_code: None,
+ error_message: format!("{}: {}", error.error_type, error.message),
+ provider: "bedrock".to_string(),
+ }]
+ }
+ BedrockEvent::Unknown => {
+ tracing::debug!(raw = raw, "bedrock_unknown_event");
+ vec![]
+ }
+ }
+ }
+}
+
+#[derive(Debug, Serialize)]
+struct SessionUpdateEvent {
+ #[serde(rename = "type")]
+ event_type: String,
+ session: SessionConfig,
+}
+
+#[derive(Debug, Serialize)]
+struct SessionConfig {
+ #[serde(rename = "type")]
+ session_type: String,
+ #[serde(skip_serializing_if = "Option::is_none")]
+ audio: Option<AudioConfig>,
+ #[serde(skip_serializing_if = "Option::is_none")]
+ include: Option<Vec<String>>,
+}
+
+#[derive(Debug, Serialize)]
+struct AudioConfig {
+ #[serde(skip_serializing_if = "Option::is_none")]
+ input: Option<AudioInputConfig>,
+}
+
+#[derive(Debug, Serialize)]
+struct AudioInputConfig {
+ #[serde(skip_serializing_if = "Option::is_none")]
+ format: Option<AudioFormat>,
+ #[serde(skip_serializing_if = "Option::is_none")]
+ transcription: Option<TranscriptionConfig>,
+ #[serde(skip_serializing_if = "Option::is_none")]
+ turn_detection: Option<TurnDetection>,
+}
+
+#[derive(Debug, Serialize)]
+struct AudioFormat {
+ #[serde(rename = "type")]
+ format_type: String,
+ rate: u32,
+}
+
+#[derive(Debug, Serialize)]
+struct TranscriptionConfig {
+ model: String,
+ #[serde(skip_serializing_if = "Option::is_none")]
+ language: Option<String>,
+}
+
+#[derive(Debug, Serialize)]
+struct TurnDetection {
+ #[serde(rename = "type")]
+ detection_type: String,
+ #[serde(skip_serializing_if = "Option::is_none")]
+ threshold: Option<f32>,
+ #[serde(skip_serializing_if = "Option::is_none")]
+ prefix_padding_ms: Option<u32>,
+ #[serde(skip_serializing_if = "Option::is_none")]
+ silence_duration_ms: Option<u32>,
+}
+
+#[derive(Debug, Serialize)]
+struct InputAudioBufferAppend {
+ #[serde(rename = "type")]
+ event_type: String,
+ audio: String,
+}
+
+#[derive(Debug, Serialize)]
+struct InputAudioBufferCommit {
+ #[serde(rename = "type")]
+ event_type: String,
+}
+
+#[derive(Debug, Deserialize)]
+#[serde(tag = "type")]
+#[allow(dead_code)]
+enum BedrockEvent {
+ #[serde(rename = "session.created")]
+ SessionCreated { session: SessionInfo },
+ #[serde(rename = "session.updated")]
+ SessionUpdated { session: SessionInfo },
+ #[serde(rename = "input_audio_buffer.committed")]
+ InputAudioBufferCommitted { item_id: String },
+ #[serde(rename = "input_audio_buffer.cleared")]
+ InputAudioBufferCleared,
+ #[serde(rename = "input_audio_buffer.speech_started")]
+ InputAudioBufferSpeechStarted { item_id: String },
+ #[serde(rename = "input_audio_buffer.speech_stopped")]
+ InputAudioBufferSpeechStopped { item_id: String },
+ #[serde(rename = "conversation.item.input_audio_transcription.completed")]
+ ConversationItemInputAudioTranscriptionCompleted {
+ item_id: String,
+ content_index: u32,
+ transcript: String,
+ },
+ #[serde(rename = "conversation.item.input_audio_transcription.delta")]
+ ConversationItemInputAudioTranscriptionDelta {
+ item_id: String,
+ content_index: u32,
+ delta: String,
+ },
+ #[serde(rename = "conversation.item.input_audio_transcription.failed")]
+ ConversationItemInputAudioTranscriptionFailed {
+ item_id: String,
+ content_index: u32,
+ error: BedrockError,
+ },
+ #[serde(rename = "error")]
+ Error { error: BedrockError },
+ #[serde(other)]
+ Unknown,
+}
+
+#[derive(Debug, Deserialize)]
+struct SessionInfo {
+ id: String,
+}
+
+#[derive(Debug, Deserialize)]
+struct BedrockError {
+ #[serde(rename = "type")]
+ error_type: String,
+ message: String,
+}
+
+impl BedrockAdapter {
+ fn build_transcript_response(
+ transcript: &str,
+ is_final: bool,
+ speech_final: bool,
+ ) -> Vec<StreamResponse> {
+ if transcript.is_empty() {
+ return vec![];
+ }
+
+ let words: Vec<_> = transcript
+ .split_whitespace()
+ .map(|word| WordBuilder::new(word).confidence(1.0).build())
+ .collect();
+
+ let (start, duration) = calculate_time_span(&words);
+
+ let channel = Channel {
+ alternatives: vec![Alternatives {
+ transcript: transcript.to_string(),
+ words,
+ confidence: 1.0,
+ languages: vec![],
+ }],
+ };
+
+ vec![StreamResponse::TranscriptResponse {
+ is_final,
+ speech_final,
+ from_finalize: false,
+ start,
+ duration,
+ channel,
+ metadata: Metadata::default(),
+ channel_index: vec![0, 1],
+ }]
+ }
+}
diff --git a/crates/owhisper-client/src/adapter/bedrock/mod.rs b/crates/owhisper-client/src/adapter/bedrock/mod.rs
new file mode 100644
index 0000000000..f58c3e925f
--- /dev/null
+++ b/crates/owhisper-client/src/adapter/bedrock/mod.rs
@@ -0,0 +1,65 @@
+mod batch;
+mod live;
+
+use crate::providers::Provider;
+
+use super::{LanguageQuality, LanguageSupport};
+
+#[derive(Clone, Default)]
+pub struct BedrockAdapter;
+
+impl BedrockAdapter {
+ pub fn language_support_live(_languages: &[hypr_language::Language]) -> LanguageSupport {
+ LanguageSupport::Supported {
+ quality: LanguageQuality::NoData,
+ }
+ }
+
+ pub fn language_support_batch(_languages: &[hypr_language::Language]) -> LanguageSupport {
+ Self::language_support_live(_languages)
+ }
+
+ pub fn is_supported_languages_live(languages: &[hypr_language::Language]) -> bool {
+ Self::language_support_live(languages).is_supported()
+ }
+
+ pub fn is_supported_languages_batch(languages: &[hypr_language::Language]) -> bool {
+ Self::language_support_batch(languages).is_supported()
+ }
+
+ pub(crate) fn build_ws_url_from_base(api_base: &str) -> (url::Url, Vec<(String, String)>) {
+ // Bedrock Mantle is OpenAI-compatible and uses the same Realtime API surface.
+ // We follow the OpenAI adapter's URL behavior (including intent=transcription).
+ if api_base.is_empty() {
+ return (
+ Provider::Bedrock
+ .default_ws_url()
+ .parse()
+ .expect("invalid_default_ws_url"),
+ vec![("intent".to_string(), "transcription".to_string())],
+ );
+ }
+
+ if let Some(proxy_result) = super::build_proxy_ws_url(api_base) {
+ return proxy_result;
+ }
+
+ let parsed: url::Url = api_base.parse().expect("invalid_api_base");
+ let mut existing_params = super::extract_query_params(&parsed);
+
+ if !existing_params.iter().any(|(k, _)| k == "intent") {
+ existing_params.push(("intent".to_string(), "transcription".to_string()));
+ }
+
+ let host = parsed
+ .host_str()
+ .unwrap_or(Provider::Bedrock.default_ws_host());
+ let mut url: url::Url = format!("wss://{}{}", host, Provider::Bedrock.ws_path())
+ .parse()
+ .expect("invalid_ws_url");
+
+ super::set_scheme_from_host(&mut url);
+
+ (url, existing_params)
+ }
+}
diff --git a/crates/owhisper-client/src/adapter/mod.rs b/crates/owhisper-client/src/adapter/mod.rs
index 3eaf1cc694..2350a60aeb 100644
--- a/crates/owhisper-client/src/adapter/mod.rs
+++ b/crates/owhisper-client/src/adapter/mod.rs
@@ -1,5 +1,6 @@
mod argmax;
pub(crate) mod assemblyai;
+mod bedrock;
mod cactus;
mod dashscope;
pub mod deepgram;
@@ -19,6 +20,7 @@ mod url_builder;
pub use argmax::*;
pub use assemblyai::*;
+pub use bedrock::*;
pub use cactus::*;
pub use dashscope::*;
pub use deepgram::*;
@@ -362,6 +364,8 @@ pub enum AdapterKind {
DashScope,
#[strum(serialize = "mistral")]
Mistral,
+ #[strum(serialize = "bedrock")]
+ Bedrock,
#[strum(serialize = "hyprnote")]
Hyprnote,
#[strum(serialize = "cactus")]
@@ -413,6 +417,7 @@ impl AdapterKind {
Self::DashScope => DashScopeAdapter::language_support_live(languages),
Self::Argmax => ArgmaxAdapter::language_support_live(languages, model),
Self::Mistral => MistralAdapter::language_support_live(languages),
+ Self::Bedrock => BedrockAdapter::language_support_live(languages),
Self::Hyprnote | Self::Cactus => LanguageSupport::Supported {
quality: LanguageQuality::NoData,
},
@@ -438,6 +443,7 @@ impl AdapterKind {
Self::DashScope => DashScopeAdapter::language_support_batch(languages),
Self::Argmax => ArgmaxAdapter::language_support_batch(languages, model),
Self::Mistral => MistralAdapter::language_support_batch(languages),
+ Self::Bedrock => BedrockAdapter::language_support_batch(languages),
Self::Hyprnote | Self::Cactus => LanguageSupport::Supported {
quality: LanguageQuality::NoData,
},
@@ -484,6 +490,7 @@ impl From<Provider> for AdapterKind {
Provider::ElevenLabs => Self::ElevenLabs,
Provider::DashScope => Self::DashScope,
Provider::Mistral => Self::Mistral,
+ Provider::Bedrock => Self::Bedrock,
}
}
}
diff --git a/crates/owhisper-client/src/lib.rs b/crates/owhisper-client/src/lib.rs
index 34b8e0fba3..eca4d05447 100644
--- a/crates/owhisper-client/src/lib.rs
+++ b/crates/owhisper-client/src/lib.rs
@@ -17,10 +17,10 @@ use std::marker::PhantomData;
pub use adapter::deepgram::DeepgramModel;
pub use adapter::{
- AdapterKind, ArgmaxAdapter, AssemblyAIAdapter, BatchSttAdapter, CactusAdapter, CallbackResult,
- CallbackSttAdapter, DashScopeAdapter, DeepgramAdapter, ElevenLabsAdapter, FireworksAdapter,
- GladiaAdapter, HyprnoteAdapter, LanguageQuality, LanguageSupport, MistralAdapter,
- OpenAIAdapter, RealtimeSttAdapter, SonioxAdapter, append_provider_param,
+ AdapterKind, ArgmaxAdapter, AssemblyAIAdapter, BatchSttAdapter, BedrockAdapter, CactusAdapter,
+ CallbackResult, CallbackSttAdapter, DashScopeAdapter, DeepgramAdapter, ElevenLabsAdapter,
+ FireworksAdapter, GladiaAdapter, HyprnoteAdapter, LanguageQuality, LanguageSupport,
+ MistralAdapter, OpenAIAdapter, RealtimeSttAdapter, SonioxAdapter, append_provider_param,
documented_language_codes_batch, documented_language_codes_live, is_hyprnote_proxy,
is_local_host, normalize_languages,
};
diff --git a/crates/owhisper-client/src/providers.rs b/crates/owhisper-client/src/providers.rs
index 3f3581c0ba..3cde3561c9 100644
--- a/crates/owhisper-client/src/providers.rs
+++ b/crates/owhisper-client/src/providers.rs
@@ -84,10 +84,12 @@ pub enum Provider {
DashScope,
#[strum(serialize = "mistral")]
Mistral,
+ #[strum(serialize = "bedrock")]
+ Bedrock,
}
impl Provider {
- const ALL: [Provider; 9] = [
+ const ALL: [Provider; 10] = [
Self::Deepgram,
Self::AssemblyAI,
Self::Soniox,
@@ -97,6 +99,7 @@ impl Provider {
Self::ElevenLabs,
Self::DashScope,
Self::Mistral,
+ Self::Bedrock,
];
pub fn from_host(host: &str) -> Option<Self> {
@@ -139,6 +142,10 @@ impl Provider {
name: "Authorization",
prefix: Some("Bearer "),
},
+ Self::Bedrock => Auth::Header {
+ name: "Authorization",
+ prefix: Some("Bearer "),
+ },
}
}
@@ -161,6 +168,7 @@ impl Provider {
Self::ElevenLabs => "api.elevenlabs.io",
Self::DashScope => "dashscope-intl.aliyuncs.com",
Self::Mistral => "api.mistral.ai",
+ Self::Bedrock => "bedrock-mantle.us-east-1.api.aws",
}
}
@@ -175,6 +183,7 @@ impl Provider {
Self::ElevenLabs => "api.elevenlabs.io",
Self::DashScope => "dashscope-intl.aliyuncs.com",
Self::Mistral => "api.mistral.ai",
+ Self::Bedrock => "bedrock-mantle.us-east-1.api.aws",
}
}
@@ -189,6 +198,7 @@ impl Provider {
Self::ElevenLabs => "/v1/speech-to-text/realtime",
Self::DashScope => "/api-ws/v1/realtime",
Self::Mistral => "/v1/audio/transcriptions/realtime",
+ Self::Bedrock => "/v1/realtime",
}
}
@@ -203,6 +213,7 @@ impl Provider {
Self::ElevenLabs => Some("https://api.elevenlabs.io/v1"),
Self::DashScope => None,
Self::Mistral => None,
+ Self::Bedrock => None,
}
}
@@ -217,6 +228,7 @@ impl Provider {
Self::ElevenLabs => "https://api.elevenlabs.io",
Self::DashScope => "https://dashscope-intl.aliyuncs.com",
Self::Mistral => "https://api.mistral.ai/v1",
+ Self::Bedrock => "https://bedrock-mantle.us-east-1.api.aws/v1",
}
}
@@ -231,6 +243,7 @@ impl Provider {
Self::ElevenLabs => "elevenlabs.io",
Self::DashScope => "aliyuncs.com",
Self::Mistral => "mistral.ai",
+ Self::Bedrock => "api.aws",
}
}
@@ -263,6 +276,7 @@ impl Provider {
Self::ElevenLabs => "ELEVENLABS_API_KEY",
Self::DashScope => "DASHSCOPE_API_KEY",
Self::Mistral => "MISTRAL_API_KEY",
+ Self::Bedrock => "BEDROCK_API_KEY",
}
}
@@ -277,6 +291,7 @@ impl Provider {
Self::ElevenLabs => "scribe_v2_realtime",
Self::DashScope => "qwen3-asr-flash-realtime",
Self::Mistral => "voxtral-mini-transcribe-realtime-2602",
+ Self::Bedrock => "openai.gpt-4o-transcribe",
}
}
@@ -284,6 +299,7 @@ impl Provider {
match self {
Self::OpenAI => 24000,
Self::ElevenLabs | Self::DashScope | Self::Mistral => 16000,
+ Self::Bedrock => 24000,
_ => 16000,
}
}
@@ -299,6 +315,7 @@ impl Provider {
Self::ElevenLabs => "scribe_v2",
Self::DashScope => "qwen3-asr-flash-filetrans",
Self::Mistral => "voxtral-mini-2602",
+ Self::Bedrock => "amazon.nova-sonic-v1:0",
}
}
@@ -306,7 +323,7 @@ impl Provider {
match self {
Self::Deepgram => &[("model", "nova-3-general"), ("mip_opt_out", "false")],
Self::OpenAI => &[("intent", "transcription")],
- Self::DashScope | Self::Mistral => &[],
+ Self::DashScope | Self::Mistral | Self::Bedrock => &[],
_ => &[],
}
}
@@ -320,7 +337,8 @@ impl Provider {
| Self::OpenAI
| Self::ElevenLabs
| Self::DashScope
- | Self::Mistral => false,
+ | Self::Mistral
+ | Self::Bedrock => false,
}
}
@@ -333,7 +351,7 @@ impl Provider {
Self::OpenAI => &[],
Self::Gladia => &[],
Self::ElevenLabs => &["commit"],
- Self::DashScope | Self::Mistral => &[],
+ Self::DashScope | Self::Mistral | Self::Bedrock => &[],
}
}
@@ -352,7 +370,7 @@ impl Provider {
"words_accurate_timestamps": true
}
})),
- Self::Mistral => None,
+ Self::Mistral | Self::Bedrock => None,
_ => None,
}
}
@@ -363,7 +381,12 @@ impl Provider {
Self::Soniox => soniox::error::detect_error(data),
Self::ElevenLabs => elevenlabs::error::detect_error(data),
Self::AssemblyAI => assemblyai::error::detect_error(data),
- Self::Fireworks | Self::OpenAI | Self::Gladia | Self::DashScope | Self::Mistral => None,
+ Self::Fireworks
+ | Self::OpenAI
+ | Self::Gladia
+ | Self::DashScope
+ | Self::Mistral
+ | Self::Bedrock => None,
}
}
diff --git a/crates/transcribe-proxy/src/env.rs b/crates/transcribe-proxy/src/env.rs
index 22449de50a..dccd1a3677 100644
--- a/crates/transcribe-proxy/src/env.rs
+++ b/crates/transcribe-proxy/src/env.rs
@@ -23,6 +23,8 @@ pub struct SttApiKeysEnv {
pub dashscope_api_key: Option<String>,
#[serde(default)]
pub mistral_api_key: Option<String>,
+ #[serde(default)]
+ pub bedrock_api_key: Option<String>,
}
#[derive(Deserialize, Default)]
@@ -88,6 +90,9 @@ impl From<&SttApiKeysEnv> for ApiKeys {
if let Some(key) = env.mistral_api_key.as_ref().filter(|s| !s.is_empty()) {
map.insert(Provider::Mistral, key.clone());
}
+ if let Some(key) = env.bedrock_api_key.as_ref().filter(|s| !s.is_empty()) {
+ map.insert(Provider::Bedrock, key.clone());
+ }
Self(map)
}
}
diff --git a/crates/transcribe-proxy/src/routes/batch/sync.rs b/crates/transcribe-proxy/src/routes/batch/sync.rs
index 37592902b3..572a537e46 100644
--- a/crates/transcribe-proxy/src/routes/batch/sync.rs
+++ b/crates/transcribe-proxy/src/routes/batch/sync.rs
@@ -8,8 +8,8 @@ use axum::{
};
use backon::{ExponentialBuilder, Retryable};
use owhisper_client::{
- AssemblyAIAdapter, BatchClient, DeepgramAdapter, ElevenLabsAdapter, GladiaAdapter,
- MistralAdapter, OpenAIAdapter, Provider, SonioxAdapter,
+ AssemblyAIAdapter, BatchClient, BedrockAdapter, DeepgramAdapter, ElevenLabsAdapter,
+ GladiaAdapter, MistralAdapter, OpenAIAdapter, Provider, SonioxAdapter,
};
use owhisper_interface::ListenParams;
use owhisper_interface::batch::Response as BatchResponse;
@@ -185,6 +185,7 @@ pub(super) async fn transcribe_with_provider(
Provider::Gladia => batch_transcribe!(GladiaAdapter),
Provider::ElevenLabs => batch_transcribe!(ElevenLabsAdapter),
Provider::Mistral => batch_transcribe!(MistralAdapter),
+ Provider::Bedrock => batch_transcribe!(BedrockAdapter),
Provider::Fireworks | Provider::DashScope => {
return Err(format!(
"{:?} does not support batch transcription",
diff --git a/crates/transcribe-proxy/src/routes/streaming/hyprnote.rs b/crates/transcribe-proxy/src/routes/streaming/hyprnote.rs
index 9969c474d8..8154adf137 100644
--- a/crates/transcribe-proxy/src/routes/streaming/hyprnote.rs
+++ b/crates/transcribe-proxy/src/routes/streaming/hyprnote.rs
@@ -1,7 +1,7 @@
use std::sync::Arc;
use owhisper_client::{
- AssemblyAIAdapter, Auth, DashScopeAdapter, DeepgramAdapter, ElevenLabsAdapter,
+ AssemblyAIAdapter, Auth, BedrockAdapter, DashScopeAdapter, DeepgramAdapter, ElevenLabsAdapter,
FireworksAdapter, GladiaAdapter, MistralAdapter, OpenAIAdapter, Provider, RealtimeSttAdapter,
SonioxAdapter,
};
@@ -47,6 +47,7 @@ fn build_upstream_url_with_adapter(
Provider::ElevenLabs => ElevenLabsAdapter.build_ws_url(api_base, params, channels),
Provider::DashScope => DashScopeAdapter.build_ws_url(api_base, params, channels),
Provider::Mistral => MistralAdapter::default().build_ws_url(api_base, params, channels),
+ Provider::Bedrock => BedrockAdapter.build_ws_url(api_base, params, channels),
}
}
@@ -66,6 +67,7 @@ fn build_initial_message_with_adapter(
Provider::ElevenLabs => ElevenLabsAdapter.initial_message(api_key, params, channels),
Provider::DashScope => DashScopeAdapter.initial_message(api_key, params, channels),
Provider::Mistral => MistralAdapter::default().initial_message(api_key, params, channels),
+ Provider::Bedrock => BedrockAdapter.initial_message(api_key, params, channels),
};
msg.and_then(|m| match m {
@@ -89,6 +91,7 @@ fn build_response_transformer(
Provider::ElevenLabs => ElevenLabsAdapter.parse_response(raw),
Provider::DashScope => DashScopeAdapter.parse_response(raw),
Provider::Mistral => mistral_adapter.parse_response(raw),
+ Provider::Bedrock => BedrockAdapter.parse_response(raw),
};
if responses.is_empty() {
diff --git a/crates/transcribe-proxy/tests/common/mod.rs b/crates/transcribe-proxy/tests/common/mod.rs
index ca3bad2b6c..e0c00ca01c 100644
--- a/crates/transcribe-proxy/tests/common/mod.rs
+++ b/crates/transcribe-proxy/tests/common/mod.rs
@@ -87,6 +87,7 @@ pub fn env_with_provider(provider: Provider, api_key: String) -> transcribe_prox
Provider::ElevenLabs => env.stt.elevenlabs_api_key = Some(api_key),
Provider::DashScope => env.stt.dashscope_api_key = Some(api_key),
Provider::Mistral => env.stt.mistral_api_key = Some(api_key),
+ Provider::Bedrock => env.stt.bedrock_api_key = Some(api_key),
}
env
}