Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions apps/desktop/src/components/settings/ai/stt/shared.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,16 @@ const _PROVIDERS = [
models: ["voxtral-mini-2602"],
requirements: [{ kind: "requires_config", fields: ["api_key"] }],
},
{
disabled: false,
id: "bedrock",
displayName: "Amazon Bedrock",
badge: "Beta",
icon: <Icon icon="simple-icons:amazonaws" className="size-4" />,
baseUrl: "https://bedrock-mantle.us-east-1.api.aws/v1",
models: ["openai.gpt-4o-transcribe", "openai.whisper-1"],
requirements: [{ kind: "requires_config", fields: ["api_key"] }],
},
{
disabled: false,
id: "custom",
Expand Down
8 changes: 7 additions & 1 deletion crates/listener-core/src/actors/listener/adapters.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use bytes::Bytes;
use ractor::{ActorProcessingErr, ActorRef};

use owhisper_client::{
AdapterKind, ArgmaxAdapter, AssemblyAIAdapter, CactusAdapter, DashScopeAdapter,
AdapterKind, ArgmaxAdapter, AssemblyAIAdapter, BedrockAdapter, CactusAdapter, DashScopeAdapter,
DeepgramAdapter, ElevenLabsAdapter, FireworksAdapter, GladiaAdapter, HyprnoteAdapter,
MistralAdapter, OpenAIAdapter, RealtimeSttAdapter, SonioxAdapter,
};
Expand Down Expand Up @@ -95,6 +95,12 @@ pub(super) async fn spawn_rx_task(
(AdapterKind::Mistral, true) => {
spawn_rx_task_dual_with_adapter::<MistralAdapter>(args, myself).await
}
(AdapterKind::Bedrock, false) => {
spawn_rx_task_single_with_adapter::<BedrockAdapter>(args, myself).await
}
(AdapterKind::Bedrock, true) => {
spawn_rx_task_dual_with_adapter::<BedrockAdapter>(args, myself).await
}
(AdapterKind::Hyprnote, false) => {
spawn_rx_task_single_with_adapter::<HyprnoteAdapter>(args, myself).await
}
Expand Down
3 changes: 2 additions & 1 deletion crates/listener2-core/src/batch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use std::time::Duration;

use futures_util::StreamExt;
use owhisper_client::{
AdapterKind, ArgmaxAdapter, AssemblyAIAdapter, BatchSttAdapter, CactusAdapter,
AdapterKind, ArgmaxAdapter, AssemblyAIAdapter, BatchSttAdapter, BedrockAdapter, CactusAdapter,
DashScopeAdapter, DeepgramAdapter, ElevenLabsAdapter, FireworksAdapter, GladiaAdapter,
HyprnoteAdapter, MistralAdapter, OpenAIAdapter, RealtimeSttAdapter, SonioxAdapter,
};
Expand Down Expand Up @@ -390,6 +390,7 @@ async fn spawn_batch_task(
spawn_batch_task_with_adapter::<DashScopeAdapter>(args, myself).await
}
AdapterKind::Mistral => spawn_batch_task_with_adapter::<MistralAdapter>(args, myself).await,
AdapterKind::Bedrock => spawn_batch_task_with_adapter::<BedrockAdapter>(args, myself).await,
AdapterKind::Hyprnote => {
spawn_batch_task_with_adapter::<HyprnoteAdapter>(args, myself).await
}
Expand Down
1 change: 1 addition & 0 deletions crates/listener2-core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ pub fn suggest_providers_for_languages_batch(languages: &[hypr_language::Languag
AdapterKind::ElevenLabs,
AdapterKind::DashScope,
AdapterKind::Mistral,
AdapterKind::Bedrock,
];

let mut with_support: Vec<_> = all_providers
Expand Down
164 changes: 164 additions & 0 deletions crates/owhisper-client/src/adapter/bedrock/batch.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
use std::path::Path;

use owhisper_interface::ListenParams;
use owhisper_interface::batch::{
Alternatives as BatchAlternatives, Channel as BatchChannel, Response as BatchResponse,
Results as BatchResults, Word,
};
use serde::Deserialize;

use super::BedrockAdapter;
use crate::adapter::http::mime_type_from_extension;
use crate::adapter::{BatchFuture, BatchSttAdapter, ClientWithMiddleware};
use crate::error::Error;
use crate::providers::{Provider, is_meta_model};

// Amazon Bedrock supports OpenAI-compatible audio transcription via
// the bedrock-mantle endpoint: POST /v1/audio/transcriptions
// https://docs.aws.amazon.com/bedrock/latest/userguide/apis.html
// Batch (non-streaming) transcription support for Amazon Bedrock, speaking
// the OpenAI-compatible `/audio/transcriptions` protocol exposed by the
// bedrock-mantle endpoint.
impl BatchSttAdapter for BedrockAdapter {
    // Delegates to the adapter's static batch language table.
    fn is_supported_languages(
        &self,
        languages: &[hypr_language::Language],
        _model: Option<&str>,
    ) -> bool {
        Self::language_support_batch(languages).is_supported()
    }

    // Kicks off a one-shot file transcription. The path is copied into an
    // owned `PathBuf` so the boxed future is self-contained with respect to
    // the path argument.
    fn transcribe_file<'a, P: AsRef<Path> + Send + 'a>(
        &'a self,
        client: &'a ClientWithMiddleware,
        api_base: &'a str,
        api_key: &'a str,
        params: &'a ListenParams,
        file_path: P,
    ) -> BatchFuture<'a> {
        let owned_path = file_path.as_ref().to_path_buf();
        Box::pin(async move {
            do_transcribe_file(client, api_base, api_key, params, &owned_path).await
        })
    }
}

// One word-level timestamp entry from the verbose_json transcription payload.
#[derive(Debug, Deserialize)]
struct BedrockWord {
    // The recognized word text.
    word: String,
    // Word start offset — presumably seconds, per OpenAI-style verbose_json; confirm against live responses.
    start: f64,
    // Word end offset (same unit as `start`).
    end: f64,
}

// Deserialized body of a successful `/audio/transcriptions` response.
// Only `text` is mandatory; the rest are optional verbose_json extras
// (`#[serde(default)]` tolerates their absence).
#[derive(Debug, Deserialize)]
struct BedrockTranscriptionResponse {
    // Full transcript text.
    text: String,
    // Per-word timestamps; present only when the model honors
    // `timestamp_granularities[] = word`.
    #[serde(default)]
    words: Option<Vec<BedrockWord>>,
    // Detected language; forwarded into the response metadata below.
    // (Removed the redundant `#[allow(dead_code)]` — the field is read
    // when building the metadata JSON in `do_transcribe_file`.)
    #[serde(default)]
    language: Option<String>,
    // Audio duration reported by the service; currently unused.
    #[serde(default)]
    #[allow(dead_code)]
    duration: Option<f64>,
}

// Uploads one audio file to Bedrock's OpenAI-compatible
// `/audio/transcriptions` endpoint and maps the verbose_json reply into the
// crate's batch `Response` shape.
//
// Errors:
// - `Error::AudioProcessing` for local file-read or multipart-build failures
// - `Error::UnexpectedStatus` for non-2xx HTTP responses (body preserved)
// - transport / JSON-decode errors propagated via `?`
async fn do_transcribe_file(
    client: &ClientWithMiddleware,
    api_base: &str,
    api_key: &str,
    params: &ListenParams,
    file_path: &Path,
) -> Result<BatchResponse, Error> {
    // Prefer the real file name; synthesize one from the extension only when
    // needed (the fallback was previously built eagerly on every call).
    let file_name = file_path
        .file_name()
        .and_then(|n| n.to_str())
        .map(ToOwned::to_owned)
        .unwrap_or_else(|| match file_path.extension().and_then(|e| e.to_str()) {
            Some(ext) => format!("audio.{}", ext),
            None => "audio".to_string(),
        });

    let file_bytes = tokio::fs::read(file_path)
        .await
        .map_err(|e| Error::AudioProcessing(e.to_string()))?;

    let mime_type = mime_type_from_extension(file_path);

    let file_part = reqwest::multipart::Part::bytes(file_bytes)
        .file_name(file_name)
        .mime_str(mime_type)
        .map_err(|e| Error::AudioProcessing(e.to_string()))?;

    // Meta models (auto-style aliases) resolve to the provider's default
    // batch model; an explicit concrete model is passed through as-is.
    let default = Provider::Bedrock.default_batch_model();
    let model = match params.model.as_deref() {
        Some(m) if is_meta_model(m) => default,
        Some(m) => m,
        None => default,
    };

    // verbose_json + word granularity requests per-word timestamps; the
    // `words` field simply stays absent when the model doesn't support them.
    let mut form = reqwest::multipart::Form::new()
        .part("file", file_part)
        .text("model", model.to_string())
        .text("response_format", "verbose_json")
        .text("timestamp_granularities[]", "word");

    // The endpoint takes a single ISO-639 language hint, so only the first
    // requested language can be forwarded.
    if let Some(lang) = params.languages.first() {
        form = form.text("language", lang.iso639().code().to_string());
    }

    // Fix: trim a trailing '/' from whichever base is used. The previous
    // code trimmed only the caller-supplied base, so a default base ending
    // in '/' would have produced a double-slash URL.
    let base = if api_base.is_empty() {
        Provider::Bedrock.default_api_base()
    } else {
        api_base
    };
    let url = format!("{}/audio/transcriptions", base.trim_end_matches('/'));

    let response = client
        .post(&url)
        .header("Authorization", format!("Bearer {}", api_key))
        .multipart(form)
        .send()
        .await?;

    let status = response.status();
    if !status.is_success() {
        let body = response.text().await.unwrap_or_default();
        return Err(Error::UnexpectedStatus { status, body });
    }

    let bedrock_response: BedrockTranscriptionResponse = response.json().await?;

    // The OpenAI-style payload carries no confidence scores or diarization,
    // so 1.0 is used as a neutral confidence and `speaker` is left unset.
    let words: Vec<Word> = bedrock_response
        .words
        .unwrap_or_default()
        .into_iter()
        .map(|w| Word {
            word: w.word.clone(),
            start: w.start,
            end: w.end,
            confidence: 1.0,
            speaker: None,
            punctuated_word: Some(w.word),
        })
        .collect();

    let alternatives = BatchAlternatives {
        transcript: bedrock_response.text.trim().to_string(),
        confidence: 1.0,
        words,
    };

    let channel = BatchChannel {
        alternatives: vec![alternatives],
    };

    // Surface the detected language (if any) so callers can inspect it.
    let metadata = serde_json::json!({
        "language": bedrock_response.language,
    });

    Ok(BatchResponse {
        metadata,
        results: BatchResults {
            channels: vec![channel],
        },
    })
}
Loading