From 7cc57b1b35cccd18e69879ab22fbb7cb2ce64385 Mon Sep 17 00:00:00 2001 From: gsharp-aai Date: Wed, 24 Dec 2025 14:43:05 -0800 Subject: [PATCH] Add SDK code --- .../universal-streaming-keyterms.mdx | 335 ++++++++++++------ 1 file changed, 223 insertions(+), 112 deletions(-) diff --git a/fern/pages/02-speech-to-text/universal-streaming/universal-streaming-keyterms.mdx b/fern/pages/02-speech-to-text/universal-streaming/universal-streaming-keyterms.mdx index d0586f8c..7da8fb76 100644 --- a/fern/pages/02-speech-to-text/universal-streaming/universal-streaming-keyterms.mdx +++ b/fern/pages/02-speech-to-text/universal-streaming/universal-streaming-keyterms.mdx @@ -14,6 +14,14 @@ Keyterms Prompting costs an additional $0.04/hour. Firstly, install the required dependencies. + + +```bash +pip install assemblyai +``` + + + ```bash @@ -22,6 +30,34 @@ pip install websocket-client pyaudio + + +```bash +npm install assemblyai node-record-lpcm16 +``` + + + The module `node-record-lpcm16` requires [SoX](http://sox.sourceforge.net/) and it must be available in your `$PATH`. 
+ + For macOS: + + ```bash + brew install sox + ``` + + For most Linux distros: + + ```bash + sudo apt-get install sox libsox-fmt-all + ``` + + For Windows: + + [download the binaries](http://sourceforge.net/projects/sox/files/latest/download) + + + + ```bash @@ -34,6 +70,88 @@ npm install ws mic + + +```python {15} +import logging +from typing import Type + +import assemblyai as aai +from assemblyai.streaming.v3 import ( + BeginEvent, + StreamingClient, + StreamingClientOptions, + StreamingError, + StreamingEvents, + StreamingParameters, + TerminationEvent, + TurnEvent, +) + +api_key = "" + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +def on_begin(self: Type[StreamingClient], event: BeginEvent): + print(f"Session started: {event.id}") + + +def on_turn(self: Type[StreamingClient], event: TurnEvent): + if event.turn_is_formatted: + # Clear the line and print formatted final transcript on new line + print(f"\r{' ' * 100}\r{event.transcript}") + else: + # Overwrite current line with partial unformatted transcript + print(f"\r{event.transcript}", end='', flush=True) + + +def on_terminated(self: Type[StreamingClient], event: TerminationEvent): + print( + f"Session terminated: {event.audio_duration_seconds} seconds of audio processed" + ) + + +def on_error(self: Type[StreamingClient], error: StreamingError): + print(f"Error occurred: {error}") + + +def main(): + client = StreamingClient( + StreamingClientOptions( + api_key=api_key, + api_host="streaming.assemblyai.com", + ) + ) + + client.on(StreamingEvents.Begin, on_begin) + client.on(StreamingEvents.Turn, on_turn) + client.on(StreamingEvents.Termination, on_terminated) + client.on(StreamingEvents.Error, on_error) + + client.connect( + StreamingParameters( + sample_rate=16000, + format_turns=True, + keyterms_prompt=["Keanu Reeves", "AssemblyAI", "Universal-2"], + ) + ) + + try: + client.stream( + aai.extras.MicrophoneStream(sample_rate=16000) + ) + finally: + 
client.disconnect(terminate=True) + + +if __name__ == "__main__": + main() +``` + + + ```python {16} @@ -42,7 +160,6 @@ import websocket import json import threading import time -import wave from urllib.parse import urlencode from datetime import datetime @@ -70,10 +187,6 @@ ws_app = None audio_thread = None stop_event = threading.Event() # To signal the audio thread to stop -# WAV recording variables -recorded_frames = [] # Store audio frames for WAV file -recording_lock = threading.Lock() # Thread-safe access to recorded_frames - # --- WebSocket Event Handlers --- @@ -90,10 +203,6 @@ def on_open(ws): try: audio_data = stream.read(FRAMES_PER_BUFFER, exception_on_overflow=False) - # Store audio data for WAV recording - with recording_lock: - recorded_frames.append(audio_data) - # Send audio data as binary message ws.send(audio_data, websocket.ABNF.OPCODE_BINARY) except Exception as e: @@ -148,9 +257,6 @@ def on_close(ws, close_status_code, close_msg): """Called when the WebSocket connection is closed.""" print(f"\nWebSocket Disconnected: Status={close_status_code}, Msg={close_msg}") - # Save recorded audio to WAV file - save_wav_file() - # Ensure audio resources are released global stream, audio stop_event.set() # Signal audio thread just in case it's still running @@ -168,33 +274,6 @@ def on_close(ws, close_status_code, close_msg): audio_thread.join(timeout=1.0) -def save_wav_file(): - """Save recorded audio frames to a WAV file.""" - if not recorded_frames: - print("No audio data recorded.") - return - - # Generate filename with timestamp - timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - filename = f"recorded_audio_{timestamp}.wav" - - try: - with wave.open(filename, 'wb') as wf: - wf.setnchannels(CHANNELS) - wf.setsampwidth(2) # 16-bit = 2 bytes - wf.setframerate(SAMPLE_RATE) - - # Write all recorded frames - with recording_lock: - wf.writeframes(b''.join(recorded_frames)) - - print(f"Audio saved to: {filename}") - print(f"Duration: {len(recorded_frames) 
* FRAMES_PER_BUFFER / SAMPLE_RATE:.2f} seconds") - - except Exception as e: - print(f"Error saving WAV file: {e}") - - # --- Main Execution --- def run(): global audio, stream, ws_app @@ -213,7 +292,6 @@ def run(): ) print("Microphone stream opened successfully.") print("Speak into your microphone. Press Ctrl+C to stop.") - print("Audio will be saved to a WAV file when the session ends.") except Exception as e: print(f"Error opening microphone stream: {e}") if audio: @@ -291,7 +369,6 @@ if __name__ == "__main__": const WebSocket = require("ws"); const mic = require("mic"); const querystring = require("querystring"); -const fs = require("fs"); // --- Configuration --- const YOUR_API_KEY = "YOUR-API-KEY"; // Replace with your actual API key @@ -317,78 +394,14 @@ let micInputStream = null; let ws = null; let stopRequested = false; -// WAV recording variables -let recordedFrames = []; // Store audio frames for WAV file - // --- Helper functions --- -function clearLine() { - process.stdout.write("\r" + " ".repeat(80) + "\r"); -} - function formatTimestamp(timestamp) { return new Date(timestamp * 1000).toISOString(); } -function createWavHeader(sampleRate, channels, dataLength) { - const buffer = Buffer.alloc(44); - - // RIFF header - buffer.write("RIFF", 0); - buffer.writeUInt32LE(36 + dataLength, 4); - buffer.write("WAVE", 8); - - // fmt chunk - buffer.write("fmt ", 12); - buffer.writeUInt32LE(16, 16); // fmt chunk size - buffer.writeUInt16LE(1, 20); // PCM format - buffer.writeUInt16LE(channels, 22); - buffer.writeUInt32LE(sampleRate, 24); - buffer.writeUInt32LE(sampleRate * channels * 2, 28); // byte rate - buffer.writeUInt16LE(channels * 2, 32); // block align - buffer.writeUInt16LE(16, 34); // bits per sample - - // data chunk - buffer.write("data", 36); - buffer.writeUInt32LE(dataLength, 40); - - return buffer; -} - -function saveWavFile() { - if (recordedFrames.length === 0) { - console.log("No audio data recorded."); - return; - } - - // Generate filename with 
timestamp - const timestamp = new Date().toISOString().replace(/[:.]/g, "-").slice(0, 19); - const filename = `recorded_audio_${timestamp}.wav`; - - try { - // Combine all recorded frames - const audioData = Buffer.concat(recordedFrames); - const dataLength = audioData.length; - - // Create WAV header - const wavHeader = createWavHeader(SAMPLE_RATE, CHANNELS, dataLength); - - // Write WAV file - const wavFile = Buffer.concat([wavHeader, audioData]); - fs.writeFileSync(filename, wavFile); - - console.log(`Audio saved to: ${filename}`); - console.log( - `Duration: ${(dataLength / (SAMPLE_RATE * CHANNELS * 2)).toFixed(2)} seconds` - ); - } catch (error) { - console.error(`Error saving WAV file: ${error}`); - } -} - // --- Main function --- async function run() { console.log("Starting AssemblyAI real-time transcription..."); - console.log("Audio will be saved to a WAV file when the session ends."); // Initialize WebSocket connection ws = new WebSocket(API_ENDPOINT, { @@ -421,9 +434,11 @@ async function run() { const formatted = data.turn_is_formatted; if (formatted) { - clearLine(); + // Clear the line and print formatted final transcript on new line + process.stdout.write("\r" + " ".repeat(100) + "\r"); console.log(transcript); } else { + // Overwrite current line with partial unformatted transcript process.stdout.write(`\r${transcript}`); } } else if (msgType === "Termination") { @@ -466,9 +481,6 @@ function startMicrophone() { micInputStream.on("data", (data) => { if (ws && ws.readyState === WebSocket.OPEN && !stopRequested) { - // Store audio data for WAV recording - recordedFrames.push(Buffer.from(data)); - // Send audio data to WebSocket ws.send(data); } @@ -491,9 +503,6 @@ function startMicrophone() { function cleanup() { stopRequested = true; - // Save recorded audio to WAV file - saveWavFile(); - // Stop microphone if it's running if (micInstance) { try { @@ -556,6 +565,84 @@ run(); + + +```javascript {10} +import { Readable } from 'stream' +import { 
AssemblyAI } from 'assemblyai' +import recorder from 'node-record-lpcm16' + +const run = async () => { + const client = new AssemblyAI({ + apiKey: "", + }); + + const transcriber = client.streaming.transcriber({ + sampleRate: 16_000, + formatTurns: true, + keytermsPrompt: ["Keanu Reeves", "AssemblyAI", "Universal-2"] + }); + + transcriber.on("open", ({ id }) => { + console.log(`Session opened with ID: ${id}`); + }); + + transcriber.on("error", (error) => { + console.error("Error:", error); + }); + + transcriber.on("close", (code, reason) => + console.log("Session closed:", code, reason), + ); + + transcriber.on("turn", (turn) => { + if (turn.turn_is_formatted) { + // Clear the line and print formatted final transcript on new line + process.stdout.write("\r" + " ".repeat(100) + "\r"); + console.log(turn.transcript); + } else { + // Overwrite current line with partial unformatted transcript + process.stdout.write(`\r${turn.transcript}`); + } + }); + + try { + console.log("Connecting to streaming transcript service"); + + await transcriber.connect(); + + console.log("Starting recording"); + + const recording = recorder.record({ + channels: 1, + sampleRate: 16_000, + audioType: "wav", // Linear PCM + }); + + Readable.toWeb(recording.stream()).pipeTo(transcriber.stream()); + + // Stop recording and close connection using Ctrl-C. 
+ + process.on("SIGINT", async function () { + console.log(); + console.log("Stopping recording"); + recording.stop(); + + console.log("Closing streaming transcript connection"); + await transcriber.close(); + + process.exit(); + }); + } catch (error) { + console.error(error); + } +}; + +run(); +``` + + + ## Configuration @@ -589,6 +676,18 @@ To update keyterms while streaming, send an `UpdateConfiguration` message with a + + +```python +# Replace or establish new set of keyterms +client.update_configuration(keyterms_prompt=["Universal-3"]) + +# Remove keyterms and reset context biasing +client.update_configuration(keyterms_prompt=[]) +``` + + + ```python @@ -601,6 +700,18 @@ websocket.send('{"type": "UpdateConfiguration", "keyterms_prompt": []}') + + +```javascript +// Replace or establish new set of keyterms +transcriber.updateConfiguration({ keytermsPrompt: ["Universal-3"] }); + +// Remove keyterms and reset context biasing +transcriber.updateConfiguration({ keytermsPrompt: [] }); +``` + + + ```javascript