diff --git a/fern/pages/02-speech-to-text/universal-streaming/universal-streaming-keyterms.mdx b/fern/pages/02-speech-to-text/universal-streaming/universal-streaming-keyterms.mdx
index d0586f8c..7da8fb76 100644
--- a/fern/pages/02-speech-to-text/universal-streaming/universal-streaming-keyterms.mdx
+++ b/fern/pages/02-speech-to-text/universal-streaming/universal-streaming-keyterms.mdx
@@ -14,6 +14,14 @@ Keyterms Prompting costs an additional $0.04/hour.
Firstly, install the required dependencies.
+
+
+```bash
+pip install assemblyai
+```
+
+
+
```bash
@@ -22,6 +30,34 @@ pip install websocket-client pyaudio
+
+
+```bash
+npm install assemblyai node-record-lpcm16
+```
+
+
+ The module `node-record-lpcm16` requires [SoX](http://sox.sourceforge.net/) and it must be available in your `$PATH`.
+
+ For macOS:
+
+ ```bash
+ brew install sox
+ ```
+
+ For most Linux distros (Debian/Ubuntu-based):
+
+ ```bash
+ sudo apt-get install sox libsox-fmt-all
+ ```
+
+ For Windows:
+
+ [Download the binaries](http://sourceforge.net/projects/sox/files/latest/download) and add the install directory to your `PATH`.
+
+
+
+
```bash
@@ -34,6 +70,88 @@ npm install ws mic
+
+
+```python {15}
+import logging
+from typing import Type
+
+import assemblyai as aai
+from assemblyai.streaming.v3 import (
+ BeginEvent,
+ StreamingClient,
+ StreamingClientOptions,
+ StreamingError,
+ StreamingEvents,
+ StreamingParameters,
+ TerminationEvent,
+ TurnEvent,
+)
+
+api_key = ""
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+
+def on_begin(self: Type[StreamingClient], event: BeginEvent):
+ print(f"Session started: {event.id}")
+
+
+def on_turn(self: Type[StreamingClient], event: TurnEvent):
+ if event.turn_is_formatted:
+ # Clear the line and print formatted final transcript on new line
+ print(f"\r{' ' * 100}\r{event.transcript}")
+ else:
+ # Overwrite current line with partial unformatted transcript
+ print(f"\r{event.transcript}", end='', flush=True)
+
+
+def on_terminated(self: Type[StreamingClient], event: TerminationEvent):
+ print(
+ f"Session terminated: {event.audio_duration_seconds} seconds of audio processed"
+ )
+
+
+def on_error(self: Type[StreamingClient], error: StreamingError):
+ print(f"Error occurred: {error}")
+
+
+def main():
+ client = StreamingClient(
+ StreamingClientOptions(
+ api_key=api_key,
+ api_host="streaming.assemblyai.com",
+ )
+ )
+
+ client.on(StreamingEvents.Begin, on_begin)
+ client.on(StreamingEvents.Turn, on_turn)
+ client.on(StreamingEvents.Termination, on_terminated)
+ client.on(StreamingEvents.Error, on_error)
+
+ client.connect(
+ StreamingParameters(
+ sample_rate=16000,
+ format_turns=True,
+ keyterms_prompt=["Keanu Reeves", "AssemblyAI", "Universal-2"],
+ )
+ )
+
+ try:
+ client.stream(
+ aai.extras.MicrophoneStream(sample_rate=16000)
+ )
+ finally:
+ client.disconnect(terminate=True)
+
+
+if __name__ == "__main__":
+ main()
+```
+
+
+
```python {16}
@@ -42,7 +160,6 @@ import websocket
import json
import threading
import time
-import wave
from urllib.parse import urlencode
from datetime import datetime
@@ -70,10 +187,6 @@ ws_app = None
audio_thread = None
stop_event = threading.Event() # To signal the audio thread to stop
-# WAV recording variables
-recorded_frames = [] # Store audio frames for WAV file
-recording_lock = threading.Lock() # Thread-safe access to recorded_frames
-
# --- WebSocket Event Handlers ---
@@ -90,10 +203,6 @@ def on_open(ws):
try:
audio_data = stream.read(FRAMES_PER_BUFFER, exception_on_overflow=False)
- # Store audio data for WAV recording
- with recording_lock:
- recorded_frames.append(audio_data)
-
# Send audio data as binary message
ws.send(audio_data, websocket.ABNF.OPCODE_BINARY)
except Exception as e:
@@ -148,9 +257,6 @@ def on_close(ws, close_status_code, close_msg):
"""Called when the WebSocket connection is closed."""
print(f"\nWebSocket Disconnected: Status={close_status_code}, Msg={close_msg}")
- # Save recorded audio to WAV file
- save_wav_file()
-
# Ensure audio resources are released
global stream, audio
stop_event.set() # Signal audio thread just in case it's still running
@@ -168,33 +274,6 @@ def on_close(ws, close_status_code, close_msg):
audio_thread.join(timeout=1.0)
-def save_wav_file():
- """Save recorded audio frames to a WAV file."""
- if not recorded_frames:
- print("No audio data recorded.")
- return
-
- # Generate filename with timestamp
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
- filename = f"recorded_audio_{timestamp}.wav"
-
- try:
- with wave.open(filename, 'wb') as wf:
- wf.setnchannels(CHANNELS)
- wf.setsampwidth(2) # 16-bit = 2 bytes
- wf.setframerate(SAMPLE_RATE)
-
- # Write all recorded frames
- with recording_lock:
- wf.writeframes(b''.join(recorded_frames))
-
- print(f"Audio saved to: {filename}")
- print(f"Duration: {len(recorded_frames) * FRAMES_PER_BUFFER / SAMPLE_RATE:.2f} seconds")
-
- except Exception as e:
- print(f"Error saving WAV file: {e}")
-
-
# --- Main Execution ---
def run():
global audio, stream, ws_app
@@ -213,7 +292,6 @@ def run():
)
print("Microphone stream opened successfully.")
print("Speak into your microphone. Press Ctrl+C to stop.")
- print("Audio will be saved to a WAV file when the session ends.")
except Exception as e:
print(f"Error opening microphone stream: {e}")
if audio:
@@ -291,7 +369,6 @@ if __name__ == "__main__":
const WebSocket = require("ws");
const mic = require("mic");
const querystring = require("querystring");
-const fs = require("fs");
// --- Configuration ---
const YOUR_API_KEY = "YOUR-API-KEY"; // Replace with your actual API key
@@ -317,78 +394,14 @@ let micInputStream = null;
let ws = null;
let stopRequested = false;
-// WAV recording variables
-let recordedFrames = []; // Store audio frames for WAV file
-
// --- Helper functions ---
-function clearLine() {
- process.stdout.write("\r" + " ".repeat(80) + "\r");
-}
-
function formatTimestamp(timestamp) {
return new Date(timestamp * 1000).toISOString();
}
-function createWavHeader(sampleRate, channels, dataLength) {
- const buffer = Buffer.alloc(44);
-
- // RIFF header
- buffer.write("RIFF", 0);
- buffer.writeUInt32LE(36 + dataLength, 4);
- buffer.write("WAVE", 8);
-
- // fmt chunk
- buffer.write("fmt ", 12);
- buffer.writeUInt32LE(16, 16); // fmt chunk size
- buffer.writeUInt16LE(1, 20); // PCM format
- buffer.writeUInt16LE(channels, 22);
- buffer.writeUInt32LE(sampleRate, 24);
- buffer.writeUInt32LE(sampleRate * channels * 2, 28); // byte rate
- buffer.writeUInt16LE(channels * 2, 32); // block align
- buffer.writeUInt16LE(16, 34); // bits per sample
-
- // data chunk
- buffer.write("data", 36);
- buffer.writeUInt32LE(dataLength, 40);
-
- return buffer;
-}
-
-function saveWavFile() {
- if (recordedFrames.length === 0) {
- console.log("No audio data recorded.");
- return;
- }
-
- // Generate filename with timestamp
- const timestamp = new Date().toISOString().replace(/[:.]/g, "-").slice(0, 19);
- const filename = `recorded_audio_${timestamp}.wav`;
-
- try {
- // Combine all recorded frames
- const audioData = Buffer.concat(recordedFrames);
- const dataLength = audioData.length;
-
- // Create WAV header
- const wavHeader = createWavHeader(SAMPLE_RATE, CHANNELS, dataLength);
-
- // Write WAV file
- const wavFile = Buffer.concat([wavHeader, audioData]);
- fs.writeFileSync(filename, wavFile);
-
- console.log(`Audio saved to: ${filename}`);
- console.log(
- `Duration: ${(dataLength / (SAMPLE_RATE * CHANNELS * 2)).toFixed(2)} seconds`
- );
- } catch (error) {
- console.error(`Error saving WAV file: ${error}`);
- }
-}
-
// --- Main function ---
async function run() {
console.log("Starting AssemblyAI real-time transcription...");
- console.log("Audio will be saved to a WAV file when the session ends.");
// Initialize WebSocket connection
ws = new WebSocket(API_ENDPOINT, {
@@ -421,9 +434,11 @@ async function run() {
const formatted = data.turn_is_formatted;
if (formatted) {
- clearLine();
+ // Clear the line and print formatted final transcript on new line
+ process.stdout.write("\r" + " ".repeat(100) + "\r");
console.log(transcript);
} else {
+ // Overwrite current line with partial unformatted transcript
process.stdout.write(`\r${transcript}`);
}
} else if (msgType === "Termination") {
@@ -466,9 +481,6 @@ function startMicrophone() {
micInputStream.on("data", (data) => {
if (ws && ws.readyState === WebSocket.OPEN && !stopRequested) {
- // Store audio data for WAV recording
- recordedFrames.push(Buffer.from(data));
-
// Send audio data to WebSocket
ws.send(data);
}
@@ -491,9 +503,6 @@ function startMicrophone() {
function cleanup() {
stopRequested = true;
- // Save recorded audio to WAV file
- saveWavFile();
-
// Stop microphone if it's running
if (micInstance) {
try {
@@ -556,6 +565,84 @@ run();
+
+
+```javascript {10}
+import { Readable } from 'stream'
+import { AssemblyAI } from 'assemblyai'
+import recorder from 'node-record-lpcm16'
+
+const run = async () => {
+ const client = new AssemblyAI({
+ apiKey: "",
+ });
+
+ const transcriber = client.streaming.transcriber({
+ sampleRate: 16_000,
+ formatTurns: true,
+ keytermsPrompt: ["Keanu Reeves", "AssemblyAI", "Universal-2"]
+ });
+
+ transcriber.on("open", ({ id }) => {
+ console.log(`Session opened with ID: ${id}`);
+ });
+
+ transcriber.on("error", (error) => {
+ console.error("Error:", error);
+ });
+
+ transcriber.on("close", (code, reason) =>
+ console.log("Session closed:", code, reason),
+ );
+
+ transcriber.on("turn", (turn) => {
+ if (turn.turn_is_formatted) {
+ // Clear the line and print formatted final transcript on new line
+ process.stdout.write("\r" + " ".repeat(100) + "\r");
+ console.log(turn.transcript);
+ } else {
+ // Overwrite current line with partial unformatted transcript
+ process.stdout.write(`\r${turn.transcript}`);
+ }
+ });
+
+ try {
+ console.log("Connecting to streaming transcript service");
+
+ await transcriber.connect();
+
+ console.log("Starting recording");
+
+ const recording = recorder.record({
+ channels: 1,
+ sampleRate: 16_000,
+ audioType: "wav", // Linear PCM
+ });
+
+ Readable.toWeb(recording.stream()).pipeTo(transcriber.stream());
+
+ // Stop recording and close connection using Ctrl-C.
+
+ process.on("SIGINT", async function () {
+ console.log();
+ console.log("Stopping recording");
+ recording.stop();
+
+ console.log("Closing streaming transcript connection");
+ await transcriber.close();
+
+ process.exit();
+ });
+ } catch (error) {
+ console.error(error);
+ }
+};
+
+run();
+```
+
+
+
## Configuration
@@ -589,6 +676,18 @@ To update keyterms while streaming, send an `UpdateConfiguration` message with a
+
+
+```python
+# Replace or establish new set of keyterms
+client.update_configuration(keyterms_prompt=["Universal-3"])
+
+# Remove keyterms and reset context biasing
+client.update_configuration(keyterms_prompt=[])
+```
+
+
+
```python
@@ -601,6 +700,18 @@ websocket.send('{"type": "UpdateConfiguration", "keyterms_prompt": []}')
+
+
+```javascript
+// Replace or establish new set of keyterms
+transcriber.updateConfiguration({ keytermsPrompt: ["Universal-3"] });
+
+// Remove keyterms and reset context biasing
+transcriber.updateConfiguration({ keytermsPrompt: [] });
+```
+
+
+
```javascript