From 6040f0b0a00aaa1678625d325bd70584f45d1157 Mon Sep 17 00:00:00 2001
From: gsharp-aai
Date: Wed, 24 Dec 2025 10:43:56 -0800
Subject: [PATCH 1/2] Add code

---
 .../universal-streaming/multilingual.mdx      | 244 +++++++++++++++++-
 .../meeting-notetaker-best-practices.mdx      |   5 +-
 .../voice-agent-best-practices.mdx            |   3 +-
 3 files changed, 245 insertions(+), 7 deletions(-)

diff --git a/fern/pages/02-speech-to-text/universal-streaming/multilingual.mdx b/fern/pages/02-speech-to-text/universal-streaming/multilingual.mdx
index 3184d795..46580d29 100644
--- a/fern/pages/02-speech-to-text/universal-streaming/multilingual.mdx
+++ b/fern/pages/02-speech-to-text/universal-streaming/multilingual.mdx
@@ -94,12 +94,52 @@ In the future, this built-in formatting capability will be extended to our Engli
 
 First, install the required dependencies.
 
+
+
+```bash
+pip install assemblyai
+```
+
+
 ```bash
 pip install websockets pyaudio
 ```
+
+  The Python example uses the `websockets` library. If you're using `websockets` version 13.0 or later, use the `additional_headers` parameter. For older versions (< 13.0), use `extra_headers` instead.
+
+
+
+
+
+```bash
+npm install assemblyai node-record-lpcm16
+```
+
+  The `node-record-lpcm16` module requires [SoX](http://sox.sourceforge.net/), which must be available in your `$PATH`.
+
+  For macOS:
+
+  ```bash
+  brew install sox
+  ```
+
+  For most Linux distros:
+
+  ```bash
+  sudo apt-get install sox libsox-fmt-all
+  ```
+
+  For Windows:
+
+  [Download the binaries](http://sourceforge.net/projects/sox/files/latest/download)
+
+
@@ -114,6 +154,97 @@
 
 npm install ws mic
 ```
 
 
+
+
+```python {27}
+import logging
+from typing import Type
+
+import assemblyai as aai
+from assemblyai.streaming.v3 import (
+    BeginEvent,
+    StreamingClient,
+    StreamingClientOptions,
+    StreamingError,
+    StreamingEvents,
+    StreamingParameters,
+    TerminationEvent,
+    TurnEvent,
+)
+
+api_key = ""
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+
+def on_begin(self: Type[StreamingClient], event: BeginEvent):
+    print("Websocket connection established")
+    print(f"Session started: {event.id}")
+    print("Receiving transcripts ...")
+    print("Sending audio ...")
+
+
+def on_turn(self: Type[StreamingClient], event: TurnEvent):
+    if not event.end_of_turn and event.transcript:
+        print(f"[PARTIAL TURN TRANSCRIPT]: {event.transcript}")
+    if event.utterance:
+        print(f"[PARTIAL TURN UTTERANCE]: {event.utterance}")
+        # Display language detection info if available
+        if event.language_code:
+            print(f"[UTTERANCE LANGUAGE DETECTION]: {event.language_code} - {event.language_confidence:.2%}")
+    if event.end_of_turn:
+        print(f"[FULL TURN TRANSCRIPT]: {event.transcript}")
+        # Display language detection info if available
+        if event.language_code:
+            print(f"[END OF TURN LANGUAGE DETECTION]: {event.language_code} - {event.language_confidence:.2%}")
+
+
+def on_terminated(self: Type[StreamingClient], event: TerminationEvent):
+    print(
+        f"Session terminated: {event.audio_duration_seconds} seconds of audio processed"
+    )
+
+
+def on_error(self: Type[StreamingClient], error: StreamingError):
+    print(f"Error occurred: {error}")
+
+
+def main():
+    client = StreamingClient(
+        StreamingClientOptions(
+            api_key=api_key,
+            api_host="streaming.assemblyai.com",
+        )
+    )
+
+    client.on(StreamingEvents.Begin, on_begin)
+    client.on(StreamingEvents.Turn, on_turn)
+    client.on(StreamingEvents.Termination, on_terminated)
+    client.on(StreamingEvents.Error, on_error)
+
+    client.connect(
+        StreamingParameters(
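+            # Parameters as used in this example: 48 kHz microphone audio,
+            # the multilingual speech model, and language detection enabled
+            # so that Turn events carry language_code / language_confidence.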
+            sample_rate=48000,
+            speech_model="universal-streaming-multilingual",
+            language_detection=True,
+        )
+    )
+
+    try:
+        client.stream(
+            aai.extras.MicrophoneStream(sample_rate=48000)
+        )
+    finally:
+        client.disconnect(terminate=True)
+
+
+if __name__ == "__main__":
+    main()
+```
+
+
 
 ```python {26}
@@ -152,7 +283,7 @@ async def send_receive():
 
     async with websockets.connect(
         URL,
-        extra_headers={"Authorization": "YOUR-API-KEY"},
+        additional_headers={"Authorization": "YOUR-API-KEY"},
         ping_interval=5,
         ping_timeout=20
     ) as _ws:
@@ -183,13 +314,18 @@ async def send_receive():
                     utterance = data['utterance']
 
                 if data['type'] == 'Turn':
+                    if not data.get('end_of_turn') and transcript:
+                        print(f"[PARTIAL TURN TRANSCRIPT]: {transcript}")
                     if data.get('utterance'):
-                        print(f"\r[PARTIAL TURN UTTERANCE]: {utterance}")
+                        print(f"[PARTIAL TURN UTTERANCE]: {utterance}")
                         # Display language detection info if available
                         if 'language_code' in data:
-                            print(f"\r[UTTERANCE LANGUAGE DETECTION]: {data['language_code']} - {data['language_confidence']:.2%}")
+                            print(f"[UTTERANCE LANGUAGE DETECTION]: {data['language_code']} - {data['language_confidence']:.2%}")
                     if data.get('end_of_turn'):
-                        print(f"\r[FULL TURN TRANSCRIPT]: {transcript}")
+                        print(f"[FULL TURN TRANSCRIPT]: {transcript}")
+                        # Display language detection info if available
+                        if 'language_code' in data:
+                            print(f"[END OF TURN LANGUAGE DETECTION]: {data['language_code']} - {data['language_confidence']:.2%}")
                     else:
                         pass
@@ -348,6 +484,9 @@ async function run() {
         const transcript = data.transcript || "";
         const utterance = data.utterance || "";
 
+        if (!data.end_of_turn && transcript) {
+          console.log(`[PARTIAL TURN TRANSCRIPT]: ${transcript}`);
+        }
         if (data.utterance) {
           console.log(`[PARTIAL TURN UTTERANCE]: ${utterance}`);
           // Display language detection info if available
@@ -358,6 +497,11 @@ async function run() {
         }
         if (data.end_of_turn) {
           console.log(`[FULL TURN TRANSCRIPT]: ${transcript}`);
+          // Display language detection info if available
+          if (data.language_code) {
+            const langConfidence = (data.language_confidence * 100).toFixed(2);
+            console.log(`[END OF TURN LANGUAGE DETECTION]: ${data.language_code} - ${langConfidence}%`);
+          }
         }
       } else if (msgType === "Termination") {
         const audioDuration = data.audio_duration_seconds;
@@ -489,4 +633,96 @@ run();
 
+
+
+```javascript {11}
+import { Readable } from 'stream'
+import { AssemblyAI } from 'assemblyai'
+import recorder from 'node-record-lpcm16'
+
+const run = async () => {
+  const client = new AssemblyAI({
+    apiKey: "",
+  });
+
+  const transcriber = client.streaming.transcriber({
+    sampleRate: 48_000,
+    speechModel: "universal-streaming-multilingual",
+    languageDetection: true
+  });
+
+  transcriber.on("open", ({ id }) => {
+    console.log("Websocket connection established");
+    console.log(`Session opened with ID: ${id}`);
+    console.log("Receiving transcripts ...");
+    console.log("Sending audio ...");
+  });
+
+  transcriber.on("error", (error) => {
+    console.error("Error:", error);
+  });
+
+  transcriber.on("close", (code, reason) =>
+    console.log("Session closed:", code, reason),
+  );
+
+  transcriber.on("turn", (turn) => {
+    if (!turn.end_of_turn && turn.transcript) {
+      console.log(`[PARTIAL TURN TRANSCRIPT]: ${turn.transcript}`);
+    }
+    if (turn.utterance) {
+      console.log(`[PARTIAL TURN UTTERANCE]: ${turn.utterance}`);
+      // Display language detection info if available
+      if (turn.language_code) {
+        const langConfidence = (turn.language_confidence * 100).toFixed(2);
+        console.log(`[UTTERANCE LANGUAGE DETECTION]: ${turn.language_code} - ${langConfidence}%`);
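+        // Note: language_confidence is reported on a 0-1 scale; it is
+        // multiplied by 100 above to display it as a percentage.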
+      }
+    }
+    if (turn.end_of_turn) {
+      console.log(`[FULL TURN TRANSCRIPT]: ${turn.transcript}`);
+      // Display language detection info if available
+      if (turn.language_code) {
+        const langConfidence = (turn.language_confidence * 100).toFixed(2);
+        console.log(`[END OF TURN LANGUAGE DETECTION]: ${turn.language_code} - ${langConfidence}%`);
+      }
+    }
+  });
+
+  try {
+    console.log("Connecting to streaming transcript service");
+
+    await transcriber.connect();
+
+    console.log("Starting recording");
+
+    const recording = recorder.record({
+      channels: 1,
+      sampleRate: 48_000,
+      audioType: "wav", // Linear PCM
+    });
+
+    Readable.toWeb(recording.stream()).pipeTo(transcriber.stream());
+
+    // Stop recording and close the connection with Ctrl-C.
+
+    process.on("SIGINT", async function () {
+      console.log();
+      console.log("Stopping recording");
+      recording.stop();
+
+      console.log("Closing streaming transcript connection");
+      await transcriber.close();
+
+      process.exit();
+    });
+  } catch (error) {
+    console.error(error);
+  }
+};
+
+run();
+```
+
+
+
diff --git a/fern/pages/07-use-cases/meeting-notetaker-best-practices.mdx b/fern/pages/07-use-cases/meeting-notetaker-best-practices.mdx
index 25811c32..b44f6550 100644
--- a/fern/pages/07-use-cases/meeting-notetaker-best-practices.mdx
+++ b/fern/pages/07-use-cases/meeting-notetaker-best-practices.mdx
@@ -610,7 +610,8 @@ class ChannelTranscriber:
         """Transcribe a single audio channel"""
         url = f"wss://streaming.assemblyai.com/v3/ws?{urlencode(self.connection_params)}"
 
-        async with websockets.connect(url, extra_headers={"Authorization": API_KEY}) as ws:
+        # If you're using `websockets` version 13.0 or later, use the `additional_headers` parameter. For older versions (< 13.0), use `extra_headers` instead.
+        async with websockets.connect(url, additional_headers={"Authorization": API_KEY}) as ws:
             # Send audio from this channel only
             async for audio_chunk in audio_stream:
                 await ws.send(audio_chunk)
@@ -1439,7 +1440,7 @@ class StreamingResponseProcessor:
 
 # Example usage
 processor = StreamingResponseProcessor()
-async with websockets.connect(API_ENDPOINT, extra_headers=headers) as ws:
+async with websockets.connect(API_ENDPOINT, additional_headers=headers) as ws:
     async for message in ws:
         data = json.loads(message)
         result = processor.process_message(data)
diff --git a/fern/pages/07-use-cases/voice-agent-best-practices.mdx b/fern/pages/07-use-cases/voice-agent-best-practices.mdx
index 83d5f59c..f1b411dd 100644
--- a/fern/pages/07-use-cases/voice-agent-best-practices.mdx
+++ b/fern/pages/07-use-cases/voice-agent-best-practices.mdx
@@ -223,7 +223,8 @@ async def main():
     headers = {"Authorization": API_KEY}
 
     try:
-        async with websockets.connect(API_ENDPOINT, extra_headers=headers) as websocket:
+        # If you're using `websockets` version 13.0 or later, use the `additional_headers` parameter. For older versions (< 13.0), use `extra_headers` instead.
+        async with websockets.connect(API_ENDPOINT, additional_headers=headers) as websocket:
             print("✅ Connected to Universal-Streaming!")
             print("🎤 Start speaking... (Press Ctrl+C to stop)\n")
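Both patches in this series switch the `websockets` keyword argument based on the installed version. If a codebase has to support both ranges at once, one option is to pick the kwarg at runtime. A minimal sketch, assuming only that the `websockets` package exposes `__version__` (the helper name is illustrative, not part of any SDK):

```python
import websockets


def auth_header_kwargs(api_key: str) -> dict:
    """Return the header kwarg matching the installed websockets version."""
    major = int(websockets.__version__.split(".")[0])
    # websockets >= 13.0 renamed extra_headers to additional_headers.
    kwarg = "additional_headers" if major >= 13 else "extra_headers"
    return {kwarg: {"Authorization": api_key}}


# Usage:
#   async with websockets.connect(URL, **auth_header_kwargs(API_KEY)) as ws:
#       ...
```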
(Press Ctrl+C to stop)\n") From a7a646c7fb486f27565a7715d0117d338c3e1862 Mon Sep 17 00:00:00 2001 From: gsharp-aai Date: Wed, 24 Dec 2025 10:47:56 -0800 Subject: [PATCH 2/2] Add comment --- fern/pages/07-use-cases/meeting-notetaker-best-practices.mdx | 1 + 1 file changed, 1 insertion(+) diff --git a/fern/pages/07-use-cases/meeting-notetaker-best-practices.mdx b/fern/pages/07-use-cases/meeting-notetaker-best-practices.mdx index b44f6550..c5880eca 100644 --- a/fern/pages/07-use-cases/meeting-notetaker-best-practices.mdx +++ b/fern/pages/07-use-cases/meeting-notetaker-best-practices.mdx @@ -1440,6 +1440,7 @@ class StreamingResponseProcessor: # Example usage processor = StreamingResponseProcessor() +# If you're using `websockets` version 13.0 or later, use `additional_headers` parameter. For older versions (< 13.0), use `extra_headers` instead. async with websockets.connect(API_ENDPOINT, additional_headers=headers) as ws: async for message in ws: data = json.loads(message)
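Across these examples, Turn messages can carry `language_code` and `language_confidence` when language detection is enabled. A minimal sketch of one way to consume those fields downstream; the function name and the 0.7 threshold are illustrative assumptions, not part of the API:

```python
def handle_language_detection(data: dict, threshold: float = 0.7) -> str | None:
    """Return the detected language only when confidence clears the threshold."""
    code = data.get("language_code")
    confidence = data.get("language_confidence") or 0.0
    if code and confidence >= threshold:
        # Confidence is on a 0-1 scale, printed here as a percentage.
        print(f"Detected language: {code} ({confidence:.2%})")
        return code
    return None
```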