244 changes: 240 additions & 4 deletions fern/pages/02-speech-to-text/universal-streaming/multilingual.mdx
@@ -94,12 +94,52 @@ In the future, this built-in formatting capability will be extended to our Engli

First, install the required dependencies.

<Tab language="python-sdk" title="Python SDK">

```bash
pip install assemblyai
```

</Tab>

<Tab language="python" title="Python">

```bash
pip install websockets pyaudio
```

<Note>
The Python example uses the `websockets` library. With `websockets` version 13.0 or later, pass headers via the `additional_headers` parameter; with older versions (< 13.0), use `extra_headers` instead.
</Note>
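
If a script has to run against either version range, you can select the right keyword argument at runtime. A minimal sketch, assuming only that the installed package exposes `websockets.__version__` (current releases do):

```python
import websockets

headers = {"Authorization": "<YOUR_API_KEY>"}

# websockets 13.0 renamed `extra_headers` to `additional_headers`;
# pick the keyword argument that matches the installed version.
_major = int(websockets.__version__.split(".")[0])
if _major >= 13:
    connect_kwargs = {"additional_headers": headers}
else:
    connect_kwargs = {"extra_headers": headers}

# Later: `async with websockets.connect(URL, **connect_kwargs) as ws: ...`
```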

</Tab>

<Tab language="javascript-sdk" title="JavaScript SDK">

```bash
npm install assemblyai node-record-lpcm16
```

<Note>
The module `node-record-lpcm16` requires [SoX](http://sox.sourceforge.net/), which must be available in your `$PATH`.

For macOS:

```bash
brew install sox
```

For most Linux distros:

```bash
sudo apt-get install sox libsox-fmt-all
```

For Windows:

[Download the binaries](http://sourceforge.net/projects/sox/files/latest/download)
</Note>

</Tab>

<Tab language="javascript" title="Javascript">
@@ -114,6 +154,97 @@ npm install ws mic

<Tabs>

<Tab language="python-sdk" title="Python SDK">

```python {68}
import logging
from typing import Type

import assemblyai as aai
from assemblyai.streaming.v3 import (
    BeginEvent,
    StreamingClient,
    StreamingClientOptions,
    StreamingError,
    StreamingEvents,
    StreamingParameters,
    TerminationEvent,
    TurnEvent,
)

api_key = "<YOUR_API_KEY>"

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def on_begin(self: Type[StreamingClient], event: BeginEvent):
    # Fired once the streaming session is established.
    print(f"Session started: {event.id}")


def on_turn(self: Type[StreamingClient], event: TurnEvent):
    if not event.end_of_turn and event.transcript:
        print(f"[PARTIAL TURN TRANSCRIPT]: {event.transcript}")
    if event.utterance:
        print(f"[PARTIAL TURN UTTERANCE]: {event.utterance}")
        # Display language detection info if available
        if event.language_code:
            print(f"[UTTERANCE LANGUAGE DETECTION]: {event.language_code} - {event.language_confidence:.2%}")
    if event.end_of_turn:
        print(f"[FULL TURN TRANSCRIPT]: {event.transcript}")
        # Display language detection info if available
        if event.language_code:
            print(f"[END OF TURN LANGUAGE DETECTION]: {event.language_code} - {event.language_confidence:.2%}")


def on_terminated(self: Type[StreamingClient], event: TerminationEvent):
    print(
        f"Session terminated: {event.audio_duration_seconds} seconds of audio processed"
    )


def on_error(self: Type[StreamingClient], error: StreamingError):
    print(f"Error occurred: {error}")


def main():
    client = StreamingClient(
        StreamingClientOptions(
            api_key=api_key,
            api_host="streaming.assemblyai.com",
        )
    )

    client.on(StreamingEvents.Begin, on_begin)
    client.on(StreamingEvents.Turn, on_turn)
    client.on(StreamingEvents.Termination, on_terminated)
    client.on(StreamingEvents.Error, on_error)

    client.connect(
        StreamingParameters(
            sample_rate=48000,
            speech_model="universal-streaming-multilingual",
            language_detection=True,
        )
    )

    try:
        client.stream(
            aai.extras.MicrophoneStream(sample_rate=48000)
        )
    finally:
        client.disconnect(terminate=True)


if __name__ == "__main__":
    main()
```

</Tab>

<Tab language="python" title="Python">

```python {26}
@@ -152,7 +283,7 @@ async def send_receive():

    async with websockets.connect(
        URL,
-       extra_headers={"Authorization": "YOUR-API-KEY"},
+       additional_headers={"Authorization": "YOUR-API-KEY"},
        ping_interval=5,
        ping_timeout=20
    ) as _ws:
@@ -183,13 +314,18 @@ async def send_receive():
                utterance = data['utterance']

                if data['type'] == 'Turn':
                    if not data.get('end_of_turn') and transcript:
                        print(f"[PARTIAL TURN TRANSCRIPT]: {transcript}")
                    if data.get('utterance'):
-                       print(f"\r[PARTIAL TURN UTTERANCE]: {utterance}")
+                       print(f"[PARTIAL TURN UTTERANCE]: {utterance}")
                        # Display language detection info if available
                        if 'language_code' in data:
-                           print(f"\r[UTTERANCE LANGUAGE DETECTION]: {data['language_code']} - {data['language_confidence']:.2%}")
+                           print(f"[UTTERANCE LANGUAGE DETECTION]: {data['language_code']} - {data['language_confidence']:.2%}")
                    if data.get('end_of_turn'):
-                       print(f"\r[FULL TURN TRANSCRIPT]: {transcript}")
+                       print(f"[FULL TURN TRANSCRIPT]: {transcript}")
                        # Display language detection info if available
                        if 'language_code' in data:
                            print(f"[END OF TURN LANGUAGE DETECTION]: {data['language_code']} - {data['language_confidence']:.2%}")
                else:
                    pass

@@ -348,6 +484,9 @@ async function run() {
        const transcript = data.transcript || "";
        const utterance = data.utterance || "";

        if (!data.end_of_turn && transcript) {
          console.log(`[PARTIAL TURN TRANSCRIPT]: ${transcript}`);
        }
        if (data.utterance) {
          console.log(`[PARTIAL TURN UTTERANCE]: ${utterance}`);
          // Display language detection info if available
@@ -358,6 +497,11 @@
        }
        if (data.end_of_turn) {
          console.log(`[FULL TURN TRANSCRIPT]: ${transcript}`);
          // Display language detection info if available
          if (data.language_code) {
            const langConfidence = (data.language_confidence * 100).toFixed(2);
            console.log(`[END OF TURN LANGUAGE DETECTION]: ${data.language_code} - ${langConfidence}%`);
          }
        }
      } else if (msgType === "Termination") {
        const audioDuration = data.audio_duration_seconds;
@@ -489,4 +633,96 @@ run();

</Tab>

<Tab language="javascript-sdk" title="JavaScript SDK">

```javascript {12}
import { Readable } from 'stream'
import { AssemblyAI } from 'assemblyai'
import recorder from 'node-record-lpcm16'

const run = async () => {
  const client = new AssemblyAI({
    apiKey: "<YOUR_API_KEY>",
  });

  const transcriber = client.streaming.transcriber({
    sampleRate: 48_000,
    speechModel: "universal-streaming-multilingual",
    languageDetection: true
  });

  transcriber.on("open", ({ id }) => {
    console.log(`Session opened with ID: ${id}`);
  });

  transcriber.on("error", (error) => {
    console.error("Error:", error);
  });

  transcriber.on("close", (code, reason) =>
    console.log("Session closed:", code, reason),
  );

  transcriber.on("turn", (turn) => {
    if (!turn.end_of_turn && turn.transcript) {
      console.log(`[PARTIAL TURN TRANSCRIPT]: ${turn.transcript}`);
    }
    if (turn.utterance) {
      console.log(`[PARTIAL TURN UTTERANCE]: ${turn.utterance}`);
      // Display language detection info if available
      if (turn.language_code) {
        const langConfidence = (turn.language_confidence * 100).toFixed(2);
        console.log(`[UTTERANCE LANGUAGE DETECTION]: ${turn.language_code} - ${langConfidence}%`);
      }
    }
    if (turn.end_of_turn) {
      console.log(`[FULL TURN TRANSCRIPT]: ${turn.transcript}`);
      // Display language detection info if available
      if (turn.language_code) {
        const langConfidence = (turn.language_confidence * 100).toFixed(2);
        console.log(`[END OF TURN LANGUAGE DETECTION]: ${turn.language_code} - ${langConfidence}%`);
      }
    }
  });

  try {
    console.log("Connecting to streaming transcript service");

    await transcriber.connect();

    console.log("Starting recording");

    const recording = recorder.record({
      channels: 1,
      sampleRate: 48_000,
      audioType: "wav", // Linear PCM
    });

    Readable.toWeb(recording.stream()).pipeTo(transcriber.stream());

    // Stop recording and close the connection with Ctrl-C.
    process.on("SIGINT", async function () {
      console.log();
      console.log("Stopping recording");
      recording.stop();

      console.log("Closing streaming transcript connection");
      await transcriber.close();

      process.exit();
    });
  } catch (error) {
    console.error(error);
  }
};

run();
```

</Tab>

</Tabs>
6 changes: 4 additions & 2 deletions fern/pages/07-use-cases/meeting-notetaker-best-practices.mdx
@@ -610,7 +610,8 @@ class ChannelTranscriber:
"""Transcribe a single audio channel"""
url = f"wss://streaming.assemblyai.com/v3/ws?{urlencode(self.connection_params)}"

async with websockets.connect(url, extra_headers={"Authorization": API_KEY}) as ws:
# If you're using `websockets` version 13.0 or later, use `additional_headers` parameter. For older versions (< 13.0), use `extra_headers` instead.
async with websockets.connect(url, additional_headers={"Authorization": API_KEY}) as ws:
# Send audio from this channel only
async for audio_chunk in audio_stream:
await ws.send(audio_chunk)
@@ -1439,7 +1440,8 @@ class StreamingResponseProcessor:
# Example usage
processor = StreamingResponseProcessor()

-async with websockets.connect(API_ENDPOINT, extra_headers=headers) as ws:
+# With `websockets` 13.0 or later, use the `additional_headers` parameter; for older versions (< 13.0), use `extra_headers` instead.
+async with websockets.connect(API_ENDPOINT, additional_headers=headers) as ws:
    async for message in ws:
        data = json.loads(message)
        result = processor.process_message(data)
3 changes: 2 additions & 1 deletion fern/pages/07-use-cases/voice-agent-best-practices.mdx
@@ -223,7 +223,8 @@ async def main():
headers = {"Authorization": API_KEY}

try:
async with websockets.connect(API_ENDPOINT, extra_headers=headers) as websocket:
# If you're using `websockets` version 13.0 or later, use `additional_headers` parameter. For older versions (< 13.0), use `extra_headers` instead.
async with websockets.connect(API_ENDPOINT, additional_headers=headers) as websocket:
print("✅ Connected to Universal-Streaming!")
print("🎤 Start speaking... (Press Ctrl+C to stop)\n")
