From e75fc42f488e17709448d7f169a890ddd15fa341 Mon Sep 17 00:00:00 2001
From: Michal Kulakowski <michalkulakowski@intel.com>
Date: Thu, 2 Apr 2026 20:35:16 +0200
Subject: [PATCH 1/3] Fix speaker embeddings creation: pin audio demo dependencies

---
 demos/audio/requirements.txt | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/demos/audio/requirements.txt b/demos/audio/requirements.txt
index b57ab92bd2..14a05423ee 100644
--- a/demos/audio/requirements.txt
+++ b/demos/audio/requirements.txt
@@ -1,5 +1,7 @@
 --extra-index-url "https://download.pytorch.org/whl/cpu"
-torch==2.9.1+cpu
-torchaudio==2.9.1+cpu
-speechbrain==1.0.3
-openai==2.21.0
\ No newline at end of file
+torch==2.5.1+cpu
+torchaudio==2.5.1+cpu
+speechbrain==1.0.2
+huggingface_hub<1.0
+openai==2.21.0
+requests==2.31.0
\ No newline at end of file

From 7427378cd4d85b03cef5bb656848f76c90308675 Mon Sep 17 00:00:00 2001
From: Michal Kulakowski <michalkulakowski@intel.com>
Date: Fri, 3 Apr 2026 11:11:08 +0200
Subject: [PATCH 2/3] Bump torch/torchaudio to 2.8.0 and add soundfile to audio demo requirements

---
 demos/audio/requirements.txt | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/demos/audio/requirements.txt b/demos/audio/requirements.txt
index 14a05423ee..c51b610b5d 100644
--- a/demos/audio/requirements.txt
+++ b/demos/audio/requirements.txt
@@ -1,6 +1,7 @@
 --extra-index-url "https://download.pytorch.org/whl/cpu"
-torch==2.5.1+cpu
-torchaudio==2.5.1+cpu
+torch==2.8.0+cpu
+torchaudio==2.8.0+cpu
+soundfile
 speechbrain==1.0.2
 huggingface_hub<1.0
 openai==2.21.0

From 73833425a2f594ac8e33e0ca2b270360045ef406 Mon Sep 17 00:00:00 2001
From: Michal Kulakowski <michalkulakowski@intel.com>
Date: Fri, 3 Apr 2026 11:17:49 +0200
Subject: [PATCH 3/3] Fix audio demo README commands (pip, relative speaker path, hf-subset quoting)

---
 demos/audio/README.md | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/demos/audio/README.md b/demos/audio/README.md
index 9e49fc0ddc..65ba4f6519 100644
--- a/demos/audio/README.md
+++ b/demos/audio/README.md
@@ -19,7 +19,7 @@ Check supported [Speech Recognition Models](https://openvinotoolkit.github.io/op
 ### Prepare speaker embeddings
 When generating speech you can use default speaker voice or you can prepare your own speaker embedding file. Here you can see how to do it with downloaded file from online repository, but you can try with your own speech recording as well:
 ```bash
-pip3 install -r https://raw.githubusercontent.com/openvinotoolkit/model_server/refs/heads/main/demos/audio/requirements.txt
+pip install -r https://raw.githubusercontent.com/openvinotoolkit/model_server/refs/heads/main/demos/audio/requirements.txt
 mkdir -p audio_samples
 curl --output audio_samples/audio.wav "https://www.voiptroubleshooter.com/open_speech/american/OSR_us_000_0032_8k.wav"
 mkdir -p models
@@ -42,7 +42,7 @@ Execution parameters will be defined inside the `graph.pbtxt` file.
 Download export script, install it's dependencies and create directory for the models:
 ```console
 curl https://raw.githubusercontent.com/openvinotoolkit/model_server/refs/heads/main/demos/common/export_models/export_model.py -o export_model.py
-pip3 install -r https://raw.githubusercontent.com/openvinotoolkit/model_server/refs/heads/main/demos/common/export_models/requirements.txt
+pip install -r https://raw.githubusercontent.com/openvinotoolkit/model_server/refs/heads/main/demos/common/export_models/requirements.txt
 mkdir models
 ```
 
@@ -53,7 +53,7 @@ Run `export_model.py` script to download and quantize the model:
 
 **CPU**
 ```console
-python export_model.py text2speech --source_model microsoft/speecht5_tts --weight-format fp16 --model_name microsoft/speecht5_tts --config_file_path models/config.json --model_repository_path models --overwrite_models --vocoder microsoft/speecht5_hifigan --speaker_name voice1 --speaker_path /models/speakers/voice1.bin
+python export_model.py text2speech --source_model microsoft/speecht5_tts --weight-format fp16 --model_name microsoft/speecht5_tts --config_file_path models/config.json --model_repository_path models --overwrite_models --vocoder microsoft/speecht5_hifigan --speaker_name voice1 --speaker_path models/speakers/voice1.bin
 ```
 
 > **Note:** Change the `--weight-format` to quantize the model to `int8` precision to reduce memory consumption and improve performance.
@@ -157,7 +157,7 @@ An asynchronous benchmarking client can be used to access the model server perfo
 git clone https://github.com/openvinotoolkit/model_server
 cd model_server/demos/benchmark/v3/
 pip install -r requirements.txt
-python benchmark.py --api_url http://localhost:8000/v3/audio/speech --model microsoft/speecht5_tts --batch_size 1 --limit 100 --request_rate inf --backend text2speech --dataset edinburghcstr/ami --hf-subset 'ihm' --tokenizer openai/whisper-large-v3-turbo --trust-remote-code True
+python benchmark.py --api_url http://localhost:8000/v3/audio/speech --model microsoft/speecht5_tts --batch_size 1 --limit 100 --request_rate inf --backend text2speech --dataset edinburghcstr/ami --hf-subset ihm --tokenizer openai/whisper-large-v3-turbo --trust-remote-code True
 Number of documents: 100
 100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [01:58<00:00,  1.19s/it]
 Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
@@ -181,7 +181,7 @@ Execution parameters will be defined inside the `graph.pbtxt` file.
 Download export script, install it's dependencies and create directory for the models:
 ```console
 curl https://raw.githubusercontent.com/openvinotoolkit/model_server/refs/heads/main/demos/common/export_models/export_model.py -o export_model.py
-pip3 install -r https://raw.githubusercontent.com/openvinotoolkit/model_server/refs/heads/main/demos/common/export_models/requirements.txt
+pip install -r https://raw.githubusercontent.com/openvinotoolkit/model_server/refs/heads/main/demos/common/export_models/requirements.txt
 mkdir models
 ```