-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathdiarization_example.py
More file actions
133 lines (104 loc) · 4.32 KB
/
diarization_example.py
File metadata and controls
133 lines (104 loc) · 4.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
"""Example usage of the diarization pipeline.
This script shows how to use the speaker diarization functionality from
the VideoAnnotator project.
"""
import logging
import os
from pathlib import Path
# Configure logging at import time: INFO level, timestamped "time - level - message" records.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
# Module-scoped logger, named after this module.
logger = logging.getLogger(__name__)
def _print_diarization_report(diarization, video_name: str, max_segments: int = 10) -> None:
    """Print speaker counts, per-speaker speaking time and a short timeline.

    Args:
        diarization: Result object exposing ``speakers``, ``segments`` and
            ``total_speech_time``. Each segment is indexed by the keys
            ``"speaker_id"``, ``"start_time"`` and ``"end_time"``.
        video_name: File name shown in the report header.
        max_segments: Number of leading segments listed in the timeline.
    """
    segments = diarization.segments
    total_speech = diarization.total_speech_time

    print(f"\nDIARIZATION RESULTS for {video_name}")
    print("=" * 60)
    print(f"Number of speakers detected: {len(diarization.speakers)}")
    print(f"Number of speaker segments: {len(segments)}")
    print(f"Total speech time: {total_speech:.2f} seconds")

    # Show speaker breakdown
    print("\nSpeaker Breakdown:")
    # Accumulate speaking time per speaker; dict order keeps first appearance.
    speaker_times = {}
    for segment in segments:
        speaker_id = segment["speaker_id"]
        duration = segment["end_time"] - segment["start_time"]
        speaker_times[speaker_id] = speaker_times.get(speaker_id, 0) + duration

    for speaker_id, total_time in speaker_times.items():
        # Guard the division: a zero total would raise ZeroDivisionError and
        # abort the whole report.
        percentage = (total_time / total_speech) * 100 if total_speech else 0.0
        print(f" {speaker_id}: {total_time:.2f}s ({percentage:.1f}%)")

    # Show timeline
    print(f"\nSpeaker Timeline (first {max_segments} segments):")
    for segment in segments[:max_segments]:
        start = segment["start_time"]
        end = segment["end_time"]
        speaker = segment["speaker_id"]
        duration = end - start
        print(f" {start:6.2f}s - {end:6.2f}s ({duration:5.2f}s): {speaker}")

    remaining = len(segments) - max_segments
    if remaining > 0:
        print(f" ... and {remaining} more segments")


def example_diarization(video_path: str, output_dir: str | None = None):
    """Run the diarization pipeline on a video and print a summary report.

    Builds a ``DiarizationPipeline`` configured for the
    ``pyannote/speaker-diarization-3.1`` model, processes ``video_path`` and
    prints the speaker count, per-speaker speaking time and the first ten
    timeline segments.

    Args:
        video_path: Path to the video file to process.
        output_dir: Optional output directory for audio files; forwarded
            unchanged to ``pipeline.process``.

    Returns:
        The first item returned by ``pipeline.process``, or ``None`` when the
        pipeline returns nothing or any step raises an exception.
    """
    try:
        # Imported inside the try block, so an ImportError is logged by the
        # handler below rather than raised to the caller.
        from src.pipelines.audio_processing import (
            DiarizationPipeline,
            DiarizationPipelineConfig,
        )

        # Configure the pipeline
        config = DiarizationPipelineConfig(
            # Token will be read from HUGGINGFACE_TOKEN environment variable
            diarization_model="pyannote/speaker-diarization-3.1",
            use_gpu=True,  # Use GPU if available
        )

        # Create and initialize the pipeline
        pipeline = DiarizationPipeline(config)
        pipeline.initialize()

        # Process the video
        logger.info("Processing video: %s", video_path)
        results = pipeline.process(video_path, output_dir=output_dir)
        if not results:
            logger.error("No diarization results returned")
            return None

        diarization = results[0]
        _print_diarization_report(diarization, Path(video_path).name)
        return diarization
    except Exception as e:
        # Top-level boundary of the example: log with traceback and report
        # failure to the caller via None instead of propagating.
        logger.exception("Error in diarization example: %s", e)
        return None
def main():
    """Find a sample video and run the diarization example on it.

    Prints guidance and returns early when ``HUGGINGFACE_TOKEN`` is unset or
    empty, or when none of the searched directories contains an ``.mp4`` file.
    Otherwise runs ``example_diarization`` on the first match and prints
    whether it succeeded.
    """
    # Without a HuggingFace token there is nothing to run; explain how to set one.
    if not os.getenv("HUGGINGFACE_TOKEN"):
        for line in (
            "HUGGINGFACE_TOKEN environment variable not set",
            "Please set your HuggingFace token:",
            "export HUGGINGFACE_TOKEN=your_token_here",
            "\nGet a token from: https://huggingface.co/settings/tokens",
        ):
            print(line)
        return

    # Collect candidates from the known locations, in pattern order.
    search_patterns = (
        "babyjokes videos/*.mp4",
        "data/demovideos/*.mp4",
        "data/*.mp4",
    )
    candidates = [
        match for pattern in search_patterns for match in Path(".").glob(pattern)
    ]

    if not candidates:
        for line in (
            "No video files found",
            "Please ensure video files are available in:",
            " - babyjokes videos/",
            " - data/demovideos/",
            " - data/",
        ):
            print(line)
        return

    # The first match is enough for a demonstration run.
    chosen = str(candidates[0])
    print(f"Using video: {chosen}")

    outcome = example_diarization(chosen)
    if not outcome:
        print("\nDiarization example failed")
    else:
        print("\nDiarization example completed successfully!")
# Run the example only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()