forked from SevaSk/ecoute
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathTranscriberModels.py
More file actions
103 lines (87 loc) · 3.43 KB
/
TranscriberModels.py
File metadata and controls
103 lines (87 loc) · 3.43 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
"""
Ecoute – Transcriber model selection and wrappers
This module now supports engine selection by **name** and performs all heavy
imports lazily so simply importing `TranscriberModels` does **not** trigger
PyTorch / faster-whisper wheels to be loaded. This prevents crashes on
platforms that cannot compile or install those packages (e.g. macOS Catalina).
"""
import os
import platform
from typing import Optional
from openai import OpenAI
# --------------------------------------------------------------------------- #
# Public factory
# --------------------------------------------------------------------------- #
def get_model(engine: str = "api"):
    """
    Factory: build and return a transcriber for the requested *engine*.

    Parameters
    ----------
    engine : str
        "api"   -> OpenAI Whisper API (default)
        "local" -> faster-whisper local inference
        Any other value (or a falsy one) silently falls back to the API engine.
    """
    choice = (engine or "api").lower()
    if choice == "local":
        return FasterWhisperTranscriber()
    # Unrecognised names deliberately degrade to the hosted API engine.
    return APIWhisperTranscriber()
class FasterWhisperTranscriber:
    """
    Local transcription backed by faster-whisper.

    torch and faster_whisper are imported inside ``__init__`` (not at module
    top level) so that merely importing this module never pulls in the heavy
    wheels — see the module docstring for the rationale.
    """

    def __init__(self):
        # Platform guard: official PyTorch wheels are not published for
        # macOS 10.15 (Catalina), so warn up front rather than fail cryptically.
        if platform.system() == "Darwin" and platform.mac_ver()[0].startswith("10.15"):
            print(
                "[WARNING] Local engine requested on macOS 10.15 (Catalina). "
                "Official PyTorch wheels are unavailable. "
                "Consider running with '--engine api' instead."
            )

        # Deferred heavy imports; surface a clear, actionable error on failure.
        try:
            import torch  # noqa: F401
            from faster_whisper import WhisperModel  # noqa: F401
        except Exception as exc:
            raise RuntimeError(
                "Unable to import torch/faster_whisper. "
                "Install them or run with '--engine api'."
            ) from exc

        print("[INFO] Loading faster-whisper model (tiny.en)…")
        use_cuda = torch.cuda.is_available()  # type: ignore[attr-defined]
        # pylint: disable=import-error, no-name-in-module
        # int8 on CPU keeps memory/latency down; float32 when a GPU is present.
        self.model = WhisperModel(
            "tiny.en",
            device="cuda" if use_cuda else "cpu",
            compute_type="float32" if use_cuda else "int8",
        )
        print(f"[INFO] Faster-whisper ready. GPU: {use_cuda}")

    def get_transcription(self, wav_file_path):
        """Transcribe *wav_file_path*; return the text, or '' on any failure."""
        try:
            segments, _ = self.model.transcribe(wav_file_path, beam_size=5)
            return " ".join(piece.text for piece in segments).strip()
        except Exception as e:
            # Best-effort: log and return empty rather than crash the caller.
            print(e)
            return ''
class APIWhisperTranscriber:
    """Transcription via the hosted OpenAI Whisper API (model ``whisper-1``)."""

    def __init__(self, api_key=None):
        # api_key=None defers credential lookup to the OpenAI client itself
        # (presumably the OPENAI_API_KEY environment variable — verify).
        self.client = OpenAI(api_key=api_key)

    def get_transcription(self, wav_file_path):
        """Upload *wav_file_path*; return the transcript, or '' on any failure."""
        try:
            with open(wav_file_path, "rb") as audio_file:
                response = self.client.audio.transcriptions.create(
                    model="whisper-1",
                    file=audio_file,
                )
            return response.text.strip()
        except Exception as e:
            # Best-effort: log and return empty rather than crash the caller.
            print(e)
            return ''