From 86336fed019776098f9eaab0282fdbb958471838 Mon Sep 17 00:00:00 2001
From: Jackson Holiday Wheeler <jhwheeler@gmail.com>
Date: Thu, 19 Feb 2026 23:59:25 +0800
Subject: [PATCH 1/2] feat: add voice-to-text push-to-talk transcription

Toggle-style push-to-talk using ffmpeg + faster-whisper for local
speech-to-text. Press ALT+R to start recording; press it again to stop,
transcribe, copy the result to the clipboard via wl-copy, and type it
into the focused window via wtype.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 transcribe.py | 20 +++++++++++++++
 voice-to-text | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 89 insertions(+)
 create mode 100755 transcribe.py
 create mode 100755 voice-to-text

diff --git a/transcribe.py b/transcribe.py
new file mode 100755
index 0000000..556a37e
--- /dev/null
+++ b/transcribe.py
@@ -0,0 +1,20 @@
+#!/usr/bin/env python3
+"""Transcribe a WAV file using faster-whisper (base model, CPU, int8)."""
+
+import sys
+from faster_whisper import WhisperModel
+
+
+def main():
+    if len(sys.argv) != 2:
+        print(f"Usage: {sys.argv[0]} <wav_file>", file=sys.stderr)
+        sys.exit(1)
+
+    model = WhisperModel("base", device="cpu", compute_type="int8")
+    segments, _ = model.transcribe(sys.argv[1], beam_size=5)
+    text = " ".join(seg.text.strip() for seg in segments)
+    print(text)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/voice-to-text b/voice-to-text
new file mode 100755
index 0000000..1aaae76
--- /dev/null
+++ b/voice-to-text
@@ -0,0 +1,69 @@
+#!/usr/bin/env bash
+#
+# voice-to-text — push-to-talk toggle for Wayland
+# 1st press: start recording   2nd press: stop, transcribe, type into terminal
+#
+
+set -euo pipefail
+
+PID_FILE="/tmp/voice-to-text.pid"
+WAV_FILE="/tmp/voice-to-text.wav"
+LOG_FILE="/tmp/voice-to-text.log"
+TRANSCRIBE="$HOME/scripts/transcribe.py"
+VENV_PYTHON="$HOME/venvs/voice-to-text/bin/python"
+HINT="string:x-canonical-private-synchronous:voice-to-text"
+
+notify() {
+    local timeout="${1:--1}"; shift
+    notify-send -a "Voice Recorder" -t "$timeout" -h "$HINT" "$@"
+}
+
+# ── Stop recording & transcribe ──────────────────────────────────────
+if [[ -f "$PID_FILE" ]]; then
+    pid=$(cat "$PID_FILE")
+    rm -f "$PID_FILE"
+
+    if kill -0 "$pid" 2>/dev/null; then
+        kill -INT "$pid"
+        # Poll until ffmpeg exits (can't use wait — different shell)
+        for _ in $(seq 1 50); do
+            kill -0 "$pid" 2>/dev/null || break
+            sleep 0.1
+        done
+    fi
+
+    if [[ ! -s "$WAV_FILE" ]]; then
+        notify 5000 "Error" "Recording is empty — nothing to transcribe."
+        exit 1
+    fi
+
+    notify -1 "Transcribing..."
+
+    text=$("$VENV_PYTHON" "$TRANSCRIBE" "$WAV_FILE" 2>"$LOG_FILE") || true
+
+    if [[ -z "$text" ]]; then
+        err=$(cat "$LOG_FILE" 2>/dev/null || echo "unknown error")
+        notify 5000 "Error" "Transcription failed: $err"
+        exit 1
+    fi
+
+    printf '%s' "$text" | wl-copy
+    wtype -- "$text"
+
+    notify 5000 "Transcribed" "$text"
+    exit 0
+fi
+
+# ── Start recording ──────────────────────────────────────────────────
+
+if ! pactl info &>/dev/null; then
+    notify 5000 "Error" "PulseAudio/PipeWire not available."
+    exit 1
+fi
+
+rm -f "$WAV_FILE"
+
+ffmpeg -y -f pulse -i default -ac 1 -ar 16000 "$WAV_FILE" &>/dev/null &
+echo $! > "$PID_FILE"
+
+notify 0 "Recording..." "Press ALT+R again to stop."

From b21ce63a65f9b2edfe87a52119f9d0b5af88cccd Mon Sep 17 00:00:00 2001
From: Jackson Holiday Wheeler <jhwheeler@gmail.com>
Date: Mon, 9 Mar 2026 14:57:30 +0800
Subject: [PATCH 2/2] chore: remove voice-to-text scripts

---
 transcribe.py | 20 ---------------
 voice-to-text | 69 ---------------------------------------------------
 2 files changed, 89 deletions(-)
 delete mode 100755 transcribe.py
 delete mode 100755 voice-to-text

diff --git a/transcribe.py b/transcribe.py
deleted file mode 100755
index 556a37e..0000000
--- a/transcribe.py
+++ /dev/null
@@ -1,20 +0,0 @@
-#!/usr/bin/env python3
-"""Transcribe a WAV file using faster-whisper (base model, CPU, int8)."""
-
-import sys
-from faster_whisper import WhisperModel
-
-
-def main():
-    if len(sys.argv) != 2:
-        print(f"Usage: {sys.argv[0]} <wav_file>", file=sys.stderr)
-        sys.exit(1)
-
-    model = WhisperModel("base", device="cpu", compute_type="int8")
-    segments, _ = model.transcribe(sys.argv[1], beam_size=5)
-    text = " ".join(seg.text.strip() for seg in segments)
-    print(text)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/voice-to-text b/voice-to-text
deleted file mode 100755
index 1aaae76..0000000
--- a/voice-to-text
+++ /dev/null
@@ -1,69 +0,0 @@
-#!/usr/bin/env bash
-#
-# voice-to-text — push-to-talk toggle for Wayland
-# 1st press: start recording   2nd press: stop, transcribe, type into terminal
-#
-
-set -euo pipefail
-
-PID_FILE="/tmp/voice-to-text.pid"
-WAV_FILE="/tmp/voice-to-text.wav"
-LOG_FILE="/tmp/voice-to-text.log"
-TRANSCRIBE="$HOME/scripts/transcribe.py"
-VENV_PYTHON="$HOME/venvs/voice-to-text/bin/python"
-HINT="string:x-canonical-private-synchronous:voice-to-text"
-
-notify() {
-    local timeout="${1:--1}"; shift
-    notify-send -a "Voice Recorder" -t "$timeout" -h "$HINT" "$@"
-}
-
-# ── Stop recording & transcribe ──────────────────────────────────────
-if [[ -f "$PID_FILE" ]]; then
-    pid=$(cat "$PID_FILE")
-    rm -f "$PID_FILE"
-
-    if kill -0 "$pid" 2>/dev/null; then
-        kill -INT "$pid"
-        # Poll until ffmpeg exits (can't use wait — different shell)
-        for _ in $(seq 1 50); do
-            kill -0 "$pid" 2>/dev/null || break
-            sleep 0.1
-        done
-    fi
-
-    if [[ ! -s "$WAV_FILE" ]]; then
-        notify 5000 "Error" "Recording is empty — nothing to transcribe."
-        exit 1
-    fi
-
-    notify -1 "Transcribing..."
-
-    text=$("$VENV_PYTHON" "$TRANSCRIBE" "$WAV_FILE" 2>"$LOG_FILE") || true
-
-    if [[ -z "$text" ]]; then
-        err=$(cat "$LOG_FILE" 2>/dev/null || echo "unknown error")
-        notify 5000 "Error" "Transcription failed: $err"
-        exit 1
-    fi
-
-    printf '%s' "$text" | wl-copy
-    wtype -- "$text"
-
-    notify 5000 "Transcribed" "$text"
-    exit 0
-fi
-
-# ── Start recording ──────────────────────────────────────────────────
-
-if ! pactl info &>/dev/null; then
-    notify 5000 "Error" "PulseAudio/PipeWire not available."
-    exit 1
-fi
-
-rm -f "$WAV_FILE"
-
-ffmpeg -y -f pulse -i default -ac 1 -ar 16000 "$WAV_FILE" &>/dev/null &
-echo $! > "$PID_FILE"
-
-notify 0 "Recording..." "Press ALT+R again to stop."