Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Sources/Meeting/MeetingSessionController.swift
Original file line number Diff line number Diff line change
Expand Up @@ -1433,6 +1433,7 @@ final class MeetingSessionController: ObservableObject {
context: baseDiagnosticsContext(
extra: [
"error": message,
"failure_kind": analyticsFailureKind(from: message),
"queue_depth": "\(queuedTranscriptionJobs.count)",
"trigger": transcriptionTrigger.rawValue
]
Expand Down
22 changes: 16 additions & 6 deletions Sources/Speech/ParakeetEngine.swift
Original file line number Diff line number Diff line change
Expand Up @@ -1184,6 +1184,7 @@ class ParakeetEngine: ObservableObject {

self.configRecoveryTimeoutTask = nil
self.publishRecoveryState()
let failureAction = ParakeetDeviceRecoveryFailurePolicy.action(wasRecording: wasRecording)
AnalyticsReporter.track(
"dictation_audio_route_recovery_timeout",
properties: self.dictationRouteAnalyticsContext(
Expand All @@ -1196,11 +1197,18 @@ class ParakeetEngine: ObservableObject {
]
)
)
let diagnosticsEvent = failureAction.reportSentryFailure
? "device_change_recovery_timeout"
: "device_change_recovery_deferred"
let diagnosticsLevel: EventLevel = failureAction.reportSentryFailure ? .error : .warning
let diagnosticsMessage = failureAction.reportSentryFailure
? "Audio device recovery timed out"
: "Idle audio route still settling after device change"
EventReporter.shared.capture(
level: .error,
level: diagnosticsLevel,
engine: "parakeet",
event: "device_change_recovery_timeout",
message: "Audio device recovery timed out",
event: diagnosticsEvent,
message: diagnosticsMessage,
context: self.dictationRouteDiagnosticsContext(
selection: Self.loadDictationInputDeviceSelection(),
extra: [
Expand All @@ -1211,7 +1219,7 @@ class ParakeetEngine: ObservableObject {
]
)
)
if wasRecording {
if failureAction.markRecordingInterrupted {
self.recordingInterrupted = true
EventReporter.shared.capture(
level: .error,
Expand All @@ -1228,8 +1236,10 @@ class ParakeetEngine: ObservableObject {
)
}
await self.rebuildAudioEngine(reason: "device_change_recovery_timeout")
self.prewarmRetryCount = 0
self.schedulePrewarmRetry()
if failureAction.schedulePrewarmRetry {
self.prewarmRetryCount = 0
self.schedulePrewarmRetry()
}
}
}

Expand Down
58 changes: 34 additions & 24 deletions Sources/TranscriptedCore/Audio/AudioLevelMonitor.swift
Original file line number Diff line number Diff line change
Expand Up @@ -11,18 +11,7 @@ extension Audio {
// MARK: - Mic Audio Level

func calculateLevel(buffer: AVAudioPCMBuffer) {
guard let data = buffer.floatChannelData else { return }

let channelData = data.pointee
let frameLength = Int(buffer.frameLength)
guard frameLength > 0 else { return }

var sum: Float = 0
vDSP_dotpr(channelData, 1, channelData, 1, &sum, vDSP_Length(frameLength))

let rms = sqrt(sum / Float(frameLength))
let power = 20 * log10(max(rms, 0.00001))
let level = max(0.0, min(1.0, (power + 60) / 60))
let level = normalizedRMSLevel(buffer: buffer)

DispatchQueue.main.async { [weak self] in
guard let self = self else { return }
Expand Down Expand Up @@ -61,6 +50,38 @@ extension Audio {
return peak.isFinite ? peak : 0
}

/// Converts a PCM buffer into a 0...1 UI meter value derived from its RMS power.
///
/// Every channel contributes to the measurement: interleaved layouts are scanned
/// as one contiguous run behind pointer 0, while deinterleaved layouts are summed
/// per channel. The resulting power is mapped from the -60 dB...0 dB window onto
/// 0...1. Returns 0 for empty, silent, or malformed buffers.
func normalizedRMSLevel(buffer: AVAudioPCMBuffer) -> Float {
    guard let samples = buffer.floatChannelData else { return 0 }

    let frames = vDSP_Length(buffer.frameLength)
    let channels = Int(buffer.format.channelCount)
    guard frames > 0, channels > 0 else { return 0 }

    // Accumulate the sum of squared samples across all channels.
    var squaredSum: Float = 0
    if buffer.format.isInterleaved {
        // Interleaved data lives in one contiguous block of frames * channels samples.
        vDSP_dotpr(samples[0], 1, samples[0], 1, &squaredSum, frames * vDSP_Length(channels))
    } else {
        for index in 0..<channels {
            var partial: Float = 0
            vDSP_dotpr(samples[index], 1, samples[index], 1, &partial, frames)
            squaredSum += partial
        }
    }

    guard squaredSum.isFinite, squaredSum > 0 else { return 0 }

    let totalSamples = Float(Int(frames) * channels)
    guard totalSamples > 0 else { return 0 }

    // RMS -> dBFS (floored at -100 dB) -> clamp the -60 dB window into 0...1.
    let rms = sqrt(squaredSum / totalSamples)
    let decibels = 20 * log10(max(rms, 0.00001))
    let normalized = max(0.0, min(1.0, (decibels + 60) / 60))
    return normalized.isFinite ? normalized : 0
}

// MARK: - Silence Detection

/// Updates silence tracking based on current audio level
Expand Down Expand Up @@ -108,18 +129,7 @@ extension Audio {
}
guard shouldProcess else { return }

guard let data = buffer.floatChannelData else { return }

let channelData = data.pointee
let frameLength = Int(buffer.frameLength)
guard frameLength > 0 else { return }

var sum: Float = 0
vDSP_dotpr(channelData, 1, channelData, 1, &sum, vDSP_Length(frameLength))

let rms = sqrt(sum / Float(frameLength))
let power = 20 * log10(max(rms, 0.00001))
let level = max(0.0, min(1.0, (power + 60) / 60))
let level = normalizedRMSLevel(buffer: buffer)

// Track system audio silence for warning indicator
updateSystemAudioSilenceTracking(peakLevel: level)
Expand Down
36 changes: 36 additions & 0 deletions Tests/TranscriptedCoreTests/AudioDiagnosticsSnapshotTests.swift
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import XCTest
@preconcurrency import AVFoundation
@testable import TranscriptedCore

@available(macOS 14.0, *)
Expand Down Expand Up @@ -63,4 +64,39 @@ final class AudioDiagnosticsSnapshotTests: XCTestCase {
XCTAssertNotNil(context["default_output_volume_after"])
XCTAssertNotNil(context["default_system_output_volume_after"])
}

/// A buffer whose signal lives entirely in the right channel must still
/// register strongly on the meter — i.e. the level computation may not
/// ignore channels beyond the first.
func testNormalizedSystemLevelUsesAllChannels() throws {
    let audio = makeAudio()
    let stereo = try makeStereoBuffer(left: 0, right: 0.5)

    let measured = audio.normalizedRMSLevel(buffer: stereo)

    // RMS of (0, 0.5) across both channels lands near 0.85 on the 0...1 scale.
    XCTAssertGreaterThan(measured, 0.8)
}

/// Builds a 512-frame, 48 kHz, deinterleaved Float32 stereo buffer whose
/// two channels are filled with the given constant sample values.
private func makeStereoBuffer(left: Float, right: Float) throws -> AVAudioPCMBuffer {
    let frames = 512
    let stereoFormat = try XCTUnwrap(AVAudioFormat(
        commonFormat: .pcmFormatFloat32,
        sampleRate: 48_000,
        channels: 2,
        interleaved: false
    ))
    let buffer = try XCTUnwrap(AVAudioPCMBuffer(
        pcmFormat: stereoFormat,
        frameCapacity: AVAudioFrameCount(frames)
    ))
    buffer.frameLength = AVAudioFrameCount(frames)

    guard let channels = buffer.floatChannelData else {
        XCTFail("Missing channel data")
        return buffer
    }

    // Fill channel 0 with `left` and channel 1 with `right`.
    for (channel, value) in [(0, left), (1, right)] {
        for frame in 0..<frames {
            channels[channel][frame] = value
        }
    }
    return buffer
}
}