Skip to content

Commit a42d999

Browse files
authored
Merge pull request #101 from AdaWorldAPI/claude/risc-thought-engine-TCZw7
feat(hpc/audio): Opus CELT primitives — MDCT, band energies, PVQ, Aud…
2 parents ca3e8f5 + d75e8a5 commit a42d999

6 files changed

Lines changed: 651 additions & 0 deletions

File tree

src/hpc/audio/bands.rs

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
//! Opus CELT band energy computation.
2+
//!
3+
//! 21 quasi-Bark critical bands at 48kHz. Each band's energy is the
4+
//! gain component of gain-shape quantization. The normalized coefficients
5+
//! (after dividing by band energy) are the shape component → PVQ.
6+
//!
7+
//! Band boundaries from Opus `celt/modes.c` eBands48.
8+
9+
/// CELT-style band boundaries at 48 kHz for 960-sample frames (480 MDCT bins).
///
/// `N_BANDS + 1` boundaries define `N_BANDS` bands: band `b` covers the
/// half-open bin range `CELT_BANDS_48K[b]..CELT_BANDS_48K[b + 1]`.
/// Bin index = frequency / (48000 / 960), i.e. 50 Hz per bin.
/// Band 0: bins 0-3 (~0-200 Hz), Band 20: bins 256-479 (~12.8-24 kHz).
///
/// NOTE(review): these values do not look like a plain rescale of the
/// `eband5ms` table in libopus `celt/modes.c` — confirm against the reference
/// before relying on a bit-exact Opus band layout.
pub const CELT_BANDS_48K: [usize; N_BANDS + 1] = [
    0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 44, 52, 60, 68, 80, 96,
    112, 136, 160, 200, 256, 480,
];

/// Number of critical bands (one less than the number of boundaries).
pub const N_BANDS: usize = 21;
19+
20+
/// Compute band energies from MDCT coefficients.
21+
///
22+
/// Returns 21 f32 energies (sqrt of sum-of-squares per band).
23+
/// These are the "gain" in gain-shape quantization.
24+
pub fn band_energies(coeffs: &[f32]) -> [f32; N_BANDS] {
25+
let mut energies = [0.0f32; N_BANDS];
26+
for band in 0..N_BANDS {
27+
let lo = CELT_BANDS_48K[band];
28+
let hi = CELT_BANDS_48K[band + 1].min(coeffs.len());
29+
let mut sum_sq = 0.0f32;
30+
for i in lo..hi {
31+
if i < coeffs.len() {
32+
sum_sq += coeffs[i] * coeffs[i];
33+
}
34+
}
35+
energies[band] = sum_sq.sqrt();
36+
}
37+
energies
38+
}
39+
40+
/// Normalize MDCT coefficients by band energy (produce unit-energy shape).
41+
///
42+
/// After normalization, each band has unit energy. The shape encodes
43+
/// the spectral tilt within the band. PVQ quantizes this shape.
44+
pub fn normalize_bands(coeffs: &[f32], energies: &[f32; N_BANDS]) -> Vec<f32> {
45+
let mut normalized = coeffs.to_vec();
46+
for band in 0..N_BANDS {
47+
let lo = CELT_BANDS_48K[band];
48+
let hi = CELT_BANDS_48K[band + 1].min(normalized.len());
49+
let e = energies[band].max(1e-10);
50+
for i in lo..hi {
51+
if i < normalized.len() {
52+
normalized[i] /= e;
53+
}
54+
}
55+
}
56+
normalized
57+
}
58+
59+
/// Denormalize: multiply shape coefficients by band energies.
60+
///
61+
/// Inverse of normalize_bands. Used in the decoder path:
62+
/// PVQ-decoded shape × band energies → MDCT coefficients → iMDCT → PCM.
63+
pub fn denormalize_bands(shape: &[f32], energies: &[f32; N_BANDS]) -> Vec<f32> {
64+
let mut coeffs = shape.to_vec();
65+
for band in 0..N_BANDS {
66+
let lo = CELT_BANDS_48K[band];
67+
let hi = CELT_BANDS_48K[band + 1].min(coeffs.len());
68+
let e = energies[band];
69+
for i in lo..hi {
70+
if i < coeffs.len() {
71+
coeffs[i] *= e;
72+
}
73+
}
74+
}
75+
coeffs
76+
}
77+
78+
/// Pack band energies to BF16 (21 × 2 bytes = 42 bytes).
79+
pub fn energies_to_bf16(energies: &[f32; N_BANDS]) -> [u16; N_BANDS] {
80+
let mut bf16 = [0u16; N_BANDS];
81+
for i in 0..N_BANDS {
82+
let bits = energies[i].to_bits();
83+
let lsb = (bits >> 16) & 1;
84+
let biased = bits.wrapping_add(0x7FFF).wrapping_add(lsb);
85+
bf16[i] = (biased >> 16) as u16;
86+
}
87+
bf16
88+
}
89+
90+
/// Unpack BF16 band energies to f32.
91+
pub fn bf16_to_energies(bf16: &[u16; N_BANDS]) -> [f32; N_BANDS] {
92+
let mut energies = [0.0f32; N_BANDS];
93+
for i in 0..N_BANDS {
94+
energies[i] = f32::from_bits((bf16[i] as u32) << 16);
95+
}
96+
energies
97+
}
98+
99+
#[cfg(test)]
mod tests {
    use super::*;

    /// The boundary table holds exactly one more entry than there are bands.
    #[test]
    fn band_count() {
        assert_eq!(CELT_BANDS_48K.len(), N_BANDS + 1);
    }

    /// A slow sine across all 480 bins must produce measurable total energy.
    #[test]
    fn band_energies_nonzero() {
        let mut coeffs: Vec<f32> = Vec::with_capacity(480);
        for i in 0..480 {
            coeffs.push((i as f32 * 0.05).sin());
        }
        let total: f32 = band_energies(&coeffs).iter().sum();
        assert!(total > 0.1, "Total band energy too low: {}", total);
    }

    /// normalize → denormalize with the same energies reproduces the input.
    #[test]
    fn normalize_denormalize_roundtrip() {
        let mut coeffs: Vec<f32> = Vec::with_capacity(480);
        for i in 0..480 {
            coeffs.push((i as f32 * 0.1).sin() * 2.0);
        }
        let gains = band_energies(&coeffs);
        let recovered = denormalize_bands(&normalize_bands(&coeffs, &gains), &gains);

        coeffs.iter().zip(recovered.iter()).for_each(|(orig, rec)| {
            assert!((orig - rec).abs() < 0.01,
                "Roundtrip mismatch: {} vs {}", orig, rec);
        });
    }

    /// BF16 packing keeps the first five (non-zero) energies within 2 %.
    #[test]
    fn bf16_energy_roundtrip() {
        let mut e = [0.0f32; N_BANDS];
        e[..5].copy_from_slice(&[1.0, 0.5, 2.0, 0.001, 100.0]);

        let recovered = bf16_to_energies(&energies_to_bf16(&e));
        for (i, (want, got)) in e.iter().zip(recovered.iter()).take(5).enumerate() {
            let err = (*want - *got).abs() / (*want).max(1e-6);
            assert!(err < 0.02, "BF16 roundtrip error for band {}: {:.4}", i, err);
        }
    }
}

src/hpc/audio/codec.rs

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
//! AudioFrame: 48-byte codec for one frame of audio.
2+
//!
3+
//! The complete encode/decode pipeline:
4+
//! encode: PCM → MDCT → band energies (gain) + PVQ (shape) → AudioFrame
5+
//! decode: AudioFrame → band energies × PVQ shape → iMDCT → PCM
6+
//!
7+
//! One AudioFrame = one graph node in lance-graph. 48 bytes = CAM-compatible.
8+
9+
use super::mdct;
10+
use super::bands;
11+
use super::pvq;
12+
13+
/// One audio frame: 42 bytes gain + 6 bytes shape = 48 bytes.
14+
///
15+
/// Maps to SPO:
16+
/// Subject = spectral (WHAT frequencies) → band energies
17+
/// Predicate = temporal (WHEN they happen) → PVQ summary bytes 2-3
18+
/// Object = harmonic (HOW they ring) → PVQ summary bytes 4-5
19+
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
20+
pub struct AudioFrame {
21+
/// 21 band energies as BF16 (42 bytes). The gain component.
22+
pub band_energies: [u16; bands::N_BANDS],
23+
/// PVQ shape fingerprint (6 bytes). HEEL/HIP/TWIG levels.
24+
pub pvq_summary: [u8; 6],
25+
}
26+
27+
impl AudioFrame {
28+
/// Total byte size: 42 (energies) + 6 (pvq) = 48.
29+
pub const BYTE_SIZE: usize = bands::N_BANDS * 2 + 6;
30+
31+
/// Encode one frame of PCM audio.
32+
///
33+
/// `pcm`: mono f32 samples (padded to power of 2 internally).
34+
/// `pvq_k`: PVQ pulse budget per band (higher = better quality, more bits).
35+
pub fn encode(pcm: &[f32], pvq_k: u32) -> Self {
36+
// MDCT: time → frequency
37+
let coeffs = mdct::mdct_forward(pcm);
38+
39+
// Band energies (gain)
40+
let energies = bands::band_energies(&coeffs);
41+
let bf16_energies = bands::energies_to_bf16(&energies);
42+
43+
// Normalize bands (remove gain, keep shape)
44+
let shape = bands::normalize_bands(&coeffs, &energies);
45+
46+
// PVQ encode the shape of the first (most important) band
47+
// For production: encode all 21 bands. For the POC: just first band's summary.
48+
let first_band_end = bands::CELT_BANDS_48K[1].min(shape.len());
49+
let pulses = pvq::pvq_encode(&shape[..first_band_end], pvq_k);
50+
let summary = pvq::pvq_summary(&pulses);
51+
52+
AudioFrame {
53+
band_energies: bf16_energies,
54+
pvq_summary: summary,
55+
}
56+
}
57+
58+
/// Decode: reconstruct PCM from AudioFrame + optional full PVQ data.
59+
///
60+
/// Without PVQ data: uses band energies only (coarse reconstruction).
61+
/// The PVQ summary gives the HHTL routing info, not the full shape.
62+
/// For full quality: pass the per-band PVQ pulse vectors.
63+
pub fn decode_coarse(&self) -> Vec<f32> {
64+
let energies = bands::bf16_to_energies(&self.band_energies);
65+
66+
// Synthesize a simple spectral envelope from band energies
67+
// Each band gets a flat spectrum at its energy level
68+
let n2 = bands::CELT_BANDS_48K[bands::N_BANDS].min(480);
69+
let mut coeffs = vec![0.0f32; n2];
70+
for band in 0..bands::N_BANDS {
71+
let lo = bands::CELT_BANDS_48K[band];
72+
let hi = bands::CELT_BANDS_48K[band + 1].min(n2);
73+
let n_bins = (hi - lo).max(1);
74+
let per_bin = energies[band] / (n_bins as f32).sqrt();
75+
for i in lo..hi {
76+
// Alternate signs for a more natural-sounding shape
77+
let sign = if (i - lo) % 2 == 0 { 1.0 } else { -1.0 };
78+
coeffs[i] = per_bin * sign;
79+
}
80+
}
81+
82+
// iMDCT: frequency → time
83+
mdct::mdct_backward(&coeffs)
84+
}
85+
86+
/// Serialize to 48 bytes.
87+
pub fn to_bytes(&self) -> [u8; Self::BYTE_SIZE] {
88+
let mut bytes = [0u8; Self::BYTE_SIZE];
89+
for i in 0..bands::N_BANDS {
90+
let b = self.band_energies[i].to_le_bytes();
91+
bytes[i * 2] = b[0];
92+
bytes[i * 2 + 1] = b[1];
93+
}
94+
bytes[42..48].copy_from_slice(&self.pvq_summary);
95+
bytes
96+
}
97+
98+
/// Deserialize from 48 bytes.
99+
pub fn from_bytes(bytes: &[u8; Self::BYTE_SIZE]) -> Self {
100+
let mut band_energies = [0u16; bands::N_BANDS];
101+
for i in 0..bands::N_BANDS {
102+
band_energies[i] = u16::from_le_bytes([bytes[i * 2], bytes[i * 2 + 1]]);
103+
}
104+
let mut pvq_summary = [0u8; 6];
105+
pvq_summary.copy_from_slice(&bytes[42..48]);
106+
AudioFrame { band_energies, pvq_summary }
107+
}
108+
}
109+
110+
#[cfg(test)]
mod tests {
    use super::*;
    use core::f32::consts::PI;

    /// The serialized frame size is fixed at 48 bytes.
    #[test]
    fn frame_48_bytes() {
        assert_eq!(AudioFrame::BYTE_SIZE, 48);
    }

    /// Encoding a pure tone yields non-zero gains, and the coarse decode of
    /// that frame yields non-silent PCM.
    #[test]
    fn encode_decode_nonzero() {
        // 440Hz sine at 48kHz, 1024 samples
        let mut pcm: Vec<f32> = Vec::with_capacity(1024);
        for i in 0..1024 {
            pcm.push((2.0 * PI * 440.0 * i as f32 / 48000.0).sin());
        }

        let frame = AudioFrame::encode(&pcm, 8);

        // Band energies should be nonzero (at least the band containing 440Hz)
        let widen = |b: &u16| f32::from_bits((*b as u32) << 16);
        let total_energy: f32 = frame.band_energies.iter().map(widen).sum();
        assert!(total_energy > 0.01, "Encoded frame has no energy: {}", total_energy);

        // Decode
        let decoded = frame.decode_coarse();
        assert!(!decoded.is_empty());
        let decoded_energy: f32 = decoded.iter().map(|s| s * s).sum();
        assert!(decoded_energy > 1e-10, "Decoded has no energy: {}", decoded_energy);
    }

    /// to_bytes → from_bytes reproduces the frame exactly.
    #[test]
    fn serialize_roundtrip() {
        // Band energies 1, 2, …, 21.
        let mut band_energies = [0u16; 21];
        for (slot, value) in band_energies.iter_mut().zip(1u16..) {
            *slot = value;
        }
        let frame = AudioFrame {
            band_energies,
            pvq_summary: [0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF],
        };

        let recovered = AudioFrame::from_bytes(&frame.to_bytes());
        assert_eq!(frame, recovered);
    }
}

0 commit comments

Comments
 (0)