@@ -420,4 +420,67 @@ mod tests {
420420 let spo = SpoDistanceMatrices :: build ( & pal, & pal, & pal) ;
421421 assert_eq ! ( spo. byte_size( ) , 3 * 32 * 32 * 2 ) ;
422422 }
423+
/// Throughput benchmark for SPO distance lookups over a 64×64 grid of heads.
///
/// Builds the three distance matrices from a single 256-entry palette, gives
/// each of the 4096 heads an S/P/O palette index, then times every head
/// looking up its distance to the 64 heads in its own row. The figures are
/// written to stderr; apart from a sanity check on the reported memory
/// footprint nothing is asserted, so timing never makes the test fail.
#[test]
fn test_4096_head_spo_throughput() {
    // Side length of the head grid; heads are stored row-major.
    const GRID: usize = 64;
    const HEADS: usize = GRID * GRID;
    // Number of timed passes over the whole grid.
    const ITERATIONS: u64 = 100;

    // Build 256-entry palette
    let pal = make_palette(256);
    let spo = SpoDistanceMatrices::build(&pal, &pal, &pal);

    // 4096 heads = 64×64, each with S/P/O palette index. The strides 1, 7 and
    // 13 are odd, so each table cycles through all 256 palette entries.
    let mut heads_s = [0u8; HEADS];
    let mut heads_p = [0u8; HEADS];
    let mut heads_o = [0u8; HEADS];
    for i in 0..HEADS {
        heads_s[i] = (i % 256) as u8;
        heads_p[i] = ((i * 7) % 256) as u8;
        heads_o[i] = ((i * 13) % 256) as u8;
    }

    // The memory line below reports the index tables as 12 KB; keep that
    // literal honest if the table sizes ever change.
    assert_eq!(
        std::mem::size_of_val(&heads_s)
            + std::mem::size_of_val(&heads_p)
            + std::mem::size_of_val(&heads_o),
        12 * 1024
    );

    // Benchmark: 4096 × 64 SPO lookups (one row attending to 64 targets)
    let start = std::time::Instant::now();
    let mut total_dist = 0u64;
    for _ in 0..ITERATIONS {
        // Launder the index tables once per pass so an optimizing build cannot
        // prove that every pass yields the same sum and collapse the repeats.
        let s = std::hint::black_box(&heads_s);
        let p = std::hint::black_box(&heads_p);
        let o = std::hint::black_box(&heads_o);
        for row in 0..GRID {
            let base = row * GRID;
            for i in base..base + GRID {
                for j in base..base + GRID {
                    total_dist += spo.spo_distance(s[i], p[i], o[i], s[j], p[j], o[j]) as u64;
                }
            }
        }
    }
    let elapsed = start.elapsed();
    let total_lookups = (GRID * GRID * GRID) as u64 * ITERATIONS;
    let lookups_per_sec = total_lookups as f64 / elapsed.as_secs_f64();
    let ns_per_lookup = elapsed.as_nanos() as f64 / total_lookups as f64;

    // Pearl 2³: multiply by 8 projections
    // NOTE(review): this is an estimate scaled from the measured SPO latency,
    // not a separate measurement.
    let pearl_ns = ns_per_lookup * 8.0 / 3.0; // each projection uses 1-3 planes
    let tokens_per_sec_spo = 1e9 / (ns_per_lookup * HEADS as f64); // one token = full 64×64 pass
    let tokens_per_sec_pearl = 1e9 / (pearl_ns * HEADS as f64);

    let spo_kb = spo.byte_size() / 1024;

    eprintln!();
    eprintln!("═══ Qwen3.5 + Opus 4.6: 4096-Head SPO Benchmark ═══");
    eprintln!(" Palette: 256 entries, SPO matrices: {} KB", spo_kb);
    eprintln!(" Lookups: {} total ({} iterations × 64×64×64)", total_lookups, ITERATIONS);
    eprintln!(" Time: {:.3}ms", elapsed.as_secs_f64() * 1000.0);
    eprintln!(" Rate: {:.0} M lookups/sec", lookups_per_sec / 1e6);
    eprintln!(" Latency: {:.1} ns/lookup (SPO, 3 planes)", ns_per_lookup);
    eprintln!(" Pearl: {:.1} ns/lookup (8 projections avg)", pearl_ns);
    eprintln!();
    eprintln!(" Token throughput:");
    eprintln!(" SPO only: {:.0} tokens/sec (64×64 attention per token)", tokens_per_sec_spo);
    eprintln!(" Pearl 2³: {:.0} tokens/sec (8 projections per head)", tokens_per_sec_pearl);
    eprintln!(" Triple model: {:.0} tokens/sec (self+user+impact)", tokens_per_sec_pearl / 3.0);
    eprintln!();
    // Three [u8; 4096] index tables (S, P, O) = 12 KB, not 4 KB.
    eprintln!(" Memory: {} KB SPO tables + 12 KB head indices = {} KB total",
        spo_kb, spo_kb + 12);
    eprintln!(" (blackhole: {})", total_dist); // prevent optimizer from eliding
}
423486}
0 commit comments