@@ -718,4 +718,114 @@ mod tests {
718718 // Other layers untouched
719719 assert_eq ! ( p3d. layers[ 1 ] . rows[ 1 ] , 0 ) ;
720720 }
721+
722+ // ================================================================
723+ // GPT-2 → P64 rehydration: prove attention reconstruction works
724+ // ================================================================
725+
/// End-to-end check that GPT-2's archetype distance table can be turned into
/// a palette and driven through `Palette3D` inference.
///
/// Steps performed:
/// 1. Flatten the 256×256 `distance_table` into a row-major `Vec<u16>`.
/// 2. Take the 25th-percentile pairwise distance as the interaction threshold.
/// 3. Build a palette with `palette_from_deepnsm_distances` and assert its
///    bit density lies strictly between 100 and 3500 set bits.
/// 4. Stack 8 copies of that palette under the ANALYTICAL and CREATIVE styles,
///    run `infer(42)` on both, and assert the creative fan-out is at least as
///    wide as the analytical one, with both non-zero.
/// 5. Spot-check one interacting and one non-interacting pair against the
///    raw distances.
#[test]
fn test_gpt2_palette_to_p64_rehydration() {
    use crate::hpc::jina::runtime::GPT2;

    let gpt2 = &*GPT2;
    eprintln!("GPT-2 vocab: {} tokens", gpt2.vocab_size());

    // Row-major copy of the 256×256 distance table (index = row * 256 + col).
    let dt = &gpt2.palette.distance_table;
    let flat: Vec<u16> = (0..256usize)
        .flat_map(|row| (0..256usize).map(move |col| dt[row][col]))
        .collect();

    // Distance distribution over the strict upper triangle (each unordered
    // pair counted once, diagonal excluded), sorted ascending so percentiles
    // can be read off by index.
    let mut sorted_dists: Vec<u16> = (0..256usize)
        .flat_map(|row| ((row + 1)..256).map(move |col| dt[row][col]))
        .collect();
    sorted_dists.sort_unstable();

    let median = sorted_dists[sorted_dists.len() / 2];
    // The 25th percentile is the threshold actually used below; the original
    // note estimates this yields a sparse palette (~12.5% density).
    let p25 = sorted_dists[sorted_dists.len() / 4];
    let smallest = sorted_dists[0];
    let largest = sorted_dists.last().unwrap();
    eprintln!("Distance stats: median={}, p25={}, min={}, max={}",
        median, p25, smallest, largest);

    // Derive the palette from the flattened distances with p25 as the radius.
    let palette = palette_from_deepnsm_distances(&flat, 256, p25);

    // Sanity bounds: the palette must be neither (nearly) empty nor (nearly) full.
    let mut density = 0u32;
    for row_bits in palette.rows.iter() {
        density += row_bits.count_ones();
    }
    let total_bits = 64 * 64;
    let pct = f64::from(density) / total_bits as f64 * 100.0;
    eprintln!("Palette density: {}/{} bits ({:.1}%)", density, total_bits, pct);
    assert!(density > 100, "palette too sparse: {density}");
    assert!(density < 3500, "palette too dense: {density}");

    // Same topology replicated across all 8 layers; only the style differs.
    let mut analytical_stack = Palette3D::new([palette; 8], ThinkingStyle::ANALYTICAL);
    let mut creative_stack = Palette3D::new([palette; 8], ThinkingStyle::CREATIVE);

    // Run inference from archetype 42 under each style.
    let r_analytical = analytical_stack.infer(42);
    let r_creative = creative_stack.infer(42);

    eprintln!("Analytical: attention={:064b}, tension={}, active_layers={}, new={}",
        r_analytical.attention, r_analytical.tension,
        r_analytical.active_layers, r_analytical.new_connections);
    eprintln!("Creative: attention={:064b}, tension={}, active_layers={}, new={}",
        r_creative.attention, r_creative.tension,
        r_creative.active_layers, r_creative.new_connections);

    // Key assertion: the style must modulate fan-out. Per the original note,
    // CREATIVE (union, all layers, density 0.40) is expected to reach at least
    // as many targets as ANALYTICAL (intersection, 6 layers, density 0.05);
    // those style parameters are defined outside this test.
    let fanout_analytical = r_analytical.attention.count_ones();
    let fanout_creative = r_creative.attention.count_ones();
    eprintln!("Fan-out: analytical={}, creative={}", fanout_analytical, fanout_creative);

    assert!(fanout_creative >= fanout_analytical,
        "Creative should have wider fan-out than Analytical: {} vs {}",
        fanout_creative, fanout_analytical);

    // Neither style may produce an empty attention mask.
    assert!(fanout_analytical > 0, "Analytical should fire something");
    assert!(fanout_creative > 0, "Creative should fire something");

    // Spot-check that palette bits agree with the raw distances: take the
    // first off-diagonal cell (row-major order) whose bit is set and the first
    // whose bit is clear.
    let bit_is_set = |i: usize, j: usize| palette.rows[i] & (1 << j) != 0;
    let off_diagonal = || {
        (0..64usize)
            .flat_map(|i| (0..64usize).map(move |j| (i, j)))
            .filter(|&(i, j)| i != j)
    };
    let interacting = off_diagonal().find(|&(i, j)| bit_is_set(i, j));
    let non_interacting = off_diagonal().find(|&(i, j)| !bit_is_set(i, j));

    if let Some(((ia, ib), (na, nb))) = interacting.zip(non_interacting) {
        // The interacting pair is expected to be no farther apart than the
        // non-interacting one.
        // NOTE(review): this indexes the 256-wide table with 0..64 palette
        // indices, i.e. it assumes palette cell k corresponds to archetype k —
        // confirm against palette_from_deepnsm_distances.
        let d_interact = flat[ia * 256 + ib];
        let d_non = flat[na * 256 + nb];
        eprintln!("Interacting ({},{}) distance={}, Non-interacting ({},{}) distance={}",
            ia, ib, d_interact, na, nb, d_non);
        assert!(d_interact <= d_non,
            "Interacting pair should be closer: {} vs {}", d_interact, d_non);
    }

    eprintln!("GPT-2 → P64 rehydration: PASS");
    eprintln!(" 50K tokens → 256 archetypes → 64×64 palette → 8-layer Palette3D");
    eprintln!(" Thinking style modulates fan-out: Analytical={}, Creative={}",
        fanout_analytical, fanout_creative);
}
721831}
0 commit comments