@@ -574,4 +574,126 @@ mod tests {
574574 assert ! ( stats. tensors_indexed > 0 , "should index at least some tensors" ) ;
575575 assert ! ( stats. overall_ratio( ) > 10.0 , "ratio should be significant: {:.1}" , stats. overall_ratio( ) ) ;
576576 }
577+
/// Streams the Llama 4 Scout IQ1_S GGUF file from HuggingFace through
/// `stream_index_gguf`, writes the result to `/tmp`, and prints a size report.
///
/// Ignored by default because it needs network access and a long download.
/// If the HuggingFace URL cannot be resolved, the test prints `SKIP` and
/// returns without failing.
#[test]
#[ignore] // Streams from HuggingFace — requires network + time
fn test_stream_index_llama4_scout_from_hf() {
    use super::super::http_reader::{resolve_hf_url, HttpRangeReader};
    use std::io::{BufWriter, Write};

    let repo = "unsloth/Llama-4-Scout-17B-16E-Instruct-GGUF";
    let filename = "Llama-4-Scout-17B-16E-Instruct-UD-IQ1_S.gguf";

    eprintln!("Resolving {} / {} ...", repo, filename);
    let (url, size) = match resolve_hf_url(repo, filename) {
        Ok(resolved) => resolved,
        Err(e) => {
            // No network (or the repo moved): skip rather than fail.
            eprintln!("SKIP: {}", e);
            return;
        }
    };
    eprintln!(" URL resolved, size: {:.2} GB", size as f64 / 1e9);

    // 256 MiB chunks (256 * 1024 * 1024 bytes) per range request.
    let mut reader = HttpRangeReader::with_chunk_size(url, size, 256 * 1024 * 1024);

    let out_path = "/tmp/llama4_scout.bgz7";
    let out = std::fs::File::create(out_path).expect("create output");
    let mut writer = BufWriter::new(out);

    eprintln!("Streaming index...");
    let stats = stream_index_gguf(
        &mut reader,
        &mut writer,
        // Per-tensor progress line: name, layer type, original and compressed sizes.
        Some(&|name, layer_type, orig, comp| {
            let ratio = if comp > 0 { orig as f64 / comp as f64 } else { 0.0 };
            eprintln!(" {:60} {:12?} {:>12} → {:>8} ({:.0}×)",
                name, layer_type, orig, comp, ratio);
        }),
    )
    .expect("stream_index_gguf");

    // Dropping a BufWriter silently discards flush errors, so flush explicitly
    // to make a truncated output file fail the test instead of going unnoticed.
    writer.flush().expect("flush output");
    drop(writer);
    let out_size = std::fs::metadata(out_path).map(|m| m.len()).unwrap_or(0);

    // NOTE(review): the banner says "bgz17" while the output path ends in
    // ".bgz7" — confirm which name is intended.
    eprintln!();
    eprintln!("=== Llama 4 Scout → bgz17 (streamed from HF) ===");
    eprintln!(" Source: {:.2} GB ({})", size as f64 / 1e9, filename);
    eprintln!(" Output: {:.2} MB ({})", out_size as f64 / 1e6, out_path);
    eprintln!(" Downloaded: {:.2} GB", reader.bytes_downloaded() as f64 / 1e9);
    eprintln!(" Tensors: {} indexed, {} skipped",
        stats.tensors_indexed, stats.tensors_skipped);
    eprintln!(" Original (f32): {:.2} GB", stats.original_bytes as f64 / 1e9);
    eprintln!(" Compressed: {:.2} MB", stats.compressed_bytes as f64 / 1e6);
    eprintln!(" Ratio: {:.1}×", stats.overall_ratio());
    eprintln!(" Peak tensor: {:.2} MB", stats.peak_tensor_bytes as f64 / 1e6);

    // Per-layer-type breakdown; assumes `stats.by_type` is indexed in this
    // order — TODO confirm against the layer-type enum.
    let type_names = ["Attention", "FeedForward", "Conv2D", "Norm", "Embedding", "Skip"];
    for (i, name) in type_names.iter().enumerate() {
        let (count, orig, comp) = stats.by_type[i];
        if count > 0 {
            let ratio = if comp > 0 { orig as f64 / comp as f64 } else { 0.0 };
            eprintln!(" {:<12} {:>3} tensors: {:>10.2} GB → {:>8.2} MB ({:.1}×)",
                name, count, orig as f64 / 1e9, comp as f64 / 1e6, ratio);
        }
    }

    assert!(stats.tensors_indexed > 0);
}
638+
/// Streams shard 5 of the BF16 Llama 4 Scout GGUF from HuggingFace through
/// `stream_index_gguf`, writes the result to `/tmp`, and prints a size report.
///
/// Ignored by default because it streams a very large file over the network.
#[test]
#[ignore] // Streams BF16 shard 5 (18.2 GB) from HuggingFace
fn test_stream_index_llama4_bf16_shard5() {
    use super::super::http_reader::HttpRangeReader;
    use std::io::{BufWriter, Write};

    let repo = "unsloth/Llama-4-Scout-17B-16E-Instruct-GGUF";
    let filename = "BF16/Llama-4-Scout-17B-16E-Instruct-BF16-00005-of-00005.gguf";
    // NOTE(review): hard-coded approximate size rather than a resolved one —
    // confirm the reader tolerates a size that differs from the real file.
    let size: u64 = 18_220_000_000; // ~18.2 GB from metadata

    let url = format!("https://huggingface.co/{}/resolve/main/{}", repo, filename);
    eprintln!("Streaming shard 5: {:.2} GB", size as f64 / 1e9);
    eprintln!(" URL: {}", url);

    // 256 MiB chunks (256 * 1024 * 1024 bytes) for fewer HTTP round-trips
    let mut reader = HttpRangeReader::with_chunk_size(url, size, 256 * 1024 * 1024);

    let out_path = "/tmp/llama4_scout_shard5.bgz7";
    let out = std::fs::File::create(out_path).expect("create output");
    let mut writer = BufWriter::new(out);

    let stats = stream_index_gguf(
        &mut reader,
        &mut writer,
        // Per-tensor progress line: name, layer type, original and compressed sizes.
        Some(&|name, layer_type, orig, comp| {
            let ratio = if comp > 0 { orig as f64 / comp as f64 } else { 0.0 };
            eprintln!(" {:60} {:12?} {:>12} → {:>8} ({:.0}×)",
                name, layer_type, orig, comp, ratio);
        }),
    )
    .expect("stream_index_gguf");

    // Dropping a BufWriter silently discards flush errors, so flush explicitly
    // to make a truncated output file fail the test instead of going unnoticed.
    writer.flush().expect("flush output");
    drop(writer);
    let out_size = std::fs::metadata(out_path).map(|m| m.len()).unwrap_or(0);

    eprintln!();
    eprintln!("=== Llama 4 Scout BF16 Shard 5 → bgz17 ===");
    eprintln!(" Source: {:.2} GB (BF16, streamed from HF)", size as f64 / 1e9);
    eprintln!(" Output: {:.2} MB", out_size as f64 / 1e6);
    eprintln!(" Downloaded: {:.2} GB", reader.bytes_downloaded() as f64 / 1e9);
    eprintln!(" Tensors: {} indexed, {} skipped",
        stats.tensors_indexed, stats.tensors_skipped);
    eprintln!(" Original (f32): {:.2} GB", stats.original_bytes as f64 / 1e9);
    eprintln!(" Compressed: {:.2} MB", stats.compressed_bytes as f64 / 1e6);
    eprintln!(" Ratio: {:.1}×", stats.overall_ratio());
    eprintln!(" Peak tensor: {:.2} MB", stats.peak_tensor_bytes as f64 / 1e6);

    // Per-layer-type breakdown; assumes `stats.by_type` is indexed in this
    // order — TODO confirm against the layer-type enum.
    let type_names = ["Attention", "FeedForward", "Conv2D", "Norm", "Embedding", "Skip"];
    for (i, name) in type_names.iter().enumerate() {
        let (count, orig, comp) = stats.by_type[i];
        if count > 0 {
            let ratio = if comp > 0 { orig as f64 / comp as f64 } else { 0.0 };
            eprintln!(" {:<12} {:>3} tensors: {:>10.2} GB → {:>8.2} MB ({:.1}×)",
                name, count, orig as f64 / 1e9, comp as f64 / 1e6, ratio);
        }
    }

    assert!(stats.tensors_indexed > 0);
    // Only checks that some original bytes were accounted for; the expectation
    // that BF16 → f32 makes original_bytes exceed the source size is NOT
    // asserted here.
    assert!(stats.original_bytes > 0);
}
577699}
0 commit comments