Skip to content

Commit 43cfad0

Browse files
authored
feat: safetensors support — BF16 streaming indexer (#60)
* feat: safetensors header parser + streaming indexer

  Parses the safetensors JSON header (no serde dependency) and produces GgufFile-compatible types so stream_index_gguf_bf16_with_header works unchanged on safetensors files.

  Safetensors stores full BF16 weights — no quantization noise. For the reasoning diff pipeline, BF16→Base17 gives cleaner fingerprints than Q8_0→f32→Base17.

  Includes test_stream_index_qwen35_safetensors for 11-shard Qwen3.5-27B indexing at full BF16 precision.

* mod: register safetensors module

* refactor: extract stream_index_gguf_bf16_with_header for format-agnostic indexing

  Splits stream_index_gguf_bf16 into:
  - stream_index_gguf_bf16(): parses GGUF header, delegates to _with_header
  - stream_index_gguf_bf16_with_header(): the core loop, works with any pre-parsed header (GGUF or safetensors)

  No behavior change for existing callers.
1 parent 073fb0b commit 43cfad0

3 files changed

Lines changed: 439 additions & 6 deletions

File tree

src/hpc/gguf_indexer.rs

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -519,19 +519,34 @@ pub fn stream_index_gguf_bf16<R: Read + Seek, W: Write>(
519519
octave_stride: usize,
520520
callback: Option<&dyn Fn(&str, &LayerType, usize, usize)>,
521521
) -> Result<IndexStats, String> {
522-
let gguf_header = gguf::read_gguf_header(reader)?;
522+
let header = gguf::read_gguf_header(reader)?;
523+
stream_index_gguf_bf16_with_header(reader, writer, &header, octave_stride, callback)
524+
}
525+
526+
/// Core BF16-direct indexer — works with any pre-parsed header (GGUF or safetensors).
527+
///
528+
/// The header must have:
529+
/// - `tensor_data_offset`: absolute byte offset where tensor data starts
530+
/// - `tensors`: Vec<TensorInfo> with name, dimensions, dtype, offset (relative to data start)
531+
pub fn stream_index_gguf_bf16_with_header<R: Read + Seek, W: Write>(
532+
reader: &mut R,
533+
writer: &mut W,
534+
header: &gguf::GgufFile,
535+
octave_stride: usize,
536+
callback: Option<&dyn Fn(&str, &LayerType, usize, usize)>,
537+
) -> Result<IndexStats, String> {
523538
let mut stats = IndexStats::default();
524-
stats.tensors_total = gguf_header.tensors.len();
539+
stats.tensors_total = header.tensors.len();
525540

526541
writer.write_all(b"BGZ7").map_err(|e| e.to_string())?;
527-
writer.write_all(&(gguf_header.tensors.len() as u32).to_le_bytes()).map_err(|e| e.to_string())?;
542+
writer.write_all(&(header.tensors.len() as u32).to_le_bytes()).map_err(|e| e.to_string())?;
528543

529544
// Reusable buffer — capped at 128 MB (64M u16 elements).
530545
// Tensors larger than this are read in row batches.
531546
const MAX_BUF_ELEMS: usize = 64 * 1024 * 1024; // 128 MB of u16
532547
let mut bf16_buf: Vec<u16> = Vec::new();
533548

534-
for tensor in &gguf_header.tensors {
549+
for tensor in &header.tensors {
535550
let layer_type = classify_tensor(&tensor.name, &tensor.dimensions);
536551

537552
if matches!(layer_type, LayerType::Skip | LayerType::Norm) {
@@ -559,7 +574,7 @@ pub fn stream_index_gguf_bf16<R: Read + Seek, W: Write>(
559574
}
560575

561576
// Seek to tensor start
562-
let abs_offset = gguf_header.tensor_data_offset + tensor.offset;
577+
let abs_offset = header.tensor_data_offset + tensor.offset;
563578
reader.seek(std::io::SeekFrom::Start(abs_offset)).map_err(|e| e.to_string())?;
564579

565580
let mut rows: Vec<Base17> = Vec::with_capacity(n_rows);
@@ -636,7 +651,7 @@ pub fn stream_index_gguf_bf16<R: Read + Seek, W: Write>(
636651
}
637652
} else {
638653
// FALLBACK: non-BF16 — use original f32 path
639-
let data = gguf::read_tensor_f32(reader, &gguf_header, tensor)?;
654+
let data = gguf::read_tensor_f32(reader, &header, tensor)?;
640655
let tensor_bytes = data.len() as u64 * 4;
641656
if tensor_bytes > stats.peak_tensor_bytes {
642657
stats.peak_tensor_bytes = tensor_bytes;

src/hpc/mod.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,10 @@ pub mod gguf;
172172
#[allow(missing_docs)]
173173
pub mod gguf_indexer;
174174

175+
/// Safetensors header parser + streaming indexer for BF16 model weights.
176+
#[allow(missing_docs)]
177+
pub mod safetensors;
178+
175179
/// HTTP range reader — Read + Seek over HTTP for streaming GGUF from HuggingFace.
176180
#[allow(missing_docs)]
177181
pub mod http_reader;

0 commit comments

Comments
 (0)