diff --git a/crates/iceberg/src/puffin/metadata.rs b/crates/iceberg/src/puffin/metadata.rs
index e2dfc10c23..6b3d3703de 100644
--- a/crates/iceberg/src/puffin/metadata.rs
+++ b/crates/iceberg/src/puffin/metadata.rs
@@ -324,7 +324,11 @@ impl FileMetadata {
             return FileMetadata::read(input_file).await;
         }
 
-        // Read footer based on prefetchi hint
+        // Validate the header magic: read the first MAGIC_LENGTH bytes and bail out if the check fails
+        let first_four_bytes = file_read.read(0..FileMetadata::MAGIC_LENGTH.into()).await?;
+        FileMetadata::check_magic(&first_four_bytes)?;
+
+        // Read the trailing `prefetch_hint` bytes of the file, where the footer is expected to be
         let start = input_file_length - prefetch_hint as u64;
         let end = input_file_length;
         let footer_bytes = file_read.read(start..end).await?;
@@ -958,6 +962,33 @@ mod tests {
         assert_eq!(file_metadata, zstd_compressed_metric_file_metadata());
     }
 
+    #[tokio::test]
+    async fn test_read_with_incorrect_header_magic() {
+        let temp_dir = TempDir::new().unwrap();
+
+        let prefetch_hint: u8 = 64;
+        let mut bytes = vec![];
+        // Invalid header magic: four zero bytes at the very start of the file
+        bytes.extend([0x00, 0x00, 0x00, 0x00]);
+        // Pad with `prefetch_hint` zero bytes so the file is larger than prefetch_hint
+        bytes.extend(vec![0u8; prefetch_hint as usize]);
+        // Well-formed footer: magic + payload + footer struct (payload length, flags, magic)
+        bytes.extend(FileMetadata::MAGIC);
+        bytes.extend(empty_footer_payload_bytes());
+        bytes.extend(empty_footer_payload_bytes_length_bytes());
+        bytes.extend(vec![0, 0, 0, 0]); // flags
+        bytes.extend(FileMetadata::MAGIC);
+
+        let input_file = input_file_with_bytes(&temp_dir, &bytes).await;
+
+        assert!(FileMetadata::read(&input_file).await.is_err(),);
+        assert!(
+            FileMetadata::read_with_prefetch(&input_file, prefetch_hint)
+                .await
+                .is_err(),
+        );
+    }
+
     #[tokio::test]
     async fn test_gzip_compression_allowed_in_metadata() {
         let temp_dir = TempDir::new().unwrap();