From 281a5157a8f36ee90b6afea033b5f4a329ca48d4 Mon Sep 17 00:00:00 2001
From: Madhava Jay <me@madhavajay.com>
Date: Mon, 11 May 2026 20:07:04 +1000
Subject: [PATCH 1/2] fixing issues with wasm reporting

---
 rust/bioscript-formats/src/inspect.rs         |   5 +-
 rust/bioscript-formats/src/inspect/sex.rs     |  16 +-
 .../src/inspect/sex/alignment_depth.rs        |  76 +++++++-
 rust/bioscript-formats/src/lib.rs             |   3 +-
 rust/bioscript-wasm/src/lookup_api.rs         |   2 +
 rust/bioscript-wasm/src/report_api.rs         |  71 ++++++-
 .../src/report_input_inspection.rs            | 177 ++++++++++++++++++
 .../src/report_workspace/analysis.rs          |   2 +-
 8 files changed, 328 insertions(+), 24 deletions(-)
 create mode 100644 rust/bioscript-wasm/src/report_input_inspection.rs

diff --git a/rust/bioscript-formats/src/inspect.rs b/rust/bioscript-formats/src/inspect.rs
index b19ef17..f282fe0 100644
--- a/rust/bioscript-formats/src/inspect.rs
+++ b/rust/bioscript-formats/src/inspect.rs
@@ -40,7 +40,10 @@ pub(crate) use heuristics::*;
 pub(crate) use io::*;
 #[cfg(test)]
 pub(crate) use render::*;
-pub use sex::{InferredSex, SexDetectionConfidence, SexInference};
+pub use sex::{
+    InferredSex, SexDetectionConfidence, SexInference, infer_sex_from_alignment_reader,
+    infer_sex_from_text_lines,
+};
 
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum FileContainer {
diff --git a/rust/bioscript-formats/src/inspect/sex.rs b/rust/bioscript-formats/src/inspect/sex.rs
index 9073355..4a26f50 100644
--- a/rust/bioscript-formats/src/inspect/sex.rs
+++ b/rust/bioscript-formats/src/inspect/sex.rs
@@ -14,6 +14,8 @@ use super::{DetectedKind, InspectOptions};
 mod alignment_depth;
 mod classify;
 
+pub use alignment_depth::infer_sex_from_alignment_reader;
+
 pub(crate) use alignment_depth::infer_sex_from_alignment_path;
 use classify::{classify_stats, supports_sex_detection, unsupported_sex_inference};
 
@@ -166,7 +168,7 @@ pub(crate) fn infer_sex_from_zip_bytes(
     infer_sex_from_bytes(selected_entry, &entry_bytes, kind)
 }
 
-pub(crate) fn infer_sex_from_text_lines(
+pub fn infer_sex_from_text_lines(
     lines: &[String],
     kind: DetectedKind,
 ) -> Result<SexInference, RuntimeError> {
@@ -194,11 +196,13 @@ fn infer_sex_from_reader<R: BufRead>(
     let mut stats = SexStats::default();
     let mut probe_lines = Vec::new();
     let mut line = String::new();
+    // Treat any I/O error mid-stream (e.g. truncated bgzf head when the
+    // wasm caller only loaded the first N MiB) as end-of-data: classify
+    // whatever we got rather than failing the whole inspection. The CLI
+    // path streams the full file so this is effectively unchanged for it.
     for _ in 0..64 {
         line.clear();
-        let bytes = reader
-            .read_line(&mut line)
-            .map_err(|err| RuntimeError::Io(format!("failed to scan sex markers: {err}")))?;
+        let bytes = reader.read_line(&mut line).unwrap_or_default();
         if bytes == 0 {
             let delimiter = detect_delimiter(&probe_lines);
             let mut column_indexes = None;
@@ -232,9 +236,7 @@ fn infer_sex_from_reader<R: BufRead>(
     }
     for _ in probe_lines.len()..MAX_SEX_DETECTION_LINES {
         line.clear();
-        let bytes = reader
-            .read_line(&mut line)
-            .map_err(|err| RuntimeError::Io(format!("failed to scan sex markers: {err}")))?;
+        let bytes = reader.read_line(&mut line).unwrap_or_default();
         if bytes == 0 {
             break;
         }
diff --git a/rust/bioscript-formats/src/inspect/sex/alignment_depth.rs b/rust/bioscript-formats/src/inspect/sex/alignment_depth.rs
index d90d49d..74954ad 100644
--- a/rust/bioscript-formats/src/inspect/sex/alignment_depth.rs
+++ b/rust/bioscript-formats/src/inspect/sex/alignment_depth.rs
@@ -70,7 +70,31 @@ pub(crate) fn infer_sex_from_alignment_path(
         ..GenotypeLoadOptions::default()
     };
 
-    let stats = sample_alignment_sex_windows(path, &load_options, reference_file)?;
+    let stats = sample_alignment_sex_windows_from_path(path, &load_options, reference_file)?;
+    let mut inference = classify_alignment_stats(&stats);
+    if options.input_index.is_none() {
+        inference
+            .evidence
+            .push("CRAM sex detection ran without explicit --input-index".to_owned());
+    }
+    Ok(inference)
+}
+
+/// Reader-backed counterpart of `infer_sex_from_alignment_path`: runs the
+/// same Y/X-vs-autosome coverage sampling and classification on an
+/// already-open `IndexedReader`. Wasm callers build that reader from
+/// JS-supplied `readAt` callbacks, so this entry point never needs `std::fs`.
+pub fn infer_sex_from_alignment_reader<R: Read + std::io::Seek>(
+    reader: &mut noodles::cram::io::indexed_reader::IndexedReader<R>,
+    label: &str,
+    allow_reference_md5_mismatch: bool,
+) -> Result<SexInference, RuntimeError> {
+    // Sampling errors propagate unchanged; classification itself cannot fail.
+    sample_alignment_sex_windows_with_reader(reader, label, allow_reference_md5_mismatch)
+        .map(|stats| classify_alignment_stats(&stats))
+}
+
+fn classify_alignment_stats(stats: &AlignmentSexStats) -> SexInference {
     let autosome_mean = mean_records(stats.autosome_records, stats.autosome_windows);
     let x_mean = mean_records(stats.x_records, stats.x_windows);
     let y_mean = mean_records(stats.y_records, stats.y_windows);
@@ -91,7 +115,7 @@ pub(crate) fn infer_sex_from_alignment_path(
         (InferredSex::Unknown, SexDetectionConfidence::Low)
     };
 
-    let mut evidence = vec![
+    let evidence = vec![
         format!("autosome_windows={}", stats.autosome_windows),
         format!("autosome_records={}", stats.autosome_records),
         format!("x_windows={}", stats.x_windows),
@@ -104,19 +128,55 @@ pub(crate) fn infer_sex_from_alignment_path(
         format!("x_to_autosome_ratio={x_ratio:.3}"),
         format!("y_to_autosome_ratio={y_ratio:.3}"),
     ];
-    if options.input_index.is_none() {
-        evidence.push("CRAM sex detection ran without explicit --input-index".to_owned());
-    }
 
-    Ok(SexInference {
+    SexInference {
         sex,
         confidence,
         method: "alignment_autosome_x_y_depth_ratio".to_owned(),
         evidence,
-    })
+    }
+}
+
+/// Sample every configured autosome, X non-PAR and Y window through
+/// `reader` and total the alignment records seen per chromosome class.
+/// `label` and `allow_reference_md5_mismatch` are forwarded untouched to
+/// `count_alignment_records_in_window`; the first window that errors aborts
+/// the whole sampling with that error.
+fn sample_alignment_sex_windows_with_reader<R: Read + std::io::Seek>(
+    reader: &mut noodles::cram::io::indexed_reader::IndexedReader<R>,
+    label: &str,
+    allow_reference_md5_mismatch: bool,
+) -> Result<AlignmentSexStats, RuntimeError> {
+    // Short alias so each counting call fits on one line.
+    let allow = allow_reference_md5_mismatch;
+    let mut stats = AlignmentSexStats::default();
+
+    // Autosome windows carry their own chromosome name; they provide the
+    // baseline the X and Y means are compared against.
+    for (chrom, center) in ALIGNMENT_AUTOSOME_WINDOWS {
+        let hits = count_alignment_records_in_window(reader, label, chrom, *center, allow)?;
+        stats.autosome_records += hits;
+        stats.autosome_windows += 1;
+    }
+
+    // X windows: only centers are listed, the chromosome is always "X".
+    for center in ALIGNMENT_X_NON_PAR_WINDOWS {
+        let hits = count_alignment_records_in_window(reader, label, "X", *center, allow)?;
+        stats.x_records += hits;
+        stats.x_windows += 1;
+    }
+
+    // Y windows: same shape as X, counted against chromosome "Y".
+    for center in ALIGNMENT_Y_WINDOWS {
+        let hits = count_alignment_records_in_window(reader, label, "Y", *center, allow)?;
+        stats.y_records += hits;
+        stats.y_windows += 1;
+    }
+
+    Ok(stats)
 }
 
-fn sample_alignment_sex_windows(
+fn sample_alignment_sex_windows_from_path(
     path: &Path,
     options: &GenotypeLoadOptions,
     reference_file: &Path,
diff --git a/rust/bioscript-formats/src/lib.rs b/rust/bioscript-formats/src/lib.rs
index b01d3b7..b349f43 100644
--- a/rust/bioscript-formats/src/lib.rs
+++ b/rust/bioscript-formats/src/lib.rs
@@ -20,6 +20,7 @@ pub use genotype::{
 };
 pub use inspect::{
     DetectedKind, DetectionConfidence, FileContainer, FileInspection, InferredSex, InspectOptions,
-    SexDetectionConfidence, SexInference, SourceMetadata, inspect_bytes, inspect_file,
+    SexDetectionConfidence, SexInference, SourceMetadata, infer_sex_from_alignment_reader,
+    infer_sex_from_text_lines, inspect_bytes, inspect_file,
 };
 pub use prepare::{PrepareRequest, PreparedPaths, prepare_indexes, shell_flags};
diff --git a/rust/bioscript-wasm/src/lookup_api.rs b/rust/bioscript-wasm/src/lookup_api.rs
index a52297b..1aa133c 100644
--- a/rust/bioscript-wasm/src/lookup_api.rs
+++ b/rust/bioscript-wasm/src/lookup_api.rs
@@ -285,6 +285,8 @@ fn variant_input_to_spec(variant: &VariantInput) -> Result<VariantSpec, JsError>
         } else {
             None
         },
+        grch37_assembly_ref: None,
+        grch38_assembly_ref: None,
         reference: Some(variant.ref_base.clone()),
         alternate: Some(variant.alt_base.clone()),
         kind,
diff --git a/rust/bioscript-wasm/src/report_api.rs b/rust/bioscript-wasm/src/report_api.rs
index 2c33a32..b6aaad1 100644
--- a/rust/bioscript-wasm/src/report_api.rs
+++ b/rust/bioscript-wasm/src/report_api.rs
@@ -10,8 +10,7 @@ use bioscript_core::{
     VariantSpec,
 };
 use bioscript_formats::{
-    GenotypeLoadOptions, GenotypeStore, InferredSex, InspectOptions, SexDetectionConfidence,
-    SexInference, inspect_bytes as inspect_bytes_rs,
+    GenotypeLoadOptions, GenotypeStore, InspectOptions, inspect_bytes as inspect_bytes_rs,
 };
 use bioscript_runtime::{BioscriptRuntime, RuntimeConfig};
 use bioscript_schema::{
@@ -24,6 +23,8 @@ use wasm_bindgen::prelude::*;
 
 #[path = "report_helpers.rs"]
 mod report_helpers;
+#[path = "report_input_inspection.rs"]
+mod report_input_inspection;
 #[path = "report_lookup.rs"]
 mod report_lookup;
 #[path = "report_render.rs"]
@@ -32,6 +33,10 @@ mod report_render;
 mod report_workspace;
 
 use report_helpers::*;
+use report_input_inspection::{
+    decompress_vcf_head_lines, explicit_sex_from_options, inspect_head_via_js_reader,
+    vcf_sex_via_tabix,
+};
 use report_lookup::{CramReportLookup, VcfReportLookup};
 use report_render::{
     AppReportJsonInput, app_report_json, match_app_findings, render_app_html_document,
@@ -48,7 +53,7 @@ include!("../../bioscript-cli/src/report_html_helpers.rs");
 
 #[derive(Deserialize)]
 #[serde(rename_all = "camelCase")]
-struct PackageFileInput {
+pub(super) struct PackageFileInput {
     path: String,
     contents: String,
     #[serde(default)]
@@ -57,13 +62,18 @@ struct PackageFileInput {
 
 #[derive(Default, Deserialize)]
 #[serde(rename_all = "camelCase")]
-struct ReportOptionsInput {
+pub(super) struct ReportOptionsInput {
     #[serde(default = "default_analysis_max_duration_ms")]
     analysis_max_duration_ms: u64,
     #[serde(default)]
     detect_sex: bool,
     #[serde(default)]
     filters: Vec<String>,
+    /// Optional explicit sample sex (mirrors the CLI's `--sample-sex` flag).
+    /// When set, takes precedence over inference: the report carries
+    /// `method=explicit_sample_sex` like the CLI.
+    #[serde(default)]
+    sample_sex: Option<String>,
 }
 
 #[derive(Serialize)]
@@ -219,6 +229,12 @@ pub fn run_package_report_from_cram(
     let manifest_metadata = workspace.report_manifest_metadata(manifest_path)?;
     let findings = workspace.load_manifest_findings(manifest_path)?;
     let provenance = workspace.load_manifest_provenance_links(manifest_path)?;
+    let mut head_inspection = inspect_head_via_js_reader(
+        &cram_read_at,
+        cram_len as u64,
+        input_name,
+        false, // sex detection runs separately below via the indexed reader
+    );
 
     let crai_index = bioscript_formats::alignment::parse_crai_bytes(crai_bytes)
         .map_err(|err| JsError::new(&format!("parse crai: {err:?}")))?;
@@ -242,6 +258,26 @@ pub fn run_package_report_from_cram(
         label: input_name.to_owned(),
     };
 
+    // CRAM sex detection: explicit override wins, otherwise alignment Y/X
+    // coverage analysis through the same reader the variant lookup will use.
+    if let Some(explicit) = explicit_sex_from_options(&options) {
+        head_inspection.inferred_sex = Some(explicit);
+    } else if options.detect_sex {
+        let mut reader_borrow = lookup.reader.borrow_mut();
+        match bioscript_formats::infer_sex_from_alignment_reader(
+            &mut reader_borrow,
+            &lookup.label,
+            true,
+        ) {
+            Ok(inference) => head_inspection.inferred_sex = Some(inference),
+            Err(err) => {
+                head_inspection
+                    .evidence
+                    .push(format!("alignment sex detection failed: {err:?}"));
+            }
+        }
+    }
+
     let mut loader = GenotypeLoadOptions::default();
     loader.format = Some(bioscript_formats::GenotypeSourceFormat::Cram);
     loader.allow_reference_md5_mismatch = true;
@@ -276,7 +312,7 @@ pub fn run_package_report_from_cram(
         analyses: &analyses,
         findings: &matched_findings,
         provenance: &provenance,
-        input_inspection: None,
+        input_inspection: Some(&head_inspection),
         manifest_metadata: &manifest_metadata,
     })];
     let observations_tsv = render_app_observations_tsv(&observations)?;
@@ -329,6 +365,20 @@ pub fn run_package_report_from_vcf(
     let manifest_metadata = workspace.report_manifest_metadata(manifest_path)?;
     let findings = workspace.load_manifest_findings(manifest_path)?;
     let provenance = workspace.load_manifest_provenance_links(manifest_path)?;
+    // Inspect format/source/assembly from the head, but skip the byte-stream
+    // sex detection — we'll do that via tabix-targeted X non-PAR queries
+    // below, which works on indexed VCFs of any size.
+    let mut head_inspection = inspect_head_via_js_reader(
+        &vcf_read_at,
+        vcf_len as u64,
+        input_name,
+        false,
+    );
+    // Decompress the head once to grab the VCF header lines (## meta + #CHROM
+    // column header) — these are needed by `infer_sex_from_text_lines` to
+    // figure out delimiter / column indexes for the data lines we'll pull
+    // via tabix below.
+    let head_lines = decompress_vcf_head_lines(&vcf_read_at, vcf_len as u64);
 
     let tabix_index = bioscript_formats::alignment::parse_tbi_bytes(tbi_bytes)
         .map_err(|err| JsError::new(&format!("parse tbi: {err:?}")))?;
@@ -340,6 +390,15 @@ pub fn run_package_report_from_vcf(
         label: input_name.to_owned(),
     };
 
+    if let Some(explicit) = explicit_sex_from_options(&options) {
+        head_inspection.inferred_sex = Some(explicit);
+    } else if options.detect_sex {
+        let mut reader_borrow = lookup.reader.borrow_mut();
+        if let Some(inference) = vcf_sex_via_tabix(&mut reader_borrow, &head_lines) {
+            head_inspection.inferred_sex = Some(inference);
+        }
+    }
+
     let mut loader = GenotypeLoadOptions::default();
     loader.format = Some(bioscript_formats::GenotypeSourceFormat::Vcf);
     let manifest_output =
@@ -372,7 +431,7 @@ pub fn run_package_report_from_vcf(
         analyses: &analyses,
         findings: &matched_findings,
         provenance: &provenance,
-        input_inspection: None,
+        input_inspection: Some(&head_inspection),
         manifest_metadata: &manifest_metadata,
     })];
     let observations_tsv = render_app_observations_tsv(&observations)?;
diff --git a/rust/bioscript-wasm/src/report_input_inspection.rs b/rust/bioscript-wasm/src/report_input_inspection.rs
new file mode 100644
index 0000000..b9656a3
--- /dev/null
+++ b/rust/bioscript-wasm/src/report_input_inspection.rs
@@ -0,0 +1,177 @@
+use super::ReportOptionsInput;
+
+/// Inspect the first bytes (at most 8 MiB) of a JS-backed input with
+/// `inspect_bytes`, filling the report's "Input" block (Format / Source /
+/// Assembly / Inferred sex / Evidence) for CRAM and VCF inputs. Never fails:
+/// read and inspection errors become evidence lines on the returned value.
+pub(super) fn inspect_head_via_js_reader(
+    read_at: &js_sys::Function,
+    total_len: u64,
+    input_name: &str,
+    detect_sex: bool,
+) -> bioscript_formats::FileInspection {
+    use crate::js_reader::JsReader;
+    use std::io::Read;
+
+    // 8 MiB covers hundreds of decompressed VCF records or several CRAM containers.
+    let head_len = total_len.min(8 * 1024 * 1024);
+    let mut reader = JsReader::new(read_at.clone(), total_len, "inspect");
+    let mut buf = vec![0u8; head_len as usize];
+    let mut filled = 0usize;
+    let mut read_failure = None;
+    while filled < buf.len() && read_failure.is_none() {
+        match reader.read(&mut buf[filled..]) {
+            Ok(0) => break,
+            Ok(n) => filled += n,
+            Err(err) => read_failure = Some(err),
+        }
+    }
+    buf.truncate(filled);
+    let opts = bioscript_formats::InspectOptions {
+        input_index: None,
+        reference_file: None,
+        reference_index: None,
+        detect_sex,
+    };
+    let mut inspection = match bioscript_formats::inspect_bytes(input_name, &buf, &opts) {
+        Ok(inspection) => inspection,
+        Err(err) => bioscript_formats::FileInspection {
+            path: std::path::PathBuf::from(input_name),
+            container: bioscript_formats::FileContainer::Plain,
+            detected_kind: bioscript_formats::DetectedKind::Unknown,
+            confidence: bioscript_formats::DetectionConfidence::Unknown,
+            source: None,
+            assembly: None,
+            phased: None,
+            selected_entry: None,
+            has_index: None,
+            index_path: None,
+            reference_matches: None,
+            inferred_sex: None,
+            evidence: vec![format!("inspect_bytes failed: {err:?}")],
+            warnings: Vec::new(),
+            duration_ms: 0,
+        },
+    };
+    inspection.evidence.extend(read_failure.map(|err| format!("head read failed: {err}")));
+    inspection
+}
+
+/// Build the explicit `SexInference` for the optional `sample_sex` option, like the
+/// CLI's `--sample-sex`; `None` when the option is absent or not a recognised value.
+pub(super) fn explicit_sex_from_options(
+    options: &ReportOptionsInput,
+) -> Option<bioscript_formats::SexInference> {
+    use bioscript_formats::{InferredSex, SexDetectionConfidence, SexInference};
+    let label = options.sample_sex.as_deref()?;
+    let sex = match label.trim().to_ascii_lowercase().as_str() {
+        "male" | "m" => InferredSex::Male,
+        "female" | "f" => InferredSex::Female,
+        "unknown" | "u" | "" => InferredSex::Unknown,
+        _ => return None,
+    };
+    Some(SexInference {
+        sex,
+        confidence: SexDetectionConfidence::High,
+        method: "explicit_sample_sex".to_owned(),
+        evidence: vec!["source=sample_sex_option".to_owned()],
+    })
+}
+
+/// `(start, end)` windows queried on chromosome X for VCF sex inference; each
+/// spans 1 Mb. Per the constant name these are GRCh38 non-PAR coordinates.
+const VCF_X_NON_PAR_WINDOWS_GRCH38: &[(i64, i64)] = &[
+    (10_000_000, 11_000_000),
+    (40_000_000, 41_000_000),
+    (70_000_000, 71_000_000),
+    (100_000_000, 101_000_000),
+    (130_000_000, 131_000_000),
+];
+
+/// `(start, end)` windows queried on chromosome Y for VCF sex inference; each
+/// spans 1 Mb. Per the constant name these are GRCh38 coordinates.
+const VCF_Y_WINDOWS_GRCH38: &[(i64, i64)] = &[
+    (3_500_000, 4_500_000),
+    (10_000_000, 11_000_000),
+    (15_000_000, 16_000_000),
+];
+
+/// Sex inference for indexed VCFs that fetches only the X non-PAR and Y
+/// windows through the tabix index instead of scanning the whole file. The
+/// fetched records are appended to `head_lines` and classified by the shared
+/// `infer_sex_from_text_lines`, so the rules match the CLI. Both bare (`X`,
+/// `Y`) and `chr`-prefixed names are tried; regions that cannot be built or
+/// queried and records that fail to read are skipped. Returns `None` when the
+/// classifier returns an error.
+pub(super) fn vcf_sex_via_tabix<R: std::io::Read + std::io::Seek>(
+    reader: &mut noodles::csi::io::IndexedReader<noodles::bgzf::io::Reader<R>, noodles::tabix::Index>,
+    head_lines: &[String],
+) -> Option<bioscript_formats::SexInference> {
+    // The head lines go first so the classifier sees them before the X/Y records.
+    let mut lines = head_lines.to_vec();
+    for (x_name, y_name) in [("X", "Y"), ("chrX", "chrY")] {
+        // Visit the X windows before the Y windows for each naming scheme so
+        // records are appended in the same order as a window-by-window scan.
+        let x_windows = VCF_X_NON_PAR_WINDOWS_GRCH38.iter().map(|window| (x_name, window));
+        let y_windows = VCF_Y_WINDOWS_GRCH38.iter().map(|window| (y_name, window));
+        for (chrom, (start, end)) in x_windows.chain(y_windows) {
+            let Some(region) = build_region(chrom, *start, *end) else {
+                continue;
+            };
+            // A failing query (presumably an unknown contig name) is skipped.
+            let Ok(query) = reader.query(&region) else {
+                continue;
+            };
+            // Unreadable records are dropped; the rest are kept as text lines.
+            lines.extend(query.filter_map(Result::ok).map(|record| {
+                let line: &str = record.as_ref();
+                line.to_owned()
+            }));
+        }
+    }
+    // Classification errors collapse to `None`, i.e. no inferred sex is reported.
+    bioscript_formats::infer_sex_from_text_lines(&lines, bioscript_formats::DetectedKind::Vcf).ok()
+}
+
+fn build_region(chrom: &str, start: i64, end: i64) -> Option<noodles::core::Region> {
+    // Clamp start to >= 1 and end to >= start; invalid bounds give `None`.
+    let position = |raw: i64| noodles::core::Position::try_from(usize::try_from(raw).ok()?).ok();
+    let (first, last) = (position(start.max(1))?, position(end.max(start))?);
+    Some(noodles::core::Region::new(chrom, first..=last))
+}
+
+/// Decompress the first 2 MiB of a bgzf VCF and return up to 1024 leading lines, giving
+/// `infer_sex_from_text_lines` the header context for the X/Y records added via tabix.
+pub(super) fn decompress_vcf_head_lines(
+    read_at: &js_sys::Function,
+    total_len: u64,
+) -> Vec<String> {
+    use crate::js_reader::JsReader;
+    use std::io::{BufRead, BufReader, Read};
+
+    const HEAD_BYTES: u64 = 2 * 1024 * 1024;
+    const MAX_HEAD_LINES: usize = 1024;
+
+    let mut source = JsReader::new(read_at.clone(), total_len, "vcf-head");
+    let mut head = vec![0u8; total_len.min(HEAD_BYTES) as usize];
+    let mut offset = 0usize;
+    // Best effort: a short or failed read simply leaves a shorter head.
+    while offset < head.len() {
+        match source.read(&mut head[offset..]) {
+            Ok(0) | Err(_) => break,
+            Ok(count) => offset += count,
+        }
+    }
+    head.truncate(offset);
+    let mut bgzf = BufReader::new(noodles::bgzf::io::Reader::new(std::io::Cursor::new(head)));
+    let mut collected = Vec::new();
+    let mut current = String::new();
+    while collected.len() < MAX_HEAD_LINES {
+        current.clear();
+        match bgzf.read_line(&mut current) {
+            Ok(0) | Err(_) => break,
+            Ok(_) => collected.push(current.trim_end_matches(['\n', '\r']).to_owned()),
+        }
+    }
+    collected
+}
diff --git a/rust/bioscript-wasm/src/report_workspace/analysis.rs b/rust/bioscript-wasm/src/report_workspace/analysis.rs
index c522419..1d7cdfa 100644
--- a/rust/bioscript-wasm/src/report_workspace/analysis.rs
+++ b/rust/bioscript-wasm/src/report_workspace/analysis.rs
@@ -1,7 +1,7 @@
 use super::*;
 
 impl PackageWorkspace {
-    pub(super) fn run_manifest_analyses(
+    pub(crate) fn run_manifest_analyses(
         &self,
         manifest_path: &str,
         input_name: &str,

From e66040a049f0b39e77f8811e79fa879608325465 Mon Sep 17 00:00:00 2001
From: Madhava Jay <me@madhavajay.com>
Date: Mon, 11 May 2026 21:48:41 +1000
Subject: [PATCH 2/2] fixing issues with wasm reporting versus rust

---
 rust/bioscript-formats/src/genotype.rs        | 124 ++++----------
 .../src/genotype/backends.rs                  |   9 +-
 .../src/genotype/delimited.rs                 |   2 +-
 rust/bioscript-formats/src/genotype/io.rs     |  20 +--
 .../bioscript-formats/src/genotype/loaders.rs | 160 ++++++++++++++++++
 rust/bioscript-formats/src/genotype/types.rs  |   5 +
 rust/bioscript-formats/src/genotype/vcf.rs    |   6 +-
 .../src/genotype/vcf/matching.rs              |   2 +-
 rust/bioscript-formats/src/lib.rs             |   5 +-
 rust/bioscript-wasm/Cargo.toml                |   7 +-
 rust/bioscript-wasm/src/report_api.rs         |  11 +-
 rust/bioscript-wasm/src/report_helpers.rs     |  47 ++++-
 rust/bioscript-wasm/src/report_lookup.rs      |  39 +++--
 rust/bioscript-wasm/src/report_workspace.rs   |   8 +-
 14 files changed, 300 insertions(+), 145 deletions(-)
 create mode 100644 rust/bioscript-formats/src/genotype/loaders.rs

diff --git a/rust/bioscript-formats/src/genotype.rs b/rust/bioscript-formats/src/genotype.rs
index 805c805..050f8cf 100644
--- a/rust/bioscript-formats/src/genotype.rs
+++ b/rust/bioscript-formats/src/genotype.rs
@@ -1,7 +1,7 @@
 use std::{
     collections::HashMap,
     fs::File,
-    io::{BufReader, Cursor},
+    io::{BufRead, BufReader, Cursor},
     path::Path,
 };
 
@@ -16,6 +16,7 @@ mod common;
 mod cram_backend;
 mod delimited;
 mod io;
+mod loaders;
 mod types;
 mod vcf;
 mod vcf_tokens;
@@ -32,7 +33,7 @@ use cram_backend::{
 };
 pub use cram_backend::{observe_cram_indel_with_reader, observe_cram_snp_with_reader};
 pub(crate) use delimited::{
-    DelimitedColumnIndexes, Delimiter, detect_delimiter, parse_streaming_row,
+    COMMENT_PREFIXES, DelimitedColumnIndexes, Delimiter, detect_delimiter, parse_streaming_row,
 };
 #[cfg(test)]
 use delimited::{GENOTYPE_ALIASES, split_csv_line, strip_bom, strip_inline_comment};
@@ -44,31 +45,27 @@ use delimited::{
 };
 #[cfg(test)]
 use io::looks_like_vcf_lines;
-use io::{
-    detect_source_format, is_bgzf_path, read_lines_from_reader, read_zip_entry_limited,
-    select_zip_entry,
-};
+use io::{detect_source_format, is_bgzf_path, read_lines_from_reader, select_zip_entry};
 pub use types::{
     BackendCapabilities, GenotypeLoadOptions, GenotypeSourceFormat, GenotypeStore, QueryKind,
 };
 use types::{CramBackend, DelimitedBackend, QueryBackend, RsidMapBackend, VcfBackend};
+pub use vcf::{
+    choose_variant_locus_for_assembly, imputed_reference_observation, observe_vcf_snp_with_reader,
+    observe_vcf_variant_with_reader,
+};
 #[cfg(test)]
 use vcf::{
-    choose_variant_locus_for_assembly, detect_vcf_assembly, extract_vcf_sample_genotype,
-    normalize_chromosome_name, parse_vcf_record, vcf_row_matches_variant,
-};
-pub use vcf::{
-    imputed_reference_observation, observe_vcf_snp_with_reader, observe_vcf_variant_with_reader,
+    detect_vcf_assembly, extract_vcf_sample_genotype, normalize_chromosome_name, parse_vcf_record,
+    vcf_row_matches_variant,
 };
 use vcf::{lookup_indexed_vcf_variants, scan_vcf_variants};
-use vcf_tokens::genotype_from_vcf_gt;
+pub(crate) use vcf_tokens::genotype_from_vcf_gt;
 #[cfg(test)]
 use vcf_tokens::{
     is_symbolic_vcf_alt, normalize_sequence_token, vcf_alt_token, vcf_reference_token,
 };
 
-const MAX_ZIP_ENTRY_BYTES: u64 = 128 * 1024 * 1024;
-
 impl GenotypeStore {
     pub fn from_file(path: &Path) -> Result<Self, RuntimeError> {
         Self::from_file_with_options(path, &GenotypeLoadOptions::default())
@@ -102,6 +99,7 @@ impl GenotypeStore {
             backend: QueryBackend::RsidMap(RsidMapBackend {
                 format: GenotypeSourceFormat::Text,
                 values: HashMap::new(),
+                source_lines: HashMap::new(),
             }),
         }
     }
@@ -131,13 +129,11 @@ impl GenotypeStore {
         if lower.ends_with(".zip") {
             return Self::from_zip_bytes(name, bytes);
         }
+        let reader = BufReader::new(Cursor::new(bytes));
         if lower.ends_with(".vcf") {
-            let lines =
-                read_lines_from_reader(BufReader::new(Cursor::new(bytes)), Path::new(name))?;
-            return Self::from_vcf_lines(lines);
+            return Self::from_vcf_reader(reader, name);
         }
-        let lines = read_lines_from_reader(BufReader::new(Cursor::new(bytes)), Path::new(name))?;
-        Self::from_delimited_lines(GenotypeSourceFormat::Text, lines)
+        Self::from_delimited_reader(GenotypeSourceFormat::Text, reader, name)
     }
 
     fn from_zip_bytes(name: &str, bytes: &[u8]) -> Result<Self, RuntimeError> {
@@ -168,22 +164,22 @@ impl GenotypeStore {
                 "zip archive {name} does not contain a supported genotype file"
             ))
         })?;
-        let mut entry = archive.by_name(&selected).map_err(|err| {
+        let entry = archive.by_name(&selected).map_err(|err| {
             RuntimeError::Io(format!(
                 "failed to open genotype entry {selected} in {name}: {err}"
             ))
         })?;
-        let contents = read_zip_entry_limited(
-            &mut entry,
-            MAX_ZIP_ENTRY_BYTES,
-            &format!("genotype entry {selected} in {name}"),
-        )?;
-        let lines =
-            read_lines_from_reader(BufReader::new(Cursor::new(contents)), Path::new(&selected))?;
+        let label = format!("genotype entry {selected} in {name}");
+        // Stream-decompress directly off the zip reader so we never have to
+        // materialize the entire decompressed entry in memory. GenesForGood
+        // exports decompress to >128MB which used to trip the old
+        // `read_zip_entry_limited` cap; the cap is gone because the streaming
+        // parser keeps memory bounded to the rsid map itself.
+        let reader = BufReader::new(entry);
         if selected.to_ascii_lowercase().ends_with(".vcf") {
-            return Self::from_vcf_lines(lines);
+            return Self::from_vcf_reader(reader, &label);
         }
-        Self::from_delimited_lines(GenotypeSourceFormat::Zip, lines)
+        Self::from_delimited_reader(GenotypeSourceFormat::Zip, reader, &label)
     }
 
     fn from_vcf_file(path: &Path, options: &GenotypeLoadOptions) -> Self {
@@ -236,63 +232,20 @@ impl GenotypeStore {
         })
     }
 
-    fn from_vcf_lines(lines: Vec<String>) -> Result<Self, RuntimeError> {
-        let mut values = HashMap::new();
-
-        for line in lines {
-            let trimmed = line.trim();
-            if trimmed.is_empty() || trimmed.starts_with("##") || trimmed.starts_with("#CHROM") {
-                continue;
-            }
-
-            let fields: Vec<&str> = trimmed.split('\t').collect();
-            if fields.len() < 10 {
-                continue;
-            }
-
-            let rsid = fields[2].trim();
-            if rsid.is_empty() || rsid == "." {
-                continue;
-            }
-
-            let reference = fields[3].trim();
-            let alternates: Vec<&str> = fields[4]
-                .split(',')
-                .map(str::trim)
-                .filter(|alt| !alt.is_empty() && *alt != ".")
-                .collect();
-            if reference.is_empty() || alternates.is_empty() {
-                continue;
-            }
-
-            let sample_gt = fields[9].split(':').next().unwrap_or(".");
-            if let Some(genotype) = genotype_from_vcf_gt(sample_gt, reference, &alternates) {
-                values.insert(rsid.to_owned(), genotype);
-            }
-        }
-
-        Ok(Self::from_rsid_map(GenotypeSourceFormat::Vcf, values))
+    fn from_vcf_reader<R: BufRead>(reader: R, label: &str) -> Result<Self, RuntimeError> {
+        loaders::from_vcf_reader(reader, label)
     }
 
-    fn from_delimited_lines(
+    fn from_delimited_reader<R: BufRead>(
         format: GenotypeSourceFormat,
-        lines: Vec<String>,
+        reader: R,
+        label: &str,
     ) -> Result<Self, RuntimeError> {
-        let delimiter = detect_delimiter(&lines);
-        let mut parser = RowParser::new(delimiter);
-        let mut values = HashMap::new();
-        for line in lines {
-            if let Some((rsid, genotype)) = parser.consume_line(&line)? {
-                values.insert(rsid, genotype);
-            }
-        }
-        Ok(Self::from_rsid_map(format, values))
+        loaders::from_delimited_reader(format, reader, label)
     }
 
-    fn from_rsid_map(format: GenotypeSourceFormat, values: HashMap<String, String>) -> Self {
-        Self {
-            backend: QueryBackend::RsidMap(RsidMapBackend { format, values }),
-        }
+    fn from_vcf_lines(lines: Vec<String>) -> Result<Self, RuntimeError> {
+        loaders::from_vcf_lines(lines)
     }
 
     fn from_delimited_file(
@@ -1683,15 +1636,4 @@ mod tests {
         .unwrap_err();
         assert!(err.to_string().contains("invalid VCF position"));
     }
-
-    #[test]
-    fn zip_entry_limited_reader_rejects_oversized_output() {
-        let mut reader = std::io::Cursor::new(b"abcdef".to_vec());
-        let err = read_zip_entry_limited(&mut reader, 5, "test zip entry").unwrap_err();
-        assert!(
-            err.to_string()
-                .contains("test zip entry exceeds decompressed limit of 5 bytes"),
-            "{err}"
-        );
-    }
 }
diff --git a/rust/bioscript-formats/src/genotype/backends.rs b/rust/bioscript-formats/src/genotype/backends.rs
index b2e70cd..32ca5a8 100644
--- a/rust/bioscript-formats/src/genotype/backends.rs
+++ b/rust/bioscript-formats/src/genotype/backends.rs
@@ -22,11 +22,18 @@ impl RsidMapBackend {
     ) -> Result<VariantObservation, RuntimeError> {
         for rsid in &variant.rsids {
             if let Some(value) = self.values.get(rsid) {
+                let mut evidence = vec![format!("resolved by rsid {rsid}")];
+                // Mirror DelimitedBackend's `| source line: …` evidence so
+                // wasm-side from_bytes loads produce byte-identical reports
+                // to the CLI's path-backed DelimitedBackend.
+                if let Some(source) = self.source_lines.get(rsid) {
+                    evidence.push(format!("source line: {source}"));
+                }
                 return Ok(VariantObservation {
                     backend: self.backend_name().to_owned(),
                     matched_rsid: Some(rsid.clone()),
                     genotype: Some(value.clone()),
-                    evidence: vec![format!("resolved by rsid {rsid}")],
+                    evidence,
                     ..VariantObservation::default()
                 });
             }
diff --git a/rust/bioscript-formats/src/genotype/delimited.rs b/rust/bioscript-formats/src/genotype/delimited.rs
index a4240a8..02b1da3 100644
--- a/rust/bioscript-formats/src/genotype/delimited.rs
+++ b/rust/bioscript-formats/src/genotype/delimited.rs
@@ -8,7 +8,7 @@ mod scan;
 
 pub(crate) use scan::scan_delimited_variants;
 
-const COMMENT_PREFIXES: [&str; 2] = ["#", "//"];
+pub(crate) const COMMENT_PREFIXES: [&str; 2] = ["#", "//"];
 const RSID_ALIASES: &[&str] = &["rsid", "name", "snp", "marker", "id", "snpid"];
 const CHROM_ALIASES: &[&str] = &["chromosome", "chr", "chrom"];
 const POSITION_ALIASES: &[&str] = &[
diff --git a/rust/bioscript-formats/src/genotype/io.rs b/rust/bioscript-formats/src/genotype/io.rs
index c70622d..8d69e55 100644
--- a/rust/bioscript-formats/src/genotype/io.rs
+++ b/rust/bioscript-formats/src/genotype/io.rs
@@ -1,6 +1,6 @@
 use std::{
     fs::File,
-    io::{BufRead, BufReader, Read},
+    io::{BufRead, BufReader},
     path::Path,
 };
 
@@ -96,24 +96,6 @@ pub(crate) fn read_lines_from_reader<R: BufRead>(
     Ok(lines)
 }
 
-pub(crate) fn read_zip_entry_limited<R: Read>(
-    reader: &mut R,
-    max_bytes: u64,
-    label: &str,
-) -> Result<Vec<u8>, RuntimeError> {
-    let mut contents = Vec::new();
-    reader
-        .take(max_bytes.saturating_add(1))
-        .read_to_end(&mut contents)
-        .map_err(|err| RuntimeError::Io(format!("failed to read {label}: {err}")))?;
-    if u64::try_from(contents.len()).unwrap_or(u64::MAX) > max_bytes {
-        return Err(RuntimeError::InvalidArguments(format!(
-            "{label} exceeds decompressed limit of {max_bytes} bytes"
-        )));
-    }
-    Ok(contents)
-}
-
 pub(crate) fn detect_source_format(
     path: &Path,
     forced: Option<GenotypeSourceFormat>,
diff --git a/rust/bioscript-formats/src/genotype/loaders.rs b/rust/bioscript-formats/src/genotype/loaders.rs
new file mode 100644
index 0000000..27079cf
--- /dev/null
+++ b/rust/bioscript-formats/src/genotype/loaders.rs
@@ -0,0 +1,160 @@
+use std::{collections::HashMap, io::BufRead};
+
+use bioscript_core::RuntimeError;
+
+use super::{
+    COMMENT_PREFIXES, GenotypeSourceFormat, GenotypeStore, QueryBackend, RowParser, RsidMapBackend,
+    detect_delimiter, vcf_tokens::genotype_from_vcf_gt,
+};
+
+/// Streams VCF text from `reader` into an rsid -> genotype store, one reused buffer per line.
+///
+/// `label` only names the input in the I/O error message.
+pub(crate) fn from_vcf_reader<R: BufRead>(
+    mut reader: R,
+    label: &str,
+) -> Result<GenotypeStore, RuntimeError> {
+    let mut genotypes = HashMap::new();
+    let mut line = String::new();
+    while reader
+        .read_line(&mut line)
+        .map_err(|err| RuntimeError::Io(format!("failed to read {label}: {err}")))?
+        != 0
+    {
+        read_vcf_rsid_line(line.trim_end_matches(['\n', '\r']), &mut genotypes);
+        // `read_line` appends to the buffer, so empty it before the next read.
+        line.clear();
+    }
+
+    // VCF rows keep no source-line evidence, so that map stays empty.
+    let no_source_lines = HashMap::new();
+    Ok(from_rsid_map(GenotypeSourceFormat::Vcf, genotypes, no_source_lines))
+}
+
+pub(crate) fn from_delimited_reader<R: BufRead>(
+    format: GenotypeSourceFormat,
+    mut reader: R,
+    label: &str,
+) -> Result<GenotypeStore, RuntimeError> {
+    // Buffer every line up to and including the first data (non-blank, non-comment)
+    // line so delimiter detection sees a real row; later lines are streamed unbuffered.
+    let mut prelude: Vec<String> = Vec::new();
+    let mut buf = String::new();
+    let mut delimiter = None;
+    loop {
+        buf.clear();
+        let bytes = reader
+            .read_line(&mut buf)
+            .map_err(|err| RuntimeError::Io(format!("failed to read {label}: {err}")))?;
+        if bytes == 0 {
+            break;
+        }
+        let line = buf.trim_end_matches(['\n', '\r']).to_owned();
+        let trimmed = line.trim();
+        let is_data = !trimmed.is_empty()
+            && !COMMENT_PREFIXES
+                .iter()
+                .any(|prefix| trimmed.starts_with(prefix));
+        prelude.push(line);
+        if is_data {
+            delimiter = Some(detect_delimiter(&prelude));
+            break;
+        }
+    }
+    // No data line (empty or comment-only input): no delimiter was detected, use tab.
+    let mut parser = RowParser::new(delimiter.unwrap_or(super::Delimiter::Tab));
+    let mut values = HashMap::new();
+    let mut source_lines = HashMap::new();
+    for line in prelude {
+        consume_delimited_line(&mut parser, &line, &mut values, &mut source_lines)?;
+    }
+    loop {
+        buf.clear();
+        let bytes = reader
+            .read_line(&mut buf)
+            .map_err(|err| RuntimeError::Io(format!("failed to read {label}: {err}")))?;
+        if bytes == 0 {
+            break;
+        }
+        consume_delimited_line(
+            &mut parser,
+            buf.trim_end_matches(['\n', '\r']),
+            &mut values,
+            &mut source_lines,
+        )?;
+    }
+
+    Ok(from_rsid_map(format, values, source_lines))
+}
+
+/// Builds an rsid-keyed VCF store from lines that are already in memory.
+///
+/// Never returns `Err` today; the `Result` mirrors the reader-based loaders.
+pub(crate) fn from_vcf_lines(lines: Vec<String>) -> Result<GenotypeStore, RuntimeError> {
+    let mut genotypes = HashMap::new();
+    for raw in lines {
+        read_vcf_rsid_line(raw.trim(), &mut genotypes);
+    }
+    // VCF rows keep no source-line evidence, hence the empty second map.
+    Ok(from_rsid_map(GenotypeSourceFormat::Vcf, genotypes, HashMap::new()))
+}
+
+/// Parses one VCF data line and, when a genotype resolves, stores it under the ID column value.
+fn read_vcf_rsid_line(line: &str, values: &mut HashMap<String, String>) {
+    let record = line.trim();
+    if record.is_empty() || record.starts_with("##") || record.starts_with("#CHROM") {
+        return;
+    }
+
+    // Columns: 0 CHROM, 1 POS, 2 ID, 3 REF, 4 ALT, 5-8 QUAL..FORMAT, 9 first sample.
+    let columns: Vec<&str> = record.split('\t').collect();
+    let [_, _, id, reference, alts, _, _, _, _, sample, ..] = columns.as_slice() else {
+        return;
+    };
+    let (rsid, reference) = (id.trim(), reference.trim());
+    if rsid.is_empty() || rsid == "." || reference.is_empty() {
+        return;
+    }
+
+    let alternates: Vec<&str> = alts
+        .split(',')
+        .map(str::trim)
+        .filter(|alt| !alt.is_empty() && *alt != ".")
+        .collect();
+    if alternates.is_empty() {
+        return;
+    }
+
+    let sample_gt = sample.split(':').next().unwrap_or(".");
+    if let Some(genotype) = genotype_from_vcf_gt(sample_gt, reference, &alternates) {
+        values.insert(rsid.to_owned(), genotype);
+    }
+}
+
+/// Records a parsed row; the trimmed source line is copied only when the row actually parses.
+fn consume_delimited_line(
+    parser: &mut RowParser,
+    line: &str,
+    values: &mut HashMap<String, String>,
+    source_lines: &mut HashMap<String, String>,
+) -> Result<(), RuntimeError> {
+    if let Some((rsid, genotype)) = parser.consume_line(line)? {
+        source_lines.insert(rsid.clone(), line.trim().to_owned());
+        values.insert(rsid, genotype);
+    }
+    Ok(())
+}
+
+/// Wraps the parsed rsid maps in a `GenotypeStore` backed by `RsidMapBackend`.
+fn from_rsid_map(
+    format: GenotypeSourceFormat,
+    values: HashMap<String, String>,
+    source_lines: HashMap<String, String>,
+) -> GenotypeStore {
+    let backend = QueryBackend::RsidMap(RsidMapBackend {
+        format,
+        values,
+        source_lines,
+    });
+    GenotypeStore { backend }
+}
diff --git a/rust/bioscript-formats/src/genotype/types.rs b/rust/bioscript-formats/src/genotype/types.rs
index b65d47b..3572faf 100644
--- a/rust/bioscript-formats/src/genotype/types.rs
+++ b/rust/bioscript-formats/src/genotype/types.rs
@@ -33,6 +33,11 @@ pub(crate) enum QueryBackend {
 pub(crate) struct RsidMapBackend {
     pub(crate) format: GenotypeSourceFormat,
     pub(crate) values: HashMap<String, String>,
+    /// Original input line per rsid, retained so wasm-side `from_bytes` loads
+    /// can emit the same `| source line: …` evidence that the CLI's
+    /// path-backed `DelimitedBackend` does on every lookup. Empty for
+    /// in-memory maps that don't have a line representation.
+    pub(crate) source_lines: HashMap<String, String>,
 }
 
 #[derive(Debug, Clone)]
diff --git a/rust/bioscript-formats/src/genotype/vcf.rs b/rust/bioscript-formats/src/genotype/vcf.rs
index cf8fa57..1440891 100644
--- a/rust/bioscript-formats/src/genotype/vcf.rs
+++ b/rust/bioscript-formats/src/genotype/vcf.rs
@@ -20,10 +20,8 @@ use super::{
 mod matching;
 mod reader;
 
-pub use matching::imputed_reference_observation;
-pub(crate) use matching::{
-    choose_variant_locus_for_assembly, normalize_chromosome_name, vcf_row_matches_variant,
-};
+pub use matching::{choose_variant_locus_for_assembly, imputed_reference_observation};
+pub(crate) use matching::{normalize_chromosome_name, vcf_row_matches_variant};
 pub use reader::{observe_vcf_snp_with_reader, observe_vcf_variant_with_reader};
 
 #[derive(Debug, Clone)]
diff --git a/rust/bioscript-formats/src/genotype/vcf/matching.rs b/rust/bioscript-formats/src/genotype/vcf/matching.rs
index afecd10..a0bbddc 100644
--- a/rust/bioscript-formats/src/genotype/vcf/matching.rs
+++ b/rust/bioscript-formats/src/genotype/vcf/matching.rs
@@ -4,7 +4,7 @@ use crate::inspect::InferredSex;
 
 use super::ParsedVcfRow;
 
-pub(crate) fn choose_variant_locus_for_assembly(
+pub fn choose_variant_locus_for_assembly(
     variant: &VariantSpec,
     assembly: Option<Assembly>,
 ) -> Option<GenomicLocus> {
diff --git a/rust/bioscript-formats/src/lib.rs b/rust/bioscript-formats/src/lib.rs
index b349f43..38241bf 100644
--- a/rust/bioscript-formats/src/lib.rs
+++ b/rust/bioscript-formats/src/lib.rs
@@ -15,8 +15,9 @@ mod prepare;
 
 pub use genotype::{
     BackendCapabilities, GenotypeLoadOptions, GenotypeSourceFormat, GenotypeStore, QueryKind,
-    imputed_reference_observation, observe_cram_indel_with_reader, observe_cram_snp_with_reader,
-    observe_vcf_snp_with_reader, observe_vcf_variant_with_reader,
+    choose_variant_locus_for_assembly, imputed_reference_observation,
+    observe_cram_indel_with_reader, observe_cram_snp_with_reader, observe_vcf_snp_with_reader,
+    observe_vcf_variant_with_reader,
 };
 pub use inspect::{
     DetectedKind, DetectionConfidence, FileContainer, FileInspection, InferredSex, InspectOptions,
diff --git a/rust/bioscript-wasm/Cargo.toml b/rust/bioscript-wasm/Cargo.toml
index 3dcee73..fc744af 100644
--- a/rust/bioscript-wasm/Cargo.toml
+++ b/rust/bioscript-wasm/Cargo.toml
@@ -17,7 +17,12 @@ noodles = { version = "0.109.0", features = ["bgzf", "cram", "csi", "fasta", "ta
 wasm-bindgen = "0.2"
 js-sys = "0.3"
 serde = { version = "1", features = ["derive"] }
-serde_json = "1"
+# `preserve_order` matches `bioscript-cli`'s feature set so the wasm-produced
+# JSON dumps key fields in the same insertion order the CLI does. Without
+# this, BTreeMap-backed alphabetical sorting moves `assay_id` after
+# `analyses` in the raw `index.html` JSON dump and the byte-diff vs the CLI
+# blows up.
+serde_json = { version = "1", features = ["preserve_order"] }
 serde_yaml = "0.9"
 sha2 = "0.10"
 zip = { version = "2.2.0", default-features = false, features = ["deflate"] }
diff --git a/rust/bioscript-wasm/src/report_api.rs b/rust/bioscript-wasm/src/report_api.rs
index b6aaad1..33d6231 100644
--- a/rust/bioscript-wasm/src/report_api.rs
+++ b/rust/bioscript-wasm/src/report_api.rs
@@ -10,7 +10,9 @@ use bioscript_core::{
     VariantSpec,
 };
 use bioscript_formats::{
-    GenotypeLoadOptions, GenotypeStore, InspectOptions, inspect_bytes as inspect_bytes_rs,
+    DetectedKind, DetectionConfidence, FileContainer, FileInspection, GenotypeLoadOptions,
+    GenotypeStore, InferredSex, InspectOptions, SexDetectionConfidence, SexInference,
+    inspect_bytes as inspect_bytes_rs,
 };
 use bioscript_runtime::{BioscriptRuntime, RuntimeConfig};
 use bioscript_schema::{
@@ -111,7 +113,7 @@ pub fn run_package_report_bytes(
     };
     let workspace = PackageWorkspace::new(package_files)?;
     let participant_id = participant_id_from_name(input_name);
-    let assay_id = app_assay_id(Path::new(manifest_path))?;
+    let assay_id = app_assay_id_from_workspace(&workspace, manifest_path)?;
     let manifest_metadata = workspace.report_manifest_metadata(manifest_path)?;
     let findings = workspace.load_manifest_findings(manifest_path)?;
     let provenance = workspace.load_manifest_provenance_links(manifest_path)?;
@@ -225,7 +227,7 @@ pub fn run_package_report_from_cram(
     };
     let workspace = PackageWorkspace::new(package_files)?;
     let participant_id = participant_id_from_name(input_name);
-    let assay_id = app_assay_id(Path::new(manifest_path))?;
+    let assay_id = app_assay_id_from_workspace(&workspace, manifest_path)?;
     let manifest_metadata = workspace.report_manifest_metadata(manifest_path)?;
     let findings = workspace.load_manifest_findings(manifest_path)?;
     let provenance = workspace.load_manifest_provenance_links(manifest_path)?;
@@ -361,7 +363,7 @@ pub fn run_package_report_from_vcf(
     };
     let workspace = PackageWorkspace::new(package_files)?;
     let participant_id = participant_id_from_name(input_name);
-    let assay_id = app_assay_id(Path::new(manifest_path))?;
+    let assay_id = app_assay_id_from_workspace(&workspace, manifest_path)?;
     let manifest_metadata = workspace.report_manifest_metadata(manifest_path)?;
     let findings = workspace.load_manifest_findings(manifest_path)?;
     let provenance = workspace.load_manifest_provenance_links(manifest_path)?;
@@ -388,6 +390,7 @@ pub fn run_package_report_from_vcf(
     let lookup = VcfReportLookup {
         reader: std::cell::RefCell::new(indexed),
         label: input_name.to_owned(),
+        detected_assembly: head_inspection.assembly,
     };
 
     if let Some(explicit) = explicit_sex_from_options(&options) {
diff --git a/rust/bioscript-wasm/src/report_helpers.rs b/rust/bioscript-wasm/src/report_helpers.rs
index ca7e284..e3a12ad 100644
--- a/rust/bioscript-wasm/src/report_helpers.rs
+++ b/rust/bioscript-wasm/src/report_helpers.rs
@@ -122,6 +122,32 @@ pub(super) fn participant_id_from_name(path: &str) -> String {
         .replace([' ', '\t', '\n'], "_")
 }
 
+/// Resolve the report's assay id from the manifest's own `name:` field.
+///
+/// The manifest is read through the in-memory `PackageWorkspace`, so no disk
+/// access is needed. The `schema` value selects the loader: panel, assay and
+/// variant manifests are supported; any other schema is rejected with a
+/// `JsError`.
+///
+/// This mirrors `bioscript-cli::report_execution::app_assay_id`. The earlier
+/// wasm behaviour (filename stem, e.g. `manifest.yaml` -> `manifest`) produced
+/// ids that differed from the CLI's (e.g. `pgx-1`) in the HTML report keys.
+pub(super) fn app_assay_id_from_workspace(
+    workspace: &PackageWorkspace,
+    manifest_path: &str,
+) -> Result<String, JsError> {
+    let schema = workspace.schema(manifest_path)?;
+    let name = match schema.as_str() {
+        "bioscript:panel:1.0" => workspace.load_panel(manifest_path)?.name,
+        "bioscript:assay:1.0" => workspace.load_assay(manifest_path)?.name,
+        "bioscript:variant:1.0" | "bioscript:variant" => {
+            workspace.load_variant(manifest_path)?.name
+        }
+        other => return Err(JsError::new(&format!("unsupported manifest schema '{other}'"))),
+    };
+    Ok(name)
+}
+
 pub(super) fn app_assay_id(path: &Path) -> Result<String, JsError> {
     path.file_stem()
         .and_then(|value| value.to_str())
@@ -262,20 +288,20 @@ pub(super) fn input_inspection_json(
             bioscript_formats::DetectedKind::ReferenceFasta => "reference_fasta",
             bioscript_formats::DetectedKind::Unknown => "unknown",
         },
-        "format_confidence": match inspection.confidence {
-            bioscript_formats::DetectionConfidence::Authoritative => "authoritative",
-            bioscript_formats::DetectionConfidence::StrongHeuristic => "strong_heuristic",
-            bioscript_formats::DetectionConfidence::WeakHeuristic => "weak_heuristic",
-            bioscript_formats::DetectionConfidence::Unknown => "unknown",
-        },
+        "format_confidence": detection_confidence_name(inspection.confidence),
         "assembly": inspection.assembly.map(|assembly| match assembly {
             Assembly::Grch37 => "grch37",
             Assembly::Grch38 => "grch38",
         }),
+        "phased": inspection.phased,
         "selected_entry": inspection.selected_entry,
+        "has_index": inspection.has_index,
+        "index_path": inspection.index_path.as_ref().map(|path| path.display().to_string()),
+        "reference_matches": inspection.reference_matches,
         "source": inspection.source.as_ref().map(|source| serde_json::json!({
             "vendor": source.vendor,
             "platform_version": source.platform_version,
+            "confidence": detection_confidence_name(source.confidence),
             "evidence": source.evidence,
         })),
         "inferred_sex": inspection.inferred_sex.as_ref().map(|sex| serde_json::json!({
@@ -290,6 +316,15 @@ pub(super) fn input_inspection_json(
     })
 }
 
+fn detection_confidence_name(value: bioscript_formats::DetectionConfidence) -> &'static str {
+    match value {
+        bioscript_formats::DetectionConfidence::Authoritative => "authoritative",
+        bioscript_formats::DetectionConfidence::StrongHeuristic => "strong_heuristic",
+        bioscript_formats::DetectionConfidence::WeakHeuristic => "weak_heuristic",
+        bioscript_formats::DetectionConfidence::Unknown => "unknown",
+    }
+}
+
 pub(super) fn yaml_string(value: &serde_yaml::Value, key: &str) -> Option<String> {
     value
         .get(key)
diff --git a/rust/bioscript-wasm/src/report_lookup.rs b/rust/bioscript-wasm/src/report_lookup.rs
index dee827b..ff0ffcd 100644
--- a/rust/bioscript-wasm/src/report_lookup.rs
+++ b/rust/bioscript-wasm/src/report_lookup.rs
@@ -182,12 +182,19 @@ pub(super) struct VcfReportLookup<R: std::io::Read + std::io::Seek> {
         noodles::csi::io::IndexedReader<noodles::bgzf::io::Reader<R>, noodles::tabix::Index>,
     >,
     pub(super) label: String,
+    /// Assembly resolved from the VCF header (via `inspect_head_via_js_reader`).
+    /// Matches the CLI's `lookup_indexed_vcf_variants` flow which calls
+    /// `detect_vcf_assembly_from_path`. Without this the wasm picks GRCh38
+    /// over GRCh37 for any panel variant that declares both loci, then
+    /// misses the variant in a GRCh37-coded VCF (NA06985.clean.vcf.gz etc.)
+    /// and falls through to "imputed reference".
+    pub(super) detected_assembly: Option<Assembly>,
 }
 
 impl<R: std::io::Read + std::io::Seek> report_workspace::VariantLookup for VcfReportLookup<R> {
     fn lookup_variant(&self, spec: &VariantSpec) -> Result<VariantObservation, RuntimeError> {
         let mut reader = self.reader.borrow_mut();
-        observe_vcf_variant(&mut reader, &self.label, spec)
+        observe_vcf_variant(&mut reader, &self.label, spec, self.detected_assembly)
     }
 
     fn lookup_variants(
@@ -197,7 +204,12 @@ impl<R: std::io::Read + std::io::Seek> report_workspace::VariantLookup for VcfRe
         let mut reader = self.reader.borrow_mut();
         let mut out = Vec::with_capacity(specs.len());
         for spec in specs {
-            out.push(observe_vcf_variant(&mut reader, &self.label, spec)?);
+            out.push(observe_vcf_variant(
+                &mut reader,
+                &self.label,
+                spec,
+                self.detected_assembly,
+            )?);
         }
         Ok(out)
     }
@@ -210,16 +222,12 @@ fn observe_vcf_variant<R: std::io::Read + std::io::Seek>(
     >,
     label: &str,
     variant: &VariantSpec,
+    detected_assembly: Option<Assembly>,
 ) -> Result<VariantObservation, RuntimeError> {
-    let assembly = variant
-        .grch38
-        .as_ref()
-        .map(|_| Assembly::Grch38)
-        .or_else(|| variant.grch37.as_ref().map(|_| Assembly::Grch37));
-    let raw_locus = variant
-        .grch38
-        .as_ref()
-        .or(variant.grch37.as_ref())
+    // Reuse the CLI's locus helper so wasm picks the same locus as the path-backed
+    // flow: detected GRCh37 → grch37 first; detected GRCh38 → grch38 first;
+    // None → grch37 first (CLI default for variant-only VCFs).
+    let raw_locus = bioscript_formats::choose_variant_locus_for_assembly(variant, detected_assembly)
         .ok_or_else(|| {
             RuntimeError::Io(format!(
                 "variant {} has no GRCh37/GRCh38 locus",
@@ -230,6 +238,15 @@ fn observe_vcf_variant<R: std::io::Read + std::io::Seek>(
                     .unwrap_or("variant")
             ))
         })?;
+    let assembly = detected_assembly.or_else(|| {
+        if variant.grch37.as_ref().is_some_and(|l| l == &raw_locus) {
+            Some(Assembly::Grch37)
+        } else if variant.grch38.as_ref().is_some_and(|l| l == &raw_locus) {
+            Some(Assembly::Grch38)
+        } else {
+            None
+        }
+    });
     let locus = GenomicLocus {
         chrom: raw_locus.chrom.clone(),
         start: raw_locus.start,
diff --git a/rust/bioscript-wasm/src/report_workspace.rs b/rust/bioscript-wasm/src/report_workspace.rs
index e075abd..6efd6ca 100644
--- a/rust/bioscript-wasm/src/report_workspace.rs
+++ b/rust/bioscript-wasm/src/report_workspace.rs
@@ -64,7 +64,7 @@ impl PackageWorkspace {
             .map_err(|err| JsError::new(&format!("failed to parse YAML {path}: {err}")))
     }
 
-    fn schema(&self, path: &str) -> Result<String, JsError> {
+    pub(super) fn schema(&self, path: &str) -> Result<String, JsError> {
         self.yaml(path)?
             .get("schema")
             .and_then(serde_yaml::Value::as_str)
@@ -77,17 +77,17 @@ impl PackageWorkspace {
         normalize_package_path(&base.join(relative).display().to_string())
     }
 
-    fn load_variant(&self, path: &str) -> Result<VariantManifest, JsError> {
+    pub(super) fn load_variant(&self, path: &str) -> Result<VariantManifest, JsError> {
         load_variant_manifest_text(path, self.text(path)?)
             .map_err(|err| JsError::new(&format!("load variant {path}: {err}")))
     }
 
-    fn load_panel(&self, path: &str) -> Result<PanelManifest, JsError> {
+    pub(super) fn load_panel(&self, path: &str) -> Result<PanelManifest, JsError> {
         load_panel_manifest_text(path, self.text(path)?)
             .map_err(|err| JsError::new(&format!("load panel {path}: {err}")))
     }
 
-    fn load_assay(&self, path: &str) -> Result<AssayManifest, JsError> {
+    pub(super) fn load_assay(&self, path: &str) -> Result<AssayManifest, JsError> {
         load_assay_manifest_text(path, self.text(path)?)
             .map_err(|err| JsError::new(&format!("load assay {path}: {err}")))
     }