diff --git a/rust/bioscript-cli/src/main.rs b/rust/bioscript-cli/src/main.rs index 8f7a9b9..bf99c12 100644 --- a/rust/bioscript-cli/src/main.rs +++ b/rust/bioscript-cli/src/main.rs @@ -7,6 +7,5 @@ include!("report_options.rs"); include!("package.rs"); include!("report_review.rs"); include!("report_execution.rs"); -include!("report_observations.rs"); include!("report_output.rs"); include!("manifest_runner.rs"); diff --git a/rust/bioscript-cli/src/report_execution.rs b/rust/bioscript-cli/src/report_execution.rs index df67b19..a14dcfe 100644 --- a/rust/bioscript-cli/src/report_execution.rs +++ b/rust/bioscript-cli/src/report_execution.rs @@ -1,43 +1,3 @@ -fn run_manifest_rows_for_report( - runtime_root: &Path, - manifest_path: &Path, - input_file: &Path, - participant_id: &str, - loader: &GenotypeLoadOptions, - filters: &[String], -) -> Result>, String> { - let input_text = input_file.display().to_string(); - let store = GenotypeStore::from_file_with_options(Path::new(&input_text), loader) - .map_err(|err| err.to_string())?; - let workspace = bioscript_reporting::FilesystemManifestWorkspace::new(runtime_root); - let manifest_path_text = manifest_path.display().to_string(); - let tasks = - bioscript_reporting::collect_variant_manifest_tasks(&workspace, &manifest_path_text, filters)?; - let observations = store - .lookup_variants( - &tasks - .iter() - .map(|task| task.manifest.spec.clone()) - .collect::>(), - ) - .map_err(|err| err.to_string())?; - Ok(tasks - .into_iter() - .zip(observations) - .map(|(task, observation)| { - let resolved = Path::new(&task.manifest_path); - variant_row( - runtime_root, - resolved, - &task.manifest.name, - &task.manifest.tags, - &observation, - Some(participant_id), - ) - }) - .collect()) -} - struct ReportAnalysisOptions<'a> { runtime_root: &'a Path, input_file: &'a Path, @@ -45,28 +5,42 @@ struct ReportAnalysisOptions<'a> { loader: &'a GenotypeLoadOptions, output_dir: &'a Path, observation_rows: &'a [BTreeMap], - filters: &'a [String], 
max_duration_ms: u64, } -fn run_manifest_analyses_for_report( - manifest_path: &Path, - options: &ReportAnalysisOptions<'_>, -) -> Result, String> { - let workspace = bioscript_reporting::FilesystemManifestWorkspace::new(options.runtime_root); - let manifest_path_text = manifest_path.display().to_string(); - let mut analyses = Vec::new(); - for task in - bioscript_reporting::collect_analysis_manifest_tasks(&workspace, &manifest_path_text, options.filters)? - { - analyses.extend(run_interpretations_for_report( +struct CliReportAnalysisRunner<'a> { + runtime_root: &'a Path, + input_file: &'a Path, + participant_id: &'a str, + loader: &'a GenotypeLoadOptions, + output_dir: &'a Path, + max_duration_ms: u64, +} + +impl bioscript_reporting::ReportAnalysisRunner for CliReportAnalysisRunner<'_> { + fn run_analysis_task( + &self, + task: &bioscript_reporting::AnalysisManifestTask, + observation_rows: &[BTreeMap], + _variant_observations: &[bioscript_core::VariantObservation], + _observations: &[serde_json::Value], + ) -> Result, String> { + let options = ReportAnalysisOptions { + runtime_root: self.runtime_root, + input_file: self.input_file, + participant_id: self.participant_id, + loader: self.loader, + output_dir: self.output_dir, + observation_rows, + max_duration_ms: self.max_duration_ms, + }; + run_interpretations_for_report( Path::new(&task.manifest_path), &task.manifest_name, &task.interpretations, - options, - )?); + &options, + ) } - Ok(analyses) } fn run_interpretations_for_report( @@ -317,7 +291,6 @@ mod app_report_execution_tests { loader: &loader, output_dir: &dir, observation_rows: &[], - filters: &[], max_duration_ms: 10, }; @@ -329,59 +302,6 @@ mod app_report_execution_tests { fs::remove_dir_all(dir).unwrap(); } - #[test] - fn run_manifest_rows_for_report_reads_text_input_and_variant_manifest() { - let dir = temp_dir("manifest-rows"); - let manifest = dir.join("variant.yaml"); - fs::write( - &manifest, - r#" -schema: bioscript:variant:1.0 -version: "1.0" 
-name: rs1 -gene: ABC -identifiers: - rsids: [rs1] -coordinates: - grch38: - chrom: "1" - pos: 100 -alleles: - kind: snv - ref: A - alts: [G] -"#, - ) - .unwrap(); - let input = dir.join("sample.txt"); - fs::write(&input, "rsid\tgenotype\nrs1\tA/G\n").unwrap(); - let loader = GenotypeLoadOptions { - format: Some(GenotypeSourceFormat::Text), - ..GenotypeLoadOptions::default() - }; - - let rows = - run_manifest_rows_for_report(&dir, &manifest, &input, "p1", &loader, &[]).unwrap(); - assert_eq!(rows.len(), 1); - assert_eq!(rows[0]["participant_id"], "p1"); - assert_eq!(rows[0]["matched_rsid"], "rs1"); - assert_eq!(rows[0]["genotype"], "AG"); - - let missing_input = dir.join("missing.txt"); - assert!(run_manifest_rows_for_report( - &dir, - &manifest, - &missing_input, - "p1", - &loader, - &[], - ) - .unwrap_err() - .contains("No such file")); - - fs::remove_dir_all(dir).unwrap(); - } - #[test] fn run_interpretations_executes_bioscript_analysis_and_builds_json_output() { let dir = temp_dir("analysis-success"); @@ -426,7 +346,6 @@ if __name__ == "__main__": loader: &loader, output_dir: &output, observation_rows: &rows, - filters: &[], max_duration_ms: 1000, }; diff --git a/rust/bioscript-cli/src/report_observations.rs b/rust/bioscript-cli/src/report_observations.rs deleted file mode 100644 index b31a65b..0000000 --- a/rust/bioscript-cli/src/report_observations.rs +++ /dev/null @@ -1,206 +0,0 @@ -fn app_observation_from_manifest_row( - runtime_root: &Path, - row: &BTreeMap, - assay_id: &str, - inferred_sex: Option<&SexInference>, - fallback_assembly: Option, -) -> Result { - let row_path = row.get("path").cloned().unwrap_or_default(); - let manifest_path = if Path::new(&row_path).is_absolute() { - PathBuf::from(&row_path) - } else { - runtime_root.join(&row_path) - }; - let manifest = load_variant_manifest(&manifest_path)?; - let gene = variant_manifest_gene(&manifest_path)?; - let observed_alt_alleles = variant_observed_alt_alleles(&manifest_path)?; - let source = 
variant_primary_source(&manifest_path)?; - Ok(bioscript_reporting::app_observation_from_manifest_row( - bioscript_reporting::AppObservationInput { - row, - row_path: &row_path, - assay_id, - manifest, - gene, - source, - observed_alt_alleles, - inferred_sex, - fallback_assembly, - }, - )) -} - -fn load_yaml_value(path: &Path) -> Result { - let text = fs::read_to_string(path) - .map_err(|err| format!("failed to read YAML {}: {err}", path.display()))?; - serde_yaml::from_str(&text) - .map_err(|err| format!("failed to parse YAML {}: {err}", path.display())) -} - -fn variant_primary_source(path: &Path) -> Result { - let value = load_yaml_value(path)?; - let mut links = BTreeMap::::new(); - bioscript_reporting::collect_manifest_provenance_entries(&value, &mut links)?; - if let Some(source) = links - .values() - .find(|source| source_url_contains(source, "ncbi.nlm.nih.gov/snp/rs")) - { - return Ok(source.clone()); - } - if let Some(rsid) = value - .get("identifiers") - .and_then(|identifiers| identifiers.get("rsids")) - .and_then(serde_yaml::Value::as_sequence) - .and_then(|items| items.iter().find_map(serde_yaml::Value::as_str)) - { - return Ok(serde_json::json!({ - "kind": "database", - "label": "dbSNP / NCBI SNP", - "url": format!("https://www.ncbi.nlm.nih.gov/snp/{rsid}"), - "fields": ["identifiers.rsids"], - })); - } - Ok(links.into_values().next().unwrap_or(serde_json::Value::Null)) -} - -fn source_url_contains(source: &serde_json::Value, needle: &str) -> bool { - source - .get("url") - .and_then(serde_json::Value::as_str) - .is_some_and(|url| url.contains(needle)) -} - -fn variant_manifest_gene(path: &Path) -> Result { - let text = fs::read_to_string(path) - .map_err(|err| format!("failed to read variant YAML {}: {err}", path.display()))?; - let value: serde_yaml::Value = serde_yaml::from_str(&text) - .map_err(|err| format!("failed to parse variant YAML {}: {err}", path.display()))?; - Ok(value - .as_mapping() - .and_then(|mapping| 
mapping.get(serde_yaml::Value::String("gene".to_owned()))) - .and_then(serde_yaml::Value::as_str) - .unwrap_or_default() - .to_owned()) -} - -fn variant_observed_alt_alleles(path: &Path) -> Result, String> { - let text = fs::read_to_string(path) - .map_err(|err| format!("failed to read variant YAML {}: {err}", path.display()))?; - let value: serde_yaml::Value = serde_yaml::from_str(&text) - .map_err(|err| format!("failed to parse variant YAML {}: {err}", path.display()))?; - let Some(items) = value - .as_mapping() - .and_then(|mapping| mapping.get(serde_yaml::Value::String("alleles".to_owned()))) - .and_then(serde_yaml::Value::as_mapping) - .and_then(|mapping| { - mapping - .get(serde_yaml::Value::String("observed_alts".to_owned())) - }) - .and_then(serde_yaml::Value::as_sequence) - else { - return Ok(Vec::new()); - }; - Ok(items - .iter() - .filter_map(serde_yaml::Value::as_str) - .map(ToOwned::to_owned) - .collect()) -} - -#[cfg(test)] -mod app_report_observation_tests { - use super::*; - use std::time::{SystemTime, UNIX_EPOCH}; - - fn temp_dir(name: &str) -> PathBuf { - let unique = SystemTime::now() - .duration_since(UNIX_EPOCH) - .unwrap() - .as_nanos(); - let dir = env::temp_dir().join(format!( - "bioscript-report-observations-{name}-{}-{unique}", - std::process::id() - )); - fs::create_dir_all(&dir).unwrap(); - dir - } - - fn write_variant_yaml(dir: &Path, name: &str, body: &str) -> PathBuf { - let path = dir.join(name); - fs::write(&path, body).unwrap(); - path - } - - #[test] - fn manifest_helpers_extract_gene_alts_and_primary_source() { - let dir = temp_dir("variant"); - let path = write_variant_yaml( - &dir, - "variant.yaml", - r" -schema: bioscript:variant:1.0 -name: Test variant -gene: CYP2D6 -identifiers: - rsids: [rs123] -alleles: - observed_alts: [A, T] -evidence: - references: - - label: Primary - url: https://www.ncbi.nlm.nih.gov/snp/rs123 -", - ); - - assert_eq!(variant_manifest_gene(&path).unwrap(), "CYP2D6"); - assert_eq!( - 
variant_observed_alt_alleles(&path).unwrap(), - vec!["A".to_owned(), "T".to_owned()] - ); - let source = variant_primary_source(&path).unwrap(); - assert!(source["url"].as_str().unwrap().contains("rs123")); - assert!(source_url_contains(&source, "ncbi.nlm.nih.gov/snp")); - - fs::remove_dir_all(dir).unwrap(); - } - - #[test] - fn primary_source_falls_back_to_rsid_when_no_provenance_link_exists() { - let dir = temp_dir("rsid"); - let path = write_variant_yaml( - &dir, - "variant.yaml", - r" -schema: bioscript:variant:1.0 -name: Test variant -identifiers: - rsids: - - rs4242 -", - ); - - let source = variant_primary_source(&path).unwrap(); - assert_eq!(source["label"], "dbSNP / NCBI SNP"); - assert_eq!(source["url"], "https://www.ncbi.nlm.nih.gov/snp/rs4242"); - - fs::remove_dir_all(dir).unwrap(); - } - - #[test] - fn yaml_helpers_report_read_and_parse_errors() { - let dir = temp_dir("errors"); - let missing = dir.join("missing.yaml"); - assert!(load_yaml_value(&missing).unwrap_err().contains("failed to read YAML")); - - let bad = write_variant_yaml(&dir, "bad.yaml", "name: [unterminated"); - assert!(load_yaml_value(&bad).unwrap_err().contains("failed to parse YAML")); - assert!(variant_manifest_gene(&bad) - .unwrap_err() - .contains("failed to parse variant YAML")); - - let no_alts = write_variant_yaml(&dir, "no-alts.yaml", "gene: ABC\n"); - assert!(variant_observed_alt_alleles(&no_alts).unwrap().is_empty()); - - fs::remove_dir_all(dir).unwrap(); - } -} diff --git a/rust/bioscript-cli/src/report_options.rs b/rust/bioscript-cli/src/report_options.rs index fdd41c7..d89c4b0 100644 --- a/rust/bioscript-cli/src/report_options.rs +++ b/rust/bioscript-cli/src/report_options.rs @@ -237,8 +237,6 @@ fn generate_app_report(options: &AppReportOptions) -> Result<(), String> { let manifest_workspace = bioscript_reporting::FilesystemManifestWorkspace::new(&options.root); let manifest_path = options.manifest_path.display().to_string(); - let manifest_context = - 
bioscript_reporting::load_report_manifest_context(&manifest_workspace, &manifest_path)?; let mut observations = Vec::new(); let mut analyses = Vec::new(); let mut reports = Vec::new(); @@ -257,59 +255,39 @@ fn generate_app_report(options: &AppReportOptions) -> Result<(), String> { input_inspection.inferred_sex = Some(explicit_sample_sex_inference(sample_sex)); } let input_loader = loader_with_inspection(&options.loader, &input_inspection); - let rows = run_manifest_rows_for_report( - &options.root, - &options.manifest_path, - input_file, - &participant_id, - &input_loader, - &options.filters, - )?; - let input_observations = rows - .iter() - .map(|row| { - app_observation_from_manifest_row( - &options.root, - row, - &manifest_context.assay_id, - input_inspection.inferred_sex.as_ref(), - input_inspection.assembly, - ) - }) - .collect::, _>>()?; - observations.extend(input_observations.clone()); - let analysis_options = ReportAnalysisOptions { + let store = GenotypeStore::from_file_with_options(input_file, &input_loader) + .map_err(|err| err.to_string())?; + let analysis_runner = CliReportAnalysisRunner { runtime_root: &options.root, input_file, participant_id: &participant_id, loader: &input_loader, output_dir: &options.output_dir, - observation_rows: &rows, - filters: &options.filters, max_duration_ms: options.analysis_max_duration_ms, }; - let input_analyses = - run_manifest_analyses_for_report(&options.manifest_path, &analysis_options)?; - analyses.extend(input_analyses.clone()); let input_file_name = input_file .file_name() .and_then(|value| value.to_str()) .unwrap_or_default(); let input_file_path = input_file.display().to_string(); - reports.push(bioscript_reporting::app_input_report_json( - bioscript_reporting::AppInputReportInput { - assay_id: &manifest_context.assay_id, - participant_id: &participant_id, - input_file_name, - input_file_path: &input_file_path, - observations: &input_observations, - analyses: &input_analyses, - findings: 
&manifest_context.findings, - provenance: &manifest_context.provenance, - input_inspection: Some(&input_inspection), - manifest_metadata: &manifest_context.manifest_metadata, + let run = bioscript_reporting::run_report( + &manifest_workspace, + &manifest_path, + &store, + &analysis_runner, + bioscript_reporting::ReportInputContext { + participant_id: &participant_id, + input_file_name, + input_file_path: &input_file_path, + input_inspection: Some(&input_inspection), }, - )); + bioscript_reporting::ReportRunOptions { + filters: &options.filters, + }, + )?; + observations.extend(run.observations); + analyses.extend(run.analyses); + reports.push(run.report); } write_app_observations( diff --git a/rust/bioscript-cli/src/report_review.rs b/rust/bioscript-cli/src/report_review.rs index d4f96d6..7d033da 100644 --- a/rust/bioscript-cli/src/report_review.rs +++ b/rust/bioscript-cli/src/report_review.rs @@ -13,6 +13,12 @@ struct ReviewCase { genotypes: BTreeMap>, } +struct ReviewCaseReport { + observations: Vec, + analyses: Vec, + report: serde_json::Value, +} + fn run_review_report(args: Vec) -> Result<(), String> { let cwd = env::current_dir().map_err(|err| format!("failed to get cwd: {err}"))?; let mut manifest_path: Option = None; @@ -83,71 +89,30 @@ fn generate_review_report(options: &ReviewReportOptions) -> Result<(), String> { let manifest_workspace = bioscript_reporting::FilesystemManifestWorkspace::new(&options.root); let manifest_path = options.manifest_path.display().to_string(); - let manifest_context = - bioscript_reporting::load_report_manifest_context(&manifest_workspace, &manifest_path)?; let cases = load_review_cases(&options.cases_path)?; let mut observations = Vec::new(); let mut analyses = Vec::new(); let mut reports = Vec::new(); + let review_temp_dir = options.output_dir.join(".review-temp"); + fs::create_dir_all(&review_temp_dir).map_err(|err| { + format!( + "failed to create review temp dir {}: {err}", + review_temp_dir.display() + ) + })?; for case 
in cases { - let input_bytes = review_case_genotype_text(&case); - let store = GenotypeStore::from_bytes(&format!("{}.txt", case.id), input_bytes.as_bytes()) - .map_err(|err| err.to_string())?; - let input_observations = run_manifest_rows_with_store( - &options.root, - &options.manifest_path, - &store, - &case.id, - &options.filters, - )? - .iter() - .map(|row| { - app_observation_from_manifest_row( - &options.root, - row, - &manifest_context.assay_id, - None, - None, - ) - }) - .collect::, _>>()?; - observations.extend(input_observations.clone()); - - let input_analyses = run_review_analyses(options, &case, &input_bytes)?; - analyses.extend(input_analyses.clone()); - let synthetic_input = PathBuf::from(format!("review://{}", case.id)); - let synthetic_input_name = synthetic_input - .file_name() - .and_then(|value| value.to_str()) - .unwrap_or_default(); - let synthetic_input_path = synthetic_input.display().to_string(); - let mut report = bioscript_reporting::app_input_report_json( - bioscript_reporting::AppInputReportInput { - assay_id: &manifest_context.assay_id, - participant_id: &case.id, - input_file_name: synthetic_input_name, - input_file_path: &synthetic_input_path, - observations: &input_observations, - analyses: &input_analyses, - findings: &manifest_context.findings, - provenance: &manifest_context.provenance, - input_inspection: None, - manifest_metadata: &manifest_context.manifest_metadata, - }, - ); - if let Some(object) = report.as_object_mut() { - object.insert( - "review_case".to_owned(), - serde_json::json!({ - "id": case.id, - "label": case.label, - }), - ); - } - reports.push(report); + let case_report = generate_review_case_report( + options, + &manifest_workspace, + &manifest_path, + &review_temp_dir, + &case, + )?; + observations.extend(case_report.observations); + analyses.extend(case_report.analyses); + reports.push(case_report.report); } - let review_temp_dir = options.output_dir.join(".review-temp"); if review_temp_dir.exists() { 
fs::remove_dir_all(&review_temp_dir).map_err(|err| { format!( @@ -173,71 +138,79 @@ fn generate_review_report(options: &ReviewReportOptions) -> Result<(), String> { Ok(()) } -fn run_manifest_rows_with_store( - runtime_root: &Path, - manifest_path: &Path, - store: &GenotypeStore, - participant_id: &str, - filters: &[String], -) -> Result>, String> { - match manifest_schema(manifest_path)?.as_str() { - "bioscript:variant:1.0" | "bioscript:variant" => { - let manifest = load_variant_manifest(manifest_path)?; - Ok(vec![run_variant_manifest_with_store( - runtime_root, - &manifest, - store, - Some(participant_id), - )?]) - } - "bioscript:panel:1.0" => { - let manifest = load_panel_manifest(manifest_path)?; - run_panel_manifest_with_store(runtime_root, &manifest, store, Some(participant_id), filters) - } - "bioscript:assay:1.0" => { - let manifest = load_assay_manifest(manifest_path)?; - run_assay_manifest_with_store(runtime_root, &manifest, store, Some(participant_id), filters) - } - other => Err(format!("unsupported manifest schema '{other}'")), - } -} - -fn run_review_analyses( +fn generate_review_case_report( options: &ReviewReportOptions, + manifest_workspace: &bioscript_reporting::FilesystemManifestWorkspace, + manifest_path: &str, + review_temp_dir: &Path, case: &ReviewCase, - input_bytes: &str, -) -> Result, String> { - let temp_dir = options.output_dir.join(".review-temp"); - fs::create_dir_all(&temp_dir).map_err(|err| { +) -> Result { + let input_bytes = review_case_genotype_text(case); + let store = GenotypeStore::from_bytes(&format!("{}.txt", case.id), input_bytes.as_bytes()) + .map_err(|err| err.to_string())?; + let temp_path = review_temp_dir.join(format!("{}.txt", case.id)); + fs::write(&temp_path, &input_bytes).map_err(|err| { format!( - "failed to create review temp dir {}: {err}", - temp_dir.display() + "failed to write review temp input {}: {err}", + temp_path.display() ) })?; - let temp_path = temp_dir.join(format!("{}.txt", case.id)); - 
fs::write(&temp_path, input_bytes) - .map_err(|err| format!("failed to write review temp input {}: {err}", temp_path.display()))?; let loader = GenotypeLoadOptions { format: Some(GenotypeSourceFormat::Text), ..GenotypeLoadOptions::default() }; - let observation_rows = Vec::new(); - let analysis_options = ReportAnalysisOptions { + let analysis_runner = CliReportAnalysisRunner { runtime_root: &options.root, input_file: &temp_path, participant_id: &case.id, loader: &loader, output_dir: &options.output_dir, - observation_rows: &observation_rows, - filters: &options.filters, max_duration_ms: 1_000, }; - let result = run_manifest_analyses_for_report(&options.manifest_path, &analysis_options); + let synthetic_input = PathBuf::from(format!("review://{}", case.id)); + let synthetic_input_name = synthetic_input + .file_name() + .and_then(|value| value.to_str()) + .unwrap_or_default(); + let synthetic_input_path = synthetic_input.display().to_string(); + let run_result = bioscript_reporting::run_report( + manifest_workspace, + manifest_path, + &store, + &analysis_runner, + bioscript_reporting::ReportInputContext { + participant_id: &case.id, + input_file_name: synthetic_input_name, + input_file_path: &synthetic_input_path, + input_inspection: None, + }, + bioscript_reporting::ReportRunOptions { + filters: &options.filters, + }, + ); let cleanup = fs::remove_file(&temp_path); if let Err(err) = cleanup { - return Err(format!("failed to remove review temp input {}: {err}", temp_path.display())); + return Err(format!( + "failed to remove review temp input {}: {err}", + temp_path.display() + )); + } + let run = run_result?; + let mut report = run.report; + if let Some(object) = report.as_object_mut() { + object.insert( + "review_case".to_owned(), + serde_json::json!({ + "id": case.id, + "label": case.label, + }), + ); } - result + Ok(ReviewCaseReport { + observations: run.observations, + analyses: run.analyses, + report, + }) } fn load_review_cases(path: &Path) -> Result, String> 
{ diff --git a/rust/bioscript-formats/src/inspect.rs b/rust/bioscript-formats/src/inspect.rs index f282fe0..b94fe05 100644 --- a/rust/bioscript-formats/src/inspect.rs +++ b/rust/bioscript-formats/src/inspect.rs @@ -42,7 +42,7 @@ pub(crate) use io::*; pub(crate) use render::*; pub use sex::{ InferredSex, SexDetectionConfidence, SexInference, infer_sex_from_alignment_reader, - infer_sex_from_text_lines, + infer_sex_from_named_reader, infer_sex_from_text_lines, }; #[derive(Debug, Clone, Copy, PartialEq, Eq)] diff --git a/rust/bioscript-formats/src/inspect/sex.rs b/rust/bioscript-formats/src/inspect/sex.rs index 5057165..886e07c 100644 --- a/rust/bioscript-formats/src/inspect/sex.rs +++ b/rust/bioscript-formats/src/inspect/sex.rs @@ -123,6 +123,20 @@ pub(crate) fn infer_sex_from_path( infer_sex_from_reader(BufReader::new(file), kind) } +pub fn infer_sex_from_named_reader( + name: &str, + reader: R, + kind: DetectedKind, +) -> Result { + if !supports_sex_detection(kind) { + return Ok(unsupported_sex_inference()); + } + if name.to_ascii_lowercase().ends_with(".vcf.gz") { + return infer_sex_from_reader(BufReader::new(MultiGzDecoder::new(reader)), kind); + } + infer_sex_from_reader(BufReader::new(reader), kind) +} + pub(crate) fn infer_sex_from_bytes( name: &str, bytes: &[u8], @@ -131,14 +145,7 @@ pub(crate) fn infer_sex_from_bytes( if !supports_sex_detection(kind) { return Ok(unsupported_sex_inference()); } - let lower = name.to_ascii_lowercase(); - if lower.ends_with(".vcf.gz") { - return infer_sex_from_reader( - BufReader::new(MultiGzDecoder::new(Cursor::new(bytes))), - kind, - ); - } - infer_sex_from_reader(BufReader::new(Cursor::new(bytes)), kind) + infer_sex_from_named_reader(name, Cursor::new(bytes), kind) } pub(crate) fn infer_sex_from_zip_bytes( diff --git a/rust/bioscript-formats/src/lib.rs b/rust/bioscript-formats/src/lib.rs index f96cd4e..fe74db4 100644 --- a/rust/bioscript-formats/src/lib.rs +++ b/rust/bioscript-formats/src/lib.rs @@ -22,6 +22,6 @@ pub use 
genotype::{ pub use inspect::{ DetectedKind, DetectionConfidence, FileContainer, FileInspection, InferredSex, InspectOptions, SexDetectionConfidence, SexInference, SourceMetadata, infer_sex_from_alignment_reader, - infer_sex_from_text_lines, inspect_bytes, inspect_file, + infer_sex_from_named_reader, infer_sex_from_text_lines, inspect_bytes, inspect_file, }; pub use prepare::{PrepareRequest, PreparedPaths, prepare_indexes, shell_flags}; diff --git a/rust/bioscript-reporting/src/artifacts.rs b/rust/bioscript-reporting/src/artifacts.rs index 61e7ca6..e9f34fc 100644 --- a/rust/bioscript-reporting/src/artifacts.rs +++ b/rust/bioscript-reporting/src/artifacts.rs @@ -1,3 +1,4 @@ +#[derive(Clone, Debug)] pub struct ReportArtifactTexts { pub observations_tsv: String, pub analysis_jsonl: String, diff --git a/rust/bioscript-reporting/src/lib.rs b/rust/bioscript-reporting/src/lib.rs index 45d2f47..ce089f2 100644 --- a/rust/bioscript-reporting/src/lib.rs +++ b/rust/bioscript-reporting/src/lib.rs @@ -8,6 +8,7 @@ mod matching; mod observation; mod report_json; mod rows; +mod runner; pub use analysis::{ AnalysisOutputFormat, AnalysisOutputJsonInput, analysis_observations_relative_file, @@ -38,3 +39,7 @@ pub use report_json::{ pub use rows::{ MANIFEST_ROW_TSV_HEADERS, render_manifest_rows_tsv, render_manifest_trace_tsv, variant_row, }; +pub use runner::{ + NoopReportAnalysisRunner, ReportAnalysisRunner, ReportInputContext, ReportRunOptions, + ReportRunResult, ReportVariantLookup, ReportWorkspace, run_report, +}; diff --git a/rust/bioscript-reporting/src/manifest/provenance.rs b/rust/bioscript-reporting/src/manifest/provenance.rs deleted file mode 100644 index 79d82f6..0000000 --- a/rust/bioscript-reporting/src/manifest/provenance.rs +++ /dev/null @@ -1,71 +0,0 @@ -use std::collections::BTreeMap; - -use super::{ - ManifestWorkspace, manifest_supports_findings, traversable_manifest_member_paths, yaml_string, - yaml_to_json, -}; - -pub fn load_manifest_provenance_links( - workspace: 
&impl ManifestWorkspace, - path: &str, -) -> Result, String> { - let value = workspace.load_yaml(path)?; - let schema = yaml_string(&value, "schema").unwrap_or_default(); - let mut links = BTreeMap::::new(); - collect_manifest_provenance_entries(&value, &mut links)?; - - if manifest_supports_findings(&schema) - && let Some(items) = value - .get("findings") - .and_then(serde_yaml::Value::as_sequence) - { - for item in items { - let json_item = yaml_to_json(item.clone())?; - let Some(include) = json_item.get("include").and_then(serde_json::Value::as_str) else { - continue; - }; - let include_path = workspace.resolve(path, include)?; - for item in load_manifest_provenance_links(workspace, &include_path)? { - if let Some(url) = item.get("url").and_then(serde_json::Value::as_str) { - links.entry(url.to_owned()).or_insert(item); - } - } - } - } - - for member_path in traversable_manifest_member_paths(&schema, &value) { - let resolved = workspace.resolve(path, member_path)?; - for item in load_manifest_provenance_links(workspace, &resolved)? 
{ - if let Some(url) = item.get("url").and_then(serde_json::Value::as_str) { - links.entry(url.to_owned()).or_insert(item); - } - } - } - - Ok(links.into_values().collect()) -} - -pub fn collect_manifest_provenance_entries( - value: &serde_yaml::Value, - links: &mut BTreeMap, -) -> Result<(), String> { - if let Some(sources) = value - .get("provenance") - .and_then(|provenance| provenance.get("sources")) - .and_then(serde_yaml::Value::as_sequence) - { - for source in sources { - let json = yaml_to_json(source.clone())?; - if let Some(url) = json.get("url").and_then(serde_json::Value::as_str) { - links.entry(url.to_owned()).or_insert(json); - } - } - } - if let Some(source) = value.get("source") { - let json = yaml_to_json(source.clone())?; - if let Some(url) = json.get("url").and_then(serde_json::Value::as_str) { - links.entry(url.to_owned()).or_insert(json); - } - } - Ok(()) -} diff --git a/rust/bioscript-reporting/src/runner.rs b/rust/bioscript-reporting/src/runner.rs new file mode 100644 index 0000000..4b1d742 --- /dev/null +++ b/rust/bioscript-reporting/src/runner.rs @@ -0,0 +1,455 @@ +use std::collections::BTreeMap; + +use bioscript_core::{Assembly, VariantObservation, VariantSpec}; +use bioscript_formats::SexInference; +use bioscript_schema::load_variant_manifest_text; + +use crate::{ + AppInputReportInput, FilesystemManifestWorkspace, ManifestWorkspace, app_input_report_json, + app_observation_from_manifest_row, collect_analysis_manifest_tasks, + collect_manifest_provenance_entries, collect_variant_manifest_tasks, + load_report_manifest_context, render_input_report_artifact_texts, variant_row, +}; + +pub trait ReportVariantLookup { + fn lookup_variants(&self, specs: &[VariantSpec]) -> Result, String>; +} + +pub trait ReportWorkspace: ManifestWorkspace { + fn app_observation_from_manifest_row( + &self, + row: &BTreeMap, + assay_id: &str, + inferred_sex: Option<&SexInference>, + fallback_assembly: Option, + ) -> Result; +} + +pub trait ReportAnalysisRunner { + 
fn run_analysis_task( + &self, + task: &crate::AnalysisManifestTask, + observation_rows: &[BTreeMap], + variant_observations: &[VariantObservation], + observations: &[serde_json::Value], + ) -> Result, String>; +} + +pub struct NoopReportAnalysisRunner; + +impl ReportAnalysisRunner for NoopReportAnalysisRunner { + fn run_analysis_task( + &self, + _task: &crate::AnalysisManifestTask, + _observation_rows: &[BTreeMap], + _variant_observations: &[VariantObservation], + _observations: &[serde_json::Value], + ) -> Result, String> { + Ok(Vec::new()) + } +} + +#[derive(Clone, Copy, Debug)] +pub struct ReportInputContext<'a> { + pub participant_id: &'a str, + pub input_file_name: &'a str, + pub input_file_path: &'a str, + pub input_inspection: Option<&'a bioscript_formats::FileInspection>, +} + +#[derive(Clone, Copy, Debug, Default)] +pub struct ReportRunOptions<'a> { + pub filters: &'a [String], +} + +#[derive(Clone, Debug)] +pub struct ReportRunResult { + pub observation_rows: Vec>, + pub variant_observations: Vec, + pub observations: Vec, + pub analyses: Vec, + pub report: serde_json::Value, + pub artifacts: crate::ReportArtifactTexts, +} + +pub fn run_report( + workspace: &impl ReportWorkspace, + manifest_path: &str, + lookup: &impl ReportVariantLookup, + analysis_runner: &impl ReportAnalysisRunner, + input: ReportInputContext<'_>, + options: ReportRunOptions<'_>, +) -> Result { + let manifest_context = load_report_manifest_context(workspace, manifest_path)?; + let variant_tasks = collect_variant_manifest_tasks(workspace, manifest_path, options.filters)?; + let variant_observations = lookup.lookup_variants( + &variant_tasks + .iter() + .map(|task| task.manifest.spec.clone()) + .collect::>(), + )?; + let observation_rows = variant_tasks + .into_iter() + .zip(variant_observations.iter()) + .map(|(task, observation)| { + variant_row( + &task.manifest_path, + &task.manifest.name, + &task.manifest.tags, + observation, + input.participant_id, + ) + }) + .collect::>(); + let 
observations = observation_rows + .iter() + .map(|row| { + workspace.app_observation_from_manifest_row( + row, + &manifest_context.assay_id, + input + .input_inspection + .and_then(|inspection| inspection.inferred_sex.as_ref()), + input + .input_inspection + .and_then(|inspection| inspection.assembly), + ) + }) + .collect::, _>>()?; + let analysis_variant_observations = + analysis_variant_observations(&variant_observations, &observations); + let mut analyses = Vec::new(); + for task in collect_analysis_manifest_tasks(workspace, manifest_path, options.filters)? { + analyses.extend(analysis_runner.run_analysis_task( + &task, + &observation_rows, + &analysis_variant_observations, + &observations, + )?); + } + let report_input = AppInputReportInput { + assay_id: &manifest_context.assay_id, + participant_id: input.participant_id, + input_file_name: input.input_file_name, + input_file_path: input.input_file_path, + observations: &observations, + analyses: &analyses, + findings: &manifest_context.findings, + provenance: &manifest_context.provenance, + input_inspection: input.input_inspection, + manifest_metadata: &manifest_context.manifest_metadata, + }; + let report = app_input_report_json(report_input); + let artifacts = render_input_report_artifact_texts(report_input)?; + Ok(ReportRunResult { + observation_rows, + variant_observations, + observations, + analyses, + report, + artifacts, + }) +} + +impl ReportVariantLookup for bioscript_formats::GenotypeStore { + fn lookup_variants(&self, specs: &[VariantSpec]) -> Result, String> { + bioscript_formats::GenotypeStore::lookup_variants(self, specs) + .map_err(|err| err.to_string()) + } +} + +impl ReportWorkspace for FilesystemManifestWorkspace { + fn app_observation_from_manifest_row( + &self, + row: &BTreeMap, + assay_id: &str, + inferred_sex: Option<&SexInference>, + fallback_assembly: Option, + ) -> Result { + let row_path = row.get("path").cloned().unwrap_or_default(); + let text = self.load_text(&row_path)?; + let 
manifest = load_variant_manifest_text(&row_path, &text)?; + let value = self.load_yaml(&row_path)?; + Ok(app_observation_from_manifest_row( + crate::AppObservationInput { + row, + row_path: &row_path, + assay_id, + manifest, + gene: yaml_string(&value, "gene").unwrap_or_default(), + source: variant_primary_source_from_yaml(&value)?, + observed_alt_alleles: variant_observed_alt_alleles_from_yaml(&value), + inferred_sex, + fallback_assembly, + }, + )) + } +} + +fn variant_primary_source_from_yaml( + value: &serde_yaml::Value, +) -> Result { + let mut links = BTreeMap::::new(); + collect_manifest_provenance_entries(value, &mut links)?; + if let Some(source) = links + .values() + .find(|source| source_url_contains(source, "ncbi.nlm.nih.gov/snp/rs")) + { + return Ok(source.clone()); + } + if let Some(rsid) = value + .get("identifiers") + .and_then(|identifiers| identifiers.get("rsids")) + .and_then(serde_yaml::Value::as_sequence) + .and_then(|items| items.iter().find_map(serde_yaml::Value::as_str)) + { + return Ok(serde_json::json!({ + "kind": "database", + "label": "dbSNP / NCBI SNP", + "url": format!("https://www.ncbi.nlm.nih.gov/snp/{rsid}"), + "fields": ["identifiers.rsids"], + })); + } + Ok(links + .into_values() + .next() + .unwrap_or(serde_json::Value::Null)) +} + +fn source_url_contains(source: &serde_json::Value, needle: &str) -> bool { + source + .get("url") + .and_then(serde_json::Value::as_str) + .is_some_and(|url| url.contains(needle)) +} + +fn variant_observed_alt_alleles_from_yaml(value: &serde_yaml::Value) -> Vec { + value + .get("alleles") + .and_then(serde_yaml::Value::as_mapping) + .and_then(|mapping| mapping.get(serde_yaml::Value::String("observed_alts".to_owned()))) + .and_then(serde_yaml::Value::as_sequence) + .into_iter() + .flatten() + .filter_map(serde_yaml::Value::as_str) + .map(ToOwned::to_owned) + .collect() +} + +fn yaml_string(value: &serde_yaml::Value, key: &str) -> Option { + value + .get(key) + .and_then(serde_yaml::Value::as_str) + 
.map(ToOwned::to_owned) +} + +fn analysis_variant_observations( + variant_observations: &[VariantObservation], + observations: &[serde_json::Value], +) -> Vec { + variant_observations + .iter() + .map(|observation| { + let mut observation = observation.clone(); + if let Some(app_observation) = matching_app_observation(&observation, observations) + && let Some(genotype_display) = app_observation + .get("genotype_display") + .and_then(serde_json::Value::as_str) + .filter(|value| !value.is_empty() && *value != "??") + { + observation.genotype = Some(genotype_display.to_owned()); + } + observation + }) + .collect() +} + +fn matching_app_observation<'a>( + observation: &VariantObservation, + observations: &'a [serde_json::Value], +) -> Option<&'a serde_json::Value> { + let matched_rsid = observation.matched_rsid.as_deref()?; + observations.iter().find(|app_observation| { + app_observation + .get("rsid") + .and_then(serde_json::Value::as_str) + == Some(matched_rsid) + }) +} + +#[cfg(test)] +mod tests { + use std::{cell::Cell, collections::BTreeMap}; + + use bioscript_core::{VariantObservation, VariantSpec}; + use bioscript_formats::SexInference; + + use super::{ + ReportAnalysisRunner, ReportInputContext, ReportRunOptions, ReportVariantLookup, + ReportWorkspace, run_report, + }; + use crate::ManifestWorkspace; + + struct MapWorkspace { + files: BTreeMap, + } + + impl ManifestWorkspace for MapWorkspace { + fn load_text(&self, path: &str) -> Result { + self.files + .get(path) + .cloned() + .ok_or_else(|| format!("missing file: {path}")) + } + + fn load_yaml(&self, path: &str) -> Result { + serde_yaml::from_str(&self.load_text(path)?).map_err(|err| err.to_string()) + } + + fn resolve(&self, base: &str, relative: &str) -> Result { + let base = std::path::Path::new(base) + .parent() + .unwrap_or_else(|| std::path::Path::new("")); + Ok(base.join(relative).display().to_string()) + } + } + + impl ReportWorkspace for MapWorkspace { + fn app_observation_from_manifest_row( + &self, 
+ row: &BTreeMap, + assay_id: &str, + _inferred_sex: Option<&SexInference>, + _fallback_assembly: Option, + ) -> Result { + Ok(serde_json::json!({ + "assay_id": assay_id, + "participant_id": row.get("participant_id").cloned().unwrap_or_default(), + "path": row.get("path").cloned().unwrap_or_default(), + "rsid": row.get("matched_rsid").cloned().unwrap_or_default(), + "genotype_display": row.get("genotype").cloned().unwrap_or_default(), + })) + } + } + + struct StaticLookup; + + impl ReportVariantLookup for StaticLookup { + fn lookup_variants( + &self, + specs: &[VariantSpec], + ) -> Result, String> { + Ok(specs + .iter() + .map(|spec| VariantObservation { + backend: "test".to_owned(), + matched_rsid: spec.rsids.first().cloned(), + genotype: Some("AG".to_owned()), + ..VariantObservation::default() + }) + .collect()) + } + } + + struct CountingAnalysis { + calls: Cell, + } + + impl ReportAnalysisRunner for CountingAnalysis { + fn run_analysis_task( + &self, + task: &crate::AnalysisManifestTask, + observation_rows: &[BTreeMap], + variant_observations: &[VariantObservation], + observations: &[serde_json::Value], + ) -> Result, String> { + self.calls.set(self.calls.get() + 1); + assert_eq!(task.manifest_name, "panel"); + assert_eq!(observation_rows.len(), 1); + assert_eq!(variant_observations.len(), 1); + assert_eq!(observations.len(), 1); + Ok(vec![serde_json::json!({ + "participant_id": "sample", + "assay_id": task.manifest_name, + "rows": [{"score": 1}], + })]) + } + } + + #[test] + fn run_report_collects_rows_observations_analyses_report_and_artifacts() { + let workspace = MapWorkspace { + files: BTreeMap::from([ + ( + "panel.yaml".to_owned(), + r#" +schema: bioscript:panel:1.0 +version: "1.0" +name: panel +members: + - kind: variant + path: rs1.yaml +interpretations: + - id: score + kind: bioscript + path: score.bs + derived_from: [observations] + emits: + - key: score + label: Score + value_type: integer + format: number +"# + .to_owned(), + ), + ( + 
"rs1.yaml".to_owned(), + r#" +schema: bioscript:variant:1.0 +version: "1.0" +name: rs1 +identifiers: + rsids: [rs1] +coordinates: + grch38: + chrom: "1" + pos: 100 +alleles: + kind: snv + ref: A + alts: [G] +"# + .to_owned(), + ), + ]), + }; + let analysis = CountingAnalysis { + calls: Cell::new(0), + }; + + let result = run_report( + &workspace, + "panel.yaml", + &StaticLookup, + &analysis, + ReportInputContext { + participant_id: "sample", + input_file_name: "sample.txt", + input_file_path: "sample.txt", + input_inspection: None, + }, + ReportRunOptions { filters: &[] }, + ) + .unwrap(); + + assert_eq!(analysis.calls.get(), 1); + assert_eq!(result.observation_rows.len(), 1); + assert_eq!(result.observations[0]["assay_id"], "panel"); + assert_eq!(result.analyses[0]["assay_id"], "panel"); + assert_eq!(result.report["participant_id"], "sample"); + assert!(result.artifacts.observations_tsv.contains("sample")); + assert!(result.artifacts.analysis_jsonl.contains("\"score\"")); + assert!(result.artifacts.reports_jsonl.contains("\"panel\"")); + assert!(result.artifacts.html.contains("")); + } +} diff --git a/rust/bioscript-wasm/src/lib.rs b/rust/bioscript-wasm/src/lib.rs index f057ac9..7b80724 100644 --- a/rust/bioscript-wasm/src/lib.rs +++ b/rust/bioscript-wasm/src/lib.rs @@ -5,8 +5,8 @@ //! Current surface: //! - `inspectBytes(name, bytes, options)` — file classification / vendor sniff //! - `lookupCramVariants(cramReadAt, cramLen, craiBytes, fastaReadAt, fastaLen, -//! faiBytes, variantsJson)` — SNP lookups against an indexed CRAM + FASTA -//! through JS-supplied random-read callbacks. +//! faiBytes, variantsJson)` — SNP lookups against an indexed CRAM + FASTA +//! through JS-supplied random-read callbacks. //! //! Pending (see migration checklist in the architecture doc): //! 
- `loadGenotypesBytes(name, bytes)` / `lookupVariants(storeId, planJson)` diff --git a/rust/bioscript-wasm/src/report_api.rs b/rust/bioscript-wasm/src/report_api.rs index 459c621..c0d1299 100644 --- a/rust/bioscript-wasm/src/report_api.rs +++ b/rust/bioscript-wasm/src/report_api.rs @@ -17,8 +17,6 @@ use monty::{MontyObject, ResourceLimits}; use serde::{Deserialize, Serialize}; use wasm_bindgen::prelude::*; -#[path = "report_api/analysis_cache.rs"] -mod analysis_cache; #[path = "report_helpers.rs"] mod report_helpers; #[path = "report_input_inspection.rs"] @@ -28,11 +26,9 @@ mod report_lookup; #[path = "report_workspace.rs"] mod report_workspace; -use analysis_cache::analysis_cache_observations; use report_helpers::*; use report_input_inspection::{ - decompress_vcf_head_lines, explicit_sex_from_options, inspect_head_via_js_reader, - vcf_sex_via_tabix, + explicit_sex_from_options, inspect_head_via_js_reader, vcf_sex_via_js_reader, }; use report_lookup::{BamReportLookup, CramReportLookup, VcfReportLookup}; use report_workspace::PackageWorkspace; @@ -57,6 +53,14 @@ pub(super) struct ReportOptionsInput { filters: Vec, #[serde(default)] output_dir: Option, + #[serde(default)] + input_file_path: Option, + #[serde(default)] + input_index_path: Option, + #[serde(default)] + reference_file_path: Option, + #[serde(default)] + reference_index_path: Option, /// Optional explicit sample sex (mirrors the CLI's `--sample-sex` flag). /// When set, takes precedence over inference: the report carries /// `method=explicit_sample_sex` like the CLI. 
@@ -64,6 +68,17 @@ pub(super) struct ReportOptionsInput { sample_sex: Option, } +impl ReportOptionsInput { + fn inspect_options(&self, detect_sex: bool) -> InspectOptions { + InspectOptions { + input_index: self.input_index_path.as_ref().map(PathBuf::from), + reference_file: self.reference_file_path.as_ref().map(PathBuf::from), + reference_index: self.reference_index_path.as_ref().map(PathBuf::from), + detect_sex, + } + } +} + #[derive(Serialize)] #[serde(rename_all = "camelCase")] struct ReportArtifactOutput { @@ -99,13 +114,8 @@ pub fn run_package_report_bytes( }; let workspace = PackageWorkspace::new(package_files)?; let participant_id = participant_id_from_name(input_name); - let manifest_context = workspace.report_manifest_context(manifest_path)?; - let inspect_options = InspectOptions { - input_index: None, - reference_file: None, - reference_index: None, - detect_sex: options.detect_sex, - }; + let input_file_path = options.input_file_path.as_deref().unwrap_or(input_name); + let inspect_options = options.inspect_options(options.detect_sex); let input_inspection = inspect_bytes_rs(input_name, input_bytes, &inspect_options) .map_err(|err| JsError::new(&format!("inspect input failed: {err:?}")))?; let loader = GenotypeLoadOptions { @@ -118,45 +128,31 @@ pub fn run_package_report_bytes( }; let store = GenotypeStore::from_bytes(input_name, input_bytes) .map_err(|err| JsError::new(&format!("load genotypes failed: {err:?}")))?; - let manifest_output = - workspace.run_manifest_rows(manifest_path, &store, &participant_id, &options.filters)?; - let observations = manifest_output - .rows - .iter() - .map(|row| { - workspace.app_observation_from_manifest_row( - row, - &manifest_context.assay_id, - input_inspection.inferred_sex.as_ref(), - input_inspection.assembly, - ) - }) - .collect::, _>>()?; - let analyses = workspace.run_manifest_analyses( - manifest_path, + let analysis_runner = report_workspace::WasmReportAnalysisRunner { + workspace: &workspace, input_name, 
input_bytes, - &[], - &participant_id, - &loader, - &options, - )?; - let artifacts = bioscript_reporting::render_input_report_artifact_texts( - bioscript_reporting::AppInputReportInput { - assay_id: &manifest_context.assay_id, + participant_id: &participant_id, + loader: &loader, + options: &options, + }; + let run = bioscript_reporting::run_report( + &workspace, + manifest_path, + &store, + &analysis_runner, + bioscript_reporting::ReportInputContext { participant_id: &participant_id, input_file_name: input_name, - input_file_path: input_name, - observations: &observations, - analyses: &analyses, - findings: &manifest_context.findings, - provenance: &manifest_context.provenance, + input_file_path, input_inspection: Some(&input_inspection), - manifest_metadata: &manifest_context.manifest_metadata, + }, + bioscript_reporting::ReportRunOptions { + filters: &options.filters, }, ) .map_err(|err| JsError::new(&err))?; - encode_report_run_output(started_ms, artifacts) + encode_report_run_output(started_ms, run.artifacts) } /// Mirrors `runPackageReportBytes` but for CRAM input. 
The CRAM body and @@ -194,11 +190,12 @@ pub fn run_package_report_from_cram( }; let workspace = PackageWorkspace::new(package_files)?; let participant_id = participant_id_from_name(input_name); - let manifest_context = workspace.report_manifest_context(manifest_path)?; + let input_file_path = options.input_file_path.as_deref().unwrap_or(input_name); let mut head_inspection = inspect_head_via_js_reader( &cram_read_at, cram_len as u64, input_name, + &options.inspect_options(false), false, // sex detection runs separately below via the indexed reader ); @@ -221,7 +218,7 @@ pub fn run_package_report_from_cram( let lookup = CramReportLookup { reader: std::cell::RefCell::new(indexed), - label: input_name.to_owned(), + label: input_file_path.to_owned(), }; // CRAM sex detection: explicit override wins, otherwise alignment Y/X @@ -249,48 +246,31 @@ pub fn run_package_report_from_cram( allow_reference_md5_mismatch: true, ..Default::default() }; - let manifest_output = - workspace.run_manifest_rows(manifest_path, &lookup, &participant_id, &options.filters)?; - let observations = manifest_output - .rows - .iter() - .map(|row| { - workspace.app_observation_from_manifest_row(row, &manifest_context.assay_id, None, None) - }) - .collect::, _>>()?; - let analysis_observations = - analysis_cache_observations(&manifest_output.observations, &observations); - // Analysis scripts call `bioscript.load_genotypes(input_file)` then rsid - // lookups via `genotypes.lookup_variants(plan)`. The runtime now layers a - // pre-resolved-observation cache over whatever the input file resolves - // to (Plan B in genotype/types.rs:QueryBackend::Cached), so for CRAM the - // cache hits and we skip re-walking the genome. The input bytes can be - // empty since every spec the panel/assays declared is in the cache. 
- let analyses = workspace.run_manifest_analyses( - manifest_path, + let analysis_runner = report_workspace::WasmReportAnalysisRunner { + workspace: &workspace, input_name, - &[], - &analysis_observations, - &participant_id, - &loader, - &options, - )?; - let artifacts = bioscript_reporting::render_input_report_artifact_texts( - bioscript_reporting::AppInputReportInput { - assay_id: &manifest_context.assay_id, + input_bytes: &[], + participant_id: &participant_id, + loader: &loader, + options: &options, + }; + let run = bioscript_reporting::run_report( + &workspace, + manifest_path, + &lookup, + &analysis_runner, + bioscript_reporting::ReportInputContext { participant_id: &participant_id, input_file_name: input_name, - input_file_path: input_name, - observations: &observations, - analyses: &analyses, - findings: &manifest_context.findings, - provenance: &manifest_context.provenance, + input_file_path, input_inspection: Some(&head_inspection), - manifest_metadata: &manifest_context.manifest_metadata, + }, + bioscript_reporting::ReportRunOptions { + filters: &options.filters, }, ) .map_err(|err| JsError::new(&err))?; - encode_report_run_output(started_ms, artifacts) + encode_report_run_output(started_ms, run.artifacts) } /// Mirrors `runPackageReportBytes` but for BAM input. 
The BAM body is streamed @@ -317,9 +297,14 @@ pub fn run_package_report_from_bam( }; let workspace = PackageWorkspace::new(package_files)?; let participant_id = participant_id_from_name(input_name); - let manifest_context = workspace.report_manifest_context(manifest_path)?; - let mut head_inspection = - inspect_head_via_js_reader(&bam_read_at, bam_len as u64, input_name, false); + let input_file_path = options.input_file_path.as_deref().unwrap_or(input_name); + let mut head_inspection = inspect_head_via_js_reader( + &bam_read_at, + bam_len as u64, + input_name, + &options.inspect_options(false), + false, + ); let bai_index = bioscript_formats::alignment::parse_bai_bytes(bai_bytes) .map_err(|err| JsError::new(&format!("parse bai: {err:?}")))?; @@ -330,7 +315,7 @@ pub fn run_package_report_from_bam( let lookup = BamReportLookup { reader: std::cell::RefCell::new(indexed), - label: input_name.to_owned(), + label: input_file_path.to_owned(), }; if let Some(explicit) = explicit_sex_from_options(&options) { @@ -341,42 +326,31 @@ pub fn run_package_report_from_bam( format: Some(bioscript_formats::GenotypeSourceFormat::Bam), ..Default::default() }; - let manifest_output = - workspace.run_manifest_rows(manifest_path, &lookup, &participant_id, &options.filters)?; - let observations = manifest_output - .rows - .iter() - .map(|row| { - workspace.app_observation_from_manifest_row(row, &manifest_context.assay_id, None, None) - }) - .collect::, _>>()?; - let analysis_observations = - analysis_cache_observations(&manifest_output.observations, &observations); - let analyses = workspace.run_manifest_analyses( - manifest_path, + let analysis_runner = report_workspace::WasmReportAnalysisRunner { + workspace: &workspace, input_name, - &[], - &analysis_observations, - &participant_id, - &loader, - &options, - )?; - let artifacts = bioscript_reporting::render_input_report_artifact_texts( - bioscript_reporting::AppInputReportInput { - assay_id: &manifest_context.assay_id, + input_bytes: 
&[], + participant_id: &participant_id, + loader: &loader, + options: &options, + }; + let run = bioscript_reporting::run_report( + &workspace, + manifest_path, + &lookup, + &analysis_runner, + bioscript_reporting::ReportInputContext { participant_id: &participant_id, input_file_name: input_name, - input_file_path: input_name, - observations: &observations, - analyses: &analyses, - findings: &manifest_context.findings, - provenance: &manifest_context.provenance, + input_file_path, input_inspection: Some(&head_inspection), - manifest_metadata: &manifest_context.manifest_metadata, + }, + bioscript_reporting::ReportRunOptions { + filters: &options.filters, }, ) .map_err(|err| JsError::new(&err))?; - encode_report_run_output(started_ms, artifacts) + encode_report_run_output(started_ms, run.artifacts) } /// Mirrors `runPackageReportBytes` but for a bgzipped, tabix-indexed VCF @@ -403,81 +377,63 @@ pub fn run_package_report_from_vcf( }; let workspace = PackageWorkspace::new(package_files)?; let participant_id = participant_id_from_name(input_name); - let manifest_context = workspace.report_manifest_context(manifest_path)?; + let input_file_path = options.input_file_path.as_deref().unwrap_or(input_name); // Inspect format/source/assembly from the head, but skip the byte-stream // sex detection — we'll do that via tabix-targeted X non-PAR queries // below, which works on indexed VCFs of any size. - let mut head_inspection = - inspect_head_via_js_reader(&vcf_read_at, vcf_len as u64, input_name, false); - // Decompress the head once to grab the VCF header lines (## meta + #CHROM - // column header) — these are needed by `infer_sex_from_text_lines` to - // figure out delimiter / column indexes for the data lines we'll pull - // via tabix below. 
- let head_lines = decompress_vcf_head_lines(&vcf_read_at, vcf_len as u64); - + let mut head_inspection = inspect_head_via_js_reader( + &vcf_read_at, + vcf_len as u64, + input_name, + &options.inspect_options(false), + false, + ); let tabix_index = bioscript_formats::alignment::parse_tbi_bytes(tbi_bytes) .map_err(|err| JsError::new(&format!("parse tbi: {err:?}")))?; - let vcf_reader = JsReader::new(vcf_read_at, vcf_len as u64, "vcf"); + let vcf_reader = JsReader::new(vcf_read_at.clone(), vcf_len as u64, "vcf"); let indexed = noodles::csi::io::IndexedReader::new(vcf_reader, tabix_index); let lookup = VcfReportLookup { reader: std::cell::RefCell::new(indexed), - label: input_name.to_owned(), + label: input_file_path.to_owned(), detected_assembly: head_inspection.assembly, }; if let Some(explicit) = explicit_sex_from_options(&options) { head_inspection.inferred_sex = Some(explicit); - } else if options.detect_sex { - let mut reader_borrow = lookup.reader.borrow_mut(); - if let Some(inference) = vcf_sex_via_tabix(&mut reader_borrow, &head_lines) { - head_inspection.inferred_sex = Some(inference); - } + } else if options.detect_sex + && let Some(inference) = vcf_sex_via_js_reader(vcf_read_at, vcf_len as u64, input_name) + { + head_inspection.inferred_sex = Some(inference); } let loader = GenotypeLoadOptions { format: Some(bioscript_formats::GenotypeSourceFormat::Vcf), ..Default::default() }; - let manifest_output = - workspace.run_manifest_rows(manifest_path, &lookup, &participant_id, &options.filters)?; - let observations = manifest_output - .rows - .iter() - .map(|row| { - workspace.app_observation_from_manifest_row(row, &manifest_context.assay_id, None, None) - }) - .collect::, _>>()?; - let analysis_observations = - analysis_cache_observations(&manifest_output.observations, &observations); - // Pre-resolved observation cache replaces the synth approach: analysis - // scripts hit the cache via QueryBackend::Cached and skip re-opening the - // VCF. 
See report_api.rs:run_package_report_from_cram for the same - // pattern and bioscript-formats::genotype::types::QueryBackend::Cached - // for the dispatch. - let analyses = workspace.run_manifest_analyses( - manifest_path, + let analysis_runner = report_workspace::WasmReportAnalysisRunner { + workspace: &workspace, input_name, - &[], - &analysis_observations, - &participant_id, - &loader, - &options, - )?; - let artifacts = bioscript_reporting::render_input_report_artifact_texts( - bioscript_reporting::AppInputReportInput { - assay_id: &manifest_context.assay_id, + input_bytes: &[], + participant_id: &participant_id, + loader: &loader, + options: &options, + }; + let run = bioscript_reporting::run_report( + &workspace, + manifest_path, + &lookup, + &analysis_runner, + bioscript_reporting::ReportInputContext { participant_id: &participant_id, input_file_name: input_name, - input_file_path: input_name, - observations: &observations, - analyses: &analyses, - findings: &manifest_context.findings, - provenance: &manifest_context.provenance, + input_file_path, input_inspection: Some(&head_inspection), - manifest_metadata: &manifest_context.manifest_metadata, + }, + bioscript_reporting::ReportRunOptions { + filters: &options.filters, }, ) .map_err(|err| JsError::new(&err))?; - encode_report_run_output(started_ms, artifacts) + encode_report_run_output(started_ms, run.artifacts) } diff --git a/rust/bioscript-wasm/src/report_api/analysis_cache.rs b/rust/bioscript-wasm/src/report_api/analysis_cache.rs deleted file mode 100644 index 64d94e9..0000000 --- a/rust/bioscript-wasm/src/report_api/analysis_cache.rs +++ /dev/null @@ -1,35 +0,0 @@ -use bioscript_core::VariantObservation; - -pub(super) fn analysis_cache_observations( - manifest_observations: &[VariantObservation], - app_observations: &[serde_json::Value], -) -> Vec { - manifest_observations - .iter() - .map(|observation| { - let mut observation = observation.clone(); - if let Some(app_observation) = 
matching_app_observation(&observation, app_observations) - && let Some(genotype_display) = app_observation - .get("genotype_display") - .and_then(serde_json::Value::as_str) - .filter(|value| !value.is_empty() && *value != "??") - { - observation.genotype = Some(genotype_display.to_owned()); - } - observation - }) - .collect() -} - -fn matching_app_observation<'a>( - observation: &VariantObservation, - app_observations: &'a [serde_json::Value], -) -> Option<&'a serde_json::Value> { - let matched_rsid = observation.matched_rsid.as_deref()?; - app_observations.iter().find(|app_observation| { - app_observation - .get("rsid") - .and_then(serde_json::Value::as_str) - == Some(matched_rsid) - }) -} diff --git a/rust/bioscript-wasm/src/report_helpers.rs b/rust/bioscript-wasm/src/report_helpers.rs index e3fd5a1..c088cf7 100644 --- a/rust/bioscript-wasm/src/report_helpers.rs +++ b/rust/bioscript-wasm/src/report_helpers.rs @@ -38,16 +38,6 @@ pub(super) fn encode_report_run_output( .map_err(|err| JsError::new(&format!("failed to encode report output: {err}"))) } -pub(super) fn variant_row( - path: &str, - name: &str, - tags: &[String], - observation: &VariantObservation, - participant_id: &str, -) -> BTreeMap { - bioscript_reporting::variant_row(path, name, tags, observation, participant_id) -} - pub(super) fn normalize_package_path(path: &str) -> Result { let mut out = PathBuf::new(); for component in Path::new(path).components() { diff --git a/rust/bioscript-wasm/src/report_input_inspection.rs b/rust/bioscript-wasm/src/report_input_inspection.rs index 23039dc..3dfc5d8 100644 --- a/rust/bioscript-wasm/src/report_input_inspection.rs +++ b/rust/bioscript-wasm/src/report_input_inspection.rs @@ -9,6 +9,7 @@ pub(super) fn inspect_head_via_js_reader( read_at: &js_sys::Function, total_len: u64, input_name: &str, + options: &bioscript_formats::InspectOptions, detect_sex: bool, ) -> bioscript_formats::FileInspection { use crate::js_reader::JsReader; @@ -29,12 +30,8 @@ pub(super) fn 
inspect_head_via_js_reader( } } buf.truncate(filled); - let opts = bioscript_formats::InspectOptions { - input_index: None, - reference_file: None, - reference_index: None, - detect_sex, - }; + let mut opts = options.clone(); + opts.detect_sex = detect_sex; match bioscript_formats::inspect_bytes(input_name, &buf, &opts) { Ok(inspection) => inspection, Err(err) => bioscript_formats::FileInspection { @@ -78,100 +75,16 @@ pub(super) fn explicit_sex_from_options( }) } -const VCF_X_NON_PAR_WINDOWS_GRCH38: &[(i64, i64)] = &[ - (10_000_000, 11_000_000), - (40_000_000, 41_000_000), - (70_000_000, 71_000_000), - (100_000_000, 101_000_000), - (130_000_000, 131_000_000), -]; -const VCF_Y_WINDOWS_GRCH38: &[(i64, i64)] = &[ - (3_500_000, 4_500_000), - (10_000_000, 11_000_000), - (15_000_000, 16_000_000), -]; - -/// Sex inference for indexed VCFs that streams only X non-PAR + Y windows -/// instead of scanning the whole file. Reuses the shared -/// `infer_sex_from_text_lines` so classification rules match the CLI. 
-pub(super) fn vcf_sex_via_tabix( - reader: &mut noodles::csi::io::IndexedReader< - noodles::bgzf::io::Reader, - noodles::tabix::Index, - >, - head_lines: &[String], +pub(super) fn vcf_sex_via_js_reader( + read_at: js_sys::Function, + total_len: u64, + input_name: &str, ) -> Option { - let mut lines = head_lines.to_vec(); - for chrom_label in ["X", "chrX"] { - for (start, end) in VCF_X_NON_PAR_WINDOWS_GRCH38 { - let Some(region) = build_region(chrom_label, *start, *end) else { - continue; - }; - if let Ok(query) = reader.query(&region) { - for record_result in query { - let Ok(record) = record_result else { - continue; - }; - let line: &str = record.as_ref(); - lines.push(line.to_owned()); - } - } - } - for (start, end) in VCF_Y_WINDOWS_GRCH38 { - let y_label = if chrom_label == "X" { "Y" } else { "chrY" }; - let Some(region) = build_region(y_label, *start, *end) else { - continue; - }; - if let Ok(query) = reader.query(&region) { - for record_result in query { - let Ok(record) = record_result else { - continue; - }; - let line: &str = record.as_ref(); - lines.push(line.to_owned()); - } - } - } - } - bioscript_formats::infer_sex_from_text_lines(&lines, bioscript_formats::DetectedKind::Vcf).ok() -} - -fn build_region(chrom: &str, start: i64, end: i64) -> Option { - use noodles::core::{Position, Region}; - let s = Position::try_from(usize::try_from(start.max(1)).ok()?).ok()?; - let e = Position::try_from(usize::try_from(end.max(start)).ok()?).ok()?; - Some(Region::new(chrom, s..=e)) -} - -/// Pull the VCF header from the bgzf head so `infer_sex_from_text_lines` can -/// resolve delimiter and column indexes for the X/Y records added via tabix.
-pub(super) fn decompress_vcf_head_lines(read_at: &js_sys::Function, total_len: u64) -> Vec { - use crate::js_reader::JsReader; - use std::io::{BufRead, BufReader, Read}; - - let head_len = total_len.min(2 * 1024 * 1024); - let mut reader = JsReader::new(read_at.clone(), total_len, "vcf-head"); - let mut buf = vec![0u8; head_len as usize]; - let mut filled = 0usize; - while filled < buf.len() { - match reader.read(&mut buf[filled..]) { - Ok(0) => break, - Ok(n) => filled += n, - Err(_) => break, - } - } - buf.truncate(filled); - let cursor = std::io::Cursor::new(buf); - let mut bgzf_reader = BufReader::new(noodles::bgzf::io::Reader::new(cursor)); - let mut lines = Vec::new(); - let mut line = String::new(); - for _ in 0..1024 { - line.clear(); - match bgzf_reader.read_line(&mut line) { - Ok(0) => break, - Ok(_) => lines.push(line.trim_end_matches(['\n', '\r']).to_owned()), - Err(_) => break, - } - } - lines + let reader = crate::js_reader::JsReader::new(read_at, total_len, "vcf-sex"); + bioscript_formats::infer_sex_from_named_reader( + input_name, + reader, + bioscript_formats::DetectedKind::Vcf, + ) + .ok() } diff --git a/rust/bioscript-wasm/src/report_lookup.rs b/rust/bioscript-wasm/src/report_lookup.rs index 9eab873..8cfa6ef 100644 --- a/rust/bioscript-wasm/src/report_lookup.rs +++ b/rust/bioscript-wasm/src/report_lookup.rs @@ -19,20 +19,17 @@ pub(super) struct VcfReportLookup { pub(super) detected_assembly: Option, } -impl report_workspace::VariantLookup for VcfReportLookup { - fn lookup_variants( - &self, - specs: &[VariantSpec], - ) -> Result, RuntimeError> { +impl bioscript_reporting::ReportVariantLookup + for VcfReportLookup +{ + fn lookup_variants(&self, specs: &[VariantSpec]) -> Result, String> { let mut reader = self.reader.borrow_mut(); let mut out = Vec::with_capacity(specs.len()); for spec in specs { - out.push(observe_vcf_variant( - &mut reader, - &self.label, - spec, - self.detected_assembly, - )?); + out.push( + observe_vcf_variant(&mut reader, 
&self.label, spec, self.detected_assembly) + .map_err(|err| err.to_string())?, + ); } Ok(out) } diff --git a/rust/bioscript-wasm/src/report_lookup/alignment.rs b/rust/bioscript-wasm/src/report_lookup/alignment.rs index 4a3cc88..e78fb8b 100644 --- a/rust/bioscript-wasm/src/report_lookup/alignment.rs +++ b/rust/bioscript-wasm/src/report_lookup/alignment.rs @@ -1,6 +1,6 @@ use super::*; -/// Per-variant CRAM lookup that satisfies the workspace's `VariantLookup` +/// Per-variant CRAM lookup that satisfies the reporting lookup /// trait. Holds the IndexedReader in a `RefCell` so &self lookup methods can /// mutably read while still being object-safe. pub(crate) struct CramReportLookup { @@ -13,15 +13,17 @@ mod bam; pub(crate) use bam::BamReportLookup; -impl report_workspace::VariantLookup for CramReportLookup { - fn lookup_variants( - &self, - specs: &[VariantSpec], - ) -> Result, RuntimeError> { +impl bioscript_reporting::ReportVariantLookup + for CramReportLookup +{ + fn lookup_variants(&self, specs: &[VariantSpec]) -> Result, String> { let mut reader = self.reader.borrow_mut(); let mut out = Vec::with_capacity(specs.len()); for spec in specs { - out.push(observe_cram_variant(&mut reader, &self.label, spec)?); + out.push( + observe_cram_variant(&mut reader, &self.label, spec) + .map_err(|err| err.to_string())?, + ); } Ok(out) } diff --git a/rust/bioscript-wasm/src/report_lookup/alignment/bam.rs b/rust/bioscript-wasm/src/report_lookup/alignment/bam.rs index d5743db..16923d2 100644 --- a/rust/bioscript-wasm/src/report_lookup/alignment/bam.rs +++ b/rust/bioscript-wasm/src/report_lookup/alignment/bam.rs @@ -7,15 +7,17 @@ pub(crate) struct BamReportLookup { pub(crate) label: String, } -impl report_workspace::VariantLookup for BamReportLookup { - fn lookup_variants( - &self, - specs: &[VariantSpec], - ) -> Result, RuntimeError> { +impl bioscript_reporting::ReportVariantLookup + for BamReportLookup +{ + fn lookup_variants(&self, specs: &[VariantSpec]) -> Result, String> { 
let mut reader = self.reader.borrow_mut(); let mut out = Vec::with_capacity(specs.len()); for spec in specs { - out.push(observe_bam_variant(&mut reader, &self.label, spec)?); + out.push( + observe_bam_variant(&mut reader, &self.label, spec) + .map_err(|err| err.to_string())?, + ); } Ok(out) } diff --git a/rust/bioscript-wasm/src/report_workspace.rs b/rust/bioscript-wasm/src/report_workspace.rs index 85451a5..51e5090 100644 --- a/rust/bioscript-wasm/src/report_workspace.rs +++ b/rust/bioscript-wasm/src/report_workspace.rs @@ -2,36 +2,7 @@ use super::*; #[path = "report_workspace/analysis.rs"] mod analysis; - -/// What a manifest row walk produces: human-readable rows for the -/// observation TSV/HTML, and the underlying `VariantObservation`s ready to -/// hand to the analysis runtime as a pre-resolved cache (so analysis Python -/// scripts' `genotypes.lookup_variants(plan)` call hits cache instead of -/// re-walking the genome). -pub(super) struct ManifestRowsOutput { - pub rows: Vec>, - pub observations: Vec, -} - -/// Abstract per-variant observation source so the workspace can run against -/// either a path-based `GenotypeStore` (text/zip — bytes already in memory) -/// or a CRAM/VCF-reader-backed lookup that streams through JS-supplied -/// `readAt` callbacks. -pub(super) trait VariantLookup { - fn lookup_variants( - &self, - specs: &[VariantSpec], - ) -> Result, RuntimeError>; -} - -impl VariantLookup for GenotypeStore { - fn lookup_variants( - &self, - specs: &[VariantSpec], - ) -> Result, RuntimeError> { - GenotypeStore::lookup_variants(self, specs) - } -} +pub(crate) use analysis::WasmReportAnalysisRunner; pub(super) struct PackageWorkspace { files: BTreeMap, @@ -69,53 +40,38 @@ impl PackageWorkspace { load_variant_manifest_text(path, self.text(path)?) 
.map_err(|err| JsError::new(&format!("load variant {path}: {err}"))) } +} - pub(super) fn run_manifest_rows( - &self, - manifest_path: &str, - store: &dyn VariantLookup, - participant_id: &str, - filters: &[String], - ) -> Result { - let tasks = - bioscript_reporting::collect_variant_manifest_tasks(self, manifest_path, filters) - .map_err(|err| JsError::new(&err))?; - let observations = store - .lookup_variants( - &tasks - .iter() - .map(|task| task.manifest.spec.clone()) - .collect::>(), - ) - .map_err(|err| JsError::new(&format!("manifest lookup failed: {err:?}")))?; - let mut rows = Vec::with_capacity(tasks.len()); - let mut collected = Vec::with_capacity(tasks.len()); - for (task, observation) in tasks.into_iter().zip(observations) { - rows.push(variant_row( - &task.manifest_path, - &task.manifest.name, - &task.manifest.tags, - &observation, - participant_id, - )); - collected.push(observation); - } - Ok(ManifestRowsOutput { - rows, - observations: collected, - }) +impl bioscript_reporting::ManifestWorkspace for PackageWorkspace { + fn load_text(&self, path: &str) -> Result { + self.text(path) + .map(str::to_owned) + .map_err(|err| format!("{err:?}")) + } + + fn load_yaml(&self, path: &str) -> Result { + self.yaml(path).map_err(|err| format!("{err:?}")) } - pub(super) fn app_observation_from_manifest_row( + fn resolve(&self, base: &str, relative: &str) -> Result { + self.resolve(base, relative) + .map_err(|err| format!("{err:?}")) + } +} + +impl bioscript_reporting::ReportWorkspace for PackageWorkspace { + fn app_observation_from_manifest_row( &self, row: &BTreeMap, assay_id: &str, inferred_sex: Option<&SexInference>, fallback_assembly: Option, - ) -> Result { + ) -> Result { let row_path = row.get("path").cloned().unwrap_or_default(); - let manifest = self.load_variant(&row_path)?; - let value = self.yaml(&row_path)?; + let manifest = self + .load_variant(&row_path) + .map_err(|err| format!("{err:?}"))?; + let value = self.yaml(&row_path).map_err(|err| 
format!("{err:?}"))?; let gene = yaml_string(&value, "gene").unwrap_or_default(); Ok(bioscript_reporting::app_observation_from_manifest_row( bioscript_reporting::AppObservationInput { @@ -131,29 +87,4 @@ impl PackageWorkspace { }, )) } - - pub(super) fn report_manifest_context( - &self, - path: &str, - ) -> Result { - bioscript_reporting::load_report_manifest_context(self, path) - .map_err(|err| JsError::new(&err)) - } -} - -impl bioscript_reporting::ManifestWorkspace for PackageWorkspace { - fn load_text(&self, path: &str) -> Result { - self.text(path) - .map(str::to_owned) - .map_err(|err| format!("{err:?}")) - } - - fn load_yaml(&self, path: &str) -> Result { - self.yaml(path).map_err(|err| format!("{err:?}")) - } - - fn resolve(&self, base: &str, relative: &str) -> Result { - self.resolve(base, relative) - .map_err(|err| format!("{err:?}")) - } } diff --git a/rust/bioscript-wasm/src/report_workspace/analysis.rs b/rust/bioscript-wasm/src/report_workspace/analysis.rs index c67ae08..5020b73 100644 --- a/rust/bioscript-wasm/src/report_workspace/analysis.rs +++ b/rust/bioscript-wasm/src/report_workspace/analysis.rs @@ -1,40 +1,39 @@ use super::*; -impl PackageWorkspace { - #[allow(clippy::too_many_arguments)] - pub(crate) fn run_manifest_analyses( +pub(crate) struct WasmReportAnalysisRunner<'a> { + pub(crate) workspace: &'a PackageWorkspace, + pub(crate) input_name: &'a str, + pub(crate) input_bytes: &'a [u8], + pub(crate) participant_id: &'a str, + pub(crate) loader: &'a GenotypeLoadOptions, + pub(crate) options: &'a ReportOptionsInput, +} + +impl bioscript_reporting::ReportAnalysisRunner for WasmReportAnalysisRunner<'_> { + fn run_analysis_task( &self, - manifest_path: &str, - input_name: &str, - input_bytes: &[u8], - preloaded_observations: &[VariantObservation], - participant_id: &str, - loader: &GenotypeLoadOptions, - options: &ReportOptionsInput, - ) -> Result, JsError> { - let tasks = bioscript_reporting::collect_analysis_manifest_tasks( - self, - 
manifest_path, - &options.filters, - ) - .map_err(|err| JsError::new(&err))?; - let mut analyses = Vec::new(); - for task in tasks { - analyses.extend(self.run_interpretations( + task: &bioscript_reporting::AnalysisManifestTask, + _observation_rows: &[BTreeMap], + variant_observations: &[VariantObservation], + _observations: &[serde_json::Value], + ) -> Result, String> { + self.workspace + .run_interpretations( &task.manifest_path, &task.manifest_name, &task.interpretations, - input_name, - input_bytes, - preloaded_observations, - participant_id, - loader, - options, - )?); - } - Ok(analyses) + self.input_name, + self.input_bytes, + variant_observations, + self.participant_id, + self.loader, + self.options, + ) + .map_err(|err| format!("{err:?}")) } +} +impl PackageWorkspace { #[allow(clippy::too_many_arguments)] fn run_interpretations( &self, @@ -83,6 +82,11 @@ impl PackageWorkspace { observations_file.clone(), bioscript_reporting::render_analysis_observations_tsv(preloaded_observations), ); + let runtime_observations = if input_bytes.is_empty() { + preloaded_observations.to_vec() + } else { + Vec::new() + }; let mut virtual_binary_files = BTreeMap::new(); virtual_binary_files.insert(input_name.to_owned(), input_bytes.to_vec()); let limits = ResourceLimits::new() @@ -98,7 +102,7 @@ impl PackageWorkspace { loader: loader.clone(), virtual_binary_files, virtual_text_files: std::mem::take(&mut virtual_text_files), - preloaded_observations: preloaded_observations.to_vec(), + preloaded_observations: runtime_observations, }, ) .map_err(|err| JsError::new(&format!("create analysis runtime failed: {err:?}")))?;