From 0dcfe891ff2f0fc2c4d49701e5d10261799c1fc3 Mon Sep 17 00:00:00 2001 From: Madhava Jay Date: Thu, 14 May 2026 16:19:40 +1000 Subject: [PATCH 1/3] adding more coverage --- coverage.sh | 9 +- rust/bioscript-cli/src/cli_bootstrap.rs | 159 +++++++ rust/bioscript-cli/src/cli_commands.rs | 175 +++++++ rust/bioscript-cli/src/package.rs | 429 ++++++++++++++++++ rust/bioscript-cli/src/report_execution.rs | 202 +++++++++ rust/bioscript-cli/src/report_observations.rs | 98 ++++ rust/bioscript-cli/src/report_options.rs | 264 +++++++++++ rust/bioscript-cli/src/report_output.rs | 86 ++++ rust/bioscript-cli/src/report_review.rs | 204 +++++++++ rust/bioscript-formats/src/alignment.rs | 53 +++ rust/bioscript-formats/src/genotype.rs | 57 +++ .../src/genotype/cram_backend/observation.rs | 207 +++++++++ .../src/genotype/cram_backend/store.rs | 113 +++++ rust/bioscript-formats/src/inspect/sex.rs | 81 ++++ .../src/inspect/sex/alignment_depth.rs | 107 +++++ rust/bioscript-reporting/src/artifacts.rs | 97 ++++ rust/bioscript-reporting/src/html.rs | 195 ++++++++ rust/bioscript-reporting/src/manifest.rs | 110 ++++- rust/bioscript-reporting/src/matching.rs | 207 +++++++++ rust/bioscript-reporting/src/observation.rs | 168 +++++++ .../src/observation/facets.rs | 87 ++++ rust/bioscript-reporting/src/report_json.rs | 180 ++++++++ rust/bioscript-schema/src/remote_resource.rs | 119 +++++ .../bioscript-schema/src/validator_alleles.rs | 78 ++++ .../src/validator_findings.rs | 144 ++++++ rust/bioscript-schema/src/validator_load.rs | 169 +++++++ rust/bioscript-schema/src/validator_panel.rs | 181 ++++++++ .../src/validator_resources.rs | 91 ++++ rust/bioscript-schema/src/validator_roots.rs | 168 +++++++ 29 files changed, 4233 insertions(+), 5 deletions(-) diff --git a/coverage.sh b/coverage.sh index a3f9a4d..815d9cd 100755 --- a/coverage.sh +++ b/coverage.sh @@ -25,7 +25,7 @@ Usage: ./coverage.sh [--full-clean|-c] [--open] [--large] [--all-tests] [--no-li --no-lint Skip cargo fmt and clippy checks 
--focused-test Run one focused integration test target: file_formats, formats_lib, inspect, prepare, cli, cli_bin, schema, core, - runtime_lib, runtime_security, or runtime_resources + reporting_lib, runtime_lib, runtime_security, or runtime_resources Environment: AUTO_INSTALL_LLVM_COV=0 Do not auto-install cargo-llvm-cov @@ -83,6 +83,7 @@ PACKAGES=( bioscript-cli bioscript-core bioscript-formats + bioscript-reporting bioscript-runtime bioscript-schema ) @@ -176,12 +177,16 @@ if [[ -n "$FOCUSED_TEST" ]]; then env "${COV_ENV[@]}" cargo llvm-cov --no-report -p bioscript-cli --bin bioscript ;; schema) + env "${COV_ENV[@]}" cargo llvm-cov --no-report -p bioscript-schema --lib env "${COV_ENV[@]}" cargo llvm-cov --no-report -p bioscript-schema --test validate_variants -- --nocapture --test-threads="$TEST_THREADS" ;; core) env "${COV_ENV[@]}" cargo llvm-cov --no-report -p bioscript-core --lib env "${COV_ENV[@]}" cargo llvm-cov --no-report -p bioscript-core --test source_size -- --nocapture --test-threads="$TEST_THREADS" ;; + reporting_lib) + env "${COV_ENV[@]}" cargo llvm-cov --no-report -p bioscript-reporting --lib + ;; runtime_lib) env "${COV_ENV[@]}" cargo llvm-cov --no-report -p bioscript-runtime --lib ;; @@ -206,9 +211,11 @@ else env "${COV_ENV[@]}" cargo llvm-cov --no-report -p bioscript-formats --test prepare -- --nocapture --test-threads="$TEST_THREADS" env "${COV_ENV[@]}" cargo llvm-cov --no-report -p bioscript-cli --test cli -- --nocapture --test-threads="$TEST_THREADS" env "${COV_ENV[@]}" cargo llvm-cov --no-report -p bioscript-cli --bin bioscript + env "${COV_ENV[@]}" cargo llvm-cov --no-report -p bioscript-schema --lib env "${COV_ENV[@]}" cargo llvm-cov --no-report -p bioscript-schema --test validate_variants -- --nocapture --test-threads="$TEST_THREADS" env "${COV_ENV[@]}" cargo llvm-cov --no-report -p bioscript-core --lib env "${COV_ENV[@]}" cargo llvm-cov --no-report -p bioscript-core --test source_size -- --nocapture --test-threads="$TEST_THREADS" + env 
"${COV_ENV[@]}" cargo llvm-cov --no-report -p bioscript-reporting --lib env "${COV_ENV[@]}" cargo llvm-cov --no-report -p bioscript-runtime --lib env "${COV_ENV[@]}" cargo llvm-cov --no-report -p bioscript-runtime --test security -- --nocapture --test-threads="$TEST_THREADS" env "${COV_ENV[@]}" cargo llvm-cov --no-report -p bioscript-runtime --test resources_coverage -- --nocapture --test-threads="$TEST_THREADS" diff --git a/rust/bioscript-cli/src/cli_bootstrap.rs b/rust/bioscript-cli/src/cli_bootstrap.rs index 63bae41..56612f6 100644 --- a/rust/bioscript-cli/src/cli_bootstrap.rs +++ b/rust/bioscript-cli/src/cli_bootstrap.rs @@ -417,3 +417,162 @@ fn write_timing_report(path: &PathBuf, timings: &[StageTiming]) -> Result<(), St fs::write(path, output) .map_err(|err| format!("failed to write timing report {}: {err}", path.display())) } + +#[cfg(test)] +mod cli_bootstrap_tests { + use super::*; + use std::time::{SystemTime, UNIX_EPOCH}; + + fn temp_dir(name: &str) -> PathBuf { + let unique = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_nanos(); + let dir = env::temp_dir().join(format!( + "bioscript-cli-bootstrap-{name}-{}-{unique}", + std::process::id() + )); + fs::create_dir_all(&dir).unwrap(); + dir + } + + #[test] + fn parse_cli_options_consumes_paths_loader_limits_and_filters() { + let options = parse_cli_options(vec![ + "script.bs".to_owned(), + "--root".to_owned(), + "root".to_owned(), + "--input-file".to_owned(), + "input.txt".to_owned(), + "--output-file".to_owned(), + "output.txt".to_owned(), + "--participant-id".to_owned(), + "p1".to_owned(), + "--trace-report".to_owned(), + "trace.tsv".to_owned(), + "--timing-report".to_owned(), + "timing.tsv".to_owned(), + "--filter".to_owned(), + "tag=pgx".to_owned(), + "--cache-dir".to_owned(), + "cache".to_owned(), + "--input-format".to_owned(), + "text".to_owned(), + "--input-index".to_owned(), + "input.idx".to_owned(), + "--reference-file".to_owned(), + "ref.fa".to_owned(), + 
"--reference-index".to_owned(), + "ref.fa.fai".to_owned(), + "--max-duration-ms".to_owned(), + "250".to_owned(), + "--max-memory-bytes".to_owned(), + "1024".to_owned(), + "--max-allocations".to_owned(), + "2000".to_owned(), + "--max-recursion-depth".to_owned(), + "50".to_owned(), + "--auto-index".to_owned(), + ]) + .unwrap(); + + assert_eq!(options.script_path, Some(PathBuf::from("script.bs"))); + assert_eq!(options.root, Some(PathBuf::from("root"))); + assert_eq!(options.input_file.as_deref(), Some("input.txt")); + assert_eq!(options.output_file.as_deref(), Some("output.txt")); + assert_eq!(options.participant_id.as_deref(), Some("p1")); + assert_eq!(options.trace_report, Some(PathBuf::from("trace.tsv"))); + assert_eq!(options.timing_report, Some(PathBuf::from("timing.tsv"))); + assert_eq!(options.filters, vec!["tag=pgx"]); + assert_eq!(options.cache_dir, Some(PathBuf::from("cache"))); + assert_eq!(options.loader.format, Some(GenotypeSourceFormat::Text)); + assert_eq!(options.loader.input_index, Some(PathBuf::from("input.idx"))); + assert_eq!(options.loader.reference_file, Some(PathBuf::from("ref.fa"))); + assert_eq!( + options.loader.reference_index, + Some(PathBuf::from("ref.fa.fai")) + ); + assert!(options.auto_index); + } + + #[test] + fn parse_cli_options_reports_missing_values_and_unexpected_arguments() { + for (flag, message) in [ + ("--root", "--root requires"), + ("--input-file", "--input-file requires"), + ("--output-file", "--output-file requires"), + ("--participant-id", "--participant-id requires"), + ("--trace-report", "--trace-report requires"), + ("--timing-report", "--timing-report requires"), + ("--filter", "--filter requires"), + ("--cache-dir", "--cache-dir requires"), + ("--input-format", "--input-format requires"), + ("--input-index", "--input-index requires"), + ("--reference-file", "--reference-file requires"), + ("--reference-index", "--reference-index requires"), + ("--max-duration-ms", "--max-duration-ms requires"), + 
("--max-memory-bytes", "--max-memory-bytes requires"), + ("--max-allocations", "--max-allocations requires"), + ("--max-recursion-depth", "--max-recursion-depth requires"), + ] { + assert!(parse_err(vec![flag.to_owned()]).contains(message)); + } + assert!(parse_err(vec![ + "script.bs".to_owned(), + "extra.bs".to_owned(), + ]) + .contains("unexpected argument")); + assert!(parse_err(vec![ + "--input-format".to_owned(), + "bad".to_owned(), + ]) + .contains("invalid --input-format")); + assert!(parse_err(vec![ + "--max-duration-ms".to_owned(), + "bad".to_owned(), + ]) + .contains("invalid --max-duration-ms")); + } + + #[test] + fn write_timing_report_creates_parent_and_sanitizes_tabs() { + let dir = temp_dir("timing"); + let path = dir.join("nested/timing.tsv"); + write_timing_report( + &path, + &[ + StageTiming { + stage: "stage1".to_owned(), + duration_ms: 12, + detail: "a\tb".to_owned(), + }, + StageTiming { + stage: "stage2".to_owned(), + duration_ms: 0, + detail: "ok".to_owned(), + }, + ], + ) + .unwrap(); + let text = fs::read_to_string(&path).unwrap(); + assert!(text.starts_with("stage\tduration_ms\tdetail\n")); + assert!(text.contains("stage1\t12\ta b")); + + fs::remove_dir_all(dir).unwrap(); + } + + #[test] + fn prepare_cli_indexes_noops_when_auto_index_is_disabled() { + let mut options = default_cli_options(); + let timings = prepare_cli_indexes(Path::new("."), &mut options).unwrap(); + assert!(timings.is_empty()); + assert!(options.loader.input_index.is_none()); + } + + fn parse_err(args: Vec) -> String { + match parse_cli_options(args) { + Ok(_) => panic!("expected CLI parse to fail"), + Err(err) => err, + } + } +} diff --git a/rust/bioscript-cli/src/cli_commands.rs b/rust/bioscript-cli/src/cli_commands.rs index 26f7ac4..68898ca 100644 --- a/rust/bioscript-cli/src/cli_commands.rs +++ b/rust/bioscript-cli/src/cli_commands.rs @@ -258,3 +258,178 @@ fn run_validate_assays(args: Vec) -> Result<(), String> { Ok(()) } + +#[cfg(test)] +mod cli_command_tests { + 
use super::*; + use std::time::{SystemTime, UNIX_EPOCH}; + + fn temp_dir(name: &str) -> PathBuf { + let unique = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_nanos(); + let dir = env::temp_dir().join(format!( + "bioscript-cli-command-{name}-{}-{unique}", + std::process::id() + )); + fs::create_dir_all(&dir).unwrap(); + dir + } + + fn valid_variant_yaml() -> &'static str { + r#" +schema: bioscript:variant:1.0 +version: "1.0" +name: Test variant +gene: ABC +identifiers: + rsids: [rs1] +coordinates: + grch38: + chrom: "1" + pos: 100 +alleles: + kind: snv + ref: G + alts: [A] +"# + } + + #[test] + fn prepare_and_inspect_commands_validate_arguments() { + assert!(run_prepare(vec!["--root".to_owned()]) + .unwrap_err() + .contains("--root requires")); + assert!(run_prepare(vec!["--input-file".to_owned()]) + .unwrap_err() + .contains("--input-file requires")); + assert!(run_prepare(vec![ + "--input-format".to_owned(), + "not-a-format".to_owned(), + ]) + .unwrap_err() + .contains("invalid --input-format")); + assert!(run_prepare(vec!["--unexpected".to_owned()]) + .unwrap_err() + .contains("unexpected argument")); + + assert!(run_inspect(Vec::new()).unwrap_err().contains("usage")); + assert!(run_inspect(vec!["a.txt".to_owned(), "b.txt".to_owned()]) + .unwrap_err() + .contains("unexpected argument")); + assert!(run_inspect(vec!["sample.cram".to_owned(), "--input-index".to_owned()]) + .unwrap_err() + .contains("--input-index requires")); + } + + #[test] + fn yaml_manifest_extension_matching_is_case_sensitive_by_contract() { + assert!(is_yaml_manifest(Path::new("panel.yaml"))); + assert!(is_yaml_manifest(Path::new("panel.yml"))); + assert!(!is_yaml_manifest(Path::new("panel.YAML"))); + assert!(!is_yaml_manifest(Path::new("panel.json"))); + } + + #[test] + fn validate_variants_writes_report_and_surfaces_errors() { + let dir = temp_dir("variants"); + let valid = dir.join("variant.yaml"); + let report = dir.join("reports/variant.txt"); + fs::write(&valid, 
valid_variant_yaml()).unwrap(); + + run_validate_variants(vec![ + valid.display().to_string(), + "--report".to_owned(), + report.display().to_string(), + ]) + .unwrap(); + assert!(fs::read_to_string(&report).unwrap().contains("files_scanned")); + + let invalid = dir.join("invalid.yaml"); + fs::write(&invalid, "schema: bioscript:variant:1.0\n").unwrap(); + let err = run_validate_variants(vec![invalid.display().to_string()]).unwrap_err(); + assert!(err.contains("validation found")); + + assert!(run_validate_variants(Vec::new()).unwrap_err().contains("usage")); + assert!(run_validate_variants(vec![valid.display().to_string(), "--report".to_owned()]) + .unwrap_err() + .contains("--report requires")); + assert!(run_validate_variants(vec![ + valid.display().to_string(), + "extra".to_owned(), + ]) + .unwrap_err() + .contains("unexpected argument")); + + fs::remove_dir_all(dir).unwrap(); + } + + #[test] + fn validate_panels_and_assays_cover_report_and_error_paths() { + let dir = temp_dir("panels-assays"); + let variant = dir.join("variant.yaml"); + fs::write(&variant, valid_variant_yaml()).unwrap(); + + let panel = dir.join("panel.yaml"); + fs::write( + &panel, + r#" +schema: bioscript:panel:1.0 +version: "1.0" +name: Test panel +members: + - kind: variant + path: variant.yaml +"#, + ) + .unwrap(); + let panel_report = dir.join("reports/panel.txt"); + run_validate_panels(vec![ + panel.display().to_string(), + "--report".to_owned(), + panel_report.display().to_string(), + ]) + .unwrap(); + assert!(panel_report.exists()); + + let assay = dir.join("assay.yaml"); + fs::write( + &assay, + r#" +schema: bioscript:assay:1.0 +version: "1.0" +name: Test assay +members: + - kind: variant + path: variant.yaml +"#, + ) + .unwrap(); + let assay_report = dir.join("reports/assay.txt"); + run_validate_assays(vec![ + assay.display().to_string(), + "--report".to_owned(), + assay_report.display().to_string(), + ]) + .unwrap(); + assert!(assay_report.exists()); + + 
assert!(run_validate_panels(Vec::new()).unwrap_err().contains("usage")); + assert!(run_validate_panels(vec![panel.display().to_string(), "--report".to_owned()]) + .unwrap_err() + .contains("--report requires")); + assert!(run_validate_panels(vec![panel.display().to_string(), "extra".to_owned()]) + .unwrap_err() + .contains("unexpected argument")); + assert!(run_validate_assays(Vec::new()).unwrap_err().contains("usage")); + assert!(run_validate_assays(vec![assay.display().to_string(), "--report".to_owned()]) + .unwrap_err() + .contains("--report requires")); + assert!(run_validate_assays(vec![assay.display().to_string(), "extra".to_owned()]) + .unwrap_err() + .contains("unexpected argument")); + + fs::remove_dir_all(dir).unwrap(); + } +} diff --git a/rust/bioscript-cli/src/package.rs b/rust/bioscript-cli/src/package.rs index 9bce723..35a0ad8 100644 --- a/rust/bioscript-cli/src/package.rs +++ b/rust/bioscript-cli/src/package.rs @@ -457,3 +457,432 @@ fn is_package_release_path(path: &Path) -> bool { .and_then(|ext| ext.to_str()) .is_some_and(|ext| matches!(ext.to_ascii_lowercase().as_str(), "yaml" | "yml")) } + +#[cfg(test)] +mod package_tests { + use super::*; + use std::{ + io::Write as _, + time::{SystemTime, UNIX_EPOCH}, + }; + + fn temp_dir(name: &str) -> PathBuf { + let unique = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_nanos(); + let dir = env::temp_dir().join(format!( + "bioscript-package-{name}-{}-{unique}", + std::process::id() + )); + fs::create_dir_all(&dir).unwrap(); + dir + } + + fn write_zip(path: &Path, entries: &[(&str, &str)]) { + let file = fs::File::create(path).unwrap(); + let mut writer = zip::ZipWriter::new(file); + let options = zip::write::SimpleFileOptions::default(); + for (name, body) in entries { + if name.ends_with('/') { + writer.add_directory(*name, options).unwrap(); + } else { + writer.start_file(*name, options).unwrap(); + writer.write_all(body.as_bytes()).unwrap(); + } + } + writer.finish().unwrap(); + } + + 
fn variant_manifest(name: &str) -> String { + format!( + r#" +schema: bioscript:variant:1.0 +name: {name} +gene: ABC +identifiers: + rsids: [rs1] +"# + ) + } + + #[test] + fn package_path_and_type_helpers_validate_inputs() { + assert_eq!( + checked_relative_package_path("./nested/manifest.yaml").unwrap(), + PathBuf::from("nested/manifest.yaml") + ); + assert!(checked_relative_package_path("../escape.yaml") + .unwrap_err() + .contains("escapes")); + assert!(checked_relative_package_path("/absolute.yaml") + .unwrap_err() + .contains("must be relative")); + assert!(checked_relative_package_path(".").unwrap_err().contains("empty")); + + assert!(is_allowed_package_file(Path::new("manifest.yaml"))); + assert!(is_allowed_package_file(Path::new("docs/readme.md"))); + assert!(!is_allowed_package_file(Path::new("bin/tool.sh"))); + assert!(is_package_url("https://example.test/pkg.zip")); + assert!(is_package_url("http://example.test/pkg.zip")); + assert!(is_package_zip_path(Path::new("PKG.ZIP"))); + assert!(is_package_release_path(Path::new("package.yml"))); + assert!(!is_package_release_path(Path::new("package.zip"))); + } + + #[test] + fn package_descriptor_loads_supported_shapes_and_reports_errors() { + let dir = temp_dir("descriptors"); + + fs::write(dir.join("variant.yaml"), variant_manifest("Fallback")).unwrap(); + let descriptor = load_package_descriptor(&dir).unwrap(); + assert_eq!(descriptor.entrypoint, PathBuf::from("variant.yaml")); + assert_eq!(descriptor.name, None); + fs::remove_file(dir.join("variant.yaml")).unwrap(); + + fs::write(dir.join("manifest.yaml"), variant_manifest("Direct")).unwrap(); + let descriptor = load_package_descriptor(&dir).unwrap(); + assert_eq!(descriptor.entrypoint, PathBuf::from("manifest.yaml")); + assert_eq!(descriptor.name, Some("Direct".to_owned())); + fs::remove_file(dir.join("manifest.yaml")).unwrap(); + + fs::write( + dir.join("manifest.yaml"), + r#" +schema: bioscript:package:1.0 +name: Wrapped +entrypoint: nested/panel.yaml +"#, 
+ ) + .unwrap(); + let descriptor = load_package_descriptor(&dir).unwrap(); + assert_eq!(descriptor.entrypoint, PathBuf::from("nested/panel.yaml")); + assert_eq!(descriptor.name, Some("Wrapped".to_owned())); + + fs::write( + dir.join("manifest.yaml"), + r#" +schema: bioscript:package:1.0 +name: Missing Entrypoint +"#, + ) + .unwrap(); + assert!(descriptor_err(&dir).contains("missing entrypoint")); + + fs::write( + dir.join("manifest.yaml"), + r#" +schema: bioscript:package:1.0 +entrypoint: ../escape.yaml +"#, + ) + .unwrap(); + assert!(descriptor_err(&dir).contains("escapes")); + + fs::write(dir.join("manifest.yaml"), "schema: unsupported\n").unwrap(); + assert!(descriptor_err(&dir).contains("unsupported schema")); + + fs::write(dir.join("manifest.yaml"), "name: missing schema\n").unwrap(); + assert!(descriptor_err(&dir).contains("missing schema")); + + fs::remove_file(dir.join("manifest.yaml")).unwrap(); + fs::write( + dir.join(LEGACY_PACKAGE_DESCRIPTOR), + r#" +schema: bioscript:package:1.0 +name: Legacy +entrypoint: assay.yaml +"#, + ) + .unwrap(); + let descriptor = load_package_descriptor(&dir).unwrap(); + assert_eq!(descriptor.entrypoint, PathBuf::from("assay.yaml")); + assert_eq!(descriptor.name, Some("Legacy".to_owned())); + fs::remove_file(dir.join(LEGACY_PACKAGE_DESCRIPTOR)).unwrap(); + + assert!(descriptor_err(&dir).contains("does not contain")); + + fs::remove_dir_all(dir).unwrap(); + } + + #[test] + fn import_package_zip_extracts_and_validates_entrypoint() { + let dir = temp_dir("import"); + let zip_path = dir.join("pkg.zip"); + write_zip( + &zip_path, + &[ + ( + "manifest.yaml", + r#" +schema: bioscript:package:1.0 +name: Test Package +entrypoint: nested/variant.yaml +"#, + ), + ("nested/", ""), + ("nested/variant.yaml", &variant_manifest("Nested")), + ("docs/readme.md", "hello"), + ], + ); + + let imported = import_package_zip(&dir, &zip_path, Some(Path::new("out"))).unwrap(); + assert_eq!(imported.name, Some("Test Package".to_owned())); + 
assert!(imported.entrypoint.ends_with("nested/variant.yaml")); + assert!(imported.root.join("docs/readme.md").exists()); + + let occupied = dir.join("occupied"); + fs::create_dir_all(&occupied).unwrap(); + fs::write(occupied.join("existing.txt"), "x").unwrap(); + assert!(import_err(&dir, &zip_path, Some(Path::new("occupied"))).contains("not empty")); + + let empty_output = dir.join("empty-output"); + fs::create_dir_all(&empty_output).unwrap(); + let imported_empty = + import_package_zip(&dir, &zip_path, Some(Path::new("empty-output"))).unwrap(); + assert!(imported_empty.entrypoint.ends_with("nested/variant.yaml")); + + let cached = import_package_zip(&dir, &zip_path, None).unwrap(); + assert!(cached + .root + .starts_with(dir.join(PACKAGE_CACHE_DIR).canonicalize().unwrap())); + + fs::remove_dir_all(dir).unwrap(); + } + + #[test] + fn extract_package_zip_rejects_bad_entries_and_bad_archives() { + let dir = temp_dir("extract-errors"); + let bad_zip = dir.join("not.zip"); + fs::write(&bad_zip, "not a zip").unwrap(); + assert!(extract_package_zip(&bad_zip, &dir.join("out")) + .unwrap_err() + .contains("failed to read package zip")); + + let unsupported = dir.join("unsupported.zip"); + write_zip(&unsupported, &[("script.sh", "echo no")]); + assert!(extract_package_zip(&unsupported, &dir.join("unsupported-out")) + .unwrap_err() + .contains("unsupported extension")); + + let unsafe_path = dir.join("unsafe.zip"); + write_zip(&unsafe_path, &[("../escape.yaml", "no")]); + assert!(extract_package_zip(&unsafe_path, &dir.join("unsafe-out")) + .unwrap_err() + .contains("unsafe path")); + + fs::remove_dir_all(dir).unwrap(); + } + + #[test] + fn import_package_zip_reports_entrypoint_resolution_and_schema_errors() { + let dir = temp_dir("import-entrypoint-errors"); + + let missing_entrypoint = dir.join("missing-entrypoint.zip"); + write_zip( + &missing_entrypoint, + &[( + "manifest.yaml", + r#" +schema: bioscript:package:1.0 +entrypoint: missing.yaml +"#, + )], + ); + 
assert!(import_err(&dir, &missing_entrypoint, Some(Path::new("missing-out"))) + .contains("failed to resolve package entrypoint")); + + let bad_schema = dir.join("bad-schema.zip"); + write_zip( + &bad_schema, + &[ + ( + "manifest.yaml", + r#" +schema: bioscript:package:1.0 +entrypoint: nested/custom.yaml +"#, + ), + ("nested/custom.yaml", "schema: custom:schema\nname: bad\n"), + ], + ); + assert!(import_err(&dir, &bad_schema, Some(Path::new("bad-schema-out"))) + .contains("unsupported schema")); + + fs::remove_dir_all(dir).unwrap(); + } + + #[test] + fn package_release_manifest_resolves_local_artifacts_and_hashes() { + let dir = temp_dir("release"); + let zip_path = dir.join("pkg.zip"); + write_zip(&zip_path, &[("manifest.yaml", &variant_manifest("Release"))]); + let digest = sha256_file(&zip_path).unwrap(); + let release_path = dir.join("package.yaml"); + fs::write( + &release_path, + format!( + r#" +schema: bioscript:package-release:1.0 +artifact: + path: pkg.zip + sha256: {digest} +"# + ), + ) + .unwrap(); + + let resolved = package_zip_from_release_manifest(&dir, &release_path, None) + .unwrap() + .unwrap(); + assert_eq!(resolved, zip_path); + + fs::write( + &release_path, + r#" +schema: bioscript:package-release:1.0 +artifact: + path: pkg.zip + sha256: bad +"#, + ) + .unwrap(); + assert!(package_zip_from_release_manifest(&dir, &release_path, None) + .unwrap_err() + .contains("sha256 mismatch")); + + fs::write(&release_path, "schema: bioscript:variant:1.0\n").unwrap(); + assert!(package_zip_from_release_manifest(&dir, &release_path, None) + .unwrap() + .is_none()); + assert!(package_zip_from_release_manifest(&dir, &dir.join("missing.yaml"), None) + .unwrap() + .is_none()); + + fs::remove_dir_all(dir).unwrap(); + } + + #[test] + fn prepare_package_entrypoint_imports_zip_and_release_manifests() { + let dir = temp_dir("prepare-entrypoint"); + let zip_path = dir.join("pkg.zip"); + write_zip(&zip_path, &[("manifest.yaml", &variant_manifest("Prepared"))]); + + let 
entrypoint = prepare_package_entrypoint_from_arg(&dir, &zip_path).unwrap(); + assert!(entrypoint.ends_with("manifest.yaml")); + assert!(entrypoint.starts_with(dir.join(PACKAGE_CACHE_DIR).canonicalize().unwrap())); + + let digest = sha256_file(&zip_path).unwrap(); + let release_path = dir.join("package.yaml"); + fs::write( + &release_path, + format!( + r#" +schema: bioscript:package-release:1.0 +artifact: + path: pkg.zip + sha256: {digest} +"# + ), + ) + .unwrap(); + let release_entrypoint = prepare_package_entrypoint_from_arg(&dir, &release_path).unwrap(); + assert!(release_entrypoint.ends_with("manifest.yaml")); + + let plain = dir.join("panel.yaml"); + fs::write(&plain, "schema: bioscript:panel:1.0\n").unwrap(); + assert_eq!(prepare_package_entrypoint_from_arg(&dir, &plain).unwrap(), plain); + + fs::remove_dir_all(dir).unwrap(); + } + + #[test] + fn package_release_helpers_cover_yaml_strings_urls_and_download_validation() { + let value: serde_yaml::Value = serde_yaml::from_str("name: Example\n").unwrap(); + assert_eq!(yaml_string(&value, "name"), Some("Example".to_owned())); + assert_eq!(yaml_string(&value, "missing"), None); + assert_eq!( + join_url("https://example.test/releases/package.yaml?download=1", "pkg.zip"), + "https://example.test/releases/pkg.zip" + ); + assert_eq!( + join_url("https://cdn.test/pkg.zip", "https://other.test/pkg.zip"), + "https://other.test/pkg.zip" + ); + + let dir = temp_dir("download-validation"); + assert!(download_package_url(&dir, "http://example.test/pkg.zip") + .unwrap_err() + .contains("must use https")); + assert!(download_package_url(&dir, "https://example.test/pkg.exe") + .unwrap_err() + .contains("must point")); + let target = package_cache_target(&dir, Path::new("weird package!.zip")); + assert!(target.display().to_string().contains("weird-package-")); + + fs::remove_dir_all(dir).unwrap(); + } + + #[test] + fn import_package_command_validates_arguments() { + 
assert!(run_import_package(Vec::new()).unwrap_err().contains("usage")); + assert!(run_import_package(vec!["--bad".to_owned()]) + .unwrap_err() + .contains("unexpected argument")); + assert!(run_import_package(vec!["one.zip".to_owned(), "two.zip".to_owned()]) + .unwrap_err() + .contains("unexpected argument")); + assert!(run_import_package(vec!["one.zip".to_owned(), "--root".to_owned()]) + .unwrap_err() + .contains("--root requires")); + } + + #[test] + fn import_package_command_imports_local_zip_to_output_dir() { + let dir = temp_dir("import-command"); + let zip_path = dir.join("pkg.zip"); + write_zip( + &zip_path, + &[ + ( + "manifest.yaml", + r#" +schema: bioscript:package:1.0 +name: Import Command Package +entrypoint: variant.yaml +"#, + ), + ("variant.yaml", &variant_manifest("Imported")), + ], + ); + let output = dir.join("imported"); + + run_import_package(vec![ + zip_path.display().to_string(), + "--root".to_owned(), + dir.display().to_string(), + "--output-dir".to_owned(), + output.display().to_string(), + ]) + .unwrap(); + + assert!(output.join("manifest.yaml").exists()); + assert!(output.join("variant.yaml").exists()); + + fs::remove_dir_all(dir).unwrap(); + } + + fn descriptor_err(root: &Path) -> String { + match load_package_descriptor(root) { + Ok(_) => panic!("expected package descriptor to fail"), + Err(err) => err, + } + } + + fn import_err(runtime_root: &Path, zip_path: &Path, output_dir: Option<&Path>) -> String { + match import_package_zip(runtime_root, zip_path, output_dir) { + Ok(_) => panic!("expected package import to fail"), + Err(err) => err, + } + } +} diff --git a/rust/bioscript-cli/src/report_execution.rs b/rust/bioscript-cli/src/report_execution.rs index 227b257..df67b19 100644 --- a/rust/bioscript-cli/src/report_execution.rs +++ b/rust/bioscript-cli/src/report_execution.rs @@ -244,3 +244,205 @@ fn parse_analysis_output( fn participant_id_from_path(path: &Path) -> String { bioscript_reporting::participant_id_from_path(path) } + 
+#[cfg(test)] +mod app_report_execution_tests { + use super::*; + use std::time::{SystemTime, UNIX_EPOCH}; + + fn temp_dir(name: &str) -> PathBuf { + let unique = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_nanos(); + let dir = env::temp_dir().join(format!( + "bioscript-report-execution-{name}-{}-{unique}", + std::process::id() + )); + fs::create_dir_all(&dir).unwrap(); + dir + } + + #[test] + fn path_and_analysis_output_helpers_normalize_values() { + let root = Path::new("/tmp/runtime-root"); + let nested = root.join("analysis/p1/out.json"); + assert_eq!(runtime_path_string(root, &nested), "analysis/p1/out.json"); + assert_eq!( + runtime_path_string(root, Path::new("/outside/file.txt")), + "/outside/file.txt" + ); + assert_eq!( + participant_id_from_path(Path::new("/data/sample.vcf.gz")), + "sample" + ); + + let dir = temp_dir("analysis-output"); + let json = dir.join("rows.json"); + fs::write(&json, r#"{"rows":[{"score":2,"label":"ok"}]}"#).unwrap(); + let (rows, headers) = parse_analysis_output(&json, "json").unwrap(); + assert_eq!(rows[0]["score"], 2); + assert!(headers.contains(&"score".to_owned())); + + let bad = dir.join("bad.json"); + fs::write(&bad, "{").unwrap(); + assert!(parse_analysis_output(&bad, "json") + .unwrap_err() + .contains("failed to parse analysis output")); + + fs::remove_dir_all(dir).unwrap(); + } + + #[test] + fn run_interpretations_rejects_unsupported_kinds_before_runtime() { + let dir = temp_dir("unsupported-kind"); + let manifest = dir.join("panel.yaml"); + let input = dir.join("input.txt"); + fs::write(&input, "rsid\tgenotype\nrs1\tA/G\n").unwrap(); + let interpretation = PanelInterpretation { + id: "not-bioscript".to_owned(), + label: Some("Not BioScript".to_owned()), + kind: "python".to_owned(), + path: "analysis.py".to_owned(), + output_format: Some("json".to_owned()), + derived_from: Vec::new(), + emits: Vec::new(), + logic: None, + }; + let loader = GenotypeLoadOptions::default(); + let options = 
ReportAnalysisOptions { + runtime_root: &dir, + input_file: &input, + participant_id: "p1", + loader: &loader, + output_dir: &dir, + observation_rows: &[], + filters: &[], + max_duration_ms: 10, + }; + + let err = + run_interpretations_for_report(&manifest, "assay", &[interpretation], &options) + .unwrap_err(); + assert!(err.contains("unsupported kind")); + + fs::remove_dir_all(dir).unwrap(); + } + + #[test] + fn run_manifest_rows_for_report_reads_text_input_and_variant_manifest() { + let dir = temp_dir("manifest-rows"); + let manifest = dir.join("variant.yaml"); + fs::write( + &manifest, + r#" +schema: bioscript:variant:1.0 +version: "1.0" +name: rs1 +gene: ABC +identifiers: + rsids: [rs1] +coordinates: + grch38: + chrom: "1" + pos: 100 +alleles: + kind: snv + ref: A + alts: [G] +"#, + ) + .unwrap(); + let input = dir.join("sample.txt"); + fs::write(&input, "rsid\tgenotype\nrs1\tA/G\n").unwrap(); + let loader = GenotypeLoadOptions { + format: Some(GenotypeSourceFormat::Text), + ..GenotypeLoadOptions::default() + }; + + let rows = + run_manifest_rows_for_report(&dir, &manifest, &input, "p1", &loader, &[]).unwrap(); + assert_eq!(rows.len(), 1); + assert_eq!(rows[0]["participant_id"], "p1"); + assert_eq!(rows[0]["matched_rsid"], "rs1"); + assert_eq!(rows[0]["genotype"], "AG"); + + let missing_input = dir.join("missing.txt"); + assert!(run_manifest_rows_for_report( + &dir, + &manifest, + &missing_input, + "p1", + &loader, + &[], + ) + .unwrap_err() + .contains("No such file")); + + fs::remove_dir_all(dir).unwrap(); + } + + #[test] + fn run_interpretations_executes_bioscript_analysis_and_builds_json_output() { + let dir = temp_dir("analysis-success"); + let manifest = dir.join("assay.yaml"); + let script = dir.join("analysis.bs"); + let input = dir.join("sample.txt"); + let output = dir.join("out"); + fs::write(&input, "rsid\tgenotype\nrs1\tA/G\n").unwrap(); + fs::write( + &script, + r#" +def main(): + bioscript.write_tsv(output_file, [ + {"participant": 
participant_id, "score": 7, "source": input_file, "observations": observations_file} + ]) + +if __name__ == "__main__": + main() +"#, + ) + .unwrap(); + let interpretation = PanelInterpretation { + id: "score".to_owned(), + label: Some("Score".to_owned()), + kind: "bioscript".to_owned(), + path: "analysis.bs".to_owned(), + output_format: Some("tsv".to_owned()), + derived_from: Vec::new(), + emits: Vec::new(), + logic: None, + }; + let rows = [BTreeMap::from([ + ("participant_id".to_owned(), "sample".to_owned()), + ("matched_rsid".to_owned(), "rs1".to_owned()), + ("genotype".to_owned(), "AG".to_owned()), + ])]; + let loader = GenotypeLoadOptions::default(); + let options = ReportAnalysisOptions { + runtime_root: &dir, + input_file: &input, + participant_id: "sample", + loader: &loader, + output_dir: &output, + observation_rows: &rows, + filters: &[], + max_duration_ms: 1000, + }; + + let outputs = + run_interpretations_for_report(&manifest, "assay-one", &[interpretation], &options) + .unwrap(); + assert_eq!(outputs.len(), 1); + assert_eq!(outputs[0]["assay_id"], "assay-one"); + assert_eq!(outputs[0]["participant_id"], "sample"); + assert_eq!(outputs[0]["rows"][0]["score"], "7"); + let headers = outputs[0]["row_headers"].as_array().unwrap(); + assert!(headers.contains(&serde_json::Value::String("participant".to_owned()))); + assert!(output + .join("analysis/sample/score.observations.tsv") + .exists()); + + fs::remove_dir_all(dir).unwrap(); + } +} diff --git a/rust/bioscript-cli/src/report_observations.rs b/rust/bioscript-cli/src/report_observations.rs index 261dada..d4b7f65 100644 --- a/rust/bioscript-cli/src/report_observations.rs +++ b/rust/bioscript-cli/src/report_observations.rs @@ -106,3 +106,101 @@ fn variant_observed_alt_alleles(path: &Path) -> Result, String> { .map(ToOwned::to_owned) .collect()) } + +#[cfg(test)] +mod app_report_observation_tests { + use super::*; + use std::time::{SystemTime, UNIX_EPOCH}; + + fn temp_dir(name: &str) -> PathBuf { + let 
unique = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_nanos(); + let dir = env::temp_dir().join(format!( + "bioscript-report-observations-{name}-{}-{unique}", + std::process::id() + )); + fs::create_dir_all(&dir).unwrap(); + dir + } + + fn write_variant_yaml(dir: &Path, name: &str, body: &str) -> PathBuf { + let path = dir.join(name); + fs::write(&path, body).unwrap(); + path + } + + #[test] + fn manifest_helpers_extract_gene_alts_and_primary_source() { + let dir = temp_dir("variant"); + let path = write_variant_yaml( + &dir, + "variant.yaml", + r#" +schema: bioscript:variant:1.0 +name: Test variant +gene: CYP2D6 +identifiers: + rsids: [rs123] +alleles: + observed_alts: [A, T] +evidence: + references: + - label: Primary + url: https://www.ncbi.nlm.nih.gov/snp/rs123 +"#, + ); + + assert_eq!(variant_manifest_gene(&path).unwrap(), "CYP2D6"); + assert_eq!( + variant_observed_alt_alleles(&path).unwrap(), + vec!["A".to_owned(), "T".to_owned()] + ); + let source = variant_primary_source(&path).unwrap(); + assert!(source["url"].as_str().unwrap().contains("rs123")); + assert!(source_url_contains(&source, "ncbi.nlm.nih.gov/snp")); + + fs::remove_dir_all(dir).unwrap(); + } + + #[test] + fn primary_source_falls_back_to_rsid_when_no_provenance_link_exists() { + let dir = temp_dir("rsid"); + let path = write_variant_yaml( + &dir, + "variant.yaml", + r#" +schema: bioscript:variant:1.0 +name: Test variant +identifiers: + rsids: + - rs4242 +"#, + ); + + let source = variant_primary_source(&path).unwrap(); + assert_eq!(source["label"], "dbSNP / NCBI SNP"); + assert_eq!(source["url"], "https://www.ncbi.nlm.nih.gov/snp/rs4242"); + + fs::remove_dir_all(dir).unwrap(); + } + + #[test] + fn yaml_helpers_report_read_and_parse_errors() { + let dir = temp_dir("errors"); + let missing = dir.join("missing.yaml"); + assert!(load_yaml_value(&missing).unwrap_err().contains("failed to read YAML")); + + let bad = write_variant_yaml(&dir, "bad.yaml", "name: [unterminated"); + 
assert!(load_yaml_value(&bad).unwrap_err().contains("failed to parse YAML"));
+        assert!(variant_manifest_gene(&bad)
+            .unwrap_err()
+            .contains("failed to parse variant YAML"));
+
+        let no_alts = write_variant_yaml(&dir, "no-alts.yaml", "gene: ABC\n");
+        assert!(variant_observed_alt_alleles(&no_alts).unwrap().is_empty());
+
+        fs::remove_dir_all(dir).unwrap();
+    }
+}
diff --git a/rust/bioscript-cli/src/report_options.rs b/rust/bioscript-cli/src/report_options.rs
index dde0487..fdd41c7 100644
--- a/rust/bioscript-cli/src/report_options.rs
+++ b/rust/bioscript-cli/src/report_options.rs
@@ -384,3 +384,270 @@ fn open_html_report(path: &Path) -> Result<(), String> {
         ))
     }
 }
+
+#[cfg(test)]
+mod app_report_option_tests {
+    use super::*;
+    use bioscript_core::Assembly;
+    use bioscript_formats::{
+        DetectedKind, DetectionConfidence, FileContainer, FileInspection, InferredSex,
+        SexDetectionConfidence, SexInference,
+    };
+    use std::time::{SystemTime, UNIX_EPOCH};
+
+    /// Turns string literals into the owned-`String` iterator that `consume_arg` is fed in these tests.
+    fn args(items: &[&str]) -> std::vec::IntoIter<String> {
+        items
+            .iter()
+            .map(|item| (*item).to_owned())
+            .collect::<Vec<_>>()
+            .into_iter()
+    }
+
+    /// Fixture inspection of a plain VCF: GRCh37 assembly and a high-confidence male sex inference.
+    fn inspection() -> FileInspection {
+        FileInspection {
+            path: PathBuf::from("sample.vcf"),
+            container: FileContainer::Plain,
+            detected_kind: DetectedKind::Vcf,
+            confidence: DetectionConfidence::Authoritative,
+            source: None,
+            assembly: Some(Assembly::Grch37),
+            phased: Some(false),
+            selected_entry: None,
+            has_index: Some(false),
+            index_path: None,
+            reference_matches: None,
+            inferred_sex: Some(SexInference {
+                sex: InferredSex::Male,
+                confidence: SexDetectionConfidence::High,
+                method: "fixture".to_owned(),
+                evidence: vec!["test".to_owned()],
+            }),
+            evidence: Vec::new(),
+            warnings: Vec::new(),
+            duration_ms: 0,
+        }
+    }
+
+    /// Creates a scratch directory named from `name`, the process id and a nanosecond timestamp, and returns it.
+    fn temp_dir(name: &str) -> PathBuf {
+        let unique = SystemTime::now()
+            .duration_since(UNIX_EPOCH)
+            .unwrap()
+            .as_nanos();
+        let dir = env::temp_dir().join(format!(
+            "bioscript-app-report-{name}-{}-{unique}",
+            std::process::id()
+        ));
+
fs::create_dir_all(&dir).unwrap(); + dir + } + + #[test] + fn output_format_and_sample_sex_parsers_accept_aliases_and_reject_unknowns() { + assert_eq!(parse_app_output_format("tsv").unwrap(), AppOutputFormat::Tsv); + assert_eq!(parse_app_output_format("json").unwrap(), AppOutputFormat::Json); + assert_eq!( + parse_app_output_format("jsonl").unwrap(), + AppOutputFormat::Jsonl + ); + assert_eq!(parse_app_output_format("both").unwrap(), AppOutputFormat::Both); + assert!(parse_app_output_format("xml").unwrap_err().contains("unsupported")); + + assert_eq!(parse_sample_sex("male").unwrap(), InferredSex::Male); + assert_eq!(parse_sample_sex("M").unwrap(), InferredSex::Male); + assert_eq!(parse_sample_sex("female").unwrap(), InferredSex::Female); + assert_eq!(parse_sample_sex("f").unwrap(), InferredSex::Female); + assert_eq!(parse_sample_sex("unknown").unwrap(), InferredSex::Unknown); + assert_eq!(parse_sample_sex("U").unwrap(), InferredSex::Unknown); + assert!(parse_sample_sex("other").unwrap_err().contains("unsupported")); + } + + #[test] + fn cli_state_consumes_flags_and_finishes_with_normalized_paths() { + let mut state = AppReportCliState::new().unwrap(); + let mut iter = args(&[ + "panel.yaml", + "--input-file", + "sample.vcf", + "--output-dir", + "out", + "--root", + ".", + "--html", + "--filter", + "tag=pgx", + "--detect-sex", + "--sample-sex", + "female", + "--observations-format", + "both", + "--reports-format", + "json", + "--analysis-max-duration-ms", + "2500", + "--input-format", + "vcf", + "--input-index", + "sample.vcf.tbi", + "--reference-file", + "ref.fa", + "--reference-index", + "ref.fa.fai", + "--allow-md5-mismatch", + ]); + while let Some(arg) = iter.next() { + state.consume_arg(&arg, &mut iter).unwrap(); + } + + let options = state.finish().unwrap(); + assert_eq!(options.manifest_path, PathBuf::from("./panel.yaml")); + assert_eq!(options.input_files, vec![PathBuf::from("./sample.vcf")]); + assert_eq!(options.output_dir, PathBuf::from("./out")); + 
assert!(options.html); + assert!(!options.open_report); + assert_eq!(options.filters, vec!["tag=pgx"]); + assert_eq!(options.observations_format, AppOutputFormat::Both); + assert_eq!(options.reports_format, AppOutputFormat::Json); + assert_eq!(options.analysis_max_duration_ms, 2500); + assert!(options.detect_sex); + assert_eq!(options.sample_sex, Some(InferredSex::Female)); + assert_eq!(options.loader.format, Some(GenotypeSourceFormat::Vcf)); + assert_eq!( + options.loader.input_index, + Some(PathBuf::from("./sample.vcf.tbi")) + ); + assert!(options.loader.allow_reference_md5_mismatch); + } + + #[test] + fn cli_state_reports_required_argument_errors() { + let missing_manifest = finish_err(AppReportCliState::new().unwrap()); + assert!(missing_manifest.contains("usage: bioscript report")); + + let mut state = AppReportCliState::new().unwrap(); + let mut iter = args(&["manifest.yaml"]); + while let Some(arg) = iter.next() { + state.consume_arg(&arg, &mut iter).unwrap(); + } + assert!(finish_err(state).contains("at least one --input-file")); + + let mut state = AppReportCliState::new().unwrap(); + let mut iter = args(&["manifest.yaml", "--input-file", "sample.txt"]); + while let Some(arg) = iter.next() { + state.consume_arg(&arg, &mut iter).unwrap(); + } + assert!(finish_err(state).contains("--output-dir")); + + let mut state = AppReportCliState::new().unwrap(); + let mut iter = args(&["--analysis-max-duration-ms", "not-a-number"]); + let arg = iter.next().unwrap(); + assert!(state.consume_arg(&arg, &mut iter).unwrap_err().contains("invalid")); + } + + #[test] + fn loader_and_path_helpers_preserve_explicit_settings() { + let base = GenotypeLoadOptions { + assembly: Some(Assembly::Grch38), + inferred_sex: None, + ..GenotypeLoadOptions::default() + }; + let loader = loader_with_inspection(&base, &inspection()); + assert_eq!(loader.assembly, Some(Assembly::Grch37)); + assert_eq!(loader.inferred_sex, Some(InferredSex::Male)); + + let explicit = 
explicit_sample_sex_inference(InferredSex::Unknown); + assert_eq!(explicit.sex, InferredSex::Unknown); + assert_eq!(explicit.method, "explicit_sample_sex"); + + let root = Path::new("/tmp/bioscript-root"); + assert_eq!(absolutize(root, Path::new("a/b")), root.join("a/b")); + assert_eq!( + absolutize(root, Path::new("/already/absolute")), + PathBuf::from("/already/absolute") + ); + } + + #[test] + fn run_app_report_generates_outputs_for_text_input_and_explicit_sex() { + let dir = temp_dir("generate"); + let manifest = dir.join("variant.yaml"); + fs::write( + &manifest, + r#" +schema: bioscript:variant:1.0 +version: "1.0" +name: rs1 +gene: ABC +identifiers: + rsids: [rs1] +coordinates: + grch38: + chrom: "X" + pos: 100 +alleles: + kind: snv + ref: A + alts: [G] +findings: + - schema: bioscript:trait:1.0 + summary: Variant present + binding: + source: variant + variant: variant.yaml + key: outcome + value: variant +"#, + ) + .unwrap(); + let input = dir.join("sample.txt"); + fs::write(&input, "rsid\tgenotype\nrs1\tG\n").unwrap(); + let output = dir.join("out"); + + run_app_report(vec![ + manifest.display().to_string(), + "--input-file".to_owned(), + input.display().to_string(), + "--output-dir".to_owned(), + output.display().to_string(), + "--root".to_owned(), + dir.display().to_string(), + "--html".to_owned(), + "--input-format".to_owned(), + "text".to_owned(), + "--sample-sex".to_owned(), + "male".to_owned(), + "--observations-format".to_owned(), + "both".to_owned(), + "--reports-format".to_owned(), + "both".to_owned(), + ]) + .unwrap(); + + assert!(fs::read_to_string(output.join("observations.tsv")) + .unwrap() + .contains("sample")); + assert!(fs::read_to_string(output.join("observations.jsonl")) + .unwrap() + .contains("detected_sex=male")); + assert!(fs::read_to_string(output.join("reports.jsonl")) + .unwrap() + .contains("Variant present")); + assert!(fs::read_to_string(output.join("reports.json")) + .unwrap() + .contains("report-set")); + 
assert!(fs::read_to_string(output.join("index.html"))
+            .unwrap()
+            .contains("")); // NOTE(review): an empty needle always matches; the expected HTML marker looks lost in extraction — confirm.
+
+        fs::remove_dir_all(dir).unwrap();
+    }
+
+    fn finish_err(state: AppReportCliState) -> String { // returns the error from `finish()`; panics if it succeeds
+        match state.finish() {
+            Ok(_) => panic!("expected report CLI state to fail"),
+            Err(err) => err,
+        }
+    }
+}
diff --git a/rust/bioscript-cli/src/report_output.rs b/rust/bioscript-cli/src/report_output.rs
index be6e86a..460edbb 100644
--- a/rust/bioscript-cli/src/report_output.rs
+++ b/rust/bioscript-cli/src/report_output.rs
@@ -64,3 +64,89 @@ fn write_app_html(
     fs::write(output_dir.join("index.html"), out)
         .map_err(|err| format!("failed to write index.html: {err}"))
 }
+
+#[cfg(test)]
+mod app_report_output_tests {
+    use super::*;
+    use std::time::{SystemTime, UNIX_EPOCH};
+
+    fn temp_dir(name: &str) -> PathBuf { // scratch dir named from `name`, process id and nanosecond timestamp
+        let unique = SystemTime::now()
+            .duration_since(UNIX_EPOCH)
+            .unwrap()
+            .as_nanos();
+        let dir = env::temp_dir().join(format!(
+            "bioscript-report-output-{name}-{}-{unique}",
+            std::process::id()
+        ));
+        fs::create_dir_all(&dir).unwrap();
+        dir
+    }
+
+    #[test]
+    fn writes_observation_outputs_for_each_format() {
+        let rows = vec![serde_json::json!({
+            "participant_id": "p1",
+            "gene": "CYP2D6",
+            "genotype": "A/G"
+        })];
+
+        let dir = temp_dir("observations");
+        write_app_observations(&dir, &rows, AppOutputFormat::Both).unwrap();
+        let tsv = fs::read_to_string(dir.join("observations.tsv")).unwrap();
+        assert!(tsv.contains("participant_id"));
+        assert!(tsv.contains("p1"));
+        assert!(fs::read_to_string(dir.join("observations.jsonl"))
+            .unwrap()
+            .contains("\"participant_id\":\"p1\""));
+        assert!(!dir.join("observations.json").exists()); // `Both` is expected to write TSV + JSONL only
+
+        write_app_observations(&dir, &rows, AppOutputFormat::Json).unwrap(); // same directory, now the single-document JSON form
+        let json: serde_json::Value =
+            serde_json::from_str(&fs::read_to_string(dir.join("observations.json")).unwrap())
+                .unwrap();
+        assert_eq!(json["observations"][0]["genotype"], "A/G");
+
+        fs::remove_dir_all(dir).unwrap();
+    }
+
+    #[test]
+    fn
writes_analysis_reports_and_html_outputs() {
+        let dir = temp_dir("reports");
+        let analyses = vec![serde_json::json!({"id": "analysis-1"})];
+        let reports = vec![serde_json::json!({
+            "participant": {"id": "p1"},
+            "observations": []
+        })];
+        let observations = vec![serde_json::json!({"participant_id": "p1"})];
+
+        write_app_analyses(&dir, &analyses).unwrap();
+        assert!(fs::read_to_string(dir.join("analysis.jsonl"))
+            .unwrap()
+            .contains("analysis-1"));
+
+        write_app_reports(&dir, &reports, AppOutputFormat::Both).unwrap(); // `Both` here yields reports.jsonl and reports.json
+        assert!(fs::read_to_string(dir.join("reports.jsonl"))
+            .unwrap()
+            .contains("\"participant\""));
+        let report_set: serde_json::Value =
+            serde_json::from_str(&fs::read_to_string(dir.join("reports.json")).unwrap()).unwrap();
+        assert_eq!(report_set["schema"], "bioscript:report-set:1.0");
+
+        write_app_html(&dir, &observations, &reports).unwrap();
+        assert!(fs::read_to_string(dir.join("index.html"))
+            .unwrap()
+            .contains("")); // NOTE(review): an empty needle always matches; the expected HTML marker looks lost in extraction — confirm.
+
+        fs::remove_dir_all(dir).unwrap();
+    }
+
+    #[test]
+    fn write_helpers_report_filesystem_errors() {
+        let missing_parent = env::temp_dir() // the parent directory below is never created by this test
+            .join(format!("bioscript-missing-parent-{}", std::process::id()))
+            .join("out.jsonl");
+        let err = write_jsonl(&missing_parent, &[]).unwrap_err();
+        assert!(err.contains("failed to write"));
+    }
+}
diff --git a/rust/bioscript-cli/src/report_review.rs b/rust/bioscript-cli/src/report_review.rs
index d10932c..17fbe3e 100644
--- a/rust/bioscript-cli/src/report_review.rs
+++ b/rust/bioscript-cli/src/report_review.rs
@@ -305,3 +305,207 @@ fn review_case_genotype_text(case: &ReviewCase) -> String {
     }
     out
 }
+
+#[cfg(test)]
+mod review_report_tests {
+    use super::*;
+    use std::time::{SystemTime, UNIX_EPOCH};
+
+    fn temp_dir(name: &str) -> PathBuf { // scratch dir named from `name`, process id and nanosecond timestamp
+        let unique = SystemTime::now()
+            .duration_since(UNIX_EPOCH)
+            .unwrap()
+            .as_nanos();
+        let dir = env::temp_dir().join(format!(
+            "bioscript-review-report-{name}-{}-{unique}",
+            std::process::id()
+        ));
+
fs::create_dir_all(&dir).unwrap(); + dir + } + + #[test] + fn run_review_report_validates_required_arguments() { + assert!(run_review_report(Vec::new()).unwrap_err().contains("usage")); + assert!(run_review_report(vec!["manifest.yaml".to_owned()]) + .unwrap_err() + .contains("--cases")); + assert!(run_review_report(vec![ + "manifest.yaml".to_owned(), + "--cases".to_owned(), + "cases.yaml".to_owned(), + ]) + .unwrap_err() + .contains("--output-dir")); + assert!(run_review_report(vec![ + "manifest.yaml".to_owned(), + "--unknown".to_owned(), + ]) + .unwrap_err() + .contains("unexpected argument")); + assert!(run_review_report(vec![ + "first.yaml".to_owned(), + "second.yaml".to_owned(), + ]) + .unwrap_err() + .contains("unexpected argument")); + } + + #[test] + fn review_cases_load_labels_variants_and_null_genotypes() { + let dir = temp_dir("cases"); + let cases_path = dir.join("cases.yaml"); + fs::write( + &cases_path, + r#" +cases: + - id: c1 + label: First case + genotypes: + rs1: A/G + rs2: null + - id: c2 + variants: + rs3: C/T +"#, + ) + .unwrap(); + + let cases = load_review_cases(&cases_path).unwrap(); + assert_eq!(cases.len(), 2); + assert_eq!(cases[0].label, "First case"); + assert_eq!(cases[0].genotypes["rs1"], Some("A/G".to_owned())); + assert_eq!(cases[0].genotypes["rs2"], None); + assert_eq!(cases[1].label, "c2"); + assert_eq!(cases[1].genotypes["rs3"], Some("C/T".to_owned())); + + let text = review_case_genotype_text(&cases[0]); + assert!(text.contains("rs1\tA/G")); + assert!(!text.contains("rs2")); + + fs::remove_dir_all(dir).unwrap(); + } + + #[test] + fn review_case_parser_reports_shape_errors() { + let missing_cases = serde_yaml::from_str::("not_cases: []").unwrap(); + let path = temp_dir("errors").join("missing-cases.yaml"); + fs::write(&path, serde_yaml::to_string(&missing_cases).unwrap()).unwrap(); + assert!(review_cases_err(&path).contains("missing cases list")); + let dir = path.parent().unwrap().to_path_buf(); + + let missing_id = 
serde_yaml::from_str::( + r#"{label: no id, genotypes: {rs1: A/G}}"#, + ) + .unwrap(); + assert!(review_case_err(&missing_id).contains("missing id")); + + let missing_genotypes = + serde_yaml::from_str::(r#"{id: c1}"#).unwrap(); + assert!(review_case_err(&missing_genotypes).contains("missing genotypes")); + + let bad_key = + serde_yaml::from_str::(r#"{id: c1, genotypes: {1: A/G}}"#) + .unwrap(); + assert!(review_case_err(&bad_key).contains("non-string genotype key")); + + let bad_value = + serde_yaml::from_str::(r#"{id: c1, genotypes: {rs1: [A, G]}}"#) + .unwrap(); + assert!(review_case_err(&bad_value).contains("must be string or null")); + + fs::remove_dir_all(dir).unwrap(); + } + + fn review_cases_err(path: &Path) -> String { + match load_review_cases(path) { + Ok(_) => panic!("expected review cases to fail"), + Err(err) => err, + } + } + + fn review_case_err(value: &serde_yaml::Value) -> String { + match review_case_from_yaml(value) { + Ok(_) => panic!("expected review case to fail"), + Err(err) => err, + } + } + + #[test] + fn generate_review_report_writes_observations_reports_html_and_cleans_temp_input() { + let dir = temp_dir("generate"); + let manifest = dir.join("variant.yaml"); + fs::write( + &manifest, + r#" +schema: bioscript:variant:1.0 +version: "1.0" +name: rs1 +gene: ABC +identifiers: + rsids: [rs1] +coordinates: + grch38: + chrom: "1" + pos: 100 +alleles: + kind: snv + ref: A + alts: [G] +findings: + - schema: bioscript:trait:1.0 + summary: Variant present + binding: + source: variant + variant: variant.yaml + key: outcome + value: variant +provenance: + sources: + - kind: database + label: Fixture + url: https://example.test/rs1 +"#, + ) + .unwrap(); + let cases = dir.join("cases.yaml"); + fs::write( + &cases, + r#" +cases: + - id: case1 + label: Case One + genotypes: + rs1: A/G + - id: case2 + genotypes: + rs1: null +"#, + ) + .unwrap(); + let output = dir.join("out"); + let options = ReviewReportOptions { + manifest_path: manifest, + cases_path: 
cases,
+            output_dir: output.clone(),
+            root: dir.clone(),
+            html: true,
+            filters: Vec::new(),
+        };
+
+        generate_review_report(&options).unwrap();
+
+        assert!(fs::read_to_string(output.join("observations.tsv"))
+            .unwrap()
+            .contains("case1"));
+        let reports = fs::read_to_string(output.join("reports.jsonl")).unwrap();
+        assert!(reports.contains("\"review_case\""));
+        assert!(reports.contains("\"case1\""));
+        assert!(fs::read_to_string(output.join("index.html"))
+            .unwrap()
+            .contains("")); // NOTE(review): an empty needle always matches; the expected HTML marker looks lost in extraction — confirm.
+        assert!(!output.join(".review-temp").exists()); // the run must not leave `.review-temp` behind in the output dir
+
+        fs::remove_dir_all(dir).unwrap();
+    }
+}
diff --git a/rust/bioscript-formats/src/alignment.rs b/rust/bioscript-formats/src/alignment.rs
index 382626f..60ac7fb 100644
--- a/rust/bioscript-formats/src/alignment.rs
+++ b/rust/bioscript-formats/src/alignment.rs
@@ -309,6 +309,30 @@ mod tests {
                 .to_string()
                 .contains("tabix index")
         );
+        assert!(
+            parse_bai_bytes(b"not a bai") // each of the four inputs below is deliberately not a valid file of its format
+                .unwrap_err()
+                .to_string()
+                .contains("BAM index")
+        );
+        assert!(
+            generate_cram_crai_bytes(b"not a cram")
+                .unwrap_err()
+                .to_string()
+                .contains("failed to read CRAM header")
+        );
+        assert!(
+            generate_bam_bai_bytes(b"not a bam")
+                .unwrap_err()
+                .to_string()
+                .contains("failed to read BAM header")
+        );
+        assert!(
+            generate_vcf_tbi_bytes(b"not a bgzipped vcf")
+                .unwrap_err()
+                .to_string()
+                .contains("failed to read VCF header")
+        );
         assert!(
             build_reference_repository(Path::new("/definitely/missing/reference.fa"))
                 .unwrap_err()
@@ -348,6 +372,21 @@ mod tests {
         assert!(!is_reference_md5_mismatch(&err));
     }
 
+    #[test]
+    fn alignment_helpers_generate_and_parse_fasta_index_bytes() {
+        let fasta_bytes = b">chr1\nACGT\n>chr2\nAACCGG\n"; // two records: chr1 (4 bases) and chr2 (6 bases)
+        let fai_bytes = generate_fasta_fai_bytes(fasta_bytes).unwrap();
+        let fai_text = String::from_utf8(fai_bytes.clone()).unwrap(); // clone: `fai_bytes` is parsed again below
+        assert!(fai_text.contains("chr1\t4\t6\t4\t5"));
+        assert!(fai_text.contains("chr2\t6"));
+
+        let index = parse_fai_bytes(&fai_bytes).unwrap();
+        let _repository =
build_reference_repository_from_readers( + std::io::BufReader::new(std::io::Cursor::new(fasta_bytes.to_vec())), + index, + ); + } + #[test] fn alignment_path_and_reader_wrappers_stream_mini_cram_records() { let dir = mini_fixtures_dir(); @@ -406,4 +445,18 @@ mod tests { .unwrap(); assert_eq!(raw_seen, 2); } + + #[test] + fn alignment_helpers_generate_crai_from_mini_cram_fixture() { + let dir = mini_fixtures_dir(); + let cram = dir.join("mini.cram"); + let crai_bytes = generate_cram_crai_bytes(&std::fs::read(&cram).unwrap()).unwrap(); + let index = parse_crai_bytes(&crai_bytes).unwrap(); + assert!(!index.is_empty()); + + let crai_reader_bytes = + generate_cram_crai_reader(File::open(&cram).unwrap()).expect("reader CRAI"); + let index_from_reader = parse_crai_bytes(&crai_reader_bytes).unwrap(); + assert_eq!(index_from_reader.len(), index.len()); + } } diff --git a/rust/bioscript-formats/src/genotype.rs b/rust/bioscript-formats/src/genotype.rs index ff58d03..b11bd18 100644 --- a/rust/bioscript-formats/src/genotype.rs +++ b/rust/bioscript-formats/src/genotype.rs @@ -565,6 +565,63 @@ mod tests { } } + #[test] + fn genotype_public_cache_wrappers_and_empty_store_cover_lookup_contracts() { + let fallback = GenotypeStore::from_bytes("fallback.txt", b"rsid\tgenotype\nrs2\tCC\n") + .unwrap(); + let cached_observation = VariantObservation { + matched_rsid: Some("rs1".to_owned()), + genotype: Some("AG".to_owned()), + ..VariantObservation::default() + }; + let cached = + GenotypeStore::with_cached_observations(vec![cached_observation.clone()], fallback); + assert_eq!(cached.backend_name(), "cached"); + assert_eq!(cached.get("rs1").unwrap().as_deref(), Some("AG")); + assert_eq!(cached.get("rs2").unwrap().as_deref(), Some("CC")); + let observations = cached + .lookup_variants(&[ + VariantSpec { + rsids: vec!["rs1".to_owned()], + ..VariantSpec::default() + }, + VariantSpec { + rsids: vec!["rs2".to_owned()], + ..VariantSpec::default() + }, + ]) + .unwrap(); + 
assert_eq!(observations[0].genotype.as_deref(), Some("AG")); + assert_eq!(observations[1].genotype.as_deref(), Some("CC")); + + let required = GenotypeStore::with_required_cached_observations( + vec![cached_observation], + GenotypeStore::empty(), + ); + assert_eq!(required.get("rs1").unwrap().as_deref(), Some("AG")); + assert!( + required + .get("rs-missing") + .unwrap_err() + .to_string() + .contains("required preloaded genotype observation missing") + ); + assert!( + required + .lookup_variant(&VariantSpec { + rsids: vec!["rs-missing".to_owned()], + ..VariantSpec::default() + }) + .unwrap_err() + .to_string() + .contains("required preloaded genotype observation missing") + ); + + let empty = GenotypeStore::empty(); + assert!(empty.get("rs-any").unwrap().is_none()); + assert_eq!(empty.backend_name(), "text"); + } + #[test] fn genotype_private_helpers_cover_assembly_sorting_and_decision_rules() { let variant = variant_with_loci(); diff --git a/rust/bioscript-formats/src/genotype/cram_backend/observation.rs b/rust/bioscript-formats/src/genotype/cram_backend/observation.rs index 1618e15..70b9e96 100644 --- a/rust/bioscript-formats/src/genotype/cram_backend/observation.rs +++ b/rust/bioscript-formats/src/genotype/cram_backend/observation.rs @@ -372,3 +372,210 @@ impl CramBackend { ) } } + +#[cfg(test)] +mod tests { + use std::{fs, path::PathBuf}; + + use super::*; + use crate::genotype::GenotypeLoadOptions; + + fn fixtures_dir() -> PathBuf { + PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("tests/fixtures") + } + + fn locus() -> GenomicLocus { + GenomicLocus { + chrom: "chr_test".to_owned(), + start: 1000, + end: 1000, + } + } + + fn backend() -> CramBackend { + let dir = fixtures_dir(); + CramBackend { + path: dir.join("mini.cram"), + options: GenotypeLoadOptions { + input_index: Some(dir.join("mini.cram.crai")), + reference_file: Some(dir.join("mini.fa")), + ..GenotypeLoadOptions::default() + }, + } + } + + fn open_reader() -> cram::io::indexed_reader::IndexedReader 
{ + let dir = fixtures_dir(); + let reference = dir.join("mini.fa"); + let repository = alignment::build_reference_repository(&reference).unwrap(); + let index = alignment::parse_crai_bytes(&fs::read(dir.join("mini.cram.crai")).unwrap()) + .unwrap(); + alignment::build_cram_indexed_reader_from_reader( + fs::File::open(dir.join("mini.cram")).unwrap(), + index, + repository, + ) + .unwrap() + } + + #[test] + fn observe_with_reader_dispatches_snp_deletion_indel_and_unsupported_kind() { + let backend = backend(); + let locus = locus(); + + let mut reader = open_reader(); + let snp = VariantSpec { + rsids: vec!["mini_snp".to_owned()], + reference: Some("A".to_owned()), + alternate: Some("C".to_owned()), + kind: Some(VariantKind::Snp), + ..VariantSpec::default() + }; + let observation = backend + .observe_with_reader(&mut reader, "mini.cram", &snp, Assembly::Grch38, &locus) + .unwrap(); + assert_eq!(observation.backend, "cram"); + assert_eq!(observation.matched_rsid.as_deref(), Some("mini_snp")); + assert_eq!(observation.genotype.as_deref(), Some("AA")); + assert_eq!(observation.ref_count, Some(50)); + assert_eq!(observation.alt_count, Some(0)); + assert_eq!(observation.depth, Some(50)); + + let mut reader = open_reader(); + let deletion = VariantSpec { + rsids: vec!["mini_del".to_owned()], + reference: Some("I".to_owned()), + alternate: Some("D".to_owned()), + kind: Some(VariantKind::Deletion), + deletion_length: Some(1), + ..VariantSpec::default() + }; + let observation = backend + .observe_with_reader( + &mut reader, + "mini.cram", + &deletion, + Assembly::Grch38, + &locus, + ) + .unwrap(); + assert_eq!(observation.genotype.as_deref(), Some("II")); + assert_eq!(observation.ref_count, Some(50)); + assert_eq!(observation.alt_count, Some(0)); + assert!(observation.evidence[0].contains("observed deletion anchor")); + + let mut reader = open_reader(); + let indel = VariantSpec { + rsids: vec!["mini_indel".to_owned()], + reference: Some("A".to_owned()), + alternate: 
Some("AT".to_owned()), + kind: Some(VariantKind::Insertion), + ..VariantSpec::default() + }; + let observation = backend + .observe_with_reader(&mut reader, "mini.cram", &indel, Assembly::Grch38, &locus) + .unwrap(); + assert_eq!(observation.matched_rsid.as_deref(), Some("mini_indel")); + assert_eq!(observation.genotype.as_deref(), Some("AA")); + + let mut reader = open_reader(); + let err = backend + .observe_with_reader( + &mut reader, + "mini.cram", + &VariantSpec { + kind: Some(VariantKind::Other), + ..VariantSpec::default() + }, + Assembly::Grch38, + &locus, + ) + .unwrap_err(); + assert!(err.to_string().contains("does not yet support")); + } + + #[test] + fn observe_with_reader_reports_required_variant_fields() { + let backend = backend(); + let locus = locus(); + + let mut reader = open_reader(); + let err = backend + .observe_with_reader( + &mut reader, + "mini.cram", + &VariantSpec { + alternate: Some("C".to_owned()), + kind: Some(VariantKind::Snp), + ..VariantSpec::default() + }, + Assembly::Grch38, + &locus, + ) + .unwrap_err(); + assert!(err.to_string().contains("SNP variant requires ref")); + + let mut reader = open_reader(); + let err = backend + .observe_with_reader( + &mut reader, + "mini.cram", + &VariantSpec { + reference: Some("A".to_owned()), + kind: Some(VariantKind::Snp), + ..VariantSpec::default() + }, + Assembly::Grch38, + &locus, + ) + .unwrap_err(); + assert!(err.to_string().contains("SNP variant requires alt")); + + let mut reader = open_reader(); + let err = backend + .observe_with_reader( + &mut reader, + "mini.cram", + &VariantSpec { + kind: Some(VariantKind::Deletion), + ..VariantSpec::default() + }, + Assembly::Grch38, + &locus, + ) + .unwrap_err(); + assert!(err.to_string().contains("deletion_length")); + + let mut reader = open_reader(); + let err = backend + .observe_with_reader( + &mut reader, + "mini.cram", + &VariantSpec { + alternate: Some("AT".to_owned()), + kind: Some(VariantKind::Indel), + ..VariantSpec::default() + }, + 
Assembly::Grch38,
+                &locus,
+            )
+            .unwrap_err();
+        assert!(err.to_string().contains("indel variant requires ref"));
+
+        let mut reader = open_reader();
+        let err = backend
+            .observe_with_reader(
+                &mut reader,
+                "mini.cram",
+                &VariantSpec {
+                    reference: Some("A".to_owned()),
+                    kind: Some(VariantKind::Indel),
+                    ..VariantSpec::default()
+                },
+                Assembly::Grch38,
+                &locus,
+            )
+            .unwrap_err();
+        assert!(err.to_string().contains("indel variant requires alt"));
+    }
+}
diff --git a/rust/bioscript-formats/src/genotype/cram_backend/store.rs b/rust/bioscript-formats/src/genotype/cram_backend/store.rs
index 955e33a..b57abe7 100644
--- a/rust/bioscript-formats/src/genotype/cram_backend/store.rs
+++ b/rust/bioscript-formats/src/genotype/cram_backend/store.rs
@@ -228,3 +228,142 @@ fn cram_lookup_worker_count(job_count: usize) -> usize {
         .unwrap_or_else(|| available.min(DEFAULT_MAX_CRAM_WORKERS))
         .min(job_count)
 }
+
+#[cfg(test)]
+mod tests {
+    use std::path::{Path, PathBuf};
+
+    use bioscript_core::{GenomicLocus, VariantKind};
+
+    use super::*;
+    use crate::genotype::GenotypeLoadOptions;
+
+    /// Builds a `CramBackend` for `sample.cram` whose only configured option is `reference_file`.
+    fn backend(reference_file: Option<PathBuf>) -> CramBackend {
+        CramBackend {
+            path: PathBuf::from("sample.cram"),
+            options: GenotypeLoadOptions {
+                reference_file,
+                ..GenotypeLoadOptions::default()
+            },
+        }
+    }
+
+    /// A missing reference file is reported together with the CRAM path; a locus-less variant gets one evidence entry.
+    #[test]
+    fn cram_store_reports_missing_reference_and_unsupported_locus_details() {
+        let variant = VariantSpec {
+            rsids: vec!["rs-test".to_owned()],
+            kind: Some(VariantKind::Snp),
+            reference: Some("A".to_owned()),
+            alternate: Some("C".to_owned()),
+            ..VariantSpec::default()
+        };
+
+        let err = backend(None).lookup_variant(&variant).unwrap_err();
+        let message = err.to_string();
+        assert!(message.contains("without --reference-file"));
+        assert!(message.contains("sample.cram"));
+
+        let observation = backend(Some(PathBuf::from("ref.fa")))
+            .unsupported_locus_observation(&variant, Path::new("ref.fa"));
+        assert_eq!(observation.backend, "cram");
+        assert_eq!(observation.matched_rsid.as_deref(), Some("rs-test"));
+        assert_eq!(observation.evidence.len(), 1);
+        assert!(observation.evidence[0].contains("needs GRCh37/GRCh38 coordinates"));
+    }
+
+    /// An rsID-only variant is rejected with an error naming the configured reference index and input index.
+    #[test]
+    fn cram_store_rejects_variants_without_coordinates_before_opening_cram() {
+        let store = CramBackend {
+            path: PathBuf::from("sample.cram"),
+            options: GenotypeLoadOptions {
+                reference_file: Some(PathBuf::from("ref.fa")),
+                reference_index: Some(PathBuf::from("ref.fa.fai")),
+                input_index: Some(PathBuf::from("sample.cram.crai")),
+                ..GenotypeLoadOptions::default()
+            },
+        };
+
+        let err = store
+            .lookup_variant(&VariantSpec {
+                rsids: vec!["rs-coordinate-only".to_owned()],
+                kind: Some(VariantKind::Snp),
+                reference: Some("A".to_owned()),
+                alternate: Some("C".to_owned()),
+                ..VariantSpec::default()
+            })
+            .unwrap_err();
+
+        let message = err.to_string();
+        assert!(message.contains("not only rsIDs"));
+        assert!(message.contains("reference index ref.fa.fai"));
+        assert!(message.contains("input index sample.cram.crai"));
+    }
+
+    /// Checks worker-count clamping under `BIOSCRIPT_CRAM_THREADS` overrides, then restores the variable.
+    #[test]
+    fn cram_lookup_worker_count_bounds_requested_and_available_workers() {
+        const THREADS_VAR: &str = "BIOSCRIPT_CRAM_THREADS";
+
+        /// Puts the variable back to its pre-test state on drop, even if an assertion panics.
+        struct RestoreThreadsVar(Option<std::ffi::OsString>);
+
+        impl Drop for RestoreThreadsVar {
+            fn drop(&mut self) {
+                // SAFETY: same single-writer assumption as the overrides in the test body.
+                unsafe {
+                    match self.0.take() {
+                        Some(value) => env::set_var(THREADS_VAR, value),
+                        None => env::remove_var(THREADS_VAR),
+                    }
+                }
+            }
+        }
+
+        let _restore = RestoreThreadsVar(env::var_os(THREADS_VAR));
+
+        assert_eq!(cram_lookup_worker_count(0), 1);
+        assert_eq!(cram_lookup_worker_count(1), 1);
+
+        // SAFETY: environment writes are only sound while no other thread reads or writes
+        // the environment. NOTE(review): the default test harness is multi-threaded —
+        // confirm no concurrently running test depends on this variable.
+        unsafe {
+            env::set_var(THREADS_VAR, "2");
+        }
+        assert_eq!(cram_lookup_worker_count(8), 2);
+        assert_eq!(cram_lookup_worker_count(1), 1);
+
+        // SAFETY: see the first override above.
+        unsafe {
+            env::set_var(THREADS_VAR, "999");
+        }
+        assert_eq!(cram_lookup_worker_count(3), 3);
+
+        // SAFETY: see the first override above.
+        unsafe {
+            env::set_var(THREADS_VAR, "0");
+        }
+        assert!(cram_lookup_worker_count(2) >= 1);
+    }
+
+    /// A variant of kind `Other` is rejected even though GRCh38 coordinates are supplied.
+    #[test]
+    fn cram_lookup_variant_rejects_unsupported_kind_after_locus_resolution() {
+        let store = backend(Some(PathBuf::from("GRCh38.fa")));
+        let err = store
+            .lookup_variant(&VariantSpec {
+                grch38: Some(GenomicLocus {
+                    chrom: "chr1".to_owned(),
+                    start: 10,
+                    end: 10,
+                }),
+                kind:
Some(VariantKind::Other),
+                ..VariantSpec::default()
+            })
+            .unwrap_err();
+        assert!(err.to_string().contains("does not yet support"));
+    }
+}
diff --git a/rust/bioscript-formats/src/inspect/sex.rs b/rust/bioscript-formats/src/inspect/sex.rs
index 22a23ab..b00c846 100644
--- a/rust/bioscript-formats/src/inspect/sex.rs
+++ b/rust/bioscript-formats/src/inspect/sex.rs
@@ -455,6 +455,24 @@ fn select_sex_detection_zip_entry(
 #[cfg(test)]
 mod tests {
     use super::*;
+    use std::io::Write as _;
+
+    /// Builds an in-memory zip archive from `(name, body)` pairs.
+    /// Names ending in `/` are added as directory entries; all others as files holding `body`.
+    fn zip_bytes(entries: &[(&str, &str)]) -> Vec<u8> {
+        let cursor = Cursor::new(Vec::new());
+        let mut writer = zip::ZipWriter::new(cursor);
+        let options = zip::write::SimpleFileOptions::default();
+        for (name, body) in entries {
+            if name.ends_with('/') {
+                writer.add_directory(*name, options).unwrap();
+            } else {
+                writer.start_file(*name, options).unwrap();
+                writer.write_all(body.as_bytes()).unwrap();
+            }
+        }
+        writer.finish().unwrap().into_inner()
+    }
 
     #[test]
     fn y_fingerprint_detects_male_and_female_text_exports() {
@@ -627,4 +645,70 @@ mod tests {
                 .any(|item| item == "y_to_x_pct=30.00")
         );
     }
+
+    /// Covers unsupported kinds, genotype text, zip entry lookup failures and zip entry selection.
+    #[test]
+    fn sex_inference_bytes_and_zip_paths_cover_entry_selection_and_unsupported_kinds() {
+        let text = "rsid\tchromosome\tposition\tgenotype\nrs11575897\tY\t1\tG\n";
+        let unsupported =
+            infer_sex_from_bytes("sample.txt", text.as_bytes(), DetectedKind::ReferenceFasta)
+                .unwrap();
+        assert_eq!(unsupported.sex, InferredSex::Unknown);
+        assert_eq!(unsupported.method, "unsupported_source_type");
+
+        let result = infer_sex_from_bytes("sample.txt", text.as_bytes(), DetectedKind::GenotypeText)
+            .unwrap();
+        assert_eq!(result.method, "snp_array_x_y_fingerprint");
+
+        // The archive mixes entries that should be skipped with the one real genotype file.
+        let archive = zip_bytes(&[
+            ("__MACOSX/._sample.txt", "ignored"),
+            ("notes.md", "ignored"),
+            ("nested/sample.txt", text),
+        ]);
+        let result =
+            infer_sex_from_zip_bytes(&archive, "nested/sample.txt", DetectedKind::GenotypeText)
+                .unwrap();
+        assert_eq!(result.method, "snp_array_x_y_fingerprint");
+
+        let err =
infer_sex_from_zip_bytes(&archive, "missing.txt", DetectedKind::GenotypeText) + .unwrap_err(); + assert!(err.to_string().contains("failed to open zip entry missing.txt")); + + let bad_zip = infer_sex_from_zip_bytes(b"not a zip", "sample.txt", DetectedKind::GenotypeText) + .unwrap_err(); + assert!(bad_zip.to_string().contains("failed to read zip bytes")); + + let mut zip = ZipArchive::new(Cursor::new(archive)).unwrap(); + assert_eq!( + select_sex_detection_zip_entry(&mut zip).unwrap(), + "nested/sample.txt" + ); + + let unsupported_zip = zip_bytes(&[("docs/readme.md", "ignored")]); + let mut zip = ZipArchive::new(Cursor::new(unsupported_zip)).unwrap(); + let err = select_sex_detection_zip_entry(&mut zip).unwrap_err(); + assert!(err.to_string().contains("does not contain a supported sex detection input")); + } + + #[test] + fn sex_inference_reader_probe_and_late_stream_paths_handle_vcf_edges() { + let mut text = String::new(); + text.push_str("chrY\tbad\t.\tC\tT\t.\tPASS\t.\tGT\t1\n"); + text.push_str("chrM\t1\t.\tC\tT\t.\tPASS\t.\tGT\t1\n"); + for idx in 0..70 { + let gt = if idx % 2 == 0 { "0|1" } else { "0|0" }; + text.push_str(&format!( + "23\t{}\t.\tC\tT\t.\tPASS\t.\tGT\t{gt}:99\n", + 3_000_000 + idx + )); + } + text.push_str("24\t1\t.\tC\tT\t.\tPASS\t.\tGT\t.\n"); + text.push_str("chrX\t60000\t.\tC\tT\t.\tPASS\t.\tGT\t0/1\n"); + text.push_str("chrX\t155000000\t.\tC\tT\t.\tPASS\t.\tGT\t0/1\n"); + + let result = infer_sex_from_bytes("sample.vcf", text.as_bytes(), DetectedKind::Vcf).unwrap(); + assert_eq!(result.sex, InferredSex::Female); + assert!(result.evidence.iter().any(|item| item == "x_non_par_sites=70")); + assert!(result.evidence.iter().any(|item| item == "x_het_gt_sites=35")); + } } diff --git a/rust/bioscript-formats/src/inspect/sex/alignment_depth.rs b/rust/bioscript-formats/src/inspect/sex/alignment_depth.rs index 74954ad..51dfb9c 100644 --- a/rust/bioscript-formats/src/inspect/sex/alignment_depth.rs +++ 
b/rust/bioscript-formats/src/inspect/sex/alignment_depth.rs @@ -265,3 +265,110 @@ fn ratio_to_autosome(value: f64, autosome_mean: f64) -> f64 { value / autosome_mean } } + +#[cfg(test)] +mod tests { + use super::*; + use std::{fs, path::PathBuf}; + + fn stats(autosome_records: usize, x_records: usize, y_records: usize) -> AlignmentSexStats { + AlignmentSexStats { + autosome_windows: 8, + autosome_records, + x_windows: 8, + x_records, + y_windows: 8, + y_records, + } + } + + #[test] + fn classifies_alignment_depth_patterns() { + let female = classify_alignment_stats(&stats(800, 760, 10)); + assert_eq!(female.sex, InferredSex::Female); + assert_eq!(female.confidence, SexDetectionConfidence::High); + assert!(female + .evidence + .iter() + .any(|value| value == "x_to_autosome_ratio=0.950")); + + let male = classify_alignment_stats(&stats(800, 360, 120)); + assert_eq!(male.sex, InferredSex::Male); + assert_eq!(male.confidence, SexDetectionConfidence::High); + + let medium_female = classify_alignment_stats(&stats(800, 620, 160)); + assert_eq!(medium_female.sex, InferredSex::Female); + assert_eq!(medium_female.confidence, SexDetectionConfidence::Medium); + + let medium_male = classify_alignment_stats(&stats(800, 560, 40)); + assert_eq!(medium_male.sex, InferredSex::Male); + assert_eq!(medium_male.confidence, SexDetectionConfidence::Medium); + + let low_depth = classify_alignment_stats(&stats(16, 8, 8)); + assert_eq!(low_depth.sex, InferredSex::Unknown); + assert_eq!(low_depth.confidence, SexDetectionConfidence::Low); + } + + #[test] + fn alignment_depth_math_handles_empty_and_saturating_inputs() { + assert_eq!(mean_records(10, 0), 0.0); + assert_eq!(mean_records(20, 4), 5.0); + assert!(mean_records(usize::MAX, 1) > 4_000_000_000.0); + assert_eq!(ratio_to_autosome(10.0, 0.0), 0.0); + assert_eq!(ratio_to_autosome(5.0, 10.0), 0.5); + } + + #[test] + fn non_cram_or_missing_reference_reports_unsupported_or_unknown() { + let non_cram = infer_sex_from_alignment_path( + 
Path::new("sample.bam"), + &InspectOptions::default(), + DetectedKind::AlignmentBam, + ) + .unwrap(); + assert_eq!(non_cram.sex, InferredSex::Unknown); + assert_eq!(non_cram.method, "unsupported_source_type"); + + let missing_reference = infer_sex_from_alignment_path( + Path::new("sample.cram"), + &InspectOptions::default(), + DetectedKind::AlignmentCram, + ) + .unwrap(); + assert_eq!(missing_reference.sex, InferredSex::Unknown); + assert_eq!(missing_reference.method, "alignment_y_x_coverage"); + assert!(missing_reference.evidence[0].contains("--reference-file")); + } + + fn fixtures_dir() -> PathBuf { + PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("tests/fixtures") + } + + #[test] + fn alignment_depth_path_and_reader_entrypoints_report_missing_standard_contigs() { + let dir = fixtures_dir(); + let cram = dir.join("mini.cram"); + let reference = dir.join("mini.fa"); + let index = dir.join("mini.cram.crai"); + let options = InspectOptions { + input_index: Some(index.clone()), + reference_file: Some(reference.clone()), + ..InspectOptions::default() + }; + + let err = infer_sex_from_alignment_path(&cram, &options, DetectedKind::AlignmentCram) + .unwrap_err(); + assert!(err.to_string().contains("does not contain contig")); + + let repository = alignment::build_reference_repository(&reference).unwrap(); + let crai = alignment::parse_crai_bytes(&fs::read(index).unwrap()).unwrap(); + let mut reader = alignment::build_cram_indexed_reader_from_reader( + fs::File::open(cram).unwrap(), + crai, + repository, + ) + .unwrap(); + let err = infer_sex_from_alignment_reader(&mut reader, "mini.cram", true).unwrap_err(); + assert!(err.to_string().contains("does not contain contig")); + } +} diff --git a/rust/bioscript-reporting/src/artifacts.rs b/rust/bioscript-reporting/src/artifacts.rs index 1590cba..27fd3de 100644 --- a/rust/bioscript-reporting/src/artifacts.rs +++ b/rust/bioscript-reporting/src/artifacts.rs @@ -63,3 +63,100 @@ pub fn json_field_as_tsv(value: 
Option<&serde_json::Value>) -> String { Some(value) => value.to_string().replace(['\t', '\n'], " "), } } + +#[cfg(test)] +mod tests { + use serde_json::json; + + use crate::AppInputReportInput; + + use super::{ + json_field_as_tsv, render_input_report_artifact_texts, render_jsonl, + render_observations_tsv, render_report_artifact_texts, standard_text_output, + }; + + fn observation() -> serde_json::Value { + json!({ + "participant_id": "P001", + "assay_id": "assay", + "assay_version": "1.0", + "variant_key": "rs123", + "rsid": "rs123", + "call_status": "called", + "outcome": "variant" + }) + } + + #[test] + fn artifact_renderers_emit_standard_files() { + let observations = vec![observation()]; + let analyses = vec![json!({"analysis_id": "a1", "rows": []})]; + let reports = vec![json!({ + "schema": "bioscript:report:1.0", + "participant_id": "P001", + "manifest": {"name": "panel"}, + "input": {"file_name": "sample.txt"}, + "analyses": analyses, + "findings": [], + "provenance": [] + })]; + + let artifacts = render_report_artifact_texts(&observations, &analyses, &reports).unwrap(); + + assert!(artifacts.observations_tsv.starts_with("participant_id\t")); + assert!(artifacts.analysis_jsonl.contains("\"analysis_id\":\"a1\"")); + assert!(artifacts.reports_jsonl.contains("bioscript:report:1.0")); + assert!(artifacts.html.contains("BioScript")); + assert_eq!(artifacts.text_output, standard_text_output()); + } + + #[test] + fn input_report_artifact_renderer_builds_report_json() { + let observations = vec![observation()]; + let analyses = Vec::new(); + let findings = Vec::new(); + let provenance = Vec::new(); + let manifest = json!({"name": "panel"}); + + let artifacts = render_input_report_artifact_texts(AppInputReportInput { + assay_id: "assay", + participant_id: "P001", + input_file_name: "sample.txt", + input_file_path: "/data/sample.txt", + observations: &observations, + analyses: &analyses, + findings: &findings, + provenance: &provenance, + input_inspection: None, + 
manifest_metadata: &manifest, + }) + .unwrap(); + + assert!(artifacts.reports_jsonl.contains("\"report_status\":\"complete\"")); + assert!(artifacts.html.contains("panel")); + } + + #[test] + fn low_level_serializers_escape_tsv_and_jsonl_rows() { + let observations = vec![json!({ + "participant_id": "P\t001", + "assay_id": "assay", + "variant_key": "rs123", + "call_status": "called", + "facets": {"note": "a\nb"} + })]; + + let tsv = render_observations_tsv(&observations); + assert!(tsv.contains("P 001")); + assert!(!tsv.contains("P\t001")); + assert_eq!( + json_field_as_tsv(Some(&json!("line\tbreak\nvalue"))), + "line break value" + ); + assert_eq!(json_field_as_tsv(Some(&serde_json::Value::Null)), ""); + assert_eq!(json_field_as_tsv(None), ""); + + let jsonl = render_jsonl(&[json!({"a": 1}), json!({"b": true})]).unwrap(); + assert_eq!(jsonl.lines().count(), 2); + } +} diff --git a/rust/bioscript-reporting/src/html.rs b/rust/bioscript-reporting/src/html.rs index 608f1b3..db23e8e 100644 --- a/rust/bioscript-reporting/src/html.rs +++ b/rust/bioscript-reporting/src/html.rs @@ -78,3 +78,198 @@ pub fn render_app_html_document( out.push_str(""); Ok(out) } + +#[cfg(test)] +mod tests { + use serde_json::json; + + use super::render_app_html_document; + + fn observation(participant_id: &str, outcome: &str, call_status: &str) -> serde_json::Value { + json!({ + "participant_id": participant_id, + "assay_id": "pgx-panel", + "assay_version": "1.0", + "variant_key": "rs123", + "variant_path": "variants/rs123.yaml", + "rsid": "rs123", + "gene": "CYP2C19", + "assembly": "grch38", + "chrom": "10", + "pos_start": 94781859, + "pos_end": 94781859, + "kind": "snv", + "ref": "G", + "alt": "A", + "match_status": if call_status == "called" { "matched" } else { "not_found" }, + "coverage_status": if call_status == "called" { "covered" } else { "not_covered" }, + "call_status": call_status, + "genotype": "G/A", + "genotype_display": "G/A", + "zygosity": "heterozygous", + "ref_count": 12, + 
"alt_count": 9, + "depth": 21, + "genotype_quality": 99, + "allele_balance": 0.43, + "outcome": outcome, + "evidence_type": "vcf", + "evidence_raw": if outcome == "reference" { + "imputed reference genotype from absent variant-only VCF record" + } else { + "consumer genotype weak indel match" + }, + "match_quality": if outcome == "variant" { "weak" } else { "strong" }, + "match_notes": "reported by fixture", + "facets": {"clinical": "example"}, + "source": { + "label": "dbSNP", + "url": "https://www.ncbi.nlm.nih.gov/snp/rs123" + } + }) + } + + fn analysis(participant_id: &str) -> serde_json::Value { + json!({ + "analysis_id": "cyp2c19-summary", + "analysis_label": "CYP2C19 Summary", + "participant_id": participant_id, + "derived_from": ["variants/rs123.yaml"], + "logic": { + "description": "Classify star allele status.", + "source": { + "name": "CPIC", + "url": "https://cpicpgx.org/" + } + }, + "emits": [ + {"key": "participant_id", "label": "Participant"}, + {"key": "metabolizer_status", "label": "Metabolizer"}, + {"key": "source_url", "label": "Source"} + ], + "row_headers": ["metabolizer_status", "source_url", "notes"], + "rows": [ + { + "participant_id": participant_id, + "metabolizer_status": "variant", + "source_url": "https://example.test/source", + "notes": "Use clinical context." + }, + { + "participant_id": participant_id, + "metabolizer_status": "normal", + "source_url": "https://example.test/normal", + "report_notes": "Second note." 
+ } + ] + }) + } + + fn report(participant_id: &str) -> serde_json::Value { + let matched_observation = observation(participant_id, "variant", "called"); + json!({ + "schema": "bioscript:report:1.0", + "participant_id": participant_id, + "assay_id": "pgx-panel", + "manifest": { + "schema": "bioscript:panel:1.0", + "version": "1.0", + "name": "pgx-panel", + "label": "PGx Panel", + "tags": ["pgx", "demo"], + "members": [ + {"kind": "variant", "path": "variants/rs123.yaml", "version": "1.0"}, + {"kind": "assay", "path": "assays/cyp2c19.yaml", "version": "1.0"} + ] + }, + "input": { + "file_name": format!("{participant_id}.vcf"), + "file_path": format!("/data/{participant_id}.vcf"), + "debug": { + "format": "vcf", + "format_confidence": "authoritative", + "assembly": "grch38", + "vcf_missing_reference_imputation": true, + "source": { + "vendor": "Example", + "platform_version": "v1" + }, + "inferred_sex": { + "sex": "female", + "confidence": "high", + "method": "x_het" + }, + "evidence": ["header", "genotypes"] + } + }, + "analyses": [analysis(participant_id)], + "findings": [ + { + "schema": "bioscript:pgx-label:1.0", + "pgx_action_level": "Testing Required", + "prescribing_actions": ["Dose adjustment"], + "regulatory_sources": ["FDA"], + "prescribing_information": "Consider alternative therapy.", + "drugs": [{"name": "clopidogrel"}], + "evidence": {"url": "https://example.test/label"}, + "matched_observation": matched_observation + }, + { + "schema": "bioscript:pgx-summary:1.0", + "evidence_level": "1A", + "phenotype_categories": ["efficacy"], + "phenotypes": ["response"], + "drugs": [{"name": "clopidogrel"}], + "matched_effect": {"label": "A carrier", "text": "Reduced activation."}, + "matched_observation": observation(participant_id, "reference", "called"), + "evidence": {"url": "https://example.test/summary"} + } + ], + "provenance": [ + { + "kind": "database", + "label": "dbSNP", + "url": "https://www.ncbi.nlm.nih.gov/snp/rs123", + "fields": ["identifiers.rsids"] 
+ }, + { + "kind": "guideline", + "label": "CPIC", + "url": "https://cpicpgx.org/" + } + ] + }) + } + + #[test] + fn render_app_html_document_covers_report_sections() { + let observations = vec![ + observation("P001", "variant", "called"), + observation("P002", "reference", "called"), + observation("P002", "unknown", "missing"), + ]; + let reports = vec![report("P001"), report("P002")]; + + let html = render_app_html_document(&observations, &reports).unwrap(); + + assert!(html.contains("PGx Panel")); + assert!(html.contains("participant-filter")); + assert!(html.contains("observations-table")); + assert!(html.contains("analysis-table-0")); + assert!(html.contains("pgx-variant-table")); + assert!(html.contains("pgx-drug-table-0")); + assert!(html.contains("dbSNP")); + assert!(html.contains("manifest-members-table")); + assert!(html.contains("Raw Reports JSON")); + } + + #[test] + fn render_app_html_document_handles_empty_inputs() { + let html = render_app_html_document(&[], &[]).unwrap(); + + assert!(html.contains("BioScript Report")); + assert!(html.contains("No input metadata.")); + assert!(html.contains("No analysis outputs.")); + assert!(html.contains("No provenance links.")); + } +} diff --git a/rust/bioscript-reporting/src/manifest.rs b/rust/bioscript-reporting/src/manifest.rs index 87baadc..8993abc 100644 --- a/rust/bioscript-reporting/src/manifest.rs +++ b/rust/bioscript-reporting/src/manifest.rs @@ -8,6 +8,7 @@ use bioscript_schema::{ load_variant_manifest_text, }; +#[path = "manifest_provenance.rs"] mod provenance; pub use provenance::{collect_manifest_provenance_entries, load_manifest_provenance_links}; @@ -512,9 +513,10 @@ mod tests { use super::{ ExecutableAssayMember, ExecutablePanelMember, ManifestWorkspace, ReportManifestKind, assay_executable_member, assay_executable_member_path, collect_analysis_manifest_tasks, - collect_variant_manifest_tasks, matches_analysis_path_filters, + collect_variant_manifest_tasks, load_manifest_findings, 
load_report_manifest_context, + matches_analysis_path_filters, matches_variant_manifest_filters, panel_executable_member, panel_executable_member_path, - report_assay_id, report_manifest_kind, report_manifest_schema, + report_assay_id, report_manifest_kind, report_manifest_metadata, report_manifest_schema, resolve_filesystem_manifest_path, traversable_manifest_member_paths, }; @@ -536,8 +538,8 @@ mod tests { } } - struct MapWorkspace { - files: BTreeMap, + pub(super) struct MapWorkspace { + pub(super) files: BTreeMap, } impl ManifestWorkspace for MapWorkspace { @@ -653,6 +655,32 @@ alleles: ); } + #[test] + fn report_manifest_metadata_collects_tags_members_and_fallback_label() { + let workspace = InlineWorkspace { + yaml: r#" +schema: bioscript:panel:1.0 +version: "1.0" +name: pgx-panel +tags: [pgx, cardiology, 7] +members: + - kind: variant + path: rs1.yaml + version: "1" + - kind: assay + path: assay.yaml + - not: a-member +"#, + }; + let metadata = report_manifest_metadata(&workspace, "panel.yaml").unwrap(); + assert_eq!(metadata["schema"], "bioscript:panel:1.0"); + assert_eq!(metadata["label"], "pgx-panel"); + assert_eq!(metadata["tags"], serde_json::json!(["pgx", "cardiology"])); + assert_eq!(metadata["members"][0]["kind"], "variant"); + assert_eq!(metadata["members"][0]["path"], "rs1.yaml"); + assert_eq!(metadata["members"][0]["version"], "1"); + } + #[test] fn report_assay_id_rejects_unsupported_schema() { let workspace = InlineWorkspace { @@ -812,6 +840,80 @@ analyses: .collect::>(), vec!["rs1.yaml", "assets/APOE/rs2.yaml"] ); + + let assay_tasks = + collect_variant_manifest_tasks(&workspace, "assets/APOE/assay.yaml", &[]).unwrap(); + assert_eq!(assay_tasks.len(), 1); + assert_eq!(assay_tasks[0].manifest_path, "assets/APOE/rs2.yaml"); + + let variant_tasks = collect_variant_manifest_tasks(&workspace, "rs1.yaml", &[]).unwrap(); + assert_eq!(variant_tasks.len(), 1); + assert_eq!(variant_tasks[0].manifest.name, "rs1"); + } + + #[test] + fn 
manifest_context_and_findings_follow_includes_members_and_inherited_bindings() { + let workspace = MapWorkspace { + files: BTreeMap::from([ + ( + "panel.yaml".to_owned(), + r#" +schema: bioscript:panel:1.0 +version: "1.0" +name: panel +findings: + - include: included.yaml + binding: + source: variant + variant: rs1.yaml + key: outcome + value: variant + - schema: bioscript:trait:1.0 + summary: panel direct +members: + - kind: variant + path: rs1.yaml +"# + .to_owned(), + ), + ( + "included.yaml".to_owned(), + r#" +schema: bioscript:pgx-findings:1.0 +version: "1.0" +rsid: rs1 +findings: + - schema: bioscript:trait:1.0 + summary: included inherited + - schema: bioscript:trait:1.0 + summary: included own effects + effects: [] +"# + .to_owned(), + ), + ("rs1.yaml".to_owned(), variant_yaml("rs1", 1, "keep")), + ]), + }; + + let findings = load_manifest_findings(&workspace, "panel.yaml").unwrap(); + assert!(findings + .iter() + .any(|finding| finding["summary"] == "panel direct")); + let inherited = findings + .iter() + .find(|finding| finding["summary"] == "included inherited") + .unwrap(); + assert_eq!(inherited["binding"]["variant"], "rs1.yaml"); + let own_effects = findings + .iter() + .find(|finding| finding["summary"] == "included own effects") + .unwrap(); + assert!(own_effects.get("binding").is_none()); + + let context = load_report_manifest_context(&workspace, "panel.yaml").unwrap(); + assert_eq!(context.assay_id, "panel"); + assert_eq!(context.manifest_metadata["name"], "panel"); + assert!(!context.findings.is_empty()); } #[test] diff --git a/rust/bioscript-reporting/src/matching.rs b/rust/bioscript-reporting/src/matching.rs index bb66823..0978afc 100644 --- a/rust/bioscript-reporting/src/matching.rs +++ b/rust/bioscript-reporting/src/matching.rs @@ -526,4 +526,211 @@ mod report_matching_tests { }); assert!(app_variant_binding_match_observation(&two_x_binding, &observations).is_some()); } + + #[test] + fn 
match_app_findings_matches_variant_effects_and_deduplicates_evidence() { + let findings = vec![ + serde_json::json!({ + "schema": "bioscript:finding:1.0", + "label": "Repeated", + "evidence": {"source": "db", "kind": "guideline", "id": "cpic-1"}, + "effects": [ + { + "id": "effect-a", + "binding": { + "source": "variant", + "variant": "variants/rs1.yaml", + "key": "outcome", + "value": "variant" + } + }, + { + "id": "effect-a", + "binding": { + "source": "variant", + "variant": "rs1.yaml", + "key": "outcome", + "value": "variant" + } + } + ] + }) + ]; + let observations = vec![serde_json::json!({ + "participant_id": "p1", + "variant_key": "rs1", + "variant_path": "variants/rs1.yaml", + "rsid": "rs1", + "gene": "ABC", + "ref": "A", + "alt": "G", + "genotype_display": "AG", + "zygosity": "het", + "outcome": "variant" + })]; + + let matched = match_app_findings(&findings, &observations, &[]); + assert_eq!(matched.len(), 1); + assert_eq!(matched[0]["matched"], true); + assert_eq!(matched[0]["matched_effect"]["id"], "effect-a"); + assert_eq!(matched[0]["matched_observation"]["participant_id"], "p1"); + assert!(matched[0].get("effects").is_none()); + } + + #[test] + fn match_app_findings_matches_analysis_bindings_with_alias_and_value_types() { + let findings = vec![ + serde_json::json!({ + "id": "analysis-finding", + "binding": { + "source": "analysis", + "analysis": "star-allele", + "key": "score", + "operator": "in", + "values": [1, 2, 3] + } + }), + serde_json::json!({ + "id": "non-match", + "binding": { + "source": "analysis", + "analysis_id": "other", + "key": "score", + "value": 2 + } + }) + ]; + let analyses = vec![ + serde_json::json!({ + "participant_id": "p1", + "assay_id": "assay", + "analysis_id": "star-allele", + "rows": [ + {"score": 2, "label": "ok"}, + {"score": 5, "label": "skip"} + ] + }), + serde_json::json!({ + "participant_id": "p1", + "assay_id": "assay", + "analysis_id": "other", + "rows": "not rows" + }) + ]; + + let matched = 
match_app_findings(&findings, &[], &analyses); + assert_eq!(matched.len(), 1); + assert_eq!(matched[0]["matched_analysis"]["analysis_id"], "star-allele"); + assert_eq!(matched[0]["matched_analysis"]["key"], "score"); + assert_eq!(matched[0]["matched_analysis"]["value"], 2); + } + + #[test] + fn binding_helpers_cover_value_dosage_reference_and_dedupe_edges() { + let observation = serde_json::json!({ + "variant_key": "rs1", + "variant_path": "nested/rs1.yaml", + "rsid": "rs1", + "ref": "A", + "alt": "G", + "genotype_display": "AG", + "zygosity": "het" + }); + assert!(!app_variant_ref_mismatch( + &serde_json::json!({"variant": "rs1.yaml"}), + &observation + )); + assert!(app_variant_ref_mismatch( + &serde_json::json!({"path": "rs2.yaml"}), + &observation + )); + + assert_eq!(app_observation_allele_dosage(&observation, "A"), Some(1)); + assert_eq!(app_observation_allele_dosage(&observation, "G"), Some(1)); + assert_eq!(app_observation_allele_dosage(&observation, "T"), Some(0)); + assert_eq!(app_observation_allele_dosage(&observation, "DEL"), None); + assert_eq!(app_observation_chromosome_count(&observation), Some(2)); + + assert!(app_binding_matches_value( + Some(&serde_json::json!(true)), + &serde_json::json!({"value": true}) + )); + assert!(app_binding_matches_value( + Some(&serde_json::json!(42)), + &serde_json::json!({"operator": "in", "values": ["41", 42]}) + )); + assert!(!app_binding_matches_value( + Some(&serde_json::json!({"object": true})), + &serde_json::json!({"value": "true"}) + )); + assert_eq!( + app_binding_expected_values(&serde_json::json!({ + "value": "A", + "values": ["B", 3, false, {"ignored": true}] + })), + vec!["A", "B", "3", "false"] + ); + + assert!(app_binding_matches_dosage( + Some(2), + &serde_json::json!({"operator": "dosage_in", "values": [1, 2]}) + )); + assert!(!app_binding_matches_dosage( + None, + &serde_json::json!({"operator": "dosage_equals", "value": 0}) + )); + + assert_eq!( + app_finding_dedupe_key(&serde_json::json!({ + 
"evidence": {"url": "https://example.test/evidence"}, + "matched_effect": {"label": "effect"} + })), + "evidence_url|https://example.test/evidence|effect" + ); + assert_eq!( + app_finding_dedupe_key(&serde_json::json!({ + "schema": "s", + "label": "l", + "notes": "n", + "matched_effect": {"text": "t"} + })), + "content|s|l|n|t" + ); + } + + #[test] + fn variant_binding_rejects_missing_keys_and_unsupported_operators() { + let observations = vec![serde_json::json!({ + "variant_path": "rs1.yaml", + "ref": "A", + "alt": "G", + "genotype_display": "AG", + "zygosity": "het" + })]; + assert!(app_variant_binding_match_observation( + &serde_json::json!({"source": "variant"}), + &observations + ) + .is_none()); + assert!(app_variant_binding_match_observation( + &serde_json::json!({ + "source": "variant", + "key": "alt", + "operator": "unknown", + "value": "G" + }), + &observations + ) + .is_none()); + assert!(app_variant_binding_match_observation( + &serde_json::json!({ + "source": "variant", + "allele": "", + "operator": "dosage_equals", + "value": 1 + }), + &observations + ) + .is_none()); + } } diff --git a/rust/bioscript-reporting/src/observation.rs b/rust/bioscript-reporting/src/observation.rs index 38e4ced..5db3cd7 100644 --- a/rust/bioscript-reporting/src/observation.rs +++ b/rust/bioscript-reporting/src/observation.rs @@ -598,4 +598,172 @@ mod tests { ("0".to_owned(), "hem_ref".to_owned()) ); } + + fn manifest(kind: VariantKind, chrom: &str, reference: &str, alternate: &str) -> VariantManifest { + VariantManifest { + path: PathBuf::from("variants/rs1.yaml"), + name: "rs1".to_owned(), + tags: vec!["tag:test".to_owned()], + spec: VariantSpec { + rsids: vec!["rs1".to_owned()], + grch37: Some(bioscript_core::GenomicLocus { + chrom: chrom.to_owned(), + start: 10, + end: 10, + }), + grch38: Some(bioscript_core::GenomicLocus { + chrom: chrom.to_owned(), + start: 20, + end: 20, + }), + reference: Some(reference.to_owned()), + alternate: Some(alternate.to_owned()), + kind: 
Some(kind), + ..VariantSpec::default() + }, + } + } + + fn base_row() -> BTreeMap { + BTreeMap::from([ + ("participant_id".to_owned(), "p1".to_owned()), + ("matched_rsid".to_owned(), "rs1".to_owned()), + ("backend".to_owned(), "text".to_owned()), + ("assembly".to_owned(), "grch38".to_owned()), + ("genotype".to_owned(), "AG".to_owned()), + ("ref_count".to_owned(), "8".to_owned()), + ("alt_count".to_owned(), "7".to_owned()), + ("depth".to_owned(), "15".to_owned()), + ("evidence".to_owned(), "fixture".to_owned()), + ]) + } + + #[test] + fn app_observation_json_covers_called_variant_fields() { + let row = base_row(); + let observation = app_observation_from_manifest_row(AppObservationInput { + row: &row, + row_path: "variants/rs1.yaml", + assay_id: "assay", + manifest: manifest(VariantKind::Snp, "1", "A", "G"), + gene: "ABC".to_owned(), + source: serde_json::json!({"kind": "database"}), + observed_alt_alleles: Vec::new(), + inferred_sex: None, + fallback_assembly: None, + }); + + assert_eq!(observation["participant_id"], "p1"); + assert_eq!(observation["assay_id"], "assay"); + assert_eq!(observation["variant_key"], "rs1"); + assert_eq!(observation["assembly"], "GRCH38"); + assert_eq!(observation["pos_start"], 20); + assert_eq!(observation["genotype"], "0/1"); + assert_eq!(observation["zygosity"], "het"); + assert_eq!(observation["outcome"], "variant"); + assert_eq!(observation["allele_balance"], serde_json::json!(7.0 / 15.0)); + assert_eq!(observation["facets"], serde_json::Value::Null); + } + + #[test] + fn app_observation_json_covers_no_call_not_covered_and_fallback_assembly() { + let mut no_call = BTreeMap::from([ + ("participant_id".to_owned(), "p2".to_owned()), + ("backend".to_owned(), "text".to_owned()), + ("depth".to_owned(), "0".to_owned()), + ]); + let observation = app_observation_from_manifest_row(AppObservationInput { + row: &no_call, + row_path: "variants/rs1.yaml", + assay_id: "assay", + manifest: manifest(VariantKind::Snp, "1", "A", "G"), + gene: 
"ABC".to_owned(), + source: serde_json::Value::Null, + observed_alt_alleles: Vec::new(), + inferred_sex: None, + fallback_assembly: Some(Assembly::Grch37), + }); + assert_eq!(observation["assembly"], "GRCH37"); + assert_eq!(observation["pos_start"], 10); + assert_eq!(observation["match_status"], "not_found"); + assert_eq!(observation["coverage_status"], "not_covered"); + assert_eq!(observation["call_status"], "no_call"); + assert_eq!(observation["genotype_display"], "??"); + assert_eq!(observation["outcome"], "not_covered"); + + no_call.insert("depth".to_owned(), "12".to_owned()); + let observation = app_observation_from_manifest_row(AppObservationInput { + row: &no_call, + row_path: "variants/rs1.yaml", + assay_id: "assay", + manifest: manifest(VariantKind::Snp, "1", "A", "G"), + gene: "ABC".to_owned(), + source: serde_json::Value::Null, + observed_alt_alleles: Vec::new(), + inferred_sex: None, + fallback_assembly: Some(Assembly::Grch38), + }); + assert_eq!(observation["outcome"], "no_call"); + } + + #[test] + fn app_observation_json_covers_non_reportable_and_sex_evidence_paths() { + let inferred_sex = SexInference { + sex: InferredSex::Male, + confidence: SexDetectionConfidence::Medium, + method: "fixture".to_owned(), + evidence: vec!["signal=present".to_owned()], + }; + let mut row = base_row(); + row.insert("genotype".to_owned(), "TT".to_owned()); + let observation = app_observation_from_manifest_row(AppObservationInput { + row: &row, + row_path: "variants/rsx.yaml", + assay_id: "assay", + manifest: manifest(VariantKind::Snp, "X", "A", "G"), + gene: "ABC".to_owned(), + source: serde_json::Value::Null, + observed_alt_alleles: vec!["T".to_owned()], + inferred_sex: Some(&inferred_sex), + fallback_assembly: None, + }); + + assert_eq!(observation["outcome"], "observed_alt"); + assert_eq!(observation["call_status"], "observed_alt"); + assert_eq!(observation["facets"], "observed_alt;known_observed_alts=T"); + assert!(observation["evidence_raw"] + .as_str() + .unwrap() 
+ .contains("detected_sex=male")); + } + + #[test] + fn app_observation_json_covers_raw_counts_and_weak_indel_match() { + let mut row = BTreeMap::from([ + ("participant_id".to_owned(), "p3".to_owned()), + ("matched_rsid".to_owned(), "rs1".to_owned()), + ("backend".to_owned(), "zip".to_owned()), + ("raw_counts".to_owned(), r#"{"D": 8, "I": 6}"#.to_owned()), + ("depth".to_owned(), "14".to_owned()), + ]); + row.insert("genotype".to_owned(), "ID".to_owned()); + let observation = app_observation_from_manifest_row(AppObservationInput { + row: &row, + row_path: "variants/rs1.yaml", + assay_id: "assay", + manifest: manifest(VariantKind::Deletion, "22", "TTATAA", ""), + gene: "APOL1".to_owned(), + source: serde_json::Value::Null, + observed_alt_alleles: Vec::new(), + inferred_sex: None, + fallback_assembly: Some(Assembly::Grch38), + }); + assert_eq!(observation["kind"], "deletion"); + assert_eq!(observation["genotype"], "0/1"); + assert_eq!(observation["match_quality"], "weak"); + assert!(observation["match_notes"] + .as_str() + .unwrap() + .contains("insertion/deletion token")); + } } diff --git a/rust/bioscript-reporting/src/observation/facets.rs b/rust/bioscript-reporting/src/observation/facets.rs index 40a8951..bb39e6f 100644 --- a/rust/bioscript-reporting/src/observation/facets.rs +++ b/rust/bioscript-reporting/src/observation/facets.rs @@ -79,3 +79,90 @@ pub(super) fn observation_facets( pub(super) fn parse_optional_u32(value: Option<&String>) -> Option { value.and_then(|value| value.parse::().ok()) } + +#[cfg(test)] +mod tests { + use super::*; + use bioscript_core::VariantSpec; + use std::path::PathBuf; + + fn deletion_manifest(reference: &str) -> VariantManifest { + VariantManifest { + path: PathBuf::from("variant.yaml"), + name: "Deletion".to_owned(), + tags: Vec::new(), + spec: VariantSpec { + kind: Some(VariantKind::Deletion), + reference: Some(reference.to_owned()), + alternate: Some("".to_owned()), + ..VariantSpec::default() + }, + } + } + + #[test] + fn 
non_reportable_allele_classifier_distinguishes_known_and_unknown_alts() { + assert_eq!( + classify_non_reportable_alleles("A/T", "A", "G", &["T".to_owned()]), + Some("observed_alt") + ); + assert_eq!( + classify_non_reportable_alleles("A/C", "A", "G", &["T".to_owned()]), + Some("unknown_alt") + ); + assert_eq!( + classify_non_reportable_alleles("", "A", "G", &["T".to_owned()]), + None + ); + assert_eq!( + classify_non_reportable_alleles("A/T", "AT", "G", &["T".to_owned()]), + None + ); + assert_eq!( + classify_non_reportable_alleles("A/T", "A", "GT", &["T".to_owned()]), + None + ); + } + + #[test] + fn weak_delimited_indel_match_requires_text_deletion_shape() { + let manifest = deletion_manifest("TTATAA"); + let mut row = BTreeMap::new(); + row.insert("backend".to_owned(), "text".to_owned()); + assert!(is_weak_delimited_indel_match(&row, &manifest, "ID")); + assert!(is_weak_delimited_indel_match(&row, &manifest, "D/D")); + assert!(!is_weak_delimited_indel_match(&row, &manifest, "AG")); + + row.insert("backend".to_owned(), "cram".to_owned()); + assert!(!is_weak_delimited_indel_match(&row, &manifest, "ID")); + + row.insert("backend".to_owned(), "zip".to_owned()); + assert!(!is_weak_delimited_indel_match( + &row, + &deletion_manifest("A"), + "ID" + )); + let mut snv = deletion_manifest("TTATAA"); + snv.spec.kind = Some(VariantKind::Snp); + assert!(!is_weak_delimited_indel_match(&row, &snv, "ID")); + } + + #[test] + fn observation_facets_and_optional_integer_parsing_cover_edges() { + assert_eq!(observation_facets(None, &[]), serde_json::Value::Null); + assert_eq!( + observation_facets(Some("unknown_alt"), &[]), + serde_json::Value::String("unknown_alt".to_owned()) + ); + assert_eq!( + observation_facets(Some("observed_alt"), &["T".to_owned(), "C".to_owned()]), + serde_json::Value::String("observed_alt;known_observed_alts=T,C".to_owned()) + ); + + let good = "42".to_owned(); + let bad = "not-a-number".to_owned(); + assert_eq!(parse_optional_u32(Some(&good)), Some(42)); 
+ assert_eq!(parse_optional_u32(Some(&bad)), None); + assert_eq!(parse_optional_u32(None), None); + } +} diff --git a/rust/bioscript-reporting/src/report_json.rs b/rust/bioscript-reporting/src/report_json.rs index e3c5e44..f4e7604 100644 --- a/rust/bioscript-reporting/src/report_json.rs +++ b/rust/bioscript-reporting/src/report_json.rs @@ -179,3 +179,183 @@ fn sex_detection_confidence_name(value: bioscript_formats::SexDetectionConfidenc bioscript_formats::SexDetectionConfidence::Low => "low", } } + +#[cfg(test)] +mod tests { + use std::path::PathBuf; + + use bioscript_core::Assembly; + use bioscript_formats::{ + DetectedKind, DetectionConfidence, FileContainer, FileInspection, InferredSex, + SexDetectionConfidence, SexInference, SourceMetadata, + }; + use serde_json::json; + + use super::{AppInputReportInput, AppReportJsonInput, app_input_report_json, app_report_json}; + + fn inspection() -> FileInspection { + FileInspection { + path: PathBuf::from("sample.vcf"), + container: FileContainer::Zip, + detected_kind: DetectedKind::Vcf, + confidence: DetectionConfidence::Authoritative, + source: Some(SourceMetadata { + vendor: Some("ExampleVendor".to_owned()), + platform_version: Some("v1".to_owned()), + confidence: DetectionConfidence::StrongHeuristic, + evidence: vec!["filename".to_owned()], + }), + assembly: Some(Assembly::Grch38), + phased: Some(true), + selected_entry: Some("sample.vcf".to_owned()), + has_index: Some(true), + index_path: Some(PathBuf::from("sample.vcf.tbi")), + reference_matches: Some(true), + inferred_sex: Some(SexInference { + sex: InferredSex::Female, + confidence: SexDetectionConfidence::High, + method: "x_het".to_owned(), + evidence: vec!["x heterozygosity".to_owned()], + }), + evidence: vec!["vcf header".to_owned()], + warnings: vec!["demo warning".to_owned()], + duration_ms: 42, + } + } + + fn observation(call_status: &str, evidence_raw: &str) -> serde_json::Value { + json!({ + "variant_key": "rs123", + "call_status": call_status, + 
"outcome": if call_status == "called" { "variant" } else { "unknown" }, + "evidence_raw": evidence_raw, + "rsid": "rs123", + "gene": "CYP2C19", + "ref": "G", + "alt": "A", + "genotype_display": "G/A", + "zygosity": "het" + }) + } + + #[test] + fn app_report_json_counts_sites_and_serializes_input_debug() { + let observations = vec![ + observation("called", "observed genotype"), + observation( + "missing", + "imputed reference genotype from absent variant-only VCF record", + ), + ]; + let analyses = vec![json!({"analysis_id": "a1"})]; + let findings = vec![json!({"schema": "bioscript:pgx-summary:1.0"})]; + let provenance = vec![json!({"label": "dbSNP", "url": "https://example.test"})]; + let manifest = json!({"name": "panel", "label": "Panel"}); + + let report = app_report_json(AppReportJsonInput { + assay_id: "assay", + participant_id: "P001", + input_file_name: "sample.vcf", + input_file_path: "/data/sample.vcf", + observations: &observations, + analyses: &analyses, + findings: &findings, + provenance: &provenance, + input_inspection: Some(&inspection()), + manifest_metadata: &manifest, + }); + + assert_eq!(report["report_status"], "partial"); + assert_eq!(report["metrics"]["n_sites_tested"], 2); + assert_eq!(report["metrics"]["n_sites_called"], 1); + assert_eq!(report["input"]["debug"]["container"], "zip"); + assert_eq!(report["input"]["debug"]["format"], "vcf"); + assert_eq!( + report["input"]["debug"]["vcf_missing_reference_imputation"], + true + ); + assert_eq!(report["input"]["debug"]["inferred_sex"]["sex"], "female"); + } + + #[test] + fn app_input_report_json_matches_findings_before_building_report() { + let observations = vec![observation("called", "observed genotype")]; + let analyses = vec![json!({"analysis_id": "a1", "rows": []})]; + let findings = vec![json!({ + "schema": "bioscript:pgx-summary:1.0", + "effects": [{ + "binding": { + "source": "variant", + "variant": "rs123", + "key": "alt", + "value": "A" + }, + "text": "A allele observed" + }] + })]; + 
let provenance = Vec::new(); + let manifest = json!({"name": "panel"}); + + let report = app_input_report_json(AppInputReportInput { + assay_id: "assay", + participant_id: "P001", + input_file_name: "sample.txt", + input_file_path: "/data/sample.txt", + observations: &observations, + analyses: &analyses, + findings: &findings, + provenance: &provenance, + input_inspection: None, + manifest_metadata: &manifest, + }); + + assert_eq!(report["report_status"], "complete"); + assert_eq!(report["metrics"]["n_findings_matched"], 1); + assert_eq!( + report["findings"][0]["matched_effect"]["text"], + "A allele observed" + ); + } + + #[test] + fn enum_name_helpers_cover_all_known_values() { + assert_eq!(super::file_container_name(FileContainer::Plain), "plain"); + assert_eq!(super::file_container_name(FileContainer::Zip), "zip"); + assert_eq!(super::detected_kind_name(DetectedKind::GenotypeText), "genotype_text"); + assert_eq!(super::detected_kind_name(DetectedKind::Vcf), "vcf"); + assert_eq!( + super::detected_kind_name(DetectedKind::AlignmentCram), + "alignment_cram" + ); + assert_eq!( + super::detected_kind_name(DetectedKind::AlignmentBam), + "alignment_bam" + ); + assert_eq!( + super::detected_kind_name(DetectedKind::ReferenceFasta), + "reference_fasta" + ); + assert_eq!(super::detected_kind_name(DetectedKind::Unknown), "unknown"); + assert_eq!( + super::detection_confidence_name(DetectionConfidence::WeakHeuristic), + "weak_heuristic" + ); + assert_eq!( + super::detection_confidence_name(DetectionConfidence::Unknown), + "unknown" + ); + assert_eq!(super::assembly_name(Assembly::Grch37), "grch37"); + assert_eq!(super::assembly_name(Assembly::Grch38), "grch38"); + assert_eq!(super::inferred_sex_name(InferredSex::Male), "male"); + assert_eq!(super::inferred_sex_name(InferredSex::Female), "female"); + assert_eq!(super::inferred_sex_name(InferredSex::Unknown), "unknown"); + assert_eq!( + super::sex_detection_confidence_name(SexDetectionConfidence::Medium), + "medium" + ); + 
assert_eq!( + super::sex_detection_confidence_name(SexDetectionConfidence::Low), + "low" + ); + } +} diff --git a/rust/bioscript-schema/src/remote_resource.rs b/rust/bioscript-schema/src/remote_resource.rs index 40ad8d1..b1ea550 100644 --- a/rust/bioscript-schema/src/remote_resource.rs +++ b/rust/bioscript-schema/src/remote_resource.rs @@ -308,3 +308,122 @@ fn has_extension(value: &str, extensions: &[&str]) -> bool { .any(|item| extension.eq_ignore_ascii_case(item)) }) } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn resolves_yaml_resource_kind_title_version_hash_and_dependencies() { + let text = r#" +schema: bioscript:panel:1.0 +version: "1.0" +label: Example Panel +members: + - kind: variant + path: variants/rs1.yaml + version: v1 +downloads: + - url: https://example.test/data.zip + version: v2 +metadata: + file: scripts/report.py + url: https://example.test/data.zip +"#; + let resolved = resolve_remote_resource_text( + "https://github.com/org/repo/blob/main/panels/panel.yaml", + "panel.yaml", + text, + ) + .unwrap(); + + assert_eq!(resolved.kind, RemoteResourceKind::Panel); + assert_eq!(resolved.schema.as_deref(), Some("bioscript:panel:1.0")); + assert_eq!(resolved.title, "Example Panel"); + assert_eq!(resolved.version.as_deref(), Some("1.0")); + assert_eq!(resolved.sha256, sha256_hex(text.as_bytes())); + assert_eq!(resolved.dependencies.len(), 3); + assert!(resolved + .dependencies + .iter() + .any(|dep| dep.kind == "member" + && dep.url == "https://github.com/org/repo/blob/main/panels/variants/rs1.yaml")); + assert!(resolved + .dependencies + .iter() + .any(|dep| dep.kind == "download" && dep.version.as_deref() == Some("v2"))); + } + + #[test] + fn infers_resource_kind_from_schema_extension_and_content_shape() { + assert_eq!( + resolve_remote_resource_text("https://example.test/script.py", "script.py", "print(1)") + .unwrap() + .kind, + RemoteResourceKind::Python + ); + assert_eq!( + resolve_remote_resource_text( + 
"https://example.test/catalog.json", + "catalog.json", + r#"{"assays": []}"#, + ) + .unwrap() + .kind, + RemoteResourceKind::Catalogue + ); + assert_eq!( + resolve_remote_resource_text( + "https://example.test/variant.yaml", + "variant.yaml", + "rsid: rs1\n", + ) + .unwrap() + .kind, + RemoteResourceKind::Variant + ); + assert_eq!( + resolve_remote_resource_text( + "https://example.test/assay.yaml", + "assay.yaml", + "assay:\n version: v1\n", + ) + .unwrap() + .version + .as_deref(), + Some("v1") + ); + } + + #[test] + fn dependency_url_resolution_handles_github_absolute_repo_paths_and_standard_relative_paths() { + assert_eq!( + resolve_resource_url( + "https://github.com/org/repo/blob/main/panels/panel.yaml", + "/variants/rs1.yaml", + ) + .as_deref(), + Some("https://github.com/org/repo/blob/main/variants/rs1.yaml") + ); + assert_eq!( + resolve_resource_url("https://example.test/a/b/panel.yaml", "../v/rs1.yaml") + .as_deref(), + Some("https://example.test/a/v/rs1.yaml") + ); + assert!(resolve_resource_url("not a url", "relative.yaml").is_none()); + } + + #[test] + fn parse_structured_text_reports_yaml_and_json_errors() { + assert!(parse_structured_text("bad.yaml", "{") + .unwrap_err() + .contains("failed to parse YAML")); + assert!(parse_structured_text("bad.json", "{") + .unwrap_err() + .contains("failed to parse JSON")); + assert!(parse_structured_text("notes.txt", "not structured") + .unwrap() + .is_none()); + assert!(has_extension("PANEL.YAML", &["yaml"])); + } +} diff --git a/rust/bioscript-schema/src/validator_alleles.rs b/rust/bioscript-schema/src/validator_alleles.rs index 12b2759..205cf2d 100644 --- a/rust/bioscript-schema/src/validator_alleles.rs +++ b/rust/bioscript-schema/src/validator_alleles.rs @@ -144,3 +144,81 @@ fn validate_snv_alleles(kind: &str, reference: &str, alts: &[String], issues: &m } } +#[cfg(test)] +mod allele_validator_tests { + use super::*; + + fn yaml(text: &str) -> Value { + serde_yaml::from_str(text).unwrap() + } + + fn 
issue_paths(issues: &[Issue]) -> Vec<String> { + issues.iter().map(|issue| issue.path.clone()).collect() + } + + #[test] + fn allele_validator_reports_required_shape_and_sequence_errors() { + let mut issues = Vec::new(); + validate_alleles(&yaml("{}"), &mut issues); + let paths = issue_paths(&issues); + assert!(paths.contains(&"alleles".to_owned())); + assert!(paths.contains(&"alleles.kind".to_owned())); + assert!(paths.contains(&"alleles.ref".to_owned())); + assert!(paths.contains(&"alleles.alts".to_owned())); + + let mut issues = Vec::new(); + validate_alleles( + &yaml( + r#" +alleles: + kind: other + canonical_alt: A + ref: "" + alts: not-a-list +"#, + ), + &mut issues, + ); + let paths = issue_paths(&issues); + assert!(paths.contains(&"alleles.kind".to_owned())); + assert!(paths.contains(&"alleles.canonical_alt".to_owned())); + assert!(paths.contains(&"alleles.ref".to_owned())); + assert!(paths.contains(&"alleles.alts".to_owned())); + } + + #[test] + fn allele_validator_reports_bad_alts_and_observed_alt_mismatches() { + let root = yaml( + r#" +alleles: + kind: snv + ref: N + alts: [A, "", 3, D] + observed_alts: [A] +"#, + ); + let mut issues = Vec::new(); + validate_alleles(&root, &mut issues); + let paths = issue_paths(&issues); + assert!(paths.contains(&"alleles.ref".to_owned())); + assert!(paths.contains(&"alleles.alts[1]".to_owned())); + assert!(paths.contains(&"alleles.alts[2]".to_owned())); + assert!(paths.contains(&"alleles.observed_alts".to_owned())); + } + + #[test] + fn allele_validator_accepts_indel_observed_alt_superset() { + let root = yaml( + r#" +alleles: + kind: indel + ref: AT + alts: [A] + observed_alts: [A, ATT] +"#, + ); + let mut issues = Vec::new(); + validate_alleles(&root, &mut issues); + assert!(issues.is_empty()); + } +} diff --git a/rust/bioscript-schema/src/validator_findings.rs b/rust/bioscript-schema/src/validator_findings.rs index 065c55c..8296c30 100644 --- a/rust/bioscript-schema/src/validator_findings.rs +++
b/rust/bioscript-schema/src/validator_findings.rs @@ -242,3 +242,147 @@ fn validate_finding_binding_operator(parent: &str, binding: &Mapping, issues: &m }), } } +/** Unit tests for validate_findings: each test parses an inline YAML fixture and asserts which issue paths are reported. */ +#[cfg(test)] +mod finding_validator_tests { + use super::*; + /** Parses a YAML fixture into a Value; panics if the text is not valid YAML. */ + fn yaml(text: &str) -> Value { + serde_yaml::from_str(text).unwrap() + } + /** Returns a clone of the path of every issue, in the order the issues appear in the slice. */ + fn issue_paths(issues: &[Issue]) -> Vec<String> { + issues.iter().map(|issue| issue.path.clone()).collect() + } + /** Asserts the issue paths reported for malformed findings entries (shape, schema, alt and content problems). */ + #[test] + fn validate_findings_reports_shape_schema_alt_and_content_issues() { + let root = yaml( + r#" +alleles: + alts: [A] +findings: + - not a mapping + - {} + - schema: "" + - schema: bioscript:pgx:1.0 + alt: T + - schema: bioscript:trait:1.0 + alt: "*" + summary: ok +"#, + ); + let mut issues = Vec::new(); + validate_findings(&root, &mut issues); + let paths = issue_paths(&issues); + assert!(paths.contains(&"findings[0]".to_owned())); + assert!(paths.contains(&"findings[1].schema".to_owned())); + assert!(paths.contains(&"findings[2].schema".to_owned())); + assert!(paths.contains(&"findings[3].schema".to_owned())); + assert!(paths.contains(&"findings[3].alt".to_owned())); + assert!(paths.contains(&"findings[3]".to_owned())); + } + /** Asserts the issue paths reported for variant and analysis bindings with missing or empty fields, and for a binding whose source is `external`. */ + #[test] + fn variant_and_analysis_binding_validation_covers_required_fields() { + let root = yaml( + r#" +findings: + - schema: bioscript:trait:1.0 + summary: ok + binding: + source: variant + operator: equals + - schema: bioscript:trait:1.0 + summary: ok + binding: + source: analysis + operator: equals + key: "" + - schema: bioscript:trait:1.0 + summary: ok + binding: + source: external + operator: in + key: outcome + values: [] +"#, + ); + let mut issues = Vec::new(); + validate_findings(&root, &mut issues); + let paths = issue_paths(&issues); + assert!(paths.contains(&"findings[0].binding.variant".to_owned())); + assert!(paths.contains(&"findings[0].binding.value".to_owned())); + assert!(paths.contains(&"findings[1].binding.key".to_owned())); + assert!(paths.contains(&"findings[1].binding.analysis_id".to_owned()));
+ assert!(paths.contains(&"findings[1].binding.value".to_owned())); + assert!(paths.contains(&"findings[2].binding.source".to_owned())); + assert!(paths.contains(&"findings[2].binding.values".to_owned())); + } + /** Asserts the issue paths reported for dosage_equals and dosage_in bindings with bad allele or value fields, and for the `contains` operator. */ + #[test] + fn dosage_binding_validation_covers_equals_in_and_unknown_operators() { + let root = yaml( + r#" +findings: + - schema: bioscript:trait:1.0 + summary: ok + binding: + source: variant + variant: rs1.yaml + operator: dosage_equals + allele: "" + value: 4 + - schema: bioscript:trait:1.0 + summary: ok + binding: + source: variant + variant: rs1.yaml + operator: dosage_in + allele: A + values: [0, bad, 2] + - schema: bioscript:trait:1.0 + summary: ok + binding: + source: variant + variant: rs1.yaml + operator: contains +"#, + ); + let mut issues = Vec::new(); + validate_findings(&root, &mut issues); + let paths = issue_paths(&issues); + assert!(paths.contains(&"findings[0].binding.allele".to_owned())); + assert!(paths.contains(&"findings[0].binding.value".to_owned())); + assert!(paths.contains(&"findings[1].binding.values".to_owned())); + assert!(paths.contains(&"findings[2].binding.operator".to_owned())); + } + /** Asserts the issue paths reported for a non-sequence effects value, a non-mapping effect, and an effect binding with an empty values list. */ + #[test] + fn effect_validation_covers_non_sequence_and_nested_effect_bindings() { + let root = yaml( + r#" +findings: + - schema: bioscript:trait:1.0 + summary: ok + effects: not-a-sequence + - schema: bioscript:trait:1.0 + summary: ok + effects: + - not-a-mapping + - binding: + source: variant + variant: rs1.yaml + operator: dosage_in + allele: A + values: [] +"#, + ); + let mut issues = Vec::new(); + validate_findings(&root, &mut issues); + let paths = issue_paths(&issues); + assert!(paths.contains(&"findings[0].effects".to_owned())); + assert!(paths.contains(&"findings[1].effects[0]".to_owned())); + assert!(paths.contains(&"findings[1].effects[1].binding.values".to_owned())); + } +} diff --git a/rust/bioscript-schema/src/validator_load.rs b/rust/bioscript-schema/src/validator_load.rs index 776314f..e37d437 100644 ---
a/rust/bioscript-schema/src/validator_load.rs +++ b/rust/bioscript-schema/src/validator_load.rs @@ -378,3 +378,172 @@ fn validate_panel_file(path: &Path) -> Result { issues, }) } + +#[cfg(test)] +mod load_validator_tests { + use super::*; + use std::time::{SystemTime, UNIX_EPOCH}; + + fn temp_dir(name: &str) -> PathBuf { + let unique = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_nanos(); + let dir = std::env::temp_dir().join(format!( + "bioscript-schema-load-{name}-{}-{unique}", + std::process::id() + )); + fs::create_dir_all(&dir).unwrap(); + dir + } + + fn variant_yaml(name: &str) -> String { + format!( + r#" +schema: bioscript:variant:1.0 +version: "1.0" +name: {name} +tags: [tag:test] +identifiers: + rsids: [rs1] +coordinates: + grch38: + chrom: "1" + pos: 100 +alleles: + kind: snv + ref: A + alts: [G] +"# + ) + } + + #[test] + fn manifest_loaders_parse_variant_panel_and_assay_text() { + let variant = load_variant_manifest_text("variant.yaml", &variant_yaml("rs1")).unwrap(); + assert_eq!(variant.name, "rs1"); + assert_eq!(variant.tags, vec!["tag:test"]); + + let lookup = load_variant_manifest_text_for_lookup( + "legacy.yaml", + &variant_yaml("rs1").replace("bioscript:variant:1.0", "bioscript:variant"), + ) + .unwrap(); + assert_eq!(lookup.spec.rsids, vec!["rs1"]); + + let panel = load_panel_manifest_text( + "panel.yaml", + r#" +schema: bioscript:panel:1.0 +version: "1.0" +name: panel +label: Panel +permissions: + domains: [https://example.test] +downloads: + - id: dl + url: https://example.test/file.yaml + sha256: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + version: "1" +members: + - kind: variant + path: variant.yaml +analyses: + - id: a + kind: bioscript + path: analysis.bs + output_format: json + derived_from: [variant.yaml] +"#, + ) + .unwrap(); + assert_eq!(panel.downloads.len(), 1); + assert_eq!(panel.members.len(), 1); + assert_eq!(panel.interpretations.len(), 1); + + let assay = load_assay_manifest_text( + 
"assay.yaml", + r#" +schema: bioscript:assay:1.0 +version: "1.0" +name: assay +members: + - kind: variant + path: variant.yaml +"#, + ) + .unwrap(); + assert_eq!(assay.members.len(), 1); + } + + #[test] + fn manifest_loaders_report_parse_and_validation_errors() { + assert!(load_variant_manifest_text("bad.yaml", "{") + .unwrap_err() + .contains("failed to parse YAML")); + assert!(load_variant_manifest_text("bad.yaml", "schema: bioscript:variant:1.0\n") + .unwrap_err() + .contains("missing required field")); + assert!(load_variant_manifest_text_for_lookup("bad.yaml", "schema: bad\n") + .unwrap_err() + .contains("expected schema")); + assert!(load_panel_manifest_text("bad.yaml", "schema: bad\n") + .unwrap_err() + .contains("expected schema")); + assert!(load_assay_manifest_text("bad.yaml", "schema: bad\n") + .unwrap_err() + .contains("expected schema")); + } + + #[test] + fn validate_manifest_path_collects_yaml_files_recursively_and_ignores_other_schemas() { + let dir = temp_dir("collect"); + fs::write(dir.join("variant.yaml"), variant_yaml("rs1")).unwrap(); + fs::write(dir.join("panel.yml"), "schema: bioscript:panel:1.0\n").unwrap(); + fs::write(dir.join("notes.txt"), "ignored").unwrap(); + fs::create_dir_all(dir.join("nested")).unwrap(); + fs::write(dir.join("nested/missing-schema.yaml"), "name: missing\n").unwrap(); + + let files = collect_yaml_files(&dir).unwrap(); + assert_eq!(files.len(), 3); + + let report = validate_variants_path(&dir).unwrap(); + assert_eq!(report.files_scanned, 3); + assert_eq!(report.total_errors(), 1); + assert!(report.render_text().contains("missing schema")); + + let panel_report = validate_panels_path(&dir).unwrap(); + assert_eq!(panel_report.files_scanned, 3); + assert!(panel_report.total_errors() >= 1); + + fs::remove_dir_all(dir).unwrap(); + } + + #[test] + fn validate_file_helpers_skip_unrelated_schemas_and_handle_missing_schema() { + let dir = temp_dir("files"); + let missing = dir.join("missing.yaml"); + fs::write(&missing, "name: 
missing\n").unwrap(); + assert_eq!(validate_variant_file(&missing).unwrap().issues[0].path, "schema"); + assert_eq!(validate_panel_file(&missing).unwrap().issues[0].path, "schema"); + assert_eq!(validate_assay_file(&missing).unwrap().issues[0].path, "schema"); + + let panel = dir.join("panel.yaml"); + fs::write(&panel, "schema: bioscript:panel:1.0\n").unwrap(); + assert!(validate_variant_file(&panel).unwrap().issues.is_empty()); + + let pgx = dir.join("pgx.yaml"); + fs::write( + &pgx, + r#" +schema: bioscript:pgx-findings:1.0 +version: "1.0" +findings: [] +"#, + ) + .unwrap(); + assert!(!validate_variant_file(&pgx).unwrap().issues.is_empty()); + + fs::remove_dir_all(dir).unwrap(); + } +} diff --git a/rust/bioscript-schema/src/validator_panel.rs b/rust/bioscript-schema/src/validator_panel.rs index ba6f9e5..fdfdf54 100644 --- a/rust/bioscript-schema/src/validator_panel.rs +++ b/rust/bioscript-schema/src/validator_panel.rs @@ -447,3 +447,184 @@ fn locus_from_root(root: &Value, assembly: &str) -> Result, end, })) } + +#[cfg(test)] +mod panel_validator_tests { + use super::*; + + fn yaml(text: &str) -> Value { + serde_yaml::from_str(text).unwrap() + } + + fn issue_paths(issues: &[Issue]) -> Vec<String> { + issues.iter().map(|issue| issue.path.clone()).collect() + } + + #[test] + fn panel_member_validation_reports_missing_empty_and_unknown_references() { + let root = yaml( + r#" +downloads: + - id: known +members: + - not-a-mapping + - kind: unknown + - kind: variant + path: "" + download: known + version: "" + sha256: BAD + - kind: variant + download: missing + - kind: variant + download: "" +"#, + ); + let mut issues = Vec::new(); + validate_panel_members(&root, &["variant"], &mut issues); + let paths = issue_paths(&issues); + assert!(paths.contains(&"members[0]".to_owned())); + assert!(paths.contains(&"members[1].kind".to_owned())); + assert!(paths.contains(&"members[1]".to_owned())); + assert!(paths.contains(&"members[2]".to_owned())); +
assert!(paths.contains(&"members[2].path".to_owned())); + assert!(paths.contains(&"members[2].version".to_owned())); + assert!(paths.contains(&"members[2].sha256".to_owned())); + assert!(paths.contains(&"members[3].download".to_owned())); + assert!(paths.contains(&"members[4].download".to_owned())); + } + + #[test] + fn panel_member_validation_reports_missing_and_empty_lists() { + let mut issues = Vec::new(); + validate_panel_members(&yaml("{}"), &["variant"], &mut issues); + assert_eq!(issues[0].path, "members"); + assert_eq!(issues[0].message, "missing required field"); + + let mut issues = Vec::new(); + validate_panel_members(&yaml("members: []"), &["variant"], &mut issues); + assert_eq!(issues[0].path, "members"); + assert_eq!(issues[0].message, "expected at least one member"); + } + + #[test] + fn interpretation_validation_reports_shape_and_required_field_errors() { + let root = yaml( + r#" +analyses: + - not-a-mapping + - id: "" + kind: python + path: "" + output_format: xml + derived_from: [] + - id: a + kind: bioscript + path: a.bs + derived_from: ["", 3] + logic: not-a-mapping + - id: b + kind: bioscript + path: b.bs + derived_from: [rs1] + emits: not-a-sequence +"#, + ); + let mut issues = Vec::new(); + validate_panel_interpretations(&root, &mut issues); + let paths = issue_paths(&issues); + assert!(paths.contains(&"analyses[0]".to_owned())); + assert!(paths.contains(&"analyses[1].id".to_owned())); + assert!(paths.contains(&"analyses[1].kind".to_owned())); + assert!(paths.contains(&"analyses[1].path".to_owned())); + assert!(paths.contains(&"analyses[1].output_format".to_owned())); + assert!(paths.contains(&"analyses[1].derived_from".to_owned())); + assert!(paths.contains(&"analyses[2].derived_from[0]".to_owned())); + assert!(paths.contains(&"analyses[2].derived_from[1]".to_owned())); + assert!(paths.contains(&"analyses[2].logic".to_owned())); + assert!(paths.contains(&"analyses[3].emits".to_owned())); + } + + #[test] + fn 
interpretation_logic_and_emits_validate_nested_strings_and_urls() { + let root = yaml( + r#" +analyses: + - id: a + kind: bioscript + path: a.bs + derived_from: [rs1] + logic: + description: "" + source: + name: 3 + url: not-a-url + emits: + - not-a-mapping + - key: "" + label: 3 + value_type: "" + format: {} +interpretations: + - id: old + kind: bioscript + path: old.bs + derived_from: [rs1] +"#, + ); + let mut issues = Vec::new(); + validate_panel_interpretations(&root, &mut issues); + let paths = issue_paths(&issues); + assert!(paths.contains(&"interpretations".to_owned())); + assert!(paths.contains(&"analyses[0].logic.description".to_owned())); + assert!(paths.contains(&"analyses[0].logic.source.name".to_owned())); + assert!(paths.contains(&"analyses[0].logic.source.url".to_owned())); + assert!(paths.contains(&"analyses[0].emits[0]".to_owned())); + assert!(paths.contains(&"analyses[0].emits[1].key".to_owned())); + assert!(paths.contains(&"analyses[0].emits[1].label".to_owned())); + assert!(paths.contains(&"analyses[0].emits[1].value_type".to_owned())); + assert!(paths.contains(&"analyses[0].emits[1].format".to_owned())); + } + + #[test] + fn variant_spec_parsing_extracts_coordinates_alleles_and_motifs() { + let root = yaml( + r#" +identifiers: + rsids: [rs1] +coordinates: + grch37: + chrom: "1" + pos: 10 + assembly_ref: b37 + grch38: + chrom: "1" + start: 20 + end: 22 + assembly_ref: b38 +alleles: + kind: deletion + ref: AT + alts: [A] + deletion_length: 1 + motifs: [T] +"#, + ); + let spec = variant_spec_from_root(&root).unwrap(); + assert_eq!(spec.rsids, vec!["rs1"]); + assert_eq!(spec.grch37.unwrap().start, 10); + assert_eq!(spec.grch38.unwrap().end, 22); + assert_eq!(spec.grch37_assembly_ref.as_deref(), Some("b37")); + assert_eq!(spec.grch38_assembly_ref.as_deref(), Some("b38")); + assert_eq!(spec.reference.as_deref(), Some("AT")); + assert_eq!(spec.alternate.as_deref(), Some("A")); + assert_eq!(spec.kind, Some(VariantKind::Deletion)); + 
assert_eq!(spec.deletion_length, Some(1)); + assert_eq!(spec.motifs, vec!["T"]); + + let missing = yaml("coordinates: {grch38: {chrom: '1'}}"); + assert!(locus_from_root(&missing, "grch38") + .unwrap_err() + .contains("start missing")); + } +} diff --git a/rust/bioscript-schema/src/validator_resources.rs b/rust/bioscript-schema/src/validator_resources.rs index 9f5ff22..6398a74 100644 --- a/rust/bioscript-schema/src/validator_resources.rs +++ b/rust/bioscript-schema/src/validator_resources.rs @@ -171,3 +171,94 @@ fn validate_downloads(root: &Value, issues: &mut Vec) { } } +#[cfg(test)] +mod resource_validator_tests { + use super::*; + + fn yaml(text: &str) -> Value { + serde_yaml::from_str(text).unwrap() + } + + fn issue_paths(issues: &[Issue]) -> Vec<String> { + issues.iter().map(|issue| issue.path.clone()).collect() + } + + #[test] + fn provenance_validation_reports_source_shape_and_url_errors() { + let root = yaml( + r#" +provenance: + sources: + - not-a-map + - kind: "" + label: 3 + url: not-a-url +"#, + ); + let mut issues = Vec::new(); + validate_provenance(&root, &mut issues); + let paths = issue_paths(&issues); + assert!(paths.contains(&"provenance.sources[0]".to_owned())); + assert!(paths.contains(&"provenance.sources[1].kind".to_owned())); + assert!(paths.contains(&"provenance.sources[1].label".to_owned())); + assert!(paths.contains(&"provenance.sources[1].url".to_owned())); + } + + #[test] + fn permissions_validation_reports_origin_shape_duplicates_and_bad_urls() { + let root = yaml( + r#" +permissions: + domains: + - https://example.com + - https://example.com + - 3 + - ftp://example.com +"#, + ); + let mut issues = Vec::new(); + validate_permissions(&root, &mut issues); + let paths = issue_paths(&issues); + assert!(paths.contains(&"permissions.domains[1]".to_owned())); + assert!(paths.contains(&"permissions.domains[2]".to_owned())); + assert!(paths.contains(&"permissions.domains[3]".to_owned())); + + let mut issues = Vec::new(); +
validate_permissions(&yaml("permissions: {domains: nope}"), &mut issues); + assert_eq!(issues[0].path, "permissions.domains"); + } + + #[test] + fn downloads_validation_reports_required_hash_duplicate_and_origin_errors() { + let root = yaml( + r#" +permissions: + domains: [https://allowed.example] +downloads: + - not-a-map + - id: dl + url: https://blocked.example/file.txt + sha256: BAD + version: "" + - id: dl + url: not-a-url + sha256: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + version: "1" + - id: "" + sha256: "" +"#, + ); + let mut issues = Vec::new(); + validate_downloads(&root, &mut issues); + let paths = issue_paths(&issues); + assert!(paths.contains(&"downloads[0]".to_owned())); + assert!(paths.contains(&"downloads[1].url".to_owned())); + assert!(paths.contains(&"downloads[1].sha256".to_owned())); + assert!(paths.contains(&"downloads[1].version".to_owned())); + assert!(paths.contains(&"downloads[2].id".to_owned())); + assert!(paths.contains(&"downloads[2].url".to_owned())); + assert!(paths.contains(&"downloads[3].id".to_owned())); + assert!(paths.contains(&"downloads[3].url".to_owned())); + assert!(paths.contains(&"downloads[3].version".to_owned())); + } +} diff --git a/rust/bioscript-schema/src/validator_roots.rs b/rust/bioscript-schema/src/validator_roots.rs index 9600eb7..08a7616 100644 --- a/rust/bioscript-schema/src/validator_roots.rs +++ b/rust/bioscript-schema/src/validator_roots.rs @@ -332,3 +332,171 @@ fn validate_coordinate_range_values(start: i64, end: i64, assembly: &str, issues } } +#[cfg(test)] +mod root_validator_tests { + use super::*; + + fn yaml(text: &str) -> Value { + serde_yaml::from_str(text).unwrap() + } + + fn messages(issues: &[Issue]) -> Vec<String> { + issues + .iter() + .map(|issue| format!("{}:{}", issue.path, issue.message)) + .collect() + } + + #[test] + fn variant_root_reports_identity_optional_tag_identifier_and_coordinate_edges() { + let root = yaml( + r#" +schema: bioscript:variant +version: "1.0" +name:
"" +label: "" +gene: [not, string] +tags: ["", 7] +variant_id: legacy +identifiers: + rsids: [bad, rs1, rs1, 9] + aliases: bad-shape +coordinates: + grch37: + chrom: 99 + pos: 0 + grch38: + chrom: "1" + pos: 1 + start: 1 + end: 1 +"#, + ); + let mut issues = Vec::new(); + validate_variant_root(&root, &mut issues); + let rendered = messages(&issues).join("\n"); + + assert!(rendered.contains("schema:legacy schema value")); + assert!(rendered.contains("name:empty string")); + assert!(rendered.contains("label:empty string")); + assert!(rendered.contains("gene:expected string")); + assert!(rendered.contains("tags[0]:empty tag string")); + assert!(rendered.contains("tags[1]:expected string")); + assert!(rendered.contains("variant_id:variant_id is legacy")); + assert!(rendered.contains("identifiers.rsids[0]:expected rsid")); + assert!(rendered.contains("identifiers.rsids[2]:duplicate identifier")); + assert!(rendered.contains("identifiers.rsids[3]:expected string")); + assert!(rendered.contains("identifiers.aliases:expected a sequence")); + assert!(rendered.contains("coordinates.grch37.chrom:missing chrom")); + assert!(rendered.contains("coordinates.grch38:use either pos or start/end")); + } + + #[test] + fn coordinate_range_validation_reports_missing_non_integer_bounds_and_single_position() { + let root = yaml( + r#" +schema: bioscript:variant:1.0 +version: "1.0" +name: coordinate-errors +coordinates: + grch37: + chrom: Z + start: zero + end: 1 + grch38: + chrom: X + start: 5 + end: 5 +alleles: + kind: snv + ref: A + alts: [G] +"#, + ); + let mut issues = Vec::new(); + validate_variant_root(&root, &mut issues); + let rendered = messages(&issues).join("\n"); + + assert!(rendered.contains("coordinates.grch37.chrom:invalid chromosome")); + assert!(rendered.contains("coordinates.grch37:expected integer start/end")); + assert!(rendered.contains("coordinates.grch38:single-position coordinate")); + + let root = yaml( + r#" +schema: bioscript:variant:1.0 +version: "1.0" +name: 
bad-range +coordinates: + grch38: + chrom: MT + start: 0 + end: -1 +alleles: + kind: snv + ref: A + alts: [G] +"#, + ); + let mut issues = Vec::new(); + validate_variant_root(&root, &mut issues); + let rendered = messages(&issues).join("\n"); + assert!(rendered.contains("coordinates.grch38.start:expected integer >= 1")); + assert!(rendered.contains("coordinates.grch38.end:expected integer >= 1")); + assert!(rendered.contains("coordinates.grch38.end:expected end >= start")); + } + + #[test] + fn panel_assay_and_pgx_roots_report_root_level_shape_errors() { + let root = yaml( + r#" +schema: bioscript:panel:1.0 +version: "1.0" +name: panel +tags: not-a-list +"#, + ); + let mut issues = Vec::new(); + validate_panel_root(&root, &mut issues); + let rendered = messages(&issues).join("\n"); + assert!(rendered.contains("tags:expected a sequence of strings")); + + let root = yaml( + r#" +schema: bioscript:assay:1.0 +version: "1.0" +name: assay +tags: [ok, ""] +"#, + ); + let mut issues = Vec::new(); + validate_assay_root(&root, &mut issues); + let rendered = messages(&issues).join("\n"); + assert!(rendered.contains("tags[1]:empty tag string")); + + let root = yaml( + r#" +schema: bioscript:pgx-findings:1.0 +version: "1.0" +gene: ABC +findings: not-a-list +"#, + ); + let mut issues = Vec::new(); + validate_pgx_findings_root(&root, &mut issues); + let rendered = messages(&issues).join("\n"); + assert!(rendered.contains("variant/rsid:expected at least one variant identifier")); + assert!(rendered.contains("findings:expected a sequence of findings")); + + let root = yaml( + r#" +schema: bioscript:pgx-findings:1.0 +version: "1.0" +rsid: rs1 +"#, + ); + let mut issues = Vec::new(); + validate_pgx_findings_root(&root, &mut issues); + let rendered = messages(&issues).join("\n"); + assert!(rendered.contains("findings:missing required field")); + } +} From 984fafe4d1a1fa1fc191b8870e591572ee6e49d8 Mon Sep 17 00:00:00 2001 From: Madhava Jay Date: Thu, 14 May 2026 16:19:56 +1000 Subject: 
[PATCH 2/3] added missing file --- .../src/manifest_provenance.rs | 166 ++++++++++++++++++ 1 file changed, 166 insertions(+) create mode 100644 rust/bioscript-reporting/src/manifest_provenance.rs diff --git a/rust/bioscript-reporting/src/manifest_provenance.rs b/rust/bioscript-reporting/src/manifest_provenance.rs new file mode 100644 index 0000000..defddf8 --- /dev/null +++ b/rust/bioscript-reporting/src/manifest_provenance.rs @@ -0,0 +1,166 @@ +use std::collections::BTreeMap; + +use super::{ManifestWorkspace, traversable_manifest_member_paths, yaml_string, yaml_to_json}; + +pub fn load_manifest_provenance_links( + workspace: &impl ManifestWorkspace, + path: &str, +) -> Result, String> { + let value = workspace.load_yaml(path)?; + let schema = yaml_string(&value, "schema").unwrap_or_default(); + let mut links = BTreeMap::new(); + collect_manifest_provenance_entries(&value, &mut links)?; + + for member_path in traversable_manifest_member_paths(&schema, &value) { + let resolved = workspace.resolve(path, member_path)?; + for source in load_manifest_provenance_links(workspace, &resolved)? 
{ + let key = provenance_source_key(&source); + links.entry(key).or_insert(source); + } + } + + Ok(links.into_values().collect()) +} + +pub fn collect_manifest_provenance_entries( + value: &serde_yaml::Value, + links: &mut BTreeMap, +) -> Result<(), String> { + let Some(sources) = value + .get("provenance") + .and_then(|provenance| provenance.get("sources")) + .and_then(serde_yaml::Value::as_sequence) + else { + return Ok(()); + }; + + for source in sources { + let json_source = yaml_to_json(source.clone())?; + let key = provenance_source_key(&json_source); + links.entry(key).or_insert(json_source); + } + + Ok(()) +} + +fn provenance_source_key(source: &serde_json::Value) -> String { + source + .get("url") + .and_then(serde_json::Value::as_str) + .or_else(|| source.get("label").and_then(serde_json::Value::as_str)) + .map(ToOwned::to_owned) + .unwrap_or_else(|| source.to_string()) +} + +#[cfg(test)] +mod tests { + use std::collections::BTreeMap; + + use super::*; + use crate::manifest::tests::MapWorkspace; + + #[test] + fn collect_manifest_provenance_entries_deduplicates_sources() { + let value: serde_yaml::Value = serde_yaml::from_str( + r#" +schema: bioscript:variant:1.0 +provenance: + sources: + - kind: database + label: dbSNP + url: https://www.ncbi.nlm.nih.gov/snp/rs1 + - kind: database + label: Duplicate dbSNP + url: https://www.ncbi.nlm.nih.gov/snp/rs1 +"#, + ) + .unwrap(); + + let mut links = BTreeMap::new(); + collect_manifest_provenance_entries(&value, &mut links).unwrap(); + assert_eq!(links.len(), 1); + assert_eq!( + links.values().next().unwrap()["label"], + serde_json::Value::String("dbSNP".to_owned()) + ); + } + + #[test] + fn load_manifest_provenance_links_walks_members() { + let workspace = MapWorkspace { + files: BTreeMap::from([ + ( + "panel.yaml".to_owned(), + r#" +schema: bioscript:panel:1.0 +members: + - kind: variant + path: rs1.yaml + - kind: assay + path: assay/manifest.yaml +provenance: + sources: + - kind: database + label: Panel + url: 
https://example.test/panel +"# + .to_owned(), + ), + ( + "rs1.yaml".to_owned(), + r#" +schema: bioscript:variant:1.0 +provenance: + sources: + - kind: database + label: Variant + url: https://example.test/variant +"# + .to_owned(), + ), + ( + "assay/manifest.yaml".to_owned(), + r#" +schema: bioscript:assay:1.0 +members: + - kind: variant + path: rs2.yaml +provenance: + sources: + - kind: database + label: Assay + url: https://example.test/assay +"# + .to_owned(), + ), + ( + "assay/rs2.yaml".to_owned(), + r#" +schema: bioscript:variant:1.0 +provenance: + sources: + - kind: database + label: Nested Variant + url: https://example.test/nested +"# + .to_owned(), + ), + ]), + }; + + let links = load_manifest_provenance_links(&workspace, "panel.yaml").unwrap(); + let urls = links + .iter() + .filter_map(|link| link.get("url").and_then(serde_json::Value::as_str)) + .collect::>(); + assert_eq!( + urls, + vec![ + "https://example.test/assay", + "https://example.test/nested", + "https://example.test/panel", + "https://example.test/variant", + ] + ); + } +} From 13a325d16489063e34a28c74d4af18a935d6119d Mon Sep 17 00:00:00 2001 From: Madhava Jay Date: Thu, 14 May 2026 16:27:47 +1000 Subject: [PATCH 3/3] Fix lint issues in coverage tests --- rust/bioscript-cli/src/package.rs | 48 ++++---- rust/bioscript-cli/src/report_observations.rs | 8 +- rust/bioscript-cli/src/report_review.rs | 16 +-- rust/bioscript-formats/src/genotype.rs | 4 +- .../src/genotype/cram_backend/observation.rs | 4 +- .../src/genotype/cram_backend/store.rs | 11 +- rust/bioscript-formats/src/inspect/sex.rs | 45 +++++-- .../src/inspect/sex/alignment_depth.rs | 22 ++-- rust/bioscript-reporting/src/artifacts.rs | 6 +- rust/bioscript-reporting/src/manifest.rs | 16 +-- .../src/manifest_provenance.rs | 3 +- rust/bioscript-reporting/src/matching.rs | 110 +++++++++--------- rust/bioscript-reporting/src/observation.rs | 27 +++-- rust/bioscript-reporting/src/report_json.rs | 5 +- 
rust/bioscript-schema/src/remote_resource.rs | 44 +++---- .../bioscript-schema/src/validator_alleles.rs | 4 +- .../src/validator_findings.rs | 4 +- .../src/validator_resources.rs | 4 +- 18 files changed, 215 insertions(+), 166 deletions(-) diff --git a/rust/bioscript-cli/src/package.rs b/rust/bioscript-cli/src/package.rs index 35a0ad8..808457d 100644 --- a/rust/bioscript-cli/src/package.rs +++ b/rust/bioscript-cli/src/package.rs @@ -496,13 +496,13 @@ mod package_tests { fn variant_manifest(name: &str) -> String { format!( - r#" + r" schema: bioscript:variant:1.0 name: {name} gene: ABC identifiers: rsids: [rs1] -"# +" ) } @@ -548,11 +548,11 @@ identifiers: fs::write( dir.join("manifest.yaml"), - r#" + r" schema: bioscript:package:1.0 name: Wrapped entrypoint: nested/panel.yaml -"#, +", ) .unwrap(); let descriptor = load_package_descriptor(&dir).unwrap(); @@ -561,20 +561,20 @@ entrypoint: nested/panel.yaml fs::write( dir.join("manifest.yaml"), - r#" + r" schema: bioscript:package:1.0 name: Missing Entrypoint -"#, +", ) .unwrap(); assert!(descriptor_err(&dir).contains("missing entrypoint")); fs::write( dir.join("manifest.yaml"), - r#" + r" schema: bioscript:package:1.0 entrypoint: ../escape.yaml -"#, +", ) .unwrap(); assert!(descriptor_err(&dir).contains("escapes")); @@ -588,11 +588,11 @@ entrypoint: ../escape.yaml fs::remove_file(dir.join("manifest.yaml")).unwrap(); fs::write( dir.join(LEGACY_PACKAGE_DESCRIPTOR), - r#" + r" schema: bioscript:package:1.0 name: Legacy entrypoint: assay.yaml -"#, +", ) .unwrap(); let descriptor = load_package_descriptor(&dir).unwrap(); @@ -614,11 +614,11 @@ entrypoint: assay.yaml &[ ( "manifest.yaml", - r#" + r" schema: bioscript:package:1.0 name: Test Package entrypoint: nested/variant.yaml -"#, +", ), ("nested/", ""), ("nested/variant.yaml", &variant_manifest("Nested")), @@ -683,10 +683,10 @@ entrypoint: nested/variant.yaml &missing_entrypoint, &[( "manifest.yaml", - r#" + r" schema: bioscript:package:1.0 entrypoint: missing.yaml -"#, 
+", )], ); assert!(import_err(&dir, &missing_entrypoint, Some(Path::new("missing-out"))) @@ -698,10 +698,10 @@ entrypoint: missing.yaml &[ ( "manifest.yaml", - r#" + r" schema: bioscript:package:1.0 entrypoint: nested/custom.yaml -"#, +", ), ("nested/custom.yaml", "schema: custom:schema\nname: bad\n"), ], @@ -722,12 +722,12 @@ entrypoint: nested/custom.yaml fs::write( &release_path, format!( - r#" + r" schema: bioscript:package-release:1.0 artifact: path: pkg.zip sha256: {digest} -"# +" ), ) .unwrap(); @@ -739,12 +739,12 @@ artifact: fs::write( &release_path, - r#" + r" schema: bioscript:package-release:1.0 artifact: path: pkg.zip sha256: bad -"#, +", ) .unwrap(); assert!(package_zip_from_release_manifest(&dir, &release_path, None) @@ -777,12 +777,12 @@ artifact: fs::write( &release_path, format!( - r#" + r" schema: bioscript:package-release:1.0 artifact: path: pkg.zip sha256: {digest} -"# +" ), ) .unwrap(); @@ -846,11 +846,11 @@ artifact: &[ ( "manifest.yaml", - r#" + r" schema: bioscript:package:1.0 name: Import Command Package entrypoint: variant.yaml -"#, +", ), ("variant.yaml", &variant_manifest("Imported")), ], diff --git a/rust/bioscript-cli/src/report_observations.rs b/rust/bioscript-cli/src/report_observations.rs index d4b7f65..b31a65b 100644 --- a/rust/bioscript-cli/src/report_observations.rs +++ b/rust/bioscript-cli/src/report_observations.rs @@ -137,7 +137,7 @@ mod app_report_observation_tests { let path = write_variant_yaml( &dir, "variant.yaml", - r#" + r" schema: bioscript:variant:1.0 name: Test variant gene: CYP2D6 @@ -149,7 +149,7 @@ evidence: references: - label: Primary url: https://www.ncbi.nlm.nih.gov/snp/rs123 -"#, +", ); assert_eq!(variant_manifest_gene(&path).unwrap(), "CYP2D6"); @@ -170,13 +170,13 @@ evidence: let path = write_variant_yaml( &dir, "variant.yaml", - r#" + r" schema: bioscript:variant:1.0 name: Test variant identifiers: rsids: - rs4242 -"#, +", ); let source = variant_primary_source(&path).unwrap(); diff --git 
a/rust/bioscript-cli/src/report_review.rs b/rust/bioscript-cli/src/report_review.rs index 17fbe3e..d4f96d6 100644 --- a/rust/bioscript-cli/src/report_review.rs +++ b/rust/bioscript-cli/src/report_review.rs @@ -357,7 +357,7 @@ mod review_report_tests { let cases_path = dir.join("cases.yaml"); fs::write( &cases_path, - r#" + r" cases: - id: c1 label: First case @@ -367,7 +367,7 @@ cases: - id: c2 variants: rs3: C/T -"#, +", ) .unwrap(); @@ -395,22 +395,22 @@ cases: let dir = path.parent().unwrap().to_path_buf(); let missing_id = serde_yaml::from_str::( - r#"{label: no id, genotypes: {rs1: A/G}}"#, + r"{label: no id, genotypes: {rs1: A/G}}", ) .unwrap(); assert!(review_case_err(&missing_id).contains("missing id")); let missing_genotypes = - serde_yaml::from_str::(r#"{id: c1}"#).unwrap(); + serde_yaml::from_str::(r"{id: c1}").unwrap(); assert!(review_case_err(&missing_genotypes).contains("missing genotypes")); let bad_key = - serde_yaml::from_str::(r#"{id: c1, genotypes: {1: A/G}}"#) + serde_yaml::from_str::(r"{id: c1, genotypes: {1: A/G}}") .unwrap(); assert!(review_case_err(&bad_key).contains("non-string genotype key")); let bad_value = - serde_yaml::from_str::(r#"{id: c1, genotypes: {rs1: [A, G]}}"#) + serde_yaml::from_str::(r"{id: c1, genotypes: {rs1: [A, G]}}") .unwrap(); assert!(review_case_err(&bad_value).contains("must be string or null")); @@ -471,7 +471,7 @@ provenance: let cases = dir.join("cases.yaml"); fs::write( &cases, - r#" + r" cases: - id: case1 label: Case One @@ -480,7 +480,7 @@ cases: - id: case2 genotypes: rs1: null -"#, +", ) .unwrap(); let output = dir.join("out"); diff --git a/rust/bioscript-formats/src/genotype.rs b/rust/bioscript-formats/src/genotype.rs index b11bd18..6372657 100644 --- a/rust/bioscript-formats/src/genotype.rs +++ b/rust/bioscript-formats/src/genotype.rs @@ -567,8 +567,8 @@ mod tests { #[test] fn genotype_public_cache_wrappers_and_empty_store_cover_lookup_contracts() { - let fallback = 
GenotypeStore::from_bytes("fallback.txt", b"rsid\tgenotype\nrs2\tCC\n") - .unwrap(); + let fallback = + GenotypeStore::from_bytes("fallback.txt", b"rsid\tgenotype\nrs2\tCC\n").unwrap(); let cached_observation = VariantObservation { matched_rsid: Some("rs1".to_owned()), genotype: Some("AG".to_owned()), diff --git a/rust/bioscript-formats/src/genotype/cram_backend/observation.rs b/rust/bioscript-formats/src/genotype/cram_backend/observation.rs index 70b9e96..33f78c6 100644 --- a/rust/bioscript-formats/src/genotype/cram_backend/observation.rs +++ b/rust/bioscript-formats/src/genotype/cram_backend/observation.rs @@ -408,8 +408,8 @@ mod tests { let dir = fixtures_dir(); let reference = dir.join("mini.fa"); let repository = alignment::build_reference_repository(&reference).unwrap(); - let index = alignment::parse_crai_bytes(&fs::read(dir.join("mini.cram.crai")).unwrap()) - .unwrap(); + let index = + alignment::parse_crai_bytes(&fs::read(dir.join("mini.cram.crai")).unwrap()).unwrap(); alignment::build_cram_indexed_reader_from_reader( fs::File::open(dir.join("mini.cram")).unwrap(), index, diff --git a/rust/bioscript-formats/src/genotype/cram_backend/store.rs b/rust/bioscript-formats/src/genotype/cram_backend/store.rs index b57abe7..0b5fa25 100644 --- a/rust/bioscript-formats/src/genotype/cram_backend/store.rs +++ b/rust/bioscript-formats/src/genotype/cram_backend/store.rs @@ -273,13 +273,14 @@ mod tests { #[test] fn cram_store_rejects_variants_without_coordinates_before_opening_cram() { - let mut options = GenotypeLoadOptions::default(); - options.reference_file = Some(PathBuf::from("ref.fa")); - options.reference_index = Some(PathBuf::from("ref.fa.fai")); - options.input_index = Some(PathBuf::from("sample.cram.crai")); let store = CramBackend { path: PathBuf::from("sample.cram"), - options, + options: GenotypeLoadOptions { + reference_file: Some(PathBuf::from("ref.fa")), + reference_index: Some(PathBuf::from("ref.fa.fai")), + input_index: 
Some(PathBuf::from("sample.cram.crai")), + ..GenotypeLoadOptions::default() + }, }; let err = store diff --git a/rust/bioscript-formats/src/inspect/sex.rs b/rust/bioscript-formats/src/inspect/sex.rs index b00c846..5057165 100644 --- a/rust/bioscript-formats/src/inspect/sex.rs +++ b/rust/bioscript-formats/src/inspect/sex.rs @@ -455,6 +455,7 @@ fn select_sex_detection_zip_entry( #[cfg(test)] mod tests { use super::*; + use std::fmt::Write as _; use std::io::Write as _; fn zip_bytes(entries: &[(&str, &str)]) -> Vec { @@ -653,8 +654,9 @@ mod tests { assert_eq!(unsupported.sex, InferredSex::Unknown); assert_eq!(unsupported.method, "unsupported_source_type"); - let result = infer_sex_from_bytes("sample.txt", text.as_bytes(), DetectedKind::GenotypeText) - .unwrap(); + let result = + infer_sex_from_bytes("sample.txt", text.as_bytes(), DetectedKind::GenotypeText) + .unwrap(); assert_eq!(result.method, "snp_array_x_y_fingerprint"); let archive = zip_bytes(&[ @@ -669,10 +671,14 @@ mod tests { let err = infer_sex_from_zip_bytes(&archive, "missing.txt", DetectedKind::GenotypeText) .unwrap_err(); - assert!(err.to_string().contains("failed to open zip entry missing.txt")); + assert!( + err.to_string() + .contains("failed to open zip entry missing.txt") + ); - let bad_zip = infer_sex_from_zip_bytes(b"not a zip", "sample.txt", DetectedKind::GenotypeText) - .unwrap_err(); + let bad_zip = + infer_sex_from_zip_bytes(b"not a zip", "sample.txt", DetectedKind::GenotypeText) + .unwrap_err(); assert!(bad_zip.to_string().contains("failed to read zip bytes")); let mut zip = ZipArchive::new(Cursor::new(archive)).unwrap(); @@ -684,7 +690,10 @@ mod tests { let unsupported_zip = zip_bytes(&[("docs/readme.md", "ignored")]); let mut zip = ZipArchive::new(Cursor::new(unsupported_zip)).unwrap(); let err = select_sex_detection_zip_entry(&mut zip).unwrap_err(); - assert!(err.to_string().contains("does not contain a supported sex detection input")); + assert!( + err.to_string() + .contains("does not 
contain a supported sex detection input") + ); } #[test] @@ -694,18 +703,30 @@ mod tests { text.push_str("chrM\t1\t.\tC\tT\t.\tPASS\t.\tGT\t1\n"); for idx in 0..70 { let gt = if idx % 2 == 0 { "0|1" } else { "0|0" }; - text.push_str(&format!( - "23\t{}\t.\tC\tT\t.\tPASS\t.\tGT\t{gt}:99\n", + let _ = writeln!( + text, + "23\t{}\t.\tC\tT\t.\tPASS\t.\tGT\t{gt}:99", 3_000_000 + idx - )); + ); } text.push_str("24\t1\t.\tC\tT\t.\tPASS\t.\tGT\t.\n"); text.push_str("chrX\t60000\t.\tC\tT\t.\tPASS\t.\tGT\t0/1\n"); text.push_str("chrX\t155000000\t.\tC\tT\t.\tPASS\t.\tGT\t0/1\n"); - let result = infer_sex_from_bytes("sample.vcf", text.as_bytes(), DetectedKind::Vcf).unwrap(); + let result = + infer_sex_from_bytes("sample.vcf", text.as_bytes(), DetectedKind::Vcf).unwrap(); assert_eq!(result.sex, InferredSex::Female); - assert!(result.evidence.iter().any(|item| item == "x_non_par_sites=70")); - assert!(result.evidence.iter().any(|item| item == "x_het_gt_sites=35")); + assert!( + result + .evidence + .iter() + .any(|item| item == "x_non_par_sites=70") + ); + assert!( + result + .evidence + .iter() + .any(|item| item == "x_het_gt_sites=35") + ); } } diff --git a/rust/bioscript-formats/src/inspect/sex/alignment_depth.rs b/rust/bioscript-formats/src/inspect/sex/alignment_depth.rs index 51dfb9c..f476c09 100644 --- a/rust/bioscript-formats/src/inspect/sex/alignment_depth.rs +++ b/rust/bioscript-formats/src/inspect/sex/alignment_depth.rs @@ -287,10 +287,12 @@ mod tests { let female = classify_alignment_stats(&stats(800, 760, 10)); assert_eq!(female.sex, InferredSex::Female); assert_eq!(female.confidence, SexDetectionConfidence::High); - assert!(female - .evidence - .iter() - .any(|value| value == "x_to_autosome_ratio=0.950")); + assert!( + female + .evidence + .iter() + .any(|value| value == "x_to_autosome_ratio=0.950") + ); let male = classify_alignment_stats(&stats(800, 360, 120)); assert_eq!(male.sex, InferredSex::Male); @@ -311,11 +313,11 @@ mod tests { #[test] fn 
alignment_depth_math_handles_empty_and_saturating_inputs() { - assert_eq!(mean_records(10, 0), 0.0); - assert_eq!(mean_records(20, 4), 5.0); + assert!(mean_records(10, 0).abs() <= f64::EPSILON); + assert!((mean_records(20, 4) - 5.0).abs() <= f64::EPSILON); assert!(mean_records(usize::MAX, 1) > 4_000_000_000.0); - assert_eq!(ratio_to_autosome(10.0, 0.0), 0.0); - assert_eq!(ratio_to_autosome(5.0, 10.0), 0.5); + assert!(ratio_to_autosome(10.0, 0.0).abs() <= f64::EPSILON); + assert!((ratio_to_autosome(5.0, 10.0) - 0.5).abs() <= f64::EPSILON); } #[test] @@ -361,10 +363,10 @@ mod tests { assert!(err.to_string().contains("does not contain contig")); let repository = alignment::build_reference_repository(&reference).unwrap(); - let crai = alignment::parse_crai_bytes(&fs::read(index).unwrap()).unwrap(); + let cram_index = alignment::parse_crai_bytes(&fs::read(index).unwrap()).unwrap(); let mut reader = alignment::build_cram_indexed_reader_from_reader( fs::File::open(cram).unwrap(), - crai, + cram_index, repository, ) .unwrap(); diff --git a/rust/bioscript-reporting/src/artifacts.rs b/rust/bioscript-reporting/src/artifacts.rs index 27fd3de..61e7ca6 100644 --- a/rust/bioscript-reporting/src/artifacts.rs +++ b/rust/bioscript-reporting/src/artifacts.rs @@ -132,7 +132,11 @@ mod tests { }) .unwrap(); - assert!(artifacts.reports_jsonl.contains("\"report_status\":\"complete\"")); + assert!( + artifacts + .reports_jsonl + .contains("\"report_status\":\"complete\"") + ); assert!(artifacts.html.contains("panel")); } diff --git a/rust/bioscript-reporting/src/manifest.rs b/rust/bioscript-reporting/src/manifest.rs index 8993abc..1c3fd26 100644 --- a/rust/bioscript-reporting/src/manifest.rs +++ b/rust/bioscript-reporting/src/manifest.rs @@ -514,10 +514,10 @@ mod tests { ExecutableAssayMember, ExecutablePanelMember, ManifestWorkspace, ReportManifestKind, assay_executable_member, assay_executable_member_path, collect_analysis_manifest_tasks, collect_variant_manifest_tasks, 
load_manifest_findings, load_report_manifest_context, - matches_analysis_path_filters, - matches_variant_manifest_filters, panel_executable_member, panel_executable_member_path, - report_assay_id, report_manifest_kind, report_manifest_metadata, report_manifest_schema, - resolve_filesystem_manifest_path, traversable_manifest_member_paths, + matches_analysis_path_filters, matches_variant_manifest_filters, panel_executable_member, + panel_executable_member_path, report_assay_id, report_manifest_kind, + report_manifest_metadata, report_manifest_schema, resolve_filesystem_manifest_path, + traversable_manifest_member_paths, }; struct InlineWorkspace { @@ -896,9 +896,11 @@ findings: }; let findings = load_manifest_findings(&workspace, "panel.yaml").unwrap(); - assert!(findings - .iter() - .any(|finding| finding["summary"] == "panel direct")); + assert!( + findings + .iter() + .any(|finding| finding["summary"] == "panel direct") + ); let inherited = findings .iter() .find(|finding| finding["summary"] == "included inherited") diff --git a/rust/bioscript-reporting/src/manifest_provenance.rs b/rust/bioscript-reporting/src/manifest_provenance.rs index defddf8..f7ba225 100644 --- a/rust/bioscript-reporting/src/manifest_provenance.rs +++ b/rust/bioscript-reporting/src/manifest_provenance.rs @@ -48,8 +48,7 @@ fn provenance_source_key(source: &serde_json::Value) -> String { .get("url") .and_then(serde_json::Value::as_str) .or_else(|| source.get("label").and_then(serde_json::Value::as_str)) - .map(ToOwned::to_owned) - .unwrap_or_else(|| source.to_string()) + .map_or_else(|| source.to_string(), ToOwned::to_owned) } #[cfg(test)] diff --git a/rust/bioscript-reporting/src/matching.rs b/rust/bioscript-reporting/src/matching.rs index 0978afc..d778100 100644 --- a/rust/bioscript-reporting/src/matching.rs +++ b/rust/bioscript-reporting/src/matching.rs @@ -529,33 +529,31 @@ mod report_matching_tests { #[test] fn match_app_findings_matches_variant_effects_and_deduplicates_evidence() { - let 
findings = vec![ - serde_json::json!({ - "schema": "bioscript:finding:1.0", - "label": "Repeated", - "evidence": {"source": "db", "kind": "guideline", "id": "cpic-1"}, - "effects": [ - { - "id": "effect-a", - "binding": { - "source": "variant", - "variant": "variants/rs1.yaml", - "key": "outcome", - "value": "variant" - } - }, - { - "id": "effect-a", - "binding": { - "source": "variant", - "variant": "rs1.yaml", - "key": "outcome", - "value": "variant" - } + let findings = vec![serde_json::json!({ + "schema": "bioscript:finding:1.0", + "label": "Repeated", + "evidence": {"source": "db", "kind": "guideline", "id": "cpic-1"}, + "effects": [ + { + "id": "effect-a", + "binding": { + "source": "variant", + "variant": "variants/rs1.yaml", + "key": "outcome", + "value": "variant" } - ] - }) - ]; + }, + { + "id": "effect-a", + "binding": { + "source": "variant", + "variant": "rs1.yaml", + "key": "outcome", + "value": "variant" + } + } + ] + })]; let observations = vec![serde_json::json!({ "participant_id": "p1", "variant_key": "rs1", @@ -598,7 +596,7 @@ mod report_matching_tests { "key": "score", "value": 2 } - }) + }), ]; let analyses = vec![ serde_json::json!({ @@ -615,7 +613,7 @@ mod report_matching_tests { "assay_id": "assay", "analysis_id": "other", "rows": "not rows" - }) + }), ]; let matched = match_app_findings(&findings, &[], &analyses); @@ -707,30 +705,36 @@ mod report_matching_tests { "genotype_display": "AG", "zygosity": "het" })]; - assert!(app_variant_binding_match_observation( - &serde_json::json!({"source": "variant"}), - &observations - ) - .is_none()); - assert!(app_variant_binding_match_observation( - &serde_json::json!({ - "source": "variant", - "key": "alt", - "operator": "unknown", - "value": "G" - }), - &observations - ) - .is_none()); - assert!(app_variant_binding_match_observation( - &serde_json::json!({ - "source": "variant", - "allele": "", - "operator": "dosage_equals", - "value": 1 - }), - &observations - ) - .is_none()); + assert!( + 
app_variant_binding_match_observation( + &serde_json::json!({"source": "variant"}), + &observations + ) + .is_none() + ); + assert!( + app_variant_binding_match_observation( + &serde_json::json!({ + "source": "variant", + "key": "alt", + "operator": "unknown", + "value": "G" + }), + &observations + ) + .is_none() + ); + assert!( + app_variant_binding_match_observation( + &serde_json::json!({ + "source": "variant", + "allele": "", + "operator": "dosage_equals", + "value": 1 + }), + &observations + ) + .is_none() + ); } } diff --git a/rust/bioscript-reporting/src/observation.rs b/rust/bioscript-reporting/src/observation.rs index 5db3cd7..3023f83 100644 --- a/rust/bioscript-reporting/src/observation.rs +++ b/rust/bioscript-reporting/src/observation.rs @@ -599,7 +599,12 @@ mod tests { ); } - fn manifest(kind: VariantKind, chrom: &str, reference: &str, alternate: &str) -> VariantManifest { + fn manifest( + kind: VariantKind, + chrom: &str, + reference: &str, + alternate: &str, + ) -> VariantManifest { VariantManifest { path: PathBuf::from("variants/rs1.yaml"), name: "rs1".to_owned(), @@ -731,10 +736,12 @@ mod tests { assert_eq!(observation["outcome"], "observed_alt"); assert_eq!(observation["call_status"], "observed_alt"); assert_eq!(observation["facets"], "observed_alt;known_observed_alts=T"); - assert!(observation["evidence_raw"] - .as_str() - .unwrap() - .contains("detected_sex=male")); + assert!( + observation["evidence_raw"] + .as_str() + .unwrap() + .contains("detected_sex=male") + ); } #[test] @@ -761,9 +768,11 @@ mod tests { assert_eq!(observation["kind"], "deletion"); assert_eq!(observation["genotype"], "0/1"); assert_eq!(observation["match_quality"], "weak"); - assert!(observation["match_notes"] - .as_str() - .unwrap() - .contains("insertion/deletion token")); + assert!( + observation["match_notes"] + .as_str() + .unwrap() + .contains("insertion/deletion token") + ); } } diff --git a/rust/bioscript-reporting/src/report_json.rs 
b/rust/bioscript-reporting/src/report_json.rs index f4e7604..1652809 100644 --- a/rust/bioscript-reporting/src/report_json.rs +++ b/rust/bioscript-reporting/src/report_json.rs @@ -321,7 +321,10 @@ mod tests { fn enum_name_helpers_cover_all_known_values() { assert_eq!(super::file_container_name(FileContainer::Plain), "plain"); assert_eq!(super::file_container_name(FileContainer::Zip), "zip"); - assert_eq!(super::detected_kind_name(DetectedKind::GenotypeText), "genotype_text"); + assert_eq!( + super::detected_kind_name(DetectedKind::GenotypeText), + "genotype_text" + ); assert_eq!(super::detected_kind_name(DetectedKind::Vcf), "vcf"); assert_eq!( super::detected_kind_name(DetectedKind::AlignmentCram), diff --git a/rust/bioscript-schema/src/remote_resource.rs b/rust/bioscript-schema/src/remote_resource.rs index b1ea550..250b1a1 100644 --- a/rust/bioscript-schema/src/remote_resource.rs +++ b/rust/bioscript-schema/src/remote_resource.rs @@ -343,15 +343,14 @@ metadata: assert_eq!(resolved.version.as_deref(), Some("1.0")); assert_eq!(resolved.sha256, sha256_hex(text.as_bytes())); assert_eq!(resolved.dependencies.len(), 3); - assert!(resolved - .dependencies - .iter() - .any(|dep| dep.kind == "member" - && dep.url == "https://github.com/org/repo/blob/main/panels/variants/rs1.yaml")); - assert!(resolved - .dependencies - .iter() - .any(|dep| dep.kind == "download" && dep.version.as_deref() == Some("v2"))); + assert!(resolved.dependencies.iter().any(|dep| dep.kind == "member" + && dep.url == "https://github.com/org/repo/blob/main/panels/variants/rs1.yaml")); + assert!( + resolved + .dependencies + .iter() + .any(|dep| dep.kind == "download" && dep.version.as_deref() == Some("v2")) + ); } #[test] @@ -406,8 +405,7 @@ metadata: Some("https://github.com/org/repo/blob/main/variants/rs1.yaml") ); assert_eq!( - resolve_resource_url("https://example.test/a/b/panel.yaml", "../v/rs1.yaml") - .as_deref(), + resolve_resource_url("https://example.test/a/b/panel.yaml", 
"../v/rs1.yaml").as_deref(), Some("https://example.test/a/v/rs1.yaml") ); assert!(resolve_resource_url("not a url", "relative.yaml").is_none()); @@ -415,15 +413,21 @@ metadata: #[test] fn parse_structured_text_reports_yaml_and_json_errors() { - assert!(parse_structured_text("bad.yaml", "{") - .unwrap_err() - .contains("failed to parse YAML")); - assert!(parse_structured_text("bad.json", "{") - .unwrap_err() - .contains("failed to parse JSON")); - assert!(parse_structured_text("notes.txt", "not structured") - .unwrap() - .is_none()); + assert!( + parse_structured_text("bad.yaml", "{") + .unwrap_err() + .contains("failed to parse YAML") + ); + assert!( + parse_structured_text("bad.json", "{") + .unwrap_err() + .contains("failed to parse JSON") + ); + assert!( + parse_structured_text("notes.txt", "not structured") + .unwrap() + .is_none() + ); assert!(has_extension("PANEL.YAML", &["yaml"])); } } diff --git a/rust/bioscript-schema/src/validator_alleles.rs b/rust/bioscript-schema/src/validator_alleles.rs index 205cf2d..c05e581 100644 --- a/rust/bioscript-schema/src/validator_alleles.rs +++ b/rust/bioscript-schema/src/validator_alleles.rs @@ -209,13 +209,13 @@ alleles: #[test] fn allele_validator_accepts_indel_observed_alt_superset() { let root = yaml( - r#" + r" alleles: kind: indel ref: AT alts: [A] observed_alts: [A, ATT] -"#, +", ); let mut issues = Vec::new(); validate_alleles(&root, &mut issues); diff --git a/rust/bioscript-schema/src/validator_findings.rs b/rust/bioscript-schema/src/validator_findings.rs index 8296c30..1e2e278 100644 --- a/rust/bioscript-schema/src/validator_findings.rs +++ b/rust/bioscript-schema/src/validator_findings.rs @@ -361,7 +361,7 @@ findings: #[test] fn effect_validation_covers_non_sequence_and_nested_effect_bindings() { let root = yaml( - r#" + r" findings: - schema: bioscript:trait:1.0 summary: ok @@ -376,7 +376,7 @@ findings: operator: dosage_in allele: A values: [] -"#, +", ); let mut issues = Vec::new(); validate_findings(&root, 
&mut issues); diff --git a/rust/bioscript-schema/src/validator_resources.rs b/rust/bioscript-schema/src/validator_resources.rs index 6398a74..9d91d93 100644 --- a/rust/bioscript-schema/src/validator_resources.rs +++ b/rust/bioscript-schema/src/validator_resources.rs @@ -207,14 +207,14 @@ provenance: #[test] fn permissions_validation_reports_origin_shape_duplicates_and_bad_urls() { let root = yaml( - r#" + r" permissions: domains: - https://example.com - https://example.com - 3 - ftp://example.com -"#, +", ); let mut issues = Vec::new(); validate_permissions(&root, &mut issues);