From 15c15f62cd8982ba5995875c6a442e069c913e46 Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Tue, 12 Aug 2025 14:32:57 -0400 Subject: [PATCH 01/52] first commit for custom/fingerprintvcfparser --- .../fingerprintvcfparser/environment.yml | 10 +++ .../msk/custom/fingerprintvcfparser/main.nf | 49 ++++++++++++ .../msk/custom/fingerprintvcfparser/meta.yml | 37 +++++++++ .../usr/bin/parse_fingerprint_vcf.py | 70 +++++++++++++++++ .../fingerprintvcfparser/tests/main.nf.test | 77 +++++++++++++++++++ 5 files changed, 243 insertions(+) create mode 100644 modules/msk/custom/fingerprintvcfparser/environment.yml create mode 100644 modules/msk/custom/fingerprintvcfparser/main.nf create mode 100644 modules/msk/custom/fingerprintvcfparser/meta.yml create mode 100755 modules/msk/custom/fingerprintvcfparser/resources/usr/bin/parse_fingerprint_vcf.py create mode 100644 modules/msk/custom/fingerprintvcfparser/tests/main.nf.test diff --git a/modules/msk/custom/fingerprintvcfparser/environment.yml b/modules/msk/custom/fingerprintvcfparser/environment.yml new file mode 100644 index 00000000..cc119fe8 --- /dev/null +++ b/modules/msk/custom/fingerprintvcfparser/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # TODO nf-core: List required Conda package(s). + # Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10"). + # For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems. 
+ - "bioconda::pysam=0.23.3" diff --git a/modules/msk/custom/fingerprintvcfparser/main.nf b/modules/msk/custom/fingerprintvcfparser/main.nf new file mode 100644 index 00000000..78beae2a --- /dev/null +++ b/modules/msk/custom/fingerprintvcfparser/main.nf @@ -0,0 +1,49 @@ +process CUSTOM_FINGERPRINTVCFPARSER { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/pysam:0.23.0--py39hdd5828d_0': + 'biocontainers/pysam:0.23.0--py39hdd5828d_0' }" + + input: + tuple val(meta), path(vcf) + + output: + tuple val(meta), path("*.tsv"), emit: tsv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + parse_fingerprint_vcf.py \\ + --input ${vcf} \\ + --output ${prefix}.fp.tsv \\ + --samplename ${prefix} \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + parse_fingerprint_vcf.py: 0.1.0 + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo $args + + touch ${prefix}.fp.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + parse_fingerprint_vcf.py: 0.1.0 + END_VERSIONS + """ +} diff --git a/modules/msk/custom/fingerprintvcfparser/meta.yml b/modules/msk/custom/fingerprintvcfparser/meta.yml new file mode 100644 index 00000000..453b71bd --- /dev/null +++ b/modules/msk/custom/fingerprintvcfparser/meta.yml @@ -0,0 +1,37 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +# # TODO nf-core: Add a description of the module and list keywords +name: "custom_fingerprintvcfparser" +description: write your description here +keywords: +- sort +- example +- genomics +tools: +## TODO nf-core: Add a description and 
other details for the software below +- "custom": + description: "Pysam is a Python module for reading and manipulating SAM/BAM/VCF/BCF + files. It's a lightweight wrapper of the htslib C-API, the same one that powers + samtools, bcftools, and tabix." + homepage: "None" + documentation: "None" + tool_dev_url: "None" + doi: "" + licence: ['MIT'] + identifier: biotools:pysam + +input: +# TODO nf-core: Update the information obtained from bio.tools and make sure that it is correct + [] +output: +# TODO nf-core: Update the information obtained from bio.tools and make sure that it is correct + versions: - + versions.yml: + type: file + description: File containing software versions + pattern: versions.yml + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: +- "@anoronh4" +maintainers: +- "@anoronh4" diff --git a/modules/msk/custom/fingerprintvcfparser/resources/usr/bin/parse_fingerprint_vcf.py b/modules/msk/custom/fingerprintvcfparser/resources/usr/bin/parse_fingerprint_vcf.py new file mode 100755 index 00000000..851bd498 --- /dev/null +++ b/modules/msk/custom/fingerprintvcfparser/resources/usr/bin/parse_fingerprint_vcf.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python +import argparse + +""" +Converts fingerprint vcf to a formatted table +""" + +__author__ = "Anne Marie Noronha" +__email__ = "noronhaa@mskcc.org" +__version__ = "0.1.0" +__status__ = "Dev" + +import sys, os +from pysam import VariantFile # version >= 0.15.2 +from itertools import groupby + +def usage(): + parser = argparse.ArgumentParser() + parser.add_argument('--input','-i', help = 'input file', required = True) + parser.add_argument('--samplename','-n', help = 'sample name', required = True) + parser.add_argument('--output','-o', help = 'output file', required = True) + parser.add_argument('--depth-filter','-d', default = 20, type = int, help = 'minimum read depth for outputting a minor allele frequency [default = 20]') + return parser.parse_args() + +def main(): + args = usage() + + 
fp_out_list = [] + + vcf_in = VariantFile(args.input, "r") + for vcf_rec in vcf_in.fetch(): + ref_allele = vcf_rec.ref + alt_allele = vcf_rec.alts[0] + ref_allele_count = vcf_rec.samples[args.samplename]["RD"] + alt_allele_count = vcf_rec.samples[args.samplename]["AD"] + if ref_allele_count >= alt_allele_count and ref_allele_count > 0: + maf = alt_allele_count / float(ref_allele_count + alt_allele_count) + if maf < .1: + genotype = ref_allele*2 + else: + genotype = ref_allele + alt_allele + elif alt_allele_count > ref_allele_count: + maf = ref_allele_count / float(ref_allele_count + alt_allele_count) + if maf < .1: + genotype = alt_allele*2 + #else: genotype = alt_allele + ref_allele + else: + genotype = ref_allele + alt_allele + elif ref_allele_count == 0: + genotype = "--" + else: + genotype = ref_allele + alt_allele + if ref_allele_count + alt_allele_count < args.depth_filter or genotype == "--": + maf = "" + + + formatted_counts = "{}:{} {}:{}".format(ref_allele,ref_allele_count,alt_allele,alt_allele_count) + + locus = "{}:{}".format(vcf_rec.chrom,vcf_rec.pos) + depth = vcf_rec.samples[args.samplename]["DP"] + + fp_out_list += [[locus,formatted_counts, genotype, maf]] + + with open(args.output,'w') as f: + f.write("\t".join(['Locus', args.samplename + '_Counts', args.samplename + '_Genotypes', args.samplename + '_MinorAlleleFreq']) + "\n") + for i in fp_out_list: + f.write("\t".join([str(j) for j in i]) + "\n") + +if __name__ == "__main__": + main() diff --git a/modules/msk/custom/fingerprintvcfparser/tests/main.nf.test b/modules/msk/custom/fingerprintvcfparser/tests/main.nf.test new file mode 100644 index 00000000..e8254d24 --- /dev/null +++ b/modules/msk/custom/fingerprintvcfparser/tests/main.nf.test @@ -0,0 +1,77 @@ +// nf-core modules test custom/fingerprintvcfparser +nextflow_process { + + name "Test Process CUSTOM_FINGERPRINTVCFPARSER" + script "../main.nf" + process "CUSTOM_FINGERPRINTVCFPARSER" + + tag "modules" + tag "modules_msk" + tag "custom" + tag 
"custom/fingerprintvcfparser" + tag "gbcms" + + test("sarscov2 - vcf") { + setup { + run("GBCMS"){ + script "../../gbcms/main.nf" + process { + """ + input[0] = [ + [ id:'test', sample:'197' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam_bai'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true), + "variant_file.vcf" + ] + input[1] = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + input[2] = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + """ + } + } + } + when { + process { + """ + input[0] = GBCMS.out.variant_file + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + // TODO nf-core: Change the test name preferably indicating the test-data and file-format used but keep the " - stub" suffix. + test("sarscov2 - vcf - stub") { + + options "-stub" + + when { + process { + """ + // TODO nf-core: define inputs of the process here. 
Example: + + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true), + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} From 785cd7c1696b6c9177ddd2a361193f36b26449f7 Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Tue, 12 Aug 2025 15:42:12 -0400 Subject: [PATCH 02/52] update module and nf-test --- .../msk/custom/fingerprintvcfparser/main.nf | 8 +-- .../fingerprintvcfparser/tests/main.nf.test | 13 ++-- .../tests/main.nf.test.snap | 72 +++++++++++++++++++ 3 files changed, 84 insertions(+), 9 deletions(-) create mode 100644 modules/msk/custom/fingerprintvcfparser/tests/main.nf.test.snap diff --git a/modules/msk/custom/fingerprintvcfparser/main.nf b/modules/msk/custom/fingerprintvcfparser/main.nf index 78beae2a..53e7d7b2 100644 --- a/modules/msk/custom/fingerprintvcfparser/main.nf +++ b/modules/msk/custom/fingerprintvcfparser/main.nf @@ -23,9 +23,9 @@ process CUSTOM_FINGERPRINTVCFPARSER { """ parse_fingerprint_vcf.py \\ --input ${vcf} \\ - --output ${prefix}.fp.tsv \\ - --samplename ${prefix} \\ - $args + --output ${prefix}.fp.tsv \\ + --samplename ${prefix} \\ + $args cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -38,7 +38,7 @@ process CUSTOM_FINGERPRINTVCFPARSER { def prefix = task.ext.prefix ?: "${meta.id}" """ echo $args - + touch ${prefix}.fp.tsv cat <<-END_VERSIONS > versions.yml diff --git a/modules/msk/custom/fingerprintvcfparser/tests/main.nf.test b/modules/msk/custom/fingerprintvcfparser/tests/main.nf.test index e8254d24..3d5f6a03 100644 --- a/modules/msk/custom/fingerprintvcfparser/tests/main.nf.test +++ b/modules/msk/custom/fingerprintvcfparser/tests/main.nf.test @@ -1,3 +1,4 @@ +// TODO nf-core: Once you have added the required tests, please run the following command to build this file: // nf-core modules test custom/fingerprintvcfparser 
nextflow_process { @@ -11,14 +12,15 @@ nextflow_process { tag "custom/fingerprintvcfparser" tag "gbcms" + // TODO nf-core: Change the test name preferably indicating the test-data and file-format used test("sarscov2 - vcf") { setup { run("GBCMS"){ - script "../../gbcms/main.nf" + script "../../../gbcms/main.nf" process { """ input[0] = [ - [ id:'test', sample:'197' ], // meta map + [ id:'test', sample:'test' ], // meta map file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true), file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam_bai'], checkIfExists: true), file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true), @@ -33,6 +35,7 @@ nextflow_process { when { process { """ + // TODO nf-core: define inputs of the process here. Example: input[0] = GBCMS.out.variant_file """ } @@ -42,6 +45,8 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot(process.out).match() } + //TODO nf-core: Add all required assertions to verify the test output. + // See https://nf-co.re/docs/contributing/tutorials/nf-test_assertions for more information and examples. ) } @@ -55,11 +60,9 @@ nextflow_process { when { process { """ - // TODO nf-core: define inputs of the process here. 
Example: - input[0] = [ [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true) ] """ } diff --git a/modules/msk/custom/fingerprintvcfparser/tests/main.nf.test.snap b/modules/msk/custom/fingerprintvcfparser/tests/main.nf.test.snap new file mode 100644 index 00000000..5751d885 --- /dev/null +++ b/modules/msk/custom/fingerprintvcfparser/tests/main.nf.test.snap @@ -0,0 +1,72 @@ +{ + "sarscov2 - vcf": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "sample": "test" + }, + "test.fp.tsv:md5,9fa9a081f17ee52f03463c96d46a23aa" + ] + ], + "1": [ + "versions.yml:md5,c8c9b3fa1b9110ca83a71490a317f2fa" + ], + "tsv": [ + [ + { + "id": "test", + "sample": "test" + }, + "test.fp.tsv:md5,9fa9a081f17ee52f03463c96d46a23aa" + ] + ], + "versions": [ + "versions.yml:md5,c8c9b3fa1b9110ca83a71490a317f2fa" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-08-12T15:07:39.656085692" + }, + "sarscov2 - vcf - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fp.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,c8c9b3fa1b9110ca83a71490a317f2fa" + ], + "tsv": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fp.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,c8c9b3fa1b9110ca83a71490a317f2fa" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-08-12T15:07:46.098292727" + } +} \ No newline at end of file From 7f47996637653701c5ce1ec8b052f4a080220f39 Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Wed, 13 Aug 2025 23:43:18 -0400 Subject: [PATCH 03/52] set enable_conda to false --- modules/msk/custom/fingerprintvcfparser/tests/main.nf.test | 1 + 
modules/msk/custom/fingerprintvcfparser/tests/nextflow.config | 3 +++ 2 files changed, 4 insertions(+) create mode 100644 modules/msk/custom/fingerprintvcfparser/tests/nextflow.config diff --git a/modules/msk/custom/fingerprintvcfparser/tests/main.nf.test b/modules/msk/custom/fingerprintvcfparser/tests/main.nf.test index 3d5f6a03..574668ea 100644 --- a/modules/msk/custom/fingerprintvcfparser/tests/main.nf.test +++ b/modules/msk/custom/fingerprintvcfparser/tests/main.nf.test @@ -14,6 +14,7 @@ nextflow_process { // TODO nf-core: Change the test name preferably indicating the test-data and file-format used test("sarscov2 - vcf") { + config "./nextflow.config" setup { run("GBCMS"){ script "../../../gbcms/main.nf" diff --git a/modules/msk/custom/fingerprintvcfparser/tests/nextflow.config b/modules/msk/custom/fingerprintvcfparser/tests/nextflow.config new file mode 100644 index 00000000..f2cf46a3 --- /dev/null +++ b/modules/msk/custom/fingerprintvcfparser/tests/nextflow.config @@ -0,0 +1,3 @@ +params { + enable_conda = false +} From af7d9028759724ac12a55a2f2347d8c60416a23b Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Wed, 13 Aug 2025 23:44:00 -0400 Subject: [PATCH 04/52] update meta.yml --- .../msk/custom/fingerprintvcfparser/main.nf | 4 +- .../msk/custom/fingerprintvcfparser/meta.yml | 59 ++++++++++++------- 2 files changed, 41 insertions(+), 22 deletions(-) diff --git a/modules/msk/custom/fingerprintvcfparser/main.nf b/modules/msk/custom/fingerprintvcfparser/main.nf index 53e7d7b2..2e1f4a4b 100644 --- a/modules/msk/custom/fingerprintvcfparser/main.nf +++ b/modules/msk/custom/fingerprintvcfparser/main.nf @@ -11,7 +11,7 @@ process CUSTOM_FINGERPRINTVCFPARSER { tuple val(meta), path(vcf) output: - tuple val(meta), path("*.tsv"), emit: tsv + tuple val(meta), path("${prefix}.fp.tsv"), emit: tsv path "versions.yml" , emit: versions when: @@ -19,7 +19,7 @@ process CUSTOM_FINGERPRINTVCFPARSER { script: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: 
"${meta.id}" + prefix = task.ext.prefix ?: "${meta.id}" """ parse_fingerprint_vcf.py \\ --input ${vcf} \\ diff --git a/modules/msk/custom/fingerprintvcfparser/meta.yml b/modules/msk/custom/fingerprintvcfparser/meta.yml index 453b71bd..d4ae3ed7 100644 --- a/modules/msk/custom/fingerprintvcfparser/meta.yml +++ b/modules/msk/custom/fingerprintvcfparser/meta.yml @@ -1,36 +1,55 @@ # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json -# # TODO nf-core: Add a description of the module and list keywords name: "custom_fingerprintvcfparser" -description: write your description here +description: Custom script to parse fingerprint VCF files, generated by the GBCMS module. keywords: -- sort -- example -- genomics +- custom +- fingerprint +- vcf +- pysam tools: -## TODO nf-core: Add a description and other details for the software below - "custom": description: "Pysam is a Python module for reading and manipulating SAM/BAM/VCF/BCF files. It's a lightweight wrapper of the htslib C-API, the same one that powers samtools, bcftools, and tabix." - homepage: "None" - documentation: "None" - tool_dev_url: "None" - doi: "" + homepage: "https://pysam.readthedocs.io/en/latest/api.html" + documentation: "https://pysam.readthedocs.io/en/latest/api.html" + tool_dev_url: "https://github.com/pysam-developers/pysam" licence: ['MIT'] identifier: biotools:pysam input: -# TODO nf-core: Update the information obtained from bio.tools and make sure that it is correct - [] + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test' ] + - vcf: + type: file + description: Fasta file containing scaffold + pattern: "*.vcf" + ontologies: + - edam: http://edamontology.org/format_3016 # VCF output: -# TODO nf-core: Update the information obtained from bio.tools and make sure that it is correct - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: versions.yml - ontologies: - - edam: http://edamontology.org/format_3750 # YAML + tsv: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - ${prefix}.fp.tsv: + type: file + description: Tab-separated values (TSV) file containing parsed fingerprint data + pattern: "${prefix}.fp.tsv" + ontologies: + - edam: http://edamontology.org/format_3475 # TSV + + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: versions.yml + ontologies: + - edam: http://edamontology.org/format_3750 # YAML authors: - "@anoronh4" maintainers: From ed05eb0198f1c4ff27165320c87975d2bf9128ec Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Wed, 13 Aug 2025 23:47:16 -0400 Subject: [PATCH 05/52] remove TODO lines --- modules/msk/custom/fingerprintvcfparser/environment.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/modules/msk/custom/fingerprintvcfparser/environment.yml b/modules/msk/custom/fingerprintvcfparser/environment.yml index cc119fe8..a5547b5c 100644 --- a/modules/msk/custom/fingerprintvcfparser/environment.yml +++ b/modules/msk/custom/fingerprintvcfparser/environment.yml @@ -4,7 +4,4 @@ channels: - conda-forge - bioconda dependencies: - # TODO nf-core: List required Conda package(s). - # Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10"). - # For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems. 
- "bioconda::pysam=0.23.3" From 5dc010eb578721a1ebfe0c7f581b4bfcc5027af7 Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Wed, 13 Aug 2025 23:48:33 -0400 Subject: [PATCH 06/52] remove more TODO lines --- modules/msk/custom/fingerprintvcfparser/tests/main.nf.test | 6 ------ 1 file changed, 6 deletions(-) diff --git a/modules/msk/custom/fingerprintvcfparser/tests/main.nf.test b/modules/msk/custom/fingerprintvcfparser/tests/main.nf.test index 574668ea..e4454cba 100644 --- a/modules/msk/custom/fingerprintvcfparser/tests/main.nf.test +++ b/modules/msk/custom/fingerprintvcfparser/tests/main.nf.test @@ -1,4 +1,3 @@ -// TODO nf-core: Once you have added the required tests, please run the following command to build this file: // nf-core modules test custom/fingerprintvcfparser nextflow_process { @@ -12,7 +11,6 @@ nextflow_process { tag "custom/fingerprintvcfparser" tag "gbcms" - // TODO nf-core: Change the test name preferably indicating the test-data and file-format used test("sarscov2 - vcf") { config "./nextflow.config" setup { @@ -36,7 +34,6 @@ nextflow_process { when { process { """ - // TODO nf-core: define inputs of the process here. Example: input[0] = GBCMS.out.variant_file """ } @@ -46,14 +43,11 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot(process.out).match() } - //TODO nf-core: Add all required assertions to verify the test output. - // See https://nf-co.re/docs/contributing/tutorials/nf-test_assertions for more information and examples. ) } } - // TODO nf-core: Change the test name preferably indicating the test-data and file-format used but keep the " - stub" suffix. 
test("sarscov2 - vcf - stub") { options "-stub" From 112ac67c9ac0e5366a85a6c3db7877fc9da75383 Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Wed, 13 Aug 2025 23:53:24 -0400 Subject: [PATCH 07/52] add module to skipped nf-tests for conda --- .github/skip_nf_test.json | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/skip_nf_test.json b/.github/skip_nf_test.json index 4ea16616..25ca5dce 100644 --- a/.github/skip_nf_test.json +++ b/.github/skip_nf_test.json @@ -1,5 +1,6 @@ { "conda": [ + "modules/msk/custom/fingerprintvcfparser", "modules/msk/calculatenoise", "modules/msk/ppflagfixer", "modules/msk/facets", From e09936f8569542d0bcad713a832bfa5e930a3e33 Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Wed, 13 Aug 2025 23:56:03 -0400 Subject: [PATCH 08/52] bugfix --- modules/msk/custom/fingerprintvcfparser/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/msk/custom/fingerprintvcfparser/main.nf b/modules/msk/custom/fingerprintvcfparser/main.nf index 2e1f4a4b..6a46b512 100644 --- a/modules/msk/custom/fingerprintvcfparser/main.nf +++ b/modules/msk/custom/fingerprintvcfparser/main.nf @@ -35,7 +35,7 @@ process CUSTOM_FINGERPRINTVCFPARSER { stub: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + prefix = task.ext.prefix ?: "${meta.id}" """ echo $args From a8805ae4765c93276ac9e69ede0b858044734099 Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Thu, 14 Aug 2025 19:51:39 -0400 Subject: [PATCH 09/52] add subworkflow for generating fingerprints with gbcms --- subworkflows/msk/fingerprint_gbcms/main.nf | 35 +++++++++++ subworkflows/msk/fingerprint_gbcms/meta.yml | 62 +++++++++++++++++++ .../msk/fingerprint_gbcms/tests/main.nf.test | 43 +++++++++++++ .../fingerprint_gbcms/tests/main.nf.test.snap | 39 ++++++++++++ 4 files changed, 179 insertions(+) create mode 100644 subworkflows/msk/fingerprint_gbcms/main.nf create mode 100644 subworkflows/msk/fingerprint_gbcms/meta.yml create mode 100644 
subworkflows/msk/fingerprint_gbcms/tests/main.nf.test create mode 100644 subworkflows/msk/fingerprint_gbcms/tests/main.nf.test.snap diff --git a/subworkflows/msk/fingerprint_gbcms/main.nf b/subworkflows/msk/fingerprint_gbcms/main.nf new file mode 100644 index 00000000..d121fd55 --- /dev/null +++ b/subworkflows/msk/fingerprint_gbcms/main.nf @@ -0,0 +1,35 @@ +include { GBCMS } from '../../../modules/msk/gbcms/main' +include { CUSTOM_FINGERPRINTVCFPARSER } from '../../../modules/msk/custom/fingerprintvcfparser/main' + +workflow FINGERPRINT_GBCMS { + + take: + ch_bam // channel: [ val(meta), [ bam ] ] + ch_bai // channel: [ val(meta), [ bai ] ] + ch_fp_vcf // channel: [ val(meta), [ vcf ] ] + ch_fasta // channel: [ fasta ] + ch_fastafai // channel: [ fastafai ] + + main: + + ch_versions = Channel.empty() + + GBCMS ( + ch_bam + .combine(ch_bai, by:[0]) + .combine(ch_fp_vcf.map{ if (it.size() > 1){ it[1] } else { it }}.first()) + .map{ meta, bam, bai, vcf -> [meta, bam, bai, vcf, meta.id + ".fp.vcf" ] }, + ch_fasta.first(), + ch_fastafai.first() + //ch_fasta.view().map{ if (it[0] instanceof Map){ it[1] } else { it }}.first(), + //ch_fastafai.view().map{ if (it[0] instanceof Map){ it[1] } else { it }}.first() + ) + ch_versions = ch_versions.mix(GBCMS.out.versions.first()) + + CUSTOM_FINGERPRINTVCFPARSER ( GBCMS.out.variant_file ) + ch_versions = ch_versions.mix(CUSTOM_FINGERPRINTVCFPARSER.out.versions.first()) + + emit: + fp_tsv = CUSTOM_FINGERPRINTVCFPARSER.out.tsv // channel: [ val(meta), tsv ] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/msk/fingerprint_gbcms/meta.yml b/subworkflows/msk/fingerprint_gbcms/meta.yml new file mode 100644 index 00000000..f90038e5 --- /dev/null +++ b/subworkflows/msk/fingerprint_gbcms/meta.yml @@ -0,0 +1,62 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "fingerprint_gbcms" +description: | + Get base counts for all 
fingerprinting sites from BAM/CRAM/SAM files using the GBCMS module, + and parse the resulting VCF files into standardized TSV format using a custom parser. +keywords: + - fingerprint + - fingerprinting + - loci + - vcf + - bam +components: + - gbcms + - custom/fingerprintvcfparser +input: + - ch_bam: + type: file + description: | + The input channel containing the BAM/CRAM/SAM files + Structure: [ val(meta), path(bam) ] + pattern: "*.{bam/cram/sam}" + - ch_bai: + type: file + description: | + The input channel containing the BAM index files (BAI/CSI) + Structure: [ val(meta), path(bai) ] # or path(csi) + pattern: "*.{bai/csi}" + - ch_fp_vcf: + type: file + description: | + Channel containing fingerprint VCF files + Structure: [ val(meta), path(vcf) ] + pattern: "*.vcf" + - ch_fasta: + type: file + description: | + Channel containing reference FASTA files + Structure: [ path(fasta) ] + pattern: "*.{fasta,fa}" + - ch_fastafai: + type: file + description: | + Channel containing reference FASTA index files + Structure: [ path(fasta.fai) ] + pattern: "*.{fasta,fa}.fai" +output: + - tsv: + type: file + description: | + Channel containing standardized fingerprint TSV files + Structure: [ val(meta), path(tsv) ] + pattern: "*.fp.tsv" + - versions: + type: file + description: | + File containing software versions + Structure: [ path(versions.yml) ] + pattern: "versions.yml" +authors: + - "@anoronh4" +maintainers: + - "@anoronh4" diff --git a/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test b/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test new file mode 100644 index 00000000..55de93ca --- /dev/null +++ b/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test @@ -0,0 +1,43 @@ +nextflow_workflow { + + name "Test Subworkflow FINGERPRINT_GBCMS" + script "../main.nf" + workflow "FINGERPRINT_GBCMS" + + tag "subworkflows" + tag "subworkflows_msk" + tag "subworkflows/fingerprint_gbcms" + tag "gbcms" + tag "custom/fingerprintvcfparser" + + test("sarscov2 - bam") { + + when 
{ + workflow { + """ + input[0] = Channel.of([ + [ id:'test', sample:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'test', sample:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam_bai'], checkIfExists: true), + ]) + input[2] = Channel.of([ + [:], + file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true) + ]) + input[3] = Channel.of(file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)) + input[4] = Channel.of(file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out).match()} + ) + } + } +} diff --git a/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test.snap b/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test.snap new file mode 100644 index 00000000..9952b0bd --- /dev/null +++ b/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test.snap @@ -0,0 +1,39 @@ +{ + "sarscov2 - bam": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "sample": "test" + }, + "test.fp.tsv:md5,9fa9a081f17ee52f03463c96d46a23aa" + ] + ], + "1": [ + "versions.yml:md5,41ff30ed71b1d19e95a6095b9ac3ca94", + "versions.yml:md5,4b4b3ad40aa1c2c3a002c0c347c385b8" + ], + "fp_tsv": [ + [ + { + "id": "test", + "sample": "test" + }, + "test.fp.tsv:md5,9fa9a081f17ee52f03463c96d46a23aa" + ] + ], + "versions": [ + "versions.yml:md5,41ff30ed71b1d19e95a6095b9ac3ca94", + "versions.yml:md5,4b4b3ad40aa1c2c3a002c0c347c385b8" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-08-14T19:50:44.771060618" + } +} \ No newline at end of file From c9cf10ffa2977bd2038fa8be3e80baf295d827be Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Thu, 14 Aug 2025 20:00:11 -0400 Subject: [PATCH 10/52] add subworkflow to skipped 
nf-tests for conda --- .github/skip_nf_test.json | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/skip_nf_test.json b/.github/skip_nf_test.json index 25ca5dce..2991ca79 100644 --- a/.github/skip_nf_test.json +++ b/.github/skip_nf_test.json @@ -29,6 +29,7 @@ "modules/msk/phylowgs/parsecnvs", "modules/msk/pvmaf/concat", "modules/msk/pvmaf/tagtraceback", + "subworkflows/msk/fingerprint_gbcms", "subworkflows/msk/genome_nexus" ], "docker": [ From e3a7bedd6b5b5933c04c481ff439a9a0e2a249c8 Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Thu, 25 Sep 2025 17:51:43 -0400 Subject: [PATCH 11/52] add fingerprint contamination module --- .../fingerprintcontamination/environment.yml | 7 ++ .../custom/fingerprintcontamination/main.nf | 47 ++++++++ .../custom/fingerprintcontamination/meta.yml | 60 ++++++++++ .../usr/bin/calculate_contamination.py | 100 ++++++++++++++++ .../tests/main.nf.test | 108 +++++++++++++++++ .../tests/main.nf.test.snap | 72 +++++++++++ .../tests/nextflow.config | 10 ++ .../tests/stash_main.nf.test_stash | 113 ++++++++++++++++++ subworkflows/msk/fingerprint_gbcms/main.nf | 40 ++++++- subworkflows/msk/fingerprint_gbcms/meta.yml | 1 + 10 files changed, 552 insertions(+), 6 deletions(-) create mode 100644 modules/msk/custom/fingerprintcontamination/environment.yml create mode 100644 modules/msk/custom/fingerprintcontamination/main.nf create mode 100644 modules/msk/custom/fingerprintcontamination/meta.yml create mode 100755 modules/msk/custom/fingerprintcontamination/resources/usr/bin/calculate_contamination.py create mode 100644 modules/msk/custom/fingerprintcontamination/tests/main.nf.test create mode 100644 modules/msk/custom/fingerprintcontamination/tests/main.nf.test.snap create mode 100644 modules/msk/custom/fingerprintcontamination/tests/nextflow.config create mode 100644 modules/msk/custom/fingerprintcontamination/tests/stash_main.nf.test_stash diff --git a/modules/msk/custom/fingerprintcontamination/environment.yml 
b/modules/msk/custom/fingerprintcontamination/environment.yml new file mode 100644 index 00000000..21c00633 --- /dev/null +++ b/modules/msk/custom/fingerprintcontamination/environment.yml @@ -0,0 +1,7 @@ +channels: + - conda-forge + - bioconda + +dependencies: + - numpy=2.3.3 + - pandas=2.3.2 diff --git a/modules/msk/custom/fingerprintcontamination/main.nf b/modules/msk/custom/fingerprintcontamination/main.nf new file mode 100644 index 00000000..bbdcdb51 --- /dev/null +++ b/modules/msk/custom/fingerprintcontamination/main.nf @@ -0,0 +1,47 @@ +process CUSTOM_FINGERPRINTCONTAMINATION { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + //'oras://community.wave.seqera.io/library/numpy_pandas:1f8cb70bfdb82865': + 'docker://community.wave.seqera.io/library/numpy_pandas:f27ed83387b3c038': + 'community.wave.seqera.io/library/numpy_pandas:f27ed83387b3c038' }" + + input: + tuple val(meta), path(fp_tumor), path(fp_normal) + + output: + tuple val(meta), path("*.contamination.tsv"), emit: contamination_tsv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + calculate_contamination.py \\ + -t ${fp_tumor} \\ + -n ${fp_normal ?: fp_tumor} \\ + -o ${prefix}.contamination.tsv \\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + calculate_contamination.py: \$( calculate_contamination.py --version | rev | cut -f 1 -d " " | rev ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.contamination.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + calculate_contamination.py: \$( calculate_contamination.py --version | rev | cut -f 1 -d " " | rev ) + END_VERSIONS + """ +} diff --git 
a/modules/msk/custom/fingerprintcontamination/meta.yml b/modules/msk/custom/fingerprintcontamination/meta.yml new file mode 100644 index 00000000..4fde47a5 --- /dev/null +++ b/modules/msk/custom/fingerprintcontamination/meta.yml @@ -0,0 +1,60 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "custom_fingerprintcontamination" +description: "Calculate major and minor contamination from fingerprint tables" +version: "0.1.0" +keywords: + - fingerprint + - contamination + - qc +tools: + - "pandas": + description: "Python Data Analysis Library" + homepage: "https://pandas.pydata.org/" + documentation: "https://pandas.pydata.org/docs/" + - "numpy": + description: "Scientific computing library for Python" + homepage: "https://numpy.org/" + documentation: "https://numpy.org/doc/" + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - fp_tumor: + type: file + description: Fingerprint table file for tumor sample + pattern: "*.fp.tsv" + ontologies: + - edam: "http://edamontology.org/format_3750" # TSV + - fp_normal: + type: file + description: Fingerprint table file for normal sample + pattern: "*.fp.tsv" + ontologies: + - edam: "http://edamontology.org/format_3750" + +output: + - contamination_tsv: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
#!/usr/bin/env python


"""
Calculates contamination from fingerprint table
"""

__author__ = "Hanan Salim"
__email__ = "salimh@mskcc.org"
__contributors__ = "Anne Marie Noronha (noronhaa@mskcc.org)"
__version__ = "0.1.0"
__status__ = "Dev"

import argparse
import pandas as pd
import numpy as np
import os
import sys


def major_contamination(tumor, depth_filter):
    """Return the fraction of sufficiently covered tumor loci that are heterozygous.

    Args:
        tumor: Fingerprint table with at least the 'Alleles' and 'Genotype' columns.
        depth_filter: Loci are kept only when their coverage is strictly greater
            than this value.

    Returns:
        The heterozygous fraction as a float, or NaN when no locus passes the
        depth filter (previously this raised ZeroDivisionError).
    """
    tumor_filtered = get_coverage(tumor, depth_filter)

    n_sites = tumor_filtered.shape[0]
    if n_sites == 0:
        # Nothing to measure: report "undefined" instead of dividing by zero.
        return np.nan

    homozygous = ['AA', 'CC', 'GG', 'TT', 'A', 'C', 'G', 'T']
    heterozygous = ~tumor_filtered['Genotype'].isin(homozygous)

    return heterozygous.sum() / n_sites


def get_coverage(file, depth_filter):
    """Return the rows of a fingerprint table whose coverage exceeds ``depth_filter``.

    The 'Alleles' column must hold two space-separated tokens of the form
    ``<label>:<count>``. The returned table carries three extra columns: 'A1' and
    'A2' (the two tokens) and 'coverage' (the sum of both counts).

    The input table is never modified; all work happens on a copy, which also
    avoids writing into a slice of another DataFrame.

    Args:
        file: Fingerprint table (a DataFrame despite the historical name).
        depth_filter: Exclusive lower bound on coverage.

    Returns:
        A new DataFrame containing only the rows with coverage > depth_filter.
    """
    table = file.copy()

    if table.empty:
        # str.split(expand=True) yields no columns for an empty table, so add the
        # derived columns explicitly to keep the output schema stable.
        for column in ('A1', 'A2', 'coverage'):
            table[column] = []
        return table

    table[['A1', 'A2']] = table['Alleles'].str.split(' ', expand=True)

    a1_depth = [int(count) for count in table['A1'].str.split(':', expand=True)[1]]
    a2_depth = [int(count) for count in table['A2'].str.split(':', expand=True)[1]]

    table['coverage'] = [first + second for first, second in zip(a1_depth, a2_depth)]

    return table[table['coverage'] > depth_filter]


def minor_contamination(normal, tumor, depth_filter):
    """Return the mean tumor MAF over loci where the normal has MAF < 0.10.

    Loci are paired between the two tables by DataFrame index label, i.e. by row
    number when both tables come straight from ``pd.read_csv``.
    NOTE(review): this assumes both tables list the same loci in the same order;
    confirm against the producer of the fingerprint tables.

    Args:
        normal: Normal fingerprint table with a 'MAF' column.
        tumor: Tumor fingerprint table with 'Alleles' and 'MAF' columns.
        depth_filter: Exclusive lower bound on tumor coverage.

    Returns:
        The mean tumor MAF as a float, or NaN when no locus qualifies.

    Raises:
        KeyError: If a qualifying index label of ``normal`` is absent from ``tumor``.
    """
    homozygous_sites = normal.index[normal['MAF'] < .10]

    tumor_homozygous = tumor.loc[homozygous_sites]
    tumor_homozygous_filtered = get_coverage(tumor_homozygous, depth_filter)

    if tumor_homozygous_filtered.empty:
        return np.nan

    return tumor_homozygous_filtered['MAF'].mean()


def main():
    """Parse the command line, compute both contamination metrics, write the TSV."""
    parser = argparse.ArgumentParser(prog=sys.argv[0], description='Calculate major and minor contamination')

    parser.add_argument('-t', '--tumor',
                        required=True,
                        help='Tumor fingerprint table file')

    parser.add_argument('-n', '--normal',
                        required=True,
                        help='Normal fingerprint table file')

    parser.add_argument('-o', '--output',
                        required=True,
                        help='Output file for contamination results')

    parser.add_argument('-d', '--depthfilter',
                        required=False,
                        default=20,
                        type=int,
                        help='Depth filter for coverage (default: 20)'
                        )

    parser.add_argument('--version',
                        action='version',
                        version='%(prog)s ' + __version__
                        )

    args = parser.parse_args()

    # The header row of each input is discarded (header=0) and replaced by these names.
    fields = ['Position', 'Alleles', 'Genotype', 'MAF']

    tumor = pd.read_csv(args.tumor, sep='\t', names=fields, header=0)
    normal = pd.read_csv(args.normal, sep='\t', names=fields, header=0)

    major_contam = major_contamination(tumor, depth_filter=args.depthfilter)
    minor_contam = minor_contamination(normal, tumor, depth_filter=args.depthfilter)

    with open(args.output, 'w') as f:
        f.write("Tumor\tNormal\tMajor_Contamination\tMinor_Contamination\n")
        f.write("{}\t{}\t{:.4f}\t{:.4f}\n".format(
            os.path.basename(args.tumor),
            os.path.basename(args.normal),
            major_contam,
            minor_contam))


if __name__ == "__main__":
    main()
config "./nextflow.config" + + tag "modules" + tag "modules_msk" + tag "custom" + tag "custom/fingerprintcontamination" + tag "gbcms" + tag "custom/fingerprintvcfparser" + + test("sarscov2 - bam") { + setup { + run("GBCMS"){ + script "../../../gbcms/main.nf" + process { + """ + input[0] = [ + [ id:'test', sample:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam_bai'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true), + "variant_file.vcf" + ] + input[1] = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + input[2] = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + """ + } + } + run("CUSTOM_FINGERPRINTVCFPARSER"){ + script "../../fingerprintvcfparser/main.nf" + process { + """ + input[0] = GBCMS.out.variant_file + """ + } + } + } + + when { + process { + """ + input[0] = CUSTOM_FINGERPRINTVCFPARSER.out.tsv.map{ meta, tsv -> [meta,tsv,[]]} + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - bam - stub") { + + options "-stub" + + setup { + run("GBCMS"){ + script "../../../gbcms/main.nf" + process { + """ + input[0] = [ + [ id:'test', sample:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam_bai'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true), + "variant_file.vcf" + ] + input[1] = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + input[2] = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + """ + } + } + 
run("CUSTOM_FINGERPRINTVCFPARSER"){ + script "../../fingerprintvcfparser/main.nf" + process { + """ + input[0] = GBCMS.out.variant_file + """ + } + } + } + + when { + process { + """ + input[0] = CUSTOM_FINGERPRINTVCFPARSER.out.tsv.map{ meta, tsv -> [meta,tsv,[]]} + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/msk/custom/fingerprintcontamination/tests/main.nf.test.snap b/modules/msk/custom/fingerprintcontamination/tests/main.nf.test.snap new file mode 100644 index 00000000..7ceedf6b --- /dev/null +++ b/modules/msk/custom/fingerprintcontamination/tests/main.nf.test.snap @@ -0,0 +1,72 @@ +{ + "sarscov2 - bam - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "sample": "test" + }, + "test.contamination.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,f1635e715bcdf39792aa9f7fc3cf4d84" + ], + "contamination_tsv": [ + [ + { + "id": "test", + "sample": "test" + }, + "test.contamination.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,f1635e715bcdf39792aa9f7fc3cf4d84" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-25T17:13:52.297869395" + }, + "sarscov2 - bam": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "sample": "test" + }, + "test.contamination.tsv:md5,5b533c60b8eff1f4d2c5fe58a8262303" + ] + ], + "1": [ + "versions.yml:md5,f1635e715bcdf39792aa9f7fc3cf4d84" + ], + "contamination_tsv": [ + [ + { + "id": "test", + "sample": "test" + }, + "test.contamination.tsv:md5,5b533c60b8eff1f4d2c5fe58a8262303" + ] + ], + "versions": [ + "versions.yml:md5,f1635e715bcdf39792aa9f7fc3cf4d84" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-25T17:13:42.805178656" + } +} \ No newline at end of file diff --git a/modules/msk/custom/fingerprintcontamination/tests/nextflow.config 
b/modules/msk/custom/fingerprintcontamination/tests/nextflow.config new file mode 100644 index 00000000..fbd2b2d0 --- /dev/null +++ b/modules/msk/custom/fingerprintcontamination/tests/nextflow.config @@ -0,0 +1,10 @@ +process { + + withName: 'CUSTOM_FINGERPRINTCONTAMINATION' { + ext.args = "-d 0" + } + + withName: 'CUSTOM_FINGERPRINTVCFPARSER' { + ext.args = "-d 0" + } +} diff --git a/modules/msk/custom/fingerprintcontamination/tests/stash_main.nf.test_stash b/modules/msk/custom/fingerprintcontamination/tests/stash_main.nf.test_stash new file mode 100644 index 00000000..ba4496a5 --- /dev/null +++ b/modules/msk/custom/fingerprintcontamination/tests/stash_main.nf.test_stash @@ -0,0 +1,113 @@ +nextflow_process { + + name "Test Process CUSTOM_FINGERPRINTCONTAMINATION" + script "../main.nf" + process "CUSTOM_FINGERPRINTCONTAMINATION" + config "./nextflow.config" + + tag "modules" + tag "modules_msk" + tag "custom" + tag "custom/fingerprintcontamination" + + test("homo sapiens - chr 22 bam") { + setup { + run("GBCMS"){ + script "../../../gbcms/main.nf" + + params{ + input = "NA12878_GIAB.chr22.vcf" + } + + process { + """ + input[0] = [ + [ id:'test', sample:'test' ], // meta map + file("https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/homo_sapiens/illumina/bam/NA12878.chr22.bam", checkIfExists:true), + file("https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/homo_sapiens/illumina/bam/NA12878.chr22.bam.bai", checkIfExists:true), + file("$baseDir/modules/msk/custom/fingerprintcontamination/tests/NA12878_GIAB.chr22.vcf", checkIfExists:true), + "variant_file.vcf" + ] + input[1] = file(params.test_data_mskcc['calculate_noise']['test_chr22_fa'], checkIfExists: true) + input[2] = file(params.test_data_mskcc['calculate_noise']['test_chr22_fa_fai'], checkIfExists: true) + //input[1] = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + //input[2] = 
file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + """ + } + } + run("CUSTOM_FINGERPRINTVCFPARSER"){ + script "../../fingerprintvcfparser/main.nf" + process { + """ + input[0] = GBCMS.out.variant_file + """ + } + } + } + + when { + process { + """ + input[0] = CUSTOM_FINGERPRINTVCFPARSER.out.tsv.map{ meta, tsv -> [meta,tsv,[]]} + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - bam - stub") { + + options "-stub" + + setup { + run("GBCMS"){ + script "../../../gbcms/main.nf" + process { + """ + input[0] = [ + [ id:'test', sample:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam_bai'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true), + "variant_file.vcf" + ] + input[1] = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + input[2] = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + """ + } + } + run("CUSTOM_FINGERPRINTVCFPARSER"){ + script "../../fingerprintvcfparser/main.nf" + process { + """ + input[0] = GBCMS.out.variant_file + """ + } + } + } + + when { + process { + """ + input[0] = CUSTOM_FINGERPRINTVCFPARSER.out.tsv.map{ meta, tsv -> [meta,tsv,[]]} + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/subworkflows/msk/fingerprint_gbcms/main.nf b/subworkflows/msk/fingerprint_gbcms/main.nf index d121fd55..823225a3 100644 --- a/subworkflows/msk/fingerprint_gbcms/main.nf +++ b/subworkflows/msk/fingerprint_gbcms/main.nf @@ -1,12 +1,20 @@ -include { GBCMS } from '../../../modules/msk/gbcms/main' -include { CUSTOM_FINGERPRINTVCFPARSER } from 
'../../../modules/msk/custom/fingerprintvcfparser/main' +include { GBCMS } from '../../../modules/msk/gbcms/main' +include { CUSTOM_FINGERPRINTVCFPARSER } from '../../../modules/msk/custom/fingerprintvcfparser/main' +include { CUSTOM_FINGERPRINTCONTAMINATION } from '../../../modules/msk/custom/fingerprintcontamination/main' workflow FINGERPRINT_GBCMS { take: + //ch_bam // channel: [ val(meta), [ bam ] ] + //ch_bai // channel: [ val(meta), [ bai ] ] + //ch_fp_loci_vcf // channel: [ val(meta), [ vcf ] ] + //ch_fasta // channel: [ fasta ] + //ch_fastafai // channel: [ fastafai ] ch_bam // channel: [ val(meta), [ bam ] ] ch_bai // channel: [ val(meta), [ bai ] ] - ch_fp_vcf // channel: [ val(meta), [ vcf ] ] + ch_fp_tsv // channel: [ val(meta), [ tsv ] ] + ch_fp_loci_vcf // channel: [ val(meta), [ vcf ] ] + ch_liftover_loci_mapping // channel: [ liftover_loci_mapping ] ch_fasta // channel: [ fasta ] ch_fastafai // channel: [ fastafai ] @@ -17,7 +25,7 @@ workflow FINGERPRINT_GBCMS { GBCMS ( ch_bam .combine(ch_bai, by:[0]) - .combine(ch_fp_vcf.map{ if (it.size() > 1){ it[1] } else { it }}.first()) + .combine(ch_fp_loci_vcf.map{ if (it.size() > 1){ it[1] } else { it }}.first()) .map{ meta, bam, bai, vcf -> [meta, bam, bai, vcf, meta.id + ".fp.vcf" ] }, ch_fasta.first(), ch_fastafai.first() @@ -29,7 +37,27 @@ workflow FINGERPRINT_GBCMS { CUSTOM_FINGERPRINTVCFPARSER ( GBCMS.out.variant_file ) ch_versions = ch_versions.mix(CUSTOM_FINGERPRINTVCFPARSER.out.versions.first()) + all_fps = CUSTOM_FINGERPRINTVCFPARSER.out.tsv.mix(ch_fp_tsv) + + paired_fps = all_fps + .filter{ meta, tsv -> meta.case_id != null && meta.control_id != null && meta.id == meta.case_id } + .combine(all_fps.out.tsv) + .filter{ meta1, fp1, meta2, fp2 -> + meta1.control_id == meta2.id + }.map{ meta1, fp1, meta2, fp2 -> + [ meta1, fp1, fp2] + } + + unpaired_fps = all_fps + .filter{ meta, tsv -> meta.id != meta.case_id || meta.control_id == null } + .map{ meta, tsv -> [ meta, tsv, null ] } + + 
CUSTOM_FINGERPRINTCONTAMINATION ( paired_fps.mix(unpaired_fps) ) + ch_versions = ch_versions.mix(CUSTOM_FINGERPRINTCONTAMINATION.out.versions.first()) + + emit: - fp_tsv = CUSTOM_FINGERPRINTVCFPARSER.out.tsv // channel: [ val(meta), tsv ] - versions = ch_versions // channel: [ versions.yml ] + fp_tsv = CUSTOM_FINGERPRINTVCFPARSER.out.tsv // channel: [ val(meta), tsv ] + contamination_tsv = CUSTOM_FINGERPRINTCONTAMINATION.out.contamination_tsv // channel: [ val(meta), contamination_tsv ] + versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/msk/fingerprint_gbcms/meta.yml b/subworkflows/msk/fingerprint_gbcms/meta.yml index f90038e5..c0002dbb 100644 --- a/subworkflows/msk/fingerprint_gbcms/meta.yml +++ b/subworkflows/msk/fingerprint_gbcms/meta.yml @@ -9,6 +9,7 @@ keywords: - loci - vcf - bam + - qc components: - gbcms - custom/fingerprintvcfparser From 95548d1980cbb95f3e993bad974bc5c5ea01f735 Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Thu, 25 Sep 2025 23:50:37 -0400 Subject: [PATCH 12/52] add custom/fingerprintcombine module --- .../custom/fingerprintcombine/environment.yml | 11 ++ modules/msk/custom/fingerprintcombine/main.nf | 66 +++++++++++ .../msk/custom/fingerprintcombine/meta.yml | 55 +++++++++ .../resources/usr/bin/complete_FP_table.R | 108 ++++++++++++++++++ .../fingerprintcombine/tests/loci_mapping.tsv | 10 ++ .../fingerprintcombine/tests/main.nf.test | 102 +++++++++++++++++ .../tests/main.nf.test.snap | 48 ++++++++ .../fingerprintcombine/tests/nextflow.config | 5 + 8 files changed, 405 insertions(+) create mode 100644 modules/msk/custom/fingerprintcombine/environment.yml create mode 100644 modules/msk/custom/fingerprintcombine/main.nf create mode 100644 modules/msk/custom/fingerprintcombine/meta.yml create mode 100755 modules/msk/custom/fingerprintcombine/resources/usr/bin/complete_FP_table.R create mode 100644 modules/msk/custom/fingerprintcombine/tests/loci_mapping.tsv create mode 100644 
// Combines several per-sample fingerprint TSV files into one wide table by writing a
// three-column manifest (sample_id, genome_build, fp_tsv) and handing it to
// complete_FP_table.R together with the liftover loci mapping.
process CUSTOM_FINGERPRINTCOMBINE {
    // The original `tag '$bam'` was a single-quoted literal naming a variable this
    // process never receives, so every task was tagged with the text "$bam".
    // Tag by the number of pooled samples instead; `[sample].flatten()` copes with
    // `sample` being either a list or a single value.
    tag "${[sample].flatten().size()} samples"
    label 'process_single'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'docker://community.wave.seqera.io/library/r-argparse_r-data.table_r-dplyr_r-plyr_r-tidyverse:8c0daffb3624cb66':
        'community.wave.seqera.io/library/r-argparse_r-data.table_r-dplyr_r-plyr_r-tidyverse:8c0daffb3624cb66' }"
        //' oras://community.wave.seqera.io/library/r-argparse_r-data.table_r-dplyr_r-plyr_r-tidyverse:d96a65055f79744c':


    input:
    tuple path(fp_tsv),      // list of paths to fingerprint TSV files
        val(sample),         // list of sample identifiers, one per TSV file, in the same order
        val(genome_build)    // list of genome builds, one per TSV file, in the same order
    path(liftover_loci_mapping)

    output:
    path "*DPfilter_ALL_FP.txt", emit: combined_fp_tsv
    path "versions.yml"        , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    // The three parallel lists are expanded into bash arrays and zipped by position
    // into input.tsv, the manifest consumed by complete_FP_table.R.
    """
    declare -a fp_tsv_list
    declare -a sample_list
    declare -a genome_build_list
    fp_tsv_list=(${fp_tsv.join(' ')})
    sample_list=(${sample.join(' ')})
    genome_build_list=(${genome_build.join(' ')})
    echo -e "sample_id\tgenome_build\tfp_tsv" > input.tsv
    for i in \$(seq 0 1 \$((\${#fp_tsv_list[@]}-1)) ) ; do
        fp_tsv=\${fp_tsv_list[i]}
        sample=\${sample_list[i]}
        genome=\${genome_build_list[i]}
        echo -e "\$sample\t\$genome\t\$fp_tsv"
    done >> input.tsv

    complete_FP_table.R \\
        -i input.tsv \\
        -l $liftover_loci_mapping \\
        $args

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        complete_FP_table.R: 0.1.0
    END_VERSIONS
    """

    stub:
    def args = task.ext.args ?: ''

    """
    echo $args

    touch XDPfilter_ALL_FP.txt

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        complete_FP_table.R: 0.1.0
    END_VERSIONS
    """
}
$schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +# # TODO nf-core: Add a description of the module and list keywords +name: "custom_fingerprintcombine" +description: | + A module to combine multiple fingerprint TSV files into a single comprehensive + table, with optional liftover of loci coordinates. +keywords: +- fingerprint +- qc +- loci +- tsv +- correlation +tools: +## TODO nf-core: Add a description and other details for the software below +- "custom": + description: "A custom R script to combine fingerprint TSV files" + homepage: "https://github.com/mskcc-omics-workflows/modules/tree/main/modules/msk/custom/fingerprintcombine/meta.yml" + +input: + - - fp_tsv: + type: file + description: | + Fingerprint TSV files to be combined. + Structure: [ val(sample), val(genome_build), path(fp_tsv) ] + - sample: + type: string + description: Sample identifier corresponding to each fingerprint TSV file. + - genome_build: + type: string + description: Genome build (e.g., hg19, hg38) corresponding to each fingerprint TSV file. + - liftover_loci_mapping: + type: file + description: | + A TSV file mapping original loci to liftover loci. + Format: original_chr, original_pos, liftover_chr, liftover_pos + pattern: "*.tsv" +output: + combined_fp_tsv: + - "*DPfilter_ALL_FP.txt": + type: file + description: Wide table combining all input fingerprint TSV files. 
#! /usr/bin/env Rscript

#-------------------------------------------------------------------------------
# Script: complete_FP_table.R
# Author: Erika Gedvilaite
# Date: 2025-09-23
# Version: 0.1.0
#
# Description: This script takes in standard fingerprint tables and combines
# them into a single, wide table for downstream plotting and analysis.
#
# Annotation:
# - Input table should have three columns: sample_id, genome_build, fp_tsv
# - Genome build should be either "hg19" or "hg38" or "GRCh37" or "GRCh38"
#   (case insensitive)
# - The loci mapper must provide the columns GRCH37_CHROM, GRCH37_POS,
#   GRCH38_CHROM and GRCH38_POS
# - Output is written to <analysis_folder>/<depth_filter>DPfilter_ALL_FP.txt
#
#-------------------------------------------------------------------------------


rm(list=ls())

library(argparse, quietly = T)
library(plyr, quietly = T)
library(dplyr, quietly = T)
library(data.table, quietly = T)
library(tidyverse, quietly = T)

`%notin%` <- Negate(`%in%`)
`%notlike%` <- Negate(`%like%`)

parser = ArgumentParser(description = 'Generate FP tables for plotting')
parser$add_argument('-i', '--input_table', required = TRUE,
                    help = 'Input table with paths to individual fingerprint TSV files, sample ids, and genome build')
parser$add_argument('-o', '--analysis_folder', required = FALSE, default = ".",
                    help = 'Output folder')
parser$add_argument('-l', '--loci_mapper', required = TRUE,
                    help = 'Loci mapper file')
# type = "integer" is essential: without it a value given on the command line is
# returned as a character string, and `DP >= "20"` silently becomes a lexical
# comparison (e.g. 5 >= "20" is TRUE), so the depth filter let shallow loci through.
parser$add_argument('-d', '--depth_filter', required = FALSE, default = 20, type = "integer",
                    help = 'Depth filter to apply to individual fingerprint TSV files (default: 20)')
args = parser$parse_args()



message("Reading in Liftover file")

# Build one "chrom:pos" key per genome build so that samples from either build
# can be joined onto the same set of loci.
hg19_hg38_mapper = fread(args$loci_mapper,header = T)
hg19_hg38_mapper$Loci_hg19 = paste(hg19_hg38_mapper$GRCH37_CHROM,hg19_hg38_mapper$GRCH37_POS,sep=":")
hg19_hg38_mapper$Loci_hg38 = paste(hg19_hg38_mapper$GRCH38_CHROM,hg19_hg38_mapper$GRCH38_POS,sep=":")
hg19_hg38_mapper = hg19_hg38_mapper %>% select(Loci_hg19, Loci_hg38) %>% unique()

message("Loading Samples")
input_table = fread(args$input_table, header = T)
if (nrow(input_table) == 0){
  stop(paste0("Input table contains no samples: ", args$input_table))
}
# seq_len() instead of 1:nrow(): 1:0 would iterate over c(1, 0) on an empty table.
for (i in seq_len(nrow(input_table))){
  sample = input_table$sample_id[i]
  genome_build = input_table$genome_build[i]
  print(genome_build)
  if (tolower(genome_build) %notin% c("hg19","grch37","hg38","grch38")){
    stop(paste0("Genome build not recognized: ", genome_build, ". Must be in the following list: hg19, hg38, grch37, grch38 (case will be ignored)."))
  }
  file = input_table$fp_tsv[i]
  if (!file.exists(file)){
    stop(paste0("File does not exist: ", input_table$fp_tsv[i]))
  }
  temp_dataset <- fread(file, header = T, sep="\t")
  colnames(temp_dataset) = c("Locus", "Count", "Genotype","VAF")
  # Count is split on non-alphanumeric characters into label/depth pairs; only the
  # two depths are kept. A missing second allele counts as depth 0.
  temp_dataset = separate(temp_dataset, Count, into = c(NA,'DP1',NA,'DP2'), remove = F)
  temp_dataset$DP2[is.na(temp_dataset$DP2)==T] <- 0
  temp_dataset$DP = as.numeric(temp_dataset$DP1) + as.numeric(temp_dataset$DP2)
  temp_dataset = temp_dataset[temp_dataset$DP >= args$depth_filter,] ## keeping loci >= 20 dp by default
  temp_dataset$VAF[is.na(temp_dataset$VAF)==T] <- 0
  temp_dataset$Sample = sample #only loci with DP >= depth filter will have Sample info
  temp_dataset = temp_dataset %>% select("Locus","Genotype","Sample","VAF")
  temp_dataset$Locus = str_replace(temp_dataset$Locus,"chr","")

  # Left-join onto the mapper using the key that matches the sample's genome build.
  if (tolower(genome_build) %in% c("hg19","grch37")){
    temp_dataset = merge(hg19_hg38_mapper, temp_dataset, by.x = "Loci_hg19", by.y = "Locus", all.x = T)
    temp_dataset$VAF[is.na(temp_dataset$VAF)==T] <- 0
  } else if (tolower(genome_build) %in% c("hg38","grch38")){
    temp_dataset = merge(hg19_hg38_mapper, temp_dataset, by.x = "Loci_hg38", by.y = "Locus", all.x = T)
    temp_dataset$VAF[is.na(temp_dataset$VAF)==T] <- 0
  }

  if (!exists("all_gbcm")){
    all_gbcm = temp_dataset
  } else {
    all_gbcm = rbind(all_gbcm, temp_dataset)
  }
}
all_gbcm = all_gbcm[is.na(all_gbcm$Sample)==F,] # filters out loci that don't have Sample info (i.e. loci not passing DP filter)
all_gbcm$VAF = round(as.numeric(all_gbcm$VAF), 5)

wide_all_gbcm = all_gbcm %>% pivot_wider(names_from = Sample, values_from = c(Genotype, VAF))

message("Creating final GBCM file")

all_fp_gbcm_final = merge(hg19_hg38_mapper, wide_all_gbcm,all.x = T)

if (!dir.exists(args$analysis_folder)) {
  dir.create(args$analysis_folder, recursive = TRUE)
} else {
  print(paste("Directory already exists:", args$analysis_folder))
}

message(paste("Output file: ", args$analysis_folder,"/",args$depth_filter,"DPfilter_ALL_FP.txt", sep=""))

write.table(all_fp_gbcm_final, file = paste(args$analysis_folder,"/",args$depth_filter,"DPfilter_ALL_FP.txt", sep=""), append = F, sep = "\t", row.names = F, quote = F)

message("FP file completed")
"custom/fingerprintvcfparser" + + test("sarscov2 - bam") { + + setup { + run("GBCMS"){ + script "../../../gbcms/main.nf" + process { + """ + input[0] = Channel.of( + [ + [ id:'test', sample:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam_bai'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true), + "variant_file.vcf" + ], + [ + [ id:'test2', sample:'test2' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam_bai'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true), + "variant_file.vcf" + ], + ) + input[1] = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + input[2] = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + """ + } + } + run("CUSTOM_FINGERPRINTVCFPARSER"){ + script "../../fingerprintvcfparser/main.nf" + process { + """ + input[0] = GBCMS.out.variant_file + """ + } + } + } + + when { + process { + """ + input[0] = CUSTOM_FINGERPRINTVCFPARSER.out.tsv + .map{ meta, tsv -> ["placeholder",tsv, meta.id, "hg19"] } + .groupTuple(by:[0]) + .map{ placeholder, tsv, sampleid, genome -> [tsv, sampleid, genome] } + input[1] = file("$baseDir/modules/msk/custom/fingerprintcombine/tests/loci_mapping.tsv", checkIfExists:true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + + test("sarscov2 - bam - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true)], + ["testsample"], + ["hg19"] + ] + input[1] = 
file("$baseDir/modules/msk/custom/fingerprintcombine/tests/loci_mapping.tsv", checkIfExists:true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/msk/custom/fingerprintcombine/tests/main.nf.test.snap b/modules/msk/custom/fingerprintcombine/tests/main.nf.test.snap new file mode 100644 index 00000000..e8576d2f --- /dev/null +++ b/modules/msk/custom/fingerprintcombine/tests/main.nf.test.snap @@ -0,0 +1,48 @@ +{ + "sarscov2 - bam - stub": { + "content": [ + { + "0": [ + "XDPfilter_ALL_FP.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "1": [ + "versions.yml:md5,3bd40a0fd11a907f31110dd113fd88c2" + ], + "combined_fp_tsv": [ + "XDPfilter_ALL_FP.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "versions": [ + "versions.yml:md5,3bd40a0fd11a907f31110dd113fd88c2" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-25T22:47:09.499353594" + }, + "sarscov2 - bam": { + "content": [ + { + "0": [ + "0DPfilter_ALL_FP.txt:md5,509d9f7c1d89b9f8e2825bcc4793da3a" + ], + "1": [ + "versions.yml:md5,3bd40a0fd11a907f31110dd113fd88c2" + ], + "combined_fp_tsv": [ + "0DPfilter_ALL_FP.txt:md5,509d9f7c1d89b9f8e2825bcc4793da3a" + ], + "versions": [ + "versions.yml:md5,3bd40a0fd11a907f31110dd113fd88c2" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-25T22:47:03.219089934" + } +} \ No newline at end of file diff --git a/modules/msk/custom/fingerprintcombine/tests/nextflow.config b/modules/msk/custom/fingerprintcombine/tests/nextflow.config new file mode 100644 index 00000000..583ce385 --- /dev/null +++ b/modules/msk/custom/fingerprintcombine/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: 'CUSTOM_FINGERPRINTCOMBINE' { + ext.args = "-d 0" + } +} From b6e5229b2b74f50ce2a162e46708a5d7ada8bdd6 Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Thu, 25 Sep 2025 23:55:57 -0400 Subject: 
[PATCH 13/52] add subworkflow fingerprint_gbcms_batch --- .../msk/fingerprint_gbcms_batch/main.nf | 27 +++++++ .../msk/fingerprint_gbcms_batch/meta.yml | 41 +++++++++++ .../tests/main.nf.test | 72 +++++++++++++++++++ .../tests/main.nf.test.snap | 25 +++++++ .../tests/nextflow.config | 5 ++ 5 files changed, 170 insertions(+) create mode 100644 subworkflows/msk/fingerprint_gbcms_batch/main.nf create mode 100644 subworkflows/msk/fingerprint_gbcms_batch/meta.yml create mode 100644 subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test create mode 100644 subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test.snap create mode 100644 subworkflows/msk/fingerprint_gbcms_batch/tests/nextflow.config diff --git a/subworkflows/msk/fingerprint_gbcms_batch/main.nf b/subworkflows/msk/fingerprint_gbcms_batch/main.nf new file mode 100644 index 00000000..98e227b7 --- /dev/null +++ b/subworkflows/msk/fingerprint_gbcms_batch/main.nf @@ -0,0 +1,27 @@ + +include { CUSTOM_FINGERPRINTCOMBINE } from '../../../modules/msk/custom/fingerprintcombine/main' + +workflow FINGERPRINT_GBCMS_BATCH { + + take: + ch_fp // channel: [ val(meta), [ bam ] ] + ch_liftover_loci_mapping // channel: [ liftover_loci_mapping ] + + main: + + ch_versions = Channel.empty() + + + CUSTOM_FINGERPRINTCOMBINE( + ch_fp + .map{ meta, tsv -> ["placeholder",tsv, meta.id, "hg19"] } + .groupTuple(by:[0]) + .map{ placeholder, tsv, sampleid, genome -> [tsv, sampleid, genome] }, + ch_liftover_loci_mapping.first() + ) + ch_versions = ch_versions.mix(CUSTOM_FINGERPRINTCOMBINE.out.versions.first()) + + emit: + combined_fp_tsv = CUSTOM_FINGERPRINTCOMBINE.out.combined_fp_tsv // channel: [ val(meta), [ bam ] ] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/msk/fingerprint_gbcms_batch/meta.yml b/subworkflows/msk/fingerprint_gbcms_batch/meta.yml new file mode 100644 index 00000000..ca573350 --- /dev/null +++ b/subworkflows/msk/fingerprint_gbcms_batch/meta.yml @@ -0,0 +1,41 @@ +# 
yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "fingerprint_gbcms_batch" +description: "Subworkflow to combine and compare Fingerprint files from different samples" +keywords: + - fingerprint + - qc + - liftover + - batch + - pool +components: + - modules/msk/custom/fingerprintcombine +input: + - ch_fp: + type: file + description: | + The input channel containing one fingerprint file per sample + Structure: [ val(meta), path(fp_tsv) ] + pattern: "*.fp.tsv" + - ch_liftover_loci_mapping: + type: file + description: | + The input channel containing the loci mapping file for liftover + Structure: [ path(loci_mapping.tsv) ] + pattern: "*.tsv" +output: + - combined_fp_tsv: + type: file + description: | + Channel containing combined fingerprint TSV file + Structure: [ path(combined_fp_tsv) ] + pattern: "*DPfilter_ALL_FP.txt" + - versions: + type: file + description: | + File containing software versions + Structure: [ path(versions.yml) ] + pattern: "versions.yml" +authors: + - "@anoronh4" +maintainers: + - "@anoronh4" diff --git a/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test b/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test new file mode 100644 index 00000000..bfdd825a --- /dev/null +++ b/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test @@ -0,0 +1,72 @@ +// nf-core subworkflows test fingerprint_gbcms_batch +nextflow_workflow { + + name "Test Subworkflow FINGERPRINT_GBCMS_BATCH" + script "../main.nf" + config "./nextflow.config" + workflow "FINGERPRINT_GBCMS_BATCH" + + tag "subworkflows" + tag "subworkflows_msk" + tag "subworkflows/fingerprint_gbcms_batch" + tag "gbcms" + tag "custom/fingerprintvcfparser" + tag "custom/fingerprintcombine" + + + test("sarscov2 - bam - single_end") { + + setup { + run("GBCMS"){ + script "../../../../modules/msk/gbcms/main.nf" + process { + """ + input[0] = Channel.of( + [ + [ id:'test', sample:'test' ], // meta map + 
file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam_bai'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true), + "variant_file.vcf" + ], + [ + [ id:'test2', sample:'test2' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam_bai'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true), + "variant_file.vcf" + ], + ) + input[1] = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + input[2] = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + """ + } + } + run("CUSTOM_FINGERPRINTVCFPARSER"){ + script "../../../../modules/msk/custom/fingerprintvcfparser/main.nf" + process { + """ + input[0] = GBCMS.out.variant_file + """ + } + } + } + + when { + workflow { + """ + input[0] = CUSTOM_FINGERPRINTVCFPARSER.out.tsv + input[1] = [file("$baseDir/modules/msk/custom/fingerprintcombine/tests/loci_mapping.tsv", checkIfExists:true)] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + + } +} diff --git a/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test.snap b/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test.snap new file mode 100644 index 00000000..7611bc83 --- /dev/null +++ b/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test.snap @@ -0,0 +1,25 @@ +{ + "sarscov2 - bam - single_end": { + "content": [ + { + "0": [ + "0DPfilter_ALL_FP.txt:md5,509d9f7c1d89b9f8e2825bcc4793da3a" + ], + "1": [ + "versions.yml:md5,bc54e025756d97cd9b14d51a3c9e3667" + ], + "combined_fp_tsv": [ + "0DPfilter_ALL_FP.txt:md5,509d9f7c1d89b9f8e2825bcc4793da3a" + ], + "versions": [ + 
"versions.yml:md5,bc54e025756d97cd9b14d51a3c9e3667" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-25T23:49:26.050835746" + } +} \ No newline at end of file diff --git a/subworkflows/msk/fingerprint_gbcms_batch/tests/nextflow.config b/subworkflows/msk/fingerprint_gbcms_batch/tests/nextflow.config new file mode 100644 index 00000000..583ce385 --- /dev/null +++ b/subworkflows/msk/fingerprint_gbcms_batch/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: 'CUSTOM_FINGERPRINTCOMBINE' { + ext.args = "-d 0" + } +} From 257e8f9f134ff3f5ff42c7c592031d134ece774f Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Fri, 26 Sep 2025 00:27:27 -0400 Subject: [PATCH 14/52] update subworkflow to designate a genome to each sample fingerprint file --- subworkflows/msk/fingerprint_gbcms_batch/main.nf | 3 ++- subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/subworkflows/msk/fingerprint_gbcms_batch/main.nf b/subworkflows/msk/fingerprint_gbcms_batch/main.nf index 98e227b7..ee16fcd6 100644 --- a/subworkflows/msk/fingerprint_gbcms_batch/main.nf +++ b/subworkflows/msk/fingerprint_gbcms_batch/main.nf @@ -6,6 +6,7 @@ workflow FINGERPRINT_GBCMS_BATCH { take: ch_fp // channel: [ val(meta), [ bam ] ] ch_liftover_loci_mapping // channel: [ liftover_loci_mapping ] + default_genome main: @@ -14,7 +15,7 @@ workflow FINGERPRINT_GBCMS_BATCH { CUSTOM_FINGERPRINTCOMBINE( ch_fp - .map{ meta, tsv -> ["placeholder",tsv, meta.id, "hg19"] } + .map{ meta, tsv -> ["placeholder", tsv, meta.id, meta.genome ?: default_genome ] } .groupTuple(by:[0]) .map{ placeholder, tsv, sampleid, genome -> [tsv, sampleid, genome] }, ch_liftover_loci_mapping.first() diff --git a/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test b/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test index bfdd825a..8500dcfd 100644 --- 
a/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test +++ b/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test @@ -57,6 +57,7 @@ nextflow_workflow { """ input[0] = CUSTOM_FINGERPRINTVCFPARSER.out.tsv input[1] = [file("$baseDir/modules/msk/custom/fingerprintcombine/tests/loci_mapping.tsv", checkIfExists:true)] + input[2] = "hg19" """ } } From 33c6cbda5ede1535a7249c1713e071c79e9cf81a Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Fri, 26 Sep 2025 01:29:19 -0400 Subject: [PATCH 15/52] remove tag attribute on custom/fingerprintcombine --- modules/msk/custom/fingerprintcombine/main.nf | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/msk/custom/fingerprintcombine/main.nf b/modules/msk/custom/fingerprintcombine/main.nf index 4d7b5e6d..be6b873d 100644 --- a/modules/msk/custom/fingerprintcombine/main.nf +++ b/modules/msk/custom/fingerprintcombine/main.nf @@ -1,5 +1,4 @@ process CUSTOM_FINGERPRINTCOMBINE { - tag '$bam' label 'process_single' conda "${moduleDir}/environment.yml" From 404b8ef8f38597f47f272eed4ddb8937ab3476a0 Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Fri, 26 Sep 2025 01:31:16 -0400 Subject: [PATCH 16/52] update fingerprint_gbcms subworkflow, including fingerprint_gbcms_batch subworkflow --- subworkflows/msk/fingerprint_gbcms/main.nf | 38 +++++++---- .../msk/fingerprint_gbcms/tests/main.nf.test | 43 ++++++++---- .../fingerprint_gbcms/tests/main.nf.test.snap | 66 +++++++++++++++++-- .../fingerprint_gbcms/tests/nextflow.config | 13 ++++ 4 files changed, 130 insertions(+), 30 deletions(-) create mode 100644 subworkflows/msk/fingerprint_gbcms/tests/nextflow.config diff --git a/subworkflows/msk/fingerprint_gbcms/main.nf b/subworkflows/msk/fingerprint_gbcms/main.nf index 823225a3..5f67f594 100644 --- a/subworkflows/msk/fingerprint_gbcms/main.nf +++ b/subworkflows/msk/fingerprint_gbcms/main.nf @@ -1,15 +1,11 @@ include { GBCMS } from '../../../modules/msk/gbcms/main' include { CUSTOM_FINGERPRINTVCFPARSER } from 
'../../../modules/msk/custom/fingerprintvcfparser/main' include { CUSTOM_FINGERPRINTCONTAMINATION } from '../../../modules/msk/custom/fingerprintcontamination/main' +include { FINGERPRINT_GBCMS_BATCH } from '../fingerprint_gbcms_batch/main' workflow FINGERPRINT_GBCMS { take: - //ch_bam // channel: [ val(meta), [ bam ] ] - //ch_bai // channel: [ val(meta), [ bai ] ] - //ch_fp_loci_vcf // channel: [ val(meta), [ vcf ] ] - //ch_fasta // channel: [ fasta ] - //ch_fastafai // channel: [ fastafai ] ch_bam // channel: [ val(meta), [ bam ] ] ch_bai // channel: [ val(meta), [ bai ] ] ch_fp_tsv // channel: [ val(meta), [ tsv ] ] @@ -17,16 +13,21 @@ workflow FINGERPRINT_GBCMS { ch_liftover_loci_mapping // channel: [ liftover_loci_mapping ] ch_fasta // channel: [ fasta ] ch_fastafai // channel: [ fastafai ] + default_genome // channel: [ genome ] + run_correlation main: ch_versions = Channel.empty() - GBCMS ( + println ch_fp_loci_vcf.getClass() + println ch_fasta.getClass() + + GBCMS( ch_bam .combine(ch_bai, by:[0]) - .combine(ch_fp_loci_vcf.map{ if (it.size() > 1){ it[1] } else { it }}.first()) - .map{ meta, bam, bai, vcf -> [meta, bam, bai, vcf, meta.id + ".fp.vcf" ] }, + .combine(ch_fp_loci_vcf.map{ if ( [it].flatten().size() > 1){ it[1] } else { it }}.first()) + .map{ meta, bam, bai, vcf -> [ meta, bam, bai, vcf, meta.id + ".fp.vcf" ] }.view(), ch_fasta.first(), ch_fastafai.first() //ch_fasta.view().map{ if (it[0] instanceof Map){ it[1] } else { it }}.first(), @@ -34,14 +35,17 @@ workflow FINGERPRINT_GBCMS { ) ch_versions = ch_versions.mix(GBCMS.out.versions.first()) + + CUSTOM_FINGERPRINTVCFPARSER ( GBCMS.out.variant_file ) ch_versions = ch_versions.mix(CUSTOM_FINGERPRINTVCFPARSER.out.versions.first()) all_fps = CUSTOM_FINGERPRINTVCFPARSER.out.tsv.mix(ch_fp_tsv) + paired_fps = all_fps .filter{ meta, tsv -> meta.case_id != null && meta.control_id != null && meta.id == meta.case_id } - .combine(all_fps.out.tsv) + .combine(all_fps) .filter{ meta1, fp1, meta2, fp2 -> 
meta1.control_id == meta2.id }.map{ meta1, fp1, meta2, fp2 -> @@ -50,14 +54,26 @@ workflow FINGERPRINT_GBCMS { unpaired_fps = all_fps .filter{ meta, tsv -> meta.id != meta.case_id || meta.control_id == null } - .map{ meta, tsv -> [ meta, tsv, null ] } + .map{ meta, tsv -> [ meta, tsv, [] ] } - CUSTOM_FINGERPRINTCONTAMINATION ( paired_fps.mix(unpaired_fps) ) + CUSTOM_FINGERPRINTCONTAMINATION ( paired_fps.mix(unpaired_fps).view() ) ch_versions = ch_versions.mix(CUSTOM_FINGERPRINTCONTAMINATION.out.versions.first()) + if (run_correlation) { + FINGERPRINT_GBCMS_BATCH ( + all_fps, + ch_liftover_loci_mapping, + default_genome + ) + ch_versions = ch_versions.mix(FINGERPRINT_GBCMS_BATCH.out.versions.first()) + } else { + FINGERPRINT_GBCMS_BATCH.out.combined_fp_tsv = Channel.empty() + } emit: fp_tsv = CUSTOM_FINGERPRINTVCFPARSER.out.tsv // channel: [ val(meta), tsv ] contamination_tsv = CUSTOM_FINGERPRINTCONTAMINATION.out.contamination_tsv // channel: [ val(meta), contamination_tsv ] + combined_fp_tsv = FINGERPRINT_GBCMS_BATCH.out.combined_fp_tsv // channel: [ tsv ] versions = ch_versions // channel: [ versions.yml ] + } diff --git a/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test b/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test index 55de93ca..46a8bdd8 100644 --- a/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test +++ b/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test @@ -2,11 +2,13 @@ nextflow_workflow { name "Test Subworkflow FINGERPRINT_GBCMS" script "../main.nf" + config "./nextflow.config" workflow "FINGERPRINT_GBCMS" tag "subworkflows" tag "subworkflows_msk" tag "subworkflows/fingerprint_gbcms" + tag "subworkflows/fingerprint_gbcms_batch" tag "gbcms" tag "custom/fingerprintvcfparser" @@ -15,20 +17,33 @@ nextflow_workflow { when { workflow { """ - input[0] = Channel.of([ - [ id:'test', sample:'test' ], // meta map - file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true) - ]) - input[1] = Channel.of([ - [ 
id:'test', sample:'test' ], // meta map - file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam_bai'], checkIfExists: true), - ]) - input[2] = Channel.of([ - [:], - file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true) - ]) - input[3] = Channel.of(file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)) - input[4] = Channel.of(file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)) + input[0] = Channel.of( + [ + [ id:'test', sample:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true) + ], + [ + [ id:'test2', sample:'test2' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true) + ], + ) + input[1] = Channel.of( + [ + [ id:'test', sample:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam_bai'], checkIfExists: true) + ], + [ + [ id:'test2', sample:'test2' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam_bai'], checkIfExists: true) + ], + ) + input[2] = Channel.empty() + input[3] = Channel.of(file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true)) + input[4] = [file("$baseDir/modules/msk/custom/fingerprintcombine/tests/loci_mapping.tsv", checkIfExists:true)] + input[5] = Channel.of(file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)) + input[6] = Channel.of(file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)) + input[7] = "hg19" + input[8] = true """ } } diff --git a/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test.snap b/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test.snap index 9952b0bd..17ed67dc 100644 --- a/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test.snap +++ b/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test.snap @@ -8,12 +8,59 @@ "id": 
"test", "sample": "test" }, - "test.fp.tsv:md5,9fa9a081f17ee52f03463c96d46a23aa" + "test.fp.tsv:md5,c467328eb3c7fb534b555b83b0227206" + ], + [ + { + "id": "test2", + "sample": "test2" + }, + "test2.fp.tsv:md5,c3fbcee584048e9bc4fc93bc6ca487d2" ] ], "1": [ + [ + { + "id": "test", + "sample": "test" + }, + "test.contamination.tsv:md5,5b533c60b8eff1f4d2c5fe58a8262303" + ], + [ + { + "id": "test2", + "sample": "test2" + }, + "test2.contamination.tsv:md5,2eb950d4d5e0f9b4f7ae53d41d22fb5f" + ] + ], + "2": [ + "0DPfilter_ALL_FP.txt:md5,509d9f7c1d89b9f8e2825bcc4793da3a" + ], + "3": [ + "versions.yml:md5,16a7edd0fbcb47825904a8cb939c7620", "versions.yml:md5,41ff30ed71b1d19e95a6095b9ac3ca94", - "versions.yml:md5,4b4b3ad40aa1c2c3a002c0c347c385b8" + "versions.yml:md5,4b4b3ad40aa1c2c3a002c0c347c385b8", + "versions.yml:md5,4fb29c6ff25ce4e29f6cf293a70aa8e6" + ], + "combined_fp_tsv": [ + "0DPfilter_ALL_FP.txt:md5,509d9f7c1d89b9f8e2825bcc4793da3a" + ], + "contamination_tsv": [ + [ + { + "id": "test", + "sample": "test" + }, + "test.contamination.tsv:md5,5b533c60b8eff1f4d2c5fe58a8262303" + ], + [ + { + "id": "test2", + "sample": "test2" + }, + "test2.contamination.tsv:md5,2eb950d4d5e0f9b4f7ae53d41d22fb5f" + ] ], "fp_tsv": [ [ @@ -21,12 +68,21 @@ "id": "test", "sample": "test" }, - "test.fp.tsv:md5,9fa9a081f17ee52f03463c96d46a23aa" + "test.fp.tsv:md5,c467328eb3c7fb534b555b83b0227206" + ], + [ + { + "id": "test2", + "sample": "test2" + }, + "test2.fp.tsv:md5,c3fbcee584048e9bc4fc93bc6ca487d2" ] ], "versions": [ + "versions.yml:md5,16a7edd0fbcb47825904a8cb939c7620", "versions.yml:md5,41ff30ed71b1d19e95a6095b9ac3ca94", - "versions.yml:md5,4b4b3ad40aa1c2c3a002c0c347c385b8" + "versions.yml:md5,4b4b3ad40aa1c2c3a002c0c347c385b8", + "versions.yml:md5,4fb29c6ff25ce4e29f6cf293a70aa8e6" ] } ], @@ -34,6 +90,6 @@ "nf-test": "0.9.2", "nextflow": "25.04.6" }, - "timestamp": "2025-08-14T19:50:44.771060618" + "timestamp": "2025-09-26T01:19:25.852151971" } } \ No newline at end of file diff --git 
a/subworkflows/msk/fingerprint_gbcms/tests/nextflow.config b/subworkflows/msk/fingerprint_gbcms/tests/nextflow.config new file mode 100644 index 00000000..17e225ec --- /dev/null +++ b/subworkflows/msk/fingerprint_gbcms/tests/nextflow.config @@ -0,0 +1,13 @@ +process { + withName: 'CUSTOM_FINGERPRINTCOMBINE' { + ext.args = "-d 0" + } + + withName: 'CUSTOM_FINGERPRINTCONTAMINATION' { + ext.args = "-d 0" + } + + withName: 'CUSTOM_FINGERPRINTVCFPARSER' { + ext.args = "-d 0" + } +} From ebf5800fff8c95b95434aa519bef8f80236397fa Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Fri, 26 Sep 2025 10:19:22 -0400 Subject: [PATCH 17/52] skip conda tests for fingerprint modules and subworkflows --- .github/skip_nf_test.json | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/skip_nf_test.json b/.github/skip_nf_test.json index 2991ca79..443e1390 100644 --- a/.github/skip_nf_test.json +++ b/.github/skip_nf_test.json @@ -1,6 +1,8 @@ { "conda": [ "modules/msk/custom/fingerprintvcfparser", + "modules/msk/custom/fingerprintcontamination", + "modules/msk/custom/fingerprintcombine", "modules/msk/calculatenoise", "modules/msk/ppflagfixer", "modules/msk/facets", From eb9e073ad88e221e897feb5acaf43bf00cfbbdcc Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Fri, 26 Sep 2025 10:32:13 -0400 Subject: [PATCH 18/52] update version output of contamination script --- .../resources/usr/bin/calculate_contamination.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/msk/custom/fingerprintcontamination/resources/usr/bin/calculate_contamination.py b/modules/msk/custom/fingerprintcontamination/resources/usr/bin/calculate_contamination.py index dea92003..40e1a210 100755 --- a/modules/msk/custom/fingerprintcontamination/resources/usr/bin/calculate_contamination.py +++ b/modules/msk/custom/fingerprintcontamination/resources/usr/bin/calculate_contamination.py @@ -52,7 +52,7 @@ def minor_contamination(normal, tumor, depth_filter): return 
tumor_homozygous_filtered['MAF'].mean() def main(): - parser = argparse.ArgumentParser(prog=sys.argv[0], description='Calculate major and minor contamination') + parser = argparse.ArgumentParser(prog=os.path.basename(sys.argv[0]), description='Calculate major and minor contamination') parser.add_argument('-t','--tumor', required=True, From e0cf039445a2b64df9c3cc103e27e296964a7c68 Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Fri, 26 Sep 2025 10:34:32 -0400 Subject: [PATCH 19/52] update snapshot --- .../fingerprintcontamination/tests/main.nf.test.snap | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/modules/msk/custom/fingerprintcontamination/tests/main.nf.test.snap b/modules/msk/custom/fingerprintcontamination/tests/main.nf.test.snap index 7ceedf6b..5b51c22a 100644 --- a/modules/msk/custom/fingerprintcontamination/tests/main.nf.test.snap +++ b/modules/msk/custom/fingerprintcontamination/tests/main.nf.test.snap @@ -12,7 +12,7 @@ ] ], "1": [ - "versions.yml:md5,f1635e715bcdf39792aa9f7fc3cf4d84" + "versions.yml:md5,904a4c1ae690600f67c1ceb3d72c5ce1" ], "contamination_tsv": [ [ @@ -24,7 +24,7 @@ ] ], "versions": [ - "versions.yml:md5,f1635e715bcdf39792aa9f7fc3cf4d84" + "versions.yml:md5,904a4c1ae690600f67c1ceb3d72c5ce1" ] } ], @@ -32,7 +32,7 @@ "nf-test": "0.9.2", "nextflow": "25.04.6" }, - "timestamp": "2025-09-25T17:13:52.297869395" + "timestamp": "2025-09-26T10:33:23.354208776" }, "sarscov2 - bam": { "content": [ @@ -47,7 +47,7 @@ ] ], "1": [ - "versions.yml:md5,f1635e715bcdf39792aa9f7fc3cf4d84" + "versions.yml:md5,904a4c1ae690600f67c1ceb3d72c5ce1" ], "contamination_tsv": [ [ @@ -59,7 +59,7 @@ ] ], "versions": [ - "versions.yml:md5,f1635e715bcdf39792aa9f7fc3cf4d84" + "versions.yml:md5,904a4c1ae690600f67c1ceb3d72c5ce1" ] } ], @@ -67,6 +67,6 @@ "nf-test": "0.9.2", "nextflow": "25.04.6" }, - "timestamp": "2025-09-25T17:13:42.805178656" + "timestamp": "2025-09-26T10:33:12.245205382" } } \ No newline at end of file From 
66910630f9ee65a6b1326042c81b8963ba9aeb6c Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Fri, 26 Sep 2025 11:07:12 -0400 Subject: [PATCH 20/52] update snapshot --- subworkflows/msk/fingerprint_gbcms/tests/main.nf.test.snap | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test.snap b/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test.snap index 17ed67dc..492f6ee5 100644 --- a/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test.snap +++ b/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test.snap @@ -38,7 +38,7 @@ "0DPfilter_ALL_FP.txt:md5,509d9f7c1d89b9f8e2825bcc4793da3a" ], "3": [ - "versions.yml:md5,16a7edd0fbcb47825904a8cb939c7620", + "versions.yml:md5,1d2160eb0eb102d00d5786a8d056328e", "versions.yml:md5,41ff30ed71b1d19e95a6095b9ac3ca94", "versions.yml:md5,4b4b3ad40aa1c2c3a002c0c347c385b8", "versions.yml:md5,4fb29c6ff25ce4e29f6cf293a70aa8e6" @@ -79,7 +79,7 @@ ] ], "versions": [ - "versions.yml:md5,16a7edd0fbcb47825904a8cb939c7620", + "versions.yml:md5,1d2160eb0eb102d00d5786a8d056328e", "versions.yml:md5,41ff30ed71b1d19e95a6095b9ac3ca94", "versions.yml:md5,4b4b3ad40aa1c2c3a002c0c347c385b8", "versions.yml:md5,4fb29c6ff25ce4e29f6cf293a70aa8e6" @@ -90,6 +90,6 @@ "nf-test": "0.9.2", "nextflow": "25.04.6" }, - "timestamp": "2025-09-26T01:19:25.852151971" + "timestamp": "2025-09-26T11:05:45.091814897" } } \ No newline at end of file From de1c5f604ccb7906711cb162fea9d956f6c37fa3 Mon Sep 17 00:00:00 2001 From: anoronh4 Date: Wed, 5 Nov 2025 16:41:53 -0500 Subject: [PATCH 21/52] exclude X, Y chromosomes from contamination calculations --- .../resources/usr/bin/calculate_contamination.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/modules/msk/custom/fingerprintcontamination/resources/usr/bin/calculate_contamination.py b/modules/msk/custom/fingerprintcontamination/resources/usr/bin/calculate_contamination.py index 40e1a210..a8d23c06 100755 --- 
a/modules/msk/custom/fingerprintcontamination/resources/usr/bin/calculate_contamination.py +++ b/modules/msk/custom/fingerprintcontamination/resources/usr/bin/calculate_contamination.py @@ -42,11 +42,7 @@ def get_coverage(file, depth_filter): def minor_contamination(normal, tumor, depth_filter): homozygous_sites = normal.index[normal['MAF'] < .10] - - print(homozygous_sites) - tumor_homozygous = tumor.loc[homozygous_sites] - print(tumor_homozygous) tumor_homozygous_filtered = get_coverage(tumor_homozygous, depth_filter) return tumor_homozygous_filtered['MAF'].mean() @@ -83,7 +79,9 @@ def main(): fields = ['Position', 'Alleles', 'Genotype', 'MAF'] tumor = pd.read_csv(args.tumor, sep='\t',names=fields,header=0) + tumor = tumor[~tumor['Position'].str.contains('X|Y', na=False)] normal = pd.read_csv(args.normal, sep='\t',names=fields,header=0) + normal = normal[~normal['Position'].str.contains('X|Y', na=False)] major_contam = major_contamination(tumor, depth_filter=args.depthfilter) minor_contam = minor_contamination(normal, tumor, depth_filter=args.depthfilter) From a461d1eaa857d2e6f21586d2a2ed6765544a2e90 Mon Sep 17 00:00:00 2001 From: anoronh4 Date: Thu, 6 Nov 2025 15:57:23 -0500 Subject: [PATCH 22/52] set index of table to 'Position' --- .../resources/usr/bin/calculate_contamination.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modules/msk/custom/fingerprintcontamination/resources/usr/bin/calculate_contamination.py b/modules/msk/custom/fingerprintcontamination/resources/usr/bin/calculate_contamination.py index a8d23c06..8a0dc2ee 100755 --- a/modules/msk/custom/fingerprintcontamination/resources/usr/bin/calculate_contamination.py +++ b/modules/msk/custom/fingerprintcontamination/resources/usr/bin/calculate_contamination.py @@ -80,8 +80,10 @@ def main(): tumor = pd.read_csv(args.tumor, sep='\t',names=fields,header=0) tumor = tumor[~tumor['Position'].str.contains('X|Y', na=False)] + tumor = tumor.set_index('Position') normal = pd.read_csv(args.normal, 
sep='\t',names=fields,header=0) normal = normal[~normal['Position'].str.contains('X|Y', na=False)] + normal = normal.set_index('Position') major_contam = major_contamination(tumor, depth_filter=args.depthfilter) minor_contam = minor_contamination(normal, tumor, depth_filter=args.depthfilter) From a280d3020773097ee1ed08245adb6369724a18b7 Mon Sep 17 00:00:00 2001 From: anoronh4 Date: Thu, 6 Nov 2025 19:31:17 -0500 Subject: [PATCH 23/52] fixed filtering of table by index labels --- .../resources/usr/bin/calculate_contamination.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/msk/custom/fingerprintcontamination/resources/usr/bin/calculate_contamination.py b/modules/msk/custom/fingerprintcontamination/resources/usr/bin/calculate_contamination.py index 8a0dc2ee..3f5ece93 100755 --- a/modules/msk/custom/fingerprintcontamination/resources/usr/bin/calculate_contamination.py +++ b/modules/msk/custom/fingerprintcontamination/resources/usr/bin/calculate_contamination.py @@ -42,7 +42,7 @@ def get_coverage(file, depth_filter): def minor_contamination(normal, tumor, depth_filter): homozygous_sites = normal.index[normal['MAF'] < .10] - tumor_homozygous = tumor.loc[homozygous_sites] + tumor_homozygous = tumor.loc[[i for i in homozygous_sites if i in tumor.index]] tumor_homozygous_filtered = get_coverage(tumor_homozygous, depth_filter) return tumor_homozygous_filtered['MAF'].mean() From 04ff5ad4590ab2ca48623e782cace313fdf8e339 Mon Sep 17 00:00:00 2001 From: anoronh4 Date: Thu, 6 Nov 2025 19:32:49 -0500 Subject: [PATCH 24/52] fix indentation --- .github/skip_nf_test.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/skip_nf_test.json b/.github/skip_nf_test.json index 443e1390..41eb3ca5 100644 --- a/.github/skip_nf_test.json +++ b/.github/skip_nf_test.json @@ -1,8 +1,8 @@ { "conda": [ "modules/msk/custom/fingerprintvcfparser", - "modules/msk/custom/fingerprintcontamination", - "modules/msk/custom/fingerprintcombine", + 
"modules/msk/custom/fingerprintcontamination", + "modules/msk/custom/fingerprintcombine", "modules/msk/calculatenoise", "modules/msk/ppflagfixer", "modules/msk/facets", From e5fb9eda245ecc87f5a59648b9ca5ff660ff73f5 Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Tue, 16 Dec 2025 23:26:09 -0500 Subject: [PATCH 25/52] fix file formatting and spacing --- .../fingerprintcombine/tests/loci_mapping.tsv | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/modules/msk/custom/fingerprintcombine/tests/loci_mapping.tsv b/modules/msk/custom/fingerprintcombine/tests/loci_mapping.tsv index 7592a2e3..0339b805 100644 --- a/modules/msk/custom/fingerprintcombine/tests/loci_mapping.tsv +++ b/modules/msk/custom/fingerprintcombine/tests/loci_mapping.tsv @@ -1,10 +1,10 @@ -GRCH37_CHROM GRCH37_POS GRCH38_CHROM GRCH38_POS -MT192765.1 197 MT192765.1 199 -MT192765.1 4788 MT192765.1 4900 -MT192765.1 8236 MT192765.1 8257 -MT192765.1 10506 MT192765.1 10528 -MT192765.1 11037 MT192765.1 11059 -MT192765.1 15009 MT192765.1 15500 -MT192765.1 18807 MT192765.1 18929 -MT192765.1 23813 MT192765.1 24835 -MT192765.1 24103 MT192765.1 25125 +GRCH37_CHROM GRCH37_POS GRCH38_CHROM GRCH38_POS +MT192765.1 197 MT192765.1 199 +MT192765.1 4788 MT192765.1 4900 +MT192765.1 8236 MT192765.1 8257 +MT192765.1 10506 MT192765.1 10528 +MT192765.1 11037 MT192765.1 11059 +MT192765.1 15009 MT192765.1 15500 +MT192765.1 18807 MT192765.1 18929 +MT192765.1 23813 MT192765.1 24835 +MT192765.1 24103 MT192765.1 25125 From 0c3c341de7152f696929ad86e5a4b4805f8333d3 Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Tue, 16 Dec 2025 23:27:11 -0500 Subject: [PATCH 26/52] add meta map to custom/fingerprintcombine --- modules/msk/custom/fingerprintcombine/main.nf | 8 +++++--- .../msk/custom/fingerprintcombine/tests/main.nf.test | 11 ++++++----- subworkflows/msk/fingerprint_gbcms_batch/main.nf | 10 +++++++--- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git 
a/modules/msk/custom/fingerprintcombine/main.nf b/modules/msk/custom/fingerprintcombine/main.nf index be6b873d..175a140c 100644 --- a/modules/msk/custom/fingerprintcombine/main.nf +++ b/modules/msk/custom/fingerprintcombine/main.nf @@ -1,4 +1,5 @@ process CUSTOM_FINGERPRINTCOMBINE { + tag '$meta.id' label 'process_single' conda "${moduleDir}/environment.yml" @@ -9,14 +10,15 @@ process CUSTOM_FINGERPRINTCOMBINE { input: - tuple path(fp_tsv), // list of paths to fingerprint TSV files + tuple val(meta), + path(fp_tsv), // list of paths to fingerprint TSV files val(sample), // list of sample identifiers, one per TSV file, in the same order val(genome_build) // list of genome builds, one per TSV file, in the same order path(liftover_loci_mapping) output: - path "*DPfilter_ALL_FP.txt", emit: combined_fp_tsv - path "versions.yml" , emit: versions + tuple val(meta), path("*DPfilter_ALL_FP.txt"), emit: combined_fp_tsv + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when diff --git a/modules/msk/custom/fingerprintcombine/tests/main.nf.test b/modules/msk/custom/fingerprintcombine/tests/main.nf.test index 11176e34..005f6a8c 100644 --- a/modules/msk/custom/fingerprintcombine/tests/main.nf.test +++ b/modules/msk/custom/fingerprintcombine/tests/main.nf.test @@ -22,14 +22,14 @@ nextflow_process { """ input[0] = Channel.of( [ - [ id:'test', sample:'test' ], // meta map + [ id:'test', sample:'test', pool:'mypool' ], // meta map file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true), file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam_bai'], checkIfExists: true), file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true), "variant_file.vcf" ], [ - [ id:'test2', sample:'test2' ], // meta map + [ id:'test2', sample:'test2', pool:'mypool' ], // meta map file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true), 
file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam_bai'], checkIfExists: true), file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true), @@ -55,9 +55,10 @@ nextflow_process { process { """ input[0] = CUSTOM_FINGERPRINTVCFPARSER.out.tsv - .map{ meta, tsv -> ["placeholder",tsv, meta.id, "hg19"] } - .groupTuple(by:[0]) - .map{ placeholder, tsv, sampleid, genome -> [tsv, sampleid, genome] } + .map{ meta, tsv -> + def meta2 = [id:meta.pool] + [[id:meta.pool], tsv, meta.id, "hg19"] + }.groupTuple(by:[0]) input[1] = file("$baseDir/modules/msk/custom/fingerprintcombine/tests/loci_mapping.tsv", checkIfExists:true) """ } diff --git a/subworkflows/msk/fingerprint_gbcms_batch/main.nf b/subworkflows/msk/fingerprint_gbcms_batch/main.nf index ee16fcd6..2b42fe0c 100644 --- a/subworkflows/msk/fingerprint_gbcms_batch/main.nf +++ b/subworkflows/msk/fingerprint_gbcms_batch/main.nf @@ -15,9 +15,13 @@ workflow FINGERPRINT_GBCMS_BATCH { CUSTOM_FINGERPRINTCOMBINE( ch_fp - .map{ meta, tsv -> ["placeholder", tsv, meta.id, meta.genome ?: default_genome ] } - .groupTuple(by:[0]) - .map{ placeholder, tsv, sampleid, genome -> [tsv, sampleid, genome] }, + .map{ meta, tsv -> + def meta2 = [id:'defaultbatch'] + if (meta.pool) { + meta2.id = meta.pool + } + [meta2, tsv, meta.id, meta.genome ?: default_genome ] + }.groupTuple(by:[0]), ch_liftover_loci_mapping.first() ) ch_versions = ch_versions.mix(CUSTOM_FINGERPRINTCOMBINE.out.versions.first()) From 3cc109ef3f4840d5a571b767cd2ee0623808179a Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Wed, 17 Dec 2025 19:10:50 -0500 Subject: [PATCH 27/52] add custom/fingerprintcorrelation --- modules/msk/custom/fingerprintcombine/main.nf | 19 +- .../msk/custom/fingerprintcombine/meta.yml | 93 +++++--- .../resources/usr/bin/complete_FP_table.R | 2 +- .../fingerprintcombine/tests/main.nf.test | 3 +- .../tests/main.nf.test.snap | 68 ++++-- .../custom/fingerprintcontamination/main.nf | 12 +- 
.../custom/fingerprintcontamination/meta.yml | 21 +- .../tests/main.nf.test.snap | 40 +++- .../fingerprintcorrelation/environment.yml | 18 ++ .../msk/custom/fingerprintcorrelation/main.nf | 41 ++++ .../custom/fingerprintcorrelation/meta.yml | 51 +++++ .../resources/usr/bin/plot_gbcm.R | 209 ++++++++++++++++++ .../fingerprintcorrelation/tests/main.nf.test | 109 +++++++++ .../tests/main.nf.test.snap | 50 +++++ .../tests/nextflow.config | 8 + .../msk/custom/fingerprintvcfparser/main.nf | 12 +- .../msk/custom/fingerprintvcfparser/meta.yml | 52 ++--- .../usr/bin/parse_fingerprint_vcf.py | 3 +- .../tests/main.nf.test.snap | 40 +++- modules/msk/gbcms/main.nf | 10 +- modules/msk/gbcms/meta.yml | 63 ++++-- modules/msk/gbcms/tests/main.nf.test.snap | 20 +- subworkflows/msk/fingerprint_gbcms/main.nf | 8 - subworkflows/msk/fingerprint_gbcms/meta.yml | 2 + .../msk/fingerprint_gbcms/tests/main.nf.test | 1 + .../fingerprint_gbcms/tests/main.nf.test.snap | 32 ++- .../msk/fingerprint_gbcms_batch/main.nf | 13 +- .../msk/fingerprint_gbcms_batch/meta.yml | 3 +- .../tests/main.nf.test | 1 + .../tests/main.nf.test.snap | 26 ++- 30 files changed, 803 insertions(+), 227 deletions(-) create mode 100644 modules/msk/custom/fingerprintcorrelation/environment.yml create mode 100644 modules/msk/custom/fingerprintcorrelation/main.nf create mode 100644 modules/msk/custom/fingerprintcorrelation/meta.yml create mode 100755 modules/msk/custom/fingerprintcorrelation/resources/usr/bin/plot_gbcm.R create mode 100644 modules/msk/custom/fingerprintcorrelation/tests/main.nf.test create mode 100644 modules/msk/custom/fingerprintcorrelation/tests/main.nf.test.snap create mode 100644 modules/msk/custom/fingerprintcorrelation/tests/nextflow.config diff --git a/modules/msk/custom/fingerprintcombine/main.nf b/modules/msk/custom/fingerprintcombine/main.nf index 175a140c..121beb5f 100644 --- a/modules/msk/custom/fingerprintcombine/main.nf +++ b/modules/msk/custom/fingerprintcombine/main.nf @@ -10,15 +10,12 @@ 
process CUSTOM_FINGERPRINTCOMBINE { input: - tuple val(meta), - path(fp_tsv), // list of paths to fingerprint TSV files - val(sample), // list of sample identifiers, one per TSV file, in the same order - val(genome_build) // list of genome builds, one per TSV file, in the same order + tuple val(meta), path(fp_tsv), val(sample), val(genome_build) path(liftover_loci_mapping) output: - tuple val(meta), path("*DPfilter_ALL_FP.txt"), emit: combined_fp_tsv - path "versions.yml" , emit: versions + tuple val(meta), path("*DPfilter_ALL_FP.txt") , emit: combined_fp_tsv + tuple val("${task.process}"), val('complete_FP_table.R'), val('0.1.0'), emit: versions_fingerprintcombine, topic: versions when: task.ext.when == null || task.ext.when @@ -44,11 +41,6 @@ process CUSTOM_FINGERPRINTCOMBINE { -i input.tsv \\ -l $liftover_loci_mapping \\ $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - complete_FP_table.R: 0.1.0 - END_VERSIONS """ stub: @@ -58,10 +50,5 @@ process CUSTOM_FINGERPRINTCOMBINE { echo $args touch XDPfilter_ALL_FP.txt - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - complete_FP_table.R: 0.1.0 - END_VERSIONS """ } diff --git a/modules/msk/custom/fingerprintcombine/meta.yml b/modules/msk/custom/fingerprintcombine/meta.yml index 52b4ed5a..7ed95b68 100644 --- a/modules/msk/custom/fingerprintcombine/meta.yml +++ b/modules/msk/custom/fingerprintcombine/meta.yml @@ -1,55 +1,82 @@ # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json -# # TODO nf-core: Add a description of the module and list keywords name: "custom_fingerprintcombine" description: | A module to combine multiple fingerprint TSV files into a single comprehensive table, with optional liftover of loci coordinates. 
keywords: -- fingerprint -- qc -- loci -- tsv -- correlation + - fingerprint + - qc + - loci + - tsv + - correlation tools: -## TODO nf-core: Add a description and other details for the software below -- "custom": - description: "A custom R script to combine fingerprint TSV files" - homepage: "https://github.com/mskcc-omics-workflows/modules/tree/main/modules/msk/custom/fingerprintcombine/meta.yml" - + - "custom": + description: "A custom R script to combine fingerprint TSV files" + homepage: "https://github.com/mskcc-omics-workflows/modules/tree/main/modules/msk/custom/fingerprintcombine/meta.yml" + identifier: "" input: - - - fp_tsv: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - fp_tsv: type: file description: | Fingerprint TSV files to be combined. Structure: [ val(sample), val(genome_build), path(fp_tsv) ] + ontologies: [] - sample: type: string description: Sample identifier corresponding to each fingerprint TSV file. - genome_build: type: string - description: Genome build (e.g., hg19, hg38) corresponding to each fingerprint TSV file. - - liftover_loci_mapping: - type: file - description: | - A TSV file mapping original loci to liftover loci. - Format: original_chr, original_pos, liftover_chr, liftover_pos - pattern: "*.tsv" -output: - combined_fp_tsv: - - "*DPfilter_ALL_FP.txt": + description: + Genome build (e.g., hg19, hg38) corresponding to each fingerprint + TSV file. + - - liftover_loci_mapping: type: file - description: Wide table combining all input fingerprint TSV files. - pattern: '*DPfilter_ALL_FP.txt' + description: | + A TSV file mapping original loci to liftover loci. + Format: original_chr, original_pos, liftover_chr, liftover_pos + pattern: "*.tsv" ontologies: - - edam: http://edamontology.org/format_3750 # TSV + - edam: http://edamontology.org/format_3475 # TSV +output: + combined_fp_tsv: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test' ] + - "*DPfilter_ALL_FP.txt": + type: file + description: Wide table combining all input fingerprint TSV files. + pattern: "*DPfilter_ALL_FP.txt" + ontologies: + - edam: http://edamontology.org/format_3750 # TSV + versions_fingerprintcombine: + - - ${task.process}: + type: string + description: The name of the process + - complete_FP_table.R: + type: string + description: The name of the tool + - 0.1.0: + type: string + description: Version of the custom script +topics: versions: - - versions.yml: - type: file - description: File containing software versions - pattern: versions.yml - ontologies: - - edam: http://edamontology.org/format_3750 # YAML + - - ${task.process}: + type: string + description: The name of the process + - complete_FP_table.R: + type: string + description: The name of the tool + - 0.1.0: + type: string + description: Version of the custom script authors: -- "@anoronh4" + - "@anoronh4" maintainers: -- "@anoronh4" + - "@anoronh4" diff --git a/modules/msk/custom/fingerprintcombine/resources/usr/bin/complete_FP_table.R b/modules/msk/custom/fingerprintcombine/resources/usr/bin/complete_FP_table.R index 3265f903..7a8e3ad5 100755 --- a/modules/msk/custom/fingerprintcombine/resources/usr/bin/complete_FP_table.R +++ b/modules/msk/custom/fingerprintcombine/resources/usr/bin/complete_FP_table.R @@ -49,7 +49,7 @@ hg19_hg38_mapper$Loci_hg38 = paste(hg19_hg38_mapper$GRCH38_CHROM,hg19_hg38_mappe hg19_hg38_mapper = hg19_hg38_mapper %>% select(Loci_hg19, Loci_hg38) %>% unique() message("Loading Samples") -input_table = fread(args$input_table, header = T) +input_table = fread(args$input_table, header = T) %>% arrange(sample_id) for (i in 1:nrow(input_table)){ sample = input_table$sample_id[i] genome_build = input_table$genome_build[i] diff --git a/modules/msk/custom/fingerprintcombine/tests/main.nf.test b/modules/msk/custom/fingerprintcombine/tests/main.nf.test index 005f6a8c..03b3388b 100644 --- 
a/modules/msk/custom/fingerprintcombine/tests/main.nf.test +++ b/modules/msk/custom/fingerprintcombine/tests/main.nf.test @@ -56,7 +56,7 @@ nextflow_process { """ input[0] = CUSTOM_FINGERPRINTVCFPARSER.out.tsv .map{ meta, tsv -> - def meta2 = [id:meta.pool] + println meta [[id:meta.pool], tsv, meta.id, "hg19"] }.groupTuple(by:[0]) input[1] = file("$baseDir/modules/msk/custom/fingerprintcombine/tests/loci_mapping.tsv", checkIfExists:true) @@ -82,6 +82,7 @@ nextflow_process { process { """ input[0] = [ + [id:"testsample"], [file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true)], ["testsample"], ["hg19"] diff --git a/modules/msk/custom/fingerprintcombine/tests/main.nf.test.snap b/modules/msk/custom/fingerprintcombine/tests/main.nf.test.snap index e8576d2f..68a0b5c4 100644 --- a/modules/msk/custom/fingerprintcombine/tests/main.nf.test.snap +++ b/modules/msk/custom/fingerprintcombine/tests/main.nf.test.snap @@ -3,46 +3,82 @@ "content": [ { "0": [ - "XDPfilter_ALL_FP.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + [ + { + "id": "testsample" + }, + "XDPfilter_ALL_FP.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] ], "1": [ - "versions.yml:md5,3bd40a0fd11a907f31110dd113fd88c2" + [ + "CUSTOM_FINGERPRINTCOMBINE", + "complete_FP_table.R", + "0.1.0" + ] ], "combined_fp_tsv": [ - "XDPfilter_ALL_FP.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + [ + { + "id": "testsample" + }, + "XDPfilter_ALL_FP.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] ], - "versions": [ - "versions.yml:md5,3bd40a0fd11a907f31110dd113fd88c2" + "versions_fingerprintvcfparser": [ + [ + "CUSTOM_FINGERPRINTCOMBINE", + "complete_FP_table.R", + "0.1.0" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-09-25T22:47:09.499353594" + "timestamp": "2025-12-17T13:28:48.061258305" }, "sarscov2 - bam": { "content": [ { "0": [ - "0DPfilter_ALL_FP.txt:md5,509d9f7c1d89b9f8e2825bcc4793da3a" + [ + { + "id": 
"mypool" + }, + "0DPfilter_ALL_FP.txt:md5,66113c255cf1f52e27802183764a406d" + ] ], "1": [ - "versions.yml:md5,3bd40a0fd11a907f31110dd113fd88c2" + [ + "CUSTOM_FINGERPRINTCOMBINE", + "complete_FP_table.R", + "0.1.0" + ] ], "combined_fp_tsv": [ - "0DPfilter_ALL_FP.txt:md5,509d9f7c1d89b9f8e2825bcc4793da3a" + [ + { + "id": "mypool" + }, + "0DPfilter_ALL_FP.txt:md5,66113c255cf1f52e27802183764a406d" + ] ], - "versions": [ - "versions.yml:md5,3bd40a0fd11a907f31110dd113fd88c2" + "versions_fingerprintvcfparser": [ + [ + "CUSTOM_FINGERPRINTCOMBINE", + "complete_FP_table.R", + "0.1.0" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-09-25T22:47:03.219089934" + "timestamp": "2025-12-17T13:28:39.908034467" } } \ No newline at end of file diff --git a/modules/msk/custom/fingerprintcontamination/main.nf b/modules/msk/custom/fingerprintcontamination/main.nf index bbdcdb51..4d48deda 100644 --- a/modules/msk/custom/fingerprintcontamination/main.nf +++ b/modules/msk/custom/fingerprintcontamination/main.nf @@ -12,8 +12,8 @@ process CUSTOM_FINGERPRINTCONTAMINATION { tuple val(meta), path(fp_tumor), path(fp_normal) output: - tuple val(meta), path("*.contamination.tsv"), emit: contamination_tsv - path "versions.yml" , emit: versions + tuple val(meta), path("*.contamination.tsv") , emit: contamination_tsv + tuple val("${task.process}"), val('calculate_contamination.py'), eval('calculate_contamination.py -v | cut -f 2 -d" "'), emit: versions_fingerprintvcfparser, topic: versions when: task.ext.when == null || task.ext.when @@ -28,10 +28,6 @@ process CUSTOM_FINGERPRINTCONTAMINATION { -o ${prefix}.contamination.tsv \\ ${args} - cat <<-END_VERSIONS > versions.yml - "${task.process}": - calculate_contamination.py: \$( calculate_contamination.py --version | rev | cut -f 1 -d " " | rev ) - END_VERSIONS """ stub: @@ -39,9 +35,5 @@ process CUSTOM_FINGERPRINTCONTAMINATION { """ touch ${prefix}.contamination.tsv - 
cat <<-END_VERSIONS > versions.yml - "${task.process}": - calculate_contamination.py: \$( calculate_contamination.py --version | rev | cut -f 1 -d " " | rev ) - END_VERSIONS """ } diff --git a/modules/msk/custom/fingerprintcontamination/meta.yml b/modules/msk/custom/fingerprintcontamination/meta.yml index 4fde47a5..162fff70 100644 --- a/modules/msk/custom/fingerprintcontamination/meta.yml +++ b/modules/msk/custom/fingerprintcontamination/meta.yml @@ -11,11 +11,13 @@ tools: description: "Python Data Analysis Library" homepage: "https://pandas.pydata.org/" documentation: "https://pandas.pydata.org/docs/" + identifier: biotools:pandas - "numpy": description: "Scientific computing library for Python" homepage: "https://numpy.org/" documentation: "https://numpy.org/doc/" + identifier: biotools:numpy input: - - meta: type: map @@ -27,7 +29,8 @@ input: description: Fingerprint table file for tumor sample pattern: "*.fp.tsv" ontologies: - - edam: "http://edamontology.org/format_3750" # TSV + - edam: "http://edamontology.org/format_3750" # TSV + - edam: http://edamontology.org/format_3475 # TSV - fp_normal: type: file description: Fingerprint table file for normal sample @@ -35,9 +38,10 @@ input: ontologies: - edam: "http://edamontology.org/format_3750" + - edam: http://edamontology.org/format_3475 # TSV output: - - contamination_tsv: - - meta: + contamination_tsv: + - - meta: type: map description: | Groovy Map containing sample information @@ -48,12 +52,11 @@ output: pattern: "*.contamination.tsv" ontologies: - edam: "http://edamontology.org/format_3750" # TSV - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" - + - edam: http://edamontology.org/format_3475 # TSV + versions_fingerprintvcfparser: + - - ${task.process}: {} + - calculate_contamination.py: {} + - 'calculate_contamination.py -v | cut -f 2 -d" ': {} authors: - "@anoronh4" maintainers: diff --git 
a/modules/msk/custom/fingerprintcontamination/tests/main.nf.test.snap b/modules/msk/custom/fingerprintcontamination/tests/main.nf.test.snap index 5b51c22a..233a4680 100644 --- a/modules/msk/custom/fingerprintcontamination/tests/main.nf.test.snap +++ b/modules/msk/custom/fingerprintcontamination/tests/main.nf.test.snap @@ -12,7 +12,11 @@ ] ], "1": [ - "versions.yml:md5,904a4c1ae690600f67c1ceb3d72c5ce1" + [ + "CUSTOM_FINGERPRINTCONTAMINATION", + "calculate_contamination.py", + "" + ] ], "contamination_tsv": [ [ @@ -23,16 +27,20 @@ "test.contamination.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions": [ - "versions.yml:md5,904a4c1ae690600f67c1ceb3d72c5ce1" + "versions_fingerprintvcfparser": [ + [ + "CUSTOM_FINGERPRINTCONTAMINATION", + "calculate_contamination.py", + "" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-09-26T10:33:23.354208776" + "timestamp": "2025-12-17T13:12:25.869022442" }, "sarscov2 - bam": { "content": [ @@ -47,7 +55,11 @@ ] ], "1": [ - "versions.yml:md5,904a4c1ae690600f67c1ceb3d72c5ce1" + [ + "CUSTOM_FINGERPRINTCONTAMINATION", + "calculate_contamination.py", + "" + ] ], "contamination_tsv": [ [ @@ -58,15 +70,19 @@ "test.contamination.tsv:md5,5b533c60b8eff1f4d2c5fe58a8262303" ] ], - "versions": [ - "versions.yml:md5,904a4c1ae690600f67c1ceb3d72c5ce1" + "versions_fingerprintvcfparser": [ + [ + "CUSTOM_FINGERPRINTCONTAMINATION", + "calculate_contamination.py", + "" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-09-26T10:33:12.245205382" + "timestamp": "2025-12-17T13:12:16.153445117" } } \ No newline at end of file diff --git a/modules/msk/custom/fingerprintcorrelation/environment.yml b/modules/msk/custom/fingerprintcorrelation/environment.yml new file mode 100644 index 00000000..acabcada --- /dev/null +++ b/modules/msk/custom/fingerprintcorrelation/environment.yml @@ 
-0,0 +1,18 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::r-argparse=2.3.1 + - conda-forge::r-data.table=1.17.8 + - conda-forge::r-dplyr=1.1.4 + - conda-forge::r-ggforce=0.5.0 + - conda-forge::r-ggiraph=0.8.12 + - conda-forge::r-gtools=3.9.5 + - conda-forge::r-htmlwidgets=1.6.4 + - conda-forge::r-plotly=4.11.0 + - conda-forge::r-plyr=1.8.9 + - conda-forge::r-reshape2=1.4.4 + - conda-forge::r-scales=1.4.0 + - conda-forge::r-tidyverse=2.0.0 diff --git a/modules/msk/custom/fingerprintcorrelation/main.nf b/modules/msk/custom/fingerprintcorrelation/main.nf new file mode 100644 index 00000000..eafc51a7 --- /dev/null +++ b/modules/msk/custom/fingerprintcorrelation/main.nf @@ -0,0 +1,41 @@ +process CUSTOM_FINGERPRINTCORRELATION { + tag {'$prefix'} + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'docker://community.wave.seqera.io/library/r-argparse_r-data.table_r-dplyr_r-ggforce_pruned:5c045bc9fea1dbd5': + 'community.wave.seqera.io/library/r-argparse_r-data.table_r-dplyr_r-ggforce_pruned:5c045bc9fea1dbd5' } " + // 'oras://community.wave.seqera.io/library/r-argparse_r-data.table_r-dplyr_r-ggforce_pruned:8211a2010a4712ea': + + input: + tuple val(meta), path(combined_fp_tsv) + + output: + tuple val(meta), path("*_gbcm_sample-to-sample4.pdf"), emit: heatmap_pdf + tuple val(meta), path("*_interactive4.html"), emit: heatmap_html + tuple val(meta), path("*_observations.tab"), emit: observations_tab + tuple val("${task.process}"), val('plot_gbcm.R'), val("0.1.0"), topic: versions, emit: versions_fingerprintcorrelation + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = meta.id ?: "batch" + """ + plot_gbcm.R \\ + -t ${combined_fp_tsv} \\ + -o ./ \\ + -p ${prefix} + """ + + stub: + def args = task.ext.args ?: '' + def prefix = meta.id ?: "batch" + """ + touch ${prefix}_gbcm_sample-to-sample4.pdf + touch ${prefix}_interactive4.html + touch ${prefix}_observations.tab + """ +} diff --git a/modules/msk/custom/fingerprintcorrelation/meta.yml b/modules/msk/custom/fingerprintcorrelation/meta.yml new file mode 100644 index 00000000..8e5e1d37 --- /dev/null +++ b/modules/msk/custom/fingerprintcorrelation/meta.yml @@ -0,0 +1,51 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "custom_fingerprintcorrelation" +description: null +keywords: + - sort + - example + - genomics +tools: + - "custom": + description: "" + homepage: "" + documentation: "" + tool_dev_url: "" + doi: "" + licence: null + identifier: null + +input: + - - meta: {} + - combined_fp_tsv: {} +output: + heatmap_pdf: + - - meta: {} + - "*_gbcm_sample-to-sample4.pdf": {} + heatmap_html: + - - meta: {} + - "*_interactive4.html": {} + observations_tab: + - - meta: {} + - 
"*_observations.tab": {} + versions_fingerprintcorrelation: + - - ${task.process}: + type: string + description: The name of the process + - plot_gbcm.R: {} + - 0.1.0: {} +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - plot_gbcm.R: + type: string + description: The name of the tool + - 0.1.0: + type: eval + description: The expression to obtain the version of the tool +authors: + - "@anoronh4" +maintainers: + - "@anoronh4" diff --git a/modules/msk/custom/fingerprintcorrelation/resources/usr/bin/plot_gbcm.R b/modules/msk/custom/fingerprintcorrelation/resources/usr/bin/plot_gbcm.R new file mode 100755 index 00000000..5ba3460c --- /dev/null +++ b/modules/msk/custom/fingerprintcorrelation/resources/usr/bin/plot_gbcm.R @@ -0,0 +1,209 @@ +#!/usr/bin/env Rscript +#------------------------------------------------------------------------------- +# Script: plot_gbcm.R +# Author: Hanan Salim +# Date: 2025-11-03 +# Version: 0.1.0 +# +# Description: This script takes in a wide fingerprinting table pertaining +# to multiple samples and plots in pdf and html formats. +# Additionally, a table with the number of observations for each correlation +# is also written to an output file. 
+# +#------------------------------------------------------------------------------- + + +rm(list=ls()) + +library(argparse, quietly = T) +library(plyr, quietly = T) +library(dplyr, quietly = T) +library(data.table, quietly = T) +library(tidyverse, quietly = T) +library(scales, quietly = T) +library(ggforce, quietly = T) +library(gtools, quietly = T) +library(plotly) +library(htmlwidgets) +library(ggiraph) +library(reshape2) + +`%notin%` <- Negate(`%in%`) +`%notlike%` <- Negate(`%like%`) + +parser = ArgumentParser(description = 'create correlation plots for a given sample') + +parser$add_argument('-t', '--table', required = TRUE, + help = 'summary table') + +parser$add_argument('-o', '--analysis_folder', required = TRUE, + help = 'output folder') + +parser$add_argument('-p', '--pool', required = TRUE, + help = 'pool ID') + +args = parser$parse_args() + +all_fp_gbcm_final = fread(args$table, sep = '\t') +outdir = args$analysis_folder +sample = args$pool + +all_fp_gbcm_final <- all_fp_gbcm_final %>% select(-contains(c('Loci_hg19', 'Loci_hg38'))) +cols <- grep("VAF", names(all_fp_gbcm_final), value = TRUE) +#print(class(all_fp_gbcm_final)) +all_fp_gbcm_final <- all_fp_gbcm_final[, ..cols] + +for ( col in 1:ncol(all_fp_gbcm_final)){ + colnames(all_fp_gbcm_final)[col] <- sub("VAF_", "", colnames(all_fp_gbcm_final)[col]) +} + +title = paste("Patient:", sample,"; ", nrow(all_fp_gbcm_final)," Loci used",sep = "") + +all_fp_gbcm_final_matrix <- data.matrix(all_fp_gbcm_final) +all_fp_gbcm_final_matrix = cor(as.matrix(all_fp_gbcm_final_matrix), method = c("pearson"), use = "pairwise.complete.obs") + +gbcm_data_long <- reshape2::melt(all_fp_gbcm_final_matrix) +gbcm_observation = crossprod(!is.na(all_fp_gbcm_final)) +gbcm_obs_long <- reshape2::melt(gbcm_observation) +gbcm_combo_data <- data.frame(gbcm_data_long, size = gbcm_obs_long$value) + +# plot +#pdf(paste(outdir,"/",sample,'_sample-to-sample.pdf', sep = ""), width = 25, height = 25) + +n_x <- 
length(unique(gbcm_combo_data$Var1)) +n_y <- length(unique(gbcm_combo_data$Var2)) + +# Define your plot size (in inches) +plot_width_in <- 20 +plot_height_in <- 20 + +# Convert to mm (1 inch = 25.4 mm) +plot_width_mm <- plot_width_in * 25.4 +plot_height_mm <- plot_height_in * 25.4 + +# Calculate tile size in mm +tile_width_mm <- plot_width_mm / n_x +tile_height_mm <- plot_height_mm / n_y + +# Max circle diameter (fits inside smallest tile dimension) +max_diameter_mm <- min(tile_width_mm, tile_height_mm) + +# Approximate max point size for geom_point (radius in mm) +max_point_size <- max_diameter_mm + +# Calculate log2 size column +gbcm_combo_data$log2_size <- log2(gbcm_combo_data$size) +#print(gbcm_combo_data$log2_size) + + +gbcm_combo_data$Var1 <- factor(gbcm_combo_data$Var1, levels = mixedsort(unique(gbcm_combo_data$Var1))) +gbcm_combo_data$Var2 <- factor(gbcm_combo_data$Var2, levels = mixedsort(unique(gbcm_combo_data$Var2))) + +p <- ggplot(gbcm_combo_data, aes(x = Var1, y = Var2)) + + geom_tile(color = "black", linewidth = 0.5, fill = NA) + + geom_point(aes(size = log2_size, fill = value), shape = 21, color = "black") + + #geom_text(aes(label = size), color = "white", size = 4) + + scale_x_discrete(limits = sort(levels(gbcm_combo_data$Var1))) + + scale_y_discrete(limits = sort(levels(gbcm_combo_data$Var2))) + + scale_fill_viridis_c( + name = "Correlation", + option = "viridis", + direction = -1, + alpha = 0.75, + begin = 0, + end = 1, + limits = c(-1, 1), + guide = guide_colorbar(direction = "vertical", + title.position = "top" + )) + + scale_size_continuous(limits = c(0, 14.2), # known max of log2(size) + range = c(0, max_point_size), + name = "Sites (log2)", + guide = guide_legend(direction = "vertical", + title.position = "top") + ) + + #scale_size_identity(name = "Sites (log2)", + # guide = guide_legend(direction = "vertical", + # title.position = "top"), + # breaks = rescale(c(2, 5, 10, 14.2), to = c(1, 10), from = c(0, max_log2)), + # labels = c("2", "5", 
"10", "14.2")) + + + #scale_size_continuous(name = "Sites (log2)", + #range = c(4, 32), + #guide = guide_legend(direction = "vertical", + #title.position = "top")) + + labs(title = title) + + theme_minimal() + + theme( + panel.grid = element_blank(), + axis.text.x = element_text(angle = 90, hjust = 1, size = 10, color = "black"), + axis.text.y = element_text(size = 10, color = "black"), + axis.title = element_blank(), + plot.title = element_text(hjust = 0.5, size = 20, margin = margin(b = 15)), + legend.position = "right", + legend.box = "horizontal", + legend.box.just = "left", + legend.title.align = 0.5, + legend.spacing.x = unit(1, "cm"), + aspect.ratio = 1 + ) + +p2 <- ggplot(gbcm_combo_data, aes(x = Var1, y = Var2)) + + geom_tile(color = "black", linewidth = 0.5, fill = NA) + + geom_point_interactive( + aes(size = log2_size, + fill = value, + tooltip = paste0( + "x: ", Var1, "\n", + "y: ", Var2, "\n", + "Size: ", size, "\n", + "Correlation: ", round(value, 2) + )), + shape = 21, + color = "black" + ) + + scale_x_discrete(limits = sort(levels(gbcm_combo_data$Var1))) + + scale_y_discrete(limits = sort(levels(gbcm_combo_data$Var2))) + + scale_fill_viridis_c( + name = "Correlation", + option = "viridis", + direction = -1, + alpha = 0.75, + begin = 0, + end = 1, + limits = c(-1, 1), + guide = guide_colorbar(direction = "vertical", + title.position = "top" + )) + + scale_size_continuous(limits = c(0, 14.2), # known max of log2(size) + range = c(0, max_point_size), + name = "Sites (log2)", + guide = guide_legend(direction = "vertical", + title.position = "top") + ) + + labs(title = title) + + theme_minimal() + + theme( + text = element_text(family = "Courier"), + panel.grid = element_blank(), + axis.text.x = element_text(angle = 90, hjust = 1, size = 10, color = "black"), + axis.text.y = element_text(size = 10, color = "black"), + axis.title = element_blank(), + plot.title = element_text(hjust = 0.5, size = 24, margin = margin(b = 15)), + legend.position = "right", + 
legend.box = "horizontal", + legend.box.just = "left", + legend.title.align = 0.5, + legend.spacing.x = unit(1, "cm"), + aspect.ratio = 1, + ) + + +pg = girafe(ggobj = p2, width_svg = 25, height_svg = 25, + options = list(opts_tooltip(css = "padding:5pt; font-size:16pt; color:white; background-color:black;"))) + +saveWidget(pg, paste(outdir,"/",sample,'_interactive4.html', sep = ""), selfcontained = TRUE) + + +ggsave(paste(outdir,"/",sample,'_gbcm_sample-to-sample4.pdf', sep = ""), plot = p, width = 25, height = 25, units = "in", device = cairo_pdf) +write.table(gbcm_observation, paste(outdir,"/",sample,'_observations.tab', sep = ''), sep = '\t') diff --git a/modules/msk/custom/fingerprintcorrelation/tests/main.nf.test b/modules/msk/custom/fingerprintcorrelation/tests/main.nf.test new file mode 100644 index 00000000..e6231546 --- /dev/null +++ b/modules/msk/custom/fingerprintcorrelation/tests/main.nf.test @@ -0,0 +1,109 @@ +nextflow_process { + + name "Test Process CUSTOM_FINGERPRINTCORRELATION" + script "../main.nf" + process "CUSTOM_FINGERPRINTCORRELATION" + config "./nextflow.config" + + tag "modules" + tag "modules_msk" + tag "custom" + tag "custom/fingerprintcorrelation" + tag "custom/fingerprintcombine" + tag "gbcms" + tag "custom/fingerprintvcfparser" + + test("sarscov2 - bam") { + setup { + run("GBCMS"){ + script "../../../gbcms/main.nf" + process { + """ + input[0] = Channel.of( + [ + [ id:'test', sample:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam_bai'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true), + "variant_file.vcf" + ], + [ + [ id:'test2', sample:'test2' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true), + 
file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam_bai'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true), + "variant_file.vcf" + ], + ) + input[1] = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + input[2] = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + """ + } + } + run("CUSTOM_FINGERPRINTVCFPARSER"){ + script "../../fingerprintvcfparser/main.nf" + process { + """ + input[0] = GBCMS.out.variant_file + """ + } + } + run("CUSTOM_FINGERPRINTCOMBINE"){ + script "../../fingerprintcombine/main.nf" + process { + """ + input[0] = CUSTOM_FINGERPRINTVCFPARSER.out.tsv + .map{ meta, tsv -> + def meta2 = [id:meta.pool] + [[id:meta.pool], tsv, meta.id, "hg19"] + }.groupTuple(by:[0]) + input[1] = file("$baseDir/modules/msk/custom/fingerprintcombine/tests/loci_mapping.tsv", checkIfExists:true) + """ + } + } + } + when { + process { + """ + input[0] = CUSTOM_FINGERPRINTCOMBINE.out.combined_fp_tsv + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + process.out.observations_tab, + process.out.versions_fingerprintcorrelation + ).match() } + ) + } + + } + + test("sarscov2 - bam - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [[id:'thispool'], file("$baseDir/modules/msk/custom/fingerprintcombine/tests/loci_mapping.tsv", checkIfExists:true)] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + process.out.observations_tab, + process.out.versions_fingerprintcorrelation + ).match() } + ) + } + + } + +} diff --git a/modules/msk/custom/fingerprintcorrelation/tests/main.nf.test.snap b/modules/msk/custom/fingerprintcorrelation/tests/main.nf.test.snap new file mode 100644 index 00000000..98ee5274 --- /dev/null +++ b/modules/msk/custom/fingerprintcorrelation/tests/main.nf.test.snap @@ -0,0 +1,50 @@ +{ + "sarscov2 - bam - stub": { + "content": [ + [ 
+ [ + { + "id": "thispool" + }, + "thispool_observations.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + [ + [ + "CUSTOM_FINGERPRINTCORRELATION", + "plot_gbcm.R", + "0.1.0" + ] + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-17T12:40:10.210681589" + }, + "sarscov2 - bam": { + "content": [ + [ + [ + { + "id": null + }, + "batch_observations.tab:md5,858d6d115a4da81652bb98dcc8b8077f" + ] + ], + [ + [ + "CUSTOM_FINGERPRINTCORRELATION", + "plot_gbcm.R", + "0.1.0" + ] + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-17T12:40:03.812030372" + } +} \ No newline at end of file diff --git a/modules/msk/custom/fingerprintcorrelation/tests/nextflow.config b/modules/msk/custom/fingerprintcorrelation/tests/nextflow.config new file mode 100644 index 00000000..b676d906 --- /dev/null +++ b/modules/msk/custom/fingerprintcorrelation/tests/nextflow.config @@ -0,0 +1,8 @@ +process { + withName: 'CUSTOM_FINGERPRINTCOMBINE' { + ext.args = "-d 0" + } + withName: 'CUSTOM_FINGERPRINTVCFPARSER' { + ext.args = "-d 0" + } +} diff --git a/modules/msk/custom/fingerprintvcfparser/main.nf b/modules/msk/custom/fingerprintvcfparser/main.nf index 6a46b512..b5924ce8 100644 --- a/modules/msk/custom/fingerprintvcfparser/main.nf +++ b/modules/msk/custom/fingerprintvcfparser/main.nf @@ -11,8 +11,8 @@ process CUSTOM_FINGERPRINTVCFPARSER { tuple val(meta), path(vcf) output: - tuple val(meta), path("${prefix}.fp.tsv"), emit: tsv - path "versions.yml" , emit: versions + tuple val(meta), path("${prefix}.fp.tsv") , emit: tsv + tuple val("${task.process}"), val('parse_fingerprint_vcf.py'), eval('parse_fingerprint_vcf.py -v | cut -f 2 -d" "'), emit: versions_fingerprintvcfparser, topic: versions when: task.ext.when == null || task.ext.when @@ -27,10 +27,6 @@ process CUSTOM_FINGERPRINTVCFPARSER { --samplename ${prefix} \\ $args - cat <<-END_VERSIONS > versions.yml - "${task.process}": - parse_fingerprint_vcf.py: 
0.1.0 - END_VERSIONS """ stub: @@ -41,9 +37,5 @@ process CUSTOM_FINGERPRINTVCFPARSER { touch ${prefix}.fp.tsv - cat <<-END_VERSIONS > versions.yml - "${task.process}": - parse_fingerprint_vcf.py: 0.1.0 - END_VERSIONS """ } diff --git a/modules/msk/custom/fingerprintvcfparser/meta.yml b/modules/msk/custom/fingerprintvcfparser/meta.yml index d4ae3ed7..922f1504 100644 --- a/modules/msk/custom/fingerprintvcfparser/meta.yml +++ b/modules/msk/custom/fingerprintvcfparser/meta.yml @@ -1,21 +1,24 @@ # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: "custom_fingerprintvcfparser" -description: Custom script to parse fingerprint VCF files, generated by the GBCMS module. +description: + Custom script to parse fingerprint VCF files, generated by the GBCMS + module. keywords: -- custom -- fingerprint -- vcf -- pysam + - custom + - fingerprint + - vcf + - pysam tools: -- "custom": - description: "Pysam is a Python module for reading and manipulating SAM/BAM/VCF/BCF - files. It's a lightweight wrapper of the htslib C-API, the same one that powers - samtools, bcftools, and tabix." - homepage: "https://pysam.readthedocs.io/en/latest/api.html" - documentation: "https://pysam.readthedocs.io/en/latest/api.html" - tool_dev_url: "https://github.com/pysam-developers/pysam" - licence: ['MIT'] - identifier: biotools:pysam + - "custom": + description: + "Pysam is a Python module for reading and manipulating SAM/BAM/VCF/BCF + files. It's a lightweight wrapper of the htslib C-API, the same one that powers + samtools, bcftools, and tabix." + homepage: "https://pysam.readthedocs.io/en/latest/api.html" + documentation: "https://pysam.readthedocs.io/en/latest/api.html" + tool_dev_url: "https://github.com/pysam-developers/pysam" + licence: ["MIT"] + identifier: biotools:pysam input: - - meta: @@ -38,19 +41,18 @@ output: e.g. 
[ id:'test' ] - ${prefix}.fp.tsv: type: file - description: Tab-separated values (TSV) file containing parsed fingerprint data + description: + Tab-separated values (TSV) file containing parsed fingerprint + data pattern: "${prefix}.fp.tsv" ontologies: - - edam: http://edamontology.org/format_3475 # TSV + - edam: http://edamontology.org/format_3475 # TSV - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: versions.yml - ontologies: - - edam: http://edamontology.org/format_3750 # YAML + versions_fingerprintvcfparser: + - - ${task.process}: {} + - parse_fingerprint_vcf.py: {} + - 'parse_fingerprint_vcf.py -v | cut -f 2 -d" ': {} authors: -- "@anoronh4" + - "@anoronh4" maintainers: -- "@anoronh4" + - "@anoronh4" diff --git a/modules/msk/custom/fingerprintvcfparser/resources/usr/bin/parse_fingerprint_vcf.py b/modules/msk/custom/fingerprintvcfparser/resources/usr/bin/parse_fingerprint_vcf.py index 851bd498..b4ddd044 100755 --- a/modules/msk/custom/fingerprintvcfparser/resources/usr/bin/parse_fingerprint_vcf.py +++ b/modules/msk/custom/fingerprintvcfparser/resources/usr/bin/parse_fingerprint_vcf.py @@ -15,11 +15,12 @@ from itertools import groupby def usage(): - parser = argparse.ArgumentParser() + parser = argparse.ArgumentParser(prog='parse_fingerprint_vcf.py') parser.add_argument('--input','-i', help = 'input file', required = True) parser.add_argument('--samplename','-n', help = 'sample name', required = True) parser.add_argument('--output','-o', help = 'output file', required = True) parser.add_argument('--depth-filter','-d', default = 20, type = int, help = 'minimum read depth for outputting a minor allele frequency [default = 20]') + parser.add_argument('--version','-v',action='version',version='%(prog)s ' + __version__, help="Show program's version number and exit.") return parser.parse_args() def main(): diff --git a/modules/msk/custom/fingerprintvcfparser/tests/main.nf.test.snap 
b/modules/msk/custom/fingerprintvcfparser/tests/main.nf.test.snap index 5751d885..fb734f9f 100644 --- a/modules/msk/custom/fingerprintvcfparser/tests/main.nf.test.snap +++ b/modules/msk/custom/fingerprintvcfparser/tests/main.nf.test.snap @@ -12,7 +12,11 @@ ] ], "1": [ - "versions.yml:md5,c8c9b3fa1b9110ca83a71490a317f2fa" + [ + "CUSTOM_FINGERPRINTVCFPARSER", + "parse_fingerprint_vcf.py", + "0.1.0" + ] ], "tsv": [ [ @@ -23,16 +27,20 @@ "test.fp.tsv:md5,9fa9a081f17ee52f03463c96d46a23aa" ] ], - "versions": [ - "versions.yml:md5,c8c9b3fa1b9110ca83a71490a317f2fa" + "versions_fingerprintvcfparser": [ + [ + "CUSTOM_FINGERPRINTVCFPARSER", + "parse_fingerprint_vcf.py", + "0.1.0" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-08-12T15:07:39.656085692" + "timestamp": "2025-12-17T13:02:44.951823372" }, "sarscov2 - vcf - stub": { "content": [ @@ -47,7 +55,11 @@ ] ], "1": [ - "versions.yml:md5,c8c9b3fa1b9110ca83a71490a317f2fa" + [ + "CUSTOM_FINGERPRINTVCFPARSER", + "parse_fingerprint_vcf.py", + "0.1.0" + ] ], "tsv": [ [ @@ -58,15 +70,19 @@ "test.fp.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions": [ - "versions.yml:md5,c8c9b3fa1b9110ca83a71490a317f2fa" + "versions_fingerprintvcfparser": [ + [ + "CUSTOM_FINGERPRINTVCFPARSER", + "parse_fingerprint_vcf.py", + "0.1.0" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-08-12T15:07:46.098292727" + "timestamp": "2025-12-17T13:02:51.967429606" } } \ No newline at end of file diff --git a/modules/msk/gbcms/main.nf b/modules/msk/gbcms/main.nf index 38922559..bd0d8dbf 100644 --- a/modules/msk/gbcms/main.nf +++ b/modules/msk/gbcms/main.nf @@ -12,7 +12,7 @@ process GBCMS { output: tuple val(meta), path('*.{vcf,maf}'), emit: variant_file - path "versions.yml" , emit: versions + tuple val("${task.process}"), val('gbcms'), eval("GetBaseCountsMultiSample --help | 
grep -oP '[0-9]\\.[0-9]\\.[0-9]'"), emit: versions_gbcms, topic: versions when: task.ext.when == null || task.ext.when @@ -44,10 +44,6 @@ process GBCMS { --output ${output} \\ --bam $sample:${bam} $args - cat <<-END_VERSIONS > versions.yml - "${task.process}": - GetBaseCountsMultiSample: \$(echo \$(GetBaseCountsMultiSample --help) | grep -oP '[0-9]\\.[0-9]\\.[0-9]') - END_VERSIONS """ stub: @@ -56,9 +52,5 @@ process GBCMS { """ touch variant_file.maf - cat <<-END_VERSIONS > versions.yml - "${task.process}": - GetBaseCountsMultiSample: 1.2.5 - END_VERSIONS """ } diff --git a/modules/msk/gbcms/meta.yml b/modules/msk/gbcms/meta.yml index 170a3e3c..a782f77a 100644 --- a/modules/msk/gbcms/meta.yml +++ b/modules/msk/gbcms/meta.yml @@ -14,10 +14,9 @@ tools: in a given VCF file or MAF file" homepage: "https://github.com/msk-access/GetBaseCountsMultiSample" documentation: "https://github.com/msk-access/GetBaseCountsMultiSample/blob/master/README.md" - identifier: "" + input: - # Only when we have meta - - meta: type: map description: | @@ -29,45 +28,65 @@ input: Input bam file, in the format of SAMPLE_NAME:BAM_FILE. This paramter need to be specified at least once pattern: "*.bam" + ontologies: [] - bambai: type: file description: Index of Bam pattern: "*.bai" + ontologies: [] - variant_file: type: file description: Input variant file in TCGA maf format. --maf or --vcf need to be specified at least once. 
But --maf and --vcf are mutually exclusive pattern: "*.{maf,vcf}" + ontologies: [] - output: type: string description: Output file - - - fasta: - type: file - description: Input reference sequence file - pattern: "*.fasta" - - - fastafai: - type: file - description: Index of the reference Fasta - pattern: "*.fai" + - fasta: + type: file + description: Input reference sequence file + pattern: "*.fasta" + ontologies: [] + - fastafai: + type: file + description: Index of the reference Fasta + pattern: "*.fai" + + ontologies: [] output: - - variant_file: - - meta: - type: file - description: - base counts in multiple BAM files for all the sites in a given - VCF file or MAF file - pattern: "*.{vcf,maf}" + variant_file: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` - "*.{vcf,maf}": type: file description: base counts in multiple BAM files for all the sites in a given VCF file or MAF file pattern: "*.{vcf,maf}" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: [] + versions_gbcms: + - - ${task.process}: + type: string + description: The name of the process + - gbcms: + type: string + description: The name of the tool + - GetBaseCountsMultiSample --help | grep -oP '[0-9]\\.[0-9]\\.[0-9]': {} +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - gbcms: + type: string + description: The name of the tool + - GetBaseCountsMultiSample --help | grep -oP '[0-9]\\.[0-9]\\.[0-9]': + type: eval + description: The expression to obtain the version of the tool authors: - "@buehlere" diff --git a/modules/msk/gbcms/tests/main.nf.test.snap b/modules/msk/gbcms/tests/main.nf.test.snap index 31b547e2..60ff40c5 100644 --- a/modules/msk/gbcms/tests/main.nf.test.snap +++ b/modules/msk/gbcms/tests/main.nf.test.snap @@ -12,7 +12,11 @@ ] ], "1": [ - 
"versions.yml:md5,a94265ed3bc4b5631d85b9b9b5d2b7e5" + [ + "GBCMS", + "gbcms", + "1.2.4" + ] ], "variant_file": [ [ @@ -23,15 +27,19 @@ "variant_file.vcf:md5,28c8df33c7ea5ed5d1cf9997d8e00ffa" ] ], - "versions": [ - "versions.yml:md5,a94265ed3bc4b5631d85b9b9b5d2b7e5" + "versions_gbcms": [ + [ + "GBCMS", + "gbcms", + "1.2.4" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.3" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-02-13T17:19:51.302342" + "timestamp": "2025-12-17T12:07:13.813792199" } } \ No newline at end of file diff --git a/subworkflows/msk/fingerprint_gbcms/main.nf b/subworkflows/msk/fingerprint_gbcms/main.nf index 5f67f594..d6c90f14 100644 --- a/subworkflows/msk/fingerprint_gbcms/main.nf +++ b/subworkflows/msk/fingerprint_gbcms/main.nf @@ -18,8 +18,6 @@ workflow FINGERPRINT_GBCMS { main: - ch_versions = Channel.empty() - println ch_fp_loci_vcf.getClass() println ch_fasta.getClass() @@ -33,16 +31,13 @@ workflow FINGERPRINT_GBCMS { //ch_fasta.view().map{ if (it[0] instanceof Map){ it[1] } else { it }}.first(), //ch_fastafai.view().map{ if (it[0] instanceof Map){ it[1] } else { it }}.first() ) - ch_versions = ch_versions.mix(GBCMS.out.versions.first()) CUSTOM_FINGERPRINTVCFPARSER ( GBCMS.out.variant_file ) - ch_versions = ch_versions.mix(CUSTOM_FINGERPRINTVCFPARSER.out.versions.first()) all_fps = CUSTOM_FINGERPRINTVCFPARSER.out.tsv.mix(ch_fp_tsv) - paired_fps = all_fps .filter{ meta, tsv -> meta.case_id != null && meta.control_id != null && meta.id == meta.case_id } .combine(all_fps) @@ -57,7 +52,6 @@ workflow FINGERPRINT_GBCMS { .map{ meta, tsv -> [ meta, tsv, [] ] } CUSTOM_FINGERPRINTCONTAMINATION ( paired_fps.mix(unpaired_fps).view() ) - ch_versions = ch_versions.mix(CUSTOM_FINGERPRINTCONTAMINATION.out.versions.first()) if (run_correlation) { FINGERPRINT_GBCMS_BATCH ( @@ -65,7 +59,6 @@ workflow FINGERPRINT_GBCMS { ch_liftover_loci_mapping, default_genome ) - ch_versions = 
ch_versions.mix(FINGERPRINT_GBCMS_BATCH.out.versions.first()) } else { FINGERPRINT_GBCMS_BATCH.out.combined_fp_tsv = Channel.empty() } @@ -74,6 +67,5 @@ workflow FINGERPRINT_GBCMS { fp_tsv = CUSTOM_FINGERPRINTVCFPARSER.out.tsv // channel: [ val(meta), tsv ] contamination_tsv = CUSTOM_FINGERPRINTCONTAMINATION.out.contamination_tsv // channel: [ val(meta), contamination_tsv ] combined_fp_tsv = FINGERPRINT_GBCMS_BATCH.out.combined_fp_tsv // channel: [ tsv ] - versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/msk/fingerprint_gbcms/meta.yml b/subworkflows/msk/fingerprint_gbcms/meta.yml index c0002dbb..8deac1af 100644 --- a/subworkflows/msk/fingerprint_gbcms/meta.yml +++ b/subworkflows/msk/fingerprint_gbcms/meta.yml @@ -13,6 +13,8 @@ keywords: components: - gbcms - custom/fingerprintvcfparser + - custom/fingerprintcontamination + - fingerprint_gbcms_batch input: - ch_bam: type: file diff --git a/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test b/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test index 46a8bdd8..171495b5 100644 --- a/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test +++ b/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test @@ -11,6 +11,7 @@ nextflow_workflow { tag "subworkflows/fingerprint_gbcms_batch" tag "gbcms" tag "custom/fingerprintvcfparser" + tag "custom/fingerprintcontamination" test("sarscov2 - bam") { diff --git a/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test.snap b/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test.snap index 492f6ee5..4f6067f4 100644 --- a/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test.snap +++ b/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test.snap @@ -35,16 +35,20 @@ ] ], "2": [ - "0DPfilter_ALL_FP.txt:md5,509d9f7c1d89b9f8e2825bcc4793da3a" - ], - "3": [ - "versions.yml:md5,1d2160eb0eb102d00d5786a8d056328e", - "versions.yml:md5,41ff30ed71b1d19e95a6095b9ac3ca94", - "versions.yml:md5,4b4b3ad40aa1c2c3a002c0c347c385b8", - 
"versions.yml:md5,4fb29c6ff25ce4e29f6cf293a70aa8e6" + [ + { + "id": "defaultbatch" + }, + "0DPfilter_ALL_FP.txt:md5,21ecaf823768ac5d6787fa6a6b2aca37" + ] ], "combined_fp_tsv": [ - "0DPfilter_ALL_FP.txt:md5,509d9f7c1d89b9f8e2825bcc4793da3a" + [ + { + "id": "defaultbatch" + }, + "0DPfilter_ALL_FP.txt:md5,21ecaf823768ac5d6787fa6a6b2aca37" + ] ], "contamination_tsv": [ [ @@ -77,19 +81,13 @@ }, "test2.fp.tsv:md5,c3fbcee584048e9bc4fc93bc6ca487d2" ] - ], - "versions": [ - "versions.yml:md5,1d2160eb0eb102d00d5786a8d056328e", - "versions.yml:md5,41ff30ed71b1d19e95a6095b9ac3ca94", - "versions.yml:md5,4b4b3ad40aa1c2c3a002c0c347c385b8", - "versions.yml:md5,4fb29c6ff25ce4e29f6cf293a70aa8e6" ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-09-26T11:05:45.091814897" + "timestamp": "2025-12-17T13:30:35.667469411" } } \ No newline at end of file diff --git a/subworkflows/msk/fingerprint_gbcms_batch/main.nf b/subworkflows/msk/fingerprint_gbcms_batch/main.nf index 2b42fe0c..e0df5c92 100644 --- a/subworkflows/msk/fingerprint_gbcms_batch/main.nf +++ b/subworkflows/msk/fingerprint_gbcms_batch/main.nf @@ -1,5 +1,5 @@ - -include { CUSTOM_FINGERPRINTCOMBINE } from '../../../modules/msk/custom/fingerprintcombine/main' +include { CUSTOM_FINGERPRINTCOMBINE } from '../../../modules/msk/custom/fingerprintcombine/main' +include { CUSTOM_FINGERPRINTCORRELATION } from '../../../modules/msk/custom/fingerprintcorrelation/main' workflow FINGERPRINT_GBCMS_BATCH { @@ -10,9 +10,6 @@ workflow FINGERPRINT_GBCMS_BATCH { main: - ch_versions = Channel.empty() - - CUSTOM_FINGERPRINTCOMBINE( ch_fp .map{ meta, tsv -> @@ -24,9 +21,11 @@ workflow FINGERPRINT_GBCMS_BATCH { }.groupTuple(by:[0]), ch_liftover_loci_mapping.first() ) - ch_versions = ch_versions.mix(CUSTOM_FINGERPRINTCOMBINE.out.versions.first()) + + CUSTOM_FINGERPRINTCORRELATION( + CUSTOM_FINGERPRINTCOMBINE.out.combined_fp_tsv + ) emit: combined_fp_tsv = 
CUSTOM_FINGERPRINTCOMBINE.out.combined_fp_tsv // channel: [ val(meta), [ bam ] ] - versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/msk/fingerprint_gbcms_batch/meta.yml b/subworkflows/msk/fingerprint_gbcms_batch/meta.yml index ca573350..1646d1c9 100644 --- a/subworkflows/msk/fingerprint_gbcms_batch/meta.yml +++ b/subworkflows/msk/fingerprint_gbcms_batch/meta.yml @@ -8,7 +8,8 @@ keywords: - batch - pool components: - - modules/msk/custom/fingerprintcombine + - custom/fingerprintcombine + - custom/fingerprintcorrelation input: - ch_fp: type: file diff --git a/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test b/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test index 8500dcfd..c1bd3701 100644 --- a/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test +++ b/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test @@ -12,6 +12,7 @@ nextflow_workflow { tag "gbcms" tag "custom/fingerprintvcfparser" tag "custom/fingerprintcombine" + tag "custom/fingerprintcorrelation" test("sarscov2 - bam - single_end") { diff --git a/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test.snap b/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test.snap index 7611bc83..3441d510 100644 --- a/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test.snap +++ b/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test.snap @@ -3,23 +3,27 @@ "content": [ { "0": [ - "0DPfilter_ALL_FP.txt:md5,509d9f7c1d89b9f8e2825bcc4793da3a" - ], - "1": [ - "versions.yml:md5,bc54e025756d97cd9b14d51a3c9e3667" + [ + { + "id": "defaultbatch" + }, + "0DPfilter_ALL_FP.txt:md5,66113c255cf1f52e27802183764a406d" + ] ], "combined_fp_tsv": [ - "0DPfilter_ALL_FP.txt:md5,509d9f7c1d89b9f8e2825bcc4793da3a" - ], - "versions": [ - "versions.yml:md5,bc54e025756d97cd9b14d51a3c9e3667" + [ + { + "id": "defaultbatch" + }, + "0DPfilter_ALL_FP.txt:md5,66113c255cf1f52e27802183764a406d" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + 
"nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-09-25T23:49:26.050835746" + "timestamp": "2025-12-17T13:30:57.724162129" } } \ No newline at end of file From 54fff1bcae4ce676a70b20276bb91e70697a354c Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Wed, 17 Dec 2025 20:28:30 -0500 Subject: [PATCH 28/52] fix failing test --- .../msk/custom/fingerprintcombine/tests/main.nf.test.snap | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/msk/custom/fingerprintcombine/tests/main.nf.test.snap b/modules/msk/custom/fingerprintcombine/tests/main.nf.test.snap index 68a0b5c4..85f90edd 100644 --- a/modules/msk/custom/fingerprintcombine/tests/main.nf.test.snap +++ b/modules/msk/custom/fingerprintcombine/tests/main.nf.test.snap @@ -25,7 +25,7 @@ "XDPfilter_ALL_FP.txt:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions_fingerprintvcfparser": [ + "versions_fingerprintcombine": [ [ "CUSTOM_FINGERPRINTCOMBINE", "complete_FP_table.R", @@ -38,7 +38,7 @@ "nf-test": "0.9.3", "nextflow": "25.10.2" }, - "timestamp": "2025-12-17T13:28:48.061258305" + "timestamp": "2025-12-17T20:26:07.925718004" }, "sarscov2 - bam": { "content": [ @@ -66,7 +66,7 @@ "0DPfilter_ALL_FP.txt:md5,66113c255cf1f52e27802183764a406d" ] ], - "versions_fingerprintvcfparser": [ + "versions_fingerprintcombine": [ [ "CUSTOM_FINGERPRINTCOMBINE", "complete_FP_table.R", @@ -79,6 +79,6 @@ "nf-test": "0.9.3", "nextflow": "25.10.2" }, - "timestamp": "2025-12-17T13:28:39.908034467" + "timestamp": "2025-12-17T20:25:58.985229402" } } \ No newline at end of file From 53a03e7847ddfff0de5a71564d6286fda1a33f74 Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Wed, 17 Dec 2025 20:31:09 -0500 Subject: [PATCH 29/52] exclude one more module from conda tests --- .github/skip_nf_test.json | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/skip_nf_test.json b/.github/skip_nf_test.json index b2ec96b8..8b5688a8 100644 --- a/.github/skip_nf_test.json +++ 
b/.github/skip_nf_test.json @@ -3,6 +3,7 @@ "modules/msk/custom/fingerprintvcfparser", "modules/msk/custom/fingerprintcontamination", "modules/msk/custom/fingerprintcombine", + "modules/msk/custom/fingerprintcorrelation", "modules/msk/calculatenoise", "modules/msk/ppflagfixer", "modules/msk/facets", From 8a596c9921a7aa4f94e99e10671e57050501a296 Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Thu, 18 Dec 2025 20:23:34 -0500 Subject: [PATCH 30/52] update custom/fingerprintcorrelation to output table with correlation values --- .../msk/custom/fingerprintcorrelation/main.nf | 8 +++++--- .../resources/usr/bin/plot_gbcm.R | 1 + .../fingerprintcorrelation/tests/main.nf.test | 2 ++ .../tests/main.nf.test.snap | 20 +++++++++++++++++-- 4 files changed, 26 insertions(+), 5 deletions(-) diff --git a/modules/msk/custom/fingerprintcorrelation/main.nf b/modules/msk/custom/fingerprintcorrelation/main.nf index eafc51a7..fbd5cfeb 100644 --- a/modules/msk/custom/fingerprintcorrelation/main.nf +++ b/modules/msk/custom/fingerprintcorrelation/main.nf @@ -12,9 +12,10 @@ process CUSTOM_FINGERPRINTCORRELATION { tuple val(meta), path(combined_fp_tsv) output: - tuple val(meta), path("*_gbcm_sample-to-sample4.pdf"), emit: heatmap_pdf - tuple val(meta), path("*_interactive4.html"), emit: heatmap_html - tuple val(meta), path("*_observations.tab"), emit: observations_tab + tuple val(meta), path("*_gbcm_sample-to-sample4.pdf") , emit: heatmap_pdf + tuple val(meta), path("*_interactive4.html") , emit: heatmap_html + tuple val(meta), path("*_observations.tab") , emit: observations_tab + tuple val(meta), path("*_correlations.tab") , emit: correlations_tab tuple val("${task.process}"), val('plot_gbcm.R'), val("0.1.0"), topic: versions, emit: versions_fingerprintcorrelation when: @@ -37,5 +38,6 @@ process CUSTOM_FINGERPRINTCORRELATION { touch ${prefix}_gbcm_sample-to-sample4.pdf touch ${prefix}_interactive4.html touch ${prefix}_observations.tab + touch ${prefix}_correlations.tab """ } diff 
--git a/modules/msk/custom/fingerprintcorrelation/resources/usr/bin/plot_gbcm.R b/modules/msk/custom/fingerprintcorrelation/resources/usr/bin/plot_gbcm.R index 5ba3460c..8fbf5971 100755 --- a/modules/msk/custom/fingerprintcorrelation/resources/usr/bin/plot_gbcm.R +++ b/modules/msk/custom/fingerprintcorrelation/resources/usr/bin/plot_gbcm.R @@ -61,6 +61,7 @@ title = paste("Patient:", sample,"; ", nrow(all_fp_gbcm_final)," Loci used",sep all_fp_gbcm_final_matrix <- data.matrix(all_fp_gbcm_final) all_fp_gbcm_final_matrix = cor(as.matrix(all_fp_gbcm_final_matrix), method = c("pearson"), use = "pairwise.complete.obs") +write.table(all_fp_gbcm_final_matrix, paste(outdir,"/",sample,'_correlations.tab', sep = ''), sep = '\t',quote=F) gbcm_data_long <- reshape2::melt(all_fp_gbcm_final_matrix) gbcm_observation = crossprod(!is.na(all_fp_gbcm_final)) diff --git a/modules/msk/custom/fingerprintcorrelation/tests/main.nf.test b/modules/msk/custom/fingerprintcorrelation/tests/main.nf.test index e6231546..b23d8356 100644 --- a/modules/msk/custom/fingerprintcorrelation/tests/main.nf.test +++ b/modules/msk/custom/fingerprintcorrelation/tests/main.nf.test @@ -74,6 +74,7 @@ nextflow_process { assert process.success assertAll( { assert snapshot( + process.out.correlations_tab, process.out.observations_tab, process.out.versions_fingerprintcorrelation ).match() } @@ -98,6 +99,7 @@ nextflow_process { assert process.success assertAll( { assert snapshot( + process.out.correlations_tab, process.out.observations_tab, process.out.versions_fingerprintcorrelation ).match() } diff --git a/modules/msk/custom/fingerprintcorrelation/tests/main.nf.test.snap b/modules/msk/custom/fingerprintcorrelation/tests/main.nf.test.snap index 98ee5274..c261cb94 100644 --- a/modules/msk/custom/fingerprintcorrelation/tests/main.nf.test.snap +++ b/modules/msk/custom/fingerprintcorrelation/tests/main.nf.test.snap @@ -1,6 +1,14 @@ { "sarscov2 - bam - stub": { "content": [ + [ + [ + { + "id": "thispool" + }, + 
"thispool_correlations.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], [ [ { @@ -21,10 +29,18 @@ "nf-test": "0.9.3", "nextflow": "25.10.2" }, - "timestamp": "2025-12-17T12:40:10.210681589" + "timestamp": "2025-12-18T20:20:30.919363465" }, "sarscov2 - bam": { "content": [ + [ + [ + { + "id": null + }, + "batch_correlations.tab:md5,4622cb8a7eff25e7bbd28ed23b74b239" + ] + ], [ [ { @@ -45,6 +61,6 @@ "nf-test": "0.9.3", "nextflow": "25.10.2" }, - "timestamp": "2025-12-17T12:40:03.812030372" + "timestamp": "2025-12-18T20:20:24.501583538" } } \ No newline at end of file From 51047f1752fa13cdd98fe9aaaaa3b31a97b2c6af Mon Sep 17 00:00:00 2001 From: NoronhaA Date: Thu, 8 Jan 2026 22:58:19 -0500 Subject: [PATCH 31/52] add grouping logic for ordering samples in fingerprintcombine --- modules/msk/custom/fingerprintcombine/main.nf | 9 ++++++--- .../resources/usr/bin/complete_FP_table.R | 3 ++- subworkflows/msk/fingerprint_gbcms_batch/main.nf | 2 +- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/modules/msk/custom/fingerprintcombine/main.nf b/modules/msk/custom/fingerprintcombine/main.nf index 121beb5f..47a32ba7 100644 --- a/modules/msk/custom/fingerprintcombine/main.nf +++ b/modules/msk/custom/fingerprintcombine/main.nf @@ -10,7 +10,7 @@ process CUSTOM_FINGERPRINTCOMBINE { input: - tuple val(meta), path(fp_tsv), val(sample), val(genome_build) + tuple val(meta), path(fp_tsv), val(sample), val(genome_build), val(group) path(liftover_loci_mapping) output: @@ -26,15 +26,18 @@ process CUSTOM_FINGERPRINTCOMBINE { declare -a fp_tsv_list declare -a sample_list declare -a genome_build_list + declare -a group_list fp_tsv_list=(${fp_tsv.join(' ')}) sample_list=(${sample.join(' ')}) genome_build_list=(${genome_build.join(' ')}) - echo -e "sample_id\tgenome_build\tfp_tsv" > input.tsv + group_list=(${group.join(' ')}) + echo -e "sample_id\tgenome_build\tfp_tsv\tgroup" > input.tsv for i in \$(seq 0 1 \$((\${#fp_tsv_list[@]}-1)) ) ; do fp_tsv=\${fp_tsv_list[i]} 
sample=\${sample_list[i]} genome=\${genome_build_list[i]} - echo -e "\$sample\t\$genome\t\$fp_tsv" + group=\${group_list[i]} + echo -e "\$sample\t\$genome\t\$fp_tsv\t\$group" done >> input.tsv complete_FP_table.R \\ diff --git a/modules/msk/custom/fingerprintcombine/resources/usr/bin/complete_FP_table.R b/modules/msk/custom/fingerprintcombine/resources/usr/bin/complete_FP_table.R index 7a8e3ad5..b9d4567f 100755 --- a/modules/msk/custom/fingerprintcombine/resources/usr/bin/complete_FP_table.R +++ b/modules/msk/custom/fingerprintcombine/resources/usr/bin/complete_FP_table.R @@ -49,7 +49,7 @@ hg19_hg38_mapper$Loci_hg38 = paste(hg19_hg38_mapper$GRCH38_CHROM,hg19_hg38_mappe hg19_hg38_mapper = hg19_hg38_mapper %>% select(Loci_hg19, Loci_hg38) %>% unique() message("Loading Samples") -input_table = fread(args$input_table, header = T) %>% arrange(sample_id) +input_table = fread(args$input_table, header = T) %>% arrange(group, sample_id) for (i in 1:nrow(input_table)){ sample = input_table$sample_id[i] genome_build = input_table$genome_build[i] @@ -103,6 +103,7 @@ if (!dir.exists(args$analysis_folder)) { message(paste("Output file: ", args$analysis_folder,"/",args$depth_filter,"DPfilter_ALL_FP.txt", sep="")) +all_fp_gbcm_final <- apply(all_fp_gbcm_final,2,as.character) write.table(all_fp_gbcm_final, file = paste(args$analysis_folder,"/",args$depth_filter,"DPfilter_ALL_FP.txt", sep=""), append = F, sep = "\t", row.names = F, quote = F) message("FP file completed") diff --git a/subworkflows/msk/fingerprint_gbcms_batch/main.nf b/subworkflows/msk/fingerprint_gbcms_batch/main.nf index e0df5c92..9f2e10c3 100644 --- a/subworkflows/msk/fingerprint_gbcms_batch/main.nf +++ b/subworkflows/msk/fingerprint_gbcms_batch/main.nf @@ -17,7 +17,7 @@ workflow FINGERPRINT_GBCMS_BATCH { if (meta.pool) { meta2.id = meta.pool } - [meta2, tsv, meta.id, meta.genome ?: default_genome ] + [meta2, tsv, meta.id, meta.genome ?: default_genome, meta.group ?: "default" ] }.groupTuple(by:[0]), 
ch_liftover_loci_mapping.first() ) From f45b4d99e1a09a44a1a84f6b2a449bde5355b265 Mon Sep 17 00:00:00 2001 From: NoronhaA Date: Thu, 8 Jan 2026 23:01:08 -0500 Subject: [PATCH 32/52] add logic to handle exception for when denominator of fraction is zero --- .../resources/usr/bin/calculate_contamination.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/modules/msk/custom/fingerprintcontamination/resources/usr/bin/calculate_contamination.py b/modules/msk/custom/fingerprintcontamination/resources/usr/bin/calculate_contamination.py index 3f5ece93..23febaff 100755 --- a/modules/msk/custom/fingerprintcontamination/resources/usr/bin/calculate_contamination.py +++ b/modules/msk/custom/fingerprintcontamination/resources/usr/bin/calculate_contamination.py @@ -23,7 +23,10 @@ def major_contamination(tumor, depth_filter): homozygous = ['AA','CC','GG','TT','A','C','G','T'] heterozygous = ~tumor_filtered['Genotype'].isin(homozygous) - return sum(heterozygous)/tumor_filtered.shape[0] + try: + return sum(heterozygous)/tumor_filtered.shape[0] + except Exception as e: + return 0 def get_coverage(file, depth_filter): #print(file['Alleles'].str.split(' ', expand=True)) From 18db500ec5f5f3a1fdae3787ff15bb76f11839d1 Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Thu, 8 Jan 2026 23:38:30 -0500 Subject: [PATCH 33/52] fix failing nf-tests --- modules/msk/custom/fingerprintcombine/tests/main.nf.test | 5 +++-- .../msk/custom/fingerprintcorrelation/tests/main.nf.test | 2 +- subworkflows/msk/fingerprint_gbcms/tests/main.nf.test.snap | 6 +++--- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/modules/msk/custom/fingerprintcombine/tests/main.nf.test b/modules/msk/custom/fingerprintcombine/tests/main.nf.test index 03b3388b..0cb6e4d9 100644 --- a/modules/msk/custom/fingerprintcombine/tests/main.nf.test +++ b/modules/msk/custom/fingerprintcombine/tests/main.nf.test @@ -57,7 +57,7 @@ nextflow_process { input[0] = CUSTOM_FINGERPRINTVCFPARSER.out.tsv .map{ 
meta, tsv -> println meta - [[id:meta.pool], tsv, meta.id, "hg19"] + [[id:meta.pool], tsv, meta.id, "hg19","default"] }.groupTuple(by:[0]) input[1] = file("$baseDir/modules/msk/custom/fingerprintcombine/tests/loci_mapping.tsv", checkIfExists:true) """ @@ -85,7 +85,8 @@ nextflow_process { [id:"testsample"], [file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true)], ["testsample"], - ["hg19"] + ["hg19"], + ["default"] ] input[1] = file("$baseDir/modules/msk/custom/fingerprintcombine/tests/loci_mapping.tsv", checkIfExists:true) """ diff --git a/modules/msk/custom/fingerprintcorrelation/tests/main.nf.test b/modules/msk/custom/fingerprintcorrelation/tests/main.nf.test index b23d8356..00d54193 100644 --- a/modules/msk/custom/fingerprintcorrelation/tests/main.nf.test +++ b/modules/msk/custom/fingerprintcorrelation/tests/main.nf.test @@ -55,7 +55,7 @@ nextflow_process { input[0] = CUSTOM_FINGERPRINTVCFPARSER.out.tsv .map{ meta, tsv -> def meta2 = [id:meta.pool] - [[id:meta.pool], tsv, meta.id, "hg19"] + [[id:meta.pool], tsv, meta.id, "hg19", "default"] }.groupTuple(by:[0]) input[1] = file("$baseDir/modules/msk/custom/fingerprintcombine/tests/loci_mapping.tsv", checkIfExists:true) """ diff --git a/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test.snap b/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test.snap index 4f6067f4..c0e01878 100644 --- a/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test.snap +++ b/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test.snap @@ -39,7 +39,7 @@ { "id": "defaultbatch" }, - "0DPfilter_ALL_FP.txt:md5,21ecaf823768ac5d6787fa6a6b2aca37" + "0DPfilter_ALL_FP.txt:md5,2b376a207fd1bd6bec55fa765e3a3947" ] ], "combined_fp_tsv": [ @@ -47,7 +47,7 @@ { "id": "defaultbatch" }, - "0DPfilter_ALL_FP.txt:md5,21ecaf823768ac5d6787fa6a6b2aca37" + "0DPfilter_ALL_FP.txt:md5,2b376a207fd1bd6bec55fa765e3a3947" ] ], "contamination_tsv": [ @@ -88,6 +88,6 @@ "nf-test": "0.9.3", "nextflow": "25.10.2" }, - "timestamp": 
"2025-12-17T13:30:35.667469411" + "timestamp": "2026-01-08T23:23:53.894051098" } } \ No newline at end of file From d6ecf3c586ad88a583b08f19bf05db3358207a18 Mon Sep 17 00:00:00 2001 From: NoronhaA Date: Fri, 30 Jan 2026 14:28:11 -0500 Subject: [PATCH 34/52] put process tag in double quotes --- modules/msk/custom/fingerprintcombine/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/msk/custom/fingerprintcombine/main.nf b/modules/msk/custom/fingerprintcombine/main.nf index 47a32ba7..60abf46d 100644 --- a/modules/msk/custom/fingerprintcombine/main.nf +++ b/modules/msk/custom/fingerprintcombine/main.nf @@ -1,5 +1,5 @@ process CUSTOM_FINGERPRINTCOMBINE { - tag '$meta.id' + tag "$meta.id" label 'process_single' conda "${moduleDir}/environment.yml" From 6d6c0e1a3f59f7cd9f968dd88d886659c157b2fc Mon Sep 17 00:00:00 2001 From: NoronhaA Date: Fri, 30 Jan 2026 14:28:58 -0500 Subject: [PATCH 35/52] change method of adding a column in order to handle empty table --- .../fingerprintcombine/resources/usr/bin/complete_FP_table.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/msk/custom/fingerprintcombine/resources/usr/bin/complete_FP_table.R b/modules/msk/custom/fingerprintcombine/resources/usr/bin/complete_FP_table.R index b9d4567f..aa2869ef 100755 --- a/modules/msk/custom/fingerprintcombine/resources/usr/bin/complete_FP_table.R +++ b/modules/msk/custom/fingerprintcombine/resources/usr/bin/complete_FP_table.R @@ -68,7 +68,8 @@ for (i in 1:nrow(input_table)){ temp_dataset$DP = as.numeric(temp_dataset$DP1) + as.numeric(temp_dataset$DP2) temp_dataset = temp_dataset[temp_dataset$DP >= args$depth_filter,] ## keeping loci >= 20 dp by default temp_dataset$VAF[is.na(temp_dataset$VAF)==T] <- 0 - temp_dataset$Sample = sample #only loci with DP >= depth filter will have Sample info + #temp_dataset$Sample = sample #only loci with DP >= depth filter will have Sample info + temp_dataset$Sample <- rep(sample, nrow(temp_dataset)) 
temp_dataset = temp_dataset %>% select("Locus","Genotype","Sample","VAF") temp_dataset$Locus = str_replace(temp_dataset$Locus,"chr","") From acfa784029bb3ae2fd0b301de6264219dbff4146 Mon Sep 17 00:00:00 2001 From: NoronhaA Date: Fri, 30 Jan 2026 14:34:24 -0500 Subject: [PATCH 36/52] change output channel to include mix of run-computed and previously-computed FPs --- subworkflows/msk/fingerprint_gbcms/main.nf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/subworkflows/msk/fingerprint_gbcms/main.nf b/subworkflows/msk/fingerprint_gbcms/main.nf index d6c90f14..1cdd7ea9 100644 --- a/subworkflows/msk/fingerprint_gbcms/main.nf +++ b/subworkflows/msk/fingerprint_gbcms/main.nf @@ -64,7 +64,8 @@ workflow FINGERPRINT_GBCMS { } emit: - fp_tsv = CUSTOM_FINGERPRINTVCFPARSER.out.tsv // channel: [ val(meta), tsv ] + fp_tsv_from_bam = CUSTOM_FINGERPRINTVCFPARSER.out.tsv // channel: [ val(meta), tsv ] + fp_tsv = all_fps // channel: [ val(meta), tsv ] contamination_tsv = CUSTOM_FINGERPRINTCONTAMINATION.out.contamination_tsv // channel: [ val(meta), contamination_tsv ] combined_fp_tsv = FINGERPRINT_GBCMS_BATCH.out.combined_fp_tsv // channel: [ tsv ] From baff4ae32cebe7c098f0ee95016726cec26d5be8 Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Thu, 12 Feb 2026 10:38:37 -0500 Subject: [PATCH 37/52] updates --- modules/msk/custom/fingerprintcombine/main.nf | 12 +- .../resources/usr/bin/complete_FP_table.R | 2 +- .../msk/custom/fingerprintcorrelation/main.nf | 12 +- .../resources/usr/bin/plot_gbcm.R | 331 +++++++++--------- .../fingerprintcorrelation/tests/main.nf.test | 2 + .../tests/main.nf.test.snap | 12 +- subworkflows/msk/fingerprint_gbcms/main.nf | 8 +- .../fingerprint_gbcms/tests/main.nf.test.snap | 38 +- .../msk/fingerprint_gbcms_batch/main.nf | 8 +- .../tests/main.nf.test | 2 + 10 files changed, 235 insertions(+), 192 deletions(-) diff --git a/modules/msk/custom/fingerprintcombine/main.nf b/modules/msk/custom/fingerprintcombine/main.nf index 
60abf46d..ffb406d5 100644 --- a/modules/msk/custom/fingerprintcombine/main.nf +++ b/modules/msk/custom/fingerprintcombine/main.nf @@ -10,7 +10,7 @@ process CUSTOM_FINGERPRINTCOMBINE { input: - tuple val(meta), path(fp_tsv), val(sample), val(genome_build), val(group) + tuple val(meta), path(fp_tsv), val(sample), val(genome_build), val(patient) path(liftover_loci_mapping) output: @@ -26,18 +26,18 @@ process CUSTOM_FINGERPRINTCOMBINE { declare -a fp_tsv_list declare -a sample_list declare -a genome_build_list - declare -a group_list + declare -a patient_list fp_tsv_list=(${fp_tsv.join(' ')}) sample_list=(${sample.join(' ')}) genome_build_list=(${genome_build.join(' ')}) - group_list=(${group.join(' ')}) - echo -e "sample_id\tgenome_build\tfp_tsv\tgroup" > input.tsv + patient_list=(${patient.join(' ')}) + echo -e "sample_id\tgenome_build\tfp_tsv\tpatient" > input.tsv for i in \$(seq 0 1 \$((\${#fp_tsv_list[@]}-1)) ) ; do fp_tsv=\${fp_tsv_list[i]} sample=\${sample_list[i]} genome=\${genome_build_list[i]} - group=\${group_list[i]} - echo -e "\$sample\t\$genome\t\$fp_tsv\t\$group" + patient=\${patient_list[i]} + echo -e "\$sample\t\$genome\t\$fp_tsv\t\$patient" done >> input.tsv complete_FP_table.R \\ diff --git a/modules/msk/custom/fingerprintcombine/resources/usr/bin/complete_FP_table.R b/modules/msk/custom/fingerprintcombine/resources/usr/bin/complete_FP_table.R index aa2869ef..f551459e 100755 --- a/modules/msk/custom/fingerprintcombine/resources/usr/bin/complete_FP_table.R +++ b/modules/msk/custom/fingerprintcombine/resources/usr/bin/complete_FP_table.R @@ -49,7 +49,7 @@ hg19_hg38_mapper$Loci_hg38 = paste(hg19_hg38_mapper$GRCH38_CHROM,hg19_hg38_mappe hg19_hg38_mapper = hg19_hg38_mapper %>% select(Loci_hg19, Loci_hg38) %>% unique() message("Loading Samples") -input_table = fread(args$input_table, header = T) %>% arrange(group, sample_id) +input_table = fread(args$input_table, header = T) %>% arrange(patient, sample_id) for (i in 1:nrow(input_table)){ sample = 
input_table$sample_id[i] genome_build = input_table$genome_build[i] diff --git a/modules/msk/custom/fingerprintcorrelation/main.nf b/modules/msk/custom/fingerprintcorrelation/main.nf index fbd5cfeb..1eec8ff8 100644 --- a/modules/msk/custom/fingerprintcorrelation/main.nf +++ b/modules/msk/custom/fingerprintcorrelation/main.nf @@ -10,10 +10,11 @@ process CUSTOM_FINGERPRINTCORRELATION { input: tuple val(meta), path(combined_fp_tsv) + val(filter_term) output: - tuple val(meta), path("*_gbcm_sample-to-sample4.pdf") , emit: heatmap_pdf - tuple val(meta), path("*_interactive4.html") , emit: heatmap_html + tuple val(meta), path("*.pdf") , emit: heatmap_pdf + tuple val(meta), path("*.html") , emit: heatmap_html tuple val(meta), path("*_observations.tab") , emit: observations_tab tuple val(meta), path("*_correlations.tab") , emit: correlations_tab tuple val("${task.process}"), val('plot_gbcm.R'), val("0.1.0"), topic: versions, emit: versions_fingerprintcorrelation @@ -24,19 +25,20 @@ process CUSTOM_FINGERPRINTCORRELATION { script: def args = task.ext.args ?: '' def prefix = meta.id ?: "batch" + def filter_args = (filter_term && filter_term != "") ? 
"-p ${filter_term} -f" : "" """ plot_gbcm.R \\ -t ${combined_fp_tsv} \\ -o ./ \\ - -p ${prefix} + ${filter_args} """ stub: def args = task.ext.args ?: '' def prefix = meta.id ?: "batch" """ - touch ${prefix}_gbcm_sample-to-sample4.pdf - touch ${prefix}_interactive4.html + touch ${prefix}.pdf + touch ${prefix}.html touch ${prefix}_observations.tab touch ${prefix}_correlations.tab """ diff --git a/modules/msk/custom/fingerprintcorrelation/resources/usr/bin/plot_gbcm.R b/modules/msk/custom/fingerprintcorrelation/resources/usr/bin/plot_gbcm.R index 8fbf5971..7e985759 100755 --- a/modules/msk/custom/fingerprintcorrelation/resources/usr/bin/plot_gbcm.R +++ b/modules/msk/custom/fingerprintcorrelation/resources/usr/bin/plot_gbcm.R @@ -1,9 +1,10 @@ #!/usr/bin/env Rscript + #------------------------------------------------------------------------------- # Script: plot_gbcm.R # Author: Hanan Salim -# Date: 2025-11-03 -# Version: 0.1.0 +# Date: 2026-02-09 +# Version: 0.2.0 # # Description: This script takes in a wide fingerprinting table pertaining # to multiple samples and plots in pdf and html formats. 
@@ -12,8 +13,7 @@ # #------------------------------------------------------------------------------- - -rm(list=ls()) +rm(list=ls()) library(argparse, quietly = T) library(plyr, quietly = T) @@ -23,14 +23,112 @@ library(tidyverse, quietly = T) library(scales, quietly = T) library(ggforce, quietly = T) library(gtools, quietly = T) -library(plotly) library(htmlwidgets) library(ggiraph) -library(reshape2) + `%notin%` <- Negate(`%in%`) `%notlike%` <- Negate(`%like%`) + +#function to size the dots +calculate_point_size <- function(x,y) { + n_x <- length(unique(x)) + n_y <- length(unique(y)) + + #define your plot size (in inches) + plot_width_in <- 20 + plot_height_in <- 20 + + #convert to mm (1 inch = 25.4 mm) + plot_width_mm <- plot_width_in * 25.4 + plot_height_mm <- plot_height_in * 25.4 + + #calculate tile size in mm + tile_width_mm <- plot_width_mm / n_x + tile_height_mm <- plot_height_mm / n_y + + #max circle diameter (fits inside smallest tile dimension) + max_diameter_mm <- min(tile_width_mm, tile_height_mm) + + #approximate max point size for geom_point (radius in mm) + max_point_size <- max_diameter_mm + + return(max_point_size) +} + + +#function to create static plots +static_plot <- function(data, max_point_size) { + n = length(unique(data$Var1)) + legend_size = max_point_size * n * .4 + + axis_text_size = if (n < 25) 14 else 10 + + p <- ggplot(data, aes(x = Var1, y = Var2)) + + geom_tile(color = "gray50", linewidth = 0.25, fill = NA) + + geom_point_interactive( + aes(size = log2_size, + fill = value, + tooltip = paste0( + "x: ", Var1, "\n", + "y: ", Var2, "\n", + "Loci Overlap: ", size, "\n", + "Correlation: ", round(value, 2) + )), + shape = 21, + color="NA" + ) + + scale_x_discrete(limits = sort(levels(data$Var1))) + + scale_y_discrete(limits = rev(sort(levels(data$Var2)))) + + scale_fill_viridis_c( + name = "Correlation", + option = "viridis", + direction = -1, + alpha = 0.75, + begin = 0, + end = 1, + limits = c(-1, 1), + breaks = seq(-1, 1, by = .25), 
+ guide = guide_colorbar(direction = "vertical", + title.position = "top", + barheight = unit(legend_size, "mm"), + barwidth = unit(legend_size*.05, "mm") + )) + + scale_size_continuous( + limits = c(0, 14.2), #known max of log2(size) + range = c(0, max_point_size), + breaks = seq(2, 14, by = 4), + name = "Loci Overlap (log2)", + guide = guide_legend(direction = "vertical", + title.position = "top", + keyheight = unit(legend_size/4, "mm"), + override.aes = list( + color = "black", + stroke = 0.5 + )) + ) + + labs(title = title) + + theme_minimal() + + theme( + text = element_text(family = "Courier"), + panel.grid = element_blank(), + axis.text.x = element_text(angle = 90, hjust = 1, size = 10, color = "black"), + axis.text.y = element_text(size = 10, color = "black"), + axis.title = element_blank(), + plot.title = element_text(hjust = 0.5, size = 24, margin = margin(b = 15)), + legend.position = "right", + legend.box = "horizontal", + legend.box.just = "left", + legend.title.align = 0.5, + legend.spacing.x = unit(1, "cm"), + aspect.ratio = 1 + ) + + return(p) +} + + parser = ArgumentParser(description = 'create correlation plots for a given sample') parser$add_argument('-t', '--table', required = TRUE, @@ -39,172 +137,75 @@ parser$add_argument('-t', '--table', required = TRUE, parser$add_argument('-o', '--analysis_folder', required = TRUE, help = 'output folder') -parser$add_argument('-p', '--pool', required = TRUE, +parser$add_argument('-p', '--pool', required = FALSE, + default = "fp_plots", help = 'pool ID') +parser$add_argument('-f', '--filter', + action = "store_true", + default = FALSE, + help = "create pool levelel plots instead of extended plots" +) + args = parser$parse_args() -all_fp_gbcm_final = fread(args$table, sep = '\t') +fingerprints = fread(args$table, sep = '\t') outdir = args$analysis_folder sample = args$pool -all_fp_gbcm_final <- all_fp_gbcm_final %>% select(-contains(c('Loci_hg19', 'Loci_hg38'))) -cols <- grep("VAF", names(all_fp_gbcm_final), 
value = TRUE) -#print(class(all_fp_gbcm_final)) -all_fp_gbcm_final <- all_fp_gbcm_final[, ..cols] -for ( col in 1:ncol(all_fp_gbcm_final)){ - colnames(all_fp_gbcm_final)[col] <- sub("VAF_", "", colnames(all_fp_gbcm_final)[col]) +#format data +fingerprints <- fingerprints %>% select(-contains(c('Loci_hg19', 'Loci_hg38'))) +cols <- grep("VAF", names(fingerprints), value = TRUE) +fingerprints <- fingerprints[, ..cols] + +for ( col in 1:ncol(fingerprints)){ + colnames(fingerprints)[col] <- sub("VAF_", "", colnames(fingerprints)[col]) +} + +title = paste("Pool:", sample,"; ", nrow(fingerprints)," Loci used",sep = "") + +fp_matrix <- data.matrix(fingerprints) +fp_matrix = cor(as.matrix(fp_matrix), method = c("pearson"), use = "pairwise.complete.obs") + +fp_long <- reshape2::melt(fp_matrix) +observations = crossprod(!is.na(fingerprints)) +obs_long <- reshape2::melt(observations) +final <- data.frame(fp_long, size = obs_long$value) + +#calculate log2 size column +final$log2_size <- log2(final$size) + +if (args$filter) { + + if (identical(args$pool, "fp_plots")) { + message("A pool ID is required to create pool level plots") + quit(status = 1) + } + + message("Creating pool level plots") + type="pool" + + final = final %>% filter(grepl(args$pool, Var1) & grepl(args$pool, Var2)) + final = droplevels(final) + +} else { + message("Creating extended plots") + type="extended" } -title = paste("Patient:", sample,"; ", nrow(all_fp_gbcm_final)," Loci used",sep = "") - -all_fp_gbcm_final_matrix <- data.matrix(all_fp_gbcm_final) -all_fp_gbcm_final_matrix = cor(as.matrix(all_fp_gbcm_final_matrix), method = c("pearson"), use = "pairwise.complete.obs") -write.table(all_fp_gbcm_final_matrix, paste(outdir,"/",sample,'_correlations.tab', sep = ''), sep = '\t',quote=F) - -gbcm_data_long <- reshape2::melt(all_fp_gbcm_final_matrix) -gbcm_observation = crossprod(!is.na(all_fp_gbcm_final)) -gbcm_obs_long <- reshape2::melt(gbcm_observation) -gbcm_combo_data <- data.frame(gbcm_data_long, size = 
gbcm_obs_long$value) - -# plot -#pdf(paste(outdir,"/",sample,'_sample-to-sample.pdf', sep = ""), width = 25, height = 25) - -n_x <- length(unique(gbcm_combo_data$Var1)) -n_y <- length(unique(gbcm_combo_data$Var2)) - -# Define your plot size (in inches) -plot_width_in <- 20 -plot_height_in <- 20 - -# Convert to mm (1 inch = 25.4 mm) -plot_width_mm <- plot_width_in * 25.4 -plot_height_mm <- plot_height_in * 25.4 - -# Calculate tile size in mm -tile_width_mm <- plot_width_mm / n_x -tile_height_mm <- plot_height_mm / n_y - -# Max circle diameter (fits inside smallest tile dimension) -max_diameter_mm <- min(tile_width_mm, tile_height_mm) - -# Approximate max point size for geom_point (radius in mm) -max_point_size <- max_diameter_mm - -# Calculate log2 size column -gbcm_combo_data$log2_size <- log2(gbcm_combo_data$size) -#print(gbcm_combo_data$log2_size) - - -gbcm_combo_data$Var1 <- factor(gbcm_combo_data$Var1, levels = mixedsort(unique(gbcm_combo_data$Var1))) -gbcm_combo_data$Var2 <- factor(gbcm_combo_data$Var2, levels = mixedsort(unique(gbcm_combo_data$Var2))) - -p <- ggplot(gbcm_combo_data, aes(x = Var1, y = Var2)) + - geom_tile(color = "black", linewidth = 0.5, fill = NA) + - geom_point(aes(size = log2_size, fill = value), shape = 21, color = "black") + - #geom_text(aes(label = size), color = "white", size = 4) + - scale_x_discrete(limits = sort(levels(gbcm_combo_data$Var1))) + - scale_y_discrete(limits = sort(levels(gbcm_combo_data$Var2))) + - scale_fill_viridis_c( - name = "Correlation", - option = "viridis", - direction = -1, - alpha = 0.75, - begin = 0, - end = 1, - limits = c(-1, 1), - guide = guide_colorbar(direction = "vertical", - title.position = "top" - )) + - scale_size_continuous(limits = c(0, 14.2), # known max of log2(size) - range = c(0, max_point_size), - name = "Sites (log2)", - guide = guide_legend(direction = "vertical", - title.position = "top") - ) + - #scale_size_identity(name = "Sites (log2)", - # guide = guide_legend(direction = "vertical", - 
# title.position = "top"), - # breaks = rescale(c(2, 5, 10, 14.2), to = c(1, 10), from = c(0, max_log2)), - # labels = c("2", "5", "10", "14.2")) + - - #scale_size_continuous(name = "Sites (log2)", - #range = c(4, 32), - #guide = guide_legend(direction = "vertical", - #title.position = "top")) + - labs(title = title) + - theme_minimal() + - theme( - panel.grid = element_blank(), - axis.text.x = element_text(angle = 90, hjust = 1, size = 10, color = "black"), - axis.text.y = element_text(size = 10, color = "black"), - axis.title = element_blank(), - plot.title = element_text(hjust = 0.5, size = 20, margin = margin(b = 15)), - legend.position = "right", - legend.box = "horizontal", - legend.box.just = "left", - legend.title.align = 0.5, - legend.spacing.x = unit(1, "cm"), - aspect.ratio = 1 - ) - -p2 <- ggplot(gbcm_combo_data, aes(x = Var1, y = Var2)) + - geom_tile(color = "black", linewidth = 0.5, fill = NA) + - geom_point_interactive( - aes(size = log2_size, - fill = value, - tooltip = paste0( - "x: ", Var1, "\n", - "y: ", Var2, "\n", - "Size: ", size, "\n", - "Correlation: ", round(value, 2) - )), - shape = 21, - color = "black" - ) + - scale_x_discrete(limits = sort(levels(gbcm_combo_data$Var1))) + - scale_y_discrete(limits = sort(levels(gbcm_combo_data$Var2))) + - scale_fill_viridis_c( - name = "Correlation", - option = "viridis", - direction = -1, - alpha = 0.75, - begin = 0, - end = 1, - limits = c(-1, 1), - guide = guide_colorbar(direction = "vertical", - title.position = "top" - )) + - scale_size_continuous(limits = c(0, 14.2), # known max of log2(size) - range = c(0, max_point_size), - name = "Sites (log2)", - guide = guide_legend(direction = "vertical", - title.position = "top") - ) + - labs(title = title) + - theme_minimal() + - theme( - text = element_text(family = "Courier"), - panel.grid = element_blank(), - axis.text.x = element_text(angle = 90, hjust = 1, size = 10, color = "black"), - axis.text.y = element_text(size = 10, color = "black"), - 
axis.title = element_blank(), - plot.title = element_text(hjust = 0.5, size = 24, margin = margin(b = 15)), - legend.position = "right", - legend.box = "horizontal", - legend.box.just = "left", - legend.title.align = 0.5, - legend.spacing.x = unit(1, "cm"), - aspect.ratio = 1, - ) - - -pg = girafe(ggobj = p2, width_svg = 25, height_svg = 25, - options = list(opts_tooltip(css = "padding:5pt; font-size:16pt; color:white; background-color:black;"))) - -saveWidget(pg, paste(outdir,"/",sample,'_interactive4.html', sep = ""), selfcontained = TRUE) - - -ggsave(paste(outdir,"/",sample,'_gbcm_sample-to-sample4.pdf', sep = ""), plot = p, width = 25, height = 25, units = "in", device = cairo_pdf) -write.table(gbcm_observation, paste(outdir,"/",sample,'_observations.tab', sep = ''), sep = '\t') +#get max point size +max_point_size = calculate_point_size(final$Var1, final$Var2) + +#create static plot +s <- static_plot(final, max_point_size) +ggsave(paste(outdir,"/",sample,"_", type, '.pdf', sep = ""), plot = s, width = 25, height = 25, units = "in", device = cairo_pdf) + +#create interactive plot +i = girafe(ggobj = s, width_svg = 25, height_svg = 25, + options = list(opts_tooltip(css = "padding:5pt; font-size:16pt; color:white; background-color:black;"))) +saveWidget(i, paste(outdir,"/",sample,"_", type,'.html', sep = ""), selfcontained = TRUE) + +#save tables +write.table(observations, paste(outdir,"/",sample, '_observations.tab', sep = ''), sep = '\t') +write.table(fp_matrix, paste(outdir,"/",sample, '_correlations.tab', sep = ''), sep = '\t') diff --git a/modules/msk/custom/fingerprintcorrelation/tests/main.nf.test b/modules/msk/custom/fingerprintcorrelation/tests/main.nf.test index 00d54193..8142af7b 100644 --- a/modules/msk/custom/fingerprintcorrelation/tests/main.nf.test +++ b/modules/msk/custom/fingerprintcorrelation/tests/main.nf.test @@ -66,6 +66,7 @@ nextflow_process { process { """ input[0] = CUSTOM_FINGERPRINTCOMBINE.out.combined_fp_tsv + input[1] = "" """ } } @@ 
-91,6 +92,7 @@ nextflow_process { process { """ input[0] = [[id:'thispool'], file("$baseDir/modules/msk/custom/fingerprintcombine/tests/loci_mapping.tsv", checkIfExists:true)] + input[1] = "" """ } } diff --git a/modules/msk/custom/fingerprintcorrelation/tests/main.nf.test.snap b/modules/msk/custom/fingerprintcorrelation/tests/main.nf.test.snap index c261cb94..13fe33ac 100644 --- a/modules/msk/custom/fingerprintcorrelation/tests/main.nf.test.snap +++ b/modules/msk/custom/fingerprintcorrelation/tests/main.nf.test.snap @@ -27,9 +27,9 @@ ], "meta": { "nf-test": "0.9.3", - "nextflow": "25.10.2" + "nextflow": "25.10.3" }, - "timestamp": "2025-12-18T20:20:30.919363465" + "timestamp": "2026-02-11T12:20:48.405942771" }, "sarscov2 - bam": { "content": [ @@ -38,7 +38,7 @@ { "id": null }, - "batch_correlations.tab:md5,4622cb8a7eff25e7bbd28ed23b74b239" + "fp_plots_correlations.tab:md5,dbc55d8829950501d3ed2db9a832165c" ] ], [ @@ -46,7 +46,7 @@ { "id": null }, - "batch_observations.tab:md5,858d6d115a4da81652bb98dcc8b8077f" + "fp_plots_observations.tab:md5,858d6d115a4da81652bb98dcc8b8077f" ] ], [ @@ -59,8 +59,8 @@ ], "meta": { "nf-test": "0.9.3", - "nextflow": "25.10.2" + "nextflow": "25.10.3" }, - "timestamp": "2025-12-18T20:20:24.501583538" + "timestamp": "2026-02-11T12:20:41.807879336" } } \ No newline at end of file diff --git a/subworkflows/msk/fingerprint_gbcms/main.nf b/subworkflows/msk/fingerprint_gbcms/main.nf index 1cdd7ea9..bc63c22f 100644 --- a/subworkflows/msk/fingerprint_gbcms/main.nf +++ b/subworkflows/msk/fingerprint_gbcms/main.nf @@ -57,16 +57,18 @@ workflow FINGERPRINT_GBCMS { FINGERPRINT_GBCMS_BATCH ( all_fps, ch_liftover_loci_mapping, - default_genome + default_genome, + [] ) + combined_fp_tsv = FINGERPRINT_GBCMS_BATCH.out.combined_fp_tsv } else { - FINGERPRINT_GBCMS_BATCH.out.combined_fp_tsv = Channel.empty() + combined_fp_tsv = Channel.empty() } emit: fp_tsv_from_bam = CUSTOM_FINGERPRINTVCFPARSER.out.tsv // channel: [ val(meta), tsv ] fp_tsv = all_fps // 
channel: [ val(meta), tsv ] contamination_tsv = CUSTOM_FINGERPRINTCONTAMINATION.out.contamination_tsv // channel: [ val(meta), contamination_tsv ] - combined_fp_tsv = FINGERPRINT_GBCMS_BATCH.out.combined_fp_tsv // channel: [ tsv ] + combined_fp_tsv = combined_fp_tsv // channel: [ tsv ] } diff --git a/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test.snap b/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test.snap index c0e01878..414032c9 100644 --- a/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test.snap +++ b/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test.snap @@ -19,6 +19,22 @@ ] ], "1": [ + [ + { + "id": "test", + "sample": "test" + }, + "test.fp.tsv:md5,c467328eb3c7fb534b555b83b0227206" + ], + [ + { + "id": "test2", + "sample": "test2" + }, + "test2.fp.tsv:md5,c3fbcee584048e9bc4fc93bc6ca487d2" + ] + ], + "2": [ [ { "id": "test", @@ -34,7 +50,7 @@ "test2.contamination.tsv:md5,2eb950d4d5e0f9b4f7ae53d41d22fb5f" ] ], - "2": [ + "3": [ [ { "id": "defaultbatch" @@ -81,13 +97,29 @@ }, "test2.fp.tsv:md5,c3fbcee584048e9bc4fc93bc6ca487d2" ] + ], + "fp_tsv_from_bam": [ + [ + { + "id": "test", + "sample": "test" + }, + "test.fp.tsv:md5,c467328eb3c7fb534b555b83b0227206" + ], + [ + { + "id": "test2", + "sample": "test2" + }, + "test2.fp.tsv:md5,c3fbcee584048e9bc4fc93bc6ca487d2" + ] ] } ], "meta": { "nf-test": "0.9.3", - "nextflow": "25.10.2" + "nextflow": "25.10.3" }, - "timestamp": "2026-01-08T23:23:53.894051098" + "timestamp": "2026-02-10T14:58:55.67145979" } } \ No newline at end of file diff --git a/subworkflows/msk/fingerprint_gbcms_batch/main.nf b/subworkflows/msk/fingerprint_gbcms_batch/main.nf index 9f2e10c3..19a19169 100644 --- a/subworkflows/msk/fingerprint_gbcms_batch/main.nf +++ b/subworkflows/msk/fingerprint_gbcms_batch/main.nf @@ -4,9 +4,10 @@ include { CUSTOM_FINGERPRINTCORRELATION } from '../../../modules/msk/custom/fing workflow FINGERPRINT_GBCMS_BATCH { take: - ch_fp // channel: [ val(meta), [ bam ] ] + ch_fp // channel: [ val(meta), [ bam ] 
] ch_liftover_loci_mapping // channel: [ liftover_loci_mapping ] default_genome + filter_terms // channel: filterterm main: @@ -17,13 +18,14 @@ workflow FINGERPRINT_GBCMS_BATCH { if (meta.pool) { meta2.id = meta.pool } - [meta2, tsv, meta.id, meta.genome ?: default_genome, meta.group ?: "default" ] + [meta2, tsv, meta.id, meta.genome ?: default_genome, meta.patient ?: meta.sample ] }.groupTuple(by:[0]), ch_liftover_loci_mapping.first() ) CUSTOM_FINGERPRINTCORRELATION( - CUSTOM_FINGERPRINTCOMBINE.out.combined_fp_tsv + CUSTOM_FINGERPRINTCOMBINE.out.combined_fp_tsv, + filter_terms.unique() ) emit: diff --git a/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test b/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test index c1bd3701..d705e2c7 100644 --- a/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test +++ b/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test @@ -59,6 +59,8 @@ nextflow_workflow { input[0] = CUSTOM_FINGERPRINTVCFPARSER.out.tsv input[1] = [file("$baseDir/modules/msk/custom/fingerprintcombine/tests/loci_mapping.tsv", checkIfExists:true)] input[2] = "hg19" + input[3] = Channel.empty() + //input[3] = Channel.of("") """ } } From 6c7624bfbeb3a7dd4b580ce0cfbea67527b05e19 Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Tue, 24 Feb 2026 18:00:10 -0500 Subject: [PATCH 38/52] fix grouping --- subworkflows/msk/fingerprint_gbcms_batch/main.nf | 3 --- 1 file changed, 3 deletions(-) diff --git a/subworkflows/msk/fingerprint_gbcms_batch/main.nf b/subworkflows/msk/fingerprint_gbcms_batch/main.nf index 19a19169..5dab82ee 100644 --- a/subworkflows/msk/fingerprint_gbcms_batch/main.nf +++ b/subworkflows/msk/fingerprint_gbcms_batch/main.nf @@ -15,9 +15,6 @@ workflow FINGERPRINT_GBCMS_BATCH { ch_fp .map{ meta, tsv -> def meta2 = [id:'defaultbatch'] - if (meta.pool) { - meta2.id = meta.pool - } [meta2, tsv, meta.id, meta.genome ?: default_genome, meta.patient ?: meta.sample ] }.groupTuple(by:[0]), ch_liftover_loci_mapping.first() From 
eeedcad580f3a1c050c817afd8e585fc32d527ea Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Thu, 26 Feb 2026 12:37:06 -0500 Subject: [PATCH 39/52] cleanup and fix filter for unpaired samples --- subworkflows/msk/fingerprint_gbcms/main.nf | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/subworkflows/msk/fingerprint_gbcms/main.nf b/subworkflows/msk/fingerprint_gbcms/main.nf index bc63c22f..3ea916cc 100644 --- a/subworkflows/msk/fingerprint_gbcms/main.nf +++ b/subworkflows/msk/fingerprint_gbcms/main.nf @@ -18,8 +18,6 @@ workflow FINGERPRINT_GBCMS { main: - println ch_fp_loci_vcf.getClass() - println ch_fasta.getClass() GBCMS( ch_bam @@ -28,8 +26,6 @@ workflow FINGERPRINT_GBCMS { .map{ meta, bam, bai, vcf -> [ meta, bam, bai, vcf, meta.id + ".fp.vcf" ] }.view(), ch_fasta.first(), ch_fastafai.first() - //ch_fasta.view().map{ if (it[0] instanceof Map){ it[1] } else { it }}.first(), - //ch_fastafai.view().map{ if (it[0] instanceof Map){ it[1] } else { it }}.first() ) @@ -48,7 +44,7 @@ workflow FINGERPRINT_GBCMS { } unpaired_fps = all_fps - .filter{ meta, tsv -> meta.id != meta.case_id || meta.control_id == null } + .filter{ meta, tsv -> ! 
meta.control_id } .map{ meta, tsv -> [ meta, tsv, [] ] } CUSTOM_FINGERPRINTCONTAMINATION ( paired_fps.mix(unpaired_fps).view() ) From 5e7d9f1c40a7e2a490245bf9af46163adb369c76 Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Thu, 26 Feb 2026 22:50:57 -0500 Subject: [PATCH 40/52] cleanup, remove fingerprint_gbcms_batch from fingerprint_gbcms, and fix pool processing --- .../msk/custom/fingerprintcorrelation/main.nf | 5 ++-- subworkflows/msk/fingerprint_gbcms/main.nf | 23 ++----------------- .../msk/fingerprint_gbcms/tests/main.nf.test | 8 ++----- .../fingerprint_gbcms/tests/main.nf.test.snap | 20 ++-------------- .../msk/fingerprint_gbcms_batch/main.nf | 13 +++++++---- .../tests/main.nf.test | 3 +-- .../tests/main.nf.test.snap | 8 +++---- 7 files changed, 23 insertions(+), 57 deletions(-) diff --git a/modules/msk/custom/fingerprintcorrelation/main.nf b/modules/msk/custom/fingerprintcorrelation/main.nf index 1eec8ff8..c5179e0c 100644 --- a/modules/msk/custom/fingerprintcorrelation/main.nf +++ b/modules/msk/custom/fingerprintcorrelation/main.nf @@ -24,8 +24,9 @@ process CUSTOM_FINGERPRINTCORRELATION { script: def args = task.ext.args ?: '' - def prefix = meta.id ?: "batch" - def filter_args = (filter_term && filter_term != "") ? "-p ${filter_term} -f" : "" + prefix = meta.id ?: "batch" + def pool_arg = "-p ${prefix}" + filter_args = (filter_term && filter_term != "") ? 
pool_arg + " -f" : pool_arg """ plot_gbcm.R \\ -t ${combined_fp_tsv} \\ diff --git a/subworkflows/msk/fingerprint_gbcms/main.nf b/subworkflows/msk/fingerprint_gbcms/main.nf index 3ea916cc..c61fe690 100644 --- a/subworkflows/msk/fingerprint_gbcms/main.nf +++ b/subworkflows/msk/fingerprint_gbcms/main.nf @@ -1,7 +1,6 @@ include { GBCMS } from '../../../modules/msk/gbcms/main' include { CUSTOM_FINGERPRINTVCFPARSER } from '../../../modules/msk/custom/fingerprintvcfparser/main' include { CUSTOM_FINGERPRINTCONTAMINATION } from '../../../modules/msk/custom/fingerprintcontamination/main' -include { FINGERPRINT_GBCMS_BATCH } from '../fingerprint_gbcms_batch/main' workflow FINGERPRINT_GBCMS { @@ -10,11 +9,8 @@ workflow FINGERPRINT_GBCMS { ch_bai // channel: [ val(meta), [ bai ] ] ch_fp_tsv // channel: [ val(meta), [ tsv ] ] ch_fp_loci_vcf // channel: [ val(meta), [ vcf ] ] - ch_liftover_loci_mapping // channel: [ liftover_loci_mapping ] ch_fasta // channel: [ fasta ] ch_fastafai // channel: [ fastafai ] - default_genome // channel: [ genome ] - run_correlation main: @@ -23,13 +19,11 @@ workflow FINGERPRINT_GBCMS { ch_bam .combine(ch_bai, by:[0]) .combine(ch_fp_loci_vcf.map{ if ( [it].flatten().size() > 1){ it[1] } else { it }}.first()) - .map{ meta, bam, bai, vcf -> [ meta, bam, bai, vcf, meta.id + ".fp.vcf" ] }.view(), + .map{ meta, bam, bai, vcf -> [ meta, bam, bai, vcf, meta.id + ".fp.vcf" ] }, ch_fasta.first(), ch_fastafai.first() ) - - CUSTOM_FINGERPRINTVCFPARSER ( GBCMS.out.variant_file ) all_fps = CUSTOM_FINGERPRINTVCFPARSER.out.tsv.mix(ch_fp_tsv) @@ -47,24 +41,11 @@ workflow FINGERPRINT_GBCMS { .filter{ meta, tsv -> ! 
meta.control_id } .map{ meta, tsv -> [ meta, tsv, [] ] } - CUSTOM_FINGERPRINTCONTAMINATION ( paired_fps.mix(unpaired_fps).view() ) - - if (run_correlation) { - FINGERPRINT_GBCMS_BATCH ( - all_fps, - ch_liftover_loci_mapping, - default_genome, - [] - ) - combined_fp_tsv = FINGERPRINT_GBCMS_BATCH.out.combined_fp_tsv - } else { - combined_fp_tsv = Channel.empty() - } + CUSTOM_FINGERPRINTCONTAMINATION ( paired_fps.mix(unpaired_fps) ) emit: fp_tsv_from_bam = CUSTOM_FINGERPRINTVCFPARSER.out.tsv // channel: [ val(meta), tsv ] fp_tsv = all_fps // channel: [ val(meta), tsv ] contamination_tsv = CUSTOM_FINGERPRINTCONTAMINATION.out.contamination_tsv // channel: [ val(meta), contamination_tsv ] - combined_fp_tsv = combined_fp_tsv // channel: [ tsv ] } diff --git a/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test b/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test index 171495b5..c76cea90 100644 --- a/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test +++ b/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test @@ -8,7 +8,6 @@ nextflow_workflow { tag "subworkflows" tag "subworkflows_msk" tag "subworkflows/fingerprint_gbcms" - tag "subworkflows/fingerprint_gbcms_batch" tag "gbcms" tag "custom/fingerprintvcfparser" tag "custom/fingerprintcontamination" @@ -40,11 +39,8 @@ nextflow_workflow { ) input[2] = Channel.empty() input[3] = Channel.of(file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true)) - input[4] = [file("$baseDir/modules/msk/custom/fingerprintcombine/tests/loci_mapping.tsv", checkIfExists:true)] - input[5] = Channel.of(file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)) - input[6] = Channel.of(file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)) - input[7] = "hg19" - input[8] = true + input[4] = Channel.of(file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)) + input[5] = 
Channel.of(file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)) """ } } diff --git a/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test.snap b/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test.snap index 414032c9..76692254 100644 --- a/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test.snap +++ b/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test.snap @@ -50,22 +50,6 @@ "test2.contamination.tsv:md5,2eb950d4d5e0f9b4f7ae53d41d22fb5f" ] ], - "3": [ - [ - { - "id": "defaultbatch" - }, - "0DPfilter_ALL_FP.txt:md5,2b376a207fd1bd6bec55fa765e3a3947" - ] - ], - "combined_fp_tsv": [ - [ - { - "id": "defaultbatch" - }, - "0DPfilter_ALL_FP.txt:md5,2b376a207fd1bd6bec55fa765e3a3947" - ] - ], "contamination_tsv": [ [ { @@ -118,8 +102,8 @@ ], "meta": { "nf-test": "0.9.3", - "nextflow": "25.10.3" + "nextflow": "26.01.1" }, - "timestamp": "2026-02-10T14:58:55.67145979" + "timestamp": "2026-02-26T22:35:39.868458916" } } \ No newline at end of file diff --git a/subworkflows/msk/fingerprint_gbcms_batch/main.nf b/subworkflows/msk/fingerprint_gbcms_batch/main.nf index 5dab82ee..1a739db3 100644 --- a/subworkflows/msk/fingerprint_gbcms_batch/main.nf +++ b/subworkflows/msk/fingerprint_gbcms_batch/main.nf @@ -7,14 +7,18 @@ workflow FINGERPRINT_GBCMS_BATCH { ch_fp // channel: [ val(meta), [ bam ] ] ch_liftover_loci_mapping // channel: [ liftover_loci_mapping ] default_genome - filter_terms // channel: filterterm + pool // channel: [ poolid ] main: CUSTOM_FINGERPRINTCOMBINE( ch_fp - .map{ meta, tsv -> - def meta2 = [id:'defaultbatch'] + .combine(pool.ifEmpty("").unique()) + .filter{meta, tsv, pool -> + (pool == "") || (! 
pool) || (pool == meta.pool) + } + .map{ meta, tsv,pool -> + def meta2 = [id:pool] [meta2, tsv, meta.id, meta.genome ?: default_genome, meta.patient ?: meta.sample ] }.groupTuple(by:[0]), ch_liftover_loci_mapping.first() @@ -22,7 +26,8 @@ workflow FINGERPRINT_GBCMS_BATCH { CUSTOM_FINGERPRINTCORRELATION( CUSTOM_FINGERPRINTCOMBINE.out.combined_fp_tsv, - filter_terms.unique() + [] + ) emit: diff --git a/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test b/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test index d705e2c7..2754c19a 100644 --- a/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test +++ b/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test @@ -59,8 +59,7 @@ nextflow_workflow { input[0] = CUSTOM_FINGERPRINTVCFPARSER.out.tsv input[1] = [file("$baseDir/modules/msk/custom/fingerprintcombine/tests/loci_mapping.tsv", checkIfExists:true)] input[2] = "hg19" - input[3] = Channel.empty() - //input[3] = Channel.of("") + input[3] = Channel.empty() """ } } diff --git a/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test.snap b/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test.snap index 3441d510..5080aa9f 100644 --- a/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test.snap +++ b/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test.snap @@ -5,7 +5,7 @@ "0": [ [ { - "id": "defaultbatch" + "id": "" }, "0DPfilter_ALL_FP.txt:md5,66113c255cf1f52e27802183764a406d" ] @@ -13,7 +13,7 @@ "combined_fp_tsv": [ [ { - "id": "defaultbatch" + "id": "" }, "0DPfilter_ALL_FP.txt:md5,66113c255cf1f52e27802183764a406d" ] @@ -22,8 +22,8 @@ ], "meta": { "nf-test": "0.9.3", - "nextflow": "25.10.2" + "nextflow": "26.01.1" }, - "timestamp": "2025-12-17T13:30:57.724162129" + "timestamp": "2026-02-26T22:07:57.665276743" } } \ No newline at end of file From f8d47f16ac51ba40fe95acc81c145f35ca9f8edb Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Fri, 27 Feb 2026 10:14:01 -0500 Subject: [PATCH 41/52] remove alphabetic axis 
sorting from correlation plot --- .../fingerprintcorrelation/resources/usr/bin/plot_gbcm.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/msk/custom/fingerprintcorrelation/resources/usr/bin/plot_gbcm.R b/modules/msk/custom/fingerprintcorrelation/resources/usr/bin/plot_gbcm.R index 7e985759..b0503983 100755 --- a/modules/msk/custom/fingerprintcorrelation/resources/usr/bin/plot_gbcm.R +++ b/modules/msk/custom/fingerprintcorrelation/resources/usr/bin/plot_gbcm.R @@ -79,8 +79,8 @@ static_plot <- function(data, max_point_size) { shape = 21, color="NA" ) + - scale_x_discrete(limits = sort(levels(data$Var1))) + - scale_y_discrete(limits = rev(sort(levels(data$Var2)))) + + scale_x_discrete(limits = levels(data$Var1)) + + scale_y_discrete(limits = rev(levels(data$Var2))) + scale_fill_viridis_c( name = "Correlation", option = "viridis", From 79c9cbf3b424e5d47dcc89449ddeffa98419ad52 Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Fri, 20 Mar 2026 22:41:52 -0400 Subject: [PATCH 42/52] fix custom/fingerprintcorrelation snapshot file --- .../fingerprintcorrelation/tests/main.nf.test.snap | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/modules/msk/custom/fingerprintcorrelation/tests/main.nf.test.snap b/modules/msk/custom/fingerprintcorrelation/tests/main.nf.test.snap index 13fe33ac..42ebc83b 100644 --- a/modules/msk/custom/fingerprintcorrelation/tests/main.nf.test.snap +++ b/modules/msk/custom/fingerprintcorrelation/tests/main.nf.test.snap @@ -27,9 +27,9 @@ ], "meta": { "nf-test": "0.9.3", - "nextflow": "25.10.3" + "nextflow": "26.01.1" }, - "timestamp": "2026-02-11T12:20:48.405942771" + "timestamp": "2026-03-20T22:20:19.897509559" }, "sarscov2 - bam": { "content": [ @@ -38,7 +38,7 @@ { "id": null }, - "fp_plots_correlations.tab:md5,dbc55d8829950501d3ed2db9a832165c" + "batch_correlations.tab:md5,dbc55d8829950501d3ed2db9a832165c" ] ], [ @@ -46,7 +46,7 @@ { "id": null }, - 
"fp_plots_observations.tab:md5,858d6d115a4da81652bb98dcc8b8077f" + "batch_observations.tab:md5,858d6d115a4da81652bb98dcc8b8077f" ] ], [ @@ -59,8 +59,8 @@ ], "meta": { "nf-test": "0.9.3", - "nextflow": "25.10.3" + "nextflow": "26.01.1" }, - "timestamp": "2026-02-11T12:20:41.807879336" + "timestamp": "2026-03-20T22:20:13.979058971" } } \ No newline at end of file From f7f509b59fcfcaa3ae540c760084b32ad082235b Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Fri, 20 Mar 2026 22:44:55 -0400 Subject: [PATCH 43/52] add new module custom/fingerprintmislabels/ --- .../fingerprintmislabels/environment.yml | 11 + .../msk/custom/fingerprintmislabels/main.nf | 45 +++ .../msk/custom/fingerprintmislabels/meta.yml | 99 ++++++ .../usr/bin/unexpected_match_mismatch.R | 320 ++++++++++++++++++ .../tests/correlations.tab | 0 .../fingerprintmislabels/tests/main.nf.test | 125 +++++++ .../tests/main.nf.test.snap | 123 +++++++ .../tests/nextflow.config | 8 + .../tests/observations.tab | 0 .../tests/sample_sheet.csv | 3 + 10 files changed, 734 insertions(+) create mode 100644 modules/msk/custom/fingerprintmislabels/environment.yml create mode 100644 modules/msk/custom/fingerprintmislabels/main.nf create mode 100644 modules/msk/custom/fingerprintmislabels/meta.yml create mode 100755 modules/msk/custom/fingerprintmislabels/resources/usr/bin/unexpected_match_mismatch.R create mode 100644 modules/msk/custom/fingerprintmislabels/tests/correlations.tab create mode 100644 modules/msk/custom/fingerprintmislabels/tests/main.nf.test create mode 100644 modules/msk/custom/fingerprintmislabels/tests/main.nf.test.snap create mode 100644 modules/msk/custom/fingerprintmislabels/tests/nextflow.config create mode 100644 modules/msk/custom/fingerprintmislabels/tests/observations.tab create mode 100644 modules/msk/custom/fingerprintmislabels/tests/sample_sheet.csv diff --git a/modules/msk/custom/fingerprintmislabels/environment.yml b/modules/msk/custom/fingerprintmislabels/environment.yml new file mode 
100644 index 00000000..db92c11c --- /dev/null +++ b/modules/msk/custom/fingerprintmislabels/environment.yml @@ -0,0 +1,11 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::r-argparse=2.3.1 + - conda-forge::r-data.table=1.17.8 + - conda-forge::r-dplyr=1.1.4 + - conda-forge::r-plyr=1.8.9 + - conda-forge::r-tidyverse=2.0.0 diff --git a/modules/msk/custom/fingerprintmislabels/main.nf b/modules/msk/custom/fingerprintmislabels/main.nf new file mode 100644 index 00000000..4a212860 --- /dev/null +++ b/modules/msk/custom/fingerprintmislabels/main.nf @@ -0,0 +1,45 @@ +process CUSTOM_FINGERPRINTMISLABELS { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'docker://community.wave.seqera.io/library/r-argparse_r-data.table_r-dplyr_r-plyr_r-tidyverse:8c0daffb3624cb66': + 'community.wave.seqera.io/library/r-argparse_r-data.table_r-dplyr_r-plyr_r-tidyverse:8c0daffb3624cb66' }" + + input: + tuple val(meta), path(correlations_tab), path(observations_tab) + path(sample_sheet) + + output: + tuple val(meta), path("*_unexpected_match.pdf"), emit: unexpected_match_pdf + tuple val(meta), path("*_unexpected_match.txt"), emit: unexpected_match_txt + tuple val(meta), path("*_unexpected_mismatch.pdf"), emit: unexpected_mismatch_pdf + tuple val(meta), path("*_unexpected_mismatch.txt"), emit: unexpected_mismatch_txt + tuple val("${task.process}"), val('unexpected_match_mismatch.R'), val("0.1.0"), emit: versions_fingerprintmislabels, topic: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + unexpected_match_mismatch.R \\ + -r ${prefix} \\ + -o ./ \\ + -i ${sample_sheet} \\ + -c ${correlations_tab} \\ 
+ -n ${observations_tab} \\ + ${args} + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}_unexpected_match.pdf + touch ${prefix}_unexpected_match.txt + touch ${prefix}_unexpected_mismatch.pdf + touch ${prefix}_unexpected_mismatch.txt + """ +} diff --git a/modules/msk/custom/fingerprintmislabels/meta.yml b/modules/msk/custom/fingerprintmislabels/meta.yml new file mode 100644 index 00000000..b42087df --- /dev/null +++ b/modules/msk/custom/fingerprintmislabels/meta.yml @@ -0,0 +1,99 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "custom_fingerprintmislabels" +description: "Identify unexpected matches and mismatches from fingerprint correlations and observations based on patient labels" +version: "0.1.0" +keywords: + - fingerprint + - mislabels + - qc + - sample_swap +tools: + - "custom": + description: "In-house R script for detecting unexpected sample matches and mismatches." + homepage: "" + documentation: "" + tool_dev_url: "" + doi: "" + licence: null + identifier: null + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'pool1' ]` + - correlations_tab: + type: file + description: Fingerprint correlations file from CUSTOM_FINGERPRINTCORRELATION + pattern: "*_correlations.tab" + ontologies: + - edam: http://edamontology.org/format_3475 + - observations_tab: + type: file + description: Fingerprint observations file from CUSTOM_FINGERPRINTCORRELATION + pattern: "*_observations.tab" + ontologies: + - edam: http://edamontology.org/format_3475 + - - sample_sheet: + type: file + description: CSV sample sheet with columns sample, patient, is_donor + pattern: "*.csv" + ontologies: + - edam: http://edamontology.org/format_3752 + +output: + unexpected_match_pdf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'pool1' ]` + - "*_unexpected_match.pdf": + type: file + description: PDF plots of unexpected match analysis + pattern: "*_unexpected_match.pdf" + unexpected_match_txt: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'pool1' ]` + - "*_unexpected_match.txt": + type: file + description: Table of flagged unexpected matches + pattern: "*_unexpected_match.txt" + unexpected_mismatch_pdf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'pool1' ]` + - "*_unexpected_mismatch.pdf": + type: file + description: PDF plots of unexpected mismatch analysis + pattern: "*_unexpected_mismatch.pdf" + unexpected_mismatch_txt: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'pool1' ]` + - "*_unexpected_mismatch.txt": + type: file + description: Table of flagged unexpected mismatches + pattern: "*_unexpected_mismatch.txt" + versions_fingerprintmislabels: + - - ${task.process}: + type: string + description: The name of the process + - unexpected_match_mismatch.R: + type: string + description: The name of the tool + - 0.1.0: + type: eval + description: The version of the tool +authors: + - "@anoronh4" +maintainers: + - "@anoronh4" diff --git a/modules/msk/custom/fingerprintmislabels/resources/usr/bin/unexpected_match_mismatch.R b/modules/msk/custom/fingerprintmislabels/resources/usr/bin/unexpected_match_mismatch.R new file mode 100755 index 00000000..2f1e14ea --- /dev/null +++ b/modules/msk/custom/fingerprintmislabels/resources/usr/bin/unexpected_match_mismatch.R @@ -0,0 +1,320 @@ +#!/usr/bin/env Rscript + +#------------------------------------------------------------------------------- +# Script: unexpected_match_mismatch.R +# Author: Erika Gedvilaite +# Date: 2026-03-10 +# Version: 0.1.0 +# +# Description: This script takes in fingerprint correlations and observation +# counts and identifies patient mismatches and matches based on patient 
labels. +# +#------------------------------------------------------------------------------- + +rm(list=ls()) +library(plyr, quietly = T) +library(dplyr, quietly = T) +library(data.table, quietly = T) +library(tidyverse, quietly = T) +library(argparse, quietly = T) + +`%notin%` <- Negate(`%in%`) +`%notlike%` <- Negate(`%like%`) + +parser = ArgumentParser(description = 'Generate Unexpected Match and Mismatch results for FPv3 (TRACE)') +parser$add_argument('-r', '--run_id', required = TRUE, + help = 'Sequencing Run') +parser$add_argument('-o', '--output_folder', required = TRUE, + help = 'Output folder') +parser$add_argument('-i', '--sample_sheet', required = TRUE, + help = 'Sample Sheet') +parser$add_argument('-c', '--correlations', required = TRUE, + help = 'Path to fingerprint correlations file') +parser$add_argument('-n', '--observations', required = TRUE, + help = 'Path to fingerprint observations file') +args = parser$parse_args() +theme_set(theme_classic()) + +# Helper: build seq() breakpoints for cut(); expands range when all values +# fall within the same integer interval (avoids "invalid number of intervals"). 
+make_corr_breaks <- function(x) { + lo <- floor(min(x, na.rm = TRUE)) + hi <- ceiling(max(x, na.rm = TRUE)) + if (lo == hi) { lo <- lo - 1L; hi <- hi + 1L } + seq(lo, hi, by = 0.1) +} + +# Setting up input collection + +poolID = args$run_id +samplesheetpath = args$sample_sheet +outputpath = args$output_folder + +print(paste("Correlations file: ", args$correlations, sep = "")) +print(paste("Observations file: ", args$observations, sep = "")) +print(paste("Output directory: ",outputpath, sep = "")) +print(paste("Sample Sheet: ",samplesheetpath, sep = "")) +print(paste("Run ID: ",poolID, sep = "")) + + +sample_sheet = read.csv(samplesheetpath,header = T, sep = ",", check.names = F) +sample_sheet = sample_sheet %>% select(sample, patient, is_donor) %>% unique() +sample_sheet$patient = str_pad(sample_sheet$patient, 8, pad = "0") +colnames(sample_sheet) = c("Sample","Patient","IsDonor") + +sample_sheet <- sample_sheet %>% + mutate( + Transplant = case_when(tolower(as.character(IsDonor)) == "true" ~ "Donor Found", # read.csv may parse true/false as logical TRUE/FALSE + TRUE ~ "No Donor Found") + ) + +sample_sheet_transplant = sample_sheet %>% select(Patient, Transplant) %>% unique() +sample_sheet_transplant = sample_sheet_transplant[sample_sheet_transplant$Transplant == "Donor Found",] + +sample_sheet = sample_sheet %>% select(Patient, Sample) %>% unique() +sample_sheet = merge(sample_sheet, sample_sheet_transplant, by = "Patient", all.x = T) +sample_sheet$Transplant[is.na(sample_sheet$Transplant)==T] <- "No Donor Found" + +correlation_f = read.csv(args$correlations, header = T, sep = "\t", check.names = F) + +observations_f = read.csv(args$observations, header = T, sep = "\t", check.names = F) + +correlation_f = as.data.frame(correlation_f) +observations_f = as.data.frame(observations_f) + +correlation_f[is.na(correlation_f)] <- 0 +observations_f[is.na(observations_f)] <- 0 + +correlation_wide_df <- as.data.frame(correlation_f) + +correlation_wide_df$Assay1 <- rownames(correlation_wide_df) +rownames(correlation_wide_df) <- NULL 
+correlation_wide_df <- correlation_wide_df[, c("Assay1", colnames(correlation_wide_df))] + +correlation_wide_df <- correlation_wide_df[, !(names(correlation_wide_df) %in% c("Assay1.1"))] + +correlation_long_df <- melt(setDT(correlation_wide_df), id.vars = c("Assay1"), variable.name = "Sample") + +colnames(correlation_long_df) = c("Sample1", "Sample2", "Correlation") + +correlation_long_df = correlation_long_df %>% select(Sample1, Sample2, Correlation) %>% unique() + +observations_wide_df <- as.data.frame(observations_f) + +observations_wide_df$Assay1 <- rownames(observations_wide_df) +rownames(observations_wide_df) <- NULL +observations_wide_df <- observations_wide_df[, c("Assay1", colnames(observations_wide_df))] + +observations_wide_df <- observations_wide_df[, !(names(observations_wide_df) %in% c("Assay1.1"))] + +observations_long_df <- melt(setDT(observations_wide_df), id.vars = c("Assay1"), variable.name = "Sample") + +colnames(observations_long_df) = c("Sample1", "Sample2", "Observation") + +observations_long_df = observations_long_df %>% select(Sample1, Sample2, Observation) %>% unique() + +correlation_long_df = merge(correlation_long_df, observations_long_df) %>% unique() %>% drop_na() + +correlation_long_df$Correlation = round(correlation_long_df$Correlation,2) + +correlation_long_df = merge(correlation_long_df, sample_sheet, by.x = "Sample1", by.y = "Sample", all.x = T) +correlation_long_df = merge(correlation_long_df, sample_sheet, by.x = "Sample2", by.y = "Sample", all.x = T) + +colnames(correlation_long_df) = c("Sample2", "Sample1", "Correlation", "Observation", "Patient1", "Donor_Status1", "Patient2", "Donor_Status2") + +## Data clean-out +### 1. Remove same sample-to-sample comparison (assume 1 for these) +### 2. Only keeping one pair per match (removing pair duplicates) + +key <- t(apply(correlation_long_df[, c("Sample1", "Sample2")], 1, sort)) +correlation_long_df_clean <- correlation_long_df[!duplicated(key), ] + +## Analysis organization +### 1. 
Unexpected match: Sample 1 and Sample 2 are coming from DIFFERENT Patient ID +### 2. Unexpected mismatch: Sample 1 and Sample 2 are coming from the SAME Patient ID + +unexpected_match = correlation_long_df_clean[correlation_long_df_clean$Patient1!=correlation_long_df_clean$Patient2,] +unexpected_mismatch = correlation_long_df_clean[correlation_long_df_clean$Patient1==correlation_long_df_clean$Patient2,] + +## Unexpected match calculation - sample + +unexpected_match_sample = copy(unexpected_match) +unexpected_match_sample$Loci_Status = ifelse(unexpected_match_sample$Observation >= 10, "Loci Pass","Loci Low") +unexpected_match_sample$Donor_Status = ifelse((unexpected_match_sample$Donor_Status1 == "Donor Found" | unexpected_match_sample$Donor_Status2 == "Donor Found"), "Donor Present","No Donor") + +unexpected_match_sample$Pool_mean = round(mean(unexpected_match_sample$Correlation),2) +unexpected_match_sample$Pool_SD = round(sd(unexpected_match_sample$Correlation),2) + +unexpected_match_sample$Cohort_mean = 0.02 +unexpected_match_sample$Cohort_SD = 0.07 + +unexpected_match_sample$Pool_meanplussd = unexpected_match_sample$Pool_mean + unexpected_match_sample$Pool_SD +unexpected_match_sample$Pool_meanplussd = round(unexpected_match_sample$Pool_meanplussd,2) + +unexpected_match_sample$Mean_plusSD = unexpected_match_sample$Cohort_mean+unexpected_match_sample$Cohort_SD +unexpected_match_sample$Mean_plus2SD = unexpected_match_sample$Cohort_mean+2*unexpected_match_sample$Cohort_SD +unexpected_match_sample$Mean_plus25SD = unexpected_match_sample$Cohort_mean+2.5*unexpected_match_sample$Cohort_SD +unexpected_match_sample$Mean_minusSD = unexpected_match_sample$Cohort_mean-unexpected_match_sample$Cohort_SD +unexpected_match_sample$Mean_minus2SD = unexpected_match_sample$Cohort_mean-2*unexpected_match_sample$Cohort_SD +unexpected_match_sample$Mean_minus25SD = unexpected_match_sample$Cohort_mean-2.5*unexpected_match_sample$Cohort_SD + + +unexpected_match_sample$Match_Status = 
ifelse(unexpected_match_sample$Correlation >= unexpected_match_sample$Mean_plus25SD, "Matching","Pass") + +unexpected_match_sample$key = paste(unexpected_match_sample$Sample1, unexpected_match_sample$Sample2, sep=":") + +unexpected_match_sample_intervals_corr = unexpected_match_sample %>% + mutate(interval = cut(Correlation, breaks = make_corr_breaks(Correlation), include.lowest = TRUE)) %>% + count(interval) + +intervals_set <- c("[-1,-0.9]", "(-0.9,-0.8]", "(-0.8,-0.7]", "(-0.7,-0.6]", "(-0.6,-0.5]","(-0.5,-0.4]", "(-0.4,-0.3]", "(-0.3,-0.2]", "(-0.2,-0.1]", "(-0.1,0]", "(0,0.1]", "(0.1,0.2]", "(0.2,0.3]", "(0.3,0.4]", "(0.4,0.5]", "(0.5,0.6]", "(0.6,0.7]", "(0.7,0.8]", "(0.8,0.9]","(0.9,1]") +intervals_df <- data.frame( + interval = intervals_set +) + +intervals_df = merge(intervals_df, unexpected_match_sample_intervals_corr, all.x = T) +intervals_df$n[is.na(intervals_df$n)] <- 0 +intervals_df$percent = round(intervals_df$n/nrow(unexpected_match_sample),digits = 2) + +intervals_df$interval <- factor(intervals_df$interval, levels = c("[-1,-0.9]", "(-0.9,-0.8]", "(-0.8,-0.7]", "(-0.7,-0.6]", "(-0.6,-0.5]","(-0.5,-0.4]", "(-0.4,-0.3]", "(-0.3,-0.2]", "(-0.2,-0.1]", "(-0.1,0]", "(0,0.1]", "(0.1,0.2]", "(0.2,0.3]", "(0.3,0.4]", "(0.4,0.5]", "(0.5,0.6]", "(0.6,0.7]", "(0.7,0.8]", "(0.8,0.9]","(0.9,1]")) + +pdf(file = paste(outputpath,"/",poolID,"_unexpected_match.pdf",sep = ""), width = 10, height = 6) + + +group_colors <- c(Pass = "#D3D3D3", Matching = "#CC6600") + +ggplot(unexpected_match_sample, aes(x = key, y = Correlation)) + + geom_point(aes(colour = Match_Status, shape = Donor_Status), size = 1.0) + + geom_hline(aes(yintercept = Mean_plus25SD, linetype = "Mean+2.5SD"), size = 0.5) + + geom_hline(aes(yintercept = Mean_minus25SD, linetype = "Mean-2.5SD"), size = 0.5) + + geom_hline(aes(yintercept = Mean_plusSD, linetype = "Mean+SD"), size = 0.5) + + geom_hline(aes(yintercept = Mean_minusSD, linetype = "Mean-SD"), size = 0.5) + + 
theme(axis.title.x=element_blank(),axis.text.x=element_blank(),axis.ticks.x=element_blank()) + + labs(colour = "Match Status") + + labs(shape = "Donor Status") + + ylim(-1,1) + + scale_color_manual(values = group_colors) + + labs(linetype = "Limits") + + ggtitle(paste("Pool:",poolID,sep=""),subtitle = "Unexpected Match Overall") + +ggplot(unexpected_match_sample, aes(x = key, y = Correlation)) + + geom_point(aes(colour = Match_Status, shape = Donor_Status), size = 1.0) + + geom_hline(aes(yintercept = Mean_plus25SD, linetype = "Mean+2.5SD"), size = 0.5) + + geom_hline(aes(yintercept = Mean_minus25SD, linetype = "Mean-2.5SD"), size = 0.5) + + geom_hline(aes(yintercept = Mean_plusSD, linetype = "Mean+SD"), size = 0.5) + + geom_hline(aes(yintercept = Mean_minusSD, linetype = "Mean-SD"), size = 0.5) + + theme(axis.title.x=element_blank(),axis.text.x=element_blank(),axis.ticks.x=element_blank()) + + labs(colour = "Match Status") + + labs(shape = "Donor Status") + + ylim(-1,1) + + scale_color_manual(values = group_colors) + + labs(linetype = "Limits") + + facet_wrap(~Patient2, scales = "free_x") + + ggtitle(paste("Pool:",poolID, sep=""),subtitle = "Unexpected Match Overall") + +ggplot(intervals_df, aes(x=interval, y = log10(n))) + + geom_bar(stat = "identity", position = "dodge") + + geom_text(aes(label = paste(n,"\n",percent,sep="")), vjust = -0.5, color = "black")+ + annotate("text", x=1, y=5, label= paste("Threshold Mean + SD: ",unexpected_match_sample$Mean_plusSD,sep=""), hjust = 0) + + annotate("text", x=1, y=4.5, label= paste("Threshold Mean + 2.5*SD: ",unexpected_match_sample$Mean_plus25SD,sep=""), hjust = 0) + + annotate("text", x=1, y=4, label= paste("Threshold Mean - SD: ",unexpected_match_sample$Mean_minusSD,sep=""), hjust = 0) + + annotate("text", x=1, y=3.5, label= paste("Threshold Mean - 2.5SD: ",unexpected_match_sample$Mean_minus25SD,sep=""), hjust = 0) + + annotate("text", x=1, y=3., label= paste("Pool Mean + SD: 
",unexpected_match_sample$Pool_meanplussd,sep=""), hjust = 0) + + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) + + xlab("Intervals") + + ylab("log10(Compared Pairs)") + + ggtitle(paste("Pool:",poolID, sep=""),subtitle = "Unexpected Match Intervals") + +dev.off() + +unexpected_match_sample_table = unexpected_match_sample[unexpected_match_sample$Match_Status == "Matching",] + +unexpected_match_sample_table = unexpected_match_sample_table %>% select(Sample1, Sample2, Correlation, Observation, Mean_minusSD, Mean_minus2SD, Mean_minus25SD, Mean_plusSD, Mean_plus2SD, Mean_plus25SD, Match_Status, Loci_Status, Donor_Status) %>% unique() +write.table(unexpected_match_sample_table,file = paste(outputpath,"/",poolID,"_unexpected_match.txt",sep = ""), append = F, quote = F, sep = "\t", row.names = F) + +## Unexpected mismatch calculation - sample + +unexpected_mismatch_sample = copy(unexpected_mismatch) +unexpected_mismatch_sample$Loci_Status = ifelse(unexpected_mismatch_sample$Observation >= 10, "Loci Pass","Loci Low") +unexpected_mismatch_sample$Donor_Status = ifelse((unexpected_mismatch_sample$Donor_Status1 == "Donor Found" | unexpected_mismatch_sample$Donor_Status2 == "Donor Found"), "Donor Present","No Donor") +unexpected_mismatch_sample$Correlation = as.numeric(unexpected_mismatch_sample$Correlation) + +unexpected_mismatch_sample$Pool_mean = round(mean(unexpected_mismatch_sample$Correlation),2) +unexpected_mismatch_sample$Pool_sd = round(sd(unexpected_mismatch_sample$Correlation),2) + +unexpected_mismatch_sample$Pool_meanminussd = unexpected_mismatch_sample$Pool_mean - unexpected_mismatch_sample$Pool_sd + +unexpected_mismatch_sample$Cohort_mean = 0.96 +unexpected_mismatch_sample$Cohort_SD = 0.07 + +unexpected_mismatch_sample$Mean_minus25SD = unexpected_mismatch_sample$Cohort_mean-2.5*unexpected_mismatch_sample$Cohort_SD +unexpected_mismatch_sample$Mean_minusSD = unexpected_mismatch_sample$Cohort_mean-unexpected_mismatch_sample$Cohort_SD 
+unexpected_mismatch_sample$Mean_minus2SD = unexpected_mismatch_sample$Cohort_mean-2*unexpected_mismatch_sample$Cohort_SD + +unexpected_mismatch_sample$Match_Status = ifelse(unexpected_mismatch_sample$Correlation <= unexpected_mismatch_sample$Mean_minus25SD, "Mismatching","Pass") + +unexpected_mismatch_sample$key = paste(unexpected_mismatch_sample$Sample1, unexpected_mismatch_sample$Sample2, sep=":") + +unexpected_mismatch_sample_intervals_corr = unexpected_mismatch_sample %>% + mutate(interval = cut(Correlation, breaks = make_corr_breaks(Correlation), include.lowest = TRUE)) %>% + count(interval) + +intervals_set <- c("[-1,-0.9]", "(-0.9,-0.8]", "(-0.8,-0.7]", "(-0.7,-0.6]", "(-0.6,-0.5]","(-0.5,-0.4]", "(-0.4,-0.3]", "(-0.3,-0.2]", "(-0.2,-0.1]", "(-0.1,0]", "(0,0.1]", "(0.1,0.2]", "(0.2,0.3]", "(0.3,0.4]", "(0.4,0.5]", "(0.5,0.6]", "(0.6,0.7]", "(0.7,0.8]", "(0.8,0.9]","(0.9,1]") +intervals_df <- data.frame( + interval = intervals_set +) + +intervals_df = merge(intervals_df, unexpected_mismatch_sample_intervals_corr, all.x = T) +intervals_df$n[is.na(intervals_df$n)] <- 0 +intervals_df$percent = round(intervals_df$n/nrow(unexpected_mismatch_sample),digits = 2) + +intervals_df$interval <- factor(intervals_df$interval, levels = c("[-1,-0.9]", "(-0.9,-0.8]", "(-0.8,-0.7]", "(-0.7,-0.6]", "(-0.6,-0.5]","(-0.5,-0.4]", "(-0.4,-0.3]", "(-0.3,-0.2]", "(-0.2,-0.1]", "(-0.1,0]", "(0,0.1]", "(0.1,0.2]", "(0.2,0.3]", "(0.3,0.4]", "(0.4,0.5]", "(0.5,0.6]", "(0.6,0.7]", "(0.7,0.8]", "(0.8,0.9]","(0.9,1]")) + + +pdf(file = paste(outputpath,"/",poolID,"_unexpected_mismatch.pdf",sep = ""), width = 10, height = 6) + +group_colors <- c(Pass = "#D3D3D3", Mismatching = "#CC6600") + + +ggplot(unexpected_mismatch_sample, aes(x = key, y = Correlation)) + + geom_point(aes(colour = Match_Status, shape = Donor_Status), size = 1.0) + + geom_hline(aes(yintercept = Mean_minus25SD, linetype = "Mean-2.5SD"), size = 0.5) + + geom_hline(aes(yintercept = Mean_minusSD, linetype = "Mean-SD"), size = 
0.5) + + theme(axis.title.x=element_blank(),axis.text.x=element_blank(),axis.ticks.x=element_blank()) + + labs(colour = "Match Status") + + labs(shape = "Donor Status") + + ylim(0,1) + + scale_color_manual(values = group_colors) + + labs(linetype = "Limits") + + ggtitle(paste("Pool:",poolID,sep=""),subtitle = "Unexpected Mismatch Overall") + +ggplot(unexpected_mismatch_sample, aes(x = key, y = Correlation)) + + geom_point(aes(colour = Match_Status, shape = Donor_Status), size = 1.0) + + geom_hline(aes(yintercept = Mean_minus25SD, linetype = "Mean-2.5SD"), size = 0.5) + + geom_hline(aes(yintercept = Mean_minusSD, linetype = "Mean-SD"), size = 0.5) + + theme(axis.title.x=element_blank(),axis.text.x=element_blank(),axis.ticks.x=element_blank()) + + labs(colour = "Match Status") + + labs(shape = "Donor Status") + + ylim(0,1) + + scale_color_manual(values = group_colors) + + labs(linetype = "Limits") + + facet_wrap(~Patient2, scales = "free_x") + + ggtitle(paste("Pool:",poolID,sep=""),subtitle = "Unexpected Mismatch Overall") + +ggplot(intervals_df, aes(x=interval, y = log10(n))) + + geom_bar(stat = "identity", position = "dodge") + + geom_text(aes(label = paste(n,"\n",percent,sep="")), vjust = -0.5, color = "black")+ + annotate("text", x=1, y=4.5, label= paste("Threshold Mean - SD: ",unexpected_mismatch_sample$Mean_minusSD,sep=""), hjust = 0) + + annotate("text", x=1, y=4, label= paste("Threshold Mean - 2.5SD: ",unexpected_mismatch_sample$Mean_minus25SD,sep=""), hjust = 0) + + annotate("text", x=1, y=3.5, label= paste("Pool Mean - SD: ",unexpected_mismatch_sample$Pool_meanminussd,sep=""), hjust = 0) + + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) + + ggtitle(paste("Pool:",poolID,sep=""),subtitle = "Unexpected Mismatch Intervals") + + xlab("Intervals") + + ylab("log10(Compared Pairs)") + +dev.off() + +unexpected_mismatch_sample_table = unexpected_mismatch_sample[unexpected_mismatch_sample$Match_Status == "Mismatching",] + 
+unexpected_mismatch_sample_table = unexpected_mismatch_sample_table %>% select(Sample1, Sample2, Correlation, Observation, Mean_minusSD, Mean_minus2SD, Mean_minus25SD, Loci_Status, Donor_Status) %>% unique() +write.table(unexpected_mismatch_sample_table,file = paste(outputpath,"/",poolID,"_unexpected_mismatch.txt",sep = ""), append = F, quote = F, sep = "\t", row.names = F) diff --git a/modules/msk/custom/fingerprintmislabels/tests/correlations.tab b/modules/msk/custom/fingerprintmislabels/tests/correlations.tab new file mode 100644 index 00000000..e69de29b diff --git a/modules/msk/custom/fingerprintmislabels/tests/main.nf.test b/modules/msk/custom/fingerprintmislabels/tests/main.nf.test new file mode 100644 index 00000000..5238ea5e --- /dev/null +++ b/modules/msk/custom/fingerprintmislabels/tests/main.nf.test @@ -0,0 +1,125 @@ +nextflow_process { + + name "Test Process CUSTOM_FINGERPRINTMISLABELS" + script "../main.nf" + process "CUSTOM_FINGERPRINTMISLABELS" + config "./nextflow.config" + + tag "modules" + tag "modules_msk" + tag "custom" + tag "custom/fingerprintmislabels" + tag "custom/fingerprintcorrelation" + tag "custom/fingerprintcombine" + tag "gbcms" + tag "custom/fingerprintvcfparser" + + test("sarscov2 - bam") { + setup { + run("GBCMS") { + script "../../../gbcms/main.nf" + process { + """ + input[0] = Channel.of( + [ + [ id:'test', sample:'test' ], + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam_bai'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true), + "variant_file.vcf" + ], + [ + [ id:'test2', sample:'test2' ], + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam_bai'], checkIfExists: true), + 
file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true), + "variant_file.vcf" + ], + ) + input[1] = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + input[2] = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + """ + } + } + run("CUSTOM_FINGERPRINTVCFPARSER") { + script "../../fingerprintvcfparser/main.nf" + process { + """ + input[0] = GBCMS.out.variant_file + """ + } + } + run("CUSTOM_FINGERPRINTCOMBINE") { + script "../../fingerprintcombine/main.nf" + process { + """ + input[0] = CUSTOM_FINGERPRINTVCFPARSER.out.tsv + .map{ meta, tsv -> + def meta2 = [id:meta.pool] + [[id:meta.pool], tsv, meta.id, "hg19", "default"] + }.groupTuple(by:[0]) + input[1] = file("$baseDir/modules/msk/custom/fingerprintcombine/tests/loci_mapping.tsv", checkIfExists:true) + """ + } + } + run("CUSTOM_FINGERPRINTCORRELATION") { + script "../../fingerprintcorrelation/main.nf" + process { + """ + input[0] = CUSTOM_FINGERPRINTCOMBINE.out.combined_fp_tsv + input[1] = "" + """ + } + } + } + + when { + process { + """ + input[0] = CUSTOM_FINGERPRINTCORRELATION.out.correlations_tab + .join(CUSTOM_FINGERPRINTCORRELATION.out.observations_tab) + input[1] = file("$baseDir/modules/msk/custom/fingerprintmislabels/tests/sample_sheet.csv", checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.unexpected_match_txt, + process.out.unexpected_mismatch_txt, + process.out.versions_fingerprintmislabels + ).match() } + ) + } + + } + + test("sarscov2 - bam - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [id:'thispool'], + file("$baseDir/modules/msk/custom/fingerprintmislabels/tests/correlations.tab", checkIfExists: true), + file("$baseDir/modules/msk/custom/fingerprintmislabels/tests/observations.tab", checkIfExists: true) + ] + input[1] = file("$baseDir/modules/msk/custom/fingerprintmislabels/tests/sample_sheet.csv", 
checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/msk/custom/fingerprintmislabels/tests/main.nf.test.snap b/modules/msk/custom/fingerprintmislabels/tests/main.nf.test.snap new file mode 100644 index 00000000..100b060e --- /dev/null +++ b/modules/msk/custom/fingerprintmislabels/tests/main.nf.test.snap @@ -0,0 +1,123 @@ +{ + "sarscov2 - bam - stub": { + "content": [ + { + "0": [ + [ + { + "id": "thispool" + }, + "thispool_unexpected_match.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "thispool" + }, + "thispool_unexpected_match.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "thispool" + }, + "thispool_unexpected_mismatch.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "thispool" + }, + "thispool_unexpected_mismatch.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + "CUSTOM_FINGERPRINTMISLABELS", + "unexpected_match_mismatch.R", + "0.1.0" + ] + ], + "unexpected_match_pdf": [ + [ + { + "id": "thispool" + }, + "thispool_unexpected_match.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "unexpected_match_txt": [ + [ + { + "id": "thispool" + }, + "thispool_unexpected_match.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "unexpected_mismatch_pdf": [ + [ + { + "id": "thispool" + }, + "thispool_unexpected_mismatch.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "unexpected_mismatch_txt": [ + [ + { + "id": "thispool" + }, + "thispool_unexpected_mismatch.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_fingerprintmislabels": [ + [ + "CUSTOM_FINGERPRINTMISLABELS", + "unexpected_match_mismatch.R", + "0.1.0" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "26.01.1" + }, + "timestamp": "2026-03-20T22:37:04.106330307" + }, + "sarscov2 - bam": { + "content": [ + [ + [ + { + "id": null + }, + 
"null_unexpected_match.txt:md5,14af9ffece921578088528e9c1663886" + ] + ], + [ + [ + { + "id": null + }, + "null_unexpected_mismatch.txt:md5,d5c8b66fd71b1e4b20ee403d822cd7b9" + ] + ], + [ + [ + "CUSTOM_FINGERPRINTMISLABELS", + "unexpected_match_mismatch.R", + "0.1.0" + ] + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "26.01.1" + }, + "timestamp": "2026-03-20T22:36:57.641442061" + } +} \ No newline at end of file diff --git a/modules/msk/custom/fingerprintmislabels/tests/nextflow.config b/modules/msk/custom/fingerprintmislabels/tests/nextflow.config new file mode 100644 index 00000000..b676d906 --- /dev/null +++ b/modules/msk/custom/fingerprintmislabels/tests/nextflow.config @@ -0,0 +1,8 @@ +process { + withName: 'CUSTOM_FINGERPRINTCOMBINE' { + ext.args = "-d 0" + } + withName: 'CUSTOM_FINGERPRINTVCFPARSER' { + ext.args = "-d 0" + } +} diff --git a/modules/msk/custom/fingerprintmislabels/tests/observations.tab b/modules/msk/custom/fingerprintmislabels/tests/observations.tab new file mode 100644 index 00000000..e69de29b diff --git a/modules/msk/custom/fingerprintmislabels/tests/sample_sheet.csv b/modules/msk/custom/fingerprintmislabels/tests/sample_sheet.csv new file mode 100644 index 00000000..19476b3f --- /dev/null +++ b/modules/msk/custom/fingerprintmislabels/tests/sample_sheet.csv @@ -0,0 +1,3 @@ +sample,patient,is_donor +test,1,false +test2,2,false From b71fbb198a2a27721b0b0227fdf90f48ea8df926 Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Mon, 23 Mar 2026 16:55:55 -0400 Subject: [PATCH 44/52] integrate custom/fingerprintmislabels into the fingerprint_gbcms_batch subworkflow --- .../msk/custom/fingerprintmislabels/main.nf | 1 + .../msk/fingerprint_gbcms_batch/main.nf | 27 ++++++- .../tests/main.nf.test | 70 ++++++++++++++++++- .../tests/main.nf.test.snap | 53 +++++++++----- 4 files changed, 129 insertions(+), 22 deletions(-) diff --git a/modules/msk/custom/fingerprintmislabels/main.nf b/modules/msk/custom/fingerprintmislabels/main.nf index 
4a212860..cf03c8a3 100644 --- a/modules/msk/custom/fingerprintmislabels/main.nf +++ b/modules/msk/custom/fingerprintmislabels/main.nf @@ -24,6 +24,7 @@ process CUSTOM_FINGERPRINTMISLABELS { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + prefix = (prefix && prefix != "") ?: "batch" """ unexpected_match_mismatch.R \\ -r ${prefix} \\ diff --git a/subworkflows/msk/fingerprint_gbcms_batch/main.nf b/subworkflows/msk/fingerprint_gbcms_batch/main.nf index 1a739db3..d4d346c9 100644 --- a/subworkflows/msk/fingerprint_gbcms_batch/main.nf +++ b/subworkflows/msk/fingerprint_gbcms_batch/main.nf @@ -1,5 +1,6 @@ include { CUSTOM_FINGERPRINTCOMBINE } from '../../../modules/msk/custom/fingerprintcombine/main' include { CUSTOM_FINGERPRINTCORRELATION } from '../../../modules/msk/custom/fingerprintcorrelation/main' +include { CUSTOM_FINGERPRINTMISLABELS } from '../../../modules/msk/custom/fingerprintmislabels/main' workflow FINGERPRINT_GBCMS_BATCH { @@ -11,6 +12,19 @@ workflow FINGERPRINT_GBCMS_BATCH { main: + ch_sample_sheet = ch_fp + .filter { meta, tsv -> meta.patient != null } + .map { meta, tsv -> + def is_donor = meta.is_donor != null ? 
meta.is_donor : false + "${meta.sample},${meta.patient},${is_donor}\n" + } + .collectFile( + name: 'sample_sheet.csv', + seed: 'sample,patient,is_donor\n', + newLine: false, + sort: true + ) + CUSTOM_FINGERPRINTCOMBINE( ch_fp .combine(pool.ifEmpty("").unique()) @@ -27,9 +41,20 @@ workflow FINGERPRINT_GBCMS_BATCH { CUSTOM_FINGERPRINTCORRELATION( CUSTOM_FINGERPRINTCOMBINE.out.combined_fp_tsv, [] + ) + CUSTOM_FINGERPRINTMISLABELS( + CUSTOM_FINGERPRINTCORRELATION.out.correlations_tab + .join(CUSTOM_FINGERPRINTCORRELATION.out.observations_tab), + ch_sample_sheet + .filter { csv -> csv.readLines().size() >= 3 } + .first() ) emit: - combined_fp_tsv = CUSTOM_FINGERPRINTCOMBINE.out.combined_fp_tsv // channel: [ val(meta), [ bam ] ] + combined_fp_tsv = CUSTOM_FINGERPRINTCOMBINE.out.combined_fp_tsv // channel: [ val(meta), tsv ] + unexpected_match_pdf = CUSTOM_FINGERPRINTMISLABELS.out.unexpected_match_pdf // channel: [ val(meta), pdf ] + unexpected_match_txt = CUSTOM_FINGERPRINTMISLABELS.out.unexpected_match_txt // channel: [ val(meta), txt ] + unexpected_mismatch_pdf = CUSTOM_FINGERPRINTMISLABELS.out.unexpected_mismatch_pdf // channel: [ val(meta), pdf ] + unexpected_mismatch_txt = CUSTOM_FINGERPRINTMISLABELS.out.unexpected_mismatch_txt // channel: [ val(meta), txt ] } diff --git a/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test b/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test index 2754c19a..00a3182a 100644 --- a/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test +++ b/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test @@ -13,9 +13,10 @@ nextflow_workflow { tag "custom/fingerprintvcfparser" tag "custom/fingerprintcombine" tag "custom/fingerprintcorrelation" + tag "custom/fingerprintmislabels" - test("sarscov2 - bam - single_end") { + test("sarscov2 - bam - single_end - no patient - fingerprintmislabels skipped") { setup { run("GBCMS"){ @@ -67,7 +68,72 @@ nextflow_workflow { then { assertAll( { assert workflow.success }, - { assert 
snapshot(workflow.out).match() } + { assert workflow.out.unexpected_match_txt == [] }, + { assert workflow.out.unexpected_mismatch_txt == [] }, + { assert snapshot(workflow.out.combined_fp_tsv).match() } + ) + } + + } + + test("sarscov2 - bam - single_end - with patient - fingerprintmislabels runs") { + + setup { + run("GBCMS"){ + script "../../../../modules/msk/gbcms/main.nf" + process { + """ + input[0] = Channel.of( + [ + [ id:'test', sample:'test', patient: 1 ], // meta map + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam_bai'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true), + "variant_file.vcf" + ], + [ + [ id:'test2', sample:'test2', patient: 2 ], // meta map + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam_bai'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true), + "variant_file.vcf" + ], + ) + input[1] = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + input[2] = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + """ + } + } + run("CUSTOM_FINGERPRINTVCFPARSER"){ + script "../../../../modules/msk/custom/fingerprintvcfparser/main.nf" + process { + """ + input[0] = GBCMS.out.variant_file + """ + } + } + } + + when { + workflow { + """ + input[0] = CUSTOM_FINGERPRINTVCFPARSER.out.tsv + input[1] = [file("$baseDir/modules/msk/custom/fingerprintcombine/tests/loci_mapping.tsv", checkIfExists:true)] + input[2] = "hg19" + input[3] = Channel.empty() + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.out.unexpected_match_txt.size() == 1 }, + { assert workflow.out.unexpected_mismatch_txt.size() == 1 
}, + { assert snapshot( + workflow.out.unexpected_match_txt, + workflow.out.unexpected_mismatch_txt + ).match() } ) } diff --git a/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test.snap b/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test.snap index 5080aa9f..be82f4fd 100644 --- a/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test.snap +++ b/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test.snap @@ -1,29 +1,44 @@ { - "sarscov2 - bam - single_end": { + "sarscov2 - bam - single_end - with patient - fingerprintmislabels runs": { "content": [ - { - "0": [ - [ - { - "id": "" - }, - "0DPfilter_ALL_FP.txt:md5,66113c255cf1f52e27802183764a406d" - ] - ], - "combined_fp_tsv": [ - [ - { - "id": "" - }, - "0DPfilter_ALL_FP.txt:md5,66113c255cf1f52e27802183764a406d" - ] + [ + [ + { + "id": "" + }, + "batch_unexpected_match.txt:md5,2db9ebf03a048e6aab1e5d51e7457429" ] - } + ], + [ + [ + { + "id": "" + }, + "batch_unexpected_mismatch.txt:md5,c5824b697fe80bcaccc43d466150866b" + ] + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "26.01.1" + }, + "timestamp": "2026-03-23T15:15:56.531495175" + }, + "sarscov2 - bam - single_end - no patient - fingerprintmislabels skipped": { + "content": [ + [ + [ + { + "id": "" + }, + "0DPfilter_ALL_FP.txt:md5,66113c255cf1f52e27802183764a406d" + ] + ] ], "meta": { "nf-test": "0.9.3", "nextflow": "26.01.1" }, - "timestamp": "2026-02-26T22:07:57.665276743" + "timestamp": "2026-03-23T15:15:34.299247621" } } \ No newline at end of file From db5795927a961f921571d6575c5798c4bf99f4ef Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Mon, 23 Mar 2026 17:14:17 -0400 Subject: [PATCH 45/52] bring components to compliance with nextflow strict syntax parsing --- modules/msk/custom/fingerprintcorrelation/main.nf | 4 ++-- subworkflows/msk/fingerprint_gbcms_batch/main.nf | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/modules/msk/custom/fingerprintcorrelation/main.nf 
b/modules/msk/custom/fingerprintcorrelation/main.nf index c5179e0c..08615284 100644 --- a/modules/msk/custom/fingerprintcorrelation/main.nf +++ b/modules/msk/custom/fingerprintcorrelation/main.nf @@ -1,5 +1,5 @@ process CUSTOM_FINGERPRINTCORRELATION { - tag {'$prefix'} + tag {"$prefix"} label 'process_single' conda "${moduleDir}/environment.yml" @@ -36,7 +36,7 @@ process CUSTOM_FINGERPRINTCORRELATION { stub: def args = task.ext.args ?: '' - def prefix = meta.id ?: "batch" + prefix = meta.id ?: "batch" """ touch ${prefix}.pdf touch ${prefix}.html diff --git a/subworkflows/msk/fingerprint_gbcms_batch/main.nf b/subworkflows/msk/fingerprint_gbcms_batch/main.nf index d4d346c9..43da90d8 100644 --- a/subworkflows/msk/fingerprint_gbcms_batch/main.nf +++ b/subworkflows/msk/fingerprint_gbcms_batch/main.nf @@ -8,7 +8,7 @@ workflow FINGERPRINT_GBCMS_BATCH { ch_fp // channel: [ val(meta), [ bam ] ] ch_liftover_loci_mapping // channel: [ liftover_loci_mapping ] default_genome - pool // channel: [ poolid ] + ch_pool // channel: [ poolid ] main: @@ -27,11 +27,11 @@ workflow FINGERPRINT_GBCMS_BATCH { CUSTOM_FINGERPRINTCOMBINE( ch_fp - .combine(pool.ifEmpty("").unique()) + .combine(ch_pool.ifEmpty("").unique()) .filter{meta, tsv, pool -> (pool == "") || (! 
pool) || (pool == meta.pool) } - .map{ meta, tsv,pool -> + .map{ meta, tsv, pool -> def meta2 = [id:pool] [meta2, tsv, meta.id, meta.genome ?: default_genome, meta.patient ?: meta.sample ] }.groupTuple(by:[0]), From 69eddef47aa8811adadafbe74dc24fee318e8827 Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Mon, 23 Mar 2026 21:18:57 -0400 Subject: [PATCH 46/52] rework logic --- .../msk/custom/fingerprintmislabels/main.nf | 2 +- .../msk/fingerprint_gbcms_batch/main.nf | 74 ++++++++++++++----- .../tests/main.nf.test | 4 +- .../tests/main.nf.test.snap | 14 ++-- 4 files changed, 68 insertions(+), 26 deletions(-) diff --git a/modules/msk/custom/fingerprintmislabels/main.nf b/modules/msk/custom/fingerprintmislabels/main.nf index cf03c8a3..d9ea1271 100644 --- a/modules/msk/custom/fingerprintmislabels/main.nf +++ b/modules/msk/custom/fingerprintmislabels/main.nf @@ -24,7 +24,7 @@ process CUSTOM_FINGERPRINTMISLABELS { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - prefix = (prefix && prefix != "") ?: "batch" + prefix = (prefix && prefix != "") ? 
prefix : "batch" """ unexpected_match_mismatch.R \\ -r ${prefix} \\ diff --git a/subworkflows/msk/fingerprint_gbcms_batch/main.nf b/subworkflows/msk/fingerprint_gbcms_batch/main.nf index 43da90d8..c30c9f33 100644 --- a/subworkflows/msk/fingerprint_gbcms_batch/main.nf +++ b/subworkflows/msk/fingerprint_gbcms_batch/main.nf @@ -1,14 +1,19 @@ -include { CUSTOM_FINGERPRINTCOMBINE } from '../../../modules/msk/custom/fingerprintcombine/main' -include { CUSTOM_FINGERPRINTCORRELATION } from '../../../modules/msk/custom/fingerprintcorrelation/main' +include { CUSTOM_FINGERPRINTCOMBINE as CUSTOM_FINGERPRINTCOMBINE_ALL } from '../../../modules/msk/custom/fingerprintcombine/main' +include { CUSTOM_FINGERPRINTCOMBINE as CUSTOM_FINGERPRINTCOMBINE_POOLS } from '../../../modules/msk/custom/fingerprintcombine/main' +include { CUSTOM_FINGERPRINTCOMBINE as CUSTOM_FINGERPRINTCOMBINE_PATIENTS } from '../../../modules/msk/custom/fingerprintcombine/main' +include { CUSTOM_FINGERPRINTCORRELATION as CUSTOM_FINGERPRINTCORRELATION_ALL } from '../../../modules/msk/custom/fingerprintcorrelation/main' +include { CUSTOM_FINGERPRINTCORRELATION as CUSTOM_FINGERPRINTCORRELATION_POOLS } from '../../../modules/msk/custom/fingerprintcorrelation/main' +include { CUSTOM_FINGERPRINTCORRELATION as CUSTOM_FINGERPRINTCORRELATION_PATIENTS } from '../../../modules/msk/custom/fingerprintcorrelation/main' include { CUSTOM_FINGERPRINTMISLABELS } from '../../../modules/msk/custom/fingerprintmislabels/main' workflow FINGERPRINT_GBCMS_BATCH { take: - ch_fp // channel: [ val(meta), [ bam ] ] + ch_fp // channel: [ val(meta), tsv ] ch_liftover_loci_mapping // channel: [ liftover_loci_mapping ] default_genome ch_pool // channel: [ poolid ] + ch_patients // channel: [ patientid ] main: @@ -25,36 +30,71 @@ workflow FINGERPRINT_GBCMS_BATCH { sort: true ) - CUSTOM_FINGERPRINTCOMBINE( + // All samples combined into a single group + CUSTOM_FINGERPRINTCOMBINE_ALL( + ch_fp + .map { meta, tsv -> + [[id:"all"], tsv, meta.id, 
meta.genome ?: default_genome, meta.patient ?: meta.sample] + }.groupTuple(by:[0]), + ch_liftover_loci_mapping.first() + ) + + // Samples grouped by pool + CUSTOM_FINGERPRINTCOMBINE_POOLS( ch_fp .combine(ch_pool.ifEmpty("").unique()) - .filter{meta, tsv, pool -> + .filter { meta, tsv, pool -> (pool == "") || (! pool) || (pool == meta.pool) } - .map{ meta, tsv, pool -> - def meta2 = [id:pool] - [meta2, tsv, meta.id, meta.genome ?: default_genome, meta.patient ?: meta.sample ] + .map { meta, tsv, pool -> + [[id:pool], tsv, meta.id, meta.genome ?: default_genome, meta.patient ?: meta.sample] }.groupTuple(by:[0]), ch_liftover_loci_mapping.first() ) - CUSTOM_FINGERPRINTCORRELATION( - CUSTOM_FINGERPRINTCOMBINE.out.combined_fp_tsv, + // Samples grouped by patient + CUSTOM_FINGERPRINTCOMBINE_PATIENTS( + ch_fp + .filter { meta, tsv -> meta.patient != null } + .combine(ch_patients.ifEmpty("").unique()) + .filter { meta, tsv, patient -> + (patient == "") || (! patient) || (patient.toString() == meta.patient.toString()) + } + .map { meta, tsv, patient -> + [[id:meta.patient.toString()], tsv, meta.id, meta.genome ?: default_genome, meta.patient ?: meta.sample] + }.groupTuple(by:[0]), + ch_liftover_loci_mapping.first() + ) + + CUSTOM_FINGERPRINTCORRELATION_ALL( + CUSTOM_FINGERPRINTCOMBINE_ALL.out.combined_fp_tsv, + [] + ) + + CUSTOM_FINGERPRINTCORRELATION_POOLS( + CUSTOM_FINGERPRINTCOMBINE_POOLS.out.combined_fp_tsv, + [] + ) + + CUSTOM_FINGERPRINTCORRELATION_PATIENTS( + CUSTOM_FINGERPRINTCOMBINE_PATIENTS.out.combined_fp_tsv, [] ) CUSTOM_FINGERPRINTMISLABELS( - CUSTOM_FINGERPRINTCORRELATION.out.correlations_tab - .join(CUSTOM_FINGERPRINTCORRELATION.out.observations_tab), + CUSTOM_FINGERPRINTCORRELATION_ALL.out.correlations_tab + .join(CUSTOM_FINGERPRINTCORRELATION_ALL.out.observations_tab), ch_sample_sheet .filter { csv -> csv.readLines().size() >= 3 } .first() ) emit: - combined_fp_tsv = CUSTOM_FINGERPRINTCOMBINE.out.combined_fp_tsv // channel: [ val(meta), tsv ] - 
unexpected_match_pdf = CUSTOM_FINGERPRINTMISLABELS.out.unexpected_match_pdf // channel: [ val(meta), pdf ] - unexpected_match_txt = CUSTOM_FINGERPRINTMISLABELS.out.unexpected_match_txt // channel: [ val(meta), txt ] - unexpected_mismatch_pdf = CUSTOM_FINGERPRINTMISLABELS.out.unexpected_mismatch_pdf // channel: [ val(meta), pdf ] - unexpected_mismatch_txt = CUSTOM_FINGERPRINTMISLABELS.out.unexpected_mismatch_txt // channel: [ val(meta), txt ] + combined_fp_tsv_all = CUSTOM_FINGERPRINTCOMBINE_ALL.out.combined_fp_tsv // channel: [ val(meta), tsv ] + combined_fp_tsv_pools = CUSTOM_FINGERPRINTCOMBINE_POOLS.out.combined_fp_tsv // channel: [ val(meta), tsv ] + combined_fp_tsv_patients = CUSTOM_FINGERPRINTCOMBINE_PATIENTS.out.combined_fp_tsv // channel: [ val(meta), tsv ] + unexpected_match_pdf = CUSTOM_FINGERPRINTMISLABELS.out.unexpected_match_pdf // channel: [ val(meta), pdf ] + unexpected_match_txt = CUSTOM_FINGERPRINTMISLABELS.out.unexpected_match_txt // channel: [ val(meta), txt ] + unexpected_mismatch_pdf = CUSTOM_FINGERPRINTMISLABELS.out.unexpected_mismatch_pdf // channel: [ val(meta), pdf ] + unexpected_mismatch_txt = CUSTOM_FINGERPRINTMISLABELS.out.unexpected_mismatch_txt // channel: [ val(meta), txt ] } diff --git a/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test b/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test index 00a3182a..7c8aa2ae 100644 --- a/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test +++ b/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test @@ -61,6 +61,7 @@ nextflow_workflow { input[1] = [file("$baseDir/modules/msk/custom/fingerprintcombine/tests/loci_mapping.tsv", checkIfExists:true)] input[2] = "hg19" input[3] = Channel.empty() + input[4] = Channel.empty() """ } } @@ -70,7 +71,7 @@ nextflow_workflow { { assert workflow.success }, { assert workflow.out.unexpected_match_txt == [] }, { assert workflow.out.unexpected_mismatch_txt == [] }, - { assert snapshot(workflow.out.combined_fp_tsv).match() } + { assert 
snapshot(workflow.out.combined_fp_tsv_all).match() } ) } @@ -121,6 +122,7 @@ nextflow_workflow { input[1] = [file("$baseDir/modules/msk/custom/fingerprintcombine/tests/loci_mapping.tsv", checkIfExists:true)] input[2] = "hg19" input[3] = Channel.empty() + input[4] = Channel.empty() """ } } diff --git a/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test.snap b/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test.snap index be82f4fd..ee7ffadd 100644 --- a/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test.snap +++ b/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test.snap @@ -4,17 +4,17 @@ [ [ { - "id": "" + "id": "all" }, - "batch_unexpected_match.txt:md5,2db9ebf03a048e6aab1e5d51e7457429" + "all_unexpected_match.txt:md5,2db9ebf03a048e6aab1e5d51e7457429" ] ], [ [ { - "id": "" + "id": "all" }, - "batch_unexpected_mismatch.txt:md5,c5824b697fe80bcaccc43d466150866b" + "all_unexpected_mismatch.txt:md5,c5824b697fe80bcaccc43d466150866b" ] ] ], @@ -22,14 +22,14 @@ "nf-test": "0.9.3", "nextflow": "26.01.1" }, - "timestamp": "2026-03-23T15:15:56.531495175" + "timestamp": "2026-03-23T21:12:34.169649457" }, "sarscov2 - bam - single_end - no patient - fingerprintmislabels skipped": { "content": [ [ [ { - "id": "" + "id": "all" }, "0DPfilter_ALL_FP.txt:md5,66113c255cf1f52e27802183764a406d" ] @@ -39,6 +39,6 @@ "nf-test": "0.9.3", "nextflow": "26.01.1" }, - "timestamp": "2026-03-23T15:15:34.299247621" + "timestamp": "2026-03-23T21:11:18.127341056" } } \ No newline at end of file From 4b5313be536228a3fb503d21381659f2d922c578 Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Thu, 26 Mar 2026 15:22:58 -0400 Subject: [PATCH 47/52] updated snap with later version of nf-test --- .../fingerprint_gbcms_batch/tests/main.nf.test.snap | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test.snap b/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test.snap index 
ee7ffadd..2f25adbe 100644 --- a/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test.snap +++ b/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test.snap @@ -18,11 +18,11 @@ ] ] ], + "timestamp": "2026-03-26T10:17:37.524850124", "meta": { - "nf-test": "0.9.3", + "nf-test": "0.9.5", "nextflow": "26.01.1" - }, - "timestamp": "2026-03-23T21:12:34.169649457" + } }, "sarscov2 - bam - single_end - no patient - fingerprintmislabels skipped": { "content": [ @@ -35,10 +35,10 @@ ] ] ], + "timestamp": "2026-03-26T10:17:13.331779222", "meta": { - "nf-test": "0.9.3", + "nf-test": "0.9.5", "nextflow": "26.01.1" - }, - "timestamp": "2026-03-23T21:11:18.127341056" + } } } \ No newline at end of file From c35dd2405fee22777fe35aa2d68ad5298d0abd3a Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Thu, 26 Mar 2026 15:25:33 -0400 Subject: [PATCH 48/52] Require explicit pool/patient IDs in FINGERPRINTCOMBINE_POOLS and _PATIENTS; remove ifEmpty pass-through fallback --- subworkflows/msk/fingerprint_gbcms_batch/main.nf | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/subworkflows/msk/fingerprint_gbcms_batch/main.nf b/subworkflows/msk/fingerprint_gbcms_batch/main.nf index c30c9f33..6a44e4a0 100644 --- a/subworkflows/msk/fingerprint_gbcms_batch/main.nf +++ b/subworkflows/msk/fingerprint_gbcms_batch/main.nf @@ -42,9 +42,9 @@ workflow FINGERPRINT_GBCMS_BATCH { // Samples grouped by pool CUSTOM_FINGERPRINTCOMBINE_POOLS( ch_fp - .combine(ch_pool.ifEmpty("").unique()) + .combine(ch_pool.unique()) .filter { meta, tsv, pool -> - (pool == "") || (! 
pool) || (pool == meta.pool) + pool == meta.pool } .map { meta, tsv, pool -> [[id:pool], tsv, meta.id, meta.genome ?: default_genome, meta.patient ?: meta.sample] @@ -55,12 +55,10 @@ workflow FINGERPRINT_GBCMS_BATCH { // Samples grouped by patient CUSTOM_FINGERPRINTCOMBINE_PATIENTS( ch_fp - .filter { meta, tsv -> meta.patient != null } - .combine(ch_patients.ifEmpty("").unique()) + .combine(ch_patients.unique()) .filter { meta, tsv, patient -> - (patient == "") || (! patient) || (patient.toString() == meta.patient.toString()) - } - .map { meta, tsv, patient -> + patient.toString() == meta.patient.toString() + }.map { meta, tsv, patient -> [[id:meta.patient.toString()], tsv, meta.id, meta.genome ?: default_genome, meta.patient ?: meta.sample] }.groupTuple(by:[0]), ch_liftover_loci_mapping.first() From 5dd6240f56a2a14de262f095642ebf9db804f400 Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Tue, 31 Mar 2026 11:45:06 -0400 Subject: [PATCH 49/52] move fingerprint modules from custom/fingerprint* to fingerprint/* --- .../tests/nextflow.config | 10 -- .../tests/stash_main.nf.test_stash | 113 ------------------ .../custom/fingerprintcorrelation/meta.yml | 51 -------- .../tests/nextflow.config | 8 -- .../tests/correlations.tab | 0 .../tests/nextflow.config | 8 -- .../tests/observations.tab | 0 .../tests/sample_sheet.csv | 3 - .../combine}/environment.yml | 0 .../combine}/main.nf | 6 +- .../combine}/meta.yml | 19 +-- .../resources/usr/bin/complete_FP_table.R | 0 .../combine}/tests/loci_mapping.tsv | 0 .../combine}/tests/main.nf.test | 18 +-- .../combine}/tests/main.nf.test.snap | 0 .../combine}/tests/nextflow.config | 2 +- .../contamination}/environment.yml | 0 .../contamination}/main.nf | 4 +- .../contamination}/meta.yml | 4 +- .../usr/bin/calculate_contamination.py | 0 .../contamination}/tests/main.nf.test | 22 ++-- .../contamination}/tests/main.nf.test.snap | 0 .../contamination/tests/nextflow.config | 10 ++ .../correlation}/environment.yml | 0 
.../correlation}/main.nf | 6 +- modules/msk/fingerprint/correlation/meta.yml | 72 +++++++++++ .../resources/usr/bin/plot_gbcm.R | 30 ++--- .../correlation}/tests/main.nf.test | 30 ++--- .../correlation}/tests/main.nf.test.snap | 0 .../correlation/tests/nextflow.config | 8 ++ .../mislabels}/environment.yml | 0 .../mislabels}/main.nf | 4 +- .../mislabels}/meta.yml | 10 +- .../usr/bin/unexpected_match_mismatch.R | 5 +- .../mislabels}/tests/main.nf.test | 36 +++--- .../mislabels}/tests/main.nf.test.snap | 0 .../mislabels/tests/nextflow.config | 8 ++ .../vcfparser}/environment.yml | 0 .../vcfparser}/main.nf | 6 +- .../vcfparser}/meta.yml | 10 +- .../usr/bin/parse_fingerprint_vcf.py | 0 .../vcfparser}/tests/main.nf.test | 10 +- .../vcfparser}/tests/main.nf.test.snap | 0 .../vcfparser}/tests/nextflow.config | 0 subworkflows/msk/fingerprint_gbcms/main.nf | 14 +-- subworkflows/msk/fingerprint_gbcms/meta.yml | 5 +- .../msk/fingerprint_gbcms/tests/main.nf.test | 4 +- .../fingerprint_gbcms/tests/nextflow.config | 8 +- .../msk/fingerprint_gbcms_batch/main.nf | 52 ++++---- .../msk/fingerprint_gbcms_batch/meta.yml | 5 +- .../tests/main.nf.test | 24 ++-- .../tests/main.nf.test.snap | 10 +- .../tests/nextflow.config | 10 +- 53 files changed, 269 insertions(+), 376 deletions(-) delete mode 100644 modules/msk/custom/fingerprintcontamination/tests/nextflow.config delete mode 100644 modules/msk/custom/fingerprintcontamination/tests/stash_main.nf.test_stash delete mode 100644 modules/msk/custom/fingerprintcorrelation/meta.yml delete mode 100644 modules/msk/custom/fingerprintcorrelation/tests/nextflow.config delete mode 100644 modules/msk/custom/fingerprintmislabels/tests/correlations.tab delete mode 100644 modules/msk/custom/fingerprintmislabels/tests/nextflow.config delete mode 100644 modules/msk/custom/fingerprintmislabels/tests/observations.tab delete mode 100644 modules/msk/custom/fingerprintmislabels/tests/sample_sheet.csv rename modules/msk/{custom/fingerprintcombine => 
fingerprint/combine}/environment.yml (100%) rename modules/msk/{custom/fingerprintcombine => fingerprint/combine}/main.nf (93%) rename modules/msk/{custom/fingerprintcombine => fingerprint/combine}/meta.yml (83%) rename modules/msk/{custom/fingerprintcombine => fingerprint/combine}/resources/usr/bin/complete_FP_table.R (100%) rename modules/msk/{custom/fingerprintcombine => fingerprint/combine}/tests/loci_mapping.tsv (100%) rename modules/msk/{custom/fingerprintcombine => fingerprint/combine}/tests/main.nf.test (89%) rename modules/msk/{custom/fingerprintcombine => fingerprint/combine}/tests/main.nf.test.snap (100%) rename modules/msk/{custom/fingerprintcombine => fingerprint/combine}/tests/nextflow.config (50%) rename modules/msk/{custom/fingerprintcontamination => fingerprint/contamination}/environment.yml (100%) rename modules/msk/{custom/fingerprintcontamination => fingerprint/contamination}/main.nf (93%) rename modules/msk/{custom/fingerprintcontamination => fingerprint/contamination}/meta.yml (96%) rename modules/msk/{custom/fingerprintcontamination => fingerprint/contamination}/resources/usr/bin/calculate_contamination.py (100%) rename modules/msk/{custom/fingerprintcontamination => fingerprint/contamination}/tests/main.nf.test (83%) rename modules/msk/{custom/fingerprintcontamination => fingerprint/contamination}/tests/main.nf.test.snap (100%) create mode 100644 modules/msk/fingerprint/contamination/tests/nextflow.config rename modules/msk/{custom/fingerprintcorrelation => fingerprint/correlation}/environment.yml (100%) rename modules/msk/{custom/fingerprintcorrelation => fingerprint/correlation}/main.nf (92%) create mode 100644 modules/msk/fingerprint/correlation/meta.yml rename modules/msk/{custom/fingerprintcorrelation => fingerprint/correlation}/resources/usr/bin/plot_gbcm.R (96%) rename modules/msk/{custom/fingerprintcorrelation => fingerprint/correlation}/tests/main.nf.test (82%) rename modules/msk/{custom/fingerprintcorrelation => 
fingerprint/correlation}/tests/main.nf.test.snap (100%) create mode 100644 modules/msk/fingerprint/correlation/tests/nextflow.config rename modules/msk/{custom/fingerprintmislabels => fingerprint/mislabels}/environment.yml (100%) rename modules/msk/{custom/fingerprintmislabels => fingerprint/mislabels}/main.nf (93%) rename modules/msk/{custom/fingerprintmislabels => fingerprint/mislabels}/meta.yml (91%) rename modules/msk/{custom/fingerprintmislabels => fingerprint/mislabels}/resources/usr/bin/unexpected_match_mismatch.R (99%) rename modules/msk/{custom/fingerprintmislabels => fingerprint/mislabels}/tests/main.nf.test (79%) rename modules/msk/{custom/fingerprintmislabels => fingerprint/mislabels}/tests/main.nf.test.snap (100%) create mode 100644 modules/msk/fingerprint/mislabels/tests/nextflow.config rename modules/msk/{custom/fingerprintvcfparser => fingerprint/vcfparser}/environment.yml (100%) rename modules/msk/{custom/fingerprintvcfparser => fingerprint/vcfparser}/main.nf (87%) rename modules/msk/{custom/fingerprintvcfparser => fingerprint/vcfparser}/meta.yml (91%) rename modules/msk/{custom/fingerprintvcfparser => fingerprint/vcfparser}/resources/usr/bin/parse_fingerprint_vcf.py (100%) rename modules/msk/{custom/fingerprintvcfparser => fingerprint/vcfparser}/tests/main.nf.test (91%) rename modules/msk/{custom/fingerprintvcfparser => fingerprint/vcfparser}/tests/main.nf.test.snap (100%) rename modules/msk/{custom/fingerprintvcfparser => fingerprint/vcfparser}/tests/nextflow.config (100%) diff --git a/modules/msk/custom/fingerprintcontamination/tests/nextflow.config b/modules/msk/custom/fingerprintcontamination/tests/nextflow.config deleted file mode 100644 index fbd2b2d0..00000000 --- a/modules/msk/custom/fingerprintcontamination/tests/nextflow.config +++ /dev/null @@ -1,10 +0,0 @@ -process { - - withName: 'CUSTOM_FINGERPRINTCONTAMINATION' { - ext.args = "-d 0" - } - - withName: 'CUSTOM_FINGERPRINTVCFPARSER' { - ext.args = "-d 0" - } -} diff --git 
a/modules/msk/custom/fingerprintcontamination/tests/stash_main.nf.test_stash b/modules/msk/custom/fingerprintcontamination/tests/stash_main.nf.test_stash deleted file mode 100644 index ba4496a5..00000000 --- a/modules/msk/custom/fingerprintcontamination/tests/stash_main.nf.test_stash +++ /dev/null @@ -1,113 +0,0 @@ -nextflow_process { - - name "Test Process CUSTOM_FINGERPRINTCONTAMINATION" - script "../main.nf" - process "CUSTOM_FINGERPRINTCONTAMINATION" - config "./nextflow.config" - - tag "modules" - tag "modules_msk" - tag "custom" - tag "custom/fingerprintcontamination" - - test("homo sapiens - chr 22 bam") { - setup { - run("GBCMS"){ - script "../../../gbcms/main.nf" - - params{ - input = "NA12878_GIAB.chr22.vcf" - } - - process { - """ - input[0] = [ - [ id:'test', sample:'test' ], // meta map - file("https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/homo_sapiens/illumina/bam/NA12878.chr22.bam", checkIfExists:true), - file("https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/homo_sapiens/illumina/bam/NA12878.chr22.bam.bai", checkIfExists:true), - file("$baseDir/modules/msk/custom/fingerprintcontamination/tests/NA12878_GIAB.chr22.vcf", checkIfExists:true), - "variant_file.vcf" - ] - input[1] = file(params.test_data_mskcc['calculate_noise']['test_chr22_fa'], checkIfExists: true) - input[2] = file(params.test_data_mskcc['calculate_noise']['test_chr22_fa_fai'], checkIfExists: true) - //input[1] = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) - //input[2] = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) - """ - } - } - run("CUSTOM_FINGERPRINTVCFPARSER"){ - script "../../fingerprintvcfparser/main.nf" - process { - """ - input[0] = GBCMS.out.variant_file - """ - } - } - } - - when { - process { - """ - input[0] = CUSTOM_FINGERPRINTVCFPARSER.out.tsv.map{ meta, tsv -> [meta,tsv,[]]} - """ - } - } - - then { - assertAll( - { assert 
process.success }, - { assert snapshot(process.out).match() } - ) - } - - } - - test("sarscov2 - bam - stub") { - - options "-stub" - - setup { - run("GBCMS"){ - script "../../../gbcms/main.nf" - process { - """ - input[0] = [ - [ id:'test', sample:'test' ], // meta map - file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam_bai'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true), - "variant_file.vcf" - ] - input[1] = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) - input[2] = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) - """ - } - } - run("CUSTOM_FINGERPRINTVCFPARSER"){ - script "../../fingerprintvcfparser/main.nf" - process { - """ - input[0] = GBCMS.out.variant_file - """ - } - } - } - - when { - process { - """ - input[0] = CUSTOM_FINGERPRINTVCFPARSER.out.tsv.map{ meta, tsv -> [meta,tsv,[]]} - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - - } - -} diff --git a/modules/msk/custom/fingerprintcorrelation/meta.yml b/modules/msk/custom/fingerprintcorrelation/meta.yml deleted file mode 100644 index 8e5e1d37..00000000 --- a/modules/msk/custom/fingerprintcorrelation/meta.yml +++ /dev/null @@ -1,51 +0,0 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json -name: "custom_fingerprintcorrelation" -description: null -keywords: - - sort - - example - - genomics -tools: - - "custom": - description: "" - homepage: "" - documentation: "" - tool_dev_url: "" - doi: "" - licence: null - identifier: null - -input: - - - meta: {} - - combined_fp_tsv: {} -output: - heatmap_pdf: - - - meta: {} - - "*_gbcm_sample-to-sample4.pdf": {} - heatmap_html: - - - meta: {} - - "*_interactive4.html": {} - 
observations_tab: - - - meta: {} - - "*_observations.tab": {} - versions_fingerprintcorrelation: - - - ${task.process}: - type: string - description: The name of the process - - plot_gbcm.R: {} - - 0.1.0: {} -topics: - versions: - - - ${task.process}: - type: string - description: The name of the process - - plot_gbcm.R: - type: string - description: The name of the tool - - 0.1.0: - type: eval - description: The expression to obtain the version of the tool -authors: - - "@anoronh4" -maintainers: - - "@anoronh4" diff --git a/modules/msk/custom/fingerprintcorrelation/tests/nextflow.config b/modules/msk/custom/fingerprintcorrelation/tests/nextflow.config deleted file mode 100644 index b676d906..00000000 --- a/modules/msk/custom/fingerprintcorrelation/tests/nextflow.config +++ /dev/null @@ -1,8 +0,0 @@ -process { - withName: 'CUSTOM_FINGERPRINTCOMBINE' { - ext.args = "-d 0" - } - withName: 'CUSTOM_FINGERPRINTVCFPARSER' { - ext.args = "-d 0" - } -} diff --git a/modules/msk/custom/fingerprintmislabels/tests/correlations.tab b/modules/msk/custom/fingerprintmislabels/tests/correlations.tab deleted file mode 100644 index e69de29b..00000000 diff --git a/modules/msk/custom/fingerprintmislabels/tests/nextflow.config b/modules/msk/custom/fingerprintmislabels/tests/nextflow.config deleted file mode 100644 index b676d906..00000000 --- a/modules/msk/custom/fingerprintmislabels/tests/nextflow.config +++ /dev/null @@ -1,8 +0,0 @@ -process { - withName: 'CUSTOM_FINGERPRINTCOMBINE' { - ext.args = "-d 0" - } - withName: 'CUSTOM_FINGERPRINTVCFPARSER' { - ext.args = "-d 0" - } -} diff --git a/modules/msk/custom/fingerprintmislabels/tests/observations.tab b/modules/msk/custom/fingerprintmislabels/tests/observations.tab deleted file mode 100644 index e69de29b..00000000 diff --git a/modules/msk/custom/fingerprintmislabels/tests/sample_sheet.csv b/modules/msk/custom/fingerprintmislabels/tests/sample_sheet.csv deleted file mode 100644 index 19476b3f..00000000 --- 
a/modules/msk/custom/fingerprintmislabels/tests/sample_sheet.csv +++ /dev/null @@ -1,3 +0,0 @@ -sample,patient,is_donor -test,1,false -test2,2,false diff --git a/modules/msk/custom/fingerprintcombine/environment.yml b/modules/msk/fingerprint/combine/environment.yml similarity index 100% rename from modules/msk/custom/fingerprintcombine/environment.yml rename to modules/msk/fingerprint/combine/environment.yml diff --git a/modules/msk/custom/fingerprintcombine/main.nf b/modules/msk/fingerprint/combine/main.nf similarity index 93% rename from modules/msk/custom/fingerprintcombine/main.nf rename to modules/msk/fingerprint/combine/main.nf index ffb406d5..25e0cae6 100644 --- a/modules/msk/custom/fingerprintcombine/main.nf +++ b/modules/msk/fingerprint/combine/main.nf @@ -1,4 +1,4 @@ -process CUSTOM_FINGERPRINTCOMBINE { +process FINGERPRINT_COMBINE { tag "$meta.id" label 'process_single' @@ -14,8 +14,8 @@ process CUSTOM_FINGERPRINTCOMBINE { path(liftover_loci_mapping) output: - tuple val(meta), path("*DPfilter_ALL_FP.txt") , emit: combined_fp_tsv - tuple val("${task.process}"), val('complete_FP_table.R'), val('0.1.0'), emit: versions_fingerprintcombine, topic: versions + tuple val(meta), path("*DPfilter_ALL_FP.txt") , emit: combined_fp_tsv + tuple val("${task.process}"), val('complete_FP_table.R'), val('0.1.0'), emit: versions_combine, topic: versions when: task.ext.when == null || task.ext.when diff --git a/modules/msk/custom/fingerprintcombine/meta.yml b/modules/msk/fingerprint/combine/meta.yml similarity index 83% rename from modules/msk/custom/fingerprintcombine/meta.yml rename to modules/msk/fingerprint/combine/meta.yml index 7ed95b68..726cb86d 100644 --- a/modules/msk/custom/fingerprintcombine/meta.yml +++ b/modules/msk/fingerprint/combine/meta.yml @@ -1,5 +1,5 @@ # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json -name: "custom_fingerprintcombine" +name: "fingerprint_combine" description: | A module to 
combine multiple fingerprint TSV files into a single comprehensive table, with optional liftover of loci coordinates. @@ -10,9 +10,9 @@ keywords: - tsv - correlation tools: - - "custom": + - "complete_FP_table.R": description: "A custom R script to combine fingerprint TSV files" - homepage: "https://github.com/mskcc-omics-workflows/modules/tree/main/modules/msk/custom/fingerprintcombine/meta.yml" + homepage: "https://github.com/mskcc-omics-workflows/modules/tree/main/modules/msk/fingerprint/combine/meta.yml" identifier: "" input: - - meta: @@ -55,18 +55,7 @@ output: pattern: "*DPfilter_ALL_FP.txt" ontologies: - edam: http://edamontology.org/format_3750 # TSV - versions_fingerprintcombine: - - - ${task.process}: - type: string - description: The name of the process - - complete_FP_table.R: - type: string - description: The name of the tool - - 0.1.0: - type: string - description: Version of the custom script -topics: - versions: + versions_combine: - - ${task.process}: type: string description: The name of the process diff --git a/modules/msk/custom/fingerprintcombine/resources/usr/bin/complete_FP_table.R b/modules/msk/fingerprint/combine/resources/usr/bin/complete_FP_table.R similarity index 100% rename from modules/msk/custom/fingerprintcombine/resources/usr/bin/complete_FP_table.R rename to modules/msk/fingerprint/combine/resources/usr/bin/complete_FP_table.R diff --git a/modules/msk/custom/fingerprintcombine/tests/loci_mapping.tsv b/modules/msk/fingerprint/combine/tests/loci_mapping.tsv similarity index 100% rename from modules/msk/custom/fingerprintcombine/tests/loci_mapping.tsv rename to modules/msk/fingerprint/combine/tests/loci_mapping.tsv diff --git a/modules/msk/custom/fingerprintcombine/tests/main.nf.test b/modules/msk/fingerprint/combine/tests/main.nf.test similarity index 89% rename from modules/msk/custom/fingerprintcombine/tests/main.nf.test rename to modules/msk/fingerprint/combine/tests/main.nf.test index 0cb6e4d9..b088dcc3 100644 --- 
a/modules/msk/custom/fingerprintcombine/tests/main.nf.test +++ b/modules/msk/fingerprint/combine/tests/main.nf.test @@ -1,17 +1,17 @@ -// nf-core modules test custom/fingerprintcombine +// nf-core modules test fingerprint/combine nextflow_process { - name "Test Process CUSTOM_FINGERPRINTCOMBINE" + name "Test Process FINGERPRINT_COMBINE" script "../main.nf" - process "CUSTOM_FINGERPRINTCOMBINE" + process "FINGERPRINT_COMBINE" config "./nextflow.config" tag "modules" tag "modules_msk" - tag "custom" - tag "custom/fingerprintcombine" + tag "fingerprint" + tag "fingerprint/combine" tag "gbcms" - tag "custom/fingerprintvcfparser" + tag "fingerprint/vcfparser" test("sarscov2 - bam") { @@ -41,8 +41,8 @@ nextflow_process { """ } } - run("CUSTOM_FINGERPRINTVCFPARSER"){ - script "../../fingerprintvcfparser/main.nf" + run("FINGERPRINT_VCFPARSER"){ + script "../../vcfparser/main.nf" process { """ input[0] = GBCMS.out.variant_file @@ -54,7 +54,7 @@ nextflow_process { when { process { """ - input[0] = CUSTOM_FINGERPRINTVCFPARSER.out.tsv + input[0] = FINGERPRINT_VCFPARSER.out.tsv .map{ meta, tsv -> println meta [[id:meta.pool], tsv, meta.id, "hg19","default"] diff --git a/modules/msk/custom/fingerprintcombine/tests/main.nf.test.snap b/modules/msk/fingerprint/combine/tests/main.nf.test.snap similarity index 100% rename from modules/msk/custom/fingerprintcombine/tests/main.nf.test.snap rename to modules/msk/fingerprint/combine/tests/main.nf.test.snap diff --git a/modules/msk/custom/fingerprintcombine/tests/nextflow.config b/modules/msk/fingerprint/combine/tests/nextflow.config similarity index 50% rename from modules/msk/custom/fingerprintcombine/tests/nextflow.config rename to modules/msk/fingerprint/combine/tests/nextflow.config index 583ce385..7a504791 100644 --- a/modules/msk/custom/fingerprintcombine/tests/nextflow.config +++ b/modules/msk/fingerprint/combine/tests/nextflow.config @@ -1,5 +1,5 @@ process { - withName: 'CUSTOM_FINGERPRINTCOMBINE' { + withName: 
'FINGERPRINT_COMBINE' { ext.args = "-d 0" } } diff --git a/modules/msk/custom/fingerprintcontamination/environment.yml b/modules/msk/fingerprint/contamination/environment.yml similarity index 100% rename from modules/msk/custom/fingerprintcontamination/environment.yml rename to modules/msk/fingerprint/contamination/environment.yml diff --git a/modules/msk/custom/fingerprintcontamination/main.nf b/modules/msk/fingerprint/contamination/main.nf similarity index 93% rename from modules/msk/custom/fingerprintcontamination/main.nf rename to modules/msk/fingerprint/contamination/main.nf index 4d48deda..a0014547 100644 --- a/modules/msk/custom/fingerprintcontamination/main.nf +++ b/modules/msk/fingerprint/contamination/main.nf @@ -1,4 +1,4 @@ -process CUSTOM_FINGERPRINTCONTAMINATION { +process FINGERPRINT_CONTAMINATION { tag "$meta.id" label 'process_single' @@ -13,7 +13,7 @@ process CUSTOM_FINGERPRINTCONTAMINATION { output: tuple val(meta), path("*.contamination.tsv") , emit: contamination_tsv - tuple val("${task.process}"), val('calculate_contamination.py'), eval('calculate_contamination.py -v | cut -f 2 -d" "'), emit: versions_fingerprintvcfparser, topic: versions + tuple val("${task.process}"), val('calculate_contamination.py'), eval('calculate_contamination.py -v | cut -f 2 -d" "'), emit: versions_contamination, topic: versions when: task.ext.when == null || task.ext.when diff --git a/modules/msk/custom/fingerprintcontamination/meta.yml b/modules/msk/fingerprint/contamination/meta.yml similarity index 96% rename from modules/msk/custom/fingerprintcontamination/meta.yml rename to modules/msk/fingerprint/contamination/meta.yml index 162fff70..08acef21 100644 --- a/modules/msk/custom/fingerprintcontamination/meta.yml +++ b/modules/msk/fingerprint/contamination/meta.yml @@ -1,5 +1,5 @@ # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json -name: "custom_fingerprintcontamination" +name: "fingerprint_contamination" 
description: "Calculate major and minor contamination from fingerprint tables" version: "0.1.0" keywords: @@ -53,7 +53,7 @@ output: ontologies: - edam: "http://edamontology.org/format_3750" # TSV - edam: http://edamontology.org/format_3475 # TSV - versions_fingerprintvcfparser: + versions_contamination: - - ${task.process}: {} - calculate_contamination.py: {} - 'calculate_contamination.py -v | cut -f 2 -d" ': {} diff --git a/modules/msk/custom/fingerprintcontamination/resources/usr/bin/calculate_contamination.py b/modules/msk/fingerprint/contamination/resources/usr/bin/calculate_contamination.py similarity index 100% rename from modules/msk/custom/fingerprintcontamination/resources/usr/bin/calculate_contamination.py rename to modules/msk/fingerprint/contamination/resources/usr/bin/calculate_contamination.py diff --git a/modules/msk/custom/fingerprintcontamination/tests/main.nf.test b/modules/msk/fingerprint/contamination/tests/main.nf.test similarity index 83% rename from modules/msk/custom/fingerprintcontamination/tests/main.nf.test rename to modules/msk/fingerprint/contamination/tests/main.nf.test index bb89f9e0..ed558200 100644 --- a/modules/msk/custom/fingerprintcontamination/tests/main.nf.test +++ b/modules/msk/fingerprint/contamination/tests/main.nf.test @@ -1,16 +1,16 @@ nextflow_process { - name "Test Process CUSTOM_FINGERPRINTCONTAMINATION" + name "Test Process FINGERPRINT_CONTAMINATION" script "../main.nf" - process "CUSTOM_FINGERPRINTCONTAMINATION" + process "FINGERPRINT_CONTAMINATION" config "./nextflow.config" tag "modules" tag "modules_msk" - tag "custom" - tag "custom/fingerprintcontamination" + tag "fingerprint" + tag "fingerprint/contamination" tag "gbcms" - tag "custom/fingerprintvcfparser" + tag "fingerprint/vcfparser" test("sarscov2 - bam") { setup { @@ -30,8 +30,8 @@ nextflow_process { """ } } - run("CUSTOM_FINGERPRINTVCFPARSER"){ - script "../../fingerprintvcfparser/main.nf" + run("FINGERPRINT_VCFPARSER"){ + script "../../vcfparser/main.nf" 
process { """ input[0] = GBCMS.out.variant_file @@ -43,7 +43,7 @@ nextflow_process { when { process { """ - input[0] = CUSTOM_FINGERPRINTVCFPARSER.out.tsv.map{ meta, tsv -> [meta,tsv,[]]} + input[0] = FINGERPRINT_VCFPARSER.out.tsv.map{ meta, tsv -> [meta,tsv,[]]} """ } } @@ -78,8 +78,8 @@ nextflow_process { """ } } - run("CUSTOM_FINGERPRINTVCFPARSER"){ - script "../../fingerprintvcfparser/main.nf" + run("FINGERPRINT_VCFPARSER"){ + script "../../vcfparser/main.nf" process { """ input[0] = GBCMS.out.variant_file @@ -91,7 +91,7 @@ nextflow_process { when { process { """ - input[0] = CUSTOM_FINGERPRINTVCFPARSER.out.tsv.map{ meta, tsv -> [meta,tsv,[]]} + input[0] = FINGERPRINT_VCFPARSER.out.tsv.map{ meta, tsv -> [meta,tsv,[]]} """ } } diff --git a/modules/msk/custom/fingerprintcontamination/tests/main.nf.test.snap b/modules/msk/fingerprint/contamination/tests/main.nf.test.snap similarity index 100% rename from modules/msk/custom/fingerprintcontamination/tests/main.nf.test.snap rename to modules/msk/fingerprint/contamination/tests/main.nf.test.snap diff --git a/modules/msk/fingerprint/contamination/tests/nextflow.config b/modules/msk/fingerprint/contamination/tests/nextflow.config new file mode 100644 index 00000000..fe62a31a --- /dev/null +++ b/modules/msk/fingerprint/contamination/tests/nextflow.config @@ -0,0 +1,10 @@ +process { + + withName: 'FINGERPRINT_CONTAMINATION' { + ext.args = "-d 0" + } + + withName: 'FINGERPRINT_VCFPARSER' { + ext.args = "-d 0" + } +} diff --git a/modules/msk/custom/fingerprintcorrelation/environment.yml b/modules/msk/fingerprint/correlation/environment.yml similarity index 100% rename from modules/msk/custom/fingerprintcorrelation/environment.yml rename to modules/msk/fingerprint/correlation/environment.yml diff --git a/modules/msk/custom/fingerprintcorrelation/main.nf b/modules/msk/fingerprint/correlation/main.nf similarity index 92% rename from modules/msk/custom/fingerprintcorrelation/main.nf rename to 
modules/msk/fingerprint/correlation/main.nf index 08615284..50f807e3 100644 --- a/modules/msk/custom/fingerprintcorrelation/main.nf +++ b/modules/msk/fingerprint/correlation/main.nf @@ -1,4 +1,4 @@ -process CUSTOM_FINGERPRINTCORRELATION { +process FINGERPRINT_CORRELATION { tag {"$prefix"} label 'process_single' @@ -17,7 +17,7 @@ process CUSTOM_FINGERPRINTCORRELATION { tuple val(meta), path("*.html") , emit: heatmap_html tuple val(meta), path("*_observations.tab") , emit: observations_tab tuple val(meta), path("*_correlations.tab") , emit: correlations_tab - tuple val("${task.process}"), val('plot_gbcm.R'), val("0.1.0"), topic: versions, emit: versions_fingerprintcorrelation + tuple val("${task.process}"), val('plot_gbcm.R'), val("0.1.0"), topic: versions, emit: versions_correlation when: task.ext.when == null || task.ext.when @@ -25,7 +25,7 @@ process CUSTOM_FINGERPRINTCORRELATION { script: def args = task.ext.args ?: '' prefix = meta.id ?: "batch" - def pool_arg = "-p ${prefix}" + def pool_arg = "-p ${prefix}" filter_args = (filter_term && filter_term != "") ? pool_arg + " -f" : pool_arg """ plot_gbcm.R \\ diff --git a/modules/msk/fingerprint/correlation/meta.yml b/modules/msk/fingerprint/correlation/meta.yml new file mode 100644 index 00000000..ce40abf4 --- /dev/null +++ b/modules/msk/fingerprint/correlation/meta.yml @@ -0,0 +1,72 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "fingerprint_correlation" +description: "Generate fingerprint correlation heatmaps and tables from a combined fingerprint table" +keywords: + - fingerprint + - correlation + - qc + - heatmap +tools: + - "plot_gbcm.R": + description: "In-house R script for fingerprint correlation plotting" + homepage: "" + documentation: "" + tool_dev_url: "" + doi: "" + licence: null + identifier: null + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'pool1' ]` + - combined_fp_tsv: + type: file + description: Combined fingerprint TSV from FINGERPRINT_COMBINE + pattern: "*DPfilter_ALL_FP.txt" + - - filter_term: + type: string + description: Optional filter term to create pool-level plots +output: + heatmap_pdf: + - - meta: + type: map + description: | + Groovy Map containing sample information + - "*.pdf": + type: file + description: PDF heatmap of fingerprint correlations + heatmap_html: + - - meta: + type: map + - "*.html": + type: file + description: Interactive HTML heatmap + observations_tab: + - - meta: + type: map + - "*_observations.tab": + type: file + description: Table of loci overlap observations + correlations_tab: + - - meta: + type: map + - "*_correlations.tab": + type: file + description: Table of pairwise correlations + versions_correlation: + - - ${task.process}: + type: string + description: The name of the process + - plot_gbcm.R: + type: string + description: The name of the tool + - 0.1.0: + type: eval + description: The expression to obtain the version of the tool +authors: + - "@anoronh4" +maintainers: + - "@anoronh4" diff --git a/modules/msk/custom/fingerprintcorrelation/resources/usr/bin/plot_gbcm.R b/modules/msk/fingerprint/correlation/resources/usr/bin/plot_gbcm.R similarity index 96% rename from modules/msk/custom/fingerprintcorrelation/resources/usr/bin/plot_gbcm.R rename to modules/msk/fingerprint/correlation/resources/usr/bin/plot_gbcm.R index b0503983..176b388b 100755 --- a/modules/msk/custom/fingerprintcorrelation/resources/usr/bin/plot_gbcm.R +++ b/modules/msk/fingerprint/correlation/resources/usr/bin/plot_gbcm.R @@ -13,7 +13,7 @@ # #------------------------------------------------------------------------------- -rm(list=ls()) +rm(list=ls()) library(argparse, quietly = T) library(plyr, quietly = T) @@ -35,25 +35,25 @@ library(ggiraph) calculate_point_size <- function(x,y) { n_x <- length(unique(x)) n_y <- length(unique(y)) - + #define your plot size (in inches) 
plot_width_in <- 20 plot_height_in <- 20 - + #convert to mm (1 inch = 25.4 mm) plot_width_mm <- plot_width_in * 25.4 plot_height_mm <- plot_height_in * 25.4 - + #calculate tile size in mm tile_width_mm <- plot_width_mm / n_x tile_height_mm <- plot_height_mm / n_y - + #max circle diameter (fits inside smallest tile dimension) max_diameter_mm <- min(tile_width_mm, tile_height_mm) - + #approximate max point size for geom_point (radius in mm) - max_point_size <- max_diameter_mm - + max_point_size <- max_diameter_mm + return(max_point_size) } @@ -64,7 +64,7 @@ static_plot <- function(data, max_point_size) { legend_size = max_point_size * n * .4 axis_text_size = if (n < 25) 14 else 10 - + p <- ggplot(data, aes(x = Var1, y = Var2)) + geom_tile(color = "gray50", linewidth = 0.25, fill = NA) + geom_point_interactive( @@ -90,7 +90,7 @@ static_plot <- function(data, max_point_size) { end = 1, limits = c(-1, 1), breaks = seq(-1, 1, by = .25), - guide = guide_colorbar(direction = "vertical", + guide = guide_colorbar(direction = "vertical", title.position = "top", barheight = unit(legend_size, "mm"), barwidth = unit(legend_size*.05, "mm") @@ -141,7 +141,7 @@ parser$add_argument('-p', '--pool', required = FALSE, default = "fp_plots", help = 'pool ID') -parser$add_argument('-f', '--filter', +parser$add_argument('-f', '--filter', action = "store_true", default = FALSE, help = "create pool levelel plots instead of extended plots" @@ -182,13 +182,13 @@ if (args$filter) { message("A pool ID is required to create pool level plots") quit(status = 1) } - + message("Creating pool level plots") type="pool" - + final = final %>% filter(grepl(args$pool, Var1) & grepl(args$pool, Var2)) final = droplevels(final) - + } else { message("Creating extended plots") type="extended" @@ -202,7 +202,7 @@ s <- static_plot(final, max_point_size) ggsave(paste(outdir,"/",sample,"_", type, '.pdf', sep = ""), plot = s, width = 25, height = 25, units = "in", device = cairo_pdf) #create interactive plot -i = 
girafe(ggobj = s, width_svg = 25, height_svg = 25, +i = girafe(ggobj = s, width_svg = 25, height_svg = 25, options = list(opts_tooltip(css = "padding:5pt; font-size:16pt; color:white; background-color:black;"))) saveWidget(i, paste(outdir,"/",sample,"_", type,'.html', sep = ""), selfcontained = TRUE) diff --git a/modules/msk/custom/fingerprintcorrelation/tests/main.nf.test b/modules/msk/fingerprint/correlation/tests/main.nf.test similarity index 82% rename from modules/msk/custom/fingerprintcorrelation/tests/main.nf.test rename to modules/msk/fingerprint/correlation/tests/main.nf.test index 8142af7b..2f3ad51a 100644 --- a/modules/msk/custom/fingerprintcorrelation/tests/main.nf.test +++ b/modules/msk/fingerprint/correlation/tests/main.nf.test @@ -1,17 +1,17 @@ nextflow_process { - name "Test Process CUSTOM_FINGERPRINTCORRELATION" + name "Test Process FINGERPRINT_CORRELATION" script "../main.nf" - process "CUSTOM_FINGERPRINTCORRELATION" + process "FINGERPRINT_CORRELATION" config "./nextflow.config" tag "modules" tag "modules_msk" - tag "custom" - tag "custom/fingerprintcorrelation" - tag "custom/fingerprintcombine" + tag "fingerprint" + tag "fingerprint/correlation" + tag "fingerprint/combine" tag "gbcms" - tag "custom/fingerprintvcfparser" + tag "fingerprint/vcfparser" test("sarscov2 - bam") { setup { @@ -40,19 +40,19 @@ nextflow_process { """ } } - run("CUSTOM_FINGERPRINTVCFPARSER"){ - script "../../fingerprintvcfparser/main.nf" + run("FINGERPRINT_VCFPARSER"){ + script "../../vcfparser/main.nf" process { """ input[0] = GBCMS.out.variant_file """ } } - run("CUSTOM_FINGERPRINTCOMBINE"){ - script "../../fingerprintcombine/main.nf" + run("FINGERPRINT_COMBINE"){ + script "../../combine/main.nf" process { """ - input[0] = CUSTOM_FINGERPRINTVCFPARSER.out.tsv + input[0] = FINGERPRINT_VCFPARSER.out.tsv .map{ meta, tsv -> def meta2 = [id:meta.pool] [[id:meta.pool], tsv, meta.id, "hg19", "default"] @@ -65,8 +65,8 @@ nextflow_process { when { process { """ - input[0] = 
CUSTOM_FINGERPRINTCOMBINE.out.combined_fp_tsv - input[1] = "" + input[0] = FINGERPRINT_COMBINE.out.combined_fp_tsv + input[1] = "" """ } } @@ -77,7 +77,7 @@ nextflow_process { { assert snapshot( process.out.correlations_tab, process.out.observations_tab, - process.out.versions_fingerprintcorrelation + process.out.versions_correlation ).match() } ) } @@ -103,7 +103,7 @@ nextflow_process { { assert snapshot( process.out.correlations_tab, process.out.observations_tab, - process.out.versions_fingerprintcorrelation + process.out.versions_correlation ).match() } ) } diff --git a/modules/msk/custom/fingerprintcorrelation/tests/main.nf.test.snap b/modules/msk/fingerprint/correlation/tests/main.nf.test.snap similarity index 100% rename from modules/msk/custom/fingerprintcorrelation/tests/main.nf.test.snap rename to modules/msk/fingerprint/correlation/tests/main.nf.test.snap diff --git a/modules/msk/fingerprint/correlation/tests/nextflow.config b/modules/msk/fingerprint/correlation/tests/nextflow.config new file mode 100644 index 00000000..06367761 --- /dev/null +++ b/modules/msk/fingerprint/correlation/tests/nextflow.config @@ -0,0 +1,8 @@ +process { + withName: 'FINGERPRINT_COMBINE' { + ext.args = "-d 0" + } + withName: 'FINGERPRINT_VCFPARSER' { + ext.args = "-d 0" + } +} diff --git a/modules/msk/custom/fingerprintmislabels/environment.yml b/modules/msk/fingerprint/mislabels/environment.yml similarity index 100% rename from modules/msk/custom/fingerprintmislabels/environment.yml rename to modules/msk/fingerprint/mislabels/environment.yml diff --git a/modules/msk/custom/fingerprintmislabels/main.nf b/modules/msk/fingerprint/mislabels/main.nf similarity index 93% rename from modules/msk/custom/fingerprintmislabels/main.nf rename to modules/msk/fingerprint/mislabels/main.nf index d9ea1271..35989422 100644 --- a/modules/msk/custom/fingerprintmislabels/main.nf +++ b/modules/msk/fingerprint/mislabels/main.nf @@ -1,4 +1,4 @@ -process CUSTOM_FINGERPRINTMISLABELS { +process 
FINGERPRINT_MISLABELS { tag "$meta.id" label 'process_single' @@ -16,7 +16,7 @@ process CUSTOM_FINGERPRINTMISLABELS { tuple val(meta), path("*_unexpected_match.txt"), emit: unexpected_match_txt tuple val(meta), path("*_unexpected_mismatch.pdf"), emit: unexpected_mismatch_pdf tuple val(meta), path("*_unexpected_mismatch.txt"), emit: unexpected_mismatch_txt - tuple val("${task.process}"), val('unexpected_match_mismatch.R'), val("0.1.0"), emit: versions_fingerprintmislabels, topic: versions + tuple val("${task.process}"), val('unexpected_match_mismatch.R'), val("0.1.0"), emit: versions_mislabels, topic: versions when: task.ext.when == null || task.ext.when diff --git a/modules/msk/custom/fingerprintmislabels/meta.yml b/modules/msk/fingerprint/mislabels/meta.yml similarity index 91% rename from modules/msk/custom/fingerprintmislabels/meta.yml rename to modules/msk/fingerprint/mislabels/meta.yml index b42087df..f8fe26e7 100644 --- a/modules/msk/custom/fingerprintmislabels/meta.yml +++ b/modules/msk/fingerprint/mislabels/meta.yml @@ -1,5 +1,5 @@ # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json -name: "custom_fingerprintmislabels" +name: "fingerprint_mislabels" description: "Identify unexpected matches and mismatches from fingerprint correlations and observations based on patient labels" version: "0.1.0" keywords: @@ -8,7 +8,7 @@ keywords: - qc - sample_swap tools: - - "custom": + - "unexpected_match_mismatch.R": description: "In-house R script for detecting unexpected sample matches and mismatches." homepage: "" documentation: "" @@ -25,13 +25,13 @@ input: e.g. 
`[ id:'pool1' ]` - correlations_tab: type: file - description: Fingerprint correlations file from CUSTOM_FINGERPRINTCORRELATION + description: Fingerprint correlations file from FINGERPRINT_CORRELATION pattern: "*_correlations.tab" ontologies: - edam: http://edamontology.org/format_3475 - observations_tab: type: file - description: Fingerprint observations file from CUSTOM_FINGERPRINTCORRELATION + description: Fingerprint observations file from FINGERPRINT_CORRELATION pattern: "*_observations.tab" ontologies: - edam: http://edamontology.org/format_3475 @@ -83,7 +83,7 @@ output: type: file description: Table of flagged unexpected mismatches pattern: "*_unexpected_mismatch.txt" - versions_fingerprintmislabels: + versions_mislabels: - - ${task.process}: type: string description: The name of the process diff --git a/modules/msk/custom/fingerprintmislabels/resources/usr/bin/unexpected_match_mismatch.R b/modules/msk/fingerprint/mislabels/resources/usr/bin/unexpected_match_mismatch.R similarity index 99% rename from modules/msk/custom/fingerprintmislabels/resources/usr/bin/unexpected_match_mismatch.R rename to modules/msk/fingerprint/mislabels/resources/usr/bin/unexpected_match_mismatch.R index 2f1e14ea..2cfbd617 100755 --- a/modules/msk/custom/fingerprintmislabels/resources/usr/bin/unexpected_match_mismatch.R +++ b/modules/msk/fingerprint/mislabels/resources/usr/bin/unexpected_match_mismatch.R @@ -82,9 +82,6 @@ observations_f = read.csv(args$observations, header = T, sep = "\t", check.names correlation_f = as.data.frame(correlation_f) observations_f = as.data.frame(observations_f) -correlation_f[is.na(correlation_f)] <- 0 -observations_f[is.na(observations_f)] <- 0 - correlation_wide_df <- as.data.frame(correlation_f) correlation_wide_df$Assay1 <- rownames(correlation_wide_df) @@ -126,7 +123,7 @@ colnames(correlation_long_df) = c("Sample2", "Sample1", "Correlation", "Observat ### 1. Remove same sample-to-sample comparison (assume 1 for these) ### 2. 
Only keeping one pair per match (removing pair duplicates) -key <- t(apply(correlation_long_df[, c("Sample1", "Sample2")], 1, sort)) +key <- apply(correlation_long_df[, c("Sample1", "Sample2")], 1, function(x) paste(sort(x), collapse = "|")) correlation_long_df_clean <- correlation_long_df[!duplicated(key), ] ## Analysis organization diff --git a/modules/msk/custom/fingerprintmislabels/tests/main.nf.test b/modules/msk/fingerprint/mislabels/tests/main.nf.test similarity index 79% rename from modules/msk/custom/fingerprintmislabels/tests/main.nf.test rename to modules/msk/fingerprint/mislabels/tests/main.nf.test index 5238ea5e..c940587c 100644 --- a/modules/msk/custom/fingerprintmislabels/tests/main.nf.test +++ b/modules/msk/fingerprint/mislabels/tests/main.nf.test @@ -1,18 +1,18 @@ nextflow_process { - name "Test Process CUSTOM_FINGERPRINTMISLABELS" + name "Test Process FINGERPRINT_MISLABELS" script "../main.nf" - process "CUSTOM_FINGERPRINTMISLABELS" + process "FINGERPRINT_MISLABELS" config "./nextflow.config" tag "modules" tag "modules_msk" - tag "custom" - tag "custom/fingerprintmislabels" - tag "custom/fingerprintcorrelation" - tag "custom/fingerprintcombine" + tag "fingerprint" + tag "fingerprint/mislabels" + tag "fingerprint/correlation" + tag "fingerprint/combine" tag "gbcms" - tag "custom/fingerprintvcfparser" + tag "fingerprint/vcfparser" test("sarscov2 - bam") { setup { @@ -41,19 +41,19 @@ nextflow_process { """ } } - run("CUSTOM_FINGERPRINTVCFPARSER") { - script "../../fingerprintvcfparser/main.nf" + run("FINGERPRINT_VCFPARSER") { + script "../../vcfparser/main.nf" process { """ input[0] = GBCMS.out.variant_file """ } } - run("CUSTOM_FINGERPRINTCOMBINE") { - script "../../fingerprintcombine/main.nf" + run("FINGERPRINT_COMBINE") { + script "../../combine/main.nf" process { """ - input[0] = CUSTOM_FINGERPRINTVCFPARSER.out.tsv + input[0] = FINGERPRINT_VCFPARSER.out.tsv .map{ meta, tsv -> def meta2 = [id:meta.pool] [[id:meta.pool], tsv, meta.id, "hg19", 
"default"] @@ -62,11 +62,11 @@ nextflow_process { """ } } - run("CUSTOM_FINGERPRINTCORRELATION") { - script "../../fingerprintcorrelation/main.nf" + run("FINGERPRINT_CORRELATION") { + script "../../correlation/main.nf" process { """ - input[0] = CUSTOM_FINGERPRINTCOMBINE.out.combined_fp_tsv + input[0] = FINGERPRINT_COMBINE.out.combined_fp_tsv input[1] = "" """ } @@ -76,8 +76,8 @@ nextflow_process { when { process { """ - input[0] = CUSTOM_FINGERPRINTCORRELATION.out.correlations_tab - .join(CUSTOM_FINGERPRINTCORRELATION.out.observations_tab) + input[0] = FINGERPRINT_CORRELATION.out.correlations_tab + .join(FINGERPRINT_CORRELATION.out.observations_tab) input[1] = file("$baseDir/modules/msk/custom/fingerprintmislabels/tests/sample_sheet.csv", checkIfExists: true) """ } @@ -89,7 +89,7 @@ nextflow_process { { assert snapshot( process.out.unexpected_match_txt, process.out.unexpected_mismatch_txt, - process.out.versions_fingerprintmislabels + process.out.versions_mislabels ).match() } ) } diff --git a/modules/msk/custom/fingerprintmislabels/tests/main.nf.test.snap b/modules/msk/fingerprint/mislabels/tests/main.nf.test.snap similarity index 100% rename from modules/msk/custom/fingerprintmislabels/tests/main.nf.test.snap rename to modules/msk/fingerprint/mislabels/tests/main.nf.test.snap diff --git a/modules/msk/fingerprint/mislabels/tests/nextflow.config b/modules/msk/fingerprint/mislabels/tests/nextflow.config new file mode 100644 index 00000000..06367761 --- /dev/null +++ b/modules/msk/fingerprint/mislabels/tests/nextflow.config @@ -0,0 +1,8 @@ +process { + withName: 'FINGERPRINT_COMBINE' { + ext.args = "-d 0" + } + withName: 'FINGERPRINT_VCFPARSER' { + ext.args = "-d 0" + } +} diff --git a/modules/msk/custom/fingerprintvcfparser/environment.yml b/modules/msk/fingerprint/vcfparser/environment.yml similarity index 100% rename from modules/msk/custom/fingerprintvcfparser/environment.yml rename to modules/msk/fingerprint/vcfparser/environment.yml diff --git 
a/modules/msk/custom/fingerprintvcfparser/main.nf b/modules/msk/fingerprint/vcfparser/main.nf similarity index 87% rename from modules/msk/custom/fingerprintvcfparser/main.nf rename to modules/msk/fingerprint/vcfparser/main.nf index b5924ce8..a82ddd9f 100644 --- a/modules/msk/custom/fingerprintvcfparser/main.nf +++ b/modules/msk/fingerprint/vcfparser/main.nf @@ -1,4 +1,4 @@ -process CUSTOM_FINGERPRINTVCFPARSER { +process FINGERPRINT_VCFPARSER { tag "$meta.id" label 'process_single' @@ -11,8 +11,8 @@ process CUSTOM_FINGERPRINTVCFPARSER { tuple val(meta), path(vcf) output: - tuple val(meta), path("${prefix}.fp.tsv") , emit: tsv - tuple val("${task.process}"), val('parse_fingerprint_vcf.py'), eval('parse_fingerprint_vcf.py -v | cut -f 2 -d" "'), emit: versions_fingerprintvcfparser, topic: versions + tuple val(meta), path("${prefix}.fp.tsv") , emit: tsv + tuple val("${task.process}"), val('parse_fingerprint_vcf.py'), eval('parse_fingerprint_vcf.py -v | cut -f 2 -d" "'), emit: versions_vcfparser, topic: versions when: task.ext.when == null || task.ext.when diff --git a/modules/msk/custom/fingerprintvcfparser/meta.yml b/modules/msk/fingerprint/vcfparser/meta.yml similarity index 91% rename from modules/msk/custom/fingerprintvcfparser/meta.yml rename to modules/msk/fingerprint/vcfparser/meta.yml index 922f1504..15034410 100644 --- a/modules/msk/custom/fingerprintvcfparser/meta.yml +++ b/modules/msk/fingerprint/vcfparser/meta.yml @@ -1,15 +1,14 @@ # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json -name: "custom_fingerprintvcfparser" +name: "fingerprint_vcfparser" description: Custom script to parse fingerprint VCF files, generated by the GBCMS module. keywords: - - custom - fingerprint - vcf - pysam tools: - - "custom": + - "parse_fingerprint_vcf.py": description: "Pysam is a Python module for reading and manipulating SAM/BAM/VCF/BCF files. 
It's a lightweight wrapper of the htslib C-API, the same one that powers @@ -28,7 +27,7 @@ input: e.g. [ id:'test' ] - vcf: type: file - description: Fasta file containing scaffold + description: VCF file output from GBCMS pattern: "*.vcf" ontologies: - edam: http://edamontology.org/format_3016 # VCF @@ -47,8 +46,7 @@ output: pattern: "${prefix}.fp.tsv" ontologies: - edam: http://edamontology.org/format_3475 # TSV - - versions_fingerprintvcfparser: + versions_vcfparser: - - ${task.process}: {} - parse_fingerprint_vcf.py: {} - 'parse_fingerprint_vcf.py -v | cut -f 2 -d" ': {} diff --git a/modules/msk/custom/fingerprintvcfparser/resources/usr/bin/parse_fingerprint_vcf.py b/modules/msk/fingerprint/vcfparser/resources/usr/bin/parse_fingerprint_vcf.py similarity index 100% rename from modules/msk/custom/fingerprintvcfparser/resources/usr/bin/parse_fingerprint_vcf.py rename to modules/msk/fingerprint/vcfparser/resources/usr/bin/parse_fingerprint_vcf.py diff --git a/modules/msk/custom/fingerprintvcfparser/tests/main.nf.test b/modules/msk/fingerprint/vcfparser/tests/main.nf.test similarity index 91% rename from modules/msk/custom/fingerprintvcfparser/tests/main.nf.test rename to modules/msk/fingerprint/vcfparser/tests/main.nf.test index e4454cba..d18ec635 100644 --- a/modules/msk/custom/fingerprintvcfparser/tests/main.nf.test +++ b/modules/msk/fingerprint/vcfparser/tests/main.nf.test @@ -1,14 +1,14 @@ -// nf-core modules test custom/fingerprintvcfparser +// nf-core modules test fingerprint/vcfparser nextflow_process { - name "Test Process CUSTOM_FINGERPRINTVCFPARSER" + name "Test Process FINGERPRINT_VCFPARSER" script "../main.nf" - process "CUSTOM_FINGERPRINTVCFPARSER" + process "FINGERPRINT_VCFPARSER" tag "modules" tag "modules_msk" - tag "custom" - tag "custom/fingerprintvcfparser" + tag "fingerprint" + tag "fingerprint/vcfparser" tag "gbcms" test("sarscov2 - vcf") { diff --git a/modules/msk/custom/fingerprintvcfparser/tests/main.nf.test.snap 
b/modules/msk/fingerprint/vcfparser/tests/main.nf.test.snap similarity index 100% rename from modules/msk/custom/fingerprintvcfparser/tests/main.nf.test.snap rename to modules/msk/fingerprint/vcfparser/tests/main.nf.test.snap diff --git a/modules/msk/custom/fingerprintvcfparser/tests/nextflow.config b/modules/msk/fingerprint/vcfparser/tests/nextflow.config similarity index 100% rename from modules/msk/custom/fingerprintvcfparser/tests/nextflow.config rename to modules/msk/fingerprint/vcfparser/tests/nextflow.config diff --git a/subworkflows/msk/fingerprint_gbcms/main.nf b/subworkflows/msk/fingerprint_gbcms/main.nf index c61fe690..16bbf38b 100644 --- a/subworkflows/msk/fingerprint_gbcms/main.nf +++ b/subworkflows/msk/fingerprint_gbcms/main.nf @@ -1,6 +1,6 @@ include { GBCMS } from '../../../modules/msk/gbcms/main' -include { CUSTOM_FINGERPRINTVCFPARSER } from '../../../modules/msk/custom/fingerprintvcfparser/main' -include { CUSTOM_FINGERPRINTCONTAMINATION } from '../../../modules/msk/custom/fingerprintcontamination/main' +include { FINGERPRINT_VCFPARSER } from '../../../modules/msk/fingerprint/vcfparser/main' +include { FINGERPRINT_CONTAMINATION } from '../../../modules/msk/fingerprint/contamination/main' workflow FINGERPRINT_GBCMS { @@ -24,9 +24,9 @@ workflow FINGERPRINT_GBCMS { ch_fastafai.first() ) - CUSTOM_FINGERPRINTVCFPARSER ( GBCMS.out.variant_file ) + FINGERPRINT_VCFPARSER ( GBCMS.out.variant_file ) - all_fps = CUSTOM_FINGERPRINTVCFPARSER.out.tsv.mix(ch_fp_tsv) + all_fps = FINGERPRINT_VCFPARSER.out.tsv.mix(ch_fp_tsv) paired_fps = all_fps .filter{ meta, tsv -> meta.case_id != null && meta.control_id != null && meta.id == meta.case_id } @@ -41,11 +41,11 @@ workflow FINGERPRINT_GBCMS { .filter{ meta, tsv -> ! 
meta.control_id } .map{ meta, tsv -> [ meta, tsv, [] ] } - CUSTOM_FINGERPRINTCONTAMINATION ( paired_fps.mix(unpaired_fps) ) + FINGERPRINT_CONTAMINATION ( paired_fps.mix(unpaired_fps) ) emit: - fp_tsv_from_bam = CUSTOM_FINGERPRINTVCFPARSER.out.tsv // channel: [ val(meta), tsv ] + fp_tsv_from_bam = FINGERPRINT_VCFPARSER.out.tsv // channel: [ val(meta), tsv ] fp_tsv = all_fps // channel: [ val(meta), tsv ] - contamination_tsv = CUSTOM_FINGERPRINTCONTAMINATION.out.contamination_tsv // channel: [ val(meta), contamination_tsv ] + contamination_tsv = FINGERPRINT_CONTAMINATION.out.contamination_tsv // channel: [ val(meta), contamination_tsv ] } diff --git a/subworkflows/msk/fingerprint_gbcms/meta.yml b/subworkflows/msk/fingerprint_gbcms/meta.yml index 8deac1af..0f2da6e7 100644 --- a/subworkflows/msk/fingerprint_gbcms/meta.yml +++ b/subworkflows/msk/fingerprint_gbcms/meta.yml @@ -12,9 +12,8 @@ keywords: - qc components: - gbcms - - custom/fingerprintvcfparser - - custom/fingerprintcontamination - - fingerprint_gbcms_batch + - fingerprint/vcfparser + - fingerprint/contamination input: - ch_bam: type: file diff --git a/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test b/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test index c76cea90..11c1db5a 100644 --- a/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test +++ b/subworkflows/msk/fingerprint_gbcms/tests/main.nf.test @@ -9,8 +9,8 @@ nextflow_workflow { tag "subworkflows_msk" tag "subworkflows/fingerprint_gbcms" tag "gbcms" - tag "custom/fingerprintvcfparser" - tag "custom/fingerprintcontamination" + tag "fingerprint/vcfparser" + tag "fingerprint/contamination" test("sarscov2 - bam") { diff --git a/subworkflows/msk/fingerprint_gbcms/tests/nextflow.config b/subworkflows/msk/fingerprint_gbcms/tests/nextflow.config index 17e225ec..75bb2dd5 100644 --- a/subworkflows/msk/fingerprint_gbcms/tests/nextflow.config +++ b/subworkflows/msk/fingerprint_gbcms/tests/nextflow.config @@ -1,13 +1,9 @@ process { - withName: 
'CUSTOM_FINGERPRINTCOMBINE' { + withName: 'FINGERPRINT_CONTAMINATION' { ext.args = "-d 0" } - withName: 'CUSTOM_FINGERPRINTCONTAMINATION' { - ext.args = "-d 0" - } - - withName: 'CUSTOM_FINGERPRINTVCFPARSER' { + withName: 'FINGERPRINT_VCFPARSER' { ext.args = "-d 0" } } diff --git a/subworkflows/msk/fingerprint_gbcms_batch/main.nf b/subworkflows/msk/fingerprint_gbcms_batch/main.nf index 6a44e4a0..7a56fd51 100644 --- a/subworkflows/msk/fingerprint_gbcms_batch/main.nf +++ b/subworkflows/msk/fingerprint_gbcms_batch/main.nf @@ -1,10 +1,10 @@ -include { CUSTOM_FINGERPRINTCOMBINE as CUSTOM_FINGERPRINTCOMBINE_ALL } from '../../../modules/msk/custom/fingerprintcombine/main' -include { CUSTOM_FINGERPRINTCOMBINE as CUSTOM_FINGERPRINTCOMBINE_POOLS } from '../../../modules/msk/custom/fingerprintcombine/main' -include { CUSTOM_FINGERPRINTCOMBINE as CUSTOM_FINGERPRINTCOMBINE_PATIENTS } from '../../../modules/msk/custom/fingerprintcombine/main' -include { CUSTOM_FINGERPRINTCORRELATION as CUSTOM_FINGERPRINTCORRELATION_ALL } from '../../../modules/msk/custom/fingerprintcorrelation/main' -include { CUSTOM_FINGERPRINTCORRELATION as CUSTOM_FINGERPRINTCORRELATION_POOLS } from '../../../modules/msk/custom/fingerprintcorrelation/main' -include { CUSTOM_FINGERPRINTCORRELATION as CUSTOM_FINGERPRINTCORRELATION_PATIENTS } from '../../../modules/msk/custom/fingerprintcorrelation/main' -include { CUSTOM_FINGERPRINTMISLABELS } from '../../../modules/msk/custom/fingerprintmislabels/main' +include { FINGERPRINT_COMBINE as FINGERPRINT_COMBINE_ALL } from '../../../modules/msk/fingerprint/combine/main' +include { FINGERPRINT_COMBINE as FINGERPRINT_COMBINE_POOLS } from '../../../modules/msk/fingerprint/combine/main' +include { FINGERPRINT_COMBINE as FINGERPRINT_COMBINE_PATIENTS } from '../../../modules/msk/fingerprint/combine/main' +include { FINGERPRINT_CORRELATION as FINGERPRINT_CORRELATION_ALL } from '../../../modules/msk/fingerprint/correlation/main' +include { FINGERPRINT_CORRELATION as 
FINGERPRINT_CORRELATION_POOLS } from '../../../modules/msk/fingerprint/correlation/main' +include { FINGERPRINT_CORRELATION as FINGERPRINT_CORRELATION_PATIENTS } from '../../../modules/msk/fingerprint/correlation/main' +include { FINGERPRINT_MISLABELS } from '../../../modules/msk/fingerprint/mislabels/main' workflow FINGERPRINT_GBCMS_BATCH { @@ -31,7 +31,7 @@ workflow FINGERPRINT_GBCMS_BATCH { ) // All samples combined into a single group - CUSTOM_FINGERPRINTCOMBINE_ALL( + FINGERPRINT_COMBINE_ALL( ch_fp .map { meta, tsv -> [[id:"all"], tsv, meta.id, meta.genome ?: default_genome, meta.patient ?: meta.sample] @@ -40,7 +40,7 @@ workflow FINGERPRINT_GBCMS_BATCH { ) // Samples grouped by pool - CUSTOM_FINGERPRINTCOMBINE_POOLS( + FINGERPRINT_COMBINE_POOLS( ch_fp .combine(ch_pool.unique()) .filter { meta, tsv, pool -> @@ -53,7 +53,7 @@ workflow FINGERPRINT_GBCMS_BATCH { ) // Samples grouped by patient - CUSTOM_FINGERPRINTCOMBINE_PATIENTS( + FINGERPRINT_COMBINE_PATIENTS( ch_fp .combine(ch_patients.unique()) .filter { meta, tsv, patient -> @@ -64,35 +64,35 @@ workflow FINGERPRINT_GBCMS_BATCH { ch_liftover_loci_mapping.first() ) - CUSTOM_FINGERPRINTCORRELATION_ALL( - CUSTOM_FINGERPRINTCOMBINE_ALL.out.combined_fp_tsv, + FINGERPRINT_CORRELATION_ALL( + FINGERPRINT_COMBINE_ALL.out.combined_fp_tsv, [] ) - CUSTOM_FINGERPRINTCORRELATION_POOLS( - CUSTOM_FINGERPRINTCOMBINE_POOLS.out.combined_fp_tsv, + FINGERPRINT_CORRELATION_POOLS( + FINGERPRINT_COMBINE_POOLS.out.combined_fp_tsv, [] ) - CUSTOM_FINGERPRINTCORRELATION_PATIENTS( - CUSTOM_FINGERPRINTCOMBINE_PATIENTS.out.combined_fp_tsv, + FINGERPRINT_CORRELATION_PATIENTS( + FINGERPRINT_COMBINE_PATIENTS.out.combined_fp_tsv, [] ) - CUSTOM_FINGERPRINTMISLABELS( - CUSTOM_FINGERPRINTCORRELATION_ALL.out.correlations_tab - .join(CUSTOM_FINGERPRINTCORRELATION_ALL.out.observations_tab), + FINGERPRINT_MISLABELS( + FINGERPRINT_CORRELATION_ALL.out.correlations_tab + .join(FINGERPRINT_CORRELATION_ALL.out.observations_tab), ch_sample_sheet .filter { 
csv -> csv.readLines().size() >= 3 } .first() ) emit: - combined_fp_tsv_all = CUSTOM_FINGERPRINTCOMBINE_ALL.out.combined_fp_tsv // channel: [ val(meta), tsv ] - combined_fp_tsv_pools = CUSTOM_FINGERPRINTCOMBINE_POOLS.out.combined_fp_tsv // channel: [ val(meta), tsv ] - combined_fp_tsv_patients = CUSTOM_FINGERPRINTCOMBINE_PATIENTS.out.combined_fp_tsv // channel: [ val(meta), tsv ] - unexpected_match_pdf = CUSTOM_FINGERPRINTMISLABELS.out.unexpected_match_pdf // channel: [ val(meta), pdf ] - unexpected_match_txt = CUSTOM_FINGERPRINTMISLABELS.out.unexpected_match_txt // channel: [ val(meta), txt ] - unexpected_mismatch_pdf = CUSTOM_FINGERPRINTMISLABELS.out.unexpected_mismatch_pdf // channel: [ val(meta), pdf ] - unexpected_mismatch_txt = CUSTOM_FINGERPRINTMISLABELS.out.unexpected_mismatch_txt // channel: [ val(meta), txt ] + combined_fp_tsv_all = FINGERPRINT_COMBINE_ALL.out.combined_fp_tsv // channel: [ val(meta), tsv ] + combined_fp_tsv_pools = FINGERPRINT_COMBINE_POOLS.out.combined_fp_tsv // channel: [ val(meta), tsv ] + combined_fp_tsv_patients = FINGERPRINT_COMBINE_PATIENTS.out.combined_fp_tsv // channel: [ val(meta), tsv ] + unexpected_match_pdf = FINGERPRINT_MISLABELS.out.unexpected_match_pdf // channel: [ val(meta), pdf ] + unexpected_match_txt = FINGERPRINT_MISLABELS.out.unexpected_match_txt // channel: [ val(meta), txt ] + unexpected_mismatch_pdf = FINGERPRINT_MISLABELS.out.unexpected_mismatch_pdf // channel: [ val(meta), pdf ] + unexpected_mismatch_txt = FINGERPRINT_MISLABELS.out.unexpected_mismatch_txt // channel: [ val(meta), txt ] } diff --git a/subworkflows/msk/fingerprint_gbcms_batch/meta.yml b/subworkflows/msk/fingerprint_gbcms_batch/meta.yml index 1646d1c9..832e3761 100644 --- a/subworkflows/msk/fingerprint_gbcms_batch/meta.yml +++ b/subworkflows/msk/fingerprint_gbcms_batch/meta.yml @@ -8,8 +8,9 @@ keywords: - batch - pool components: - - custom/fingerprintcombine - - custom/fingerprintcorrelation + - fingerprint/combine + - fingerprint/correlation + - 
fingerprint/mislabels input: - ch_fp: type: file diff --git a/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test b/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test index 7c8aa2ae..b99f5a6a 100644 --- a/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test +++ b/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test @@ -10,10 +10,10 @@ nextflow_workflow { tag "subworkflows_msk" tag "subworkflows/fingerprint_gbcms_batch" tag "gbcms" - tag "custom/fingerprintvcfparser" - tag "custom/fingerprintcombine" - tag "custom/fingerprintcorrelation" - tag "custom/fingerprintmislabels" + tag "fingerprint/vcfparser" + tag "fingerprint/combine" + tag "fingerprint/correlation" + tag "fingerprint/mislabels" test("sarscov2 - bam - single_end - no patient - fingerprintmislabels skipped") { @@ -44,8 +44,8 @@ nextflow_workflow { """ } } - run("CUSTOM_FINGERPRINTVCFPARSER"){ - script "../../../../modules/msk/custom/fingerprintvcfparser/main.nf" + run("FINGERPRINT_VCFPARSER"){ + script "../../../../modules/msk/fingerprint/vcfparser/main.nf" process { """ input[0] = GBCMS.out.variant_file @@ -57,8 +57,8 @@ nextflow_workflow { when { workflow { """ - input[0] = CUSTOM_FINGERPRINTVCFPARSER.out.tsv - input[1] = [file("$baseDir/modules/msk/custom/fingerprintcombine/tests/loci_mapping.tsv", checkIfExists:true)] + input[0] = FINGERPRINT_VCFPARSER.out.tsv + input[1] = [file("$baseDir/modules/msk/fingerprint/combine/tests/loci_mapping.tsv", checkIfExists:true)] input[2] = "hg19" input[3] = Channel.empty() input[4] = Channel.empty() @@ -105,8 +105,8 @@ nextflow_workflow { """ } } - run("CUSTOM_FINGERPRINTVCFPARSER"){ - script "../../../../modules/msk/custom/fingerprintvcfparser/main.nf" + run("FINGERPRINT_VCFPARSER"){ + script "../../../../modules/msk/fingerprint/vcfparser/main.nf" process { """ input[0] = GBCMS.out.variant_file @@ -118,8 +118,8 @@ nextflow_workflow { when { workflow { """ - input[0] = CUSTOM_FINGERPRINTVCFPARSER.out.tsv - input[1] = 
[file("$baseDir/modules/msk/custom/fingerprintcombine/tests/loci_mapping.tsv", checkIfExists:true)] + input[0] = FINGERPRINT_VCFPARSER.out.tsv + input[1] = [file("$baseDir/modules/msk/fingerprint/combine/tests/loci_mapping.tsv", checkIfExists:true)] input[2] = "hg19" input[3] = Channel.empty() input[4] = Channel.empty() diff --git a/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test.snap b/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test.snap index 2f25adbe..1a655c94 100644 --- a/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test.snap +++ b/subworkflows/msk/fingerprint_gbcms_batch/tests/main.nf.test.snap @@ -6,7 +6,7 @@ { "id": "all" }, - "all_unexpected_match.txt:md5,2db9ebf03a048e6aab1e5d51e7457429" + "all_unexpected_match.txt:md5,14af9ffece921578088528e9c1663886" ] ], [ @@ -14,11 +14,11 @@ { "id": "all" }, - "all_unexpected_mismatch.txt:md5,c5824b697fe80bcaccc43d466150866b" + "all_unexpected_mismatch.txt:md5,d5c8b66fd71b1e4b20ee403d822cd7b9" ] ] ], - "timestamp": "2026-03-26T10:17:37.524850124", + "timestamp": "2026-03-26T17:19:28.68144948", "meta": { "nf-test": "0.9.5", "nextflow": "26.01.1" @@ -31,11 +31,11 @@ { "id": "all" }, - "0DPfilter_ALL_FP.txt:md5,66113c255cf1f52e27802183764a406d" + "0DPfilter_ALL_FP.txt:md5,2b376a207fd1bd6bec55fa765e3a3947" ] ] ], - "timestamp": "2026-03-26T10:17:13.331779222", + "timestamp": "2026-03-26T17:19:06.918847051", "meta": { "nf-test": "0.9.5", "nextflow": "26.01.1" diff --git a/subworkflows/msk/fingerprint_gbcms_batch/tests/nextflow.config b/subworkflows/msk/fingerprint_gbcms_batch/tests/nextflow.config index 583ce385..144a1d90 100644 --- a/subworkflows/msk/fingerprint_gbcms_batch/tests/nextflow.config +++ b/subworkflows/msk/fingerprint_gbcms_batch/tests/nextflow.config @@ -1,5 +1,13 @@ process { - withName: 'CUSTOM_FINGERPRINTCOMBINE' { + withName: 'FINGERPRINT_CONTAMINATION' { + ext.args = "-d 0" + } + + withName: 'FINGERPRINT_VCFPARSER' { + ext.args = "-d 0" + } + + withName: 
'FINGERPRINT_COMBINE' { ext.args = "-d 0" } } From baff2d1b84e98ff9e9497b48f55237ff32b6006e Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Tue, 31 Mar 2026 12:22:12 -0400 Subject: [PATCH 50/52] update snapshots --- .../combine/tests/main.nf.test.snap | 28 +++++++++---------- .../contamination/tests/main.nf.test.snap | 28 +++++++++---------- .../correlation/tests/main.nf.test.snap | 16 +++++------ .../mislabels/tests/main.nf.test.snap | 20 ++++++------- .../vcfparser/tests/main.nf.test.snap | 28 +++++++++---------- 5 files changed, 60 insertions(+), 60 deletions(-) diff --git a/modules/msk/fingerprint/combine/tests/main.nf.test.snap b/modules/msk/fingerprint/combine/tests/main.nf.test.snap index 85f90edd..4016b7bd 100644 --- a/modules/msk/fingerprint/combine/tests/main.nf.test.snap +++ b/modules/msk/fingerprint/combine/tests/main.nf.test.snap @@ -12,7 +12,7 @@ ], "1": [ [ - "CUSTOM_FINGERPRINTCOMBINE", + "FINGERPRINT_COMBINE", "complete_FP_table.R", "0.1.0" ] @@ -25,20 +25,20 @@ "XDPfilter_ALL_FP.txt:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions_fingerprintcombine": [ + "versions_combine": [ [ - "CUSTOM_FINGERPRINTCOMBINE", + "FINGERPRINT_COMBINE", "complete_FP_table.R", "0.1.0" ] ] } ], + "timestamp": "2026-03-31T11:50:45.066162946", "meta": { - "nf-test": "0.9.3", - "nextflow": "25.10.2" - }, - "timestamp": "2025-12-17T20:26:07.925718004" + "nf-test": "0.9.5", + "nextflow": "26.01.1" + } }, "sarscov2 - bam": { "content": [ @@ -53,7 +53,7 @@ ], "1": [ [ - "CUSTOM_FINGERPRINTCOMBINE", + "FINGERPRINT_COMBINE", "complete_FP_table.R", "0.1.0" ] @@ -66,19 +66,19 @@ "0DPfilter_ALL_FP.txt:md5,66113c255cf1f52e27802183764a406d" ] ], - "versions_fingerprintcombine": [ + "versions_combine": [ [ - "CUSTOM_FINGERPRINTCOMBINE", + "FINGERPRINT_COMBINE", "complete_FP_table.R", "0.1.0" ] ] } ], + "timestamp": "2026-03-31T11:50:39.126837772", "meta": { - "nf-test": "0.9.3", - "nextflow": "25.10.2" - }, - "timestamp": "2025-12-17T20:25:58.985229402" + "nf-test": 
"0.9.5", + "nextflow": "26.01.1" + } } } \ No newline at end of file diff --git a/modules/msk/fingerprint/contamination/tests/main.nf.test.snap b/modules/msk/fingerprint/contamination/tests/main.nf.test.snap index 233a4680..6016aace 100644 --- a/modules/msk/fingerprint/contamination/tests/main.nf.test.snap +++ b/modules/msk/fingerprint/contamination/tests/main.nf.test.snap @@ -13,7 +13,7 @@ ], "1": [ [ - "CUSTOM_FINGERPRINTCONTAMINATION", + "FINGERPRINT_CONTAMINATION", "calculate_contamination.py", "" ] @@ -27,20 +27,20 @@ "test.contamination.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions_fingerprintvcfparser": [ + "versions_contamination": [ [ - "CUSTOM_FINGERPRINTCONTAMINATION", + "FINGERPRINT_CONTAMINATION", "calculate_contamination.py", "" ] ] } ], + "timestamp": "2026-03-31T11:55:42.453612823", "meta": { - "nf-test": "0.9.3", - "nextflow": "25.10.2" - }, - "timestamp": "2025-12-17T13:12:25.869022442" + "nf-test": "0.9.5", + "nextflow": "26.01.1" + } }, "sarscov2 - bam": { "content": [ @@ -56,7 +56,7 @@ ], "1": [ [ - "CUSTOM_FINGERPRINTCONTAMINATION", + "FINGERPRINT_CONTAMINATION", "calculate_contamination.py", "" ] @@ -70,19 +70,19 @@ "test.contamination.tsv:md5,5b533c60b8eff1f4d2c5fe58a8262303" ] ], - "versions_fingerprintvcfparser": [ + "versions_contamination": [ [ - "CUSTOM_FINGERPRINTCONTAMINATION", + "FINGERPRINT_CONTAMINATION", "calculate_contamination.py", "" ] ] } ], + "timestamp": "2026-03-31T11:55:33.454910171", "meta": { - "nf-test": "0.9.3", - "nextflow": "25.10.2" - }, - "timestamp": "2025-12-17T13:12:16.153445117" + "nf-test": "0.9.5", + "nextflow": "26.01.1" + } } } \ No newline at end of file diff --git a/modules/msk/fingerprint/correlation/tests/main.nf.test.snap b/modules/msk/fingerprint/correlation/tests/main.nf.test.snap index 42ebc83b..9ccb6af1 100644 --- a/modules/msk/fingerprint/correlation/tests/main.nf.test.snap +++ b/modules/msk/fingerprint/correlation/tests/main.nf.test.snap @@ -19,17 +19,17 @@ ], [ [ - 
"CUSTOM_FINGERPRINTCORRELATION", + "FINGERPRINT_CORRELATION", "plot_gbcm.R", "0.1.0" ] ] ], + "timestamp": "2026-03-31T11:51:06.685431978", "meta": { - "nf-test": "0.9.3", + "nf-test": "0.9.5", "nextflow": "26.01.1" - }, - "timestamp": "2026-03-20T22:20:19.897509559" + } }, "sarscov2 - bam": { "content": [ @@ -51,16 +51,16 @@ ], [ [ - "CUSTOM_FINGERPRINTCORRELATION", + "FINGERPRINT_CORRELATION", "plot_gbcm.R", "0.1.0" ] ] ], + "timestamp": "2026-03-31T11:51:01.173687804", "meta": { - "nf-test": "0.9.3", + "nf-test": "0.9.5", "nextflow": "26.01.1" - }, - "timestamp": "2026-03-20T22:20:13.979058971" + } } } \ No newline at end of file diff --git a/modules/msk/fingerprint/mislabels/tests/main.nf.test.snap b/modules/msk/fingerprint/mislabels/tests/main.nf.test.snap index 100b060e..5fe6fb12 100644 --- a/modules/msk/fingerprint/mislabels/tests/main.nf.test.snap +++ b/modules/msk/fingerprint/mislabels/tests/main.nf.test.snap @@ -36,7 +36,7 @@ ], "4": [ [ - "CUSTOM_FINGERPRINTMISLABELS", + "FINGERPRINT_MISLABELS", "unexpected_match_mismatch.R", "0.1.0" ] @@ -73,20 +73,20 @@ "thispool_unexpected_mismatch.txt:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions_fingerprintmislabels": [ + "versions_mislabels": [ [ - "CUSTOM_FINGERPRINTMISLABELS", + "FINGERPRINT_MISLABELS", "unexpected_match_mismatch.R", "0.1.0" ] ] } ], + "timestamp": "2026-03-31T11:51:32.987116913", "meta": { - "nf-test": "0.9.3", + "nf-test": "0.9.5", "nextflow": "26.01.1" - }, - "timestamp": "2026-03-20T22:37:04.106330307" + } }, "sarscov2 - bam": { "content": [ @@ -108,16 +108,16 @@ ], [ [ - "CUSTOM_FINGERPRINTMISLABELS", + "FINGERPRINT_MISLABELS", "unexpected_match_mismatch.R", "0.1.0" ] ] ], + "timestamp": "2026-03-31T11:51:27.508330482", "meta": { - "nf-test": "0.9.3", + "nf-test": "0.9.5", "nextflow": "26.01.1" - }, - "timestamp": "2026-03-20T22:36:57.641442061" + } } } \ No newline at end of file diff --git a/modules/msk/fingerprint/vcfparser/tests/main.nf.test.snap 
b/modules/msk/fingerprint/vcfparser/tests/main.nf.test.snap index fb734f9f..21623821 100644 --- a/modules/msk/fingerprint/vcfparser/tests/main.nf.test.snap +++ b/modules/msk/fingerprint/vcfparser/tests/main.nf.test.snap @@ -13,7 +13,7 @@ ], "1": [ [ - "CUSTOM_FINGERPRINTVCFPARSER", + "FINGERPRINT_VCFPARSER", "parse_fingerprint_vcf.py", "0.1.0" ] @@ -27,20 +27,20 @@ "test.fp.tsv:md5,9fa9a081f17ee52f03463c96d46a23aa" ] ], - "versions_fingerprintvcfparser": [ + "versions_vcfparser": [ [ - "CUSTOM_FINGERPRINTVCFPARSER", + "FINGERPRINT_VCFPARSER", "parse_fingerprint_vcf.py", "0.1.0" ] ] } ], + "timestamp": "2026-03-31T11:56:37.863052588", "meta": { - "nf-test": "0.9.3", - "nextflow": "25.10.2" - }, - "timestamp": "2025-12-17T13:02:44.951823372" + "nf-test": "0.9.5", + "nextflow": "26.01.1" + } }, "sarscov2 - vcf - stub": { "content": [ @@ -56,7 +56,7 @@ ], "1": [ [ - "CUSTOM_FINGERPRINTVCFPARSER", + "FINGERPRINT_VCFPARSER", "parse_fingerprint_vcf.py", "0.1.0" ] @@ -70,19 +70,19 @@ "test.fp.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions_fingerprintvcfparser": [ + "versions_vcfparser": [ [ - "CUSTOM_FINGERPRINTVCFPARSER", + "FINGERPRINT_VCFPARSER", "parse_fingerprint_vcf.py", "0.1.0" ] ] } ], + "timestamp": "2026-03-31T11:56:43.764600116", "meta": { - "nf-test": "0.9.3", - "nextflow": "25.10.2" - }, - "timestamp": "2025-12-17T13:02:51.967429606" + "nf-test": "0.9.5", + "nextflow": "26.01.1" + } } } \ No newline at end of file From 357bf03e61fbab57ae04adfc93f42a25e8d77710 Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Tue, 31 Mar 2026 16:20:40 -0400 Subject: [PATCH 51/52] update local paths for inputs in nf-tests --- modules/msk/fingerprint/combine/tests/main.nf.test | 4 ++-- modules/msk/fingerprint/correlation/tests/main.nf.test | 4 ++-- .../msk/fingerprint/mislabels/tests/correlations.tab | 0 modules/msk/fingerprint/mislabels/tests/main.nf.test | 10 +++++----- .../msk/fingerprint/mislabels/tests/observations.tab | 0 
.../msk/fingerprint/mislabels/tests/sample_sheet.csv | 3 +++ 6 files changed, 12 insertions(+), 9 deletions(-) create mode 100644 modules/msk/fingerprint/mislabels/tests/correlations.tab create mode 100644 modules/msk/fingerprint/mislabels/tests/observations.tab create mode 100644 modules/msk/fingerprint/mislabels/tests/sample_sheet.csv diff --git a/modules/msk/fingerprint/combine/tests/main.nf.test b/modules/msk/fingerprint/combine/tests/main.nf.test index b088dcc3..effbbbf2 100644 --- a/modules/msk/fingerprint/combine/tests/main.nf.test +++ b/modules/msk/fingerprint/combine/tests/main.nf.test @@ -59,7 +59,7 @@ nextflow_process { println meta [[id:meta.pool], tsv, meta.id, "hg19","default"] }.groupTuple(by:[0]) - input[1] = file("$baseDir/modules/msk/custom/fingerprintcombine/tests/loci_mapping.tsv", checkIfExists:true) + input[1] = file("$baseDir/modules/msk/fingerprint/combine/tests/loci_mapping.tsv", checkIfExists:true) """ } } @@ -88,7 +88,7 @@ nextflow_process { ["hg19"], ["default"] ] - input[1] = file("$baseDir/modules/msk/custom/fingerprintcombine/tests/loci_mapping.tsv", checkIfExists:true) + input[1] = file("$baseDir/modules/msk/fingerprint/combine/tests/loci_mapping.tsv", checkIfExists:true) """ } } diff --git a/modules/msk/fingerprint/correlation/tests/main.nf.test b/modules/msk/fingerprint/correlation/tests/main.nf.test index 2f3ad51a..c7ff43e8 100644 --- a/modules/msk/fingerprint/correlation/tests/main.nf.test +++ b/modules/msk/fingerprint/correlation/tests/main.nf.test @@ -57,7 +57,7 @@ nextflow_process { def meta2 = [id:meta.pool] [[id:meta.pool], tsv, meta.id, "hg19", "default"] }.groupTuple(by:[0]) - input[1] = file("$baseDir/modules/msk/custom/fingerprintcombine/tests/loci_mapping.tsv", checkIfExists:true) + input[1] = file("$baseDir/modules/msk/fingerprint/combine/tests/loci_mapping.tsv", checkIfExists:true) """ } } @@ -91,7 +91,7 @@ nextflow_process { when { process { """ - input[0] = [[id:'thispool'], 
file("$baseDir/modules/msk/custom/fingerprintcombine/tests/loci_mapping.tsv", checkIfExists:true)] + input[0] = [[id:'thispool'], file("$baseDir/modules/msk/fingerprint/combine/tests/loci_mapping.tsv", checkIfExists:true)] input[1] = "" """ } diff --git a/modules/msk/fingerprint/mislabels/tests/correlations.tab b/modules/msk/fingerprint/mislabels/tests/correlations.tab new file mode 100644 index 00000000..e69de29b diff --git a/modules/msk/fingerprint/mislabels/tests/main.nf.test b/modules/msk/fingerprint/mislabels/tests/main.nf.test index c940587c..41e205f0 100644 --- a/modules/msk/fingerprint/mislabels/tests/main.nf.test +++ b/modules/msk/fingerprint/mislabels/tests/main.nf.test @@ -58,7 +58,7 @@ nextflow_process { def meta2 = [id:meta.pool] [[id:meta.pool], tsv, meta.id, "hg19", "default"] }.groupTuple(by:[0]) - input[1] = file("$baseDir/modules/msk/custom/fingerprintcombine/tests/loci_mapping.tsv", checkIfExists:true) + input[1] = file("$baseDir/modules/msk/fingerprint/combine/tests/loci_mapping.tsv", checkIfExists:true) """ } } @@ -78,7 +78,7 @@ nextflow_process { """ input[0] = FINGERPRINT_CORRELATION.out.correlations_tab .join(FINGERPRINT_CORRELATION.out.observations_tab) - input[1] = file("$baseDir/modules/msk/custom/fingerprintmislabels/tests/sample_sheet.csv", checkIfExists: true) + input[1] = file("$baseDir/modules/msk/fingerprint/mislabels/tests/sample_sheet.csv", checkIfExists: true) """ } } @@ -105,10 +105,10 @@ nextflow_process { """ input[0] = [ [id:'thispool'], - file("$baseDir/modules/msk/custom/fingerprintmislabels/tests/correlations.tab", checkIfExists: true), - file("$baseDir/modules/msk/custom/fingerprintmislabels/tests/observations.tab", checkIfExists: true) + file("$baseDir/modules/msk/fingerprint/mislabels/tests/correlations.tab", checkIfExists: true), + file("$baseDir/modules/msk/fingerprint/mislabels/tests/observations.tab", checkIfExists: true) ] - input[1] = file("$baseDir/modules/msk/custom/fingerprintmislabels/tests/sample_sheet.csv", 
checkIfExists: true) + input[1] = file("$baseDir/modules/msk/fingerprint/mislabels/tests/sample_sheet.csv", checkIfExists: true) """ } } diff --git a/modules/msk/fingerprint/mislabels/tests/observations.tab b/modules/msk/fingerprint/mislabels/tests/observations.tab new file mode 100644 index 00000000..e69de29b diff --git a/modules/msk/fingerprint/mislabels/tests/sample_sheet.csv b/modules/msk/fingerprint/mislabels/tests/sample_sheet.csv new file mode 100644 index 00000000..19476b3f --- /dev/null +++ b/modules/msk/fingerprint/mislabels/tests/sample_sheet.csv @@ -0,0 +1,3 @@ +sample,patient,is_donor +test,1,false +test2,2,false From 04ee0ed6f58e6891084bdbebdf591e5b1744e980 Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Thu, 14 May 2026 11:31:27 -0400 Subject: [PATCH 52/52] update environment.yml to be congruent with container images --- modules/msk/fingerprint/correlation/environment.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/msk/fingerprint/correlation/environment.yml b/modules/msk/fingerprint/correlation/environment.yml index acabcada..73a37db1 100644 --- a/modules/msk/fingerprint/correlation/environment.yml +++ b/modules/msk/fingerprint/correlation/environment.yml @@ -13,6 +13,6 @@ dependencies: - conda-forge::r-htmlwidgets=1.6.4 - conda-forge::r-plotly=4.11.0 - conda-forge::r-plyr=1.8.9 - - conda-forge::r-reshape2=1.4.4 + - conda-forge::r-reshape2=1.4.5 - conda-forge::r-scales=1.4.0 - conda-forge::r-tidyverse=2.0.0