From 235a316442a7d01f5bc60b5b39294e3b2e710b13 Mon Sep 17 00:00:00 2001 From: Pieter Moris <13552343+pmoris@users.noreply.github.com> Date: Thu, 12 Feb 2026 09:45:26 +0100 Subject: [PATCH 1/3] Avoid redundant index steps for shared refs This deacon subworkflow supports per-sample references, but the previous implementation ran the indexing step for each sample, regardless of whether samples shared the same reference. This update allows the index step to run only once for each unique reference, rather than once per sample. I.e. shared references among samples are only indexed once. The output of the previous implementation is preserved, but this could still be changed for the index output (by adding fasta-based meta.id values rather than the sample metadata). --- .../nf-core/fastq_index_filter_deacon/main.nf | 41 +++++++++++++++---- 1 file changed, 34 insertions(+), 7 deletions(-) diff --git a/subworkflows/nf-core/fastq_index_filter_deacon/main.nf b/subworkflows/nf-core/fastq_index_filter_deacon/main.nf index 0be4ed8c42ac..d7a53c2f8af0 100644 --- a/subworkflows/nf-core/fastq_index_filter_deacon/main.nf +++ b/subworkflows/nf-core/fastq_index_filter_deacon/main.nf @@ -8,8 +8,6 @@ workflow FASTQ_INDEX_FILTER_DEACON { main: - ch_fasta = ch_fasta_reads - .map { meta, fasta, reads -> [ meta, fasta ] } // Check if fastqs are single-end or paired-end and run Deacon accordingly ch_reads = ch_fasta_reads .map { meta, fasta, reads -> @@ -17,21 +15,50 @@ workflow FASTQ_INDEX_FILTER_DEACON { if (reads instanceof List && reads.size() != 1) { error("Error: Check your meta.single_end value. Single-end reads should contain one file only.") } - return [ meta, reads ] + return [ meta, fasta, reads ] } else { if (!(reads instanceof List) || reads.size() != 2) { error("Error: Check your meta.single_end value. 
Paired-end reads should contain two files; a forward and a reverse.") } - return [ meta, reads ] + return [ meta, fasta, reads ] } } - DEACON_INDEX ( ch_fasta ) + // Extract unique reference fasta files and create fasta-specific metadata + // This ensures each unique reference is indexed only once + ch_unique_fastas = ch_reads + .map { _meta, fasta, _reads -> fasta } + .unique() + .map { fasta -> + def fasta_meta = [ id: fasta.baseName ] + [ fasta_meta, fasta ] + } + + // Index unique FASTA files only + DEACON_INDEX ( ch_unique_fastas ) + + // Match indexes back to original samples + ch_indexes = DEACON_INDEX.out.index + .map { fasta_meta, index -> [ fasta_meta.id, index ] } + ch_reads_with_index = ch_reads + .map { meta, fasta, reads -> + [ fasta.baseName, meta, reads ] + } + .combine(ch_indexes, by: 0) + .map { _fasta_id, meta, reads, index -> + [ meta, index, reads ] + } + + // Filter reads using the matched index + DEACON_FILTER(ch_reads_with_index) - DEACON_FILTER(DEACON_INDEX.out.index.join(ch_reads)) + // TODO: optionally create output channel with indexes and their original sample-level metadata, + // this preserves the original behaviour of the workflow + // ch_index_with_meta = ch_reads_with_index + // .map { meta, index, _reads -> [ meta, index ] } emit: - index = DEACON_INDEX.out.index // channel: [ val(meta), [ index ] ] + index = DEACON_INDEX.out.index // channel: [ val(meta), [ index ] ] // TODO: optional emit ch_index_with_meta instead fastq_filtered = DEACON_FILTER.out.fastq_filtered // channel: [ val(meta), [ fastq ] ] summary = DEACON_FILTER.out.log // channel: [ val(meta), [ log ] ] } From e784cbf54cdf0ebdeb611aac5566bc4e7feafd26 Mon Sep 17 00:00:00 2001 From: Pieter Moris <13552343+pmoris@users.noreply.github.com> Date: Fri, 13 Feb 2026 14:01:45 +0100 Subject: [PATCH 2/3] Retain sample-level metadata in index output channel This commit ensures that the sample-level metadata associated with the sample reads is retained in the index output 
channel. The baseName of the index - which itself should be equal to the baseName of the used reference fasta, or the chosen `ext.prefix` for the deacon/index module - is added as an additional index_id key in the meta map. Note that the index channel has the same length as the number of input samples (i.e. it does not only contain the unique indexes). --- .../nf-core/fastq_index_filter_deacon/main.nf | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/subworkflows/nf-core/fastq_index_filter_deacon/main.nf b/subworkflows/nf-core/fastq_index_filter_deacon/main.nf index d7a53c2f8af0..370cda8df5c3 100644 --- a/subworkflows/nf-core/fastq_index_filter_deacon/main.nf +++ b/subworkflows/nf-core/fastq_index_filter_deacon/main.nf @@ -30,8 +30,8 @@ workflow FASTQ_INDEX_FILTER_DEACON { .map { _meta, fasta, _reads -> fasta } .unique() .map { fasta -> - def fasta_meta = [ id: fasta.baseName ] - [ fasta_meta, fasta ] + def meta_fasta = [ id: fasta.baseName ] + [ meta_fasta, fasta ] } // Index unique FASTA files only @@ -39,7 +39,7 @@ workflow FASTQ_INDEX_FILTER_DEACON { // Match indexes back to original samples ch_indexes = DEACON_INDEX.out.index - .map { fasta_meta, index -> [ fasta_meta.id, index ] } + .map { meta_fasta, index -> [ meta_fasta.id, index ] } ch_reads_with_index = ch_reads .map { meta, fasta, reads -> [ fasta.baseName, meta, reads ] @@ -52,13 +52,13 @@ workflow FASTQ_INDEX_FILTER_DEACON { // Filter reads using the matched index DEACON_FILTER(ch_reads_with_index) - // TODO: optionally create output channel with indexes and their original sample-level metadata, - // this preserves the original behaviour of the workflow - // ch_index_with_meta = ch_reads_with_index - // .map { meta, index, _reads -> [ meta, index ] } + // Create output channel with indexes, the original sample-level metadata and index id + ch_index_with_meta = ch_reads_with_index + .map { meta, index, _reads -> [ meta + [ index_id: index.baseName ], index ] } + emit: - 
index = DEACON_INDEX.out.index // channel: [ val(meta), [ index ] ] // TODO: optional emit ch_index_with_meta instead + index = ch_index_with_meta // channel: [ val(meta), [ index ] ] fastq_filtered = DEACON_FILTER.out.fastq_filtered // channel: [ val(meta), [ fastq ] ] summary = DEACON_FILTER.out.log // channel: [ val(meta), [ log ] ] } From da400bcaa9e001f187140b9da690b1acbf522a26 Mon Sep 17 00:00:00 2001 From: Pieter Moris <13552343+pmoris@users.noreply.github.com> Date: Fri, 13 Feb 2026 15:13:34 +0100 Subject: [PATCH 3/3] Update test snapshots Snapshots needed to be updated because the index output channel was updated with an additional index_id key in its meta map. --- .../tests/main.nf.test.snap | 42 ++++++++++--------- .../tests/main.nf.test.snap | 34 ++++++++------- 2 files changed, 42 insertions(+), 34 deletions(-) diff --git a/subworkflows/nf-core/fastq_decontaminate_deacon_hostile/tests/main.nf.test.snap b/subworkflows/nf-core/fastq_decontaminate_deacon_hostile/tests/main.nf.test.snap index adf3cc1a7c50..880efeccc7e7 100644 --- a/subworkflows/nf-core/fastq_decontaminate_deacon_hostile/tests/main.nf.test.snap +++ b/subworkflows/nf-core/fastq_decontaminate_deacon_hostile/tests/main.nf.test.snap @@ -14,18 +14,19 @@ [ { "id": "test", - "single_end": true + "single_end": true, + "index_id": "genome" }, - "test.idx:md5,84e4985c91800686db9c9dca28fabd1a" + "genome.idx:md5,84e4985c91800686db9c9dca28fabd1a" ] ], "test.json" ], + "timestamp": "2026-02-16T13:18:48.758651665", "meta": { - "nf-test": "0.9.3", + "nf-test": "0.9.4", "nextflow": "25.10.2" - }, - "timestamp": "2026-02-13T09:28:11.112943934" + } }, "sarscov2 - fastq - single-end - hostile": { "content": [ @@ -53,11 +54,11 @@ ], "test_single.json" ], + "timestamp": "2026-02-13T09:27:36.178537005", "meta": { "nf-test": "0.9.3", "nextflow": "25.10.2" - }, - "timestamp": "2026-02-13T09:27:36.178537005" + } }, "sarscov2 - fastq - paired-end - deacon": { "content": [ @@ -77,18 +78,19 @@ [ { "id": "test", - 
"single_end": false + "single_end": false, + "index_id": "genome" }, - "test.idx:md5,84e4985c91800686db9c9dca28fabd1a" + "genome.idx:md5,84e4985c91800686db9c9dca28fabd1a" ] ], "test.json" ], + "timestamp": "2026-02-16T13:18:57.869797967", "meta": { - "nf-test": "0.9.3", + "nf-test": "0.9.4", "nextflow": "25.10.2" - }, - "timestamp": "2026-02-13T09:28:24.987001016" + } }, "sarscov2 - fastq - paired-end - hostile": { "content": [ @@ -119,11 +121,11 @@ ], "test_paired.json" ], + "timestamp": "2026-02-13T09:27:57.460392559", "meta": { "nf-test": "0.9.3", "nextflow": "25.10.2" - }, - "timestamp": "2026-02-13T09:27:57.460392559" + } }, "sarscov2 - fastq - single-end - deacon - stub": { "content": [ @@ -147,7 +149,8 @@ [ { "id": "test", - "single_end": true + "single_end": true, + "index_id": "genome" }, "genome.idx:md5,d41d8cd98f00b204e9800998ecf8427e" ] @@ -174,7 +177,8 @@ [ { "id": "test", - "single_end": true + "single_end": true, + "index_id": "genome" }, "genome.idx:md5,d41d8cd98f00b204e9800998ecf8427e" ] @@ -196,10 +200,10 @@ ] } ], + "timestamp": "2026-02-16T13:19:06.320189196", "meta": { - "nf-test": "0.9.3", + "nf-test": "0.9.4", "nextflow": "25.10.2" - }, - "timestamp": "2026-02-13T09:28:38.421681888" + } } } \ No newline at end of file diff --git a/subworkflows/nf-core/fastq_index_filter_deacon/tests/main.nf.test.snap b/subworkflows/nf-core/fastq_index_filter_deacon/tests/main.nf.test.snap index 3ebf4eb7742f..7e81cbfcffa6 100644 --- a/subworkflows/nf-core/fastq_index_filter_deacon/tests/main.nf.test.snap +++ b/subworkflows/nf-core/fastq_index_filter_deacon/tests/main.nf.test.snap @@ -6,7 +6,8 @@ [ { "id": "test", - "single_end": true + "single_end": true, + "index_id": "genome" }, "genome.idx:md5,d41d8cd98f00b204e9800998ecf8427e" ] @@ -42,7 +43,8 @@ [ { "id": "test", - "single_end": true + "single_end": true, + "index_id": "genome" }, "genome.idx:md5,d41d8cd98f00b204e9800998ecf8427e" ] @@ -58,11 +60,11 @@ ] } ], + "timestamp": "2026-02-13T15:10:09.178241099", 
"meta": { - "nf-test": "0.9.3", + "nf-test": "0.9.4", "nextflow": "25.10.2" - }, - "timestamp": "2026-02-13T09:19:12.052209052" + } }, "sarscov2 - fastq - paired-end": { "content": [ @@ -70,9 +72,10 @@ [ { "id": "test", - "single_end": false + "single_end": false, + "index_id": "genome" }, - "test.idx:md5,84e4985c91800686db9c9dca28fabd1a" + "genome.idx:md5,84e4985c91800686db9c9dca28fabd1a" ] ], [ @@ -89,11 +92,11 @@ ], "test.json" ], + "timestamp": "2026-02-13T15:09:59.820753976", "meta": { - "nf-test": "0.9.3", + "nf-test": "0.9.4", "nextflow": "25.10.2" - }, - "timestamp": "2026-02-13T09:19:03.559444963" + } }, "sarscov2 - fastq - single-end": { "content": [ @@ -101,9 +104,10 @@ [ { "id": "test", - "single_end": true + "single_end": true, + "index_id": "genome" }, - "test.idx:md5,84e4985c91800686db9c9dca28fabd1a" + "genome.idx:md5,84e4985c91800686db9c9dca28fabd1a" ] ], [ @@ -117,10 +121,10 @@ ], "test.json" ], + "timestamp": "2026-02-13T15:09:50.38883241", "meta": { - "nf-test": "0.9.3", + "nf-test": "0.9.4", "nextflow": "25.10.2" - }, - "timestamp": "2026-02-13T09:18:55.835377502" + } } } \ No newline at end of file