From 235a316442a7d01f5bc60b5b39294e3b2e710b13 Mon Sep 17 00:00:00 2001
From: Pieter Moris <13552343+pmoris@users.noreply.github.com>
Date: Thu, 12 Feb 2026 09:45:26 +0100
Subject: [PATCH 1/3] Avoid redundant index steps for shared refs

This deacon subworkflow supports per-sample
references, but the previous implementation
ran the indexing step for each sample,
regardless of whether samples shared the same reference.

This update runs the index step only once for
each unique reference, rather than once per sample,
i.e. a reference shared among samples is indexed only once.

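The filtered-read and summary outputs of the previous
implementation are preserved. The index output, however,
now carries fasta-based meta.id values rather than the
sample metadata; this could still be changed to restore
the original sample-level metadata (see the TODO in main.nf).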
---
 .../nf-core/fastq_index_filter_deacon/main.nf | 41 +++++++++++++++----
 1 file changed, 34 insertions(+), 7 deletions(-)

diff --git a/subworkflows/nf-core/fastq_index_filter_deacon/main.nf b/subworkflows/nf-core/fastq_index_filter_deacon/main.nf
index 0be4ed8c42ac..d7a53c2f8af0 100644
--- a/subworkflows/nf-core/fastq_index_filter_deacon/main.nf
+++ b/subworkflows/nf-core/fastq_index_filter_deacon/main.nf
@@ -8,8 +8,6 @@ workflow FASTQ_INDEX_FILTER_DEACON {
 
     main:
 
-    ch_fasta = ch_fasta_reads
-        .map  { meta, fasta, reads -> [ meta, fasta ] }
     // Check if fastqs are single-end or paired-end and run Deacon accordingly
     ch_reads = ch_fasta_reads
         .map  { meta, fasta, reads ->
@@ -17,21 +15,50 @@ workflow FASTQ_INDEX_FILTER_DEACON {
                 if (reads instanceof List && reads.size() != 1) {
                     error("Error: Check your meta.single_end value. Single-end reads should contain one file only.")
                 }
-                return [ meta, reads ]
+                return [ meta, fasta, reads ]
             } else {
                 if (!(reads instanceof List) || reads.size() != 2) {
                     error("Error: Check your meta.single_end value. Paired-end reads should contain two files; a forward and a reverse.")
                 }
-                return [ meta, reads ]
+                return [ meta, fasta, reads ]
             }
         }
 
-    DEACON_INDEX ( ch_fasta )
+    // Extract unique reference fasta files and create fasta-specific metadata
+    // This ensures each unique reference is indexed only once
+    ch_unique_fastas = ch_reads
+        .map { _meta, fasta, _reads -> fasta }
+        .unique()
+        .map { fasta ->
+            def fasta_meta = [ id: fasta.baseName ]
+            [ fasta_meta, fasta ]
+        }
+
+    // Index unique FASTA files only
+    DEACON_INDEX ( ch_unique_fastas )
+
+    // Match indexes back to original samples
+    ch_indexes = DEACON_INDEX.out.index
+        .map { fasta_meta, index -> [ fasta_meta.id, index ] }
+    ch_reads_with_index = ch_reads
+        .map { meta, fasta, reads ->
+            [ fasta.baseName, meta, reads ]
+        }
+        .combine(ch_indexes, by: 0)
+        .map { _fasta_id, meta, reads, index ->
+            [ meta, index, reads ]
+        }
+
+    // Filter reads using the matched index
+    DEACON_FILTER(ch_reads_with_index)
 
-    DEACON_FILTER(DEACON_INDEX.out.index.join(ch_reads))
+    // TODO: optionally create output channel with indexes and their original sample-level metadata,
+    // this preserves the original behaviour of the workflow
+    // ch_index_with_meta = ch_reads_with_index
+    //     .map { meta, index, _reads -> [ meta, index ] }
 
     emit:
-    index          = DEACON_INDEX.out.index           // channel: [ val(meta), [ index ] ]
+    index          = DEACON_INDEX.out.index           // channel: [ val(meta), [ index ] ]  // TODO: optional emit ch_index_with_meta instead
     fastq_filtered = DEACON_FILTER.out.fastq_filtered // channel: [ val(meta), [ fastq ] ]
     summary        = DEACON_FILTER.out.log            // channel: [ val(meta), [ log ] ]
 }

From e784cbf54cdf0ebdeb611aac5566bc4e7feafd26 Mon Sep 17 00:00:00 2001
From: Pieter Moris <13552343+pmoris@users.noreply.github.com>
Date: Fri, 13 Feb 2026 14:01:45 +0100
Subject: [PATCH 2/3] Retain sample-level metadata in index output channel

This commit ensures that the sample-level metadata
associated with the sample reads is retained in the index
output channel. The baseName of the index file is added to
the meta map under an additional index_id key. This baseName
should equal the baseName of the reference fasta that was used,
or the `ext.prefix` chosen for the deacon/index module.

Note that the index channel emits one item per input
sample (i.e. it is not limited to the unique indexes),
so an index shared by several samples is emitted once
for each of those samples.
---
 .../nf-core/fastq_index_filter_deacon/main.nf    | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/subworkflows/nf-core/fastq_index_filter_deacon/main.nf b/subworkflows/nf-core/fastq_index_filter_deacon/main.nf
index d7a53c2f8af0..370cda8df5c3 100644
--- a/subworkflows/nf-core/fastq_index_filter_deacon/main.nf
+++ b/subworkflows/nf-core/fastq_index_filter_deacon/main.nf
@@ -30,8 +30,8 @@ workflow FASTQ_INDEX_FILTER_DEACON {
         .map { _meta, fasta, _reads -> fasta }
         .unique()
         .map { fasta ->
-            def fasta_meta = [ id: fasta.baseName ]
-            [ fasta_meta, fasta ]
+            def meta_fasta = [ id: fasta.baseName ]
+            [ meta_fasta, fasta ]
         }
 
     // Index unique FASTA files only
@@ -39,7 +39,7 @@ workflow FASTQ_INDEX_FILTER_DEACON {
 
     // Match indexes back to original samples
     ch_indexes = DEACON_INDEX.out.index
-        .map { fasta_meta, index -> [ fasta_meta.id, index ] }
+        .map { meta_fasta, index -> [ meta_fasta.id, index ] }
     ch_reads_with_index = ch_reads
         .map { meta, fasta, reads ->
             [ fasta.baseName, meta, reads ]
@@ -52,13 +52,13 @@ workflow FASTQ_INDEX_FILTER_DEACON {
     // Filter reads using the matched index
     DEACON_FILTER(ch_reads_with_index)
 
-    // TODO: optionally create output channel with indexes and their original sample-level metadata,
-    // this preserves the original behaviour of the workflow
-    // ch_index_with_meta = ch_reads_with_index
-    //     .map { meta, index, _reads -> [ meta, index ] }
+    // Create output channel with indexes, the original sample-level metadata and index id
+    ch_index_with_meta = ch_reads_with_index
+        .map { meta, index, _reads -> [ meta + [ index_id: index.baseName ], index ] }
+
 
     emit:
-    index          = DEACON_INDEX.out.index           // channel: [ val(meta), [ index ] ]  // TODO: optional emit ch_index_with_meta instead
+    index          = ch_index_with_meta               // channel: [ val(meta), [ index ] ]
     fastq_filtered = DEACON_FILTER.out.fastq_filtered // channel: [ val(meta), [ fastq ] ]
     summary        = DEACON_FILTER.out.log            // channel: [ val(meta), [ log ] ]
 }

From da400bcaa9e001f187140b9da690b1acbf522a26 Mon Sep 17 00:00:00 2001
From: Pieter Moris <13552343+pmoris@users.noreply.github.com>
Date: Fri, 13 Feb 2026 15:13:34 +0100
Subject: [PATCH 3/3] Update test snapshots

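Snapshots needed to be updated because the
index output channel now has an additional
index_id key in its meta map, and because the index
file is now named after the reference fasta (genome.idx)
rather than the sample (test.idx). The regenerated
snapshots also record nf-test 0.9.4 instead of 0.9.3.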
---
 .../tests/main.nf.test.snap                   | 42 ++++++++++---------
 .../tests/main.nf.test.snap                   | 34 ++++++++-------
 2 files changed, 42 insertions(+), 34 deletions(-)

diff --git a/subworkflows/nf-core/fastq_decontaminate_deacon_hostile/tests/main.nf.test.snap b/subworkflows/nf-core/fastq_decontaminate_deacon_hostile/tests/main.nf.test.snap
index adf3cc1a7c50..880efeccc7e7 100644
--- a/subworkflows/nf-core/fastq_decontaminate_deacon_hostile/tests/main.nf.test.snap
+++ b/subworkflows/nf-core/fastq_decontaminate_deacon_hostile/tests/main.nf.test.snap
@@ -14,18 +14,19 @@
                 [
                     {
                         "id": "test",
-                        "single_end": true
+                        "single_end": true,
+                        "index_id": "genome"
                     },
-                    "test.idx:md5,84e4985c91800686db9c9dca28fabd1a"
+                    "genome.idx:md5,84e4985c91800686db9c9dca28fabd1a"
                 ]
             ],
             "test.json"
         ],
+        "timestamp": "2026-02-16T13:18:48.758651665",
         "meta": {
-            "nf-test": "0.9.3",
+            "nf-test": "0.9.4",
             "nextflow": "25.10.2"
-        },
-        "timestamp": "2026-02-13T09:28:11.112943934"
+        }
     },
     "sarscov2 - fastq - single-end - hostile": {
         "content": [
@@ -53,11 +54,11 @@
             ],
             "test_single.json"
         ],
+        "timestamp": "2026-02-13T09:27:36.178537005",
         "meta": {
             "nf-test": "0.9.3",
             "nextflow": "25.10.2"
-        },
-        "timestamp": "2026-02-13T09:27:36.178537005"
+        }
     },
     "sarscov2 - fastq - paired-end - deacon": {
         "content": [
@@ -77,18 +78,19 @@
                 [
                     {
                         "id": "test",
-                        "single_end": false
+                        "single_end": false,
+                        "index_id": "genome"
                     },
-                    "test.idx:md5,84e4985c91800686db9c9dca28fabd1a"
+                    "genome.idx:md5,84e4985c91800686db9c9dca28fabd1a"
                 ]
             ],
             "test.json"
         ],
+        "timestamp": "2026-02-16T13:18:57.869797967",
         "meta": {
-            "nf-test": "0.9.3",
+            "nf-test": "0.9.4",
             "nextflow": "25.10.2"
-        },
-        "timestamp": "2026-02-13T09:28:24.987001016"
+        }
     },
     "sarscov2 - fastq - paired-end - hostile": {
         "content": [
@@ -119,11 +121,11 @@
             ],
             "test_paired.json"
         ],
+        "timestamp": "2026-02-13T09:27:57.460392559",
         "meta": {
             "nf-test": "0.9.3",
             "nextflow": "25.10.2"
-        },
-        "timestamp": "2026-02-13T09:27:57.460392559"
+        }
     },
     "sarscov2 - fastq - single-end - deacon - stub": {
         "content": [
@@ -147,7 +149,8 @@
                     [
                         {
                             "id": "test",
-                            "single_end": true
+                            "single_end": true,
+                            "index_id": "genome"
                         },
                         "genome.idx:md5,d41d8cd98f00b204e9800998ecf8427e"
                     ]
@@ -174,7 +177,8 @@
                     [
                         {
                             "id": "test",
-                            "single_end": true
+                            "single_end": true,
+                            "index_id": "genome"
                         },
                         "genome.idx:md5,d41d8cd98f00b204e9800998ecf8427e"
                     ]
@@ -196,10 +200,10 @@
                 ]
             }
         ],
+        "timestamp": "2026-02-16T13:19:06.320189196",
         "meta": {
-            "nf-test": "0.9.3",
+            "nf-test": "0.9.4",
             "nextflow": "25.10.2"
-        },
-        "timestamp": "2026-02-13T09:28:38.421681888"
+        }
     }
 }
\ No newline at end of file
diff --git a/subworkflows/nf-core/fastq_index_filter_deacon/tests/main.nf.test.snap b/subworkflows/nf-core/fastq_index_filter_deacon/tests/main.nf.test.snap
index 3ebf4eb7742f..7e81cbfcffa6 100644
--- a/subworkflows/nf-core/fastq_index_filter_deacon/tests/main.nf.test.snap
+++ b/subworkflows/nf-core/fastq_index_filter_deacon/tests/main.nf.test.snap
@@ -6,7 +6,8 @@
                     [
                         {
                             "id": "test",
-                            "single_end": true
+                            "single_end": true,
+                            "index_id": "genome"
                         },
                         "genome.idx:md5,d41d8cd98f00b204e9800998ecf8427e"
                     ]
@@ -42,7 +43,8 @@
                     [
                         {
                             "id": "test",
-                            "single_end": true
+                            "single_end": true,
+                            "index_id": "genome"
                         },
                         "genome.idx:md5,d41d8cd98f00b204e9800998ecf8427e"
                     ]
@@ -58,11 +60,11 @@
                 ]
             }
         ],
+        "timestamp": "2026-02-13T15:10:09.178241099",
         "meta": {
-            "nf-test": "0.9.3",
+            "nf-test": "0.9.4",
             "nextflow": "25.10.2"
-        },
-        "timestamp": "2026-02-13T09:19:12.052209052"
+        }
     },
     "sarscov2 - fastq - paired-end": {
         "content": [
@@ -70,9 +72,10 @@
                 [
                     {
                         "id": "test",
-                        "single_end": false
+                        "single_end": false,
+                        "index_id": "genome"
                     },
-                    "test.idx:md5,84e4985c91800686db9c9dca28fabd1a"
+                    "genome.idx:md5,84e4985c91800686db9c9dca28fabd1a"
                 ]
             ],
             [
@@ -89,11 +92,11 @@
             ],
             "test.json"
         ],
+        "timestamp": "2026-02-13T15:09:59.820753976",
         "meta": {
-            "nf-test": "0.9.3",
+            "nf-test": "0.9.4",
             "nextflow": "25.10.2"
-        },
-        "timestamp": "2026-02-13T09:19:03.559444963"
+        }
     },
     "sarscov2 - fastq - single-end": {
         "content": [
@@ -101,9 +104,10 @@
                 [
                     {
                         "id": "test",
-                        "single_end": true
+                        "single_end": true,
+                        "index_id": "genome"
                     },
-                    "test.idx:md5,84e4985c91800686db9c9dca28fabd1a"
+                    "genome.idx:md5,84e4985c91800686db9c9dca28fabd1a"
                 ]
             ],
             [
@@ -117,10 +121,10 @@
             ],
             "test.json"
         ],
+        "timestamp": "2026-02-13T15:09:50.38883241",
         "meta": {
-            "nf-test": "0.9.3",
+            "nf-test": "0.9.4",
             "nextflow": "25.10.2"
-        },
-        "timestamp": "2026-02-13T09:18:55.835377502"
+        }
     }
 }
\ No newline at end of file