From e61b84ff4960c14190da531d517430842a9ab2cc Mon Sep 17 00:00:00 2001 From: Jim Downie Date: Thu, 7 May 2026 15:40:21 +0100 Subject: [PATCH 1/2] feat: begin writing binette --- modules/nf-core/binette/environment.yml | 7 ++ modules/nf-core/binette/main.nf | 64 ++++++++++++++++++ modules/nf-core/binette/meta.yml | 66 +++++++++++++++++++ modules/nf-core/binette/tests/main.nf.test | 75 ++++++++++++++++++++++ 4 files changed, 212 insertions(+) create mode 100644 modules/nf-core/binette/environment.yml create mode 100644 modules/nf-core/binette/main.nf create mode 100644 modules/nf-core/binette/meta.yml create mode 100644 modules/nf-core/binette/tests/main.nf.test diff --git a/modules/nf-core/binette/environment.yml b/modules/nf-core/binette/environment.yml new file mode 100644 index 00000000000..402e0b37ff5 --- /dev/null +++ b/modules/nf-core/binette/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::binette=1.2.1 diff --git a/modules/nf-core/binette/main.nf b/modules/nf-core/binette/main.nf new file mode 100644 index 00000000000..e38d89d338c --- /dev/null +++ b/modules/nf-core/binette/main.nf @@ -0,0 +1,64 @@ +process BINETTE { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container ? 
+ 'community.wave.seqera.io/library/binette:1.2.1--cc07d41be4a5b0b2': + 'quay.io/biocontainers/YOUR-TOOL-HERE' }" + + input: + tuple val(meta), path(contig2bin), path(contigs) + tuple val(meta), path(checkm2_db) + + output: + tuple val(meta), path("final_bins/*.fa.gz") , emit: final_bins + tuple val(meta), path("${prefix}.final_contig_to_bin.tsv") , emit: contig2bin + tuple val(meta), path("input_bins_quality_reports/*.tsv") , emit: input_bins_quality_reports + tuple val(meta), path("${prefix}.final_bins_quality_reports.tsv"), emit: final_bins_quality_report + tuple val("${task.process}"), val('binette'), eval("binette --version"), topic: versions, emit: versions_binette + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + binette \\ + --contig2bin_tables ${contig2bin} \\ + --contigs ${contigs} \\ + --outdir . \\ + ${args} + + find final_bins/ -maxdepth 1 -name "*.fa" -type f | while read file; do + newname="final_bins/${prefix}.\$(basename "\$file")" + mv "\$file" "\$newname" + gzip "\$newname" + done + + find input_bins_quality_reports/ -maxdepth 1 -name "*.tsv" -type f | while read file; do + newname="final_bins/${prefix}.\$(basename "\$file")" + mv "\$file" "\$newname" + done + + mv final_contig_to_bin.tsv ${prefix}/${prefix}.final_contig_to_bin.tsv + mv final_bins_quality_reports.tsv ${prefix}/${prefix}.final_bins_quality_reports.tsv + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + mkdir -p final_bins + mkdir -p input_bins_quality_reports + + echo "" | gzip > final_bins/${prefix}.binette_bin1.fa.gz + echo "" | gzip > final_bins/${prefix}.binette_bin2.fa.gz + + touch ${prefix}/${prefix}.final_contig_to_bin.tsv + touch ${prefix}/${prefix}.final_bins_quality_reports.tsv + touch ${prefix}/input_bins_quality_reports/input_bins_1.concoct_bins.tsv + touch 
${prefix}/input_bins_quality_reports/input_bins_1.metabat2_bins.tsv + """ +} diff --git a/modules/nf-core/binette/meta.yml b/modules/nf-core/binette/meta.yml new file mode 100644 index 00000000000..cf5ceb8aade --- /dev/null +++ b/modules/nf-core/binette/meta.yml @@ -0,0 +1,66 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "binette" +description: write your description here +keywords: + - sort + - example + - genomics +tools: + - "binette": + description: "A fast and accurate binning refinement tool to constructs high quality + MAGs from the output of multiple binning tools." + homepage: "https://binette.readthedocs.io" + documentation: "https://binette.readthedocs.io" + tool_dev_url: "None" + doi: "" + licence: ["GPL v3"] + identifier: biotools:binette + +input: + - - meta: + type: map + description: Groovy Map containing sample information. e.g. `[ + id:'sample1' ]` + - input: + type: file + description: "" + pattern: "" + ontologies: + - edam: "" +output: + output: + - - meta: + type: map + description: Groovy Map containing sample information. e.g. 
`[ + id:'sample1' ]` + - "*": + type: file + description: "" + pattern: "" + ontologies: + - edam: "" + versions_binette: + - - ${task.process}: + type: string + description: The name of the process + - binette: + type: string + description: The name of the tool + - binette --version: + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - binette: + type: string + description: The name of the tool + - binette --version: + type: eval + description: The expression to obtain the version of the tool +authors: + - "@prototaxites" +maintainers: + - "@prototaxites" diff --git a/modules/nf-core/binette/tests/main.nf.test b/modules/nf-core/binette/tests/main.nf.test new file mode 100644 index 00000000000..6e4d2859663 --- /dev/null +++ b/modules/nf-core/binette/tests/main.nf.test @@ -0,0 +1,75 @@ +// TODO nf-core: Once you have added the required tests, please run the following command to build this file: +// nf-core modules test binette +nextflow_process { + + name "Test Process BINETTE" + script "../main.nf" + process "BINETTE" + + tag "modules" + tag "modules_nfcore" + tag "binette" + + // TODO nf-core: Change the test name preferably indicating the test-data and file-format used + test("sarscov2 - bam") { + + // TODO nf-core: If you are created a test for a chained module + // (the module requires running more than one process to generate the required output) + // add the 'setup' method here. + // You can find more information about how to use a 'setup' method in the nf-test docs (https://www.nf-test.com/docs/testcases/setup/). + + when { + process { + """ + // TODO nf-core: define inputs of the process here. 
Example: + + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + ] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + process.out, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } + //TODO nf-core: Add all required assertions to verify the test output. + // See https://nf-co.re/docs/developing/testing/assertions for more information and examples. + ) + } + + } + + // TODO nf-core: Change the test name preferably indicating the test-data and file-format used but keep the " - stub" suffix. + test("sarscov2 - bam - stub") { + + options "-stub" + + when { + process { + """ + // TODO nf-core: define inputs of the process here. Example: + + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + ] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(process.out).match() } + ) + } + + } + +} From ca5457095c8b41176fe61141bc29d74f4ee39ebc Mon Sep 17 00:00:00 2001 From: Jim Downie Date: Thu, 7 May 2026 16:58:04 +0100 Subject: [PATCH 2/2] feat: add binette --- modules/nf-core/binette/main.nf | 41 +++--- modules/nf-core/binette/meta.yml | 127 +++++++++++++----- modules/nf-core/binette/tests/main.nf.test | 79 +++++++---- .../nf-core/binette/tests/main.nf.test.snap | 108 +++++++++++++++ modules/nf-core/binette/tests/nextflow.config | 5 + 5 files changed, 285 insertions(+), 75 deletions(-) create mode 100644 modules/nf-core/binette/tests/main.nf.test.snap create mode 100644 modules/nf-core/binette/tests/nextflow.config diff --git a/modules/nf-core/binette/main.nf b/modules/nf-core/binette/main.nf index e38d89d338c..f58c7559c8b 100644 --- a/modules/nf-core/binette/main.nf +++ b/modules/nf-core/binette/main.nf @@ -4,19 +4,19 @@ process BINETTE { conda "${moduleDir}/environment.yml" 
container "${ workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container ? - 'community.wave.seqera.io/library/binette:1.2.1--cc07d41be4a5b0b2': - 'quay.io/biocontainers/YOUR-TOOL-HERE' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/de/de7fccc12dc09b996ec3b65df6060b4e3ad284088c5491bc3d97c582e8e7c3f6/data': + 'community.wave.seqera.io/library/binette:1.2.1--cc07d41be4a5b0b2' }" input: - tuple val(meta), path(contig2bin), path(contigs) - tuple val(meta), path(checkm2_db) + tuple val(meta) , path(contig2bin), path(contigs), path(proteins) + tuple val(meta2), path(checkm2_db) output: tuple val(meta), path("final_bins/*.fa.gz") , emit: final_bins tuple val(meta), path("${prefix}.final_contig_to_bin.tsv") , emit: contig2bin tuple val(meta), path("input_bins_quality_reports/*.tsv") , emit: input_bins_quality_reports tuple val(meta), path("${prefix}.final_bins_quality_reports.tsv"), emit: final_bins_quality_report - tuple val("${task.process}"), val('binette'), eval("binette --version"), topic: versions, emit: versions_binette + tuple val("${task.process}"), val('binette'), eval("binette --version | sed 's/Binette //'"), topic: versions, emit: versions_binette when: task.ext.when == null || task.ext.when @@ -24,41 +24,42 @@ process BINETTE { script: def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" + def proteins_input = proteins ? "--proteins ${proteins}" : "" """ binette \\ --contig2bin_tables ${contig2bin} \\ --contigs ${contigs} \\ + ${proteins_input} \\ + --checkm2_db ${checkm2_db} \\ + --threads ${task.cpus} \\ + --prefix ${prefix} \\ --outdir . 
\\ ${args} - find final_bins/ -maxdepth 1 -name "*.fa" -type f | while read file; do - newname="final_bins/${prefix}.\$(basename "\$file")" - mv "\$file" "\$newname" - gzip "\$newname" - done + find final_bins/ -maxdepth 1 -name "*.fa" -type f -exec gzip {} \\; find input_bins_quality_reports/ -maxdepth 1 -name "*.tsv" -type f | while read file; do - newname="final_bins/${prefix}.\$(basename "\$file")" + newname="input_bins_quality_reports/${prefix}.\$(basename "\$file")" mv "\$file" "\$newname" done - mv final_contig_to_bin.tsv ${prefix}/${prefix}.final_contig_to_bin.tsv - mv final_bins_quality_reports.tsv ${prefix}/${prefix}.final_bins_quality_reports.tsv + mv final_contig_to_bin.tsv ${prefix}.final_contig_to_bin.tsv + mv final_bins_quality_reports.tsv ${prefix}.final_bins_quality_reports.tsv """ stub: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + prefix = task.ext.prefix ?: "${meta.id}" """ mkdir -p final_bins mkdir -p input_bins_quality_reports - echo "" | gzip > final_bins/${prefix}.binette_bin1.fa.gz - echo "" | gzip > final_bins/${prefix}.binette_bin2.fa.gz + echo "" | gzip > final_bins/${prefix}_bin1.fa.gz + echo "" | gzip > final_bins/${prefix}_bin2.fa.gz - touch ${prefix}/${prefix}.final_contig_to_bin.tsv - touch ${prefix}/${prefix}.final_bins_quality_reports.tsv - touch ${prefix}/input_bins_quality_reports/input_bins_1.concoct_bins.tsv - touch ${prefix}/input_bins_quality_reports/input_bins_1.metabat2_bins.tsv + touch ${prefix}.final_contig_to_bin.tsv + touch ${prefix}.final_bins_quality_reports.tsv + touch input_bins_quality_reports/input_bins_1.concoct_bins.tsv + touch input_bins_quality_reports/input_bins_1.metabat2_bins.tsv """ } diff --git a/modules/nf-core/binette/meta.yml b/modules/nf-core/binette/meta.yml index cf5ceb8aade..fa638ce5338 100644 --- a/modules/nf-core/binette/meta.yml +++ b/modules/nf-core/binette/meta.yml @@ -1,65 +1,128 @@ -# yaml-language-server: 
$schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json -name: "binette" -description: write your description here +name: binette +description: A fast and accurate binning refinement tool to construct high quality MAGs from the output of multiple binning tools. keywords: - - sort - - example + - binning + - refinement - genomics + - metagenomics + - mag tools: - - "binette": - description: "A fast and accurate binning refinement tool to constructs high quality - MAGs from the output of multiple binning tools." - homepage: "https://binette.readthedocs.io" - documentation: "https://binette.readthedocs.io" - tool_dev_url: "None" - doi: "" - licence: ["GPL v3"] - identifier: biotools:binette - + - binette: + description: | + Binette is a fast and accurate binning refinement tool designed to + construct high-quality MAGs from the output of multiple binning tools. + homepage: https://binette.readthedocs.io + documentation: https://binette.readthedocs.io + tool_dev_url: https://github.com/genotoul-bioinfo/Binette + licence: + - GPL-3.0 input: - - meta: type: map - description: Groovy Map containing sample information. e.g. `[ - id:'sample1' ]` - - input: + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - contig2bin: + type: file + description: Contig to bin assignment file + pattern: "*.tsv" + ontologies: + - edam: http://edamontology.org/format_3475 # TSV + - contigs: + type: file + description: Fasta file containing contigs + pattern: "*.{fa.gz,fas.gz,fasta.gz}" + ontologies: + - edam: http://edamontology.org/format_1929 # FASTA + - edam: http://edamontology.org/format_3989 # GZIP format + - proteins: type: file - description: "" - pattern: "" + description: Optional amino acid FASTA file of protein sequences for the contigs + pattern: "*.{fa.gz,fas.gz,fasta.gz}" ontologies: - - edam: "" + - edam: http://edamontology.org/format_1929 # FASTA + - edam: http://edamontology.org/format_3989 # GZIP format + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - checkm2_db: + type: file + description: CheckM2 DIAMOND database file. + ontologies: [] output: - output: + final_bins: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "final_bins/*.fa.gz": + type: file + description: Refined bins in fasta format + pattern: "*.fa.gz" + ontologies: + - edam: http://edamontology.org/format_1929 # FASTA + - edam: http://edamontology.org/format_3989 # GZIP format + contig2bin: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "${prefix}.final_contig_to_bin.tsv": + type: file + description: Final contig to bin assignment file + pattern: "*.tsv" + ontologies: + - edam: http://edamontology.org/format_3475 # TSV + input_bins_quality_reports: - - meta: type: map - description: Groovy Map containing sample information. e.g. `[ - id:'sample1' ]` - - "*": + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "input_bins_quality_reports/*.tsv": type: file - description: "" - pattern: "" + description: Quality reports for input bins + pattern: "*.tsv" ontologies: - - edam: "" + - edam: http://edamontology.org/format_3475 # TSV + final_bins_quality_report: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "${prefix}.final_bins_quality_reports.tsv": + type: file + description: Quality report for final refined bins + pattern: "*.tsv" + ontologies: + - edam: http://edamontology.org/format_3475 # TSV versions_binette: - - - ${task.process}: + - - "${task.process}": type: string description: The name of the process - binette: type: string description: The name of the tool - - binette --version: + - binette --version | sed 's/Binette //': type: eval description: The expression to obtain the version of the tool topics: versions: - - - ${task.process}: + - - "${task.process}": type: string description: The name of the process - binette: type: string description: The name of the tool - - binette --version: + - binette --version | sed 's/Binette //': type: eval description: The expression to obtain the version of the tool + authors: - "@prototaxites" maintainers: diff --git a/modules/nf-core/binette/tests/main.nf.test b/modules/nf-core/binette/tests/main.nf.test index 6e4d2859663..553b34c5e84 100644 --- a/modules/nf-core/binette/tests/main.nf.test +++ b/modules/nf-core/binette/tests/main.nf.test @@ -1,5 +1,3 @@ -// TODO nf-core: Once you have added the required tests, please run the following command to build this file: -// nf-core modules test binette nextflow_process { name "Test Process BINETTE" @@ -10,23 +8,46 @@ nextflow_process { tag "modules_nfcore" tag "binette" - // TODO nf-core: Change the test name preferably indicating the test-data and file-format used - test("sarscov2 - bam") { + config './nextflow.config' - // TODO nf-core: If you are created a test for 
a chained module - // (the module requires running more than one process to generate the required output) - // add the 'setup' method here. - // You can find more information about how to use a 'setup' method in the nf-test docs (https://www.nf-test.com/docs/testcases/setup/). + test("bacteroides_fragilis - genome") { + + setup { + new File("${launchDir}/c2b_1.txt").text = """ + NZ_CP069563.1\tbin1 + NZ_CP069564.1\tbin2 + """.stripIndent().trim() + + new File("${launchDir}/c2b_2.txt").text = """ + NZ_CP069563.1\tbin1 + NZ_CP069564.1\tbin2 + """.stripIndent().trim() + + run("CHECKM2_DATABASEDOWNLOAD") { + script "../../checkm2/databasedownload/main.nf" + process { + """ + input[0] = [] + """ + } + } + } when { + + params { + binette_args = "--low_mem" + } + process { """ - // TODO nf-core: define inputs of the process here. Example: - input[0] = [ [ id:'test' ], - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + [file("${launchDir}/c2b_1.txt"), file("${launchDir}/c2b_2.txt")], + file(params.modules_testdata_base_path + "genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz"), + [] ] + input[1] = CHECKM2_DATABASEDOWNLOAD.out.database """ } } @@ -34,31 +55,43 @@ nextflow_process { then { assert process.success assertAll( - { assert snapshot( - process.out, - process.out.findAll { key, val -> key.startsWith('versions') } - ).match() } - //TODO nf-core: Add all required assertions to verify the test output. - // See https://nf-co.re/docs/developing/testing/assertions for more information and examples. + { assert snapshot(sanitizeOutput(process.out)).match() } ) } } - // TODO nf-core: Change the test name preferably indicating the test-data and file-format used but keep the " - stub" suffix. 
- test("sarscov2 - bam - stub") { + test("bacteroides_fragilis - stub") { options "-stub" + setup { + new File("${launchDir}/c2b_1.txt").text = """ + NZ_CP069563.1\tbin1 + NZ_CP069564.1\tbin2 + """.stripIndent().trim() + + new File("${launchDir}/c2b_2.txt").text = """ + NZ_CP069563.1\tbin1 + NZ_CP069564.1\tbin2 + """.stripIndent().trim() + } + when { + + params { + binette_args = "--low_mem" + } + process { """ - // TODO nf-core: define inputs of the process here. Example: - input[0] = [ [ id:'test' ], - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + [file("${launchDir}/c2b_1.txt"), file("${launchDir}/c2b_2.txt")], + file(params.modules_testdata_base_path + "genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz"), + [] ] + input[1] = [[:],[]] """ } } @@ -66,7 +99,7 @@ nextflow_process { then { assert process.success assertAll( - { assert snapshot(process.out).match() } + { assert snapshot(sanitizeOutput(process.out)).match() } ) } diff --git a/modules/nf-core/binette/tests/main.nf.test.snap b/modules/nf-core/binette/tests/main.nf.test.snap new file mode 100644 index 00000000000..c743ae71831 --- /dev/null +++ b/modules/nf-core/binette/tests/main.nf.test.snap @@ -0,0 +1,108 @@ +{ + "bacteroides_fragilis - stub": { + "content": [ + { + "contig2bin": [ + [ + { + "id": "test" + }, + "test.final_contig_to_bin.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "final_bins": [ + [ + { + "id": "test" + }, + [ + "test_bin1.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_bin2.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "final_bins_quality_report": [ + [ + { + "id": "test" + }, + "test.final_bins_quality_reports.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "input_bins_quality_reports": [ + [ + { + "id": "test" + }, + [ + "input_bins_1.concoct_bins.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "input_bins_1.metabat2_bins.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" 
+ ] + ] + ], + "versions_binette": [ + [ + "BINETTE", + "binette", + "1.2.1" + ] + ] + } + ], + "timestamp": "2026-05-07T16:52:53.967477", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.4" + } + }, + "bacteroides_fragilis - genome": { + "content": [ + { + "contig2bin": [ + [ + { + "id": "test" + }, + "test.final_contig_to_bin.tsv:md5,f6506570e4d66c1337696ceeb773272a" + ] + ], + "final_bins": [ + [ + { + "id": "test" + }, + "test_bin1.fa.gz:md5,7ccb4853c0a27ebd51417d3eb2aacc45" + ] + ], + "final_bins_quality_report": [ + [ + { + "id": "test" + }, + "test.final_bins_quality_reports.tsv:md5,3bc09992b65e822869fae102e173cd1d" + ] + ], + "input_bins_quality_reports": [ + [ + { + "id": "test" + }, + "test.input_bins_1.c2b_1.tsv:md5,49ac065431675238d99c279599d12b1e" + ] + ], + "versions_binette": [ + [ + "BINETTE", + "binette", + "1.2.1" + ] + ] + } + ], + "timestamp": "2026-05-07T16:55:56.111876", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.4" + } + } +} \ No newline at end of file diff --git a/modules/nf-core/binette/tests/nextflow.config b/modules/nf-core/binette/tests/nextflow.config new file mode 100644 index 00000000000..6a21244aacc --- /dev/null +++ b/modules/nf-core/binette/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: BINETTE { + ext.args = { "${params.binette_args}" } + } +}