Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions modules/nf-core/binette/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment for the BINETTE module.
channels:
  - conda-forge
  - bioconda
dependencies:
  # Exact version pin; keep in step with the container tags in main.nf.
  - bioconda::binette=1.2.1
65 changes: 65 additions & 0 deletions modules/nf-core/binette/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
// Runs Binette on one or more contig-to-bin tables plus the contigs they refer to.
process BINETTE {
tag "$meta.id"
label 'process_medium'

conda "${moduleDir}/environment.yml"
// Singularity/Apptainer uses the HTTPS image unless task.ext.singularity_pull_docker_container
// is set; every other container engine uses the Wave registry image.
container "${ workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container ?
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/de/de7fccc12dc09b996ec3b65df6060b4e3ad284088c5491bc3d97c582e8e7c3f6/data':
'community.wave.seqera.io/library/binette:1.2.1--cc07d41be4a5b0b2' }"

input:
// meta map, contig-to-bin table(s), contigs, and proteins (may be empty; the script
// only passes --proteins when a value is present).
tuple val(meta) , path(contig2bin), path(contigs), path(proteins)
// Database handed to binette via --checkm2_db; meta2 is not read by the script.
tuple val(meta2), path(checkm2_db)
Comment thread
prototaxites marked this conversation as resolved.

output:
// All paths are relative to the task directory (binette is run with --outdir .).
tuple val(meta), path("final_bins/*.fa.gz") , emit: final_bins
tuple val(meta), path("${prefix}.final_contig_to_bin.tsv") , emit: contig2bin
tuple val(meta), path("input_bins_quality_reports/*.tsv") , emit: input_bins_quality_reports
tuple val(meta), path("${prefix}.final_bins_quality_reports.tsv"), emit: final_bins_quality_report
// Tool version is evaluated at task runtime and sent to the 'versions' topic.
tuple val("${task.process}"), val('binette'), eval("binette --version | sed 's/Binette //'"), topic: versions, emit: versions_binette

when:
task.ext.when == null || task.ext.when

script:
def args = task.ext.args ?: ''
// Deliberately not declared with 'def': prefix is also interpolated in the output
// paths above, so it must not be local to the script block.
prefix = task.ext.prefix ?: "${meta.id}"
// Only add --proteins when a proteins value was supplied.
def proteins_input = proteins ? "--proteins ${proteins}" : ""
"""
binette \\
--contig2bin_tables ${contig2bin} \\
--contigs ${contigs} \\
${proteins_input} \\
--checkm2_db ${checkm2_db} \\
--threads ${task.cpus} \\
--prefix ${prefix} \\
--outdir . \\
${args}
Comment thread
prototaxites marked this conversation as resolved.

# -n keeps the original file name and mtime out of the gzip header, so the bytes are reproducible
find final_bins/ -maxdepth 1 -name "*.fa" -type f -exec gzip -n {} \\;
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it an nf-core standard to compress fasta? I don't know if gz files are convenient for a regular user 🤔

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's a general nf-core recommendation to gzip files (https://nf-co.re/docs/specifications/components/modules/general#compression-of-input-and-output-files), but perhaps more importantly all the other nf-core binning modules also write gzipped fasta so it's consistent for downstream use.


# Prefix every input-bin quality report; IFS= and -r keep whitespace and backslashes in names intact
find input_bins_quality_reports/ -maxdepth 1 -name "*.tsv" -type f | while IFS= read -r file; do
    newname="input_bins_quality_reports/${prefix}.\$(basename "\$file")"
    mv "\$file" "\$newname"
done

mv final_contig_to_bin.tsv ${prefix}.final_contig_to_bin.tsv
mv final_bins_quality_reports.tsv ${prefix}.final_bins_quality_reports.tsv
"""

stub:
def args = task.ext.args ?: ''
prefix = task.ext.prefix ?: "${meta.id}"
// Create empty placeholders for every declared output. The input-bin reports carry
// the prefix because the real script renames them the same way.
"""
mkdir -p final_bins
mkdir -p input_bins_quality_reports

echo "" | gzip > final_bins/${prefix}_bin1.fa.gz
echo "" | gzip > final_bins/${prefix}_bin2.fa.gz

touch ${prefix}.final_contig_to_bin.tsv
touch ${prefix}.final_bins_quality_reports.tsv
touch input_bins_quality_reports/${prefix}.input_bins_1.concoct_bins.tsv
touch input_bins_quality_reports/${prefix}.input_bins_1.metabat2_bins.tsv
"""
}
129 changes: 129 additions & 0 deletions modules/nf-core/binette/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
name: binette
description: A fast and accurate binning refinement tool to construct high quality MAGs from the output of multiple binning tools.
keywords:
  - binning
  - refinement
  - genomics
  - metagenomics
  - mag
tools:
  - binette:
      description: |
        Binette is a fast and accurate binning refinement tool designed to
        construct high-quality MAGs from the output of multiple binning tools.
      homepage: https://binette.readthedocs.io
      documentation: https://binette.readthedocs.io
      tool_dev_url: https://github.com/genotoul-bioinfo/Binette
      licence:
        - GPL-3.0
input:
  - - meta:
        type: map
        description: |
          Groovy Map containing sample information
          e.g. [ id:'test', single_end:false ]
    - contig2bin:
        type: file
        description: Contig to bin assignment file
        pattern: "*.tsv"
        ontologies:
          - edam: http://edamontology.org/format_3475 # TSV
    - contigs:
        type: file
        description: Fasta file containing contigs
        pattern: "*.{fa.gz,fas.gz,fasta.gz}"
        ontologies:
          - edam: http://edamontology.org/format_1929 # FASTA
          - edam: http://edamontology.org/format_3989 # GZIP format
    - proteins:
        type: file
        description: Optional FASTA file of protein (amino acid) sequences, passed to --proteins
        pattern: "*.{fa.gz,fas.gz,fasta.gz}"
        ontologies:
          - edam: http://edamontology.org/format_1929 # FASTA
          - edam: http://edamontology.org/format_3989 # GZIP format
  - - meta2:
        type: map
        description: |
          Groovy Map containing CheckM2 database information
          e.g. [ id:'checkm2_db' ]
    - checkm2_db:
        type: file
        description: CheckM2 DIAMOND database file.
        ontologies: []
output:
  final_bins:
    - - meta:
          type: map
          description: |
            Groovy Map containing sample information
            e.g. [ id:'test', single_end:false ]
      - "final_bins/*.fa.gz":
          type: file
          description: Refined bins in fasta format
          pattern: "*.fa.gz"
          ontologies:
            - edam: http://edamontology.org/format_1929 # FASTA
            - edam: http://edamontology.org/format_3989 # GZIP format
  contig2bin:
    - - meta:
          type: map
          description: |
            Groovy Map containing sample information
            e.g. [ id:'test', single_end:false ]
      - "${prefix}.final_contig_to_bin.tsv":
          type: file
          description: Final contig to bin assignment file
          pattern: "*.tsv"
          ontologies:
            - edam: http://edamontology.org/format_3475 # TSV
  input_bins_quality_reports:
    - - meta:
          type: map
          description: |
            Groovy Map containing sample information
            e.g. [ id:'test', single_end:false ]
      - "input_bins_quality_reports/*.tsv":
          type: file
          description: Quality reports for input bins
          pattern: "*.tsv"
          ontologies:
            - edam: http://edamontology.org/format_3475 # TSV
  final_bins_quality_report:
    - - meta:
          type: map
          description: |
            Groovy Map containing sample information
            e.g. [ id:'test', single_end:false ]
      - "${prefix}.final_bins_quality_reports.tsv":
          type: file
          description: Quality report for final refined bins
          pattern: "*.tsv"
          ontologies:
            - edam: http://edamontology.org/format_3475 # TSV
  # The eval key below must match the eval() expression in main.nf character for character.
  versions_binette:
    - - "${task.process}":
          type: string
          description: The name of the process
      - binette:
          type: string
          description: The name of the tool
      - binette --version | sed 's/Binette //':
          type: eval
          description: The expression to obtain the version of the tool
topics:
  versions:
    - - "${task.process}":
          type: string
          description: The name of the process
      - binette:
          type: string
          description: The name of the tool
      - binette --version | sed 's/Binette //':
          type: eval
          description: The expression to obtain the version of the tool

authors:
  - "@prototaxites"
maintainers:
  - "@prototaxites"
108 changes: 108 additions & 0 deletions modules/nf-core/binette/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
// nf-test suite for the BINETTE process: one real run and one stub run.
nextflow_process {

name "Test Process BINETTE"
script "../main.nf"
process "BINETTE"

tag "modules"
tag "modules_nfcore"
tag "binette"

// NOTE(review): presumably this config maps params.binette_args onto the process
// ext.args — confirm against tests/nextflow.config.
config './nextflow.config'

test("bacteroides_fragilis - genome") {

setup {
// Write two contig-to-bin tables with identical content into launchDir; they are
// passed together as the contig2bin input below.
new File("${launchDir}/c2b_1.txt").text = """
NZ_CP069563.1\tbin1
NZ_CP069564.1\tbin2
""".stripIndent().trim()

new File("${launchDir}/c2b_2.txt").text = """
NZ_CP069563.1\tbin1
NZ_CP069564.1\tbin2
""".stripIndent().trim()

// Run the checkm2/databasedownload module first; its 'database' output is used as input[1].
run("CHECKM2_DATABASEDOWNLOAD") {
script "../../checkm2/databasedownload/main.nf"
process {
"""
input[0] = []
"""
}
}
}

when {

params {
binette_args = "--low_mem"
}

// input[0]: meta, both tables, the B. fragilis genome, and an empty proteins slot.
process {
"""
input[0] = [
[ id:'test' ],
[file("${launchDir}/c2b_1.txt"), file("${launchDir}/c2b_2.txt")],
file(params.modules_testdata_base_path + "genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz"),
[]
]
input[1] = CHECKM2_DATABASEDOWNLOAD.out.database
"""
}
}

then {
assert process.success
assertAll(
{ assert snapshot(sanitizeOutput(process.out)).match() }
)
}

}

test("bacteroides_fragilis - stub") {

options "-stub"

setup {
// Same two tables as the real-run test; no database module is run for the stub.
new File("${launchDir}/c2b_1.txt").text = """
NZ_CP069563.1\tbin1
NZ_CP069564.1\tbin2
""".stripIndent().trim()

new File("${launchDir}/c2b_2.txt").text = """
NZ_CP069563.1\tbin1
NZ_CP069564.1\tbin2
""".stripIndent().trim()
}

when {

params {
binette_args = "--low_mem"
}

// input[1] is an empty meta map with an empty path in place of the database.
process {
"""
input[0] = [
[ id:'test' ],
[file("${launchDir}/c2b_1.txt"), file("${launchDir}/c2b_2.txt")],
file(params.modules_testdata_base_path + "genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz"),
[]
]
input[1] = [[:],[]]
"""
}
}

then {
assert process.success
assertAll(
{ assert snapshot(sanitizeOutput(process.out)).match() }
)
}

}

}
Loading
Loading