-
Notifications
You must be signed in to change notification settings - Fork 1k
Add Binette #11563
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Add Binette #11563
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,7 @@ | ||
| --- | ||
| # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json | ||
| channels: | ||
| - conda-forge | ||
| - bioconda | ||
| dependencies: | ||
| - bioconda::binette=1.2.1 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,65 @@ | ||
| process BINETTE { | ||
| tag "$meta.id" | ||
| label 'process_medium' | ||
|
|
||
| conda "${moduleDir}/environment.yml" | ||
| container "${ workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container ? | ||
| 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/de/de7fccc12dc09b996ec3b65df6060b4e3ad284088c5491bc3d97c582e8e7c3f6/data': | ||
| 'community.wave.seqera.io/library/binette:1.2.1--cc07d41be4a5b0b2' }" | ||
|
|
||
| input: | ||
| tuple val(meta) , path(contig2bin), path(contigs), path(proteins) | ||
| tuple val(meta2), path(checkm2_db) | ||
|
|
||
| output: | ||
| tuple val(meta), path("final_bins/*.fa.gz") , emit: final_bins | ||
| tuple val(meta), path("${prefix}.final_contig_to_bin.tsv") , emit: contig2bin | ||
| tuple val(meta), path("input_bins_quality_reports/*.tsv") , emit: input_bins_quality_reports | ||
| tuple val(meta), path("${prefix}.final_bins_quality_reports.tsv"), emit: final_bins_quality_report | ||
| tuple val("${task.process}"), val('binette'), eval("binette --version | sed 's/Binette //'"), topic: versions, emit: versions_binette | ||
|
|
||
| when: | ||
| task.ext.when == null || task.ext.when | ||
|
|
||
| script: | ||
| def args = task.ext.args ?: '' | ||
| prefix = task.ext.prefix ?: "${meta.id}" | ||
| def proteins_input = proteins ? "--proteins ${proteins}" : "" | ||
| """ | ||
| binette \\ | ||
| --contig2bin_tables ${contig2bin} \\ | ||
| --contigs ${contigs} \\ | ||
| ${proteins_input} \\ | ||
| --checkm2_db ${checkm2_db} \\ | ||
| --threads ${task.cpus} \\ | ||
| --prefix ${prefix} \\ | ||
| --outdir . \\ | ||
| ${args} | ||
|
prototaxites marked this conversation as resolved.
|
||
|
|
||
| find final_bins/ -maxdepth 1 -name "*.fa" -type f -exec gzip {} \\; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is it an nf-core standard to compress fasta? I don't know if gz files are convenient for a regular user 🤔
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It's a general nf-core recommendation to gzip files (https://nf-co.re/docs/specifications/components/modules/general#compression-of-input-and-output-files), but perhaps more importantly all the other nf-core binning modules also write gzipped fasta, so it's consistent for downstream use. |
||
|
|
||
| find input_bins_quality_reports/ -maxdepth 1 -name "*.tsv" -type f | while read file; do | ||
| newname="input_bins_quality_reports/${prefix}.\$(basename "\$file")" | ||
| mv "\$file" "\$newname" | ||
| done | ||
|
|
||
| mv final_contig_to_bin.tsv ${prefix}.final_contig_to_bin.tsv | ||
| mv final_bins_quality_reports.tsv ${prefix}.final_bins_quality_reports.tsv | ||
| """ | ||
|
|
||
| stub: | ||
| def args = task.ext.args ?: '' | ||
| prefix = task.ext.prefix ?: "${meta.id}" | ||
| """ | ||
| mkdir -p final_bins | ||
| mkdir -p input_bins_quality_reports | ||
|
|
||
| echo "" | gzip > final_bins/${prefix}_bin1.fa.gz | ||
| echo "" | gzip > final_bins/${prefix}_bin2.fa.gz | ||
|
|
||
| touch ${prefix}.final_contig_to_bin.tsv | ||
| touch ${prefix}.final_bins_quality_reports.tsv | ||
| touch input_bins_quality_reports/input_bins_1.concoct_bins.tsv | ||
| touch input_bins_quality_reports/input_bins_1.metabat2_bins.tsv | ||
| """ | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,129 @@ | ||
| name: binette | ||
| description: A fast and accurate binning refinement tool to construct high quality MAGs from the output of multiple binning tools. | ||
| keywords: | ||
| - binning | ||
| - refinement | ||
| - genomics | ||
| - metagenomics | ||
| - mag | ||
| tools: | ||
| - binette: | ||
| description: | | ||
| Binette is a fast and accurate binning refinement tool designed to | ||
| construct high-quality MAGs from the output of multiple binning tools. | ||
| homepage: https://binette.readthedocs.io | ||
| documentation: https://binette.readthedocs.io | ||
| tool_dev_url: https://github.com/genotoul-bioinfo/Binette | ||
| licence: | ||
| - GPL-3.0 | ||
| input: | ||
| - - meta: | ||
| type: map | ||
| description: | | ||
| Groovy Map containing sample information | ||
| e.g. [ id:'test', single_end:false ] | ||
| - contig2bin: | ||
| type: file | ||
| description: One or more contig to bin assignment tables (tab-separated), passed to --contig2bin_tables | ||
| pattern: "*.tsv" | ||
| ontologies: | ||
| - edam: http://edamontology.org/format_3475 # TSV | ||
| - contigs: | ||
| type: file | ||
| description: Fasta file containing contigs | ||
| pattern: "*.{fa.gz,fas.gz,fasta.gz}" | ||
| ontologies: | ||
| - edam: http://edamontology.org/format_1929 # FASTA | ||
| - edam: http://edamontology.org/format_3989 # GZIP format | ||
| - proteins: | ||
| type: file | ||
| description: Optional FASTA file of amino acid (protein) sequences for the contigs | ||
| pattern: "*.{fa.gz,fas.gz,fasta.gz}" | ||
| ontologies: | ||
| - edam: http://edamontology.org/format_1929 # FASTA | ||
| - edam: http://edamontology.org/format_3989 # GZIP format | ||
| - - meta2: | ||
| type: map | ||
| description: | | ||
| Groovy Map containing sample information | ||
| e.g. [ id:'test', single_end:false ] | ||
| - checkm2_db: | ||
| type: file | ||
| description: CheckM2 DIAMOND database file. | ||
| ontologies: [] | ||
| output: | ||
| final_bins: | ||
| - - meta: | ||
| type: map | ||
| description: | | ||
| Groovy Map containing sample information | ||
| e.g. [ id:'test', single_end:false ] | ||
| - "final_bins/*.fa.gz": | ||
| type: file | ||
| description: Refined bins in fasta format | ||
| pattern: "*.fa.gz" | ||
| ontologies: | ||
| - edam: http://edamontology.org/format_1929 # FASTA | ||
| - edam: http://edamontology.org/format_3989 # GZIP format | ||
| contig2bin: | ||
| - - meta: | ||
| type: map | ||
| description: | | ||
| Groovy Map containing sample information | ||
| e.g. [ id:'test', single_end:false ] | ||
| - "${prefix}.final_contig_to_bin.tsv": | ||
| type: file | ||
| description: Final contig to bin assignment file | ||
| pattern: "*.tsv" | ||
| ontologies: | ||
| - edam: http://edamontology.org/format_3475 # TSV | ||
| input_bins_quality_reports: | ||
| - - meta: | ||
| type: map | ||
| description: | | ||
| Groovy Map containing sample information | ||
| e.g. [ id:'test', single_end:false ] | ||
| - "input_bins_quality_reports/*.tsv": | ||
| type: file | ||
| description: Quality reports for input bins | ||
| pattern: "*.tsv" | ||
| ontologies: | ||
| - edam: http://edamontology.org/format_3475 # TSV | ||
| final_bins_quality_report: | ||
| - - meta: | ||
| type: map | ||
| description: | | ||
| Groovy Map containing sample information | ||
| e.g. [ id:'test', single_end:false ] | ||
| - "${prefix}.final_bins_quality_reports.tsv": | ||
| type: file | ||
| description: Quality report for final refined bins | ||
| pattern: "*.tsv" | ||
| ontologies: | ||
| - edam: http://edamontology.org/format_3475 # TSV | ||
| versions_binette: | ||
| - - "${task.process}": | ||
| type: string | ||
| description: The name of the process | ||
| - binette: | ||
| type: string | ||
| description: The name of the tool | ||
| - binette --version | sed 's/Binette //': | ||
| type: eval | ||
| description: The expression to obtain the version of the tool | ||
| topics: | ||
| versions: | ||
| - - "${task.process}": | ||
| type: string | ||
| description: The name of the process | ||
| - binette: | ||
| type: string | ||
| description: The name of the tool | ||
| - binette --version | sed 's/Binette //': | ||
| type: eval | ||
| description: The expression to obtain the version of the tool | ||
|
|
||
| authors: | ||
| - "@prototaxites" | ||
| maintainers: | ||
| - "@prototaxites" |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,108 @@ | ||
| nextflow_process { | ||
|
|
||
| name "Test Process BINETTE" | ||
| script "../main.nf" | ||
| process "BINETTE" | ||
|
|
||
| tag "modules" | ||
| tag "modules_nfcore" | ||
| tag "binette" | ||
|
|
||
| config './nextflow.config' | ||
|
|
||
| test("bacteroides_fragilis - genome") { | ||
|
|
||
| setup { | ||
| new File("${launchDir}/c2b_1.txt").text = """ | ||
| NZ_CP069563.1\tbin1 | ||
| NZ_CP069564.1\tbin2 | ||
| """.stripIndent().trim() | ||
|
|
||
| new File("${launchDir}/c2b_2.txt").text = """ | ||
| NZ_CP069563.1\tbin1 | ||
| NZ_CP069564.1\tbin2 | ||
| """.stripIndent().trim() | ||
|
|
||
| run("CHECKM2_DATABASEDOWNLOAD") { | ||
| script "../../checkm2/databasedownload/main.nf" | ||
| process { | ||
| """ | ||
| input[0] = [] | ||
| """ | ||
| } | ||
| } | ||
| } | ||
|
|
||
| when { | ||
|
|
||
| params { | ||
| binette_args = "--low_mem" | ||
| } | ||
|
|
||
| process { | ||
| """ | ||
| input[0] = [ | ||
| [ id:'test' ], | ||
| [file("${launchDir}/c2b_1.txt"), file("${launchDir}/c2b_2.txt")], | ||
| file(params.modules_testdata_base_path + "genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz"), | ||
| [] | ||
| ] | ||
| input[1] = CHECKM2_DATABASEDOWNLOAD.out.database | ||
| """ | ||
| } | ||
| } | ||
|
|
||
| then { | ||
| assert process.success | ||
| assertAll( | ||
| { assert snapshot(sanitizeOutput(process.out)).match() } | ||
| ) | ||
| } | ||
|
|
||
| } | ||
|
|
||
| test("bacteroides_fragilis - stub") { | ||
|
|
||
| options "-stub" | ||
|
|
||
| setup { | ||
| new File("${launchDir}/c2b_1.txt").text = """ | ||
| NZ_CP069563.1\tbin1 | ||
| NZ_CP069564.1\tbin2 | ||
| """.stripIndent().trim() | ||
|
|
||
| new File("${launchDir}/c2b_2.txt").text = """ | ||
| NZ_CP069563.1\tbin1 | ||
| NZ_CP069564.1\tbin2 | ||
| """.stripIndent().trim() | ||
| } | ||
|
|
||
| when { | ||
|
|
||
| params { | ||
| binette_args = "--low_mem" | ||
| } | ||
|
|
||
| process { | ||
| """ | ||
| input[0] = [ | ||
| [ id:'test' ], | ||
| [file("${launchDir}/c2b_1.txt"), file("${launchDir}/c2b_2.txt")], | ||
| file(params.modules_testdata_base_path + "genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz"), | ||
| [] | ||
| ] | ||
| input[1] = [[:],[]] | ||
| """ | ||
| } | ||
| } | ||
|
|
||
| then { | ||
| assert process.success | ||
| assertAll( | ||
| { assert snapshot(sanitizeOutput(process.out)).match() } | ||
| ) | ||
| } | ||
|
|
||
| } | ||
|
|
||
| } |
Uh oh!
There was an error while loading. Please reload this page.