diff --git a/modules/nf-core/mmseqs/makepaddedseqdb/environment.yml b/modules/nf-core/mmseqs/makepaddedseqdb/environment.yml
new file mode 100644
index 000000000000..072223f2aace
--- /dev/null
+++ b/modules/nf-core/mmseqs/makepaddedseqdb/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - bioconda::mmseqs2=18.8cc5c
diff --git a/modules/nf-core/mmseqs/makepaddedseqdb/main.nf b/modules/nf-core/mmseqs/makepaddedseqdb/main.nf
new file mode 100644
index 000000000000..61857c179f67
--- /dev/null
+++ b/modules/nf-core/mmseqs/makepaddedseqdb/main.nf
@@ -0,0 +1,62 @@
+// Runs `mmseqs makepaddedseqdb` on an existing MMseqs2 database directory and
+// writes the result into a new directory named after `prefix`.
+process MMSEQS_MAKEPADDEDSEQDB {
+    tag "${meta.id}"
+    label 'process_low'
+    conda "${moduleDir}/environment.yml"
+
+    container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
+        ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/fe/fe49c17754753d6cd9a31e5894117edaf1c81e3d6053a12bf6dc8f3af1dffe23/data'
+        : 'community.wave.seqera.io/library/mmseqs2:18.8cc5c--af05c9a98d9f6139'}"
+
+    input:
+    tuple val(meta), path(db_in)
+
+    output:
+    tuple val(meta), path("${prefix}/"), emit: db_padded
+    tuple val("${task.process}"), val('mmseqs'), eval('mmseqs version'), topic: versions, emit: versions_mmseqs
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    // args2 is the `find -name` pattern used to locate the database files inside db_in
+    def args2 = task.ext.args2 ?: '*.dbtype'
+    prefix = task.ext.prefix ?: "${meta.id}"
+    // The output directory `prefix` is created alongside `db_in`, so identical
+    // names would make the output collide with the input
+    if ("${db_in}" == "${prefix}") {
+        error("Input and output names of databases are the same, set prefix in module configuration to disambiguate!")
+    }
+    // DB_TARGET_PATH_NAME: the first sed strips the last extension from every match,
+    // the second collapses the matches to the leading part they have in common
+    """
+    DB_TARGET_PATH_NAME=\$(find -L "${db_in}/" -maxdepth 1 -name "${args2}" | sed 's/\\.[^.]*\$//' | sed -e 'N;s/^\\(.*\\).*\\n\\1.*\$/\\1\\n\\1/;D' )
+    mkdir -p "${prefix}"
+    mmseqs \\
+        makepaddedseqdb \\
+        "\$DB_TARGET_PATH_NAME" \\
+        "${prefix}/${prefix}" \\
+        ${args}
+    """
+
+    stub:
+    def args = task.ext.args ?: ''
+    prefix = task.ext.prefix ?: "${meta.id}"
+    // Same guard as in `script:` so a stub run never touches files inside db_in
+    if ("${db_in}" == "${prefix}") {
+        error("Input and output names of databases are the same, set prefix in module configuration to disambiguate!")
+    }
+    """
+    echo ${args}
+    mkdir -p "${prefix}"
+    touch "${prefix}/${prefix}"
+    touch "${prefix}/${prefix}.dbtype"
+    touch "${prefix}/${prefix}.index"
+    touch "${prefix}/${prefix}.lookup"
+    touch "${prefix}/${prefix}_h"
+    touch "${prefix}/${prefix}_h.dbtype"
+    touch "${prefix}/${prefix}_h.index"
+    """
+}
diff --git a/modules/nf-core/mmseqs/makepaddedseqdb/meta.yml b/modules/nf-core/mmseqs/makepaddedseqdb/meta.yml
new file mode 100644
index 000000000000..8858d9bb9908
--- /dev/null
+++ b/modules/nf-core/mmseqs/makepaddedseqdb/meta.yml
@@ -0,0 +1,64 @@
+name: "mmseqs_makepaddedseqdb"
+description: Create an MMseqs padded database from an existing MMseqs database
+keywords:
+  - protein sequence
+  - databases
+  - clustering
+  - searching
+  - indexing
+  - mmseqs2
+tools:
+  - "mmseqs":
+      description: "MMseqs2: ultra fast and sensitive sequence search and clustering
+        suite"
+      homepage: "https://github.com/soedinglab/MMseqs2"
+      documentation: "https://mmseqs.com/latest/userguide.pdf"
+      tool_dev_url: "https://github.com/soedinglab/MMseqs2"
+      doi: "10.1093/bioinformatics/btw006"
+      licence:
+        - "GPL v3"
+      identifier: biotools:mmseqs
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. `[ id:'test', single_end:false ]`
+    - db_in:
+        type: directory
+        description: Input of existing MMseqs database
+output:
+  db_padded:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'test', single_end:false ]`
+      - "${prefix}/":
+          type: directory
+          description: The padded MMseqs2 database
+  versions_mmseqs:
+    - - ${task.process}:
+          type: string
+          description: The name of the process
+      - mmseqs:
+          type: string
+          description: The name of the tool
+      - mmseqs version:
+          type: eval
+          description: The expression to obtain the version of the tool
+topics:
+  versions:
+    - - ${task.process}:
+          type: string
+          description: The name of the process
+      - mmseqs:
+          type: string
+          description: The name of the tool
+      - mmseqs version:
+          type: eval
+          description: The expression to obtain the version of the tool
+authors:
+  - "@nbtm-sh"
+maintainers:
+  - "@nbtm-sh"
diff --git a/modules/nf-core/mmseqs/makepaddedseqdb/tests/main.nf.test b/modules/nf-core/mmseqs/makepaddedseqdb/tests/main.nf.test
new file mode 100644
index 000000000000..6c3ae4b7ea60
--- /dev/null
+++ b/modules/nf-core/mmseqs/makepaddedseqdb/tests/main.nf.test
@@ -0,0 +1,49 @@
+nextflow_process {
+
+    name "Test Process MMSEQS_MAKEPADDEDSEQDB"
+    script "../main.nf"
+    process "MMSEQS_MAKEPADDEDSEQDB"
+    tag "modules"
+    tag "modules_nfcore"
+    tag "mmseqs"
+    tag "mmseqs/makepaddedseqdb"
+    tag "mmseqs/createdb"
+
+    config "./nextflow.config"
+
+    setup {
+        run("MMSEQS_CREATEDB") {
+            script "../../../mmseqs/createdb/main.nf"
+            process {
+                """
+                input[0] = [ [ id:'test_query' ],
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fasta/contigs.fasta', checkIfExists: true)
+                ]
+                """
+            }
+        }
+    }
+
+    test("mmseqs_db sarscov2 contigs") {
+
+        when {
+            params {
+                module_prefix = "test_query_gpu"
+            }
+            process {
+                """
+                input[0] = MMSEQS_CREATEDB.out.db
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(sanitizeOutput(process.out)).match()
+                }
+            )
+        }
+
+    }
+}
diff --git a/modules/nf-core/mmseqs/makepaddedseqdb/tests/main.nf.test.snap b/modules/nf-core/mmseqs/makepaddedseqdb/tests/main.nf.test.snap
new file mode 100644
index 000000000000..8ce6f93de878
--- /dev/null
+++ b/modules/nf-core/mmseqs/makepaddedseqdb/tests/main.nf.test.snap
@@ -0,0 +1,36 @@
+{
+    "mmseqs_db sarscov2 contigs": {
+        "content": [
+            {
+                "db_padded": [
+                    [
+                        {
+                            "id": "test_query"
+                        },
+                        [
+                            "test_query_gpu:md5,5b24585ba92fd826c78b8664c63b4e95",
+                            "test_query_gpu.dbtype:md5,01d39098f2bfee5c808a3b4ff54deac2",
+                            "test_query_gpu.index:md5,5946b4989d08320d9daca503155ba693",
+                            "test_query_gpu.lookup:md5,3eb85c645034a0717db62ef0a3da5479",
+                            "test_query_gpu_h:md5,a9fca4931be476b8f302cc27b5dff9b0",
+                            "test_query_gpu_h.dbtype:md5,740bab4f9ec8808aedb68d6b1281aeb2",
+                            "test_query_gpu_h.index:md5,ce0ca30c2e57677077cc23823ef17206"
+                        ]
+                    ]
+                ],
+                "versions_mmseqs": [
+                    [
+                        "MMSEQS_MAKEPADDEDSEQDB",
+                        "mmseqs",
+                        "18.8cc5c"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-02-25T10:33:19.910807101",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "25.04.6"
+        }
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/mmseqs/makepaddedseqdb/tests/nextflow.config b/modules/nf-core/mmseqs/makepaddedseqdb/tests/nextflow.config
new file mode 100644
index 000000000000..735a3eb0904f
--- /dev/null
+++ b/modules/nf-core/mmseqs/makepaddedseqdb/tests/nextflow.config
@@ -0,0 +1,5 @@
+process {
+    withName: "MMSEQS_MAKEPADDEDSEQDB" {
+        ext.prefix = params.module_prefix
+    }
+}