cschlaffner · AnnaPolensky · Mar 31, 2026 · Feb 16, 2026 · Feb 16, 2026 · Feb 27, 2026
diff --git a/backend/main/urls.py b/backend/main/urls.py
@@ -48,6 +48,11 @@
     path("api/save_workflow/", views.save_workflow, name="save_workflow"),
     path("api/get_step_form/", views.get_step_form, name="get_step_form"),
     path("api/get_step_plots/", views.get_step_plots, name="get_step_plots"),
+    path(
+        "api/get_downloads_from_step/",
+        views.get_downloads_from_step,
+        name="get_downloads_from_step",
+    ),
     path(
         "api/get_current_step_output_labels/",
         views.get_current_step_output_labels,

diff --git a/backend/main/views.py b/backend/main/views.py
@@ -682,6 +682,62 @@ def get_step_plots(request):
         )
 
 
+def get_downloads_from_step(request: HttpRequest):
+    """
+    Return the downloadable files stored in one output of a step.
+
+    Expects a POST request whose JSON body is an object with the keys "run_name",
+    "step_id" and "output_key". An output that is missing is reported as an empty dict.
+
+    :param request: The incoming HTTP request.
+    :return: JsonResponse with "success" and "message", plus "data" (the downloads dict)
+             on success. Status 405 for a non-POST request, 400 for a malformed body
+             or when the requested output is not a dict.
+    """
+    if request.method != "POST":
+        return JsonResponse(
+            {"success": False, "message": "Invalid request method"}, status=405
+        )
+
+    try:
+        data = json.loads(request.body)
+    except ValueError:
+        # json.JSONDecodeError and UnicodeDecodeError are both ValueError subclasses
+        data = None
+    if not isinstance(data, dict):
+        return JsonResponse(
+            {"success": False, "message": "Request body must be a JSON object"},
+            status=400,
+        )
+    run_name = data.get("run_name")
+    step_id = data.get("step_id")
+    output_key = data.get("output_key")
+
+    run = Run(run_name)
+    step = run.steps.get_step_by_id(step_id)
+    # NOTE(review): assumes step.output.get() yields the raw output value - confirm
+    downloads = step.output.get(output_key)
+    if downloads is None:
+        downloads = {}
+    if not isinstance(downloads, dict):
+        # the request used the right method, so this is a bad request (400) and not
+        # "method not allowed" (405)
+        return JsonResponse(
+            {
+                "success": False,
+                "message": f"Requested output must be dict object, is {str(type(downloads))}",
+            },
+            status=400,
+        )
+    return JsonResponse(
+        {
+            "success": True,
+            "message": "Got the available download(s) for the step",
+            "data": downloads,
+        }
+    )
+
+
 # TODO: Move somewhere else
 def _step_output_as_serialised_table(
     label: str, _data: pd.DataFrame | Any, index_delims: tuple[int, int] = (None, None)

diff --git a/backend/protzilla/all_steps.py b/backend/protzilla/all_steps.py
@@ -17,6 +17,7 @@
     importing.FastaImport,
     importing.AlphaFoldPredictionLoad,
     importing.CrosslinkingImport,
+    importing.AlphaFoldQueryJsonGeneration,
     importing.ImportMonomerStructurePredictionFromDisk,
     importing.UploadMultimerPredictions,
     importing.ImportMultimerStructurePredictionFromDisk,

diff --git a/backend/protzilla/importing/query_generation.py b/backend/protzilla/importing/query_generation.py
@@ -0,0 +1,96 @@
+import json
+import logging
+import requests
+
+from backend.protzilla.steps import OutputItem, OutputType
+
+
+def generate_alphafold_query_json(
+    protein_ids: str, number_copies: str, model_seed: int, name: str
+) -> dict:
+    """
+    Generates an AlphaFold JSON query for a set of UniProt protein IDs.
+    For each provided UniProt ID, the corresponding amino acid sequence is fetched
+    from the UniProt REST API and added to the query with the specified copy number.
+    Format of the json is as defined here: https://github.com/google-deepmind/alphafold/blob/main/server/README.md
+
+    Protein IDs and copy numbers must be provided as space- or comma-separated strings and
+    must have the same length. All inputs are validated before the first request is sent.
+
+    :param protein_ids: Space- or comma-separated list of UniProt protein IDs (e.g. "P69905 P68871").
+    :param number_copies: Space- or comma-separated list of integers specifying the number of copies
+                          for each protein ID (e.g. "2 2").
+    :param model_seed: Model seed for the AlphaFold query. If -1 (or None) we want AlphaFold to use
+                       a random seed, i.e. the list of seeds in the query stays empty.
+    :param name: How the AlphaFold job and the generated file should be named.
+    :return: dict (messages, downloads), downloads is an OutputItem of type DOWNLOAD whose value maps
+             the given name to the query wrapped in a list (as required by AlphaFold server)
+    :raises ValueError: If no protein ID is given, if the copy numbers cannot be parsed as integers,
+                        if the number of IDs and copy numbers differ, if a copy number is smaller
+                        than 1 or if UniProt returns no sequence for an ID.
+    :raises requests.exceptions.HTTPError: If fetching a UniProt FASTA sequence fails.
+    """
+    # extract protein_ids and number of copies per id and make sure they have the same length
+    uniprot_ids = protein_ids.replace(",", " ").split()
+    if not uniprot_ids:
+        # two empty inputs would otherwise pass the length check and fail in min()
+        raise ValueError("Please provide at least one UniProt protein ID.")
+    try:
+        copies_per_id = [
+            int(entry) for entry in number_copies.replace(",", " ").split()
+        ]
+    except ValueError as e:
+        raise ValueError(
+            "Invalid list of number of copies per id: please provide space-separated integers"
+        ) from e
+    if len(uniprot_ids) != len(copies_per_id):
+        raise ValueError(
+            f"There are {len(uniprot_ids)} ids. However, there are {len(copies_per_id)} entries for number of copies. Please make sure that these numbers match."
+        )
+    if min(copies_per_id) < 1:
+        raise ValueError("There can't be a non-positive number of copies.")
+
+    # create the json query for alphafold
+    query = {
+        "name": name,
+        "modelSeeds": [],
+        "sequences": [],
+        "dialect": "alphafoldserver",
+        "version": 1,
+    }
+
+    if model_seed is not None and model_seed != -1:
+        # the AlphaFold server format lists seeds as strings of uint32 values
+        query["modelSeeds"] = [str(int(model_seed))]
+
+    for uniprot_id, copies in zip(uniprot_ids, copies_per_id):
+        url = f"https://rest.uniprot.org/uniprotkb/{uniprot_id}.fasta"
+
+        response = requests.get(url, timeout=20)
+        response.raise_for_status()
+
+        # drop the FASTA header line(s) and join the remaining lines to one sequence
+        amino_acid_sequence = "".join(
+            line.strip()
+            for line in response.text.splitlines()
+            if not line.startswith(">")
+        )
+        if not amino_acid_sequence:
+            raise ValueError(f"UniProt returned no sequence for id '{uniprot_id}'.")
+        query["sequences"].append(
+            {
+                "proteinChain": {
+                    "sequence": amino_acid_sequence,
+                    "count": copies,
+                }
+            }
+        )
+    messages = [
+        dict(
+            level=logging.INFO, msg="Successfully generated a json file for AlphaFold."
+        )
+    ]
+    return dict(
+        messages=messages,
+        downloads=OutputItem(output_type=OutputType.DOWNLOAD, value={name: [query]}),
+    )
diff --git a/backend/protzilla/methods/importing.py b/backend/protzilla/methods/importing.py
@@ -2,6 +2,8 @@
 from abc import ABC
 from typing_extensions import override
 
+import pandas as pd
+
 from backend.protzilla.form import *
 from backend.protzilla import form_helper
 from backend.protzilla.constants.data_types import DataKey
@@ -43,6 +45,7 @@
     FeatureOrientationType,
 )
 from backend.protzilla.constants.intensity_types import IntensityType, IntensityNameType
+from protzilla.importing.query_generation import generate_alphafold_query_json
 
 
 class ImportingStep(Step, ABC):
@@ -618,3 +621,61 @@ def modify_form(self, run: Run):
         )
 
     calc_method = staticmethod(get_multimer_structure_dfs)
+
+
+class AlphaFoldQueryJsonGeneration(Step):
+    """
+    Step that generates a JSON query for the AlphaFold-Server from UniProt protein IDs.
+
+    The names of the form fields match the parameters of generate_alphafold_query_json,
+    which is used as calculation method.
+    """
+
+    # NOTE(review): derives from Step and sets the section itself although this module
+    # defines ImportingStep - confirm that this is intended
+    section = "importing"
+    display_name = "AlphaFold Query JSON Generation"
+    operation = "Query Generation"
+    method_description = (
+        "Generate a JSON to upload to AlphaFold-Server to generate a prediction."
+    )
+
+    def create_form(self):
+        return Form(
+            label="AlphaFold Query JSON Generation",
+            input_fields=[
+                TextField(
+                    name="name",
+                    label="File name and AlphaFold job name for generated query",
+                ),
+                InfoField(
+                    label="Only enter file stem, '.json' will be added automatically."
+                ),
+                TextField(
+                    name="protein_ids",
+                    label="UniProt Protein IDs",
+                ),
+                InfoField(label="IDs should be space- or comma-separated."),
+                TextField(
+                    name="number_copies",
+                    label="Number of copies of each protein monomer",
+                ),
+                InfoField(
+                    label="For each entered ID a number should be entered.\n"
+                    "Numbers should be space- or comma-separated."
+                ),
+                NumberField(
+                    name="model_seed",
+                    label="Model seed for AlphaFold",
+                    min=-1,
+                    max=4294967295,
+                    value=-1,
+                ),
+                InfoField(
+                    label="Leave -1 if you want to use a random seed.\n"
+                    "Otherwise enter a seed (integer between 0 and 4294967295)"
+                ),
+            ],
+        )
+
+    calc_method = staticmethod(generate_alphafold_query_json)
diff --git a/backend/protzilla/steps.py b/backend/protzilla/steps.py
@@ -4,8 +4,10 @@
 import inspect
 import logging
 import traceback
-from enum import StrEnum
-from typing import Any, Literal
+from enum import Enum, StrEnum
+from pathlib import Path
+from types import MethodType
+from typing import Any, Literal, Callable
 
 import pandas as pd
 import yaml
@@ -340,50 +342,55 @@ def handle_messages(self, outputs: dict) -> None:
     calc_method = None
     plot_method = None  # if the plot method uses the output of the calculation method, it should be prefixed with "output_"
 
-    @property
-    def calculation_input(self) -> dict:
-        input_parameters = inspect.signature(self.calc_method).parameters
+    def _get_input_parameters(
+        self, function: Callable[..., Any], relevant_inputs: dict | None = None
+    ) -> dict:
+        """
+        Select the entries of the available inputs that `function` accepts as arguments.
+
+        :param function: The callable whose signature decides which inputs are picked.
+        :param relevant_inputs: Available inputs by parameter name, defaults to `self.inputs`.
+        :return: The entries of `relevant_inputs` whose key is a parameter of `function`.
+            Parameters without an entry are left out, so the default of `function` applies.
+        :raises ValueError: If a parameter without a default value has no entry.
+        """
+        if relevant_inputs is None:
+            relevant_inputs = self.inputs
+        input_parameters = inspect.signature(function).parameters
+        # compare with `is`: the sentinel is a singleton and `==` would call the __eq__ of
+        # the default value, which does not return a plain bool for e.g. numpy arrays
         required_keys = [
             key
             for key, param in input_parameters.items()
-            if param.default == inspect.Parameter.empty
+            if param.default is inspect.Parameter.empty
         ]
         for key in required_keys:
-            if key not in self.inputs:
+            if key not in relevant_inputs:
                 raise ValueError(
-                    f"Missing required input '{key}' for the calculation method"
+                    f"Missing required input '{key}' for the '{function.__name__}' method"
                 )
 
         return {
-            # if there is a default value, we want to use it
-            key: (
-                self.inputs.get(key, param.default)
-                if param.default != inspect.Parameter.empty
-                else self.inputs.get(key)
-            )
-            for key, param in input_parameters.items()
+            # parameters without an entry are skipped so that their default value is used
+            key: relevant_inputs[key]
+            for key in input_parameters
+            if key in relevant_inputs
         }
 
+    @property
+    def calculation_input(self) -> dict:
+        """The inputs of this step that are passed to the calculation method."""
+        return self._get_input_parameters(self.calc_method)
+
     @property
     def plot_input(self) -> dict:
+        """The inputs and prefixed outputs of this step that are passed to the plot method."""
         # if the plot method uses the output of the calculation method, it should be prefixed with "output_"
         prefixed_output = {"output_" + key: item.value for key, item in self.output}
         plot_input = self.inputs | prefixed_output
-
-        input_parameters = inspect.signature(self.plot_method).parameters
-
-        required_keys = [
-            key
-            for key, param in input_parameters.items()
-            if param.default == inspect.Parameter.empty
-        ]
-        for key in required_keys:
-            if key not in plot_input:
-                raise ValueError(f"Missing required input '{key}' for the plot method")
-
-        return {
-            key: plot_input[key] for key in input_parameters.keys() if key in plot_input
-        }
+        return self._get_input_parameters(
+            function=self.plot_method, relevant_inputs=plot_input
+        )
 
     def validate_outputs(self, soft_check: bool = False) -> bool:
         """
@@ -476,6 +483,7 @@ class OutputType(StrEnum):
     FLOAT = "float"
     INT = "int"
     PNG_BASE64 = "png_base64"
+    DOWNLOAD = "download"  # right now only JSONs are supported, value should be dict(filename, json content)
     # for every data type that is not yaml serializable
     JOBLIB_ARTIFACT = "joblib_artifact"
 

diff --git a/backend/tests/main/test_views_helper.py b/backend/tests/main/test_views_helper.py
@@ -14,6 +14,7 @@ def test_get_all_possible_step_names():
         "FastaImport",
         "AlphaFoldPredictionLoad",
         "CrosslinkingImport",
+        "AlphaFoldQueryJsonGeneration",
         "ImportMonomerStructurePredictionFromDisk",
         "UploadMultimerPredictions",
         "ImportMultimerStructurePredictionFromDisk",