Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
cb608b6
feat: add step for generating a json alphafold multimer query
AnnaPolensky Feb 16, 2026
32c657f
feat: add download tab to outputs
AnnaPolensky Feb 16, 2026
cfa88ab
merge: merge crosslinking into 263-step-for-alphafold-multimer-query-…
AnnaPolensky Feb 27, 2026
fe19dc6
fix: fix broken all_steps after merge
AnnaPolensky Mar 1, 2026
ce49260
feat: add download_methods
AnnaPolensky Mar 1, 2026
237f903
feat: add download button for generated alphafold json queries
AnnaPolensky Mar 1, 2026
1268988
refactor: tidy up code
AnnaPolensky Mar 2, 2026
081a912
test: add tests for alphafold multimer query json generation
AnnaPolensky Mar 2, 2026
0a817b5
feat: make sure that at least 2 protein ids or 2 copies of one protei…
AnnaPolensky Mar 2, 2026
d791ed8
feat: add input to use a specific seed and allow not only space- but …
AnnaPolensky Mar 2, 2026
068329e
fix: fix broken test
AnnaPolensky Mar 2, 2026
ed7bb2a
fix: also accept input separated with comma and space, add success me…
AnnaPolensky Mar 3, 2026
288e01a
feat: add input field for file name of prediction query file
AnnaPolensky Mar 3, 2026
2cdf9dd
fix: step only turns green if file was generated
AnnaPolensky Mar 3, 2026
c043e76
fix: address code review feedback
AnnaPolensky Mar 3, 2026
dedfba1
refactor: rename alphafold-multimer-query-json-generation to alphafol…
AnnaPolensky Mar 4, 2026
89e030b
merge: merge crosslinking into 263
AnnaPolensky Mar 25, 2026
e494242
refactor: add broken refactoring of download outputs
AnnaPolensky Mar 30, 2026
a0c8644
refactor: add working refactoring of download outputs
AnnaPolensky Mar 30, 2026
84ff4f8
refactor: add useCertainStepOutputs hook
AnnaPolensky Mar 30, 2026
1fdd897
refactor: remove 'any' type hint
AnnaPolensky Mar 30, 2026
464da38
fix: fix broken tests
AnnaPolensky Mar 30, 2026
a6e791f
style: remove unused parts of the first downloads implementation
AnnaPolensky Mar 30, 2026
79a1059
style: remove unused parts of the first downloads implementation - pa…
AnnaPolensky Mar 30, 2026
de2604f
chore: fix docstring
AnnaPolensky Mar 31, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions backend/main/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,11 @@
path("api/save_workflow/", views.save_workflow, name="save_workflow"),
path("api/get_step_form/", views.get_step_form, name="get_step_form"),
path("api/get_step_plots/", views.get_step_plots, name="get_step_plots"),
path(
"api/get_downloads_from_step/",
views.get_downloads_from_step,
name="get_downloads_from_step",
),
path(
"api/get_current_step_output_labels/",
views.get_current_step_output_labels,
Expand Down
33 changes: 33 additions & 0 deletions backend/main/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -682,6 +682,39 @@ def get_step_plots(request):
)


def get_downloads_from_step(request: HttpRequest):
    """
    Return the downloadable files stored under one output key of a step.

    Expects a POST request whose JSON body is an object with the keys
    ``run_name``, ``step_id`` and ``output_key``. The value found under
    ``output_key`` in the step's output is returned as ``data``; a missing
    output is reported as an empty dict so the caller can treat
    "nothing to download yet" like an empty result.

    :param request: the incoming HTTP request
    :return: a JsonResponse with ``success``, ``message`` and, on success, ``data``.
        Status 405 for non-POST requests, 400 for an unparsable body or an
        output that is not a dict.
    """
    if request.method != "POST":
        return JsonResponse(
            {"success": False, "message": "Invalid request method"}, status=405
        )

    # A malformed body is a client error, not a server crash.
    try:
        data = json.loads(request.body)
    except (json.JSONDecodeError, UnicodeDecodeError):
        return JsonResponse(
            {"success": False, "message": "Request body must be valid JSON"},
            status=400,
        )
    if not isinstance(data, dict):
        return JsonResponse(
            {"success": False, "message": "Request body must be a JSON object"},
            status=400,
        )

    run_name = data.get("run_name")
    step_id = data.get("step_id")
    output_key = data.get("output_key")

    run = Run(run_name)
    step = run.steps.get_step_by_id(step_id)
    downloads = step.output.get(output_key)
    if downloads is None:
        downloads = {}
    if not isinstance(downloads, dict):
        # 400 rather than 405: the HTTP method was fine, the requested output
        # simply is not a download mapping.
        return JsonResponse(
            {
                "success": False,
                "message": f"Requested output must be dict object, is {str(type(downloads))}",
            },
            status=400,
        )
    return JsonResponse(
        {
            "success": True,
            "message": "Got the available download(s) for the step",
            "data": downloads,
        }
    )


# TODO: Move somewhere else
def _step_output_as_serialised_table(
label: str, _data: pd.DataFrame | Any, index_delims: tuple[int, int] = (None, None)
Expand Down
1 change: 1 addition & 0 deletions backend/protzilla/all_steps.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
importing.FastaImport,
importing.AlphaFoldPredictionLoad,
importing.CrosslinkingImport,
importing.AlphaFoldQueryJsonGeneration,
importing.ImportMonomerStructurePredictionFromDisk,
importing.UploadMultimerPredictions,
importing.ImportMultimerStructurePredictionFromDisk,
Expand Down
105 changes: 105 additions & 0 deletions backend/protzilla/importing/query_generation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import json
import logging
import requests

from backend.protzilla.steps import OutputItem, OutputType


def generate_alphafold_query_json(
    protein_ids: str, number_copies: str, model_seed: int, name: str
) -> dict:
    """
    Generates an AlphaFold JSON query for a set of UniProt protein IDs.
    For each provided UniProt ID, the corresponding amino acid sequence is fetched
    from the UniProt REST API and added to the query with the specified copy number.
    Format of the json is as defined here: https://github.com/google-deepmind/alphafold/blob/main/server/README.md

    Protein IDs and copy numbers must be provided as space- or comma-separated strings and
    must have the same, non-zero number of entries. Every copy number must be a positive integer.

    :param protein_ids: Space- or comma-separated list of UniProt protein IDs (e.g. "P69905 P68871").
    :param number_copies: Space- or comma-separated list of integers specifying the number of copies
        for each protein ID (e.g. "2 2").
    :param model_seed: Model seed for the AlphaFold query. If -1 we want AlphaFold to use a random seed.
    :param name: How the AlphaFold job and the generated file should be named.
    :return: dict (messages, downloads), downloads is an OutputItem of type DOWNLOAD whose value maps
        the given name to a one-element list containing the query dict (AlphaFold server expects
        the query wrapped in a list)
    :raises ValueError: If the copy numbers cannot be parsed as integers, if the number of IDs and
        copy numbers differ, if no ID is given, if a copy number is smaller than 1, or if UniProt
        returns no sequence for an ID.
    :raises requests.exceptions.HTTPError: If fetching a UniProt FASTA sequence fails.
    """
    from urllib.parse import quote

    messages = []

    # extract protein_ids and number of copies per id and make sure they have the same length
    uniprot_ids = protein_ids.replace(",", " ").split()
    try:
        copies_per_id = [int(entry) for entry in number_copies.replace(",", " ").split()]
    except ValueError as e:
        raise ValueError(
            "Invalid list of number of copies per id: "
            "please provide space- or comma-separated integers"
        ) from e
    if len(uniprot_ids) != len(copies_per_id):
        raise ValueError(
            f"There are {len(uniprot_ids)} ids. However, there are {len(copies_per_id)} entries for number of copies. Please make sure that these numbers match."
        )
    # Without this guard, min() below fails on an empty list with an unhelpful message.
    if not uniprot_ids:
        raise ValueError("Please provide at least one UniProt protein ID.")
    if min(copies_per_id) < 1:
        raise ValueError("There can't be a non-positive number of copies.")

    # create the json query for alphafold
    query = {
        "name": name,
        "modelSeeds": [],
        "sequences": [],
        "dialect": "alphafoldserver",
        "version": 1,
    }

    # an empty modelSeeds list is kept when the caller asked for a random seed (-1)
    if model_seed != -1:
        query["modelSeeds"] = [model_seed]

    for uniprot_id, copies in zip(uniprot_ids, copies_per_id):
        # The ID comes from user input; escape it so it cannot alter the request path.
        url = f"https://rest.uniprot.org/uniprotkb/{quote(uniprot_id, safe='')}.fasta"

        response = requests.get(url, timeout=20)
        response.raise_for_status()

        # drop the FASTA header line(s) and join the remaining lines into one sequence
        amino_acid_sequence = "".join(
            line.strip()
            for line in response.text.splitlines()
            if not line.startswith(">")
        )
        if not amino_acid_sequence:
            raise ValueError(
                f"UniProt returned no amino acid sequence for the id '{uniprot_id}'."
            )
        query["sequences"].append(
            {
                "proteinChain": {
                    "sequence": amino_acid_sequence,
                    "count": copies,
                }
            }
        )

    messages.append(
        dict(level=logging.INFO, msg="Successfully generated a json file for AlphaFold.")
    )
    return dict(
        messages=messages,
        downloads=OutputItem(output_type=OutputType.DOWNLOAD, value={name: [query]}),
    )
52 changes: 52 additions & 0 deletions backend/protzilla/methods/importing.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
from abc import ABC
from typing_extensions import override

import pandas as pd

from backend.protzilla.form import *
from backend.protzilla import form_helper
from backend.protzilla.constants.data_types import DataKey
Expand Down Expand Up @@ -43,6 +45,7 @@
FeatureOrientationType,
)
from backend.protzilla.constants.intensity_types import IntensityType, IntensityNameType
from protzilla.importing.query_generation import generate_alphafold_query_json


class ImportingStep(Step, ABC):
Expand Down Expand Up @@ -618,3 +621,52 @@ def modify_form(self, run: Run):
)

calc_method = staticmethod(get_multimer_structure_dfs)


class AlphaFoldQueryJsonGeneration(Step):
    """
    Step that collects the parameters for an AlphaFold-Server query and
    delegates to ``generate_alphafold_query_json`` to build the query JSON.

    The names of the input fields match the parameter names of the
    calculation method (``name``, ``protein_ids``, ``number_copies``,
    ``model_seed``).
    """

    section = "importing"
    display_name = "AlphaFold Query JSON Generation"
    operation = "Query Generation"
    method_description = (
        "Generate a JSON to upload to AlphaFold-Server to generate a prediction."
    )

    def create_form(self):
        """Build the input form; each text/number field is followed by a short usage hint."""
        return Form(
            label="AlphaFold Query JSON Generation",
            input_fields=[
                TextField(
                    name="name",
                    label="File name and AlphaFold job name for generated query",
                ),
                # Review note: the label alone does not say that only the stem of
                # the file name is entered ('.json' is appended automatically),
                # so the hint below states it explicitly.
                InfoField(
                    label="Only enter file stem, '.json' will be added automatically."
                ),
                TextField(
                    name="protein_ids",
                    label="UniProt Protein IDs",
                ),
                InfoField(label="IDs should be space- or comma-separated."),
                TextField(
                    name="number_copies",
                    label="Number of copies of each protein monomer",
                ),
                InfoField(
                    label="For each entered ID a number should be entered.\n"
                    "Numbers should be space- or comma-separated."
                ),
                NumberField(
                    name="model_seed",
                    label="Model seed for AlphaFold",
                    # -1 is the sentinel for "let AlphaFold pick a random seed"
                    min=-1,
                    max=4294967295,
                    value=-1,
                ),
                InfoField(
                    label="Leave -1 if you want to use a random seed.\n"
                    "Otherwise enter a seed (integer between 0 and 4294967295)"
                ),
            ],
        )

    calc_method = staticmethod(generate_alphafold_query_json)
47 changes: 23 additions & 24 deletions backend/protzilla/steps.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@
import inspect
import logging
import traceback
from enum import StrEnum
from typing import Any, Literal
from enum import Enum, StrEnum
from pathlib import Path
from types import MethodType
from typing import Any, Literal, Callable

import pandas as pd
import yaml
Expand Down Expand Up @@ -340,50 +342,46 @@ def handle_messages(self, outputs: dict) -> None:
calc_method = None
plot_method = None # if the plot method uses the output of the calculation method, it should be prefixed with "output_"

@property
def calculation_input(self) -> dict:
input_parameters = inspect.signature(self.calc_method).parameters
def _get_input_parameters(
self, function: Callable[..., Any], relevant_inputs: dict | None = None
) -> dict:
if relevant_inputs is None:
relevant_inputs = self.inputs
input_parameters = inspect.signature(function).parameters
required_keys = [
key
for key, param in input_parameters.items()
if param.default == inspect.Parameter.empty
]
for key in required_keys:
if key not in self.inputs:
if key not in relevant_inputs:
raise ValueError(
f"Missing required input '{key}' for the calculation method"
f"Missing required input '{key}' for the '{function.__name__}' method"
)

return {
# if there is a default value, we want to use it
key: (
self.inputs.get(key, param.default)
relevant_inputs.get(key, param.default)
if param.default != inspect.Parameter.empty
else self.inputs.get(key)
else relevant_inputs.get(key)
)
for key, param in input_parameters.items()
if key in relevant_inputs
}

@property
def calculation_input(self) -> dict:
return self._get_input_parameters(self.calc_method)

@property
def plot_input(self) -> dict:
# if the plot method uses the output of the calculation method, it should be prefixed with "output_"
prefixed_output = {"output_" + key: item.value for key, item in self.output}
plot_input = self.inputs | prefixed_output

input_parameters = inspect.signature(self.plot_method).parameters

required_keys = [
key
for key, param in input_parameters.items()
if param.default == inspect.Parameter.empty
]
for key in required_keys:
if key not in plot_input:
raise ValueError(f"Missing required input '{key}' for the plot method")

return {
key: plot_input[key] for key in input_parameters.keys() if key in plot_input
}
return self._get_input_parameters(
function=self.plot_method, relevant_inputs=plot_input
)

def validate_outputs(self, soft_check: bool = False) -> bool:
"""
Expand Down Expand Up @@ -476,6 +474,7 @@ class OutputType(StrEnum):
FLOAT = "float"
INT = "int"
PNG_BASE64 = "png_base64"
DOWNLOAD = "download" # right now only JSONs are supported, value should be dict(filename, json content)
# for every data type that is not yaml serializable
JOBLIB_ARTIFACT = "joblib_artifact"

Expand Down
1 change: 1 addition & 0 deletions backend/tests/main/test_views_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ def test_get_all_possible_step_names():
"FastaImport",
"AlphaFoldPredictionLoad",
"CrosslinkingImport",
"AlphaFoldQueryJsonGeneration",
"ImportMonomerStructurePredictionFromDisk",
"UploadMultimerPredictions",
"ImportMultimerStructurePredictionFromDisk",
Expand Down
Loading
Loading