diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..2fce9ad
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,78 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+*.manifest
+*.spec
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# IDE
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+.DS_Store
+
+# Project specific
+examples/*/results/
+blind_prediction/
+successed_prediction/
+failed_prediction/
+multimer_prediction/
+test_data/
+msa_folder/
+structures_all.csv
+
+# Foldseek
+pdb*
+pdb_*
+tmp/
\ No newline at end of file
diff --git a/Data/Fold-switch_hits-SPEACH_AF/pdb_pairs.csv b/Data/Fold-switch_hits-SPEACH_AF/pdb_pairs.csv
deleted file mode 100644
index 95a47ab..0000000
--- a/Data/Fold-switch_hits-SPEACH_AF/pdb_pairs.csv
+++ /dev/null
@@ -1,8 +0,0 @@
-FOLD1,FOLD2
-1kct_A,3t1p_A
-5ejb_C,1wp8_C
-2a73_B,3l5n_B
-6c6s_D,2oug_C
-3j7w_B,3j7v_E
-4y0m_J,4xws_D
-2pbk_B,3njq_A
diff --git a/Install/install_colabbatch_linux_101624.sh b/Install/install_colabbatch_linux_101624.sh
deleted file mode 100644
index 070dd35..0000000
--- a/Install/install_colabbatch_linux_101624.sh
+++ /dev/null
@@ -1,53 +0,0 @@
-#!/bin/bash -e
-
-type wget 2>/dev/null || { echo "wget is not installed. Please install it using apt or yum." ; exit 1 ; }
-
-CURRENTPATH=`pwd`
-COLABFOLDDIR="${CURRENTPATH}/localcolabfold"
-
-mkdir -p "${COLABFOLDDIR}"
-cd "${COLABFOLDDIR}"
-wget -q -P . https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-x86_64.sh
-bash ./Miniforge3-Linux-x86_64.sh -b -p "${COLABFOLDDIR}/conda"
-rm Miniforge3-Linux-x86_64.sh
-
-source "${COLABFOLDDIR}/conda/etc/profile.d/conda.sh"
-export PATH="${COLABFOLDDIR}/conda/condabin:${PATH}"
-conda update -n base conda -y
-conda create -p "$COLABFOLDDIR/colabfold-conda" -c conda-forge -c bioconda \
- git python=3.10 openmm==7.7.0 pdbfixer \
- kalign2=2.04 hhsuite=3.3.0 mmseqs2=15.6f452 -y
-conda activate "$COLABFOLDDIR/colabfold-conda"
-
-# install ColabFold and Jaxlib
-"$COLABFOLDDIR/colabfold-conda/bin/pip" install --no-warn-conflicts \
- "colabfold[alphafold-minus-jax] @ git+https://github.com/sokrypton/ColabFold"
-"$COLABFOLDDIR/colabfold-conda/bin/pip" install "colabfold[alphafold]"
-"$COLABFOLDDIR/colabfold-conda/bin/pip" install --upgrade "jax[cuda12]"==0.4.28
-"$COLABFOLDDIR/colabfold-conda/bin/pip" install --upgrade tensorflow
-"$COLABFOLDDIR/colabfold-conda/bin/pip" install silence_tensorflow
-
-# Download the updater
-wget -qnc -O "$COLABFOLDDIR/update_linux.sh" \
- https://raw.githubusercontent.com/YoshitakaMo/localcolabfold/main/update_linux.sh
-chmod +x "$COLABFOLDDIR/update_linux.sh"
-
-pushd "${COLABFOLDDIR}/colabfold-conda/lib/python3.10/site-packages/colabfold"
-# Use 'Agg' for non-GUI backend
-sed -i -e "s#from matplotlib import pyplot as plt#import matplotlib\nmatplotlib.use('Agg')\nimport matplotlib.pyplot as plt#g" plot.py
-# modify the default params directory
-sed -i -e "s#appdirs.user_cache_dir(__package__ or \"colabfold\")#\"${COLABFOLDDIR}/colabfold\"#g" download.py
-# suppress warnings related to tensorflow
-sed -i -e "s#from io import StringIO#from io import StringIO\nfrom silence_tensorflow import silence_tensorflow\nsilence_tensorflow()#g" batch.py
-# remove cache directory
-rm -rf __pycache__
-popd
-
-# Download weights
-"$COLABFOLDDIR/colabfold-conda/bin/python3" -m colabfold.download
-echo "Download of alphafold2 weights finished."
-echo "-----------------------------------------"
-echo "Installation of ColabFold finished."
-echo "Add ${COLABFOLDDIR}/colabfold-conda/bin to your PATH environment variable to run 'colabfold_batch'."
-echo -e "i.e. for Bash:\n\texport PATH=\"${COLABFOLDDIR}/colabfold-conda/bin:\$PATH\""
-echo "For more details, please run 'colabfold_batch --help'."
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..3cb37fd
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,4 @@
+include README.md
+include LICENSE.md
+recursive-include cf_random/data *
+recursive-include examples *
diff --git a/README.md b/README.md
index b35a5cc..ee31095 100644
--- a/README.md
+++ b/README.md
@@ -1,52 +1,27 @@
# Data and code for CF-random
-General installation and usage guidance of CF-random for predicting the alternative conformation and fold-switching proteins.
-To run CF-random in a Colab notebook, please use following [link](https://colab.research.google.com/drive/16pD2tUMkUx1gwDxZXcSr9WOosYp0ZU6j?authuser=0).
+
+General installation and usage guidance of CF-random for predicting the alternative conformation and fold-switching proteins.
+
+To run CF-random in a Colab notebook, please use the following [link](https://colab.research.google.com/drive/16pD2tUMkUx1gwDxZXcSr9WOosYp0ZU6j?authuser=0).
+
-
# Installation
-CF-random uses the [localcolabfold](https://github.com/YoshitakaMo/localcolabfold) and [Foldseek](https://github.com/steineggerlab/foldseek) under linux environment.
-For more details about localcolabfold, please visit [here.](https://github.com/YoshitakaMo/localcolabfold)
-We currently not support the Windows and MacOS environment.
-
-Installation process including localcolabfold, dependencies, and Foldseek is done with following commands.
-```
-wget https://raw.githubusercontent.com/YoshitakaMo/localcolabfold/main/install_colabbatch_linux.sh
-bash install_colabbatch_linux.sh
-
-** Or use a bash script in install folder
-bash install_colabbatch_linux.sh
-```
-
-
-After the installation of localcolabfold, add the localcolabfold path to your .bashrc file:.
-```
-export PATH="/path/to/your/localcolabfold/colabfold-conda/bin:$PATH"
-```
-
+CF-random uses [ColabFold](https://github.com/sokrypton/ColabFold) (for structure prediction) and [Foldseek](https://github.com/steineggerlab/foldseek) (for structure search) in a Linux environment.
-Then reactivate your .bashrc file
+**For installation details, see [INSTALL.md](INSTALL.md)**
-Now create a conda new conda environment:
-```
-conda create --name CF-random python=3.10
-conda activate CF-random
-pip install textalloc tmtools adjustText thefuzz mdtraj biopython seaborn MDAnalysis
-conda install conda-forge::pymol-open-source
-pip3 install -U scikit-learn
-```
-Once the dependencies are installed, install Foldseek.
-
-```
+Quick start:
+```bash
+conda create --name cf-random python=3.10 -y
+conda activate cf-random
+pip install -e ".[colabfold]"
conda install -c conda-forge -c bioconda foldseek
foldseek databases PDB pdb tmp
```
-
-
-### We recommend running the foldseek databases command in a directory where the libraries can be stored.
# Usage
@@ -65,7 +40,7 @@ foldseek databases PDB pdb tmp
--type #### | can choose the model type of Colabfold. e.g.) ptm, monomer, and multimer
--options ### | AC: predicting alternative conformations of protein with references, FS: predicting the fold-switching protein with references, and blind: predicting the alternative conformations or fold-switching proteins without reference PDB files.
```
-* In default mode (fold-switching and alternative conformation), CF-ramdon produces the results of TM-scores (csv and png files), plDDT, and information of selected random MSA. If CF-random predicts the both folds, generated prediction files are deposited under successed_prediction/pdb1_name and additional_sampling/pdb1_name . If not, it would not generate anything.
+* In default mode (fold-switching and alternative conformation), CF-random produces the results of TM-scores (csv and png files), plDDT, and information of selected random MSA. If CF-random predicts both folds, generated prediction files are deposited under successed_prediction/pdb1_name and additional_sampling/pdb1_name . If not, it would not generate anything.
* Before running the default mode of fold-switching, setting the "range_fs_pairs_all.txt" file is required. The name of reference PDB files, residue ranges of reference pdb files, and residue ranges of prediction files. ColabFold generates the residue index starting from 1, so please choose the residue range of fold-switching region correctly. CF-random reads the residue index in PDB file, make sure that selection of residue range is correct.
examples) pdb1, pdb2, XXX-XXX, XXX-XXX, XXX-XXX, XXX-XXX (residue range of reference 1, residue range of reference 2, residue range of prediction1, resodie range of prediction2)
* --nMSA can be applied for all options, but --nESN cannot be used for blind mode.
diff --git a/cf_random/__init__.py b/cf_random/__init__.py
new file mode 100644
index 0000000..0fe9f3f
--- /dev/null
+++ b/cf_random/__init__.py
@@ -0,0 +1,18 @@
+"""
+CF-random: Predicting alternative conformations and fold-switching proteins
+
+A package for identifying and analyzing protein fold-switching and alternative conformations
+using AlphaFold predictions and structural analysis tools.
+"""
+
+__version__ = "0.1.0"
+__author__ = "Myeongsang (Samuel) Lee"
+__all__ = [
+ "main",
+]
+
+# Import main modules for easier access
+try:
+ from .core import main
+except ImportError:
+ pass
diff --git a/Data/Fold-switch_hits-AFcluster/CRKL/2bzy_B_tmscores_fs_all.csv b/cf_random/analysis/__init__.py
similarity index 100%
rename from Data/Fold-switch_hits-AFcluster/CRKL/2bzy_B_tmscores_fs_all.csv
rename to cf_random/analysis/__init__.py
diff --git a/code/cal_plddt_ACFS.py b/cf_random/analysis/cal_plddt_ACFS.py
similarity index 71%
rename from code/cal_plddt_ACFS.py
rename to cf_random/analysis/cal_plddt_ACFS.py
index fc4f5b8..cd45cb5 100644
--- a/code/cal_plddt_ACFS.py
+++ b/cf_random/analysis/cal_plddt_ACFS.py
@@ -1,21 +1,25 @@
"""
find the average pLDDT score
"""
-import sys,re
+
import glob
import json
-import numpy as np
+import re
+import sys
from pathlib import Path
-#define pattern for regular expression
+import numpy as np
+
+# define pattern for regular expression
# 0_000_scores_rank_001_alphafold2_ptm_model_4_seed_000.json
-pattrn = re.compile(r'.*?_scores_rank_(?P\d+)_alphafold2.*')
+pattrn = re.compile(r".*?_scores_rank_(?P<rank>\d+)_alphafold2.*")
# default if pattern doesn't work
rank = "000"
-def read_plddt (jsonfile):
+
+def read_plddt(jsonfile):
"""
read the json file
return the plddt scores
@@ -23,41 +27,37 @@ def read_plddt (jsonfile):
"""
with open(jsonfile) as json_file:
data = json.load(json_file)
-
- plddt_scores = np.array(data['plddt'],dtype='float64')
+
+ plddt_scores = np.array(data["plddt"], dtype="float64")
return plddt_scores
-def fract_good (score):
+
+def fract_good(score):
"""
return percentage
of residue with
plddt score > 70
"""
- vals_greater_70 = (score > 70).sum()
- percent_good = round((vals_greater_70 / score.size)*100,2)
- avg_plddt = round(np.average(score),2)
- #return percent_good,avg_plddt
+ avg_plddt = round(np.average(score), 2)
return avg_plddt
-
-
-class plddt_cal():
+class plddt_cal:
def __init__(self, sub_list, category, pdb_name, nMSA, nENS, model_type):
- # if files found then continue
+ # if files found then continue
if len(sub_list) == 0:
sys.exit(1)
-
+
# create a data dictionary
out_dict_all = {}
-
+
values_all = []
cnt = 0
- if category =='full-MSA':
- #if category == 'additional-MSA':
- print("working...")
+ if category == "full-MSA":
+ # if category == 'additional-MSA':
+ print("working...")
print(sub_list)
for subdir in sub_list:
print(subdir)
@@ -73,19 +73,17 @@ def __init__(self, sub_list, category, pdb_name, nMSA, nENS, model_type):
jsonfilename = jsonfilepath.stem
match = pattrn.match(jsonfilename)
if match:
- rank = match.group('rank')
+ rank = match.group("rank")
key_pair = subdir_name + ":" + rank
# for all
if key_pair not in out_dict_all:
- out_dict_all[key_pair]=values
+ out_dict_all[key_pair] = values
cnt = cnt + 1
cnt = int(cnt / 5)
-
-
- elif category == 'additional-MSA':
+ elif category == "additional-MSA":
print("working...")
print(sub_list)
for subdir in sub_list:
@@ -102,20 +100,18 @@ def __init__(self, sub_list, category, pdb_name, nMSA, nENS, model_type):
jsonfilename = jsonfilepath.stem
match = pattrn.match(jsonfilename)
if match:
- rank = match.group('rank')
+ rank = match.group("rank")
key_pair = subdir_name + ":" + rank
# for all
if key_pair not in out_dict_all:
- out_dict_all[key_pair]=values
+ out_dict_all[key_pair] = values
cnt = cnt + 1
-
-
else:
for subdir in sub_list:
- #for subdir in all_sub_dir_paths:
+ # for subdir in all_sub_dir_paths:
# make sure subdir exists
if Path(subdir).is_dir():
subdir_name = Path(subdir).name
@@ -129,30 +125,29 @@ def __init__(self, sub_list, category, pdb_name, nMSA, nENS, model_type):
jsonfilename = jsonfilepath.stem
match = pattrn.match(jsonfilename)
if match:
- rank = match.group('rank')
-
+ rank = match.group("rank")
+
key_pair = subdir_name + ":" + rank
# for all
if key_pair not in out_dict_all:
- out_dict_all[key_pair]=values
+ out_dict_all[key_pair] = values
cnt = cnt + 1
-
print(cnt)
print(values_all)
- if category =='full-MSA':
+ if category == "full-MSA":
values_all_resh = values_all.reshape(nMSA + 5, 5)
- elif category == 'additional-MSA' and model_type == 'alphafold2_multimer_v3':
+ elif category == "additional-MSA" and model_type == "alphafold2_multimer_v3":
values_all_resh = values_all.reshape(((nENS + 20)), 5)
- elif category == 'additional-MSA' and model_type != 'alphafold2_multimer_v3':
+ elif category == "additional-MSA" and model_type != "alphafold2_multimer_v3":
values_all_resh = values_all.reshape(((nENS + 20)), 5)
- elif category == 'random-MSA' and model_type != 'alphafold2_multimer_v3':
+ elif category == "random-MSA" and model_type != "alphafold2_multimer_v3":
values_all_resh = values_all.reshape(((nMSA + 5) * 7), 5)
- elif category == 'random-MSA' and model_type == 'alphafold2_multimer_v3':
+ elif category == "random-MSA" and model_type == "alphafold2_multimer_v3":
values_all_resh = values_all.reshape(((nMSA + 5) * 7), 5)
print(" ")
print("Calculated pLDDT")
print(values_all_resh)
- np.savetxt('plddt_' + category + '_' + pdb_name +'.csv', values_all_resh, fmt='%2.3f')
+ np.savetxt("plddt_" + category + "_" + pdb_name + ".csv", values_all_resh, fmt="%2.3f")
diff --git a/cf_random/analysis/cal_tmscore_fs_flmsa.py b/cf_random/analysis/cal_tmscore_fs_flmsa.py
new file mode 100644
index 0000000..4c0e78d
--- /dev/null
+++ b/cf_random/analysis/cal_tmscore_fs_flmsa.py
@@ -0,0 +1,280 @@
+#!/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Compare the predicted models with original PDBs
+report TM-scores for ranked 0 to 4
+input line is pdb1 pdb2 preds_of_pdb dirname
+
+This version requires tmtools 0.0.2 (Python bindings around the TM-align code for structural alignment of proteins)
+check this for local installation
+https://pypi.org/project/tmtools/
+
+Usage:
+
+python3.8 compare_strs_fs.py 2k42_A 1cee_B 1cee_B 0_msas_models/
+
+Created on Wed Feb 21 14:51:00 2024
+@author: Myeongsang (Samuel) Lee
+"""
+
+import glob
+import os
+import sys
+from pathlib import Path
+
+import numpy as np
+from Bio.PDB import PDBParser
+
+# call related modules of tmtools after installation
+from tmtools import tm_align
+
+pdbParser = PDBParser(QUIET=True)
+
+# convert three letter code to one letter code
+aa3to1 = {
+ "CYS": "C",
+ "ASP": "D",
+ "SER": "S",
+ "GLN": "Q",
+ "LYS": "K",
+ "ILE": "I",
+ "PRO": "P",
+ "THR": "T",
+ "PHE": "F",
+ "ASN": "N",
+ "GLY": "G",
+ "HIS": "H",
+ "LEU": "L",
+ "ARG": "R",
+ "TRP": "W",
+ "ALA": "A",
+ "VAL": "V",
+ "GLU": "E",
+ "TYR": "Y",
+ "MET": "M",
+}
+
+
+class TM_score_fs:
+ def get_coords(self, pdbfile, fs_range):
+ """
+ parameters:
+ pdbfile - path to pdbfile
+ fs_range - range of residues at the fold-switching region, given as string - "112-162"
+ returns:
+ numpy array of coords
+ string of seqs in 1-letter-code
+ """
+
+ seq = ""
+ struct = pdbParser.get_structure("x", str(pdbfile))
+ coords = []
+ seq_dict = {}
+
+ # for residues within a certain range, using numpy to save the coords
+ # and save the sequence as a dict and then sorted list of tuples
+ # return the coords and the seq
+
+ # convert str to residue range for the fs region
+ start, stop = fs_range.split("-")
+ res_range = range(int(start), int(stop) + 1)
+
+ for atom in struct.get_atoms():
+ residue = atom.get_parent() # from atom we can get the parent residue
+ res_id = residue.get_id()[1]
+ resname = residue.get_resname()
+ if res_id in res_range and atom.get_name() == "CA":
+ x, y, z = atom.get_coord()
+ coords.append([x, y, z])
+ if res_id not in seq_dict:
+ seq_dict[res_id] = aa3to1[resname]
+
+ # print(coords)
+ # convert to np array
+ coords_np = np.array(coords)
+ # sort the seq_dict by keys a.k.a res_ids
+ sorted_data = sorted(seq_dict.items())
+ for i in sorted_data:
+ seq += i[1]
+
+ return coords_np, seq
+
+ def get_tmscore(self, coords1, seq1, predfilepath, res_range):
+ """
+ parameters:
+ coords1, seq1 - the numpy array of PDB coords and its seqs
+ predfilepath - path for predicted files
+ res_range - fs range in predicted models
+
+ returns:
+ tmscore list
+
+ """
+
+ tmscores = []
+ tmscores_ord = []
+ tmscores_rev = []
+ modelfiles = sorted(glob.glob(str(predfilepath) + "/*_unrelaxed*pdb"))
+
+ if len(modelfiles) == 0:
+ tmscores = [0.0, 0.0, 0.0, 0.0, 0.0]
+ return tmscores
+
+ for model in modelfiles:
+ modelpath = Path(model)
+ coords2, seq2 = self.get_coords(modelpath, res_range)
+ res = tm_align(coords1, coords2, seq1, seq2)
+ tmscore = round(res.tm_norm_chain1, 2) # wrt to model
+ tmscores_ord.append(tmscore)
+
+ res = tm_align(coords2, coords1, seq2, seq1)
+ tmscore = round(res.tm_norm_chain1, 2)
+ tmscores_rev.append(tmscore)
+
+ if np.max(tmscores_ord) > tmscores_rev:
+ tmscores = tmscores_ord
+ else:
+ tmscores = tmscores_rev
+
+ return tmscores
+
+ # def run_for_models(self, FH, pdbfile1, pdbfile2, data_dir,pred_range,res_range1,res_range2):
+ def run_for_models(self, pdbfile1, pdbfile2, data_dir, pred_range, res_range1, res_range2):
+ """
+ compare the original PDB
+ with the predicted models, 0 to 5
+
+ parameters:
+ FH - filehandle for writing
+ pdbfile1 - path to original PDB, Fold1
+ pdbfile2 - path to alternate PDB, Fold2
+ data_dir - path for the predicted strs
+ res_range1 - fs range in PDB1 and its models
+ res_range2 - fs range in PDB2 and its models
+
+ returns:
+ nothing
+
+ saves the TM-scores in a local file
+ """
+ # print(res_range1,res_range2)
+
+ # get list of subdirectories
+ all_sub_dir_paths = glob.glob(str(data_dir)) # returns list of sub directory paths
+ tmscores_fs = []
+
+ print(all_sub_dir_paths)
+ # files found then continue
+ if len(all_sub_dir_paths) == 0:
+ pass
+
+ for subdir in all_sub_dir_paths:
+ preddir = Path(subdir)
+
+ # predicted dir doesn't exist then continue
+ if not preddir.exists():
+ pass
+
+ # only comparing on one set of predicted models
+ # but with both PDBs/Folds
+ coords1, seq1 = self.get_coords(pdbfile1, res_range1)
+ print(preddir, pred_range)
+ tmscore_lst1 = self.get_tmscore(coords1, seq1, preddir, pred_range) # wrt pdb1
+ tmp_tm_fs = tmscore_lst1
+ print(tmp_tm_fs)
+ tmscores_fs.append(tmp_tm_fs)
+ # print(tmscore_lst1)
+
+ for subdir in all_sub_dir_paths:
+ preddir = Path(subdir)
+
+ # predicted dir doesn't exist then continue
+ if not preddir.exists():
+ pass
+
+ # only comparing on one set of predicted models
+ # but with both PDBs/Folds
+ coords2, seq2 = self.get_coords(pdbfile2, res_range2)
+ tmscore_lst2 = self.get_tmscore(coords2, seq2, preddir, pred_range) # wrt pdb2
+ tmp_tm_fs = tmscore_lst2
+ print(tmp_tm_fs)
+ tmscores_fs.append(tmp_tm_fs)
+ # print(tmscore_lst2)
+
+ # print(" ")
+ tmscores_fs = np.array(tmscores_fs)
+ self.tmscores_fs = tmscores_fs
+ print(" ")
+ print(tmscores_fs)
+
+ def __init__(self, pdb1, pdb1_name, pdb2, pdb2_name):
+ # get numpy arrays for coords at the fold-switching region
+ # also return the seq in 1-letter code for the same
+
+ # input arguments: sys.argv[1] - pdb1, sys.argv[2] - pdb2
+ # sys.argv[3] - preds, sys.argv[4] - current directory
+
+ current_dir = os.getcwd() + "/"
+ pred_dir = pdb1_name + "_predicted_models_full_*"
+ pred_path = current_dir + pred_dir + "/"
+ data_dir = Path(pred_path) # Path to the predicted models
+ print(data_dir)
+
+ # the range of the fold-switching region
+ range_file = current_dir + "range_fs_pairs_all.txt"
+
+ # convert this file into a dictionary for reference later
+ fs_res = {}
+
+ # The range_file file has the fold-switching residue ranges
+ # for the original PDB/PDB1, alternate PDB/PDB2
+ # Predicted model for PDB1, predicted model for PDB2
+ with open(range_file, "r") as Infile:
+ next(Infile) # skip header line "# pdb1,pdb2,pred1,pred2"
+ for line in Infile:
+ line = line.strip()
+ n1, n2, p1, p2, m1, m2 = line.split(",")
+ # the value of the dictionary is a tuple
+ # the first element of tuple is the fs range in the original PDB
+ # followed by the range in the predicted model
+ if n1 not in fs_res:
+ fs_res[n1] = (p1, m1)
+ if n2 not in fs_res:
+ fs_res[n2] = (p2, m2)
+
+ print("Running for pair ", pdb1_name, pdb2_name, end="..")
+ print(" ")
+ print("comparing predictions of ", pdb1_name, end="...")
+ print(" ")
+
+ try:
+ range_pdb1 = fs_res[
+ pdb1_name
+ ] # so if pdb1 is '1nqd_A', fs_res['1nqd_A']=('895-919', '1-33')
+ range_pdb2 = fs_res[
+ pdb2_name
+ ] # and if pdb2 is '1nqj_B', fs_res['1nqj_B']=('894-919', '1-33')
+ except:
+ print("check PDBIDs ", pdb1_name, pdb2_name)
+ sys.exit(1)
+
+ range_pred = range_pdb1[1]
+ self.run_for_models(pdb1, pdb2, data_dir, range_pred, range_pdb1[0], range_pdb2[0])
+
+
+# if __name__ == "__main__":
+#
+# import warnings
+# warnings.filterwarnings('ignore')
+#
+# parser = argparse.ArgumentParser()
+# parser.add_argument("--pdb1", type=str, help='PDB structure for the target crystal structure (target to be predicted)')
+# parser.add_argument("--pdb2", type=str, help='PDB structure for the alternative crystal structure')
+#
+# args = parser.parse_args()
+#
+# pdb1 = args.pdb1; pdb2 = args.pdb2
+# pdb1_name = pdb1.replace('.pdb',''); pdb2_name = pdb2.replace('.pdb','')
+#
+# TM_score_fs(pdb1, pdb1_name, pdb2, pdb2_name)
+#
diff --git a/cf_random/analysis/cal_tmscore_fs_multimer.py b/cf_random/analysis/cal_tmscore_fs_multimer.py
new file mode 100644
index 0000000..bbb2b70
--- /dev/null
+++ b/cf_random/analysis/cal_tmscore_fs_multimer.py
@@ -0,0 +1,261 @@
+#!/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Compare the predicted models with original PDBs
+report TM-scores for ranked 0 to 4
+input line is pdb1 pdb2 preds_of_pdb dirname
+
+This version requires tmtools 0.0.2 (Python bindings around the TM-align code for structural alignment of proteins)
+check this for local installation
+https://pypi.org/project/tmtools/
+
+Usage:
+
+python3.8 compare_strs_fs.py 2k42_A 1cee_B 1cee_B 0_msas_models/
+
+Created on Wed Feb 21 14:51:00 2024
+@author: Myeongsang (Samuel) Lee
+"""
+
+import glob
+import os
+import sys
+from pathlib import Path
+
+import numpy as np
+from Bio.PDB import PDBParser
+
+# call related modules of tmtools after installation
+from tmtools import tm_align
+
+pdbParser = PDBParser(QUIET=True)
+
+# convert three letter code to one letter code
+aa3to1 = {
+ "CYS": "C",
+ "ASP": "D",
+ "SER": "S",
+ "GLN": "Q",
+ "LYS": "K",
+ "ILE": "I",
+ "PRO": "P",
+ "THR": "T",
+ "PHE": "F",
+ "ASN": "N",
+ "GLY": "G",
+ "HIS": "H",
+ "LEU": "L",
+ "ARG": "R",
+ "TRP": "W",
+ "ALA": "A",
+ "VAL": "V",
+ "GLU": "E",
+ "TYR": "Y",
+ "MET": "M",
+}
+
+
+class TM_score_fs_multi:
+ def get_coords(self, pdbfile, fs_range):
+ """
+ parameters:
+ pdbfile - path to pdbfile
+ fs_range - range of residues at the fold-switching region, given as string - "112-162"
+ returns:
+ numpy array of coords
+ string of seqs in 1-letter-code
+ """
+
+ seq = ""
+ struct = pdbParser.get_structure("x", str(pdbfile))
+ coords = []
+ seq_dict = {}
+
+ # for residues within a certain range, using numpy to save the coords
+ # and save the sequence as a dict and then sorted list of tuples
+ # return the coords and the seq
+
+ # convert str to residue range for the fs region
+ start, stop = fs_range.split("-")
+ res_range = range(int(start), int(stop) + 1)
+
+ for atom in struct.get_atoms():
+ residue = atom.get_parent() # from atom we can get the parent residue
+ res_id = residue.get_id()[1]
+ resname = residue.get_resname()
+ if res_id in res_range and atom.get_name() == "CA":
+ x, y, z = atom.get_coord()
+ coords.append([x, y, z])
+ if res_id not in seq_dict:
+ seq_dict[res_id] = aa3to1[resname]
+
+ # print(coords)
+ # convert to np array
+ coords_np = np.array(coords)
+ # sort the seq_dict by keys a.k.a res_ids
+ sorted_data = sorted(seq_dict.items())
+ for i in sorted_data:
+ seq += i[1]
+
+ return coords_np, seq
+
+ def get_tmscore(self, coords1, seq1, predfilepath, res_range):
+ """
+ parameters:
+ coords1, seq1 - the numpy array of PDB coords and its seqs
+ predfilepath - path for predicted files
+ res_range - fs range in predicted models
+
+ returns:
+ tmscore list
+
+ """
+
+ tmscores = []
+ # modelfiles = sorted(glob.glob(str(predfilepath) + "/*_unrelaxed*pdb"))
+ modelfiles = glob.glob(str(predfilepath) + "/single*_unrelaxed*pdb")
+
+ if len(modelfiles) == 0:
+ tmscores = [0.0, 0.0, 0.0, 0.0, 0.0]
+ return tmscores
+
+ for model in modelfiles:
+ modelpath = Path(model)
+ coords2, seq2 = self.get_coords(modelpath, res_range)
+ res = tm_align(coords1, coords2, seq1, seq2)
+ tmscore = round(res.tm_norm_chain1, 2) # wrt to model
+ tmscores.append(tmscore)
+
+ return tmscores
+
+ # def run_for_models(self, FH, pdbfile1, pdbfile2, data_dir,pred_range,res_range1,res_range2):
+ def run_for_models(self, pdbfile1, pdbfile2, data_dir, pred_range, res_range1, res_range2):
+ """
+ compare the original PDB
+ with the predicted models, 0 to 5
+
+ parameters:
+ FH - filehandle for writing
+ pdbfile1 - path to original PDB, Fold1
+ pdbfile2 - path to alternate PDB, Fold2
+ data_dir - path for the predicted strs
+ res_range1 - fs range in PDB1 and its models
+ res_range2 - fs range in PDB2 and its models
+
+ returns:
+ nothing
+
+ saves the TM-scores in a local file
+ """
+ # print(res_range1,res_range2)
+
+ # get list of subdirectories
+ all_sub_dir_paths = glob.glob(str(data_dir))
+ tmscores_fs = []
+
+ ## files found then continue
+ if len(all_sub_dir_paths) == 0:
+ pass
+
+ for subdir in all_sub_dir_paths:
+ preddir = Path(subdir)
+ # predicted dir doesn't exist then continue
+ if not preddir.exists():
+ pass
+
+ # only comparing on one set of predicted models
+ # but with both PDBs/Folds
+ coords1, seq1 = self.get_coords(pdbfile1, res_range1)
+ tmscore_lst1 = self.get_tmscore(coords1, seq1, preddir, pred_range) # wrt pdb1
+ tmp_tm_fs = tmscore_lst1
+ tmscores_fs.append(tmp_tm_fs)
+
+ for subdir in all_sub_dir_paths:
+ preddir = Path(subdir)
+
+ # predicted dir doesn't exist then continue
+ if not preddir.exists():
+ pass
+
+ # only comparing on one set of predicted models
+ # but with both PDBs/Folds
+ coords2, seq2 = self.get_coords(pdbfile2, res_range2)
+ tmscore_lst2 = self.get_tmscore(coords2, seq2, preddir, pred_range) # wrt pdb2
+ tmp_tm_fs = tmscore_lst2
+ tmscores_fs.append(tmp_tm_fs)
+
+ print(" ")
+ tmscores_fs = np.array(tmscores_fs)
+ print("tmscores_fs")
+ self.tmscores_fs = tmscores_fs
+
+ def __init__(self, pred_path, pdb1, pdb1_name, pdb2, pdb2_name):
+ # get numpy arrays for coords at the fold-switching region
+ # also return the seq in 1-letter code for the same
+
+ # input arguments: sys.argv[1] - pdb1, sys.argv[2] - pdb2
+ # sys.argv[3] - preds, sys.argv[4] - current directory
+
+ current_dir = os.getcwd() + "/"
+ # pred_dir = 'additional_sampling/' + pdb1_name
+ # pred_path = current_dir + pred_dir + '/'
+ data_dir = Path(pred_path) # Path to the predicted models
+
+ # the range of the fold-switching region
+ range_file = current_dir + "range_fs_pairs_all.txt"
+
+ # convert this file into a dictionary for reference later
+ fs_res = {}
+
+ # The range_file file has the fold-switching residue ranges
+ # for the original PDB/PDB1, alternate PDB/PDB2
+ # Predicted model for PDB1, predicted model for PDB2
+ with open(range_file, "r") as Infile:
+ next(Infile) # skip header line "# pdb1,pdb2,pred1,pred2"
+ for line in Infile:
+ line = line.strip()
+ n1, n2, p1, p2, m1, m2 = line.split(",")
+ # the value of the dictionary is a tuple
+ # the first element of tuple is the fs range in the original PDB
+ # followed by the range in the predicted model
+ if n1 not in fs_res:
+ fs_res[n1] = (p1, m1)
+ if n2 not in fs_res:
+ fs_res[n2] = (p2, m2)
+
+ print("Running for pair ", pdb1_name, pdb2_name, end="..")
+ print(" ")
+ print("comparing predictions of ", pdb1_name, end="...")
+ print(" ")
+
+ try:
+ range_pdb1 = fs_res[
+ pdb1_name
+ ] # so if pdb1 is '1nqd_A', fs_res['1nqd_A']=('895-919', '1-33')
+ range_pdb2 = fs_res[
+ pdb2_name
+ ] # and if pdb2 is '1nqj_B', fs_res['1nqj_B']=('894-919', '1-33')
+ except:
+ print("check PDBIDs ", pdb1_name, pdb2_name)
+ sys.exit(1)
+
+ range_pred = range_pdb1[1]
+ self.run_for_models(pdb1, pdb2, data_dir, range_pred, range_pdb1[0], range_pdb2[0])
+
+
+# if __name__ == "__main__":
+#
+# import warnings
+# warnings.filterwarnings('ignore')
+#
+# parser = argparse.ArgumentParser()
+# parser.add_argument("--pdb1", type=str, help='PDB structure for the target crystal structure (target to be predicted)')
+# parser.add_argument("--pdb2", type=str, help='PDB structure for the alternative crystal structure')
+#
+# args = parser.parse_args()
+#
+# pdb1 = args.pdb1; pdb2 = args.pdb2
+# pdb1_name = pdb1.replace('.pdb',''); pdb2_name = pdb2.replace('.pdb','')
+#
+# TM_score_fs(pdb1, pdb1_name, pdb2, pdb2_name)
+#
diff --git a/cf_random/analysis/cal_tmscore_fs_only.py b/cf_random/analysis/cal_tmscore_fs_only.py
new file mode 100644
index 0000000..d7b1fb4
--- /dev/null
+++ b/cf_random/analysis/cal_tmscore_fs_only.py
@@ -0,0 +1,273 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Compare the predicted models with original PDBs
+report TM-scores for ranked 0 to 4
+input line is pdb1 pdb2 preds_of_pdb dirname
+
+This version requires tmtools 0.0.2 (Python bindings around the TM-align code for structural alignment of proteins)
+check this for local installation
+https://pypi.org/project/tmtools/
+
+Usage:
+
+python3 cal_tmscore_fs_only.py 2k42_A 1cee_B 1cee_B 0_msas_models/
+
+Created on Wed Feb 21 14:51:00 2024
+@author: Myeongsang (Samuel) Lee
+"""
+
+import glob
+import os
+import sys
+from pathlib import Path
+
+import numpy as np
+from Bio.PDB import PDBParser
+
+# call related modules of tmtools after installation
+from tmtools import tm_align
+
+pdbParser = PDBParser(QUIET=True)
+
+# convert three letter code to one letter code
+aa3to1 = {
+ "CYS": "C",
+ "ASP": "D",
+ "SER": "S",
+ "GLN": "Q",
+ "LYS": "K",
+ "ILE": "I",
+ "PRO": "P",
+ "THR": "T",
+ "PHE": "F",
+ "ASN": "N",
+ "GLY": "G",
+ "HIS": "H",
+ "LEU": "L",
+ "ARG": "R",
+ "TRP": "W",
+ "ALA": "A",
+ "VAL": "V",
+ "GLU": "E",
+ "TYR": "Y",
+ "MET": "M",
+}
+
+
+class TM_score_fs:
+ def get_coords(self, pdbfile, fs_range):
+ """
+ parameters:
+ pdbfile - path to pdbfile
+ fs_range - range of residues at the fold-switching region, given as string - "112-162"
+ returns:
+ numpy array of coords
+ string of seqs in 1-letter-code
+ """
+
+ seq = ""
+ struct = pdbParser.get_structure("x", str(pdbfile))
+ coords = []
+ seq_dict = {}
+
+ # for residues within a certain range, using numpy to save the coords
+ # and save the sequence as a dict and then sorted list of tuples
+ # return the coords and the seq
+
+ # convert str to residue range for the fs region
+ start, stop = fs_range.split("-")
+ res_range = range(int(start), int(stop) + 1)
+
+ for atom in struct.get_atoms():
+ residue = atom.get_parent() # from atom we can get the parent residue
+ res_id = residue.get_id()[1]
+ resname = residue.get_resname()
+ if res_id in res_range and atom.get_name() == "CA":
+ x, y, z = atom.get_coord()
+ coords.append([x, y, z])
+ if res_id not in seq_dict:
+ seq_dict[res_id] = aa3to1[resname]
+
+ # print(coords)
+ # convert to np array
+ coords_np = np.array(coords)
+ # sort the seq_dict by keys a.k.a res_ids
+ sorted_data = sorted(seq_dict.items())
+ for i in sorted_data:
+ seq += i[1]
+
+ return coords_np, seq
+
+ def get_tmscore(self, coords1, seq1, predfilepath, res_range):
+ """
+ parameters:
+ coords1, seq1 - the numpy array of PDB coords and its seqs
+ predfilepath - path for predicted files
+ res_range - fs range in predicted models
+
+ returns:
+ tmscore list
+
+ """
+
+ tmscores = []
+ tmscores_ord = []
+ tmscores_rev = []
+ # modelfiles = sorted(glob.glob(str(predfilepath) + "/*_unrelaxed*pdb"))
+ modelfiles = glob.glob(str(predfilepath) + "/*_unrelaxed*pdb")
+
+ if len(modelfiles) == 0:
+ tmscores = [0.0, 0.0, 0.0, 0.0, 0.0]
+ return tmscores
+
+ for model in modelfiles:
+ modelpath = Path(model)
+ coords2, seq2 = self.get_coords(modelpath, res_range)
+ res = tm_align(coords1, coords2, seq1, seq2)
+            tmscore = round(res.tm_norm_chain1, 2)  # normalized by chain1 = coords1 (the PDB), not the model — verify
+ tmscores_ord.append(tmscore)
+
+ res = tm_align(coords2, coords1, seq2, seq1)
+            tmscore = round(res.tm_norm_chain1, 5)  # wrt model (chain1 = coords2); NB 5 dp here vs 2 dp above — confirm intentional
+ tmscores_rev.append(tmscore)
+
+ if np.max(tmscores_ord) > np.max(tmscores_rev):
+ tmscores = tmscores_ord
+ else:
+ tmscores = tmscores_rev
+
+ return tmscores
+
+ # def run_for_models(self, FH, pdbfile1, pdbfile2, data_dir,pred_range,res_range1,res_range2):
+ def run_for_models(self, pdbfile1, pdbfile2, data_dir, pred_range, res_range1, res_range2):
+ """
+ compare the original PDB
+ with the predicted models, 0 to 5
+
+ parameters:
+ FH - filehandle for writing
+ pdbfile1 - path to original PDB, Fold1
+ pdbfile2 - path to alternate PDB, Fold2
+ data_dir - path for the predicted strs
+ res_range1 - fs range in PDB1 and its models
+ res_range2 - fs range in PDB2 and its models
+
+ returns:
+ nothing
+
+ saves the TM-scores in a local file
+ """
+ # print(res_range1,res_range2)
+
+ # get list of subdirectories
+ all_sub_dir_paths = glob.glob(str(data_dir)) # returns list of sub directory paths
+ tmscores_fs = []
+
+ # files found then continue
+ if len(all_sub_dir_paths) == 0:
+            pass  # NOTE(review): comment above says 'continue' but 'pass' is a no-op; loop over an empty list is harmless anyway
+
+ for subdir in all_sub_dir_paths:
+ preddir = Path(subdir)
+ # predicted dir doesn't exist then continue
+ if not preddir.exists():
+                pass  # NOTE(review): 'pass' does not skip this iteration — 'continue' was likely intended; verify
+
+ # only comparing on one set of predicted models
+ # but with both PDBs/Folds
+ coords1, seq1 = self.get_coords(pdbfile1, res_range1)
+ tmscore_lst1 = self.get_tmscore(coords1, seq1, preddir, pred_range) # wrt pdb1
+ tmp_tm_fs = tmscore_lst1
+ tmscores_fs.append(tmp_tm_fs)
+
+ for subdir in all_sub_dir_paths:
+ preddir = Path(subdir)
+
+ # predicted dir doesn't exist then continue
+ if not preddir.exists():
+ pass
+
+ # only comparing on one set of predicted models
+ # but with both PDBs/Folds
+ coords2, seq2 = self.get_coords(pdbfile2, res_range2)
+ tmscore_lst2 = self.get_tmscore(coords2, seq2, preddir, pred_range) # wrt pdb2
+ tmp_tm_fs = tmscore_lst2
+ tmscores_fs.append(tmp_tm_fs)
+
+ print(" ")
+ tmscores_fs = np.array(tmscores_fs)
+ print("tmscores_fs")
+ self.tmscores_fs = tmscores_fs
+
+ def __init__(self, pred_path, pdb1, pdb1_name, pdb2, pdb2_name):
+ # get numpy arrays for coords at the fold-switching region
+ # also return the seq in 1-letter code for the same
+
+        # input arguments: pred_path - directory of predicted models,
+        # pdb1/pdb2 - PDB file paths, pdb1_name/pdb2_name - IDs like '1nqd_A'
+
+ current_dir = os.getcwd() + "/"
+ # pred_dir = 'additional_sampling/' + pdb1_name
+ # pred_path = current_dir + pred_dir + '/'
+ # print(pred_path)
+ data_dir = Path(pred_path) # Path to the predicted models
+
+ # the range of the fold-switching region
+ range_file = current_dir + "range_fs_pairs_all.txt"
+
+ # convert this file into a dictionary for reference later
+ fs_res = {}
+
+ # The range_file file has the fold-switching residue ranges
+ # for the original PDB/PDB1, alternate PDB/PDB2
+ # Predicted model for PDB1, predicted model for PDB2
+ with open(range_file, "r") as Infile:
+ next(Infile) # skip header line "# pdb1,pdb2,pred1,pred2"
+ for line in Infile:
+ line = line.strip()
+ n1, n2, p1, p2, m1, m2 = line.split(",")
+ # the value of the dictionary is a tuple
+ # the first element of tuple is the fs range in the original PDB
+ # followed by the range in the predicted model
+ if n1 not in fs_res:
+ fs_res[n1] = (p1, m1)
+ if n2 not in fs_res:
+ fs_res[n2] = (p2, m2)
+
+ print("Running for pair ", pdb1_name, pdb2_name, end="..")
+ print(" ")
+ print("comparing predictions of ", pdb1_name, end="...")
+ print(" ")
+
+ try:
+ range_pdb1 = fs_res[
+ pdb1_name
+ ] # so if pdb1 is '1nqd_A', fs_res['1nqd_A']=('895-919', '1-33')
+ range_pdb2 = fs_res[
+ pdb2_name
+ ] # and if pdb2 is '1nqj_B', fs_res['1nqj_B']=('894-919', '1-33')
+        except:  # TODO(review): narrow to 'except KeyError:' — bare except also traps SystemExit/KeyboardInterrupt
+ print("check PDBIDs ", pdb1_name, pdb2_name)
+ sys.exit(1)
+
+ range_pred = range_pdb1[1]
+ self.run_for_models(pdb1, pdb2, data_dir, range_pred, range_pdb1[0], range_pdb2[0])
+
+
+# if __name__ == "__main__":
+#
+# import warnings
+# warnings.filterwarnings('ignore')
+#
+# parser = argparse.ArgumentParser()
+# parser.add_argument("--pdb1", type=str, help='PDB structure for the target crystal structure (target to be predicted)')
+# parser.add_argument("--pdb2", type=str, help='PDB structure for the alternative crystal structure')
+#
+# args = parser.parse_args()
+#
+# pdb1 = args.pdb1; pdb2 = args.pdb2
+# pdb1_name = pdb1.replace('.pdb',''); pdb2_name = pdb2.replace('.pdb','')
+#
+# TM_score_fs(pdb1, pdb1_name, pdb2, pdb2_name)
+#
diff --git a/cf_random/cli.py b/cf_random/cli.py
new file mode 100644
index 0000000..0e07aac
--- /dev/null
+++ b/cf_random/cli.py
@@ -0,0 +1,10 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Command-line interface for CF-random
+"""
+
+from .core.main import main
+
+if __name__ == "__main__":
+ main()
diff --git a/cf_random/core/__init__.py b/cf_random/core/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/code/main.py b/cf_random/core/main.py
similarity index 51%
rename from code/main.py
rename to cf_random/core/main.py
index d367bf9..ca4fc18 100644
--- a/code/main.py
+++ b/cf_random/core/main.py
@@ -1,66 +1,72 @@
-#!/upyMolsr/bin/env python3
+#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Jan 9 14:51:00 2024
-
+
@author: Myeongsang (Samuel) Lee
"""
-import re
-import Bio
+import argparse
+import glob
import os
-from os import listdir
-from os.path import isfile, join
import sys
-from pathlib import Path
+import warnings
+
import numpy as np
-from numpy import genfromtxt
-import matplotlib.pyplot as plt
-import glob
-import argparse
+warnings.filterwarnings("ignore")
-from pred_cal_tmscore_FS import *
-from pred_cal_tmscore_blind import *
-from pred_cal_tmscore_AC import *
-from cal_plddt_ACFS import *
-from PLOT_AC import *
-from PLOT_FS import *
-from search_w_foldseek_cluster import *
+from ..analysis.cal_plddt_ACFS import plddt_cal
+from ..plotting.plot_ac import plot_2D_scatter_AC
+from ..prediction.pred_cal_tmscore_ac import prediction_all_AC
+from ..prediction.pred_cal_tmscore_blind import prediction_all_blind
+from ..prediction.pred_cal_tmscore_fs import prediction_all
+from ..utils.search_foldseek_cluster import blind_screening
+from ..plotting.plot_fc import plot_2D_scatter
-if __name__ == "__main__":
-
- import warnings
- warnings.filterwarnings('ignore')
+def main():
######################################################################################################
###### initiallization pdb format (removing HETATM)
- #os.system("for i in *pdb;do echo $i;sed -i '/HETATM/d' $i;done")
-
-
+ # os.system("for i in *pdb;do echo $i;sed -i '/HETATM/d' $i;done")
######################################################################################################
###### initiallization and input
parser = argparse.ArgumentParser()
- parser.add_argument("--pdb1", type=str, help='PDB structure for the target crystal structure (target to be predicted)')
- parser.add_argument("--pdb2", type=str, help='PDB structure for the alternative crystal structure')
- parser.add_argument("--fname", type=str, help='put MSA folder name after colabsearch' )
- parser.add_argument("--fmname", type=str, help='put multimer MSA folder name after colabsearch' )
- parser.add_argument("--pname", type=str, help='job name for predicting blind mode' )
- parser.add_argument("--nMSA", type=str, help='number of samples for predicting the structure with MSA')
- parser.add_argument("--nENS", type=str, help='number of samples for predicting the structure for ensemble generation')
- parser.add_argument("--option", type=str, help='select prediction mode inAC, AC and FS e.g. AC = alterantive conformation or FS = fold-switching or inAC = increased sampling for predicting alternative conformation')
- parser.add_argument("--type", type=str, help='select model-type of Colabfold e.g. ptm, monomer, and , multimer')
+ parser.add_argument(
+ "--pdb1",
+ type=str,
+ help="PDB structure for the target crystal structure (target to be predicted)",
+ )
+ parser.add_argument(
+ "--pdb2", type=str, help="PDB structure for the alternative crystal structure"
+ )
+ parser.add_argument("--fname", type=str, help="put MSA folder name after colabsearch")
+ parser.add_argument("--fmname", type=str, help="put multimer MSA folder name after colabsearch")
+ parser.add_argument("--pname", type=str, help="job name for predicting blind mode")
+ parser.add_argument(
+ "--nMSA", type=str, help="number of samples for predicting the structure with MSA"
+ )
+ parser.add_argument(
+ "--nENS",
+ type=str,
+ help="number of samples for predicting the structure for ensemble generation",
+ )
+ parser.add_argument(
+ "--option",
+ type=str,
+ help="select prediction mode inAC, AC and FS e.g. AC = alterantive conformation or FS = fold-switching or inAC = increased sampling for predicting alternative conformation",
+ )
+ parser.add_argument(
+ "--type", type=str, help="select model-type of Colabfold e.g. ptm, monomer, and , multimer"
+ )
args = parser.parse_args()
-
-
- blind = 'blind_prediction'
- success = 'successed_prediction'
- fail = 'failed_prediction'
- multi = 'multimer_prediction'
- pwd = os.getcwd() + '/'
-
+ blind = "blind_prediction"
+ success = "successed_prediction"
+ fail = "failed_prediction"
+ multi = "multimer_prediction"
+ pwd = os.getcwd() + "/"
if args.option == "blind":
if args.pdb1 is None:
@@ -68,34 +74,38 @@
print("work name:", pdb1_name)
elif args.pdb1 is None and args.pname is None:
pdb1_name = args.fname
- pdb1_name = pdb1_name.replace('/','')
+ pdb1_name = pdb1_name.replace("/", "")
print("work name:", pdb1_name)
else:
pdb1_name = args.fname
- pdb1_name = pdb1_name.replace('/','')
+ pdb1_name = pdb1_name.replace("/", "")
print("work name:", pdb1_name)
elif args.pdb1 is None:
pdb1_name = args.fname
elif args.pdb1 is not None and args.pdb2 is not None:
- pdb1 = args.pdb1; pdb2 = args.pdb2
- pdb1_name = pdb1.replace('.pdb',''); pdb2_name = pdb2.replace('.pdb','')
+ pdb1 = args.pdb1
+ pdb2 = args.pdb2
+ pdb1_name = pdb1.replace(".pdb", "")
+ pdb2_name = pdb2.replace(".pdb", "")
print(pdb1_name, pdb2_name)
-
- #if int(args.nMSA) == 0 and int(args.nENS) == 0:
- if args.nMSA is None and args.nENS is None:
- nMSA = 0; nENS = 0;
- elif args.nMSA is not None and args.nENS is not None:
- nMSA = int(args.nMSA); nENS = int(args.nENS)
- elif args.nMSA is None and args.nENS is not None:
- nMSA = 0; nENS = int(args.nENS)
+ # if int(args.nMSA) == 0 and int(args.nENS) == 0:
+ if args.nMSA is None and args.nENS is None:
+ nMSA = 0
+ nENS = 0
+ elif args.nMSA is not None and args.nENS is not None:
+ nMSA = int(args.nMSA)
+ nENS = int(args.nENS)
+ elif args.nMSA is None and args.nENS is not None:
+ nMSA = 0
+ nENS = int(args.nENS)
elif args.nMSA is not None and args.nENS is None:
- nMSA = int(args.nMSA); nENS = 0
+ nMSA = int(args.nMSA)
+ nENS = 0
else:
print("Please put correct option of nMSA or nENS")
exit()
-
if args.fname is None and args.fmname is None:
print("Please put MSA folder and file for monomer prediction")
sys.exit()
@@ -103,12 +113,11 @@
print("Please put MSA folder and file for monomer prediction")
sys.exit()
elif args.fname is not None and args.fmname is None:
- search_dir = ' ' + pwd + args.fname; search_multi_dir = 0
+ search_dir = " " + pwd + args.fname
+ search_multi_dir = 0
elif args.fname is not None and args.fmname is not None:
- search_dir = ' ' + pwd + args.fname; search_multi_dir = ' ' + pwd + args.fmname;
-
-
-
+ search_dir = " " + pwd + args.fname
+ search_multi_dir = " " + pwd + args.fmname
### model-type identification
model_type = []
@@ -119,11 +128,11 @@
elif args.type == "multimer" and args.option == "blind":
model_type = "alphafold2_multimer_v3"
if not os.path.exists(multi):
- os.mkdir(multi)
+ os.mkdir(multi)
elif args.type == "multimer":
### check how many chains in a multimer
- TER_count = 0
- with open(pdb1, 'r') as file:
+ TER_count = 0
+ with open(pdb1, "r") as file:
for line in file:
TER = line.split()
TER_count += TER.count("TER")
@@ -138,115 +147,93 @@
print("Please put correct model-type option")
exit()
+ pwd = os.getcwd() + "/"
+ search_dir = " " + pwd + args.fname
+ # Create main output directories
+ for directory in [fail, success, multi]:
+ if not os.path.exists(directory):
+ os.mkdir(directory)
- pwd = os.getcwd() + '/'
- search_dir = ' ' + pwd + args.fname
-
-
-
-
-
- if not os.path.exists(fail):
- os.mkdir(fail)
- else:
- fail_dir_count = 0
- for root_dir, cur_dir, files in os.walk(pwd + fail + '/' + pdb1_name + '/'):
- fail_dir_count += len(cur_dir)
-
- if os.path.exists(fail + '/' + pdb1_name):
- if fail_dir_count >= 8:
- print("Prediction was already done")
- else:
- print("Folder is already created and cleaning existed subfolders")
- rm_pre_folders = 'rm -rf ' + fail + '/' + pdb1_name + '/'
- os.system(rm_pre_folders)
- else:
- pass
-
-
-
- if not os.path.exists(multi):
- os.mkdir(multi)
- else:
- succ_dir_count = 0
- for root_dir, cur_dir, files in os.walk(pwd + multi + '/' + pdb1_name + '/'):
- succ_dir_count += len(cur_dir)
-
- if os.path.exists(multi + '/' + pdb1_name):
- if succ_dir_count >= 8:
- print("Prediction was already done")
- else:
- print("Folder is already created and cleaning existed subfolders")
- rm_pre_folders = 'rm -rf ' + multi + '/' + pdb1_name + '/'
- os.system(rm_pre_folders)
- else:
- pass
-
+ # Ensure subdirectories exist for this prediction
+ fail_pdb_dir = fail + "/" + pdb1_name
+ success_pdb_dir = success + "/" + pdb1_name
+ multi_pdb_dir = multi + "/" + pdb1_name
+ for pdb_dir in [fail_pdb_dir, success_pdb_dir, multi_pdb_dir]:
+ if not os.path.exists(pdb_dir):
+ os.makedirs(pdb_dir, exist_ok=True)
+ # Count existing subdirectories
+ fail_dir_count = 0
+ for root_dir, cur_dir, files in os.walk(pwd + fail_pdb_dir + "/"):
+ fail_dir_count += len(cur_dir)
+ succ_dir_count = 0
+ for root_dir, cur_dir, files in os.walk(pwd + success_pdb_dir + "/"):
+ succ_dir_count += len(cur_dir)
+ # Clean up incomplete predictions if needed
+ if fail_dir_count > 0 and fail_dir_count < 8:
+ print("Folder is already created and cleaning existed subfolders")
+ os.system(f"rm -rf {fail_pdb_dir}/")
+ os.makedirs(fail_pdb_dir, exist_ok=True)
if args.option == "AC":
print("Predicting alternative conformations")
######################################################################################################
###### running prediction using full- and shallow random-MSA
- if not os.path.exists(success):
- os.mkdir(success)
- else:
+ # Directories already created above, just check status
+ if succ_dir_count >= 8:
+ print("Prediction was already done")
+ elif succ_dir_count > 0:
+ print("Folder is already created and cleaning existed subfolders")
+ os.system(f"rm -rf {success_pdb_dir}/")
+ os.makedirs(success_pdb_dir, exist_ok=True)
succ_dir_count = 0
- for root_dir, cur_dir, files in os.walk(pwd + success + '/' + pdb1_name + '/'):
- succ_dir_count += len(cur_dir)
-
- if os.path.exists(success + '/' + pdb1_name):
- if succ_dir_count >= 8:
- print("Prediction was already done")
- else:
- print("Folder is already created and cleaning existed subfolders")
- rm_pre_folders = 'rm -rf ' + success + '/' + pdb1_name + '/'
- os.system(rm_pre_folders)
- else:
- pass
-
-
-
-
- if os.path.exists(success + '/' + pdb1_name) and succ_dir_count >= 8:
- print("Predictions including full- and random-MSA were already done")
- elif os.path.exists(multi + '/' + pdb1_name) and succ_dir_count >= 8:
+ if succ_dir_count >= 8:
print("Predictions including full- and random-MSA were already done")
else:
-
- rm_pre_folders = 'rm -rf ' + success + '/' + pdb1_name + '/'; os.system(rm_pre_folders)
- rm_pre_folders = 'rm -rf ' + multi + '/' + pdb1_name + '/' ; os.system(rm_pre_folders)
-
-
- pred_1st_all = prediction_all_AC(pdb1, pdb1_name, pdb2, pdb2_name, search_dir, nMSA, model_type, search_multi_dir)
+ # Clean and prepare directories
+ os.system(f"rm -rf {success_pdb_dir}/")
+ os.system(f"rm -rf {multi_pdb_dir}/")
+ os.makedirs(success_pdb_dir, exist_ok=True)
+ os.makedirs(multi_pdb_dir, exist_ok=True)
+
+ pred_1st_all = prediction_all_AC(
+ pdb1, pdb1_name, pdb2, pdb2_name, search_dir, nMSA, model_type, search_multi_dir
+ )
shallow_MSA_size = []
shallow_MSA_size = np.append(shallow_MSA_size, pred_1st_all.size_selection)
print(" ")
print("Specific size of shallow random MSA is similar to full-MSA")
print(shallow_MSA_size)
- np.savetxt('selected_MSA-size_' + pdb1_name + '.csv', shallow_MSA_size)
-
+ np.savetxt("selected_MSA-size_" + pdb1_name + ".csv", shallow_MSA_size)
######################################################################################################
##### calculate plddt of initial predictions
if model_type == "alphafold2_multimer_v3":
- list_org_samplings = glob.glob( str(pwd) + str(multi) + '/' + str(pdb1_name) + '/*full_rand*/')
- list_ran_samplings = glob.glob( str(pwd) + str(multi) + '/' + str(pdb1_name) + '/*max*/')
+ list_org_samplings = glob.glob(
+ str(pwd) + str(multi) + "/" + str(pdb1_name) + "/*full_rand*/"
+ )
+ list_ran_samplings = glob.glob(str(pwd) + str(multi) + "/" + str(pdb1_name) + "/*max*/")
- full = 'full-MSA'; random = 'random-MSA' ;
+ full = "full-MSA"
+ random = "random-MSA"
plddt_cal(list_org_samplings, full, pdb1_name, nMSA, nENS, model_type)
plddt_cal(list_ran_samplings, random, pdb1_name, nMSA, nENS, model_type)
else:
- list_org_samplings = glob.glob( str(pwd) + str(success) + '/' + str(pdb1_name) + '/*full_rand*/')
- list_ran_samplings = glob.glob( str(pwd) + str(success) + '/' + str(pdb1_name) + '/*max*/')
-
- full = 'full-MSA'; random = 'random-MSA' ;
+ list_org_samplings = glob.glob(
+ str(pwd) + str(success) + "/" + str(pdb1_name) + "/*full_rand*/"
+ )
+ list_ran_samplings = glob.glob(
+ str(pwd) + str(success) + "/" + str(pdb1_name) + "/*max*/"
+ )
+
+ full = "full-MSA"
+ random = "random-MSA"
plddt_cal(list_org_samplings, full, pdb1_name, nMSA, nENS, model_type)
plddt_cal(list_ran_samplings, random, pdb1_name, nMSA, nENS, model_type)
@@ -254,39 +241,35 @@
##### plot the 2D-scatter plot of TM-scores with pLDDT
plot_2D_scatter_AC(full, random, pdb1, pdb1_name, pdb2, pdb2_name, nMSA, nENS, model_type)
-
-
-
-
-
elif args.option == "FS":
if not os.path.exists(success):
os.mkdir(success)
else:
succ_dir_count = 0
- for root_dir, cur_dir, files in os.walk(pwd + success + '/' + pdb1_name + '/'):
+ for root_dir, cur_dir, files in os.walk(pwd + success + "/" + pdb1_name + "/"):
succ_dir_count += len(cur_dir)
- if os.path.exists(success + '/' + pdb1_name):
+ if os.path.exists(success + "/" + pdb1_name):
if succ_dir_count >= 8:
print("Prediction was already done")
else:
print("Folder is already created and cleaning existed subfolders")
- rm_pre_folders = 'rm -rf ' + success + '/' + pdb1_name + '/'
+ rm_pre_folders = "rm -rf " + success + "/" + pdb1_name + "/"
os.system(rm_pre_folders)
else:
pass
-
print("Predicting fold-swithcing models")
######################################################################################################
###### running prediction using full- and shallow random-MSA
- if os.path.exists(success + '/' + pdb1_name) and succ_dir_count >= 8:
+ if os.path.exists(success + "/" + pdb1_name) and succ_dir_count >= 8:
print("Predictions including full- and random-MSA were already done")
- elif os.path.exists(multi + '/' + pdb1_name) and succ_dir_count >= 8:
+ elif os.path.exists(multi + "/" + pdb1_name) and succ_dir_count >= 8:
print("Predictions including full- and random-MSA were already done")
else:
- pred_1st_all = prediction_all(pdb1, pdb1_name, pdb2, pdb2_name, search_dir, nMSA, model_type, search_multi_dir)
+ pred_1st_all = prediction_all(
+ pdb1, pdb1_name, pdb2, pdb2_name, search_dir, nMSA, model_type, search_multi_dir
+ )
shallow_MSA_size = []
if args.type != "multimer":
shallow_MSA_size = np.append(shallow_MSA_size, pred_1st_all.size_selection)
@@ -295,107 +278,107 @@
print(" ")
print("Specific size of shallow random MSA is similar to full-MSA")
print(shallow_MSA_size)
- np.savetxt('selected_MSA-size_' + pdb1_name + '.csv', shallow_MSA_size)
-
-
-
+ np.savetxt("selected_MSA-size_" + pdb1_name + ".csv", shallow_MSA_size)
+
######################################################################################################
##### calculate plddt of initial predictions
if model_type == "alphafold2_multimer_v3":
- list_org_samplings = glob.glob( str(pwd) + str(multi) + '/' + str(pdb1_name) + '/*full_rand*/')
- list_ran_samplings = glob.glob( str(pwd) + str(multi) + '/' + str(pdb1_name) + '/*max*/')
+ list_org_samplings = glob.glob(
+ str(pwd) + str(multi) + "/" + str(pdb1_name) + "/*full_rand*/"
+ )
+ list_ran_samplings = glob.glob(str(pwd) + str(multi) + "/" + str(pdb1_name) + "/*max*/")
- full = 'full-MSA'; random = 'random-MSA' ;
+ full = "full-MSA"
+ random = "random-MSA"
plddt_cal(list_org_samplings, full, pdb1_name, nMSA, nENS, model_type)
plddt_cal(list_ran_samplings, random, pdb1_name, nMSA, nENS, model_type)
else:
- list_org_samplings = glob.glob( str(pwd) + str(success) + '/' + str(pdb1_name) + '/*full_rand*/')
- list_ran_samplings = glob.glob( str(pwd) + str(success) + '/' + str(pdb1_name) + '/*max*/')
-
- full = 'full-MSA'; random = 'random-MSA' ;
+ list_org_samplings = glob.glob(
+ str(pwd) + str(success) + "/" + str(pdb1_name) + "/*full_rand*/"
+ )
+ list_ran_samplings = glob.glob(
+ str(pwd) + str(success) + "/" + str(pdb1_name) + "/*max*/"
+ )
+
+ full = "full-MSA"
+ random = "random-MSA"
plddt_cal(list_org_samplings, full, pdb1_name, nMSA, nENS, model_type)
plddt_cal(list_ran_samplings, random, pdb1_name, nMSA, nENS, model_type)
-
-
-
-
+
######################################################################################################
##### plot the 2D-scatter plot of TM-scores with pLDDT
if model_type == "alphafold2_multimer_v3":
- plot_2D_scatter_AC(full, random, pdb1, pdb1_name, pdb2, pdb2_name, nMSA, nENS, model_type)
+ plot_2D_scatter_AC(
+ full, random, pdb1, pdb1_name, pdb2, pdb2_name, nMSA, nENS, model_type
+ )
else:
plot_2D_scatter(full, random, pdb1, pdb1_name, pdb2, pdb2_name, nMSA, nENS)
-
-
-
-
-
elif args.option == "blind":
print("Predicting fold-swithcing proteins without crystal structures of pdbs")
######################################################################################################
###### check previous predictions were performed or not
if not os.path.exists(blind):
os.mkdir(blind)
- else:
- blind_dir_count = 0
- for root_dir, cur_dir, files in os.walk(pwd + blind + '/' + pdb1_name + '/'):
- blind_dir_count += len(cur_dir)
-
- if os.path.exists(blind + '/' + pdb1_name):
+
+ # Create the subdirectory for this prediction
+ blind_pdb_dir = blind + "/" + pdb1_name
+ if not os.path.exists(blind_pdb_dir):
+ os.makedirs(blind_pdb_dir, exist_ok=True)
+
+ blind_dir_count = 0
+ for root_dir, cur_dir, files in os.walk(pwd + blind_pdb_dir + "/"):
+ blind_dir_count += len(cur_dir)
+
+ if os.path.exists(blind_pdb_dir):
if blind_dir_count >= 8:
print("Prediction was already done")
else:
print("Folder is already created and cleaning existed subfolders")
- rm_pre_folders = 'rm -rf ' + blind + '/' + pdb1_name + '/'
+ rm_pre_folders = "rm -rf " + blind_pdb_dir + "/"
os.system(rm_pre_folders)
- else:
- pass
-
-
+ os.makedirs(blind_pdb_dir, exist_ok=True)
###### running prediction using full- and shallow random-MSA
- blind_pred_path = 'blind_prediction/' + pdb1_name
+ blind_pred_path = "blind_prediction/" + pdb1_name
print(blind_pred_path)
- if os.path.exists(blind + '/' + pdb1_name) and blind_dir_count >= 8:
+ if os.path.exists(blind + "/" + pdb1_name) and blind_dir_count >= 8:
print("Predictions including full- and random-MSA were already done")
-
fseek_file_count = 0
- for root_dir, cur_dir, files in os.walk(pwd + blind + '/' + pdb1_name + '/'):
+ for root_dir, cur_dir, files in os.walk(pwd + blind + "/" + pdb1_name + "/"):
fseek_file_count += len(files)
print(fseek_file_count)
- #if fseek_file_count == 856: ##(107 * 8) 107 includes foldseek file and 8 means the numbers of prediction folders
- if fseek_file_count >= 640: ##672
- print(" "); print("Foldseek search was done")
+ # if fseek_file_count == 856: ##(107 * 8) 107 includes foldseek file and 8 means the numbers of prediction folders
+ if fseek_file_count >= 640: ##672
+ print(" ")
+ print("Foldseek search was done")
#### performing the PCA calculation with RMSD
blind_screening(pdb1_name, blind_pred_path)
else:
- #running_foldseek_all(pdb1_name)
+ # running_foldseek_all(pdb1_name)
#### performing the PCA calculation with RMSD
blind_screening(pdb1_name, blind_pred_path)
-
-
else:
prediction_all_blind(pdb1_name, search_dir, nMSA, model_type)
print(" ")
print("Finished running for prediction using full- and shallow random-MSAs")
-
+
print(" ")
print("Running Foldseek to find the relatedcrystal structures")
- #running_foldseek_all(pdb1_name)
+ # running_foldseek_all(pdb1_name)
#### performing the PCA calculation with RMSD
blind_screening(pdb1_name, blind_pred_path)
-
-
-
-
else:
print("Please type correct option")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/cf_random/plotting/__init__.py b/cf_random/plotting/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/cf_random/plotting/plot_ac.py b/cf_random/plotting/plot_ac.py
new file mode 100644
index 0000000..20891e2
--- /dev/null
+++ b/cf_random/plotting/plot_ac.py
@@ -0,0 +1,161 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Thu Feb 22 13:40:00 2024
+
+@author: Myeongsang (Samuel) Lee
+"""
+
+import os
+
+import numpy as np
+from matplotlib import pyplot as plt
+from numpy import genfromtxt
+
+
+class plot_2D_scatter_AC:
+ def __init__(
+ self, full_cate, random_cate, pdb1, pdb1_name, pdb2, pdb2_name, nMSA, nENS, model_type
+ ):
+        ##### load TM-scores both full- and random-MSA
+ TMs_full = genfromtxt("TMScore_" + full_cate + "_" + pdb1_name + ".csv", delimiter=" ")
+ TMs_random = genfromtxt("TMScore_" + random_cate + "_" + pdb1_name + ".csv", delimiter=" ")
+
+        ############ load pLDDT scores both full- and random-MSA
+ plddt_full = genfromtxt("plddt_" + full_cate + "_" + pdb1_name + ".csv", delimiter=" ")
+ plddt_random = genfromtxt("plddt_" + random_cate + "_" + pdb1_name + ".csv", delimiter=" ")
+
+ #################################################################
+ ########### getting the TM-score values of fold-switching region
+
+        os.getcwd() + "/"  # NOTE(review): result is discarded — dead expression, safe to delete
+
+ ######### plotting the TM-score values as 2D scatter plot
+ print(" ")
+ print("Size of column: ", TMs_random.shape[-1])
+ print("Size of row: ", TMs_random.shape[0])
+ print("Dimension: ", TMs_random.ndim)
+
+ print(" ")
+ print(TMs_random)
+ print(" ")
+ print(TMs_full)
+
+ print("checking plddt")
+ print(plddt_full)
+ print(plddt_random)
+
+ plddt_random = np.reshape(plddt_random, (7, (nMSA + 5) * 5))
+ print(plddt_random)
+
+ if model_type != "alphafold2_multimer_v3":
+ TMs_full_resh = np.reshape(TMs_full, ((((nMSA + 5) * 2), 5)))
+
+ # f1 = np.concatenate((TMs_addition[0:(nENS + 20), :], TMs_full_resh[0:(nMSA + 5), :]), axis=0)
+ # print(f1)
+ # f2 = np.concatenate((TMs_addition[(nENS + 20):(nENS + 20) * 2, :], TMs_full_resh[(nMSA + 5):(nMSA + 5) * 2, :]), axis=0)
+ # print(f2)
+ else:
+ TMs_full_resh = np.reshape(TMs_full, (((nMSA + 5) * 2), 5))
+
+ # f1 = np.concatenate((TMs_addition[0:(nENS + 20), :], TMs_full_resh[0:(nMSA + 5), :]), axis=0)
+ # print(f1)
+ # f2 = np.concatenate((TMs_addition[(nENS + 20):(nENS + 20) * 2, :], TMs_full_resh[(nMSA + 5):(nMSA + 5) * 2, :]), axis=0)
+ # print(f2)
+
+ if model_type != "alphafold2_multimer_v3":
+ # if np.all(f1 > f2) or np.all(f1 < f2):
+ # print("Prediction is biased"); sys.exit()
+ # else:
+ # print("Prediction is not biased")
+
+ plt.figure(0)
+ for ii in range(0, int(TMs_random.shape[0] / 2)):
+ plt.scatter(
+ TMs_random[ii * 2, :],
+ TMs_random[(ii * 2 + 1), :],
+ c=plddt_random[ii, :],
+ cmap="rocket_r",
+ vmin=50,
+ vmax=100,
+ s=35,
+ marker="o",
+ )
+
+ clb = plt.colorbar()
+ clb.ax.tick_params(labelsize=15)
+
+ plt.scatter(
+ TMs_full_resh[0 : (nMSA + 5), :],
+ TMs_full_resh[(nMSA + 5) : (nMSA + 5) * 2, :],
+ c=plddt_full,
+ cmap="rocket_r",
+ vmin=50,
+ vmax=100,
+ s=35,
+ marker="o",
+ )
+
+ x = [0, 1]
+ y = [0, 1]
+
+ plt.ylim(0, 1)
+ plt.xlim(0, 1)
+
+ plt.plot(x, y, linestyle="dashed", color="black")
+
+ plt.xticks(fontsize=15)
+ plt.yticks(fontsize=15)
+
+ plt.xlabel("TM-Score similar to fold1(" + pdb1_name + ")", fontsize=15)
+ plt.ylabel("TM-score similar to fold2(" + pdb2_name + ")", fontsize=15)
+ plt.savefig("TMscore_" + full_cate + "_" + pdb1_name + ".png", transparent=True)
+
+ else:
+ ##print("Not determine for the multimer mode")
+ # if np.all(f1 > f2) or np.all(f1 < f2):
+ # print("Prediction is biased"); sys.exit()
+ # else:
+ # print("Prediction is not biased")
+
+ plt.figure(0)
+ for ii in range(0, int(TMs_random.shape[0] / 2)):
+ plt.scatter(
+ TMs_random[ii * 2, :],
+ TMs_random[(ii * 2 + 1), :],
+ c=plddt_random[ii, :],
+ cmap="rocket_r",
+ vmin=50,
+ vmax=100,
+ s=35,
+ marker="o",
+ )
+
+ clb = plt.colorbar()
+ clb.ax.tick_params(labelsize=15)
+
+ plt.scatter(
+ TMs_full_resh[0 : (nMSA + 5), :],
+ TMs_full_resh[(nMSA + 5) : (nMSA + 5) * 2, :],
+ c=plddt_full,
+ cmap="rocket_r",
+ vmin=50,
+ vmax=100,
+ s=35,
+ marker="o",
+ )
+
+ x = [0, 1]
+ y = [0, 1]
+
+ plt.ylim(0, 1)
+ plt.xlim(0, 1)
+
+ plt.plot(x, y, linestyle="dashed", color="black")
+
+ plt.xticks(fontsize=15)
+ plt.yticks(fontsize=15)
+
+ plt.xlabel("TM-Score similar to fold1(" + pdb1_name + ")", fontsize=15)
+ plt.ylabel("TM-score similar to fold2(" + pdb2_name + ")", fontsize=15)
+ plt.savefig("TMscore_" + full_cate + "_" + pdb1_name + ".png", transparent=True)
diff --git a/cf_random/plotting/plot_fc.py b/cf_random/plotting/plot_fc.py
new file mode 100644
index 0000000..95775e1
--- /dev/null
+++ b/cf_random/plotting/plot_fc.py
@@ -0,0 +1,140 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Thu Feb 22 13:40:00 2024
+
+@author: Myeongsang (Samuel) Lee
+"""
+
+import os
+
+import numpy as np
+from matplotlib import pyplot as plt
+from numpy import genfromtxt
+
+
+class plot_2D_scatter:
+ def __init__(self, full_cate, random_cate, pdb1, pdb1_name, pdb2, pdb2_name, nMSA, nENS):
+        ##### load TM-scores both full- and random-MSA
+ TMs_full = genfromtxt("TMScore_" + full_cate + "_" + pdb1_name + ".csv", delimiter=" ")
+ TMs_random = genfromtxt("TMScore_" + random_cate + "_" + pdb1_name + ".csv", delimiter=" ")
+
+        ############ load pLDDT scores both full- and random-MSA
+ plddt_full = genfromtxt("plddt_" + full_cate + "_" + pdb1_name + ".csv", delimiter=" ")
+ plddt_random = genfromtxt("plddt_" + random_cate + "_" + pdb1_name + ".csv", delimiter=" ")
+
+ #################################################################
+ ########### getting the TM-score values of fold-switching region
+
+ os.getcwd() + "/"
+
+ fs_full_TMs = genfromtxt(
+ "TMScore_fs_" + full_cate + "_" + pdb1_name + ".csv", delimiter=" "
+ )
+ TMs_fs_full = fs_full_TMs
+ fs_random_TMs = genfromtxt(
+ "TMScore_fs_" + random_cate + "_" + pdb1_name + ".csv", delimiter=" "
+ )
+ TMs_fs_random = fs_random_TMs
+
+ ######### plotting the TM-score values as 2D scatter plot
+ print(" ")
+ print("Size of column: ", TMs_random.shape[-1])
+ print("Size of row: ", TMs_random.shape[0])
+ print("Dimension: ", TMs_random.ndim)
+
+ print(" ")
+ print(TMs_random)
+ print(" ")
+ print(TMs_full)
+
+ print("checking plddt")
+ print(plddt_full)
+ print(plddt_random)
+
+ plddt_random = np.reshape(plddt_random, (7, (nMSA + 5) * 5))
+ np.reshape(TMs_fs_full, ((((nMSA + 5) * 2), 5)))
+
+ plt.figure(0)
+
+ for ii in range(0, int(TMs_random.shape[0] / 2)):
+ plt.scatter(
+ TMs_random[ii * 2, :],
+ TMs_random[(ii * 2 + 1), :],
+ c=plddt_random[ii, :],
+ cmap="rocket_r",
+ vmin=50,
+ vmax=100,
+ s=35,
+ marker="o",
+ )
+
+ clb = plt.colorbar()
+ clb.ax.tick_params(labelsize=15)
+
+ plt.scatter(
+ TMs_full[0, :],
+ TMs_full[1, :],
+ c=plddt_full,
+ cmap="plasma",
+ vmin=50,
+ vmax=100,
+ s=35,
+ marker="o",
+ )
+
+ x = [0, 1]
+ y = [0, 1]
+
+ plt.ylim(0, 1)
+ plt.xlim(0, 1)
+
+ plt.plot(x, y, linestyle="dashed", color="black")
+
+ plt.xticks(fontsize=15)
+ plt.yticks(fontsize=15)
+
+ plt.xlabel("TM-Score similar to fold1(" + pdb1_name + ")", fontsize=15)
+ plt.ylabel("TM-score similar to fold2(" + pdb2_name + ")", fontsize=15)
+ plt.savefig("TMscore_" + full_cate + "_" + pdb1_name + ".png", transparent=True)
+
+ plt.figure(1)
+ for ii in range(0, int(TMs_random.shape[0] / 2)):
+ plt.scatter(
+ TMs_fs_random[ii * 2, :],
+ TMs_fs_random[(ii * 2 + 1), :],
+ c=plddt_random[ii, :],
+ cmap="plasma",
+ vmin=50,
+ vmax=100,
+ s=35,
+ marker="o",
+ )
+
+ x = [0.0, 1]
+ y = [0.0, 1]
+ plt.ylim(0.0, 1)
+ plt.xlim(0.0, 1)
+
+ dlb = plt.colorbar()
+ dlb.ax.tick_params(labelsize=15)
+
+ plt.scatter(
+ TMs_fs_full[0, :],
+ TMs_fs_full[1, :],
+ c=plddt_full,
+ cmap="plasma",
+ vmin=50,
+ vmax=100,
+ s=35,
+ marker="o",
+ )
+
+ plt.plot(x, y, linestyle="dashed", color="black")
+
+ plt.xticks(fontsize=15)
+ plt.yticks(fontsize=15)
+
+ plt.xlabel("TM-Score similar to fold1(" + pdb1_name + ")", fontsize=15)
+ plt.ylabel("TM-score similar to fold2(" + pdb2_name + ")", fontsize=15)
+ plt.savefig("TMscore_fs-region_" + full_cate + "_" + pdb1_name + ".png", transparent=True)
diff --git a/cf_random/prediction/__init__.py b/cf_random/prediction/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/code/pred_cal_tmscore_AC.py b/cf_random/prediction/pred_cal_tmscore_ac.py
similarity index 54%
rename from code/pred_cal_tmscore_AC.py
rename to cf_random/prediction/pred_cal_tmscore_ac.py
index 0789e8a..a683040 100644
--- a/code/pred_cal_tmscore_AC.py
+++ b/cf_random/prediction/pred_cal_tmscore_ac.py
@@ -2,62 +2,57 @@
# -*- coding: utf-8 -*-
"""
Created on Wed Feb 21 14:51:00 2024
-
+
@author: Myeongsang (Samuel) Lee
"""
-import re
-import Bio
+
+import glob
import os
-from os import listdir
-from os.path import isfile, join
+import random
import sys
-from pathlib import Path
+
import numpy as np
-from numpy import genfromtxt
-import matplotlib.pyplot as plt
-import glob
-import random
-import argparse
-# call related modules of tmtools after installation
-from tmtools import tm_align
-from tmtools.io import get_structure, get_residue_data
-from tmtools.testing import get_pdb_path
# call calculating TM-scores of fs region
-from cal_tmscore_fs_only import *
+# from ..analysis.cal_tmscore_fs_only import *
# call converting the multimer as a single chain structure
-from convert_multi_single import *
+from ..utils.convert_multi_single import convert_m2s
# call colabfold for multimer option
-from pred_cal_tmscore_multimer import *
+from .pred_cal_tmscore_multimer import CF_MSA_max, CF_MSA_var
+
+# call related modules of tmtools after installation
+from tmtools import tm_align
+from tmtools.io import get_residue_data, get_structure
+from tmtools.testing import get_pdb_path
-class TM_score():
+class TM_score:
def __init__(self, pred_dir, pdb1, pdb1_name, pdb2, pdb2_name, model_type):
## loading reference pdb for TM-score
- pwd = os.getcwd() + '/'
+ pwd = os.getcwd() + "/"
tmscores = []
- tmscores_ord = []; tmscores_rev = []
-
- #files_list = sorted(glob.glob(str(pred_dir) + "/*_unrelaxed*pdb"))
+ tmscores_ord = []
+ tmscores_rev = []
+
+ # files_list = sorted(glob.glob(str(pred_dir) + "/*_unrelaxed*pdb"))
if model_type != "alphafold2_multimer_v3":
- files_list = (glob.glob(str(pred_dir) + "/*_unrelaxed*pdb"))
+ files_list = glob.glob(str(pred_dir) + "/*_unrelaxed*pdb")
print(files_list)
else:
#### convert the multimer file as a single structure
- check_files_list = (glob.glob(str(pred_dir) + "/rmTER*_unrelaxed*pdb"))
+ check_files_list = glob.glob(str(pred_dir) + "/rmTER*_unrelaxed*pdb")
print(check_files_list)
if not check_files_list:
convert_m2s(pred_dir, pdb1_name, pdb2_name)
- files_list = (glob.glob(str(pred_dir) + "/rmTER*_unrelaxed*pdb"))
+ files_list = glob.glob(str(pred_dir) + "/rmTER*_unrelaxed*pdb")
print(files_list)
else:
- files_list = (glob.glob(str(pred_dir) + "/rmTER*_unrelaxed*pdb"))
+ files_list = glob.glob(str(pred_dir) + "/rmTER*_unrelaxed*pdb")
print(files_list)
-
##### pdb1_name part
pdb1_dir = pwd + pdb1_name
r2 = get_structure(get_pdb_path(str(pdb1_dir)))
@@ -66,48 +61,45 @@ def __init__(self, pred_dir, pdb1, pdb1_name, pdb2, pdb2_name, model_type):
if len(files_list) == 0:
tmscores = [0.0, 0.0, 0.0, 0.0, 0.0]
return tmscores
-
+
for model in files_list:
- #modelpath = Path(model)
- #model = str(modelpath.parent) + "/" + modelpath.stem
- model = model.replace('.pdb','')
- #model = model.replace('_converted.pdb','_converted')
+ # modelpath = Path(model)
+ # model = str(modelpath.parent) + "/" + modelpath.stem
+ model = model.replace(".pdb", "")
+ # model = model.replace('_converted.pdb','_converted')
model = pwd + model
s = get_structure(get_pdb_path(model))
coords1, seq1 = get_residue_data(s)
res = tm_align(coords1, coords2, seq1, seq2)
- tmscore = round(res.tm_norm_chain1,5) # wrt to model
+ tmscore = round(res.tm_norm_chain1, 5) # wrt to model
tmscores_ord.append(tmscore)
res = tm_align(coords2, coords1, seq2, seq1)
- tmscore = round(res.tm_norm_chain1,5) # wrt to model
+ tmscore = round(res.tm_norm_chain1, 5) # wrt to model
tmscores_rev.append(tmscore)
-
- #print(tmscores[0:5])
+ # print(tmscores[0:5])
##### pdb2_name part
pdb2_dir = pwd + pdb2_name
r3 = get_structure(get_pdb_path(str(pdb2_dir)))
coords2, seq2 = get_residue_data(r3)
-
for model in files_list:
- #modelpath = Path(model)
- #model = str(modelpath.parent) + "/" + modelpath.stem
- model = model.replace('.pdb','')
- #model = model.replace('_converted.pdb','_converted')
+ # modelpath = Path(model)
+ # model = str(modelpath.parent) + "/" + modelpath.stem
+ model = model.replace(".pdb", "")
+ # model = model.replace('_converted.pdb','_converted')
model = pwd + model
s = get_structure(get_pdb_path(model))
coords1, seq1 = get_residue_data(s)
res = tm_align(coords1, coords2, seq1, seq2)
- tmscore = round(res.tm_norm_chain1,5) # wrt to model
+ tmscore = round(res.tm_norm_chain1, 5) # wrt to model
tmscores_ord.append(tmscore)
res = tm_align(coords2, coords1, seq2, seq1)
- tmscore = round(res.tm_norm_chain1,5) # wrt to model
+ tmscore = round(res.tm_norm_chain1, 5) # wrt to model
tmscores_rev.append(tmscore)
-
print("normal")
print(tmscores_ord)
print("reverse")
@@ -117,75 +109,95 @@ def __init__(self, pred_dir, pdb1, pdb1_name, pdb2, pdb2_name, model_type):
else:
tmscores = tmscores_rev
-
-
print(tmscores)
self.tmscores = tmscores
-
-class CF_MSA_max():
+class CF_MSA_max:
def __init__(self, search_dir, output_dir, pdb_name, rseed, num_seeds, model_type):
- command = 'colabfold_batch --num-seeds ' + str(num_seeds) + ' --model-type ' + str(model_type) + ' --random-seed ' + str(rseed) + search_dir + output_dir
+ command = (
+ "colabfold_batch --num-seeds "
+ + str(num_seeds)
+ + " --model-type "
+ + str(model_type)
+ + " --random-seed "
+ + str(rseed)
+ + search_dir
+ + output_dir
+ )
print(command)
os.system(command)
-
-
-class CF_MSA_var():
- def __init__(self, pdb1, pdb1_name, pdb2, pdb2_name, search_dir, output_dir, rseed, num_seeds, model_type):
+class CF_MSA_var:
+ def __init__(
+ self, pdb1, pdb1_name, pdb2, pdb2_name, search_dir, output_dir, rseed, num_seeds, model_type
+ ):
#### shallow MSA section
- max_msa = 1; ext_msa = 2
- random_seed = np.array(rseed) ## needed to remove future
+ max_msa = 1
+ ext_msa = 2
+ random_seed = np.array(rseed) ## needed to remove future
self.pdb1_name = pdb1_name
- TMscores_random = [] ## whole structure
+ TMscores_random = [] ## whole structure
for multi in (1, 2, 2, 2, 2, 2, 2):
max_msa = max_msa * multi
ext_msa = ext_msa * multi
-
- #### Colabfold part
- command = 'colabfold_batch --num-seeds ' + str(num_seeds) + ' --model-type ' + str(model_type) + ' --max-seq ' + str(max_msa) + ' --max-extra-seq ' + str(ext_msa) + search_dir + output_dir + str(rseed) + '_max_' + str(max_msa) + '_ext_' + str(ext_msa)
- print(command); os.system(command)
+ #### Colabfold part
+ command = (
+ "colabfold_batch --num-seeds "
+ + str(num_seeds)
+ + " --model-type "
+ + str(model_type)
+ + " --max-seq "
+ + str(max_msa)
+ + " --max-extra-seq "
+ + str(ext_msa)
+ + search_dir
+ + output_dir
+ + str(rseed)
+ + "_max_"
+ + str(max_msa)
+ + "_ext_"
+ + str(ext_msa)
+ )
+ print(command)
+ os.system(command)
def select_size(self, TMscores_random_alter, pdb1_name, pdb2_name, alt_name, num_seeds):
-
+
TMscores_random_reshape = TMscores_random_alter.reshape(14, num_seeds * 5)
TMscores_random_locat = np.zeros((7, num_seeds * 5))
-
+
#### finding locatnative pdb_name
-
+
if alt_name == pdb2_name:
- #for i in 1, 3, 5, 7, 9, 11, 13 in TM_scores:
+ # for i in 1, 3, 5, 7, 9, 11, 13 in TM_scores:
tmp_cnt = 0
for i in range(1, 14, 2):
print(TMscores_random_reshape[i, :])
TMscores_random_locat[tmp_cnt, :] = TMscores_random_reshape[i, :]
tmp_cnt = tmp_cnt + 1
else:
- #for i in 0, 2, 4, 6, 8, 10, 12 in TM_scores:
+ # for i in 0, 2, 4, 6, 8, 10, 12 in TM_scores:
tmp_cnt = 0
for i in range(0, 13, 2):
print(TMscores_random_reshape[i, :])
TMscores_random_locat[tmp_cnt, :] = TMscores_random_reshape[i, :]
tmp_cnt = tmp_cnt + 1
-
TMscore_data = TMscores_random_locat
TMscore_data = TMscores_random_locat.reshape(7, num_seeds * 5)
TMscore_data_sum = np.zeros((7, 1))
-
for ii in range(0, int(TMscore_data.shape[0])):
TMscore_data_sum[ii] = np.sum(TMscore_data[ii])
-
location = np.argmax(np.max(TMscore_data_sum, axis=1))
print("Selecting...")
@@ -193,17 +205,11 @@ def select_size(self, TMscores_random_alter, pdb1_name, pdb2_name, alt_name, num
TMscore_data = TMscores_random_alter
TMscore_data = TMscores_random_alter.reshape(14, num_seeds * 5)
-
- location_org = location
-
-
if alt_name == pdb2_name:
location = (location * 2) + 1
else:
- location = (location * 2)
-
+ location = location * 2
-
if alt_name == pdb2_name and np.any(TMscore_data[location, :] >= 0.5):
print(TMscore_data[location, :])
selection = int((location - 1) / 2)
@@ -211,152 +217,190 @@ def select_size(self, TMscores_random_alter, pdb1_name, pdb2_name, alt_name, num
elif alt_name == pdb1_name and np.any(TMscore_data[location, :] >= 0.5):
print(TMscore_data[location, :])
- selection = int(location / 2);
+ selection = int(location / 2)
self.selection = selection
else:
print("Predictions are bad")
print("Predictions of whole structure are bad")
- rm_folder_cmd = 'rm -rf successed_prediction/' + self.pdb1_name + '/'
+ rm_folder_cmd = "rm -rf successed_prediction/" + self.pdb1_name + "/"
print(rm_folder_cmd)
os.system(rm_folder_cmd)
sys.exit()
-
-
-class prediction_all_AC():
- def __init__(self, pdb1, pdb1_name, pdb2, pdb2_name, search_dir, nMSA, model_type, search_multi_dir):
+class prediction_all_AC:
+ def __init__(
+ self, pdb1, pdb1_name, pdb2, pdb2_name, search_dir, nMSA, model_type, search_multi_dir
+ ):
num_seeds = 5 + nMSA
- pwd = os.getcwd() + '/'
if model_type != "alphafold2_multimer_v3":
-
##### Perform prediction with full-length MSA
pre_random_seed = np.random.randint(0, 16, 1)
- random_seed_full_MSA = ''.join(map(str, pre_random_seed))
- output_dir = ' ' + pdb1_name + '_predicted_models_full_rand_' + str(random_seed_full_MSA)
- MSA_full = CF_MSA_max(search_dir, output_dir, pdb1_name, random_seed_full_MSA, num_seeds, model_type)
-
+ random_seed_full_MSA = "".join(map(str, pre_random_seed))
+ output_dir = (
+ " " + pdb1_name + "_predicted_models_full_rand_" + str(random_seed_full_MSA)
+ )
+ MSA_full = CF_MSA_max(
+ search_dir, output_dir, pdb1_name, random_seed_full_MSA, num_seeds, model_type
+ )
##### Perform prediction with random shallow MSA
##### check out varied-MSA with (msa-max: 1, 2, 4, 8, 16, 32, 64) (msa-extra: 2, 4, 8, 16, 32, 64, 128)
- output_dir = ' ' + pdb1_name + '_predicted_models_rand_'
+ output_dir = " " + pdb1_name + "_predicted_models_rand_"
random_seed = random.sample(range(100), 1)
- random_seed = ''.join(map(str, random_seed))
- MSA_var = CF_MSA_var(pdb1, pdb1_name, pdb2, pdb2_name, search_dir, output_dir, random_seed, num_seeds, model_type)
-
-
+ random_seed = "".join(map(str, random_seed))
+ MSA_var = CF_MSA_var(
+ pdb1,
+ pdb1_name,
+ pdb2,
+ pdb2_name,
+ search_dir,
+ output_dir,
+ random_seed,
+ num_seeds,
+ model_type,
+ )
####################################################################
##### check-out TM-scores of prediction with full-length-MSA (whole)
- pred_dir = pdb1_name + '_predicted_models_full_rand_' + str(random_seed_full_MSA) + '/'; print(pred_dir)
+ pred_dir = pdb1_name + "_predicted_models_full_rand_" + str(random_seed_full_MSA) + "/"
+ print(pred_dir)
MSA_full_TMscore = TM_score(pred_dir, pdb1, pdb1_name, pdb2, pdb2_name, model_type)
full_TMscore = np.array(MSA_full_TMscore.tmscores)
full_TMscore = full_TMscore.reshape(2, num_seeds * 5)
-
+
##### check-out the 1st prediction results are good or not
if np.any(full_TMscore[0, :] > 0.5) or np.any(full_TMscore[1, :] > 0.5):
if np.average(full_TMscore[0, :]) > np.average(full_TMscore[1, :]):
- ref_name = pdb1_name; alt_name = pdb2_name
+ ref_name = pdb1_name
+ alt_name = pdb2_name
else:
- ref_name = pdb2_name; alt_name = pdb1_name
+ ref_name = pdb2_name
+ alt_name = pdb1_name
elif np.all(full_TMscore[0, :] < 0.5) and np.all(full_TMscore[1, :] < 0.5):
- #If prediction is failed, move the folder to "failed_prediction""
- gen_dir = 'failed_prediction/' + pdb1_name
+ # If prediction is failed, move the folder to "failed_prediction""
+ gen_dir = "failed_prediction/" + pdb1_name
if not os.path.exists(gen_dir):
os.mkdir(gen_dir)
-
- mv_folder_cmd = 'mv ' + pdb1_name + '_predicted_models_full_rand_' + str(random_seed_full_MSA) + ' failed_prediction/' + pdb1_name
- print(mv_folder_cmd); os.system(mv_folder_cmd)
- print("All predictions with deep MSA are failed"); sys.exit()
+
+ mv_folder_cmd = (
+ "mv "
+ + pdb1_name
+ + "_predicted_models_full_rand_"
+ + str(random_seed_full_MSA)
+ + " failed_prediction/"
+ + pdb1_name
+ )
+ print(mv_folder_cmd)
+ os.system(mv_folder_cmd)
+ print("All predictions with deep MSA are failed")
+ sys.exit()
else:
if np.average(full_TMscore[0, :]) > np.average(full_TMscore[1, :]):
- ref_name = pdb1_name; alt_name = pdb2_name
+ ref_name = pdb1_name
+ alt_name = pdb2_name
else:
- ref_name = pdb2_name; alt_name = pdb1_name
-
+ ref_name = pdb2_name
+ alt_name = pdb1_name
print("Reference structure: ", ref_name)
print("Alternative structure: ", alt_name)
# save TM-score from full-length MSA
- np.savetxt('TMScore_full-MSA_' + pdb1_name + '.csv', full_TMscore, fmt='%2.3f')
+ np.savetxt("TMScore_full-MSA_" + pdb1_name + ".csv", full_TMscore, fmt="%2.3f")
# Directory section and save to successed_prediction folder
- gen_dir = 'successed_prediction/' + pdb1_name
+ gen_dir = "successed_prediction/" + pdb1_name
if not os.path.exists(gen_dir):
os.mkdir(gen_dir)
- mv_folder_cmd = 'mv ' + pred_dir + ' successed_prediction/' + pdb1_name
- print(mv_folder_cmd); os.system(mv_folder_cmd)
- print("Full-MSA prediction is tightly aligned to crystal structure"); print(" ")
-
-
-
+ mv_folder_cmd = "mv " + pred_dir + " successed_prediction/" + pdb1_name
+ print(mv_folder_cmd)
+ os.system(mv_folder_cmd)
+ print("Full-MSA prediction is tightly aligned to crystal structure")
+ print(" ")
################################################################
##### chech-out TM-scores of prediction with shallow random MSAs
- max_msa = 1; ext_msa = 2
+ max_msa = 1
+ ext_msa = 2
TMscores_random = []
-
+
for multi in (1, 2, 2, 2, 2, 2, 2):
max_msa = max_msa * multi
ext_msa = ext_msa * multi
-
- pred_dir = pdb1_name + '_predicted_models_rand_' + str(random_seed) + '_max_' + str(max_msa) + '_ext_' + str(ext_msa) + '/'; print(pred_dir)
- MSA_shallow_TMscore = TM_score(pred_dir, pdb1, pdb1_name, pdb2, pdb2_name, model_type)
+
+ pred_dir = (
+ pdb1_name
+ + "_predicted_models_rand_"
+ + str(random_seed)
+ + "_max_"
+ + str(max_msa)
+ + "_ext_"
+ + str(ext_msa)
+ + "/"
+ )
+ print(pred_dir)
+ MSA_shallow_TMscore = TM_score(
+ pred_dir, pdb1, pdb1_name, pdb2, pdb2_name, model_type
+ )
TMscores_random = np.append(TMscores_random, MSA_shallow_TMscore.tmscores)
-
-
- fin_pred_dir = pdb1_name + '_predicted_models_rand_' + str(random_seed) + '_max_*'
- TMscores_random_reshape = TMscores_random.reshape(14, num_seeds *5)
- TMscores_random_alter = np.zeros((7, num_seeds *5))
+ fin_pred_dir = pdb1_name + "_predicted_models_rand_" + str(random_seed) + "_max_*"
+ TMscores_random_reshape = TMscores_random.reshape(14, num_seeds * 5)
+ TMscores_random_alter = np.zeros((7, num_seeds * 5))
#### finding alternative pdb_name
if alt_name == pdb2_name:
- #for i in 1, 3, 5, 7, 9, 11, 13 in TM_scores:
+ # for i in 1, 3, 5, 7, 9, 11, 13 in TM_scores:
tmp_cnt = 0
for i in range(1, 14, 2):
print(TMscores_random_reshape[i, :])
TMscores_random_alter[tmp_cnt, :] = TMscores_random_reshape[i, :]
tmp_cnt = tmp_cnt + 1
else:
- #for i in 0, 2, 4, 6, 8, 10, 12 in TM_scores:
+ # for i in 0, 2, 4, 6, 8, 10, 12 in TM_scores:
tmp_cnt = 0
for i in range(0, 13, 2):
print(TMscores_random_reshape[i, :])
TMscores_random_alter[tmp_cnt, :] = TMscores_random_reshape[i, :]
tmp_cnt = tmp_cnt + 1
-
-
##### check out varied-MSA with (msa-max: 1, 2, 4, 8, 16, 32, 64) (msa-extra: 2, 4, 8, 16, 32, 64, 128)
if np.all(TMscores_random_alter) < 0.5:
print("All predictions are failed")
- mv_command = 'mv ' + fin_pred_dir + ' failed_prediction/' + pdb1_name
- print(mv_command); os.system(mv_command)
+ mv_command = "mv " + fin_pred_dir + " failed_prediction/" + pdb1_name
+ print(mv_command)
+ os.system(mv_command)
sys.exit()
-
+
else:
- print(" "); print("Finding optimal size of ramdon MSA...")
- MSA_var.select_size(TMscores_random_reshape, pdb1_name, pdb2_name, alt_name, num_seeds)
+ print(" ")
+ print("Finding optimal size of ramdon MSA...")
+ MSA_var.select_size(
+ TMscores_random_reshape, pdb1_name, pdb2_name, alt_name, num_seeds
+ )
size_selection = MSA_var.selection
print(size_selection)
self.size_selection = size_selection
## save all TM-scores from random MSA (1-2, 2-4, 4-8.... in order)
- np.savetxt('TMScore_random-MSA_' + pdb1_name + '.csv', TMscores_random_reshape, fmt='%2.3f')
-
- mv_command = 'mv ' + fin_pred_dir + ' successed_prediction/' + pdb1_name
- print(mv_command); os.system(mv_command)
+ np.savetxt(
+ "TMScore_random-MSA_" + pdb1_name + ".csv", TMscores_random_reshape, fmt="%2.3f"
+ )
+ mv_command = "mv " + fin_pred_dir + " successed_prediction/" + pdb1_name
+ print(mv_command)
+ os.system(mv_command)
elif model_type == "alphafold2_multimer_v3":
print("Currently working on")
- MSA_multi = prediction_all_multimer(pdb1_name, pdb2_name, search_dir, nMSA, model_type, search_multi_dir)
+ MSA_multi = prediction_all_multimer(
+ pdb1_name, pdb2_name, search_dir, nMSA, model_type, search_multi_dir
+ )
self.size_selection = MSA_multi.size_selection
- #sys.exit()
+ # sys.exit()
diff --git a/cf_random/prediction/pred_cal_tmscore_blind.py b/cf_random/prediction/pred_cal_tmscore_blind.py
new file mode 100644
index 0000000..25d6778
--- /dev/null
+++ b/cf_random/prediction/pred_cal_tmscore_blind.py
@@ -0,0 +1,126 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Feb 21 14:51:00 2024
+
+@author: Myeongsang (Samuel) Lee
+"""
+
+import os
+import random
+
+import numpy as np
+
+
+class CF_MSA_max:
+ def __init__(self, search_dir, output_dir, pdb_name, rseed, num_seeds, model_type):
+
+ command = (
+ "colabfold_batch --num-seeds "
+ + str(num_seeds)
+ + " --random-seed "
+ + str(rseed)
+ + " --model-type "
+ + str(model_type)
+ + search_dir
+ + output_dir
+ )
+ print(command)
+ result = os.system(command)
+ if result != 0:
+ print(f"Warning: colabfold_batch failed with exit code {result}")
+ print("Skipping move operation for this prediction")
+ return
+
+
+class CF_MSA_var:
+ def __init__(self, pdb1_name, search_dir, output_dir, rseed, num_seeds, model_type):
+ #### shallow MSA section
+        #### Global variable
+ max_msa = 1
+ ext_msa = 2
+ random_seed = np.array(rseed) ## needed to remove future
+
+ self.pdb1_name = pdb1_name
+
+ for ran_seed in random_seed:
+ max_msa = 1
+ ext_msa = 2
+
+ for multi in (1, 2, 2, 2, 2, 2, 2):
+ max_msa = max_msa * multi
+ ext_msa = ext_msa * multi
+
+ #### Colabfold part
+ command = (
+ "colabfold_batch --num-seeds "
+ + str(num_seeds)
+ + " --model-type "
+ + str(model_type)
+ + " --max-seq "
+ + str(max_msa)
+ + " --max-extra-seq "
+ + str(ext_msa)
+ + search_dir
+ + output_dir
+ + str(ran_seed)
+ + "_max_"
+ + str(max_msa)
+ + "_ext_"
+ + str(ext_msa)
+ )
+ print(command)
+ os.system(command)
+
+ fin_pred_dir = pdb1_name + "_predicted_models_rand_" + str(ran_seed) + "_max_*"
+ gen_dir = "blind_prediction/" + pdb1_name
+
+ if not os.path.exists(gen_dir):
+ os.makedirs(gen_dir)
+
+ mv_command = "mv " + fin_pred_dir + " blind_prediction/" + pdb1_name
+ print(mv_command)
+ result = os.system(mv_command)
+ if result != 0:
+ print(f"Warning: Failed to move prediction directory (exit code {result})")
+ print("The prediction directory may not have been created successfully.")
+ print("Check colabfold_batch output above for errors.")
+
+
+class prediction_all_blind:
+ def __init__(self, pdb1_name, search_dir, nMSA, model_type):
+
+ num_seeds = 5 + nMSA
+
+ pre_random_seed = np.random.randint(0, 16, 1)
+ random_seed = "".join(map(str, pre_random_seed))
+ print(random_seed)
+ output_dir = " " + pdb1_name + "_predicted_models_full_rand_" + str(random_seed)
+ print(output_dir)
+
+        ##### Perform prediction with full-length MSA
+ CF_MSA_max(search_dir, output_dir, pdb1_name, random_seed, num_seeds, model_type)
+
+ # Directory section
+ gen_dir = "blind_prediction/" + pdb1_name
+
+ if not os.path.exists(gen_dir):
+ os.mkdir(gen_dir)
+
+ pred_dir = pdb1_name + "_predicted_models_full_rand_" + str(random_seed) + "/"
+
+ # Check if prediction directory was created before trying to move it
+ if os.path.exists(pred_dir):
+ mv_folder_cmd = "mv " + pred_dir + " blind_prediction/" + pdb1_name
+ print(mv_folder_cmd)
+ result = os.system(mv_folder_cmd)
+ if result != 0:
+ print(f"Warning: Failed to move prediction directory (exit code {result})")
+ else:
+ print(f"Warning: Prediction directory {pred_dir} was not created")
+ print("This usually means colabfold_batch failed. Check error messages above.")
+
+ ##### check out varied-MSA with (msa-max: 1, 2, 4, 8, 16, 32, 64) (msa-extra: 2, 4, 8, 16, 32, 64, 128)
+ output_dir = " " + pdb1_name + "_predicted_models_rand_"
+ random_seed = random.sample(range(100), 1)
+ CF_MSA_var(pdb1_name, search_dir, output_dir, random_seed, num_seeds, model_type)
diff --git a/cf_random/prediction/pred_cal_tmscore_fs.py b/cf_random/prediction/pred_cal_tmscore_fs.py
new file mode 100644
index 0000000..2f6e16c
--- /dev/null
+++ b/cf_random/prediction/pred_cal_tmscore_fs.py
@@ -0,0 +1,541 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Feb 21 14:51:00 2024
+
+@author: Myeongsang (Samuel) Lee
+"""
+
+import glob
+import os
+import random
+import sys
+
+import numpy as np
+
+# call calculating TM-scores of fs region
+from ..analysis.cal_tmscore_fs_only import TM_score_fs
+
+# call converting the multimer as a single chain structure
+from ..utils.convert_multi_single import convert_m2s
+
+# call colabfold for multimer option
+from .pred_cal_tmscore_multimer_fs import prediction_all_multimer_FS
+
+# call related modules of tmtools after installation
+from tmtools import tm_align
+from tmtools.io import get_residue_data, get_structure
+from tmtools.testing import get_pdb_path
+
+
+class TM_score:
+ def __init__(self, pred_dir, pdb1, pdb1_name, pdb2, pdb2_name, model_type):
+
+ ## loading reference pdb for TM-score
+ pwd = os.getcwd() + "/"
+ tmscores = []
+ tmscores_ord = []
+ tmscores_rev = []
+
+ # files_list = sorted(glob.glob(str(pred_dir) + "/*_unrelaxed*pdb"))
+ if model_type != "alphafold2_multimer_v3":
+ files_list = glob.glob(str(pred_dir) + "/*_unrelaxed*pdb")
+ print(files_list)
+ else:
+ #### convert the multimer file as a single structure
+ check_files_list = glob.glob(str(pred_dir) + "/rmTER*_unrelaxed*pdb")
+ print(check_files_list)
+ if not check_files_list:
+ convert_m2s(pred_dir, pdb1_name, pdb2_name)
+ files_list = glob.glob(str(pred_dir) + "/rmTER*_unrelaxed*pdb")
+ print(files_list)
+ else:
+ files_list = glob.glob(str(pred_dir) + "/rmTER*_unrelaxed*pdb")
+ print(files_list)
+
+ ##### pdb1_name part
+ pdb1_dir = pwd + pdb1_name
+ r2 = get_structure(get_pdb_path(str(pdb1_dir)))
+ coords2, seq2 = get_residue_data(r2)
+
+ if len(files_list) == 0:
+ tmscores = [0.0, 0.0, 0.0, 0.0, 0.0]
+ return tmscores
+
+ for model in files_list:
+ # modelpath = Path(model)
+ # model = str(modelpath.parent) + "/" + modelpath.stem
+ model = model.replace(".pdb", "")
+ # model = model.replace('_converted.pdb','_converted')
+ model = pwd + model
+ s = get_structure(get_pdb_path(model))
+ coords1, seq1 = get_residue_data(s)
+ res = tm_align(coords1, coords2, seq1, seq2)
+ tmscore = round(res.tm_norm_chain1, 5) # wrt to model
+ tmscores_ord.append(tmscore)
+
+ res = tm_align(coords2, coords1, seq2, seq1)
+ tmscore = round(res.tm_norm_chain1, 5) # wrt to model
+ tmscores_rev.append(tmscore)
+
+ # print(tmscores[0:5])
+ ##### pdb2_name part
+ pdb2_dir = pwd + pdb2_name
+ r3 = get_structure(get_pdb_path(str(pdb2_dir)))
+ coords2, seq2 = get_residue_data(r3)
+
+ for model in files_list:
+ # modelpath = Path(model)
+ # model = str(modelpath.parent) + "/" + modelpath.stem
+ model = model.replace(".pdb", "")
+ # model = model.replace('_converted.pdb','_converted')
+ model = pwd + model
+ s = get_structure(get_pdb_path(model))
+ coords1, seq1 = get_residue_data(s)
+ res = tm_align(coords1, coords2, seq1, seq2)
+ tmscore = round(res.tm_norm_chain1, 5) # wrt to model
+ tmscores_ord.append(tmscore)
+
+ res = tm_align(coords2, coords1, seq2, seq1)
+ tmscore = round(res.tm_norm_chain1, 5) # wrt to model
+ tmscores_rev.append(tmscore)
+
+ print("normal")
+ print(tmscores_ord)
+ print("reverse")
+ print(tmscores_rev)
+ if np.max(tmscores_ord) > np.max(tmscores_rev):
+ tmscores = tmscores_ord
+ else:
+ tmscores = tmscores_rev
+
+ print(tmscores)
+ self.tmscores = tmscores
+
+
+class CF_MSA_max:
+ def __init__(self, search_dir, output_dir, pdb_name, rseed, num_seeds, model_type):
+
+ command = (
+ "colabfold_batch --num-seeds "
+ + str(num_seeds)
+ + " --model-type "
+ + str(model_type)
+ + " --random-seed "
+ + str(rseed)
+ + search_dir
+ + output_dir
+ )
+ print(command)
+ os.system(command)
+
+
+class CF_MSA_var:
+ def __init__(
+ self, pdb1, pdb1_name, pdb2, pdb2_name, search_dir, output_dir, rseed, num_seeds, model_type
+ ):
+
+ #### shallow MSA section
+ max_msa = 1
+ ext_msa = 2
+ random_seed = np.array(rseed) ## needed to remove future
+
+ self.pdb1_name = pdb1_name
+
+ TMscores_random = [] ## whole structure
+ TMscores_fs_random = [] ## fold-switching region
+
+ for multi in (1, 2, 2, 2, 2, 2, 2):
+ max_msa = max_msa * multi
+ ext_msa = ext_msa * multi
+
+ #### Colabfold part
+ command = (
+ "colabfold_batch --num-seeds "
+ + str(num_seeds)
+ + " --model-type "
+ + str(model_type)
+ + " --max-seq "
+ + str(max_msa)
+ + " --max-extra-seq "
+ + str(ext_msa)
+ + search_dir
+ + output_dir
+ + str(random_seed)
+ + "_max_"
+ + str(max_msa)
+ + "_ext_"
+ + str(ext_msa)
+ )
+ print(command)
+ os.system(command)
+
+ def select_size(
+ self,
+ TMscores_random_alter,
+ TMscores_fs_random_alter,
+ pdb1_name,
+ pdb2_name,
+ alt_name,
+ num_seeds,
+ ):
+
+ TMscores_random_reshape = TMscores_random_alter.reshape(14, num_seeds * 5)
+ TMscores_fs_random_reshape = TMscores_fs_random_alter.reshape(14, num_seeds * 5)
+ TMscores_random_locat = np.zeros((7, num_seeds * 5))
+ TMscores_fs_random_locat = np.zeros((7, num_seeds * 5))
+
+        #### locating the alternative pdb_name
+
+ if alt_name == pdb2_name:
+ # for i in 1, 3, 5, 7, 9, 11, 13 in TM_scores:
+ tmp_cnt = 0
+ for i in range(1, 14, 2):
+ print(TMscores_random_reshape[i, :])
+ print(TMscores_fs_random_reshape[i, :])
+ TMscores_random_locat[tmp_cnt, :] = TMscores_random_reshape[i, :]
+ TMscores_fs_random_locat[tmp_cnt, :] = TMscores_fs_random_reshape[i, :]
+ tmp_cnt = tmp_cnt + 1
+ else:
+ # for i in 0, 2, 4, 6, 8, 10, 12 in TM_scores:
+ tmp_cnt = 0
+ for i in range(0, 13, 2):
+ print(TMscores_random_reshape[i, :])
+ print(TMscores_fs_random_reshape[i, :])
+ TMscores_random_locat[tmp_cnt, :] = TMscores_random_reshape[i, :]
+ TMscores_fs_random_locat[tmp_cnt, :] = TMscores_fs_random_reshape[i, :]
+ tmp_cnt = tmp_cnt + 1
+
+ TMscore_data = TMscores_random_locat
+ TMscore_data = TMscores_random_locat.reshape(7, num_seeds * 5)
+ TMscore_data_sum = np.zeros((7, 1))
+
+ TMscore_fs_data = TMscores_fs_random_locat
+ TMscore_fs_data = TMscores_fs_random_locat.reshape(7, num_seeds * 5)
+
+ for ii in range(0, int(TMscore_data.shape[0])):
+ TMscore_data_sum[ii] = np.sum(TMscore_data[ii])
+
+ location = np.argmax(np.max(TMscore_data_sum, axis=1))
+
+ print("Selecting...")
+
+ TMscore_data = TMscores_random_alter
+ TMscore_data = TMscores_random_alter.reshape(14, num_seeds * 5)
+
+ TMscore_fs_data = TMscores_fs_random_alter
+ TMscore_fs_data = TMscores_fs_random_alter.reshape(14, num_seeds * 5)
+
+ if alt_name == pdb2_name:
+ location = (location * 2) + 1
+ else:
+ location = location * 2
+
+ if alt_name == pdb2_name and (
+ (
+ np.any(TMscore_data[location, :] >= 0.5)
+ and np.any(TMscore_fs_data[location, :] >= 0.5)
+ )
+ ):
+ print(TMscore_data[location, :], TMscore_fs_data[location, :])
+ selection = int((location - 1) / 2)
+ self.selection = selection
+
+ elif alt_name == pdb1_name and (
+ (
+ np.any(TMscore_data[location, :] >= 0.5)
+ and np.any(TMscore_fs_data[location, :] >= 0.5)
+ )
+ ):
+ print(TMscore_data[location, :], TMscore_fs_data[location, :])
+ selection = int(location / 2)
+ self.selection = selection
+
+ # elif location == int(TMscore_data.shape[0]) and np.any(TMscore_fs_data[location, :] < 0.5):
+ elif np.any(TMscore_fs_data[location, :] < 0.5):
+ for jj in range(0, int(TMscore_data.shape[0] / 2)):
+ print(TMscore_data[(2 * jj), :], TMscore_fs_data[(jj * 2) + 1, :])
+ print(TMscore_data[(jj * 2) + 1, :], TMscore_fs_data[(jj * 2), :])
+ if (
+ np.any(TMscore_data[(jj * 2), :] >= 0.4)
+ and np.any(TMscore_fs_data[(jj * 2) + 1, :] >= 0.5)
+ ) or (
+ np.any(TMscore_data[(jj * 2) + 1, :] >= 0.4)
+ and np.any(TMscore_fs_data[(jj * 2), :] >= 0.5)
+ ):
+ selection = jj
+ self.selection = selection
+ break
+ elif (
+ np.any(TMscore_data[(jj * 2), :] >= 0.4)
+ and np.any(TMscore_fs_data[(jj * 2), :] >= 0.5)
+ ) or (
+ np.any(TMscore_data[(jj * 2) + 1, :] >= 0.4)
+ and np.any(TMscore_fs_data[(jj * 2) + 1, :] >= 0.5)
+ ):
+
+ selection = jj
+ self.selection = selection
+ break
+ elif jj == (int(TMscore_data.shape[0] / 2) - 1) and np.all(TMscore_data[jj, :] < 0.5):
+ print("Predictions are bad")
+ rm_folder_cmd = "rm -rf successed_prediction/" + self.pdb1_name + "/"
+ print(rm_folder_cmd)
+ os.system(rm_folder_cmd)
+ sys.exit()
+ else:
+ print("Predictions are bad")
+ else:
+ print("Predictions are bad")
+ print("Predictions of whole structure are bad")
+ rm_folder_cmd = "rm -rf successed_prediction/" + self.pdb1_name + "/"
+ print(rm_folder_cmd)
+ os.system(rm_folder_cmd)
+ sys.exit()
+
+
+class prediction_all:
+ def __init__(
+ self, pdb1, pdb1_name, pdb2, pdb2_name, search_dir, nMSA, model_type, search_multi_dir
+ ):
+ num_seeds = 5 + nMSA
+ pwd = os.getcwd() + "/"
+
+ if model_type != "alphafold2_multimer_v3":
+
+ ##### Perform prediction with full-length MSA
+ pre_random_seed = np.random.randint(0, 16, 1)
+ random_seed_full_MSA = "".join(map(str, pre_random_seed))
+ output_dir = (
+ " " + pdb1_name + "_predicted_models_full_rand_" + str(random_seed_full_MSA)
+ )
+ MSA_full = CF_MSA_max(
+ search_dir, output_dir, pdb1_name, random_seed_full_MSA, num_seeds, model_type
+ )
+
+ ##### Perform prediction with random shallow MSA
+ ##### check out varied-MSA with (msa-max: 1, 2, 4, 8, 16, 32, 64) (msa-extra: 2, 4, 8, 16, 32, 64, 128)
+ output_dir = " " + pdb1_name + "_predicted_models_rand_"
+ random_seed = random.sample(range(100), 1)
+ random_seed = "".join(map(str, random_seed))
+ MSA_var = CF_MSA_var(
+ pdb1,
+ pdb1_name,
+ pdb2,
+ pdb2_name,
+ search_dir,
+ output_dir,
+ random_seed,
+ num_seeds,
+ model_type,
+ )
+
+ ####################################################################
+ ##### check-out TM-scores of prediction with full-length-MSA (whole)
+ pred_dir = pdb1_name + "_predicted_models_full_rand_" + str(random_seed_full_MSA) + "/"
+ print(pred_dir)
+ MSA_full_TMscore = TM_score(pred_dir, pdb1, pdb1_name, pdb2, pdb2_name, model_type)
+ full_TMscore = np.array(MSA_full_TMscore.tmscores)
+ full_TMscore = full_TMscore.reshape(2, num_seeds * 5)
+
+ ##### check-out TM-scores of prediction with full-length-MSA (fs region)
+ pred_path = pwd + pdb1_name + "_predicted_models_full_rand_" + str(random_seed_full_MSA)
+ MSA_fs_TMscore = TM_score_fs(pred_path, pdb1, pdb1_name, pdb2, pdb2_name)
+ fs_TMscore = np.array(MSA_fs_TMscore.tmscores_fs)
+ fs_TMscore = fs_TMscore.reshape(2, num_seeds * 5)
+
+ ##### check-out the 1st prediction results are good or not
+ if np.average(full_TMscore[0, :]) > np.average(full_TMscore[1, :]):
+ if np.any(fs_TMscore[0, :] >= 0.5) and np.any(full_TMscore[0, :] >= 0.5):
+ ref_name = pdb1_name
+ alt_name = pdb2_name
+ elif np.any(fs_TMscore[1, :] >= 0.5) and np.any(full_TMscore[1, :] >= 0.5):
+ ref_name = pdb2_name
+ alt_name = pdb1_name
+ else:
+ fin_pred_dir = pwd + pdb1_name + "_predicted_models_*"
+ print("Prediction with deep MSA was failed")
+ gen_dir = "failed_prediction/" + pdb1_name
+ os.makedirs(gen_dir)
+ mv_command = "mv " + fin_pred_dir + " failed_prediction/" + pdb1_name
+ os.system(mv_command)
+ sys.exit()
+ else:
+ if np.any(fs_TMscore[1, :] >= 0.5) and np.any(full_TMscore[1, :] >= 0.5):
+ ref_name = pdb2_name
+ alt_name = pdb1_name
+ elif np.any(fs_TMscore[0, :] >= 0.5) and np.any(full_TMscore[0, :] >= 0.5):
+ ref_name = pdb1_name
+ alt_name = pdb2_name
+ else:
+ fin_pred_dir = pwd + pdb1_name + "_predicted_models_*"
+ print("Prediction with deep MSA was failed")
+ gen_dir = "failed_prediction/" + pdb1_name
+ os.makedirs(gen_dir)
+ mv_command = "mv " + fin_pred_dir + " failed_prediction/" + pdb1_name
+ os.system(mv_command)
+ sys.exit()
+
+ print("Reference structure: ", ref_name)
+ print("Alternative structure: ", alt_name)
+
+ # save TM-score of whole structure from full-length MSA
+ np.savetxt("TMScore_full-MSA_" + pdb1_name + ".csv", full_TMscore, fmt="%2.3f")
+ # save TM-score of fold-switching region from full-length MSA
+ np.savetxt("TMScore_fs_full-MSA_" + pdb1_name + ".csv", fs_TMscore, fmt="%2.3f")
+
+ # Directory section and save to successed_prediction folder
+ gen_dir = "successed_prediction/" + pdb1_name
+
+ if not os.path.exists(gen_dir):
+ os.mkdir(gen_dir)
+
+ mv_folder_cmd = "mv " + pred_dir + " successed_prediction/" + pdb1_name
+ print(mv_folder_cmd)
+ os.system(mv_folder_cmd)
+ print("Full-MSA prediction is tightly aligned to crystal structure")
+ print(" ")
+
+ ########################################################################
+ ##### check-out TM-scores of prediction with shallow random MSAs (whole)
+ max_msa = 1
+ ext_msa = 2
+ TMscores_random = []
+ TMscores_fs_random = []
+
+ for multi in (1, 2, 2, 2, 2, 2, 2):
+ max_msa = max_msa * multi
+ ext_msa = ext_msa * multi
+
+ pred_dir = (
+ pdb1_name
+ + "_predicted_models_rand_"
+ + str(random_seed)
+ + "_max_"
+ + str(max_msa)
+ + "_ext_"
+ + str(ext_msa)
+ + "/"
+ )
+
+ ##### TMscore of whole part
+ MSA_shallow_TMscore = TM_score(
+ pred_dir, pdb1, pdb1_name, pdb2, pdb2_name, model_type
+ )
+ TMscores_random = np.append(TMscores_random, MSA_shallow_TMscore.tmscores)
+ print(TMscores_random)
+
+ ### TMscore fs part
+ MSA_shallow_fs_TMscore = TM_score_fs(pred_dir, pdb1, pdb1_name, pdb2, pdb2_name)
+ TMscores_fs_random = np.append(
+ TMscores_fs_random, MSA_shallow_fs_TMscore.tmscores_fs
+ )
+ print(TMscores_fs_random)
+
+ fin_pred_dir = pdb1_name + "_predicted_models_rand_" + str(random_seed) + "_max_*"
+
+ TMscores_random_reshape = TMscores_random.reshape(14, num_seeds * 5)
+ TMscores_fs_random_reshape = TMscores_fs_random.reshape(14, num_seeds * 5)
+
+ TMscores_random_alter = np.zeros((7, num_seeds * 5))
+ TMscores_fs_random_alter = np.zeros((7, num_seeds * 5))
+
+ ##### finding the TMscores of alternative conformations for determining the length of shallow random MSAs
+ if alt_name == pdb2_name:
+ # for i in 1, 3, 5, 7, 9, 11, 13 in TM_scores:
+ tmp_cnt = 0
+ for i in range(1, 14, 2):
+ print(TMscores_random_reshape[i, :])
+ print(TMscores_fs_random_reshape[i, :])
+ TMscores_random_alter[tmp_cnt, :] = TMscores_random_reshape[i, :]
+ TMscores_fs_random_alter[tmp_cnt, :] = TMscores_fs_random_reshape[i, :]
+ tmp_cnt = tmp_cnt + 1
+ else:
+ # for i in 0, 2, 4, 6, 8, 10, 12 in TM_scores:
+ tmp_cnt = 0
+ for i in range(0, 13, 2):
+ print(TMscores_random_reshape[i, :])
+ print(TMscores_fs_random_reshape[i, :])
+ TMscores_random_alter[tmp_cnt, :] = TMscores_random_reshape[i, :]
+ TMscores_fs_random_alter[tmp_cnt, :] = TMscores_fs_random_reshape[i, :]
+ tmp_cnt = tmp_cnt + 1
+
+ print(" ")
+ print("Confirming the TM-score with alternative conformation is good or not")
+ print(TMscores_random_alter)
+ print(
+ "Confirming the TM-score with fs region of alternative conformation is good or not"
+ )
+ print(TMscores_fs_random_alter)
+ print(" ")
+
+ if np.any(TMscores_random_alter > 0.5) and np.any(TMscores_fs_random_alter > 0.5):
+ # save all TM-scores from random MSA (1-2, 2-4, 4-8.... in order)
+ # TMscores_random_reshape = TMscores_random.reshape(14, 5)
+ np.savetxt(
+ "TMScore_random-MSA_" + pdb1_name + ".csv", TMscores_random_reshape, fmt="%2.3f"
+ )
+ np.savetxt(
+ "TMScore_fs_random-MSA_" + pdb1_name + ".csv",
+ TMscores_fs_random_reshape,
+ fmt="%2.3f",
+ )
+
+ gen_dir = "successed_prediction/" + pdb1_name
+ if not os.path.exists(gen_dir):
+ os.makedirs(gen_dir)
+ mv_command = "mv " + fin_pred_dir + " successed_prediction/" + pdb1_name
+ print(mv_command)
+ os.system(mv_command)
+ MSA_var.select_size(
+ TMscores_random_reshape,
+ TMscores_fs_random_reshape,
+ pdb1_name,
+ pdb2_name,
+ alt_name,
+ num_seeds,
+ )
+ size_selection = MSA_var.selection
+ self.size_selection = size_selection
+ else:
+ mv_command = "mv " + fin_pred_dir + " successed_prediction/" + pdb1_name
+ print(mv_command)
+ os.system(mv_command)
+ MSA_var.select_size(
+ TMscores_random_reshape,
+ TMscores_fs_random_reshape,
+ pdb1_name,
+ pdb2_name,
+ alt_name,
+ num_seeds,
+ )
+ size_selection = MSA_var.selection
+ self.size_selection = size_selection
+
+ else:
+ gen_dir = "failed_prediction/" + pdb1_name
+ if not os.path.exists(gen_dir):
+ os.makedirs(gen_dir)
+ mv_command = "mv " + fin_pred_dir + " failed_prediction/" + pdb1_name
+ print(mv_command)
+ os.system(mv_command)
+ print(
+ "Full-MSA prediction is not tightly aligned to crystal structure with additional seeds"
+ )
+ print("Prediction is done")
+ sys.exit()
+
+ else:
+ mv_command = "mv " + fin_pred_dir + " failed_prediction/" + pdb1_name
+ print(mv_command)
+ os.system(mv_command)
+ print("Prediction is done")
+ sys.exit()
+
+ elif model_type == "alphafold2_multimer_v3":
+ print("Currently working on")
+ # MSA_multi = prediction_all_multimer_FS(pdb1_name, pdb2_name, search_dir, nMSA, model_type, search_multi_dir)
+ MSA_multi = prediction_all_multimer_FS(
+ pdb1_name, pdb2_name, search_dir, nMSA, model_type, search_multi_dir, pdb1, pdb2
+ )
+ self.size_selection = MSA_multi.size_selection
diff --git a/code/pred_cal_tmscore_multimer.py b/cf_random/prediction/pred_cal_tmscore_multimer.py
similarity index 56%
rename from code/pred_cal_tmscore_multimer.py
rename to cf_random/prediction/pred_cal_tmscore_multimer.py
index ef42070..39fc556 100644
--- a/code/pred_cal_tmscore_multimer.py
+++ b/cf_random/prediction/pred_cal_tmscore_multimer.py
@@ -2,43 +2,37 @@
# -*- coding: utf-8 -*-
"""
Created on Wed Feb 21 14:51:00 2024
-
+
@author: Myeongsang (Samuel) Lee
"""
-import re
-import Bio
-import os
-from os import listdir
-from os.path import isfile, join
-import sys
-from pathlib import Path
-import numpy as np
-from numpy import genfromtxt
-import matplotlib.pyplot as plt
+
import glob
+import os
import random
-import argparse
+import sys
-# call related modules of tmtools after installation
-from tmtools import tm_align
-from tmtools.io import get_structure, get_residue_data
-from tmtools.testing import get_pdb_path
+import numpy as np
# call converting the multimer as a single chain structure
-from convert_multi_single import *
+from ..utils.convert_multi_single import convert_m2s
# call converting the multimer as a separated chains
-from split_multi_single import *
+# from ..utils.split_multi_single import *
+
+# call related modules of tmtools after installation
+from tmtools import tm_align
+from tmtools.io import get_residue_data, get_structure
+from tmtools.testing import get_pdb_path
-class TM_score_monomer():
+class TM_score_monomer:
def __init__(self, pred_dir, pdb1_name, pdb2_name):
-
+
## loading reference pdb for TM-score
- pwd = os.getcwd() + '/'
+ pwd = os.getcwd() + "/"
tmscores_monomer = []
- files_list = (glob.glob(str(pred_dir) + "/*_unrelaxed*pdb"))
+ files_list = glob.glob(str(pred_dir) + "/*_unrelaxed*pdb")
print(files_list)
##### pdb1_name part
@@ -51,15 +45,14 @@ def __init__(self, pred_dir, pdb1_name, pdb2_name):
return tmscores_monomer
for model in files_list:
- model = model.replace('.pdb','')
+ model = model.replace(".pdb", "")
model = pwd + model
s = get_structure(get_pdb_path(model))
coords1, seq1 = get_residue_data(s)
res = tm_align(coords1, coords2, seq1, seq2)
- tmscore = round(res.tm_norm_chain1,5) # wrt to model
+ tmscore = round(res.tm_norm_chain1, 5) # wrt to model
tmscores_monomer.append(tmscore)
-
##### pdb2_name part
pdb2_dir = pwd + pdb2_name
r3 = get_structure(get_pdb_path(str(pdb2_dir)))
@@ -70,39 +63,38 @@ def __init__(self, pred_dir, pdb1_name, pdb2_name):
return tmscores_monomer
for model in files_list:
- model = model.replace('.pdb','')
+ model = model.replace(".pdb", "")
model = pwd + model
s = get_structure(get_pdb_path(model))
coords1, seq1 = get_residue_data(s)
res = tm_align(coords1, coords2, seq1, seq2)
- tmscore = round(res.tm_norm_chain1,5) # wrt to model
+ tmscore = round(res.tm_norm_chain1, 5) # wrt to model
tmscores_monomer.append(tmscore)
print(tmscores_monomer)
self.tmscores_monomer = tmscores_monomer
-class TM_score_multimer():
+class TM_score_multimer:
def __init__(self, pred_dir, pdb1_name, pdb2_name):
## loading reference pdb for TM-score
- pwd = os.getcwd() + '/'
+ pwd = os.getcwd() + "/"
tmscores_multimer = []
##### convert the multimer file as a single structure
- check_files_list = (glob.glob(str(pred_dir) + "/rmTER*_unrelaxed*.pdb"))
+ check_files_list = glob.glob(str(pred_dir) + "/rmTER*_unrelaxed*.pdb")
print(check_files_list)
if not check_files_list:
convert_m2s(pred_dir, pdb1_name, pdb2_name)
- files_list = (glob.glob(str(pred_dir) + "/rmTER*_unrelaxed*.pdb"))
+ files_list = glob.glob(str(pred_dir) + "/rmTER*_unrelaxed*.pdb")
print(files_list)
else:
- files_list = (glob.glob(str(pred_dir) + "/rmTER*_unrelaxed*.pdb"))
+ files_list = glob.glob(str(pred_dir) + "/rmTER*_unrelaxed*.pdb")
print(files_list)
-
##### pdb2_name part
- pdb2_dir = pwd + pdb2_name + '_rmTER'
+ pdb2_dir = pwd + pdb2_name + "_rmTER"
r3 = get_structure(get_pdb_path(str(pdb2_dir)))
coords2, seq2 = get_residue_data(r3)
@@ -111,17 +103,16 @@ def __init__(self, pred_dir, pdb1_name, pdb2_name):
return tmscores_multimer
for model in files_list:
- model = model.replace('.pdb','')
+ model = model.replace(".pdb", "")
model = pwd + model
s = get_structure(get_pdb_path(model))
coords1, seq1 = get_residue_data(s)
res = tm_align(coords1, coords2, seq1, seq2)
- tmscore = round(res.tm_norm_chain1,5) # wrt to model
+ tmscore = round(res.tm_norm_chain1, 5) # wrt to model
tmscores_multimer.append(tmscore)
print(tmscores_multimer)
-
##### pdb1_name part
pdb1_dir = pwd + pdb1_name
r2 = get_structure(get_pdb_path(str(pdb1_dir)))
@@ -132,77 +123,102 @@ def __init__(self, pred_dir, pdb1_name, pdb2_name):
return tmscores_multimer
for model in files_list:
- model = model.replace('.pdb','')
+ model = model.replace(".pdb", "")
model = pwd + model
s = get_structure(get_pdb_path(model))
coords1, seq1 = get_residue_data(s)
res = tm_align(coords1, coords2, seq1, seq2)
- tmscore = round(res.tm_norm_chain1,5) # wrt to model
+ tmscore = round(res.tm_norm_chain1, 5) # wrt to model
tmscores_multimer.append(tmscore)
self.tmscores_multimer = tmscores_multimer
-
-
-
-
-class CF_MSA_max():
+class CF_MSA_max:
def __init__(self, search_dir, output_dir, pdb_name, rseed, num_seeds, model_type):
-
- command = 'colabfold_batch --num-seeds ' + str(num_seeds) + ' --model-type alphafold2_ptm --random-seed ' + str(rseed) + search_dir + output_dir
+ command = (
+ "colabfold_batch --num-seeds "
+ + str(num_seeds)
+ + " --model-type alphafold2_ptm --random-seed "
+ + str(rseed)
+ + search_dir
+ + output_dir
+ )
print(command)
os.system(command)
-
-
-class CF_MSA_var():
+class CF_MSA_var:
def __init__(self, pdb1_name, pdb2_name, search_dir, output_dir, rseed, num_seeds, model_type):
#### shallow MSA section
#### Global viarlable
- max_msa = 1; ext_msa = 2
+ max_msa = 1
+ ext_msa = 2
random_seed = rseed
- self.pdb1_name = pdb1_name; self.pdb2_name = pdb2_name
+ self.pdb1_name = pdb1_name
+ self.pdb2_name = pdb2_name
for multi in (1, 2, 2, 2, 2, 2, 2):
max_msa = int(max_msa * multi)
ext_msa = int(ext_msa * multi)
-
+
#### Colabfold part
- command = 'colabfold_batch --num-seeds ' + str(num_seeds) + ' --model-type ' + str(model_type) + ' --max-seq ' + str(max_msa) + ' --max-extra-seq ' + str(ext_msa) + search_dir + output_dir + str(random_seed) + '_max_' + str(max_msa) + '_ext_' + str(ext_msa)
- print(command); os.system(command)
-
-
+ command = (
+ "colabfold_batch --num-seeds "
+ + str(num_seeds)
+ + " --model-type "
+ + str(model_type)
+ + " --max-seq "
+ + str(max_msa)
+ + " --max-extra-seq "
+ + str(ext_msa)
+ + search_dir
+ + output_dir
+ + str(random_seed)
+ + "_max_"
+ + str(max_msa)
+ + "_ext_"
+ + str(ext_msa)
+ )
+ print(command)
+ os.system(command)
def cal_TM_score_multi(self, pdb1_name, pdb2_name, num_seeds, search_dir, output_dir, rseed):
-
- max_msa = 1; ext_msa = 2
- multi_size = 0; random_seed = rseed
- TMscore_multi = []; TMscore_multi_average = np.zeros((7, 1))
+ max_msa = 1
+ ext_msa = 2
+ TMscore_multi = []
+ TMscore_multi_average = np.zeros((7, 1))
for multi in (1, 2, 2, 2, 2, 2, 2):
max_msa = int(max_msa * multi)
ext_msa = int(ext_msa * multi)
- fin_pred_dir = pdb1_name + '_predicted_models_rand_' + str(rseed) + '_max_' + str(max_msa) + '_ext_' + str(ext_msa)
- fin_pred_dir_all = pdb1_name + '_predicted_models_rand_' + str(rseed) + '_max_*'
- pred_files_list = (glob.glob(str(fin_pred_dir) + "/*_unrelaxed*pdb"))
-
+ fin_pred_dir = (
+ pdb1_name
+ + "_predicted_models_rand_"
+ + str(rseed)
+ + "_max_"
+ + str(max_msa)
+ + "_ext_"
+ + str(ext_msa)
+ )
+ fin_pred_dir_all = pdb1_name + "_predicted_models_rand_" + str(rseed) + "_max_*"
+ pred_files_list = glob.glob(str(fin_pred_dir) + "/*_unrelaxed*pdb")
+
if len(pred_files_list) == 0:
print("The TMscore list is empty")
tmp = np.zeros((1, 25))
TMscore_multi = np.append(TMscore_multi, tmp)
else:
run_TMscore_multi = TM_score_multimer(fin_pred_dir, pdb1_name, pdb2_name)
- TMscore_multi = np.append(TMscore_multi, run_TMscore_multi.tmscores_multimer); print(TMscore_multi)
-
+ TMscore_multi = np.append(TMscore_multi, run_TMscore_multi.tmscores_multimer)
+ print(TMscore_multi)
TMscore_multi = TMscore_multi.reshape(7 * 2, num_seeds * 5)
- np.savetxt('TMScore_random-MSA_' + pdb1_name + '.csv', TMscore_multi, fmt='%2.3f')
+ np.savetxt("TMScore_random-MSA_" + pdb1_name + ".csv", TMscore_multi, fmt="%2.3f")
-
- print("TMscore multimer:"); print(TMscore_multi)
+ print("TMscore multimer:")
+ print(TMscore_multi)
if np.any(TMscore_multi > 0.4):
tmp_cnt = 0
@@ -210,54 +226,55 @@ def cal_TM_score_multi(self, pdb1_name, pdb2_name, num_seeds, search_dir, output
TMscore_multi_average[tmp_cnt] = np.average(TMscore_multi[i])
tmp_cnt = tmp_cnt + 1
-
location = np.argmax(np.max(TMscore_multi_average, axis=1))
- print("The selected size of shallow random MSA is: ", np.argmax(np.max(TMscore_multi_average, axis=1)))
+ print(
+ "The selected size of shallow random MSA is: ",
+ np.argmax(np.max(TMscore_multi_average, axis=1)),
+ )
self.size_selection = int(location)
- mv_command = 'mv ' + fin_pred_dir_all + ' multimer_prediction/' + pdb1_name
- print(mv_command); os.system(mv_command)
-
+ mv_command = "mv " + fin_pred_dir_all + " multimer_prediction/" + pdb1_name
+ print(mv_command)
+ os.system(mv_command)
else:
print("All calculated TMscores are not satisfying the creteria")
print("All process is done.")
- mv_command = 'mv ' + fin_pred_dir + ' failed_prediction/'; os.system(mv_command)
+ mv_command = "mv " + fin_pred_dir + " failed_prediction/"
+ os.system(mv_command)
sys.exit()
-
-
-
-class prediction_all_multimer():
+class prediction_all_multimer:
def __init__(self, pdb1_name, pdb2_name, search_dir, nMSA, model_type, search_multi_dir):
### note: pdb1_name should be nomomer and pdb2_name should be multimer
num_seeds = 5 + nMSA
- TER_count = 0
- pwd = os.getcwd() + '/'
- rm_converted_pdb = 'rm ' + pdb2_name + '_rmTER.pdb'; os.system(rm_converted_pdb)
-
+ rm_converted_pdb = "rm " + pdb2_name + "_rmTER.pdb"
+ os.system(rm_converted_pdb)
##############################################################
##### Predicting all CF-random runs before calculate TM-scores
- ##### Predicting the monomer with deep MSA
- #pre_random_seed = np.arange(0, 10, 1)
+ ##### Predicting the monomer with deep MSA
+ # pre_random_seed = np.arange(0, 10, 1)
pre_random_seed = random.sample(range(10), 1)
- random_seed_full_MSA = ''.join(map(str, pre_random_seed))
- output_dir = ' ' + pdb1_name + '_predicted_models_full_rand_' + str(random_seed_full_MSA)
+ random_seed_full_MSA = "".join(map(str, pre_random_seed))
+ output_dir = " " + pdb1_name + "_predicted_models_full_rand_" + str(random_seed_full_MSA)
##### Perform predction with full-length MSA
- MSA_full = CF_MSA_max(search_dir, output_dir, pdb1_name, random_seed_full_MSA, num_seeds, model_type)
+ MSA_full = CF_MSA_max(
+ search_dir, output_dir, pdb1_name, random_seed_full_MSA, num_seeds, model_type
+ )
##### Predicting the multimer with shallow random MSAs
##### check out varied-MSA with (msa-max: 1, 2, 4, 8, 16, 32, 64) (msa-extra: 2, 4, 8, 16, 32, 64, 128)
- output_dir = ' ' + pdb1_name + '_predicted_models_rand_'
+ output_dir = " " + pdb1_name + "_predicted_models_rand_"
random_seed = random.sample(range(100), 1)
- random_seed = ''.join(map(str, random_seed))
- search_dir_update = ' ' + search_multi_dir.replace(' ','') + ' '
-
- MSA_var = CF_MSA_var(pdb1_name, pdb2_name, search_dir_update, output_dir, random_seed, num_seeds, model_type)
+ random_seed = "".join(map(str, random_seed))
+ search_dir_update = " " + search_multi_dir.replace(" ", "") + " "
+ MSA_var = CF_MSA_var(
+ pdb1_name, pdb2_name, search_dir_update, output_dir, random_seed, num_seeds, model_type
+ )
################################################################
##### Calculating all TM-scores including monomer and multimer
@@ -265,33 +282,37 @@ def __init__(self, pdb1_name, pdb2_name, search_dir, nMSA, model_type, search_mu
TMscore_monomer = []
# Directory section
- gen_dir = 'multimer_prediction/' + pdb1_name
+ gen_dir = "multimer_prediction/" + pdb1_name
if not os.path.exists(gen_dir):
os.mkdir(gen_dir)
- pred_dir = pdb1_name + '*predicted_models_full*'
+ pred_dir = pdb1_name + "*predicted_models_full*"
##### Calculate TM-score of monomer
run_TMscore = TM_score_monomer(pred_dir, pdb1_name, pdb2_name)
- TMscore_monomer = np.array(run_TMscore.tmscores_monomer)
- TMscore_monomer = TMscore_monomer.reshape(2, num_seeds * 5); print(TMscore_monomer)
-
+ TMscore_monomer = np.array(run_TMscore.tmscores_monomer)
+ TMscore_monomer = TMscore_monomer.reshape(2, num_seeds * 5)
+ print(TMscore_monomer)
##### TM-score calculation for multimer
if np.any(TMscore_monomer > 0.5):
- pred_dir = pdb1_name + '_predicted_models_full_rand_' + str(random_seed_full_MSA) + '/'
- mv_folder_cmd = 'mv ' + pred_dir + ' multimer_prediction/' + pdb1_name
- print(mv_folder_cmd); os.system(mv_folder_cmd)
- np.savetxt('TMScore_full-MSA_' + pdb1_name + '.csv', TMscore_monomer, fmt='%2.3f')
-
- MSA_var.cal_TM_score_multi(pdb1_name, pdb2_name, num_seeds, search_dir_update, output_dir, random_seed)
- print(MSA_var.size_selection); self.size_selection = MSA_var.size_selection
+ pred_dir = pdb1_name + "_predicted_models_full_rand_" + str(random_seed_full_MSA) + "/"
+ mv_folder_cmd = "mv " + pred_dir + " multimer_prediction/" + pdb1_name
+ print(mv_folder_cmd)
+ os.system(mv_folder_cmd)
+ np.savetxt("TMScore_full-MSA_" + pdb1_name + ".csv", TMscore_monomer, fmt="%2.3f")
+
+ MSA_var.cal_TM_score_multi(
+ pdb1_name, pdb2_name, num_seeds, search_dir_update, output_dir, random_seed
+ )
+ print(MSA_var.size_selection)
+ self.size_selection = MSA_var.size_selection
else:
- pred_dir = pdb1_name + '_predicted_models*_rand_*/'
- mv_command = 'mv ' + pred_dir + ' failed_prediction/';
- print(mv_command); os.system(mv_command)
+ pred_dir = pdb1_name + "_predicted_models*_rand_*/"
+ mv_command = "mv " + pred_dir + " failed_prediction/"
+ print(mv_command)
+ os.system(mv_command)
print("Deep MSA cannot find the monomer")
sys.exit()
-
diff --git a/code/pred_cal_tmscore_multimer_FS.py b/cf_random/prediction/pred_cal_tmscore_multimer_fs.py
similarity index 50%
rename from code/pred_cal_tmscore_multimer_FS.py
rename to cf_random/prediction/pred_cal_tmscore_multimer_fs.py
index 9fb047b..d861da1 100644
--- a/code/pred_cal_tmscore_multimer_FS.py
+++ b/cf_random/prediction/pred_cal_tmscore_multimer_fs.py
@@ -2,47 +2,41 @@
# -*- coding: utf-8 -*-
"""
Created on Wed Feb 21 14:51:00 2024
-
+
@author: Myeongsang (Samuel) Lee
"""
-import re
-import Bio
-import os
-from os import listdir
-from os.path import isfile, join
-import sys
-from pathlib import Path
-import numpy as np
-from numpy import genfromtxt
-import matplotlib.pyplot as plt
+
import glob
+import os
import random
-import argparse
+import sys
-# call related modules of tmtools after installation
-from tmtools import tm_align
-from tmtools.io import get_structure, get_residue_data
-from tmtools.testing import get_pdb_path
+import numpy as np
+from ..analysis.cal_tmscore_fs_multimer import TM_score_fs_multi
# call calculating TM-scores of fs region
-from cal_tmscore_fs_only import *
-from cal_tmscore_fs_multimer import *
+from ..analysis.cal_tmscore_fs_only import TM_score_fs
# call converting the multimer as a single chain structure
-from convert_multi_single import *
+from ..utils.convert_multi_single import convert_m2s
# call converting the multimer as a separated chains
-from split_multi_single import *
+# from ..utils.split_multi_single import *
+
+# call related modules of tmtools after installation
+from tmtools import tm_align
+from tmtools.io import get_residue_data, get_structure
+from tmtools.testing import get_pdb_path
-class TM_score_monomer():
+class TM_score_monomer:
def __init__(self, pred_dir, pdb1_name, pdb2_name):
-
+
## loading reference pdb for TM-score
- pwd = os.getcwd() + '/'
+ pwd = os.getcwd() + "/"
tmscores_monomer = []
- files_list = (glob.glob(str(pred_dir) + "/*_unrelaxed*pdb"))
+ files_list = glob.glob(str(pred_dir) + "/*_unrelaxed*pdb")
print(files_list)
##### pdb1_name part
@@ -55,17 +49,16 @@ def __init__(self, pred_dir, pdb1_name, pdb2_name):
return tmscores_monomer
for model in files_list:
- #modelpath = Path(model)
- #model = str(modelpath.parent) + "/" + modelpath.stem
- model = model.replace('.pdb','')
+ # modelpath = Path(model)
+ # model = str(modelpath.parent) + "/" + modelpath.stem
+ model = model.replace(".pdb", "")
model = pwd + model
s = get_structure(get_pdb_path(model))
coords1, seq1 = get_residue_data(s)
res = tm_align(coords1, coords2, seq1, seq2)
- tmscore = round(res.tm_norm_chain1,5) # wrt to model
+ tmscore = round(res.tm_norm_chain1, 5) # wrt to model
tmscores_monomer.append(tmscore)
-
##### pdb2_name part
pdb2_dir = pwd + pdb2_name
r3 = get_structure(get_pdb_path(str(pdb2_dir)))
@@ -76,45 +69,44 @@ def __init__(self, pred_dir, pdb1_name, pdb2_name):
return tmscores_monomer
for model in files_list:
- #modelpath = Path(model)
- #model = str(modelpath.parent) + "/" + modelpath.stem
- #model = model.replace('_converted.pdb','_converted')
- model = model.replace('.pdb','')
+ # modelpath = Path(model)
+ # model = str(modelpath.parent) + "/" + modelpath.stem
+ # model = model.replace('_converted.pdb','_converted')
+ model = model.replace(".pdb", "")
model = pwd + model
s = get_structure(get_pdb_path(model))
coords1, seq1 = get_residue_data(s)
res = tm_align(coords1, coords2, seq1, seq2)
- tmscore = round(res.tm_norm_chain1,5) # wrt to model
+ tmscore = round(res.tm_norm_chain1, 5) # wrt to model
tmscores_monomer.append(tmscore)
print(tmscores_monomer)
self.tmscores_monomer = tmscores_monomer
-class TM_score_multimer():
+class TM_score_multimer:
def __init__(self, pred_dir, pdb1_name, pdb2_name):
## loading reference pdb for TM-score
- pwd = os.getcwd() + '/'
+ pwd = os.getcwd() + "/"
tmscores_multimer = []
##### convert the multimer file as a single structure
- check_files_list = (glob.glob(str(pred_dir) + "/rmTER*_unrelaxed*.pdb"))
- #check_files_list = (glob.glob(str(pred_dir) + "/*_unrelaxed*pdb"))
+ check_files_list = glob.glob(str(pred_dir) + "/rmTER*_unrelaxed*.pdb")
+ # check_files_list = (glob.glob(str(pred_dir) + "/*_unrelaxed*pdb"))
print(check_files_list)
if not check_files_list:
convert_m2s(pred_dir, pdb1_name, pdb2_name)
- files_list = (glob.glob(str(pred_dir) + "/rmTER*_unrelaxed*.pdb"))
- #files_list = (glob.glob(str(pred_dir) + "/*_unrelaxed*pdb"))
+ files_list = glob.glob(str(pred_dir) + "/rmTER*_unrelaxed*.pdb")
+ # files_list = (glob.glob(str(pred_dir) + "/*_unrelaxed*pdb"))
print(files_list)
else:
- files_list = (glob.glob(str(pred_dir) + "/rmTER*_unrelaxed*.pdb"))
- #files_list = (glob.glob(str(pred_dir) + "/*_unrelaxed*pdb"))
+ files_list = glob.glob(str(pred_dir) + "/rmTER*_unrelaxed*.pdb")
+ # files_list = (glob.glob(str(pred_dir) + "/*_unrelaxed*pdb"))
print(files_list)
-
##### pdb2_name part
- pdb2_dir = pwd + pdb2_name + '_rmTER'
+ pdb2_dir = pwd + pdb2_name + "_rmTER"
r3 = get_structure(get_pdb_path(str(pdb2_dir)))
coords2, seq2 = get_residue_data(r3)
@@ -123,20 +115,19 @@ def __init__(self, pred_dir, pdb1_name, pdb2_name):
return tmscores_multimer
for model in files_list:
- #modelpath = Path(model)
- #model = str(modelpath.parent) + "/" + modelpath.stem
- #model = model.replace('_converted.pdb','_converted')
- model = model.replace('.pdb','')
+ # modelpath = Path(model)
+ # model = str(modelpath.parent) + "/" + modelpath.stem
+ # model = model.replace('_converted.pdb','_converted')
+ model = model.replace(".pdb", "")
model = pwd + model
s = get_structure(get_pdb_path(model))
coords1, seq1 = get_residue_data(s)
res = tm_align(coords1, coords2, seq1, seq2)
- tmscore = round(res.tm_norm_chain1,5) # wrt to model
+ tmscore = round(res.tm_norm_chain1, 5) # wrt to model
tmscores_multimer.append(tmscore)
print(tmscores_multimer)
-
##### pdb1_name part
pdb1_dir = pwd + pdb1_name
r2 = get_structure(get_pdb_path(str(pdb1_dir)))
@@ -147,67 +138,98 @@ def __init__(self, pred_dir, pdb1_name, pdb2_name):
return tmscores_multimer
for model in files_list:
- #modelpath = Path(model)
- #model = str(modelpath.parent) + "/" + modelpath.stem
- model = model.replace('.pdb','')
- #model = model.replace('.pdb','')
+ # modelpath = Path(model)
+ # model = str(modelpath.parent) + "/" + modelpath.stem
+ model = model.replace(".pdb", "")
+ # model = model.replace('.pdb','')
model = pwd + model
s = get_structure(get_pdb_path(model))
coords1, seq1 = get_residue_data(s)
res = tm_align(coords1, coords2, seq1, seq2)
- tmscore = round(res.tm_norm_chain1,5) # wrt to model
+ tmscore = round(res.tm_norm_chain1, 5) # wrt to model
tmscores_multimer.append(tmscore)
self.tmscores_multimer = tmscores_multimer
-
-
-
-
-class CF_MSA_max():
+class CF_MSA_max:
def __init__(self, search_dir, output_dir, pdb_name, rseed, num_seeds, model_type):
- command = 'colabfold_batch --num-seeds ' + str(num_seeds) + ' --model-type alphafold2_ptm --random-seed ' + str(rseed) + search_dir + output_dir
+ command = (
+ "colabfold_batch --num-seeds "
+ + str(num_seeds)
+ + " --model-type alphafold2_ptm --random-seed "
+ + str(rseed)
+ + search_dir
+ + output_dir
+ )
print(command)
os.system(command)
-
-
-class CF_MSA_var():
+class CF_MSA_var:
def __init__(self, pdb1_name, pdb2_name, search_dir, output_dir, rseed, num_seeds, model_type):
#### shallow MSA section
#### Global viarlable
- max_msa = 1; ext_msa = 2
+ max_msa = 1
+ ext_msa = 2
random_seed = rseed
- self.pdb1_name = pdb1_name; self.pdb2_name = pdb2_name
+ self.pdb1_name = pdb1_name
+ self.pdb2_name = pdb2_name
for multi in (1, 2, 2, 2, 2, 2, 2):
max_msa = int(max_msa * multi)
ext_msa = int(ext_msa * multi)
-
- #### Colabfold part
- command = 'colabfold_batch --num-seeds ' + str(num_seeds) + ' --model-type ' + str(model_type) + ' --max-seq ' + str(max_msa) + ' --max-extra-seq ' + str(ext_msa) + search_dir + output_dir + str(random_seed) + '_max_' + str(max_msa) + '_ext_' + str(ext_msa)
- print(command); os.system(command)
-
-
-
- def cal_TM_score_multi(self, pdb1_name, pdb2_name, num_seeds, search_dir, output_dir, rseed, pdb1, pdb2):
- max_msa = 1; ext_msa = 2
- multi_size = 0; random_seed = rseed
- TMscore_multi = []; TMscore_multi_average = np.zeros((7, 1))
- TMscore_multi_fs = []; TMscore_multi_fs_average = np.zeros((7, 1))
+ #### Colabfold part
+ command = (
+ "colabfold_batch --num-seeds "
+ + str(num_seeds)
+ + " --model-type "
+ + str(model_type)
+ + " --max-seq "
+ + str(max_msa)
+ + " --max-extra-seq "
+ + str(ext_msa)
+ + search_dir
+ + output_dir
+ + str(random_seed)
+ + "_max_"
+ + str(max_msa)
+ + "_ext_"
+ + str(ext_msa)
+ )
+ print(command)
+ os.system(command)
+
+ def cal_TM_score_multi(
+ self, pdb1_name, pdb2_name, num_seeds, search_dir, output_dir, rseed, pdb1, pdb2
+ ):
+
+ max_msa = 1
+ ext_msa = 2
+ TMscore_multi = []
+ TMscore_multi_average = np.zeros((7, 1))
+ TMscore_multi_fs = []
+ TMscore_multi_fs_average = np.zeros((7, 1))
for multi in (1, 2, 2, 2, 2, 2, 2):
max_msa = int(max_msa * multi)
ext_msa = int(ext_msa * multi)
- fin_pred_dir = pdb1_name + '_predicted_models_rand_' + str(rseed) + '_max_' + str(max_msa) + '_ext_' + str(ext_msa) + '/'
- fin_pred_dir_all = pdb1_name + '_predicted_models_rand_' + str(rseed) + '_max_*'
- pred_files_list = (glob.glob(str(fin_pred_dir) + "/*_unrelaxed*pdb"))
-
+ fin_pred_dir = (
+ pdb1_name
+ + "_predicted_models_rand_"
+ + str(rseed)
+ + "_max_"
+ + str(max_msa)
+ + "_ext_"
+ + str(ext_msa)
+ + "/"
+ )
+ fin_pred_dir_all = pdb1_name + "_predicted_models_rand_" + str(rseed) + "_max_*"
+ pred_files_list = glob.glob(str(fin_pred_dir) + "/*_unrelaxed*pdb")
+
if len(pred_files_list) == 0:
print("The TMscore list is empty")
tmp = np.zeros((1, 25))
@@ -215,24 +237,39 @@ def cal_TM_score_multi(self, pdb1_name, pdb2_name, num_seeds, search_dir, output
TMscore_multi_fs = np.append(TMscore_multi_fs, tmp)
else:
run_TMscore_multi = TM_score_multimer(fin_pred_dir, pdb1_name, pdb2_name)
- TMscore_multi = np.append(TMscore_multi, run_TMscore_multi.tmscores_multimer); print(TMscore_multi)
- ##### for measuring the fold-switching region in multimer, just measure the TM-score of
- ##### the first chain in between predicted and reference file
- pdb2 = pdb2_name + '_rmTER.pdb'
- fin_fs_pred_dir = pdb1_name + '_predicted_models_rand_' + str(rseed) + '_max_' + str(max_msa) + '_ext_' + str(ext_msa) + '/'
+ TMscore_multi = np.append(TMscore_multi, run_TMscore_multi.tmscores_multimer)
+ print(TMscore_multi)
+ ##### for measuring the fold-switching region in multimer, just measure the TM-score of
+ ##### the first chain in between predicted and reference file
+ pdb2 = pdb2_name + "_rmTER.pdb"
+ fin_fs_pred_dir = (
+ pdb1_name
+ + "_predicted_models_rand_"
+ + str(rseed)
+ + "_max_"
+ + str(max_msa)
+ + "_ext_"
+ + str(ext_msa)
+ + "/"
+ )
print(fin_fs_pred_dir)
- run_TMscore_multi_fs = TM_score_fs_multi(fin_fs_pred_dir, pdb1, pdb1_name, pdb2, pdb2_name)
- TMscore_multi_fs = np.append(TMscore_multi_fs, run_TMscore_multi_fs.tmscores_fs ); print(TMscore_multi_fs)
+ run_TMscore_multi_fs = TM_score_fs_multi(
+ fin_fs_pred_dir, pdb1, pdb1_name, pdb2, pdb2_name
+ )
+ TMscore_multi_fs = np.append(TMscore_multi_fs, run_TMscore_multi_fs.tmscores_fs)
+ print(TMscore_multi_fs)
TMscore_multi = TMscore_multi.reshape(7 * 2, num_seeds * 5)
- np.savetxt('TMScore_random-MSA_' + pdb1_name + '.csv', TMscore_multi, fmt='%2.3f')
+ np.savetxt("TMScore_random-MSA_" + pdb1_name + ".csv", TMscore_multi, fmt="%2.3f")
TMscore_multi = TMscore_multi[::2]
TMscore_multi_fs = TMscore_multi_fs.reshape(7 * 2, num_seeds * 5)
- np.savetxt('TMScore_fs_random-MSA_' + pdb1_name + '.csv', TMscore_multi_fs, fmt='%2.3f')
+ np.savetxt("TMScore_fs_random-MSA_" + pdb1_name + ".csv", TMscore_multi_fs, fmt="%2.3f")
TMscore_multi_fs = TMscore_multi_fs[1::2]
- print("TMscore multimer:"); print(TMscore_multi)
- print("TMscore fold-switching region in multimer:"); print(TMscore_multi_fs)
+ print("TMscore multimer:")
+ print(TMscore_multi)
+ print("TMscore fold-switching region in multimer:")
+ print(TMscore_multi_fs)
if np.any(TMscore_multi > 0.4) and np.any(TMscore_multi_fs > 0.4):
tmp_cnt = 0
@@ -242,50 +279,57 @@ def cal_TM_score_multi(self, pdb1_name, pdb2_name, num_seeds, search_dir, output
tmp_cnt = tmp_cnt + 1
location = np.argmax(np.max(TMscore_multi_average, axis=1))
- print("The selected size of shallow random MSA is: ", np.argmax(np.max(TMscore_multi_fs_average, axis=1)))
+ print(
+ "The selected size of shallow random MSA is: ",
+ np.argmax(np.max(TMscore_multi_fs_average, axis=1)),
+ )
self.size_selection = int(location)
- mv_command = 'mv ' + fin_pred_dir_all + ' multimer_prediction/' + pdb1_name
- print(mv_command); os.system(mv_command)
-
+ mv_command = "mv " + fin_pred_dir_all + " multimer_prediction/" + pdb1_name
+ print(mv_command)
+ os.system(mv_command)
else:
print("All calculated TMscores are not satisfying the creteria")
print("All process is done.")
- mv_command = 'mv ' + fin_pred_dir_all + ' failed_prediction/'; os.system(mv_command)
+ mv_command = "mv " + fin_pred_dir_all + " failed_prediction/"
+ os.system(mv_command)
sys.exit()
-
-class prediction_all_multimer_FS():
- def __init__(self, pdb1_name, pdb2_name, search_dir, nMSA, model_type, search_multi_dir, pdb1, pdb2):
+class prediction_all_multimer_FS:
+ def __init__(
+ self, pdb1_name, pdb2_name, search_dir, nMSA, model_type, search_multi_dir, pdb1, pdb2
+ ):
### note: pdb1_name should be nomomer and pdb2_name should be multimer
num_seeds = 5 + nMSA
TER_count = 0
- pwd = os.getcwd() + '/'
- rm_converted_pdb = 'rm ' + pdb2_name + '_rmTER.pdb'; os.system(rm_converted_pdb)
-
+ rm_converted_pdb = "rm " + pdb2_name + "_rmTER.pdb"
+ os.system(rm_converted_pdb)
##############################################################
##### Predicting all CF-random runs before calculate TM-scores
- ##### Predicting the monomer with deep MSA
- #pre_random_seed = np.arange(0, 10, 1)
+ ##### Predicting the monomer with deep MSA
+ # pre_random_seed = np.arange(0, 10, 1)
pre_random_seed = random.sample(range(10), 1)
- random_seed_full_MSA = ''.join(map(str, pre_random_seed))
- output_dir = ' ' + pdb1_name + '_predicted_models_full_rand_' + str(random_seed_full_MSA)
+ random_seed_full_MSA = "".join(map(str, pre_random_seed))
+ output_dir = " " + pdb1_name + "_predicted_models_full_rand_" + str(random_seed_full_MSA)
##### Perform predction with full-length MSA
- MSA_full = CF_MSA_max(search_dir, output_dir, pdb1_name, random_seed_full_MSA, num_seeds, model_type)
+ MSA_full = CF_MSA_max(
+ search_dir, output_dir, pdb1_name, random_seed_full_MSA, num_seeds, model_type
+ )
##### Predicting the multimer with shallow random MSAs
##### check out varied-MSA with (msa-max: 1, 2, 4, 8, 16, 32, 64) (msa-extra: 2, 4, 8, 16, 32, 64, 128)
- output_dir = ' ' + pdb1_name + '_predicted_models_rand_'
+ output_dir = " " + pdb1_name + "_predicted_models_rand_"
random_seed = random.sample(range(100), 1)
- random_seed = ''.join(map(str, random_seed))
- search_dir_update = ' ' + search_multi_dir.replace(' ','') + ' '
-
- MSA_var = CF_MSA_var(pdb1_name, pdb2_name, search_dir_update, output_dir, random_seed, num_seeds, model_type)
+ random_seed = "".join(map(str, random_seed))
+ search_dir_update = " " + search_multi_dir.replace(" ", "") + " "
+ MSA_var = CF_MSA_var(
+ pdb1_name, pdb2_name, search_dir_update, output_dir, random_seed, num_seeds, model_type
+ )
################################################################
##### Calculating all TM-scores including monomer and multimer
@@ -293,30 +337,28 @@ def __init__(self, pdb1_name, pdb2_name, search_dir, nMSA, model_type, search_mu
TMscore_monomer = []
# Directory section
- gen_dir = 'multimer_prediction/' + pdb1_name
+ gen_dir = "multimer_prediction/" + pdb1_name
if not os.path.exists(gen_dir):
os.mkdir(gen_dir)
- pred_dir = pdb1_name + '*predicted_models_full*'
-
-
+ pred_dir = pdb1_name + "*predicted_models_full*"
##### Calculating the TM-score of fold-switching region
##### Extracting a signle chain from a multimer
TER_count = 0
- with open(pdb2, 'r') as file:
+ with open(pdb2, "r") as file:
for line in file:
TER = line.split()
TER_count += TER.count("TER")
-
+
line_cnt = 0
- #for i in range(0, TER_count):
+ # for i in range(0, TER_count):
for i in range(0, 2):
- output_file_name = pdb2_name.split('_')[0] + '_multi.pdb'
-
+ output_file_name = pdb2_name.split("_")[0] + "_multi.pdb"
+
if line_cnt == 0:
- with open(pdb2, 'r') as infile, open(output_file_name, 'w') as outfile:
+ with open(pdb2, "r") as infile, open(output_file_name, "w") as outfile:
for line in infile:
outfile.write(line)
line_cnt = line_cnt + 1
@@ -324,33 +366,44 @@ def __init__(self, pdb1_name, pdb2_name, search_dir, nMSA, model_type, search_mu
line_cnt = line_cnt + 1
break
- pdb2_name_multi = output_file_name.replace('.pdb','')
-
+ pdb2_name_multi = output_file_name.replace(".pdb", "")
##### Calculate TM-score of monomer
run_TMscore = TM_score_monomer(pred_dir, pdb1_name, pdb2_name)
- TMscore_monomer = np.array(run_TMscore.tmscores_monomer)
- TMscore_monomer = TMscore_monomer.reshape(2, num_seeds * 5); print(TMscore_monomer)
+ TMscore_monomer = np.array(run_TMscore.tmscores_monomer)
+ TMscore_monomer = TMscore_monomer.reshape(2, num_seeds * 5)
+ print(TMscore_monomer)
##### Calculate TM-score of fold-switching region
run_fs_TMscore = TM_score_fs(pred_dir, pdb1, pdb1_name, output_file_name, pdb2_name_multi)
TMscore_monomer_fs = np.array(run_fs_TMscore.tmscores_fs)
- TMscore_monomer_fs = TMscore_monomer_fs.reshape(2, num_seeds * 5); print(TMscore_monomer_fs)
+ TMscore_monomer_fs = TMscore_monomer_fs.reshape(2, num_seeds * 5)
+ print(TMscore_monomer_fs)
-
##### TM-score calculation for multimer
if np.any(TMscore_monomer[0, :] >= 0.5) and np.any(TMscore_monomer_fs[0, :] >= 0.4):
- pred_dir = pdb1_name + '_predicted_models_full_rand_' + str(random_seed_full_MSA) + '/'
- mv_folder_cmd = 'mv ' + pred_dir + ' multimer_prediction/' + pdb1_name
- print(mv_folder_cmd); os.system(mv_folder_cmd)
- np.savetxt('TMScore_full-MSA_' + pdb1_name + '.csv', TMscore_monomer, fmt='%2.3f')
-
- MSA_var.cal_TM_score_multi(pdb1_name, pdb2_name_multi, num_seeds, search_dir_update, output_dir, random_seed, pdb1, output_file_name)
- print(MSA_var.size_selection); self.size_selection = MSA_var.size_selection
-
+ pred_dir = pdb1_name + "_predicted_models_full_rand_" + str(random_seed_full_MSA) + "/"
+ mv_folder_cmd = "mv " + pred_dir + " multimer_prediction/" + pdb1_name
+ print(mv_folder_cmd)
+ os.system(mv_folder_cmd)
+ np.savetxt("TMScore_full-MSA_" + pdb1_name + ".csv", TMscore_monomer, fmt="%2.3f")
+
+ MSA_var.cal_TM_score_multi(
+ pdb1_name,
+ pdb2_name_multi,
+ num_seeds,
+ search_dir_update,
+ output_dir,
+ random_seed,
+ pdb1,
+ output_file_name,
+ )
+ print(MSA_var.size_selection)
+ self.size_selection = MSA_var.size_selection
else:
- pred_dir = pdb1_name + '_predicted_models*_rand_*/'
- mv_command = 'mv ' + pred_dir + ' failed_prediction/';
- print(mv_command); os.system(mv_command)
+ pred_dir = pdb1_name + "_predicted_models*_rand_*/"
+ mv_command = "mv " + pred_dir + " failed_prediction/"
+ print(mv_command)
+ os.system(mv_command)
print("Deep MSA cannot find the monomer")
sys.exit()
diff --git a/code/range_fs_pairs_all.txt b/cf_random/range_fs_pairs_all.txt
similarity index 100%
rename from code/range_fs_pairs_all.txt
rename to cf_random/range_fs_pairs_all.txt
diff --git a/cf_random/utils/__init__.py b/cf_random/utils/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/cf_random/utils/convert_multi_single.py b/cf_random/utils/convert_multi_single.py
new file mode 100644
index 0000000..0986ebb
--- /dev/null
+++ b/cf_random/utils/convert_multi_single.py
@@ -0,0 +1,64 @@
+#!/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+
+Converting the multimer PDB to a single PDB file
+
+Created on Tue Dec 24 14:51:00 2025
+@author: Myeongsang (Samuel) Lee
+"""
+
+import glob
+import os
+
+
+class convert_m2s:
+ def __init__(self, pred_path, pdb1_name, pdb2_name):
+
+ files_list = glob.glob(str(pred_path) + "/*_unrelaxed*pdb")
+ print(files_list)
+ for fl in files_list:
+ fl_name = fl.replace(".pdb", "")
+ predicted_name = fl_name.split("/")[1]
+ # convert = "awk '!/TER/' " + fl + " > " + fl_name + "_converted.pdb"
+ convert = (
+ "awk '!/TER/' "
+ + fl
+ + " > "
+ + fl_name.split("/")[0]
+ + "/"
+ + "rmTER_"
+ + predicted_name
+ + ".pdb"
+ )
+ print(convert)
+ os.system(convert)
+
+ convert_pdb2 = "awk '!/TER/' " + pdb2_name + ".pdb > " + pdb2_name + "_rmTER.pdb"
+ print(convert_pdb2)
+ os.system(convert_pdb2)
+
+ ##### extract a single chain from multimer
+ TER_count = 0
+
+ for fl in files_list:
+ fl_name = fl.replace(".pdb", "")
+ predicted_name = fl_name.split("/")[1]
+
+ with open(fl, "r") as file:
+ for line in file:
+ TER = line.split()
+ TER_count += TER.count("TER")
+
+ line_cnt = 0
+ for i in range(0, 2):
+ output_file_name = fl_name.split("/")[0] + "/" + "single_" + predicted_name + ".pdb"
+
+ if line_cnt == 0:
+ with open(fl, "r") as infile, open(output_file_name, "w") as outfile:
+ for line in infile:
+ outfile.write(line)
+ line_cnt = line_cnt + 1
+ if "TER " in line:
+ line_cnt = line_cnt + 1
+ break
diff --git a/cf_random/utils/fs_seq_compare.py b/cf_random/utils/fs_seq_compare.py
new file mode 100644
index 0000000..f6f9ff1
--- /dev/null
+++ b/cf_random/utils/fs_seq_compare.py
@@ -0,0 +1,238 @@
+import glob
+import os
+
+import numpy as np
+import pandas as pd
+from Bio import *
+from Bio.PDB.PDBParser import PDBParser
+from thefuzz import fuzz
+
+
+class fs_range:
+ def first_res_check(self, pdb1, pdb2):
+ # self.pdb1 = pdb1; self.pdb2 = pdb2
+
+ ## first residue index check
+ structure_1 = PDBParser().get_structure("pdb1", pdb1)
+ model_1 = structure_1[0]
+ print(model_1)
+
+ structure_2 = PDBParser().get_structure("pdb2", pdb2)
+ model_2 = structure_2[0]
+ print(model_2)
+
+ res_index_1 = []
+ res_index_2 = []
+
+ for chain_1 in model_1:
+ for i, residue in enumerate(chain_1.get_residues()):
+ # res_id = list(residue.id)
+ res_index_1.append(residue.id[1])
+ # print(residue.id[1])
+
+ for chain_2 in model_2:
+ for i, residue in enumerate(chain_2.get_residues()):
+ res_index_2.append(residue.id[1])
+
+ # print(int(res_index_1[0]))
+ # print(int(res_index_2[0]))
+
+ self.pdb1_res_index_1 = int(res_index_1[0])
+ self.pdb2_res_index_1 = int(res_index_2[0])
+
+ def pydssp(self, crys_pdb, pred_pdb, number, pdb_name):
+
+ ##### generating the command for pydssp
+ number = str(number)
+ command = (
+ "pydssp " + crys_pdb + " " + pred_pdb + " -o output_" + pdb_name + "_" + number + ".log"
+ )
+ print(command)
+ os.system(command)
+
+ def res_check(self, pdb1, pdb2, pdb1_name, pdb2_name):
+ current_dir = os.getcwd() + "/"
+ range_file = current_dir + "range_fs_pairs_all.txt"
+
+ crys_fs_res_1 = {}
+ crys_fs_res_2 = {}
+ pred_fs_res_1 = {}
+ pred_fs_res_2 = {}
+
+ with open(range_file, "r") as Infile:
+ next(Infile) # skip header line "# pdb1,pdb2,pred1,pred2"
+ for line in Infile:
+ line = line.strip()
+ n1, n2, p1, p2, m1, m2 = line.split(",")
+ # the value of the dictionary is a tuple
+ # the first element of tuple is the fs range in the original PDB
+ # followed by the range in the predicted model
+ # if n1 == pdb1_name and n2 == pdb2_name:
+ if (n1 == pdb1_name and n2 == pdb2_name) or (n2 == pdb1_name and n1 == pdb2_name):
+ # fs_res_1 = (m1); fs_res_2 = (m2)
+ crys_fs_res_1 = p1
+ crys_fs_res_2 = p2
+ pred_fs_res_1 = m1
+ pred_fs_res_2 = m2
+
+ # fs_res_1_update = fs_res_1.split("-"); fs_res_2_update = fs_res_2.split("-");
+ # print(fs_res_1_update, fs_res_2_update)
+
+ crys_fs_res_1_update = crys_fs_res_1.split("-")
+ crys_fs_res_2_update = crys_fs_res_2.split("-")
+ print(crys_fs_res_1_update, crys_fs_res_2_update)
+ pred_fs_res_1_update = pred_fs_res_1.split("-")
+ pred_fs_res_2_update = pred_fs_res_2.split("-")
+ print(pred_fs_res_1_update, pred_fs_res_2_update)
+
+ ##### convert list data to int
+ self.crys_fs_res_1_update = [int(i) for i in crys_fs_res_1_update]
+ self.crys_fs_res_2_update = [int(i) for i in crys_fs_res_2_update]
+
+ self.pred_fs_res_1_update = [int(i) for i in pred_fs_res_1_update]
+ self.pred_fs_res_2_update = [int(i) for i in pred_fs_res_2_update]
+
+ def __init__(self, pdb1, pdb2, pdb1_name, pdb2_name, pred_dir):
+ ##### check first residue index of query proteins
+ # fs_check = fs_range(pdb1, pdb2)
+ self.first_res_check(pdb1, pdb2)
+ print(" ")
+ print("checking first residue index")
+ print(self.pdb1_res_index_1)
+ print(self.pdb2_res_index_1)
+
+ pred_folder = pred_dir
+ # pred_folder = '3hdf_A_predicted_models_full_rand_12'
+ pred_path = pred_folder
+ print(pred_path)
+
+ pred_files = glob.glob(str(pred_path) + "/*_unrelaxed*pdb")
+
+ ##### read range file information
+ self.res_check(pdb1, pdb2, pdb1_name, pdb2_name)
+ print(self.crys_fs_res_1_update, self.pred_fs_res_1_update)
+ print(self.crys_fs_res_2_update, self.pred_fs_res_2_update)
+
+ crys1_fs_res_st = self.crys_fs_res_1_update[0]
+ crys1_fs_res_ed = self.crys_fs_res_1_update[1]
+ crys2_fs_res_st = self.crys_fs_res_2_update[0]
+ crys2_fs_res_ed = self.crys_fs_res_2_update[1]
+ pred1_fs_res_st = self.pred_fs_res_1_update[0]
+ pred1_fs_res_ed = self.pred_fs_res_1_update[1]
+ pred2_fs_res_st = self.pred_fs_res_2_update[0]
+ pred2_fs_res_ed = self.pred_fs_res_2_update[1]
+
+ if int(self.pdb1_res_index_1) > 1:
+ print("Initial residue is not starting from 1")
+ self.crys_fs_res_1_update[0] = self.crys_fs_res_1_update[0] - int(self.pdb1_res_index_1)
+ self.crys_fs_res_1_update[1] = self.crys_fs_res_1_update[1] - int(self.pdb1_res_index_1)
+ crys1_fs_res_st = self.crys_fs_res_1_update[0]
+ crys1_fs_res_ed = self.crys_fs_res_1_update[1]
+
+ if int(self.pdb2_res_index_1) > 1:
+ print("Initial residue is not starting from 1")
+ self.crys_fs_res_2_update[0] = self.crys_fs_res_2_update[0] - int(self.pdb2_res_index_1)
+ self.crys_fs_res_2_update[1] = self.crys_fs_res_2_update[1] - int(self.pdb2_res_index_1)
+ crys2_fs_res_st = self.crys_fs_res_2_update[0]
+ crys2_fs_res_ed = self.crys_fs_res_2_update[1]
+
+ print("checking starting and ending residue number")
+ print("")
+ print("crystal structure")
+ print(crys1_fs_res_st, crys1_fs_res_ed)
+ print(crys2_fs_res_st, crys2_fs_res_ed)
+ print("")
+ print("predicted structure")
+ print(pred1_fs_res_st, pred1_fs_res_ed)
+ print(pred2_fs_res_st, pred2_fs_res_ed)
+
+ ##### perform pydssp and calculate secondary structure similarity
+ index = 0
+ print(np.size(pred_files))
+ print(" ")
+ print("calculating with pdb1 ", pdb1_name)
+ for model in pred_files:
+ print(model)
+ self.pydssp(pdb1, model, index, pdb1_name)
+ dssp_read_tmp = pd.read_csv(
+ "output_" + pdb1_name + "_" + str(index) + ".log", sep=" ", header=None
+ )
+ ## seq1 = crystal structure, seq2 = predicted structure
+ print(dssp_read_tmp)
+ print(dssp_read_tmp[0].iloc[0])
+ seq1 = dssp_read_tmp[0].iloc[0]
+ print(dssp_read_tmp[0].iloc[1])
+ seq2 = dssp_read_tmp[0].iloc[1]
+
+ # crystal protein 1 and predictions
+ print(" ")
+ print(seq1[crys1_fs_res_st:crys1_fs_res_ed])
+ print(seq2[pred2_fs_res_st:pred2_fs_res_ed])
+ if (
+ fuzz.ratio(
+ seq1[crys1_fs_res_st:crys1_fs_res_ed], seq2[pred2_fs_res_st:pred2_fs_res_ed]
+ )
+ > 85
+ ):
+ print("fs region is correctly predicted")
+ f = open("fs_compare_output_" + pdb1_name + ".log", "w")
+ f.write("success")
+ f.close()
+ break
+ elif index == (int(np.size(pred_files)) - 1):
+ print("fs region is not correctly predicted")
+
+ # command = 'mv ' + pred_dir_add + pred_dir_fal
+ # print(command); os.system(command)
+ # command = 'mv ' + pred_dir_suc + pred_dir_fal + pdb1_name + '/'
+ # print(command); os.system(command)
+
+ # command = 'rm *' + pdb1_name + '*csv'
+ # print(command); os.system(command)
+ print("calculating TM-score of fs with alternative pdb")
+
+ index = 0
+ print(" ")
+ print("calculating with pdb2 ", pdb2_name)
+
+ for model in pred_files:
+ self.pydssp(pdb2, model, index, pdb1_name)
+ dssp_read_tmp = pd.read_csv(
+ "output_" + pdb1_name + "_" + str(index) + ".log", sep=" ", header=None
+ )
+ ## seq1 = crystal structure, seq2 = predicted structure
+ print(dssp_read_tmp[0].iloc[0])
+ seq1 = dssp_read_tmp[0].iloc[0]
+ print(dssp_read_tmp[0].iloc[1])
+ seq2 = dssp_read_tmp[0].iloc[1]
+
+ # crystal protein 1 and predictions
+ print(" ")
+ print(seq1[crys2_fs_res_st:crys2_fs_res_ed])
+ print(seq2[pred2_fs_res_st:pred2_fs_res_ed])
+ if (
+ fuzz.ratio(
+ seq1[crys2_fs_res_st:crys2_fs_res_ed],
+ seq2[pred2_fs_res_st:pred2_fs_res_ed],
+ )
+ > 85
+ ):
+ print("fs region is correctly predicted")
+ break
+ elif index == (int(np.size(pred_files)) - 1):
+ print("fs region is not correctly predicted")
+
+ f = open("fs_compare_output_" + pdb1_name + ".log", "w")
+ f.write("fail")
+ f.close()
+
+ # command = 'mv ' + pred_dir_add + pred_dir_fal
+ # print(command); os.system(command)
+ # command = 'mv ' + pred_dir_suc + pred_dir_fal + pdb1_name + '/'
+ # print(command); os.system(command)
+
+ else:
+ index += 1
+
+ else:
+ index += 1
diff --git a/code/search_w_foldseek_cluster.py b/cf_random/utils/search_foldseek_cluster.py
similarity index 56%
rename from code/search_w_foldseek_cluster.py
rename to cf_random/utils/search_foldseek_cluster.py
index cfc3a84..5024742 100644
--- a/code/search_w_foldseek_cluster.py
+++ b/cf_random/utils/search_foldseek_cluster.py
@@ -1,108 +1,114 @@
import glob
-import shutil, os, sys, re
+import os
+import re
+import shutil
import subprocess
-import numpy as np
-import matplotlib.pyplot as plt
+import sys
+import matplotlib.pyplot as plt
+import MDAnalysis as mda
+import numpy as np
+from MDAnalysis.analysis.dssp import DSSP
from scipy import stats
from scipy.spatial import distance
-
+from sklearn.cluster import HDBSCAN
from sklearn.decomposition import PCA
from sklearn.metrics import silhouette_score
-from sklearn.cluster import HDBSCAN
from sklearn.preprocessing import minmax_scale
-import MDAnalysis as mda
-from MDAnalysis.analysis.dssp import DSSP
+try:
+ import pymol
+except ImportError:
+ pymol = None
-import pymol
-
-class blind_screening():
+class blind_screening:
def cluster_structures(X):
"""
loop through values of k and define best value of k with silhouette_score
-
- Input:
+
+ Input:
X : np.ndarray (n, m) | result of PCA
-
- Output:
+
+ Output:
cluster_labels : (n, 1) | list of optimal clusters for X
"""
-
- k_range = range(2,51)
+
+ k_range = range(2, 51)
sil_score = []
for k in k_range:
- clustering = HDBSCAN(min_cluster_size=k,min_samples=1)
+ clustering = HDBSCAN(min_cluster_size=k, min_samples=1)
clustering.fit(X)
if len(set(clustering.labels_)) > 1 and len(set(clustering.labels_)) < len(X):
- score = silhouette_score(X, clustering.labels_, metric='euclidean')
+ score = silhouette_score(X, clustering.labels_, metric="euclidean")
sil_score.append(score)
else:
sil_score.append(-1)
-
+
opt_k = k_range[np.argmax(sil_score)]
clustering = HDBSCAN(min_cluster_size=opt_k)
clustering.fit(X)
return clustering.labels_
-
+
def k_medoids(X, l, labels, k=3, max_iter=100):
"""
K-Medoid algorithm to find suitable representative structures from each cluster defined by HDBSCAN.
-
+
Input:
X: np.ndarray (n, m) | all points from one HDBSCAN cluster
- k: number of medoids |
+ k: number of medoids |
max_iter: maximum number of iterations allowed to minimize the distance
l: current HDBSAN label
labels: full list of HDBSCAN labels
-
+
Output:
medoids: indices of the K medoids
total_cost: sum of distances of each point to its medoid
"""
np.random.seed(42)
-
- #start with random k points
+
+ # start with random k points
temp = X.copy()
mask = np.zeros(X.shape, dtype=bool)
mask[np.argwhere(labels == l)] = True
-
- #check the number of points in a cluster
- #if less than 4 just return those indices
- _, cluster_count = np.unique(mask[:,0], return_counts=True) # count = False, True
- cluster_count = cluster_count[[idx for idx, val in enumerate(_) if val == True][0]]#<-account for the case of one cluster
-
+
+ # check the number of points in a cluster
+ # if less than 4 just return those indices
+ _, cluster_count = np.unique(mask[:, 0], return_counts=True) # count = False, True
+ cluster_count = cluster_count[
+ [idx for idx, val in enumerate(_) if val == True][0]
+ ] # <-account for the case of one cluster
+
if cluster_count < 4:
- return np.ravel(np.argwhere(mask[:,0] == True)), np.nan
+ return np.ravel(np.argwhere(mask[:, 0] == True)), np.nan
# block out values that are not within the current HDBSCAN group
temp[~mask] = 9999
-
+
number_samples = temp.shape[0]
medoids = np.random.choice(number_samples, k, replace=False)
-
- #distance matrix of randomly chosen points
- D = distance.cdist(temp, temp[medoids], metric='euclidean')
+
+ # distance matrix of randomly chosen points
+ D = distance.cdist(temp, temp[medoids], metric="euclidean")
tot_cost = np.sum(np.min(D, axis=1))
-
+
itr = 0
while itr < max_iter:
reduced = False
-
- #loop through all possibilities
+
+ # loop through all possibilities
for m_idx in range(k):
for current_idx in range(number_samples):
if current_idx in medoids:
continue
-
+
new_medoids = medoids.copy()
new_medoids[m_idx] = current_idx
-
- #new distance matrix
- D_new = distance.cdist(temp, temp[new_medoids], metric='euclidean')
+
+ # new distance matrix
+ D_new = distance.cdist(temp, temp[new_medoids], metric="euclidean")
new_cost = np.sum(np.min(D_new, axis=1))
-
- #if the cost has been reduced move onto the the next sample
+
+ # if the cost has been reduced move onto the the next sample
if new_cost < tot_cost:
medoids = new_medoids
tot_cost = new_cost
@@ -110,128 +116,142 @@ def k_medoids(X, l, labels, k=3, max_iter=100):
break
if reduced:
break
-
+
if not reduced:
- #If there was no improvement we should be converged
+ # If there was no improvement we should be converged
break
- itr+=1
+ itr += 1
return medoids, tot_cost
-
def __init__(self, pdb1_name, blind_path):
- # def main():
+ # def main():
"""
requires Foldseek and Pymol
-
+
Find all pdb files from CF-Random generated directories.
- This script will automatically generate a Foldseek database of these structures
+ This script will automatically generate a Foldseek database of these structures
then calculate a similarity matrix of all structures based on bit-score.
similarity matrix -> PCA -> HDBSCAN -> K-medoids -> structures of interest.
-
+
The final output is then a png file showing the result of PCA and HDBSCAN
a text file containing the coordinates of the structures of interest, file name, and group ID
finally this script will automatically generate a pse file of the structures_of_interest
"""
-
- #_______________collect all pdb files that CF-Random generated_____________________________
- db_directory = blind_path + "/pdbs_for_db/"
- #db_directory = "/pdbs_for_db/"
+ # _______________collect all pdb files that CF-Random generated_____________________________
+ db_directory = blind_path + "/pdbs_for_db/"
+ # db_directory = "/pdbs_for_db/"
if not os.path.isdir(db_directory):
os.mkdir(db_directory)
- #pdb_files = glob.glob("./**/*.pdb", recursive=True)
+ # pdb_files = glob.glob("./**/*.pdb", recursive=True)
pdb_files = glob.glob(blind_path + "/**/*.pdb", recursive=True)
pdb_files = [file for file in pdb_files if db_directory not in file]
print("Gathering pdb pdb files for self-search")
for file in pdb_files:
- dest_name = file.replace('/','-')
+ dest_name = file.replace("/", "-")
if not os.path.isfile(db_directory + dest_name[17:]):
shutil.copyfile(file, db_directory + dest_name[17:])
- #__________________________________________________________________________________________
-
-
+ # __________________________________________________________________________________________
+
print("Creating database...")
create_db = ["foldseek", "createdb", db_directory, db_directory + "DB"]
if not os.path.isfile(db_directory + "DB"):
try:
- response = subprocess.run(create_db, capture_output=True, text=True, check=True )
+ response = subprocess.run(create_db, capture_output=True, text=True, check=True)
except subprocess.CalledProcessError as e:
print("ERROR:\n", e.stderr)
-
- print('Succes database is up!')
+
+ print("Succes database is up!")
else:
print("found an existing DB")
-
- #________________Calculate foldseek self comparison of all predicted structures____________
-
+
+ # ________________Calculate foldseek self comparison of all predicted structures____________
+
for file in pdb_files:
- foldseek_run = ["foldseek", "easy-search", file, db_directory + "DB", file.replace(".pdb","-self.foldseek"), blind_path + "/tmp", "--format-mode", "0", "--format-output", "query,target,alntmscore,qaln,taln,alnlen,evalue,bits", "--exhaustive-search", "1", "-s", "9.5"]
- if not os.path.isfile(file.replace(".pdb","-self.foldseek")):
+ foldseek_run = [
+ "foldseek",
+ "easy-search",
+ file,
+ db_directory + "DB",
+ file.replace(".pdb", "-self.foldseek"),
+ blind_path + "/tmp",
+ "--format-mode",
+ "0",
+ "--format-output",
+ "query,target,alntmscore,qaln,taln,alnlen,evalue,bits",
+ "--exhaustive-search",
+ "1",
+ "-s",
+ "9.5",
+ ]
+ if not os.path.isfile(file.replace(".pdb", "-self.foldseek")):
response = subprocess.run(foldseek_run, capture_output=True, text=True, check=True)
try:
- response = subprocess.run(foldseek_run, capture_output=True, text=True, check=True)
+ response = subprocess.run(
+ foldseek_run, capture_output=True, text=True, check=True
+ )
print(response.check_returncode())
except subprocess.CalledProcessError as e:
print("foldseek failed to run {:}".format(file))
print("Error:", e.stderr)
- print('{:} succeeded!!!'.format(file))
+ print("{:} succeeded!!!".format(file))
else:
- print("{:} already exists".format(file.replace(".pdb","-self.foldseek")))
-
- #__________________________________________________________________________________________
-
-
- #__________Populate a correlation matrix with bit scores_______________________________________________
-
- #everything will be sorted by the text of the file name
+ print("{:} already exists".format(file.replace(".pdb", "-self.foldseek")))
+
+ # __________________________________________________________________________________________
+
+ # __________Populate a correlation matrix with bit scores_______________________________________________
+
+ # everything will be sorted by the text of the file name
files = glob.glob(blind_path + "/**/*-self.foldseek")
-
- #first remove any outliers from the dssp loop distribution, they tend to be unfolded predictions
- files_dssp = [];files_count = [];
+
+ # first remove any outliers from the dssp loop distribution, they tend to be unfolded predictions
+ files_dssp = []
+ files_count = []
for file in files:
- u = mda.Universe(file.replace("-self.foldseek",".pdb"))
+ u = mda.Universe(file.replace("-self.foldseek", ".pdb"))
s = DSSP(u).run().results.dssp[0]
dssp, count = np.unique(s, return_counts=True)
# ['-' 'E' 'H']
if len(dssp) < 3:
- if '-' not in dssp:
- dssp = np.insert(dssp, 0 ,'-')
- count = np.insert(count,0, 0)
- if 'E' not in dssp:
- dssp = np.insert(dssp, 1 ,'E')
- count = np.insert(count,1, 0)
- if 'H' not in dssp:
- dssp = np.insert(dssp, 2 ,'H')
- count = np.insert(count,2, 0)
+ if "-" not in dssp:
+ dssp = np.insert(dssp, 0, "-")
+ count = np.insert(count, 0, 0)
+ if "E" not in dssp:
+ dssp = np.insert(dssp, 1, "E")
+ count = np.insert(count, 1, 0)
+ if "H" not in dssp:
+ dssp = np.insert(dssp, 2, "H")
+ count = np.insert(count, 2, 0)
files_dssp.append(dssp)
files_count.append(count)
- files_dssp = np.array(files_dssp); files_count = np.array(files_count);
+ files_dssp = np.array(files_dssp)
+ files_count = np.array(files_count)
z_scores = stats.zscore(files_count[:, 0])
outlier_idx = np.argwhere(z_scores > 3)
-
- # remove unfolded proteins from file list
+
+ # remove unfolded proteins from file list
files = np.array(files)
mask = np.zeros(files.shape, dtype=bool)
mask[outlier_idx] = True
for file in files[mask]:
- print("removed from analysis: ",file.replace("-self.foldseek",".pdb"))
+ print("removed from analysis: ", file.replace("-self.foldseek", ".pdb"))
files = files[~mask]
files = sorted(files)
- files_pdb = [file.replace('/','-')[17:].replace("-self.foldseek","") for file in files]
- #files_pdb = [file.replace("-self.foldseek",".pdb") for file in files]
+ files_pdb = [file.replace("/", "-")[17:].replace("-self.foldseek", "") for file in files]
+ # files_pdb = [file.replace("-self.foldseek",".pdb") for file in files]
corr_mtx = []
-
- df = {}
+
for file in files:
# it is possible for predictions to be so different that it isn't returned with a bit_score
# in that case we return a zero
- dict_with_all = {file:[0] for file in files_pdb}
- with open(file, 'r') as _:
- data = [l.rstrip().split('\t') for l in _]
+ dict_with_all = {file: [0] for file in files_pdb}
+ with open(file, "r") as _:
+ data = [l.rstrip().split("\t") for l in _]
for d in data:
dict_with_all[d[1]] = d
print(dict_with_all[d[1]])
- #bug in foldseek occasionally returns -2,147,483,648
+ # bug in foldseek occasionally returns -2,147,483,648
_temp = []
for pdb in files_pdb:
print("testing", pdb)
@@ -241,68 +261,67 @@ def __init__(self, pdb1_name, blind_path):
_temp.append(0)
else:
_temp.append(x)
-
+
corr_mtx.append(_temp)
-
+
corr_mtx = np.array(corr_mtx)
-
- #normalize each row and subtract top model from full MSA depth to give more
- #specific meaning to variance
+
+ # normalize each row and subtract top model from full MSA depth to give more
+ # specific meaning to variance
norm_corr_mtx = minmax_scale(corr_mtx, axis=1)
- norm_corr_mtx = (norm_corr_mtx + norm_corr_mtx.T) /2
-
+ norm_corr_mtx = (norm_corr_mtx + norm_corr_mtx.T) / 2
+
sklearn_pca = PCA(n_components=4)
pca = sklearn_pca.fit_transform(norm_corr_mtx)
labels = blind_screening.cluster_structures(pca)
-
- plt.figure(figsize=(8,6))
- plt.scatter(pca[:,0], pca[:,1], c=labels, cmap='viridis', s=45)
- plt.savefig(blind_path + '/' + pdb1_name + '-cluster.png')
+
+ plt.figure(figsize=(8, 6))
+ plt.scatter(pca[:, 0], pca[:, 1], c=labels, cmap="viridis", s=45)
+ plt.savefig(blind_path + "/" + pdb1_name + "-cluster.png")
plt.clf()
-
-
- #find the structures_of_interest
+
+ # find the structures_of_interest
files_of_interest = []
pca_of_interest = []
for l in np.unique(labels):
- kmed_idx, tot_cost = blind_screening.k_medoids(pca, l, labels)
+ kmed_idx, tot_cost = blind_screening.k_medoids(pca, l, labels)
for idx in kmed_idx:
files_of_interest.append([files[idx], l])
pca_of_interest.append(pca[idx])
-
- #create pse file with colors that match viridis colors in cluster.png
- viridis = plt.get_cmap('viridis',len(files_of_interest))
+
+ # create pse file with colors that match viridis colors in cluster.png
+ viridis = plt.get_cmap("viridis", len(files_of_interest))
largest_group_num = max(files_of_interest, key=lambda x: x[1])
- pymol.cmd.load(files[0].replace('-self.foldseek','.pdb'), 'Dominant')
- with open(blind_path + '/' + pdb1_name + "-structures_of_interest.csv", "w") as file:
+ pymol.cmd.load(files[0].replace("-self.foldseek", ".pdb"), "Dominant")
+ with open(blind_path + "/" + pdb1_name + "-structures_of_interest.csv", "w") as file:
file.write("group, file, pca_1, pca_2\n")
-
- with open(blind_path + '/' + pdb1_name + "-structures_of_interest.csv", "a") as file:
+
+ with open(blind_path + "/" + pdb1_name + "-structures_of_interest.csv", "a") as file:
for idx, foi in enumerate(files_of_interest):
if largest_group_num[1] == -1:
color = 0
else:
- color = (foi[1] + 1) / (largest_group_num[1]+1)
+ color = (foi[1] + 1) / (largest_group_num[1] + 1)
color = viridis(color)[:3]
- new_name = re.findall(r'(full)|(max\w+)|(rank_\d+)', foi[0])
- new_name = str(idx)+ '_' + '_'.join([i for n in new_name for i in n if i != ''])
- pymol.cmd.load(foi[0].replace('-self.foldseek','.pdb'), new_name)
- pymol.cmd.align(new_name,'Dominant')
- color_name = 'col_'+str(foi[1])
+ new_name = re.findall(r"(full)|(max\w+)|(rank_\d+)", foi[0])
+ new_name = str(idx) + "_" + "_".join([i for n in new_name for i in n if i != ""])
+ pymol.cmd.load(foi[0].replace("-self.foldseek", ".pdb"), new_name)
+ pymol.cmd.align(new_name, "Dominant")
+ color_name = "col_" + str(foi[1])
pymol.cmd.set_color(color_name, color)
- pymol.cmd.color(color_name,new_name)
- file.write(f"{foi[1]}, {foi[0]}, {pca_of_interest[idx][0]}, {pca_of_interest[idx][1]}\n")
-
- pymol.cmd.save(blind_path + '/' + pdb1_name + '-structures_of_interest.pse', 'pse')
- pymol.cmd.delete('all')
+ pymol.cmd.color(color_name, new_name)
+ file.write(
+ f"{foi[1]}, {foi[0]}, {pca_of_interest[idx][0]}, {pca_of_interest[idx][1]}\n"
+ )
+
+ pymol.cmd.save(blind_path + "/" + pdb1_name + "-structures_of_interest.pse", "pse")
+ pymol.cmd.delete("all")
pymol.cmd.reinitialize()
-
- #save all data with clusters
- with open("structures_all.csv", 'w') as file:
+
+ # save all data with clusters
+ with open("structures_all.csv", "w") as file:
file.write("group, file, pca_1, pca_2\n")
for idx, f in enumerate(files):
file.write(f"{labels[idx]},{f},{pca[idx, 0]},{pca[idx, 1]}\n")
-
+
sys.exit()
-
-
diff --git a/code/split_chains.py b/cf_random/utils/split_chains.py
similarity index 54%
rename from code/split_chains.py
rename to cf_random/utils/split_chains.py
index 8c19a34..1e46a9b 100644
--- a/code/split_chains.py
+++ b/cf_random/utils/split_chains.py
@@ -7,47 +7,61 @@
@author: Myeongsang (Samuel) Lee
"""
-import re
-import Bio
-import os
-from os import listdir
-from os.path import isfile, join
-import sys
-from pathlib import Path
-import numpy as np
-from numpy import genfromtxt
-import glob
-import linecache
-import argparse
+import argparse
+import linecache
parser = argparse.ArgumentParser()
-parser.add_argument("--pdb1", type=str, help='PDB structure for the target crystal structure')
-args = parser.parse_args()
-
-
-chain_char = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
- 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']
-
-pdb1 = args.pdb1; pdb1_name = pdb1.replace('.pdb','')
+parser.add_argument("--pdb1", type=str, help="PDB structure for the target crystal structure")
+args = parser.parse_args()
+
+
+chain_char = [
+ "A",
+ "B",
+ "C",
+ "D",
+ "E",
+ "F",
+ "G",
+ "H",
+ "I",
+ "J",
+ "K",
+ "L",
+ "M",
+ "N",
+ "O",
+ "P",
+ "Q",
+ "R",
+ "S",
+ "T",
+ "U",
+ "V",
+ "W",
+ "X",
+ "Y",
+ "Z",
+]
+
+pdb1 = args.pdb1
+pdb1_name = pdb1.replace(".pdb", "")
TER_count = 0
-with open(pdb1, 'r') as file:
+with open(pdb1, "r") as file:
for line in file:
TER = line.split()
TER_count += TER.count("TER")
-
-
-
line_cnt = 0
for i in range(0, TER_count):
- output_file_name = pdb1_name + '_' + chain_char[i] + '.pdb'
+ output_file_name = pdb1_name + "_" + chain_char[i] + ".pdb"
if line_cnt == 0:
- with open(pdb1, 'r') as infile, open(output_file_name, 'w') as outfile:
+ with open(pdb1, "r") as infile, open(output_file_name, "w") as outfile:
for line in infile:
outfile.write(line)
line_cnt = line_cnt + 1
@@ -56,7 +70,7 @@
break
else:
- with open(pdb1, 'r') as infile, open(output_file_name, 'w') as outfile:
+ with open(pdb1, "r") as infile, open(output_file_name, "w") as outfile:
for line in infile:
linecache.getline(pdb1, line_cnt)
outfile.write(linecache.getline(pdb1, line_cnt))
@@ -64,8 +78,3 @@
if linecache.getline(pdb1, line_cnt) == "TER ":
line_cnt = line_cnt + 1
break
-
-
-
-
-
diff --git a/code/split_multi_single.py b/cf_random/utils/split_multi_single.py
similarity index 55%
rename from code/split_multi_single.py
rename to cf_random/utils/split_multi_single.py
index 35e948f..df9b758 100644
--- a/code/split_multi_single.py
+++ b/cf_random/utils/split_multi_single.py
@@ -7,50 +7,60 @@
Created on Tue Dec 24 14:51:00 2025
@author: Myeongsang (Samuel) Lee
"""
-import re
-import Bio
-import os
-from os import listdir
-from os.path import isfile, join
-import sys
-from pathlib import Path
-import numpy as np
-from numpy import genfromtxt
+
import glob
-import random
import linecache
-import argparse
-
-class split_multi_to_chains():
+class split_multi_to_chains:
def __init__(self, pred_path):
-
- chain_char = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
- 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']
-
-
- current_dir = os.getcwd() + '/'
- data_dir = Path(pred_path) # Path to the predicted models
-
- files_list = (glob.glob(str(pred_path) + "/*_unrelaxed*pdb"))
+ chain_char = [
+ "A",
+ "B",
+ "C",
+ "D",
+ "E",
+ "F",
+ "G",
+ "H",
+ "I",
+ "J",
+ "K",
+ "L",
+ "M",
+ "N",
+ "O",
+ "P",
+ "Q",
+ "R",
+ "S",
+ "T",
+ "U",
+ "V",
+ "W",
+ "X",
+ "Y",
+ "Z",
+ ]
+
+ files_list = glob.glob(str(pred_path) + "/*_unrelaxed*pdb")
for fl in files_list:
TER_count = 0
- with open(fl, 'r') as file:
- for line in file:
+ with open(fl, "r") as file:
+ for line in file:
TER = line.split()
TER_count += TER.count("TER")
line_cnt = 0
- fl_name = fl.replace('.pdb','')
+ fl_name = fl.replace(".pdb", "")
for i in range(0, TER_count):
- output_file_name = fl_name + '_chain_' + chain_char[i] + '.pdb'
+ output_file_name = fl_name + "_chain_" + chain_char[i] + ".pdb"
if line_cnt == 0:
- with open(fl, 'r') as infile, open(output_file_name, 'w') as outfile:
+ with open(fl, "r") as infile, open(output_file_name, "w") as outfile:
for line in infile:
outfile.write(line)
line_cnt = line_cnt + 1
@@ -59,7 +69,7 @@ def __init__(self, pred_path):
break
else:
- with open(fl, 'r') as infile, open(output_file_name, 'w') as outfile:
+ with open(fl, "r") as infile, open(output_file_name, "w") as outfile:
for line in infile:
linecache.getline(fl, line_cnt)
outfile.write(linecache.getline(fl, line_cnt))
@@ -67,9 +77,3 @@ def __init__(self, pred_path):
if linecache.getline(fl, line_cnt) == "TER ":
line_cnt = line_cnt + 1
break
-
-
-
-
-
-
diff --git a/code/PLOT_AC.py b/code/PLOT_AC.py
deleted file mode 100644
index e80edca..0000000
--- a/code/PLOT_AC.py
+++ /dev/null
@@ -1,139 +0,0 @@
-#!/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Thu Feb 22 13:40:00 2024
-
-@author: Myeongsang (Samuel) Lee
-"""
-import os
-import sys
-import textalloc as ta
-import seaborn as sns
-from pathlib import Path
-import numpy as np
-from numpy import genfromtxt
-from matplotlib import pyplot as plt
-from adjustText import adjust_text
-import glob
-
-
-class plot_2D_scatter_AC():
- def __init__(self, full_cate, random_cate, pdb1, pdb1_name, pdb2, pdb2_name, nMSA, nENS, model_type):
- ##### load TM-scores both full- and ramdon-MSA
- TMs_full = genfromtxt("TMScore_" + full_cate + "_" + pdb1_name + ".csv", delimiter = ' ' )
- TMs_random = genfromtxt("TMScore_" + random_cate + "_" + pdb1_name + ".csv", delimiter = ' ' )
-
- ############ load pLDDT scores both full- and ramdon-MSA
- plddt_full = genfromtxt("plddt_" + full_cate + "_" + pdb1_name + ".csv", delimiter = ' ' )
- plddt_random = genfromtxt("plddt_" + random_cate + "_" + pdb1_name + ".csv", delimiter = ' ' )
-
-
- #################################################################
- ########### getting the TM-score values of fold-switching region
-
- pwd = os.getcwd() + '/'
-
-
- ######### plotting the TM-score values as 2D scatter plot
- print(" ")
- print("Size of column: ", TMs_random.shape[-1])
- print("Size of row: ", TMs_random.shape[0])
- print("Dimension: ", TMs_random.ndim)
-
- print(" ")
- print(TMs_random)
- print(" ")
- print(TMs_full)
-
-
- print("checking plddt")
- print(plddt_full)
- print(plddt_random)
-
- plddt_random = np.reshape(plddt_random, (7, (nMSA + 5) * 5))
- print(plddt_random)
-
-
- if model_type != 'alphafold2_multimer_v3':
- TMs_full_resh = np.reshape(TMs_full, ((((nMSA + 5) * 2), 5)))
-
- #f1 = np.concatenate((TMs_addition[0:(nENS + 20), :], TMs_full_resh[0:(nMSA + 5), :]), axis=0)
- #print(f1)
- #f2 = np.concatenate((TMs_addition[(nENS + 20):(nENS + 20) * 2, :], TMs_full_resh[(nMSA + 5):(nMSA + 5) * 2, :]), axis=0)
- #print(f2)
- else:
- TMs_full_resh = np.reshape(TMs_full, (((nMSA + 5) * 2), 5))
-
- #f1 = np.concatenate((TMs_addition[0:(nENS + 20), :], TMs_full_resh[0:(nMSA + 5), :]), axis=0)
- #print(f1)
- #f2 = np.concatenate((TMs_addition[(nENS + 20):(nENS + 20) * 2, :], TMs_full_resh[(nMSA + 5):(nMSA + 5) * 2, :]), axis=0)
- #print(f2)
-
-
-
-
-
-
- if model_type != 'alphafold2_multimer_v3':
- #if np.all(f1 > f2) or np.all(f1 < f2):
- # print("Prediction is biased"); sys.exit()
- #else:
- # print("Prediction is not biased")
-
- plt.figure(0)
- for ii in range(0, int(TMs_random.shape[0] / 2) ):
- plt.scatter(TMs_random[ii * 2, :], TMs_random[(ii * 2 + 1), :], c = plddt_random[ii, :], cmap='rocket_r', vmin=50, vmax=100, s=35, marker="o")
-
- clb=plt.colorbar()
- clb.ax.tick_params(labelsize=15)
-
- plt.scatter(TMs_full_resh[0 : (nMSA + 5), :], TMs_full_resh[(nMSA + 5):(nMSA + 5) * 2, :], c = plddt_full, cmap='rocket_r', vmin=50, vmax=100, s=35, marker="o")
-
- x = [ 0 , 1 ]
- y = [ 0 , 1 ]
-
- plt.ylim(0, 1)
- plt.xlim(0, 1)
-
- plt.plot(x, y, linestyle='dashed', color = 'black')
-
- plt.xticks(fontsize=15)
- plt.yticks(fontsize=15)
-
- plt.xlabel('TM-Score similar to fold1(' + pdb1_name + ')', fontsize=15); plt.ylabel('TM-score similar to fold2(' + pdb2_name + ')', fontsize=15)
- plt.savefig('TMscore_' + full_cate + '_' + pdb1_name + '.png', transparent = True)
-
-
- else:
- ##print("Not determine for the multimer mode")
- #if np.all(f1 > f2) or np.all(f1 < f2):
- # print("Prediction is biased"); sys.exit()
- #else:
- # print("Prediction is not biased")
-
- plt.figure(0)
- for ii in range(0, int(TMs_random.shape[0] / 2) ):
- plt.scatter(TMs_random[ii * 2, :], TMs_random[(ii * 2 + 1), :], c = plddt_random[ii, :], cmap='rocket_r', vmin=50, vmax=100, s=35, marker="o")
-
-
- clb=plt.colorbar()
- clb.ax.tick_params(labelsize=15)
-
- plt.scatter(TMs_full_resh[0 : (nMSA + 5), :], TMs_full_resh[(nMSA + 5):(nMSA + 5) * 2, :], c = plddt_full, cmap='rocket_r', vmin=50, vmax=100, s=35, marker="o")
-
-
-
- x = [ 0 , 1 ]
- y = [ 0 , 1 ]
-
- plt.ylim(0, 1)
- plt.xlim(0, 1)
-
- plt.plot(x, y, linestyle='dashed', color = 'black')
-
- plt.xticks(fontsize=15)
- plt.yticks(fontsize=15)
-
- plt.xlabel('TM-Score similar to fold1(' + pdb1_name + ')', fontsize=15); plt.ylabel('TM-score similar to fold2(' + pdb2_name + ')', fontsize=15)
- plt.savefig('TMscore_' + full_cate + '_' + pdb1_name + '.png', transparent = True)
-
diff --git a/code/PLOT_FS.py b/code/PLOT_FS.py
deleted file mode 100644
index fb6b6f7..0000000
--- a/code/PLOT_FS.py
+++ /dev/null
@@ -1,119 +0,0 @@
-#!/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Thu Feb 22 13:40:00 2024
-
-@author: Myeongsang (Samuel) Lee
-"""
-import os
-import sys
-import textalloc as ta
-import seaborn as sns
-from pathlib import Path
-import numpy as np
-from numpy import genfromtxt
-from matplotlib import pyplot as plt
-from adjustText import adjust_text
-import glob
-
-from cal_tmscore_fs_flmsa import *
-from fs_seq_compare import *
-
-class plot_2D_scatter():
- def __init__(self, full_cate, random_cate, pdb1, pdb1_name, pdb2, pdb2_name, nMSA, nENS):
- ##### load TM-scores both full- and ramdon-MSA
- TMs_full = genfromtxt("TMScore_" + full_cate + "_" + pdb1_name + ".csv", delimiter = ' ' )
- TMs_random = genfromtxt("TMScore_" + random_cate + "_" + pdb1_name + ".csv", delimiter = ' ' )
-
- ############ load pLDDT scores both full- and ramdon-MSA
- plddt_full = genfromtxt("plddt_" + full_cate + "_" + pdb1_name + ".csv", delimiter = ' ' )
- plddt_random = genfromtxt("plddt_" + random_cate + "_" + pdb1_name + ".csv", delimiter = ' ' )
-
-
- #################################################################
- ########### getting the TM-score values of fold-switching region
-
- pwd = os.getcwd() + '/'
-
- fs_full_TMs = genfromtxt("TMScore_fs_" + full_cate + "_" + pdb1_name + ".csv", delimiter = ' ')
- TMs_fs_full = fs_full_TMs
- fs_random_TMs = genfromtxt("TMScore_fs_" + random_cate + "_" + pdb1_name + ".csv", delimiter = ' ')
- TMs_fs_random = fs_random_TMs
-
-
-
- ######### plotting the TM-score values as 2D scatter plot
- print(" ")
- print("Size of column: ", TMs_random.shape[-1])
- print("Size of row: ", TMs_random.shape[0])
- print("Dimension: ", TMs_random.ndim)
-
- print(" ")
- print(TMs_random)
- print(" ")
- print(TMs_full)
-
-
- print("checking plddt")
- print(plddt_full)
- print(plddt_random)
-
- plddt_random = np.reshape(plddt_random, (7, (nMSA + 5) * 5))
- TMs_fs_full_resh = np.reshape(TMs_fs_full, ((((nMSA + 5) * 2), 5)))
-
-
-
-
- plt.figure(0)
-
-
- for ii in range(0, int(TMs_random.shape[0] / 2) ):
- plt.scatter(TMs_random[ii * 2, :], TMs_random[(ii * 2 + 1), :], c = plddt_random[ii, :], cmap='rocket_r', vmin=50, vmax=100, s=35, marker="o")
-
-
- clb=plt.colorbar()
- clb.ax.tick_params(labelsize=15)
-
-
- plt.scatter(TMs_full[0, :], TMs_full[1, :], c = plddt_full, cmap='plasma', vmin=50, vmax=100, s=35, marker="o")
-
-
- x = [ 0 , 1 ]
- y = [ 0 , 1 ]
-
- plt.ylim(0, 1)
- plt.xlim(0, 1)
-
-
- plt.plot(x, y, linestyle='dashed', color = 'black')
-
- plt.xticks(fontsize=15)
- plt.yticks(fontsize=15)
-
- plt.xlabel('TM-Score similar to fold1(' + pdb1_name + ')', fontsize=15); plt.ylabel('TM-score similar to fold2(' + pdb2_name + ')', fontsize=15)
- plt.savefig('TMscore_' + full_cate + '_' + pdb1_name + '.png', transparent = True)
-
-
- plt.figure(1)
- for ii in range(0, int(TMs_random.shape[0] / 2) ):
- plt.scatter(TMs_fs_random[ii * 2, :], TMs_fs_random[(ii * 2 + 1), :], c = plddt_random[ii, :], cmap='plasma', vmin=50, vmax=100, s=35, marker="o")
-
-
- x = [ 0.0 , 1 ] ; y = [ 0.0 , 1 ]
- plt.ylim(0.0, 1)
- plt.xlim(0.0, 1)
-
- dlb=plt.colorbar()
- dlb.ax.tick_params(labelsize=15)
-
- plt.scatter(TMs_fs_full[0, :], TMs_fs_full[1, :], c = plddt_full, cmap='plasma', vmin=50, vmax=100, s=35, marker="o")
-
-
- plt.plot(x, y, linestyle='dashed', color = 'black')
-
- plt.xticks(fontsize=15)
- plt.yticks(fontsize=15)
-
- plt.xlabel('TM-Score similar to fold1(' + pdb1_name + ')', fontsize=15); plt.ylabel('TM-score similar to fold2(' + pdb2_name + ')', fontsize=15)
- plt.savefig('TMscore_fs-region_' + full_cate + '_' + pdb1_name + '.png', transparent = True)
-
diff --git a/code/cal_tmscore_fs_flmsa.py b/code/cal_tmscore_fs_flmsa.py
deleted file mode 100644
index c082a50..0000000
--- a/code/cal_tmscore_fs_flmsa.py
+++ /dev/null
@@ -1,283 +0,0 @@
-#!/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Compare the predicted models with original PDBs
-report TM-scores for ranked 0 to 4
-input line is pdb1 pdb2 preds_of_pdb dirname
-
-This version requires tmtools 0.0.2 (Python bindings around the TM-align code for structural alignment of proteins)
-check this for local installation
-https://pypi.org/project/tmtools/
-
-Usage:
-
-python3.8 compare_strs_fs.py 2k42_A 1cee_B 1cee_B 0_msas_models/
-
-Created on Wed Feb 21 14:51:00 2024
-@author: Myeongsang (Samuel) Lee
-"""
-import re
-import Bio
-import os
-from os import listdir
-from os.path import isfile, join
-import sys
-from pathlib import Path
-import numpy as np
-from numpy import genfromtxt
-import matplotlib.pyplot as plt
-import glob
-import random
-import argparse
-# call related modules of tmtools after installation
-from tmtools import tm_align
-from tmtools.io import get_structure, get_residue_data
-from tmtools.testing import get_pdb_path
-import Bio.PDB
-from Bio.PDB import PDBParser, Structure
-
-
-
-
-pdbParser = PDBParser(QUIET=True)
-
-# convert three letter code to one letter code
-aa3to1 = {'CYS': 'C', 'ASP': 'D', 'SER': 'S', 'GLN': 'Q', 'LYS': 'K',
- 'ILE': 'I', 'PRO': 'P', 'THR': 'T', 'PHE': 'F', 'ASN': 'N',
- 'GLY': 'G', 'HIS': 'H', 'LEU': 'L', 'ARG': 'R', 'TRP': 'W',
- 'ALA': 'A', 'VAL':'V', 'GLU': 'E', 'TYR': 'Y', 'MET': 'M'}
-
-
-class TM_score_fs():
- def get_coords(self, pdbfile, fs_range):
- """
- parameters:
- pdbfile - path to pdbfile
- fs_range - range of residues at the fold-switching region, given as string - "112-162"
- returns:
- numpy array of coords
- string of seqs in 1-letter-code
- """
-
- seq = ""
- struct = pdbParser.get_structure('x',str(pdbfile))
- coords = []
- seq_dict = {}
-
- # for residues within a certain range, using numpy to save the coords
- # and save the sequence as a dict and then sorted list of tuples
- # return the coords and the seq
-
- # convert str to residue range for the fs region
- (start,stop) = fs_range.split("-")
- res_range = range(int(start),int(stop)+1)
-
- for atom in struct.get_atoms():
- residue = atom.get_parent() # from atom we can get the parent residue
- res_id = residue.get_id()[1]
- resname = residue.get_resname()
- if res_id in res_range and atom.get_name()=="CA":
- x,y,z = atom.get_coord()
- coords.append([x,y,z])
- if res_id not in seq_dict:
- seq_dict[res_id]=aa3to1[resname]
-
-
- #print(coords)
- # convert to np array
- coords_np = np.array(coords)
- # sort the seq_dict by keys a.k.a res_ids
- sorted_data = sorted(seq_dict.items())
- for i in sorted_data:
- seq+=i[1]
-
- return coords_np,seq
-
-
-
- def get_tmscore(self, coords1, seq1, predfilepath, res_range):
- """
- parameters:
- coords1, seq1 - the numpy array of PDB coords and its seqs
- predfilepath - path for predicted files
- res_range - fs range in predicted models
-
- returns:
- tmscore list
-
- """
-
- tmscores = []
- tmscores_ord = []; tmscores_rev = []
- modelfiles = sorted(glob.glob(str(predfilepath) + "/*_unrelaxed*pdb"))
-
- if len(modelfiles)==0:
- tmscores = [0.0,0.0,0.0,0.0,0.0]
- return tmscores
-
- for model in modelfiles:
- modelpath = Path(model)
- coords2, seq2 = self.get_coords(modelpath,res_range)
- res = tm_align(coords1, coords2, seq1, seq2)
- tmscore = round(res.tm_norm_chain1,2) # wrt to model
- tmscores_ord.append(tmscore)
-
- res = tm_align(coords2, coords1, seq2, seq1)
- tmscore = round(res.tm_norm_chain1,2)
- tmscores_rev.append(tmscore)
-
- if np.max(tmscores_ord) > tmscores_rev:
- tmscores = tmscores_ord
- else:
- tmscores = tmscores_rev
-
-
-
- return tmscores
-
-
-
- #def run_for_models(self, FH, pdbfile1, pdbfile2, data_dir,pred_range,res_range1,res_range2):
- def run_for_models(self, pdbfile1, pdbfile2, data_dir,pred_range,res_range1,res_range2):
- """
- compare the original PDB
- with the predicted models, 0 to 5
-
- parameters:
- FH - filehandle for writing
- pdbfile1 - path to original PDB, Fold1
- pdbfile2 - path to alternate PDB, Fold2
- data_dir - path for the predicted strs
- res_range1 - fs range in PDB1 and its models
- res_range2 - fs range in PDB2 and its models
-
- returns:
- nothing
-
- saves the TM-scores in a local file
- """
- #print(res_range1,res_range2)
-
- # get list of subdirectories
- all_sub_dir_paths = glob.glob(str(data_dir)) # returns list of sub directory paths
- tmscores_fs = []
-
- print(all_sub_dir_paths)
- # files found then continue
- if len(all_sub_dir_paths) == 0:
- pass
-
- for subdir in all_sub_dir_paths:
- preddir = Path(subdir)
-
- # predicted dir doesn't exist then continue
- if not preddir.exists():
- pass
-
- # only comparing on one set of predicted models
- # but with both PDBs/Folds
- coords1,seq1 = self.get_coords(pdbfile1,res_range1)
- print(preddir, pred_range)
- tmscore_lst1 = self.get_tmscore(coords1,seq1,preddir,pred_range) # wrt pdb1
- tmp_tm_fs = tmscore_lst1
- print(tmp_tm_fs)
- tmscores_fs.append(tmp_tm_fs)
- #print(tmscore_lst1)
-
-
-
- for subdir in all_sub_dir_paths:
- preddir = Path(subdir)
-
- # predicted dir doesn't exist then continue
- if not preddir.exists():
- pass
-
- # only comparing on one set of predicted models
- # but with both PDBs/Folds
- coords2,seq2 = self.get_coords(pdbfile2,res_range2)
- tmscore_lst2 = self.get_tmscore(coords2,seq2,preddir,pred_range) # wrt pdb2
- tmp_tm_fs = tmscore_lst2
- print(tmp_tm_fs)
- tmscores_fs.append(tmp_tm_fs)
- #print(tmscore_lst2)
-
- #print(" ")
- tmscores_fs = np.array(tmscores_fs)
- self.tmscores_fs = tmscores_fs
- print(" ")
- print(tmscores_fs)
-
-
-
- def __init__(self, pdb1, pdb1_name, pdb2, pdb2_name):
- # get numpy arrays for coords at the fold-switching region
- # also return the seq in 1-letter code for the same
-
- # input arguments: sys.argv[1] - pdb1, sys.argv[2] - pdb2
- # sys.argv[3] - preds, sys.argv[4] - current directory
-
- current_dir = os.getcwd() + '/'
- pred_dir = pdb1_name + '_predicted_models_full_*'
- pred_path = current_dir + pred_dir + '/'
- data_dir = Path(pred_path) # Path to the predicted models
- print(data_dir)
-
- # the range of the fold-switching region
- range_file = current_dir + 'range_fs_pairs_all.txt'
-
- # convert this file into a dictionary for reference later
- fs_res = {}
-
- # The range_file file has the fold-switching residue ranges
- # for the original PDB/PDB1, alternate PDB/PDB2
- # Predicted model for PDB1, predicted model for PDB2
- with open(range_file,'r') as Infile:
- next(Infile) # skip header line "# pdb1,pdb2,pred1,pred2"
- for line in Infile:
- line=line.strip()
- (n1,n2,p1,p2,m1,m2)=line.split(",")
- # the value of the dictionary is a tuple
- # the first element of tuple is the fs range in the original PDB
- # followed by the range in the predicted model
- if n1 not in fs_res:
- fs_res[n1]=(p1,m1)
- if n2 not in fs_res:
- fs_res[n2]=(p2,m2)
-
-
-
- print("Running for pair ",pdb1_name, pdb2_name, end="..")
- print(" ")
- print("comparing predictions of ", pdb1_name, end="...")
- print(" ")
-
-
- try:
- range_pdb1 = fs_res[pdb1_name] # so if pdb1 is '1nqd_A', fs_res['1nqd_A']=('895-919', '1-33')
- range_pdb2 = fs_res[pdb2_name] # and if pdb2 is '1nqj_B', fs_res['1nqj_B']=('894-919', '1-33')
- except:
- print("check PDBIDs ",pdb1_name, pdb2_name)
- sys.exit(1)
-
-
- range_pred = range_pdb1[1]
- self.run_for_models(pdb1, pdb2, data_dir, range_pred, range_pdb1[0], range_pdb2[0])
-
-
-#if __name__ == "__main__":
-#
-# import warnings
-# warnings.filterwarnings('ignore')
-#
-# parser = argparse.ArgumentParser()
-# parser.add_argument("--pdb1", type=str, help='PDB structure for the target crystal structure (target to be predicted)')
-# parser.add_argument("--pdb2", type=str, help='PDB structure for the alternative crystal structure')
-#
-# args = parser.parse_args()
-#
-# pdb1 = args.pdb1; pdb2 = args.pdb2
-# pdb1_name = pdb1.replace('.pdb',''); pdb2_name = pdb2.replace('.pdb','')
-#
-# TM_score_fs(pdb1, pdb1_name, pdb2, pdb2_name)
-#
diff --git a/code/cal_tmscore_fs_multimer.py b/code/cal_tmscore_fs_multimer.py
deleted file mode 100644
index 4d36261..0000000
--- a/code/cal_tmscore_fs_multimer.py
+++ /dev/null
@@ -1,264 +0,0 @@
-#!/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Compare the predicted models with original PDBs
-report TM-scores for ranked 0 to 4
-input line is pdb1 pdb2 preds_of_pdb dirname
-
-This version requires tmtools 0.0.2 (Python bindings around the TM-align code for structural alignment of proteins)
-check this for local installation
-https://pypi.org/project/tmtools/
-
-Usage:
-
-python3.8 compare_strs_fs.py 2k42_A 1cee_B 1cee_B 0_msas_models/
-
-Created on Wed Feb 21 14:51:00 2024
-@author: Myeongsang (Samuel) Lee
-"""
-import re
-import Bio
-import os
-from os import listdir
-from os.path import isfile, join
-import sys
-from pathlib import Path
-import numpy as np
-from numpy import genfromtxt
-import matplotlib.pyplot as plt
-import glob
-import random
-import argparse
-# call related modules of tmtools after installation
-from tmtools import tm_align
-from tmtools.io import get_structure, get_residue_data
-from tmtools.testing import get_pdb_path
-import Bio.PDB
-from Bio.PDB import PDBParser, Structure
-
-
-
-
-pdbParser = PDBParser(QUIET=True)
-
-# convert three letter code to one letter code
-aa3to1 = {'CYS': 'C', 'ASP': 'D', 'SER': 'S', 'GLN': 'Q', 'LYS': 'K',
- 'ILE': 'I', 'PRO': 'P', 'THR': 'T', 'PHE': 'F', 'ASN': 'N',
- 'GLY': 'G', 'HIS': 'H', 'LEU': 'L', 'ARG': 'R', 'TRP': 'W',
- 'ALA': 'A', 'VAL':'V', 'GLU': 'E', 'TYR': 'Y', 'MET': 'M'}
-
-
-class TM_score_fs_multi():
- def get_coords(self, pdbfile, fs_range):
- """
- parameters:
- pdbfile - path to pdbfile
- fs_range - range of residues at the fold-switching region, given as string - "112-162"
- returns:
- numpy array of coords
- string of seqs in 1-letter-code
- """
-
- seq = ""
- struct = pdbParser.get_structure('x',str(pdbfile))
- coords = []
- seq_dict = {}
-
- # for residues within a certain range, using numpy to save the coords
- # and save the sequence as a dict and then sorted list of tuples
- # return the coords and the seq
-
- # convert str to residue range for the fs region
- (start,stop) = fs_range.split("-")
- res_range = range(int(start),int(stop)+1)
-
- for atom in struct.get_atoms():
- residue = atom.get_parent() # from atom we can get the parent residue
- res_id = residue.get_id()[1]
- resname = residue.get_resname()
- if res_id in res_range and atom.get_name()=="CA":
- x,y,z = atom.get_coord()
- coords.append([x,y,z])
- if res_id not in seq_dict:
- seq_dict[res_id]=aa3to1[resname]
-
-
- #print(coords)
- # convert to np array
- coords_np = np.array(coords)
- # sort the seq_dict by keys a.k.a res_ids
- sorted_data = sorted(seq_dict.items())
- for i in sorted_data:
- seq+=i[1]
-
- return coords_np,seq
-
-
-
- def get_tmscore(self, coords1, seq1, predfilepath, res_range):
- """
- parameters:
- coords1, seq1 - the numpy array of PDB coords and its seqs
- predfilepath - path for predicted files
- res_range - fs range in predicted models
-
- returns:
- tmscore list
-
- """
-
- tmscores = []
- #modelfiles = sorted(glob.glob(str(predfilepath) + "/*_unrelaxed*pdb"))
- modelfiles = (glob.glob(str(predfilepath) + "/single*_unrelaxed*pdb"))
-
- if len(modelfiles)==0:
- tmscores = [0.0,0.0,0.0,0.0,0.0]
- return tmscores
-
- for model in modelfiles:
- modelpath = Path(model)
- coords2, seq2 = self.get_coords(modelpath,res_range)
- res = tm_align(coords1, coords2, seq1, seq2)
- tmscore = round(res.tm_norm_chain1,2) # wrt to model
- tmscores.append(tmscore)
-
- return tmscores
-
-
-
- #def run_for_models(self, FH, pdbfile1, pdbfile2, data_dir,pred_range,res_range1,res_range2):
- def run_for_models(self, pdbfile1, pdbfile2, data_dir,pred_range,res_range1,res_range2):
- """
- compare the original PDB
- with the predicted models, 0 to 5
-
- parameters:
- FH - filehandle for writing
- pdbfile1 - path to original PDB, Fold1
- pdbfile2 - path to alternate PDB, Fold2
- data_dir - path for the predicted strs
- res_range1 - fs range in PDB1 and its models
- res_range2 - fs range in PDB2 and its models
-
- returns:
- nothing
-
- saves the TM-scores in a local file
- """
- #print(res_range1,res_range2)
-
- # get list of subdirectories
- all_sub_dir_paths = glob.glob(str(data_dir))
- tmscores_fs = []
-
-
- ## files found then continue
- if len(all_sub_dir_paths) == 0:
- pass
-
- for subdir in all_sub_dir_paths:
- preddir = Path(subdir)
- # predicted dir doesn't exist then continue
- if not preddir.exists():
- pass
-
- # only comparing on one set of predicted models
- # but with both PDBs/Folds
- coords1,seq1 = self.get_coords(pdbfile1,res_range1)
- tmscore_lst1 = self.get_tmscore(coords1,seq1,preddir,pred_range) # wrt pdb1
- tmp_tm_fs = tmscore_lst1
- tmscores_fs.append(tmp_tm_fs)
-
-
- for subdir in all_sub_dir_paths:
- preddir = Path(subdir)
-
- # predicted dir doesn't exist then continue
- if not preddir.exists():
- pass
-
- # only comparing on one set of predicted models
- # but with both PDBs/Folds
- coords2,seq2 = self.get_coords(pdbfile2,res_range2)
- tmscore_lst2 = self.get_tmscore(coords2,seq2,preddir,pred_range) # wrt pdb2
- tmp_tm_fs = tmscore_lst2
- tmscores_fs.append(tmp_tm_fs)
-
- print(" ")
- tmscores_fs = np.array(tmscores_fs)
- print("tmscores_fs")
- self.tmscores_fs = tmscores_fs
-
-
-
- def __init__(self, pred_path, pdb1, pdb1_name, pdb2, pdb2_name):
- # get numpy arrays for coords at the fold-switching region
- # also return the seq in 1-letter code for the same
-
- # input arguments: sys.argv[1] - pdb1, sys.argv[2] - pdb2
- # sys.argv[3] - preds, sys.argv[4] - current directory
-
- current_dir = os.getcwd() + '/'
- #pred_dir = 'additional_sampling/' + pdb1_name
- #pred_path = current_dir + pred_dir + '/'
- data_dir = Path(pred_path) # Path to the predicted models
-
-
- # the range of the fold-switching region
- range_file = current_dir + 'range_fs_pairs_all.txt'
-
- # convert this file into a dictionary for reference later
- fs_res = {}
-
- # The range_file file has the fold-switching residue ranges
- # for the original PDB/PDB1, alternate PDB/PDB2
- # Predicted model for PDB1, predicted model for PDB2
- with open(range_file,'r') as Infile:
- next(Infile) # skip header line "# pdb1,pdb2,pred1,pred2"
- for line in Infile:
- line=line.strip()
- (n1,n2,p1,p2,m1,m2)=line.split(",")
- # the value of the dictionary is a tuple
- # the first element of tuple is the fs range in the original PDB
- # followed by the range in the predicted model
- if n1 not in fs_res:
- fs_res[n1]=(p1,m1)
- if n2 not in fs_res:
- fs_res[n2]=(p2,m2)
-
-
-
- print("Running for pair ",pdb1_name, pdb2_name, end="..")
- print(" ")
- print("comparing predictions of ", pdb1_name, end="...")
- print(" ")
-
-
- try:
- range_pdb1 = fs_res[pdb1_name] # so if pdb1 is '1nqd_A', fs_res['1nqd_A']=('895-919', '1-33')
- range_pdb2 = fs_res[pdb2_name] # and if pdb2 is '1nqj_B', fs_res['1nqj_B']=('894-919', '1-33')
- except:
- print("check PDBIDs ",pdb1_name, pdb2_name)
- sys.exit(1)
-
-
- range_pred = range_pdb1[1]
- self.run_for_models(pdb1, pdb2, data_dir, range_pred, range_pdb1[0], range_pdb2[0])
-
-
-#if __name__ == "__main__":
-#
-# import warnings
-# warnings.filterwarnings('ignore')
-#
-# parser = argparse.ArgumentParser()
-# parser.add_argument("--pdb1", type=str, help='PDB structure for the target crystal structure (target to be predicted)')
-# parser.add_argument("--pdb2", type=str, help='PDB structure for the alternative crystal structure')
-#
-# args = parser.parse_args()
-#
-# pdb1 = args.pdb1; pdb2 = args.pdb2
-# pdb1_name = pdb1.replace('.pdb',''); pdb2_name = pdb2.replace('.pdb','')
-#
-# TM_score_fs(pdb1, pdb1_name, pdb2, pdb2_name)
-#
diff --git a/code/cal_tmscore_fs_only.py b/code/cal_tmscore_fs_only.py
deleted file mode 100644
index 624df98..0000000
--- a/code/cal_tmscore_fs_only.py
+++ /dev/null
@@ -1,276 +0,0 @@
-#!/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Compare the predicted models with original PDBs
-report TM-scores for ranked 0 to 4
-input line is pdb1 pdb2 preds_of_pdb dirname
-
-This version requires tmtools 0.0.2 (Python bindings around the TM-align code for structural alignment of proteins)
-check this for local installation
-https://pypi.org/project/tmtools/
-
-Usage:
-
-python3.8 compare_strs_fs.py 2k42_A 1cee_B 1cee_B 0_msas_models/
-
-Created on Wed Feb 21 14:51:00 2024
-@author: Myeongsang (Samuel) Lee
-"""
-import re
-import Bio
-import os
-from os import listdir
-from os.path import isfile, join
-import sys
-from pathlib import Path
-import numpy as np
-from numpy import genfromtxt
-import matplotlib.pyplot as plt
-import glob
-import random
-import argparse
-# call related modules of tmtools after installation
-from tmtools import tm_align
-from tmtools.io import get_structure, get_residue_data
-from tmtools.testing import get_pdb_path
-import Bio.PDB
-from Bio.PDB import PDBParser, Structure
-
-
-
-
-pdbParser = PDBParser(QUIET=True)
-
-# convert three letter code to one letter code
-aa3to1 = {'CYS': 'C', 'ASP': 'D', 'SER': 'S', 'GLN': 'Q', 'LYS': 'K',
- 'ILE': 'I', 'PRO': 'P', 'THR': 'T', 'PHE': 'F', 'ASN': 'N',
- 'GLY': 'G', 'HIS': 'H', 'LEU': 'L', 'ARG': 'R', 'TRP': 'W',
- 'ALA': 'A', 'VAL':'V', 'GLU': 'E', 'TYR': 'Y', 'MET': 'M'}
-
-
-class TM_score_fs():
- def get_coords(self, pdbfile, fs_range):
- """
- parameters:
- pdbfile - path to pdbfile
- fs_range - range of residues at the fold-switching region, given as string - "112-162"
- returns:
- numpy array of coords
- string of seqs in 1-letter-code
- """
-
- seq = ""
- struct = pdbParser.get_structure('x',str(pdbfile))
- coords = []
- seq_dict = {}
-
- # for residues within a certain range, using numpy to save the coords
- # and save the sequence as a dict and then sorted list of tuples
- # return the coords and the seq
-
- # convert str to residue range for the fs region
- (start,stop) = fs_range.split("-")
- res_range = range(int(start),int(stop)+1)
-
- for atom in struct.get_atoms():
- residue = atom.get_parent() # from atom we can get the parent residue
- res_id = residue.get_id()[1]
- resname = residue.get_resname()
- if res_id in res_range and atom.get_name()=="CA":
- x,y,z = atom.get_coord()
- coords.append([x,y,z])
- if res_id not in seq_dict:
- seq_dict[res_id]=aa3to1[resname]
-
-
- #print(coords)
- # convert to np array
- coords_np = np.array(coords)
- # sort the seq_dict by keys a.k.a res_ids
- sorted_data = sorted(seq_dict.items())
- for i in sorted_data:
- seq+=i[1]
-
- return coords_np,seq
-
-
-
- def get_tmscore(self, coords1, seq1, predfilepath, res_range):
- """
- parameters:
- coords1, seq1 - the numpy array of PDB coords and its seqs
- predfilepath - path for predicted files
- res_range - fs range in predicted models
-
- returns:
- tmscore list
-
- """
-
- tmscores = []
- tmscores_ord = []; tmscores_rev = []
- #modelfiles = sorted(glob.glob(str(predfilepath) + "/*_unrelaxed*pdb"))
- modelfiles = (glob.glob(str(predfilepath) + "/*_unrelaxed*pdb"))
-
- if len(modelfiles)==0:
- tmscores = [0.0,0.0,0.0,0.0,0.0]
- return tmscores
-
- for model in modelfiles:
- modelpath = Path(model)
- coords2, seq2 = self.get_coords(modelpath,res_range)
- res = tm_align(coords1, coords2, seq1, seq2)
- tmscore = round(res.tm_norm_chain1,2) # wrt to model
- tmscores_ord.append(tmscore)
-
- res = tm_align(coords2, coords1, seq2, seq1)
- tmscore = round(res.tm_norm_chain1,5) # wrt to model
- tmscores_rev.append(tmscore)
-
- if np.max(tmscores_ord) > np.max(tmscores_rev):
- tmscores = tmscores_ord
- else:
- tmscores = tmscores_rev
-
-
- return tmscores
-
-
-
- #def run_for_models(self, FH, pdbfile1, pdbfile2, data_dir,pred_range,res_range1,res_range2):
- def run_for_models(self, pdbfile1, pdbfile2, data_dir,pred_range,res_range1,res_range2):
- """
- compare the original PDB
- with the predicted models, 0 to 5
-
- parameters:
- FH - filehandle for writing
- pdbfile1 - path to original PDB, Fold1
- pdbfile2 - path to alternate PDB, Fold2
- data_dir - path for the predicted strs
- res_range1 - fs range in PDB1 and its models
- res_range2 - fs range in PDB2 and its models
-
- returns:
- nothing
-
- saves the TM-scores in a local file
- """
- #print(res_range1,res_range2)
-
- # get list of subdirectories
- all_sub_dir_paths = glob.glob(str(data_dir)) # returns list of sub directory paths
- tmscores_fs = []
-
-
- # files found then continue
- if len(all_sub_dir_paths) == 0:
- pass
-
- for subdir in all_sub_dir_paths:
- preddir = Path(subdir)
- # predicted dir doesn't exist then continue
- if not preddir.exists():
- pass
-
- # only comparing on one set of predicted models
- # but with both PDBs/Folds
- coords1,seq1 = self.get_coords(pdbfile1,res_range1)
- tmscore_lst1 = self.get_tmscore(coords1,seq1,preddir,pred_range) # wrt pdb1
- tmp_tm_fs = tmscore_lst1
- tmscores_fs.append(tmp_tm_fs)
-
-
- for subdir in all_sub_dir_paths:
- preddir = Path(subdir)
-
- # predicted dir doesn't exist then continue
- if not preddir.exists():
- pass
-
- # only comparing on one set of predicted models
- # but with both PDBs/Folds
- coords2,seq2 = self.get_coords(pdbfile2,res_range2)
- tmscore_lst2 = self.get_tmscore(coords2,seq2,preddir,pred_range) # wrt pdb2
- tmp_tm_fs = tmscore_lst2
- tmscores_fs.append(tmp_tm_fs)
-
- print(" ")
- tmscores_fs = np.array(tmscores_fs)
- print("tmscores_fs")
- self.tmscores_fs = tmscores_fs
-
-
-
- def __init__(self, pred_path, pdb1, pdb1_name, pdb2, pdb2_name):
- # get numpy arrays for coords at the fold-switching region
- # also return the seq in 1-letter code for the same
-
- # input arguments: sys.argv[1] - pdb1, sys.argv[2] - pdb2
- # sys.argv[3] - preds, sys.argv[4] - current directory
-
- current_dir = os.getcwd() + '/'
- #pred_dir = 'additional_sampling/' + pdb1_name
- #pred_path = current_dir + pred_dir + '/'
- #print(pred_path)
- data_dir = Path(pred_path) # Path to the predicted models
-
-
- # the range of the fold-switching region
- range_file = current_dir + 'range_fs_pairs_all.txt'
-
- # convert this file into a dictionary for reference later
- fs_res = {}
-
- # The range_file file has the fold-switching residue ranges
- # for the original PDB/PDB1, alternate PDB/PDB2
- # Predicted model for PDB1, predicted model for PDB2
- with open(range_file,'r') as Infile:
- next(Infile) # skip header line "# pdb1,pdb2,pred1,pred2"
- for line in Infile:
- line=line.strip()
- (n1,n2,p1,p2,m1,m2)=line.split(",")
- # the value of the dictionary is a tuple
- # the first element of tuple is the fs range in the original PDB
- # followed by the range in the predicted model
- if n1 not in fs_res:
- fs_res[n1]=(p1,m1)
- if n2 not in fs_res:
- fs_res[n2]=(p2,m2)
-
-
-
- print("Running for pair ",pdb1_name, pdb2_name, end="..")
- print(" ")
- print("comparing predictions of ", pdb1_name, end="...")
- print(" ")
-
-
- try:
- range_pdb1 = fs_res[pdb1_name] # so if pdb1 is '1nqd_A', fs_res['1nqd_A']=('895-919', '1-33')
- range_pdb2 = fs_res[pdb2_name] # and if pdb2 is '1nqj_B', fs_res['1nqj_B']=('894-919', '1-33')
- except:
- print("check PDBIDs ",pdb1_name, pdb2_name)
- sys.exit(1)
-
-
- range_pred = range_pdb1[1]
- self.run_for_models(pdb1, pdb2, data_dir, range_pred, range_pdb1[0], range_pdb2[0])
-
-
-#if __name__ == "__main__":
-#
-# import warnings
-# warnings.filterwarnings('ignore')
-#
-# parser = argparse.ArgumentParser()
-# parser.add_argument("--pdb1", type=str, help='PDB structure for the target crystal structure (target to be predicted)')
-# parser.add_argument("--pdb2", type=str, help='PDB structure for the alternative crystal structure')
-#
-# args = parser.parse_args()
-#
-# pdb1 = args.pdb1; pdb2 = args.pdb2
-# pdb1_name = pdb1.replace('.pdb',''); pdb2_name = pdb2.replace('.pdb','')
-#
-# TM_score_fs(pdb1, pdb1_name, pdb2, pdb2_name)
-#
diff --git a/code/convert_multi_single.py b/code/convert_multi_single.py
deleted file mode 100644
index a7d5b82..0000000
--- a/code/convert_multi_single.py
+++ /dev/null
@@ -1,89 +0,0 @@
-#!/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-
-Converting the multimer PDB to a single PDB file
-
-Created on Tue Dec 24 14:51:00 2025
-@author: Myeongsang (Samuel) Lee
-"""
-import re
-import Bio
-import os
-from os import listdir
-from os.path import isfile, join
-import sys
-from pathlib import Path
-import numpy as np
-from numpy import genfromtxt
-import glob
-import random
-import argparse
-
-
-
-
-
-class convert_m2s():
- def __init__(self, pred_path, pdb1_name, pdb2_name):
- current_dir = os.getcwd() + '/'
- data_dir = Path(pred_path) # Path to the predicted models
-
- files_list = (glob.glob(str(pred_path) + "/*_unrelaxed*pdb"))
- print(files_list)
- for fl in files_list:
- fl_name = fl.replace('.pdb','')
- predicted_name = fl_name.split('/')[1]
- #convert = "awk '!/TER/' " + fl + " > " + fl_name + "_converted.pdb"
- convert = "awk '!/TER/' " + fl + " > " + fl_name.split('/')[0] + '/' + "rmTER_" + predicted_name + ".pdb"
- print(convert)
- os.system(convert)
-
-
- convert_pdb2 = "awk '!/TER/' " + pdb2_name + ".pdb > " + pdb2_name + "_rmTER.pdb"
- print(convert_pdb2); os.system(convert_pdb2)
-
-
- ##### extract a single chain from multimer
- TER_count = 0
-
- for fl in files_list:
- fl_name = fl.replace('.pdb','')
- predicted_name = fl_name.split('/')[1]
-
- with open(fl, 'r') as file:
- for line in file:
- TER = line.split()
- TER_count += TER.count("TER")
-
-
- line_cnt = 0
- for i in range(0, 2):
- output_file_name = fl_name.split('/')[0] + '/' + "single_" + predicted_name + ".pdb"
-
- if line_cnt == 0:
- with open(fl, 'r') as infile, open(output_file_name, 'w') as outfile:
- for line in infile:
- outfile.write(line)
- line_cnt = line_cnt + 1
- if "TER " in line:
- line_cnt = line_cnt + 1
- break
-
- #line_cnt = 0
- ##for i in range(0, TER_count):
- #for i in range(0, 2):
- # output_file_name = pdb2_name[0:4] + '_multi.pdb'
-
- # if line_cnt == 0:
- # with open(pdb2, 'r') as infile, open(output_file_name, 'w') as outfile:
- # for line in infile:
- # outfile.write(line)
- # line_cnt = line_cnt + 1
- # if "TER " in line:
- # line_cnt = line_cnt + 1
- # break
-
- #pdb2_name_multi = output_file_name.replace('.pdb','')
-
-
diff --git a/code/fs_seq_compare.py b/code/fs_seq_compare.py
deleted file mode 100644
index f3730c6..0000000
--- a/code/fs_seq_compare.py
+++ /dev/null
@@ -1,278 +0,0 @@
-import sys
-import os
-import re
-from os import listdir
-from os.path import isfile, join
-import pandas as pd
-import numpy as np
-import Bio.PDB
-import matplotlib.pyplot as plt
-import glob
-import random
-import argparse
-from Bio import SeqIO
-from Bio.PDB.PDBParser import PDBParser
-from Bio import *
-from Bio.SeqRecord import SeqRecord
-
-from thefuzz import fuzz
-from thefuzz import process
-
-
-
-class fs_range():
- def first_res_check(self, pdb1, pdb2):
- #self.pdb1 = pdb1; self.pdb2 = pdb2
-
- ## first residue index check
- structure_1 = PDBParser().get_structure('pdb1', pdb1)
- model_1 = structure_1[0]
- print(model_1)
-
- structure_2 = PDBParser().get_structure('pdb2', pdb2)
- model_2 = structure_2[0]
- print(model_2)
-
- res_index_1 = []
- res_index_2 = []
-
- for chain_1 in model_1:
- for i, residue in enumerate(chain_1.get_residues()):
- #res_id = list(residue.id)
- res_index_1.append(residue.id[1])
- #print(residue.id[1])
-
- for chain_2 in model_2:
- for i, residue in enumerate(chain_2.get_residues()):
- res_index_2.append(residue.id[1])
-
- #print(int(res_index_1[0]))
- #print(int(res_index_2[0]))
-
- self.pdb1_res_index_1 = int(res_index_1[0])
- self.pdb2_res_index_1 = int(res_index_2[0])
-
-
-
-
- def pydssp(self, crys_pdb, pred_pdb, number, pdb_name):
-
- ##### generating the command for pydssp
- number = str(number)
- command = 'pydssp ' + crys_pdb + ' ' + pred_pdb + ' -o output_' + pdb_name + '_' + number + '.log'
- print(command)
- os.system(command)
-
-
-
-
- def res_check(self, pdb1, pdb2, pdb1_name, pdb2_name):
- current_dir = os.getcwd() + '/'
- range_file = current_dir + 'range_fs_pairs_all.txt'
-
- crys_fs_res_1 = {}; crys_fs_res_2 = {}
- pred_fs_res_1 = {}; pred_fs_res_2 = {}
-
- with open(range_file,'r') as Infile:
- next(Infile) # skip header line "# pdb1,pdb2,pred1,pred2"
- for line in Infile:
- line=line.strip()
- (n1,n2,p1,p2,m1,m2)=line.split(",")
- # the value of the dictionary is a tuple
- # the first element of tuple is the fs range in the original PDB
- # followed by the range in the predicted model
- #if n1 == pdb1_name and n2 == pdb2_name:
- if (n1 == pdb1_name and n2 == pdb2_name) or (n2 == pdb1_name and n1 == pdb2_name):
- #fs_res_1 = (m1); fs_res_2 = (m2)
- crys_fs_res_1 = (p1); crys_fs_res_2 = (p2);
- pred_fs_res_1 = (m1); pred_fs_res_2 = (m2);
-
- #fs_res_1_update = fs_res_1.split("-"); fs_res_2_update = fs_res_2.split("-");
- #print(fs_res_1_update, fs_res_2_update)
-
-
- crys_fs_res_1_update = crys_fs_res_1.split("-"); crys_fs_res_2_update = crys_fs_res_2.split("-");
- print(crys_fs_res_1_update, crys_fs_res_2_update)
- pred_fs_res_1_update = pred_fs_res_1.split("-"); pred_fs_res_2_update = pred_fs_res_2.split("-");
- print(pred_fs_res_1_update, pred_fs_res_2_update)
-
- ##### convert list data to int
- self.crys_fs_res_1_update = [int(i) for i in crys_fs_res_1_update]
- self.crys_fs_res_2_update = [int(i) for i in crys_fs_res_2_update]
-
- self.pred_fs_res_1_update = [int(i) for i in pred_fs_res_1_update]
- self.pred_fs_res_2_update = [int(i) for i in pred_fs_res_2_update]
-
-
-
-
-
-
-
- def __init__(self, pdb1, pdb2, pdb1_name, pdb2_name, pred_dir):
- ##### check first residue index of query proteins
- #fs_check = fs_range(pdb1, pdb2)
- self.first_res_check(pdb1, pdb2)
- print(" "); print("checking first residue index")
- print(self.pdb1_res_index_1)
- print(self.pdb2_res_index_1)
-
-
-
- pred_folder = pred_dir
- #pred_folder = '3hdf_A_predicted_models_full_rand_12'
- pred_path = pred_folder
- print(pred_path)
-
-
- pred_files = (glob.glob(str(pred_path) + "/*_unrelaxed*pdb"))
-
-
- ##### read range file information
- self.res_check(pdb1, pdb2, pdb1_name, pdb2_name)
- print(self.crys_fs_res_1_update, self.pred_fs_res_1_update)
- print(self.crys_fs_res_2_update, self.pred_fs_res_2_update)
-
- crys1_fs_res_st = self.crys_fs_res_1_update[0]; crys1_fs_res_ed = self.crys_fs_res_1_update[1]
- crys2_fs_res_st = self.crys_fs_res_2_update[0]; crys2_fs_res_ed = self.crys_fs_res_2_update[1]
- pred1_fs_res_st = self.pred_fs_res_1_update[0]; pred1_fs_res_ed = self.pred_fs_res_1_update[1]
- pred2_fs_res_st = self.pred_fs_res_2_update[0]; pred2_fs_res_ed = self.pred_fs_res_2_update[1]
-
-
-
- if int(self.pdb1_res_index_1) > 1:
- print("Initial residue is not starting from 1")
- self.crys_fs_res_1_update[0] = self.crys_fs_res_1_update[0] - int(self.pdb1_res_index_1)
- self.crys_fs_res_1_update[1] = self.crys_fs_res_1_update[1] - int(self.pdb1_res_index_1)
- crys1_fs_res_st = self.crys_fs_res_1_update[0];
- crys1_fs_res_ed = self.crys_fs_res_1_update[1]
-
- if int(self.pdb2_res_index_1) > 1:
- print("Initial residue is not starting from 1")
- self.crys_fs_res_2_update[0] = self.crys_fs_res_2_update[0] - int(self.pdb2_res_index_1)
- self.crys_fs_res_2_update[1] = self.crys_fs_res_2_update[1] - int(self.pdb2_res_index_1)
- crys2_fs_res_st = self.crys_fs_res_2_update[0]
- crys2_fs_res_ed = self.crys_fs_res_2_update[1]
-
- print("checking starting and ending residue number")
- print(""); print("crystal structure")
- print(crys1_fs_res_st, crys1_fs_res_ed)
- print(crys2_fs_res_st, crys2_fs_res_ed)
- print(""); print("predicted structure")
- print(pred1_fs_res_st, pred1_fs_res_ed)
- print(pred2_fs_res_st, pred2_fs_res_ed)
-
-
- pred_dir_add = 'additional_sampling/' + pdb1_name + '/'
- pred_dir_suc = 'successed_prediction/' + pdb1_name + '/*/'
- pred_dir_fal = ' failed_prediction/'
-
-
- ##### perform pydssp and calculate secondary structure similarity
- index = 0
- print(np.size(pred_files))
- print(" "); print("calculating with pdb1 ", pdb1_name)
- for model in pred_files:
- print(model)
- self.pydssp(pdb1, model, index, pdb1_name)
- dssp_read_tmp = pd.read_csv('output_' + pdb1_name + '_' + str(index) + '.log', sep=' ', header = None)
- ## seq1 = crystal structure, seq2 = predicted structure
- print(dssp_read_tmp)
- print(dssp_read_tmp[0].iloc[0]); seq1 = dssp_read_tmp[0].iloc[0]
- print(dssp_read_tmp[0].iloc[1]); seq2 = dssp_read_tmp[0].iloc[1]
-
- # crystal protein 1 and predictions
- print(" ")
- print(seq1[crys1_fs_res_st:crys1_fs_res_ed])
- print(seq2[pred2_fs_res_st:pred2_fs_res_ed])
- if fuzz.ratio(seq1[crys1_fs_res_st:crys1_fs_res_ed], seq2[pred2_fs_res_st:pred2_fs_res_ed]) > 85:
- print("fs region is correctly predicted")
- f = open("fs_compare_output_" + pdb1_name + ".log", "w")
- f.write("success")
- f.close()
- break
- elif index == (int(np.size(pred_files)) - 1):
- print("fs region is not correctly predicted")
-
- #command = 'mv ' + pred_dir_add + pred_dir_fal
- #print(command); os.system(command)
- #command = 'mv ' + pred_dir_suc + pred_dir_fal + pdb1_name + '/'
- #print(command); os.system(command)
-
- #command = 'rm *' + pdb1_name + '*csv'
- #print(command); os.system(command)
- print("calculating TM-score of fs with alternative pdb")
-
- index = 0
- print(" "); print("calculating with pdb2 ", pdb2_name)
-
- for model in pred_files:
- self.pydssp(pdb2, model, index, pdb1_name)
- dssp_read_tmp = pd.read_csv('output_' + pdb1_name + '_' + str(index) + '.log', sep=' ', header = None)
- ## seq1 = crystal structure, seq2 = predicted structure
- print(dssp_read_tmp[0].iloc[0]); seq1 = dssp_read_tmp[0].iloc[0]
- print(dssp_read_tmp[0].iloc[1]); seq2 = dssp_read_tmp[0].iloc[1]
-
-
- # crystal protein 1 and predictions
- print(" ")
- print(seq1[crys2_fs_res_st:crys2_fs_res_ed])
- print(seq2[pred2_fs_res_st:pred2_fs_res_ed])
- if fuzz.ratio(seq1[crys2_fs_res_st:crys2_fs_res_ed], seq2[pred2_fs_res_st:pred2_fs_res_ed]) > 85:
- print("fs region is correctly predicted")
- break
- elif index == (int(np.size(pred_files)) - 1):
- print("fs region is not correctly predicted")
-
- f = open("fs_compare_output_" + pdb1_name + ".log", "w")
- f.write("fail")
- f.close()
-
- #command = 'mv ' + pred_dir_add + pred_dir_fal
- #print(command); os.system(command)
- #command = 'mv ' + pred_dir_suc + pred_dir_fal + pdb1_name + '/'
- #print(command); os.system(command)
-
- else:
- index += 1
-
-
- else:
- index += 1
-
- # index += 1
-
-
- #index = 0
- #print(" "); print("calculating with pdb2 ", pdb2_name)
- #for model in pred_files:
- # self.pydssp(pdb2, model, index)
- # dssp_read_tmp = pd.read_csv('output_' + str(index) + '.log', sep=' ', header = None)
- # ## seq1 = crystal structure, seq2 = predicted structure
- # print(dssp_read_tmp[0].iloc[0]); seq1 = dssp_read_tmp[0].iloc[0]
- # print(dssp_read_tmp[0].iloc[1]); seq2 = dssp_read_tmp[0].iloc[1]
-
- # # crystal protein 1 and predictions
- # print(" ")
- # print(seq1[crys2_fs_res_st:crys2_fs_res_ed])
- # print(seq2[pred2_fs_res_st:pred2_fs_res_ed])
- # if fuzz.ratio(seq1[crys2_fs_res_st:crys2_fs_res_ed], seq2[pred2_fs_res_st:pred2_fs_res_ed]) > 85:
- # print("fs region is correctly predicted")
- # break
- # elif index == (int(np.size(pred_files)) - 1):
- # print("fs region is not correctly predicted")
-
- # command = 'mv ' + pred_dir_add + pred_dir_fal
- # print(command); os.system(command)
- # command = 'mv ' + pred_dir_suc + pred_dir_fal + pdb1_name + '/'
- # print(command); os.system(command)
-
- # #command = 'rm *' + pdb1_name + '*csv'
- # #print(command); os.system(command)
-
-
-
- # else:
- # index += 1
-
-
diff --git a/code/pred_cal_tmscore_FS.py b/code/pred_cal_tmscore_FS.py
deleted file mode 100644
index 2bca3b1..0000000
--- a/code/pred_cal_tmscore_FS.py
+++ /dev/null
@@ -1,444 +0,0 @@
-#!/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Wed Feb 21 14:51:00 2024
-
-@author: Myeongsang (Samuel) Lee
-"""
-import re
-import Bio
-import os
-from os import listdir
-from os.path import isfile, join
-import sys
-from pathlib import Path
-import numpy as np
-from numpy import genfromtxt
-import matplotlib.pyplot as plt
-import glob
-import random
-import argparse
-# call related modules of tmtools after installation
-from tmtools import tm_align
-from tmtools.io import get_structure, get_residue_data
-from tmtools.testing import get_pdb_path
-
-# call calculating TM-scores of fs region
-from cal_tmscore_fs_only import *
-
-# call converting the multimer as a single chain structure
-from convert_multi_single import *
-
-# call colabfold for multimer option
-from pred_cal_tmscore_multimer_FS import *
-
-
-class TM_score():
- def __init__(self, pred_dir, pdb1, pdb1_name, pdb2, pdb2_name, model_type):
-
- ## loading reference pdb for TM-score
- pwd = os.getcwd() + '/'
- tmscores = []
- tmscores_ord = []; tmscores_rev = []
-
- #files_list = sorted(glob.glob(str(pred_dir) + "/*_unrelaxed*pdb"))
- if model_type != "alphafold2_multimer_v3":
- files_list = (glob.glob(str(pred_dir) + "/*_unrelaxed*pdb"))
- print(files_list)
- else:
- #### convert the multimer file as a single structure
- check_files_list = (glob.glob(str(pred_dir) + "/rmTER*_unrelaxed*pdb"))
- print(check_files_list)
- if not check_files_list:
- convert_m2s(pred_dir, pdb1_name, pdb2_name)
- files_list = (glob.glob(str(pred_dir) + "/rmTER*_unrelaxed*pdb"))
- print(files_list)
- else:
- files_list = (glob.glob(str(pred_dir) + "/rmTER*_unrelaxed*pdb"))
- print(files_list)
-
-
- ##### pdb1_name part
- pdb1_dir = pwd + pdb1_name
- r2 = get_structure(get_pdb_path(str(pdb1_dir)))
- coords2, seq2 = get_residue_data(r2)
-
- if len(files_list) == 0:
- tmscores = [0.0, 0.0, 0.0, 0.0, 0.0]
- return tmscores
-
- for model in files_list:
- #modelpath = Path(model)
- #model = str(modelpath.parent) + "/" + modelpath.stem
- model = model.replace('.pdb','')
- #model = model.replace('_converted.pdb','_converted')
- model = pwd + model
- s = get_structure(get_pdb_path(model))
- coords1, seq1 = get_residue_data(s)
- res = tm_align(coords1, coords2, seq1, seq2)
- tmscore = round(res.tm_norm_chain1,5) # wrt to model
- tmscores_ord.append(tmscore)
-
- res = tm_align(coords2, coords1, seq2, seq1)
- tmscore = round(res.tm_norm_chain1,5) # wrt to model
- tmscores_rev.append(tmscore)
-
-
- #print(tmscores[0:5])
- ##### pdb2_name part
- pdb2_dir = pwd + pdb2_name
- r3 = get_structure(get_pdb_path(str(pdb2_dir)))
- coords2, seq2 = get_residue_data(r3)
-
-
- for model in files_list:
- #modelpath = Path(model)
- #model = str(modelpath.parent) + "/" + modelpath.stem
- model = model.replace('.pdb','')
- #model = model.replace('_converted.pdb','_converted')
- model = pwd + model
- s = get_structure(get_pdb_path(model))
- coords1, seq1 = get_residue_data(s)
- res = tm_align(coords1, coords2, seq1, seq2)
- tmscore = round(res.tm_norm_chain1,5) # wrt to model
- tmscores_ord.append(tmscore)
-
- res = tm_align(coords2, coords1, seq2, seq1)
- tmscore = round(res.tm_norm_chain1,5) # wrt to model
- tmscores_rev.append(tmscore)
-
-
- print("normal")
- print(tmscores_ord)
- print("reverse")
- print(tmscores_rev)
- if np.max(tmscores_ord) > np.max(tmscores_rev):
- tmscores = tmscores_ord
- else:
- tmscores = tmscores_rev
-
-
-
- print(tmscores)
- self.tmscores = tmscores
-
-
-
-class CF_MSA_max():
- def __init__(self, search_dir, output_dir, pdb_name, rseed, num_seeds, model_type):
-
- command = 'colabfold_batch --num-seeds ' + str(num_seeds) + ' --model-type ' + str(model_type) + ' --random-seed ' + str(rseed) + search_dir + output_dir
- print(command)
- os.system(command)
-
-
-
-
-class CF_MSA_var():
- def __init__(self, pdb1, pdb1_name, pdb2, pdb2_name, search_dir, output_dir, rseed, num_seeds, model_type):
-
- #### shallow MSA section
- max_msa = 1; ext_msa = 2
- random_seed = np.array(rseed) ## needed to remove future
-
- self.pdb1_name = pdb1_name
-
- TMscores_random = [] ## whole structure
- TMscores_fs_random = [] ## fold-switching region
-
- for multi in (1, 2, 2, 2, 2, 2, 2):
- max_msa = max_msa * multi
- ext_msa = ext_msa * multi
-
- #### Colabfold part
- command = 'colabfold_batch --num-seeds ' + str(num_seeds) + ' --model-type ' + str(model_type) + ' --max-seq ' + str(max_msa) + ' --max-extra-seq ' + str(ext_msa) + search_dir + output_dir + str(random_seed) + '_max_' + str(max_msa) + '_ext_' + str(ext_msa)
- print(command); os.system(command)
-
-
- def select_size(self, TMscores_random_alter, TMscores_fs_random_alter, pdb1_name, pdb2_name, alt_name, num_seeds):
-
- TMscores_random_reshape = TMscores_random_alter.reshape(14, num_seeds * 5)
- TMscores_fs_random_reshape = TMscores_fs_random_alter.reshape(14, num_seeds * 5)
- TMscores_random_locat = np.zeros((7, num_seeds * 5))
- TMscores_fs_random_locat = np.zeros((7, num_seeds * 5))
-
- #### finding locatnative pdb_name
-
- if alt_name == pdb2_name:
- #for i in 1, 3, 5, 7, 9, 11, 13 in TM_scores:
- tmp_cnt = 0
- for i in range(1, 14, 2):
- print(TMscores_random_reshape[i, :])
- print(TMscores_fs_random_reshape[i, :])
- TMscores_random_locat[tmp_cnt, :] = TMscores_random_reshape[i, :]
- TMscores_fs_random_locat[tmp_cnt, :] = TMscores_fs_random_reshape[i, :]
- tmp_cnt = tmp_cnt + 1
- else:
- #for i in 0, 2, 4, 6, 8, 10, 12 in TM_scores:
- tmp_cnt = 0
- for i in range(0, 13, 2):
- print(TMscores_random_reshape[i, :])
- print(TMscores_fs_random_reshape[i, :])
- TMscores_random_locat[tmp_cnt, :] = TMscores_random_reshape[i, :]
- TMscores_fs_random_locat[tmp_cnt, :] = TMscores_fs_random_reshape[i, :]
- tmp_cnt = tmp_cnt + 1
-
-
- TMscore_data = TMscores_random_locat
- TMscore_data = TMscores_random_locat.reshape(7, num_seeds * 5)
- TMscore_data_sum = np.zeros((7, 1))
-
-
- TMscore_fs_data = TMscores_fs_random_locat
- TMscore_fs_data = TMscores_fs_random_locat.reshape(7, num_seeds * 5)
-
-
- for ii in range(0, int(TMscore_data.shape[0])):
- TMscore_data_sum[ii] = np.sum(TMscore_data[ii])
-
-
- location = np.argmax(np.max(TMscore_data_sum, axis=1))
-
- print("Selecting...")
-
- TMscore_data = TMscores_random_alter
- TMscore_data = TMscores_random_alter.reshape(14, num_seeds * 5)
-
- TMscore_fs_data = TMscores_fs_random_alter
- TMscore_fs_data = TMscores_fs_random_alter.reshape(14, num_seeds * 5)
-
- location_org = location
-
-
- if alt_name == pdb2_name:
- location = (location * 2) + 1
- else:
- location = (location * 2)
-
-
-
- if alt_name == pdb2_name and ((np.any(TMscore_data[location, :] >= 0.5) and np.any(TMscore_fs_data[location, :] >= 0.5))):
- print(TMscore_data[location, :], TMscore_fs_data[location, :])
- selection = int((location - 1) / 2)
- self.selection = selection
-
- elif alt_name == pdb1_name and ((np.any(TMscore_data[location, :] >= 0.5) and np.any(TMscore_fs_data[location, :] >= 0.5))):
- print(TMscore_data[location, :], TMscore_fs_data[location, :])
- selection = int(location / 2);
- self.selection = selection
-
-
- #elif location == int(TMscore_data.shape[0]) and np.any(TMscore_fs_data[location, :] < 0.5):
- elif np.any(TMscore_fs_data[location, :] < 0.5):
- for jj in range(0, int(TMscore_data.shape[0] / 2)):
- print(TMscore_data[(2 * jj), :], TMscore_fs_data[(jj * 2) + 1, :])
- print(TMscore_data[(jj * 2) + 1, :], TMscore_fs_data[(jj * 2), :])
- if (np.any(TMscore_data[(jj * 2), :] >= 0.4) and np.any(TMscore_fs_data[(jj * 2) + 1, :] >= 0.5)) or (np.any(TMscore_data[(jj * 2) + 1, :] >= 0.4) and np.any(TMscore_fs_data[(jj * 2), :] >= 0.5)):
- selection = jj
- self.selection = selection
- break
- elif (np.any(TMscore_data[(jj * 2), :] >= 0.4) and np.any(TMscore_fs_data[(jj * 2), :] >= 0.5)) or (np.any(TMscore_data[(jj * 2) + 1, :] >= 0.4) and np.any(TMscore_fs_data[(jj * 2) + 1, :] >= 0.5)):
-
- selection = jj
- self.selection = selection
- break
- elif jj == (int(TMscore_data.shape[0])) and np.all(TMscore_data[jj, :] < 0.5):
- print("Predictions are bad")
- rm_folder_cmd = 'rm -rf successed_prediction/' + self.pdb1_name + '/'
- print(rm_folder_cmd)
- os.system(rm_folder_cmd)
- sys.exit()
- else:
- print("Predictions are bad")
- else:
- print("Predictions are bad")
- print("Predictions of whole structure are bad")
- rm_folder_cmd = 'rm -rf successed_prediction/' + self.pdb1_name + '/'
- print(rm_folder_cmd)
- os.system(rm_folder_cmd)
- sys.exit()
-
-
-
-
-class prediction_all():
- def __init__(self, pdb1, pdb1_name, pdb2, pdb2_name, search_dir, nMSA, model_type, search_multi_dir):
- num_seeds = 5 + nMSA
- pwd = os.getcwd() + '/'
-
- if model_type != "alphafold2_multimer_v3":
-
-
- ##### Perform prediction with full-length MSA
- pre_random_seed = np.random.randint(0, 16, 1)
- random_seed_full_MSA = ''.join(map(str, pre_random_seed))
- output_dir = ' ' + pdb1_name + '_predicted_models_full_rand_' + str(random_seed_full_MSA)
- MSA_full = CF_MSA_max(search_dir, output_dir, pdb1_name, random_seed_full_MSA, num_seeds, model_type)
-
-
- ##### Perform prediction with random shallow MSA
- ##### check out varied-MSA with (msa-max: 1, 2, 4, 8, 16, 32, 64) (msa-extra: 2, 4, 8, 16, 32, 64, 128)
- output_dir = ' ' + pdb1_name + '_predicted_models_rand_'
- random_seed = random.sample(range(100), 1)
- random_seed = ''.join(map(str, random_seed))
- MSA_var = CF_MSA_var(pdb1, pdb1_name, pdb2, pdb2_name, search_dir, output_dir, random_seed, num_seeds, model_type)
-
-
-
- ####################################################################
- ##### check-out TM-scores of prediction with full-length-MSA (whole)
- pred_dir = pdb1_name + '_predicted_models_full_rand_' + str(random_seed_full_MSA) + '/'; print(pred_dir)
- MSA_full_TMscore = TM_score(pred_dir, pdb1, pdb1_name, pdb2, pdb2_name, model_type)
- full_TMscore = np.array(MSA_full_TMscore.tmscores)
- full_TMscore = full_TMscore.reshape(2, num_seeds * 5)
-
- ##### check-out TM-scores of prediction with full-length-MSA (fs region)
- pred_path = pwd + pdb1_name + '_predicted_models_full_rand_' + str(random_seed_full_MSA)
- MSA_fs_TMscore = TM_score_fs(pred_path, pdb1, pdb1_name, pdb2, pdb2_name)
- fs_TMscore = np.array(MSA_fs_TMscore.tmscores_fs)
- fs_TMscore = fs_TMscore.reshape(2, num_seeds * 5)
-
- ##### check-out the 1st prediction results are good or not
- if np.average(full_TMscore[0, :]) > np.average(full_TMscore[1, :]):
- if np.any(fs_TMscore[0, :] >= 0.5) and np.any(full_TMscore[0, :] >= 0.5):
- ref_name = pdb1_name; alt_name = pdb2_name
- elif np.any(fs_TMscore[1, :] >= 0.5) and np.any(full_TMscore[1, :] >= 0.5):
- ref_name = pdb2_name; alt_name = pdb1_name
- else:
- fin_pred_dir = pwd + pdb1_name + '_predicted_models_*'
- print("Prediction with deep MSA was failed"); gen_dir = 'failed_prediction/' + pdb1_name
- os.makedirs(gen_dir); os.system(mv_command)
- mv_command = 'mv ' + fin_pred_dir + ' failed_prediction/' + pdb1_name
- sys.exit()
- else:
- if np.any(fs_TMscore[1, :] >= 0.5) and np.any(full_TMscore[1, :] >= 0.5):
- ref_name = pdb2_name; alt_name = pdb1_name
- elif np.any(fs_TMscore[0, :] >= 0.5) and np.any(full_TMscore[0, :] >= 0.5):
- ref_name = pdb1_name; alt_name = pdb2_name
- else:
- fin_pred_dir = pwd + pdb1_name + '_predicted_models_*'
- print("Prediction with deep MSA was failed"); gen_dir = 'failed_prediction/' + pdb1_name
- os.makedirs(gen_dir); os.system(mv_command)
- mv_command = 'mv ' + fin_pred_dir + ' failed_prediction/' + pdb1_name
- sys.exit()
-
-
- print("Reference structure: ", ref_name); print("Alternative structure: ", alt_name)
-
- # save TM-score of whole structure from full-length MSA
- np.savetxt('TMScore_full-MSA_' + pdb1_name + '.csv', full_TMscore, fmt='%2.3f')
- # save TM-score of fold-switching region from full-length MSA
- np.savetxt('TMScore_fs_full-MSA_' + pdb1_name + '.csv', fs_TMscore, fmt='%2.3f')
-
- # Directory section and save to successed_prediction folder
- gen_dir = 'successed_prediction/' + pdb1_name
-
- if not os.path.exists(gen_dir):
- os.mkdir(gen_dir)
-
- mv_folder_cmd = 'mv ' + pred_dir + ' successed_prediction/' + pdb1_name
- print(mv_folder_cmd); os.system(mv_folder_cmd)
- print("Full-MSA prediction is tightly aligned to crystal structure"); print(" ")
-
-
-
-
-
- ########################################################################
- ##### check-out TM-scores of prediction with shallow random MSAs (whole)
- max_msa = 1; ext_msa = 2
- TMscores_random = []; TMscores_fs_random = []
-
- for multi in (1, 2, 2, 2, 2, 2, 2):
- max_msa = max_msa * multi
- ext_msa = ext_msa * multi
-
- pred_dir = pdb1_name + '_predicted_models_rand_' + str(random_seed) + '_max_' + str(max_msa) + '_ext_' + str(ext_msa) + '/'
-
- ##### TMscore of whole part
- MSA_shallow_TMscore = TM_score(pred_dir, pdb1, pdb1_name, pdb2, pdb2_name, model_type)
- TMscores_random = np.append(TMscores_random, MSA_shallow_TMscore.tmscores); print(TMscores_random)
-
- ### TMscore fs part
- MSA_shallow_fs_TMscore = TM_score_fs(pred_dir, pdb1, pdb1_name, pdb2, pdb2_name)
- TMscores_fs_random = np.append(TMscores_fs_random, MSA_shallow_fs_TMscore.tmscores_fs); print(TMscores_fs_random)
-
- fin_pred_dir = pdb1_name + '_predicted_models_rand_' + str(random_seed) + '_max_*'
-
- TMscores_random_reshape = TMscores_random.reshape(14, num_seeds * 5)
- TMscores_fs_random_reshape = TMscores_fs_random.reshape(14, num_seeds * 5)
-
- TMscores_random_alter = np.zeros((7, num_seeds * 5))
- TMscores_fs_random_alter = np.zeros((7, num_seeds * 5))
-
-
- ##### finding the TMscores of alternative conformations for determining the length of shallow random MSAs
- if alt_name == pdb2_name:
- #for i in 1, 3, 5, 7, 9, 11, 13 in TM_scores:
- tmp_cnt = 0
- for i in range(1, 14, 2):
- print(TMscores_random_reshape[i, :]); print(TMscores_fs_random_reshape[i, :])
- TMscores_random_alter[tmp_cnt, :] = TMscores_random_reshape[i, :]
- TMscores_fs_random_alter[tmp_cnt, :] = TMscores_fs_random_reshape[i, :]
- tmp_cnt = tmp_cnt + 1
- else:
- #for i in 0, 2, 4, 6, 8, 10, 12 in TM_scores:
- tmp_cnt = 0
- for i in range(0, 13, 2):
- print(TMscores_random_reshape[i, :]); print(TMscores_fs_random_reshape[i, :])
- TMscores_random_alter[tmp_cnt, :] = TMscores_random_reshape[i, :]
- TMscores_fs_random_alter[tmp_cnt, :] = TMscores_fs_random_reshape[i, :]
- tmp_cnt = tmp_cnt + 1
-
- print(" ")
- print("Confirming the TM-score with alternative conformation is good or not")
- print(TMscores_random_alter)
- print("Confirming the TM-score with fs region of alternative conformation is good or not")
- print(TMscores_fs_random_alter)
- print(" ")
-
-
- if np.any(TMscores_random_alter > 0.5) and np.any(TMscores_fs_random_alter > 0.5):
- # save all TM-scores from random MSA (1-2, 2-4, 4-8.... in order)
- #TMscores_random_reshape = TMscores_random.reshape(14, 5)
- np.savetxt('TMScore_random-MSA_' + pdb1_name + '.csv', TMscores_random_reshape, fmt='%2.3f')
- np.savetxt('TMScore_fs_random-MSA_' + pdb1_name + '.csv', TMscores_fs_random_reshape, fmt='%2.3f')
-
- gen_dir = 'successed_prediction/' + pdb1_name
- if not os.path.exists(gen_dir):
- os.makedirs(gen_dir)
- mv_command = 'mv ' + fin_pred_dir + ' successed_prediction/' + pdb1_name
- print(mv_command); os.system(mv_command)
- MSA_var.select_size(TMscores_random_reshape, TMscores_fs_random_reshape, pdb1_name, pdb2_name, alt_name, num_seeds)
- size_selection = MSA_var.selection; self.size_selection = size_selection
- pass
- else:
- mv_command = 'mv ' + fin_pred_dir + ' successed_prediction/' + pdb1_name
- print(mv_command); os.system(mv_command)
- MSA_var.select_size(TMscores_random_reshape, TMscores_fs_random_reshape, pdb1_name, pdb2_name, alt_name, num_seeds)
- size_selection = MSA_var.selection; self.size_selection = size_selection
- pass
-
- else:
- gen_dir = 'failed_prediction/' + pdb1_name
- if not os.path.exists(gen_dir):
- os.makedirs(gen_dir)
- mv_command = 'mv ' + fin_pred_dir + ' failed_prediction/' + pdb1_name
- print(mv_command); os.system(mv_command)
- print("Full-MSA prediction is not tightly aligned to crystal structure with additional seeds")
- print("Predcition is done")
- sys.exit()
-
- else:
- mv_command = 'mv ' + fin_pred_dir + ' failed_prediction/' + pdb1_name
- print(mv_command); os.system(mv_command)
- print("Predcition is done")
- sys.exit()
-
-
- elif model_type == "alphafold2_multimer_v3":
- print("Currently working on")
- #MSA_multi = prediction_all_multimer_FS(pdb1_name, pdb2_name, search_dir, nMSA, model_type, search_multi_dir)
- MSA_multi = prediction_all_multimer_FS(pdb1_name, pdb2_name, search_dir, nMSA, model_type, search_multi_dir, pdb1, pdb2)
- self.size_selection = MSA_multi.size_selection
- #sys.exit()
diff --git a/code/pred_cal_tmscore_blind.py b/code/pred_cal_tmscore_blind.py
deleted file mode 100644
index 752150a..0000000
--- a/code/pred_cal_tmscore_blind.py
+++ /dev/null
@@ -1,114 +0,0 @@
-#!/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Wed Feb 21 14:51:00 2024
-
-@author: Myeongsang (Samuel) Lee
-"""
-import re
-import Bio
-import os
-from os import listdir
-from os.path import isfile, join
-import sys
-from pathlib import Path
-import numpy as np
-from numpy import genfromtxt
-import matplotlib.pyplot as plt
-import glob
-import random
-import argparse
-
-
-
-class CF_MSA_max():
- def __init__(self, search_dir, output_dir, pdb_name, rseed, num_seeds, model_type):
-
- command = 'colabfold_batch --num-seeds ' + str(num_seeds) + ' --random-seed ' + str(rseed) + ' --model-type ' + str(model_type) + search_dir + output_dir
- print(command)
- os.system(command)
-
-
-
-
-class CF_MSA_var():
- def __init__(self, pdb1_name, search_dir, output_dir, rseed, num_seeds, model_type):
- #### shallow MSA section
- #### Global viarlable
- max_msa = 1
- ext_msa = 2
- random_seed = np.array(rseed) ## needed to remove future
-
- self.pdb1_name = pdb1_name
-
-
-
- for ran_seed in random_seed:
- max_msa = 1
- ext_msa = 2
-
- TMscores_random = []
-
- for multi in (1, 2, 2, 2, 2, 2, 2):
- max_msa = max_msa * multi
- ext_msa = ext_msa * multi
-
- #### Colabfold part
- command = 'colabfold_batch --num-seeds ' + str(num_seeds) + ' --model-type ' + str(model_type) + ' --max-seq ' + str(max_msa) + ' --max-extra-seq ' + str(ext_msa) + search_dir + output_dir + str(ran_seed) + '_max_' + str(max_msa) + '_ext_' + str(ext_msa)
- print(command)
- os.system(command)
-
-
-
- fin_pred_dir = pdb1_name + '_predicted_models_rand_' + str(ran_seed) + '_max_*'
- gen_dir = 'blind_prediction/' + pdb1_name
-
- if not os.path.exists(gen_dir):
- os.makedirs(gen_dir)
- mv_command = 'mv ' + fin_pred_dir + ' blind_prediction/' + pdb1_name
- print(mv_command); os.system(mv_command)
- else:
- mv_command = 'mv ' + fin_pred_dir + ' blind_prediction/' + pdb1_name
- print(mv_command); os.system(mv_command)
-
-
-
-
-
-class prediction_all_blind():
- def __init__(self, pdb1_name, search_dir, nMSA, model_type):
-
- num_seeds = 5 + nMSA
-
- pre_random_seed = np.random.randint(0, 16, 1)
- random_seed = ''.join(map(str, pre_random_seed))
- print(random_seed)
- output_dir = ' ' + pdb1_name + '_predicted_models_full_rand_' + str(random_seed)
- print(output_dir)
-
-
- ##### Perform predction with full-length MSA
- MSA_full = CF_MSA_max(search_dir, output_dir, pdb1_name, random_seed, num_seeds, model_type)
- pwd = os.getcwd() + '/'
-
-
- # Directory section
- gen_dir = 'blind_prediction/' + pdb1_name
-
- if not os.path.exists(gen_dir):
- os.mkdir(gen_dir)
-
-
- pred_dir = pdb1_name + '_predicted_models_full_rand_' + str(random_seed) + '/'
- mv_folder_cmd = 'mv ' + pred_dir + ' blind_prediction/' + pdb1_name
- print(mv_folder_cmd); os.system(mv_folder_cmd)
-
-
-
- ##### check out varied-MSA with (msa-max: 1, 2, 4, 8, 16, 32, 64) (msa-extra: 2, 4, 8, 16, 32, 64, 128)
- output_dir = ' ' + pdb1_name + '_predicted_models_rand_'
- random_seed = random.sample(range(100), 1)
- MSA_var = CF_MSA_var(pdb1_name, search_dir, output_dir, random_seed, num_seeds, model_type)
-
-
-
diff --git a/Data/AFsample2_benchmark/AFsample2_pse_files /AFsample2_pse_files/.keep b/data/AFsample2_benchmark/AFsample2_pse_files /AFsample2_pse_files/.keep
similarity index 100%
rename from Data/AFsample2_benchmark/AFsample2_pse_files /AFsample2_pse_files/.keep
rename to data/AFsample2_benchmark/AFsample2_pse_files /AFsample2_pse_files/.keep
diff --git a/Data/AFsample2_benchmark/AFsample2_pse_files /AFsample2_pse_files/AFsample2_benchmark_pse_files.zip b/data/AFsample2_benchmark/AFsample2_pse_files /AFsample2_pse_files/AFsample2_benchmark_pse_files.zip
similarity index 100%
rename from Data/AFsample2_benchmark/AFsample2_pse_files /AFsample2_pse_files/AFsample2_benchmark_pse_files.zip
rename to data/AFsample2_benchmark/AFsample2_pse_files /AFsample2_pse_files/AFsample2_benchmark_pse_files.zip
diff --git a/Data/AFsample2_benchmark/OC23_heatmap-nsamples.png b/data/AFsample2_benchmark/OC23_heatmap-nsamples.png
similarity index 100%
rename from Data/AFsample2_benchmark/OC23_heatmap-nsamples.png
rename to data/AFsample2_benchmark/OC23_heatmap-nsamples.png
diff --git a/Data/AFsample2_benchmark/TMScore_full-MSA_1si1_A.csv b/data/AFsample2_benchmark/TMScore_full-MSA_1si1_A.csv
similarity index 100%
rename from Data/AFsample2_benchmark/TMScore_full-MSA_1si1_A.csv
rename to data/AFsample2_benchmark/TMScore_full-MSA_1si1_A.csv
diff --git a/Data/AFsample2_benchmark/TMScore_full-MSA_2ktv_A.csv b/data/AFsample2_benchmark/TMScore_full-MSA_2ktv_A.csv
similarity index 100%
rename from Data/AFsample2_benchmark/TMScore_full-MSA_2ktv_A.csv
rename to data/AFsample2_benchmark/TMScore_full-MSA_2ktv_A.csv
diff --git a/Data/AFsample2_benchmark/TMScore_full-MSA_2olo_A.csv b/data/AFsample2_benchmark/TMScore_full-MSA_2olo_A.csv
similarity index 100%
rename from Data/AFsample2_benchmark/TMScore_full-MSA_2olo_A.csv
rename to data/AFsample2_benchmark/TMScore_full-MSA_2olo_A.csv
diff --git a/Data/AFsample2_benchmark/TMScore_full-MSA_2rqm_A.csv b/data/AFsample2_benchmark/TMScore_full-MSA_2rqm_A.csv
similarity index 100%
rename from Data/AFsample2_benchmark/TMScore_full-MSA_2rqm_A.csv
rename to data/AFsample2_benchmark/TMScore_full-MSA_2rqm_A.csv
diff --git a/Data/AFsample2_benchmark/TMScore_full-MSA_2wfa_A.csv b/data/AFsample2_benchmark/TMScore_full-MSA_2wfa_A.csv
similarity index 100%
rename from Data/AFsample2_benchmark/TMScore_full-MSA_2wfa_A.csv
rename to data/AFsample2_benchmark/TMScore_full-MSA_2wfa_A.csv
diff --git a/Data/AFsample2_benchmark/TMScore_full-MSA_2xe6_A.csv b/data/AFsample2_benchmark/TMScore_full-MSA_2xe6_A.csv
similarity index 100%
rename from Data/AFsample2_benchmark/TMScore_full-MSA_2xe6_A.csv
rename to data/AFsample2_benchmark/TMScore_full-MSA_2xe6_A.csv
diff --git a/Data/AFsample2_benchmark/TMScore_full-MSA_3fto_A.csv b/data/AFsample2_benchmark/TMScore_full-MSA_3fto_A.csv
similarity index 100%
rename from Data/AFsample2_benchmark/TMScore_full-MSA_3fto_A.csv
rename to data/AFsample2_benchmark/TMScore_full-MSA_3fto_A.csv
diff --git a/Data/AFsample2_benchmark/TMScore_full-MSA_3iuj_A.csv b/data/AFsample2_benchmark/TMScore_full-MSA_3iuj_A.csv
similarity index 100%
rename from Data/AFsample2_benchmark/TMScore_full-MSA_3iuj_A.csv
rename to data/AFsample2_benchmark/TMScore_full-MSA_3iuj_A.csv
diff --git a/Data/AFsample2_benchmark/TMScore_full-MSA_3l6g_A.csv b/data/AFsample2_benchmark/TMScore_full-MSA_3l6g_A.csv
similarity index 100%
rename from Data/AFsample2_benchmark/TMScore_full-MSA_3l6g_A.csv
rename to data/AFsample2_benchmark/TMScore_full-MSA_3l6g_A.csv
diff --git a/Data/AFsample2_benchmark/TMScore_full-MSA_3o6w_A.csv b/data/AFsample2_benchmark/TMScore_full-MSA_3o6w_A.csv
similarity index 100%
rename from Data/AFsample2_benchmark/TMScore_full-MSA_3o6w_A.csv
rename to data/AFsample2_benchmark/TMScore_full-MSA_3o6w_A.csv
diff --git a/Data/AFsample2_benchmark/TMScore_full-MSA_3tee_A.csv b/data/AFsample2_benchmark/TMScore_full-MSA_3tee_A.csv
similarity index 100%
rename from Data/AFsample2_benchmark/TMScore_full-MSA_3tee_A.csv
rename to data/AFsample2_benchmark/TMScore_full-MSA_3tee_A.csv
diff --git a/Data/AFsample2_benchmark/TMScore_full-MSA_3zsf_A.csv b/data/AFsample2_benchmark/TMScore_full-MSA_3zsf_A.csv
similarity index 100%
rename from Data/AFsample2_benchmark/TMScore_full-MSA_3zsf_A.csv
rename to data/AFsample2_benchmark/TMScore_full-MSA_3zsf_A.csv
diff --git a/Data/AFsample2_benchmark/TMScore_full-MSA_4bp8_A.csv b/data/AFsample2_benchmark/TMScore_full-MSA_4bp8_A.csv
similarity index 100%
rename from Data/AFsample2_benchmark/TMScore_full-MSA_4bp8_A.csv
rename to data/AFsample2_benchmark/TMScore_full-MSA_4bp8_A.csv
diff --git a/Data/AFsample2_benchmark/TMScore_full-MSA_5ho2_A.csv b/data/AFsample2_benchmark/TMScore_full-MSA_5ho2_A.csv
similarity index 100%
rename from Data/AFsample2_benchmark/TMScore_full-MSA_5ho2_A.csv
rename to data/AFsample2_benchmark/TMScore_full-MSA_5ho2_A.csv
diff --git a/Data/AFsample2_benchmark/TMScore_full-MSA_6hac_A.csv b/data/AFsample2_benchmark/TMScore_full-MSA_6hac_A.csv
similarity index 100%
rename from Data/AFsample2_benchmark/TMScore_full-MSA_6hac_A.csv
rename to data/AFsample2_benchmark/TMScore_full-MSA_6hac_A.csv
diff --git a/Data/AFsample2_benchmark/TMScore_full-MSA_6hnj_A.csv b/data/AFsample2_benchmark/TMScore_full-MSA_6hnj_A.csv
similarity index 100%
rename from Data/AFsample2_benchmark/TMScore_full-MSA_6hnj_A.csv
rename to data/AFsample2_benchmark/TMScore_full-MSA_6hnj_A.csv
diff --git a/Data/AFsample2_benchmark/TMScore_full-MSA_6k8b_A.csv b/data/AFsample2_benchmark/TMScore_full-MSA_6k8b_A.csv
similarity index 100%
rename from Data/AFsample2_benchmark/TMScore_full-MSA_6k8b_A.csv
rename to data/AFsample2_benchmark/TMScore_full-MSA_6k8b_A.csv
diff --git a/Data/AFsample2_benchmark/TMScore_full-MSA_6mka_A.csv b/data/AFsample2_benchmark/TMScore_full-MSA_6mka_A.csv
similarity index 100%
rename from Data/AFsample2_benchmark/TMScore_full-MSA_6mka_A.csv
rename to data/AFsample2_benchmark/TMScore_full-MSA_6mka_A.csv
diff --git a/Data/AFsample2_benchmark/TMScore_full-MSA_6nc7_A.csv b/data/AFsample2_benchmark/TMScore_full-MSA_6nc7_A.csv
similarity index 100%
rename from Data/AFsample2_benchmark/TMScore_full-MSA_6nc7_A.csv
rename to data/AFsample2_benchmark/TMScore_full-MSA_6nc7_A.csv
diff --git a/Data/AFsample2_benchmark/TMScore_full-MSA_6yed_A.csv b/data/AFsample2_benchmark/TMScore_full-MSA_6yed_A.csv
similarity index 100%
rename from Data/AFsample2_benchmark/TMScore_full-MSA_6yed_A.csv
rename to data/AFsample2_benchmark/TMScore_full-MSA_6yed_A.csv
diff --git a/Data/AFsample2_benchmark/TMScore_full-MSA_7c63_A.csv b/data/AFsample2_benchmark/TMScore_full-MSA_7c63_A.csv
similarity index 100%
rename from Data/AFsample2_benchmark/TMScore_full-MSA_7c63_A.csv
rename to data/AFsample2_benchmark/TMScore_full-MSA_7c63_A.csv
diff --git a/Data/AFsample2_benchmark/TMScore_full-MSA_7cy2_A.csv b/data/AFsample2_benchmark/TMScore_full-MSA_7cy2_A.csv
similarity index 100%
rename from Data/AFsample2_benchmark/TMScore_full-MSA_7cy2_A.csv
rename to data/AFsample2_benchmark/TMScore_full-MSA_7cy2_A.csv
diff --git a/Data/AFsample2_benchmark/TMScore_full-MSA_7qga_A.csv b/data/AFsample2_benchmark/TMScore_full-MSA_7qga_A.csv
similarity index 100%
rename from Data/AFsample2_benchmark/TMScore_full-MSA_7qga_A.csv
rename to data/AFsample2_benchmark/TMScore_full-MSA_7qga_A.csv
diff --git a/Data/AFsample2_benchmark/TMScore_random-MSA_1si1_A.csv b/data/AFsample2_benchmark/TMScore_random-MSA_1si1_A.csv
similarity index 100%
rename from Data/AFsample2_benchmark/TMScore_random-MSA_1si1_A.csv
rename to data/AFsample2_benchmark/TMScore_random-MSA_1si1_A.csv
diff --git a/Data/AFsample2_benchmark/TMScore_random-MSA_2ktv_A.csv b/data/AFsample2_benchmark/TMScore_random-MSA_2ktv_A.csv
similarity index 100%
rename from Data/AFsample2_benchmark/TMScore_random-MSA_2ktv_A.csv
rename to data/AFsample2_benchmark/TMScore_random-MSA_2ktv_A.csv
diff --git a/Data/AFsample2_benchmark/TMScore_random-MSA_2olo_A.csv b/data/AFsample2_benchmark/TMScore_random-MSA_2olo_A.csv
similarity index 100%
rename from Data/AFsample2_benchmark/TMScore_random-MSA_2olo_A.csv
rename to data/AFsample2_benchmark/TMScore_random-MSA_2olo_A.csv
diff --git a/Data/AFsample2_benchmark/TMScore_random-MSA_2rqm_A.csv b/data/AFsample2_benchmark/TMScore_random-MSA_2rqm_A.csv
similarity index 100%
rename from Data/AFsample2_benchmark/TMScore_random-MSA_2rqm_A.csv
rename to data/AFsample2_benchmark/TMScore_random-MSA_2rqm_A.csv
diff --git a/Data/AFsample2_benchmark/TMScore_random-MSA_2wfa_A.csv b/data/AFsample2_benchmark/TMScore_random-MSA_2wfa_A.csv
similarity index 100%
rename from Data/AFsample2_benchmark/TMScore_random-MSA_2wfa_A.csv
rename to data/AFsample2_benchmark/TMScore_random-MSA_2wfa_A.csv
diff --git a/Data/AFsample2_benchmark/TMScore_random-MSA_2xe6_A.csv b/data/AFsample2_benchmark/TMScore_random-MSA_2xe6_A.csv
similarity index 100%
rename from Data/AFsample2_benchmark/TMScore_random-MSA_2xe6_A.csv
rename to data/AFsample2_benchmark/TMScore_random-MSA_2xe6_A.csv
diff --git a/Data/AFsample2_benchmark/TMScore_random-MSA_3fto_A.csv b/data/AFsample2_benchmark/TMScore_random-MSA_3fto_A.csv
similarity index 100%
rename from Data/AFsample2_benchmark/TMScore_random-MSA_3fto_A.csv
rename to data/AFsample2_benchmark/TMScore_random-MSA_3fto_A.csv
diff --git a/Data/AFsample2_benchmark/TMScore_random-MSA_3iuj_A.csv b/data/AFsample2_benchmark/TMScore_random-MSA_3iuj_A.csv
similarity index 100%
rename from Data/AFsample2_benchmark/TMScore_random-MSA_3iuj_A.csv
rename to data/AFsample2_benchmark/TMScore_random-MSA_3iuj_A.csv
diff --git a/Data/AFsample2_benchmark/TMScore_random-MSA_3l6g_A.csv b/data/AFsample2_benchmark/TMScore_random-MSA_3l6g_A.csv
similarity index 100%
rename from Data/AFsample2_benchmark/TMScore_random-MSA_3l6g_A.csv
rename to data/AFsample2_benchmark/TMScore_random-MSA_3l6g_A.csv
diff --git a/Data/AFsample2_benchmark/TMScore_random-MSA_3o6w_A.csv b/data/AFsample2_benchmark/TMScore_random-MSA_3o6w_A.csv
similarity index 100%
rename from Data/AFsample2_benchmark/TMScore_random-MSA_3o6w_A.csv
rename to data/AFsample2_benchmark/TMScore_random-MSA_3o6w_A.csv
diff --git a/Data/AFsample2_benchmark/TMScore_random-MSA_3tee_A.csv b/data/AFsample2_benchmark/TMScore_random-MSA_3tee_A.csv
similarity index 100%
rename from Data/AFsample2_benchmark/TMScore_random-MSA_3tee_A.csv
rename to data/AFsample2_benchmark/TMScore_random-MSA_3tee_A.csv
diff --git a/Data/AFsample2_benchmark/TMScore_random-MSA_3zsf_A.csv b/data/AFsample2_benchmark/TMScore_random-MSA_3zsf_A.csv
similarity index 100%
rename from Data/AFsample2_benchmark/TMScore_random-MSA_3zsf_A.csv
rename to data/AFsample2_benchmark/TMScore_random-MSA_3zsf_A.csv
diff --git a/Data/AFsample2_benchmark/TMScore_random-MSA_4bp8_A.csv b/data/AFsample2_benchmark/TMScore_random-MSA_4bp8_A.csv
similarity index 100%
rename from Data/AFsample2_benchmark/TMScore_random-MSA_4bp8_A.csv
rename to data/AFsample2_benchmark/TMScore_random-MSA_4bp8_A.csv
diff --git a/Data/AFsample2_benchmark/TMScore_random-MSA_5ho2_A.csv b/data/AFsample2_benchmark/TMScore_random-MSA_5ho2_A.csv
similarity index 100%
rename from Data/AFsample2_benchmark/TMScore_random-MSA_5ho2_A.csv
rename to data/AFsample2_benchmark/TMScore_random-MSA_5ho2_A.csv
diff --git a/Data/AFsample2_benchmark/TMScore_random-MSA_6hac_A.csv b/data/AFsample2_benchmark/TMScore_random-MSA_6hac_A.csv
similarity index 100%
rename from Data/AFsample2_benchmark/TMScore_random-MSA_6hac_A.csv
rename to data/AFsample2_benchmark/TMScore_random-MSA_6hac_A.csv
diff --git a/Data/AFsample2_benchmark/TMScore_random-MSA_6hnj_A.csv b/data/AFsample2_benchmark/TMScore_random-MSA_6hnj_A.csv
similarity index 100%
rename from Data/AFsample2_benchmark/TMScore_random-MSA_6hnj_A.csv
rename to data/AFsample2_benchmark/TMScore_random-MSA_6hnj_A.csv
diff --git a/Data/AFsample2_benchmark/TMScore_random-MSA_6k8b_A.csv b/data/AFsample2_benchmark/TMScore_random-MSA_6k8b_A.csv
similarity index 100%
rename from Data/AFsample2_benchmark/TMScore_random-MSA_6k8b_A.csv
rename to data/AFsample2_benchmark/TMScore_random-MSA_6k8b_A.csv
diff --git a/Data/AFsample2_benchmark/TMScore_random-MSA_6mka_A.csv b/data/AFsample2_benchmark/TMScore_random-MSA_6mka_A.csv
similarity index 100%
rename from Data/AFsample2_benchmark/TMScore_random-MSA_6mka_A.csv
rename to data/AFsample2_benchmark/TMScore_random-MSA_6mka_A.csv
diff --git a/Data/AFsample2_benchmark/TMScore_random-MSA_6nc7_A.csv b/data/AFsample2_benchmark/TMScore_random-MSA_6nc7_A.csv
similarity index 100%
rename from Data/AFsample2_benchmark/TMScore_random-MSA_6nc7_A.csv
rename to data/AFsample2_benchmark/TMScore_random-MSA_6nc7_A.csv
diff --git a/Data/AFsample2_benchmark/TMScore_random-MSA_6yed_A.csv b/data/AFsample2_benchmark/TMScore_random-MSA_6yed_A.csv
similarity index 100%
rename from Data/AFsample2_benchmark/TMScore_random-MSA_6yed_A.csv
rename to data/AFsample2_benchmark/TMScore_random-MSA_6yed_A.csv
diff --git a/Data/AFsample2_benchmark/TMScore_random-MSA_7c63_A.csv b/data/AFsample2_benchmark/TMScore_random-MSA_7c63_A.csv
similarity index 100%
rename from Data/AFsample2_benchmark/TMScore_random-MSA_7c63_A.csv
rename to data/AFsample2_benchmark/TMScore_random-MSA_7c63_A.csv
diff --git a/Data/AFsample2_benchmark/TMScore_random-MSA_7cy2_A.csv b/data/AFsample2_benchmark/TMScore_random-MSA_7cy2_A.csv
similarity index 100%
rename from Data/AFsample2_benchmark/TMScore_random-MSA_7cy2_A.csv
rename to data/AFsample2_benchmark/TMScore_random-MSA_7cy2_A.csv
diff --git a/Data/AFsample2_benchmark/TMScore_random-MSA_7qga_A.csv b/data/AFsample2_benchmark/TMScore_random-MSA_7qga_A.csv
similarity index 100%
rename from Data/AFsample2_benchmark/TMScore_random-MSA_7qga_A.csv
rename to data/AFsample2_benchmark/TMScore_random-MSA_7qga_A.csv
diff --git a/Data/AFsample2_benchmark/heatmap-max TMscore comparison.png b/data/AFsample2_benchmark/heatmap-max TMscore comparison.png
similarity index 100%
rename from Data/AFsample2_benchmark/heatmap-max TMscore comparison.png
rename to data/AFsample2_benchmark/heatmap-max TMscore comparison.png
diff --git a/Data/AFsample2_benchmark/list_of_OC23-uniprot_ID-PDB_ID.csv b/data/AFsample2_benchmark/list_of_OC23-uniprot_ID-PDB_ID.csv
similarity index 100%
rename from Data/AFsample2_benchmark/list_of_OC23-uniprot_ID-PDB_ID.csv
rename to data/AFsample2_benchmark/list_of_OC23-uniprot_ID-PDB_ID.csv
diff --git a/Data/AFsample2_benchmark/max_TM_heatmap_v4.py b/data/AFsample2_benchmark/max_TM_heatmap_v4.py
similarity index 100%
rename from Data/AFsample2_benchmark/max_TM_heatmap_v4.py
rename to data/AFsample2_benchmark/max_TM_heatmap_v4.py
diff --git a/Data/AFsample2_benchmark/nsample_heatmap.py b/data/AFsample2_benchmark/nsample_heatmap.py
similarity index 100%
rename from Data/AFsample2_benchmark/nsample_heatmap.py
rename to data/AFsample2_benchmark/nsample_heatmap.py
diff --git a/Data/E_coli/.keep b/data/E_coli/.keep
similarity index 100%
rename from Data/E_coli/.keep
rename to data/E_coli/.keep
diff --git a/Data/E_coli/WP_000015473.pse b/data/E_coli/WP_000015473.pse
similarity index 100%
rename from Data/E_coli/WP_000015473.pse
rename to data/E_coli/WP_000015473.pse
diff --git a/Data/E_coli/WP_000024392.pse b/data/E_coli/WP_000024392.pse
similarity index 100%
rename from Data/E_coli/WP_000024392.pse
rename to data/E_coli/WP_000024392.pse
diff --git a/Data/E_coli/WP_000064148.pse b/data/E_coli/WP_000064148.pse
similarity index 100%
rename from Data/E_coli/WP_000064148.pse
rename to data/E_coli/WP_000064148.pse
diff --git a/Data/E_coli/WP_000134927.pse b/data/E_coli/WP_000134927.pse
similarity index 100%
rename from Data/E_coli/WP_000134927.pse
rename to data/E_coli/WP_000134927.pse
diff --git a/Data/E_coli/WP_000190655.pse b/data/E_coli/WP_000190655.pse
similarity index 100%
rename from Data/E_coli/WP_000190655.pse
rename to data/E_coli/WP_000190655.pse
diff --git a/Data/E_coli/WP_000323025.pse b/data/E_coli/WP_000323025.pse
similarity index 100%
rename from Data/E_coli/WP_000323025.pse
rename to data/E_coli/WP_000323025.pse
diff --git a/Data/E_coli/WP_000581937.pse b/data/E_coli/WP_000581937.pse
similarity index 100%
rename from Data/E_coli/WP_000581937.pse
rename to data/E_coli/WP_000581937.pse
diff --git a/Data/E_coli/WP_000617148.pse b/data/E_coli/WP_000617148.pse
similarity index 100%
rename from Data/E_coli/WP_000617148.pse
rename to data/E_coli/WP_000617148.pse
diff --git a/Data/E_coli/WP_000648420.pse b/data/E_coli/WP_000648420.pse
similarity index 100%
rename from Data/E_coli/WP_000648420.pse
rename to data/E_coli/WP_000648420.pse
diff --git a/Data/E_coli/WP_000675390.pse b/data/E_coli/WP_000675390.pse
similarity index 100%
rename from Data/E_coli/WP_000675390.pse
rename to data/E_coli/WP_000675390.pse
diff --git a/Data/E_coli/WP_000699809.pse b/data/E_coli/WP_000699809.pse
similarity index 100%
rename from Data/E_coli/WP_000699809.pse
rename to data/E_coli/WP_000699809.pse
diff --git a/Data/E_coli/WP_000705622.pse b/data/E_coli/WP_000705622.pse
similarity index 100%
rename from Data/E_coli/WP_000705622.pse
rename to data/E_coli/WP_000705622.pse
diff --git a/Data/E_coli/WP_000763330.pse b/data/E_coli/WP_000763330.pse
similarity index 100%
rename from Data/E_coli/WP_000763330.pse
rename to data/E_coli/WP_000763330.pse
diff --git a/Data/E_coli/WP_000803992.pse b/data/E_coli/WP_000803992.pse
similarity index 100%
rename from Data/E_coli/WP_000803992.pse
rename to data/E_coli/WP_000803992.pse
diff --git a/Data/E_coli/WP_000807125.pse b/data/E_coli/WP_000807125.pse
similarity index 100%
rename from Data/E_coli/WP_000807125.pse
rename to data/E_coli/WP_000807125.pse
diff --git a/Data/E_coli/WP_000841554.pse b/data/E_coli/WP_000841554.pse
similarity index 100%
rename from Data/E_coli/WP_000841554.pse
rename to data/E_coli/WP_000841554.pse
diff --git a/Data/E_coli/WP_000847304.pse b/data/E_coli/WP_000847304.pse
similarity index 100%
rename from Data/E_coli/WP_000847304.pse
rename to data/E_coli/WP_000847304.pse
diff --git a/Data/E_coli/WP_000881326.pse b/data/E_coli/WP_000881326.pse
similarity index 100%
rename from Data/E_coli/WP_000881326.pse
rename to data/E_coli/WP_000881326.pse
diff --git a/Data/E_coli/WP_000920571.pse b/data/E_coli/WP_000920571.pse
similarity index 100%
rename from Data/E_coli/WP_000920571.pse
rename to data/E_coli/WP_000920571.pse
diff --git a/Data/E_coli/WP_000951334.pse b/data/E_coli/WP_000951334.pse
similarity index 100%
rename from Data/E_coli/WP_000951334.pse
rename to data/E_coli/WP_000951334.pse
diff --git a/Data/E_coli/WP_000955366.pse b/data/E_coli/WP_000955366.pse
similarity index 100%
rename from Data/E_coli/WP_000955366.pse
rename to data/E_coli/WP_000955366.pse
diff --git a/Data/E_coli/WP_000956458.pse b/data/E_coli/WP_000956458.pse
similarity index 100%
rename from Data/E_coli/WP_000956458.pse
rename to data/E_coli/WP_000956458.pse
diff --git a/Data/E_coli/WP_000976004.pse b/data/E_coli/WP_000976004.pse
similarity index 100%
rename from Data/E_coli/WP_000976004.pse
rename to data/E_coli/WP_000976004.pse
diff --git a/Data/E_coli/WP_000994516.pse b/data/E_coli/WP_000994516.pse
similarity index 100%
rename from Data/E_coli/WP_000994516.pse
rename to data/E_coli/WP_000994516.pse
diff --git a/Data/E_coli/WP_001002059.pse b/data/E_coli/WP_001002059.pse
similarity index 100%
rename from Data/E_coli/WP_001002059.pse
rename to data/E_coli/WP_001002059.pse
diff --git a/Data/E_coli/WP_001023459.pse b/data/E_coli/WP_001023459.pse
similarity index 100%
rename from Data/E_coli/WP_001023459.pse
rename to data/E_coli/WP_001023459.pse
diff --git a/Data/E_coli/WP_001070563.pse b/data/E_coli/WP_001070563.pse
similarity index 100%
rename from Data/E_coli/WP_001070563.pse
rename to data/E_coli/WP_001070563.pse
diff --git a/Data/E_coli/WP_001119863.pse b/data/E_coli/WP_001119863.pse
similarity index 100%
rename from Data/E_coli/WP_001119863.pse
rename to data/E_coli/WP_001119863.pse
diff --git a/Data/E_coli/WP_001129553.pse b/data/E_coli/WP_001129553.pse
similarity index 100%
rename from Data/E_coli/WP_001129553.pse
rename to data/E_coli/WP_001129553.pse
diff --git a/Data/E_coli/WP_001151233.pse b/data/E_coli/WP_001151233.pse
similarity index 100%
rename from Data/E_coli/WP_001151233.pse
rename to data/E_coli/WP_001151233.pse
diff --git a/Data/E_coli/WP_001185665.pse b/data/E_coli/WP_001185665.pse
similarity index 100%
rename from Data/E_coli/WP_001185665.pse
rename to data/E_coli/WP_001185665.pse
diff --git a/Data/E_coli/WP_001192396.pse b/data/E_coli/WP_001192396.pse
similarity index 100%
rename from Data/E_coli/WP_001192396.pse
rename to data/E_coli/WP_001192396.pse
diff --git a/Data/E_coli/WP_001204859.pse b/data/E_coli/WP_001204859.pse
similarity index 100%
rename from Data/E_coli/WP_001204859.pse
rename to data/E_coli/WP_001204859.pse
diff --git a/Data/E_coli/WP_001217394.pse b/data/E_coli/WP_001217394.pse
similarity index 100%
rename from Data/E_coli/WP_001217394.pse
rename to data/E_coli/WP_001217394.pse
diff --git a/Data/E_coli/WP_001241339.pse b/data/E_coli/WP_001241339.pse
similarity index 100%
rename from Data/E_coli/WP_001241339.pse
rename to data/E_coli/WP_001241339.pse
diff --git a/Data/E_coli/WP_001260507.pse b/data/E_coli/WP_001260507.pse
similarity index 100%
rename from Data/E_coli/WP_001260507.pse
rename to data/E_coli/WP_001260507.pse
diff --git a/Data/E_coli/WP_001262174.pse b/data/E_coli/WP_001262174.pse
similarity index 100%
rename from Data/E_coli/WP_001262174.pse
rename to data/E_coli/WP_001262174.pse
diff --git a/Data/E_coli/WP_001264088.pse b/data/E_coli/WP_001264088.pse
similarity index 100%
rename from Data/E_coli/WP_001264088.pse
rename to data/E_coli/WP_001264088.pse
diff --git a/Data/E_coli/WP_001270286.pse b/data/E_coli/WP_001270286.pse
similarity index 100%
rename from Data/E_coli/WP_001270286.pse
rename to data/E_coli/WP_001270286.pse
diff --git a/Data/E_coli/WP_001270809.pse b/data/E_coli/WP_001270809.pse
similarity index 100%
rename from Data/E_coli/WP_001270809.pse
rename to data/E_coli/WP_001270809.pse
diff --git a/Data/E_coli/WP_001272149.pse b/data/E_coli/WP_001272149.pse
similarity index 100%
rename from Data/E_coli/WP_001272149.pse
rename to data/E_coli/WP_001272149.pse
diff --git a/Data/E_coli/WP_001272856.pse b/data/E_coli/WP_001272856.pse
similarity index 100%
rename from Data/E_coli/WP_001272856.pse
rename to data/E_coli/WP_001272856.pse
diff --git a/Data/E_coli/WP_001279084.pse b/data/E_coli/WP_001279084.pse
similarity index 100%
rename from Data/E_coli/WP_001279084.pse
rename to data/E_coli/WP_001279084.pse
diff --git a/Data/E_coli/WP_001280953.pse b/data/E_coli/WP_001280953.pse
similarity index 100%
rename from Data/E_coli/WP_001280953.pse
rename to data/E_coli/WP_001280953.pse
diff --git a/Data/E_coli/WP_001281772.pse b/data/E_coli/WP_001281772.pse
similarity index 100%
rename from Data/E_coli/WP_001281772.pse
rename to data/E_coli/WP_001281772.pse
diff --git a/Data/E_coli/WP_001282181.pse b/data/E_coli/WP_001282181.pse
similarity index 100%
rename from Data/E_coli/WP_001282181.pse
rename to data/E_coli/WP_001282181.pse
diff --git a/Data/E_coli/WP_001295442.pse b/data/E_coli/WP_001295442.pse
similarity index 100%
rename from Data/E_coli/WP_001295442.pse
rename to data/E_coli/WP_001295442.pse
diff --git a/Data/E_coli/WP_001296140.pse b/data/E_coli/WP_001296140.pse
similarity index 100%
rename from Data/E_coli/WP_001296140.pse
rename to data/E_coli/WP_001296140.pse
diff --git a/Data/E_coli/WP_001296901.pse b/data/E_coli/WP_001296901.pse
similarity index 100%
rename from Data/E_coli/WP_001296901.pse
rename to data/E_coli/WP_001296901.pse
diff --git a/Data/E_coli/WP_001300163.pse b/data/E_coli/WP_001300163.pse
similarity index 100%
rename from Data/E_coli/WP_001300163.pse
rename to data/E_coli/WP_001300163.pse
diff --git a/Data/E_coli/WP_001303590.pse b/data/E_coli/WP_001303590.pse
similarity index 100%
rename from Data/E_coli/WP_001303590.pse
rename to data/E_coli/WP_001303590.pse
diff --git a/Data/E_coli/WP_001316982.pse b/data/E_coli/WP_001316982.pse
similarity index 100%
rename from Data/E_coli/WP_001316982.pse
rename to data/E_coli/WP_001316982.pse
diff --git a/Data/Fold-switch_hits-AFcluster/AIAT/1kct_A.zip b/data/Fold-switch_hits-AFcluster/AIAT/1kct_A.zip
similarity index 100%
rename from Data/Fold-switch_hits-AFcluster/AIAT/1kct_A.zip
rename to data/Fold-switch_hits-AFcluster/AIAT/1kct_A.zip
diff --git a/Data/Fold-switch_hits-AFcluster/AIAT/1kct_A_tmscores_fs_all.csv b/data/Fold-switch_hits-AFcluster/AIAT/1kct_A_tmscores_fs_all.csv
similarity index 100%
rename from Data/Fold-switch_hits-AFcluster/AIAT/1kct_A_tmscores_fs_all.csv
rename to data/Fold-switch_hits-AFcluster/AIAT/1kct_A_tmscores_fs_all.csv
diff --git a/Data/Fold-switch_hits-AFcluster/AIAT/3t1p_A_tmscores_fs_all.csv b/data/Fold-switch_hits-AFcluster/AIAT/3t1p_A_tmscores_fs_all.csv
similarity index 100%
rename from Data/Fold-switch_hits-AFcluster/AIAT/3t1p_A_tmscores_fs_all.csv
rename to data/Fold-switch_hits-AFcluster/AIAT/3t1p_A_tmscores_fs_all.csv
diff --git a/Data/Fold-switch_hits-AFcluster/COMT/4pyi_A.zip b/data/Fold-switch_hits-AFcluster/COMT/4pyi_A.zip
similarity index 100%
rename from Data/Fold-switch_hits-AFcluster/COMT/4pyi_A.zip
rename to data/Fold-switch_hits-AFcluster/COMT/4pyi_A.zip
diff --git a/Data/Fold-switch_hits-AFcluster/COMT/4pyi_A_tmscores_fs_all.csv b/data/Fold-switch_hits-AFcluster/COMT/4pyi_A_tmscores_fs_all.csv
similarity index 100%
rename from Data/Fold-switch_hits-AFcluster/COMT/4pyi_A_tmscores_fs_all.csv
rename to data/Fold-switch_hits-AFcluster/COMT/4pyi_A_tmscores_fs_all.csv
diff --git a/Data/Fold-switch_hits-AFcluster/COMT/4pyj_A.zip b/data/Fold-switch_hits-AFcluster/COMT/4pyj_A.zip
similarity index 100%
rename from Data/Fold-switch_hits-AFcluster/COMT/4pyj_A.zip
rename to data/Fold-switch_hits-AFcluster/COMT/4pyj_A.zip
diff --git a/Data/Fold-switch_hits-AFcluster/COMT/4pyj_A_tmscores_fs_all.csv b/data/Fold-switch_hits-AFcluster/COMT/4pyj_A_tmscores_fs_all.csv
similarity index 100%
rename from Data/Fold-switch_hits-AFcluster/COMT/4pyj_A_tmscores_fs_all.csv
rename to data/Fold-switch_hits-AFcluster/COMT/4pyj_A_tmscores_fs_all.csv
diff --git a/data/Fold-switch_hits-AFcluster/CRKL/2bzy_B_tmscores_fs_all.csv b/data/Fold-switch_hits-AFcluster/CRKL/2bzy_B_tmscores_fs_all.csv
new file mode 100644
index 0000000..e69de29
diff --git a/Data/Fold-switch_hits-AFcluster/CRKL/2lqw_A.zip b/data/Fold-switch_hits-AFcluster/CRKL/2lqw_A.zip
similarity index 100%
rename from Data/Fold-switch_hits-AFcluster/CRKL/2lqw_A.zip
rename to data/Fold-switch_hits-AFcluster/CRKL/2lqw_A.zip
diff --git a/Data/Fold-switch_hits-AFcluster/CRKL/2lqw_A_tmscores_fs_all.csv b/data/Fold-switch_hits-AFcluster/CRKL/2lqw_A_tmscores_fs_all.csv
similarity index 100%
rename from Data/Fold-switch_hits-AFcluster/CRKL/2lqw_A_tmscores_fs_all.csv
rename to data/Fold-switch_hits-AFcluster/CRKL/2lqw_A_tmscores_fs_all.csv
diff --git a/Data/Fold-switch_hits-AFcluster/CaBP/1jfk_A.zip b/data/Fold-switch_hits-AFcluster/CaBP/1jfk_A.zip
similarity index 100%
rename from Data/Fold-switch_hits-AFcluster/CaBP/1jfk_A.zip
rename to data/Fold-switch_hits-AFcluster/CaBP/1jfk_A.zip
diff --git a/Data/Fold-switch_hits-AFcluster/CaBP/1jfk_A_tmscores_fs_all.csv b/data/Fold-switch_hits-AFcluster/CaBP/1jfk_A_tmscores_fs_all.csv
similarity index 100%
rename from Data/Fold-switch_hits-AFcluster/CaBP/1jfk_A_tmscores_fs_all.csv
rename to data/Fold-switch_hits-AFcluster/CaBP/1jfk_A_tmscores_fs_all.csv
diff --git a/Data/Fold-switch_hits-AFcluster/CaBP/2nxq_B_tmscores_fs_all.csv b/data/Fold-switch_hits-AFcluster/CaBP/2nxq_B_tmscores_fs_all.csv
similarity index 100%
rename from Data/Fold-switch_hits-AFcluster/CaBP/2nxq_B_tmscores_fs_all.csv
rename to data/Fold-switch_hits-AFcluster/CaBP/2nxq_B_tmscores_fs_all.csv
diff --git a/Data/Fold-switch_hits-AFcluster/Cas9/4cmq_B_tmscores_fs_all.csv b/data/Fold-switch_hits-AFcluster/Cas9/4cmq_B_tmscores_fs_all.csv
similarity index 100%
rename from Data/Fold-switch_hits-AFcluster/Cas9/4cmq_B_tmscores_fs_all.csv
rename to data/Fold-switch_hits-AFcluster/Cas9/4cmq_B_tmscores_fs_all.csv
diff --git a/Data/Fold-switch_hits-AFcluster/Cas9/4zt0_C.zip b/data/Fold-switch_hits-AFcluster/Cas9/4zt0_C.zip
similarity index 100%
rename from Data/Fold-switch_hits-AFcluster/Cas9/4zt0_C.zip
rename to data/Fold-switch_hits-AFcluster/Cas9/4zt0_C.zip
diff --git a/Data/Fold-switch_hits-AFcluster/Cas9/4zt0_C_tmscores_fs_all.csv b/data/Fold-switch_hits-AFcluster/Cas9/4zt0_C_tmscores_fs_all.csv
similarity index 100%
rename from Data/Fold-switch_hits-AFcluster/Cas9/4zt0_C_tmscores_fs_all.csv
rename to data/Fold-switch_hits-AFcluster/Cas9/4zt0_C_tmscores_fs_all.csv
diff --git a/Data/Fold-switch_hits-AFcluster/Cwc2/3tp2_A.zip b/data/Fold-switch_hits-AFcluster/Cwc2/3tp2_A.zip
similarity index 100%
rename from Data/Fold-switch_hits-AFcluster/Cwc2/3tp2_A.zip
rename to data/Fold-switch_hits-AFcluster/Cwc2/3tp2_A.zip
diff --git a/Data/Fold-switch_hits-AFcluster/Cwc2/3tp2_A_tmscores_fs_all.csv b/data/Fold-switch_hits-AFcluster/Cwc2/3tp2_A_tmscores_fs_all.csv
similarity index 100%
rename from Data/Fold-switch_hits-AFcluster/Cwc2/3tp2_A_tmscores_fs_all.csv
rename to data/Fold-switch_hits-AFcluster/Cwc2/3tp2_A_tmscores_fs_all.csv
diff --git a/Data/Fold-switch_hits-AFcluster/Cwc2/5lj3_M_tmscores_fs_all.csv b/data/Fold-switch_hits-AFcluster/Cwc2/5lj3_M_tmscores_fs_all.csv
similarity index 100%
rename from Data/Fold-switch_hits-AFcluster/Cwc2/5lj3_M_tmscores_fs_all.csv
rename to data/Fold-switch_hits-AFcluster/Cwc2/5lj3_M_tmscores_fs_all.csv
diff --git a/Data/Fold-switch_hits-AFcluster/FUS_HENDH/1wp8_C_tmscores_fs_all.csv b/data/Fold-switch_hits-AFcluster/FUS_HENDH/1wp8_C_tmscores_fs_all.csv
similarity index 100%
rename from Data/Fold-switch_hits-AFcluster/FUS_HENDH/1wp8_C_tmscores_fs_all.csv
rename to data/Fold-switch_hits-AFcluster/FUS_HENDH/1wp8_C_tmscores_fs_all.csv
diff --git a/Data/Fold-switch_hits-AFcluster/FUS_HENDH/5ejb_C.zip b/data/Fold-switch_hits-AFcluster/FUS_HENDH/5ejb_C.zip
similarity index 100%
rename from Data/Fold-switch_hits-AFcluster/FUS_HENDH/5ejb_C.zip
rename to data/Fold-switch_hits-AFcluster/FUS_HENDH/5ejb_C.zip
diff --git a/Data/Fold-switch_hits-AFcluster/FUS_HENDH/5ejb_C_tmscores_fs_all.csv b/data/Fold-switch_hits-AFcluster/FUS_HENDH/5ejb_C_tmscores_fs_all.csv
similarity index 100%
rename from Data/Fold-switch_hits-AFcluster/FUS_HENDH/5ejb_C_tmscores_fs_all.csv
rename to data/Fold-switch_hits-AFcluster/FUS_HENDH/5ejb_C_tmscores_fs_all.csv
diff --git a/Data/Fold-switch_hits-AFcluster/Fab/3ztj_E_tmscores_fs_all.csv b/data/Fold-switch_hits-AFcluster/Fab/3ztj_E_tmscores_fs_all.csv
similarity index 100%
rename from Data/Fold-switch_hits-AFcluster/Fab/3ztj_E_tmscores_fs_all.csv
rename to data/Fold-switch_hits-AFcluster/Fab/3ztj_E_tmscores_fs_all.csv
diff --git a/Data/Fold-switch_hits-AFcluster/Fab/5hmg_A.zip b/data/Fold-switch_hits-AFcluster/Fab/5hmg_A.zip
similarity index 100%
rename from Data/Fold-switch_hits-AFcluster/Fab/5hmg_A.zip
rename to data/Fold-switch_hits-AFcluster/Fab/5hmg_A.zip
diff --git a/Data/Fold-switch_hits-AFcluster/Fab/5hmg_A_tmscores_fs_all.csv b/data/Fold-switch_hits-AFcluster/Fab/5hmg_A_tmscores_fs_all.csv
similarity index 100%
rename from Data/Fold-switch_hits-AFcluster/Fab/5hmg_A_tmscores_fs_all.csv
rename to data/Fold-switch_hits-AFcluster/Fab/5hmg_A_tmscores_fs_all.csv
diff --git a/Data/Fold-switch_hits-AFcluster/FraC/3zwg_N_tmscores_fs_all.csv b/data/Fold-switch_hits-AFcluster/FraC/3zwg_N_tmscores_fs_all.csv
similarity index 100%
rename from Data/Fold-switch_hits-AFcluster/FraC/3zwg_N_tmscores_fs_all.csv
rename to data/Fold-switch_hits-AFcluster/FraC/3zwg_N_tmscores_fs_all.csv
diff --git a/Data/Fold-switch_hits-AFcluster/FraC/4tsyD.zip b/data/Fold-switch_hits-AFcluster/FraC/4tsyD.zip
similarity index 100%
rename from Data/Fold-switch_hits-AFcluster/FraC/4tsyD.zip
rename to data/Fold-switch_hits-AFcluster/FraC/4tsyD.zip
diff --git a/Data/Fold-switch_hits-AFcluster/FraC/4tsy_D_tmscores_fs_all.csv b/data/Fold-switch_hits-AFcluster/FraC/4tsy_D_tmscores_fs_all.csv
similarity index 100%
rename from Data/Fold-switch_hits-AFcluster/FraC/4tsy_D_tmscores_fs_all.csv
rename to data/Fold-switch_hits-AFcluster/FraC/4tsy_D_tmscores_fs_all.csv
diff --git a/Data/Fold-switch_hits-AFcluster/GP2/1ebo_E.zip b/data/Fold-switch_hits-AFcluster/GP2/1ebo_E.zip
similarity index 100%
rename from Data/Fold-switch_hits-AFcluster/GP2/1ebo_E.zip
rename to data/Fold-switch_hits-AFcluster/GP2/1ebo_E.zip
diff --git a/Data/Fold-switch_hits-AFcluster/GP2/1ebo_E_tmscores_fs_all.csv b/data/Fold-switch_hits-AFcluster/GP2/1ebo_E_tmscores_fs_all.csv
similarity index 100%
rename from Data/Fold-switch_hits-AFcluster/GP2/1ebo_E_tmscores_fs_all.csv
rename to data/Fold-switch_hits-AFcluster/GP2/1ebo_E_tmscores_fs_all.csv
diff --git a/Data/Fold-switch_hits-AFcluster/GP2/5fhc_J.zip b/data/Fold-switch_hits-AFcluster/GP2/5fhc_J.zip
similarity index 100%
rename from Data/Fold-switch_hits-AFcluster/GP2/5fhc_J.zip
rename to data/Fold-switch_hits-AFcluster/GP2/5fhc_J.zip
diff --git a/Data/Fold-switch_hits-AFcluster/GP2/5fhc_J_tmscores_fs_all.csv b/data/Fold-switch_hits-AFcluster/GP2/5fhc_J_tmscores_fs_all.csv
similarity index 100%
rename from Data/Fold-switch_hits-AFcluster/GP2/5fhc_J_tmscores_fs_all.csv
rename to data/Fold-switch_hits-AFcluster/GP2/5fhc_J_tmscores_fs_all.csv
diff --git a/Data/Fold-switch_hits-AFcluster/IscA/1x0g_A.zip b/data/Fold-switch_hits-AFcluster/IscA/1x0g_A.zip
similarity index 100%
rename from Data/Fold-switch_hits-AFcluster/IscA/1x0g_A.zip
rename to data/Fold-switch_hits-AFcluster/IscA/1x0g_A.zip
diff --git a/Data/Fold-switch_hits-AFcluster/IscA/1x0g_C_tmscores_fs_all.csv b/data/Fold-switch_hits-AFcluster/IscA/1x0g_C_tmscores_fs_all.csv
similarity index 100%
rename from Data/Fold-switch_hits-AFcluster/IscA/1x0g_C_tmscores_fs_all.csv
rename to data/Fold-switch_hits-AFcluster/IscA/1x0g_C_tmscores_fs_all.csv
diff --git a/Data/Fold-switch_hits-AFcluster/KaiB/2qkeE.zip b/data/Fold-switch_hits-AFcluster/KaiB/2qkeE.zip
similarity index 100%
rename from Data/Fold-switch_hits-AFcluster/KaiB/2qkeE.zip
rename to data/Fold-switch_hits-AFcluster/KaiB/2qkeE.zip
diff --git a/Data/Fold-switch_hits-AFcluster/KaiB/2qke_E_tmscores_fs_all.csv b/data/Fold-switch_hits-AFcluster/KaiB/2qke_E_tmscores_fs_all.csv
similarity index 100%
rename from Data/Fold-switch_hits-AFcluster/KaiB/2qke_E_tmscores_fs_all.csv
rename to data/Fold-switch_hits-AFcluster/KaiB/2qke_E_tmscores_fs_all.csv
diff --git a/Data/Fold-switch_hits-AFcluster/KaiB/5jytA.zip b/data/Fold-switch_hits-AFcluster/KaiB/5jytA.zip
similarity index 100%
rename from Data/Fold-switch_hits-AFcluster/KaiB/5jytA.zip
rename to data/Fold-switch_hits-AFcluster/KaiB/5jytA.zip
diff --git a/Data/Fold-switch_hits-AFcluster/KaiB/5jyt_A_tmscores_fs_all.csv b/data/Fold-switch_hits-AFcluster/KaiB/5jyt_A_tmscores_fs_all.csv
similarity index 100%
rename from Data/Fold-switch_hits-AFcluster/KaiB/5jyt_A_tmscores_fs_all.csv
rename to data/Fold-switch_hits-AFcluster/KaiB/5jyt_A_tmscores_fs_all.csv
diff --git a/Data/Fold-switch_hits-AFcluster/Mad2/2vfx_L.zip b/data/Fold-switch_hits-AFcluster/Mad2/2vfx_L.zip
similarity index 100%
rename from Data/Fold-switch_hits-AFcluster/Mad2/2vfx_L.zip
rename to data/Fold-switch_hits-AFcluster/Mad2/2vfx_L.zip
diff --git a/Data/Fold-switch_hits-AFcluster/Mad2/2vfx_L_tmscores_fs_all.csv b/data/Fold-switch_hits-AFcluster/Mad2/2vfx_L_tmscores_fs_all.csv
similarity index 100%
rename from Data/Fold-switch_hits-AFcluster/Mad2/2vfx_L_tmscores_fs_all.csv
rename to data/Fold-switch_hits-AFcluster/Mad2/2vfx_L_tmscores_fs_all.csv
diff --git a/Data/Fold-switch_hits-AFcluster/Mad2/3gmh_L_tmscores_fs_all.csv b/data/Fold-switch_hits-AFcluster/Mad2/3gmh_L_tmscores_fs_all.csv
similarity index 100%
rename from Data/Fold-switch_hits-AFcluster/Mad2/3gmh_L_tmscores_fs_all.csv
rename to data/Fold-switch_hits-AFcluster/Mad2/3gmh_L_tmscores_fs_all.csv
diff --git a/Data/Fold-switch_hits-AFcluster/MinE/2kxo_A.zip b/data/Fold-switch_hits-AFcluster/MinE/2kxo_A.zip
similarity index 100%
rename from Data/Fold-switch_hits-AFcluster/MinE/2kxo_A.zip
rename to data/Fold-switch_hits-AFcluster/MinE/2kxo_A.zip
diff --git a/Data/Fold-switch_hits-AFcluster/MinE/2kxo_A_tmscores_fs_all.csv b/data/Fold-switch_hits-AFcluster/MinE/2kxo_A_tmscores_fs_all.csv
similarity index 100%
rename from Data/Fold-switch_hits-AFcluster/MinE/2kxo_A_tmscores_fs_all.csv
rename to data/Fold-switch_hits-AFcluster/MinE/2kxo_A_tmscores_fs_all.csv
diff --git a/Data/Fold-switch_hits-AFcluster/MinE/3r9j_C_tmscores_fs_all.csv b/data/Fold-switch_hits-AFcluster/MinE/3r9j_C_tmscores_fs_all.csv
similarity index 100%
rename from Data/Fold-switch_hits-AFcluster/MinE/3r9j_C_tmscores_fs_all.csv
rename to data/Fold-switch_hits-AFcluster/MinE/3r9j_C_tmscores_fs_all.csv
diff --git a/Data/Fold-switch_hits-AFcluster/Nrp2/2qqj_A_tmscores_fs_all.csv b/data/Fold-switch_hits-AFcluster/Nrp2/2qqj_A_tmscores_fs_all.csv
similarity index 100%
rename from Data/Fold-switch_hits-AFcluster/Nrp2/2qqj_A_tmscores_fs_all.csv
rename to data/Fold-switch_hits-AFcluster/Nrp2/2qqj_A_tmscores_fs_all.csv
diff --git a/Data/Fold-switch_hits-AFcluster/Nrp2/4qds_A.zip b/data/Fold-switch_hits-AFcluster/Nrp2/4qds_A.zip
similarity index 100%
rename from Data/Fold-switch_hits-AFcluster/Nrp2/4qds_A.zip
rename to data/Fold-switch_hits-AFcluster/Nrp2/4qds_A.zip
diff --git a/Data/Fold-switch_hits-AFcluster/Nrp2/4qds_A_tmscores_fs_all.csv b/data/Fold-switch_hits-AFcluster/Nrp2/4qds_A_tmscores_fs_all.csv
similarity index 100%
rename from Data/Fold-switch_hits-AFcluster/Nrp2/4qds_A_tmscores_fs_all.csv
rename to data/Fold-switch_hits-AFcluster/Nrp2/4qds_A_tmscores_fs_all.csv
diff --git a/Data/Fold-switch_hits-AFcluster/OxyR/4xws_D_tmscores_fs_all.csv b/data/Fold-switch_hits-AFcluster/OxyR/4xws_D_tmscores_fs_all.csv
similarity index 100%
rename from Data/Fold-switch_hits-AFcluster/OxyR/4xws_D_tmscores_fs_all.csv
rename to data/Fold-switch_hits-AFcluster/OxyR/4xws_D_tmscores_fs_all.csv
diff --git a/Data/Fold-switch_hits-AFcluster/OxyR/4y0m_J.zip b/data/Fold-switch_hits-AFcluster/OxyR/4y0m_J.zip
similarity index 100%
rename from Data/Fold-switch_hits-AFcluster/OxyR/4y0m_J.zip
rename to data/Fold-switch_hits-AFcluster/OxyR/4y0m_J.zip
diff --git a/Data/Fold-switch_hits-AFcluster/OxyR/4y0m_J_tmscores_fs_all.csv b/data/Fold-switch_hits-AFcluster/OxyR/4y0m_J_tmscores_fs_all.csv
similarity index 100%
rename from Data/Fold-switch_hits-AFcluster/OxyR/4y0m_J_tmscores_fs_all.csv
rename to data/Fold-switch_hits-AFcluster/OxyR/4y0m_J_tmscores_fs_all.csv
diff --git a/Data/Fold-switch_hits-AFcluster/PimA/4n9w_A.zip b/data/Fold-switch_hits-AFcluster/PimA/4n9w_A.zip
similarity index 100%
rename from Data/Fold-switch_hits-AFcluster/PimA/4n9w_A.zip
rename to data/Fold-switch_hits-AFcluster/PimA/4n9w_A.zip
diff --git a/Data/Fold-switch_hits-AFcluster/PimA/4n9w_A_tmscores_fs_all.csv b/data/Fold-switch_hits-AFcluster/PimA/4n9w_A_tmscores_fs_all.csv
similarity index 100%
rename from Data/Fold-switch_hits-AFcluster/PimA/4n9w_A_tmscores_fs_all.csv
rename to data/Fold-switch_hits-AFcluster/PimA/4n9w_A_tmscores_fs_all.csv
diff --git a/Data/Fold-switch_hits-AFcluster/PimA/4nc9_C.zip b/data/Fold-switch_hits-AFcluster/PimA/4nc9_C.zip
similarity index 100%
rename from Data/Fold-switch_hits-AFcluster/PimA/4nc9_C.zip
rename to data/Fold-switch_hits-AFcluster/PimA/4nc9_C.zip
diff --git a/Data/Fold-switch_hits-AFcluster/PimA/4nc9_C_tmscores_fs_all.csv b/data/Fold-switch_hits-AFcluster/PimA/4nc9_C_tmscores_fs_all.csv
similarity index 100%
rename from Data/Fold-switch_hits-AFcluster/PimA/4nc9_C_tmscores_fs_all.csv
rename to data/Fold-switch_hits-AFcluster/PimA/4nc9_C_tmscores_fs_all.csv
diff --git a/Data/Fold-switch_hits-AFcluster/RfaH/2oug_C_tmscores_fs_all.csv b/data/Fold-switch_hits-AFcluster/RfaH/2oug_C_tmscores_fs_all.csv
similarity index 100%
rename from Data/Fold-switch_hits-AFcluster/RfaH/2oug_C_tmscores_fs_all.csv
rename to data/Fold-switch_hits-AFcluster/RfaH/2oug_C_tmscores_fs_all.csv
diff --git a/Data/Fold-switch_hits-AFcluster/RfaH/RfaH_2ougC_both_folds.pse b/data/Fold-switch_hits-AFcluster/RfaH/RfaH_2ougC_both_folds.pse
similarity index 100%
rename from Data/Fold-switch_hits-AFcluster/RfaH/RfaH_2ougC_both_folds.pse
rename to data/Fold-switch_hits-AFcluster/RfaH/RfaH_2ougC_both_folds.pse
diff --git a/Data/Fold-switch_hits-AFcluster/details.csv b/data/Fold-switch_hits-AFcluster/details.csv
similarity index 100%
rename from Data/Fold-switch_hits-AFcluster/details.csv
rename to data/Fold-switch_hits-AFcluster/details.csv
diff --git a/Data/Fold-switch_hits-SPEACH_AF/A1AT.zip b/data/Fold-switch_hits-SPEACH_AF/A1AT.zip
similarity index 100%
rename from Data/Fold-switch_hits-SPEACH_AF/A1AT.zip
rename to data/Fold-switch_hits-SPEACH_AF/A1AT.zip
diff --git a/Data/Fold-switch_hits-SPEACH_AF/FUS_HENDH.zip b/data/Fold-switch_hits-SPEACH_AF/FUS_HENDH.zip
similarity index 100%
rename from Data/Fold-switch_hits-SPEACH_AF/FUS_HENDH.zip
rename to data/Fold-switch_hits-SPEACH_AF/FUS_HENDH.zip
diff --git a/Data/Fold-switch_hits-SPEACH_AF/KSHV_protease.zip b/data/Fold-switch_hits-SPEACH_AF/KSHV_protease.zip
similarity index 100%
rename from Data/Fold-switch_hits-SPEACH_AF/KSHV_protease.zip
rename to data/Fold-switch_hits-SPEACH_AF/KSHV_protease.zip
diff --git a/Data/Fold-switch_hits-SPEACH_AF/OxyR.zip b/data/Fold-switch_hits-SPEACH_AF/OxyR.zip
similarity index 100%
rename from Data/Fold-switch_hits-SPEACH_AF/OxyR.zip
rename to data/Fold-switch_hits-SPEACH_AF/OxyR.zip
diff --git a/Data/Fold-switch_hits-SPEACH_AF/RfAH.zip b/data/Fold-switch_hits-SPEACH_AF/RfAH.zip
similarity index 100%
rename from Data/Fold-switch_hits-SPEACH_AF/RfAH.zip
rename to data/Fold-switch_hits-SPEACH_AF/RfAH.zip
diff --git a/Data/Fold-switch_hits-SPEACH_AF/capsid_protein.zip b/data/Fold-switch_hits-SPEACH_AF/capsid_protein.zip
similarity index 100%
rename from Data/Fold-switch_hits-SPEACH_AF/capsid_protein.zip
rename to data/Fold-switch_hits-SPEACH_AF/capsid_protein.zip
diff --git a/Data/Fold-switch_hits-SPEACH_AF/componentC3.zip b/data/Fold-switch_hits-SPEACH_AF/componentC3.zip
similarity index 100%
rename from Data/Fold-switch_hits-SPEACH_AF/componentC3.zip
rename to data/Fold-switch_hits-SPEACH_AF/componentC3.zip
diff --git a/Data/Fold-switch_hits-SPEACH_AF/details.dat b/data/Fold-switch_hits-SPEACH_AF/details.dat
similarity index 100%
rename from Data/Fold-switch_hits-SPEACH_AF/details.dat
rename to data/Fold-switch_hits-SPEACH_AF/details.dat
diff --git a/Data/Fold-switch_hits/1iyt_plDDT.png b/data/Fold-switch_hits/1iyt_plDDT.png
similarity index 100%
rename from Data/Fold-switch_hits/1iyt_plDDT.png
rename to data/Fold-switch_hits/1iyt_plDDT.png
diff --git a/Data/Fold-switch_hits/1kct_plDDT.png b/data/Fold-switch_hits/1kct_plDDT.png
similarity index 100%
rename from Data/Fold-switch_hits/1kct_plDDT.png
rename to data/Fold-switch_hits/1kct_plDDT.png
diff --git a/Data/Fold-switch_hits/2jmr_A_plDDT.png b/data/Fold-switch_hits/2jmr_A_plDDT.png
similarity index 100%
rename from Data/Fold-switch_hits/2jmr_A_plDDT.png
rename to data/Fold-switch_hits/2jmr_A_plDDT.png
diff --git a/Data/Fold-switch_hits/2n54_A_plDDT.png b/data/Fold-switch_hits/2n54_A_plDDT.png
similarity index 100%
rename from Data/Fold-switch_hits/2n54_A_plDDT.png
rename to data/Fold-switch_hits/2n54_A_plDDT.png
diff --git a/Data/Fold-switch_hits/4phq_A_plDDT.png b/data/Fold-switch_hits/4phq_A_plDDT.png
similarity index 100%
rename from Data/Fold-switch_hits/4phq_A_plDDT.png
rename to data/Fold-switch_hits/4phq_A_plDDT.png
diff --git a/Data/Fold-switch_hits/4zrb_C_plDDT.png b/data/Fold-switch_hits/4zrb_C_plDDT.png
similarity index 100%
rename from Data/Fold-switch_hits/4zrb_C_plDDT.png
rename to data/Fold-switch_hits/4zrb_C_plDDT.png
diff --git a/Data/Fold-switch_hits/Figure1a.py b/data/Fold-switch_hits/Figure1a.py
similarity index 100%
rename from Data/Fold-switch_hits/Figure1a.py
rename to data/Fold-switch_hits/Figure1a.py
diff --git a/Data/Fold-switch_hits/Figure1b.py b/data/Fold-switch_hits/Figure1b.py
similarity index 100%
rename from Data/Fold-switch_hits/Figure1b.py
rename to data/Fold-switch_hits/Figure1b.py
diff --git a/Data/Fold-switch_hits/Fold_switch_pse/1QB3_CFR.pse b/data/Fold-switch_hits/Fold_switch_pse/1QB3_CFR.pse
similarity index 100%
rename from Data/Fold-switch_hits/Fold_switch_pse/1QB3_CFR.pse
rename to data/Fold-switch_hits/Fold_switch_pse/1QB3_CFR.pse
diff --git a/Data/Fold-switch_hits/Fold_switch_pse/1j9o.pse b/data/Fold-switch_hits/Fold_switch_pse/1j9o.pse
similarity index 100%
rename from Data/Fold-switch_hits/Fold_switch_pse/1j9o.pse
rename to data/Fold-switch_hits/Fold_switch_pse/1j9o.pse
diff --git a/Data/Fold-switch_hits/Fold_switch_pse/1kct.pse b/data/Fold-switch_hits/Fold_switch_pse/1kct.pse
similarity index 100%
rename from Data/Fold-switch_hits/Fold_switch_pse/1kct.pse
rename to data/Fold-switch_hits/Fold_switch_pse/1kct.pse
diff --git a/Data/Fold-switch_hits/Fold_switch_pse/1miq_B.pse b/data/Fold-switch_hits/Fold_switch_pse/1miq_B.pse
similarity index 100%
rename from Data/Fold-switch_hits/Fold_switch_pse/1miq_B.pse
rename to data/Fold-switch_hits/Fold_switch_pse/1miq_B.pse
diff --git a/Data/Fold-switch_hits/Fold_switch_pse/1nqd.pse b/data/Fold-switch_hits/Fold_switch_pse/1nqd.pse
similarity index 100%
rename from Data/Fold-switch_hits/Fold_switch_pse/1nqd.pse
rename to data/Fold-switch_hits/Fold_switch_pse/1nqd.pse
diff --git a/Data/Fold-switch_hits/Fold_switch_pse/1nqj.pse b/data/Fold-switch_hits/Fold_switch_pse/1nqj.pse
similarity index 100%
rename from Data/Fold-switch_hits/Fold_switch_pse/1nqj.pse
rename to data/Fold-switch_hits/Fold_switch_pse/1nqj.pse
diff --git a/Data/Fold-switch_hits/Fold_switch_pse/1qb3.pse b/data/Fold-switch_hits/Fold_switch_pse/1qb3.pse
similarity index 100%
rename from Data/Fold-switch_hits/Fold_switch_pse/1qb3.pse
rename to data/Fold-switch_hits/Fold_switch_pse/1qb3.pse
diff --git a/Data/Fold-switch_hits/Fold_switch_pse/1qs8_best_preds.pse b/data/Fold-switch_hits/Fold_switch_pse/1qs8_best_preds.pse
similarity index 100%
rename from Data/Fold-switch_hits/Fold_switch_pse/1qs8_best_preds.pse
rename to data/Fold-switch_hits/Fold_switch_pse/1qs8_best_preds.pse
diff --git a/Data/Fold-switch_hits/Fold_switch_pse/1rep.pse b/data/Fold-switch_hits/Fold_switch_pse/1rep.pse
similarity index 100%
rename from Data/Fold-switch_hits/Fold_switch_pse/1rep.pse
rename to data/Fold-switch_hits/Fold_switch_pse/1rep.pse
diff --git a/Data/Fold-switch_hits/Fold_switch_pse/1xjt.pse b/data/Fold-switch_hits/Fold_switch_pse/1xjt.pse
similarity index 100%
rename from Data/Fold-switch_hits/Fold_switch_pse/1xjt.pse
rename to data/Fold-switch_hits/Fold_switch_pse/1xjt.pse
diff --git a/Data/Fold-switch_hits/Fold_switch_pse/1xju.pse b/data/Fold-switch_hits/Fold_switch_pse/1xju.pse
similarity index 100%
rename from Data/Fold-switch_hits/Fold_switch_pse/1xju.pse
rename to data/Fold-switch_hits/Fold_switch_pse/1xju.pse
diff --git a/Data/Fold-switch_hits/Fold_switch_pse/2k0q.pse b/data/Fold-switch_hits/Fold_switch_pse/2k0q.pse
similarity index 100%
rename from Data/Fold-switch_hits/Fold_switch_pse/2k0q.pse
rename to data/Fold-switch_hits/Fold_switch_pse/2k0q.pse
diff --git a/Data/Fold-switch_hits/Fold_switch_pse/2kxo.pse b/data/Fold-switch_hits/Fold_switch_pse/2kxo.pse
similarity index 100%
rename from Data/Fold-switch_hits/Fold_switch_pse/2kxo.pse
rename to data/Fold-switch_hits/Fold_switch_pse/2kxo.pse
diff --git a/Data/Fold-switch_hits/Fold_switch_pse/2lel.pse b/data/Fold-switch_hits/Fold_switch_pse/2lel.pse
similarity index 100%
rename from Data/Fold-switch_hits/Fold_switch_pse/2lel.pse
rename to data/Fold-switch_hits/Fold_switch_pse/2lel.pse
diff --git a/Data/Fold-switch_hits/Fold_switch_pse/2n54.pse b/data/Fold-switch_hits/Fold_switch_pse/2n54.pse
similarity index 100%
rename from Data/Fold-switch_hits/Fold_switch_pse/2n54.pse
rename to data/Fold-switch_hits/Fold_switch_pse/2n54.pse
diff --git a/Data/Fold-switch_hits/Fold_switch_pse/2p3v_A.pse b/data/Fold-switch_hits/Fold_switch_pse/2p3v_A.pse
similarity index 100%
rename from Data/Fold-switch_hits/Fold_switch_pse/2p3v_A.pse
rename to data/Fold-switch_hits/Fold_switch_pse/2p3v_A.pse
diff --git a/Data/Fold-switch_hits/Fold_switch_pse/2p3v_D.pse b/data/Fold-switch_hits/Fold_switch_pse/2p3v_D.pse
similarity index 100%
rename from Data/Fold-switch_hits/Fold_switch_pse/2p3v_D.pse
rename to data/Fold-switch_hits/Fold_switch_pse/2p3v_D.pse
diff --git a/Data/Fold-switch_hits/Fold_switch_pse/2pbk.pse b/data/Fold-switch_hits/Fold_switch_pse/2pbk.pse
similarity index 100%
rename from Data/Fold-switch_hits/Fold_switch_pse/2pbk.pse
rename to data/Fold-switch_hits/Fold_switch_pse/2pbk.pse
diff --git a/Data/Fold-switch_hits/Fold_switch_pse/2qke.pse b/data/Fold-switch_hits/Fold_switch_pse/2qke.pse
similarity index 100%
rename from Data/Fold-switch_hits/Fold_switch_pse/2qke.pse
rename to data/Fold-switch_hits/Fold_switch_pse/2qke.pse
diff --git a/Data/Fold-switch_hits/Fold_switch_pse/2qqj.pse b/data/Fold-switch_hits/Fold_switch_pse/2qqj.pse
similarity index 100%
rename from Data/Fold-switch_hits/Fold_switch_pse/2qqj.pse
rename to data/Fold-switch_hits/Fold_switch_pse/2qqj.pse
diff --git a/Data/Fold-switch_hits/Fold_switch_pse/2vfx_A.pse b/data/Fold-switch_hits/Fold_switch_pse/2vfx_A.pse
similarity index 100%
rename from Data/Fold-switch_hits/Fold_switch_pse/2vfx_A.pse
rename to data/Fold-switch_hits/Fold_switch_pse/2vfx_A.pse
diff --git a/Data/Fold-switch_hits/Fold_switch_pse/2wcd_X.pse b/data/Fold-switch_hits/Fold_switch_pse/2wcd_X.pse
similarity index 100%
rename from Data/Fold-switch_hits/Fold_switch_pse/2wcd_X.pse
rename to data/Fold-switch_hits/Fold_switch_pse/2wcd_X.pse
diff --git a/Data/Fold-switch_hits/Fold_switch_pse/2z9o.pse b/data/Fold-switch_hits/Fold_switch_pse/2z9o.pse
similarity index 100%
rename from Data/Fold-switch_hits/Fold_switch_pse/2z9o.pse
rename to data/Fold-switch_hits/Fold_switch_pse/2z9o.pse
diff --git a/Data/Fold-switch_hits/Fold_switch_pse/3gmh_L.pse b/data/Fold-switch_hits/Fold_switch_pse/3gmh_L.pse
similarity index 100%
rename from Data/Fold-switch_hits/Fold_switch_pse/3gmh_L.pse
rename to data/Fold-switch_hits/Fold_switch_pse/3gmh_L.pse
diff --git a/Data/Fold-switch_hits/Fold_switch_pse/3hde.pse b/data/Fold-switch_hits/Fold_switch_pse/3hde.pse
similarity index 100%
rename from Data/Fold-switch_hits/Fold_switch_pse/3hde.pse
rename to data/Fold-switch_hits/Fold_switch_pse/3hde.pse
diff --git a/Data/Fold-switch_hits/Fold_switch_pse/3hdf.pse b/data/Fold-switch_hits/Fold_switch_pse/3hdf.pse
similarity index 100%
rename from Data/Fold-switch_hits/Fold_switch_pse/3hdf.pse
rename to data/Fold-switch_hits/Fold_switch_pse/3hdf.pse
diff --git a/Data/Fold-switch_hits/Fold_switch_pse/3nqj.pse b/data/Fold-switch_hits/Fold_switch_pse/3nqj.pse
similarity index 100%
rename from Data/Fold-switch_hits/Fold_switch_pse/3nqj.pse
rename to data/Fold-switch_hits/Fold_switch_pse/3nqj.pse
diff --git a/Data/Fold-switch_hits/Fold_switch_pse/3qy2.pse b/data/Fold-switch_hits/Fold_switch_pse/3qy2.pse
similarity index 100%
rename from Data/Fold-switch_hits/Fold_switch_pse/3qy2.pse
rename to data/Fold-switch_hits/Fold_switch_pse/3qy2.pse
diff --git a/Data/Fold-switch_hits/Fold_switch_pse/3r9j.pse b/data/Fold-switch_hits/Fold_switch_pse/3r9j.pse
similarity index 100%
rename from Data/Fold-switch_hits/Fold_switch_pse/3r9j.pse
rename to data/Fold-switch_hits/Fold_switch_pse/3r9j.pse
diff --git a/Data/Fold-switch_hits/Fold_switch_pse/3t1p_alignment.pse b/data/Fold-switch_hits/Fold_switch_pse/3t1p_alignment.pse
similarity index 100%
rename from Data/Fold-switch_hits/Fold_switch_pse/3t1p_alignment.pse
rename to data/Fold-switch_hits/Fold_switch_pse/3t1p_alignment.pse
diff --git a/Data/Fold-switch_hits/Fold_switch_pse/3tp2.pse b/data/Fold-switch_hits/Fold_switch_pse/3tp2.pse
similarity index 100%
rename from Data/Fold-switch_hits/Fold_switch_pse/3tp2.pse
rename to data/Fold-switch_hits/Fold_switch_pse/3tp2.pse
diff --git a/Data/Fold-switch_hits/Fold_switch_pse/3zwg.pse b/data/Fold-switch_hits/Fold_switch_pse/3zwg.pse
similarity index 100%
rename from Data/Fold-switch_hits/Fold_switch_pse/3zwg.pse
rename to data/Fold-switch_hits/Fold_switch_pse/3zwg.pse
diff --git a/Data/Fold-switch_hits/Fold_switch_pse/4hdd.pse b/data/Fold-switch_hits/Fold_switch_pse/4hdd.pse
similarity index 100%
rename from Data/Fold-switch_hits/Fold_switch_pse/4hdd.pse
rename to data/Fold-switch_hits/Fold_switch_pse/4hdd.pse
diff --git a/Data/Fold-switch_hits/Fold_switch_pse/4o01_preds.pse b/data/Fold-switch_hits/Fold_switch_pse/4o01_preds.pse
similarity index 100%
rename from Data/Fold-switch_hits/Fold_switch_pse/4o01_preds.pse
rename to data/Fold-switch_hits/Fold_switch_pse/4o01_preds.pse
diff --git a/Data/Fold-switch_hits/Fold_switch_pse/4o0p_preds.pse b/data/Fold-switch_hits/Fold_switch_pse/4o0p_preds.pse
similarity index 100%
rename from Data/Fold-switch_hits/Fold_switch_pse/4o0p_preds.pse
rename to data/Fold-switch_hits/Fold_switch_pse/4o0p_preds.pse
diff --git a/Data/Fold-switch_hits/Fold_switch_pse/4phq.pse b/data/Fold-switch_hits/Fold_switch_pse/4phq.pse
similarity index 100%
rename from Data/Fold-switch_hits/Fold_switch_pse/4phq.pse
rename to data/Fold-switch_hits/Fold_switch_pse/4phq.pse
diff --git a/Data/Fold-switch_hits/Fold_switch_pse/4qds.pse b/data/Fold-switch_hits/Fold_switch_pse/4qds.pse
similarity index 100%
rename from Data/Fold-switch_hits/Fold_switch_pse/4qds.pse
rename to data/Fold-switch_hits/Fold_switch_pse/4qds.pse
diff --git a/Data/Fold-switch_hits/Fold_switch_pse/4rwn.pse b/data/Fold-switch_hits/Fold_switch_pse/4rwn.pse
similarity index 100%
rename from Data/Fold-switch_hits/Fold_switch_pse/4rwn.pse
rename to data/Fold-switch_hits/Fold_switch_pse/4rwn.pse
diff --git a/Data/Fold-switch_hits/Fold_switch_pse/4rwq_best_pred.pse b/data/Fold-switch_hits/Fold_switch_pse/4rwq_best_pred.pse
similarity index 100%
rename from Data/Fold-switch_hits/Fold_switch_pse/4rwq_best_pred.pse
rename to data/Fold-switch_hits/Fold_switch_pse/4rwq_best_pred.pse
diff --git a/Data/Fold-switch_hits/Fold_switch_pse/4tsy.pse b/data/Fold-switch_hits/Fold_switch_pse/4tsy.pse
similarity index 100%
rename from Data/Fold-switch_hits/Fold_switch_pse/4tsy.pse
rename to data/Fold-switch_hits/Fold_switch_pse/4tsy.pse
diff --git a/Data/Fold-switch_hits/Fold_switch_pse/4yhd.pse b/data/Fold-switch_hits/Fold_switch_pse/4yhd.pse
similarity index 100%
rename from Data/Fold-switch_hits/Fold_switch_pse/4yhd.pse
rename to data/Fold-switch_hits/Fold_switch_pse/4yhd.pse
diff --git a/Data/Fold-switch_hits/Fold_switch_pse/4yhd_G_7ahl_E_pyMol_confirm.pse b/data/Fold-switch_hits/Fold_switch_pse/4yhd_G_7ahl_E_pyMol_confirm.pse
similarity index 100%
rename from Data/Fold-switch_hits/Fold_switch_pse/4yhd_G_7ahl_E_pyMol_confirm.pse
rename to data/Fold-switch_hits/Fold_switch_pse/4yhd_G_7ahl_E_pyMol_confirm.pse
diff --git a/Data/Fold-switch_hits/Fold_switch_pse/4zrb_A.pse b/data/Fold-switch_hits/Fold_switch_pse/4zrb_A.pse
similarity index 100%
rename from Data/Fold-switch_hits/Fold_switch_pse/4zrb_A.pse
rename to data/Fold-switch_hits/Fold_switch_pse/4zrb_A.pse
diff --git a/Data/Fold-switch_hits/Fold_switch_pse/4zrb_H.pse b/data/Fold-switch_hits/Fold_switch_pse/4zrb_H.pse
similarity index 100%
rename from Data/Fold-switch_hits/Fold_switch_pse/4zrb_H.pse
rename to data/Fold-switch_hits/Fold_switch_pse/4zrb_H.pse
diff --git a/Data/Fold-switch_hits/Fold_switch_pse/5ejb_1wp8_best_hits.pse b/data/Fold-switch_hits/Fold_switch_pse/5ejb_1wp8_best_hits.pse
similarity index 100%
rename from Data/Fold-switch_hits/Fold_switch_pse/5ejb_1wp8_best_hits.pse
rename to data/Fold-switch_hits/Fold_switch_pse/5ejb_1wp8_best_hits.pse
diff --git a/Data/Fold-switch_hits/Fold_switch_pse/5f3k.pse b/data/Fold-switch_hits/Fold_switch_pse/5f3k.pse
similarity index 100%
rename from Data/Fold-switch_hits/Fold_switch_pse/5f3k.pse
rename to data/Fold-switch_hits/Fold_switch_pse/5f3k.pse
diff --git a/Data/Fold-switch_hits/Fold_switch_pse/5f5r.pse b/data/Fold-switch_hits/Fold_switch_pse/5f5r.pse
similarity index 100%
rename from Data/Fold-switch_hits/Fold_switch_pse/5f5r.pse
rename to data/Fold-switch_hits/Fold_switch_pse/5f5r.pse
diff --git a/Data/Fold-switch_hits/Fold_switch_pse/5fhc_1ebo.pse b/data/Fold-switch_hits/Fold_switch_pse/5fhc_1ebo.pse
similarity index 100%
rename from Data/Fold-switch_hits/Fold_switch_pse/5fhc_1ebo.pse
rename to data/Fold-switch_hits/Fold_switch_pse/5fhc_1ebo.pse
diff --git a/Data/Fold-switch_hits/Fold_switch_pse/5i2m_5i2s_best_preds.pse b/data/Fold-switch_hits/Fold_switch_pse/5i2m_5i2s_best_preds.pse
similarity index 100%
rename from Data/Fold-switch_hits/Fold_switch_pse/5i2m_5i2s_best_preds.pse
rename to data/Fold-switch_hits/Fold_switch_pse/5i2m_5i2s_best_preds.pse
diff --git a/Data/Fold-switch_hits/Fold_switch_pse/5jyt.pse b/data/Fold-switch_hits/Fold_switch_pse/5jyt.pse
similarity index 100%
rename from Data/Fold-switch_hits/Fold_switch_pse/5jyt.pse
rename to data/Fold-switch_hits/Fold_switch_pse/5jyt.pse
diff --git a/Data/Fold-switch_hits/Fold_switch_pse/5lj3.pse b/data/Fold-switch_hits/Fold_switch_pse/5lj3.pse
similarity index 100%
rename from Data/Fold-switch_hits/Fold_switch_pse/5lj3.pse
rename to data/Fold-switch_hits/Fold_switch_pse/5lj3.pse
diff --git a/Data/Fold-switch_hits/Fold_switch_pse/5ond.pse b/data/Fold-switch_hits/Fold_switch_pse/5ond.pse
similarity index 100%
rename from Data/Fold-switch_hits/Fold_switch_pse/5ond.pse
rename to data/Fold-switch_hits/Fold_switch_pse/5ond.pse
diff --git a/Data/Fold-switch_hits/Fold_switch_pse/6c6s.pse b/data/Fold-switch_hits/Fold_switch_pse/6c6s.pse
similarity index 100%
rename from Data/Fold-switch_hits/Fold_switch_pse/6c6s.pse
rename to data/Fold-switch_hits/Fold_switch_pse/6c6s.pse
diff --git a/Data/Fold-switch_hits/Fold_switch_pse/7ahl.pse b/data/Fold-switch_hits/Fold_switch_pse/7ahl.pse
similarity index 100%
rename from Data/Fold-switch_hits/Fold_switch_pse/7ahl.pse
rename to data/Fold-switch_hits/Fold_switch_pse/7ahl.pse
diff --git a/Data/Fold-switch_hits/Fold_switch_pse/amyloid_fibrils.pse b/data/Fold-switch_hits/Fold_switch_pse/amyloid_fibrils.pse
similarity index 100%
rename from Data/Fold-switch_hits/Fold_switch_pse/amyloid_fibrils.pse
rename to data/Fold-switch_hits/Fold_switch_pse/amyloid_fibrils.pse
diff --git a/Data/Fold-switch_hits/Hits_CF_parameters_fold2.csv b/data/Fold-switch_hits/Hits_CF_parameters_fold2.csv
similarity index 100%
rename from Data/Fold-switch_hits/Hits_CF_parameters_fold2.csv
rename to data/Fold-switch_hits/Hits_CF_parameters_fold2.csv
diff --git a/Data/Fold-switch_hits/Single-sequence_confirm/1qb3.png b/data/Fold-switch_hits/Single-sequence_confirm/1qb3.png
similarity index 100%
rename from Data/Fold-switch_hits/Single-sequence_confirm/1qb3.png
rename to data/Fold-switch_hits/Single-sequence_confirm/1qb3.png
diff --git a/Data/Fold-switch_hits/Single-sequence_confirm/1qb3_model.pdb b/data/Fold-switch_hits/Single-sequence_confirm/1qb3_model.pdb
similarity index 100%
rename from Data/Fold-switch_hits/Single-sequence_confirm/1qb3_model.pdb
rename to data/Fold-switch_hits/Single-sequence_confirm/1qb3_model.pdb
diff --git a/Data/Fold-switch_hits/Single-sequence_confirm/2bzy.pdb b/data/Fold-switch_hits/Single-sequence_confirm/2bzy.pdb
similarity index 100%
rename from Data/Fold-switch_hits/Single-sequence_confirm/2bzy.pdb
rename to data/Fold-switch_hits/Single-sequence_confirm/2bzy.pdb
diff --git a/Data/Fold-switch_hits/Single-sequence_confirm/2bzy_2lqw_confirm_manual.pse b/data/Fold-switch_hits/Single-sequence_confirm/2bzy_2lqw_confirm_manual.pse
similarity index 100%
rename from Data/Fold-switch_hits/Single-sequence_confirm/2bzy_2lqw_confirm_manual.pse
rename to data/Fold-switch_hits/Single-sequence_confirm/2bzy_2lqw_confirm_manual.pse
diff --git a/Data/Fold-switch_hits/Single-sequence_confirm/3qy2_1qb3_prediction_confirm_maual.pse b/data/Fold-switch_hits/Single-sequence_confirm/3qy2_1qb3_prediction_confirm_maual.pse
similarity index 100%
rename from Data/Fold-switch_hits/Single-sequence_confirm/3qy2_1qb3_prediction_confirm_maual.pse
rename to data/Fold-switch_hits/Single-sequence_confirm/3qy2_1qb3_prediction_confirm_maual.pse
diff --git a/Data/Fold-switch_hits/sample-TMscore_fs-region_full-MSA_2lep_A.png b/data/Fold-switch_hits/sample-TMscore_fs-region_full-MSA_2lep_A.png
similarity index 100%
rename from Data/Fold-switch_hits/sample-TMscore_fs-region_full-MSA_2lep_A.png
rename to data/Fold-switch_hits/sample-TMscore_fs-region_full-MSA_2lep_A.png
diff --git a/Data/Fold-switch_hits/sample-TMscore_fs-region_full-MSA_4hdd_A.png b/data/Fold-switch_hits/sample-TMscore_fs-region_full-MSA_4hdd_A.png
similarity index 100%
rename from Data/Fold-switch_hits/sample-TMscore_fs-region_full-MSA_4hdd_A.png
rename to data/Fold-switch_hits/sample-TMscore_fs-region_full-MSA_4hdd_A.png
diff --git a/Data/Fold-switch_hits/sample-TMscore_fs-region_full-MSA_4uv2_D.png b/data/Fold-switch_hits/sample-TMscore_fs-region_full-MSA_4uv2_D.png
similarity index 100%
rename from Data/Fold-switch_hits/sample-TMscore_fs-region_full-MSA_4uv2_D.png
rename to data/Fold-switch_hits/sample-TMscore_fs-region_full-MSA_4uv2_D.png
diff --git a/Data/Fold-switch_hits/sample-TMscore_full-MSA_1miq_B.png b/data/Fold-switch_hits/sample-TMscore_full-MSA_1miq_B.png
similarity index 100%
rename from Data/Fold-switch_hits/sample-TMscore_full-MSA_1miq_B.png
rename to data/Fold-switch_hits/sample-TMscore_full-MSA_1miq_B.png
diff --git a/Data/Fold-switch_hits/sample-TMscore_full-MSA_1nqj_B.png b/data/Fold-switch_hits/sample-TMscore_full-MSA_1nqj_B.png
similarity index 100%
rename from Data/Fold-switch_hits/sample-TMscore_full-MSA_1nqj_B.png
rename to data/Fold-switch_hits/sample-TMscore_full-MSA_1nqj_B.png
diff --git a/Data/Fold-switch_hits/sample-TMscore_full-MSA_1xju_B.png b/data/Fold-switch_hits/sample-TMscore_full-MSA_1xju_B.png
similarity index 100%
rename from Data/Fold-switch_hits/sample-TMscore_full-MSA_1xju_B.png
rename to data/Fold-switch_hits/sample-TMscore_full-MSA_1xju_B.png
diff --git a/Data/Fold-switch_hits/sample-TMscore_full-MSA_2a73_B.png b/data/Fold-switch_hits/sample-TMscore_full-MSA_2a73_B.png
similarity index 100%
rename from Data/Fold-switch_hits/sample-TMscore_full-MSA_2a73_B.png
rename to data/Fold-switch_hits/sample-TMscore_full-MSA_2a73_B.png
diff --git a/Data/Fold-switch_hits/sample-TMscore_full-MSA_2c1u_C.png b/data/Fold-switch_hits/sample-TMscore_full-MSA_2c1u_C.png
similarity index 100%
rename from Data/Fold-switch_hits/sample-TMscore_full-MSA_2c1u_C.png
rename to data/Fold-switch_hits/sample-TMscore_full-MSA_2c1u_C.png
diff --git a/Data/Fold-switch_hits/sample-TMscore_full-MSA_2kxo_A.png b/data/Fold-switch_hits/sample-TMscore_full-MSA_2kxo_A.png
similarity index 100%
rename from Data/Fold-switch_hits/sample-TMscore_full-MSA_2kxo_A.png
rename to data/Fold-switch_hits/sample-TMscore_full-MSA_2kxo_A.png
diff --git a/Data/Fold-switch_hits/sample-TMscore_full-MSA_2oug_C.png b/data/Fold-switch_hits/sample-TMscore_full-MSA_2oug_C.png
similarity index 100%
rename from Data/Fold-switch_hits/sample-TMscore_full-MSA_2oug_C.png
rename to data/Fold-switch_hits/sample-TMscore_full-MSA_2oug_C.png
diff --git a/Data/Fold-switch_hits/sample-TMscore_full-MSA_2p3v_A.png b/data/Fold-switch_hits/sample-TMscore_full-MSA_2p3v_A.png
similarity index 100%
rename from Data/Fold-switch_hits/sample-TMscore_full-MSA_2p3v_A.png
rename to data/Fold-switch_hits/sample-TMscore_full-MSA_2p3v_A.png
diff --git a/Data/Fold-switch_hits/sample-TMscore_full-MSA_2qke_E.png b/data/Fold-switch_hits/sample-TMscore_full-MSA_2qke_E.png
similarity index 100%
rename from Data/Fold-switch_hits/sample-TMscore_full-MSA_2qke_E.png
rename to data/Fold-switch_hits/sample-TMscore_full-MSA_2qke_E.png
diff --git a/Data/Fold-switch_hits/sample-TMscore_full-MSA_2vfx_L.png b/data/Fold-switch_hits/sample-TMscore_full-MSA_2vfx_L.png
similarity index 100%
rename from Data/Fold-switch_hits/sample-TMscore_full-MSA_2vfx_L.png
rename to data/Fold-switch_hits/sample-TMscore_full-MSA_2vfx_L.png
diff --git a/Data/Fold-switch_hits/sample-TMscore_full-MSA_3hdf_A.png b/data/Fold-switch_hits/sample-TMscore_full-MSA_3hdf_A.png
similarity index 100%
rename from Data/Fold-switch_hits/sample-TMscore_full-MSA_3hdf_A.png
rename to data/Fold-switch_hits/sample-TMscore_full-MSA_3hdf_A.png
diff --git a/Data/Fold-switch_hits/sample-TMscore_full-MSA_3njq_A.png b/data/Fold-switch_hits/sample-TMscore_full-MSA_3njq_A.png
similarity index 100%
rename from Data/Fold-switch_hits/sample-TMscore_full-MSA_3njq_A.png
rename to data/Fold-switch_hits/sample-TMscore_full-MSA_3njq_A.png
diff --git a/Data/Fold-switch_hits/sample-TMscore_full-MSA_3tp2_A.png b/data/Fold-switch_hits/sample-TMscore_full-MSA_3tp2_A.png
similarity index 100%
rename from Data/Fold-switch_hits/sample-TMscore_full-MSA_3tp2_A.png
rename to data/Fold-switch_hits/sample-TMscore_full-MSA_3tp2_A.png
diff --git a/Data/Fold-switch_hits/sample-TMscore_full-MSA_3zwg_N.png b/data/Fold-switch_hits/sample-TMscore_full-MSA_3zwg_N.png
similarity index 100%
rename from Data/Fold-switch_hits/sample-TMscore_full-MSA_3zwg_N.png
rename to data/Fold-switch_hits/sample-TMscore_full-MSA_3zwg_N.png
diff --git a/Data/Fold-switch_hits/sample-TMscore_full-MSA_4o0p_A.png b/data/Fold-switch_hits/sample-TMscore_full-MSA_4o0p_A.png
similarity index 100%
rename from Data/Fold-switch_hits/sample-TMscore_full-MSA_4o0p_A.png
rename to data/Fold-switch_hits/sample-TMscore_full-MSA_4o0p_A.png
diff --git a/Data/Fold-switch_hits/sample-TMscore_full-MSA_4qds_A.png b/data/Fold-switch_hits/sample-TMscore_full-MSA_4qds_A.png
similarity index 100%
rename from Data/Fold-switch_hits/sample-TMscore_full-MSA_4qds_A.png
rename to data/Fold-switch_hits/sample-TMscore_full-MSA_4qds_A.png
diff --git a/Data/Fold-switch_hits/sample-TMscore_full-MSA_4tsy_D.png b/data/Fold-switch_hits/sample-TMscore_full-MSA_4tsy_D.png
similarity index 100%
rename from Data/Fold-switch_hits/sample-TMscore_full-MSA_4tsy_D.png
rename to data/Fold-switch_hits/sample-TMscore_full-MSA_4tsy_D.png
diff --git a/Data/Fold-switch_hits/sample-TMscore_full-MSA_5f3k_A.png b/data/Fold-switch_hits/sample-TMscore_full-MSA_5f3k_A.png
similarity index 100%
rename from Data/Fold-switch_hits/sample-TMscore_full-MSA_5f3k_A.png
rename to data/Fold-switch_hits/sample-TMscore_full-MSA_5f3k_A.png
diff --git a/Data/Fold-switch_hits/sample-TMscore_full-MSA_5fhc_J.png b/data/Fold-switch_hits/sample-TMscore_full-MSA_5fhc_J.png
similarity index 100%
rename from Data/Fold-switch_hits/sample-TMscore_full-MSA_5fhc_J.png
rename to data/Fold-switch_hits/sample-TMscore_full-MSA_5fhc_J.png
diff --git a/Data/Fold-switch_hits/sample-TMscore_full-MSA_5i2m_A.png b/data/Fold-switch_hits/sample-TMscore_full-MSA_5i2m_A.png
similarity index 100%
rename from Data/Fold-switch_hits/sample-TMscore_full-MSA_5i2m_A.png
rename to data/Fold-switch_hits/sample-TMscore_full-MSA_5i2m_A.png
diff --git a/Data/Fold-switch_hits/sample-TMscore_full-MSA_7ahl_E.png b/data/Fold-switch_hits/sample-TMscore_full-MSA_7ahl_E.png
similarity index 100%
rename from Data/Fold-switch_hits/sample-TMscore_full-MSA_7ahl_E.png
rename to data/Fold-switch_hits/sample-TMscore_full-MSA_7ahl_E.png
diff --git a/Data/SPEACH_AF_benchmark/SPEACH_AF-heatmap-nsamples.png b/data/SPEACH_AF_benchmark/SPEACH_AF-heatmap-nsamples.png
similarity index 100%
rename from Data/SPEACH_AF_benchmark/SPEACH_AF-heatmap-nsamples.png
rename to data/SPEACH_AF_benchmark/SPEACH_AF-heatmap-nsamples.png
diff --git a/Data/SPEACH_AF_benchmark/TMScore_full-MSA_AK.csv b/data/SPEACH_AF_benchmark/TMScore_full-MSA_AK.csv
similarity index 100%
rename from Data/SPEACH_AF_benchmark/TMScore_full-MSA_AK.csv
rename to data/SPEACH_AF_benchmark/TMScore_full-MSA_AK.csv
diff --git a/Data/SPEACH_AF_benchmark/TMScore_full-MSA_ASCT2.csv b/data/SPEACH_AF_benchmark/TMScore_full-MSA_ASCT2.csv
similarity index 100%
rename from Data/SPEACH_AF_benchmark/TMScore_full-MSA_ASCT2.csv
rename to data/SPEACH_AF_benchmark/TMScore_full-MSA_ASCT2.csv
diff --git a/Data/SPEACH_AF_benchmark/TMScore_full-MSA_CCR5.csv b/data/SPEACH_AF_benchmark/TMScore_full-MSA_CCR5.csv
similarity index 100%
rename from Data/SPEACH_AF_benchmark/TMScore_full-MSA_CCR5.csv
rename to data/SPEACH_AF_benchmark/TMScore_full-MSA_CCR5.csv
diff --git a/Data/SPEACH_AF_benchmark/TMScore_full-MSA_CGRPR.csv b/data/SPEACH_AF_benchmark/TMScore_full-MSA_CGRPR.csv
similarity index 100%
rename from Data/SPEACH_AF_benchmark/TMScore_full-MSA_CGRPR.csv
rename to data/SPEACH_AF_benchmark/TMScore_full-MSA_CGRPR.csv
diff --git a/Data/SPEACH_AF_benchmark/TMScore_full-MSA_FZD7.csv b/data/SPEACH_AF_benchmark/TMScore_full-MSA_FZD7.csv
similarity index 100%
rename from Data/SPEACH_AF_benchmark/TMScore_full-MSA_FZD7.csv
rename to data/SPEACH_AF_benchmark/TMScore_full-MSA_FZD7.csv
diff --git a/Data/SPEACH_AF_benchmark/TMScore_full-MSA_LAT1.csv b/data/SPEACH_AF_benchmark/TMScore_full-MSA_LAT1.csv
similarity index 100%
rename from Data/SPEACH_AF_benchmark/TMScore_full-MSA_LAT1.csv
rename to data/SPEACH_AF_benchmark/TMScore_full-MSA_LAT1.csv
diff --git a/Data/SPEACH_AF_benchmark/TMScore_full-MSA_MCT1.csv b/data/SPEACH_AF_benchmark/TMScore_full-MSA_MCT1.csv
similarity index 100%
rename from Data/SPEACH_AF_benchmark/TMScore_full-MSA_MCT1.csv
rename to data/SPEACH_AF_benchmark/TMScore_full-MSA_MCT1.csv
diff --git a/Data/SPEACH_AF_benchmark/TMScore_full-MSA_MurJ.csv b/data/SPEACH_AF_benchmark/TMScore_full-MSA_MurJ.csv
similarity index 100%
rename from Data/SPEACH_AF_benchmark/TMScore_full-MSA_MurJ.csv
rename to data/SPEACH_AF_benchmark/TMScore_full-MSA_MurJ.csv
diff --git a/Data/SPEACH_AF_benchmark/TMScore_full-MSA_PTH1R.csv b/data/SPEACH_AF_benchmark/TMScore_full-MSA_PTH1R.csv
similarity index 100%
rename from Data/SPEACH_AF_benchmark/TMScore_full-MSA_PTH1R.csv
rename to data/SPEACH_AF_benchmark/TMScore_full-MSA_PTH1R.csv
diff --git a/Data/SPEACH_AF_benchmark/TMScore_full-MSA_PfMATE.csv b/data/SPEACH_AF_benchmark/TMScore_full-MSA_PfMATE.csv
similarity index 100%
rename from Data/SPEACH_AF_benchmark/TMScore_full-MSA_PfMATE.csv
rename to data/SPEACH_AF_benchmark/TMScore_full-MSA_PfMATE.csv
diff --git a/Data/SPEACH_AF_benchmark/TMScore_full-MSA_RBP.csv b/data/SPEACH_AF_benchmark/TMScore_full-MSA_RBP.csv
similarity index 100%
rename from Data/SPEACH_AF_benchmark/TMScore_full-MSA_RBP.csv
rename to data/SPEACH_AF_benchmark/TMScore_full-MSA_RBP.csv
diff --git a/Data/SPEACH_AF_benchmark/TMScore_full-MSA_SERT.csv b/data/SPEACH_AF_benchmark/TMScore_full-MSA_SERT.csv
similarity index 100%
rename from Data/SPEACH_AF_benchmark/TMScore_full-MSA_SERT.csv
rename to data/SPEACH_AF_benchmark/TMScore_full-MSA_SERT.csv
diff --git a/Data/SPEACH_AF_benchmark/TMScore_full-MSA_STP10.csv b/data/SPEACH_AF_benchmark/TMScore_full-MSA_STP10.csv
similarity index 100%
rename from Data/SPEACH_AF_benchmark/TMScore_full-MSA_STP10.csv
rename to data/SPEACH_AF_benchmark/TMScore_full-MSA_STP10.csv
diff --git a/Data/SPEACH_AF_benchmark/TMScore_full-MSA_ZnT8.csv b/data/SPEACH_AF_benchmark/TMScore_full-MSA_ZnT8.csv
similarity index 100%
rename from Data/SPEACH_AF_benchmark/TMScore_full-MSA_ZnT8.csv
rename to data/SPEACH_AF_benchmark/TMScore_full-MSA_ZnT8.csv
diff --git a/Data/SPEACH_AF_benchmark/TMScore_random-MSA_AK.csv b/data/SPEACH_AF_benchmark/TMScore_random-MSA_AK.csv
similarity index 100%
rename from Data/SPEACH_AF_benchmark/TMScore_random-MSA_AK.csv
rename to data/SPEACH_AF_benchmark/TMScore_random-MSA_AK.csv
diff --git a/Data/SPEACH_AF_benchmark/TMScore_random-MSA_ASCT2.csv b/data/SPEACH_AF_benchmark/TMScore_random-MSA_ASCT2.csv
similarity index 100%
rename from Data/SPEACH_AF_benchmark/TMScore_random-MSA_ASCT2.csv
rename to data/SPEACH_AF_benchmark/TMScore_random-MSA_ASCT2.csv
diff --git a/Data/SPEACH_AF_benchmark/TMScore_random-MSA_CCR5.csv b/data/SPEACH_AF_benchmark/TMScore_random-MSA_CCR5.csv
similarity index 100%
rename from Data/SPEACH_AF_benchmark/TMScore_random-MSA_CCR5.csv
rename to data/SPEACH_AF_benchmark/TMScore_random-MSA_CCR5.csv
diff --git a/Data/SPEACH_AF_benchmark/TMScore_random-MSA_CGRPR.csv b/data/SPEACH_AF_benchmark/TMScore_random-MSA_CGRPR.csv
similarity index 100%
rename from Data/SPEACH_AF_benchmark/TMScore_random-MSA_CGRPR.csv
rename to data/SPEACH_AF_benchmark/TMScore_random-MSA_CGRPR.csv
diff --git a/Data/SPEACH_AF_benchmark/TMScore_random-MSA_FZD7.csv b/data/SPEACH_AF_benchmark/TMScore_random-MSA_FZD7.csv
similarity index 100%
rename from Data/SPEACH_AF_benchmark/TMScore_random-MSA_FZD7.csv
rename to data/SPEACH_AF_benchmark/TMScore_random-MSA_FZD7.csv
diff --git a/Data/SPEACH_AF_benchmark/TMScore_random-MSA_LAT1.csv b/data/SPEACH_AF_benchmark/TMScore_random-MSA_LAT1.csv
similarity index 100%
rename from Data/SPEACH_AF_benchmark/TMScore_random-MSA_LAT1.csv
rename to data/SPEACH_AF_benchmark/TMScore_random-MSA_LAT1.csv
diff --git a/Data/SPEACH_AF_benchmark/TMScore_random-MSA_MCT1.csv b/data/SPEACH_AF_benchmark/TMScore_random-MSA_MCT1.csv
similarity index 100%
rename from Data/SPEACH_AF_benchmark/TMScore_random-MSA_MCT1.csv
rename to data/SPEACH_AF_benchmark/TMScore_random-MSA_MCT1.csv
diff --git a/Data/SPEACH_AF_benchmark/TMScore_random-MSA_MurJ.csv b/data/SPEACH_AF_benchmark/TMScore_random-MSA_MurJ.csv
similarity index 100%
rename from Data/SPEACH_AF_benchmark/TMScore_random-MSA_MurJ.csv
rename to data/SPEACH_AF_benchmark/TMScore_random-MSA_MurJ.csv
diff --git a/Data/SPEACH_AF_benchmark/TMScore_random-MSA_PTH1R.csv b/data/SPEACH_AF_benchmark/TMScore_random-MSA_PTH1R.csv
similarity index 100%
rename from Data/SPEACH_AF_benchmark/TMScore_random-MSA_PTH1R.csv
rename to data/SPEACH_AF_benchmark/TMScore_random-MSA_PTH1R.csv
diff --git a/Data/SPEACH_AF_benchmark/TMScore_random-MSA_PfMATE.csv b/data/SPEACH_AF_benchmark/TMScore_random-MSA_PfMATE.csv
similarity index 100%
rename from Data/SPEACH_AF_benchmark/TMScore_random-MSA_PfMATE.csv
rename to data/SPEACH_AF_benchmark/TMScore_random-MSA_PfMATE.csv
diff --git a/Data/SPEACH_AF_benchmark/TMScore_random-MSA_RBP.csv b/data/SPEACH_AF_benchmark/TMScore_random-MSA_RBP.csv
similarity index 100%
rename from Data/SPEACH_AF_benchmark/TMScore_random-MSA_RBP.csv
rename to data/SPEACH_AF_benchmark/TMScore_random-MSA_RBP.csv
diff --git a/Data/SPEACH_AF_benchmark/TMScore_random-MSA_SERT.csv b/data/SPEACH_AF_benchmark/TMScore_random-MSA_SERT.csv
similarity index 100%
rename from Data/SPEACH_AF_benchmark/TMScore_random-MSA_SERT.csv
rename to data/SPEACH_AF_benchmark/TMScore_random-MSA_SERT.csv
diff --git a/Data/SPEACH_AF_benchmark/TMScore_random-MSA_STP10.csv b/data/SPEACH_AF_benchmark/TMScore_random-MSA_STP10.csv
similarity index 100%
rename from Data/SPEACH_AF_benchmark/TMScore_random-MSA_STP10.csv
rename to data/SPEACH_AF_benchmark/TMScore_random-MSA_STP10.csv
diff --git a/Data/SPEACH_AF_benchmark/TMScore_random-MSA_ZnT8.csv b/data/SPEACH_AF_benchmark/TMScore_random-MSA_ZnT8.csv
similarity index 100%
rename from Data/SPEACH_AF_benchmark/TMScore_random-MSA_ZnT8.csv
rename to data/SPEACH_AF_benchmark/TMScore_random-MSA_ZnT8.csv
diff --git a/Data/SPEACH_AF_benchmark/heatmap-max TMscore comparison.png b/data/SPEACH_AF_benchmark/heatmap-max TMscore comparison.png
similarity index 100%
rename from Data/SPEACH_AF_benchmark/heatmap-max TMscore comparison.png
rename to data/SPEACH_AF_benchmark/heatmap-max TMscore comparison.png
diff --git a/Data/SPEACH_AF_benchmark/heatmap-max TMscore comparison.svg b/data/SPEACH_AF_benchmark/heatmap-max TMscore comparison.svg
similarity index 100%
rename from Data/SPEACH_AF_benchmark/heatmap-max TMscore comparison.svg
rename to data/SPEACH_AF_benchmark/heatmap-max TMscore comparison.svg
diff --git a/Data/SPEACH_AF_benchmark/list_of_SPEACH_AF-PDB_ID.csv b/data/SPEACH_AF_benchmark/list_of_SPEACH_AF-PDB_ID.csv
similarity index 100%
rename from Data/SPEACH_AF_benchmark/list_of_SPEACH_AF-PDB_ID.csv
rename to data/SPEACH_AF_benchmark/list_of_SPEACH_AF-PDB_ID.csv
diff --git a/Data/SPEACH_AF_benchmark/max_TM_heatmap.py b/data/SPEACH_AF_benchmark/max_TM_heatmap.py
similarity index 100%
rename from Data/SPEACH_AF_benchmark/max_TM_heatmap.py
rename to data/SPEACH_AF_benchmark/max_TM_heatmap.py
diff --git a/Data/SPEACH_AF_benchmark/nsample_heatmap.py b/data/SPEACH_AF_benchmark/nsample_heatmap.py
similarity index 100%
rename from Data/SPEACH_AF_benchmark/nsample_heatmap.py
rename to data/SPEACH_AF_benchmark/nsample_heatmap.py
diff --git a/Data/SPEACH_AF_benchmark/number_of_predictions.csv b/data/SPEACH_AF_benchmark/number_of_predictions.csv
similarity index 100%
rename from Data/SPEACH_AF_benchmark/number_of_predictions.csv
rename to data/SPEACH_AF_benchmark/number_of_predictions.csv
diff --git a/Data/SPEACH_AF_benchmark/pse_files/.keep b/data/SPEACH_AF_benchmark/pse_files/.keep
similarity index 100%
rename from Data/SPEACH_AF_benchmark/pse_files/.keep
rename to data/SPEACH_AF_benchmark/pse_files/.keep
diff --git a/Data/SPEACH_AF_benchmark/pse_files/pse_files.zip b/data/SPEACH_AF_benchmark/pse_files/pse_files.zip
similarity index 100%
rename from Data/SPEACH_AF_benchmark/pse_files/pse_files.zip
rename to data/SPEACH_AF_benchmark/pse_files/pse_files.zip
diff --git a/Data/SPEACH_AF_benchmark/ref_data_Mchaourab_AK.csv b/data/SPEACH_AF_benchmark/ref_data_Mchaourab_AK.csv
similarity index 100%
rename from Data/SPEACH_AF_benchmark/ref_data_Mchaourab_AK.csv
rename to data/SPEACH_AF_benchmark/ref_data_Mchaourab_AK.csv
diff --git a/Data/SPEACH_AF_benchmark/ref_data_Mchaourab_ASCT2.csv b/data/SPEACH_AF_benchmark/ref_data_Mchaourab_ASCT2.csv
similarity index 100%
rename from Data/SPEACH_AF_benchmark/ref_data_Mchaourab_ASCT2.csv
rename to data/SPEACH_AF_benchmark/ref_data_Mchaourab_ASCT2.csv
diff --git a/Data/SPEACH_AF_benchmark/ref_data_Mchaourab_CCR5.csv b/data/SPEACH_AF_benchmark/ref_data_Mchaourab_CCR5.csv
similarity index 100%
rename from Data/SPEACH_AF_benchmark/ref_data_Mchaourab_CCR5.csv
rename to data/SPEACH_AF_benchmark/ref_data_Mchaourab_CCR5.csv
diff --git a/Data/SPEACH_AF_benchmark/ref_data_Mchaourab_CGRPR.csv b/data/SPEACH_AF_benchmark/ref_data_Mchaourab_CGRPR.csv
similarity index 100%
rename from Data/SPEACH_AF_benchmark/ref_data_Mchaourab_CGRPR.csv
rename to data/SPEACH_AF_benchmark/ref_data_Mchaourab_CGRPR.csv
diff --git a/Data/SPEACH_AF_benchmark/ref_data_Mchaourab_FZD7.csv b/data/SPEACH_AF_benchmark/ref_data_Mchaourab_FZD7.csv
similarity index 100%
rename from Data/SPEACH_AF_benchmark/ref_data_Mchaourab_FZD7.csv
rename to data/SPEACH_AF_benchmark/ref_data_Mchaourab_FZD7.csv
diff --git a/Data/SPEACH_AF_benchmark/ref_data_Mchaourab_LAT1.csv b/data/SPEACH_AF_benchmark/ref_data_Mchaourab_LAT1.csv
similarity index 100%
rename from Data/SPEACH_AF_benchmark/ref_data_Mchaourab_LAT1.csv
rename to data/SPEACH_AF_benchmark/ref_data_Mchaourab_LAT1.csv
diff --git a/Data/SPEACH_AF_benchmark/ref_data_Mchaourab_MCT1.csv b/data/SPEACH_AF_benchmark/ref_data_Mchaourab_MCT1.csv
similarity index 100%
rename from Data/SPEACH_AF_benchmark/ref_data_Mchaourab_MCT1.csv
rename to data/SPEACH_AF_benchmark/ref_data_Mchaourab_MCT1.csv
diff --git a/Data/SPEACH_AF_benchmark/ref_data_Mchaourab_MurJ.csv b/data/SPEACH_AF_benchmark/ref_data_Mchaourab_MurJ.csv
similarity index 100%
rename from Data/SPEACH_AF_benchmark/ref_data_Mchaourab_MurJ.csv
rename to data/SPEACH_AF_benchmark/ref_data_Mchaourab_MurJ.csv
diff --git a/Data/SPEACH_AF_benchmark/ref_data_Mchaourab_PTH1R.csv b/data/SPEACH_AF_benchmark/ref_data_Mchaourab_PTH1R.csv
similarity index 100%
rename from Data/SPEACH_AF_benchmark/ref_data_Mchaourab_PTH1R.csv
rename to data/SPEACH_AF_benchmark/ref_data_Mchaourab_PTH1R.csv
diff --git a/Data/SPEACH_AF_benchmark/ref_data_Mchaourab_PfMATE.csv b/data/SPEACH_AF_benchmark/ref_data_Mchaourab_PfMATE.csv
similarity index 100%
rename from Data/SPEACH_AF_benchmark/ref_data_Mchaourab_PfMATE.csv
rename to data/SPEACH_AF_benchmark/ref_data_Mchaourab_PfMATE.csv
diff --git a/Data/SPEACH_AF_benchmark/ref_data_Mchaourab_RBP.csv b/data/SPEACH_AF_benchmark/ref_data_Mchaourab_RBP.csv
similarity index 100%
rename from Data/SPEACH_AF_benchmark/ref_data_Mchaourab_RBP.csv
rename to data/SPEACH_AF_benchmark/ref_data_Mchaourab_RBP.csv
diff --git a/Data/SPEACH_AF_benchmark/ref_data_Mchaourab_SERT.csv b/data/SPEACH_AF_benchmark/ref_data_Mchaourab_SERT.csv
similarity index 100%
rename from Data/SPEACH_AF_benchmark/ref_data_Mchaourab_SERT.csv
rename to data/SPEACH_AF_benchmark/ref_data_Mchaourab_SERT.csv
diff --git a/Data/SPEACH_AF_benchmark/ref_data_Mchaourab_STP10.csv b/data/SPEACH_AF_benchmark/ref_data_Mchaourab_STP10.csv
similarity index 100%
rename from Data/SPEACH_AF_benchmark/ref_data_Mchaourab_STP10.csv
rename to data/SPEACH_AF_benchmark/ref_data_Mchaourab_STP10.csv
diff --git a/Data/SPEACH_AF_benchmark/ref_data_Mchaourab_ZnT8.csv b/data/SPEACH_AF_benchmark/ref_data_Mchaourab_ZnT8.csv
similarity index 100%
rename from Data/SPEACH_AF_benchmark/ref_data_Mchaourab_ZnT8.csv
rename to data/SPEACH_AF_benchmark/ref_data_Mchaourab_ZnT8.csv
diff --git a/Data/Sa1/8E6Y_GA_CFR_2_4.pse b/data/Sa1/8E6Y_GA_CFR_2_4.pse
similarity index 100%
rename from Data/Sa1/8E6Y_GA_CFR_2_4.pse
rename to data/Sa1/8E6Y_GA_CFR_2_4.pse
diff --git a/Data/Sa1/8e5y_rebalanced.a3m b/data/Sa1/8e5y_rebalanced.a3m
similarity index 100%
rename from Data/Sa1/8e5y_rebalanced.a3m
rename to data/Sa1/8e5y_rebalanced.a3m
diff --git a/Data/Sa1/8e6y_1gjs.a3m b/data/Sa1/8e6y_1gjs.a3m
similarity index 100%
rename from Data/Sa1/8e6y_1gjs.a3m
rename to data/Sa1/8e6y_1gjs.a3m
diff --git a/Data/Sa1/8e6y_1gjs.pse b/data/Sa1/8e6y_1gjs.pse
similarity index 100%
rename from Data/Sa1/8e6y_1gjs.pse
rename to data/Sa1/8e6y_1gjs.pse
diff --git a/Data/Sa1/8e6y_2fs1.a3m b/data/Sa1/8e6y_2fs1.a3m
similarity index 100%
rename from Data/Sa1/8e6y_2fs1.a3m
rename to data/Sa1/8e6y_2fs1.a3m
diff --git a/Data/Sa1/8e6y_2fs1.pse b/data/Sa1/8e6y_2fs1.pse
similarity index 100%
rename from Data/Sa1/8e6y_2fs1.pse
rename to data/Sa1/8e6y_2fs1.pse
diff --git a/Data/Sa1/8e6y_2mh8.a3m b/data/Sa1/8e6y_2mh8.a3m
similarity index 100%
rename from Data/Sa1/8e6y_2mh8.a3m
rename to data/Sa1/8e6y_2mh8.a3m
diff --git a/Data/Sa1/8e6y_2mh8.pse b/data/Sa1/8e6y_2mh8.pse
similarity index 100%
rename from Data/Sa1/8e6y_2mh8.pse
rename to data/Sa1/8e6y_2mh8.pse
diff --git a/Data/Sa1/8e6y_SA1_CFR_Full.pse b/data/Sa1/8e6y_SA1_CFR_Full.pse
similarity index 100%
rename from Data/Sa1/8e6y_SA1_CFR_Full.pse
rename to data/Sa1/8e6y_SA1_CFR_Full.pse
diff --git a/environment.yml b/environment.yml
new file mode 100644
index 0000000..6297c5d
--- /dev/null
+++ b/environment.yml
@@ -0,0 +1,15 @@
+name: cf-random # conda activate cf-random
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - python=3.10
+ - pandas=1.5.3
+ - biopython=1.79
+ - colabfold
+ - foldseek
+ - pymol-open-source
+ - pip
+ - pip:
+ - -e .
\ No newline at end of file
diff --git a/install.sh b/install.sh
new file mode 100755
index 0000000..81a3f95
--- /dev/null
+++ b/install.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+set -e
+
+echo "=== CF-random Installation ==="
+echo "[1/2] Creating conda environment..."
+conda env create -f environment.yml
+eval "$(conda shell.bash hook)"
+conda activate cf-random
+
+# Install JAX with GPU or CPU depending on hardware
+if command -v nvidia-smi &> /dev/null; then
+ CUDA_VERSION=$(nvidia-smi | grep -oP "CUDA Version: \K[0-9]+" | head -1); CUDA_VERSION=${CUDA_VERSION:-0}
+ echo " GPU detected (CUDA $CUDA_VERSION), installing GPU-enabled JAX..."
+ if [ "$CUDA_VERSION" -ge 12 ]; then
+ pip install "jax[cuda12_pip]==0.4.25" \
+ -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
+ else
+ pip install "jax[cuda11_pip]==0.4.25" \
+ -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
+ fi
+else
+ echo " No GPU detected, installing CPU-only JAX..."
+ pip install "jax==0.4.25" "jaxlib==0.4.25"
+fi
+
+echo "[2/2] Verifying installation..."
+python -c "from Bio.Data import SCOPData; print(' biopython ok')"
+python -c "import numpy; print(f' numpy {numpy.__version__}')"
+python -c "import pandas; print(f' pandas {pandas.__version__}')"
+python -c "import jax; print(f' jax {jax.__version__} | devices: {jax.devices()}')"
+colabfold_batch --help > /dev/null && echo " colabfold ok"
+cf-random --help > /dev/null && echo " cf-random ok"
+
+echo ""
+echo "=== Installation complete ==="
+echo "Activate with: conda activate cf-random"
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..7b72ce7
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,66 @@
+[build-system]
+requires = ["setuptools>=65.0", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "cf-random"
+version = "0.1.0"
+description = "CF-random: Predicting alternative conformations and fold-switching proteins"
+readme = "README.md"
+requires-python = ">=3.10"
+license = {text = "Public Domain (NCBI)"}
+authors = [
+ {name = "Myeongsang (Samuel) Lee"},
+ {name = "Pramesh Sharma"},
+]
+keywords = ["protein-structure", "fold-switching", "alternative-conformation", "colabfold"]
+classifiers = [
+ "Development Status :: 3 - Alpha",
+ "Intended Audience :: Science/Research",
+ "License :: Public Domain",
+ "Programming Language :: Python :: 3",
+ "Programming Language :: Python :: 3.10",
+ "Topic :: Scientific/Engineering :: Bio-Informatics",
+]
+dependencies = [
+ "matplotlib",
+ "seaborn",
+ "scikit-learn",
+ "mdtraj",
+ "MDAnalysis",
+ "textalloc",
+ "tmtools",
+ "adjustText",
+ "thefuzz",
+ "numpy>=1.23.5,<2.0",
+]
+
+[project.optional-dependencies]
+dev = [
+ "pytest>=7.0",
+ "black",
+ "isort",
+ "flake8",
+ "autoflake"
+]
+
+[project.urls]
+Homepage = "https://github.com/ncbi/CF-random_software"
+
+[project.scripts]
+cf-random = "cf_random.cli:main"
+
+[tool.setuptools.packages.find]
+where = ["."]
+include = ["cf_random*", "code*"]
+
+[tool.setuptools.package-data]
+cf_random = ["data/*"]
+
+[tool.black]
+line-length = 100
+target-version = ["py310"]
+
+[tool.isort]
+profile = "black"
+line_length = 100
\ No newline at end of file
diff --git a/structures_all.csv b/structures_all.csv
new file mode 100644
index 0000000..0732988
--- /dev/null
+++ b/structures_all.csv
@@ -0,0 +1,198 @@
+group,file,pca_1,pca_2
+0,blind_prediction/test/test_predicted_models_full_rand_13/test_unrelaxed_rank_001_alphafold2_ptm_model_4_seed_014-self.foldseek,3.513358814421883,-0.20158051827612167
+0,blind_prediction/test/test_predicted_models_full_rand_13/test_unrelaxed_rank_002_alphafold2_ptm_model_4_seed_017-self.foldseek,3.4961018979686562,-0.18830686170865948
+0,blind_prediction/test/test_predicted_models_full_rand_13/test_unrelaxed_rank_003_alphafold2_ptm_model_4_seed_015-self.foldseek,3.528157963580295,-0.19731742639202024
+0,blind_prediction/test/test_predicted_models_full_rand_13/test_unrelaxed_rank_004_alphafold2_ptm_model_4_seed_016-self.foldseek,3.5200377647632166,-0.19774564366419642
+0,blind_prediction/test/test_predicted_models_full_rand_13/test_unrelaxed_rank_005_alphafold2_ptm_model_4_seed_013-self.foldseek,3.696003270225536,-0.1521557583862445
+0,blind_prediction/test/test_predicted_models_full_rand_13/test_unrelaxed_rank_006_alphafold2_ptm_model_3_seed_013-self.foldseek,3.744918505661837,-0.17824897479657956
+0,blind_prediction/test/test_predicted_models_full_rand_13/test_unrelaxed_rank_007_alphafold2_ptm_model_3_seed_014-self.foldseek,3.5518673734544897,-0.19170558781555214
+0,blind_prediction/test/test_predicted_models_full_rand_13/test_unrelaxed_rank_008_alphafold2_ptm_model_3_seed_015-self.foldseek,3.5468045373203796,-0.18659367081820677
+0,blind_prediction/test/test_predicted_models_full_rand_13/test_unrelaxed_rank_009_alphafold2_ptm_model_3_seed_016-self.foldseek,3.5601237812898128,-0.19567501463558606
+0,blind_prediction/test/test_predicted_models_full_rand_13/test_unrelaxed_rank_010_alphafold2_ptm_model_3_seed_017-self.foldseek,3.767637167735202,-0.1646513085968849
+0,blind_prediction/test/test_predicted_models_full_rand_13/test_unrelaxed_rank_011_alphafold2_ptm_model_5_seed_016-self.foldseek,3.6488490581082704,-0.14539904704837914
+0,blind_prediction/test/test_predicted_models_full_rand_13/test_unrelaxed_rank_012_alphafold2_ptm_model_5_seed_015-self.foldseek,3.6526469640565056,-0.14471727563274198
+0,blind_prediction/test/test_predicted_models_full_rand_13/test_unrelaxed_rank_013_alphafold2_ptm_model_2_seed_016-self.foldseek,3.532360342566499,-0.2072501869150471
+0,blind_prediction/test/test_predicted_models_full_rand_13/test_unrelaxed_rank_014_alphafold2_ptm_model_2_seed_015-self.foldseek,3.5430636388205294,-0.17975788498982168
+0,blind_prediction/test/test_predicted_models_full_rand_13/test_unrelaxed_rank_015_alphafold2_ptm_model_5_seed_013-self.foldseek,3.688533096012158,-0.14330132957538994
+0,blind_prediction/test/test_predicted_models_full_rand_13/test_unrelaxed_rank_016_alphafold2_ptm_model_2_seed_013-self.foldseek,3.5388283717653346,-0.18317209922830904
+0,blind_prediction/test/test_predicted_models_full_rand_13/test_unrelaxed_rank_017_alphafold2_ptm_model_2_seed_017-self.foldseek,3.659260377627193,-0.1447833763445402
+0,blind_prediction/test/test_predicted_models_full_rand_13/test_unrelaxed_rank_018_alphafold2_ptm_model_1_seed_015-self.foldseek,3.7630744421692173,-0.14889257962660002
+0,blind_prediction/test/test_predicted_models_full_rand_13/test_unrelaxed_rank_019_alphafold2_ptm_model_2_seed_014-self.foldseek,3.7692345445300033,-0.14430710473099423
+0,blind_prediction/test/test_predicted_models_full_rand_13/test_unrelaxed_rank_020_alphafold2_ptm_model_5_seed_014-self.foldseek,3.6914031794670863,-0.14435461923168413
+0,blind_prediction/test/test_predicted_models_full_rand_13/test_unrelaxed_rank_021_alphafold2_ptm_model_1_seed_016-self.foldseek,3.71269824733543,-0.1627135939826589
+0,blind_prediction/test/test_predicted_models_full_rand_13/test_unrelaxed_rank_022_alphafold2_ptm_model_1_seed_014-self.foldseek,3.8103594896868263,-0.15535741596800187
+0,blind_prediction/test/test_predicted_models_full_rand_13/test_unrelaxed_rank_023_alphafold2_ptm_model_1_seed_013-self.foldseek,3.7846147962264567,-0.1596336482097755
+0,blind_prediction/test/test_predicted_models_full_rand_13/test_unrelaxed_rank_024_alphafold2_ptm_model_5_seed_017-self.foldseek,3.672598864479458,-0.14552615299358362
+0,blind_prediction/test/test_predicted_models_full_rand_13/test_unrelaxed_rank_025_alphafold2_ptm_model_1_seed_017-self.foldseek,3.7394199238494368,-0.15118144938686265
+0,blind_prediction/test/test_predicted_models_rand_30_max_16_ext_32/test_unrelaxed_rank_001_alphafold2_ptm_model_5_seed_000-self.foldseek,3.3020287283784704,-0.05849038002270092
+0,blind_prediction/test/test_predicted_models_rand_30_max_16_ext_32/test_unrelaxed_rank_002_alphafold2_ptm_model_5_seed_003-self.foldseek,3.659940812631571,-0.10996895797136942
+0,blind_prediction/test/test_predicted_models_rand_30_max_16_ext_32/test_unrelaxed_rank_003_alphafold2_ptm_model_4_seed_000-self.foldseek,3.5289002307377006,-0.13565031897260257
+0,blind_prediction/test/test_predicted_models_rand_30_max_16_ext_32/test_unrelaxed_rank_004_alphafold2_ptm_model_4_seed_004-self.foldseek,3.5075662988122307,-0.16714207184351057
+0,blind_prediction/test/test_predicted_models_rand_30_max_16_ext_32/test_unrelaxed_rank_005_alphafold2_ptm_model_5_seed_004-self.foldseek,3.555431826130826,-0.09183437129679092
+0,blind_prediction/test/test_predicted_models_rand_30_max_16_ext_32/test_unrelaxed_rank_006_alphafold2_ptm_model_4_seed_003-self.foldseek,3.399145557254689,-0.12925970507820755
+0,blind_prediction/test/test_predicted_models_rand_30_max_16_ext_32/test_unrelaxed_rank_007_alphafold2_ptm_model_4_seed_001-self.foldseek,3.4220738773635486,-0.1281109801337362
+0,blind_prediction/test/test_predicted_models_rand_30_max_16_ext_32/test_unrelaxed_rank_008_alphafold2_ptm_model_1_seed_000-self.foldseek,3.529016407655275,-0.06548261305819573
+0,blind_prediction/test/test_predicted_models_rand_30_max_16_ext_32/test_unrelaxed_rank_009_alphafold2_ptm_model_2_seed_003-self.foldseek,3.0586793375363115,0.00413774659486195
+0,blind_prediction/test/test_predicted_models_rand_30_max_16_ext_32/test_unrelaxed_rank_010_alphafold2_ptm_model_1_seed_003-self.foldseek,3.163277143192264,-0.010827795745730402
+0,blind_prediction/test/test_predicted_models_rand_30_max_16_ext_32/test_unrelaxed_rank_011_alphafold2_ptm_model_2_seed_000-self.foldseek,3.426404882419951,-0.042993985683936
+0,blind_prediction/test/test_predicted_models_rand_30_max_16_ext_32/test_unrelaxed_rank_012_alphafold2_ptm_model_5_seed_001-self.foldseek,3.1196051954818875,-0.128817946332863
+0,blind_prediction/test/test_predicted_models_rand_30_max_16_ext_32/test_unrelaxed_rank_013_alphafold2_ptm_model_5_seed_002-self.foldseek,3.159666848783049,-0.011312252956242882
+0,blind_prediction/test/test_predicted_models_rand_30_max_16_ext_32/test_unrelaxed_rank_014_alphafold2_ptm_model_3_seed_003-self.foldseek,3.0536636364635723,0.027271061221664648
+0,blind_prediction/test/test_predicted_models_rand_30_max_16_ext_32/test_unrelaxed_rank_015_alphafold2_ptm_model_3_seed_001-self.foldseek,2.239722897979137,-0.10407252034741248
+0,blind_prediction/test/test_predicted_models_rand_30_max_16_ext_32/test_unrelaxed_rank_016_alphafold2_ptm_model_2_seed_001-self.foldseek,2.9618810235947257,-0.057670620546616404
+0,blind_prediction/test/test_predicted_models_rand_30_max_16_ext_32/test_unrelaxed_rank_017_alphafold2_ptm_model_1_seed_001-self.foldseek,2.446937806086043,-0.10925102612831558
+0,blind_prediction/test/test_predicted_models_rand_30_max_16_ext_32/test_unrelaxed_rank_018_alphafold2_ptm_model_4_seed_002-self.foldseek,1.4570231034015766,0.25049468837020883
+1,blind_prediction/test/test_predicted_models_rand_30_max_16_ext_32/test_unrelaxed_rank_019_alphafold2_ptm_model_3_seed_000-self.foldseek,-2.794340785047333,0.5152168259066472
+0,blind_prediction/test/test_predicted_models_rand_30_max_16_ext_32/test_unrelaxed_rank_020_alphafold2_ptm_model_1_seed_002-self.foldseek,1.6657049553094794,0.3561030560809203
+1,blind_prediction/test/test_predicted_models_rand_30_max_16_ext_32/test_unrelaxed_rank_021_alphafold2_ptm_model_3_seed_002-self.foldseek,-1.7501263466232728,0.5612265520432265
+1,blind_prediction/test/test_predicted_models_rand_30_max_16_ext_32/test_unrelaxed_rank_022_alphafold2_ptm_model_1_seed_004-self.foldseek,-2.3966335784294928,1.3710554312870733
+1,blind_prediction/test/test_predicted_models_rand_30_max_16_ext_32/test_unrelaxed_rank_023_alphafold2_ptm_model_3_seed_004-self.foldseek,-2.523399406922231,1.5317088092698732
+1,blind_prediction/test/test_predicted_models_rand_30_max_16_ext_32/test_unrelaxed_rank_024_alphafold2_ptm_model_2_seed_004-self.foldseek,-2.4894439259206416,1.6702006315177882
+-1,blind_prediction/test/test_predicted_models_rand_30_max_16_ext_32/test_unrelaxed_rank_025_alphafold2_ptm_model_2_seed_002-self.foldseek,-0.002880687580092586,0.44974566589407644
+1,blind_prediction/test/test_predicted_models_rand_30_max_1_ext_2/test_unrelaxed_rank_001_alphafold2_ptm_model_4_seed_004-self.foldseek,-4.424262414816576,-1.0512458066494355
+1,blind_prediction/test/test_predicted_models_rand_30_max_1_ext_2/test_unrelaxed_rank_002_alphafold2_ptm_model_3_seed_001-self.foldseek,-4.346589477634217,-1.0477395972922405
+1,blind_prediction/test/test_predicted_models_rand_30_max_1_ext_2/test_unrelaxed_rank_003_alphafold2_ptm_model_4_seed_003-self.foldseek,-4.824602557790946,-1.2113486372379407
+1,blind_prediction/test/test_predicted_models_rand_30_max_1_ext_2/test_unrelaxed_rank_004_alphafold2_ptm_model_3_seed_004-self.foldseek,-4.336097803409506,-1.2255573063828904
+1,blind_prediction/test/test_predicted_models_rand_30_max_1_ext_2/test_unrelaxed_rank_005_alphafold2_ptm_model_4_seed_000-self.foldseek,-4.617101937414942,-1.1416012233477275
+1,blind_prediction/test/test_predicted_models_rand_30_max_1_ext_2/test_unrelaxed_rank_006_alphafold2_ptm_model_1_seed_001-self.foldseek,-4.375688705244509,-0.9983776843087943
+1,blind_prediction/test/test_predicted_models_rand_30_max_1_ext_2/test_unrelaxed_rank_007_alphafold2_ptm_model_3_seed_000-self.foldseek,-4.313067968378704,-1.2166650251887405
+1,blind_prediction/test/test_predicted_models_rand_30_max_1_ext_2/test_unrelaxed_rank_008_alphafold2_ptm_model_1_seed_002-self.foldseek,-4.475745318224873,-0.9989674530480945
+1,blind_prediction/test/test_predicted_models_rand_30_max_1_ext_2/test_unrelaxed_rank_009_alphafold2_ptm_model_3_seed_003-self.foldseek,-4.392015516213259,-1.0020845268479945
+1,blind_prediction/test/test_predicted_models_rand_30_max_1_ext_2/test_unrelaxed_rank_010_alphafold2_ptm_model_4_seed_002-self.foldseek,-3.755305233691352,-0.42308246622739487
+1,blind_prediction/test/test_predicted_models_rand_30_max_1_ext_2/test_unrelaxed_rank_011_alphafold2_ptm_model_3_seed_002-self.foldseek,-4.522818425148209,-0.8186751984643752
+1,blind_prediction/test/test_predicted_models_rand_30_max_1_ext_2/test_unrelaxed_rank_012_alphafold2_ptm_model_2_seed_001-self.foldseek,-4.47787347495838,-1.020167169724318
+1,blind_prediction/test/test_predicted_models_rand_30_max_1_ext_2/test_unrelaxed_rank_013_alphafold2_ptm_model_2_seed_004-self.foldseek,-5.23476720536417,-1.0669706794545202
+1,blind_prediction/test/test_predicted_models_rand_30_max_1_ext_2/test_unrelaxed_rank_014_alphafold2_ptm_model_1_seed_000-self.foldseek,-4.540054589559504,-1.1427761393123073
+1,blind_prediction/test/test_predicted_models_rand_30_max_1_ext_2/test_unrelaxed_rank_015_alphafold2_ptm_model_1_seed_003-self.foldseek,-4.528140406807251,-1.1162513033104458
+1,blind_prediction/test/test_predicted_models_rand_30_max_1_ext_2/test_unrelaxed_rank_016_alphafold2_ptm_model_1_seed_004-self.foldseek,-4.614121227337988,-1.1184056076085567
+1,blind_prediction/test/test_predicted_models_rand_30_max_1_ext_2/test_unrelaxed_rank_017_alphafold2_ptm_model_2_seed_000-self.foldseek,-5.231411323883544,-1.0633230469993273
+1,blind_prediction/test/test_predicted_models_rand_30_max_1_ext_2/test_unrelaxed_rank_018_alphafold2_ptm_model_2_seed_003-self.foldseek,-5.239837132626775,-1.0658999285243256
+1,blind_prediction/test/test_predicted_models_rand_30_max_1_ext_2/test_unrelaxed_rank_019_alphafold2_ptm_model_2_seed_002-self.foldseek,-5.193492699045576,-1.1623609166564732
+1,blind_prediction/test/test_predicted_models_rand_30_max_1_ext_2/test_unrelaxed_rank_020_alphafold2_ptm_model_5_seed_002-self.foldseek,-5.16633623405136,-1.087141533951945
+1,blind_prediction/test/test_predicted_models_rand_30_max_1_ext_2/test_unrelaxed_rank_021_alphafold2_ptm_model_4_seed_001-self.foldseek,-5.1997466102161445,-1.3030827648602334
+1,blind_prediction/test/test_predicted_models_rand_30_max_1_ext_2/test_unrelaxed_rank_022_alphafold2_ptm_model_5_seed_003-self.foldseek,-5.195072706531031,-0.9673804510756304
+1,blind_prediction/test/test_predicted_models_rand_30_max_1_ext_2/test_unrelaxed_rank_023_alphafold2_ptm_model_5_seed_004-self.foldseek,-5.189763909079562,-0.9028979609565029
+1,blind_prediction/test/test_predicted_models_rand_30_max_1_ext_2/test_unrelaxed_rank_024_alphafold2_ptm_model_5_seed_000-self.foldseek,-5.178741285814734,-1.2605660547831625
+1,blind_prediction/test/test_predicted_models_rand_30_max_1_ext_2/test_unrelaxed_rank_025_alphafold2_ptm_model_5_seed_001-self.foldseek,-5.123596408962306,-1.0303634767852412
+1,blind_prediction/test/test_predicted_models_rand_30_max_2_ext_4/test_unrelaxed_rank_001_alphafold2_ptm_model_4_seed_001-self.foldseek,-3.6941776121852405,0.8722081966806897
+1,blind_prediction/test/test_predicted_models_rand_30_max_2_ext_4/test_unrelaxed_rank_002_alphafold2_ptm_model_5_seed_002-self.foldseek,-3.5477250298125855,0.2200385680287784
+1,blind_prediction/test/test_predicted_models_rand_30_max_2_ext_4/test_unrelaxed_rank_003_alphafold2_ptm_model_3_seed_000-self.foldseek,-3.201171268682646,1.176383085485645
+1,blind_prediction/test/test_predicted_models_rand_30_max_2_ext_4/test_unrelaxed_rank_004_alphafold2_ptm_model_4_seed_003-self.foldseek,-2.650857380543156,1.4506404467478753
+1,blind_prediction/test/test_predicted_models_rand_30_max_2_ext_4/test_unrelaxed_rank_005_alphafold2_ptm_model_4_seed_000-self.foldseek,-3.279533377523067,0.44783875123250677
+1,blind_prediction/test/test_predicted_models_rand_30_max_2_ext_4/test_unrelaxed_rank_006_alphafold2_ptm_model_1_seed_004-self.foldseek,-4.035139268552952,-0.38561689601964433
+1,blind_prediction/test/test_predicted_models_rand_30_max_2_ext_4/test_unrelaxed_rank_007_alphafold2_ptm_model_5_seed_004-self.foldseek,-4.085663405662234,-0.8531192702481025
+1,blind_prediction/test/test_predicted_models_rand_30_max_2_ext_4/test_unrelaxed_rank_008_alphafold2_ptm_model_5_seed_003-self.foldseek,-2.9275247837400884,1.5899103641735395
+1,blind_prediction/test/test_predicted_models_rand_30_max_2_ext_4/test_unrelaxed_rank_009_alphafold2_ptm_model_4_seed_002-self.foldseek,-4.176691738504193,-0.6903489343697867
+1,blind_prediction/test/test_predicted_models_rand_30_max_2_ext_4/test_unrelaxed_rank_010_alphafold2_ptm_model_5_seed_001-self.foldseek,-3.1023801143416283,1.4883859937320103
+1,blind_prediction/test/test_predicted_models_rand_30_max_2_ext_4/test_unrelaxed_rank_011_alphafold2_ptm_model_1_seed_001-self.foldseek,-3.9027978060800055,-0.5512440959820485
+1,blind_prediction/test/test_predicted_models_rand_30_max_2_ext_4/test_unrelaxed_rank_012_alphafold2_ptm_model_5_seed_000-self.foldseek,-3.3616955588271655,0.2876511961885061
+1,blind_prediction/test/test_predicted_models_rand_30_max_2_ext_4/test_unrelaxed_rank_013_alphafold2_ptm_model_2_seed_002-self.foldseek,-4.404479483398961,-0.6264015008332726
+1,blind_prediction/test/test_predicted_models_rand_30_max_2_ext_4/test_unrelaxed_rank_014_alphafold2_ptm_model_1_seed_000-self.foldseek,-3.6884200027065166,-0.349356152049838
+1,blind_prediction/test/test_predicted_models_rand_30_max_2_ext_4/test_unrelaxed_rank_015_alphafold2_ptm_model_3_seed_002-self.foldseek,-4.590391992009664,-0.9944207885302103
+1,blind_prediction/test/test_predicted_models_rand_30_max_2_ext_4/test_unrelaxed_rank_016_alphafold2_ptm_model_3_seed_003-self.foldseek,-4.424140917961008,-0.6565893465469546
+1,blind_prediction/test/test_predicted_models_rand_30_max_2_ext_4/test_unrelaxed_rank_017_alphafold2_ptm_model_2_seed_004-self.foldseek,-4.623438023241475,-0.8596423636014809
+1,blind_prediction/test/test_predicted_models_rand_30_max_2_ext_4/test_unrelaxed_rank_018_alphafold2_ptm_model_3_seed_004-self.foldseek,-4.254168912028012,-0.9439480839536321
+1,blind_prediction/test/test_predicted_models_rand_30_max_2_ext_4/test_unrelaxed_rank_019_alphafold2_ptm_model_2_seed_001-self.foldseek,-4.740746647560398,-0.8028760333533607
+1,blind_prediction/test/test_predicted_models_rand_30_max_2_ext_4/test_unrelaxed_rank_020_alphafold2_ptm_model_3_seed_001-self.foldseek,-4.009356057153362,-0.6566973902487182
+1,blind_prediction/test/test_predicted_models_rand_30_max_2_ext_4/test_unrelaxed_rank_021_alphafold2_ptm_model_2_seed_003-self.foldseek,-4.467042228108775,-0.5025316432958906
+1,blind_prediction/test/test_predicted_models_rand_30_max_2_ext_4/test_unrelaxed_rank_022_alphafold2_ptm_model_4_seed_004-self.foldseek,-3.9016484873382864,-0.4563467474888489
+1,blind_prediction/test/test_predicted_models_rand_30_max_2_ext_4/test_unrelaxed_rank_023_alphafold2_ptm_model_1_seed_003-self.foldseek,-3.8205174140995313,-0.2334415467377654
+1,blind_prediction/test/test_predicted_models_rand_30_max_2_ext_4/test_unrelaxed_rank_024_alphafold2_ptm_model_1_seed_002-self.foldseek,-4.208618771537565,-0.7803176331160451
+1,blind_prediction/test/test_predicted_models_rand_30_max_2_ext_4/test_unrelaxed_rank_025_alphafold2_ptm_model_2_seed_000-self.foldseek,-3.8206826565634158,-0.2474965280930931
+0,blind_prediction/test/test_predicted_models_rand_30_max_32_ext_64/test_unrelaxed_rank_001_alphafold2_ptm_model_5_seed_002-self.foldseek,3.7412723439209263,-0.14273445710591948
+0,blind_prediction/test/test_predicted_models_rand_30_max_32_ext_64/test_unrelaxed_rank_002_alphafold2_ptm_model_5_seed_004-self.foldseek,3.7551307486385306,-0.14643224402506755
+0,blind_prediction/test/test_predicted_models_rand_30_max_32_ext_64/test_unrelaxed_rank_003_alphafold2_ptm_model_5_seed_003-self.foldseek,3.696801485372531,-0.16274769608492107
+0,blind_prediction/test/test_predicted_models_rand_30_max_32_ext_64/test_unrelaxed_rank_004_alphafold2_ptm_model_5_seed_000-self.foldseek,3.66422904878372,-0.11064676923090247
+0,blind_prediction/test/test_predicted_models_rand_30_max_32_ext_64/test_unrelaxed_rank_005_alphafold2_ptm_model_5_seed_001-self.foldseek,3.778348543788953,-0.14351319266589938
+0,blind_prediction/test/test_predicted_models_rand_30_max_32_ext_64/test_unrelaxed_rank_006_alphafold2_ptm_model_4_seed_004-self.foldseek,3.5783783530423454,-0.19205138504418512
+0,blind_prediction/test/test_predicted_models_rand_30_max_32_ext_64/test_unrelaxed_rank_007_alphafold2_ptm_model_4_seed_000-self.foldseek,3.547520894365896,-0.20168692544649064
+0,blind_prediction/test/test_predicted_models_rand_30_max_32_ext_64/test_unrelaxed_rank_008_alphafold2_ptm_model_3_seed_002-self.foldseek,3.6700363517112415,-0.09054186690597282
+0,blind_prediction/test/test_predicted_models_rand_30_max_32_ext_64/test_unrelaxed_rank_009_alphafold2_ptm_model_3_seed_000-self.foldseek,3.6050706511939947,-0.11130814977590094
+0,blind_prediction/test/test_predicted_models_rand_30_max_32_ext_64/test_unrelaxed_rank_010_alphafold2_ptm_model_4_seed_003-self.foldseek,3.513044621073544,-0.1491144057159877
+0,blind_prediction/test/test_predicted_models_rand_30_max_32_ext_64/test_unrelaxed_rank_011_alphafold2_ptm_model_4_seed_001-self.foldseek,3.5432421331889126,-0.18459905121655593
+0,blind_prediction/test/test_predicted_models_rand_30_max_32_ext_64/test_unrelaxed_rank_012_alphafold2_ptm_model_4_seed_002-self.foldseek,3.5491169958888267,-0.18156954641492276
+0,blind_prediction/test/test_predicted_models_rand_30_max_32_ext_64/test_unrelaxed_rank_013_alphafold2_ptm_model_2_seed_000-self.foldseek,3.720767442346653,-0.11377402755623826
+0,blind_prediction/test/test_predicted_models_rand_30_max_32_ext_64/test_unrelaxed_rank_014_alphafold2_ptm_model_1_seed_002-self.foldseek,3.6011370845625397,-0.07947663972588839
+0,blind_prediction/test/test_predicted_models_rand_30_max_32_ext_64/test_unrelaxed_rank_015_alphafold2_ptm_model_2_seed_002-self.foldseek,3.772233013057549,-0.1188727402884119
+0,blind_prediction/test/test_predicted_models_rand_30_max_32_ext_64/test_unrelaxed_rank_016_alphafold2_ptm_model_1_seed_000-self.foldseek,3.4665111763125394,-0.137119978786081
+0,blind_prediction/test/test_predicted_models_rand_30_max_32_ext_64/test_unrelaxed_rank_017_alphafold2_ptm_model_3_seed_004-self.foldseek,3.3181049882072022,-0.07242268516814535
+0,blind_prediction/test/test_predicted_models_rand_30_max_32_ext_64/test_unrelaxed_rank_018_alphafold2_ptm_model_1_seed_004-self.foldseek,3.58182802343062,-0.12170032065242328
+0,blind_prediction/test/test_predicted_models_rand_30_max_32_ext_64/test_unrelaxed_rank_019_alphafold2_ptm_model_1_seed_001-self.foldseek,3.6384290493337352,-0.10275215433355363
+0,blind_prediction/test/test_predicted_models_rand_30_max_32_ext_64/test_unrelaxed_rank_020_alphafold2_ptm_model_2_seed_003-self.foldseek,3.605433415755848,-0.07319286921123737
+0,blind_prediction/test/test_predicted_models_rand_30_max_32_ext_64/test_unrelaxed_rank_021_alphafold2_ptm_model_3_seed_001-self.foldseek,3.267419116149893,-0.08119377731815423
+0,blind_prediction/test/test_predicted_models_rand_30_max_32_ext_64/test_unrelaxed_rank_022_alphafold2_ptm_model_1_seed_003-self.foldseek,3.6277084291384156,-0.11548897338880941
+0,blind_prediction/test/test_predicted_models_rand_30_max_32_ext_64/test_unrelaxed_rank_023_alphafold2_ptm_model_3_seed_003-self.foldseek,3.091329037513417,-0.015765903794725457
+0,blind_prediction/test/test_predicted_models_rand_30_max_32_ext_64/test_unrelaxed_rank_024_alphafold2_ptm_model_2_seed_001-self.foldseek,3.354094586907791,-0.08410595694745737
+0,blind_prediction/test/test_predicted_models_rand_30_max_32_ext_64/test_unrelaxed_rank_025_alphafold2_ptm_model_2_seed_004-self.foldseek,3.297783930368719,-0.07108607789619567
+0,blind_prediction/test/test_predicted_models_rand_30_max_4_ext_8/test_unrelaxed_rank_001_alphafold2_ptm_model_4_seed_001-self.foldseek,2.5831330411417404,0.1175564344022249
+1,blind_prediction/test/test_predicted_models_rand_30_max_4_ext_8/test_unrelaxed_rank_002_alphafold2_ptm_model_4_seed_004-self.foldseek,-2.4404755714401727,1.333531655029551
+1,blind_prediction/test/test_predicted_models_rand_30_max_4_ext_8/test_unrelaxed_rank_003_alphafold2_ptm_model_4_seed_002-self.foldseek,-3.2369132509221346,0.577375844560065
+1,blind_prediction/test/test_predicted_models_rand_30_max_4_ext_8/test_unrelaxed_rank_004_alphafold2_ptm_model_5_seed_002-self.foldseek,-3.374302361008444,0.8661303087578077
+1,blind_prediction/test/test_predicted_models_rand_30_max_4_ext_8/test_unrelaxed_rank_005_alphafold2_ptm_model_4_seed_003-self.foldseek,-2.3081199018708585,1.6206838312570562
+1,blind_prediction/test/test_predicted_models_rand_30_max_4_ext_8/test_unrelaxed_rank_006_alphafold2_ptm_model_1_seed_004-self.foldseek,-3.127662404452066,1.200687952053083
+1,blind_prediction/test/test_predicted_models_rand_30_max_4_ext_8/test_unrelaxed_rank_007_alphafold2_ptm_model_5_seed_001-self.foldseek,-3.0991818858776106,0.6649453942409861
+1,blind_prediction/test/test_predicted_models_rand_30_max_4_ext_8/test_unrelaxed_rank_008_alphafold2_ptm_model_3_seed_004-self.foldseek,-3.103478933357975,1.3566752170992344
+1,blind_prediction/test/test_predicted_models_rand_30_max_4_ext_8/test_unrelaxed_rank_009_alphafold2_ptm_model_2_seed_004-self.foldseek,-2.8904797844068577,1.3074825686993725
+1,blind_prediction/test/test_predicted_models_rand_30_max_4_ext_8/test_unrelaxed_rank_010_alphafold2_ptm_model_3_seed_001-self.foldseek,-2.487269739986404,0.7883638294011447
+1,blind_prediction/test/test_predicted_models_rand_30_max_4_ext_8/test_unrelaxed_rank_011_alphafold2_ptm_model_5_seed_003-self.foldseek,-2.844624975188965,1.4946444130101868
+1,blind_prediction/test/test_predicted_models_rand_30_max_4_ext_8/test_unrelaxed_rank_012_alphafold2_ptm_model_5_seed_000-self.foldseek,-3.1522829176252407,0.94899834990238
+1,blind_prediction/test/test_predicted_models_rand_30_max_4_ext_8/test_unrelaxed_rank_013_alphafold2_ptm_model_2_seed_001-self.foldseek,-3.013428097466531,0.7384479525641445
+1,blind_prediction/test/test_predicted_models_rand_30_max_4_ext_8/test_unrelaxed_rank_014_alphafold2_ptm_model_1_seed_001-self.foldseek,-1.7878011628438701,0.5735254481598266
+1,blind_prediction/test/test_predicted_models_rand_30_max_4_ext_8/test_unrelaxed_rank_015_alphafold2_ptm_model_4_seed_000-self.foldseek,-2.55790604364274,1.161843108077506
+1,blind_prediction/test/test_predicted_models_rand_30_max_4_ext_8/test_unrelaxed_rank_016_alphafold2_ptm_model_2_seed_003-self.foldseek,-2.509547066983407,1.5083853756003238
+1,blind_prediction/test/test_predicted_models_rand_30_max_4_ext_8/test_unrelaxed_rank_017_alphafold2_ptm_model_1_seed_002-self.foldseek,-2.7330469892284226,0.5860951104748557
+1,blind_prediction/test/test_predicted_models_rand_30_max_4_ext_8/test_unrelaxed_rank_018_alphafold2_ptm_model_5_seed_004-self.foldseek,-3.038341681403404,0.47164978549624953
+1,blind_prediction/test/test_predicted_models_rand_30_max_4_ext_8/test_unrelaxed_rank_019_alphafold2_ptm_model_2_seed_002-self.foldseek,-4.474635560259316,-0.7933711371478542
+1,blind_prediction/test/test_predicted_models_rand_30_max_4_ext_8/test_unrelaxed_rank_020_alphafold2_ptm_model_1_seed_003-self.foldseek,-3.1908486035777237,0.5536402076950452
+1,blind_prediction/test/test_predicted_models_rand_30_max_4_ext_8/test_unrelaxed_rank_021_alphafold2_ptm_model_3_seed_002-self.foldseek,-4.47753203238732,-0.9816010767436734
+1,blind_prediction/test/test_predicted_models_rand_30_max_4_ext_8/test_unrelaxed_rank_022_alphafold2_ptm_model_3_seed_003-self.foldseek,-4.091888131847541,-0.5098433165775483
+0,blind_prediction/test/test_predicted_models_rand_30_max_64_ext_128/test_unrelaxed_rank_001_alphafold2_ptm_model_5_seed_003-self.foldseek,3.6809927269576166,-0.1182273427020713
+0,blind_prediction/test/test_predicted_models_rand_30_max_64_ext_128/test_unrelaxed_rank_002_alphafold2_ptm_model_5_seed_002-self.foldseek,3.7918532283469495,-0.12560783848655369
+0,blind_prediction/test/test_predicted_models_rand_30_max_64_ext_128/test_unrelaxed_rank_003_alphafold2_ptm_model_3_seed_003-self.foldseek,3.762538747912813,-0.12298125141610429
+0,blind_prediction/test/test_predicted_models_rand_30_max_64_ext_128/test_unrelaxed_rank_004_alphafold2_ptm_model_3_seed_000-self.foldseek,3.76233920083818,-0.144284237594678
+0,blind_prediction/test/test_predicted_models_rand_30_max_64_ext_128/test_unrelaxed_rank_005_alphafold2_ptm_model_4_seed_004-self.foldseek,3.532138874786946,-0.21722372599861428
+0,blind_prediction/test/test_predicted_models_rand_30_max_64_ext_128/test_unrelaxed_rank_006_alphafold2_ptm_model_5_seed_004-self.foldseek,3.696587051141183,-0.13688978698935808
+0,blind_prediction/test/test_predicted_models_rand_30_max_64_ext_128/test_unrelaxed_rank_007_alphafold2_ptm_model_4_seed_002-self.foldseek,3.5727262080872157,-0.19508580953620502
+0,blind_prediction/test/test_predicted_models_rand_30_max_64_ext_128/test_unrelaxed_rank_008_alphafold2_ptm_model_3_seed_002-self.foldseek,3.7427656815249977,-0.12876093687434015
+0,blind_prediction/test/test_predicted_models_rand_30_max_64_ext_128/test_unrelaxed_rank_009_alphafold2_ptm_model_4_seed_001-self.foldseek,3.607407768893841,-0.18725597603417013
+0,blind_prediction/test/test_predicted_models_rand_30_max_64_ext_128/test_unrelaxed_rank_010_alphafold2_ptm_model_3_seed_001-self.foldseek,3.568073173493242,-0.17164828736878307
+0,blind_prediction/test/test_predicted_models_rand_30_max_64_ext_128/test_unrelaxed_rank_011_alphafold2_ptm_model_3_seed_004-self.foldseek,3.687410479569247,-0.15388516902619565
+0,blind_prediction/test/test_predicted_models_rand_30_max_64_ext_128/test_unrelaxed_rank_012_alphafold2_ptm_model_4_seed_003-self.foldseek,3.5672577908184135,-0.20519631705057398
+0,blind_prediction/test/test_predicted_models_rand_30_max_64_ext_128/test_unrelaxed_rank_013_alphafold2_ptm_model_5_seed_000-self.foldseek,3.729660024629257,-0.1433639891205143
+0,blind_prediction/test/test_predicted_models_rand_30_max_64_ext_128/test_unrelaxed_rank_014_alphafold2_ptm_model_5_seed_001-self.foldseek,3.719456171181277,-0.12521481707095447
+0,blind_prediction/test/test_predicted_models_rand_30_max_64_ext_128/test_unrelaxed_rank_015_alphafold2_ptm_model_4_seed_000-self.foldseek,3.550948005198439,-0.2183683556930658
+0,blind_prediction/test/test_predicted_models_rand_30_max_64_ext_128/test_unrelaxed_rank_016_alphafold2_ptm_model_2_seed_002-self.foldseek,3.7578100135879855,-0.1632750004530893
+0,blind_prediction/test/test_predicted_models_rand_30_max_64_ext_128/test_unrelaxed_rank_017_alphafold2_ptm_model_1_seed_001-self.foldseek,3.542603005588896,-0.1254916161712615
+0,blind_prediction/test/test_predicted_models_rand_30_max_64_ext_128/test_unrelaxed_rank_018_alphafold2_ptm_model_2_seed_003-self.foldseek,3.5897824871788,-0.156774420438515
+0,blind_prediction/test/test_predicted_models_rand_30_max_64_ext_128/test_unrelaxed_rank_019_alphafold2_ptm_model_1_seed_002-self.foldseek,3.6625581801443152,-0.09452405945364654
+0,blind_prediction/test/test_predicted_models_rand_30_max_64_ext_128/test_unrelaxed_rank_020_alphafold2_ptm_model_1_seed_000-self.foldseek,3.7426400684953642,-0.14945447187946606
+0,blind_prediction/test/test_predicted_models_rand_30_max_64_ext_128/test_unrelaxed_rank_021_alphafold2_ptm_model_1_seed_003-self.foldseek,3.5636868445646845,-0.1330029531435439
+0,blind_prediction/test/test_predicted_models_rand_30_max_64_ext_128/test_unrelaxed_rank_022_alphafold2_ptm_model_1_seed_004-self.foldseek,3.657086302660595,-0.15084348529274486
+0,blind_prediction/test/test_predicted_models_rand_30_max_64_ext_128/test_unrelaxed_rank_023_alphafold2_ptm_model_2_seed_000-self.foldseek,3.5847213185346205,-0.1896963506200404
+0,blind_prediction/test/test_predicted_models_rand_30_max_64_ext_128/test_unrelaxed_rank_024_alphafold2_ptm_model_2_seed_001-self.foldseek,3.5774053426998127,-0.16062518708439436
+0,blind_prediction/test/test_predicted_models_rand_30_max_64_ext_128/test_unrelaxed_rank_025_alphafold2_ptm_model_2_seed_004-self.foldseek,3.7178192858281194,-0.15243531661055126
+0,blind_prediction/test/test_predicted_models_rand_30_max_8_ext_16/test_unrelaxed_rank_001_alphafold2_ptm_model_4_seed_000-self.foldseek,3.6687753360611346,-0.1179933916299856
+0,blind_prediction/test/test_predicted_models_rand_30_max_8_ext_16/test_unrelaxed_rank_002_alphafold2_ptm_model_5_seed_003-self.foldseek,3.2934961537596723,-0.00856798516138676
+0,blind_prediction/test/test_predicted_models_rand_30_max_8_ext_16/test_unrelaxed_rank_003_alphafold2_ptm_model_5_seed_001-self.foldseek,3.494577821424328,-0.057121763947718916
+0,blind_prediction/test/test_predicted_models_rand_30_max_8_ext_16/test_unrelaxed_rank_004_alphafold2_ptm_model_5_seed_000-self.foldseek,2.5519728367708603,0.1225918119283269
+-1,blind_prediction/test/test_predicted_models_rand_30_max_8_ext_16/test_unrelaxed_rank_005_alphafold2_ptm_model_5_seed_002-self.foldseek,0.5515822809092157,0.3600812416934586
+1,blind_prediction/test/test_predicted_models_rand_30_max_8_ext_16/test_unrelaxed_rank_006_alphafold2_ptm_model_4_seed_002-self.foldseek,-2.156353835592091,1.629327531867282
+1,blind_prediction/test/test_predicted_models_rand_30_max_8_ext_16/test_unrelaxed_rank_007_alphafold2_ptm_model_3_seed_003-self.foldseek,-3.0713721635528453,0.7881587623293105
+1,blind_prediction/test/test_predicted_models_rand_30_max_8_ext_16/test_unrelaxed_rank_008_alphafold2_ptm_model_3_seed_002-self.foldseek,-3.3325942132971464,0.48927245383693174
+1,blind_prediction/test/test_predicted_models_rand_30_max_8_ext_16/test_unrelaxed_rank_009_alphafold2_ptm_model_3_seed_001-self.foldseek,-3.175681091944353,0.7938515897012158
+1,blind_prediction/test/test_predicted_models_rand_30_max_8_ext_16/test_unrelaxed_rank_010_alphafold2_ptm_model_1_seed_002-self.foldseek,-2.815799222202489,1.2234597451017066
+1,blind_prediction/test/test_predicted_models_rand_30_max_8_ext_16/test_unrelaxed_rank_011_alphafold2_ptm_model_4_seed_003-self.foldseek,-2.3903738809216124,1.5066842107189617
+1,blind_prediction/test/test_predicted_models_rand_30_max_8_ext_16/test_unrelaxed_rank_012_alphafold2_ptm_model_4_seed_004-self.foldseek,-2.414958017037828,1.5411336808807297
+1,blind_prediction/test/test_predicted_models_rand_30_max_8_ext_16/test_unrelaxed_rank_013_alphafold2_ptm_model_3_seed_000-self.foldseek,-3.103435307719647,0.35438612352552495
+1,blind_prediction/test/test_predicted_models_rand_30_max_8_ext_16/test_unrelaxed_rank_014_alphafold2_ptm_model_4_seed_001-self.foldseek,-2.643068936218155,0.8443465657360394
+1,blind_prediction/test/test_predicted_models_rand_30_max_8_ext_16/test_unrelaxed_rank_015_alphafold2_ptm_model_1_seed_000-self.foldseek,-2.5908570059097644,0.4816620424066161
+1,blind_prediction/test/test_predicted_models_rand_30_max_8_ext_16/test_unrelaxed_rank_016_alphafold2_ptm_model_2_seed_002-self.foldseek,-2.6738448890698607,1.7366469182408097
+1,blind_prediction/test/test_predicted_models_rand_30_max_8_ext_16/test_unrelaxed_rank_017_alphafold2_ptm_model_1_seed_003-self.foldseek,-2.6885015833256567,0.8610180831239869
+1,blind_prediction/test/test_predicted_models_rand_30_max_8_ext_16/test_unrelaxed_rank_018_alphafold2_ptm_model_2_seed_000-self.foldseek,-3.13349235505119,0.601484973119843
+1,blind_prediction/test/test_predicted_models_rand_30_max_8_ext_16/test_unrelaxed_rank_019_alphafold2_ptm_model_2_seed_004-self.foldseek,-2.9766355443233583,1.364608432069586
+1,blind_prediction/test/test_predicted_models_rand_30_max_8_ext_16/test_unrelaxed_rank_020_alphafold2_ptm_model_5_seed_004-self.foldseek,-2.7571321659743724,1.4881892065815194
+0,blind_prediction/test/test_predicted_models_rand_30_max_8_ext_16/test_unrelaxed_rank_021_alphafold2_ptm_model_1_seed_001-self.foldseek,-0.7135600775746948,0.5885724298861221
+1,blind_prediction/test/test_predicted_models_rand_30_max_8_ext_16/test_unrelaxed_rank_022_alphafold2_ptm_model_2_seed_001-self.foldseek,-2.3385482128060766,0.9382484731086748
+1,blind_prediction/test/test_predicted_models_rand_30_max_8_ext_16/test_unrelaxed_rank_023_alphafold2_ptm_model_2_seed_003-self.foldseek,-2.5506728334475475,0.7605291575033611
+1,blind_prediction/test/test_predicted_models_rand_30_max_8_ext_16/test_unrelaxed_rank_024_alphafold2_ptm_model_1_seed_004-self.foldseek,-3.1769278325098043,1.2441823584862377
+1,blind_prediction/test/test_predicted_models_rand_30_max_8_ext_16/test_unrelaxed_rank_025_alphafold2_ptm_model_3_seed_004-self.foldseek,-3.269743393972181,0.08173850777444533