Tonic committed on
Commit
95a3026
·
unverified ·
2 Parent(s): b42d649 afd257c

Merge branch 'main' of https://huggingface.co/spaces/MISATO-dataset/esm3-conformity-sampling

Browse files
Files changed (1) hide show
  1. app.py +48 -1
app.py CHANGED
@@ -16,6 +16,8 @@ from dotenv import load_dotenv
16
  import torch
17
  import json
18
  import time
 
 
19
 
20
  load_dotenv()
21
 
@@ -39,6 +41,49 @@ amino3to1 = {
39
  }
40
 
41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  def read_pdb_io(pdb_file):
43
  if isinstance(pdb_file, io.StringIO):
44
  pdb_content = pdb_file.getvalue()
@@ -242,6 +287,7 @@ def run_prediction(pdb_file, num_runs, noise_level, num_frames, progress=gr.Prog
242
 
243
  progress(0, desc="Starting prediction")
244
  view_data, crmsd_text = prediction_visualization(pdb_file, num_runs, noise_level, num_frames, progress)
 
245
  if view_data is None:
246
  return "No successful predictions were made. Try adjusting the parameters or check the PDB file.", crmsd_text
247
 
@@ -260,7 +306,7 @@ def run_prediction(pdb_file, num_runs, noise_level, num_frames, progress=gr.Prog
260
  </div>
261
  """
262
  progress(1.0, desc="Completed")
263
- return html_content, crmsd_text
264
  except Exception as e:
265
  error_message = str(e)
266
  stack_trace = traceback.format_exc()
@@ -304,6 +350,7 @@ def create_demo():
304
  5. Click the "Run Prediction" button to start the process.
305
  6. The 3D visualization will show the original structure (grey) and the best predicted structure (green).
306
  7. The alignment result will display the best cRMSD (lower is better).
 
307
 
308
  ## About
309
  This demo uses the ESM3 model to predict protein structures from PDB files.
 
16
  import torch
17
  import json
18
  import time
19
+ from Bio.PDB import PDBParser
20
+ import itertools
21
 
22
  load_dotenv()
23
 
 
41
  }
42
 
43
 
44
+ # Covalent radii dictionary
45
# Covalent radius (in Å) keyed by upper-case element symbol.
# Entries are grouped by periodic-table period for readability; the lookup
# helper falls back to 2.0 for any symbol that is missing here.
COVALENT_RADIUS = {
    # Period 1
    "H": 0.31, "HE": 0.28,
    # Period 2
    "LI": 1.28, "BE": 0.96, "B": 0.84, "C": 0.76,
    "N": 0.71, "O": 0.66, "F": 0.57, "NE": 0.58,
    # Period 3
    "NA": 1.66, "MG": 1.41, "AL": 1.21, "SI": 1.11,
    "P": 1.07, "S": 1.05, "CL": 1.02, "AR": 1.06,
    # Period 4
    "K": 2.03, "CA": 1.76, "SC": 1.7, "TI": 1.6, "V": 1.53, "CR": 1.39,
    "MN": 1.5, "FE": 1.42, "CO": 1.38, "NI": 1.24, "CU": 1.32, "ZN": 1.22,
    "GA": 1.22, "GE": 1.2, "AS": 1.19, "SE": 1.2, "BR": 1.2, "KR": 1.16,
    # Period 5
    "RB": 2.2, "SR": 1.95, "Y": 1.9, "ZR": 1.75, "NB": 1.64, "MO": 1.54,
    "TC": 1.47, "RU": 1.46, "RH": 1.42, "PD": 1.39, "AG": 1.45, "CD": 1.44,
    "IN": 1.42, "SN": 1.39, "SB": 1.39, "TE": 1.38, "I": 1.39, "XE": 1.4,
    # Period 6 (including the lanthanides)
    "CS": 2.44, "BA": 2.15,
    "LA": 2.07, "CE": 2.04, "PR": 2.03, "ND": 2.01, "PM": 1.99,
    "SM": 1.98, "EU": 1.98, "GD": 1.96, "TB": 1.94, "DY": 1.92,
    "HO": 1.92, "ER": 1.89, "TM": 1.9, "YB": 1.87, "LU": 1.87,
    "HF": 1.75, "TA": 1.7, "W": 1.62, "RE": 1.51, "OS": 1.44, "IR": 1.41,
    "PT": 1.36, "AU": 1.36, "HG": 1.32, "TL": 1.45, "PB": 1.46, "BI": 1.48,
    "PO": 1.4, "AT": 1.5, "RN": 1.5,
    # Period 7 (including the actinides); the UU* keys are the former
    # systematic placeholder symbols for elements 113-118.
    "FR": 2.6, "RA": 2.21,
    "AC": 2.15, "TH": 2.06, "PA": 2.0, "U": 1.96, "NP": 1.9,
    "PU": 1.87, "AM": 1.8, "CM": 1.69, "BK": 2.0, "CF": 2.0,
    "ES": 2.0, "FM": 2.0, "MD": 2.0, "NO": 2.0, "LR": 2.0,
    "RF": 2.0, "DB": 2.0, "SG": 2.0, "BH": 2.0, "HS": 2.0, "MT": 2.0,
    "DS": 2.0, "RG": 2.0, "CN": 2.0,
    "UUT": 2.0, "UUQ": 2.0, "UUP": 2.0, "UUH": 2.0, "UUS": 2.0, "UUO": 2.0,
}
60
+
61
+ # Function to get the covalent radius of an atom
62
def get_covalent_radius(atom):
    """Return the covalent radius (Å) for *atom*.

    The atom's ``element`` attribute is upper-cased and looked up in
    ``COVALENT_RADIUS``. Symbols that are not in the table fall back to
    2.0 Å.
    """
    symbol = atom.element.upper()
    if symbol in COVALENT_RADIUS:
        return COVALENT_RADIUS[symbol]
    # Unknown element symbol: use the same 2.0 Å fallback for every miss.
    return 2.0
65
+
66
def calculate_clashes_for_pdb(pdb_file):
    """Count steric clashes between all atom pairs of a PDB structure.

    The structure is parsed with ``PDBParser`` and every unordered pair of
    atoms is tested. A pair counts as a clash when its inter-atomic distance
    plus a 0.5 tolerance is still smaller than the sum of the two atoms'
    covalent radii (as returned by ``get_covalent_radius``).

    Args:
        pdb_file: Whatever ``PDBParser.get_structure`` accepts as its file
            argument; it is also interpolated into the returned messages.

    Returns:
        A 2-tuple of strings: the total clash count message and the
        per-atom normalized clash count message. For a structure with no
        atoms the normalized value is reported as 0.0 rather than raising
        ``ZeroDivisionError``.
    """
    parser = PDBParser(QUIET=True)
    structure = parser.get_structure("protein", pdb_file)
    atoms = list(structure.get_atoms())
    num_atoms = len(atoms)

    # Look each radius up once per atom instead of twice per pair.
    radii = [get_covalent_radius(atom) for atom in atoms]

    # ``atom1 - atom2`` is the distance between the two atoms.
    steric_clash_count = sum(
        1
        for (atom1, radius1), (atom2, radius2) in itertools.combinations(
            zip(atoms, radii), 2
        )
        if (atom1 - atom2) + 0.5 < radius1 + radius2
    )

    # Normalize steric clashes per number of atoms; an empty structure has
    # no pairs and therefore no clashes, so avoid dividing by zero.
    norm_ster_clash_count = steric_clash_count / num_atoms if num_atoms else 0.0
    return f"Total steric clashes in {pdb_file}: {steric_clash_count}", f"Normalized steric clashes per atom in {pdb_file}: {norm_ster_clash_count}"
86
+
87
  def read_pdb_io(pdb_file):
88
  if isinstance(pdb_file, io.StringIO):
89
  pdb_content = pdb_file.getvalue()
 
287
 
288
  progress(0, desc="Starting prediction")
289
  view_data, crmsd_text = prediction_visualization(pdb_file, num_runs, noise_level, num_frames, progress)
290
+ steric_clash_text, norm_steric_clas_text = calculate_clashes_for_pdb(pdb_file)
291
  if view_data is None:
292
  return "No successful predictions were made. Try adjusting the parameters or check the PDB file.", crmsd_text
293
 
 
306
  </div>
307
  """
308
  progress(1.0, desc="Completed")
309
+ return html_content, crmsd_text, steric_clash_text, norm_steric_clas_text
310
  except Exception as e:
311
  error_message = str(e)
312
  stack_trace = traceback.format_exc()
 
350
  5. Click the "Run Prediction" button to start the process.
351
  6. The 3D visualization will show the original structure (grey) and the best predicted structure (green).
352
  7. The alignment result will display the best cRMSD (lower is better).
353
+ 8. Total and Normalized (per atom) steric clashes (lower is better)
354
 
355
  ## About
356
  This demo uses the ESM3 model to predict protein structures from PDB files.