Simon Duerr committed on
Commit
de38e57
1 Parent(s): fceeb17

add sasa and plddt selection

Browse files
Files changed (3) hide show
  1. README.md +1 -1
  2. app.py +63 -28
  3. requirements.txt +2 -2
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: ⚡
4
  colorFrom: red
5
  colorTo: pink
6
  sdk: gradio
7
- sdk_version: 3.0.11
8
  app_file: app.py
9
  pinned: false
10
  license: mit
 
4
  colorFrom: red
5
  colorTo: pink
6
  sdk: gradio
7
+ sdk_version: 3.1
8
  app_file: app.py
9
  pinned: false
10
  license: mit
app.py CHANGED
@@ -1,33 +1,30 @@
1
  import json, time, os, sys, glob
2
-
3
- import gradio as gr
4
-
5
- sys.path.append("/home/user/app/ProteinMPNN/vanilla_proteinmpnn")
6
-
7
- sys.path.append("/home/duerr/phd/08_Code/ProteinMPNN/ProteinMPNN/vanilla_proteinmpnn")
8
-
9
- import matplotlib.pyplot as plt
10
  import shutil
11
  import warnings
12
- import numpy as np
13
- import torch
14
- from torch import optim
15
- from torch.utils.data import DataLoader
16
- from torch.utils.data.dataset import random_split, Subset
17
  import copy
18
- import torch.nn as nn
19
- import torch.nn.functional as F
20
  import random
21
- import os
 
22
  import os.path
23
 
 
 
 
 
 
 
 
24
  import plotly.express as px
25
- import urllib
26
  import jax.numpy as jnp
27
  import tensorflow as tf
 
 
 
28
 
 
 
29
 
30
- from moleculekit.molecule import Molecule
31
 
32
  if "/home/user/app/af_backprop" not in sys.path:
33
  sys.path.append("/home/user/app/af_backprop")
@@ -36,24 +33,22 @@ if "/home/user/app/af_backprop" not in sys.path:
36
  if "/home/duerr/phd/08_Code/ProteinMPNN/af_backprop" not in sys.path:
37
  sys.path.append("/home/duerr/phd/08_Code/ProteinMPNN/af_backprop")
38
 
39
- from utils import *
40
 
41
- # import libraries
42
- import colabfold as cf
 
 
43
  from alphafold.common import protein
44
  from alphafold.data import pipeline
45
  from alphafold.model import data, config
46
  from alphafold.model import model as afmodel
47
  from alphafold.common import residue_constants
48
 
 
49
 
50
- import plotly.graph_objects as go
51
- import ray
52
-
53
- import re
54
 
55
- import numpy as np
56
- import jax
57
 
58
  tf.config.set_visible_devices([], "GPU")
59
 
@@ -322,10 +317,43 @@ def preprocess_mol(pdb_code="", filepath=""):
322
  mol.write("cleaned.pdb")
323
  return "cleaned.pdb", df
324
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
325
  def make_fixed_positions_dict(atomsel, residue_index_df):
326
  # we use the uploaded file for the selection
327
  mol = Molecule('original.pdb')
328
  # use index for selection as resids will change
 
 
 
 
 
 
 
329
  selected_residues = mol.get("index",atomsel)
330
 
331
  # clean up
@@ -1123,7 +1151,13 @@ with proteinMPNN:
1123
  - <code>resid 94 96 119</code> Residues 94, 96 and 119
1124
  - <code>within 5 of resname ZN</code> All residues with any atom <5 Å of zinc ion
1125
  - <code>chain A and within 5 of chain B </code> All residues of chain A that are part of the interface with chain B
1126
- - <code>protein and within 5 of nucleic </code> All residues that bind to DNA (if present in structure)""")
 
 
 
 
 
 
1127
  atomsel = gr.Textbox(placeholder="Specify atom selection ", label="Fixed positions")
1128
 
1129
  btn = gr.Button("Run")
@@ -1145,6 +1179,7 @@ with proteinMPNN:
1145
  ["Redesign of Homomer to Heteromer", "3HTN", "A,B", "C", False, 2, 0.1, ""],
1146
  ["Redesign of MID1 scaffold keeping binding site fixed", "3V1C", "A,B", "", False, 2, 0.1, "within 5 of resname ZN"],
1147
  ["Redesign of DNA binding protein", "3JRD", "A,B", "", False, 2, 0.1, "within 8 of nucleic"],
 
1148
  ],
1149
  )
1150
 
 
1
  import json, time, os, sys, glob
2
+ import urllib
 
 
 
 
 
 
 
3
  import shutil
4
  import warnings
 
 
 
 
 
5
  import copy
 
 
6
  import random
7
+ import re
8
+
9
  import os.path
10
 
11
+ import torch
12
+ import ray
13
+ import jax
14
+
15
+ import gradio as gr
16
+ import pandas as pd
17
+ import numpy as np
18
  import plotly.express as px
 
19
  import jax.numpy as jnp
20
  import tensorflow as tf
21
+ import matplotlib.pyplot as plt
22
+ import colabfold as cf
23
+ import plotly.graph_objects as go
24
 
25
+ import torch.nn as nn
26
+ import torch.nn.functional as F
27
 
 
28
 
29
  if "/home/user/app/af_backprop" not in sys.path:
30
  sys.path.append("/home/user/app/af_backprop")
 
33
  if "/home/duerr/phd/08_Code/ProteinMPNN/af_backprop" not in sys.path:
34
  sys.path.append("/home/duerr/phd/08_Code/ProteinMPNN/af_backprop")
35
 
 
36
 
37
+ from torch import optim
38
+ from torch.utils.data import DataLoader
39
+ from torch.utils.data.dataset import random_split, Subset
40
+ from moleculekit.molecule import Molecule
41
  from alphafold.common import protein
42
  from alphafold.data import pipeline
43
  from alphafold.model import data, config
44
  from alphafold.model import model as afmodel
45
  from alphafold.common import residue_constants
46
 
47
+ from utils import *
48
 
49
+ sys.path.append("/home/user/app/ProteinMPNN/vanilla_proteinmpnn")
50
+ sys.path.append("/home/duerr/phd/08_Code/ProteinMPNN/ProteinMPNN/vanilla_proteinmpnn")
 
 
51
 
 
 
52
 
53
  tf.config.set_visible_devices([], "GPU")
54
 
 
317
  mol.write("cleaned.pdb")
318
  return "cleaned.pdb", df
319
 
320
def assign_sasa(mol):
    """Return one SASA value per atom of ``mol``, with 0 for non-protein atoms.

    SASA is computed with moleculekit's ``MetricSasa`` in ``residue`` mode,
    restricted to the ``protein`` selection, and then broadcast back onto
    the atoms so the result can be stored in a per-atom attribute.

    Parameters
    ----------
    mol : moleculekit Molecule
        Structure providing ``resid`` and ``atomselect("protein")``.

    Returns
    -------
    numpy.ndarray
        One entry per atom, in atom order.
    """
    from moleculekit.projections.metricsasa import MetricSasa

    metric = MetricSasa(mode="residue", filtersel="protein")
    # First row of the projection; presumably the first frame -- TODO confirm.
    residue_sasa = metric.project(mol)[0]
    is_protein = mol.atomselect("protein")

    atoms = pd.DataFrame.from_dict({"resid": mol.resid, "is_prot": is_protein})
    # A new run starts whenever resid differs from the previous atom's resid.
    # NOTE(review): adjacent residues that share a resid (e.g. across a chain
    # break) are merged into one run; verify this matches MetricSasa's
    # residue definition.
    run_ids = (atoms["resid"].shift() != atoms["resid"]).cumsum()

    per_atom_sasa = []
    protein_run_idx = 0  # index into residue_sasa, which only covers protein
    for _, run in atoms.groupby(run_ids):
        # The first atom of the run decides whether the run is protein.
        if bool(run["is_prot"].iloc[0]):
            value = float(residue_sasa[protein_run_idx])
            protein_run_idx += 1
        else:
            value = 0
        # Repeat the value instead of writing a column into the group, which
        # would be chained assignment on a groupby sub-frame.
        per_atom_sasa.extend([value] * len(run))
    return np.array(per_atom_sasa)
338
+
339
def process_atomsel(atomsel):
    """Translate the app-specific selection keywords into their VMD aliases.

    ``sasa`` is rewritten to ``mass`` and ``plddt`` to ``beta``, matched
    case-insensitively. Only whole words are replaced, so longer tokens that
    merely contain these letters (for example a residue name) are left
    untouched. The rest of the selection string is returned unchanged; it is
    not lowercased.

    Parameters
    ----------
    atomsel : str
        Atom selection string entered by the user.

    Returns
    -------
    str
        The selection string with the keywords substituted.
    """
    atomsel = re.sub(r"\bsasa\b", "mass", atomsel, flags=re.I)
    atomsel = re.sub(r"\bplddt\b", "beta", atomsel, flags=re.I)
    return atomsel
344
+
345
+
346
  def make_fixed_positions_dict(atomsel, residue_index_df):
347
  # we use the uploaded file for the selection
348
  mol = Molecule('original.pdb')
349
  # use index for selection as resids will change
350
+
351
+
352
+ # set sasa to 0 for all non protein atoms (all non protein atoms are deleted later)
353
+ mol.masses = assign_sasa(mol)
354
+ print(mol.masses.shape)
355
+ print(assign_sasa(mol).shape)
356
+ atomsel = process_atomsel("chain B or (chain A and Sasa < 30)")
357
  selected_residues = mol.get("index",atomsel)
358
 
359
  # clean up
 
1151
  - <code>resid 94 96 119</code> Residues 94, 96 and 119
1152
  - <code>within 5 of resname ZN</code> All residues with any atom <5 Å of zinc ion
1153
  - <code>chain A and within 5 of chain B </code> All residues of chain A that are part of the interface with chain B
1154
+ - <code>protein and within 5 of nucleic </code> All residues that bind to DNA (if present in structure)
1155
+ - <code>not (chain A and within 5 of chain B) </code> only modify residues that are in the interface with the fixed chain, not further away
1156
+ - <code>chain A or (chain B and sasa < 20) </code> Keep chain A and all core residues fixed
1157
+ - <code>pLDDT >70 </code> Keep all residues with pLDDT above 70 fixed, i.e. redesign only residues with low pLDDT
1158
+
1159
+ Note that <code>sasa</code> and <code>pLDDT</code> selectors modify default VMD behavior. SASA is calculated using moleculekit and written to the mass attribute. Selections based on mass do not work.
1160
+ pLDDT is an alias for beta, it only works correctly with structures that contain the appropriate values in the beta column of the PDB file. """)
1161
  atomsel = gr.Textbox(placeholder="Specify atom selection ", label="Fixed positions")
1162
 
1163
  btn = gr.Button("Run")
 
1179
  ["Redesign of Homomer to Heteromer", "3HTN", "A,B", "C", False, 2, 0.1, ""],
1180
  ["Redesign of MID1 scaffold keeping binding site fixed", "3V1C", "A,B", "", False, 2, 0.1, "within 5 of resname ZN"],
1181
  ["Redesign of DNA binding protein", "3JRD", "A,B", "", False, 2, 0.1, "within 8 of nucleic"],
1182
+ ["Surface Redesign of miniprotein", "7JZM", "A,B", "", False, 2, 0.1, "chain B or (chain A and sasa < 20)"],
1183
  ],
1184
  )
1185
 
requirements.txt CHANGED
@@ -18,6 +18,6 @@ plotly
18
  GPUtil
19
  ray
20
  tqdm
21
- gradio==3.0.11
22
  protobuf<4
23
- -f https://storage.googleapis.com/jax-releases/jax_releases.html
 
18
  GPUtil
19
  ray
20
  tqdm
21
+ gradio==3.1
22
  protobuf<4
23
+ -f https://storage.googleapis.com/jax-releases/jax_releases.html