Simon Duerr committed on
Commit
135c853
1 Parent(s): 8397910

Fix memory problems and add a 700-residue sequence length cutoff

Browse files
Files changed (1) hide show
  1. app.py +50 -21
app.py CHANGED
@@ -20,19 +20,7 @@ import torch.nn.functional as F
20
  import random
21
  import os
22
  import os.path
23
- from protein_mpnn_utils import (
24
- loss_nll,
25
- loss_smoothed,
26
- gather_edges,
27
- gather_nodes,
28
- gather_nodes_t,
29
- cat_neighbors_nodes,
30
- _scores,
31
- _S_to_seq,
32
- tied_featurize,
33
- parse_PDB,
34
- )
35
- from protein_mpnn_utils import StructureDataset, StructureDatasetPDB, ProteinMPNN
36
  import plotly.express as px
37
  import urllib
38
  import jax.numpy as jnp
@@ -203,7 +191,7 @@ def save_pdb(outs, filename, LEN):
203
  f.write(pdb_lines)
204
 
205
 
206
- # @ray.remote(num_gpus=1, max_calls=1)
207
  def run_alphafold(sequence, num_recycles):
208
  recycles = num_recycles
209
  RUNNER, OPT = setup_af(sequence)
@@ -232,8 +220,10 @@ def run_alphafold(sequence, num_recycles):
232
  OPT["prev"] = outs["prev"]
233
  if recycles > 0:
234
  print(r, plddts[-1].mean())
235
- save_pdb(outs, "out.pdb", LEN)
236
-
 
 
237
  return plddts, outs["pae"], LEN
238
 
239
 
@@ -246,6 +236,20 @@ else:
246
 
247
 
248
  def setup_proteinmpnn(model_name="v_48_020", backbone_noise=0.00):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
249
  device = torch.device("cuda:0" if (torch.cuda.is_available()) else "cpu")
250
  # ProteinMPNN model name: v_48_002, v_48_010, v_48_020, v_48_030, v_32_002, v_32_010; v_32_020, v_32_030; v_48_010=version with 48 edges 0.10A noise
251
  # Standard deviation of Gaussian noise to add to backbone atoms
@@ -298,6 +302,20 @@ def update(
298
  model_name,
299
  backbone_noise,
300
  ):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
301
  pdb_path = get_pdb(pdb_code=inp, filepath=file)
302
 
303
  if pdb_path == None:
@@ -655,9 +673,20 @@ def update(
655
  def update_AF(startsequence, pdb, num_recycles):
656
 
657
  # # run alphafold using ray
658
- plddts, pae, num_res = run_alphafold(
659
- startsequence, num_recycles
660
- ) # ray.get(run_alphafold.remote(startsequence))
 
 
 
 
 
 
 
 
 
 
 
661
  x = np.arange(10)
662
  plots = []
663
  for recycle, plddts_val in enumerate(plddts):
@@ -784,7 +813,7 @@ select{
784
  <div class="text-sm">
785
  <div> RMSD AlphaFold vs. native: """
786
  + f"{rms:.2f}"
787
- + """Å</div>
788
  <div class="font-medium mt-4"><b>AlphaFold model confidence:</b></div>
789
  <div class="flex space-x-2 py-1"><span class="w-4 h-4"
790
  style="background-color: rgb(0, 83, 214);">&nbsp;</span><span class="legendlabel">Very high
@@ -1073,6 +1102,6 @@ bioRxiv 2022.06.03.494563; doi: [10.1101/2022.06.03.494563](https://doi.org/10.1
1073
  )
1074
 
1075
 
1076
- # ray.init(runtime_env={"working_dir": "./af_backprop"})
1077
 
1078
  proteinMPNN.launch(share=True, debug=True)
 
20
  import random
21
  import os
22
  import os.path
23
+
 
 
 
 
 
 
 
 
 
 
 
 
24
  import plotly.express as px
25
  import urllib
26
  import jax.numpy as jnp
 
191
  f.write(pdb_lines)
192
 
193
 
194
+ @ray.remote(num_gpus=1, max_calls=1)
195
  def run_alphafold(sequence, num_recycles):
196
  recycles = num_recycles
197
  RUNNER, OPT = setup_af(sequence)
 
220
  OPT["prev"] = outs["prev"]
221
  if recycles > 0:
222
  print(r, plddts[-1].mean())
223
+ if os.path.exists("/home/duerr/phd/08_Code/ProteinMPNN"):
224
+ save_pdb(outs, "/home/duerr/phd/08_Code/ProteinMPNN/out.pdb", LEN)
225
+ else:
226
+ save_pdb(outs, "/home/user/app/out.pdb", LEN)
227
  return plddts, outs["pae"], LEN
228
 
229
 
 
236
 
237
 
238
  def setup_proteinmpnn(model_name="v_48_020", backbone_noise=0.00):
239
+ from protein_mpnn_utils import (
240
+ loss_nll,
241
+ loss_smoothed,
242
+ gather_edges,
243
+ gather_nodes,
244
+ gather_nodes_t,
245
+ cat_neighbors_nodes,
246
+ _scores,
247
+ _S_to_seq,
248
+ tied_featurize,
249
+ parse_PDB,
250
+ )
251
+ from protein_mpnn_utils import StructureDataset, StructureDatasetPDB, ProteinMPNN
252
+
253
  device = torch.device("cuda:0" if (torch.cuda.is_available()) else "cpu")
254
  # ProteinMPNN model name: v_48_002, v_48_010, v_48_020, v_48_030, v_32_002, v_32_010; v_32_020, v_32_030; v_48_010=version with 48 edges 0.10A noise
255
  # Standard deviation of Gaussian noise to add to backbone atoms
 
302
  model_name,
303
  backbone_noise,
304
  ):
305
+ from protein_mpnn_utils import (
306
+ loss_nll,
307
+ loss_smoothed,
308
+ gather_edges,
309
+ gather_nodes,
310
+ gather_nodes_t,
311
+ cat_neighbors_nodes,
312
+ _scores,
313
+ _S_to_seq,
314
+ tied_featurize,
315
+ parse_PDB,
316
+ )
317
+ from protein_mpnn_utils import StructureDataset, StructureDatasetPDB, ProteinMPNN
318
+
319
  pdb_path = get_pdb(pdb_code=inp, filepath=file)
320
 
321
  if pdb_path == None:
 
673
  def update_AF(startsequence, pdb, num_recycles):
674
 
675
  # # run alphafold using ray
676
+ # plddts, pae, num_res = run_alphafold(
677
+ # startsequence, num_recycles
678
+ # )
679
+ if len(startsequence) > 700:
680
+ return (
681
+ """
682
+ <div class="p-4 mb-4 text-sm text-yellow-700 bg-orange-50 rounded-lg" role="alert">
683
+ <span class="font-medium">Sorry!</span> Currently only small proteins can be run in the server in order to reduce wait time. Try a protein <700 aa. Bigger proteins you can run on <a href="https://github.com/sokrypton/colabfold">ColabFold</a>
684
+ </div>
685
+ """,
686
+ plt.figure(),
687
+ plt.figure(),
688
+ )
689
+ plddts, pae, num_res = ray.get(run_alphafold.remote(startsequence, num_recycles))
690
  x = np.arange(10)
691
  plots = []
692
  for recycle, plddts_val in enumerate(plddts):
 
813
  <div class="text-sm">
814
  <div> RMSD AlphaFold vs. native: """
815
  + f"{rms:.2f}"
816
+ + """Å computed using CEAlign on the aligned fragment</div>
817
  <div class="font-medium mt-4"><b>AlphaFold model confidence:</b></div>
818
  <div class="flex space-x-2 py-1"><span class="w-4 h-4"
819
  style="background-color: rgb(0, 83, 214);">&nbsp;</span><span class="legendlabel">Very high
 
1102
  )
1103
 
1104
 
1105
+ ray.init(runtime_env={"working_dir": "./af_backprop"})
1106
 
1107
  proteinMPNN.launch(share=True, debug=True)