Simon Duerr committed on
Commit
3279dbd
1 Parent(s): 8b47127

cpu requirements

Browse files
Files changed (2) hide show
  1. app.py +189 -86
  2. requirements.txt +3 -3
app.py CHANGED
@@ -69,10 +69,10 @@ def clear_mem():
69
  buf.delete()
70
 
71
 
72
- print("Is cuda available",torch.cuda.is_available())
73
- stream = os.popen('nvcc --version')
74
- output = stream.read()
75
- print(output)
76
 
77
 
78
  def setup_af(seq, model_name="model_5_ptm"):
@@ -177,7 +177,7 @@ def align_structures(pdb1, pdb2, lenRes, index):
177
  io.set_structure(sample_structure)
178
  io.save(f"outputs/out_{index}_aligned.pdb")
179
  # Doing this to get around biopython CEALIGN bug
180
- #subprocess.call("pymol -c -Q -r cealign.pml", shell=True)
181
 
182
  return aligner.rms, "outputs/reference.pdb", f"outputs/out_{index}_aligned.pdb"
183
 
@@ -219,7 +219,7 @@ def run_alphafold(sequences, num_recycles):
219
  }
220
 
221
  positions = []
222
-
223
  for r in range(recycles + 1):
224
  outs = RUNNER(x, OPT)
225
  outs = jax.tree_map(lambda x: np.asarray(x), outs)
@@ -228,10 +228,12 @@ def run_alphafold(sequences, num_recycles):
228
  plddts.append(outs["plddt"][:LEN])
229
  paes.append(outs["pae"])
230
  if os.path.exists("/home/duerr/phd/08_Code/ProteinMPNN"):
231
- save_pdb(outs, f"/home/duerr/phd/08_Code/ProteinMPNN/outputs/out_{i}.pdb", LEN)
 
 
232
  else:
233
  save_pdb(outs, f"/home/user/app/outputs/out_{i}.pdb", LEN)
234
- return plddts,paes, LEN
235
 
236
 
237
  if os.path.exists("/home/duerr/phd/08_Code/ProteinMPNN"):
@@ -308,7 +310,7 @@ def preprocess_mol(pdb_code="", filepath=""):
308
  return None
309
  else:
310
  mol = Molecule(pdb_code)
311
- mol.write('original.pdb')
312
  # clean messy files and only include protein itself
313
  mol.filter("protein")
314
  # renumber using moleculekit 0...len(protein)
@@ -324,70 +326,73 @@ def preprocess_mol(pdb_code="", filepath=""):
324
  mol.write("cleaned.pdb")
325
  return "cleaned.pdb", df
326
 
 
327
  def assign_sasa(mol):
328
  from moleculekit.projections.metricsasa import MetricSasa
329
- metr = MetricSasa(
330
- mode="residue", filtersel="protein"
331
- )
332
  sasaR = metr.project(mol)[0]
333
  is_prot = mol.atomselect("protein")
334
- resids=pd.DataFrame.from_dict({"resid":mol.resid, "is_prot":is_prot})
335
- new_masses=[]
336
  i_without_non_prot = 0
337
- for i, g in resids.groupby((resids['resid'].shift() != resids['resid']).cumsum()):
338
- if g["is_prot"].unique()[0]==True:
339
- g["sasa"]=sasaR[i_without_non_prot]
340
- i_without_non_prot+=1
341
  else:
342
- g["sasa"]=0
343
  new_masses.extend(list(g.sasa))
344
  return np.array(new_masses)
345
 
 
346
  def process_atomsel(atomsel):
347
  """everything lowercase and replace some keywords not relevant for protein design"""
348
- atomsel=re.sub('sasa', 'mass',atomsel, flags=re.I)
349
- atomsel=re.sub('plddt', 'beta',atomsel, flags=re.I)
350
  return atomsel
351
 
352
 
353
  def make_fixed_positions_dict(atomsel, residue_index_df):
354
  # we use the uploaded file for the selection
355
- mol = Molecule('original.pdb')
356
  # use index for selection as resids will change
357
 
358
-
359
  # set sasa to 0 for all non protein atoms (all non protein atoms are deleted later)
360
  mol.masses = assign_sasa(mol)
361
  print(mol.masses.shape)
362
  print(assign_sasa(mol).shape)
363
  atomsel = process_atomsel(atomsel)
364
- selected_residues = mol.get("index",atomsel)
365
 
366
  # clean up
367
  mol.filter("protein")
368
  mol.renumberResidues()
369
  # based on selected index now get resids
370
  selected_residues = [str(i) for i in selected_residues]
371
- if len(selected_residues)==0:
372
  return None, []
373
  selected_residues_str = " ".join(selected_residues)
374
- selected_residues=set(mol.get('resid', sel=f"index {selected_residues_str}"))
375
 
376
  # use the proteinMPNN index nomenclature to assemble fixed_positions_dict
377
- fixed_positions_df = residue_index_df[residue_index_df['new_resid'].isin(selected_residues)]
 
 
378
 
379
- chains = set(mol.get('chain', sel="all"))
380
- fixed_position_dict = {'cleaned':{}}
381
- #store the selected residues in a list for the visualization later with cleaned.pdb
382
- selected_residues = list(fixed_positions_df['new_resid'])
383
 
384
  for c in chains:
385
- fixed_position_dict['cleaned'][c]=[]
386
 
387
  for i, row in fixed_positions_df.iterrows():
388
- fixed_position_dict['cleaned'][row['chain']].append(row['proteinMPNN_index'])
389
  return fixed_position_dict, selected_residues
390
 
 
391
  def update(
392
  inp,
393
  file,
@@ -398,7 +403,7 @@ def update(
398
  sampling_temp,
399
  model_name,
400
  backbone_noise,
401
- atomsel
402
  ):
403
  from protein_mpnn_utils import (
404
  loss_nll,
@@ -471,8 +476,10 @@ def update(
471
  if atomsel == "":
472
  fixed_positions_dict, selected_residues = None, []
473
  else:
474
- fixed_positions_dict, selected_residues=make_fixed_positions_dict(atomsel, mol_index)
475
-
 
 
476
  pssm_dict = None
477
  omit_AA_dict = None
478
  bias_AA_dict = None
@@ -489,7 +496,7 @@ def update(
489
  tied_positions_dict = make_tied_positions_for_homomers(pdb_dict_list)
490
  else:
491
  tied_positions_dict = None
492
-
493
  chain_id_dict = {}
494
  chain_id_dict[pdb_dict_list[0]["name"]] = (designed_chain_list, fixed_chain_list)
495
  with torch.no_grad():
@@ -738,7 +745,7 @@ def update(
738
  )
739
  seq_list.append(seq + chain_s)
740
  message += f"{line}\n"
741
- if fixed_positions_dict!=None:
742
  message += f"\nfixed positions:* {fixed_positions_dict['cleaned']} \n\n*uses CHAIN:[1..len(chain)] residue numbering"
743
  # somehow sequences still contain X, remove again
744
  for i, x in enumerate(seq_list):
@@ -769,7 +776,7 @@ def update(
769
  )
770
 
771
  fig_tadjusted.update_xaxes(side="top")
772
- seq_dict = {"seq_list":seq_list, "recovery":seq_recovery, "seq_score":seq_score}
773
  return (
774
  message,
775
  fig,
@@ -779,17 +786,17 @@ def update(
779
  pdb_path,
780
  gr.Dropdown.update(choices=seq_list),
781
  selected_residues,
782
- seq_dict
783
  )
784
 
785
 
786
- def update_AF(seq_dict, pdb, num_recycles,selectedResidues):
787
 
788
  # # run alphafold using ray
789
  # plddts, pae, num_res = run_alphafold(
790
  # startsequence, num_recycles
791
  # )
792
- allSeqs = seq_dict['seq_list']
793
  lenSeqs = len(allSeqs)
794
  if len(allSeqs[0]) > 700:
795
  return (
@@ -801,14 +808,22 @@ def update_AF(seq_dict, pdb, num_recycles,selectedResidues):
801
  plt.figure(),
802
  plt.figure(),
803
  )
804
-
805
  plddts, paes, num_res = ray.get(run_alphafold.remote(allSeqs, num_recycles))
806
 
807
  sequences = {}
808
  for i in range(lenSeqs):
809
- rms, input_pdb, aligned_pdb = align_structures(pdb, f"outputs/out_{i}.pdb", num_res,i)
810
- sequences[i]={"Seq":i,"RMSD":f"{rms:.2f}","Score":seq_dict['seq_score'][i],"Recovery":seq_dict["recovery"][i],"Mean pLDDT":f"{np.mean(plddts[i]):.4f}"}
811
- results=pd.DataFrame.from_dict(sequences, orient="index")
 
 
 
 
 
 
 
 
812
  print(results)
813
  plots = []
814
  for index, plddts_val in enumerate(plddts):
@@ -837,7 +852,7 @@ def update_AF(seq_dict, pdb, num_recycles,selectedResidues):
837
  legend=dict(yanchor="bottom", y=0.01, xanchor="left", x=0.99),
838
  )
839
  pae_plots = []
840
- for i,pae in enumerate(paes):
841
  plt.figure()
842
  plt.title(f"Predicted Aligned Error sequence {i}")
843
  Ln = pae.shape[0]
@@ -858,7 +873,20 @@ def update_AF(seq_dict, pdb, num_recycles,selectedResidues):
858
  # plotAF_pae.write_html("test.html")
859
  # plotAF_pae.update_layout(title="Predicted Aligned Error", template="simple_white")
860
 
861
- return molecule(input_pdb, aligned_pdb, lenSeqs, num_res, selectedResidues, allSeqs, sequences), plotAF_plddt, pae_plots, results
 
 
 
 
 
 
 
 
 
 
 
 
 
862
 
863
 
864
  def read_mol(molpath):
@@ -870,26 +898,39 @@ def read_mol(molpath):
870
  return mol
871
 
872
 
873
- def molecule(input_pdb, aligned_pdb, lenSeqs, num_res, selectedResidues, allSeqs, sequences):
 
 
874
 
875
  mol = read_mol("outputs/reference.pdb")
876
- options =""
877
  pred_mol = "["
878
  seqdata = "{"
879
  selected = "selected"
880
  for i in range(lenSeqs):
881
- seqdata+=str(i)+': { "score": '+sequences[i]["Score"]+', "rmsd": '+sequences[i]["RMSD"]+', "recovery": '+sequences[i]["Recovery"]+', "plddt": '+sequences[i]["Mean pLDDT"]+', "seq":"'+allSeqs[i]+'"}'
882
- options+=f'<option {selected} value="{i}">sequence {i} </option>' #RMSD {sequences[i]["RMSD"]}, score {sequences[i]["Score"]}, recovery {sequences[i]["Recovery"]} pLDDT {sequences[i]["Mean pLDDT"]}
883
- p=f"outputs/out_{i}_aligned.pdb"
884
- pred_mol+=f"`{read_mol(p)}`"
 
 
 
 
 
 
 
 
 
 
 
 
 
885
  selected = ""
886
- if i!=lenSeqs-1:
887
- pred_mol+=","
888
- seqdata+=","
889
- pred_mol+="]"
890
- seqdata+="}"
891
-
892
-
893
 
894
  x = (
895
  """<!DOCTYPE html>
@@ -934,7 +975,9 @@ def molecule(input_pdb, aligned_pdb, lenSeqs, num_res, selectedResidues, allSeqs
934
  a sequence</label>
935
  <select id="seq"
936
  class="bg-gray-50 border border-gray-300 text-gray-900 text-sm rounded-lg focus:ring-blue-500 focus:border-blue-500 block w-full p-2.5 dark:bg-gray-700 dark:border-gray-600 dark:placeholder-gray-400 dark:text-white dark:focus:ring-blue-500 dark:focus:border-blue-500">
937
- """+options+"""
 
 
938
  </select>
939
  </div>
940
  <div class="font-mono bg-gray-100 py-3 px-2 font-sm rounded">
@@ -1033,16 +1076,18 @@ def molecule(input_pdb, aligned_pdb, lenSeqs, num_res, selectedResidues, allSeqs
1033
  let element = null;
1034
  let config = null;
1035
  let currentIndex = 0;
1036
- let seqs = """+seqdata+"""
 
 
1037
  let data = """
1038
  + pred_mol
1039
  + """
1040
  let pdb = `"""
1041
  + mol
1042
  + """`
1043
- var selectedResidues = """+
1044
- f"{selectedResidues}"
1045
- +"""
1046
  //AlphaFold code from https://gist.github.com/piroyon/30d1c1099ad488a7952c3b21a5bebc96
1047
  let colorAlpha = function (atom) {
1048
  if (atom.b < 50) {
@@ -1057,7 +1102,9 @@ def molecule(input_pdb, aligned_pdb, lenSeqs, num_res, selectedResidues, allSeqs
1057
  };
1058
 
1059
  let colors = {}
1060
- for (let i=0; i<"""+str(num_res)+""";i++){
 
 
1061
  if (selectedResidues.includes(i)){
1062
  colors[i]="hotpink"
1063
  }else{
@@ -1152,7 +1199,16 @@ def molecule(input_pdb, aligned_pdb, lenSeqs, num_res, selectedResidues, allSeqs
1152
 
1153
 
1154
  def set_examples(example):
1155
- label, inp, designed_chain, fixed_chain, homomer, num_seqs, sampling_temp, atomsel = example
 
 
 
 
 
 
 
 
 
1156
  return [
1157
  label,
1158
  inp,
@@ -1161,7 +1217,7 @@ def set_examples(example):
1161
  homomer,
1162
  gr.Slider.update(value=num_seqs),
1163
  gr.Radio.update(value=sampling_temp),
1164
- atomsel
1165
  ]
1166
 
1167
 
@@ -1198,7 +1254,8 @@ with proteinMPNN:
1198
  value=0.1,
1199
  label="Sampling temperature",
1200
  )
1201
- gr.Markdown(""" Sampling temperature for amino acids, `T=0.0` means taking argmax, `T>>1.0` means sample randomly. Suggested values `0.1, 0.15, 0.2, 0.25, 0.3`. Higher values will lead to more diversity.
 
1202
  """
1203
  )
1204
  with gr.Row():
@@ -1220,8 +1277,9 @@ with proteinMPNN:
1220
  gr.Markdown(
1221
  "for correct symmetric tying lenghts of homomer chains should be the same"
1222
  )
1223
- gr.Markdown('## Fixed positions')
1224
- gr.Markdown("""You can fix important positions in the protein. Resid should be specified with the same numbering as in the input pdb file. The fixed residues will be highlighted in the output.
 
1225
  The [VMD selection](http://www.ks.uiuc.edu/Research/vmd/vmd-1.9.2/ug/node89.html) synthax is used. You can also select based on ligands or chains in the input structure to specify interfaces to be fixed.
1226
 
1227
  - <code>within 5 of resid 94</code> All residues that have >1 atom closer than 5 Å to any atom of residue 94
@@ -1235,8 +1293,11 @@ with proteinMPNN:
1235
  - <code>pLDDT >70 </code> Redesign all residues with low pLDDT
1236
 
1237
  Note that <code>sasa</code> and <code>pLDDT</code> selectors modify default VMD behavior. SASA is calculated using moleculekit and written to the mass attribute. Selections based on mass do not work.
1238
- pLDDT is an alias for beta, it only works correctly with structures that contain the appropriate values in the beta column of the PDB file. """)
1239
- atomsel = gr.Textbox(placeholder="Specify atom selection ", label="Fixed positions")
 
 
 
1240
 
1241
  btn = gr.Button("Run")
1242
  label = gr.Textbox(label="Label", visible=False)
@@ -1249,19 +1310,45 @@ with proteinMPNN:
1249
  homomer,
1250
  num_seqs,
1251
  sampling_temp,
1252
- atomsel
1253
  ],
1254
  samples=[
1255
- ["Homomer design", "1O91", "A,B,C", "", True, 2, 0.1,""],
1256
- ["Monomer design", "6MRR", "A", "", False, 2, 0.1,""],
1257
  ["Redesign of Homomer to Heteromer", "3HTN", "A,B", "C", False, 2, 0.1, ""],
1258
- ["Redesign of MID1 scaffold keeping binding site fixed", "3V1C", "A,B", "", False, 2, 0.1, "within 5 of resname ZN"],
1259
- ["Redesign of DNA binding protein", "3JRD", "A,B", "", False, 2, 0.1, "within 8 of nucleic"],
1260
- ["Surface Redesign of miniprotein", "7JZM", "A,B", "", False, 2, 0.1, "chain B or (chain A and sasa < 20)"],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1261
  ],
1262
  )
1263
 
1264
-
1265
  gr.Markdown("# Output")
1266
 
1267
  with gr.Tabs():
@@ -1296,8 +1383,9 @@ with proteinMPNN:
1296
  with gr.Row():
1297
  with gr.Row():
1298
  chosen_seq = gr.Dropdown(
1299
- choices=[], label="Select a sequence for validation",
1300
- visible=False
 
1301
  )
1302
  num_recycles = gr.Dropdown(
1303
  choices=[0, 1, 3, 5], value=3, label="num Recycles"
@@ -1307,10 +1395,25 @@ with proteinMPNN:
1307
  mol = gr.HTML()
1308
  with gr.Column():
1309
  gr.Markdown("## Metrics")
1310
- results = gr.Dataframe(nteractive=False, row_count=(0, 'dynamic'), headers=["Seq","RMSD","Score","Recovery","Mean pLDDT"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1311
  plotAF_plddt = gr.Plot(label="pLDDT")
1312
  # remove maxh80 class from css
1313
- plotAF_pae = gr.Gallery(label="PAE plots") #gr.Plot(label="PAE")
1314
  tempFile = gr.Variable()
1315
  selectedResidues = gr.Variable()
1316
  seq_dict = gr.Variable()
@@ -1337,7 +1440,7 @@ with proteinMPNN:
1337
  tempFile,
1338
  chosen_seq,
1339
  selectedResidues,
1340
- seq_dict
1341
  ],
1342
  )
1343
  btnAF.click(
 
69
  buf.delete()
70
 
71
 
72
+ print("Is cuda available", torch.cuda.is_available())
73
+ # stream = os.popen("nvcc --version")
74
+ # output = stream.read()
75
+ # print(output)
76
 
77
 
78
  def setup_af(seq, model_name="model_5_ptm"):
 
177
  io.set_structure(sample_structure)
178
  io.save(f"outputs/out_{index}_aligned.pdb")
179
  # Doing this to get around biopython CEALIGN bug
180
+ # subprocess.call("pymol -c -Q -r cealign.pml", shell=True)
181
 
182
  return aligner.rms, "outputs/reference.pdb", f"outputs/out_{index}_aligned.pdb"
183
 
 
219
  }
220
 
221
  positions = []
222
+
223
  for r in range(recycles + 1):
224
  outs = RUNNER(x, OPT)
225
  outs = jax.tree_map(lambda x: np.asarray(x), outs)
 
228
  plddts.append(outs["plddt"][:LEN])
229
  paes.append(outs["pae"])
230
  if os.path.exists("/home/duerr/phd/08_Code/ProteinMPNN"):
231
+ save_pdb(
232
+ outs, f"/home/duerr/phd/08_Code/ProteinMPNN/outputs/out_{i}.pdb", LEN
233
+ )
234
  else:
235
  save_pdb(outs, f"/home/user/app/outputs/out_{i}.pdb", LEN)
236
+ return plddts, paes, LEN
237
 
238
 
239
  if os.path.exists("/home/duerr/phd/08_Code/ProteinMPNN"):
 
310
  return None
311
  else:
312
  mol = Molecule(pdb_code)
313
+ mol.write("original.pdb")
314
  # clean messy files and only include protein itself
315
  mol.filter("protein")
316
  # renumber using moleculekit 0...len(protein)
 
326
  mol.write("cleaned.pdb")
327
  return "cleaned.pdb", df
328
 
329
+
330
  def assign_sasa(mol):
331
  from moleculekit.projections.metricsasa import MetricSasa
332
+
333
+ metr = MetricSasa(mode="residue", filtersel="protein")
 
334
  sasaR = metr.project(mol)[0]
335
  is_prot = mol.atomselect("protein")
336
+ resids = pd.DataFrame.from_dict({"resid": mol.resid, "is_prot": is_prot})
337
+ new_masses = []
338
  i_without_non_prot = 0
339
+ for i, g in resids.groupby((resids["resid"].shift() != resids["resid"]).cumsum()):
340
+ if g["is_prot"].unique()[0] == True:
341
+ g["sasa"] = sasaR[i_without_non_prot]
342
+ i_without_non_prot += 1
343
  else:
344
+ g["sasa"] = 0
345
  new_masses.extend(list(g.sasa))
346
  return np.array(new_masses)
347
 
348
+
349
  def process_atomsel(atomsel):
350
  """everything lowercase and replace some keywords not relevant for protein design"""
351
+ atomsel = re.sub("sasa", "mass", atomsel, flags=re.I)
352
+ atomsel = re.sub("plddt", "beta", atomsel, flags=re.I)
353
  return atomsel
354
 
355
 
356
  def make_fixed_positions_dict(atomsel, residue_index_df):
357
  # we use the uploaded file for the selection
358
+ mol = Molecule("original.pdb")
359
  # use index for selection as resids will change
360
 
 
361
  # set sasa to 0 for all non protein atoms (all non protein atoms are deleted later)
362
  mol.masses = assign_sasa(mol)
363
  print(mol.masses.shape)
364
  print(assign_sasa(mol).shape)
365
  atomsel = process_atomsel(atomsel)
366
+ selected_residues = mol.get("index", atomsel)
367
 
368
  # clean up
369
  mol.filter("protein")
370
  mol.renumberResidues()
371
  # based on selected index now get resids
372
  selected_residues = [str(i) for i in selected_residues]
373
+ if len(selected_residues) == 0:
374
  return None, []
375
  selected_residues_str = " ".join(selected_residues)
376
+ selected_residues = set(mol.get("resid", sel=f"index {selected_residues_str}"))
377
 
378
  # use the proteinMPNN index nomenclature to assemble fixed_positions_dict
379
+ fixed_positions_df = residue_index_df[
380
+ residue_index_df["new_resid"].isin(selected_residues)
381
+ ]
382
 
383
+ chains = set(mol.get("chain", sel="all"))
384
+ fixed_position_dict = {"cleaned": {}}
385
+ # store the selected residues in a list for the visualization later with cleaned.pdb
386
+ selected_residues = list(fixed_positions_df["new_resid"])
387
 
388
  for c in chains:
389
+ fixed_position_dict["cleaned"][c] = []
390
 
391
  for i, row in fixed_positions_df.iterrows():
392
+ fixed_position_dict["cleaned"][row["chain"]].append(row["proteinMPNN_index"])
393
  return fixed_position_dict, selected_residues
394
 
395
+
396
  def update(
397
  inp,
398
  file,
 
403
  sampling_temp,
404
  model_name,
405
  backbone_noise,
406
+ atomsel,
407
  ):
408
  from protein_mpnn_utils import (
409
  loss_nll,
 
476
  if atomsel == "":
477
  fixed_positions_dict, selected_residues = None, []
478
  else:
479
+ fixed_positions_dict, selected_residues = make_fixed_positions_dict(
480
+ atomsel, mol_index
481
+ )
482
+
483
  pssm_dict = None
484
  omit_AA_dict = None
485
  bias_AA_dict = None
 
496
  tied_positions_dict = make_tied_positions_for_homomers(pdb_dict_list)
497
  else:
498
  tied_positions_dict = None
499
+
500
  chain_id_dict = {}
501
  chain_id_dict[pdb_dict_list[0]["name"]] = (designed_chain_list, fixed_chain_list)
502
  with torch.no_grad():
 
745
  )
746
  seq_list.append(seq + chain_s)
747
  message += f"{line}\n"
748
+ if fixed_positions_dict != None:
749
  message += f"\nfixed positions:* {fixed_positions_dict['cleaned']} \n\n*uses CHAIN:[1..len(chain)] residue numbering"
750
  # somehow sequences still contain X, remove again
751
  for i, x in enumerate(seq_list):
 
776
  )
777
 
778
  fig_tadjusted.update_xaxes(side="top")
779
+ seq_dict = {"seq_list": seq_list, "recovery": seq_recovery, "seq_score": seq_score}
780
  return (
781
  message,
782
  fig,
 
786
  pdb_path,
787
  gr.Dropdown.update(choices=seq_list),
788
  selected_residues,
789
+ seq_dict,
790
  )
791
 
792
 
793
+ def update_AF(seq_dict, pdb, num_recycles, selectedResidues):
794
 
795
  # # run alphafold using ray
796
  # plddts, pae, num_res = run_alphafold(
797
  # startsequence, num_recycles
798
  # )
799
+ allSeqs = seq_dict["seq_list"]
800
  lenSeqs = len(allSeqs)
801
  if len(allSeqs[0]) > 700:
802
  return (
 
808
  plt.figure(),
809
  plt.figure(),
810
  )
811
+
812
  plddts, paes, num_res = ray.get(run_alphafold.remote(allSeqs, num_recycles))
813
 
814
  sequences = {}
815
  for i in range(lenSeqs):
816
+ rms, input_pdb, aligned_pdb = align_structures(
817
+ pdb, f"outputs/out_{i}.pdb", num_res, i
818
+ )
819
+ sequences[i] = {
820
+ "Seq": i,
821
+ "RMSD": f"{rms:.2f}",
822
+ "Score": seq_dict["seq_score"][i],
823
+ "Recovery": seq_dict["recovery"][i],
824
+ "Mean pLDDT": f"{np.mean(plddts[i]):.4f}",
825
+ }
826
+ results = pd.DataFrame.from_dict(sequences, orient="index")
827
  print(results)
828
  plots = []
829
  for index, plddts_val in enumerate(plddts):
 
852
  legend=dict(yanchor="bottom", y=0.01, xanchor="left", x=0.99),
853
  )
854
  pae_plots = []
855
+ for i, pae in enumerate(paes):
856
  plt.figure()
857
  plt.title(f"Predicted Aligned Error sequence {i}")
858
  Ln = pae.shape[0]
 
873
  # plotAF_pae.write_html("test.html")
874
  # plotAF_pae.update_layout(title="Predicted Aligned Error", template="simple_white")
875
 
876
+ return (
877
+ molecule(
878
+ input_pdb,
879
+ aligned_pdb,
880
+ lenSeqs,
881
+ num_res,
882
+ selectedResidues,
883
+ allSeqs,
884
+ sequences,
885
+ ),
886
+ plotAF_plddt,
887
+ pae_plots,
888
+ results,
889
+ )
890
 
891
 
892
  def read_mol(molpath):
 
898
  return mol
899
 
900
 
901
+ def molecule(
902
+ input_pdb, aligned_pdb, lenSeqs, num_res, selectedResidues, allSeqs, sequences
903
+ ):
904
 
905
  mol = read_mol("outputs/reference.pdb")
906
+ options = ""
907
  pred_mol = "["
908
  seqdata = "{"
909
  selected = "selected"
910
  for i in range(lenSeqs):
911
+ seqdata += (
912
+ str(i)
913
+ + ': { "score": '
914
+ + sequences[i]["Score"]
915
+ + ', "rmsd": '
916
+ + sequences[i]["RMSD"]
917
+ + ', "recovery": '
918
+ + sequences[i]["Recovery"]
919
+ + ', "plddt": '
920
+ + sequences[i]["Mean pLDDT"]
921
+ + ', "seq":"'
922
+ + allSeqs[i]
923
+ + '"}'
924
+ )
925
+ options += f'<option {selected} value="{i}">sequence {i} </option>' # RMSD {sequences[i]["RMSD"]}, score {sequences[i]["Score"]}, recovery {sequences[i]["Recovery"]} pLDDT {sequences[i]["Mean pLDDT"]}
926
+ p = f"outputs/out_{i}_aligned.pdb"
927
+ pred_mol += f"`{read_mol(p)}`"
928
  selected = ""
929
+ if i != lenSeqs - 1:
930
+ pred_mol += ","
931
+ seqdata += ","
932
+ pred_mol += "]"
933
+ seqdata += "}"
 
 
934
 
935
  x = (
936
  """<!DOCTYPE html>
 
975
  a sequence</label>
976
  <select id="seq"
977
  class="bg-gray-50 border border-gray-300 text-gray-900 text-sm rounded-lg focus:ring-blue-500 focus:border-blue-500 block w-full p-2.5 dark:bg-gray-700 dark:border-gray-600 dark:placeholder-gray-400 dark:text-white dark:focus:ring-blue-500 dark:focus:border-blue-500">
978
+ """
979
+ + options
980
+ + """
981
  </select>
982
  </div>
983
  <div class="font-mono bg-gray-100 py-3 px-2 font-sm rounded">
 
1076
  let element = null;
1077
  let config = null;
1078
  let currentIndex = 0;
1079
+ let seqs = """
1080
+ + seqdata
1081
+ + """
1082
  let data = """
1083
  + pred_mol
1084
  + """
1085
  let pdb = `"""
1086
  + mol
1087
  + """`
1088
+ var selectedResidues = """
1089
+ + f"{selectedResidues}"
1090
+ + """
1091
  //AlphaFold code from https://gist.github.com/piroyon/30d1c1099ad488a7952c3b21a5bebc96
1092
  let colorAlpha = function (atom) {
1093
  if (atom.b < 50) {
 
1102
  };
1103
 
1104
  let colors = {}
1105
+ for (let i=0; i<"""
1106
+ + str(num_res)
1107
+ + """;i++){
1108
  if (selectedResidues.includes(i)){
1109
  colors[i]="hotpink"
1110
  }else{
 
1199
 
1200
 
1201
  def set_examples(example):
1202
+ (
1203
+ label,
1204
+ inp,
1205
+ designed_chain,
1206
+ fixed_chain,
1207
+ homomer,
1208
+ num_seqs,
1209
+ sampling_temp,
1210
+ atomsel,
1211
+ ) = example
1212
  return [
1213
  label,
1214
  inp,
 
1217
  homomer,
1218
  gr.Slider.update(value=num_seqs),
1219
  gr.Radio.update(value=sampling_temp),
1220
+ atomsel,
1221
  ]
1222
 
1223
 
 
1254
  value=0.1,
1255
  label="Sampling temperature",
1256
  )
1257
+ gr.Markdown(
1258
+ """ Sampling temperature for amino acids, `T=0.0` means taking argmax, `T>>1.0` means sample randomly. Suggested values `0.1, 0.15, 0.2, 0.25, 0.3`. Higher values will lead to more diversity.
1259
  """
1260
  )
1261
  with gr.Row():
 
1277
  gr.Markdown(
1278
  "for correct symmetric tying lenghts of homomer chains should be the same"
1279
  )
1280
+ gr.Markdown("## Fixed positions")
1281
+ gr.Markdown(
1282
+ """You can fix important positions in the protein. Resid should be specified with the same numbering as in the input pdb file. The fixed residues will be highlighted in the output.
1283
  The [VMD selection](http://www.ks.uiuc.edu/Research/vmd/vmd-1.9.2/ug/node89.html) synthax is used. You can also select based on ligands or chains in the input structure to specify interfaces to be fixed.
1284
 
1285
  - <code>within 5 of resid 94</code> All residues that have >1 atom closer than 5 Å to any atom of residue 94
 
1293
  - <code>pLDDT >70 </code> Redesign all residues with low pLDDT
1294
 
1295
  Note that <code>sasa</code> and <code>pLDDT</code> selectors modify default VMD behavior. SASA is calculated using moleculekit and written to the mass attribute. Selections based on mass do not work.
1296
+ pLDDT is an alias for beta, it only works correctly with structures that contain the appropriate values in the beta column of the PDB file. """
1297
+ )
1298
+ atomsel = gr.Textbox(
1299
+ placeholder="Specify atom selection ", label="Fixed positions"
1300
+ )
1301
 
1302
  btn = gr.Button("Run")
1303
  label = gr.Textbox(label="Label", visible=False)
 
1310
  homomer,
1311
  num_seqs,
1312
  sampling_temp,
1313
+ atomsel,
1314
  ],
1315
  samples=[
1316
+ ["Homomer design", "1O91", "A,B,C", "", True, 2, 0.1, ""],
1317
+ ["Monomer design", "6MRR", "A", "", False, 2, 0.1, ""],
1318
  ["Redesign of Homomer to Heteromer", "3HTN", "A,B", "C", False, 2, 0.1, ""],
1319
+ [
1320
+ "Redesign of MID1 scaffold keeping binding site fixed",
1321
+ "3V1C",
1322
+ "A,B",
1323
+ "",
1324
+ False,
1325
+ 2,
1326
+ 0.1,
1327
+ "within 5 of resname ZN",
1328
+ ],
1329
+ [
1330
+ "Redesign of DNA binding protein",
1331
+ "3JRD",
1332
+ "A,B",
1333
+ "",
1334
+ False,
1335
+ 2,
1336
+ 0.1,
1337
+ "within 8 of nucleic",
1338
+ ],
1339
+ [
1340
+ "Surface Redesign of miniprotein",
1341
+ "7JZM",
1342
+ "A,B",
1343
+ "",
1344
+ False,
1345
+ 2,
1346
+ 0.1,
1347
+ "chain B or (chain A and sasa < 20)",
1348
+ ],
1349
  ],
1350
  )
1351
 
 
1352
  gr.Markdown("# Output")
1353
 
1354
  with gr.Tabs():
 
1383
  with gr.Row():
1384
  with gr.Row():
1385
  chosen_seq = gr.Dropdown(
1386
+ choices=[],
1387
+ label="Select a sequence for validation",
1388
+ visible=False,
1389
  )
1390
  num_recycles = gr.Dropdown(
1391
  choices=[0, 1, 3, 5], value=3, label="num Recycles"
 
1395
  mol = gr.HTML()
1396
  with gr.Column():
1397
  gr.Markdown("## Metrics")
1398
+ p = {
1399
+ 0: {
1400
+ "Seq": "NA",
1401
+ "RMSD": "NA",
1402
+ "Score": "NA",
1403
+ "Recovery": "NA",
1404
+ "Mean pLDDT": "NA",
1405
+ }
1406
+ }
1407
+ placeholder = pd.DataFrame.from_dict(p, orient="index")
1408
+ results = gr.Dataframe(
1409
+ placeholder,
1410
+ interactive=False,
1411
+ row_count=(1, "dynamic"),
1412
+ headers=["Seq", "RMSD", "Score", "Recovery", "Mean pLDDT"],
1413
+ )
1414
  plotAF_plddt = gr.Plot(label="pLDDT")
1415
  # remove maxh80 class from css
1416
+ plotAF_pae = gr.Gallery(label="PAE plots") # gr.Plot(label="PAE")
1417
  tempFile = gr.Variable()
1418
  selectedResidues = gr.Variable()
1419
  seq_dict = gr.Variable()
 
1440
  tempFile,
1441
  chosen_seq,
1442
  selectedResidues,
1443
+ seq_dict,
1444
  ],
1445
  )
1446
  btnAF.click(
requirements.txt CHANGED
@@ -6,8 +6,8 @@ dm-haiku==0.0.5
6
  dm-tree==0.1.6
7
  docker==5.0.0
8
  immutabledict==2.0.0
9
- https://storage.googleapis.com/jax-releases/jax/jax-0.3.7.tar.gz
10
- https://storage.googleapis.com/jax-releases/cuda11/jaxlib-0.3.7+cuda11.cudnn805-cp38-none-manylinux2014_x86_64.whl
11
  ml-collections==0.1.0
12
  numpy
13
  pandas==1.3.4
@@ -18,7 +18,7 @@ plotly
18
  GPUtil
19
  ray
20
  tqdm
21
- gradio==3.0.11
22
  protobuf<4
23
  mdtraj
24
  -f https://storage.googleapis.com/jax-releases/jax_releases.html
 
6
  dm-tree==0.1.6
7
  docker==5.0.0
8
  immutabledict==2.0.0
9
+ jax==0.3.7
10
+ jaxlib==0.3.7
11
  ml-collections==0.1.0
12
  numpy
13
  pandas==1.3.4
 
18
  GPUtil
19
  ray
20
  tqdm
21
+ gradio==3.1.4
22
  protobuf<4
23
  mdtraj
24
  -f https://storage.googleapis.com/jax-releases/jax_releases.html