colbyford committed on
Commit
d0c2974
1 Parent(s): ed36f4f

Ensure all ints are ints

Browse files
Files changed (2) hide show
  1. README.md +4 -4
  2. app.py +11 -8
README.md CHANGED
@@ -1,8 +1,8 @@
1
  ---
2
- title: Evodiff
3
- emoji: 🐠
4
- colorFrom: pink
5
- colorTo: pink
6
  sdk: gradio
7
  sdk_version: 3.45.2
8
  app_file: app.py
 
1
  ---
2
+ title: EvoDiff
3
+ emoji: 🧬
4
+ colorFrom: blue
5
+ colorTo: orange
6
  sdk: gradio
7
  sdk_version: 3.45.2
8
  app_file: app.py
app.py CHANGED
@@ -70,12 +70,12 @@ def make_uncond_seq(seq_len, model_type, pred_structure):
70
  if model_type == "EvoDiff-Seq-OADM 38M":
71
  checkpoint = OA_DM_38M()
72
  model, collater, tokenizer, scheme = checkpoint
73
- tokeinzed_sample, generated_sequence = generate_oaardm(model, tokenizer, seq_len, batch_size=1, device='cpu')
74
 
75
  if model_type == "EvoDiff-D3PM-Uniform 38M":
76
  checkpoint = D3PM_UNIFORM_38M(return_all=True)
77
  model, collater, tokenizer, scheme, timestep, Q_bar, Q = checkpoint
78
- tokeinzed_sample, generated_sequence = generate_d3pm(model, tokenizer, Q, Q_bar, timestep, seq_len, batch_size=1, device='cpu')
79
 
80
  if pred_structure:
81
  path_to_pdb = predict_protein(generated_sequence)
@@ -85,11 +85,11 @@ def make_uncond_seq(seq_len, model_type, pred_structure):
85
  else:
86
  return generated_sequence, None
87
 
88
- def make_cond_seq(seq_len, msa_file, model_type, pred_structure):
89
  if model_type == "EvoDiff-MSA":
90
  checkpoint = MSA_OA_DM_MAXSUB()
91
  model, collater, tokenizer, scheme = checkpoint
92
- tokeinzed_sample, generated_sequence = generate_query_oadm_msa_simple(msa_file.name, model, tokenizer, n_sequences=64, seq_length=seq_len, device='cpu', selection_type='random')
93
 
94
  if pred_structure:
95
  path_to_pdb = predict_protein(generated_sequence)
@@ -103,7 +103,7 @@ def make_inpainted_idrs(sequence, start_idx, end_idx, model_type, pred_structure
103
  if model_type == "EvoDiff-Seq":
104
  checkpoint = OA_DM_38M()
105
  model, collater, tokenizer, scheme = checkpoint
106
- sample, entire_sequence, generated_idr = inpaint_simple(model, sequence, start_idx, end_idx, tokenizer=tokenizer, device='cpu')
107
 
108
  generated_idr_output = {
109
  "original_sequence": sequence,
@@ -125,6 +125,8 @@ def make_scaffold_motifs(pdb_code, start_idx, end_idx, scaffold_length, model_ty
125
  checkpoint = OA_DM_38M()
126
  model, collater, tokenizer, scheme = checkpoint
127
  data_top_dir = './'
 
 
128
  generated_sequence, new_start_idx, new_end_idx = generate_scaffold(model, pdb_code, start_idx, end_idx, scaffold_length, data_top_dir, tokenizer, device='cpu')
129
 
130
  generated_scaffold_output = {
@@ -162,6 +164,7 @@ csg_app = gr.Interface(
162
  inputs=[
163
  gr.Slider(10, 100, label = "Sequence Length"),
164
  gr.File(file_types=["a3m"], label = "MSA File"),
 
165
  gr.Dropdown(["EvoDiff-MSA"], value="EvoDiff-MSA", type="value", label = "Model"),
166
  gr.Checkbox(value=False, label = "Predict Structure?", visible=False)
167
  ],
@@ -178,8 +181,8 @@ idr_app = gr.Interface(
178
  fn=make_inpainted_idrs,
179
  inputs=[
180
  gr.Textbox(placeholder="DQTERTVRSFEGRRTAPYLDSRNVLTIGYGHLLNRPGANKSWEGRLTSALPREFKQRLTELAASQLHETDVRLATARAQALYGSGAYFESVPVSLNDLWFDSVFNLGERKLLNWSGLRTKLESRDWGAAAKDLGRHTFGREPVSRRMAESMRMRRGIDLNHYNI", label = "Sequence"),
181
- gr.Number(value=20, placeholder=20, label = "Start Index"),
182
- gr.Number(value=50, placeholder=50, label = "End Index"),
183
  gr.Dropdown(["EvoDiff-Seq"], value="EvoDiff-Seq", type="value", label = "Model"),
184
  gr.Checkbox(value=False, label = "Predict Structure?", visible=False)
185
  ],
@@ -197,7 +200,7 @@ scaffold_app = gr.Interface(
197
  gr.Textbox(placeholder="1prw", label = "PDB Code"),
198
  gr.Textbox(value="[15, 51]", placeholder="[15, 51]", label = "Start Index (as list)"),
199
  gr.Textbox(value="[34, 70]", placeholder="[34, 70]", label = "End Index (as list)"),
200
- gr.Number(value=75, placeholder=75, label = "Scaffold Length"),
201
  gr.Dropdown(["EvoDiff-Seq", "EvoDiff-MSA"], value="EvoDiff-Seq", type="value", label = "Model"),
202
  gr.Checkbox(value=False, label = "Predict Structure?", visible=False)
203
  ],
 
70
  if model_type == "EvoDiff-Seq-OADM 38M":
71
  checkpoint = OA_DM_38M()
72
  model, collater, tokenizer, scheme = checkpoint
73
+ tokeinzed_sample, generated_sequence = generate_oaardm(model, tokenizer, int(seq_len), batch_size=1, device='cpu')
74
 
75
  if model_type == "EvoDiff-D3PM-Uniform 38M":
76
  checkpoint = D3PM_UNIFORM_38M(return_all=True)
77
  model, collater, tokenizer, scheme, timestep, Q_bar, Q = checkpoint
78
+ tokeinzed_sample, generated_sequence = generate_d3pm(model, tokenizer, Q, Q_bar, timestep, int(seq_len), batch_size=1, device='cpu')
79
 
80
  if pred_structure:
81
  path_to_pdb = predict_protein(generated_sequence)
 
85
  else:
86
  return generated_sequence, None
87
 
88
+ def make_cond_seq(seq_len, msa_file, n_sequences, model_type, pred_structure):
89
  if model_type == "EvoDiff-MSA":
90
  checkpoint = MSA_OA_DM_MAXSUB()
91
  model, collater, tokenizer, scheme = checkpoint
92
+ tokeinzed_sample, generated_sequence = generate_query_oadm_msa_simple(msa_file.name, model, tokenizer, int(n_sequences), seq_length=int(seq_len), device='cpu', selection_type='random')
93
 
94
  if pred_structure:
95
  path_to_pdb = predict_protein(generated_sequence)
 
103
  if model_type == "EvoDiff-Seq":
104
  checkpoint = OA_DM_38M()
105
  model, collater, tokenizer, scheme = checkpoint
106
+ sample, entire_sequence, generated_idr = inpaint_simple(model, sequence, int(start_idx), int(end_idx), tokenizer=tokenizer, device='cpu')
107
 
108
  generated_idr_output = {
109
  "original_sequence": sequence,
 
125
  checkpoint = OA_DM_38M()
126
  model, collater, tokenizer, scheme = checkpoint
127
  data_top_dir = './'
128
+ start_idx = list(map(int, start_idx.strip('][').split(', ')))
129
+ end_idx = list(map(int, end_idx.strip('][').split(', ')))
130
  generated_sequence, new_start_idx, new_end_idx = generate_scaffold(model, pdb_code, start_idx, end_idx, scaffold_length, data_top_dir, tokenizer, device='cpu')
131
 
132
  generated_scaffold_output = {
 
164
  inputs=[
165
  gr.Slider(10, 100, label = "Sequence Length"),
166
  gr.File(file_types=["a3m"], label = "MSA File"),
167
+ gr.Number(value=1, placeholder=1, precision=0, label = "Number of Sequences")
168
  gr.Dropdown(["EvoDiff-MSA"], value="EvoDiff-MSA", type="value", label = "Model"),
169
  gr.Checkbox(value=False, label = "Predict Structure?", visible=False)
170
  ],
 
181
  fn=make_inpainted_idrs,
182
  inputs=[
183
  gr.Textbox(placeholder="DQTERTVRSFEGRRTAPYLDSRNVLTIGYGHLLNRPGANKSWEGRLTSALPREFKQRLTELAASQLHETDVRLATARAQALYGSGAYFESVPVSLNDLWFDSVFNLGERKLLNWSGLRTKLESRDWGAAAKDLGRHTFGREPVSRRMAESMRMRRGIDLNHYNI", label = "Sequence"),
184
+ gr.Number(value=20, placeholder=20, precision=0, label = "Start Index"),
185
+ gr.Number(value=50, placeholder=50, precision=0, label = "End Index"),
186
  gr.Dropdown(["EvoDiff-Seq"], value="EvoDiff-Seq", type="value", label = "Model"),
187
  gr.Checkbox(value=False, label = "Predict Structure?", visible=False)
188
  ],
 
200
  gr.Textbox(placeholder="1prw", label = "PDB Code"),
201
  gr.Textbox(value="[15, 51]", placeholder="[15, 51]", label = "Start Index (as list)"),
202
  gr.Textbox(value="[34, 70]", placeholder="[34, 70]", label = "End Index (as list)"),
203
+ gr.Number(value=75, placeholder=75, precision=0, label = "Scaffold Length"),
204
  gr.Dropdown(["EvoDiff-Seq", "EvoDiff-MSA"], value="EvoDiff-Seq", type="value", label = "Model"),
205
  gr.Checkbox(value=False, label = "Predict Structure?", visible=False)
206
  ],