wenkai committed on
Commit
6b16660
1 Parent(s): 061f874

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -44
app.py CHANGED
@@ -7,7 +7,6 @@ from lavis.models.protein_models.protein_function_opt import Blip2ProteinMistral
7
  from lavis.models.base_model import FAPMConfig
8
  import spaces
9
  import gradio as gr
10
- # from esm_scripts.extract import run_demo
11
  from esm import pretrained, FastaBatchedDataset
12
  from data.evaluate_data.utils import Ontology
13
  import difflib
@@ -15,30 +14,9 @@ import re
15
 
16
 
17
  # Load the model
18
- # model = Blip2ProteinMistral(config=FAPMConfig(), esm_size='3b')
19
- # model.load_checkpoint("model/checkpoint_mf2.pth")
20
- # model.to('cuda')
21
-
22
- def get_model(type='Molecule Function'):
23
- model = Blip2ProteinMistral(config=FAPMConfig(), esm_size='3b')
24
- if type == 'Molecule Function':
25
- model.load_checkpoint("model/checkpoint_mf2.pth")
26
- model.to('cuda')
27
- elif type == 'Biological Process':
28
- model.load_checkpoint("model/checkpoint_bp1.pth")
29
- model.to('cuda')
30
- # elif type == 'Cellar Component':
31
- # model.load_checkpoint("model/checkpoint_cc2.pth")
32
- # model.to('cuda')
33
- return model
34
-
35
-
36
- models = {
37
- 'Molecule Function': get_model('Molecule Function'),
38
- 'Biological Process': get_model('Biological Process'),
39
- # 'Cellar Component': get_model('Cellar Component'),
40
- }
41
-
42
 
43
  model_esm, alphabet = pretrained.load_model_and_alphabet('esm2_t36_3B_UR50D')
44
  model_esm.to('cuda')
@@ -61,7 +39,7 @@ choices = {x.lower(): x for x in choices_mf}
61
 
62
 
63
  @spaces.GPU
64
- def generate_caption(model_id, protein, prompt):
65
  # Process the image and the prompt
66
  # with open('/home/user/app/example.fasta', 'w') as f:
67
  # f.write('>{}\n'.format("protein_name"))
@@ -144,7 +122,6 @@ def generate_caption(model_id, protein, prompt):
144
  'text_input': ['none'],
145
  'prompt': [prompt]}
146
 
147
- model = models[model_id]
148
  # Generate the output
149
  prediction = model.generate(samples, length_penalty=0., num_beams=15, num_captions=10, temperature=1.,
150
  repetition_penalty=1.0)
@@ -162,15 +139,13 @@ def generate_caption(model_id, protein, prompt):
162
  if t_standard not in temp:
163
  pred_terms.append(t_standard+f'({prob})')
164
  temp.append(t_standard)
165
- res_str = "No available predictions for this protein! You can try the other two types of model or remove prompt."
 
 
 
166
  if len(pred_terms) == 0:
167
  return res_str
168
- if model_id == 'Molecule Function':
169
- res_str = f"Based on the given amino acid sequence, the protein appears to have a primary function of {', '.join(pred_terms)}"
170
- elif model_id == 'Biological Process':
171
- res_str = f"Based on the given amino acid sequence, it is likely involved in the {', '.join(pred_terms)}"
172
- elif model_id == 'Cellar Component':
173
- res_str = f"Based on the given amino acid sequence, it's subcellular localization is within the {', '.join(pred_terms)}"
174
  return res_str
175
  # return "test"
176
 
@@ -180,6 +155,7 @@ description = """Quick demonstration of the FAPM model for protein function pred
180
 
181
  The model used in this app is available at [Hugging Face Model Hub](https://huggingface.co/wenkai/FAPM) and the source code can be found on [GitHub](https://github.com/xiangwenkai/FAPM/tree/main)."""
182
 
 
183
  # iface = gr.Interface(
184
  # fn=generate_caption,
185
  # inputs=[gr.Textbox(type="text", label="Upload sequence"), gr.Textbox(type="text", label="Prompt")],
@@ -189,6 +165,7 @@ The model used in this app is available at [Hugging Face Model Hub](https://hugg
189
  # # Launch the interface
190
  # iface.launch()
191
 
 
192
  css = """
193
  #output {
194
  height: 500px;
@@ -202,29 +179,30 @@ with gr.Blocks(css=css) as demo:
202
  with gr.Tab(label="Protein caption"):
203
  with gr.Row():
204
  with gr.Column():
205
- model_selector = gr.Dropdown(choices=list(models.keys()), label="Model", value='Molecule Function')
206
  input_protein = gr.Textbox(type="text", label="Upload sequence")
 
207
  prompt = gr.Textbox(type="text", label="Taxonomy Prompt (Optional)")
208
  submit_btn = gr.Button(value="Submit")
209
  with gr.Column():
210
  output_text = gr.Textbox(label="Output Text")
211
- # O14813 train index 127, 266, 738, 1060 test index 4
212
  gr.Examples(
213
  examples=[
214
- ["Molecule Function", "MDYSYLNSYDSCVAAMEASAYGDFGACSQPGGFQYSPLRPAFPAAGPPCPALGSSNCALGALRDHQPAPYSAVPYKFFPEPSGLHEKRKQRRIRTTFTSAQLKELERVFAETHYPDIYTREELALKIDLTEARVQVWFQNRRAKFRKQERAASAKGAAGAAGAKKGEARCSSEDDDSKESTCSPTPDSTASLPPPPAPGLASPRLSPSPLPVALGSGPGPGPGPQPLKGALWAGVAGGGGGGPGAGAAELLKAWQPAESGPGPFSGVLSSFHRKPGPALKTNLF", ''],
215
- ["Molecule Function", "MKTLALFLVLVCVLGLVQSWEWPWNRKPTKFPIPSPNPRDKWCRLNLGPAWGGRC", ''],
216
- ["Molecule Function", "MAAAGGARLLRAASAVLGGPAGRWLHHAGSRAGSSGLLRNRGPGGSAEASRSLSVSARARSSSEDKITVHFINRDGETLTTKGKVGDSLLDVVVENNLDIDGFGACEGTLACSTCHLIFEDHIYEKLDAITDEENDMLDLAYGLTDRSRLGCQICLTKSMDNMTVRVPETVADARQSIDVGKTS", 'Homo'],
217
- ["Molecule Function", 'MASAELSREENVYMAKLAEQAERYEEMVEFMEKVAKTVDSEELTVEERNLLSVAYKNVIGARRASWRIISSIEQKEEGRGNEDRVTLIKDYRGKIETELTKICDGILKLLETHLVPSSTAPESKVFYLKMKGDYYRYLAEFKTGAERKDAAENTMVAYKAAQDIALAELAPTHPIRLGLALNFSVFYYEILNSPDRACSLAKQAFDEAISELDTLSEESYKDSTLIMQLLRDNLTLWTSDISEDPAEEIREAPKRDSSEGQ', 'Zea'],
218
- ["Molecule Function", 'MIKAAVTKESLYRMNTLMEAFQGFLGLDLGEFTFKVKPGVFLLTDVKSYLIGDKYDDAFNALIDFVLRNDRDAVEGTETDVSIRLGLSPSDMVVKRQDKTFTFTHGDLEFEVHWINL', 'Bacteriophage'],
219
- ["Molecule Function", 'MNDLMIQLLDQFEMGLRERAIKVMATINDEKHRFPMELNKKQCSLMLLGTTDTTTFDMRFNSKKDFPRIKGAREKYPRDAVIEWYHQNWMRTEVKQ', 'Bacteriophage'],
220
  ],
221
- inputs=[model_selector, input_protein, prompt],
222
  outputs=[output_text],
223
  fn=generate_caption,
224
  cache_examples=True,
225
  label='Try examples'
226
  )
227
- submit_btn.click(generate_caption, [model_selector, input_protein, prompt], [output_text])
 
228
 
229
  demo.launch(debug=True)
230
 
 
7
  from lavis.models.base_model import FAPMConfig
8
  import spaces
9
  import gradio as gr
 
10
  from esm import pretrained, FastaBatchedDataset
11
  from data.evaluate_data.utils import Ontology
12
  import difflib
 
14
 
15
 
16
  # Load the model
17
+ model = Blip2ProteinMistral(config=FAPMConfig(), esm_size='3b')
18
+ model.load_checkpoint("model/checkpoint_mf2.pth")
19
+ model.to('cuda')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
  model_esm, alphabet = pretrained.load_model_and_alphabet('esm2_t36_3B_UR50D')
22
  model_esm.to('cuda')
 
39
 
40
 
41
  @spaces.GPU
42
+ def generate_caption(protein, prompt):
43
  # Process the image and the prompt
44
  # with open('/home/user/app/example.fasta', 'w') as f:
45
  # f.write('>{}\n'.format("protein_name"))
 
122
  'text_input': ['none'],
123
  'prompt': [prompt]}
124
 
 
125
  # Generate the output
126
  prediction = model.generate(samples, length_penalty=0., num_beams=15, num_captions=10, temperature=1.,
127
  repetition_penalty=1.0)
 
139
  if t_standard not in temp:
140
  pred_terms.append(t_standard+f'({prob})')
141
  temp.append(t_standard)
142
+ if prompt == 'none':
143
+ res_str = "No available predictions for this protein, you can try to remove prompt!"
144
+ else:
145
+ res_str = "No available predictions for this protein, you can try another protein sequence!"
146
  if len(pred_terms) == 0:
147
  return res_str
148
+ res_str = f"Based on the given amino acid sequence, the protein appears to have a primary function of {', '.join(pred_terms)}"
 
 
 
 
 
149
  return res_str
150
  # return "test"
151
 
 
155
 
156
  The model used in this app is available at [Hugging Face Model Hub](https://huggingface.co/wenkai/FAPM) and the source code can be found on [GitHub](https://github.com/xiangwenkai/FAPM/tree/main)."""
157
 
158
+
159
  # iface = gr.Interface(
160
  # fn=generate_caption,
161
  # inputs=[gr.Textbox(type="text", label="Upload sequence"), gr.Textbox(type="text", label="Prompt")],
 
165
  # # Launch the interface
166
  # iface.launch()
167
 
168
+
169
  css = """
170
  #output {
171
  height: 500px;
 
179
  with gr.Tab(label="Protein caption"):
180
  with gr.Row():
181
  with gr.Column():
 
182
  input_protein = gr.Textbox(type="text", label="Upload sequence")
183
+ # model_selector = gr.Dropdown(choices=list(models.keys()), label="Model", value='microsoft/Florence-2-large')
184
  prompt = gr.Textbox(type="text", label="Taxonomy Prompt (Optional)")
185
  submit_btn = gr.Button(value="Submit")
186
  with gr.Column():
187
  output_text = gr.Textbox(label="Output Text")
188
+ # train index 127, 266, 738, 1060 test index 4
189
  gr.Examples(
190
  examples=[
191
+ ["MDYSYLNSYDSCVAAMEASAYGDFGACSQPGGFQYSPLRPAFPAAGPPCPALGSSNCALGALRDHQPAPYSAVPYKFFPEPSGLHEKRKQRRIRTTFTSAQLKELERVFAETHYPDIYTREELALKIDLTEARVQVWFQNRRAKFRKQERAASAKGAAGAAGAKKGEARCSSEDDDSKESTCSPTPDSTASLPPPPAPGLASPRLSPSPLPVALGSGPGPGPGPQPLKGALWAGVAGGGGGGPGAGAAELLKAWQPAESGPGPFSGVLSSFHRKPGPALKTNLF", ''],
192
+ ["MKTLALFLVLVCVLGLVQSWEWPWNRKPTKFPIPSPNPRDKWCRLNLGPAWGGRC", ''],
193
+ ["MAAAGGARLLRAASAVLGGPAGRWLHHAGSRAGSSGLLRNRGPGGSAEASRSLSVSARARSSSEDKITVHFINRDGETLTTKGKVGDSLLDVVVENNLDIDGFGACEGTLACSTCHLIFEDHIYEKLDAITDEENDMLDLAYGLTDRSRLGCQICLTKSMDNMTVRVPETVADARQSIDVGKTS", 'Homo'],
194
+ ['MASAELSREENVYMAKLAEQAERYEEMVEFMEKVAKTVDSEELTVEERNLLSVAYKNVIGARRASWRIISSIEQKEEGRGNEDRVTLIKDYRGKIETELTKICDGILKLLETHLVPSSTAPESKVFYLKMKGDYYRYLAEFKTGAERKDAAENTMVAYKAAQDIALAELAPTHPIRLGLALNFSVFYYEILNSPDRACSLAKQAFDEAISELDTLSEESYKDSTLIMQLLRDNLTLWTSDISEDPAEEIREAPKRDSSEGQ', 'Zea'],
195
+ ['MIKAAVTKESLYRMNTLMEAFQGFLGLDLGEFTFKVKPGVFLLTDVKSYLIGDKYDDAFNALIDFVLRNDRDAVEGTETDVSIRLGLSPSDMVVKRQDKTFTFTHGDLEFEVHWINL', 'Bacteriophage'],
196
+ ['MNDLMIQLLDQFEMGLRERAIKVMATINDEKHRFPMELNKKQCSLMLLGTTDTTTFDMRFNSKKDFPRIKGAREKYPRDAVIEWYHQNWMRTEVKQ', 'Bacteriophage'],
197
  ],
198
+ inputs=[input_protein, prompt],
199
  outputs=[output_text],
200
  fn=generate_caption,
201
  cache_examples=True,
202
  label='Try examples'
203
  )
204
+
205
+ submit_btn.click(generate_caption, [input_protein, prompt], [output_text])
206
 
207
  demo.launch(debug=True)
208