Spaces:

wenkai
/

FAPM_demo

Sleeping

App Files Files Community

wenkai committed on Jun 25

Commit

72b0e49

•

1 Parent(s): d3edc5a

Update app.py

Browse files

Files changed (1) hide show

app.py +65 -70

app.py CHANGED Viewed

@@ -1,44 +1,41 @@
-import os
-import torch
-import torch.nn as nn
-import pandas as pd
-import torch.nn.functional as F
-from lavis.models.protein_models.protein_function_opt import Blip2ProteinMistral
-from lavis.models.base_model import FAPMConfig
-import spaces
 import gradio as gr
-from esm_scripts.extract import run_demo
 from esm import pretrained, FastaBatchedDataset
-# from transformers import EsmTokenizer, EsmModel
-# Load the model
-model = Blip2ProteinMistral(config=FAPMConfig(), esm_size='3b')
-model.load_checkpoint("model/checkpoint_mf2.pth")
-# model.to('cuda')
-model_esm, alphabet = pretrained.load_model_and_alphabet('esm2_t36_3B_UR50D')
-# model_esm.to('cuda')
-model_esm.eval()
-# tokenizer = EsmTokenizer.from_pretrained("facebook/esm2_t36_3B_UR50D")
-# model_esm = EsmModel.from_pretrained("facebook/esm2_t36_3B_UR50D")
-# model_esm.to('cuda')
-# model_esm.eval()
-@spaces.GPU
-def generate_caption(protein, prompt):
-    # Process the image and the prompt
-    # with open('/home/user/app/example.fasta', 'w') as f:
-    #     f.write('>{}\n'.format("protein_name"))
-    #     f.write('{}\n'.format(protein.strip()))
-    # os.system("python esm_scripts/extract.py esm2_t36_3B_UR50D /home/user/app/example.fasta /home/user/app --repr_layers 36 --truncation_seq_length 1024 --include per_tok")
-    # esm_emb = run_demo(protein_name='protein_name', protein_seq=protein,
-    #                    model=model_esm, alphabet=alphabet,
-    #                    include='per_tok', repr_layers=[36], truncation_seq_length=1024)
     protein_name = 'protein_name'
     protein_seq = protein
     include = 'per_tok'
@@ -51,8 +48,6 @@ def generate_caption(protein, prompt):
     batches = dataset.get_batch_indices(toks_per_batch, extra_toks_per_seq=1)
     print("batches prepared")
-    model_esm.to('cuda')
     data_loader = torch.utils.data.DataLoader(
         dataset, collate_fn=alphabet.get_batch_converter(truncation_seq_length), batch_sampler=batches
     )
@@ -70,7 +65,6 @@ def generate_caption(protein, prompt):
             if torch.cuda.is_available():
                 toks = toks.to(device="cuda", non_blocking=True)
             out = model_esm(toks, repr_layers=repr_layers, return_contacts=return_contacts)
-            logits = out["logits"].to(device="cpu")
             representations = {
                 layer: t.to(device="cpu") for layer, t in out["representations"].items()
             }
@@ -105,39 +99,40 @@ def generate_caption(protein, prompt):
     esm_emb = outputs.last_hidden_state.detach()[0]
     '''
     print("esm embedding generated")
-    esm_emb = F.pad(esm_emb.t(), (0, 1024 - len(esm_emb))).t().to('cuda')
-    print("esm embedding processed")
-    samples = {'name': ['protein_name'],
-               'image': torch.unsqueeze(esm_emb, dim=0),
-               'text_input': ['none'],
-               'prompt': [prompt]}
-    del model_esm
-    model.to('cuda')
-    # Generate the output
-    prediction = model.generate(samples, length_penalty=0., num_beams=15, num_captions=10, temperature=1.,
-                                repetition_penalty=1.0)
-    return prediction
-    # return "test"
-# Define the FAPM interface
-description = """Quick demonstration of the FAPM model for protein function prediction. Upload an protein sequence to generate a function description. Modify the Prompt to provide the taxonomy information.
-The model used in this app is available at [Hugging Face Model Hub](https://huggingface.co/wenkai/FAPM) and the source code can be found on [GitHub](https://github.com/xiangwenkai/FAPM/tree/main)."""
-iface = gr.Interface(
-    fn=generate_caption,
-    inputs=[gr.Textbox(type="text", label="Upload sequence"), gr.Textbox(type="text", label="Prompt")],
-    outputs=gr.Textbox(label="Generated description"),
-    description=description
-)
-# Launch the interface
-iface.launch()

 import gradio as gr
+from transformers import AutoProcessor, AutoModelForCausalLM
+import spaces
+import torch.nn.functional as F
+import requests
+import copy
+import torch
+from PIL import Image, ImageDraw, ImageFont
+import io
+import matplotlib.pyplot as plt
+import matplotlib.patches as patches
+import random
+import numpy as np
 from esm import pretrained, FastaBatchedDataset
+models = {
+    'facebook/esm2_t36_3B_UR50D': pretrained.load_model_and_alphabet('esm2_t36_3B_UR50D').to("cuda").eval(),
+    }
+processors = {
+    'microsoft/Florence-2-large-ft': AutoProcessor.from_pretrained('microsoft/Florence-2-large-ft', trust_remote_code=True),
+    'microsoft/Florence-2-large': AutoProcessor.from_pretrained('microsoft/Florence-2-large', trust_remote_code=True),
+    'microsoft/Florence-2-base-ft': AutoProcessor.from_pretrained('microsoft/Florence-2-base-ft', trust_remote_code=True),
+    'microsoft/Florence-2-base': AutoProcessor.from_pretrained('microsoft/Florence-2-base', trust_remote_code=True),
+}
+DESCRIPTION = "Esm2 embedding"
+colormap = ['blue','orange','green','purple','brown','pink','gray','olive','cyan','red',
+            'lime','indigo','violet','aqua','magenta','coral','gold','tan','skyblue']
+@spaces.GPU
+def run_example(protein, model_id='facebook/esm2_t36_3B_UR50D'):
+    model_esm, alphabet = models[model_id]
     protein_name = 'protein_name'
     protein_seq = protein
     include = 'per_tok'
     batches = dataset.get_batch_indices(toks_per_batch, extra_toks_per_seq=1)
     print("batches prepared")
     data_loader = torch.utils.data.DataLoader(
         dataset, collate_fn=alphabet.get_batch_converter(truncation_seq_length), batch_sampler=batches
     )
             if torch.cuda.is_available():
                 toks = toks.to(device="cuda", non_blocking=True)
             out = model_esm(toks, repr_layers=repr_layers, return_contacts=return_contacts)
             representations = {
                 layer: t.to(device="cpu") for layer, t in out["representations"].items()
             }
     esm_emb = outputs.last_hidden_state.detach()[0]
     '''
     print("esm embedding generated")
+    esm_emb = F.pad(esm_emb.t(), (0, 1024 - len(esm_emb))).t()
+    torch.save(esm_emb, 'example.pt')
+    return gr.File.update(value="example.pt", visible=True)
+css = """
+  #output {
+    height: 500px;
+    overflow: auto;
+    border: 1px solid #ccc;
+  }
+"""
+with gr.Blocks(css=css) as demo:
+    gr.Markdown(DESCRIPTION)
+    with gr.Tab(label="Esm2 embedding generation"):
+        with gr.Row():
+            with gr.Column():
+                input_protein = gr.Textbox(type="text", label="Upload sequence")
+                model_selector = gr.Dropdown(choices=list(models.keys()), label="Model", value='microsoft/Florence-2-large')
+                submit_btn = gr.Button(value="Submit")
+            with gr.Column():
+                button = gr.Button("Export")
+                pt = gr.File(interactive=False, visible=False)
+        # gr.Examples(
+        #     examples=[
+        #         ["image1.jpg", 'Object Detection'],
+        #     ],
+        #     inputs=[input_img, task_prompt],
+        #     outputs=[output_text, output_img],
+        #     fn=process_image,
+        #     cache_examples=True,
+        #     label='Try examples'
+        # )
+        button.click(run_example, [input_protein, model_selector], pt)
+demo.launch(debug=True)