Commit 1a0324b by wenkai
Parent(s): cdf31f1

Update app.py

Files changed (1):
  app.py: +34, -20
app.py CHANGED
@@ -9,17 +9,20 @@ import spaces
 import gradio as gr
 from esm_scripts.extract import run_demo
 from esm import pretrained, FastaBatchedDataset
+
 # from transformers import EsmTokenizer, EsmModel
 
 
 # Load the model
-model = Blip2ProteinMistral(config=FAPMConfig(), esm_size='3b')
-model.load_checkpoint("model/checkpoint_mf2.pth")
-model.to('cuda')
+# model = Blip2ProteinMistral(config=FAPMConfig(), esm_size='3b')
+# model.load_checkpoint("model/checkpoint_mf2.pth")
+# model.to('cuda')
+
+# model_esm, alphabet = pretrained.load_model_and_alphabet('esm2_t36_3B_UR50D')
+# model_esm.to('cuda')
+# model_esm.eval()
+
 
-model_esm, alphabet = pretrained.load_model_and_alphabet('esm2_t36_3B_UR50D')
-model_esm.to('cuda')
-model_esm.eval()
 # tokenizer = EsmTokenizer.from_pretrained("facebook/esm2_t36_3B_UR50D")
 # model_esm = EsmModel.from_pretrained("facebook/esm2_t36_3B_UR50D")
 # model_esm.to('cuda')
@@ -32,22 +35,26 @@ def generate_caption(protein, prompt):
     # f.write('>{}\n'.format("protein_name"))
     # f.write('{}\n'.format(protein.strip()))
     # os.system("python esm_scripts/extract.py esm2_t36_3B_UR50D /home/user/app/example.fasta /home/user/app --repr_layers 36 --truncation_seq_length 1024 --include per_tok")
-    # esm_emb = run_demo(protein_name='protein_name', protein_seq=protein,
-    #                    model=model_esm, alphabet=alphabet,
+    # esm_emb = run_demo(protein_name='protein_name', protein_seq=protein,
+    #                    model=model_esm, alphabet=alphabet,
     #                    include='per_tok', repr_layers=[36], truncation_seq_length=1024)
-
-    protein_name='protein_name'
-    protein_seq=protein
-    include='per_tok'
-    repr_layers=[36]
-    truncation_seq_length=1024
-    toks_per_batch=4096
+
+    protein_name = 'protein_name'
+    protein_seq = protein
+    include = 'per_tok'
+    repr_layers = [36]
+    truncation_seq_length = 1024
+    toks_per_batch = 4096
     print("start")
     dataset = FastaBatchedDataset([protein_name], [protein_seq])
     print("dataset prepared")
     batches = dataset.get_batch_indices(toks_per_batch, extra_toks_per_seq=1)
     print("batches prepared")
-
+
+    model_esm, alphabet = pretrained.load_model_and_alphabet('esm2_t36_3B_UR50D')
+    model_esm.to('cuda')
+    model_esm.eval()
+
     data_loader = torch.utils.data.DataLoader(
         dataset, collate_fn=alphabet.get_batch_converter(truncation_seq_length), batch_sampler=batches
     )
@@ -78,12 +85,12 @@ def generate_caption(protein, prompt):
                 # See https://github.com/pytorch/pytorch/issues/1995
                 if "per_tok" in include:
                     result["representations"] = {
-                        layer: t[i, 1 : truncate_len + 1].clone()
+                        layer: t[i, 1: truncate_len + 1].clone()
                         for layer, t in representations.items()
                     }
                 if "mean" in include:
                     result["mean_representations"] = {
-                        layer: t[i, 1 : truncate_len + 1].mean(0).clone()
+                        layer: t[i, 1: truncate_len + 1].mean(0).clone()
                         for layer, t in representations.items()
                     }
                 if "bos" in include:
@@ -106,18 +113,25 @@ def generate_caption(protein, prompt):
                'image': torch.unsqueeze(esm_emb, dim=0),
                'text_input': ['none'],
                'prompt': [prompt]}
+
+    del model_esm
+
+    model = Blip2ProteinMistral(config=FAPMConfig(), esm_size='3b')
+    model.load_checkpoint("model/checkpoint_mf2.pth")
+    model.to('cuda')
     # Generate the output
-    prediction = model.generate(samples, length_penalty=0., num_beams=15, num_captions=10, temperature=1., repetition_penalty=1.0)
+    prediction = model.generate(samples, length_penalty=0., num_beams=15, num_captions=10, temperature=1.,
+                                repetition_penalty=1.0)
 
     return prediction
     # return "test"
 
+
 # Define the FAPM interface
 description = """Quick demonstration of the FAPM model for protein function prediction. Upload an protein sequence to generate a function description. Modify the Prompt to provide the taxonomy information.
 
 The model used in this app is available at [Hugging Face Model Hub](https://huggingface.co/wenkai/FAPM) and the source code can be found on [GitHub](https://github.com/xiangwenkai/FAPM/tree/main)."""
 
-
 iface = gr.Interface(
     fn=generate_caption,
     inputs=[gr.Textbox(type="text", label="Upload sequence"), gr.Textbox(type="text", label="Prompt")],
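Taken together, the second and third hunks inline the per-token embedding logic of esm_scripts/extract.py into generate_caption. The sketch below condenses that path for a single sequence; it follows the fair-esm calls visible in the diff, but embed_esm2 is a hypothetical wrapper name and layer 36 is simply the value the app uses.

import torch
from esm import pretrained, FastaBatchedDataset


def embed_esm2(protein_seq, repr_layer=36, truncation_seq_length=1024):
    # Load ESM-2 3B and move it to the GPU, as the commit now does on demand.
    model_esm, alphabet = pretrained.load_model_and_alphabet('esm2_t36_3B_UR50D')
    model_esm.to('cuda')
    model_esm.eval()

    # One-sequence dataset, batched the same way the diff does it.
    dataset = FastaBatchedDataset(['protein_name'], [protein_seq])
    batches = dataset.get_batch_indices(4096, extra_toks_per_seq=1)
    data_loader = torch.utils.data.DataLoader(
        dataset,
        collate_fn=alphabet.get_batch_converter(truncation_seq_length),
        batch_sampler=batches,
    )

    with torch.no_grad():
        for labels, strs, toks in data_loader:
            out = model_esm(toks.to('cuda'), repr_layers=[repr_layer], return_contacts=False)
            t = out['representations'][repr_layer]
            truncate_len = min(truncation_seq_length, len(strs[0]))
            # Index 0 is the BOS token, hence the 1 : truncate_len + 1 slice
            # that hunk three reformats.
            return t[0, 1:truncate_len + 1].clone()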
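The substantive change is when the two large models occupy the GPU: nothing is loaded at import time any more; generate_caption loads the 3B-parameter ESM-2 encoder, computes esm_emb, deletes the encoder, and only then loads Blip2ProteinMistral, presumably so the two never have to fit in the Space's GPU allocation at once. One hedge on the del model_esm step: in PyTorch, deleting a reference does not by itself return cached VRAM to the driver, so a stricter hand-off would also collect garbage and empty the allocator cache. A minimal sketch of that pattern; the gc/empty_cache calls are an addition, not part of the commit, and Blip2ProteinMistral and FAPMConfig are imported elsewhere in app.py, outside the hunks shown:

import gc
import torch

# model_esm is the encoder loaded earlier inside generate_caption().
del model_esm               # what the commit does: drop the last reference
gc.collect()                # suggested addition: make sure the object is collected
torch.cuda.empty_cache()    # suggested addition: hand cached blocks back to the driver

model = Blip2ProteinMistral(config=FAPMConfig(), esm_size='3b')
model.load_checkpoint("model/checkpoint_mf2.pth")
model.to('cuda')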
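Since the handler now does all of its own loading, it can also be smoke-tested without the UI. A hypothetical check, assuming the two-textbox signature shown above; the sequence is a placeholder and the prompt carries the taxonomy, per the app description:

if __name__ == '__main__':
    # Placeholder sequence; the prompt supplies taxonomy information.
    preds = generate_caption('MKTAYIAKQRQISFVKSHFSRQLEERLGLIEVQ', 'Homo sapiens')
    print(preds)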