biomed-multi-alignment

Sleeping

App Files Files Community

matanninio committed on Dec 2, 2024

Commit

0c8cec9

1 Parent(s): 994bf05

added the protein solubility demo

Browse files

Files changed (2) hide show

app.py +14 -1
mammal_demo/ps_task.py +127 -0

app.py CHANGED Viewed

@@ -4,6 +4,7 @@ from mammal_demo.demo_framework import MammalObjectBroker, MammalTask
 from mammal_demo.dti_task import DtiTask
 from mammal_demo.ppi_task import PpiTask
 from mammal_demo.tcr_task import TcrTask
 all_tasks: dict[str, MammalTask] = dict()
 all_models: dict[str, MammalObjectBroker] = dict()
@@ -22,6 +23,10 @@ tcr_task = TcrTask(model_dict=all_models)
 all_tasks[tcr_task.name] = tcr_task
 # create the model holders. hold the model and the tokenizer, lazy download
 # note that the list of relevant tasks needs to be stated.
 ppi_model = MammalObjectBroker(
@@ -41,6 +46,13 @@ tcr_model = MammalObjectBroker(
 )
 all_models[tcr_model.name] = tcr_model
 def create_application():
     def task_change(value):
         visibility = [gr.update(visible=(task == value)) for task in all_tasks.keys()]
@@ -95,7 +107,8 @@ full_demo = None
 def main():
     global full_demo
     full_demo = create_application()
-    full_demo.launch(show_error=True, share=True)
 if __name__ == "__main__":

 from mammal_demo.dti_task import DtiTask
 from mammal_demo.ppi_task import PpiTask
 from mammal_demo.tcr_task import TcrTask
+from mammal_demo.ps_task import PsTask
 all_tasks: dict[str, MammalTask] = dict()
 all_models: dict[str, MammalObjectBroker] = dict()
 all_tasks[tcr_task.name] = tcr_task
+ps_task = PsTask(model_dict=all_models)
+all_tasks[ps_task.name] = ps_task
 # create the model holders. hold the model and the tokenizer, lazy download
 # note that the list of relevant tasks needs to be stated.
 ppi_model = MammalObjectBroker(
 )
 all_models[tcr_model.name] = tcr_model
+ps_model = MammalObjectBroker(
+    model_path="ibm/biomed.omics.bl.sm.ma-ted-458m.protein_solubility",
+    task_list=[ps_task.name]
+)
+all_models[ps_model.name] = ps_model
 def create_application():
     def task_change(value):
         visibility = [gr.update(visible=(task == value)) for task in all_tasks.keys()]
 def main():
     global full_demo
     full_demo = create_application()
+    full_demo.launch(show_error=True, share=False)
+    # full_demo.launch(show_error=True, share=True)
 if __name__ == "__main__":

mammal_demo/ps_task.py ADDED Viewed

	@@ -0,0 +1,127 @@

+import gradio as gr
+import torch
+from fuse.data.tokenizers.modular_tokenizer.op import ModularTokenizerOp
+from mammal.examples.protein_solubility.task import ProteinSolubilityTask
+from mammal.keys import (
+    ENCODER_INPUTS_STR,
+    CLS_PRED,
+    SCORES,
+)
+from mammal.model import Mammal
+from mammal_demo.demo_framework import MammalObjectBroker, MammalTask
+class PsTask(MammalTask):
+    def __init__(self, model_dict):
+        super().__init__(name="Protein Solubility", model_dict=model_dict)
+        self.description = "Protein Solubility (PS)"
+        self.examples = {
+            "protein_seq": "LLQTGIHVRVSQPSL",
+        }
+        self.markup_text = """
+# Mammal based TODO:  T-cell receptors-peptide binding specificity demonstration
+Given the TCR beta sequance and the epitope sequacne, estimate the binding specificity.
+"""
+    def crate_sample_dict(self, sample_inputs: dict, model_holder: MammalObjectBroker):
+        """convert sample_inputs to sample_dict including creating a proper prompt
+        Args:
+            sample_inputs (dict): dictionary containing the inputs to the model
+            model_holder (MammalObjectBroker): model holder
+        Returns:
+           dict: sample_dict for feeding into model
+        """
+        sample_dict = dict(sample_inputs) # shallow copy
+        sample_dict = ProteinSolubilityTask.data_preprocessing(
+        sample_dict=sample_dict,
+        protein_sequence_key="protein_seq",
+        tokenizer_op=model_holder.tokenizer_op,
+        device=model_holder.model.device,
+        )
+        return sample_dict
+    def run_model(self, sample_dict, model: Mammal):
+        # Generate Prediction
+        batch_dict = model.generate(
+            [sample_dict],
+            output_scores=True,
+            return_dict_in_generate=True,
+            max_new_tokens=5,
+        )
+        return batch_dict
+    def decode_output(self, batch_dict, tokenizer_op: ModularTokenizerOp)-> dict:
+        """
+        Extract predicted class and scores
+        """
+        ans_dict = ProteinSolubilityTask.process_model_output(
+            tokenizer_op=tokenizer_op,
+            decoder_output=batch_dict[CLS_PRED][0],
+            decoder_output_scores=batch_dict[SCORES][0],
+        )
+        ans = [
+            tokenizer_op._tokenizer.decode(batch_dict[CLS_PRED][0]),
+            ans_dict["pred"],
+            ans_dict["not_normalized_scores"].item(),
+            ans_dict["normalized_scores"].item(),
+        ]
+        return ans
+    def create_and_run_prompt(self, model_name, protein_seq):
+        model_holder = self.model_dict[model_name]
+        inputs = {
+            "protein_seq": protein_seq,
+        }
+        sample_dict = self.crate_sample_dict(
+            sample_inputs=inputs, model_holder=model_holder
+        )
+        prompt = sample_dict[ENCODER_INPUTS_STR]
+        batch_dict = self.run_model(sample_dict=sample_dict, model=model_holder.model)
+        res = prompt, *self.decode_output(batch_dict, tokenizer_op=model_holder.tokenizer_op)
+        return res
+    def create_demo(self, model_name_widget):
+        with gr.Group() as demo:
+            gr.Markdown(self.markup_text)
+            with gr.Row():
+                protein_textbox = gr.Textbox(
+                    label="Protein sequance",
+                    # info="standard",
+                    interactive=True,
+                    lines=3,
+                    value=self.examples["protein_seq"],
+                )
+            with gr.Row():
+                run_mammal = gr.Button(
+                    "Run Mammal prompt for TCL-Epitope Interaction",
+                    variant="primary",
+                )
+            with gr.Row():
+                prompt_box = gr.Textbox(label="Mammal prompt", lines=5)
+            with gr.Row():
+                decoded = gr.Textbox(label="Mammal output")
+                predicted_class = gr.Textbox(label="Mammal prediction")
+                with gr.Column():
+                    non_norm_score = gr.Number(label="Non normelized score")
+                    norm_score = gr.Number(label="Normelized score")
+                run_mammal.click(
+                    fn=self.create_and_run_prompt,
+                    inputs=[model_name_widget, protein_textbox],
+                    outputs=[prompt_box, decoded, predicted_class,non_norm_score,norm_score],
+                )
+            demo.visible = False
+            return demo