Spaces:
Sleeping
Sleeping
matanninio
committed on
Commit
•
f8080fc
1
Parent(s):
93d0d1a
improved handling of global all_models
Browse files- __init__.py → mammal_demo/__init__.py +0 -0
- demo_framework.py → mammal_demo/demo_framework.py +2 -1
- mammal_demo/dti_task.py +117 -0
- mammal_demo/ppi_task.py +152 -0
- new_app.py +7 -261
__init__.py → mammal_demo/__init__.py
RENAMED
File without changes
|
demo_framework.py → mammal_demo/demo_framework.py
RENAMED
@@ -41,10 +41,11 @@ class MammalObjectBroker():
|
|
41 |
|
42 |
|
43 |
class MammalTask(ABC):
|
44 |
-
def __init__(self, name:str) -> None:
|
45 |
self.name = name
|
46 |
self.description = None
|
47 |
self._demo = None
|
|
|
48 |
|
49 |
# @abstractmethod
|
50 |
# def _generate_prompt(self, **kwargs) -> str:
|
|
|
41 |
|
42 |
|
43 |
class MammalTask(ABC):
|
44 |
+
def __init__(self, name:str, model_dict: dict[str,MammalObjectBroker]) -> None:
    """Record the task name and the model registry on the instance.

    Args:
        name: name of the task.
        model_dict: dictionary of MammalObjectBroker objects keyed by
            string; the same dict object is stored, not a copy.
    """
    # Keep a reference to the caller's registry (no copy is made here).
    self.model_dict = model_dict
    self.name = name
    # Both start unset; this method does not populate them.
    self.description = self._demo = None
|
49 |
|
50 |
# @abstractmethod
|
51 |
# def _generate_prompt(self, **kwargs) -> str:
|
mammal_demo/dti_task.py
ADDED
@@ -0,0 +1,117 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from mammal.examples.dti_bindingdb_kd.task import DtiBindingdbKdTask
|
3 |
+
from mammal.keys import *
|
4 |
+
from mammal.model import Mammal
|
5 |
+
|
6 |
+
from mammal_demo.demo_framework import MammalObjectBroker, MammalTask
|
7 |
+
|
8 |
+
class DtiTask(MammalTask):
    """Gradio demo task for drug-target binding affinity prediction.

    Builds a sample from a target protein sequence and a drug SMILES string
    via ``DtiBindingdbKdTask.data_preprocessing``, runs the model's
    encoder-only forward pass, and reports the processed scalar prediction.
    Models are looked up by name in ``self.model_dict``.
    """

    # Key under which the processed scalar prediction is read from the batch.
    _PREDICTION_KEY = "model.out.dti_bindingdb_kd"
    # Constants handed to DtiBindingdbKdTask.process_model_output.
    _NORM_Y_MEAN = 5.79384684128215
    _NORM_Y_STD = 1.33808027428196

    def __init__(self, model_dict):
        """Set the task name, description, example inputs and intro markdown.

        Args:
            model_dict: registry of model holders keyed by model name; it is
                forwarded to ``MammalTask.__init__``.
        """
        super().__init__(name="Drug-Target Binding Affinity", model_dict=model_dict)
        self.description = "Drug-Target Binding Affinity (tdi)"
        # Default values shown in the two input textboxes.
        self.examples = {
            "target_seq": "NLMKRCTRGFRKLGKCTTLEEEKCKTLYPRGQCTCSDSKMNTHSCDCKSC",
            "drug_seq":"CC(=O)NCCC1=CNc2c1cc(OC)cc2"
        }
        self.markup_text = """
# Mammal based Target-Drug binding affinity demonstration

Given a protein sequence and a drug (in SMILES), estimate the binding affinity.
"""

    def crate_sample_dict(self, sample_inputs:dict, model_holder:MammalObjectBroker):
        """convert sample_inputs to sample_dict including creating a proper prompt

        Args:
            sample_inputs (dict): dictionary containing the inputs to the model
                (read under the keys "target_seq" and "drug_seq")
            model_holder (MammalObjectBroker): model holder; its tokenizer_op
                and its model's device are passed to the preprocessing step
        Returns:
            dict: sample_dict for feeding into model
        """
        # Work on a shallow copy so the caller's dict is left untouched.
        sample_dict = dict(sample_inputs)
        sample_dict = DtiBindingdbKdTask.data_preprocessing(
            sample_dict=sample_dict,
            tokenizer_op=model_holder.tokenizer_op,
            target_sequence_key="target_seq",
            drug_sequence_key="drug_seq",
            norm_y_mean=None,
            norm_y_std=None,
            device=model_holder.model.device,
        )
        return sample_dict

    def run_model(self, sample_dict, model: Mammal):
        """Run the encoder-only forward pass on a single-sample batch.

        Args:
            sample_dict: preprocessed sample, as returned by crate_sample_dict.
            model (Mammal): model to run.

        Returns:
            The batch dict returned by ``model.forward_encoder_only``.
        """
        return model.forward_encoder_only([sample_dict])

    def decode_output(self, batch_dict, model_holder):
        """Post-process the model output and extract the scalar prediction.

        Args:
            batch_dict: output of run_model.
            model_holder: unused here; kept so the signature matches the
                other tasks' decode_output.

        Returns:
            tuple: (prediction key, prediction of the first sample as float).
        """
        batch_dict = DtiBindingdbKdTask.process_model_output(
            batch_dict,
            scalars_preds_processed_key=self._PREDICTION_KEY,
            norm_y_mean=self._NORM_Y_MEAN,
            norm_y_std=self._NORM_Y_STD,
        )
        return self._PREDICTION_KEY, float(batch_dict[self._PREDICTION_KEY][0])

    def create_and_run_prompt(self, model_name, target_seq, drug_seq):
        """Gradio callback: build the sample, run the model, decode the result.

        Args:
            model_name: key of the model holder in ``self.model_dict``.
            target_seq: target protein sequence.
            drug_seq: drug sequence (SMILES).

        Returns:
            tuple: (prompt string, prediction key, prediction value), in the
            order of the three output widgets wired up in create_demo.

        Raises:
            KeyError: if ``model_name`` is not in ``self.model_dict``.
        """
        model_holder = self.model_dict[model_name]
        inputs = {
            "target_seq": target_seq,
            "drug_seq": drug_seq,
        }
        sample_dict = self.crate_sample_dict(sample_inputs=inputs, model_holder=model_holder)
        prompt = sample_dict[ENCODER_INPUTS_STR]
        batch_dict = self.run_model(sample_dict=sample_dict, model=model_holder.model)
        return (prompt, *self.decode_output(batch_dict, model_holder=model_holder))

    def create_demo(self, model_name_widget):
        """Build this task's Gradio widget group (created hidden).

        Args:
            model_name_widget: component whose value is passed to
                create_and_run_prompt as the model name.

        Returns:
            The ``gr.Group`` holding the task's widgets, with ``visible``
            set to False.
        """
        # NOTE(review): the widget nesting was reconstructed from a source
        # with no indentation — confirm the row layout against the running app.
        with gr.Group() as demo:
            gr.Markdown(self.markup_text)
            with gr.Row():
                target_textbox = gr.Textbox(
                    label="target sequence",
                    # info="standard",
                    interactive=True,
                    lines=3,
                    value=self.examples["target_seq"],
                )
                drug_textbox = gr.Textbox(
                    label="Drug sequence (in SMILES)",
                    # info="standard",
                    interactive=True,
                    lines=3,
                    value=self.examples["drug_seq"],
                )
            with gr.Row():
                # Label previously said "Protein-Protein Interaction"
                # (copied from the PPI task); this button runs the DTI task.
                run_mammal = gr.Button(
                    "Run Mammal prompt for Drug-Target Binding Affinity", variant="primary"
                )
            with gr.Row():
                prompt_box = gr.Textbox(label="Mammal prompt", lines=5)

            with gr.Row():
                decoded = gr.Textbox(label="Mammal output key")
                # Created explicitly (instead of inline in click()) so the
                # third output widget is visible in the layout code.
                affinity_box = gr.Number(label="binding affinity")
                run_mammal.click(
                    fn=self.create_and_run_prompt,
                    inputs=[model_name_widget, target_textbox, drug_textbox],
                    outputs=[prompt_box, decoded, affinity_box],
                )
        demo.visible = False
        return demo
|
mammal_demo/ppi_task.py
ADDED
@@ -0,0 +1,152 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import torch
|
3 |
+
from fuse.data.tokenizers.modular_tokenizer.op import ModularTokenizerOp
|
4 |
+
from mammal.examples.dti_bindingdb_kd.task import DtiBindingdbKdTask
|
5 |
+
from mammal.keys import *
|
6 |
+
from mammal.model import Mammal
|
7 |
+
|
8 |
+
from mammal_demo.demo_framework import MammalObjectBroker, MammalTask
|
9 |
+
|
10 |
+
|
11 |
+
|
12 |
+
class PpiTask(MammalTask):
    """Gradio demo task for protein-protein interaction (PPI) prediction.

    Formats two protein sequences into a single prompt, tokenizes it with the
    selected model holder's tokenizer, calls ``model.generate`` and reports
    the decoded output together with the score of the "<1>" token.
    Models are looked up by name in ``self.model_dict``.
    """

    def __init__(self, model_dict):
        """Set the task name, description, example inputs and intro markdown.

        Args:
            model_dict: registry of model holders keyed by model name; it is
                forwarded to ``MammalTask.__init__``.
        """
        super().__init__(name="Protein-Protein Interaction", model_dict=model_dict)
        self.description = "Protein-Protein Interaction (PPI)"
        # Default values shown in the two input textboxes.
        self.examples = {
            "protein_calmodulin": "MADQLTEEQIAEFKEAFSLFDKDGDGTITTKELGTVMRSLGQNPTEAELQDMISELDQDGFIDKEDLHDGDGKISFEEFLNLVNKEMTADVDGDGQVNYEEFVTMMTSK",
            "protein_calcineurin": "MSSKLLLAGLDIERVLAEKNFYKEWDTWIIEAMNVGDEEVDRIKEFKEDEIFEEAKTLGTAEMQEYKKQKLEEAIEGAFDIFDKDGNGYISAAELRHVMTNLGEKLTDEEVDEMIRQMWDQNGDWDRIKELKFGEIKKLSAKDTRGTIFIKVFENLGTGVDSEYEDVSKYMLKHQ",
        }
        # Must be an f-string: a plain literal would show the text
        # "{self.description}" verbatim in the rendered markdown.
        self.markup_text = f"""
# Mammal based {self.description} demonstration

Given two protein sequences, estimate if the proteins interact or not."""

    @staticmethod
    def positive_token_id(model_holder: MammalObjectBroker):
        """token for positive binding

        Args:
            model_holder (MammalObjectBroker): holder whose tokenizer_op is queried

        Returns:
            int: id of positive binding token ("<1>")
        """
        return model_holder.tokenizer_op.get_token_id("<1>")

    def generate_prompt(self, prot1, prot2):
        """Formatting prompt to match pre-training syntax

        Args:
            prot1 (str): sequence of protein number 1
            prot2 (str): sequence of protein number 2

        Returns:
            str: prompt
        """
        # Every piece that contains a placeholder needs its own ``f`` prefix:
        # in implicit literal concatenation the prefix applies per literal,
        # so a single leading f"" leaves "{prot1}"/"{prot2}" uninterpolated.
        return (
            "<@TOKENIZER-TYPE=AA><BINDING_AFFINITY_CLASS><SENTINEL_ID_0>"
            "<MOLECULAR_ENTITY><MOLECULAR_ENTITY_GENERAL_PROTEIN>"
            f"<SEQUENCE_NATURAL_START>{prot1}<SEQUENCE_NATURAL_END>"
            "<MOLECULAR_ENTITY><MOLECULAR_ENTITY_GENERAL_PROTEIN>"
            f"<SEQUENCE_NATURAL_START>{prot2}<SEQUENCE_NATURAL_END><EOS>"
        )

    def crate_sample_dict(self, sample_inputs: dict, model_holder: MammalObjectBroker):
        """Build the prompt from the inputs and tokenize it.

        Args:
            sample_inputs (dict): must hold the two sequences under the keys
                "prot1" and "prot2".
            model_holder (MammalObjectBroker): holder whose tokenizer_op is
                applied to the prompt.

        Returns:
            dict: sample dict holding the prompt string plus the token ids
            and attention mask converted to tensors.

        Raises:
            KeyError: if "prot1" or "prot2" is missing from sample_inputs.
        """
        sample_dict = dict()
        # Pass the sequences themselves; ``*sample_inputs`` would unpack the
        # dict's keys and put the strings "prot1"/"prot2" into the prompt.
        sample_dict[ENCODER_INPUTS_STR] = self.generate_prompt(
            sample_inputs["prot1"], sample_inputs["prot2"]
        )

        # Tokenize
        sample_dict = model_holder.tokenizer_op(
            sample_dict=sample_dict,
            key_in=ENCODER_INPUTS_STR,
            key_out_tokens_ids=ENCODER_INPUTS_TOKENS,
            key_out_attention_mask=ENCODER_INPUTS_ATTENTION_MASK,
        )
        # The tokenizer outputs are wrapped into tensors before generation.
        for key in (ENCODER_INPUTS_TOKENS, ENCODER_INPUTS_ATTENTION_MASK):
            sample_dict[key] = torch.tensor(sample_dict[key])
        return sample_dict

    def run_model(self, sample_dict, model: Mammal):
        """Generate a prediction for a single-sample batch.

        Args:
            sample_dict: tokenized sample, as returned by crate_sample_dict.
            model (Mammal): model to run.

        Returns:
            The batch dict returned by ``model.generate`` (called with scores
            enabled and at most 5 new tokens).
        """
        return model.generate(
            [sample_dict],
            output_scores=True,
            return_dict_in_generate=True,
            max_new_tokens=5,
        )

    def decode_output(self, batch_dict, model_holder: MammalObjectBroker):
        """Decode the generated tokens and read the positive-class score.

        Args:
            batch_dict: output of run_model.
            model_holder (MammalObjectBroker): holder providing the tokenizer.

        Returns:
            tuple: (decoded output string, score of the "<1>" token as float).
        """
        generated_output = model_holder.tokenizer_op._tokenizer.decode(batch_dict[CLS_PRED][0])
        # NOTE(review): [0][1] presumably selects the first sample and the
        # second generated position — confirm against the model's output layout.
        score = batch_dict["model.out.scores"][0][1][self.positive_token_id(model_holder)].item()

        return generated_output, score

    def create_and_run_prompt(self, model_name, protein1, protein2):
        """Gradio callback: build the sample, run the model, decode the result.

        Args:
            model_name: key of the model holder in ``self.model_dict``.
            protein1: sequence of the first protein.
            protein2: sequence of the second protein.

        Returns:
            tuple: (prompt string, decoded output, score), in the order of
            the three output widgets wired up in create_demo.

        Raises:
            KeyError: if ``model_name`` is not in ``self.model_dict``.
        """
        model_holder = self.model_dict[model_name]
        sample_inputs = {"prot1": protein1, "prot2": protein2}
        sample_dict = self.crate_sample_dict(sample_inputs=sample_inputs, model_holder=model_holder)
        prompt = sample_dict[ENCODER_INPUTS_STR]
        batch_dict = self.run_model(sample_dict=sample_dict, model=model_holder.model)
        return (prompt, *self.decode_output(batch_dict, model_holder=model_holder))

    def create_demo(self, model_name_widget: "gr.components.Component"):
        """Build this task's Gradio widget group (created hidden).

        Args:
            model_name_widget: component whose value is passed to
                create_and_run_prompt as the model name.

        Returns:
            The ``gr.Group`` holding the task's widgets, with ``visible``
            set to False.
        """
        # NOTE(review): the widget nesting was reconstructed from a source
        # with no indentation — confirm the row layout against the running app.
        with gr.Group() as demo:
            gr.Markdown(self.markup_text)
            with gr.Row():
                prot1 = gr.Textbox(
                    label="Protein 1 sequence",
                    # info="standard",
                    interactive=True,
                    lines=3,
                    value=self.examples["protein_calmodulin"],
                )
                prot2 = gr.Textbox(
                    label="Protein 2 sequence",
                    # info="standard",
                    interactive=True,
                    lines=3,
                    value=self.examples["protein_calcineurin"],
                )
            with gr.Row():
                run_mammal = gr.Button(
                    "Run Mammal prompt for Protein-Protein Interaction", variant="primary"
                )
            with gr.Row():
                prompt_box = gr.Textbox(label="Mammal prompt", lines=5)

            with gr.Row():
                decoded = gr.Textbox(label="Mammal output")
                # Created explicitly (instead of inline in click()) so the
                # third output widget is visible in the layout code.
                score_box = gr.Number(label="PPI score")
                run_mammal.click(
                    fn=self.create_and_run_prompt,
                    inputs=[model_name_widget, prot1, prot2],
                    outputs=[prompt_box, decoded, score_box],
                )
            with gr.Row():
                gr.Markdown(
                    "```<SENTINEL_ID_0>``` contains the binding affinity class, which is ```<1>``` for interacting and ```<0>``` for non-interacting"
                )
        demo.visible = False
        return demo
|
new_app.py
CHANGED
@@ -1,273 +1,19 @@
|
|
1 |
import gradio as gr
|
2 |
-
import torch
|
3 |
-
from fuse.data.tokenizers.modular_tokenizer.op import ModularTokenizerOp
|
4 |
-
from mammal.examples.dti_bindingdb_kd.task import DtiBindingdbKdTask
|
5 |
from mammal.keys import *
|
6 |
-
from mammal.model import Mammal
|
7 |
|
8 |
-
from demo_framework import MammalObjectBroker
|
9 |
|
10 |
-
all_tasks = dict()
|
11 |
-
all_models= dict()
|
12 |
-
|
13 |
-
class PpiTask(MammalTask):
|
14 |
-
def __init__(self):
|
15 |
-
super().__init__(name="Protein-Protein Interaction")
|
16 |
-
self.description = "Protein-Protein Interaction (PPI)"
|
17 |
-
self.examples = {
|
18 |
-
"protein_calmodulin": "MADQLTEEQIAEFKEAFSLFDKDGDGTITTKELGTVMRSLGQNPTEAELQDMISELDQDGFIDKEDLHDGDGKISFEEFLNLVNKEMTADVDGDGQVNYEEFVTMMTSK",
|
19 |
-
"protein_calcineurin": "MSSKLLLAGLDIERVLAEKNFYKEWDTWIIEAMNVGDEEVDRIKEFKEDEIFEEAKTLGTAEMQEYKKQKLEEAIEGAFDIFDKDGNGYISAAELRHVMTNLGEKLTDEEVDEMIRQMWDQNGDWDRIKELKFGEIKKLSAKDTRGTIFIKVFENLGTGVDSEYEDVSKYMLKHQ",
|
20 |
-
}
|
21 |
-
self.markup_text = """
|
22 |
-
# Mammal based {self.description} demonstration
|
23 |
-
|
24 |
-
Given two protein sequences, estimate if the proteins interact or not."""
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
@staticmethod
|
29 |
-
def positive_token_id(model_holder: MammalObjectBroker):
|
30 |
-
"""token for positive binding
|
31 |
-
|
32 |
-
Args:
|
33 |
-
model (MammalTrainedModel): model holding tokenizer
|
34 |
-
|
35 |
-
Returns:
|
36 |
-
int: id of positive binding token
|
37 |
-
"""
|
38 |
-
return model_holder.tokenizer_op.get_token_id("<1>")
|
39 |
-
|
40 |
-
def generate_prompt(self, prot1, prot2):
|
41 |
-
"""Formatting prompt to match pre-training syntax
|
42 |
-
|
43 |
-
Args:
|
44 |
-
prot1 (str): sequance of protein number 1
|
45 |
-
prot2 (str): sequance of protein number 2
|
46 |
-
|
47 |
-
Returns:
|
48 |
-
str: prompt
|
49 |
-
"""
|
50 |
-
prompt = f"<@TOKENIZER-TYPE=AA><BINDING_AFFINITY_CLASS><SENTINEL_ID_0>"\
|
51 |
-
"<MOLECULAR_ENTITY><MOLECULAR_ENTITY_GENERAL_PROTEIN>"\
|
52 |
-
"<SEQUENCE_NATURAL_START>{prot1}<SEQUENCE_NATURAL_END>"\
|
53 |
-
"<MOLECULAR_ENTITY><MOLECULAR_ENTITY_GENERAL_PROTEIN>"\
|
54 |
-
"<SEQUENCE_NATURAL_START>{prot2}<SEQUENCE_NATURAL_END><EOS>"
|
55 |
-
return prompt
|
56 |
-
|
57 |
-
|
58 |
-
def crate_sample_dict(self,sample_inputs: dict, model_holder:MammalObjectBroker):
|
59 |
-
# Create and load sample
|
60 |
-
sample_dict = dict()
|
61 |
-
prompt = self.generate_prompt(*sample_inputs)
|
62 |
-
sample_dict[ENCODER_INPUTS_STR] = prompt
|
63 |
-
|
64 |
-
# Tokenize
|
65 |
-
sample_dict = model_holder.tokenizer_op(
|
66 |
-
sample_dict=sample_dict,
|
67 |
-
key_in=ENCODER_INPUTS_STR,
|
68 |
-
key_out_tokens_ids=ENCODER_INPUTS_TOKENS,
|
69 |
-
key_out_attention_mask=ENCODER_INPUTS_ATTENTION_MASK,
|
70 |
-
)
|
71 |
-
sample_dict[ENCODER_INPUTS_TOKENS] = torch.tensor(
|
72 |
-
sample_dict[ENCODER_INPUTS_TOKENS]
|
73 |
-
)
|
74 |
-
sample_dict[ENCODER_INPUTS_ATTENTION_MASK] = torch.tensor(
|
75 |
-
sample_dict[ENCODER_INPUTS_ATTENTION_MASK]
|
76 |
-
)
|
77 |
-
return sample_dict
|
78 |
-
|
79 |
-
def run_model(self, sample_dict, model: Mammal):
|
80 |
-
# Generate Prediction
|
81 |
-
batch_dict = model.generate(
|
82 |
-
[sample_dict],
|
83 |
-
output_scores=True,
|
84 |
-
return_dict_in_generate=True,
|
85 |
-
max_new_tokens=5,
|
86 |
-
)
|
87 |
-
return batch_dict
|
88 |
-
|
89 |
-
def decode_output(self,batch_dict, model_holder:MammalObjectBroker):
|
90 |
-
|
91 |
-
# Get output
|
92 |
-
generated_output = model_holder.tokenizer_op._tokenizer.decode(batch_dict[CLS_PRED][0])
|
93 |
-
score = batch_dict["model.out.scores"][0][1][self.positive_token_id(model_holder)].item()
|
94 |
-
|
95 |
-
return generated_output, score
|
96 |
-
|
97 |
-
|
98 |
-
def create_and_run_prompt(self,model_name,protein1, protein2):
|
99 |
-
model_holder = all_models[model_name]
|
100 |
-
sample_inputs = {"prot1":protein1,
|
101 |
-
"prot2":protein2
|
102 |
-
}
|
103 |
-
sample_dict = self.crate_sample_dict(sample_inputs=sample_inputs, model_holder=model_holder)
|
104 |
-
prompt = sample_dict[ENCODER_INPUTS_STR]
|
105 |
-
batch_dict = self.run_model(sample_dict=sample_dict, model=model_holder.model)
|
106 |
-
res = prompt, *self.decode_output(batch_dict,model_holder=model_holder)
|
107 |
-
return res
|
108 |
-
|
109 |
-
|
110 |
-
def create_demo(self,model_name_widget:gr.component):
|
111 |
-
|
112 |
-
# """
|
113 |
-
# ### Using the model from
|
114 |
|
115 |
-
|
116 |
-
|
117 |
-
with gr.Group() as demo:
|
118 |
-
gr.Markdown(self.markup_text)
|
119 |
-
with gr.Row():
|
120 |
-
prot1 = gr.Textbox(
|
121 |
-
label="Protein 1 sequence",
|
122 |
-
# info="standard",
|
123 |
-
interactive=True,
|
124 |
-
lines=3,
|
125 |
-
value=self.examples["protein_calmodulin"],
|
126 |
-
)
|
127 |
-
prot2 = gr.Textbox(
|
128 |
-
label="Protein 2 sequence",
|
129 |
-
# info="standard",
|
130 |
-
interactive=True,
|
131 |
-
lines=3,
|
132 |
-
value=self.examples["protein_calcineurin"],
|
133 |
-
)
|
134 |
-
with gr.Row():
|
135 |
-
run_mammal: gr.Button = gr.Button(
|
136 |
-
"Run Mammal prompt for Protein-Protein Interaction", variant="primary"
|
137 |
-
)
|
138 |
-
with gr.Row():
|
139 |
-
prompt_box = gr.Textbox(label="Mammal prompt", lines=5)
|
140 |
|
141 |
-
|
142 |
-
|
143 |
-
run_mammal.click(
|
144 |
-
fn=self.create_and_run_prompt,
|
145 |
-
inputs=[model_name_widget, prot1, prot2],
|
146 |
-
outputs=[prompt_box, decoded, gr.Number(label="PPI score")],
|
147 |
-
)
|
148 |
-
with gr.Row():
|
149 |
-
gr.Markdown(
|
150 |
-
"```<SENTINEL_ID_0>``` contains the binding affinity class, which is ```<1>``` for interacting and ```<0>``` for non-interacting"
|
151 |
-
)
|
152 |
-
demo.visible = False
|
153 |
-
return demo
|
154 |
|
155 |
-
ppi_task = PpiTask()
|
156 |
all_tasks[ppi_task.name]=ppi_task
|
157 |
|
158 |
-
|
159 |
-
class DtiTask(MammalTask):
|
160 |
-
def __init__(self):
|
161 |
-
super().__init__(name="Drug-Target Binding Affinity")
|
162 |
-
self.description = "Drug-Target Binding Affinity (tdi)"
|
163 |
-
self.examples = {
|
164 |
-
"target_seq": "NLMKRCTRGFRKLGKCTTLEEEKCKTLYPRGQCTCSDSKMNTHSCDCKSC",
|
165 |
-
"drug_seq":"CC(=O)NCCC1=CNc2c1cc(OC)cc2"
|
166 |
-
}
|
167 |
-
self.markup_text = """
|
168 |
-
# Mammal based Target-Drug binding affinity demonstration
|
169 |
-
|
170 |
-
Given a protein sequence and a drug (in SMILES), estimate the binding affinity.
|
171 |
-
"""
|
172 |
-
|
173 |
-
def crate_sample_dict(self, sample_inputs:dict, model_holder:MammalObjectBroker):
|
174 |
-
"""convert sample_inputs to sample_dict including creating a proper prompt
|
175 |
-
|
176 |
-
Args:
|
177 |
-
sample_inputs (dict): dictionary containing the inputs to the model
|
178 |
-
model_holder (MammalObjectBroker): model holder
|
179 |
-
Returns:
|
180 |
-
dict: sample_dict for feeding into model
|
181 |
-
"""
|
182 |
-
sample_dict = dict(sample_inputs)
|
183 |
-
sample_dict = DtiBindingdbKdTask.data_preprocessing(
|
184 |
-
sample_dict=sample_dict,
|
185 |
-
tokenizer_op=model_holder.tokenizer_op,
|
186 |
-
target_sequence_key="target_seq",
|
187 |
-
drug_sequence_key="drug_seq",
|
188 |
-
norm_y_mean=None,
|
189 |
-
norm_y_std=None,
|
190 |
-
device=model_holder.model.device,
|
191 |
-
)
|
192 |
-
return sample_dict
|
193 |
-
|
194 |
-
|
195 |
-
def run_model(self, sample_dict, model: Mammal):
|
196 |
-
# Generate Prediction
|
197 |
-
batch_dict = model.forward_encoder_only([sample_dict])
|
198 |
-
return batch_dict
|
199 |
-
|
200 |
-
def decode_output(self,batch_dict, model_holder):
|
201 |
-
|
202 |
-
# Get output
|
203 |
-
batch_dict = DtiBindingdbKdTask.process_model_output(
|
204 |
-
batch_dict,
|
205 |
-
scalars_preds_processed_key="model.out.dti_bindingdb_kd",
|
206 |
-
norm_y_mean=5.79384684128215,
|
207 |
-
norm_y_std=1.33808027428196,
|
208 |
-
)
|
209 |
-
ans = (
|
210 |
-
"model.out.dti_bindingdb_kd",
|
211 |
-
float(batch_dict["model.out.dti_bindingdb_kd"][0]),
|
212 |
-
)
|
213 |
-
return ans
|
214 |
-
|
215 |
-
|
216 |
-
def create_and_run_prompt(self,model_name,target_seq, drug_seq):
|
217 |
-
model_holder = all_models[model_name]
|
218 |
-
inputs = {
|
219 |
-
"target_seq": target_seq,
|
220 |
-
"drug_seq": drug_seq,
|
221 |
-
}
|
222 |
-
sample_dict = self.crate_sample_dict(sample_inputs=inputs, model_holder=model_holder)
|
223 |
-
prompt=sample_dict[ENCODER_INPUTS_STR]
|
224 |
-
batch_dict = self.run_model(sample_dict=sample_dict, model=model_holder.model)
|
225 |
-
res = prompt, *self.decode_output(batch_dict,model_holder=model_holder)
|
226 |
-
return res
|
227 |
-
|
228 |
-
|
229 |
-
def create_demo(self,model_name_widget):
|
230 |
-
|
231 |
-
# """
|
232 |
-
# ### Using the model from
|
233 |
-
|
234 |
-
# ```{model} ```
|
235 |
-
# """
|
236 |
-
with gr.Group() as demo:
|
237 |
-
gr.Markdown(self.markup_text)
|
238 |
-
with gr.Row():
|
239 |
-
target_textbox = gr.Textbox(
|
240 |
-
label="target sequence",
|
241 |
-
# info="standard",
|
242 |
-
interactive=True,
|
243 |
-
lines=3,
|
244 |
-
value=self.examples["target_seq"],
|
245 |
-
)
|
246 |
-
drug_textbox = gr.Textbox(
|
247 |
-
label="Drug sequance (in SMILES)",
|
248 |
-
# info="standard",
|
249 |
-
interactive=True,
|
250 |
-
lines=3,
|
251 |
-
value=self.examples["drug_seq"],
|
252 |
-
)
|
253 |
-
with gr.Row():
|
254 |
-
run_mammal = gr.Button(
|
255 |
-
"Run Mammal prompt for Protein-Protein Interaction", variant="primary"
|
256 |
-
)
|
257 |
-
with gr.Row():
|
258 |
-
prompt_box = gr.Textbox(label="Mammal prompt", lines=5)
|
259 |
-
|
260 |
-
with gr.Row():
|
261 |
-
decoded = gr.Textbox(label="Mammal output key")
|
262 |
-
run_mammal.click(
|
263 |
-
fn=self.create_and_run_prompt,
|
264 |
-
inputs=[model_name_widget, target_textbox, drug_textbox],
|
265 |
-
outputs=[prompt_box, decoded, gr.Number(label="binding affinity")],
|
266 |
-
)
|
267 |
-
demo.visible = False
|
268 |
-
return demo
|
269 |
-
|
270 |
-
tdi_task = DtiTask()
|
271 |
all_tasks[tdi_task.name]=tdi_task
|
272 |
|
273 |
ppi_model = MammalObjectBroker(model_path="ibm/biomed.omics.bl.sm.ma-ted-458m", task_list=[ppi_task.name])
|
|
|
1 |
import gradio as gr
|
|
|
|
|
|
|
2 |
from mammal.keys import *
|
|
|
3 |
|
4 |
+
from mammal_demo.demo_framework import MammalObjectBroker
|
5 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
|
7 |
+
from mammal_demo.ppi_task import PpiTask
|
8 |
+
from mammal_demo.dti_task import DtiTask
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
|
10 |
+
all_tasks = dict()
|
11 |
+
all_models= dict()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
|
13 |
+
ppi_task = PpiTask(model_dict = all_models)
|
14 |
all_tasks[ppi_task.name]=ppi_task
|
15 |
|
16 |
+
tdi_task = DtiTask(model_dict = all_models)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
all_tasks[tdi_task.name]=tdi_task
|
18 |
|
19 |
ppi_model = MammalObjectBroker(model_path="ibm/biomed.omics.bl.sm.ma-ted-458m", task_list=[ppi_task.name])
|