Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
3 |
+
import torch.nn.functional as F
|
4 |
+
|
5 |
+
# Default DNA sequence: used as the textbox hint and as the fallback input
# when the user submits nothing.
placeholder = 'GATGCTACTGCTAGCTAATCAGTAATCACCAATGCATAAACACAACACATGCCTTCGTTCCAAAGTTTTCATTCCTCGTCATAGACTTAAAGAAGGGGCAACAAGTTCTCTACGAGTCTTCTGGACTGGACTGGCTACCCCCTCGGCCCATTCTGGCCCAGTTGCGGGCGGCCTTTCATTTAATAAATATTTCTAATAGATATAAATTATTTTATCTAATATTATTAATTTTTTTCTTATAAAACATATAAT'

# Suffix appended to every model name that starts with "plant".
tokenizer_type = "singlebase"

# Base model names offered in the UI.
model_names = [
    'plant-dnabert',
    'plant-dnagpt',
    'plant-nucleotide-transformer',
    'plant-dnagemma',
    'dnabert2',
    'nucleotide-transformer-v2-100m',
    'agront-1b',
]
# "plant*" models get the tokenizer type appended; other names are kept as-is.
model_names = [
    f'{name}-{tokenizer_type}' if name.startswith("plant") else name
    for name in model_names
]

# Class labels per task. The position of a label in its list is the index of
# the model output probability it is paired with.
task_map = {
    "promoter": ["Not promoter", "Core promoter"],
    "conservation": ["Not conserved", "Conserved"],
    "H3K27ac": ["Not H3K27ac", "H3K27ac"],
    "H3K27me3": ["Not H3K27me3", "H3K27me3"],
    "H3K4me3": ["Not H3K4me3", "H3K4me3"],
    "lncRNAs": ["Not lncRNA", "lncRNA"],
    "open_chromatin": ['Not open chromatin', 'Full open chromatin', 'Partial open chromatin'],
}

# A real list (not a live dict view) so it can be handed to UI components
# that expect a list of choices.
task_lists = list(task_map)
|
20 |
+
|
21 |
+
def inference(seq, model, task):
    """Classify a DNA sequence with a fine-tuned sequence-classification model.

    Args:
        seq: Sequence text entered by the user. When it is empty or contains
            only whitespace, a Gradio warning is shown and the module-level
            ``placeholder`` sequence is used instead.
        model: Model name; combined with ``task`` into the checkpoint id
            ``zhangtaolab/{model}-{task}``.
        task: Task name; must be a key of ``task_map``.

    Returns:
        A dict mapping each label in ``task_map[task]`` to the softmax
        probability at the same index of the model output.

    Raises:
        KeyError: If ``task`` is not a key of ``task_map`` (raised after the
            model has been loaded).
        IndexError: If the model emits fewer probabilities than there are
            labels for ``task``.
    """
    # Imported here because the module only binds ``torch.nn.functional`` (as F).
    import torch

    if not seq or not seq.strip():
        gr.Warning("No sequence provided, use the default sequence.")
        seq = placeholder

    # Load model and tokenizer.
    # NOTE(review): both are reloaded on every call; consider caching if
    # repeated predictions with the same model are common.
    # NOTE(review): ignore_mismatched_sizes=True lets a checkpoint whose head
    # shape differs from the config load anyway -- confirm this is intended.
    model_name = f'zhangtaolab/{model}-{task}'
    classifier = AutoModelForSequenceClassification.from_pretrained(
        model_name, ignore_mismatched_sizes=True
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Inference: gradients are not needed, so skip building the autograd graph.
    inputs = tokenizer(seq, return_tensors='pt', padding=True, truncation=True, max_length=512)
    with torch.no_grad():
        outputs = classifier(**inputs)
    # Single input sequence, so take the first (only) row of the batch.
    probabilities = F.softmax(outputs.logits, dim=-1).tolist()[0]

    # Map probabilities to labels by position.
    labels = task_map[task]
    return {label: probabilities[i] for i, label in enumerate(labels)}
|
38 |
+
|
39 |
+
|
40 |
+
# Create Gradio interface
|
41 |
+
# Page heading rendered at the top of the app.
_TITLE_HTML = """
<h1 style="text-align: center;">Prediction of open chromatin regions in plant with LLMs</h1>
"""

# NOTE(review): the nesting below was reconstructed from a flattened view of
# the file -- confirm which components belong inside each gr.Row().
with gr.Blocks() as demo:
    gr.HTML(_TITLE_HTML)

    # Task selector (read-only) next to the model selector.
    with gr.Row():
        drop1 = gr.Dropdown(
            label="Selected Task",
            choices=task_lists,
            value='open_chromatin',
            interactive=False,
        )
        drop2 = gr.Dropdown(
            label="Select Model",
            choices=model_names,
            value=model_names[0],
            interactive=True,
        )

    seq_input = gr.Textbox(
        label="Input Sequence",
        lines=6,
        placeholder=placeholder,
    )

    with gr.Row():
        predict_btn = gr.Button("Predict", variant="primary")
        clear_btn = gr.Button("Clear")

    output = gr.Label(label="Predict result")

    # "Predict" passes (sequence, model, task) to inference(), matching its
    # parameter order, and shows the returned label/probability dict.
    predict_btn.click(
        inference,
        inputs=[seq_input, drop2, drop1],
        outputs=output,
    )
    # "Clear" empties the sequence box and resets the result label.
    clear_btn.click(
        lambda: ("", None),
        inputs=[],
        outputs=[seq_input, output],
    )

# Launch Gradio app
demo.launch()
|