|
import gradio as gr |
|
import pandas as pd |
|
|
|
import io |
|
|
|
from Bio import SeqIO |
|
from tape import ProteinBertModel, ProteinBertConfig, TAPETokenizer |
|
from tape.models import modeling_bert |
|
import numpy as np |
|
import torch |
|
|
|
|
|
# Tokenizer built with TAPE's 'iupac' vocabulary; func() uses it to turn a
# sequence string into token ids.
tokenizer = TAPETokenizer(vocab='iupac')

# NOTE(review): `config` is not referenced by any other code visible in this
# file (both models below are loaded as whole pickled objects). It is kept so
# the module-level name stays available.
config = modeling_bert.ProteinBertConfig(
    num_hidden_layers=5, num_attention_heads=8, hidden_size=400
)

# SECURITY: torch.load unpickles the file, which can execute arbitrary code.
# Only load checkpoints from a trusted source.
# map_location="cpu" lets checkpoints that were saved from a GPU load on a
# CPU-only host; without it torch.load tries to restore the original device.
# NOTE(review): on PyTorch >= 2.6 torch.load defaults to weights_only=True and
# rejects whole-model pickles; pass weights_only=False there if required.
bert_model = torch.load('models/transformer1500_95p_500.pt', map_location="cpu")
class_model = torch.load('models/down_model_500_kfold1.pt', map_location="cpu")

# The encoder checkpoint holds a wrapper whose usable model is its `.module`
# attribute (presumably a DataParallel wrapper; confirm against training code).
bert_model = bert_model.module
bert_model = bert_model.to("cpu")
bert_model = bert_model.eval()

# Put the classification head in inference mode as well, so layers such as
# dropout (if it has any) behave deterministically at prediction time.
class_model = class_model.eval()
|
|
|
|
|
# Deletes space, tab, newline, carriage return, form feed and vertical tab.
# Built once at import time instead of on every call.
_WHITESPACE_TABLE = str.maketrans("", "", " \t\n\r\f\v")


def func(name):
    """Predict the cluster for a single sequence string.

    Args:
        name: Raw sequence text. ASCII whitespace (including line breaks from
            pasted, wrapped sequences) is removed before encoding. ``None`` is
            treated like an empty string.

    Returns:
        ``"cluster N"`` where N is the 1-based index of the highest-scoring
        class, or ``""`` when the input contains no sequence characters.
    """
    sequence = (name or "").translate(_WHITESPACE_TABLE)
    if not sequence:
        # Nothing to classify: do not feed an empty sequence to the model and
        # report an arbitrary cluster for it.
        return ""

    # Leading list wraps the encoded sequence into a batch of size one.
    token_ids = torch.tensor([tokenizer.encode(sequence)])

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        bert_output = bert_model(token_ids)
        # The classifier consumes element 1 of the encoder output
        # (presumably the pooled sequence embedding; confirm).
        logits = class_model(bert_output[1])

    # Softmax is monotonic, so argmax over the raw scores selects the same
    # class; +1 converts the 0-based index to a 1-based cluster number.
    cluster = torch.argmax(logits, dim=1).item() + 1

    return "cluster " + str(cluster)
|
|
|
|
|
def func_mult(name):
    """Predict a cluster for each sequence in a multi-sequence text input.

    Two input layouts are accepted:

    * FASTA text (at least one line starts with ``>``): parsed with
      ``process_fasta`` exactly as before.
    * Plain sequences separated by line breaks, which is what the input
      box's placeholder asks for: each non-blank line is one sequence.
      Previously this layout was also handed to the FASTA parser, which
      finds no records in header-less text (or rejects it, depending on the
      Biopython version), so no predictions were produced.

    Args:
        name: Text from the multi-sequence input box. ``None`` is treated
            like an empty string.

    Returns:
        One prediction per sequence, joined with newlines, in input order.
        An empty string when no sequence is found.
    """
    text = name or ""
    lines = text.splitlines()

    if any(line.startswith(">") for line in lines):
        sequence_list = process_fasta(text)
    else:
        sequence_list = [line.strip() for line in lines if line.strip()]

    return "\n".join(func(sequence) for sequence in sequence_list)
|
|
|
|
|
def process_fasta(fasta_content):
    """Return the sequences found in FASTA-formatted text.

    Args:
        fasta_content: FASTA data as a single string.

    Returns:
        A list with each record's sequence as a ``str``, in file order.
    """
    # SeqIO.parse reads from a handle, so wrap the text in an in-memory one.
    handle = io.StringIO(fasta_content)
    return [str(entry.seq) for entry in SeqIO.parse(handle, "fasta")]
|
|
|
|
|
def read_fasta_file(file_path):
    """Return the sequences stored in a FASTA file.

    Args:
        file_path: Location of the FASTA file, passed straight to
            ``SeqIO.parse``.

    Returns:
        A list with each record's sequence as a ``str``, in file order.
    """
    records = SeqIO.parse(file_path, "fasta")
    return [str(entry.seq) for entry in records]
|
|
|
|
|
def func_file(file_path):
    """Predict a cluster for every record of an uploaded FASTA file.

    Args:
        file_path: Value of the FASTA file component. Either a path string
            or an object exposing the path as ``.name``; which one arrives
            depends on the Gradio version. ``None`` when nothing has been
            uploaded.

    Returns:
        One prediction per record, joined with newlines, in file order.

    Raises:
        gr.Error: If no file has been uploaded yet, so the UI shows a clear
            message instead of an opaque parser failure.
    """
    if file_path is None:
        raise gr.Error("Please upload a FASTA file before predicting.")

    # Normalise to a plain path so the parser opens the file itself.
    path = getattr(file_path, "name", file_path)

    sequence_list = read_fasta_file(path)
    return "\n".join(func(sequence) for sequence in sequence_list)
|
|
|
|
|
def upload_file(files):
    """Return the path of the first uploaded file.

    The upload button allows several files, but only the first one is
    forwarded to the file component.

    Args:
        files: List of uploaded files. Each entry is either an object with a
            ``.name`` path attribute or a plain path string. May be empty or
            ``None``.

    Returns:
        The first file's path, or ``None`` when nothing was uploaded
        (instead of raising ``IndexError``).
    """
    if not files:
        return None
    first = files[0]
    return getattr(first, "name", first)
|
|
|
|
|
def save_to_txt(data, file_name="output.txt"):
    """Write prediction text to a file and return the file's path.

    Args:
        data: Text to save. ``None`` is written as an empty file instead of
            raising ``TypeError``.
        file_name: Destination path. Defaults to ``"output.txt"`` in the
            current working directory, the location used previously.

    Returns:
        ``file_name``, so it can be handed to a file output component.
    """
    text = "" if data is None else str(data)
    # Explicit encoding keeps the output independent of the platform default.
    with open(file_name, mode="w", encoding="utf-8") as file:
        file.write(text)
    return file_name
|
|
|
|
|
# Custom stylesheet handed to gr.Blocks below: page background colour, the
# colours for components tagged with elem_classes="button", and the large
# font for the elem_classes="feedback" title.
css = """

.gradio-container {background-color: #EDEFF7}

.button {background-color: #515D90; color:#FFFFFF}

.feedback {font-size: 36px}

"""
|
|
|
# Three-tab UI: one pasted sequence, several pasted sequences, or a FASTA file.
# NOTE(review): the source's indentation was lost; the nesting below is
# reconstructed from the matching `scale` values and should be confirmed.
with gr.Blocks(css=css, title="GH29 Prediction", theme=gr.themes.Soft()) as demo:
    gr.Markdown("GH29 Prediction", elem_classes="feedback")

    with gr.Tab("Single sequence input"):
        with gr.Row():
            single_input = gr.Textbox(
                lines=10,
                placeholder="Please input sequence data (note: do not input fasta data)",
                label="Input",
            )
            single_output = gr.Textbox(lines=10, label="Output", show_copy_button=True)
        single_button = gr.Button("Predict", elem_classes="button")

    with gr.Tab("Multiple sequence input"):
        multiple_input = gr.Textbox(
            lines=10,
            placeholder="Please enter multiple sequence data separated by line breaks (do not enter fasta data)",
            label="Input",
        )
        multiple_button = gr.Button("Predict", elem_classes="button")
        multiple_output = gr.Textbox(lines=10, label="Output", show_copy_button=True)

    with gr.Tab("FASTA input"):
        # First row: uploaded file, predictions, downloadable result.
        with gr.Row():
            file_upload = gr.File(label="Fasta File", interactive=False, scale=2)
            file_output_textbox = gr.Textbox(
                lines=15,
                label="Output",
                scale=3,
                container=True,
                autoscroll=True,
                show_copy_button=True,
            )
            file_output_file = gr.File(label="Output File", scale=2)
        # Second row: one control under each component of the first row.
        with gr.Row():
            # file_types entries must be extensions with a leading dot (or a
            # known type category); the bare "fasta" used before is neither,
            # so it did not filter for FASTA files.
            # Several files may be selected, but upload_file forwards only
            # the first one to `file_upload`.
            upload_button = gr.UploadButton(
                "Click to Upload a File",
                file_types=[".fasta", ".fa", ".faa", ".fas"],
                scale=2,
                size="sm",
                file_count="multiple",
            )
            upload_button.upload(upload_file, upload_button, file_upload)
            file_button = gr.Button("Predict", scale=3, size="lg", elem_classes="button")
            file_button_GenerateFile = gr.Button("Save to File", scale=2, size="sm")

    # Event wiring: each button runs its handler on the tab's input component
    # and writes the result to the tab's output component.
    single_button.click(func, inputs=single_input, outputs=single_output)
    multiple_button.click(func_mult, inputs=multiple_input, outputs=multiple_output)
    file_button.click(func_file, inputs=file_upload, outputs=file_output_textbox)
    file_button_GenerateFile.click(save_to_txt, inputs=file_output_textbox, outputs=file_output_file)


# Start the app; share=True also requests a public share link.
demo.launch(share=True)