import gradio as gr
from transformers import AutoTokenizer, AutoModelForTokenClassification
import torch
import numpy as np
import torch.nn.functional as F

# Load the fine-tuned DR-BERT checkpoint for per-residue token classification.
tokenizer = AutoTokenizer.from_pretrained("./checkpoint-final/")
model = AutoModelForTokenClassification.from_pretrained("./checkpoint-final/")
model = model.eval()

examples = [
    ["GSHMSDNEDNFDGDDFDDVEEDEGLDDLENAEEEGQENVEILPSGERPQANQKRITTPYMTKYERARVLGTRALQIAMCAPVMVELEGETDPLLIAMKELKARKIPIIIRRYLPDGSYEDWGVDELIITD"]
]


def get_out(sent):
    # Tokenize the amino acid sequence and run the model without tracking gradients.
    encoded = tokenizer.encode_plus(sent, return_tensors="pt")
    with torch.no_grad():
        output = model(**encoded)
    # Softmax over the label dimension, drop the special tokens at both ends,
    # and keep the per-residue probability of the "disordered" class (index 1).
    probs = F.softmax(torch.squeeze(output["logits"]), dim=-1)[1:-1, 1].numpy()
    return np.array2string(probs, precision=4, separator=",", suppress_small=True)


gr.Interface(
    get_out,
    [
        gr.components.Textbox(
            label="Input Amino Acid Sequence",
            placeholder="Amino acid sequence here ...",
        )
    ],
    ["text"],
    examples=examples,
    title="DR-BERT: A Protein Language Model to Predict Disordered Regions",
    description="This app uses DR-BERT to predict disordered regions in proteins. The output is the per-residue probability that a residue is disordered.",
).launch()
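
# A minimal sketch of calling the prediction function directly, without the
# Gradio UI (assumes the ./checkpoint-final/ weights above load successfully).
# Since launch() blocks while the app is serving, run this in a separate
# session or before launching the interface:
#
#     probs = get_out(examples[0][0])
#     print(probs)  # comma-separated per-residue disorder probabilities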