"""Gradio demo: predict the top-N emojis for an English/Russian tweet.

Uses a fine-tuned XLM-RoBERTa sequence-classification model whose labels
are emojis.  Launches a Gradio web UI when run.
"""
import gradio as gr
import numpy as np
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

BASE_MODEL = "AlekseyDorkin/xlm-roberta-en-ru-emoji"
TOP_N = 5

# Loaded once at import time; downloads weights on first run.
model = AutoModelForSequenceClassification.from_pretrained(BASE_MODEL)
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
model.eval()  # inference only — disable dropout etc.


def preprocess(text):
    """Normalize a tweet the way the model was trained.

    Replaces @-mentions with the literal token '@user' and any URL
    (token starting with 'http') with the literal token 'http'.
    """
    new_text = []
    for t in text.split(" "):
        t = '@user' if t.startswith('@') and len(t) > 1 else t
        t = 'http' if t.startswith('http') else t
        new_text.append(t)
    return " ".join(new_text)


def get_top_emojis(text, top_n=TOP_N):
    """Return the `top_n` most likely emoji labels for `text`.

    Args:
        text: raw tweet text.
        top_n: how many labels to return, best first.

    Returns:
        List of emoji label strings, highest-probability first.
    """
    preprocessed = preprocess(text)
    inputs = tokenizer(preprocessed, return_tensors="pt")
    # no_grad: avoid building an autograd graph during inference.
    with torch.no_grad():
        preds = model(**inputs).logits
    scores = torch.nn.functional.softmax(preds, dim=-1).numpy()
    # argsort is ascending, so reverse before taking the top_n.
    ranking = np.argsort(scores)
    ranking = ranking.squeeze()[::-1][:top_n]
    emojis = [model.config.id2label[i] for i in ranking]
    return emojis


gradio_ui = gr.Interface(
    fn=get_top_emojis,
    title="Predicting emojis for tweets",
    description="Enter a tweet to predict emojis",
    inputs=[
        gr.inputs.Textbox(lines=3, label="Paste a tweet here"),
    ],
    outputs=[
        gr.outputs.Textbox(label="Predicted emojis"),
    ],
    examples=[
        ["Awesome!"],
        ["Круто!"],
        ["lol"],
    ],
    enable_queue=True,
    allow_screenshot=False,
    allow_flagging=False,
)

gradio_ui.launch(debug=True)