"""Gradio demo: predict the top-N emojis for an English/Russian tweet.

Uses a fine-tuned XLM-RoBERTa sequence-classification model whose labels
are emojis.  Launches a Gradio web UI when run.
"""
import gradio as gr
import numpy as np
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

BASE_MODEL = "AlekseyDorkin/xlm-roberta-en-ru-emoji"
TOP_N = 5

# Loaded once at import time; downloads weights on first run.
model = AutoModelForSequenceClassification.from_pretrained(BASE_MODEL)
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
model.eval()  # inference only — disable dropout etc.


def preprocess(text):
    """Normalize a tweet the way the model was trained.

    Replaces @-mentions with the literal token '@user' and any URL
    (token starting with 'http') with the literal token 'http'.
    """
    new_text = []
    for t in text.split(" "):
        t = '@user' if t.startswith('@') and len(t) > 1 else t
        t = 'http' if t.startswith('http') else t
        new_text.append(t)
    return " ".join(new_text)


def get_top_emojis(text, top_n=TOP_N):
    """Return the `top_n` most likely emoji labels for `text`.

    Args:
        text: raw tweet text.
        top_n: how many labels to return, best first.

    Returns:
        List of emoji label strings, highest-probability first.
    """
    preprocessed = preprocess(text)
    inputs = tokenizer(preprocessed, return_tensors="pt")
    # no_grad: avoid building an autograd graph during inference.
    with torch.no_grad():
        preds = model(**inputs).logits
    scores = torch.nn.functional.softmax(preds, dim=-1).numpy()
    # argsort is ascending, so reverse before taking the top_n.
    ranking = np.argsort(scores)
    ranking = ranking.squeeze()[::-1][:top_n]
    emojis = [model.config.id2label[i] for i in ranking]
    return emojis


gradio_ui = gr.Interface(
    fn=get_top_emojis,
    title="Predicting emojis for tweets",
    description="Enter a tweet to predict emojis",
    inputs=[
        gr.inputs.Textbox(lines=3, label="Paste a tweet here"),
    ],
    outputs=[
        gr.outputs.Textbox(label="Predicted emojis"),
    ],
    examples=[
        ["Awesome!"],
        ["Круто!"],
        ["lol"],
    ],
    enable_queue=True,
    allow_screenshot=False,
    allow_flagging=False,
)

gradio_ui.launch(debug=True)