|
import gradio as gr |
|
from transformers import AutoTokenizer, AutoModelForSequenceClassification |
|
import torch |
|
|
|
|
|
# Pretrained Russian news classifier published on the Hugging Face Hub.
model_name = "data-silence/rus-news-classifier"

# Load the tokenizer and the matching sequence-classification model.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Run on GPU when one is available, otherwise fall back to CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# .to() moves the weights onto the chosen device; .eval() switches off
# dropout/batch-norm updates for inference. Both return the module itself.
model = model.to(device).eval()
|
|
|
|
|
# Integer class id -> human-readable news category name, in model output order.
id2label = dict(enumerate([
    'climate', 'conflicts', 'culture', 'economy', 'gloss',
    'health', 'politics', 'science', 'society', 'sports', 'travel',
]))
|
|
|
def predict(text):
    """Classify a news text into one of the model's categories.

    Args:
        text: Raw news article text.

    Returns:
        A tuple of (top category name, {category name: probability}).
    """
    # Tokenize and move the tensors onto the same device as the model.
    encoded = tokenizer(text, return_tensors="pt", truncation=True, padding=True).to(device)

    # No gradients are needed for inference.
    with torch.no_grad():
        logits = model(**encoded).logits

    # Softmax over the class dimension turns logits into per-category probabilities.
    scores = torch.nn.functional.softmax(logits, dim=-1)[0]
    label_scores = {id2label[idx]: float(score) for idx, score in enumerate(scores)}

    # The highest-scoring class is the prediction (argmax of logits == argmax of softmax).
    top_id = logits.argmax(-1).item()
    return id2label[top_id], label_scores
|
|
|
|
|
# Gradio UI: a single text box in, the predicted category plus the full
# per-category probability table out.
category_output = gr.Label(label="Predicted category | Предсказанная категория")
probabilities_output = gr.Label(label="Category probabilities | Вероятности категорий")

iface = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(lines=5, label="Enter news text | Введите текст новости"),
    outputs=[category_output, probabilities_output],
    title="News Classifier | Классификатор новостей",
    description="Enter the news text in any language and the model will predict its category. | Введите текст новости на любом языке, и модель предскажет её категорию",
)

# Start the local web server for the demo.
iface.launch()