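# Gradio app: rank TweetEval labels for an input text with a
# cardiffnlp/twitter-roberta-base-{task} model; the label mapping is
# downloaded from the cardiffnlp/tweeteval GitHub repository.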
import gradio as gr
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import numpy as np
from scipy.special import softmax
import csv
import urllib.request
# Preprocess text (username and link placeholders)
def preprocess(text):
    new_text = []
    for t in text.split(" "):
        t = '@user' if t.startswith('@') and len(t) > 1 else t
        t = 'http' if t.startswith('http') else t
        new_text.append(t)
    return " ".join(new_text)

def classify_text(text):
    # Tasks: emoji, emotion, hate, irony, offensive, sentiment,
    # stance/abortion, stance/atheism, stance/climate, stance/feminist, stance/hillary
    task = 'emoji'
    MODEL = f"cardiffnlp/twitter-roberta-base-{task}"
    tokenizer = AutoTokenizer.from_pretrained(MODEL)

    # Download label mapping (tab-separated rows; the label name is in the second column)
    labels = []
    mapping_link = f"https://raw.githubusercontent.com/cardiffnlp/tweeteval/main/datasets/{task}/mapping.txt"
    with urllib.request.urlopen(mapping_link) as f:
        html = f.read().decode('utf-8').split("\n")
        csvreader = csv.reader(html, delimiter='\t')
    labels = [row[1] for row in csvreader if len(row) > 1]

    # Load model and score the preprocessed text
    model = AutoModelForSequenceClassification.from_pretrained(MODEL)
    text = preprocess(text)
    encoded_input = tokenizer(text, return_tensors='pt')
    output = model(**encoded_input)
    scores = output.logits[0].detach().numpy()
    scores = softmax(scores)

    # Rank labels from most to least likely and format one "rank) label score" line per label
    ranking = np.argsort(scores)
    ranking = ranking[::-1]
    results = []
    for i in range(scores.shape[0]):
        label = labels[ranking[i]]
        score = scores[ranking[i]]
        result = f"{i+1}) {label} {np.round(float(score), 4)}"
        results.append(result)
    return "\n".join(results)

# Gradio interface: a single text input and a text output showing the ranked labels
iface = gr.Interface(
    fn=classify_text,
    inputs="text",
    outputs="text",
    title="Text Classification",
    description="Classify the text into different categories.",
    examples=["Looking forward to Christmas"],
)

iface.launch()
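
# Quick sanity check without the web UI (assumes the model weights and label
# mapping can be downloaded):
#   print(classify_text("Looking forward to Christmas"))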