|
import os |
|
import numpy as np |
|
import torch |
|
from huggingface_hub import hf_hub_download |
|
from safetensors.torch import load_file as load_safetensors |
|
from transformers import AutoTokenizer |
|
from openvino.runtime import Core |
|
import gradio as gr |
|
import pandas as pd |
|
|
|
|
|
# Hugging Face Hub repository that hosts the exported ONNX classifier.
HF_MODEL = "Kaiyeee/goemotions-multilabel"

# The tokenizer is loaded from the stock "roberta-base" checkpoint rather than
# from HF_MODEL.  NOTE(review): presumably the classifier was fine-tuned from
# roberta-base, so the vocabularies match -- confirm against the model card.
tokenizer = AutoTokenizer.from_pretrained("roberta-base")

# Resolve the ONNX graph to a local file path, then read and compile it once
# at import time so every request reuses the same compiled model on CPU.
onnx_path = hf_hub_download(
    repo_id=HF_MODEL,
    filename="goemotions_multilabel.onnx",
)
core = Core()
ov_model = core.read_model(model=onnx_path)
compiled = core.compile_model(model=ov_model, device_name="CPU")
|
|
|
# Label names for the 28 classifier outputs.  `predict` looks labels up by
# output column index, so the order here must not be changed.
emotion_labels = [
    "admiration", "amusement", "anger", "annoyance",
    "approval", "caring", "confusion", "curiosity",
    "desire", "disappointment", "disapproval", "disgust",
    "embarrassment", "excitement", "fear", "gratitude",
    "grief", "joy", "love", "nervousness",
    "optimism", "pride", "realization", "relief",
    "remorse", "sadness", "surprise", "neutral",
]
|
|
|
def predict(texts, threshold=0.3):
    """Label each text with every emotion whose score exceeds *threshold*.

    Args:
        texts: A single string or an iterable of strings to classify.
        threshold: Cut-off applied to the sigmoid of each logit; a label is
            reported when its score is strictly greater than this value.

    Returns:
        A list with one string per input text: the matching labels joined
        with ", ", or "none" when no label passed the threshold.  An empty
        input yields an empty list.
    """
    # Treat a bare string as a batch of one, and materialise any other
    # iterable so it can be checked for emptiness and tokenised as a batch.
    texts = [texts] if isinstance(texts, str) else list(texts)
    if not texts:
        # Nothing to classify: return early rather than sending an empty
        # batch through the tokenizer and the compiled model.
        return []

    # Every text is padded/truncated to exactly 128 tokens.
    toks = tokenizer(
        texts,
        padding="max_length",
        truncation=True,
        max_length=128,
        return_tensors="np",
    )
    outs = compiled([toks["input_ids"], toks["attention_mask"]])
    logits = outs[compiled.output(0)]

    # Independent sigmoid per label (multi-label), not a softmax.
    probs = 1 / (1 + np.exp(-logits))
    fired = probs > threshold

    # For each row, translate the indices of the fired columns into names.
    return [
        ", ".join(emotion_labels[j] for j in np.flatnonzero(row)) or "none"
        for row in fired
    ]
|
|
|
|
|
def predict_bulk(texts_str, threshold=0.3):
    """Classify newline-separated texts and render the result as Markdown.

    Args:
        texts_str: Raw textbox content, one text per line.  Blank lines and
            surrounding whitespace are ignored; ``None`` is treated as empty.
        threshold: Score cut-off forwarded to ``predict``.

    Returns:
        One "<text>: **label**, **label**" entry per non-blank line, with
        entries separated by a blank line.  Returns an empty string when
        there is nothing to classify.
    """
    stripped = (line.strip() for line in (texts_str or "").split("\n"))
    texts = [line for line in stripped if line]
    if not texts:
        # Empty or whitespace-only input: skip inference entirely instead of
        # handing an empty batch to predict().
        return ""

    results = predict(texts, threshold)

    # predict() returns labels as a ", "-joined string; split it back apart
    # so each label can be bolded individually.
    return "\n\n".join(
        f"{text}: " + ", ".join(f"**{label}**" for label in labels.split(", "))
        for text, labels in zip(texts, results)
    )
|
|
|
|
|
|
|
|
|
def predict_file(file_obj, threshold=0.3):
    """Classify the 'text' column of an uploaded CSV and write the results.

    Args:
        file_obj: The uploaded file as delivered by the ``gr.File`` input:
            either a filesystem path (str / path-like) or an object exposing
            the path as ``.name``.
        threshold: Score cut-off forwarded to ``predict``.

    Returns:
        Path to a new CSV containing the original columns plus an
        'emotions' column.

    Raises:
        gr.Error: If no file was uploaded or the CSV has no 'text' column.
    """
    import tempfile

    if file_obj is None:
        raise gr.Error("Please upload a CSV file first.")

    # Accept both a plain path and a file wrapper with a `.name` attribute.
    if isinstance(file_obj, (str, os.PathLike)):
        csv_path = file_obj
    else:
        csv_path = file_obj.name

    df = pd.read_csv(csv_path)
    if 'text' not in df.columns:
        # The output component is a gr.File, so returning the message would
        # make Gradio treat it as a file path; raise a user-visible error.
        raise gr.Error("CSV must have a 'text' column.")

    # Blank cells are read as NaN; classify them as empty strings rather
    # than as the literal text "nan".
    texts = df['text'].fillna("").astype(str).tolist()

    # A CSV with a header but no rows has nothing to run through the model.
    df['emotions'] = predict(texts, threshold) if texts else []

    # Write into a fresh temp directory so concurrent requests do not
    # overwrite each other's output, while keeping the download file name.
    out_path = os.path.join(tempfile.mkdtemp(), "predictions.csv")
    df.to_csv(out_path, index=False)
    return out_path
|
|
|
# Two-tab UI: free-text entry and CSV upload, both driven by one threshold.
with gr.Blocks() as demo:
    gr.Markdown("# 🦄 Sentiment Analyzer for 28 different emotions!")

    # Defined outside the tabs so both handlers receive the same slider value.
    thr = gr.Slider(minimum=0.1, maximum=0.9, value=0.3, label="Threshold")

    with gr.Tab("Paste Text (one per line)"):
        inp = gr.Textbox(
            lines=10,
            label="Enter texts (one per line)",
            placeholder="Enter sentences here",
        )
        out = gr.Markdown(label="Predicted emotions")
        btn = gr.Button("Analyze")
        btn.click(predict_bulk, [inp, thr], out)

    with gr.Tab("Upload CSV"):
        file_inp = gr.File(label="Upload CSV with a 'text' column")
        out_file = gr.File(label="Download CSV with emotions")
        file_btn = gr.Button("Analyze CSV")
        file_btn.click(predict_file, [file_inp, thr], out_file)

demo.launch()