Spaces:

Kaiyeee
/

SentIntel

Sleeping

File size: 2,971 Bytes

57ccc66
 
 
 
 
 
 
 
1b1eae0
57ccc66
1b1eae0
57ccc66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1b1eae0
 
57ccc66
 
 
 
 
 
 
1b1eae0
 
 
 
da0d2a4
78dfad9
1b1eae0
da0d2a4
 
1b1eae0
 
 
 
 
 
 
 
 
 
 
 
57ccc66
e4c2e74
1b1eae0
57ccc66
1b1eae0
 
 
78dfad9
1b1eae0
 
 
 
 
 
 
 
57ccc66

import os
import numpy as np
import torch
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file as load_safetensors
from transformers import AutoTokenizer
from openvino.runtime import Core
import gradio as gr
import pandas as pd

# One-time setup at import: tokenizer plus the OpenVINO-compiled classifier.
HF_MODEL = "Kaiyeee/goemotions-multilabel"

# The tokenizer is loaded from the stock "roberta-base" checkpoint, not from HF_MODEL.
tokenizer = AutoTokenizer.from_pretrained("roberta-base")

# Download the ONNX export from the Hub repo, then read and compile it for CPU.
onnx_path = hf_hub_download(HF_MODEL, "goemotions_multilabel.onnx")
core = Core()
ov_model = core.read_model(onnx_path)
compiled = core.compile_model(ov_model, device_name="CPU")

# Label names, indexed by position: predict() maps the j-th model output
# column to emotion_labels[j], so the order here must not change.
emotion_labels = [
    "admiration", "amusement", "anger", "annoyance",
    "approval", "caring", "confusion", "curiosity",
    "desire", "disappointment", "disapproval", "disgust",
    "embarrassment", "excitement", "fear", "gratitude",
    "grief", "joy", "love", "nervousness",
    "optimism", "pride", "realization", "relief",
    "remorse", "sadness", "surprise", "neutral",
]

def predict(texts, threshold=0.3):
    """Return the emotion labels that fire for each input text.

    Args:
        texts: A single string or an iterable of strings to classify.
        threshold: A label fires when its sigmoid probability is strictly
            greater than this value.

    Returns:
        A list with one string per input text: the fired labels joined by
        ", " in ``emotion_labels`` order, or "none" when no label fired.
        An empty input yields an empty list.
    """
    # Normalise the input to a concrete list so emptiness can be checked and
    # generators or other iterables are accepted as well.
    if isinstance(texts, str):
        texts = [texts]
    else:
        texts = list(texts)

    # Nothing to classify: skip the tokenizer and the model entirely instead
    # of handing them an empty batch.
    if not texts:
        return []

    # Every text is padded/truncated to exactly 128 tokens.
    toks = tokenizer(texts, padding="max_length", truncation=True, max_length=128, return_tensors="np")
    outs = compiled([toks["input_ids"], toks["attention_mask"]])
    logits = outs[compiled.output(0)]

    # Independent sigmoid per label (multi-label), then threshold.
    probs = 1 / (1 + np.exp(-logits))
    fired_mask = probs > threshold

    return [
        ", ".join(label for label, flag in zip(emotion_labels, row) if flag) or "none"
        for row in fired_mask
    ]

# Process multiline text input
def predict_bulk(texts_str, threshold=0.3):
    """Classify each non-blank line of a text blob and format the result as Markdown.

    Args:
        texts_str: Text with one sample per line; blank lines are ignored and
            surrounding whitespace is stripped. ``None`` is treated as empty.
        threshold: Probability cut-off forwarded to ``predict``.

    Returns:
        One "<text>: **label**, **label**" entry per input line, separated by
        blank lines, or an empty string when there is no non-blank line.
    """
    stripped_lines = (line.strip() for line in (texts_str or "").split("\n"))
    texts = [line for line in stripped_lines if line]

    # With no usable line there is nothing to send to the model; return empty
    # Markdown rather than calling predict() with an empty batch.
    if not texts:
        return ""

    results = predict(texts, threshold)
    return "\n\n".join(
        f"{text}: " + ", ".join(f"**{label}**" for label in labels.split(", "))
        for text, labels in zip(texts, results)
    )



# Process CSV file upload
def predict_file(file_obj, threshold=0.3):
    """Add an ``emotions`` column to an uploaded CSV and return the new file's path.

    Args:
        file_obj: The upload passed in by the file component: either an object
            exposing its path as ``.name`` or a plain path string.
        threshold: Probability cut-off forwarded to ``predict``.

    Returns:
        Path to a newly written ``predictions.csv`` holding the original
        columns plus ``emotions``.

    Raises:
        gr.Error: If no file was provided, the file cannot be parsed as CSV,
            or it has no ``text`` column. The output component expects a file
            path, so problems are raised as errors instead of being returned
            as a message string.
    """
    import tempfile

    if file_obj is None:
        raise gr.Error("Please upload a CSV file first.")

    # Accept both a file-like wrapper (path in .name) and a bare path string.
    csv_path = getattr(file_obj, "name", file_obj)
    try:
        df = pd.read_csv(csv_path)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        raise gr.Error(f"Could not read the uploaded file as CSV: {exc}") from exc

    if "text" not in df.columns:
        raise gr.Error("CSV must have a 'text' column.")

    # Missing cells become empty strings instead of the literal text "nan".
    texts = ["" if pd.isna(value) else str(value) for value in df["text"]]

    # A header-only CSV has no rows; skip the model and write an empty column.
    df["emotions"] = predict(texts, threshold) if texts else []

    # Write into a fresh temp directory so concurrent requests never overwrite
    # each other's output, while keeping the download name "predictions.csv".
    # NOTE: the directory is not cleaned up here; it lives until the OS/host
    # clears its temp storage.
    out_dir = tempfile.mkdtemp(prefix="predictions_")
    out_path = os.path.join(out_dir, "predictions.csv")
    df.to_csv(out_path, index=False)
    return out_path

# Two-tab UI; both tabs read the same threshold slider.
with gr.Blocks() as demo:
    gr.Markdown("# 🦄 Sentiment Analyzer for 28 different emotions!")

    thr = gr.Slider(minimum=0.1, maximum=0.9, value=0.3, label="Threshold")

    # Tab 1: pasted text, one sample per line, results rendered as Markdown.
    with gr.Tab("Paste Text (one per line)"):
        inp = gr.Textbox(
            label="Enter texts (one per line)",
            lines=10,
            placeholder="Enter sentences here",
        )
        out = gr.Markdown(label="Predicted emotions")
        btn = gr.Button("Analyze")
        btn.click(predict_bulk, [inp, thr], out)

    # Tab 2: CSV in, annotated CSV out.
    with gr.Tab("Upload CSV"):
        file_inp = gr.File(label="Upload CSV with a 'text' column")
        out_file = gr.File(label="Download CSV with emotions")
        file_btn = gr.Button("Analyze CSV")
        file_btn.click(predict_file, [file_inp, thr], out_file)

demo.launch()