|
|
""" |
|
|
Gradio app for Polish Twitter Emotion Classifier. |
|
|
|
|
|
This application provides an interactive interface for predicting emotions |
|
|
and sentiment in Polish text using a fine-tuned RoBERTa model. |
|
|
|
|
|
Environment Variables: |
|
|
HF_TOKEN: HuggingFace authentication token (required for private models and auto-logging) |
|
|
export HF_TOKEN=your_huggingface_token |
|
|
|
|
|
HF_DATASET_REPO: HuggingFace dataset name for storing predictions (optional) |
|
|
export HF_DATASET_REPO=your-username/predictions-dataset |
|
|
Default: "twitter-emotion-pl-feedback" |
|
|
|
|
|
Features: |
|
|
- Multi-label emotion and sentiment classification |
|
|
- Calibrated predictions with temperature scaling |
|
|
- Automatic prediction logging to HuggingFace datasets |
|
|
- Persistent data storage across space restarts |
|
|
""" |
|
|
|
|
|
import gradio as gr |
|
|
from transformers import AutoModelForSequenceClassification, AutoTokenizer |
|
|
import torch |
|
|
import numpy as np |
|
|
import json |
|
|
import os |
|
|
import re |
|
|
from datetime import datetime |
|
|
from datasets import Dataset, load_dataset |
|
|
from huggingface_hub import HfApi |
|
|
|
|
|
|
|
|
|
|
|
# Hub identifier of the classifier, the tokenizer truncation length, and the
# threshold used by default when a single global cut-off is applied.
MODEL_NAME = "yazoniak/twitter-emotion-pl-classifier"
MAX_LENGTH = 8192
DEFAULT_THRESHOLD = 0.5

# Authentication token; None when the environment variable is not set.
HF_TOKEN = os.getenv("HF_TOKEN")

# Dataset repository that receives logged predictions.
HF_DATASET_REPO = os.getenv("HF_DATASET_REPO", "twitter-emotion-pl-feedback")

# Emoji displayed next to each label in the rendered results.
LABEL_EMOJIS = dict(
    [
        ("radość", "😊"),
        ("wstręt", "🤢"),
        ("gniew", "😠"),
        ("przeczuwanie", "🤔"),
        ("pozytywny", "👍"),
        ("negatywny", "👎"),
        ("neutralny", "😐"),
        ("sarkazm", "😏"),
    ]
)
|
|
|
|
|
|
|
|
class HFDatasetLogger:
    """
    Best-effort logger that stores predictions in a HuggingFace dataset.

    Records are kept in the ``train`` split of ``dataset_name``. Because a
    push uploads the complete split, appending is done by loading the
    existing rows, concatenating the new row and pushing the combined
    dataset back. Failures are printed and never raised, so logging
    problems cannot break a prediction request.
    """

    def __init__(self, dataset_name: str, hf_token: str, private: bool = True):
        """
        Initialize the HuggingFace dataset logger.

        Args:
            dataset_name: Name of the dataset (e.g., "username/dataset-name")
            hf_token: HuggingFace authentication token
            private: Whether to create a private dataset
        """
        self.dataset_name = dataset_name
        self.hf_token = hf_token
        self.private = private
        self.api = HfApi()
        self.dataset_exists = False

        # Probe for an existing "train" split. Streaming mode is used so the
        # probe does not need to materialise the whole dataset locally.
        try:
            load_dataset(dataset_name, split="train", token=hf_token, streaming=True)
        except Exception as e:
            # The probe also fails on transient errors, not only when the
            # dataset is missing; surface the reason instead of hiding it so
            # an unexpected "dataset will be created" is diagnosable.
            print(f"⚠ Dataset probe failed ({type(e).__name__}): {e}")
            self.dataset_exists = False
        else:
            self.dataset_exists = True

    def log(
        self,
        text: str,
        mode: str,
        threshold: float,
        anonymize: bool,
        predictions: str,
        json_output: str,
    ) -> None:
        """
        Log a prediction to the HuggingFace dataset.

        Errors are reported with ``print`` and swallowed. If appending to an
        existing dataset fails, the record is dropped and the remote dataset
        is left untouched (it is never replaced by the single new row).

        Args:
            text: Input text
            mode: Prediction mode
            threshold: Threshold value
            anonymize: Anonymization setting
            predictions: Prediction output (markdown)
            json_output: JSON output with scores
        """
        try:
            from datetime import timezone

            # Same naive-UTC ISO string that datetime.utcnow() produced, but
            # without relying on the deprecated call; the tzinfo is dropped so
            # new rows match the format of rows already stored.
            timestamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

            data_entry = {
                "timestamp": timestamp,
                "text": text,
                "mode": mode,
                "threshold": float(threshold),
                "anonymize": bool(anonymize),
                "predictions": predictions,
                "json_output": json_output,
            }

            # Single-row dataset: every column holds a one-element list.
            new_data = Dataset.from_dict({k: [v] for k, v in data_entry.items()})

            if self.dataset_exists:
                try:
                    from datasets import concatenate_datasets

                    existing_dataset = load_dataset(
                        self.dataset_name, split="train", token=self.hf_token
                    )
                    combined_dataset = concatenate_datasets([existing_dataset, new_data])
                    combined_dataset.push_to_hub(
                        self.dataset_name,
                        token=self.hf_token,
                        private=self.private,
                    )
                except Exception as e:
                    # Do NOT fall back to pushing new_data on its own: that
                    # would overwrite every previously logged row whenever
                    # the load or push fails (e.g. a network hiccup).
                    print(f"⚠ Error appending to dataset: {e}")
                    print("  Prediction not logged; existing dataset left unchanged")
            else:
                # No dataset was found at start-up: the first row creates it.
                new_data.push_to_hub(
                    self.dataset_name, token=self.hf_token, private=self.private
                )
                self.dataset_exists = True

        except Exception as e:
            print(f"⚠ Error logging to HuggingFace dataset: {e}")
|
|
|
|
|
|
|
|
def preprocess_text(text: str, anonymize_mentions: bool = True) -> str:
    """
    Optionally mask @mentions in the input text.

    Args:
        text: Input text to preprocess
        anonymize_mentions: When true, every ``@`` followed by word
            characters is replaced with ``@anonymized_account``

    Returns:
        The text with mentions masked, or the unchanged text when
        anonymization is disabled
    """
    if not anonymize_mentions:
        return text

    mention_pattern = re.compile(r"@\w+")
    return mention_pattern.sub("@anonymized_account", text)
|
|
|
|
|
|
|
|
def load_model():
    """
    Load the model, tokenizer, and calibration artifacts.

    For private models, requires HF_TOKEN environment variable to be set.
    Calibration artifacts are optional: if ``calibration_artifacts.json``
    cannot be downloaded, parsed, or does not cover every model label, a
    warning is printed and ``None`` is returned in their place.

    Returns:
        tuple: (model, tokenizer, labels, calibration_artifacts)
    """
    print(f"Loading model: {MODEL_NAME}")

    if HF_TOKEN:
        print(f"Using authentication token for model: {MODEL_NAME}")
        auth_kwargs = {"token": HF_TOKEN}
    else:
        print(f"Loading public model: {MODEL_NAME}")
        # No token argument at all, exactly as for an anonymous download.
        auth_kwargs = {}

    model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, **auth_kwargs)
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, **auth_kwargs)

    # Inference only: switch the module to evaluation mode.
    model.eval()

    # Label names ordered by class index.
    labels = [model.config.id2label[i] for i in range(model.config.num_labels)]

    calibration_artifacts = None
    try:
        from huggingface_hub import hf_hub_download

        calib_path = hf_hub_download(
            repo_id=MODEL_NAME, filename="calibration_artifacts.json", token=HF_TOKEN
        )
        # Keys are Polish label names containing non-ASCII characters, so the
        # file must be decoded as UTF-8 rather than the platform default.
        with open(calib_path, "r", encoding="utf-8") as f:
            loaded_artifacts = json.load(f)

        # Fail here (and disable calibrated mode) instead of raising a
        # KeyError later, in the middle of a prediction request.
        missing = [
            f"{section}[{label!r}]"
            for section in ("temperatures", "optimal_thresholds")
            for label in labels
            if label not in loaded_artifacts.get(section, {})
        ]
        if missing:
            raise ValueError(f"incomplete calibration artifacts, missing: {', '.join(missing)}")

        calibration_artifacts = loaded_artifacts
        print("✓ Calibration artifacts loaded")
    except Exception as e:
        print(f"⚠ Could not load calibration artifacts: {e}")
        print(" Calibrated mode will not be available")

    return model, tokenizer, labels, calibration_artifacts
|
|
|
|
|
|
|
|
|
|
|
# Load everything once at import time so each request reuses the same objects.
print("Loading model...")
model, tokenizer, labels, calibration_artifacts = load_model()
print(f"✓ Model loaded successfully with {len(labels)} labels")
print(f" Labels: {', '.join(labels)}")

# Prediction logging is only possible with a token; hf_logger stays None
# whenever logging is disabled or could not be set up.
hf_logger = None
if not HF_TOKEN:
    print("⚠ HF_TOKEN not set - auto-logging disabled")
else:
    try:
        hf_logger = HFDatasetLogger(
            dataset_name=HF_DATASET_REPO,
            hf_token=HF_TOKEN,
            private=True,
        )
        print(f"✓ Auto-logging enabled - all predictions will be saved to: {HF_DATASET_REPO}")
        dataset_status = (
            " Dataset found - will append new predictions"
            if hf_logger.dataset_exists
            else " Dataset will be created on first prediction"
        )
        print(dataset_status)
    except Exception as init_error:
        print(f"⚠ Could not initialize auto-logging: {init_error}")
        print(" Predictions will not be logged")
|
|
|
|
|
|
|
|
def _format_score_group(
    heading,
    group_labels,
    index_of,
    probabilities,
    predictions,
    used_thresholds,
    show_thresholds,
):
    """Render one "All Scores" section: a heading plus one row per known label."""
    lines = [f"**{heading}:**\n\n"]
    for label in group_labels:
        idx = index_of.get(label)
        if idx is None:
            # The model does not expose this label; nothing to show.
            continue
        emoji = LABEL_EMOJIS.get(label, "🏷️")
        status = "✓" if predictions[idx] else "·"
        thresh_info = (
            f" (threshold: {used_thresholds[idx]:.2f})" if show_thresholds else ""
        )
        lines.append(
            f"{status} {emoji} {label:15s}: {probabilities[idx]:.4f}{thresh_info}\n\n"
        )
    return lines


def predict_emotions(
    text: str,
    mode: str = "Calibrated",
    threshold: float = DEFAULT_THRESHOLD,
    anonymize: bool = True,
) -> tuple[str, str]:
    """
    Predict emotions and sentiment for Polish text.

    Every successful prediction is also passed to the HuggingFace dataset
    logger when one is configured; logging failures never affect the result.

    Args:
        text: Input Polish text
        mode: Prediction mode. "Calibrated" applies per-label temperature
            scaling and per-label thresholds; any other value (the UI sends
            "Default") applies a plain sigmoid with one global threshold
        threshold: Classification threshold (0-1) - ignored in Calibrated mode
        anonymize: Whether to anonymize @mentions

    Returns:
        tuple: (formatted_predictions, all_scores_json). On invalid input the
        first element is a warning message and the second is an empty string.
    """
    if not text or not text.strip():
        return "⚠️ Please enter some text to analyze", ""

    processed_text = preprocess_text(text, anonymize_mentions=anonymize)
    text_changed = processed_text != text

    use_calibration = mode == "Calibrated"

    if use_calibration and calibration_artifacts is None:
        return (
            "⚠️ Calibrated mode not available (calibration artifacts not found). Please use Default mode.",
            "",
        )

    # Validate for every non-calibrated mode, since all of them use the
    # threshold. The chained comparison also rejects NaN.
    if not use_calibration and not (0 <= threshold <= 1):
        return "⚠️ Threshold must be between 0 and 1", ""

    inputs = tokenizer(
        processed_text, return_tensors="pt", truncation=True, max_length=MAX_LENGTH
    )

    with torch.no_grad():
        outputs = model(**inputs)
        # Drop only the batch dimension so the result stays 1-D (one entry
        # per label) even for a single-label model.
        logits = outputs.logits.squeeze(0).numpy()

    if use_calibration:
        temperatures = calibration_artifacts["temperatures"]
        optimal_thresholds = calibration_artifacts["optimal_thresholds"]

        probabilities = []
        predictions = []
        used_thresholds = []

        for label, logit in zip(labels, logits):
            thresh = optimal_thresholds[label]
            # Temperature scaling: divide the logit before the sigmoid.
            calibrated_logit = logit / temperatures[label]
            prob = 1 / (1 + np.exp(-calibrated_logit))

            probabilities.append(prob)
            predictions.append(prob > thresh)
            used_thresholds.append(thresh)

        probabilities = np.array(probabilities)
    else:
        probabilities = 1 / (1 + np.exp(-logits))
        predictions = probabilities > threshold
        used_thresholds = [threshold] * len(labels)

    assigned_labels = [label for label, hit in zip(labels, predictions) if hit]

    # Collect the markdown fragments and join once at the end.
    parts = ["# Detected Labels\n\n"]

    if assigned_labels:
        for label, prob, hit in zip(labels, probabilities, predictions):
            if hit:
                emoji = LABEL_EMOJIS.get(label, "🏷️")
                parts.append(f"## {emoji} **{label}** `{prob:.1%}`\n\n")
    else:
        parts.append("## No Labels Detected\n\n")
        parts.append("All confidence scores are below the threshold(s).\n\n")

    parts.append("---\n\n")

    parts.append("<details>\n")
    parts.append("<summary><b>📊 All Scores (click to expand)</b></summary>\n\n")

    if text_changed and anonymize:
        parts.append(f"**Preprocessed text:** _{processed_text}_\n\n")

    parts.append(f"**Original text:** {text}\n\n")
    parts.append(f"**Mode:** {mode}")
    if not use_calibration:
        parts.append(f" (threshold: {threshold:.2f})")
    parts.append("\n\n")

    # First index of every label, matching what labels.index() would return.
    index_of = {}
    for position, label in enumerate(labels):
        index_of.setdefault(label, position)

    score_groups = (
        ("Emotions", ["radość", "wstręt", "gniew", "przeczuwanie"]),
        ("Sentiment", ["pozytywny", "negatywny", "neutralny"]),
        ("Special", ["sarkazm"]),
    )
    for heading, group_labels in score_groups:
        parts.extend(
            _format_score_group(
                heading,
                group_labels,
                index_of,
                probabilities,
                predictions,
                used_thresholds,
                show_thresholds=use_calibration,
            )
        )

    parts.append("</details>")
    result_text = "".join(parts)

    all_scores = {label: float(probabilities[i]) for i, label in enumerate(labels)}
    json_output = {
        "assigned_labels": assigned_labels,
        "all_scores": all_scores,
        "mode": mode,
        "text_length": len(text),
        "preprocessed": text_changed,
    }

    if use_calibration:
        json_output["temperatures"] = calibration_artifacts["temperatures"]
        json_output["optimal_thresholds"] = calibration_artifacts["optimal_thresholds"]
    else:
        json_output["threshold"] = threshold

    all_scores_json = json.dumps(json_output, indent=2, ensure_ascii=False)

    # Best-effort logging: a failure here must not hide the prediction.
    if hf_logger:
        try:
            hf_logger.log(
                text=text,
                mode=mode,
                threshold=threshold,
                anonymize=anonymize,
                predictions=result_text,
                json_output=all_scores_json,
            )
        except Exception as e:
            print(f"⚠ Error logging prediction: {e}")

    return result_text, all_scores_json
|
|
|
|
|
|
|
|
|
|
|
# Example rows for gr.Examples; each inner list provides one value per
# component listed in its `inputs` argument (here only the text box).
examples = [
    ["@zgp_intervillage Uwielbiam czekać na peronie 3 godziny! Gratulacje dla #zgp"],
]

# Declarative UI definition. Components created inside a `with` block are
# attached to that container.
# NOTE(review): the nesting of rows/columns below was reconstructed from the
# statement order; confirm it matches the intended layout.
with gr.Blocks(
    title="Polish Twitter Emotion Classifier", theme=gr.themes.Soft()
) as demo:
    # Page header and label legend.
    gr.Markdown("""
    # 🎭 Polish Twitter Emotion Classifier

    This **[model](https://huggingface.co/yazoniak/twitter-emotion-pl-classifier)** predicts emotions and sentiment in Polish text using a fine-tuned **[PKOBP/polish-roberta-8k](https://huggingface.co/PKOBP/polish-roberta-8k)** model.

    **Detected labels:**
    - **Emotions**: 😊 radość (joy), 🤢 wstręt (disgust), 😠 gniew (anger), 🤔 przeczuwanie (anticipation)
    - **Sentiment**: 👍 pozytywny (positive), 👎 negatywny (negative), 😐 neutralny (neutral)
    - **Special**: 😏 sarkazm (sarcasm)

    The model uses **multi-label classification** - text can have multiple emotions/sentiments simultaneously.
    """)

    with gr.Row():
        # First column: all input controls.
        with gr.Column(scale=2):
            text_input = gr.Textbox(
                label="Tweet to Analyze",
                placeholder="e.g., Wspaniały dzień! Jestem bardzo szczęśliwy :)",
                lines=4,
            )

            with gr.Row():
                # The choice strings are compared verbatim in predict_emotions.
                mode_input = gr.Radio(
                    choices=["Calibrated", "Default"],
                    value="Calibrated",
                    label="Prediction Mode",
                    info="Calibrated uses optimal thresholds per label (recommended)",
                )

                anonymize_input = gr.Checkbox(
                    value=True,
                    label="Anonymize @mentions",
                    info="Replace @username with @anonymized_account",
                )

            threshold_input = gr.Slider(
                minimum=0.0,
                maximum=1.0,
                value=DEFAULT_THRESHOLD,
                step=0.05,
                label="Threshold (Default mode only)",
                info="Only used when Default mode is selected",
            )

            predict_btn = gr.Button("Analyze Emotions", variant="primary", size="lg")

        # Second column: rendered markdown result plus the raw JSON details.
        with gr.Column(scale=3):
            prediction_output = gr.Markdown(label="Predictions")

            with gr.Accordion("Detailed JSON Output", open=False):
                json_output = gr.Code(label="Full Prediction Details", language="json")

    # The input order must match predict_emotions' positional parameters:
    # (text, mode, threshold, anonymize).
    predict_btn.click(
        fn=predict_emotions,
        inputs=[text_input, mode_input, threshold_input, anonymize_input],
        outputs=[prediction_output, json_output],
    )

    # Only the text box is wired as an example input, so predict_emotions
    # receives the text and falls back to its defaults for the rest.
    gr.Markdown("### Example Input")
    gr.Examples(
        examples=examples,
        inputs=[text_input],
        outputs=[prediction_output, json_output],
        fn=predict_emotions,
        cache_examples=False,
    )

    # Static footer: metrics, usage notes, limitations, citation, license and
    # the data collection notice.
    gr.Markdown("""
    ---
    ### Model Performance

    | Metric | Validation Score |
    |--------|------------------|
    | F1 Macro | 0.85 |
    | F1 Micro | 0.89 |
    | F1 Weighted | 0.89 |
    | Subset Accuracy | 0.89 |

    ### How to Use

    1. **Enter Polish text**: Paste a tweet, social media post, or any Polish text
    2. **Select mode**:
    - **Calibrated** (recommended): Uses temperature scaling and optimal thresholds per label
    - **Default**: Uses a single threshold for all labels
    3. **Adjust settings**: Toggle mention anonymization, adjust threshold (Default mode)
    4. **Click Analyze**: Get emotion and sentiment predictions with confidence scores

    ### Prediction Modes

    - **Calibrated Mode** (Recommended): Uses temperature scaling and label-specific optimal thresholds for better accuracy and calibration. This mode is recommended for most use cases.
    - **Default Mode**: Uses sigmoid activation with a single threshold across all labels. Useful for quick predictions or when you want uniform threshold control.


    ### Limitations

    - Model is trained on Polish Twitter data and works best with informal social media text
    - May not generalize well to formal Polish text (news, academic writing)
    - Optimal for tweet-length texts (not very long documents)
    - Multi-label nature means texts can have seemingly contradictory labels (e.g., sarkazm + pozytywny)

    ### Citation

    If you use this model, please cite:
    ```bibtex
    @model{yazoniak2025twitteremotionpl,
    author = {yazoniak},
    title = {Polish Twitter Emotion Classifier},
    year = {2025},
    publisher = {Hugging Face},
    url = {https://huggingface.co/yazoniak/twitter-emotion-pl-classifier}
    }
    ```

    ### 📄 License

    GPL-3.0 License

    ---

    ### 📊 Data Collection Notice

    This space automatically logs all predictions for model improvement and research purposes. The collected data includes:
    - Input text and analysis settings
    - Model predictions and confidence scores

    All data is stored securely in a private HuggingFace dataset and used solely for improving the model's performance.
    """)

# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|
|
|