| |
| """ |
| Sentiment Analysis App for HuggingFace Spaces |
| Clean, robust implementation with proper visualization |
| """ |
|
|
| import gradio as gr |
| import torch |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification |
| import numpy as np |
| import plotly.graph_objects as go |
| import pandas as pd |
| from typing import Dict, List, Tuple |
| import logging |
|
|
| |
# Configure logging once at import time; INFO is required for the
# logger.info(...) progress messages emitted by this module to be shown.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
|
|
| |
| |
| |
|
|
class SentimentAnalyzer:
    """Binary sentiment classifier built on a HuggingFace sequence-classification model.

    The tokenizer and model are loaded eagerly by ``__init__`` (via
    ``load_model``). Class index 0 is reported as negative and class index 1
    as positive.
    """

    # Model used when the requested ``model_name`` cannot be loaded.
    FALLBACK_MODEL = "distilbert-base-uncased-finetuned-sst-2-english"

    def __init__(self, model_name: str = "MartinRodrigo/distilbert-sentiment-imdb"):
        self.model_name = model_name
        self.tokenizer = None
        self.model = None
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.load_model()

    def _load(self, model_name: str) -> None:
        """Load ``model_name`` onto ``self.device`` and switch it to eval mode.

        Attributes are only assigned once both the tokenizer and the model
        have loaded, so a failure never leaves a mismatched pair behind.
        """
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForSequenceClassification.from_pretrained(model_name)
        model.to(self.device)
        model.eval()
        self.tokenizer = tokenizer
        self.model = model
        self.model_name = model_name

    def load_model(self):
        """Load the configured model, falling back to ``FALLBACK_MODEL`` on failure.

        Any exception raised while loading the primary model is logged and the
        fallback is tried instead; an exception from the fallback propagates.
        """
        try:
            logger.info(f"Loading model: {self.model_name}")
            self._load(self.model_name)
            logger.info(f"β Model loaded successfully on {self.device}")
        except Exception as e:
            # Deliberate best-effort: the app should still start when the
            # primary model is unavailable.
            logger.error(f"β Error loading model: {e}")
            logger.info("Falling back to base DistilBERT model...")
            self._load(self.FALLBACK_MODEL)

    @staticmethod
    def _error_result(message: str) -> Dict:
        """Build the result dict returned when no prediction could be made."""
        return {
            "sentiment": "ERROR",
            "confidence": 0.0,
            "probabilities": {"Negative": 0.5, "Positive": 0.5},
            "error": message,
        }

    def predict(self, text: str) -> Dict:
        """Predict sentiment for a single text.

        Args:
            text: Input text. Tokenization truncates it to 512 tokens.

        Returns:
            A dict with keys ``sentiment`` ("POSITIVE", "NEGATIVE" or "ERROR"),
            ``confidence`` (probability of the predicted class), ``probabilities``
            (``{"Negative": p0, "Positive": p1}``) and ``error`` (``None`` on
            success, otherwise a message). Empty, blank or non-string input and
            any exception raised during inference yield an "ERROR" result
            instead of raising.
        """
        # Non-string input is rejected here so that ``.strip()`` cannot raise
        # outside the try block below.
        if not isinstance(text, str) or not text.strip():
            return self._error_result("Please enter some text")

        try:
            inputs = self.tokenizer(
                text,
                return_tensors="pt",
                truncation=True,
                max_length=512,
                padding=True,
            )
            inputs = {k: v.to(self.device) for k, v in inputs.items()}

            with torch.no_grad():
                outputs = self.model(**inputs)
                probs = torch.nn.functional.softmax(outputs.logits, dim=-1)

            # Single input, so only the first row of the batch is relevant.
            probs_cpu = probs.cpu().numpy()[0]
            predicted_class = int(np.argmax(probs_cpu))
            confidence = float(probs_cpu[predicted_class])
            sentiment = "POSITIVE" if predicted_class == 1 else "NEGATIVE"

            return {
                "sentiment": sentiment,
                "confidence": confidence,
                "probabilities": {
                    "Negative": float(probs_cpu[0]),
                    "Positive": float(probs_cpu[1]),
                },
                "error": None,
            }
        except Exception as e:
            logger.error(f"Prediction error: {e}")
            return self._error_result(str(e))

    def predict_batch(self, texts: List[str]) -> List[Dict]:
        """Predict sentiment for multiple texts.

        Returns exactly one result per input, in input order, so callers can
        pair results with their texts positionally. Blank or ``None`` entries
        produce an "ERROR" result rather than being dropped.
        """
        return [self.predict(text) for text in texts]
|
|
| |
| |
| |
|
|
def create_probability_chart(probabilities: Dict[str, float]) -> go.Figure:
    """Create a bar chart for sentiment probabilities.

    Args:
        probabilities: Mapping of sentiment label to probability. The
            "Negative" label is drawn in red; every other label in green.

    Returns:
        A dark-themed Plotly figure with one bar per label, each annotated
        with its percentage.
    """
    labels = list(probabilities)
    values = [probabilities[label] for label in labels]
    bar_colors = ["#ff4444" if label == "Negative" else "#44ff44" for label in labels]

    fig = go.Figure(
        go.Bar(
            x=labels,
            y=values,
            marker_color=bar_colors,
            text=[f"{v:.1%}" for v in values],
            textposition='outside',
            textfont=dict(size=14, color='white'),
            hovertemplate='<b>%{x}</b><br>Probability: %{y:.2%}<extra></extra>',
        )
    )

    fig.update_layout(
        title={
            'text': "Sentiment Probability Distribution",
            'x': 0.5,
            'xanchor': 'center',
            'font': {'size': 16, 'color': 'white'},
        },
        xaxis_title="Sentiment",
        yaxis_title="Probability",
        # Headroom above 1.0 so the 'outside' percentage label of a bar close
        # to 100% is not clipped at the top of the plot area.
        yaxis_range=[0, 1.1],
        height=350,
        template="plotly_dark",
        showlegend=False,
        margin=dict(t=60, b=60, l=60, r=40),
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(0,0,0,0)',
    )

    return fig
|
|
def create_batch_chart(results: List[Dict], texts: List[str]) -> go.Figure:
    """Create a bar chart for batch analysis results.

    Args:
        results: Prediction dicts; the ``'sentiment'`` and ``'confidence'``
            keys of each are read.
        texts: The analysed texts, positionally aligned with ``results``;
            used (truncated to 100 characters) in the hover tooltip.

    Returns:
        A dark-themed Plotly figure with one confidence bar per result,
        coloured by sentiment.
    """
    # Explicit palette so a non-prediction such as "ERROR" is shown in a
    # neutral colour instead of being painted as positive.
    palette = {"NEGATIVE": "#ff4444", "POSITIVE": "#44ff44"}
    neutral_color = "#888888"

    text_labels = [f"Text {i}" for i, _ in enumerate(results, 1)]
    confidences = [r['confidence'] for r in results]
    sentiments = [r['sentiment'] for r in results]
    colors = [palette.get(s, neutral_color) for s in sentiments]

    # Keep tooltips compact for long inputs.
    hover_texts = [t[:100] + "..." if len(t) > 100 else t for t in texts]

    fig = go.Figure()
    fig.add_trace(go.Bar(
        x=text_labels,
        y=confidences,
        marker_color=colors,
        text=[f"{c:.1%}" for c in confidences],
        textposition='outside',
        textfont=dict(size=12, color='white'),
        customdata=list(zip(sentiments, hover_texts)),
        hovertemplate='<b>%{x}</b><br>' +
                      'Sentiment: %{customdata[0]}<br>' +
                      'Confidence: %{y:.1%}<br>' +
                      'Text: %{customdata[1]}<extra></extra>',
    ))

    fig.update_layout(
        title={
            'text': "Batch Analysis Results",
            'x': 0.5,
            'xanchor': 'center',
            'font': {'size': 16, 'color': 'white'},
        },
        xaxis_title="Text Number",
        yaxis_title="Confidence",
        # Headroom above 1.0 leaves space for the 'outside' bar labels.
        yaxis_range=[0, 1.1],
        height=400,
        template="plotly_dark",
        showlegend=False,
        margin=dict(t=60, b=80, l=60, r=40),
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(0,0,0,0)',
    )

    return fig
|
|
| |
| |
| |
|
|
def analyze_text(text: str) -> Tuple[str, str, go.Figure]:
    """Run the shared analyzer on one text and format the result for the UI.

    Returns a ``(sentiment markdown, confidence string, probability chart)``
    tuple. When the prediction carries an error, the markdown shows the error,
    the confidence is "0.0" and the chart shows an even 50/50 split.
    """
    outcome = analyzer.predict(text)

    error_message = outcome['error']
    if error_message:
        even_split = {"Negative": 0.5, "Positive": 0.5}
        return (
            f"β οΈ **Error:** {error_message}",
            "0.0",
            create_probability_chart(even_split),
        )

    label = outcome['sentiment']
    if label == "POSITIVE":
        emoji = "π"
    else:
        emoji = "π"

    sentiment_markdown = f"{emoji} **{label}**"
    confidence_display = f"{outcome['confidence']:.1%}"
    probability_chart = create_probability_chart(outcome['probabilities'])

    return sentiment_markdown, confidence_display, probability_chart
|
|
def _message_figure(message: str) -> go.Figure:
    """Return an empty dark-themed figure showing ``message`` centred in gray."""
    fig = go.Figure()
    fig.update_layout(
        template="plotly_dark",
        paper_bgcolor='rgba(0,0,0,0)',
        annotations=[{
            'text': message,
            'xref': 'paper',
            'yref': 'paper',
            'x': 0.5,
            'y': 0.5,
            'showarrow': False,
            'font': {'size': 16, 'color': 'gray'},
        }],
    )
    return fig


def analyze_batch(text_input: str) -> Tuple[str, go.Figure]:
    """Analyze multiple texts, one per line.

    Args:
        text_input: Newline-separated texts. Lines are stripped and blank
            lines are ignored; ``None`` is treated as empty input.

    Returns:
        A ``(markdown summary, chart)`` tuple. When there is no non-blank
        line, the summary is a prompt to enter texts and the chart is an
        empty placeholder figure.
    """
    # A single guard covers None, empty and whitespace-only input: any input
    # with visible characters yields at least one non-blank line.
    texts = [line.strip() for line in (text_input or "").split('\n') if line.strip()]
    if not texts:
        return (
            "Please enter texts (one per line)",
            _message_figure("Please enter texts to analyze"),
        )

    results = analyzer.predict_batch(texts)

    summary_lines = ["### π Analysis Results\n"]
    for i, (text, result) in enumerate(zip(texts, results), 1):
        emoji = "π" if result['sentiment'] == "POSITIVE" else "π"
        # Keep each summary row short; the full text is not repeated here.
        text_preview = (text[:60] + "...") if len(text) > 60 else text
        summary_lines.append(
            f"{i}. {emoji} **{result['sentiment']}** ({result['confidence']:.1%}) - *{text_preview}*"
        )

    summary = "\n".join(summary_lines)
    chart = create_batch_chart(results, texts)

    return summary, chart
|
|
| |
| |
| |
|
|
# Build the analyzer once at import time; analyze_text and analyze_batch both
# use this module-level instance.
logger.info("Initializing sentiment analyzer...")
analyzer = SentimentAnalyzer()
|
|
| |
| |
| |
|
|
# Example inputs for the single-text tab; each row is a one-element list
# (one value per input component).
SINGLE_EXAMPLES = [
    [sample]
    for sample in (
        "This movie is absolutely fantastic! I loved every minute of it.",
        "Terrible experience. Waste of time and money.",
        "The product is okay, nothing special but it works.",
        "Best purchase ever! Highly recommend to everyone!",
        "Disappointed with the quality. Expected much better.",
    )
]

# Default content of the batch textbox: the first four examples, one per line.
BATCH_EXAMPLE = "\n".join(row[0] for row in SINGLE_EXAMPLES[:4])
|
|
| |
| |
| |
|
|
# Static CSS and Markdown used by the interface, kept apart from the layout
# code below so the component tree is easier to read.
_APP_CSS = """
.gradio-container {
max-width: 1200px !important;
}
#component-0 {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
}
"""

_HEADER_MARKDOWN = """
# π Sentiment Analysis with DistilBERT

Analyze text sentiment with a fine-tuned transformer model trained on IMDB reviews.

**Model:** DistilBERT | **Accuracy:** 80% | **F1:** 0.7981
"""

_ABOUT_MARKDOWN = """
## About This Model

### ποΈ Architecture
- **Base Model:** DistilBERT (Distilled BERT)
- **Parameters:** 66 million
- **Training Data:** IMDB Movie Reviews (50k reviews)
- **Fine-tuning:** Binary sentiment classification

### π Performance Metrics
- **Test Accuracy:** 80.0%
- **F1 Score:** 0.7981
- **Precision:** High
- **Recall:** Balanced

### β‘ Features
- Fast inference (~100ms per prediction)
- Batch processing support
- Interactive visualizations
- Production-ready deployment

### π Resources
- **Model Repository:** [MartinRodrigo/distilbert-sentiment-imdb](https://huggingface.co/MartinRodrigo/distilbert-sentiment-imdb)
- **Space:** [transformer-sentiment-analysis](https://huggingface.co/spaces/MartinRodrigo/transformer-sentiment-analysis)
- **GitHub:** [ransformer-sentiment-analysis](https://github.com/mrdesautu/ransformer-sentiment-analysis)

### π οΈ Tech Stack
- **Framework:** PyTorch + Transformers
- **UI:** Gradio
- **Visualization:** Plotly
- **Tracking:** MLflow (local development)

---

Built with β€οΈ by Martin Rodrigo
"""

with gr.Blocks(
    theme=gr.themes.Soft(primary_hue="blue", secondary_hue="green"),
    css=_APP_CSS,
) as demo:

    # Page header
    gr.Markdown(_HEADER_MARKDOWN)

    with gr.Tabs():

        # Tab 1: one text in, sentiment + confidence + probability chart out
        with gr.TabItem("π Single Analysis"):
            gr.Markdown("### Analyze individual texts")

            with gr.Row():
                with gr.Column(scale=1):
                    single_input = gr.Textbox(
                        label="Enter your text",
                        placeholder="Type or paste your text here...",
                        lines=6,
                        max_lines=10,
                    )
                    single_btn = gr.Button("π Analyze", variant="primary", size="lg")

                    gr.Examples(
                        examples=SINGLE_EXAMPLES,
                        inputs=single_input,
                        label="Try these examples:",
                    )

                with gr.Column(scale=1):
                    single_sentiment = gr.Markdown(
                        label="Result",
                        value="*Results will appear here*",
                    )
                    single_confidence = gr.Textbox(
                        label="Confidence Score",
                        interactive=False,
                    )
                    single_plot = gr.Plot(label="Probability Distribution")

        # Tab 2: several texts (one per line) in, summary + chart out
        with gr.TabItem("π Batch Processing"):
            gr.Markdown("### Process multiple texts at once (one per line)")

            with gr.Row():
                with gr.Column(scale=1):
                    batch_input = gr.Textbox(
                        label="Enter multiple texts (one per line)",
                        placeholder="Enter texts, one per line...",
                        lines=10,
                        value=BATCH_EXAMPLE,
                    )
                    batch_btn = gr.Button("π Process Batch", variant="primary", size="lg")

                with gr.Column(scale=1):
                    batch_results = gr.Markdown(
                        label="Results Summary",
                        value="*Results will appear here*",
                    )
                    batch_plot = gr.Plot(label="Batch Analytics")

        # Tab 3: static information about the model and project
        with gr.TabItem("βΉοΈ About"):
            gr.Markdown(_ABOUT_MARKDOWN)

    # Event wiring: each button runs its handler and fills that tab's outputs.
    single_btn.click(
        fn=analyze_text,
        inputs=[single_input],
        outputs=[single_sentiment, single_confidence, single_plot],
    )

    batch_btn.click(
        fn=analyze_batch,
        inputs=[batch_input],
        outputs=[batch_results, batch_plot],
    )
|
|
| |
| |
| |
|
|
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    launch_options = {"share": False, "show_error": True}
    demo.launch(**launch_options)
|
|