Spaces:

kluvin
/

158-mls-tweet-sentiment

Sleeping

+# syntax=docker/dockerfile:1
+# uv is pulled from a pinned release (not :latest) so rebuilds are reproducible.
+# Bump UV_VERSION deliberately when upgrading.
+ARG UV_VERSION=0.5.11
+FROM ghcr.io/astral-sh/uv:${UV_VERSION} AS uv
+# Use Python 3.13 slim image
+FROM python:3.13-slim
+# Run as an unprivileged user. HOME is set explicitly so the Hugging Face model
+# cache populated at build time lives in a directory the runtime user owns.
+RUN useradd --create-home --uid 1000 user
+ENV HOME=/home/user
+# Set working directory; app.py writes its .model_cache here when HF_HOME is
+# unset, so the directory must be writable by the runtime user.
+WORKDIR /app
+RUN chown user:user /app
+# Install uv for faster dependency management
+COPY --from=uv /uv /usr/local/bin/uv
+# Install dependencies (still as root, into the system interpreter).
+# scikit-learn, polars and joblib are imported by app.py and must be present.
+# NOTE: COPY cannot read paths outside the build context, so the previous
+# "COPY ../pyproject.toml" step was removed; the file was never used anyway.
+# TODO: pin package versions for fully reproducible builds.
+RUN uv pip install --system --no-cache \
+        accelerate \
+        flask \
+        gunicorn \
+        huggingface-hub \
+        joblib \
+        polars \
+        scikit-learn \
+        torch \
+        transformers
+# Copy web app files (after the dependency layer so code edits keep it cached)
+COPY --chown=user:user . .
+USER user
+# Download model at build time (optional - caches model in image)
+# Comment out to download on first run instead
+RUN python -c "from transformers import pipeline; pipeline('sentiment-analysis', model='kluvin/bertweet-tweet-sentiment')"
+# Expose port (7860 for HF Spaces, 5000 for local)
+EXPOSE 7860
+# Set environment variables
+ENV FLASK_APP=app.py \
+    PYTHONUNBUFFERED=1 \
+    PORT=7860
+# Run the app with gunicorn for production
+# --preload loads models once before forking workers (saves memory)
+# Exec form + "exec" makes gunicorn PID 1 so it receives SIGTERM on stop;
+# the sh -c wrapper is only there to expand $PORT.
+CMD ["sh", "-c", "exec gunicorn --preload --bind 0.0.0.0:${PORT} --workers 2 --timeout 120 app:app"]

README.md CHANGED Viewed

@@ -1,11 +1,36 @@
 ---
-title: 158 Mls Tweet Sentiment
-emoji: 💻
-colorFrom: indigo
-colorTo: pink
 sdk: docker
 pinned: false
-short_description: Sentiment Analysis over bertweet + ML
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: Tweet Sentiment Classifier
+emoji: 🐦
+colorFrom: blue
+colorTo: green
 sdk: docker
+app_port: 7860
 pinned: false
 ---
+# Tweet Sentiment Classifier
+Compare 5 different sentiment analysis models side-by-side on tweet text:
+- **BERTweet** (Transformer) - Fine-tuned from `vinai/bertweet-base`
+- **Decision Tree** (TF-IDF)
+- **Random Forest** (TF-IDF)
+- **Logistic Regression** (TF-IDF)
+- **Linear SVM** (TF-IDF)
+## How it works
+Type or paste a tweet and click "Analyze Sentiment" to see predictions from all 5 models simultaneously.
+## First run
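+The scikit-learn models download their training data and train the first time the app starts (~2-3 minutes). The trained models are then cached, so later startups load them from the cache instead of retraining.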
+## Tech Stack
+- **Frontend**: HTMX for reactive updates
+- **Backend**: Flask
+- **Models**: HuggingFace Transformers (BERTweet) + scikit-learn
+- **Data**: SetFit/tweet_sentiment_extraction dataset
+Built as a class project for DAT158 Machine Learning.

app.py ADDED Viewed

	@@ -0,0 +1,156 @@

+from flask import Flask, request, render_template
+from transformers import pipeline
+from sklearn.pipeline import Pipeline
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.svm import LinearSVC
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.linear_model import LogisticRegression
+from sklearn.tree import DecisionTreeClassifier
+import polars as pl
+import joblib
+from pathlib import Path
+import logging
+import os
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
+app = Flask(__name__)
+CLASS_ID_TO_SENTIMENT = {
+    "0": "negative",
+    "1": "neutral",
+    "2": "positive"
+}
+# Use HF Spaces persistent storage if available, otherwise local cache
+CACHE_DIR = Path(os.getenv("HF_HOME", ".")) / ".model_cache"
+CACHE_DIR.mkdir(exist_ok=True)
+logger.info("Loading BERTweet from HuggingFace Hub...")
+bertweet_pipeline = pipeline("sentiment-analysis", model="kluvin/bertweet-tweet-sentiment")
+logger.info("BERTweet loaded successfully")
+# Define model configurations
+model_configs = {
+    "Decision Tree": Pipeline([
+        ("tfidf", TfidfVectorizer(max_features=2000, stop_words="english")),
+        ("clf", DecisionTreeClassifier(max_depth=10, random_state=42))
+    ]),
+    "Random Forest": Pipeline([
+        ("tfidf", TfidfVectorizer(max_features=500, stop_words="english")),
+        ("clf", RandomForestClassifier(n_estimators=100, random_state=42))
+    ]),
+    "Logistic Regression": Pipeline([
+        ("tfidf", TfidfVectorizer(max_features=2000, stop_words="english")),
+        ("clf", LogisticRegression(max_iter=1000, random_state=42))
+    ]),
+    "Linear SVM": Pipeline([
+        ("tfidf", TfidfVectorizer(max_features=2000, stop_words="english")),
+        ("clf", LinearSVC(random_state=42))
+    ])
+}
+sklearn_pipelines = {}
+cache_file = CACHE_DIR / "ml_models.joblib"
+if cache_file.exists():
+    logger.info("Loading cached ML models...")
+    try:
+        sklearn_pipelines = joblib.load(cache_file)
+        logger.info("✓ Cached models loaded successfully!")
+    except Exception as e:
+        logger.error(f"Failed to load cache: {e}")
+        logger.info("Will retrain models...")
+if not sklearn_pipelines:
+    logger.info("Loading training data and training ML models...")
+    splits = {'train': 'train.jsonl'}
+    df = pl.read_ndjson('hf://datasets/SetFit/tweet_sentiment_extraction/' + splits['train'])
+    X_train = df['text'].to_list()
+    y_train = df['label'].to_list()
+    logger.info("Training models...")
+    for model_name, sklearn_pipeline in model_configs.items():
+        logger.info(f"  Training {model_name}...")
+        sklearn_pipeline.fit(X_train, y_train)
+        sklearn_pipelines[model_name] = sklearn_pipeline
+    logger.info("Saving models to cache...")
+    joblib.dump(sklearn_pipelines, cache_file)
+    logger.info(f"✓ Models cached at {cache_file}")
+logger.info("All models loaded and ready!")
+def render_model_result(model_name: str, sentiment_name: str, probability: float | None) -> str:
+    probability_text = f"Probability: {probability:.2%}" if probability else "N/A"
+    return f'''
+        <div class="model-result {sentiment_name}">
+            <h3>{model_name}</h3>
+            <p class="sentiment">{sentiment_name.capitalize()}</p>
+            <p class="confidence">{probability_text}</p>
+        </div>
+    '''
+@app.route('/')
+def home():
+    return render_template('index.html')
+@app.route('/classify', methods=['POST'])
+def clasify():
+    try:
+        text_input = request.form['text']
+        if not text_input.strip():
+            return '''
+                <div class="result error">
+                    <h2>Error: Please enter some text</h2>
+                </div>
+            '''
+        logger.info(f"Classifying: {text_input[:50]}...")
+        results_html = ""
+        pipeline_output = bertweet_pipeline(text_input)[0]
+        predicted_class_id = pipeline_output['label']
+        probability = pipeline_output['score']
+        sentiment_name = CLASS_ID_TO_SENTIMENT[predicted_class_id]
+        results_html += render_model_result("BERTweet (Transformer)", sentiment_name, probability)
+        for model_name, sklearn_pipeline in sklearn_pipelines.items():
+            inputs = [text_input]
+            predicted_class = sklearn_pipeline.predict(inputs)[0]
+            classifier = sklearn_pipeline.named_steps['clf']
+            if hasattr(classifier, 'predict_proba'):
+                class_probabilities = sklearn_pipeline.predict_proba(inputs)[0]
+                probability = class_probabilities.max()
+            elif hasattr(classifier, 'decision_function'):
+                decision_scores = sklearn_pipeline.decision_function(inputs)[0]
+                probability = 1.0 / (1.0 + abs(decision_scores.min()))
+            else:
+                probability = None
+            sentiment_name = CLASS_ID_TO_SENTIMENT[str(predicted_class)]
+            results_html += render_model_result(model_name, sentiment_name, probability)
+        return f'<div class="results-grid">{results_html}</div>'
+    except Exception as e:
+        logger.error(f"Classification error: {e}", exc_info=True)
+        return f'''
+            <div class="result error">
+                <h2>Error: {e}</h2>
+            </div>
+        '''
+if __name__ == "__main__":
+    if app.debug:
+        logger.setLevel(logging.DEBUG)
+    app.run(debug=True)

docker-compose.yml ADDED Viewed

	@@ -0,0 +1,11 @@

+# Local development stack: builds the image from the Dockerfile in this directory.
+# NOTE(review): recent Docker Compose releases treat the top-level `version` key
+# as obsolete and ignore it; it is kept here for older docker-compose tooling.
+version: '3.8'
+services:
+  web:
+    build: .
+    ports:
+      # host port 5000 -> container port 7860 (the PORT value set below)
+      - "5000:7860"
+    environment:
+      - PYTHONUNBUFFERED=1
+      - PORT=7860
+    restart: unless-stopped

static/images/bird.png ADDED Viewed

static/index.css ADDED Viewed

	@@ -0,0 +1,134 @@

+/* Page shell: gradient background with the card centred in the viewport. */
+body {
+    font-family: 'Segoe UI', sans-serif;
+    background: linear-gradient(135deg, #74ebd5, #9face6);
+    display: flex;
+    justify-content: center;
+    align-items: center;
+    /* min-height rather than height: once the results grid makes the card
+       taller than the viewport, a fixed height would centre the overflow and
+       push the top of the card out of reach. */
+    min-height: 100vh;
+    margin: 0;
+}
+/* White card holding the form and the results. */
+.container {
+    background-color: white;
+    padding: 40px;
+    border-radius: 15px;
+    box-shadow: 0 8px 20px rgba(0, 0, 0, 0.2);
+    width: 90%;
+    max-width: 1200px;
+    text-align: center;
+}
+textarea {
+    /* border-box keeps padding and border inside the 100% width so the
+       textarea does not spill past the card's content area. */
+    box-sizing: border-box;
+    width: 100%;
+    padding: 10px;
+    font-size: 1rem;
+    border-radius: 8px;
+    border: 1px solid #ccc;
+    resize: none;
+}
+/* Submit button */
+button {
+    background-color: #007bff;
+    color: white;
+    border: none;
+    padding: 10px 20px;
+    margin-top: 10px;
+    border-radius: 8px;
+    font-size: 1rem;
+    cursor: pointer;
+}
+button:hover {
+    background-color: #0056b3;
+}
+/* Generic result box (used by the error message markup) */
+.result {
+    margin-top: 20px;
+    padding: 10px;
+    border-radius: 8px;
+}
+/* Sentiment text colours; the class names match the sentiment labels
+   emitted by the server (positive / negative / neutral). */
+.positive {
+    color: #00953e;
+}
+.negative {
+    color: #b00400;
+}
+.neutral {
+    color: #c99e00;
+}
+/* Small logo image in the page heading */
+.bird {
+    width: 25px;
+}
+/* Results grid layout */
+.results-grid {
+    display: grid;
+    grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
+    gap: 15px;
+    margin-top: 20px;
+}
+/* One card per model; border colour comes from the sentiment rules below */
+.model-result {
+    padding: 15px;
+    border-radius: 8px;
+    border: 2px solid;
+    text-align: center;
+}
+.model-result h3 {
+    margin: 0 0 10px 0;
+    font-size: 1rem;
+    color: #333;
+}
+.model-result .sentiment {
+    font-size: 1.3rem;
+    font-weight: bold;
+    margin: 5px 0;
+}
+.model-result .confidence {
+    font-size: 0.9rem;
+    color: #666;
+    margin: 5px 0;
+}
+/* Sentiment-specific background colors */
+.model-result.positive {
+    background-color: #d4edda;
+    border-color: #28a745;
+}
+.model-result.positive .sentiment {
+    color: #00953e;
+}
+.model-result.negative {
+    background-color: #f8d7da;
+    border-color: #dc3545;
+}
+.model-result.negative .sentiment {
+    color: #b00400;
+}
+.model-result.neutral {
+    background-color: #fff3cd;
+    border-color: #ffc107;
+}
+.model-result.neutral .sentiment {
+    color: #c99e00;
+}
+/* Error box; comes after .result so its padding overrides the generic one */
+.result.error {
+    background-color: #f8d7da;
+    border: 2px solid #dc3545;
+    padding: 15px;
+    border-radius: 8px;
+    text-align: center;
+}

templates/index.html ADDED Viewed

	@@ -0,0 +1,22 @@

+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="utf-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1">
+    <title>Tweet Sentiment Classifier</title>
+    <link rel="stylesheet" href="{{ url_for('static', filename='index.css') }}">
+    <script src="https://unpkg.com/htmx.org@1.9.10"></script>
+</head>
+<body>
+    <div class="container">
+        {# url_for() already returns a root-relative path, so it must not be prefixed with "../". The image is decorative, hence the empty alt text. #}
+        <h1><img class="bird" src="{{ url_for('static', filename='images/bird.png') }}" alt=""> Tweet Sentiment Classifier</h1>
+        <p style="text-align: center; color: #666; margin-bottom: 20px;">
+        </p>
+        {# htmx posts the form and swaps the returned HTML fragment into #result #}
+        <form hx-post="/classify" hx-target="#result">
+            <textarea name="text" rows="4" placeholder="Type or paste a tweet..." required></textarea>
+            <br>
+            <button type="submit">Analyze Sentiment</button>
+        </form>
+        <div id="result"></div>
+    </div>
+</body>
+</html>