PsalmsJava committed on
Commit
28a1786
·
1 Parent(s): 12f6795

Some other change

Browse files
DockerFile CHANGED
@@ -1,9 +1,35 @@
 
1
  FROM python:3.9-slim
2
 
3
- RUN apt-get update && apt-get install -y ffmpeg && rm -rf /var/lib/apt/lists/*
4
- RUN pip install fastapi uvicorn aiohttp numpy
 
 
 
5
 
 
 
 
 
 
 
 
 
6
  WORKDIR /app
7
- COPY main.py .
8
 
9
- CMD uvicorn main:app --host 0.0.0.0 --port 7860
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Production Dockerfile for Hugging Face Spaces
2
  FROM python:3.9-slim
3
 
4
+ # Set environment variables
5
+ ENV PYTHONUNBUFFERED=1 \
6
+ PYTHONDONTWRITEBYTECODE=1 \
7
+ PIP_NO_CACHE_DIR=1 \
8
+ PIP_DISABLE_PIP_VERSION_CHECK=1
9
 
10
+ # Install system dependencies
11
+ RUN apt-get update && apt-get install -y \
12
+ ffmpeg \
13
+ libsndfile1 \
14
+ curl \
15
+ && rm -rf /var/lib/apt/lists/*
16
+
17
+ # Create app directory
18
  WORKDIR /app
 
19
 
20
+ # Copy requirements first (for caching)
21
+ COPY requirements.txt .
22
+ RUN pip install --no-cache-dir -r requirements.txt
23
+
24
+ # Copy application code
25
+ COPY app/ ./app/
26
+
27
+ # Hugging Face Spaces uses PORT
28
+ ENV PORT=7860
29
+
30
+ # Health check
31
+ HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
32
+ CMD curl -f http://localhost:${PORT}/health || exit 1
33
+
34
+ # Run the application
35
+ CMD uvicorn app.main:app --host 0.0.0.0 --port ${PORT}
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: Emotion Detection Ensemble API
3
  emoji: 🎭
4
  colorFrom: blue
5
  colorTo: purple
@@ -9,24 +9,21 @@ pinned: false
9
  license: mit
10
  ---
11
 
12
- # 🎭 Emotion Detection Ensemble API
13
 
14
- A production-ready emotion detection API that combines 5 state-of-the-art models for accurate emotion recognition from speech.
15
 
16
- ## Features
17
- - **Ensemble Learning**: Combines 5 models with weighted voting
18
- - **Advanced Audio Processing**: VAD, noise reduction, format conversion
19
- - **Multi-Emotion Output**: Returns probability distribution across 7 emotions
20
- - **Secure Authentication**: Bearer token authentication
21
- - **Interactive Docs**: Built-in Swagger UI
22
 
23
- ## 🚀 Quick Start
 
 
 
 
 
24
 
25
- ### API Endpoints
26
- - `GET /health` - Health check
27
- - `GET /models` - List all models
28
- - `POST /analyze` - Analyze emotion from audio
29
- - `POST /analyze-batch` - Analyze multiple files
30
 
31
- ### Authentication
32
- Include your API token in the header:
 
 
1
  ---
2
+ title: Emotion Detection API
3
  emoji: 🎭
4
  colorFrom: blue
5
  colorTo: purple
 
9
  license: mit
10
  ---
11
 
12
+ # 🎭 Emotion Detection API
13
 
14
+ Production-grade emotion detection API using 5-model ensemble with JWT authentication.
15
 
16
+ ## Features
 
 
 
 
 
17
 
18
+ - **5-Model Ensemble**: Weighted voting for maximum accuracy
19
+ - ✅ **JWT Authentication**: Secure token-based access
20
+ - ✅ **Multiple Audio Formats**: WAV, MP3, M4A, OGG, FLAC, AAC
21
+ - ✅ **Smart Caching**: Reduces latency for repeated files
22
+ - ✅ **Swagger Documentation**: Interactive API explorer
23
+ - ✅ **Docker Deployment**: Ready for Hugging Face Spaces
24
 
25
+ ## Quick Start
 
 
 
 
26
 
27
+ ### Get Authentication Token
28
+ ```bash
29
+ curl -X POST https://your-space.hf.space/auth/token?client_id=your_app
app/__init__.py ADDED
File without changes
app/audio/__init__.py ADDED
File without changes
app/audio/processor.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import tempfile
3
+ import subprocess
4
+ import numpy as np
5
+ import librosa
6
+ from fastapi import UploadFile, HTTPException
7
+ from typing import Tuple
8
+ import logging
9
+ from app.config import settings
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
class AudioProcessor:
    """Production-grade audio preprocessing.

    Stateless helpers for validating uploads, converting arbitrary audio to
    16 kHz mono WAV via FFmpeg, and extracting basic metadata.
    """

    @staticmethod
    async def validate_file(file: UploadFile) -> Tuple[bytes, str]:
        """Validate an uploaded file's size and extension.

        Returns:
            (contents, ext): the raw upload bytes and the lowercased extension.

        Raises:
            HTTPException: 413 if the file exceeds MAX_FILE_SIZE_MB,
                415 if the extension is not in SUPPORTED_FORMATS.
        """
        # Read fully up front; the size limit bounds worst-case memory use.
        contents = await file.read()
        size_mb = len(contents) / (1024 * 1024)

        if size_mb > settings.MAX_FILE_SIZE_MB:
            raise HTTPException(
                status_code=413,
                detail=f"File too large. Max {settings.MAX_FILE_SIZE_MB}MB"
            )

        # Guard against a missing filename (UploadFile.filename can be None);
        # a dotless name yields the whole name, which then fails the format
        # check below with the same 415.
        filename = file.filename or ""
        ext = filename.split('.')[-1].lower()
        if ext not in settings.SUPPORTED_FORMATS:
            raise HTTPException(
                status_code=415,
                detail=f"Unsupported format. Supported: {settings.SUPPORTED_FORMATS}"
            )

        return contents, ext

    @staticmethod
    async def convert_to_wav(input_bytes: bytes, input_ext: str) -> bytes:
        """Convert audio bytes to WAV (TARGET_SAMPLE_RATE Hz, mono, 16-bit PCM).

        BUG FIX: the original ran FFmpeg via blocking ``subprocess.run``
        inside an ``async`` method, stalling the event loop (and every other
        in-flight request) for up to 30 s. Use an asyncio subprocess instead.

        Raises:
            HTTPException: 422 on conversion failure, 408 on timeout.
        """
        import asyncio  # stdlib; local import keeps the module's top imports unchanged

        with tempfile.NamedTemporaryFile(delete=False, suffix=f".{input_ext}") as f_in:
            f_in.write(input_bytes)
            input_path = f_in.name

        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f_out:
            output_path = f_out.name

        try:
            cmd = [
                "ffmpeg",
                "-i", input_path,
                "-ar", str(settings.TARGET_SAMPLE_RATE),
                "-ac", "1",              # mono
                "-acodec", "pcm_s16le",  # 16-bit PCM
                "-y",                    # Overwrite output
                output_path
            ]

            proc = await asyncio.create_subprocess_exec(
                *cmd,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
            )
            try:
                _, stderr = await asyncio.wait_for(proc.communicate(), timeout=30)
            except asyncio.TimeoutError:
                proc.kill()
                raise HTTPException(
                    status_code=408,
                    detail="Audio conversion timeout"
                )

            if proc.returncode != 0:
                logger.error(f"FFmpeg error: {stderr.decode(errors='replace')}")
                raise HTTPException(
                    status_code=422,
                    detail="Audio conversion failed"
                )

            # Read converted file
            with open(output_path, "rb") as f:
                return f.read()

        finally:
            # Always remove both temp files, on success and on every error path.
            for path in (input_path, output_path):
                if os.path.exists(path):
                    os.unlink(path)

    @staticmethod
    def get_audio_info(audio_bytes: bytes) -> dict:
        """Return duration / sample-rate metadata for WAV bytes.

        NOTE(review): librosa.load defaults to mono=True, so ``y`` is always
        1-D here and ``channels`` reports 1 — the stereo branch is effectively
        dead. Confirm intent if multi-channel metadata is ever needed.
        """
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
            tmp.write(audio_bytes)
            path = tmp.name

        try:
            y, sr = librosa.load(path, sr=None)
            duration = len(y) / sr

            return {
                "duration_seconds": round(duration, 2),
                "sample_rate": sr,
                "channels": 1 if len(y.shape) == 1 else y.shape[1],
                "samples": len(y)
            }
        finally:
            os.unlink(path)
111
+
112
# Shared module-level instance; all methods are static, so this exists
# purely for import convenience.
audio_processor = AudioProcessor()
app/auth.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datetime import datetime, timedelta
2
+ from typing import Optional
3
+ from fastapi import HTTPException, Security
4
+ from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
5
+ import jwt
6
+ from app.config import settings
7
+
8
+ security = HTTPBearer()
9
+
10
+ class AuthHandler:
11
+ """JWT-based authentication handler"""
12
+
13
+ def __init__(self):
14
+ self.secret_key = settings.API_SECRET_KEY
15
+ self.algorithm = settings.ALGORITHM
16
+ self.token_expiry = settings.ACCESS_TOKEN_EXPIRE_MINUTES
17
+
18
+ def create_token(self, client_id: str) -> str:
19
+ """Create JWT token for authenticated clients"""
20
+ expire = datetime.utcnow() + timedelta(minutes=self.token_expiry)
21
+ payload = {
22
+ "sub": client_id,
23
+ "exp": expire,
24
+ "iat": datetime.utcnow(),
25
+ "type": "access"
26
+ }
27
+ return jwt.encode(payload, self.secret_key, algorithm=self.algorithm)
28
+
29
+ def verify_token(self, credentials: HTTPAuthorizationCredentials = Security(security)) -> str:
30
+ """Verify JWT token and return client_id"""
31
+ token = credentials.credentials
32
+
33
+ try:
34
+ payload = jwt.decode(
35
+ token,
36
+ self.secret_key,
37
+ algorithms=[self.algorithm]
38
+ )
39
+
40
+ # Validate token type
41
+ if payload.get("type") != "access":
42
+ raise HTTPException(
43
+ status_code=401,
44
+ detail="Invalid token type"
45
+ )
46
+
47
+ # Check expiration
48
+ exp = datetime.fromtimestamp(payload.get("exp", 0))
49
+ if exp < datetime.utcnow():
50
+ raise HTTPException(
51
+ status_code=401,
52
+ detail="Token has expired"
53
+ )
54
+
55
+ return payload.get("sub", "anonymous")
56
+
57
+ except jwt.ExpiredSignatureError:
58
+ raise HTTPException(
59
+ status_code=401,
60
+ detail="Token has expired"
61
+ )
62
+ except jwt.InvalidTokenError:
63
+ raise HTTPException(
64
+ status_code=401,
65
+ detail="Invalid token"
66
+ )
67
+
68
# Shared module-level handler instance used as a FastAPI dependency target.
auth_handler = AuthHandler()
app/config.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import Dict, Any
3
+ from pydantic import BaseSettings
4
+
5
class Settings(BaseSettings):
    """Application settings loaded from the environment (and `.env`).

    Fields declared without a default (HF_TOKEN, API_SECRET_KEY) are
    required: pydantic raises a ValidationError at instantiation if they
    are not provided by the environment.
    """

    # API Settings
    API_V1_PREFIX: str = "/api/v1"
    PROJECT_NAME: str = "Emotion Detection API"
    VERSION: str = "1.0.0"

    # Security - Critical: These must be set in environment
    HF_TOKEN: str          # Hugging Face inference API token (sent as a Bearer header)
    API_SECRET_KEY: str    # JWT signing key
    ALGORITHM: str = "HS256"
    ACCESS_TOKEN_EXPIRE_MINUTES: int = 30

    # Model Configuration
    # Registry consumed by the ensemble: name -> {url, weight, timeout, enabled}.
    # `weight` is the model's vote weight in fusion (weights below sum to 1.00);
    # `timeout` is the per-request budget in seconds for the inference call.
    ENABLED_MODELS: Dict[str, Dict[str, Any]] = {
        "emotion2vec_plus": {
            "url": "https://api-inference.huggingface.co/models/emotion2vec/emotion2vec_plus_base",
            "weight": 0.50,
            "timeout": 30,
            "enabled": True
        },
        "meralion_ser": {
            "url": "https://api-inference.huggingface.co/models/MERaLiON/MERaLiON-SER-v1",
            "weight": 0.25,
            "timeout": 30,
            "enabled": True
        },
        "wav2vec2_english": {
            "url": "https://api-inference.huggingface.co/models/ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition",
            "weight": 0.15,
            "timeout": 25,
            "enabled": True
        },
        "hubert_er": {
            "url": "https://api-inference.huggingface.co/models/superb/hubert-large-superb-er",
            "weight": 0.07,
            "timeout": 25,
            "enabled": True
        },
        "gigam_emo": {
            "url": "https://api-inference.huggingface.co/models/salute-developers/GigaAM-emo",
            "weight": 0.03,
            "timeout": 20,
            "enabled": True
        }
    }

    # Audio Processing
    MAX_FILE_SIZE_MB: int = 10
    SUPPORTED_FORMATS: list = ["wav", "mp3", "m4a", "ogg", "flac", "aac"]
    TARGET_SAMPLE_RATE: int = 16000   # Hz; FFmpeg resamples uploads to this rate

    # Rate Limiting
    # NOTE(review): no consumer of these is visible in this chunk —
    # presumably enforced by middleware elsewhere; confirm.
    RATE_LIMIT_REQUESTS: int = 60
    RATE_LIMIT_PERIOD: int = 60  # seconds

    class Config:
        # pydantic v1 BaseSettings config: read `.env`, match env vars exactly.
        env_file = ".env"
        case_sensitive = True
65
+
66
# Eagerly instantiated at import time; fails fast (ValidationError) if the
# required environment variables are missing.
settings = Settings()
app/main.py ADDED
File without changes
app/models/__init__.py ADDED
File without changes
app/models/ensemble.py ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import aiohttp
3
+ from typing import Dict, List, Any, Optional
4
+ from collections import defaultdict
5
+ import logging
6
+ from app.config import settings
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
class EmotionEnsemble:
    """Weighted ensemble of hosted emotion-detection models.

    Queries every enabled model on the HF inference API concurrently and
    fuses the per-model label scores by weighted voting.
    """

    def __init__(self):
        # Model registry: name -> {url, weight, timeout, enabled}.
        self.models = settings.ENABLED_MODELS
        # Canonical emotion -> label variants emitted by individual models.
        self.emotion_mapping = {
            "angry": ["angry", "ang", "anger"],
            "happy": ["happy", "hap", "happiness", "joy"],
            "sad": ["sad", "sadness"],
            "fear": ["fear", "fearful"],
            "surprise": ["surprise", "surprised"],
            "disgust": ["disgust", "disgusted"],
            "neutral": ["neutral", "neu"]
        }

    async def predict(self, audio_bytes: bytes) -> Dict[str, Any]:
        """
        Run ensemble prediction on audio bytes.

        Returns the fused prediction dict (see _fuse_predictions).
        Raises Exception if no model returns a usable result.
        """
        headers = {"Authorization": f"Bearer {settings.HF_TOKEN}"}

        async with aiohttp.ClientSession() as session:
            # Fan out one request per enabled model.
            tasks = []
            model_names = []

            for name, config in self.models.items():
                if config.get("enabled", True):
                    tasks.append(self._query_model(
                        session, name, config, audio_bytes, headers
                    ))
                    model_names.append(name)

            # return_exceptions=True: one failing model must not sink the batch.
            results = await asyncio.gather(*tasks, return_exceptions=True)

            # Keep only truthy, non-exception results.
            model_outputs = {}
            for name, result in zip(model_names, results):
                if result and not isinstance(result, Exception):
                    model_outputs[name] = result
                    logger.info(f"✓ {name} succeeded")
                else:
                    logger.warning(f"✗ {name} failed: {result}")

            if not model_outputs:
                raise Exception("No models returned valid predictions")

            return self._fuse_predictions(model_outputs)

    async def _query_model(self, session, name, config, audio_bytes, headers):
        """Query a single model; return parsed JSON, or None on any failure."""
        try:
            timeout = aiohttp.ClientTimeout(total=config["timeout"])
            async with session.post(
                config["url"],
                headers=headers,
                data=audio_bytes,
                timeout=timeout
            ) as response:
                if response.status == 200:
                    return await response.json()
                elif response.status == 503:
                    # Model is cold-loading — wait briefly and retry once.
                    await asyncio.sleep(2)
                    # BUG FIX: the retry request previously carried no
                    # timeout and could hang indefinitely; apply the same
                    # per-model budget as the first attempt.
                    async with session.post(
                        config["url"],
                        headers=headers,
                        data=audio_bytes,
                        timeout=timeout
                    ) as retry:
                        if retry.status == 200:
                            return await retry.json()

                logger.warning(f"{name} returned {response.status}")
                return None

        except asyncio.TimeoutError:
            logger.warning(f"{name} timeout")
            return None
        except Exception as e:
            logger.warning(f"{name} error: {e}")
            return None

    def _fuse_predictions(self, model_outputs: Dict[str, List]) -> Dict[str, Any]:
        """Fuse per-model [{label, score}, ...] lists by weighted voting.

        Scores are accumulated per canonical emotion (score * model weight),
        then normalized by the total weight of the models that responded, so
        a partial ensemble still yields comparable confidences.
        """
        emotion_scores = defaultdict(float)
        total_weight = 0.0
        model_contributions = []

        for name, predictions in model_outputs.items():
            weight = self.models[name]["weight"]
            total_weight += weight

            contribution = {
                "model": name,
                "weight": weight,
                "predictions": []
            }

            for pred in predictions:
                label = pred.get("label", "").lower()
                score = pred.get("score", 0.0)

                # Map each model-specific label to a canonical emotion.
                mapped = self._map_emotion(label)
                contribution["predictions"].append({
                    "original": label,
                    "mapped": mapped,
                    "score": score
                })

                emotion_scores[mapped] += score * weight

            model_contributions.append(contribution)

        # Normalize by the weight of responding models only.
        if total_weight > 0:
            emotion_scores = {
                k: v / total_weight
                for k, v in emotion_scores.items()
            }

        # Primary emotion = highest fused score; "unknown" if nothing scored.
        if emotion_scores:
            primary = max(emotion_scores.items(), key=lambda x: x[1])
        else:
            primary = ("unknown", 0.0)

        return {
            "primary_emotion": primary[0],
            "confidence": round(primary[1], 4),
            "all_emotions": {
                k: round(v, 4)
                for k, v in sorted(
                    emotion_scores.items(),
                    key=lambda x: x[1],
                    reverse=True
                )
            },
            "ensemble_details": {
                "models_used": list(model_outputs.keys()),
                "total_models": len(self.models),
                "model_contributions": model_contributions
            }
        }

    def _map_emotion(self, label: str) -> str:
        """Map a model-specific label to a canonical emotion name.

        First tries substring match against the known variant lists, then a
        coarser prefix fallback; defaults to "neutral" when nothing matches.
        """
        label_lower = label.lower()

        for std_emo, variations in self.emotion_mapping.items():
            if any(var in label_lower for var in variations):
                return std_emo

        # Prefix-based fallback for truncated / abbreviated labels.
        if "ang" in label_lower:
            return "angry"
        elif "hap" in label_lower:
            return "happy"
        elif "sad" in label_lower:
            return "sad"
        elif "neu" in label_lower:
            return "neutral"
        elif "fea" in label_lower:
            return "fear"
        elif "sur" in label_lower:
            return "surprise"
        elif "dis" in label_lower:
            return "disgust"

        return "neutral"
182
+
183
# Shared module-level ensemble instance.
ensemble = EmotionEnsemble()
app/utils/logger.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import sys
3
+ from app.config import settings
4
+
5
+ def setup_logging():
6
+ """Configure logging for the application"""
7
+ logging.basicConfig(
8
+ level=logging.INFO,
9
+ format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
10
+ handlers=[
11
+ logging.StreamHandler(sys.stdout)
12
+ ]
13
+ )
14
+
15
+ # Set levels for noisy libraries
16
+ logging.getLogger("aiohttp").setLevel(logging.WARNING)
17
+ logging.getLogger("urllib3").setLevel(logging.WARNING)
requirements.txt CHANGED
@@ -1,4 +1,11 @@
1
- fastapi
2
- uvicorn
3
- aiohttp
4
- numpy
 
 
 
 
 
 
 
 
1
+ fastapi==0.104.1
2
+ uvicorn[standard]==0.24.0
3
+ python-jose[cryptography]==3.3.0
+ PyJWT==2.8.0
4
+ passlib[bcrypt]==1.7.4
5
+ python-multipart==0.0.6
6
+ aiohttp==3.9.1
7
+ librosa==0.10.1
8
+ soundfile==0.12.1
9
+ numpy==1.24.3
10
+ pydantic==1.10.13
11
+ python-dotenv==1.0.0