Spaces:

PsalmsJava
/

emotion-detection-api

No application file

App Files Files Community

PsalmsJava committed 24 days ago

Commit

8ef2cca

1 Parent(s): 95c58ff

Changed Everything to 2 Files

Browse files

Files changed (3) hide show

DockerFile +22 -11
app.py +187 -135
requirements.txt +8 -11

DockerFile CHANGED Viewed

@@ -1,26 +1,37 @@
 FROM python:3.9-slim
-# THIS IS CRITICAL - Create the required user
 RUN useradd -m -u 1000 user
 USER user
-# Set environment variables
-ENV PYTHONUNBUFFERED=1 \
-    PYTHONDONTWRITEBYTECODE=1 \
-    PATH="/home/user/.local/bin:$PATH"
 WORKDIR /app
-# Copy requirements first
 COPY --chown=user requirements.txt .
 RUN pip install --no-cache-dir --upgrade -r requirements.txt
-# Copy application code
-COPY --chown=user app/ ./app/
-COPY --chown=user main.py .
-# Hugging Face Spaces requires port 7860
 ENV PORT=7860
 # Run the application
-CMD uvicorn main:app --host 0.0.0.0 --port ${PORT}

 FROM python:3.9-slim
+# Create required user
 RUN useradd -m -u 1000 user
 USER user
+# Set environment
+ENV PATH="/home/user/.local/bin:$PATH" \
+    PYTHONUNBUFFERED=1 \
+    PIP_NO_CACHE_DIR=1
 WORKDIR /app
+# Install system dependencies (as root, then switch back)
+USER root
+RUN apt-get update && apt-get install -y \
+    ffmpeg \
+    libsndfile1 \
+    && rm -rf /var/lib/apt/lists/*
+USER user
+# Copy requirements first (better caching)
 COPY --chown=user requirements.txt .
 RUN pip install --no-cache-dir --upgrade -r requirements.txt
+# Copy application
+COPY --chown=user app.py .
+# Hugging Face requires port 7860
 ENV PORT=7860
+# Health check: probe the root route with the Python stdlib. The apt-get step
+# above installs only ffmpeg and libsndfile1 (no curl), and app.py exposes "/"
+# rather than "/health". urlopen raises on connection or HTTP errors, which
+# makes the probe exit non-zero.
+HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
+    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:${PORT}/', timeout=5)" || exit 1
 # Run the application
+CMD uvicorn app:app --host 0.0.0.0 --port ${PORT}

app.py CHANGED Viewed

@@ -1,149 +1,201 @@
 import os
 import tempfile
 import subprocess
-from fastapi import FastAPI, File, UploadFile, HTTPException
-from fastapi.responses import JSONResponse
 import aiohttp
 import numpy as np
-from datetime import datetime
-import logging
-# Setup
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-app = FastAPI(title="Emotion Detection API", docs_url="/docs")
-# Config - get from environment
-HF_TOKEN = os.getenv("HF_TOKEN", "")
-API_TOKEN = os.getenv("API_TOKEN", "test123")
-# Models - using only 2 for reliability
-MODELS = {
-    "wav2vec2_english": {
-        "url": "https://api-inference.huggingface.co/models/ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition",
-        "weight": 0.7,
-    },
-    "gigam_emo": {
-        "url": "https://api-inference.huggingface.co/models/salute-developers/GigaAM-emo",
-        "weight": 0.3,
     }
-}
-# Emotion mapping
-EMOTION_MAPPING = {
-    "angry": ["angry", "ang"],
-    "happy": ["happy", "hap"],
-    "sad": ["sad"],
-    "fear": ["fear"],
-    "surprise": ["surprise"],
-    "disgust": ["disgust"],
-    "neutral": ["neutral", "neu"]
-}
-@app.get("/health")
-async def health():
-    return {"status": "ok", "hf_token": bool(HF_TOKEN)}
 @app.get("/")
-async def root():
-    return {
-        "message": "Emotion Detection API",
-        "docs": "/docs",
-        "endpoints": ["POST /analyze"]
-    }
 @app.post("/analyze")
-async def analyze(file: UploadFile = File(...)):
-    """Analyze emotion from audio file"""
-    # Check auth header
-    auth = file.headers.get("authorization", "")
-    if not auth or auth.replace("Bearer ", "") != API_TOKEN:
-        return JSONResponse(
-            status_code=401,
-            content={"error": "Invalid or missing Authorization header"}
-        )
-    # Save uploaded file
-    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
-        content = await file.read()
-        tmp.write(content)
-        input_path = tmp.name
-    try:
-        # Convert to proper format
-        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as out:
-            output_path = out.name
-        subprocess.run([
-            "ffmpeg", "-i", input_path,
-            "-ar", "16000", "-ac", "1",
-            "-y", output_path
-        ], check=True, capture_output=True)
-        # Read converted file
-        with open(output_path, "rb") as f:
-            audio_bytes = f.read()
-        # Query models
-        headers = {"Authorization": f"Bearer {HF_TOKEN}"}
-        results = {}
-        async with aiohttp.ClientSession() as session:
-            for name, config in MODELS.items():
-                try:
-                    async with session.post(
-                        config["url"],
-                        headers=headers,
-                        data=audio_bytes,
-                        timeout=10
-                    ) as resp:
-                        if resp.status == 200:
-                            results[name] = await resp.json()
-                except Exception as e:
-                    logger.warning(f"{name} failed: {e}")
-        # Simple ensemble
-        emotion_scores = {}
-        total_weight = 0
-        for name, predictions in results.items():
-            weight = MODELS[name]["weight"]
-            total_weight += weight
-            for pred in predictions:
-                label = pred.get("label", "").lower()
-                score = pred.get("score", 0)
-                # Map to standard emotions
-                for std_emo, variations in EMOTION_MAPPING.items():
-                    if any(v in label for v in variations):
-                        emotion_scores[std_emo] = emotion_scores.get(std_emo, 0) + score * weight
-                        break
-        # Normalize
-        if total_weight > 0:
-            emotion_scores = {k: v/total_weight for k, v in emotion_scores.items()}
-        # Get primary emotion
-        primary = max(emotion_scores.items(), key=lambda x: x[1]) if emotion_scores else ("unknown", 0)
-        return {
-            "primary_emotion": primary[0],
-            "confidence": round(primary[1], 3),
-            "all_emotions": {k: round(v, 3) for k, v in emotion_scores.items()},
-            "models_used": list(results.keys())
-        }
-    except Exception as e:
-        logger.error(f"Error: {e}")
-        return JSONResponse(status_code=500, content={"error": str(e)})
-    finally:
-        # Cleanup
-        for path in [input_path, output_path]:
-            if os.path.exists(path):
-                os.unlink(path)
-# For Hugging Face
-from fastapi.middleware.cors import CORSMiddleware
-app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])

 import os
+import time
+import jwt
+import hashlib
 import tempfile
 import subprocess
+import logging
+import asyncio
+from datetime import datetime, timedelta, timezone
+from typing import Dict, List, Any, Optional
+from collections import defaultdict
+from contextlib import asynccontextmanager
 import aiohttp
 import numpy as np
+import librosa
+from fastapi import FastAPI, File, UploadFile, Depends, HTTPException, status
+from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.openapi.docs import get_swagger_ui_html
+from fastapi.openapi.utils import get_openapi
+from pydantic import BaseModel
+# ==================== CONFIGURATION ====================
+class Config:
+    # Static settings for the API, read once at import time (env vars + constants).
+    # Bearer token sent to the hosted inference endpoints; empty string when unset.
+    HF_TOKEN = os.getenv("HF_TOKEN", "")
+    # Default secret for dev; HF Spaces should set this in Settings > Variables
+    # NOTE(review): the fallback value is visible in this source, so any deployment
+    # that leaves API_SECRET_KEY unset signs tokens with a publicly known key.
+    API_SECRET_KEY = os.getenv("API_SECRET_KEY", "hf_space_default_secret_123")
+    # JWT signing algorithm and access-token lifetime.
+    ALGORITHM = "HS256"
+    ACCESS_TOKEN_EXPIRE_MINUTES = 30
+    # Ensemble members: endpoint URL, fusion weight, per-request timeout (seconds)
+    # and a human-readable description. The weights below sum to 1.0.
+    MODELS = {
+        "emotion2vec_plus": {"url": "https://api-inference.huggingface.co/models/emotion2vec/emotion2vec_plus_base", "weight": 0.50, "timeout": 30, "description": "Foundation SER model"},
+        "meralion_ser": {"url": "https://api-inference.huggingface.co/models/MERaLiON/MERaLiON-SER-v1", "weight": 0.25, "timeout": 30, "description": "English/SEA optimized"},
+        "wav2vec2_english": {"url": "https://api-inference.huggingface.co/models/ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition", "weight": 0.15, "timeout": 25, "description": "English fine-tuned"},
+        "hubert_er": {"url": "https://api-inference.huggingface.co/models/superb/hubert-large-superb-er", "weight": 0.07, "timeout": 25, "description": "Acoustic specialist"},
+        "gigam_emo": {"url": "https://api-inference.huggingface.co/models/salute-developers/GigaAM-emo", "weight": 0.03, "timeout": 20, "description": "Acoustic pattern expert"}
     }
+    # Upload limits and audio normalisation targets.
+    MAX_FILE_SIZE_MB = 10
+    SUPPORTED_FORMATS = ["wav", "mp3", "m4a", "ogg", "flac", "aac"]
+    TARGET_SAMPLE_RATE = 16000
+    MAX_DURATION_SECONDS = 30
+    # Canonical emotion -> label fragments that should be folded into it.
+    EMOTION_MAPPING = {
+        "angry": ["angry", "ang", "anger"],
+        "happy": ["happy", "hap", "happiness", "joy"],
+        "sad": ["sad", "sadness"],
+        "fear": ["fear", "fearful"],
+        "surprise": ["surprise", "surprised"],
+        "disgust": ["disgust", "disgusted"],
+        "neutral": ["neutral", "neu"]
+    }
+config = Config()
+logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
+logger = logging.getLogger(__name__)
+# ==================== AUTH & UTILS ====================
+security = HTTPBearer()
+class AuthHandler:
+    @staticmethod
+    def create_token(client_id: str = "api_client") -> str:
+        expire = datetime.now(timezone.utc) + timedelta(minutes=config.ACCESS_TOKEN_EXPIRE_MINUTES)
+        payload = {"sub": client_id, "exp": expire, "iat": datetime.now(timezone.utc), "type": "access"}
+        return jwt.encode(payload, config.API_SECRET_KEY, algorithm=config.ALGORITHM)
+    @staticmethod
+    def verify_token(credentials: HTTPAuthorizationCredentials = Depends(security)) -> str:
+        token = credentials.credentials
+        try:
+            payload = jwt.decode(token, config.API_SECRET_KEY, algorithms=[config.ALGORITHM])
+            return payload.get("sub", "anonymous")
+        except Exception:
+            raise HTTPException(status_code=401, detail="Invalid or expired token")
+# ==================== CORE LOGIC ====================
+class AudioProcessor:
+    @staticmethod
+    async def validate_and_process(file: UploadFile) -> tuple:
+        contents = await file.read()
+        if len(contents) / (1024 * 1024) > config.MAX_FILE_SIZE_MB:
+            raise HTTPException(413, "File too large")
+        ext = file.filename.split('.')[-1].lower()
+        with tempfile.NamedTemporaryFile(delete=False, suffix=f".{ext}") as f_in:
+            f_in.write(contents)
+            input_path = f_in.name
+        output_path = input_path + ".wav"
+        try:
+            cmd = ["ffmpeg", "-i", input_path, "-ar", str(config.TARGET_SAMPLE_RATE), "-ac", "1", "-y", output_path]
+            subprocess.run(cmd, capture_output=True, check=True, timeout=30)
+            y, sr = librosa.load(output_path, sr=config.TARGET_SAMPLE_RATE)
+            duration = len(y) / sr
+            if duration > config.MAX_DURATION_SECONDS:
+                raise HTTPException(400, "Audio too long")
+            with open(output_path, "rb") as f:
+                return f.read(), {"duration": round(duration, 2), "format": ext}
+        finally:
+            for p in [input_path, output_path]:
+                if os.path.exists(p): os.unlink(p)
+class EmotionEnsemble:
+    def __init__(self):
+        self.models = config.MODELS
+    async def predict(self, audio_bytes: bytes) -> Dict[str, Any]:
+        if not config.HF_TOKEN:
+            raise HTTPException(503, "HF_TOKEN missing")
+        headers = {"Authorization": f"Bearer {config.HF_TOKEN}"}
+        async with aiohttp.ClientSession() as session:
+            tasks = [self._query(session, name, m_cfg, audio_bytes, headers) for name, m_cfg in self.models.items()]
+            results = await asyncio.gather(*tasks)
+        model_outputs = {name: res for name, res in zip(self.models.keys(), results) if res}
+        if not model_outputs:
+            raise HTTPException(503, "All models failed to respond")
+        return self._fuse(model_outputs)
+    async def _query(self, session, name, cfg, data, headers):
+        try:
+            async with session.post(cfg["url"], headers=headers, data=data, timeout=cfg["timeout"]) as resp:
+                if resp.status == 200: return await resp.json()
+        except: return None
+    def _fuse(self, model_outputs):
+        scores = defaultdict(float)
+        for name, preds in model_outputs.items():
+            w = self.models[name]["weight"]
+            for p in preds:
+                label = self._map(p['label'])
+                scores[label] += p['score'] * w
+        sorted_scores = dict(sorted(scores.items(), key=lambda x: x[1], reverse=True))
+        primary = list(sorted_scores.items())[0]
+        return {"primary_emotion": primary[0], "confidence": round(primary[1], 3), "all_emotions": sorted_scores}
+    def _map(self, label: str) -> str:
+        label = label.lower()
+        for std, vars in config.EMOTION_MAPPING.items():
+            if any(v in label for v in vars): return std
+        return "neutral"
+# ==================== APP SETUP ====================
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    """Log one message before yielding control and another after it resumes."""
+    logger.info("🚀 API Starting Up...")
+    yield  # passed to FastAPI as lifespan=; code after this line runs on resume
+    logger.info("🛑 API Shutting Down...")
+app = FastAPI(title="Emotion API", lifespan=lifespan, docs_url=None)
+auth_handler = AuthHandler()
+audio_proc = AudioProcessor()
+ensemble = EmotionEnsemble()
+cache = {}
 @app.get("/")
+async def root(): return {"message": "Emotion API Active", "docs": "/docs"}
+@app.get("/auth/token")
+async def get_token(client_id: str = "api_client"):
+    return {"access_token": auth_handler.create_token(client_id)}
 @app.post("/analyze")
+async def analyze(file: UploadFile = File(...), user: str = Depends(auth_handler.verify_token)):
+    content = await file.read()
+    await file.seek(0) # Reset for the processor
+    ckey = hashlib.md5(content).hexdigest()
+    if ckey in cache: return cache[ckey]
+    audio_bytes, info = await audio_proc.validate_and_process(file)
+    res = await ensemble.predict(audio_bytes)
+    res.update({"audio_info": info, "user": user})
+    if len(cache) < 100: cache[ckey] = res
+    return res
+@app.get("/docs", include_in_schema=False)
+async def custom_docs():
+    return get_swagger_ui_html(openapi_url="/openapi.json", title="API Docs")
+@app.get("/openapi.json", include_in_schema=False)
+async def get_open_api_endpoint():
+    if app.openapi_schema: return app.openapi_schema
+    schema = get_openapi(title="Emotion Ensemble API", version="1.0.0", routes=app.routes)
+    schema["components"]["securitySchemes"] = {
+        "bearerAuth": {"type": "http", "scheme": "bearer", "bearerFormat": "JWT"}
+    }
+    schema["security"] = [{"bearerAuth": []}]
+    app.openapi_schema = schema
+    return schema
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=int(os.getenv("PORT", 7860)))

requirements.txt CHANGED Viewed

@@ -1,11 +1,8 @@
-fastapi==0.104.1
-uvicorn[standard]==0.24.0
-python-jose[cryptography]==3.3.0
-passlib[bcrypt]==1.7.4
-python-multipart==0.0.6
-aiohttp==3.9.1
-librosa==0.10.1
-soundfile==0.12.1
-numpy==1.24.3
-pydantic==1.10.13
-python-dotenv==1.0.0

+fastapi
+uvicorn
+python-multipart
+PyJWT
+aiohttp
+numpy
+librosa
+pydantic