PsalmsJava committed on
Commit
84f84c3
·
1 Parent(s): 8ef2cca

Updated Again

Browse files
Files changed (2) hide show
  1. DockerFile +1 -1
  2. app.py +146 -168
DockerFile CHANGED
@@ -31,7 +31,7 @@ ENV PORT=7860
31
 
32
  # Health check
33
  HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
34
- CMD curl -f http://localhost:${PORT}/health || exit 1
35
 
36
  # Run the application
37
  CMD uvicorn app:app --host 0.0.0.0 --port ${PORT}
 
# Health check
# Fix: the previous revision read `CMD curl apt-get -f ...` — the stray
# `apt-get` token made curl treat it as the URL, so the probe always failed
# and the container was reported unhealthy.
HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
    CMD curl -f http://localhost:${PORT}/health || exit 1

# Run the application
CMD uvicorn app:app --host 0.0.0.0 --port ${PORT}
app.py CHANGED
@@ -1,201 +1,179 @@
1
  import os
2
  import time
3
  import jwt
 
 
4
  import hashlib
5
  import tempfile
6
  import subprocess
7
- import logging
8
- import asyncio
9
  from datetime import datetime, timedelta, timezone
10
- from typing import Dict, List, Any, Optional
11
- from collections import defaultdict
12
- from contextlib import asynccontextmanager
13
 
14
  import aiohttp
15
- import numpy as np
16
  import librosa
 
17
  from fastapi import FastAPI, File, UploadFile, Depends, HTTPException, status
18
  from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
19
  from fastapi.middleware.cors import CORSMiddleware
20
- from fastapi.openapi.docs import get_swagger_ui_html
21
- from fastapi.openapi.utils import get_openapi
22
- from pydantic import BaseModel
23
 
24
- # ==================== CONFIGURATION ====================
25
- class Config:
 
26
  HF_TOKEN = os.getenv("HF_TOKEN", "")
27
- # Default secret for dev; HF Spaces should set this in Settings > Variables
28
- API_SECRET_KEY = os.getenv("API_SECRET_KEY", "hf_space_default_secret_123")
29
- ALGORITHM = "HS256"
30
- ACCESS_TOKEN_EXPIRE_MINUTES = 30
31
 
32
  MODELS = {
33
- "emotion2vec_plus": {"url": "https://api-inference.huggingface.co/models/emotion2vec/emotion2vec_plus_base", "weight": 0.50, "timeout": 30, "description": "Foundation SER model"},
34
- "meralion_ser": {"url": "https://api-inference.huggingface.co/models/MERaLiON/MERaLiON-SER-v1", "weight": 0.25, "timeout": 30, "description": "English/SEA optimized"},
35
- "wav2vec2_english": {"url": "https://api-inference.huggingface.co/models/ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition", "weight": 0.15, "timeout": 25, "description": "English fine-tuned"},
36
- "hubert_er": {"url": "https://api-inference.huggingface.co/models/superb/hubert-large-superb-er", "weight": 0.07, "timeout": 25, "description": "Acoustic specialist"},
37
- "gigam_emo": {"url": "https://api-inference.huggingface.co/models/salute-developers/GigaAM-emo", "weight": 0.03, "timeout": 20, "description": "Acoustic pattern expert"}
38
  }
39
 
40
- MAX_FILE_SIZE_MB = 10
41
- SUPPORTED_FORMATS = ["wav", "mp3", "m4a", "ogg", "flac", "aac"]
42
- TARGET_SAMPLE_RATE = 16000
43
- MAX_DURATION_SECONDS = 30
44
- EMOTION_MAPPING = {
45
- "angry": ["angry", "ang", "anger"],
46
- "happy": ["happy", "hap", "happiness", "joy"],
47
- "sad": ["sad", "sadness"],
48
- "fear": ["fear", "fearful"],
49
- "surprise": ["surprise", "surprised"],
50
- "disgust": ["disgust", "disgusted"],
51
- "neutral": ["neutral", "neu"]
52
  }
53
 
54
- config = Config()
55
- logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
56
- logger = logging.getLogger(__name__)
57
 
58
- # ==================== AUTH & UTILS ====================
59
  security = HTTPBearer()
60
 
61
- class AuthHandler:
62
- @staticmethod
63
- def create_token(client_id: str = "api_client") -> str:
64
- expire = datetime.now(timezone.utc) + timedelta(minutes=config.ACCESS_TOKEN_EXPIRE_MINUTES)
65
- payload = {"sub": client_id, "exp": expire, "iat": datetime.now(timezone.utc), "type": "access"}
66
- return jwt.encode(payload, config.API_SECRET_KEY, algorithm=config.ALGORITHM)
67
-
68
- @staticmethod
69
- def verify_token(credentials: HTTPAuthorizationCredentials = Depends(security)) -> str:
70
- token = credentials.credentials
71
- try:
72
- payload = jwt.decode(token, config.API_SECRET_KEY, algorithms=[config.ALGORITHM])
73
- return payload.get("sub", "anonymous")
74
- except Exception:
75
- raise HTTPException(status_code=401, detail="Invalid or expired token")
76
-
77
- # ==================== CORE LOGIC ====================
78
- class AudioProcessor:
79
- @staticmethod
80
- async def validate_and_process(file: UploadFile) -> tuple:
81
- contents = await file.read()
82
- if len(contents) / (1024 * 1024) > config.MAX_FILE_SIZE_MB:
83
- raise HTTPException(413, "File too large")
84
-
85
- ext = file.filename.split('.')[-1].lower()
86
- with tempfile.NamedTemporaryFile(delete=False, suffix=f".{ext}") as f_in:
87
- f_in.write(contents)
88
- input_path = f_in.name
89
-
90
- output_path = input_path + ".wav"
91
- try:
92
- cmd = ["ffmpeg", "-i", input_path, "-ar", str(config.TARGET_SAMPLE_RATE), "-ac", "1", "-y", output_path]
93
- subprocess.run(cmd, capture_output=True, check=True, timeout=30)
94
-
95
- y, sr = librosa.load(output_path, sr=config.TARGET_SAMPLE_RATE)
96
- duration = len(y) / sr
97
- if duration > config.MAX_DURATION_SECONDS:
98
- raise HTTPException(400, "Audio too long")
99
-
100
- with open(output_path, "rb") as f:
101
- return f.read(), {"duration": round(duration, 2), "format": ext}
102
- finally:
103
- for p in [input_path, output_path]:
104
- if os.path.exists(p): os.unlink(p)
105
-
106
- class EmotionEnsemble:
107
- def __init__(self):
108
- self.models = config.MODELS
109
-
110
- async def predict(self, audio_bytes: bytes) -> Dict[str, Any]:
111
- if not config.HF_TOKEN:
112
- raise HTTPException(503, "HF_TOKEN missing")
113
-
114
- headers = {"Authorization": f"Bearer {config.HF_TOKEN}"}
115
- async with aiohttp.ClientSession() as session:
116
- tasks = [self._query(session, name, m_cfg, audio_bytes, headers) for name, m_cfg in self.models.items()]
117
- results = await asyncio.gather(*tasks)
118
-
119
- model_outputs = {name: res for name, res in zip(self.models.keys(), results) if res}
120
- if not model_outputs:
121
- raise HTTPException(503, "All models failed to respond")
122
-
123
- return self._fuse(model_outputs)
124
-
125
- async def _query(self, session, name, cfg, data, headers):
126
- try:
127
- async with session.post(cfg["url"], headers=headers, data=data, timeout=cfg["timeout"]) as resp:
128
- if resp.status == 200: return await resp.json()
129
- except: return None
130
-
131
- def _fuse(self, model_outputs):
132
- scores = defaultdict(float)
133
- for name, preds in model_outputs.items():
134
- w = self.models[name]["weight"]
135
- for p in preds:
136
- label = self._map(p['label'])
137
- scores[label] += p['score'] * w
138
 
139
- sorted_scores = dict(sorted(scores.items(), key=lambda x: x[1], reverse=True))
140
- primary = list(sorted_scores.items())[0]
141
- return {"primary_emotion": primary[0], "confidence": round(primary[1], 3), "all_emotions": sorted_scores}
142
-
143
- def _map(self, label: str) -> str:
144
- label = label.lower()
145
- for std, vars in config.EMOTION_MAPPING.items():
146
- if any(v in label for v in vars): return std
147
- return "neutral"
148
-
149
- # ==================== APP SETUP ====================
150
- @asynccontextmanager
151
- async def lifespan(app: FastAPI):
152
- logger.info("🚀 API Starting Up...")
153
- yield
154
- logger.info("🛑 API Shutting Down...")
155
-
156
- app = FastAPI(title="Emotion API", lifespan=lifespan, docs_url=None)
157
- auth_handler = AuthHandler()
158
- audio_proc = AudioProcessor()
159
- ensemble = EmotionEnsemble()
160
- cache = {}
161
-
162
- @app.get("/")
163
- async def root(): return {"message": "Emotion API Active", "docs": "/docs"}
164
-
165
- @app.get("/auth/token")
166
- async def get_token(client_id: str = "api_client"):
167
- return {"access_token": auth_handler.create_token(client_id)}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
 
169
  @app.post("/analyze")
170
- async def analyze(file: UploadFile = File(...), user: str = Depends(auth_handler.verify_token)):
171
- content = await file.read()
172
- await file.seek(0) # Reset for the processor
173
- ckey = hashlib.md5(content).hexdigest()
174
-
175
- if ckey in cache: return cache[ckey]
176
 
177
- audio_bytes, info = await audio_proc.validate_and_process(file)
178
- res = await ensemble.predict(audio_bytes)
179
- res.update({"audio_info": info, "user": user})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
 
181
- if len(cache) < 100: cache[ckey] = res
182
- return res
183
-
184
- @app.get("/docs", include_in_schema=False)
185
- async def custom_docs():
186
- return get_swagger_ui_html(openapi_url="/openapi.json", title="API Docs")
187
-
188
- @app.get("/openapi.json", include_in_schema=False)
189
- async def get_open_api_endpoint():
190
- if app.openapi_schema: return app.openapi_schema
191
- schema = get_openapi(title="Emotion Ensemble API", version="1.0.0", routes=app.routes)
192
- schema["components"]["securitySchemes"] = {
193
- "bearerAuth": {"type": "http", "scheme": "bearer", "bearerFormat": "JWT"}
194
  }
195
- schema["security"] = [{"bearerAuth": []}]
196
- app.openapi_schema = schema
197
- return schema
198
 
199
  if __name__ == "__main__":
200
- import uvicorn
201
  uvicorn.run(app, host="0.0.0.0", port=int(os.getenv("PORT", 7860)))
 
import os
import time
import logging
import asyncio
import hashlib
import tempfile
import subprocess
from collections import defaultdict
from datetime import datetime, timedelta, timezone
from typing import Dict, List, Any

import aiohttp
import jwt
import librosa
import uvicorn
from fastapi import FastAPI, File, UploadFile, Depends, HTTPException, status
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
from fastapi.middleware.cors import CORSMiddleware
 
 
 
18
 
19
# --- 1. CONFIGURATION ---
class GlobalConfig:
    """Central settings, read once at import time from the environment."""

    # Set these in HF Space Secrets
    HF_TOKEN = os.getenv("HF_TOKEN", "")
    API_SECRET = os.getenv("API_SECRET_KEY", "default_secret_change_me_in_production")

    # Ensemble members: each entry is an HF Inference API endpoint plus its
    # fusion weight "w" (weights sum to 1.0).
    MODELS = {
        "emotion2vec": {"url": "https://api-inference.huggingface.co/models/emotion2vec/emotion2vec_plus_base", "w": 0.50},
        "meralion": {"url": "https://api-inference.huggingface.co/models/MERaLiON/MERaLiON-SER-v1", "w": 0.25},
        "wav2vec2": {"url": "https://api-inference.huggingface.co/models/ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition", "w": 0.15},
        "hubert": {"url": "https://api-inference.huggingface.co/models/superb/hubert-large-superb-er", "w": 0.07},
        "gigam": {"url": "https://api-inference.huggingface.co/models/salute-developers/GigaAM-emo", "w": 0.03},
    }

    # Standardized internal labels: substring keywords on the left match the
    # raw model labels (lower-cased) to one of our four canonical emotions.
    MAPPING = {
        "angry": ["ang", "fear"],  # Merging high-arousal negative
        "happy": ["hap", "joy", "surp"],
        "sad": ["sad"],
        "neutral": ["neu", "calm"],
    }
40
 
41
# Module-level singletons: config snapshot, logger, and bearer-auth scheme.
cfg = GlobalConfig()
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("EmotionAPI")

# --- 2. AUTHENTICATION ---
security = HTTPBearer()
47
 
48
def create_access_token(data: dict):
    """Return a signed HS256 JWT carrying *data* plus a 60-minute expiry."""
    claims = dict(data)
    claims["exp"] = datetime.now(timezone.utc) + timedelta(minutes=60)
    return jwt.encode(claims, cfg.API_SECRET, algorithm="HS256")
53
+
54
async def verify_jwt(credentials: HTTPAuthorizationCredentials = Depends(security)):
    """FastAPI dependency: decode the bearer JWT and return its claims dict.

    Raises:
        HTTPException(401): when the token is malformed, mis-signed or expired.
    """
    try:
        return jwt.decode(credentials.credentials, cfg.API_SECRET, algorithms=["HS256"])
    except jwt.PyJWTError:
        # Narrowed from a bare `except Exception`: only genuine token problems
        # become 401s; programming errors now surface as 500s instead of being
        # silently reported as auth failures.
        raise HTTPException(status_code=401, detail="Invalid/Expired Token")
60
+
61
# --- 3. CORE LOGIC ---
async def process_audio(file: UploadFile):
    """Handles format conversion and validation.

    Writes the upload to a temp file, converts it to 16 kHz mono WAV with
    ffmpeg, and returns ``(wav_bytes, duration_seconds)``. Both temp files
    are always removed, even on failure.

    Raises:
        RuntimeError: when ffmpeg exits non-zero (stderr included).
        subprocess.TimeoutExpired: when ffmpeg hangs past 30 s.
    """
    # Keep the original extension so ffmpeg can sniff the container format;
    # a missing filename previously crashed with AttributeError (-> HTTP 500).
    suffix = f".{file.filename.split('.')[-1]}" if file.filename else ".bin"
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_in:
        tmp_in.write(await file.read())
        input_path = tmp_in.name

    output_path = input_path + ".wav"
    try:
        # Standardize to 16kHz Mono WAV. The timeout keeps a wedged ffmpeg
        # from stalling the request forever (the previous app version had
        # timeout=30 here; this revision dropped it).
        proc = subprocess.run(
            ["ffmpeg", "-i", input_path, "-ar", "16000", "-ac", "1", "-y", output_path],
            capture_output=True, text=True, timeout=30,
        )
        if proc.returncode != 0:
            # RuntimeError instead of bare Exception: still caught by the
            # caller's `except Exception`, but a concrete, documented type.
            raise RuntimeError(f"FFmpeg error: {proc.stderr}")

        with open(output_path, "rb") as f:
            audio_bytes = f.read()

        duration = librosa.get_duration(path=output_path)
        return audio_bytes, duration
    finally:
        for p in (input_path, output_path):
            if os.path.exists(p):
                os.unlink(p)
88
+
89
async def query_hf(session, name, url, data):
    """Individual model call with retry for 'loading' status.

    Returns the parsed JSON prediction list on success, or None after three
    failed attempts — callers drop None entries from the ensemble.
    """
    headers = {"Authorization": f"Bearer {cfg.HF_TOKEN}"}
    for _ in range(3):  # Simple retry if model is loading
        try:
            async with session.post(url, headers=headers, data=data) as resp:
                if resp.status == 200:
                    # Parse JSON only on success: the previous code called
                    # resp.json() before checking the status, so an HTML
                    # error page raised and crashed the whole gather().
                    return await resp.json()
                if resp.status == 503:  # Model loading
                    await asyncio.sleep(5)
        except (aiohttp.ClientError, asyncio.TimeoutError):
            # Network hiccups count as one failed attempt, not a 500.
            await asyncio.sleep(1)
    return None
101
+
102
def ensemble_logic(responses: dict):
    """Weighted average of results.

    Fuses per-model prediction lists into one score distribution over the
    canonical labels in cfg.MAPPING, weighted by each model's "w" value.
    Returns {"primary", "confidence", "distribution"}.
    """
    # Plain dict instead of collections.defaultdict: this revision of the
    # module dropped the `from collections import defaultdict` import, so
    # the original body raised NameError on the first real request.
    final_scores = {}
    for name, preds in responses.items():
        if not isinstance(preds, list):
            continue  # malformed/error payloads are skipped, not fatal
        weight = cfg.MODELS[name]["w"]
        for p in preds:
            label = p['label'].lower()
            # Map labels to our standard set; anything unmatched falls back
            # to "neutral" so every score lands somewhere.
            mapped = "neutral"
            for std, keywords in cfg.MAPPING.items():
                if any(k in label for k in keywords):
                    mapped = std
                    break
            final_scores[mapped] = final_scores.get(mapped, 0.0) + p['score'] * weight

    sorted_res = sorted(final_scores.items(), key=lambda x: x[1], reverse=True)
    return {
        "primary": sorted_res[0][0] if sorted_res else "unknown",
        "confidence": round(sorted_res[0][1], 3) if sorted_res else 0,
        "distribution": {k: round(v, 3) for k, v in sorted_res}
    }
124
+
125
# --- 4. API ENDPOINTS ---
app = FastAPI(title="Emotion Ensemble API")

# NOTE(review): CORS is wide open (any origin/method/header) — acceptable for
# a public demo Space, but confirm before fronting real credentials with this.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
134
+
135
@app.get("/health")
def health():
    """Liveness probe; also reports whether an HF token is configured."""
    return {"status": "online", "hf_configured": bool(cfg.HF_TOKEN)}
138
+
139
@app.get("/token")
def get_token(user: str = "hf_user"):
    """Mint a demo JWT for *user*.

    NOTE(review): this endpoint itself is unauthenticated, so anyone can mint
    a token — presumably intentional for a demo Space; confirm.
    """
    return {"token": create_access_token({"sub": user})}
142
 
143
@app.post("/analyze")
async def analyze(file: UploadFile = File(...), auth=Depends(verify_jwt)):
    """Full pipeline: convert audio, fan out to all HF models, fuse scores."""
    start_time = time.time()

    # 1. Process Audio
    try:
        audio_bytes, duration = await process_audio(file)
    except Exception as e:
        raise HTTPException(400, f"Audio processing failed: {str(e)}")

    # 2. Run Parallel Inference
    async with aiohttp.ClientSession() as session:
        tasks = {name: query_hf(session, name, m["url"], audio_bytes)
                 for name, m in cfg.MODELS.items()}
        # return_exceptions=True: one model call raising must not 500 the
        # whole request — an exception is treated like any failed model below.
        results = await asyncio.gather(*tasks.values(), return_exceptions=True)
        raw_responses = dict(zip(tasks.keys(), results))

    # 3. Ensemble & Format
    successful_models = {
        k: v for k, v in raw_responses.items()
        if v is not None and not isinstance(v, BaseException)
    }
    if not successful_models:
        raise HTTPException(503, "All upstream models failed.")

    analysis = ensemble_logic(successful_models)

    return {
        "emotion": analysis["primary"],
        "confidence": analysis["confidence"],
        "scores": analysis["distribution"],
        "meta": {
            "duration_sec": round(duration, 2),
            "latency_sec": round(time.time() - start_time, 2),
            "models_responding": len(successful_models)
        }
    }
 
 
 
if __name__ == "__main__":
    # Local dev entry point; on HF Spaces uvicorn is launched by the
    # Dockerfile CMD instead.
    uvicorn.run(app, host="0.0.0.0", port=int(os.getenv("PORT", 7860)))