PsalmsJava committed on
Commit
28a1786
·
1 Parent(s): 12f6795

Some other change

Browse files
DockerFile CHANGED
@@ -1,9 +1,35 @@
 
1
  FROM python:3.9-slim
2
 
3
- RUN apt-get update && apt-get install -y ffmpeg && rm -rf /var/lib/apt/lists/*
4
- RUN pip install fastapi uvicorn aiohttp numpy
 
 
 
5
 
 
 
 
 
 
 
 
 
6
  WORKDIR /app
7
- COPY main.py .
8
 
9
- CMD uvicorn main:app --host 0.0.0.0 --port 7860
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Production Dockerfile for Hugging Face Spaces
2
  FROM python:3.9-slim
3
 
4
+ # Set environment variables
5
+ ENV PYTHONUNBUFFERED=1 \
6
+ PYTHONDONTWRITEBYTECODE=1 \
7
+ PIP_NO_CACHE_DIR=1 \
8
+ PIP_DISABLE_PIP_VERSION_CHECK=1
9
 
10
+ # Install system dependencies
11
+ RUN apt-get update && apt-get install -y \
12
+ ffmpeg \
13
+ libsndfile1 \
14
+ curl \
15
+ && rm -rf /var/lib/apt/lists/*
16
+
17
+ # Create app directory
18
  WORKDIR /app
 
19
 
20
+ # Copy requirements first (for caching)
21
+ COPY requirements.txt .
22
+ RUN pip install --no-cache-dir -r requirements.txt
23
+
24
+ # Copy application code
25
+ COPY app/ ./app/
26
+
27
+ # Hugging Face Spaces uses PORT
28
+ ENV PORT=7860
29
+
30
+ # Health check
31
+ HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
32
+ CMD curl -f http://localhost:${PORT}/health || exit 1
33
+
34
+ # Run the application
35
+ CMD uvicorn app.main:app --host 0.0.0.0 --port ${PORT}
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: Emotion Detection Ensemble API
3
  emoji: 🎭
4
  colorFrom: blue
5
  colorTo: purple
@@ -9,24 +9,21 @@ pinned: false
9
  license: mit
10
  ---
11
 
12
- # 🎭 Emotion Detection Ensemble API
13
 
14
- A production-ready emotion detection API that combines 5 state-of-the-art models for accurate emotion recognition from speech.
15
 
16
- ## Features
17
- - **Ensemble Learning**: Combines 5 models with weighted voting
18
- - **Advanced Audio Processing**: VAD, noise reduction, format conversion
19
- - **Multi-Emotion Output**: Returns probability distribution across 7 emotions
20
- - **Secure Authentication**: Bearer token authentication
21
- - **Interactive Docs**: Built-in Swagger UI
22
 
23
- ## 🚀 Quick Start
 
 
 
 
 
24
 
25
- ### API Endpoints
26
- - `GET /health` - Health check
27
- - `GET /models` - List all models
28
- - `POST /analyze` - Analyze emotion from audio
29
- - `POST /analyze-batch` - Analyze multiple files
30
 
31
- ### Authentication
32
- Include your API token in the header:
 
 
1
  ---
2
+ title: Emotion Detection API
3
  emoji: 🎭
4
  colorFrom: blue
5
  colorTo: purple
 
9
  license: mit
10
  ---
11
 
12
+ # 🎭 Emotion Detection API
13
 
14
+ Production-grade emotion detection API using 5-model ensemble with JWT authentication.
15
 
16
+ ## Features
 
 
 
 
 
17
 
18
+ - **5-Model Ensemble**: Weighted voting for maximum accuracy
19
+ - ✅ **JWT Authentication**: Secure token-based access
20
+ - ✅ **Multiple Audio Formats**: WAV, MP3, M4A, OGG, FLAC, AAC
21
+ - ✅ **Smart Caching**: Reduces latency for repeated files
22
+ - ✅ **Swagger Documentation**: Interactive API explorer
23
+ - ✅ **Docker Deployment**: Ready for Hugging Face Spaces
24
 
25
+ ## Quick Start
 
 
 
 
26
 
27
+ ### Get Authentication Token
28
+ ```bash
29
+ curl -X POST https://your-space.hf.space/auth/token?client_id=your_app
app/__init__.py ADDED
File without changes
app/audio/__init__.py ADDED
File without changes
app/audio/processor.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import tempfile
3
+ import subprocess
4
+ import numpy as np
5
+ import librosa
6
+ from fastapi import UploadFile, HTTPException
7
+ from typing import Tuple
8
+ import logging
9
+ from app.config import settings
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
class AudioProcessor:
    """Production-grade audio preprocessing.

    Stateless helpers for validating uploads, converting arbitrary audio to
    16 kHz mono WAV via FFmpeg, and extracting basic metadata.
    """

    @staticmethod
    async def validate_file(file: UploadFile) -> Tuple[bytes, str]:
        """Validate an uploaded file's size and extension.

        Returns:
            (contents, ext): the raw upload bytes and the lowercased extension.

        Raises:
            HTTPException: 413 if the file exceeds MAX_FILE_SIZE_MB,
                415 if the extension is not in SUPPORTED_FORMATS.
        """
        # Read fully up front; the size limit bounds worst-case memory use.
        contents = await file.read()
        size_mb = len(contents) / (1024 * 1024)

        if size_mb > settings.MAX_FILE_SIZE_MB:
            raise HTTPException(
                status_code=413,
                detail=f"File too large. Max {settings.MAX_FILE_SIZE_MB}MB"
            )

        # Guard against a missing filename (UploadFile.filename can be None);
        # a dotless name yields the whole name, which then fails the format
        # check below with the same 415.
        filename = file.filename or ""
        ext = filename.split('.')[-1].lower()
        if ext not in settings.SUPPORTED_FORMATS:
            raise HTTPException(
                status_code=415,
                detail=f"Unsupported format. Supported: {settings.SUPPORTED_FORMATS}"
            )

        return contents, ext

    @staticmethod
    async def convert_to_wav(input_bytes: bytes, input_ext: str) -> bytes:
        """Convert audio bytes to WAV (TARGET_SAMPLE_RATE Hz, mono, 16-bit PCM).

        BUG FIX: the original ran FFmpeg via blocking ``subprocess.run``
        inside an ``async`` method, stalling the event loop (and every other
        in-flight request) for up to 30 s. Use an asyncio subprocess instead.

        Raises:
            HTTPException: 422 on conversion failure, 408 on timeout.
        """
        import asyncio  # stdlib; local import keeps the module's top imports unchanged

        with tempfile.NamedTemporaryFile(delete=False, suffix=f".{input_ext}") as f_in:
            f_in.write(input_bytes)
            input_path = f_in.name

        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f_out:
            output_path = f_out.name

        try:
            cmd = [
                "ffmpeg",
                "-i", input_path,
                "-ar", str(settings.TARGET_SAMPLE_RATE),
                "-ac", "1",              # mono
                "-acodec", "pcm_s16le",  # 16-bit PCM
                "-y",                    # Overwrite output
                output_path
            ]

            proc = await asyncio.create_subprocess_exec(
                *cmd,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
            )
            try:
                _, stderr = await asyncio.wait_for(proc.communicate(), timeout=30)
            except asyncio.TimeoutError:
                proc.kill()
                raise HTTPException(
                    status_code=408,
                    detail="Audio conversion timeout"
                )

            if proc.returncode != 0:
                logger.error(f"FFmpeg error: {stderr.decode(errors='replace')}")
                raise HTTPException(
                    status_code=422,
                    detail="Audio conversion failed"
                )

            # Read converted file
            with open(output_path, "rb") as f:
                return f.read()

        finally:
            # Always remove both temp files, on success and on every error path.
            for path in (input_path, output_path):
                if os.path.exists(path):
                    os.unlink(path)

    @staticmethod
    def get_audio_info(audio_bytes: bytes) -> dict:
        """Return duration / sample-rate metadata for WAV bytes.

        NOTE(review): librosa.load defaults to mono=True, so ``y`` is always
        1-D here and ``channels`` reports 1 — the stereo branch is effectively
        dead. Confirm intent if multi-channel metadata is ever needed.
        """
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
            tmp.write(audio_bytes)
            path = tmp.name

        try:
            y, sr = librosa.load(path, sr=None)
            duration = len(y) / sr

            return {
                "duration_seconds": round(duration, 2),
                "sample_rate": sr,
                "channels": 1 if len(y.shape) == 1 else y.shape[1],
                "samples": len(y)
            }
        finally:
            os.unlink(path)
111
+
112
# Shared module-level instance; all methods are static, so this exists
# purely for import convenience.
audio_processor = AudioProcessor()
app/auth.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datetime import datetime, timedelta
2
+ from typing import Optional
3
+ from fastapi import HTTPException, Security
4
+ from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
5
+ import jwt
6
+ from app.config import settings
7
+
8
+ security = HTTPBearer()
9
+
10
+ class AuthHandler:
11
+ """JWT-based authentication handler"""
12
+
13
+ def __init__(self):
14
+ self.secret_key = settings.API_SECRET_KEY
15
+ self.algorithm = settings.ALGORITHM
16
+ self.token_expiry = settings.ACCESS_TOKEN_EXPIRE_MINUTES
17
+
18
+ def create_token(self, client_id: str) -> str:
19
+ """Create JWT token for authenticated clients"""
20
+ expire = datetime.utcnow() + timedelta(minutes=self.token_expiry)
21
+ payload = {
22
+ "sub": client_id,
23
+ "exp": expire,
24
+ "iat": datetime.utcnow(),
25
+ "type": "access"
26
+ }
27
+ return jwt.encode(payload, self.secret_key, algorithm=self.algorithm)
28
+
29
+ def verify_token(self, credentials: HTTPAuthorizationCredentials = Security(security)) -> str:
30
+ """Verify JWT token and return client_id"""
31
+ token = credentials.credentials
32
+
33
+ try:
34
+ payload = jwt.decode(
35
+ token,
36
+ self.secret_key,
37
+ algorithms=[self.algorithm]
38
+ )
39
+
40
+ # Validate token type
41
+ if payload.get("type") != "access":
42
+ raise HTTPException(
43
+ status_code=401,
44
+ detail="Invalid token type"
45
+ )
46
+
47
+ # Check expiration
48
+ exp = datetime.fromtimestamp(payload.get("exp", 0))
49
+ if exp < datetime.utcnow():
50
+ raise HTTPException(
51
+ status_code=401,
52
+ detail="Token has expired"
53
+ )
54
+
55
+ return payload.get("sub", "anonymous")
56
+
57
+ except jwt.ExpiredSignatureError:
58
+ raise HTTPException(
59
+ status_code=401,
60
+ detail="Token has expired"
61
+ )
62
+ except jwt.InvalidTokenError:
63
+ raise HTTPException(
64
+ status_code=401,
65
+ detail="Invalid token"
66
+ )
67
+
68
# Shared module-level handler instance used as a FastAPI dependency target.
auth_handler = AuthHandler()
app/config.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import Dict, Any
3
+ from pydantic import BaseSettings
4
+
5
class Settings(BaseSettings):
    """Application settings loaded from the environment (and `.env`).

    Fields declared without a default (HF_TOKEN, API_SECRET_KEY) are
    required: pydantic raises a ValidationError at instantiation if they
    are not provided by the environment.
    """

    # API Settings
    API_V1_PREFIX: str = "/api/v1"
    PROJECT_NAME: str = "Emotion Detection API"
    VERSION: str = "1.0.0"

    # Security - Critical: These must be set in environment
    HF_TOKEN: str          # Hugging Face inference API token (sent as a Bearer header)
    API_SECRET_KEY: str    # JWT signing key
    ALGORITHM: str = "HS256"
    ACCESS_TOKEN_EXPIRE_MINUTES: int = 30

    # Model Configuration
    # Registry consumed by the ensemble: name -> {url, weight, timeout, enabled}.
    # `weight` is the model's vote weight in fusion (weights below sum to 1.00);
    # `timeout` is the per-request budget in seconds for the inference call.
    ENABLED_MODELS: Dict[str, Dict[str, Any]] = {
        "emotion2vec_plus": {
            "url": "https://api-inference.huggingface.co/models/emotion2vec/emotion2vec_plus_base",
            "weight": 0.50,
            "timeout": 30,
            "enabled": True
        },
        "meralion_ser": {
            "url": "https://api-inference.huggingface.co/models/MERaLiON/MERaLiON-SER-v1",
            "weight": 0.25,
            "timeout": 30,
            "enabled": True
        },
        "wav2vec2_english": {
            "url": "https://api-inference.huggingface.co/models/ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition",
            "weight": 0.15,
            "timeout": 25,
            "enabled": True
        },
        "hubert_er": {
            "url": "https://api-inference.huggingface.co/models/superb/hubert-large-superb-er",
            "weight": 0.07,
            "timeout": 25,
            "enabled": True
        },
        "gigam_emo": {
            "url": "https://api-inference.huggingface.co/models/salute-developers/GigaAM-emo",
            "weight": 0.03,
            "timeout": 20,
            "enabled": True
        }
    }

    # Audio Processing
    MAX_FILE_SIZE_MB: int = 10
    SUPPORTED_FORMATS: list = ["wav", "mp3", "m4a", "ogg", "flac", "aac"]
    TARGET_SAMPLE_RATE: int = 16000   # Hz; FFmpeg resamples uploads to this rate

    # Rate Limiting
    # NOTE(review): no consumer of these is visible in this chunk —
    # presumably enforced by middleware elsewhere; confirm.
    RATE_LIMIT_REQUESTS: int = 60
    RATE_LIMIT_PERIOD: int = 60  # seconds

    class Config:
        # pydantic v1 BaseSettings config: read `.env`, match env vars exactly.
        env_file = ".env"
        case_sensitive = True
65
+
66
# Eagerly instantiated at import time; fails fast (ValidationError) if the
# required environment variables are missing.
settings = Settings()
app/main.py ADDED
File without changes
app/models/__init__.py ADDED
File without changes
app/models/ensemble.py ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import aiohttp
3
+ from typing import Dict, List, Any, Optional
4
+ from collections import defaultdict
5
+ import logging
6
+ from app.config import settings
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
class EmotionEnsemble:
    """Weighted ensemble of hosted emotion-detection models.

    Queries every enabled model on the HF inference API concurrently and
    fuses the per-model label scores by weighted voting.
    """

    def __init__(self):
        # Model registry: name -> {url, weight, timeout, enabled}.
        self.models = settings.ENABLED_MODELS
        # Canonical emotion -> label variants emitted by individual models.
        self.emotion_mapping = {
            "angry": ["angry", "ang", "anger"],
            "happy": ["happy", "hap", "happiness", "joy"],
            "sad": ["sad", "sadness"],
            "fear": ["fear", "fearful"],
            "surprise": ["surprise", "surprised"],
            "disgust": ["disgust", "disgusted"],
            "neutral": ["neutral", "neu"]
        }

    async def predict(self, audio_bytes: bytes) -> Dict[str, Any]:
        """
        Run ensemble prediction on audio bytes.

        Returns the fused prediction dict (see _fuse_predictions).
        Raises Exception if no model returns a usable result.
        """
        headers = {"Authorization": f"Bearer {settings.HF_TOKEN}"}

        async with aiohttp.ClientSession() as session:
            # Fan out one request per enabled model.
            tasks = []
            model_names = []

            for name, config in self.models.items():
                if config.get("enabled", True):
                    tasks.append(self._query_model(
                        session, name, config, audio_bytes, headers
                    ))
                    model_names.append(name)

            # return_exceptions=True: one failing model must not sink the batch.
            results = await asyncio.gather(*tasks, return_exceptions=True)

            # Keep only truthy, non-exception results.
            model_outputs = {}
            for name, result in zip(model_names, results):
                if result and not isinstance(result, Exception):
                    model_outputs[name] = result
                    logger.info(f"✓ {name} succeeded")
                else:
                    logger.warning(f"✗ {name} failed: {result}")

            if not model_outputs:
                raise Exception("No models returned valid predictions")

            return self._fuse_predictions(model_outputs)

    async def _query_model(self, session, name, config, audio_bytes, headers):
        """Query a single model; return parsed JSON, or None on any failure."""
        try:
            timeout = aiohttp.ClientTimeout(total=config["timeout"])
            async with session.post(
                config["url"],
                headers=headers,
                data=audio_bytes,
                timeout=timeout
            ) as response:
                if response.status == 200:
                    return await response.json()
                elif response.status == 503:
                    # Model is cold-loading — wait briefly and retry once.
                    await asyncio.sleep(2)
                    # BUG FIX: the retry request previously carried no
                    # timeout and could hang indefinitely; apply the same
                    # per-model budget as the first attempt.
                    async with session.post(
                        config["url"],
                        headers=headers,
                        data=audio_bytes,
                        timeout=timeout
                    ) as retry:
                        if retry.status == 200:
                            return await retry.json()

                logger.warning(f"{name} returned {response.status}")
                return None

        except asyncio.TimeoutError:
            logger.warning(f"{name} timeout")
            return None
        except Exception as e:
            logger.warning(f"{name} error: {e}")
            return None

    def _fuse_predictions(self, model_outputs: Dict[str, List]) -> Dict[str, Any]:
        """Fuse per-model [{label, score}, ...] lists by weighted voting.

        Scores are accumulated per canonical emotion (score * model weight),
        then normalized by the total weight of the models that responded, so
        a partial ensemble still yields comparable confidences.
        """
        emotion_scores = defaultdict(float)
        total_weight = 0.0
        model_contributions = []

        for name, predictions in model_outputs.items():
            weight = self.models[name]["weight"]
            total_weight += weight

            contribution = {
                "model": name,
                "weight": weight,
                "predictions": []
            }

            for pred in predictions:
                label = pred.get("label", "").lower()
                score = pred.get("score", 0.0)

                # Map each model-specific label to a canonical emotion.
                mapped = self._map_emotion(label)
                contribution["predictions"].append({
                    "original": label,
                    "mapped": mapped,
                    "score": score
                })

                emotion_scores[mapped] += score * weight

            model_contributions.append(contribution)

        # Normalize by the weight of responding models only.
        if total_weight > 0:
            emotion_scores = {
                k: v / total_weight
                for k, v in emotion_scores.items()
            }

        # Primary emotion = highest fused score; "unknown" if nothing scored.
        if emotion_scores:
            primary = max(emotion_scores.items(), key=lambda x: x[1])
        else:
            primary = ("unknown", 0.0)

        return {
            "primary_emotion": primary[0],
            "confidence": round(primary[1], 4),
            "all_emotions": {
                k: round(v, 4)
                for k, v in sorted(
                    emotion_scores.items(),
                    key=lambda x: x[1],
                    reverse=True
                )
            },
            "ensemble_details": {
                "models_used": list(model_outputs.keys()),
                "total_models": len(self.models),
                "model_contributions": model_contributions
            }
        }

    def _map_emotion(self, label: str) -> str:
        """Map a model-specific label to a canonical emotion name.

        First tries substring match against the known variant lists, then a
        coarser prefix fallback; defaults to "neutral" when nothing matches.
        """
        label_lower = label.lower()

        for std_emo, variations in self.emotion_mapping.items():
            if any(var in label_lower for var in variations):
                return std_emo

        # Prefix-based fallback for truncated / abbreviated labels.
        if "ang" in label_lower:
            return "angry"
        elif "hap" in label_lower:
            return "happy"
        elif "sad" in label_lower:
            return "sad"
        elif "neu" in label_lower:
            return "neutral"
        elif "fea" in label_lower:
            return "fear"
        elif "sur" in label_lower:
            return "surprise"
        elif "dis" in label_lower:
            return "disgust"

        return "neutral"
182
+
183
# Shared module-level ensemble instance.
ensemble = EmotionEnsemble()
app/utils/logger.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import sys
3
+ from app.config import settings
4
+
5
+ def setup_logging():
6
+ """Configure logging for the application"""
7
+ logging.basicConfig(
8
+ level=logging.INFO,
9
+ format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
10
+ handlers=[
11
+ logging.StreamHandler(sys.stdout)
12
+ ]
13
+ )
14
+
15
+ # Set levels for noisy libraries
16
+ logging.getLogger("aiohttp").setLevel(logging.WARNING)
17
+ logging.getLogger("urllib3").setLevel(logging.WARNING)
requirements.txt CHANGED
@@ -1,4 +1,11 @@
1
- fastapi
2
- uvicorn
3
- aiohttp
4
- numpy
 
 
 
 
 
 
 
 
1
+ fastapi==0.104.1
2
+ uvicorn[standard]==0.24.0
3
+ python-jose[cryptography]==3.3.0
+ PyJWT==2.8.0
4
+ passlib[bcrypt]==1.7.4
5
+ python-multipart==0.0.6
6
+ aiohttp==3.9.1
7
+ librosa==0.10.1
8
+ soundfile==0.12.1
9
+ numpy==1.24.3
10
+ pydantic==1.10.13
11
+ python-dotenv==1.0.0