PsalmsJava committed on
Commit
8ef2cca
·
1 Parent(s): 95c58ff

Changed Everything to 2 Files

Browse files
Files changed (3) hide show
  1. DockerFile +22 -11
  2. app.py +187 -135
  3. requirements.txt +8 -11
DockerFile CHANGED
@@ -1,26 +1,37 @@
1
  FROM python:3.9-slim
2
 
3
- # THIS IS CRITICAL - Create the required user
4
  RUN useradd -m -u 1000 user
5
  USER user
6
 
7
- # Set environment variables
8
- ENV PYTHONUNBUFFERED=1 \
9
- PYTHONDONTWRITEBYTECODE=1 \
10
- PATH="/home/user/.local/bin:$PATH"
11
 
12
  WORKDIR /app
13
 
14
- # Copy requirements first
 
 
 
 
 
 
 
 
15
  COPY --chown=user requirements.txt .
16
  RUN pip install --no-cache-dir --upgrade -r requirements.txt
17
 
18
- # Copy application code
19
- COPY --chown=user app/ ./app/
20
- COPY --chown=user main.py .
21
 
22
- # Hugging Face Spaces requires port 7860
23
  ENV PORT=7860
24
 
 
 
 
 
25
  # Run the application
26
- CMD uvicorn main:app --host 0.0.0.0 --port ${PORT}
 
1
  FROM python:3.9-slim
2
 
3
# Create the non-root user (UID 1000) that the container runs as
RUN useradd -m -u 1000 user
USER user

# Set environment
ENV PATH="/home/user/.local/bin:$PATH" \
    PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1

WORKDIR /app

# Install system dependencies (as root, then switch back).
# --no-install-recommends keeps the image small; ffmpeg is invoked by app.py
# to transcode uploads and libsndfile1 is needed to read the resulting WAV.
USER root
RUN apt-get update && apt-get install -y --no-install-recommends \
        ffmpeg \
        libsndfile1 \
    && rm -rf /var/lib/apt/lists/*
USER user

# Copy requirements first (better layer caching)
COPY --chown=user requirements.txt .
RUN pip install --no-cache-dir --upgrade -r requirements.txt

# Copy application
COPY --chown=user app.py .

# Hugging Face requires port 7860
ENV PORT=7860

# Health check.
# Uses the Python interpreter that is already in the image (curl is not
# installed in python:3.9-slim) and probes "/", which app.py serves; the
# application does not define a /health route. urlopen raises on connection
# errors and on HTTP status >= 400, which makes the probe exit non-zero.
HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
    CMD python -c "import os, urllib.request; urllib.request.urlopen('http://localhost:%s/' % os.environ.get('PORT', '7860'), timeout=5)" || exit 1

# Run the application.
# Shell form is kept so ${PORT} is expanded; `exec` replaces the shell so that
# uvicorn runs as PID 1 and receives SIGTERM directly on container stop.
CMD exec uvicorn app:app --host 0.0.0.0 --port ${PORT}
app.py CHANGED
@@ -1,149 +1,201 @@
1
  import os
 
 
 
2
  import tempfile
3
  import subprocess
4
- from fastapi import FastAPI, File, UploadFile, HTTPException
5
- from fastapi.responses import JSONResponse
 
 
 
 
 
6
  import aiohttp
7
  import numpy as np
8
- from datetime import datetime
9
- import logging
 
 
 
 
 
10
 
11
- # Setup
12
- logging.basicConfig(level=logging.INFO)
13
- logger = logging.getLogger(__name__)
14
- app = FastAPI(title="Emotion Detection API", docs_url="/docs")
15
-
16
- # Config - get from environment
17
- HF_TOKEN = os.getenv("HF_TOKEN", "")
18
- API_TOKEN = os.getenv("API_TOKEN", "test123")
19
-
20
- # Models - using only 2 for reliability
21
- MODELS = {
22
- "wav2vec2_english": {
23
- "url": "https://api-inference.huggingface.co/models/ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition",
24
- "weight": 0.7,
25
- },
26
- "gigam_emo": {
27
- "url": "https://api-inference.huggingface.co/models/salute-developers/GigaAM-emo",
28
- "weight": 0.3,
29
  }
30
- }
31
-
32
- # Emotion mapping
33
- EMOTION_MAPPING = {
34
- "angry": ["angry", "ang"],
35
- "happy": ["happy", "hap"],
36
- "sad": ["sad"],
37
- "fear": ["fear"],
38
- "surprise": ["surprise"],
39
- "disgust": ["disgust"],
40
- "neutral": ["neutral", "neu"]
41
- }
42
-
43
- @app.get("/health")
44
- async def health():
45
- return {"status": "ok", "hf_token": bool(HF_TOKEN)}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
  @app.get("/")
48
- async def root():
49
- return {
50
- "message": "Emotion Detection API",
51
- "docs": "/docs",
52
- "endpoints": ["POST /analyze"]
53
- }
54
 
55
  @app.post("/analyze")
56
- async def analyze(file: UploadFile = File(...)):
57
- """Analyze emotion from audio file"""
 
 
58
 
59
- # Check auth header
60
- auth = file.headers.get("authorization", "")
61
- if not auth or auth.replace("Bearer ", "") != API_TOKEN:
62
- return JSONResponse(
63
- status_code=401,
64
- content={"error": "Invalid or missing Authorization header"}
65
- )
66
 
67
- # Save uploaded file
68
- with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
69
- content = await file.read()
70
- tmp.write(content)
71
- input_path = tmp.name
72
 
73
- try:
74
- # Convert to proper format
75
- with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as out:
76
- output_path = out.name
77
-
78
- subprocess.run([
79
- "ffmpeg", "-i", input_path,
80
- "-ar", "16000", "-ac", "1",
81
- "-y", output_path
82
- ], check=True, capture_output=True)
83
-
84
- # Read converted file
85
- with open(output_path, "rb") as f:
86
- audio_bytes = f.read()
87
-
88
- # Query models
89
- headers = {"Authorization": f"Bearer {HF_TOKEN}"}
90
- results = {}
91
-
92
- async with aiohttp.ClientSession() as session:
93
- for name, config in MODELS.items():
94
- try:
95
- async with session.post(
96
- config["url"],
97
- headers=headers,
98
- data=audio_bytes,
99
- timeout=10
100
- ) as resp:
101
- if resp.status == 200:
102
- results[name] = await resp.json()
103
- except Exception as e:
104
- logger.warning(f"{name} failed: {e}")
105
-
106
- # Simple ensemble
107
- emotion_scores = {}
108
- total_weight = 0
109
-
110
- for name, predictions in results.items():
111
- weight = MODELS[name]["weight"]
112
- total_weight += weight
113
-
114
- for pred in predictions:
115
- label = pred.get("label", "").lower()
116
- score = pred.get("score", 0)
117
-
118
- # Map to standard emotions
119
- for std_emo, variations in EMOTION_MAPPING.items():
120
- if any(v in label for v in variations):
121
- emotion_scores[std_emo] = emotion_scores.get(std_emo, 0) + score * weight
122
- break
123
-
124
- # Normalize
125
- if total_weight > 0:
126
- emotion_scores = {k: v/total_weight for k, v in emotion_scores.items()}
127
-
128
- # Get primary emotion
129
- primary = max(emotion_scores.items(), key=lambda x: x[1]) if emotion_scores else ("unknown", 0)
130
-
131
- return {
132
- "primary_emotion": primary[0],
133
- "confidence": round(primary[1], 3),
134
- "all_emotions": {k: round(v, 3) for k, v in emotion_scores.items()},
135
- "models_used": list(results.keys())
136
- }
137
-
138
- except Exception as e:
139
- logger.error(f"Error: {e}")
140
- return JSONResponse(status_code=500, content={"error": str(e)})
141
- finally:
142
- # Cleanup
143
- for path in [input_path, output_path]:
144
- if os.path.exists(path):
145
- os.unlink(path)
146
-
147
- # For Hugging Face
148
- from fastapi.middleware.cors import CORSMiddleware
149
- app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
 
1
  import os
2
+ import time
3
+ import jwt
4
+ import hashlib
5
  import tempfile
6
  import subprocess
7
+ import logging
8
+ import asyncio
9
+ from datetime import datetime, timedelta, timezone
10
+ from typing import Dict, List, Any, Optional
11
+ from collections import defaultdict
12
+ from contextlib import asynccontextmanager
13
+
14
  import aiohttp
15
  import numpy as np
16
+ import librosa
17
+ from fastapi import FastAPI, File, UploadFile, Depends, HTTPException, status
18
+ from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
19
+ from fastapi.middleware.cors import CORSMiddleware
20
+ from fastapi.openapi.docs import get_swagger_ui_html
21
+ from fastapi.openapi.utils import get_openapi
22
+ from pydantic import BaseModel
23
 
24
+ # ==================== CONFIGURATION ====================
25
class Config:
    """Static service configuration, resolved once when the module is imported."""

    # Hugging Face token; sent as a Bearer header to every model endpoint.
    HF_TOKEN = os.getenv("HF_TOKEN", "")

    # Secret used to sign and verify JWT access tokens.
    # HF Spaces should set API_SECRET_KEY in Settings > Variables. When it is
    # missing (or empty) a random per-process key is generated instead of
    # falling back to a hard-coded value that anyone reading the source could
    # use to forge tokens. Consequence: without the variable, tokens stop
    # validating after a restart and are not shared between worker processes.
    API_SECRET_KEY = os.getenv("API_SECRET_KEY") or os.urandom(32).hex()
    ALGORITHM = "HS256"
    ACCESS_TOKEN_EXPIRE_MINUTES = 30

    # Remote inference endpoints. "weight" is the multiplier applied to a
    # model's scores when predictions are fused; "timeout" is the per-request
    # timeout in seconds.
    MODELS = {
        "emotion2vec_plus": {
            "url": "https://api-inference.huggingface.co/models/emotion2vec/emotion2vec_plus_base",
            "weight": 0.50,
            "timeout": 30,
            "description": "Foundation SER model",
        },
        "meralion_ser": {
            "url": "https://api-inference.huggingface.co/models/MERaLiON/MERaLiON-SER-v1",
            "weight": 0.25,
            "timeout": 30,
            "description": "English/SEA optimized",
        },
        "wav2vec2_english": {
            "url": "https://api-inference.huggingface.co/models/ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition",
            "weight": 0.15,
            "timeout": 25,
            "description": "English fine-tuned",
        },
        "hubert_er": {
            "url": "https://api-inference.huggingface.co/models/superb/hubert-large-superb-er",
            "weight": 0.07,
            "timeout": 25,
            "description": "Acoustic specialist",
        },
        "gigam_emo": {
            "url": "https://api-inference.huggingface.co/models/salute-developers/GigaAM-emo",
            "weight": 0.03,
            "timeout": 20,
            "description": "Acoustic pattern expert",
        },
    }

    # Upload limits and the format every upload is transcoded to.
    MAX_FILE_SIZE_MB = 10
    SUPPORTED_FORMATS = ["wav", "mp3", "m4a", "ogg", "flac", "aac"]
    TARGET_SAMPLE_RATE = 16000
    MAX_DURATION_SECONDS = 30

    # Canonical emotion -> label fragments that are folded into it.
    EMOTION_MAPPING = {
        "angry": ["angry", "ang", "anger"],
        "happy": ["happy", "hap", "happiness", "joy"],
        "sad": ["sad", "sadness"],
        "fear": ["fear", "fearful"],
        "surprise": ["surprise", "surprised"],
        "disgust": ["disgust", "disgusted"],
        "neutral": ["neutral", "neu"],
    }


config = Config()
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)
57
+
58
+ # ==================== AUTH & UTILS ====================
59
security = HTTPBearer()


class AuthHandler:
    """Issue and verify the JWT access tokens that protect the API."""

    @staticmethod
    def create_token(client_id: str = "api_client") -> str:
        """Return a signed access token whose subject is ``client_id``.

        The token expires ``config.ACCESS_TOKEN_EXPIRE_MINUTES`` minutes after
        it is issued.
        """
        # Read the clock once so "iat" and "exp" are exactly the configured
        # lifetime apart.
        issued_at = datetime.now(timezone.utc)
        expire = issued_at + timedelta(minutes=config.ACCESS_TOKEN_EXPIRE_MINUTES)
        payload = {"sub": client_id, "exp": expire, "iat": issued_at, "type": "access"}
        return jwt.encode(payload, config.API_SECRET_KEY, algorithm=config.ALGORITHM)

    @staticmethod
    def verify_token(credentials: HTTPAuthorizationCredentials = Depends(security)) -> str:
        """Validate the bearer token and return its subject.

        Returns:
            The token's ``sub`` claim, or ``"anonymous"`` when the claim is absent.

        Raises:
            HTTPException: 401 when the token cannot be decoded or verified.
        """
        token = credentials.credentials
        try:
            payload = jwt.decode(token, config.API_SECRET_KEY, algorithms=[config.ALGORITHM])
        except jwt.PyJWTError as exc:
            # Only token problems become a 401; unrelated programming errors
            # propagate instead of being reported as an authentication failure.
            raise HTTPException(
                status_code=401,
                detail="Invalid or expired token",
                headers={"WWW-Authenticate": "Bearer"},
            ) from exc
        return payload.get("sub", "anonymous")
76
+
77
+ # ==================== CORE LOGIC ====================
78
class AudioProcessor:
    """Validate an uploaded audio file and transcode it to mono WAV."""

    @staticmethod
    async def validate_and_process(file: UploadFile) -> tuple:
        """Check an upload and convert it to the target sample rate.

        Args:
            file: The uploaded audio file; its content is read in full.

        Returns:
            A ``(wav_bytes, info)`` tuple where ``info`` holds the clip
            ``duration`` in seconds (rounded to 2 places) and the original
            ``format`` (the lower-cased file extension).

        Raises:
            HTTPException: 413 if the upload exceeds ``MAX_FILE_SIZE_MB``,
                415 if the extension is missing or not in
                ``SUPPORTED_FORMATS``, 400 if ffmpeg cannot convert the file
                in time or the clip is longer than ``MAX_DURATION_SECONDS``.
        """
        contents = await file.read()
        if len(contents) > config.MAX_FILE_SIZE_MB * 1024 * 1024:
            raise HTTPException(413, "File too large")

        # The filename is client-supplied and may be absent. Restricting the
        # extension to the configured allow-list also keeps arbitrary text
        # (for example path separators) out of the temp-file suffix.
        _stem, dot, ext = (file.filename or "").rpartition(".")
        ext = ext.lower()
        if not dot or ext not in config.SUPPORTED_FORMATS:
            raise HTTPException(415, "Unsupported audio format")

        with tempfile.NamedTemporaryFile(delete=False, suffix=f".{ext}") as f_in:
            f_in.write(contents)
            input_path = f_in.name

        output_path = input_path + ".wav"
        try:
            cmd = [
                "ffmpeg", "-i", input_path,
                "-ar", str(config.TARGET_SAMPLE_RATE),
                "-ac", "1",
                "-y", output_path,
            ]
            try:
                # ffmpeg is a blocking child process; run it in a worker
                # thread so the event loop keeps serving other requests.
                await asyncio.to_thread(
                    subprocess.run, cmd, capture_output=True, check=True, timeout=30
                )
            except subprocess.CalledProcessError as exc:
                raise HTTPException(400, "Could not decode audio file") from exc
            except subprocess.TimeoutExpired as exc:
                raise HTTPException(400, "Audio conversion timed out") from exc

            # Decoding is CPU/IO bound as well, so it is kept off the loop too.
            y, sr = await asyncio.to_thread(
                librosa.load, output_path, sr=config.TARGET_SAMPLE_RATE
            )
            duration = len(y) / sr
            if duration > config.MAX_DURATION_SECONDS:
                raise HTTPException(400, "Audio too long")

            with open(output_path, "rb") as f:
                return f.read(), {"duration": round(duration, 2), "format": ext}
        finally:
            # Always remove both temp files, including on validation errors.
            for p in (input_path, output_path):
                if os.path.exists(p):
                    os.unlink(p)
105
+
106
class EmotionEnsemble:
    """Query several remote emotion models and fuse their predictions."""

    def __init__(
        self,
        models: Optional[Dict[str, Dict[str, Any]]] = None,
        emotion_mapping: Optional[Dict[str, List[str]]] = None,
    ):
        """Create an ensemble.

        Args:
            models: Model table keyed by name; each entry provides ``url``,
                ``weight`` and ``timeout``. Defaults to ``config.MODELS``.
            emotion_mapping: Canonical emotion -> label fragments. Defaults to
                ``config.EMOTION_MAPPING``.
        """
        self.models = config.MODELS if models is None else models
        self.emotion_mapping = (
            config.EMOTION_MAPPING if emotion_mapping is None else emotion_mapping
        )

    async def predict(self, audio_bytes: bytes) -> Dict[str, Any]:
        """Send ``audio_bytes`` to every model concurrently and fuse the answers.

        Raises:
            HTTPException: 503 when ``HF_TOKEN`` is not configured, when no
                model returns a result, or when no result is usable.
        """
        if not config.HF_TOKEN:
            raise HTTPException(503, "HF_TOKEN missing")

        headers = {"Authorization": f"Bearer {config.HF_TOKEN}"}
        async with aiohttp.ClientSession() as session:
            tasks = [
                self._query(session, name, m_cfg, audio_bytes, headers)
                for name, m_cfg in self.models.items()
            ]
            results = await asyncio.gather(*tasks)

        # Keep only the models that produced a non-empty answer.
        model_outputs = {name: res for name, res in zip(self.models, results) if res}
        if not model_outputs:
            raise HTTPException(503, "All models failed to respond")

        return self._fuse(model_outputs)

    async def _query(self, session, name, cfg, data, headers):
        """POST the audio to one model; return its JSON body or ``None`` on failure."""
        timeout = aiohttp.ClientTimeout(total=cfg["timeout"])
        try:
            async with session.post(
                cfg["url"], headers=headers, data=data, timeout=timeout
            ) as resp:
                if resp.status == 200:
                    return await resp.json()
                logger.warning("Model %s returned HTTP %s", name, resp.status)
        except (aiohttp.ClientError, asyncio.TimeoutError, ValueError) as exc:
            # Network, timeout and body-decoding problems are tolerated
            # per model. Cancellation is deliberately NOT caught, so a cancelled
            # request stops instead of being treated as a model failure.
            logger.warning("Model %s failed: %s", name, exc)
        return None

    def _fuse(self, model_outputs):
        """Combine per-model predictions into one weighted score per emotion.

        Scores are divided by the total weight of the models that contributed
        at least one usable prediction, so the result stays on the same scale
        when some models are unavailable.

        Raises:
            HTTPException: 503 when no model output contains a usable prediction.
        """
        scores = defaultdict(float)
        total_weight = 0.0
        for name, preds in model_outputs.items():
            # Tolerate a prediction list wrapped in an outer single-item list.
            if isinstance(preds, list) and len(preds) == 1 and isinstance(preds[0], list):
                preds = preds[0]
            if not isinstance(preds, list):
                continue

            weight = self.models[name]["weight"]
            contributed = False
            for pred in preds:
                if not isinstance(pred, dict) or "label" not in pred or "score" not in pred:
                    continue
                scores[self._map(pred["label"])] += pred["score"] * weight
                contributed = True
            if contributed:
                total_weight += weight

        if not scores or total_weight <= 0:
            raise HTTPException(503, "No usable model predictions")

        ranked = sorted(
            ((label, value / total_weight) for label, value in scores.items()),
            key=lambda item: item[1],
            reverse=True,
        )
        top_label, top_score = ranked[0]
        return {
            "primary_emotion": top_label,
            "confidence": round(top_score, 3),
            "all_emotions": dict(ranked),
        }

    def _map(self, label: str) -> str:
        """Map a model-specific label to a canonical emotion.

        Matching is by case-insensitive substring; labels that match nothing
        are reported as ``"neutral"``.
        """
        label = label.lower()
        for canonical, fragments in self.emotion_mapping.items():
            if any(fragment in label for fragment in fragments):
                return canonical
        return "neutral"
148
+
149
+ # ==================== APP SETUP ====================
150
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Log one message when the application starts and one when it stops."""
    logger.info("🚀 API Starting Up...")
    yield
    logger.info("🛑 API Shutting Down...")


# Process-wide singletons shared by the route handlers.
cache = {}
ensemble = EmotionEnsemble()
audio_proc = AudioProcessor()
auth_handler = AuthHandler()

# docs_url=None turns off the built-in Swagger route; a custom /docs route is
# registered on the app separately.
app = FastAPI(docs_url=None, lifespan=lifespan, title="Emotion API")
161
 
162
  @app.get("/")
163
async def root():
    """Return a short status message and the location of the API docs."""
    return {"message": "Emotion API Active", "docs": "/docs"}
164
+
165
@app.get("/auth/token")
async def get_token(client_id: str = "api_client"):
    """Issue an access token for ``client_id``."""
    # NOTE(review): this route asks for no credentials, so any caller can
    # obtain a valid token - confirm that is intended.
    token = auth_handler.create_token(client_id)
    return {"access_token": token}
 
168
 
169
  @app.post("/analyze")
170
async def analyze(file: UploadFile = File(...), user: str = Depends(auth_handler.verify_token)):
    """Detect the emotion in an uploaded audio clip.

    Results are cached in memory by content hash (at most 100 entries), so an
    identical upload is answered without re-querying the models.

    Returns:
        The fused prediction plus ``audio_info`` and the calling ``user``.
    """
    content = await file.read()
    await file.seek(0)  # Reset for the processor
    # SHA-256 rather than MD5: the key is derived from untrusted uploads, and a
    # crafted collision would make one file return another file's result.
    ckey = hashlib.sha256(content).hexdigest()

    result = cache.get(ckey)
    if result is None:
        audio_bytes, info = await audio_proc.validate_and_process(file)
        result = await ensemble.predict(audio_bytes)
        result["audio_info"] = info
        if len(cache) < 100:
            cache[ckey] = result

    # "user" is added per request and never stored, so a cache hit cannot
    # return the identity of whoever uploaded the clip first.
    return {**result, "user": user}
183
+
184
@app.get("/docs", include_in_schema=False)
async def custom_docs():
    """Serve the Swagger UI page, pointed at ``/openapi.json``."""
    swagger_page = get_swagger_ui_html(title="API Docs", openapi_url="/openapi.json")
    return swagger_page
187
+
188
def _build_openapi_schema():
    """Build the OpenAPI schema once, add bearer-auth metadata, and cache it."""
    if app.openapi_schema:
        return app.openapi_schema
    schema = get_openapi(title="Emotion Ensemble API", version="1.0.0", routes=app.routes)
    # "components" is only present when at least one route contributes
    # a schema, so create it on demand instead of indexing blindly.
    schema.setdefault("components", {})["securitySchemes"] = {
        "bearerAuth": {"type": "http", "scheme": "bearer", "bearerFormat": "JWT"}
    }
    schema["security"] = [{"bearerAuth": []}]
    app.openapi_schema = schema
    return schema


# FastAPI registers its own /openapi.json route when the app is constructed,
# and that route is matched before the handler below. Overriding app.openapi
# makes the built-in route serve the customised schema too.
app.openapi = _build_openapi_schema


@app.get("/openapi.json", include_in_schema=False)
async def get_open_api_endpoint():
    """Return the customised OpenAPI schema."""
    return app.openapi()
198
+
199
if __name__ == "__main__":
    # Local/dev entry point: serve on all interfaces, on $PORT or 7860.
    import uvicorn

    listen_port = int(os.getenv("PORT", 7860))
    uvicorn.run(app, host="0.0.0.0", port=listen_port)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -1,11 +1,8 @@
1
- fastapi==0.104.1
2
- uvicorn[standard]==0.24.0
3
- python-jose[cryptography]==3.3.0
4
- passlib[bcrypt]==1.7.4
5
- python-multipart==0.0.6
6
- aiohttp==3.9.1
7
- librosa==0.10.1
8
- soundfile==0.12.1
9
- numpy==1.24.3
10
- pydantic==1.10.13
11
- python-dotenv==1.0.0
 
1
+ fastapi
2
+ uvicorn
3
+ python-multipart
4
+ PyJWT
5
+ aiohttp
6
+ numpy
7
+ librosa
8
+ pydantic