EurekaPotato committed on
Commit
8263279
·
verified ·
1 Parent(s): dde584b

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. __pycache__/handler.cpython-313.pyc +0 -0
  2. handler.py +103 -91
__pycache__/handler.cpython-313.pyc ADDED
Binary file (13.4 kB). View file
 
handler.py CHANGED
@@ -8,12 +8,12 @@ Extracts all 17 voice features from uploaded audio:
8
  Derived from: src/audio_features.py, src/emotion_features.py
9
  """
10
 
11
- import io
12
- import os
13
- import tempfile
14
- import numpy as np
15
- import librosa
16
- from scipy import signal as scipy_signal
17
  from typing import Dict
18
  import torch
19
  import torch.nn as nn
@@ -88,9 +88,21 @@ import base64
88
  import traceback
89
 
90
  app = FastAPI(title="Audio Feature Extraction API", version="1.0.0")
 
 
 
 
 
 
 
 
 
 
91
  app.add_middleware(
92
  CORSMiddleware,
93
- allow_origins=["*"], allow_credentials=True,
 
 
94
  allow_methods=["*"], allow_headers=["*"],
95
  )
96
 
@@ -131,72 +143,72 @@ DEFAULT_AUDIO_FEATURES = {
131
  "v13_emotion_valence": 0.0,
132
  }
133
 
134
- class AudioBase64Request(BaseModel):
135
- audio_base64: str = ""
136
- transcript: str = ""
137
- mime_type: str = ""
138
-
139
-
140
- def infer_audio_extension(audio_bytes: bytes, mime_type: str = "") -> str:
141
- normalized = (mime_type or "").lower().split(";")[0].strip()
142
- mime_map = {
143
- "audio/webm": ".webm",
144
- "audio/ogg": ".ogg",
145
- "audio/wav": ".wav",
146
- "audio/x-wav": ".wav",
147
- "audio/mpeg": ".mp3",
148
- "audio/mp3": ".mp3",
149
- "audio/mp4": ".m4a",
150
- "audio/x-m4a": ".m4a",
151
- "audio/aac": ".aac",
152
- "audio/flac": ".flac",
153
- }
154
- if normalized in mime_map:
155
- return mime_map[normalized]
156
-
157
- if audio_bytes.startswith(b"RIFF"):
158
- return ".wav"
159
- if audio_bytes.startswith(b"OggS"):
160
- return ".ogg"
161
- if audio_bytes.startswith(b"\x1A\x45\xDF\xA3"):
162
- return ".webm"
163
- if audio_bytes.startswith(b"fLaC"):
164
- return ".flac"
165
- if audio_bytes[4:8] == b"ftyp":
166
- return ".m4a"
167
- if audio_bytes.startswith(b"ID3") or (len(audio_bytes) > 1 and audio_bytes[0] == 0xFF and (audio_bytes[1] & 0xE0) == 0xE0):
168
- return ".mp3"
169
-
170
- return ".bin"
171
-
172
-
173
- def decode_audio_bytes(audio_bytes: bytes, mime_type: str = ""):
174
- import soundfile as sf
175
-
176
- try:
177
- y, sr = sf.read(io.BytesIO(audio_bytes))
178
- return y, sr
179
- except Exception as sf_err:
180
- print(f"[WARN] soundfile failed ({sf_err}), trying librosa from buffer...")
181
-
182
- try:
183
- y, sr = librosa.load(io.BytesIO(audio_bytes), sr=16000, mono=True)
184
- return y, sr
185
- except Exception as librosa_err:
186
- print(f"[WARN] librosa buffer decode failed ({librosa_err}), trying temp file...")
187
-
188
- suffix = infer_audio_extension(audio_bytes, mime_type)
189
- temp_path = None
190
- try:
191
- with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
192
- temp_file.write(audio_bytes)
193
- temp_path = temp_file.name
194
-
195
- y, sr = librosa.load(temp_path, sr=16000, mono=True)
196
- return y, sr
197
- finally:
198
- if temp_path and os.path.exists(temp_path):
199
- os.remove(temp_path)
200
 
201
 
202
  @app.get("/")
@@ -238,14 +250,14 @@ async def extract_audio_features(audio: UploadFile = File(...), transcript: str
238
 
239
 
240
  @app.post("/extract-audio-features-base64")
241
- async def extract_audio_features_base64(data: AudioBase64Request):
242
- """Extract features from base64-encoded audio (for Vercel serverless calls)."""
243
- audio_b64 = data.audio_base64
244
- transcript = data.transcript
245
- mime_type = data.mime_type
246
-
247
- # Handle empty / missing audio — return default features
248
- if not audio_b64 or len(audio_b64) < 100:
249
  print("[INFO] Empty or too-short audio_base64, returning defaults")
250
  return {**DEFAULT_AUDIO_FEATURES}
251
 
@@ -253,16 +265,16 @@ async def extract_audio_features_base64(data: AudioBase64Request):
253
  # Strip data URL prefix if present (e.g. "data:audio/wav;base64,...")
254
  if "," in audio_b64[:80]:
255
  audio_b64 = audio_b64.split(",", 1)[1]
256
-
257
- audio_bytes = base64.b64decode(audio_b64)
258
- print(f"[INFO] Decoded {len(audio_bytes)} bytes of audio")
259
- if mime_type:
260
- print(f"[INFO] MIME type hint: {mime_type}")
261
-
262
- y, sr = decode_audio_bytes(audio_bytes, mime_type)
263
-
264
- if hasattr(y, 'shape') and len(y.shape) > 1:
265
- y = np.mean(y, axis=1)
266
  y = np.asarray(y, dtype=np.float32)
267
  if sr != 16000:
268
  y = librosa.resample(y, orig_sr=sr, target_sr=16000)
 
8
  Derived from: src/audio_features.py, src/emotion_features.py
9
  """
10
 
11
+ import io
12
+ import os
13
+ import tempfile
14
+ import numpy as np
15
+ import librosa
16
+ from scipy import signal as scipy_signal
17
  from typing import Dict
18
  import torch
19
  import torch.nn as nn
 
88
  import traceback
89
 
90
  app = FastAPI(title="Audio Feature Extraction API", version="1.0.0")
91
+
92
+
93
def _cors_origins_from_env() -> list[str]:
    """Return the CORS origin allow-list configured via ``ALLOWED_ORIGINS``.

    The variable is read as a comma-separated list. Surrounding whitespace is
    stripped from each entry, empty entries are ignored, and duplicates are
    dropped while keeping first-seen order.

    Returns:
        The configured origins, or ``["*"]`` when the variable is unset, blank,
        or contains only separators (e.g. ``","``).
    """
    raw = os.getenv("ALLOWED_ORIGINS") or ""
    entries = (part.strip() for part in raw.split(","))
    # dict.fromkeys de-duplicates while preserving the configured order.
    origins = list(dict.fromkeys(entry for entry in entries if entry))
    # A separator-only value carries no origins; treat it like an unset
    # variable instead of returning an empty allow-list that would reject
    # every cross-origin request.
    return origins or ["*"]
98
+
99
+
100
+ _cors_origins = _cors_origins_from_env()
101
  app.add_middleware(
102
  CORSMiddleware,
103
+ allow_origins=_cors_origins,
104
+ # Browsers reject: Access-Control-Allow-Origin="*" with credentials=true.
105
+ allow_credentials=("*" not in _cors_origins),
106
  allow_methods=["*"], allow_headers=["*"],
107
  )
108
 
 
143
  "v13_emotion_valence": 0.0,
144
  }
145
 
146
class AudioBase64Request(BaseModel):
    """Request body for the ``/extract-audio-features-base64`` endpoint.

    Every field defaults to an empty string.
    """

    # Base64-encoded audio. The endpoint strips a leading data-URL prefix
    # (text up to the first comma) and returns default features when this
    # value is empty or shorter than 100 characters.
    audio_base64: str = ""
    # Transcript text supplied with the audio; read by the endpoint.
    transcript: str = ""
    # Optional MIME type hint that the endpoint forwards to decode_audio_bytes.
    mime_type: str = ""
150
+
151
+
152
# Normalized MIME type -> file suffix. Built once at import time rather than on
# every call.
_MIME_TO_EXTENSION = {
    "audio/webm": ".webm",
    "audio/ogg": ".ogg",
    "audio/wav": ".wav",
    "audio/x-wav": ".wav",
    "audio/wave": ".wav",
    "audio/vnd.wave": ".wav",
    "audio/mpeg": ".mp3",
    "audio/mp3": ".mp3",
    "audio/mp4": ".m4a",
    "audio/x-m4a": ".m4a",
    "audio/aac": ".aac",
    "audio/flac": ".flac",
}


def infer_audio_extension(audio_bytes: bytes, mime_type: str = "") -> str:
    """Guess a file extension for an audio payload.

    The MIME type hint wins when it is recognized; otherwise the leading bytes
    of the payload are matched against known container/stream signatures.

    Args:
        audio_bytes: Raw (already base64-decoded) audio data.
        mime_type: Optional MIME type, possibly with parameters such as
            ``"audio/webm;codecs=opus"``. Case and parameters are ignored.

    Returns:
        A dotted extension such as ``".wav"``, or ``".bin"`` when neither the
        hint nor the signature is recognized.
    """
    normalized = (mime_type or "").lower().split(";")[0].strip()
    extension = _MIME_TO_EXTENSION.get(normalized)
    if extension is not None:
        return extension

    # RIFF is a generic container (AVI and WebP use it too); only the "WAVE"
    # form type at offset 8 identifies a WAV file.
    if audio_bytes.startswith(b"RIFF") and audio_bytes[8:12] == b"WAVE":
        return ".wav"
    if audio_bytes.startswith(b"OggS"):
        return ".ogg"
    # EBML magic shared by WebM and Matroska.
    if audio_bytes.startswith(b"\x1A\x45\xDF\xA3"):
        return ".webm"
    if audio_bytes.startswith(b"fLaC"):
        return ".flac"
    # ISO base media files carry an "ftyp" box tag right after the 4-byte size.
    if audio_bytes[4:8] == b"ftyp":
        return ".m4a"
    if audio_bytes.startswith(b"ID3"):
        return ".mp3"

    if len(audio_bytes) > 1 and audio_bytes[0] == 0xFF:
        second = audio_bytes[1]
        # ADTS AAC: 12-bit sync word 0xFFF followed by layer bits 00. This must
        # be tested before the MPEG check, whose 11-bit sync also matches it.
        if (second & 0xF6) == 0xF0:
            return ".aac"
        # MPEG audio frame header: 11-bit sync word.
        if (second & 0xE0) == 0xE0:
            return ".mp3"

    return ".bin"
183
+
184
+
185
def decode_audio_bytes(audio_bytes: bytes, mime_type: str = ""):
    """Decode an in-memory audio payload into samples and a sample rate.

    Three decoders are tried in order, each one only if the previous raised:

    1. ``soundfile.read`` on an in-memory buffer.
    2. ``librosa.load`` on an in-memory buffer (requested as 16 kHz mono).
    3. ``librosa.load`` on a temporary file whose suffix comes from
       ``infer_audio_extension``; the file is removed afterwards.

    Only paths 2 and 3 request 16 kHz mono. Path 1 passes no resampling or
    mixdown options, so callers should be prepared to downmix and resample
    its result.

    Args:
        audio_bytes: Raw encoded audio data.
        mime_type: Optional MIME type hint, used only to pick the temp-file
            suffix for the last fallback.

    Returns:
        A ``(samples, sample_rate)`` tuple.

    Raises:
        Exception: Whatever the final ``librosa.load`` raises when all three
            decoders fail.
    """
    import soundfile as sf

    # Broad catches are deliberate here: any failure in one decoder should
    # fall through to the next rather than abort the request.
    try:
        y, sr = sf.read(io.BytesIO(audio_bytes))
        return y, sr
    except Exception as sf_err:
        print(f"[WARN] soundfile failed ({sf_err}), trying librosa from buffer...")

    try:
        y, sr = librosa.load(io.BytesIO(audio_bytes), sr=16000, mono=True)
        return y, sr
    except Exception as librosa_err:
        print(f"[WARN] librosa buffer decode failed ({librosa_err}), trying temp file...")

    suffix = infer_audio_extension(audio_bytes, mime_type)
    temp_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
            # Record the path before writing so that a failed write (e.g. a
            # full disk) still gets cleaned up below.
            temp_path = temp_file.name
            temp_file.write(audio_bytes)

        y, sr = librosa.load(temp_path, sr=16000, mono=True)
        return y, sr
    finally:
        if temp_path is not None:
            try:
                os.remove(temp_path)
            except FileNotFoundError:
                pass
            except OSError as cleanup_err:
                # Never let a cleanup failure replace the real decode error.
                print(f"[WARN] could not remove temp file {temp_path} ({cleanup_err})")
212
 
213
 
214
  @app.get("/")
 
250
 
251
 
252
  @app.post("/extract-audio-features-base64")
253
+ async def extract_audio_features_base64(data: AudioBase64Request):
254
+ """Extract features from base64-encoded audio (for Vercel serverless calls)."""
255
+ audio_b64 = data.audio_base64
256
+ transcript = data.transcript
257
+ mime_type = data.mime_type
258
+
259
+ # Handle empty / missing audio — return default features
260
+ if not audio_b64 or len(audio_b64) < 100:
261
  print("[INFO] Empty or too-short audio_base64, returning defaults")
262
  return {**DEFAULT_AUDIO_FEATURES}
263
 
 
265
  # Strip data URL prefix if present (e.g. "data:audio/wav;base64,...")
266
  if "," in audio_b64[:80]:
267
  audio_b64 = audio_b64.split(",", 1)[1]
268
+
269
+ audio_bytes = base64.b64decode(audio_b64)
270
+ print(f"[INFO] Decoded {len(audio_bytes)} bytes of audio")
271
+ if mime_type:
272
+ print(f"[INFO] MIME type hint: {mime_type}")
273
+
274
+ y, sr = decode_audio_bytes(audio_bytes, mime_type)
275
+
276
+ if hasattr(y, 'shape') and len(y.shape) > 1:
277
+ y = np.mean(y, axis=1)
278
  y = np.asarray(y, dtype=np.float32)
279
  if sr != 16000:
280
  y = librosa.resample(y, orig_sr=sr, target_sr=16000)