Spaces:

divAIne
/

busy-module-audio

Sleeping

App Files Community

EurekaPotato committed on Apr 13

Commit

8263279

verified ·

1 Parent(s): dde584b

Upload folder using huggingface_hub

Browse files

Files changed (2) hide show

__pycache__/handler.cpython-313.pyc +0 -0
handler.py +103 -91

__pycache__/handler.cpython-313.pyc ADDED Viewed

Binary file (13.4 kB). View file

handler.py CHANGED Viewed

@@ -8,12 +8,12 @@ Extracts all 17 voice features from uploaded audio:
 Derived from: src/audio_features.py, src/emotion_features.py
 """
-import io
-import os
-import tempfile
-import numpy as np
-import librosa
-from scipy import signal as scipy_signal
 from typing import Dict
 import torch
 import torch.nn as nn
@@ -88,9 +88,21 @@ import base64
 import traceback
 app = FastAPI(title="Audio Feature Extraction API", version="1.0.0")
 app.add_middleware(
     CORSMiddleware,
-    allow_origins=["*"], allow_credentials=True,
     allow_methods=["*"], allow_headers=["*"],
 )
@@ -131,72 +143,72 @@ DEFAULT_AUDIO_FEATURES = {
     "v13_emotion_valence": 0.0,
 }
-class AudioBase64Request(BaseModel):
-    audio_base64: str = ""
-    transcript: str = ""
-    mime_type: str = ""
-def infer_audio_extension(audio_bytes: bytes, mime_type: str = "") -> str:
-    normalized = (mime_type or "").lower().split(";")[0].strip()
-    mime_map = {
-        "audio/webm": ".webm",
-        "audio/ogg": ".ogg",
-        "audio/wav": ".wav",
-        "audio/x-wav": ".wav",
-        "audio/mpeg": ".mp3",
-        "audio/mp3": ".mp3",
-        "audio/mp4": ".m4a",
-        "audio/x-m4a": ".m4a",
-        "audio/aac": ".aac",
-        "audio/flac": ".flac",
-    }
-    if normalized in mime_map:
-        return mime_map[normalized]
-    if audio_bytes.startswith(b"RIFF"):
-        return ".wav"
-    if audio_bytes.startswith(b"OggS"):
-        return ".ogg"
-    if audio_bytes.startswith(b"\x1A\x45\xDF\xA3"):
-        return ".webm"
-    if audio_bytes.startswith(b"fLaC"):
-        return ".flac"
-    if audio_bytes[4:8] == b"ftyp":
-        return ".m4a"
-    if audio_bytes.startswith(b"ID3") or (len(audio_bytes) > 1 and audio_bytes[0] == 0xFF and (audio_bytes[1] & 0xE0) == 0xE0):
-        return ".mp3"
-    return ".bin"
-def decode_audio_bytes(audio_bytes: bytes, mime_type: str = ""):
-    import soundfile as sf
-    try:
-        y, sr = sf.read(io.BytesIO(audio_bytes))
-        return y, sr
-    except Exception as sf_err:
-        print(f"[WARN] soundfile failed ({sf_err}), trying librosa from buffer...")
-    try:
-        y, sr = librosa.load(io.BytesIO(audio_bytes), sr=16000, mono=True)
-        return y, sr
-    except Exception as librosa_err:
-        print(f"[WARN] librosa buffer decode failed ({librosa_err}), trying temp file...")
-    suffix = infer_audio_extension(audio_bytes, mime_type)
-    temp_path = None
-    try:
-        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
-            temp_file.write(audio_bytes)
-            temp_path = temp_file.name
-        y, sr = librosa.load(temp_path, sr=16000, mono=True)
-        return y, sr
-    finally:
-        if temp_path and os.path.exists(temp_path):
-            os.remove(temp_path)
 @app.get("/")
@@ -238,14 +250,14 @@ async def extract_audio_features(audio: UploadFile = File(...), transcript: str
 @app.post("/extract-audio-features-base64")
-async def extract_audio_features_base64(data: AudioBase64Request):
-    """Extract features from base64-encoded audio (for Vercel serverless calls)."""
-    audio_b64 = data.audio_base64
-    transcript = data.transcript
-    mime_type = data.mime_type
-    # Handle empty / missing audio — return default features
-    if not audio_b64 or len(audio_b64) < 100:
         print("[INFO] Empty or too-short audio_base64, returning defaults")
         return {**DEFAULT_AUDIO_FEATURES}
@@ -253,16 +265,16 @@ async def extract_audio_features_base64(data: AudioBase64Request):
         # Strip data URL prefix if present (e.g. "data:audio/wav;base64,...")
         if "," in audio_b64[:80]:
             audio_b64 = audio_b64.split(",", 1)[1]
-        audio_bytes = base64.b64decode(audio_b64)
-        print(f"[INFO] Decoded {len(audio_bytes)} bytes of audio")
-        if mime_type:
-            print(f"[INFO] MIME type hint: {mime_type}")
-        y, sr = decode_audio_bytes(audio_bytes, mime_type)
-        if hasattr(y, 'shape') and len(y.shape) > 1:
-            y = np.mean(y, axis=1)
         y = np.asarray(y, dtype=np.float32)
         if sr != 16000:
             y = librosa.resample(y, orig_sr=sr, target_sr=16000)

 Derived from: src/audio_features.py, src/emotion_features.py
 """
+import io
+import os
+import tempfile
+import numpy as np
+import librosa
+from scipy import signal as scipy_signal
 from typing import Dict
 import torch
 import torch.nn as nn
 import traceback
 app = FastAPI(title="Audio Feature Extraction API", version="1.0.0")
+def _cors_origins_from_env() -> list[str]:
+    raw = (os.getenv("ALLOWED_ORIGINS") or "").strip()
+    if not raw:
+        return ["*"]
+    return [o.strip() for o in raw.split(",") if o.strip()]
+_cors_origins = _cors_origins_from_env()
 app.add_middleware(
     CORSMiddleware,
+    allow_origins=_cors_origins,
+    # Browsers reject: Access-Control-Allow-Origin="*" with credentials=true.
+    allow_credentials=("*" not in _cors_origins),
     allow_methods=["*"], allow_headers=["*"],
 )
     "v13_emotion_valence": 0.0,
 }
+class AudioBase64Request(BaseModel):
+    audio_base64: str = ""
+    transcript: str = ""
+    mime_type: str = ""
+def infer_audio_extension(audio_bytes: bytes, mime_type: str = "") -> str:
+    normalized = (mime_type or "").lower().split(";")[0].strip()
+    mime_map = {
+        "audio/webm": ".webm",
+        "audio/ogg": ".ogg",
+        "audio/wav": ".wav",
+        "audio/x-wav": ".wav",
+        "audio/mpeg": ".mp3",
+        "audio/mp3": ".mp3",
+        "audio/mp4": ".m4a",
+        "audio/x-m4a": ".m4a",
+        "audio/aac": ".aac",
+        "audio/flac": ".flac",
+    }
+    if normalized in mime_map:
+        return mime_map[normalized]
+    if audio_bytes.startswith(b"RIFF"):
+        return ".wav"
+    if audio_bytes.startswith(b"OggS"):
+        return ".ogg"
+    if audio_bytes.startswith(b"\x1A\x45\xDF\xA3"):
+        return ".webm"
+    if audio_bytes.startswith(b"fLaC"):
+        return ".flac"
+    if audio_bytes[4:8] == b"ftyp":
+        return ".m4a"
+    if audio_bytes.startswith(b"ID3") or (len(audio_bytes) > 1 and audio_bytes[0] == 0xFF and (audio_bytes[1] & 0xE0) == 0xE0):
+        return ".mp3"
+    return ".bin"
+def decode_audio_bytes(audio_bytes: bytes, mime_type: str = ""):
+    import soundfile as sf
+    try:
+        y, sr = sf.read(io.BytesIO(audio_bytes))
+        return y, sr
+    except Exception as sf_err:
+        print(f"[WARN] soundfile failed ({sf_err}), trying librosa from buffer...")
+    try:
+        y, sr = librosa.load(io.BytesIO(audio_bytes), sr=16000, mono=True)
+        return y, sr
+    except Exception as librosa_err:
+        print(f"[WARN] librosa buffer decode failed ({librosa_err}), trying temp file...")
+    suffix = infer_audio_extension(audio_bytes, mime_type)
+    temp_path = None
+    try:
+        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
+            temp_file.write(audio_bytes)
+            temp_path = temp_file.name
+        y, sr = librosa.load(temp_path, sr=16000, mono=True)
+        return y, sr
+    finally:
+        if temp_path and os.path.exists(temp_path):
+            os.remove(temp_path)
 @app.get("/")
 @app.post("/extract-audio-features-base64")
+async def extract_audio_features_base64(data: AudioBase64Request):
+    """Extract features from base64-encoded audio (for Vercel serverless calls)."""
+    audio_b64 = data.audio_base64
+    transcript = data.transcript
+    mime_type = data.mime_type
+    # Handle empty / missing audio — return default features
+    if not audio_b64 or len(audio_b64) < 100:
         print("[INFO] Empty or too-short audio_base64, returning defaults")
         return {**DEFAULT_AUDIO_FEATURES}
         # Strip data URL prefix if present (e.g. "data:audio/wav;base64,...")
         if "," in audio_b64[:80]:
             audio_b64 = audio_b64.split(",", 1)[1]
+        audio_bytes = base64.b64decode(audio_b64)
+        print(f"[INFO] Decoded {len(audio_bytes)} bytes of audio")
+        if mime_type:
+            print(f"[INFO] MIME type hint: {mime_type}")
+        y, sr = decode_audio_bytes(audio_bytes, mime_type)
+        if hasattr(y, 'shape') and len(y.shape) > 1:
+            y = np.mean(y, axis=1)
         y = np.asarray(y, dtype=np.float32)
         if sr != 16000:
             y = librosa.resample(y, orig_sr=sr, target_sr=16000)