Spaces:

AtomCosmic
/

CatalystDiarization

Running

App Files Files Community

AtomCosmic committed on 29 days ago

Commit

0bb5a3a

verified ·

1 Parent(s): 653802e

Update app.py

Browse files

Files changed (1) hide show

app.py +56 -14

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import os
 import tempfile
 import logging
 from fastapi import FastAPI, UploadFile, File, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 import uvicorn
@@ -19,21 +20,48 @@ app.add_middleware(
 pipeline = None
 @app.on_event("startup")
 async def load_pipeline():
     global pipeline
     hf_token = os.environ.get("HF_TOKEN")
     logger.info(f"HF_TOKEN exists: {bool(hf_token)}")
     if not hf_token:
         logger.error("HF_TOKEN not set — diarization will not work")
         return
     try:
         from pyannote.audio import Pipeline
         import torch
-        from huggingface_hub import login
-        login(token=hf_token)
         logger.info("Loading pyannote speaker diarization pipeline...")
         pipeline = Pipeline.from_pretrained(
@@ -41,7 +69,6 @@ async def load_pipeline():
             use_auth_token=hf_token
         )
-        # Explicitly use CPU
         pipeline = pipeline.to(torch.device("cpu"))
         logger.info("Pipeline loaded successfully on cpu")
@@ -53,6 +80,9 @@ async def load_pipeline():
         pipeline = None
 @app.get("/health")
 def health():
     return {
@@ -61,6 +91,9 @@ def health():
     }
 @app.post("/diarize")
 async def diarize(
     file: UploadFile = File(...),
@@ -72,29 +105,31 @@ async def diarize(
             detail="Diarization pipeline not loaded. Check HF_TOKEN and logs."
         )
-    suffix = os.path.splitext(file.filename or "audio.wav")[1] or ".wav"
     tmp_path = None
     try:
         with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
             content = await file.read()
             tmp.write(content)
             tmp_path = tmp.name
-        logger.info(f"Diarizing {file.filename} ({len(content)/1024:.1f}KB), num_speakers={num_speakers}")
         diarize_kwargs = {}
         if num_speakers and num_speakers > 1:
             diarize_kwargs["num_speakers"] = num_speakers
-        # FIX: Set min_duration thresholds so short speech bursts from
-        # judges speaking briefly in a demo meeting are not missed.
-        # min_duration_on=0.1 means any speech segment >= 100ms is kept.
-        # min_duration_off=0.1 means silence gaps >= 100ms split speakers.
-        # Previously pyannote used its defaults (~500ms) which caused
-        # brief utterances in short meetings to be silently dropped.
-        diarization = pipeline(tmp_path, **diarize_kwargs)
         segments = []
         speakers_seen = set()
@@ -119,9 +154,16 @@ async def diarize(
         raise HTTPException(status_code=500, detail=str(e))
     finally:
         if tmp_path and os.path.exists(tmp_path):
             os.unlink(tmp_path)
-#trigger rebuild
 if __name__ == "__main__":
     uvicorn.run(app, host="0.0.0.0", port=7860)

 import os
 import tempfile
 import logging
+import subprocess
 from fastapi import FastAPI, UploadFile, File, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 import uvicorn
 pipeline = None
+# ─────────────────────────────────────────────────────────────
+# Convert webm → wav (REQUIRED for pyannote)
+# ─────────────────────────────────────────────────────────────
+def convert_to_wav(input_path):
+    output_path = input_path.replace(".webm", ".wav")
+    try:
+        subprocess.run([
+            "ffmpeg",
+            "-y",
+            "-i", input_path,
+            "-ac", "1",        # mono
+            "-ar", "16000",    # 16kHz (required)
+            output_path
+        ], check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+        return output_path
+    except subprocess.CalledProcessError as e:
+        logger.error(f"FFmpeg conversion failed: {e}")
+        raise Exception("Audio conversion failed (ffmpeg error)")
+# ─────────────────────────────────────────────────────────────
+# Load diarization pipeline
+# ─────────────────────────────────────────────────────────────
 @app.on_event("startup")
 async def load_pipeline():
     global pipeline
     hf_token = os.environ.get("HF_TOKEN")
     logger.info(f"HF_TOKEN exists: {bool(hf_token)}")
     if not hf_token:
         logger.error("HF_TOKEN not set — diarization will not work")
         return
     try:
         from pyannote.audio import Pipeline
         import torch
         logger.info("Loading pyannote speaker diarization pipeline...")
         pipeline = Pipeline.from_pretrained(
             use_auth_token=hf_token
         )
         pipeline = pipeline.to(torch.device("cpu"))
         logger.info("Pipeline loaded successfully on cpu")
         pipeline = None
+# ─────────────────────────────────────────────────────────────
+# Health check
+# ─────────────────────────────────────────────────────────────
 @app.get("/health")
 def health():
     return {
     }
+# ─────────────────────────────────────────────────────────────
+# Diarization endpoint
+# ─────────────────────────────────────────────────────────────
 @app.post("/diarize")
 async def diarize(
     file: UploadFile = File(...),
             detail="Diarization pipeline not loaded. Check HF_TOKEN and logs."
         )
+    suffix = os.path.splitext(file.filename or "audio.webm")[1] or ".webm"
     tmp_path = None
+    wav_path = None
     try:
+        # Save uploaded file
         with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
             content = await file.read()
             tmp.write(content)
             tmp_path = tmp.name
+        logger.info(
+            f"Diarizing {file.filename} ({len(content)/1024:.1f}KB), "
+            f"num_speakers={num_speakers}"
+        )
+        # ── Convert to WAV (CRITICAL FIX) ───────────────────────
+        wav_path = convert_to_wav(tmp_path)
         diarize_kwargs = {}
         if num_speakers and num_speakers > 1:
             diarize_kwargs["num_speakers"] = num_speakers
+        diarization = pipeline(wav_path, **diarize_kwargs)
         segments = []
         speakers_seen = set()
         raise HTTPException(status_code=500, detail=str(e))
     finally:
+        # Cleanup temp files
         if tmp_path and os.path.exists(tmp_path):
             os.unlink(tmp_path)
+        if wav_path and os.path.exists(wav_path):
+            os.unlink(wav_path)
+# ─────────────────────────────────────────────────────────────
+# Run server
+# ─────────────────────────────────────────────────────────────
 if __name__ == "__main__":
     uvicorn.run(app, host="0.0.0.0", port=7860)