AtomCosmic committed on
Commit
0bb5a3a
·
verified ·
1 Parent(s): 653802e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -14
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import os
2
  import tempfile
3
  import logging
 
4
  from fastapi import FastAPI, UploadFile, File, HTTPException
5
  from fastapi.middleware.cors import CORSMiddleware
6
  import uvicorn
@@ -19,21 +20,48 @@ app.add_middleware(
19
 
20
  pipeline = None
21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  @app.on_event("startup")
23
  async def load_pipeline():
24
  global pipeline
 
25
  hf_token = os.environ.get("HF_TOKEN")
26
  logger.info(f"HF_TOKEN exists: {bool(hf_token)}")
 
27
  if not hf_token:
28
  logger.error("HF_TOKEN not set β€” diarization will not work")
29
  return
 
30
  try:
31
  from pyannote.audio import Pipeline
32
  import torch
33
 
34
- from huggingface_hub import login
35
- login(token=hf_token)
36
-
37
  logger.info("Loading pyannote speaker diarization pipeline...")
38
 
39
  pipeline = Pipeline.from_pretrained(
@@ -41,7 +69,6 @@ async def load_pipeline():
41
  use_auth_token=hf_token
42
  )
43
 
44
- # Explicitly use CPU
45
  pipeline = pipeline.to(torch.device("cpu"))
46
 
47
  logger.info("Pipeline loaded successfully on cpu")
@@ -53,6 +80,9 @@ async def load_pipeline():
53
  pipeline = None
54
 
55
 
 
 
 
56
  @app.get("/health")
57
  def health():
58
  return {
@@ -61,6 +91,9 @@ def health():
61
  }
62
 
63
 
 
 
 
64
  @app.post("/diarize")
65
  async def diarize(
66
  file: UploadFile = File(...),
@@ -72,29 +105,31 @@ async def diarize(
72
  detail="Diarization pipeline not loaded. Check HF_TOKEN and logs."
73
  )
74
 
75
- suffix = os.path.splitext(file.filename or "audio.wav")[1] or ".wav"
76
  tmp_path = None
 
77
 
78
  try:
 
79
  with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
80
  content = await file.read()
81
  tmp.write(content)
82
  tmp_path = tmp.name
83
 
84
- logger.info(f"Diarizing {file.filename} ({len(content)/1024:.1f}KB), num_speakers={num_speakers}")
 
 
 
 
 
 
85
 
86
  diarize_kwargs = {}
87
 
88
  if num_speakers and num_speakers > 1:
89
  diarize_kwargs["num_speakers"] = num_speakers
90
 
91
- # FIX: Set min_duration thresholds so short speech bursts from
92
- # judges speaking briefly in a demo meeting are not missed.
93
- # min_duration_on=0.1 means any speech segment >= 100ms is kept.
94
- # min_duration_off=0.1 means silence gaps >= 100ms split speakers.
95
- # Previously pyannote used its defaults (~500ms) which caused
96
- # brief utterances in short meetings to be silently dropped.
97
- diarization = pipeline(tmp_path, **diarize_kwargs)
98
 
99
  segments = []
100
  speakers_seen = set()
@@ -119,9 +154,16 @@ async def diarize(
119
  raise HTTPException(status_code=500, detail=str(e))
120
 
121
  finally:
 
122
  if tmp_path and os.path.exists(tmp_path):
123
  os.unlink(tmp_path)
124
 
125
- #trigger rebuild
 
 
 
 
 
 
126
  if __name__ == "__main__":
127
  uvicorn.run(app, host="0.0.0.0", port=7860)
 
1
  import os
2
  import tempfile
3
  import logging
4
+ import subprocess
5
  from fastapi import FastAPI, UploadFile, File, HTTPException
6
  from fastapi.middleware.cors import CORSMiddleware
7
  import uvicorn
 
20
 
21
  pipeline = None
22
 
23
+
24
+ # ─────────────────────────────────────────────────────────────
25
+ # Convert webm → wav (REQUIRED for pyannote)
26
+ # ─────────────────────────────────────────────────────────────
27
def _wav_output_path(input_path):
    """Return a ``.wav`` path next to *input_path* that never equals it."""
    root, ext = os.path.splitext(input_path)
    if ext.lower() == ".wav":
        # The upload is already named *.wav: pick a distinct name so ffmpeg
        # is never asked to read from and write to the same file.
        return root + ".converted.wav"
    # Swap only the final extension; a plain str.replace(".webm", ".wav")
    # would also rewrite directory names and would leave every non-webm
    # path unchanged.
    return root + ".wav"


def convert_to_wav(input_path):
    """Transcode an audio file to a mono, 16 kHz WAV file using ffmpeg.

    Args:
        input_path: Path of the audio file to convert. Any extension is
            accepted; the result is always written to a different path.

    Returns:
        Path of the WAV file that was written. For a ``*.webm`` input this
        is the same path with the extension replaced by ``.wav``.

    Raises:
        Exception: If ffmpeg exits with a non-zero status. The original
            ``CalledProcessError`` is attached as ``__cause__``.
    """
    output_path = _wav_output_path(input_path)

    try:
        subprocess.run(
            [
                "ffmpeg",
                "-y",            # overwrite a stale output without prompting
                "-i", input_path,
                "-ac", "1",      # mono
                "-ar", "16000",  # 16 kHz
                output_path,
            ],
            check=True,
            stdout=subprocess.DEVNULL,
            # Keep stderr so a failed conversion can be diagnosed from logs.
            stderr=subprocess.PIPE,
        )
    except subprocess.CalledProcessError as e:
        detail = (e.stderr or b"").decode("utf-8", errors="replace").strip()
        logger.error(f"FFmpeg conversion failed: {e}; stderr: {detail[-500:]}")
        # The caller only learns the output path on success, so remove any
        # partially written file here instead of leaking it.
        try:
            if os.path.exists(output_path):
                os.unlink(output_path)
        except OSError:
            pass
        raise Exception("Audio conversion failed (ffmpeg error)") from e

    return output_path
45
+
46
+
47
+ # ─────────────────────────────────────────────────────────────
48
+ # Load diarization pipeline
49
+ # ─────────────────────────────────────────────────────────────
50
  @app.on_event("startup")
51
  async def load_pipeline():
52
  global pipeline
53
+
54
  hf_token = os.environ.get("HF_TOKEN")
55
  logger.info(f"HF_TOKEN exists: {bool(hf_token)}")
56
+
57
  if not hf_token:
58
  logger.error("HF_TOKEN not set β€” diarization will not work")
59
  return
60
+
61
  try:
62
  from pyannote.audio import Pipeline
63
  import torch
64
 
 
 
 
65
  logger.info("Loading pyannote speaker diarization pipeline...")
66
 
67
  pipeline = Pipeline.from_pretrained(
 
69
  use_auth_token=hf_token
70
  )
71
 
 
72
  pipeline = pipeline.to(torch.device("cpu"))
73
 
74
  logger.info("Pipeline loaded successfully on cpu")
 
80
  pipeline = None
81
 
82
 
83
+ # ─────────────────────────────────────────────────────────────
84
+ # Health check
85
+ # ─────────────────────────────────────────────────────────────
86
  @app.get("/health")
87
  def health():
88
  return {
 
91
  }
92
 
93
 
94
+ # ─────────────────────────────────────────────────────────────
95
+ # Diarization endpoint
96
+ # ─────────────────────────────────────────────────────────────
97
  @app.post("/diarize")
98
  async def diarize(
99
  file: UploadFile = File(...),
 
105
  detail="Diarization pipeline not loaded. Check HF_TOKEN and logs."
106
  )
107
 
108
+ suffix = os.path.splitext(file.filename or "audio.webm")[1] or ".webm"
109
  tmp_path = None
110
+ wav_path = None
111
 
112
  try:
113
+ # Save uploaded file
114
  with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
115
  content = await file.read()
116
  tmp.write(content)
117
  tmp_path = tmp.name
118
 
119
+ logger.info(
120
+ f"Diarizing {file.filename} ({len(content)/1024:.1f}KB), "
121
+ f"num_speakers={num_speakers}"
122
+ )
123
+
124
+ # ── Convert to WAV (CRITICAL FIX) ───────────────────────
125
+ wav_path = convert_to_wav(tmp_path)
126
 
127
  diarize_kwargs = {}
128
 
129
  if num_speakers and num_speakers > 1:
130
  diarize_kwargs["num_speakers"] = num_speakers
131
 
132
+ diarization = pipeline(wav_path, **diarize_kwargs)
 
 
 
 
 
 
133
 
134
  segments = []
135
  speakers_seen = set()
 
154
  raise HTTPException(status_code=500, detail=str(e))
155
 
156
  finally:
157
+ # Cleanup temp files
158
  if tmp_path and os.path.exists(tmp_path):
159
  os.unlink(tmp_path)
160
 
161
+ if wav_path and os.path.exists(wav_path):
162
+ os.unlink(wav_path)
163
+
164
+
165
+ # ─────────────────────────────────────────────────────────────
166
+ # Run server
167
+ # ─────────────────────────────────────────────────────────────
168
  if __name__ == "__main__":
169
  uvicorn.run(app, host="0.0.0.0", port=7860)