Clearwave48 committed on
Commit
785a835
·
verified ·
1 Parent(s): 3b1c60e

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +193 -55
main.py CHANGED
@@ -1,36 +1,42 @@
1
  """
2
  ClearWave AI — API Space (FastAPI only)
3
  Handles /api/health and /api/process-url
4
- No Gradio, no routing conflicts.
 
 
 
 
5
  """
6
 
7
  import os
8
  import json
 
9
  import tempfile
10
  import logging
11
  import requests
12
- import numpy as np
13
  import cloudinary
14
  import cloudinary.uploader
15
  from fastapi import FastAPI, Request
16
  from fastapi.responses import StreamingResponse, JSONResponse
17
  from fastapi.middleware.cors import CORSMiddleware
18
 
19
- # Cloudinary config β€” set these in your HF Space secrets
20
  cloudinary.config(
21
  cloud_name = os.environ.get("CLOUD_NAME"),
22
  api_key = os.environ.get("API_KEY"),
23
  api_secret = os.environ.get("API_SECRET"),
24
  )
25
 
 
 
 
 
26
  logging.basicConfig(level=logging.INFO)
27
  logger = logging.getLogger(__name__)
28
 
29
- from denoiser import Denoiser
30
  from transcriber import Transcriber
31
  from translator import Translator
32
 
33
- denoiser = Denoiser()
34
  transcriber = Transcriber()
35
  translator = Translator()
36
 
@@ -43,78 +49,208 @@ app.add_middleware(
43
  allow_headers=["*"],
44
  )
45
 
46
- # ══════════════════════════════════════════════════════════════════════
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  # PIPELINE
48
- # ══════════════════════════════════════════════════════════════════════
 
49
  def run_pipeline(audio_path, src_lang="auto", tgt_lang="te",
50
  opt_fillers=True, opt_stutters=True, opt_silences=True,
51
  opt_breaths=True, opt_mouth=True):
52
- out_dir = tempfile.mkdtemp()
 
 
 
 
53
  try:
54
- yield {"status": "processing", "step": 1, "message": "Step 1/5 β€” Denoising..."}
55
- denoise1 = denoiser.process(
56
- audio_path, out_dir,
57
- remove_fillers=False, remove_stutters=False,
58
- remove_silences=opt_silences, remove_breaths=opt_breaths,
59
- remove_mouth_sounds=opt_mouth, word_segments=None,
60
- )
61
- clean1 = denoise1["audio_path"]
62
- stats = denoise1["stats"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
- yield {"status": "processing", "step": 2, "message": "Step 2/5 β€” Transcribing..."}
 
 
65
  transcript, detected_lang, t_method = transcriber.transcribe(clean1, src_lang)
66
  word_segs = transcriber._last_segments
 
67
 
68
- if (opt_fillers or opt_stutters) and word_segs:
69
- yield {"status": "processing", "step": 3, "message": "Step 3/5 β€” Removing fillers & stutters..."}
70
- import soundfile as sf
71
- # Read the denoised audio β€” soundfile can read both WAV and MP3
72
- audio_data, sr = sf.read(clean1)
73
- if audio_data.ndim == 2:
74
- audio_data = audio_data.mean(axis=1)
75
- audio_data = audio_data.astype(np.float32)
76
- if opt_fillers:
77
- audio_data, n_f = denoiser._remove_fillers(audio_data, sr, word_segs)
78
- stats["fillers_removed"] = n_f
79
- transcript = denoiser.clean_transcript_fillers(transcript)
80
- if opt_stutters:
81
- audio_data, n_s = denoiser._remove_stutters(audio_data, sr, word_segs)
82
- stats["stutters_removed"] = n_s
83
- # Write to a fresh .wav β€” PCM_24 is WAV-only, never write to .mp3 path
84
- clean_wav = os.path.join(out_dir, "clean_step3.wav")
85
- sf.write(clean_wav, audio_data, sr, format="WAV", subtype="PCM_24")
86
- clean1 = clean_wav # downstream steps (Cloudinary upload) use this
87
- else:
88
- stats["fillers_removed"] = 0
89
- stats["stutters_removed"] = 0
90
-
91
  translation = transcript
92
  tl_method = "same language"
93
  if tgt_lang != "auto" and detected_lang != tgt_lang:
94
- yield {"status": "processing", "step": 4, "message": "Step 4/5 β€” Translating..."}
 
95
  translation, tl_method = translator.translate(transcript, detected_lang, tgt_lang)
 
 
 
 
96
 
97
- yield {"status": "processing", "step": 5, "message": "Step 5/5 β€” Summarizing..."}
 
 
98
  summary = translator.summarize(transcript)
99
 
100
- # Upload enhanced audio to Cloudinary β€” returns a URL instead of base64.
101
- # This keeps the done SSE event tiny (~200 bytes) instead of ~700KB,
102
- # which was causing the JSON to be split across 85+ TCP chunks.
103
  try:
104
  upload_result = cloudinary.uploader.upload(
105
  clean1,
106
- resource_type = "video", # Cloudinary uses "video" for audio
107
- folder = "clearwave_enhanced",
108
  )
109
  enhanced_url = upload_result["secure_url"]
110
- logger.info(f"Enhanced audio uploaded: {enhanced_url}")
111
  except Exception as e:
112
- logger.error(f"Cloudinary upload failed: {e}")
113
  enhanced_url = None
114
 
 
115
  yield {
116
  "status": "done",
117
- "step": 5,
118
  "message": "Done!",
119
  "transcript": transcript,
120
  "translation": translation,
@@ -122,7 +258,7 @@ def run_pipeline(audio_path, src_lang="auto", tgt_lang="te",
122
  "enhancedAudio": enhanced_url,
123
  "stats": {
124
  "language": detected_lang.upper(),
125
- "noise_method": stats.get("noise_method", "noisereduce"),
126
  "fillers_removed": stats.get("fillers_removed", 0),
127
  "stutters_removed": stats.get("stutters_removed", 0),
128
  "silences_removed_sec": stats.get("silences_removed_sec", 0),
@@ -130,19 +266,21 @@ def run_pipeline(audio_path, src_lang="auto", tgt_lang="te",
130
  "mouth_sounds_removed": stats.get("mouth_sounds_removed", 0),
131
  "transcription_method": t_method,
132
  "translation_method": tl_method,
133
- "processing_sec": stats.get("processing_sec", 0),
134
  "word_segments": len(word_segs),
135
  "transcript_words": len(transcript.split()),
136
  },
137
  }
 
138
  except Exception as e:
139
  logger.error(f"Pipeline failed: {e}", exc_info=True)
140
  yield {"status": "error", "message": f"Error: {str(e)}"}
141
 
142
 
143
- # ══════════════════════════════════════════════════════════════════════
144
  # ROUTES
145
- # ══════════════════════════════════════════════════════════════════════
 
146
  @app.get("/api/health")
147
  async def health():
148
  return JSONResponse({"status": "ok", "service": "ClearWave AI API"})
 
1
  """
2
  ClearWave AI — API Space (FastAPI only)
3
  Handles /api/health and /api/process-url
4
+
5
+ Audio enhancement : Cleanvoice API (noise, fillers, stutters, silences, breaths)
6
+ Transcription : Groq Whisper large-v3 (primary) / faster-whisper (fallback)
7
+ Translation : NLLB-200-1.3B (primary) / Google Translate (fallback)
8
+ Summary : Extractive (position-scored)
9
  """
10
 
11
  import os
12
  import json
13
+ import time
14
  import tempfile
15
  import logging
16
  import requests
 
17
  import cloudinary
18
  import cloudinary.uploader
19
  from fastapi import FastAPI, Request
20
  from fastapi.responses import StreamingResponse, JSONResponse
21
  from fastapi.middleware.cors import CORSMiddleware
22
 
23
+ # ── Cloudinary config ─────────────────────────────────────────────────────────
24
  cloudinary.config(
25
  cloud_name = os.environ.get("CLOUD_NAME"),
26
  api_key = os.environ.get("API_KEY"),
27
  api_secret = os.environ.get("API_SECRET"),
28
  )
29
 
30
+ # ── Cleanvoice config ─────────────────────────────────────────────────────────
31
+ CLEANVOICE_API_KEY = os.environ.get("CLEANVOICE_API_KEY")
32
+ CLEANVOICE_BASE = "https://api.cleanvoice.ai/v2"
33
+
34
  logging.basicConfig(level=logging.INFO)
35
  logger = logging.getLogger(__name__)
36
 
 
37
  from transcriber import Transcriber
38
  from translator import Translator
39
 
 
40
  transcriber = Transcriber()
41
  translator = Translator()
42
 
 
49
  allow_headers=["*"],
50
  )
51
 
52
+
53
+ # ══════════════════════════════════════════════════════════════════════════════
54
+ # CLEANVOICE HELPER
55
+ # ══════════════════════════════════════════════════════════════════════════════
56
+
57
def cleanvoice_enhance(audio_path: str, out_dir: str,
                       opt_fillers: bool = True,
                       opt_stutters: bool = True,
                       opt_silences: bool = True,
                       opt_breaths: bool = True,
                       opt_mouth: bool = True,
                       *,
                       max_attempts: int = 36,
                       poll_interval: float = 10) -> dict:
    """
    Run one audio file through the Cleanvoice HTTP API and save the result.

    Pipeline:
      1. Upload audio file  → POST {CLEANVOICE_BASE}/uploads, read back a file URL
      2. Submit edit job    → POST {CLEANVOICE_BASE}/edits with the feature flags
      3. Poll until done    → at most max_attempts × poll_interval seconds
                              (defaults: 36 × 10s = 6 minutes)
      4. Download result    → written to out_dir as cleanvoice_enhanced<ext>

    Args:
        audio_path:    Path of the local audio file to upload.
        out_dir:       Existing directory that receives the enhanced file.
        opt_fillers:   Sent as the "remove_filler_words" flag.
        opt_stutters:  Sent as the "remove_stutters" flag.
        opt_silences:  Sent as the "remove_silence" flag.
        opt_breaths:   Sent as the "remove_breathing" flag.
        opt_mouth:     Sent as the "remove_mouth_sounds" flag.
        max_attempts:  Number of status polls before giving up.
        poll_interval: Seconds slept before each status poll.

    Returns:
        {"audio_path": str, "stats": dict}. The stats only echo which
        features were requested ("yes"/"no" strings, a bool for
        "breaths_reduced"); they are not measured counts or durations.

    Raises:
        RuntimeError: API key missing, a response lacks the expected URL/ID,
            the job reports "error"/"failed", or polling runs out of attempts.
        requests.RequestException: any HTTP call fails or returns an error
            status (raise_for_status). These are NOT wrapped in RuntimeError.
        OSError: audio_path cannot be read or the output file cannot be written.

    NOTE(review): the endpoint paths, config flag names, status values and
    response keys below are taken as-is from the existing code — confirm them
    against the current Cleanvoice API reference.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urlsplit

    if not CLEANVOICE_API_KEY:
        raise RuntimeError("CLEANVOICE_API_KEY is not set in HF Space secrets.")

    headers = {"X-API-Key": CLEANVOICE_API_KEY}

    # ── Step 1: Upload ────────────────────────────────────────────────────────
    logger.info("[Cleanvoice] Uploading audio...")
    with open(audio_path, "rb") as f:
        up_resp = requests.post(
            f"{CLEANVOICE_BASE}/uploads",
            headers=headers,
            files={"file": (os.path.basename(audio_path), f)},
            timeout=120,
        )
    up_resp.raise_for_status()
    up_data = up_resp.json()  # parse the body once and reuse it
    file_url = up_data.get("url") or up_data.get("signedUrl")
    if not file_url:
        raise RuntimeError(f"Cleanvoice upload gave no URL: {up_data}")
    logger.info(f"[Cleanvoice] Upload done → {file_url[:60]}...")

    # ── Step 2: Submit edit job ───────────────────────────────────────────────
    # Map the pipeline options onto the Cleanvoice feature flags.
    config = {
        "enhance_speech":      True,          # always on — core noise removal
        "remove_filler_words": opt_fillers,   # um, uh, like, basically...
        "remove_stutters":     opt_stutters,  # word repetitions
        "remove_silence":      opt_silences,  # long pauses
        "remove_breathing":    opt_breaths,   # breath sounds
        "remove_mouth_sounds": opt_mouth,     # clicks, pops, smacks
    }
    logger.info(f"[Cleanvoice] Submitting edit job with config: {config}")
    edit_resp = requests.post(
        f"{CLEANVOICE_BASE}/edits",
        headers={**headers, "Content-Type": "application/json"},
        json={"input": {"files": [file_url], "config": config}},
        timeout=30,
    )
    edit_resp.raise_for_status()
    edit_data = edit_resp.json()
    edit_id = edit_data.get("id") or edit_data.get("editId")
    if not edit_id:
        raise RuntimeError(f"Cleanvoice edit job gave no ID: {edit_data}")
    logger.info(f"[Cleanvoice] Edit job submitted → id={edit_id}")

    # ── Step 3: Poll until done ───────────────────────────────────────────────
    for attempt in range(1, max_attempts + 1):
        # Sleep first: the job cannot be finished right after submission.
        time.sleep(poll_interval)
        status_resp = requests.get(
            f"{CLEANVOICE_BASE}/edits/{edit_id}",
            headers=headers,
            timeout=15,
        )
        status_resp.raise_for_status()
        status_data = status_resp.json()
        status = status_data.get("status", "unknown")
        logger.info(f"[Cleanvoice] Poll {attempt}/{max_attempts} → status={status}")

        if status in ("error", "failed"):
            raise RuntimeError(
                f"Cleanvoice job failed: {status_data.get('message', status_data)}"
            )

        if status != "completed":
            continue  # still processing — keep polling

        # Grab the output URL — try the key names the code has always accepted.
        output = status_data.get("output") or {}
        enhanced_dl = (
            output.get("url")
            or output.get("downloadUrl")
            or status_data.get("downloadUrl")
        )
        if not enhanced_dl:
            raise RuntimeError(f"Cleanvoice completed but no download URL: {status_data}")

        # ── Step 4: Download enhanced audio ───────────────────────────────────
        logger.info(f"[Cleanvoice] Downloading result from {enhanced_dl[:60]}...")
        dl = requests.get(enhanced_dl, timeout=120)
        dl.raise_for_status()

        # Take the extension from the URL *path* only, so neither the query
        # string nor a fragment leaks into the file name; default to .mp3.
        ext = os.path.splitext(urlsplit(enhanced_dl).path)[1] or ".mp3"
        out_path = os.path.join(out_dir, f"cleanvoice_enhanced{ext}")
        with open(out_path, "wb") as f:
            f.write(dl.content)
        logger.info(f"[Cleanvoice] ✅ Enhanced audio saved → {out_path}")

        return {
            "audio_path": out_path,
            "stats": {
                "noise_method":         "Cleanvoice API",
                "fillers_removed":      "yes" if opt_fillers else "no",
                "stutters_removed":     "yes" if opt_stutters else "no",
                "silences_removed_sec": "yes" if opt_silences else "no",
                "breaths_reduced":      opt_breaths,
                "mouth_sounds_removed": "yes" if opt_mouth else "no",
            },
        }

    # ":g" keeps the default message at "360s" even if the interval is a float.
    raise RuntimeError(
        f"Cleanvoice timed out after {max_attempts * poll_interval:g}s (edit_id={edit_id})"
    )
171
+
172
+
173
+ # ══════════════════════════════════════════════════════════════════════════════
174
  # PIPELINE
175
+ # ══════════════════════════════════════════════════════════════════════════════
176
+
177
  def run_pipeline(audio_path, src_lang="auto", tgt_lang="te",
178
  opt_fillers=True, opt_stutters=True, opt_silences=True,
179
  opt_breaths=True, opt_mouth=True):
180
+
181
+ out_dir = tempfile.mkdtemp()
182
+ stats = {}
183
+ word_segs = []
184
+
185
  try:
186
+ # ── Step 1: Cleanvoice β€” full audio enhancement ───────────────────────
187
+ yield {"status": "processing", "step": 1,
188
+ "message": "Step 1/4 β€” Enhancing audio with Cleanvoice..."}
189
+ try:
190
+ result = cleanvoice_enhance(
191
+ audio_path, out_dir,
192
+ opt_fillers=opt_fillers,
193
+ opt_stutters=opt_stutters,
194
+ opt_silences=opt_silences,
195
+ opt_breaths=opt_breaths,
196
+ opt_mouth=opt_mouth,
197
+ )
198
+ clean1 = result["audio_path"]
199
+ stats = result["stats"]
200
+ logger.info("[Pipeline] Cleanvoice enhancement complete")
201
+ except Exception as e:
202
+ # Cleanvoice failed β€” log it and continue with original audio
203
+ logger.error(f"[Pipeline] Cleanvoice failed: {e} β€” using original audio")
204
+ clean1 = audio_path
205
+ stats = {
206
+ "noise_method": f"Cleanvoice failed: {e}",
207
+ "fillers_removed": 0,
208
+ "stutters_removed": 0,
209
+ "silences_removed_sec": 0,
210
+ "breaths_reduced": False,
211
+ "mouth_sounds_removed": 0,
212
+ }
213
 
214
+ # ── Step 2: Transcribe ────────────────────────────────────────────────
215
+ yield {"status": "processing", "step": 2,
216
+ "message": "Step 2/4 β€” Transcribing..."}
217
  transcript, detected_lang, t_method = transcriber.transcribe(clean1, src_lang)
218
  word_segs = transcriber._last_segments
219
+ logger.info(f"[Pipeline] Transcription done: {len(transcript.split())} words, lang={detected_lang}")
220
 
221
+ # ── Step 3: Translate ─────────────────────────────────────────────────
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
222
  translation = transcript
223
  tl_method = "same language"
224
  if tgt_lang != "auto" and detected_lang != tgt_lang:
225
+ yield {"status": "processing", "step": 3,
226
+ "message": "Step 3/4 β€” Translating..."}
227
  translation, tl_method = translator.translate(transcript, detected_lang, tgt_lang)
228
+ logger.info(f"[Pipeline] Translation done via {tl_method}")
229
+ else:
230
+ yield {"status": "processing", "step": 3,
231
+ "message": "Step 3/4 β€” Skipping translation (same language)..."}
232
 
233
+ # ── Step 4: Summarize + upload to Cloudinary ──────────────────────────
234
+ yield {"status": "processing", "step": 4,
235
+ "message": "Step 4/4 β€” Summarizing & uploading..."}
236
  summary = translator.summarize(transcript)
237
 
 
 
 
238
  try:
239
  upload_result = cloudinary.uploader.upload(
240
  clean1,
241
+ resource_type="video", # Cloudinary uses "video" for audio files
242
+ folder="clearwave_enhanced",
243
  )
244
  enhanced_url = upload_result["secure_url"]
245
+ logger.info(f"[Pipeline] Cloudinary upload done: {enhanced_url}")
246
  except Exception as e:
247
+ logger.error(f"[Pipeline] Cloudinary upload failed: {e}")
248
  enhanced_url = None
249
 
250
+ # ── Done ─────────────────────────���────────────────────────────────────
251
  yield {
252
  "status": "done",
253
+ "step": 4,
254
  "message": "Done!",
255
  "transcript": transcript,
256
  "translation": translation,
 
258
  "enhancedAudio": enhanced_url,
259
  "stats": {
260
  "language": detected_lang.upper(),
261
+ "noise_method": stats.get("noise_method", "Cleanvoice API"),
262
  "fillers_removed": stats.get("fillers_removed", 0),
263
  "stutters_removed": stats.get("stutters_removed", 0),
264
  "silences_removed_sec": stats.get("silences_removed_sec", 0),
 
266
  "mouth_sounds_removed": stats.get("mouth_sounds_removed", 0),
267
  "transcription_method": t_method,
268
  "translation_method": tl_method,
269
+ "processing_sec": 0,
270
  "word_segments": len(word_segs),
271
  "transcript_words": len(transcript.split()),
272
  },
273
  }
274
+
275
  except Exception as e:
276
  logger.error(f"Pipeline failed: {e}", exc_info=True)
277
  yield {"status": "error", "message": f"Error: {str(e)}"}
278
 
279
 
280
+ # ══════════════════════════════════════════════════════════════════════════════
281
  # ROUTES
282
+ # ══════════════════════════════════════════════════════════════════════════════
283
+
284
  @app.get("/api/health")
285
  async def health():
286
  return JSONResponse({"status": "ok", "service": "ClearWave AI API"})