Spaces:

Clearwave48
/

clearwave-api

Running

App Files Files Community

Clearwave48 committed 2 days ago

Commit

785a835

verified ·

1 Parent(s): 3b1c60e

Update main.py

Browse files

Files changed (1) hide show

main.py +193 -55

main.py CHANGED Viewed

@@ -1,36 +1,42 @@
 """
 ClearWave AI — API Space (FastAPI only)
 Handles /api/health and /api/process-url
-No Gradio, no routing conflicts.
 """
 import os
 import json
 import tempfile
 import logging
 import requests
-import numpy as np
 import cloudinary
 import cloudinary.uploader
 from fastapi import FastAPI, Request
 from fastapi.responses import StreamingResponse, JSONResponse
 from fastapi.middleware.cors import CORSMiddleware
-# Cloudinary config — set these in your HF Space secrets
 cloudinary.config(
     cloud_name = os.environ.get("CLOUD_NAME"),
     api_key    = os.environ.get("API_KEY"),
     api_secret = os.environ.get("API_SECRET"),
 )
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
-from denoiser    import Denoiser
 from transcriber import Transcriber
 from translator  import Translator
-denoiser    = Denoiser()
 transcriber = Transcriber()
 translator  = Translator()
@@ -43,78 +49,208 @@ app.add_middleware(
     allow_headers=["*"],
 )
-# ══════════════════════════════════════════════════════════════════════
 # PIPELINE
-# ══════════════════════════════════════════════════════════════════════
 def run_pipeline(audio_path, src_lang="auto", tgt_lang="te",
                  opt_fillers=True, opt_stutters=True, opt_silences=True,
                  opt_breaths=True, opt_mouth=True):
-    out_dir = tempfile.mkdtemp()
     try:
-        yield {"status": "processing", "step": 1, "message": "Step 1/5 — Denoising..."}
-        denoise1 = denoiser.process(
-            audio_path, out_dir,
-            remove_fillers=False, remove_stutters=False,
-            remove_silences=opt_silences, remove_breaths=opt_breaths,
-            remove_mouth_sounds=opt_mouth, word_segments=None,
-        )
-        clean1 = denoise1["audio_path"]
-        stats  = denoise1["stats"]
-        yield {"status": "processing", "step": 2, "message": "Step 2/5 — Transcribing..."}
         transcript, detected_lang, t_method = transcriber.transcribe(clean1, src_lang)
         word_segs = transcriber._last_segments
-        if (opt_fillers or opt_stutters) and word_segs:
-            yield {"status": "processing", "step": 3, "message": "Step 3/5 — Removing fillers & stutters..."}
-            import soundfile as sf
-            # Read the denoised audio — soundfile can read both WAV and MP3
-            audio_data, sr = sf.read(clean1)
-            if audio_data.ndim == 2:
-                audio_data = audio_data.mean(axis=1)
-            audio_data = audio_data.astype(np.float32)
-            if opt_fillers:
-                audio_data, n_f = denoiser._remove_fillers(audio_data, sr, word_segs)
-                stats["fillers_removed"] = n_f
-                transcript = denoiser.clean_transcript_fillers(transcript)
-            if opt_stutters:
-                audio_data, n_s = denoiser._remove_stutters(audio_data, sr, word_segs)
-                stats["stutters_removed"] = n_s
-            # Write to a fresh .wav — PCM_24 is WAV-only, never write to .mp3 path
-            clean_wav = os.path.join(out_dir, "clean_step3.wav")
-            sf.write(clean_wav, audio_data, sr, format="WAV", subtype="PCM_24")
-            clean1 = clean_wav  # downstream steps (Cloudinary upload) use this
-        else:
-            stats["fillers_removed"]  = 0
-            stats["stutters_removed"] = 0
         translation = transcript
         tl_method   = "same language"
         if tgt_lang != "auto" and detected_lang != tgt_lang:
-            yield {"status": "processing", "step": 4, "message": "Step 4/5 — Translating..."}
             translation, tl_method = translator.translate(transcript, detected_lang, tgt_lang)
-        yield {"status": "processing", "step": 5, "message": "Step 5/5 — Summarizing..."}
         summary = translator.summarize(transcript)
-        # Upload enhanced audio to Cloudinary — returns a URL instead of base64.
-        # This keeps the done SSE event tiny (~200 bytes) instead of ~700KB,
-        # which was causing the JSON to be split across 85+ TCP chunks.
         try:
             upload_result = cloudinary.uploader.upload(
                 clean1,
-                resource_type = "video",  # Cloudinary uses "video" for audio
-                folder        = "clearwave_enhanced",
             )
             enhanced_url = upload_result["secure_url"]
-            logger.info(f"Enhanced audio uploaded: {enhanced_url}")
         except Exception as e:
-            logger.error(f"Cloudinary upload failed: {e}")
             enhanced_url = None
         yield {
             "status":        "done",
-            "step":          5,
             "message":       "Done!",
             "transcript":    transcript,
             "translation":   translation,
@@ -122,7 +258,7 @@ def run_pipeline(audio_path, src_lang="auto", tgt_lang="te",
             "enhancedAudio": enhanced_url,
             "stats": {
                 "language":             detected_lang.upper(),
-                "noise_method":         stats.get("noise_method", "noisereduce"),
                 "fillers_removed":      stats.get("fillers_removed", 0),
                 "stutters_removed":     stats.get("stutters_removed", 0),
                 "silences_removed_sec": stats.get("silences_removed_sec", 0),
@@ -130,19 +266,21 @@ def run_pipeline(audio_path, src_lang="auto", tgt_lang="te",
                 "mouth_sounds_removed": stats.get("mouth_sounds_removed", 0),
                 "transcription_method": t_method,
                 "translation_method":   tl_method,
-                "processing_sec":       stats.get("processing_sec", 0),
                 "word_segments":        len(word_segs),
                 "transcript_words":     len(transcript.split()),
             },
         }
     except Exception as e:
         logger.error(f"Pipeline failed: {e}", exc_info=True)
         yield {"status": "error", "message": f"Error: {str(e)}"}
-# ══════════════════════════════════════════════════════════════════════
 # ROUTES
-# ══════════════════════════════════════════════════════════════════════
 @app.get("/api/health")
 async def health():
     # Liveness probe: always answers with a fixed JSON body identifying the service.
     payload = {"status": "ok", "service": "ClearWave AI API"}
     return JSONResponse(payload)

 """
 ClearWave AI — API Space (FastAPI only)
 Handles /api/health and /api/process-url
+Audio enhancement  : Cleanvoice API (noise, fillers, stutters, silences, breaths)
+Transcription      : Groq Whisper large-v3 (primary) / faster-whisper (fallback)
+Translation        : NLLB-200-1.3B (primary) / Google Translate (fallback)
+Summary            : Extractive (position-scored)
 """
 import os
 import json
+import time
 import tempfile
 import logging
 import requests
 import cloudinary
 import cloudinary.uploader
 from fastapi import FastAPI, Request
 from fastapi.responses import StreamingResponse, JSONResponse
 from fastapi.middleware.cors import CORSMiddleware
+# ── Cloudinary config ─────────────────────────────────────────────────────────
 cloudinary.config(
     cloud_name = os.environ.get("CLOUD_NAME"),
     api_key    = os.environ.get("API_KEY"),
     api_secret = os.environ.get("API_SECRET"),
 )
+# ── Cleanvoice config ─────────────────────────────────────────────────────────
+CLEANVOICE_API_KEY = os.environ.get("CLEANVOICE_API_KEY")
+CLEANVOICE_BASE    = "https://api.cleanvoice.ai/v2"
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 from transcriber import Transcriber
 from translator  import Translator
 transcriber = Transcriber()
 translator  = Translator()
     allow_headers=["*"],
 )
+# ══════════════════════════════════════════════════════════════════════════════
+# CLEANVOICE HELPER
+# ══════════════════════════════════════════════════════════════════════════════
def _cleanvoice_upload(audio_path: str, headers: dict) -> str:
    """Upload the local audio file to Cleanvoice and return the file URL it reports."""
    logger.info("[Cleanvoice] Uploading audio...")
    with open(audio_path, "rb") as f:
        up_resp = requests.post(
            f"{CLEANVOICE_BASE}/uploads",
            headers=headers,
            files={"file": (os.path.basename(audio_path), f)},
            timeout=120,
        )
    up_resp.raise_for_status()
    up_data = up_resp.json()   # parse the body once and reuse it
    # The exact key name is not pinned down here, so both spellings are tried.
    file_url = up_data.get("url") or up_data.get("signedUrl")
    if not file_url:
        raise RuntimeError(f"Cleanvoice upload gave no URL: {up_data}")
    logger.info(f"[Cleanvoice] Upload done → {file_url[:60]}...")
    return file_url


def _cleanvoice_submit(file_url: str, config: dict, headers: dict) -> str:
    """Submit an edit job for the uploaded file and return the job id."""
    logger.info(f"[Cleanvoice] Submitting edit job with config: {config}")
    edit_resp = requests.post(
        f"{CLEANVOICE_BASE}/edits",
        headers={**headers, "Content-Type": "application/json"},
        json={"input": {"files": [file_url], "config": config}},
        timeout=30,
    )
    edit_resp.raise_for_status()
    edit_data = edit_resp.json()
    edit_id   = edit_data.get("id") or edit_data.get("editId")
    if not edit_id:
        raise RuntimeError(f"Cleanvoice edit job gave no ID: {edit_data}")
    logger.info(f"[Cleanvoice] Edit job submitted → id={edit_id}")
    return edit_id


def _cleanvoice_wait(edit_id: str, headers: dict,
                     poll_interval: float, max_attempts: int) -> str:
    """Poll the edit job until it completes and return the result download URL."""
    for attempt in range(1, max_attempts + 1):
        # Sleep first: the job was only just submitted, so an immediate poll is wasted.
        time.sleep(poll_interval)
        status_resp = requests.get(
            f"{CLEANVOICE_BASE}/edits/{edit_id}",
            headers=headers,
            timeout=15,
        )
        status_resp.raise_for_status()
        status_data = status_resp.json()
        status      = status_data.get("status", "unknown")
        logger.info(f"[Cleanvoice] Poll {attempt}/{max_attempts} → status={status}")
        if status == "completed":
            # Grab the output URL — try common key names
            output      = status_data.get("output") or {}
            enhanced_dl = (
                output.get("url")
                or output.get("downloadUrl")
                or status_data.get("downloadUrl")
            )
            if not enhanced_dl:
                raise RuntimeError(f"Cleanvoice completed but no download URL: {status_data}")
            return enhanced_dl
        if status in ("error", "failed"):
            raise RuntimeError(f"Cleanvoice job failed: {status_data.get('message', status_data)}")
        # any other status: still processing — keep polling
    raise RuntimeError(
        f"Cleanvoice timed out after {max_attempts * poll_interval:g}s (edit_id={edit_id})"
    )


def _cleanvoice_download(enhanced_dl: str, out_dir: str) -> str:
    """Stream the enhanced audio at enhanced_dl into out_dir and return the local path."""
    from urllib.parse import urlsplit   # local import: only this helper needs it

    logger.info(f"[Cleanvoice] Downloading result from {enhanced_dl[:60]}...")
    # Take the extension from the URL *path* only, so neither a ?query nor a
    # #fragment can leak into the file name. Default to .mp3 when there is none.
    ext      = os.path.splitext(urlsplit(enhanced_dl).path)[-1] or ".mp3"
    out_path = os.path.join(out_dir, f"cleanvoice_enhanced{ext}")
    # Stream to disk in chunks instead of holding the whole file in memory.
    with requests.get(enhanced_dl, stream=True, timeout=120) as dl:
        dl.raise_for_status()
        with open(out_path, "wb") as f:
            for chunk in dl.iter_content(chunk_size=64 * 1024):
                f.write(chunk)
    logger.info(f"[Cleanvoice] ✅ Enhanced audio saved → {out_path}")
    return out_path


def cleanvoice_enhance(audio_path: str, out_dir: str,
                       opt_fillers: bool  = True,
                       opt_stutters: bool = True,
                       opt_silences: bool = True,
                       opt_breaths: bool  = True,
                       opt_mouth: bool    = True,
                       poll_interval: float = 10,
                       max_attempts: int    = 36) -> dict:
    """
    Full Cleanvoice enhancement pipeline:
      1. Upload audio file  → get a file URL back from Cleanvoice
      2. Submit edit job    → configure which features to enable
      3. Poll until done    → max_attempts × poll_interval seconds
                              (defaults: 36 × 10s = 6 minutes)
      4. Download result    → streamed to a file in out_dir

    Args:
      audio_path    : local path of the audio file to enhance.
      out_dir       : existing directory that receives the enhanced file.
      opt_*         : feature switches forwarded to the Cleanvoice job config;
                      speech enhancement itself is always requested.
      poll_interval : seconds to sleep before each status poll.
      max_attempts  : number of status polls before giving up.

    Returns: {"audio_path": str, "stats": dict}

    Raises:
      RuntimeError               — API key missing, a response lacks the expected
                                   field, the job reports error/failed, or polling
                                   runs out of attempts.
      requests.RequestException  — network failure or non-2xx HTTP status
                                   (propagates unchanged from requests).
      OSError                    — audio_path unreadable or out_dir not writable.
    run_pipeline() catches all of these and falls back to the original audio.
    """
    if not CLEANVOICE_API_KEY:
        raise RuntimeError("CLEANVOICE_API_KEY is not set in HF Space secrets.")
    headers = {"X-API-Key": CLEANVOICE_API_KEY}

    # ── Step 1: Upload ────────────────────────────────────────────────────────
    file_url = _cleanvoice_upload(audio_path, headers)

    # ── Step 2: Submit edit job ───────────────────────────────────────────────
    # Cleanvoice config flags — map the pipeline options to Cleanvoice features
    config = {
        "enhance_speech":      True,           # always on — core noise removal
        "remove_filler_words": opt_fillers,    # um, uh, like, basically...
        "remove_stutters":     opt_stutters,   # word repetitions
        "remove_silence":      opt_silences,   # long pauses
        "remove_breathing":    opt_breaths,    # breath sounds
        "remove_mouth_sounds": opt_mouth,      # clicks, pops, smacks
    }
    edit_id = _cleanvoice_submit(file_url, config, headers)

    # ── Step 3: Poll until done ───────────────────────────────────────────────
    enhanced_dl = _cleanvoice_wait(edit_id, headers, poll_interval, max_attempts)

    # ── Step 4: Download enhanced audio ───────────────────────────────────────
    out_path = _cleanvoice_download(enhanced_dl, out_dir)

    # These stats only echo which features were requested ("yes"/"no", or the
    # raw bool for breaths); no measured counts are read from the API response.
    return {
        "audio_path": out_path,
        "stats": {
            "noise_method":         "Cleanvoice API",
            "fillers_removed":      "yes" if opt_fillers  else "no",
            "stutters_removed":     "yes" if opt_stutters else "no",
            "silences_removed_sec": "yes" if opt_silences else "no",
            "breaths_reduced":      opt_breaths,
            "mouth_sounds_removed": "yes" if opt_mouth    else "no",
        },
    }
+# ══════════════════════════════════════════════════════════════════════════════
 # PIPELINE
+# ══════════════════════════════════════════════════════════════════════════════
 def run_pipeline(audio_path, src_lang="auto", tgt_lang="te",
                  opt_fillers=True, opt_stutters=True, opt_silences=True,
                  opt_breaths=True, opt_mouth=True):
     """
     Run the 4-step pipeline as a generator of progress events.
     Steps: Cleanvoice enhancement → transcription → translation → summary + upload.
     Args:
       audio_path : local path of the input audio file.
       src_lang   : source language passed to the transcriber ("auto" by default).
       tgt_lang   : target language; translation is skipped when it is "auto"
                    or equals the detected language.
       opt_*      : feature switches forwarded to cleanvoice_enhance().
     Yields:
       {"status": "processing", "step": n, "message": ...} before each step, then
       exactly one final event: {"status": "done", ...} with transcript,
       translation, summary, enhancedAudio URL (None if the upload failed) and
       stats, or {"status": "error", "message": ...} if a step raised.
     A Cleanvoice failure is not fatal: the original audio is used instead and
     the failure text is reported in stats["noise_method"].
     """
     import shutil   # function-scope import: only needed for temp-dir cleanup
     started   = time.monotonic()
     out_dir   = tempfile.mkdtemp()
     stats     = {}
     word_segs = []
     try:
         # ── Step 1: Cleanvoice — full audio enhancement ───────────────────────
         yield {"status": "processing", "step": 1,
                "message": "Step 1/4 — Enhancing audio with Cleanvoice..."}
         try:
             result = cleanvoice_enhance(
                 audio_path, out_dir,
                 opt_fillers=opt_fillers,
                 opt_stutters=opt_stutters,
                 opt_silences=opt_silences,
                 opt_breaths=opt_breaths,
                 opt_mouth=opt_mouth,
             )
             clean1 = result["audio_path"]
             stats  = result["stats"]
             logger.info("[Pipeline] Cleanvoice enhancement complete")
         except Exception as e:
             # Cleanvoice failed — log it and continue with original audio
             logger.error(f"[Pipeline] Cleanvoice failed: {e} — using original audio")
             clean1 = audio_path
             stats  = {
                 "noise_method":         f"Cleanvoice failed: {e}",
                 "fillers_removed":      0,
                 "stutters_removed":     0,
                 "silences_removed_sec": 0,
                 "breaths_reduced":      False,
                 "mouth_sounds_removed": 0,
             }
         # ── Step 2: Transcribe ────────────────────────────────────────────────
         yield {"status": "processing", "step": 2,
                "message": "Step 2/4 — Transcribing..."}
         transcript, detected_lang, t_method = transcriber.transcribe(clean1, src_lang)
         # Guard against the transcriber leaving no segments (None) behind,
         # so len(word_segs) below cannot raise.
         word_segs = transcriber._last_segments or []
         logger.info(f"[Pipeline] Transcription done: {len(transcript.split())} words, lang={detected_lang}")
         # ── Step 3: Translate ─────────────────────────────────────────────────
         translation = transcript
         tl_method   = "same language"
         if tgt_lang != "auto" and detected_lang != tgt_lang:
             yield {"status": "processing", "step": 3,
                    "message": "Step 3/4 — Translating..."}
             translation, tl_method = translator.translate(transcript, detected_lang, tgt_lang)
             logger.info(f"[Pipeline] Translation done via {tl_method}")
         else:
             yield {"status": "processing", "step": 3,
                    "message": "Step 3/4 — Skipping translation (same language)..."}
         # ── Step 4: Summarize + upload to Cloudinary ──────────────────────────
         yield {"status": "processing", "step": 4,
                "message": "Step 4/4 — Summarizing & uploading..."}
         summary = translator.summarize(transcript)
         try:
             upload_result = cloudinary.uploader.upload(
                 clean1,
                 resource_type="video",   # Cloudinary uses "video" for audio files
                 folder="clearwave_enhanced",
             )
             enhanced_url = upload_result["secure_url"]
             logger.info(f"[Pipeline] Cloudinary upload done: {enhanced_url}")
         except Exception as e:
             # Best effort: the text results are still returned without audio.
             logger.error(f"[Pipeline] Cloudinary upload failed: {e}")
             enhanced_url = None
         # ── Done ──────────────────────────────────────────────────────────────
         yield {
             "status":        "done",
             "step":          4,
             "message":       "Done!",
             "transcript":    transcript,
             "translation":   translation,
             "summary":       summary,
             "enhancedAudio": enhanced_url,
             "stats": {
                 "language":             detected_lang.upper(),
                 "noise_method":         stats.get("noise_method", "Cleanvoice API"),
                 "fillers_removed":      stats.get("fillers_removed", 0),
                 "stutters_removed":     stats.get("stutters_removed", 0),
                 "silences_removed_sec": stats.get("silences_removed_sec", 0),
                 "breaths_reduced":      stats.get("breaths_reduced", False),
                 "mouth_sounds_removed": stats.get("mouth_sounds_removed", 0),
                 "transcription_method": t_method,
                 "translation_method":   tl_method,
                 # Wall-clock time of the whole pipeline run, in seconds.
                 "processing_sec":       round(time.monotonic() - started, 2),
                 "word_segments":        len(word_segs),
                 "transcript_words":     len(transcript.split()),
             },
         }
     except Exception as e:
         logger.error(f"Pipeline failed: {e}", exc_info=True)
         yield {"status": "error", "message": f"Error: {str(e)}"}
     finally:
         # The enhanced file has already been uploaded (or the upload has failed)
         # by now, so the scratch directory can go. This also runs when the
         # consumer closes the generator early.
         shutil.rmtree(out_dir, ignore_errors=True)
+# ══════════════════════════════════════════════════════════════════════════════
 # ROUTES
+# ══════════════════════════════════════════════════════════════════════════════
 @app.get("/api/health")
 async def health():
     # Liveness probe: always answers with a fixed JSON body identifying the service.
     payload = {"status": "ok", "service": "ClearWave AI API"}
     return JSONResponse(payload)