Spaces:

NLP-Debater-Project
/

FastAPI-Backend-Models

Running

App Files Files Community

malek-messaoudii committed about 9 hours ago

Commit

4c992d0

1 Parent(s): a61d0cb

feat: Enhance TTS functionality to support Base64 audio output alongside file response. Introduce new text_to_speech_base64 function for Base64 conversion and update generate_tts endpoint to handle return_base64 parameter.

Browse files

Files changed (2) hide show

routes/tts_routes.py +67 -16
services/tts_service.py +52 -1

routes/tts_routes.py CHANGED Viewed

@@ -1,7 +1,7 @@
 from fastapi import APIRouter, HTTPException
 from fastapi.responses import FileResponse
 from models.tts import TTSRequest
-from services.tts_service import text_to_speech
 from pathlib import Path
 router = APIRouter(prefix="/tts", tags=["Text To Speech"])
@@ -10,27 +10,78 @@ router = APIRouter(prefix="/tts", tags=["Text To Speech"])
 async def generate_tts(request: TTSRequest):
     """
     Convert text to speech using the free gTTS backend (MP3 only).
     """
     try:
-        audio_path = text_to_speech(
-            text=request.text,
-            voice=request.voice,
-            fmt=request.format,
-        )
-        if not Path(audio_path).exists():
-            raise HTTPException(status_code=500, detail="Audio file generation failed")
-        media_type = "audio/mpeg"
-        return FileResponse(
-            path=audio_path,
-            filename=f"speech.{request.format}",
-            media_type=media_type,
-            headers={
-                "Content-Disposition": f"attachment; filename=speech.{request.format}"
-            },
         )
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))

 from fastapi import APIRouter, HTTPException
 from fastapi.responses import FileResponse
 from models.tts import TTSRequest
+from services.tts_service import text_to_speech, text_to_speech_base64
 from pathlib import Path
 router = APIRouter(prefix="/tts", tags=["Text To Speech"])
 async def generate_tts(request: TTSRequest):
     """
     Convert text to speech using the free gTTS backend (MP3 only).
+    Can return either Base64 or file based on return_base64 parameter.
     """
     try:
+        # Option 1: Return Base64
+        if request.return_base64:
+            result = text_to_speech_base64(
+                text=request.text,
+                voice=request.voice,
+                fmt=request.format,
+            )
+            return {
+                "success": True,
+                "audio_base64": result["audio_base64"],
+                "mime_type": result["mime_type"],
+                "format": result["format"],
+                "filename": result["filename"],
+                "size_bytes": result["size_bytes"],
+                "size_base64": result["size_base64"]
+            }
+        # Option 2: Return file (default behavior)
+        else:
+            audio_path = text_to_speech(
+                text=request.text,
+                voice=request.voice,
+                fmt=request.format,
+            )
+            if not Path(audio_path).exists():
+                raise HTTPException(status_code=500, detail="Audio file generation failed")
+            media_type = "audio/mpeg"
+            return FileResponse(
+                path=audio_path,
+                filename=f"speech.{request.format}",
+                media_type=media_type,
+                headers={
+                    "Content-Disposition": f"attachment; filename=speech.{request.format}"
+                },
+            )
+    except ValueError as e:
+        raise HTTPException(status_code=400, detail=str(e))
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
@router.post("/base64")
async def generate_tts_base64(request: TTSRequest):
    """
    Convert text to speech and return as Base64.

    Returns a JSON-serialisable dict with ``success`` set to True plus the
    Base64 audio string and its metadata as produced by
    ``text_to_speech_base64``.

    Raises:
        HTTPException: 400 when the TTS service raises ``ValueError``;
            500 on any other failure.
    """
    # Fields copied from the service result into the response, in order.
    response_fields = (
        "audio_base64",
        "mime_type",
        "format",
        "filename",
        "size_bytes",
        "size_base64",
    )
    try:
        result = text_to_speech_base64(
            text=request.text,
            voice=request.voice,
            fmt=request.format,
        )
        payload = {"success": True}
        payload.update((field, result[field]) for field in response_fields)
        return payload
    except ValueError as e:
        # Keep the original exception as the cause so logs show the real origin.
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e

services/tts_service.py CHANGED Viewed

@@ -1,6 +1,8 @@
 import uuid
 from pathlib import Path
 from gtts import gTTS
 def text_to_speech(
@@ -11,6 +13,7 @@ def text_to_speech(
     """
     Convert text to speech using gTTS (Google Translate, free).
     Only MP3 is supported.
     """
     if not text or not text.strip():
         raise ValueError("Text cannot be empty")
@@ -32,4 +35,52 @@ def text_to_speech(
         return str(output_path)
     except Exception as e:
-        raise Exception(f"Unexpected error in text_to_speech: {str(e)}")

 import uuid
+import base64
 from pathlib import Path
 from gtts import gTTS
+from fastapi import HTTPException
 def text_to_speech(
     """
     Convert text to speech using gTTS (Google Translate, free).
     Only MP3 is supported.
+    Returns file path.
     """
     if not text or not text.strip():
         raise ValueError("Text cannot be empty")
         return str(output_path)
     except Exception as e:
+        raise Exception(f"Unexpected error in text_to_speech: {str(e)}")
def text_to_speech_base64(
    text: str,
    voice: str = "en",
    fmt: str = "mp3",
) -> dict:
    """
    Convert text to speech and return as Base64.
    Only MP3 is supported.

    The audio is synthesised into a uniquely named temporary file under
    ``temp_audio/``, read back, Base64-encoded, and the temporary file is
    removed whether or not synthesis succeeded.

    Args:
        text: Text to synthesise; must contain non-whitespace characters.
        voice: Language code passed to gTTS as ``lang``; falls back to "en"
            when falsy.
        fmt: Output format; must be "mp3".

    Returns:
        dict with keys ``audio_base64``, ``mime_type``, ``format``,
        ``filename``, ``size_bytes`` (raw audio length) and ``size_base64``
        (length of the encoded string).

    Raises:
        ValueError: If ``text`` is empty or ``fmt`` is not "mp3", or if the
            TTS backend itself raises ``ValueError`` for the given input.
        Exception: For any other failure during synthesis or file handling,
            chained to the original error.
    """
    if not text or not text.strip():
        raise ValueError("Text cannot be empty")
    if fmt != "mp3":
        raise ValueError("Only MP3 format is supported by the free TTS backend")

    temp_dir = Path("temp_audio")
    output_filename = f"tts_{uuid.uuid4().hex[:8]}.{fmt}"
    output_path = temp_dir / output_filename

    try:
        temp_dir.mkdir(exist_ok=True)
        # Generate speech
        tts = gTTS(text=text.strip(), lang=voice or "en")
        tts.save(str(output_path))
        # Read the generated audio back so it can be encoded
        audio_bytes = output_path.read_bytes()
    except ValueError:
        # Input-related errors from the backend stay ValueError so callers
        # can distinguish bad requests from internal failures.
        raise
    except Exception as e:
        raise Exception(f"Unexpected error in text_to_speech_base64: {str(e)}") from e
    finally:
        # Always clean up the temp file, including after a failed save/read.
        # Best-effort: a cleanup failure must not mask the real outcome.
        try:
            output_path.unlink(missing_ok=True)
        except OSError:
            pass

    audio_base64 = base64.b64encode(audio_bytes).decode("utf-8")
    return {
        "audio_base64": audio_base64,
        "mime_type": "audio/mpeg",
        "format": fmt,
        "filename": output_filename,
        "size_bytes": len(audio_bytes),
        "size_base64": len(audio_base64),
    }