malek-messaoudii committed on
Commit
4c992d0
·
1 Parent(s): a61d0cb

feat: Enhance TTS functionality to support Base64 audio output alongside file response. Introduce new text_to_speech_base64 function for Base64 conversion and update generate_tts endpoint to handle return_base64 parameter.

Browse files
Files changed (2) hide show
  1. routes/tts_routes.py +67 -16
  2. services/tts_service.py +52 -1
routes/tts_routes.py CHANGED
@@ -1,7 +1,7 @@
1
  from fastapi import APIRouter, HTTPException
2
  from fastapi.responses import FileResponse
3
  from models.tts import TTSRequest
4
- from services.tts_service import text_to_speech
5
  from pathlib import Path
6
 
7
  router = APIRouter(prefix="/tts", tags=["Text To Speech"])
@@ -10,27 +10,78 @@ router = APIRouter(prefix="/tts", tags=["Text To Speech"])
10
  async def generate_tts(request: TTSRequest):
11
  """
12
  Convert text to speech using the free gTTS backend (MP3 only).
 
13
  """
14
  try:
15
- audio_path = text_to_speech(
16
- text=request.text,
17
- voice=request.voice,
18
- fmt=request.format,
19
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
- if not Path(audio_path).exists():
22
- raise HTTPException(status_code=500, detail="Audio file generation failed")
 
 
23
 
24
- media_type = "audio/mpeg"
25
 
26
- return FileResponse(
27
- path=audio_path,
28
- filename=f"speech.{request.format}",
29
- media_type=media_type,
30
- headers={
31
- "Content-Disposition": f"attachment; filename=speech.{request.format}"
32
- },
 
 
 
33
  )
 
 
 
 
 
 
 
 
 
 
34
 
 
 
35
  except Exception as e:
36
  raise HTTPException(status_code=500, detail=str(e))
 
1
  from fastapi import APIRouter, HTTPException
2
  from fastapi.responses import FileResponse
3
  from models.tts import TTSRequest
4
+ from services.tts_service import text_to_speech, text_to_speech_base64
5
  from pathlib import Path
6
 
7
  router = APIRouter(prefix="/tts", tags=["Text To Speech"])
 
10
  async def generate_tts(request: TTSRequest):
11
  """
12
  Convert text to speech using the free gTTS backend (MP3 only).
13
+ Can return either Base64 or file based on return_base64 parameter.
14
  """
15
  try:
16
+ # Option 1: Return Base64
17
+ if request.return_base64:
18
+ result = text_to_speech_base64(
19
+ text=request.text,
20
+ voice=request.voice,
21
+ fmt=request.format,
22
+ )
23
+
24
+ return {
25
+ "success": True,
26
+ "audio_base64": result["audio_base64"],
27
+ "mime_type": result["mime_type"],
28
+ "format": result["format"],
29
+ "filename": result["filename"],
30
+ "size_bytes": result["size_bytes"],
31
+ "size_base64": result["size_base64"]
32
+ }
33
+
34
+ # Option 2: Return file (default behavior)
35
+ else:
36
+ audio_path = text_to_speech(
37
+ text=request.text,
38
+ voice=request.voice,
39
+ fmt=request.format,
40
+ )
41
+
42
+ if not Path(audio_path).exists():
43
+ raise HTTPException(status_code=500, detail="Audio file generation failed")
44
+
45
+ media_type = "audio/mpeg"
46
+
47
+ return FileResponse(
48
+ path=audio_path,
49
+ filename=f"speech.{request.format}",
50
+ media_type=media_type,
51
+ headers={
52
+ "Content-Disposition": f"attachment; filename=speech.{request.format}"
53
+ },
54
+ )
55
 
56
+ except ValueError as e:
57
+ raise HTTPException(status_code=400, detail=str(e))
58
+ except Exception as e:
59
+ raise HTTPException(status_code=500, detail=str(e))
60
 
 
61
 
62
+ @router.post("/base64")
63
+ async def generate_tts_base64(request: TTSRequest):
64
+ """
65
+ Convert text to speech and return as Base64.
66
+ """
67
+ try:
68
+ result = text_to_speech_base64(
69
+ text=request.text,
70
+ voice=request.voice,
71
+ fmt=request.format,
72
  )
73
+
74
+ return {
75
+ "success": True,
76
+ "audio_base64": result["audio_base64"],
77
+ "mime_type": result["mime_type"],
78
+ "format": result["format"],
79
+ "filename": result["filename"],
80
+ "size_bytes": result["size_bytes"],
81
+ "size_base64": result["size_base64"]
82
+ }
83
 
84
+ except ValueError as e:
85
+ raise HTTPException(status_code=400, detail=str(e))
86
  except Exception as e:
87
  raise HTTPException(status_code=500, detail=str(e))
services/tts_service.py CHANGED
@@ -1,6 +1,8 @@
1
  import uuid
 
2
  from pathlib import Path
3
  from gtts import gTTS
 
4
 
5
 
6
  def text_to_speech(
@@ -11,6 +13,7 @@ def text_to_speech(
11
  """
12
  Convert text to speech using gTTS (Google Translate, free).
13
  Only MP3 is supported.
 
14
  """
15
  if not text or not text.strip():
16
  raise ValueError("Text cannot be empty")
@@ -32,4 +35,52 @@ def text_to_speech(
32
  return str(output_path)
33
 
34
  except Exception as e:
35
- raise Exception(f"Unexpected error in text_to_speech: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import uuid
2
+ import base64
3
  from pathlib import Path
4
  from gtts import gTTS
5
+ from fastapi import HTTPException
6
 
7
 
8
  def text_to_speech(
 
13
  """
14
  Convert text to speech using gTTS (Google Translate, free).
15
  Only MP3 is supported.
16
+ Returns file path.
17
  """
18
  if not text or not text.strip():
19
  raise ValueError("Text cannot be empty")
 
35
  return str(output_path)
36
 
37
  except Exception as e:
38
+ raise Exception(f"Unexpected error in text_to_speech: {str(e)}")
39
+
40
+
41
+ def text_to_speech_base64(
42
+ text: str,
43
+ voice: str = "en",
44
+ fmt: str = "mp3",
45
+ ) -> dict:
46
+ """
47
+ Convert text to speech and return as Base64.
48
+ Only MP3 is supported.
49
+ Returns dict with Base64 and metadata.
50
+ """
51
+ if not text or not text.strip():
52
+ raise ValueError("Text cannot be empty")
53
+
54
+ if fmt != "mp3":
55
+ raise ValueError("Only MP3 format is supported by the free TTS backend")
56
+
57
+ try:
58
+ temp_dir = Path("temp_audio")
59
+ temp_dir.mkdir(exist_ok=True)
60
+
61
+ output_filename = f"tts_{uuid.uuid4().hex[:8]}.{fmt}"
62
+ output_path = temp_dir / output_filename
63
+
64
+ # Generate speech
65
+ tts = gTTS(text=text.strip(), lang=voice or "en")
66
+ tts.save(str(output_path))
67
+
68
+ # Read file and convert to Base64
69
+ with open(output_path, "rb") as audio_file:
70
+ audio_bytes = audio_file.read()
71
+ audio_base64 = base64.b64encode(audio_bytes).decode('utf-8')
72
+
73
+ # Clean up temp file
74
+ output_path.unlink(missing_ok=True)
75
+
76
+ return {
77
+ "audio_base64": audio_base64,
78
+ "mime_type": "audio/mpeg",
79
+ "format": fmt,
80
+ "filename": output_filename,
81
+ "size_bytes": len(audio_bytes),
82
+ "size_base64": len(audio_base64)
83
+ }
84
+
85
+ except Exception as e:
86
+ raise Exception(f"Unexpected error in text_to_speech_base64: {str(e)}")