anuj-exe committed on
Commit
b633034
·
verified ·
1 Parent(s): 1c7dda3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -17
app.py CHANGED
@@ -1,7 +1,7 @@
1
  import gradio as gr
2
  from TTS.api import TTS
3
- from fastapi import FastAPI, Response
4
- from fastapi.responses import FileResponse
5
  import uvicorn
6
  import time
7
  import os
@@ -10,7 +10,10 @@ import os
10
  # Configuration
11
  # -----------------------------
12
  YOURTTS_MODEL = "tts_models/multilingual/multi-dataset/your_tts"
13
- FIXED_SPEAKER_PATH = "speakers/voice2.wav"
 
 
 
14
  OUTPUT_PATH = "output.wav"
15
 
16
  tts = TTS(YOURTTS_MODEL, gpu=False)
@@ -18,15 +21,16 @@ tts = TTS(YOURTTS_MODEL, gpu=False)
18
  # -----------------------------
19
  # Core synthesis function
20
  # -----------------------------
21
- def synthesize(text: str):
22
- start_time = time.time()
23
- if not os.path.exists(FIXED_SPEAKER_PATH):
24
- return None, {"error": f"❌ Speaker file not found: {FIXED_SPEAKER_PATH}"}
25
 
 
26
  try:
27
  tts.tts_to_file(
28
  text=text,
29
- speaker_wav=FIXED_SPEAKER_PATH,
30
  file_path=OUTPUT_PATH,
31
  language="en"
32
  )
@@ -41,7 +45,7 @@ def synthesize(text: str):
41
  "processing_time_sec": round(total_time, 3),
42
  "real_time_factor": rtf,
43
  "model_used": YOURTTS_MODEL,
44
- "speaker_used": os.path.basename(FIXED_SPEAKER_PATH),
45
  }
46
  return OUTPUT_PATH, info
47
 
@@ -51,16 +55,17 @@ def synthesize(text: str):
51
  app = FastAPI(title="YourTTS FastAPI", description="Text-to-Speech API")
52
 
53
  @app.post("/synthesize")
54
- async def predict(request: dict):
55
- text = request.get("text")
 
 
56
  if not text:
57
- return {"error": "Missing 'text' field"}
58
 
59
- audio_path, info = synthesize(text)
60
  if audio_path is None:
61
- return info
62
 
63
- # Use headers to send metadata, file as response
64
  headers = {f"x-{k}": str(v) for k, v in info.items()}
65
  return FileResponse(audio_path, media_type="audio/wav", filename="output.wav", headers=headers)
66
 
@@ -69,9 +74,12 @@ async def predict(request: dict):
69
  # -----------------------------
70
  demo = gr.Interface(
71
  fn=synthesize,
72
- inputs=[gr.Textbox(label="Text")],
 
 
 
73
  outputs=[gr.Audio(type="filepath"), gr.JSON()],
74
- title="YourTTS Voice Cloning (English Only, Fixed Speaker)",
75
  allow_flagging="never"
76
  )
77
 
 
1
  import gradio as gr
2
  from TTS.api import TTS
3
+ from fastapi import FastAPI, Request
4
+ from fastapi.responses import FileResponse, JSONResponse
5
  import uvicorn
6
  import time
7
  import os
 
10
  # Configuration
11
  # -----------------------------
12
  YOURTTS_MODEL = "tts_models/multilingual/multi-dataset/your_tts"
13
+ SPEAKERS = {
14
+ "male": "speakers/voice1.wav",
15
+ "female": "speakers/voice2.wav"
16
+ }
17
  OUTPUT_PATH = "output.wav"
18
 
19
  tts = TTS(YOURTTS_MODEL, gpu=False)
 
21
  # -----------------------------
22
  # Core synthesis function
23
  # -----------------------------
24
+ def synthesize(text: str, speaker: str = "female"):
25
+ speaker_path = SPEAKERS.get(speaker.lower())
26
+ if not speaker_path or not os.path.exists(speaker_path):
27
+ return None, {"error": f"❌ Speaker file not found: {speaker_path}"}
28
 
29
+ start_time = time.time()
30
  try:
31
  tts.tts_to_file(
32
  text=text,
33
+ speaker_wav=speaker_path,
34
  file_path=OUTPUT_PATH,
35
  language="en"
36
  )
 
45
  "processing_time_sec": round(total_time, 3),
46
  "real_time_factor": rtf,
47
  "model_used": YOURTTS_MODEL,
48
+ "speaker_used": os.path.basename(speaker_path),
49
  }
50
  return OUTPUT_PATH, info
51
 
 
55
  app = FastAPI(title="YourTTS FastAPI", description="Text-to-Speech API")
56
 
57
  @app.post("/synthesize")
58
+ async def predict(request: Request):
59
+ data = await request.json()
60
+ text = data.get("text")
61
+ speaker = data.get("speaker", "female")
62
  if not text:
63
+ return JSONResponse({"error": "Missing 'text' field"}, status_code=400)
64
 
65
+ audio_path, info = synthesize(text, speaker)
66
  if audio_path is None:
67
+ return JSONResponse(info, status_code=500)
68
 
 
69
  headers = {f"x-{k}": str(v) for k, v in info.items()}
70
  return FileResponse(audio_path, media_type="audio/wav", filename="output.wav", headers=headers)
71
 
 
74
  # -----------------------------
75
  demo = gr.Interface(
76
  fn=synthesize,
77
+ inputs=[
78
+ gr.Textbox(label="Text"),
79
+ gr.Dropdown(choices=["male", "female"], value="female", label="Speaker")
80
+ ],
81
  outputs=[gr.Audio(type="filepath"), gr.JSON()],
82
+ title="YourTTS Voice Cloning (English Only, Select Speaker)",
83
  allow_flagging="never"
84
  )
85